org.apache.avro.generic.GenericData.Record Scala Examples
The following examples show how to use org.apache.avro.generic.GenericData.Record.
Each example notes the open source project and source file it was taken from.
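Before the project examples, here is a minimal, self-contained sketch of the class itself: building a Schema with SchemaBuilder and populating a GenericData.Record. The record name and field value are illustrative, not taken from any project below.

import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.avro.generic.GenericData.Record

object RecordSketch extends App {
  // Build a record schema with one required string field.
  val schema: Schema = SchemaBuilder
    .record("example").fields() // "example" is an illustrative record name
    .requiredString("data")
    .endRecord()

  // A GenericData.Record is a schema-backed container of field name -> value.
  val record = new Record(schema)
  record.put("data", "some value")

  println(record) // GenericData.Record.toString renders the record as JSON
}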
Example 1
Source File: GenericAvroSerializerSuite.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Input, Output}
import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SharedSparkContext, SparkFunSuite}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema: Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
}
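The fingerprint test above turns on SparkConf.registerAvroSchemas: once a schema is registered, the Kryo-backed serializer can ship a small fingerprint with each record instead of the full schema JSON. Here is a minimal sketch of the same wiring in application code, assuming the same test schema; the app name is illustrative.

import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.spark.SparkConf

object AvroKryoConfSketch {
  def main(args: Array[String]): Unit = {
    val schema: Schema = SchemaBuilder
      .record("testRecord").fields()
      .requiredString("data")
      .endRecord()

    // Pre-registering schemas is what enables the fingerprint path,
    // which the "uses schema fingerprint" test asserts produces
    // smaller messages than sending the full schema.
    val conf = new SparkConf()
      .setAppName("avro-kryo-sketch") // illustrative app name
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .registerAvroSchemas(schema)
  }
}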
Example 2
Source File: GenericAvroSerializerSuite.scala From sparkoscope with Apache License 2.0
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Input, Output}
import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SharedSparkContext, SparkFunSuite}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema: Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
}
Example 3
Source File: GenericAvroSerializerSuite.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Input, Output}
import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SharedSparkContext, SparkFunSuite}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema: Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
}
Example 4
Source File: GenericAvroSerializerSuite.scala From spark1.52 with Apache License 2.0
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Output, Input}
import org.apache.avro.{SchemaBuilder, Schema}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SparkFunSuite, SharedSparkContext}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema: Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  // schema compression and decompression
  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  // record serialization and deserialization
  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  // uses a schema fingerprint to reduce message size
  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  // caches previously seen schemas
  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
}
Example 5
Source File: GenericAvroSerializerSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Input, Output}
import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SharedSparkContext, SparkFunSuite}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema: Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
}
Example 6
Source File: GenericAvroSerializerSuite.scala From BigDatalog with Apache License 2.0
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Output, Input}
import org.apache.avro.{SchemaBuilder, Schema}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SparkFunSuite, SharedSparkContext}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema: Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
}
Example 7
Source File: AvroSerializer.scala From stream-reactor with Apache License 2.0
package com.datamountaineer.streamreactor.connect.bloomberg.avro

import java.io.ByteArrayOutputStream

import com.datamountaineer.streamreactor.connect.bloomberg.BloombergData
import com.datamountaineer.streamreactor.connect.bloomberg.avro.AvroSchemaGenerator._
import org.apache.avro.Schema
import org.apache.avro.generic.GenericData.Record
import org.apache.avro.generic.{GenericData, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.EncoderFactory

import scala.collection.JavaConverters._

object AvroSerializer {

  private def recursive(record: GenericData.Record, schema: Schema, fieldName: String, value: Any): Unit = {
    value match {
      case _: Boolean => record.put(fieldName, value)
      case _: Int => record.put(fieldName, value)
      case _: Long => record.put(fieldName, value)
      case _: Double => record.put(fieldName, value)
      case _: Char => record.put(fieldName, value)
      case _: Float => record.put(fieldName, value)
      case _: String => record.put(fieldName, value)

      case list: java.util.List[_] =>
        val tmpSchema = schema.getField(fieldName).schema()
        val itemSchema = if (tmpSchema.getType == Schema.Type.UNION) tmpSchema.getTypes.get(1) else tmpSchema
        require(itemSchema.getType == Schema.Type.ARRAY)
        // we might have a record, not a primitive
        if (itemSchema.getElementType.getType == Schema.Type.RECORD) {
          val items = new GenericData.Array[GenericData.Record](list.size(), itemSchema)
          list.asScala.foreach { i =>
            // only a map is allowed
            val m = i.asInstanceOf[java.util.Map[String, Any]]
            items.add(m.toAvroRecord(itemSchema.getElementType))
          }
          record.put(fieldName, items)
        } else {
          val items = new GenericData.Array[Any](list.size(), itemSchema)
          items.addAll(list)
          record.put(fieldName, items)
        }

      case map: java.util.LinkedHashMap[String @unchecked, _] =>
        // record schema
        val fieldSchema = schema.getField(fieldName).schema()
        val nestedSchema = if (fieldSchema.getType == Schema.Type.UNION) fieldSchema.getTypes.get(1) else fieldSchema
        val nestedRecord = new Record(nestedSchema)
        map.entrySet().asScala.foreach(e =>
          recursive(nestedRecord, nestedSchema, e.getKey, e.getValue))
        record.put(fieldName, nestedRecord)
    }
  }
}
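The object above only populates GenericData.Record instances; its unused imports (GenericDatumWriter, EncoderFactory, ByteArrayOutputStream) point at an encoding step that the excerpt elides. Here is a hedged, self-contained sketch of that step using the standard Avro API; the schema and field are illustrative rather than taken from BloombergData.

import java.io.ByteArrayOutputStream

import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.avro.generic.GenericData.Record
import org.apache.avro.generic.{GenericDatumWriter, GenericRecord}
import org.apache.avro.io.EncoderFactory

object EncodeRecordSketch extends App {
  val schema: Schema = SchemaBuilder
    .record("payload").fields() // illustrative record name
    .requiredString("data")
    .endRecord()

  val record = new Record(schema)
  record.put("data", "test data")

  // Binary-encode the generic record with the stock Avro writer/encoder.
  val out = new ByteArrayOutputStream()
  val encoder = EncoderFactory.get().binaryEncoder(out, null)
  new GenericDatumWriter[GenericRecord](schema).write(record, encoder)
  encoder.flush()

  println(s"encoded ${out.toByteArray.length} bytes")
}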
Example 8
Source File: JsonToAvroConverter.scala From ksql-streams with Apache License 2.0
package com.landoop.kstreams.sql.transform

import java.util

import com.fasterxml.jackson.databind.node._
import com.sksamuel.avro4s.ScaleAndPrecision
import io.confluent.kafka.serializers
import io.confluent.kafka.serializers.NonRecordContainer
import org.apache.avro.generic.GenericContainer
import org.apache.avro.generic.GenericData.Record
import org.apache.avro.{LogicalTypes, Schema}

class JsonToAvroConverter(namespace: String, avroStringTypeIsString: Boolean = false) {

  import org.json4s._
  import org.json4s.native.JsonMethods._

  def convert(name: String, str: String)
             (implicit schema: Option[Schema], sp: ScaleAndPrecision): GenericContainer =
    convert(name, parse(str))

  def convert(name: String, value: JValue)
             (implicit aggregatedSchema: Option[Schema], sp: ScaleAndPrecision): GenericContainer = {
    value match {
      case JArray(arr) =>
        val values = new java.util.ArrayList[AnyRef]()
        val prevSchema = aggregatedSchema.map(_.getField(name)).map(_.schema)
        val result = convert(name, arr.head)(prevSchema, sp)
        result match {
          case n: NonRecordContainer => values.add(n.getValue)
          case _ => values.add(result)
        }
        arr.tail.foreach { v =>
          convert(name, v)(prevSchema, sp) match {
            case n: NonRecordContainer => values.add(n.getValue)
            case other => values.add(other)
          }
        }
        new NonRecordContainer(Schema.createArray(result.getSchema), values)

      case JBool(b) => new NonRecordContainer(Schema.create(Schema.Type.BOOLEAN), b)

      case JDecimal(d) =>
        val schema = Schema.create(Schema.Type.BYTES)
        val decimal = LogicalTypes.decimal(sp.precision, sp.scale)
        decimal.addToSchema(schema)
        new serializers.NonRecordContainer(schema, d.bigDecimal.unscaledValue().toByteArray)

      case JDouble(d) => new serializers.NonRecordContainer(Schema.create(Schema.Type.DOUBLE), d)

      case JInt(i) => new serializers.NonRecordContainer(Schema.create(Schema.Type.LONG), i.toLong)

      case JLong(l) => new serializers.NonRecordContainer(Schema.create(Schema.Type.LONG), l)

      case JNothing => new NonRecordContainer(Schema.create(Schema.Type.NULL), null)

      case JNull =>
        val schema = Schema.createUnion(java.util.Arrays.asList(Schema.create(Schema.Type.NULL), createStringSchema))
        new serializers.NonRecordContainer(schema, null)

      case JString(s) =>
        val schema = createStringSchema
        new serializers.NonRecordContainer(schema, s)

      case JObject(values) =>
        val schema = Schema.createRecord(name, "", namespace, false)
        val fields = new util.ArrayList[Schema.Field]()
        val default: AnyRef = null
        val fieldsMap = values.map { case (n, v) =>
          val prevSchema = aggregatedSchema.map(_.getField(n)).map(_.schema())
          val result = convert(n, v)(prevSchema, sp)
          //schema.setFields(java.util.Arrays.asList()))
          fields.add(new Schema.Field(n, result.getSchema, "", default))
          n -> result
        }.toMap

        import scala.collection.JavaConversions._
        aggregatedSchema.foreach { schema =>
          schema.getFields
            .withFilter(f => !fieldsMap.contains(f.name()))
            .foreach { f =>
              fields.add(new Schema.Field(f.name(), f.schema(), "", default))
            }
        }
        schema.setFields(fields)

        val record = new Record(schema)
        fieldsMap.foreach {
          case (field, v: NonRecordContainer) => record.put(field, v.getValue)
          case (field, v: GenericContainer) => record.put(field, v)
        }
        record
    }
  }

  private def createStringSchema = {
    val schema = Schema.create(Schema.Type.STRING)
    if (avroStringTypeIsString) schema.addProp("avro.java.string", new TextNode("String"))
    schema
  }
}
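A hedged usage sketch of the converter above. The namespace, JSON payload, and ScaleAndPrecision values are illustrative; named arguments are used so the sketch does not depend on the avro4s case class's field order.

import com.landoop.kstreams.sql.transform.JsonToAvroConverter
import com.sksamuel.avro4s.ScaleAndPrecision
import org.apache.avro.Schema

object ConvertSketch extends App {
  // No pre-aggregated schema: the converter derives one from the JSON shape.
  implicit val aggregated: Option[Schema] = None
  // Controls how JDecimal values map to Avro's decimal logical type;
  // 18/4 is an arbitrary illustrative choice.
  implicit val sp: ScaleAndPrecision = ScaleAndPrecision(scale = 4, precision = 18)

  val converter = new JsonToAvroConverter(namespace = "com.example") // illustrative namespace
  val container = converter.convert("person", """{"name": "Alice", "age": 30}""")

  println(container.getSchema) // record schema derived from the JSON object
}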