Java Code Examples for org.apache.avro.generic.GenericDatumWriter#write()

The following examples show how to use org.apache.avro.generic.GenericDatumWriter#write(). They are extracted from open source projects; where known, the source project, file, and license are noted above each example.
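
Before the project-specific examples, here is a minimal sketch of the pattern they all share: build a GenericDatumWriter from a Schema, obtain an Encoder from EncoderFactory, call write() for each datum, then flush the encoder before using the output bytes. The schema and field values below are illustrative only and do not come from any of the projects listed here.

public static void writeReadRoundTrip() throws IOException {
  // Illustrative schema: a record with one string field and one int field.
  Schema schema = new Schema.Parser().parse(
      "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
          + "{\"name\":\"name\",\"type\":\"string\"},"
          + "{\"name\":\"age\",\"type\":\"int\"}]}");

  GenericRecord user = new GenericData.Record(schema);
  user.put("name", "alice");
  user.put("age", 30);

  ByteArrayOutputStream out = new ByteArrayOutputStream();
  GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
  BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
  writer.write(user, encoder); // may be called repeatedly to append more records
  encoder.flush();             // required: the binary encoder buffers internally
  byte[] bytes = out.toByteArray();

  // Read the bytes back to verify the round trip.
  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
  BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(bytes, null);
  GenericRecord copy = reader.read(null, decoder);
}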
Example 1
Source Project: beam   File: BigQueryIOStorageReadTest.java    License: Apache License 2.0
private static ReadRowsResponse createResponse(
    Schema schema, Collection<GenericRecord> genericRecords, double fractionConsumed)
    throws Exception {
  GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
  ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
  Encoder binaryEncoder = ENCODER_FACTORY.binaryEncoder(outputStream, null);
  for (GenericRecord genericRecord : genericRecords) {
    writer.write(genericRecord, binaryEncoder);
  }

  binaryEncoder.flush();

  return ReadRowsResponse.newBuilder()
      .setAvroRows(
          AvroRows.newBuilder()
              .setSerializedBinaryRows(ByteString.copyFrom(outputStream.toByteArray()))
              .setRowCount(genericRecords.size()))
      .setStatus(StreamStatus.newBuilder().setFractionConsumed((float) fractionConsumed))
      .build();
}
 
Example 2
Source Project: beam   File: BigQueryIOStorageQueryTest.java    License: Apache License 2.0
private static ReadRowsResponse createResponse(
    Schema schema, Collection<GenericRecord> genericRecords, double fractionConsumed)
    throws Exception {
  GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
  ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
  Encoder binaryEncoder = ENCODER_FACTORY.binaryEncoder(outputStream, null);
  for (GenericRecord genericRecord : genericRecords) {
    writer.write(genericRecord, binaryEncoder);
  }

  binaryEncoder.flush();

  return ReadRowsResponse.newBuilder()
      .setAvroRows(
          AvroRows.newBuilder()
              .setSerializedBinaryRows(ByteString.copyFrom(outputStream.toByteArray()))
              .setRowCount(genericRecords.size()))
      .setStatus(StreamStatus.newBuilder().setFractionConsumed((float) fractionConsumed))
      .build();
}
 
Example 3
public static <T> Decoder genericDataAsDecoder(T data, Schema schema) {
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  Encoder binaryEncoder = AvroCompatibilityHelper.newBinaryEncoder(baos, true, null);

  try {
    GenericDatumWriter<T> writer = new GenericDatumWriter<>(schema);
    writer.write(data, binaryEncoder);
    binaryEncoder.flush();
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  return DecoderFactory.defaultFactory().createBinaryDecoder(baos.toByteArray(), null);
}
 
Example 4
@Test
public void testAvro14DoesntValidateNamespace() throws Exception {
  AvroVersion runtimeVersion = AvroCompatibilityHelper.getRuntimeAvroVersion();
  if (runtimeVersion != AvroVersion.AVRO_1_4) {
    throw new SkipException("only supported under " + AvroVersion.AVRO_1_4 + ". runtime version detected as " + runtimeVersion);
  }
  String withAvsc = TestUtil.load("HasNamespace.avsc");
  Schema with = Schema.parse(withAvsc);
  String withoutAvsc = TestUtil.load("HasNoNamespace.avsc");
  Schema without = Schema.parse(withoutAvsc);

  GenericData.Record record = new GenericData.Record(without);
  record.put("f", AvroCompatibilityHelper.newEnumSymbol(without.getField("f").schema(), "B"));

  ByteArrayOutputStream os = new ByteArrayOutputStream();
  GenericDatumWriter writer = new GenericDatumWriter(without);
  BinaryEncoder encoder = AvroCompatibilityHelper.newBinaryEncoder(os);
  //noinspection unchecked
  writer.write(record, encoder);
  encoder.flush();
  byte[] bytes = os.toByteArray();

  GenericDatumReader<GenericData.Record> reader = new GenericDatumReader<>(without, with);
  BinaryDecoder decoder = DecoderFactory.defaultFactory().createBinaryDecoder(bytes, null);
  GenericData.Record read = reader.read(null, decoder);

  String value = String.valueOf(read.get("f"));
  Assert.assertEquals(value, "B");
}
 
Example 5
@Test
public void testModernAvroValidatesNamespaces() throws Exception {
  AvroVersion runtimeVersion = AvroCompatibilityHelper.getRuntimeAvroVersion();
  if (!runtimeVersion.laterThan(AvroVersion.AVRO_1_4)) {
    throw new SkipException("only supported under modern avro. runtime version detected as " + runtimeVersion);
  }
  String withAvsc = TestUtil.load("HasNamespace.avsc");
  Schema with = Schema.parse(withAvsc);
  String withoutAvsc = TestUtil.load("HasNoNamespace.avsc");
  Schema without = Schema.parse(withoutAvsc);

  GenericData.Record record = new GenericData.Record(without);
  record.put("f", AvroCompatibilityHelper.newEnumSymbol(without.getField("f").schema(), "B"));

  ByteArrayOutputStream os = new ByteArrayOutputStream();
  GenericDatumWriter writer = new GenericDatumWriter(without);
  BinaryEncoder encoder = AvroCompatibilityHelper.newBinaryEncoder(os);
  //noinspection unchecked
  writer.write(record, encoder);
  encoder.flush();
  byte[] bytes = os.toByteArray();

  GenericDatumReader<GenericData.Record> reader = new GenericDatumReader<>(without, with);
  BinaryDecoder decoder = DecoderFactory.defaultFactory().createBinaryDecoder(bytes, null);
  try {
    GenericData.Record read = reader.read(null, decoder);
    Assert.fail("deserialization was expected to fail");
  } catch (Exception expected) {
    Assert.assertTrue(expected.getMessage().contains("Found EnumType, expecting com.acme.EnumType"));
  }
}
 
Example 6
public static <T extends SpecificRecord> T toSpecificRecord(GenericData.Record record) throws IOException {
    GenericDatumWriter<GenericData.Record> datumWriter = new GenericDatumWriter<>(record.getSchema());
    ByteArrayOutputStream baos = new ByteArrayOutputStream();

    Encoder binaryEncoder = EncoderFactory.get().binaryEncoder(baos, null);
    datumWriter.write(record, binaryEncoder);
    binaryEncoder.flush();

    SpecificDatumReader<T> datumReader = new SpecificDatumReader<>(record.getSchema());
    return datumReader.read(null, DecoderFactory.get().binaryDecoder(baos.toByteArray(), null));
}
 
Example 7
@Setup
public void init() throws Exception {
    final GenericDatumWriter<GenericData.Record> datumWriter = new GenericDatumWriter<>(specificRecordSchema);
    for (int i = 0; i < 1000; i++) {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        Encoder encoder = EncoderFactory.get().binaryEncoder(baos, null);

        genericRecords.add(FastSerdeBenchmarkSupport.generateRandomRecordData(specificRecordSchema));
        specificRecords
                .add(FastSerdeBenchmarkSupport.toSpecificRecord(genericRecords.get(genericRecords.size() - 1)));

        datumWriter.write(genericRecords.get(genericRecords.size() - 1), encoder);
        encoder.flush();

        recordBytes.add(baos.toByteArray());
    }
    fastGenericDatumReader = new FastGenericDatumReader<>(
            specificRecordSchema, cache);
    fastGenericDatumWriter = new FastGenericDatumWriter<>(specificRecordSchema, cache);

    genericDatumReader = new GenericDatumReader<>(specificRecordSchema);
    genericDatumWriter = new GenericDatumWriter<>(specificRecordSchema);

    fastSpecificDatumReader = new FastSpecificDatumReader<>(
            specificRecordSchema, cache);
    fastSpecificDatumWriter = new FastSpecificDatumWriter<>(specificRecordSchema, cache);

    specificDatumReader = new SpecificDatumReader<>(specificRecordSchema);
    specificDatumWriter = new SpecificDatumWriter<>(specificRecordSchema);
}
 
Example 8
public static <T> Decoder serializeGeneric(T data, Schema schema) {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    BinaryEncoder binaryEncoder = EncoderFactory.get().directBinaryEncoder(baos, null);

    try {
        GenericDatumWriter<T> writer = new GenericDatumWriter<>(schema);
        writer.write(data, binaryEncoder);
        binaryEncoder.flush();

    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    return DecoderFactory.get().binaryDecoder(baos.toByteArray(), null);
}
 
Example 9
Source Project: javabase   File: AvroSupport.java    License: Apache License 2.0
public static byte[] dataToByteArray(Schema schema, GenericRecord datum) throws IOException {
    GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    try {
        Encoder e = EncoderFactory.get().binaryEncoder(os, null);
        writer.write(datum, e);
        e.flush();
        byte[] byteData = os.toByteArray();
        return byteData;
    } finally {
        os.close();
    }
}
 
Example 10
Source Project: kafka-monitor   File: Utils.java    License: Apache License 2.0
public static String jsonFromGenericRecord(GenericRecord record) {
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(DefaultTopicSchema.MESSAGE_V0);

  try {
    Encoder encoder = new JsonEncoder(DefaultTopicSchema.MESSAGE_V0, out);
    writer.write(record, encoder);
    encoder.flush();
  } catch (IOException e) {
    LOG.error("Unable to serialize avro record due to error " + e);
  }
  return out.toString();
}
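
The JsonEncoder constructor used above comes from an older Avro API; on recent Avro versions a JSON encoder is normally obtained from EncoderFactory instead. A minimal sketch of the same idea, assuming Avro 1.8+ and a schema/record pair defined as in the other examples:

public static String recordToJson(Schema schema, GenericRecord record) throws IOException {
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
  Encoder encoder = EncoderFactory.get().jsonEncoder(schema, out); // JSON output instead of binary
  writer.write(record, encoder);
  encoder.flush();
  return out.toString("UTF-8"); // the JSON encoder emits UTF-8 text
}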
 
Example 11
Source Project: hudi   File: HoodieAvroUtils.java    License: Apache License 2.0
/**
 * Convert a given avro record to bytes.
 */
public static byte[] avroToBytes(GenericRecord record) throws IOException {
  GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(record.getSchema());
  ByteArrayOutputStream out = new ByteArrayOutputStream();
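  // reuseEncoder (declared elsewhere in HoodieAvroUtils) supplies a previously created
  // encoder so its internal buffer can be reused instead of re-allocated on every call.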
  BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, reuseEncoder.get());
  reuseEncoder.set(encoder);
  writer.write(record, encoder);
  encoder.flush();
  out.close();
  return out.toByteArray();
}
 
Example 12
Source Project: components   File: DatasetContentWriter.java    License: Apache License 2.0
private void writeIndexedRecord(GenericDatumWriter<IndexedRecord> writer, Encoder encoder, IndexedRecord indexedRecord) {
    try {
        writer.write(indexedRecord, encoder);
    } catch (IOException e) {
        log.warn("Couldn't serialize Avro record.", e);
    }
}
 
Example 13
/**
 * Push the records from the given Avro files into a Kafka stream.
 *
 * @param avroFiles List of Avro files
 * @param kafkaBroker Kafka broker config
 * @param kafkaTopic Kafka topic
 * @param maxNumKafkaMessagesPerBatch Maximum number of Kafka messages per batch
 * @param header Optional Kafka message header
 * @param partitionColumn Optional partition column
 * @throws Exception
 */
public static void pushAvroIntoKafka(List<File> avroFiles, String kafkaBroker, String kafkaTopic,
    int maxNumKafkaMessagesPerBatch, @Nullable byte[] header, @Nullable String partitionColumn)
    throws Exception {
  Properties properties = new Properties();
  properties.put("metadata.broker.list", kafkaBroker);
  properties.put("serializer.class", "kafka.serializer.DefaultEncoder");
  properties.put("request.required.acks", "1");
  properties.put("partitioner.class", "kafka.producer.ByteArrayPartitioner");

  StreamDataProducer producer =
      StreamDataProvider.getStreamDataProducer(KafkaStarterUtils.KAFKA_PRODUCER_CLASS_NAME, properties);

  try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(65536)) {
    for (File avroFile : avroFiles) {
      try (DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile)) {
        BinaryEncoder binaryEncoder = new EncoderFactory().directBinaryEncoder(outputStream, null);
        GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(reader.getSchema());
        for (GenericRecord genericRecord : reader) {
          outputStream.reset();
          if (header != null && 0 < header.length) {
            outputStream.write(header);
          }
          datumWriter.write(genericRecord, binaryEncoder);
          binaryEncoder.flush();

          byte[] keyBytes = (partitionColumn == null) ? Longs.toByteArray(System.currentTimeMillis())
              : (genericRecord.get(partitionColumn)).toString().getBytes();
          byte[] bytes = outputStream.toByteArray();
          producer.produce(kafkaTopic, keyBytes, bytes);
        }
      }
    }
  }
}
 
Example 14
FakePartitionLevelConsumer(int partition, StreamConfig streamConfig) {

    // TODO: this logic can move to a FakeStreamProducer instead of being inside the Consumer
    File tempDir = new File(FileUtils.getTempDirectory(), getClass().getSimpleName());
    File outputDir = new File(tempDir, String.valueOf(partition));

    int offset = 0;

    try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(65536)) {
      File avroFile = unpackAvroTarFile(outputDir).get(0);

      int numPartitions = FakeStreamConfigUtils.getNumPartitions(streamConfig);

      try (DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile)) {
        BinaryEncoder binaryEncoder = new EncoderFactory().directBinaryEncoder(outputStream, null);
        GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(reader.getSchema());

        int recordNumber = 0;
        for (GenericRecord genericRecord : reader) {
          if (getPartitionNumber(recordNumber++, numPartitions) != partition) {
            continue;
          }
          outputStream.reset();

          datumWriter.write(genericRecord, binaryEncoder);
          binaryEncoder.flush();

          byte[] bytes = outputStream.toByteArray();
          // contiguous offsets
          messageOffsets.add(offset++);
          messageBytes.add(bytes);
        }
      }
    } catch (Exception e) {
      LOGGER.error("Could not create {}", FakePartitionLevelConsumer.class.getName(), e);
    } finally {
      FileUtils.deleteQuietly(outputDir);
    }
  }
 
Example 15
Source Project: jstorm   File: AbstractAvroSerializer.java    License: Apache License 2.0
public void write(Kryo kryo, Output output, GenericContainer record) {

        String fingerPrint = this.getFingerprint(record.getSchema());
        output.writeString(fingerPrint);
        GenericDatumWriter<GenericContainer> writer = new GenericDatumWriter<>(record.getSchema());

        BinaryEncoder encoder = EncoderFactory
                .get()
                .directBinaryEncoder(output, null);
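        // The direct binary encoder writes straight to the Kryo Output without an
        // internal buffer, so no explicit encoder flush is needed after write().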
        try {
            writer.write(record, encoder);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
 
Example 16
@Test
public void testSerializeDeserialize() throws Exception {
	final DataType dataType = ROW(
		FIELD("bool", BOOLEAN()),
		FIELD("tinyint", TINYINT()),
		FIELD("smallint", SMALLINT()),
		FIELD("int", INT()),
		FIELD("bigint", BIGINT()),
		FIELD("float", FLOAT()),
		FIELD("double", DOUBLE()),
		FIELD("name", STRING()),
		FIELD("bytes", BYTES()),
		FIELD("decimal", DECIMAL(19, 6)),
		FIELD("doubles", ARRAY(DOUBLE())),
		FIELD("time", TIME(0)),
		FIELD("date", DATE()),
		FIELD("timestamp3", TIMESTAMP(3)),
		FIELD("timestamp3_2", TIMESTAMP(3)),
		FIELD("map", MAP(STRING(), BIGINT())),
		FIELD("map2map", MAP(STRING(), MAP(STRING(), INT()))),
		FIELD("map2array", MAP(STRING(), ARRAY(INT()))));
	final RowType rowType = (RowType) dataType.getLogicalType();
	final TypeInformation<RowData> typeInfo = new RowDataTypeInfo(rowType);

	final Schema schema = AvroSchemaConverter.convertToSchema(rowType);
	final GenericRecord record = new GenericData.Record(schema);
	record.put(0, true);
	record.put(1, (int) Byte.MAX_VALUE);
	record.put(2, (int) Short.MAX_VALUE);
	record.put(3, 33);
	record.put(4, 44L);
	record.put(5, 12.34F);
	record.put(6, 23.45);
	record.put(7, "hello avro");
	record.put(8, ByteBuffer.wrap(new byte[]{1, 2, 4, 5, 6, 7, 8, 12}));

	record.put(9, ByteBuffer.wrap(
			BigDecimal.valueOf(123456789, 6).unscaledValue().toByteArray()));

	List<Double> doubles = new ArrayList<>();
	doubles.add(1.2);
	doubles.add(3.4);
	doubles.add(567.8901);
	record.put(10, doubles);

	record.put(11, 18397);
	record.put(12, 10087);
	record.put(13, 1589530213123L);
	record.put(14, 1589530213122L);

	Map<String, Long> map = new HashMap<>();
	map.put("flink", 12L);
	map.put("avro", 23L);
	record.put(15, map);

	Map<String, Map<String, Integer>> map2map = new HashMap<>();
	Map<String, Integer> innerMap = new HashMap<>();
	innerMap.put("inner_key1", 123);
	innerMap.put("inner_key2", 234);
	map2map.put("outer_key", innerMap);
	record.put(16, map2map);

	List<Integer> list1 = Arrays.asList(1, 2, 3, 4, 5, 6);
	List<Integer> list2 = Arrays.asList(11, 22, 33, 44, 55);
	Map<String, List<Integer>> map2list = new HashMap<>();
	map2list.put("list1", list1);
	map2list.put("list2", list2);
	record.put(17, map2list);

	AvroRowDataSerializationSchema serializationSchema = new AvroRowDataSerializationSchema(rowType);
	serializationSchema.open(null);
	AvroRowDataDeserializationSchema deserializationSchema =
		new AvroRowDataDeserializationSchema(rowType, typeInfo);
	deserializationSchema.open(null);

	ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
	GenericDatumWriter<IndexedRecord> datumWriter = new GenericDatumWriter<>(schema);
	Encoder encoder = EncoderFactory.get().binaryEncoder(byteArrayOutputStream, null);
	datumWriter.write(record, encoder);
	encoder.flush();
	byte[] input = byteArrayOutputStream.toByteArray();

	RowData rowData = deserializationSchema.deserialize(input);
	byte[] output = serializationSchema.serialize(rowData);

	assertArrayEquals(input, output);
}
 
Example 17
Source Project: incubator-gobblin   File: AvroUtilsTest.java    License: Apache License 2.0
@Test
public void testDecorateRecordWithNestedField() throws IOException {
  Schema inputRecordSchema = SchemaBuilder.record("test").fields()
          .name("integer1")
          .prop("innerProp", "innerVal")
          .type().intBuilder().endInt().noDefault()
          .requiredString("string1")
          .endRecord();

  GenericRecord inputRecord = new GenericData.Record(inputRecordSchema);
  inputRecord.put("integer1", 10);
  inputRecord.put("string1", "hello");

  Schema nestedFieldSchema = SchemaBuilder.builder().record("metadata")
          .fields()
          .requiredString("source")
          .requiredLong("timestamp")
          .endRecord();

  Schema.Field nestedField = new Schema.Field("metadata", nestedFieldSchema, "I am a nested field", null);

  Schema outputRecordSchema = AvroUtils.decorateRecordSchema(inputRecordSchema, Collections.singletonList(nestedField));
  Map<String, Object> newFields = new HashMap<>();

  GenericData.Record metadataRecord = new GenericData.Record(nestedFieldSchema);
  metadataRecord.put("source", "oracle");
  metadataRecord.put("timestamp", 1234L);

  newFields.put("metadata", metadataRecord);

  GenericRecord outputRecord = AvroUtils.decorateRecord(inputRecord, newFields, outputRecordSchema);
  Assert.assertEquals(outputRecord.get("integer1"), 10);
  Assert.assertEquals(outputRecord.get("string1"), "hello");
  Assert.assertEquals(outputRecord.get("metadata"), metadataRecord);


  // Test that serializing and deserializing this record works.
  GenericDatumWriter writer = new GenericDatumWriter(outputRecordSchema);
  ByteArrayOutputStream baos = new ByteArrayOutputStream(1000);
  Encoder binaryEncoder = EncoderFactory.get().binaryEncoder(baos, null);
  writer.write(outputRecord, binaryEncoder);
  binaryEncoder.flush();
  baos.close();

  ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
  Decoder binaryDecoder = DecoderFactory.get().binaryDecoder(bais, null);
  GenericDatumReader reader = new GenericDatumReader(outputRecordSchema);
  GenericRecord deserialized = (GenericRecord) reader.read(null, binaryDecoder);
  Assert.assertEquals(deserialized.get("integer1"), 10);
  Assert.assertEquals(deserialized.get("string1").toString(), "hello"); //extra toString: avro returns Utf8
  Assert.assertEquals(deserialized.get("metadata"), metadataRecord);
}
 
Example 18
/**
 * Push randomly generated records (based on the schema of the given Avro file) into a Kafka stream.
 *
 * @param avroFile Sample Avro file used to extract the Avro schema
 * @param kafkaBroker Kafka broker config
 * @param kafkaTopic Kafka topic
 * @param numKafkaMessagesToPush Number of Kafka messages to push
 * @param maxNumKafkaMessagesPerBatch Maximum number of Kafka messages per batch
 * @param header Optional Kafka message header
 * @param partitionColumn Optional partition column
 * @throws Exception
 */
@SuppressWarnings("unused")
public static void pushRandomAvroIntoKafka(File avroFile, String kafkaBroker, String kafkaTopic,
    int numKafkaMessagesToPush, int maxNumKafkaMessagesPerBatch, @Nullable byte[] header,
    @Nullable String partitionColumn)
    throws Exception {
  Properties properties = new Properties();
  properties.put("metadata.broker.list", kafkaBroker);
  properties.put("serializer.class", "kafka.serializer.DefaultEncoder");
  properties.put("request.required.acks", "1");
  properties.put("partitioner.class", "kafka.producer.ByteArrayPartitioner");

  StreamDataProducer producer =
      StreamDataProvider.getStreamDataProducer(KafkaStarterUtils.KAFKA_PRODUCER_CLASS_NAME, properties);
  try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(65536)) {
    try (DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile)) {
      BinaryEncoder binaryEncoder = new EncoderFactory().directBinaryEncoder(outputStream, null);
      Schema avroSchema = reader.getSchema();
      GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(avroSchema);
      GenericRecord genericRecord = new GenericData.Record(avroSchema);

      while (numKafkaMessagesToPush > 0) {
        generateRandomRecord(genericRecord, avroSchema);

        outputStream.reset();
        if (header != null && 0 < header.length) {
          outputStream.write(header);
        }
        datumWriter.write(genericRecord, binaryEncoder);
        binaryEncoder.flush();

        byte[] keyBytes = (partitionColumn == null) ? Longs.toByteArray(System.currentTimeMillis())
            : (genericRecord.get(partitionColumn)).toString().getBytes();
        byte[] bytes = outputStream.toByteArray();

        producer.produce(kafkaTopic, keyBytes, bytes);
        numKafkaMessagesToPush--;
      }
    }
  }
}