Java Code Examples for org.apache.avro.generic.GenericDatumReader#read()

The following examples show how to use org.apache.avro.generic.GenericDatumReader#read(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: AvroTestTools.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Wraps a decoder in a {@link RecordIterator} that yields one {@link GenericRecord} per step until the
 * decoder can no longer produce a record.
 *
 * <p>Any {@link IOException} raised while reading ends the iteration and closes {@code is}. Avro decoders
 * are expected to signal the end of the input with an {@link java.io.EOFException}; every other I/O failure
 * is logged so that a truncated or malformed input is not silently mistaken for a clean end of data.
 *
 * @param schema schema used to read the records; also handed to the returned iterator
 * @param is stream that is closed once reading stops (presumably the one backing {@code decoder})
 * @param decoder decoder the records are read from
 * @return an iterator over the decoded records
 */
private static RecordIterator readRecordsFromJsonInputStream(Schema schema, InputStream is, Decoder decoder) {
  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);

  return new RecordIterator(schema, new AbstractIterator<GenericRecord>() {
    @Override
    protected GenericRecord computeNext() {
      try {
        return reader.read(null, decoder);
      } catch (IOException ioe) {
        // End of input is the normal way out of the loop; anything else deserves a trace in the log.
        if (!(ioe instanceof java.io.EOFException)) {
          log.warn("Stopping iteration after failing to read a record.", ioe);
        }
        try {
          is.close();
        } catch (IOException exc) {
          log.warn("Failed to close input stream.", exc);
        }
        endOfData();
        return null;
      }
    }
  });
}
 
Example 2
Source File: AvroDeserializationSchema.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes one serialized message into a record of type {@code T}.
 *
 * @param message bytes placed into the shared input stream before decoding
 * @return the record produced by the datum reader under the current reader schema
 * @throws IOException if the datum reader fails while decoding
 */
@Override
public T deserialize(byte[] message) throws IOException {
	checkAvroInitialized();

	// Point the shared input stream at the new payload before anything reads from the decoder.
	inputStream.setBuffer(message);

	final Schema expectedSchema = getReaderSchema();
	final GenericDatumReader<T> reader = getDatumReader();
	reader.setSchema(expectedSchema);

	final T record = reader.read(null, decoder);
	return record;
}
 
Example 3
Source File: OrcTestTools.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
/**
 * Reads Avro-JSON records from a classpath resource and exposes them as an {@link OrcRowIterator} whose
 * elements are the {@link Writable} values produced by {@code getAvroWritable} for each record.
 *
 * <p>The Avro schema is looked up as {@code schema.avsc} in the grandparent directory of {@code file};
 * when that lookup yields {@code null}, the {@code schema.avsc} in the parent directory of {@code file}
 * is used instead.
 *
 * <p>Iteration stops at the first {@link IOException} raised while reading, at which point the input
 * stream is closed. Avro decoders are expected to signal the end of the input with an
 * {@link java.io.EOFException}; any other read failure is logged before the stream is closed.
 *
 * @param typeInfo The ORC schema in {@link TypeInfo} format.
 * @param file The file name in String format.
 * @return an iterator over the rows decoded from {@code file}
 * @throws IOException declared for callers; any other conditions depend on helpers not visible here
 */
private OrcRowIterator readRecordsFromJsonInputStream(TypeInfo typeInfo, String file) throws IOException {

  InputStream is = OrcTestTools.class.getClassLoader().getResourceAsStream(file);


  // This getParent.getParent is dirty due to we need to simulate multiple-partitions scenarios in iTest.
  String schemaResourceName = new File(new File(file).getParentFile().getParent(), "schema.avsc").toString();

  Schema attemptedSchema = readAvscSchema(schemaResourceName, OrcTestTools.class);
  final Schema avroSchema =
      attemptedSchema == null ? readAvscSchema(new File(new File(file).getParent(), "schema.avsc").toString(),
          OrcTestTools.class) : attemptedSchema;

  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(avroSchema);
  Decoder decoder = DecoderFactory.get().jsonDecoder(avroSchema, is);

  return new OrcRowIterator(typeInfo, new AbstractIterator<Writable>() {
    @Override
    protected Writable computeNext() {
      try {
        GenericRecord record = reader.read(null, decoder);
        return getAvroWritable(record, avroSchema);
      } catch (IOException e) {
        // End of input is the normal way out of the loop; report anything else as a read failure.
        if (!(e instanceof java.io.EOFException)) {
          log.warn("Failed to read record from inputstream, will close it immediately", e);
        }
        try {
          is.close();
        } catch (IOException ioec) {
          log.warn("Failed to close input stream.", ioec);
        }
        endOfData();
        return null;
      }
    }
  });
}
 
Example 4
Source File: SnowflakeAvroConverter.java    From snowflake-kafka-connector with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes a binary Avro record with the given schema and re-parses its string form as JSON.
 *
 * @param bytes  binary-encoded Avro data
 * @param schema Avro schema used to read the data
 * @return the record's {@code toString()} output parsed into a {@link JsonNode}
 */
private JsonNode parseAvroWithSchema(final byte[] bytes, Schema schema)
{
  final GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);
  final Decoder binaryDecoder =
    DecoderFactory.get().binaryDecoder(new ByteArrayInputStream(bytes), null);
  try
  {
    final String recordAsText = datumReader.read(null, binaryDecoder).toString();
    return mapper.readTree(recordAsText);
  }
  catch (IOException e)
  {
    // Both decoding and JSON parsing failures are reported through the same connector error code.
    throw SnowflakeErrors.ERROR_0010.getException("Failed to parse AVRO record\n" + e.toString());
  }
}
 
Example 5
Source File: TestLog4jAppenderWithAvro.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Logs a generic Avro record through the configured appender, takes the resulting event from the
 * channel and checks both the decoded body and the schema-related headers.
 */
@Test
public void testAvroGeneric() throws IOException {
  loadProperties("flume-log4jtest-avro-generic.properties");
  PropertyConfigurator.configure(props);
  Logger avroLogger = LogManager.getLogger(TestLog4jAppenderWithAvro.class);
  String expectedMessage = "This is log message number " + String.valueOf(0);

  // Build the record to log from the schema shipped with the test resources.
  Schema schema = new Schema.Parser().parse(
      getClass().getClassLoader().getResource("myrecord.avsc").openStream());
  GenericRecord loggedRecord = new GenericRecordBuilder(schema).set("message", expectedMessage).build();

  avroLogger.info(loggedRecord);

  Transaction tx = ch.getTransaction();
  tx.begin();
  Event received = ch.take();
  Assert.assertNotNull(received);

  // The event body must decode back into a record carrying the original message.
  BinaryDecoder bodyDecoder = DecoderFactory.get().binaryDecoder(received.getBody(), null);
  GenericRecord decoded = new GenericDatumReader<GenericRecord>(schema).read(null, bodyDecoder);
  Assert.assertEquals(expectedMessage, decoded.get("message").toString());

  Map<String, String> headers = received.getHeaders();

  Assert.assertNull(headers.get(Log4jAvroHeaders.MESSAGE_ENCODING.toString()));

  String schemaUrl = headers.get(Log4jAvroHeaders.AVRO_SCHEMA_URL.toString());
  Assert.assertEquals("Schema URL should be set",
      "file:///tmp/myrecord.avsc", schemaUrl);

  String schemaLiteral = headers.get(Log4jAvroHeaders.AVRO_SCHEMA_LITERAL.toString());
  Assert.assertNull("Schema string should not be set",
      schemaLiteral);

  tx.commit();
  tx.close();

}
 
Example 6
Source File: AbstractAvroSerializer.java    From jstorm with Apache License 2.0 5 votes vote down vote up
/**
 * Reads one Avro container from the Kryo input: first a string that is resolved to a schema via
 * {@code getSchema}, then the binary-encoded datum itself.
 *
 * @throws RuntimeException wrapping any {@link IOException} raised while decoding
 */
public GenericContainer read(Kryo kryo, Input input, Class<GenericContainer> aClass) {
    final Schema resolvedSchema = this.getSchema(input.readString());
    final GenericDatumReader<GenericContainer> datumReader = new GenericDatumReader<>(resolvedSchema);
    final Decoder directDecoder = DecoderFactory.get().directBinaryDecoder(input, null);

    try {
        return datumReader.read(null, directDecoder);
    } catch (IOException ioFailure) {
        throw new RuntimeException(ioFailure);
    }
}
 
Example 7
Source File: NamespaceValidationTest.java    From avro-util with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Under Avro 1.4, a record written with the namespace-less schema must be readable with the namespaced
 * schema as the expected schema, yielding the same enum symbol.
 */
@Test
public void testAvro14DoesntValidateNamespace() throws Exception {
  // Only meaningful on Avro 1.4; skip on every other runtime.
  AvroVersion detectedVersion = AvroCompatibilityHelper.getRuntimeAvroVersion();
  if (detectedVersion != AvroVersion.AVRO_1_4) {
    throw new SkipException("only supported under " + AvroVersion.AVRO_1_4 + ". runtime version detected as " + detectedVersion);
  }
  Schema namespacedSchema = Schema.parse(TestUtil.load("HasNamespace.avsc"));
  Schema plainSchema = Schema.parse(TestUtil.load("HasNoNamespace.avsc"));

  GenericData.Record original = new GenericData.Record(plainSchema);
  original.put("f", AvroCompatibilityHelper.newEnumSymbol(plainSchema.getField("f").schema(), "B"));

  // Serialize with the namespace-less schema.
  ByteArrayOutputStream sink = new ByteArrayOutputStream();
  GenericDatumWriter datumWriter = new GenericDatumWriter(plainSchema);
  BinaryEncoder binaryEncoder = AvroCompatibilityHelper.newBinaryEncoder(sink);
  //noinspection unchecked
  datumWriter.write(original, binaryEncoder);
  binaryEncoder.flush();
  byte[] serialized = sink.toByteArray();

  // Deserialize with writer = namespace-less schema, reader = namespaced schema.
  GenericDatumReader<GenericData.Record> datumReader = new GenericDatumReader<>(plainSchema, namespacedSchema);
  BinaryDecoder binaryDecoder = DecoderFactory.defaultFactory().createBinaryDecoder(serialized, null);
  GenericData.Record roundTripped = datumReader.read(null, binaryDecoder);

  Assert.assertEquals(String.valueOf(roundTripped.get("f")), "B");
}
 
Example 8
Source File: NamespaceValidationTest.java    From avro-util with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Under Avro versions later than 1.4, reading a record written with the namespace-less schema while
 * expecting the namespaced schema must fail with a message naming the mismatching enum types.
 */
@Test
public void testModernAvroValidatesNamespaces() throws Exception {
  // Only meaningful on runtimes newer than Avro 1.4; skip otherwise.
  AvroVersion detectedVersion = AvroCompatibilityHelper.getRuntimeAvroVersion();
  if (!detectedVersion.laterThan(AvroVersion.AVRO_1_4)) {
    throw new SkipException("only supported under modern avro. runtime version detected as " + detectedVersion);
  }
  Schema namespacedSchema = Schema.parse(TestUtil.load("HasNamespace.avsc"));
  Schema plainSchema = Schema.parse(TestUtil.load("HasNoNamespace.avsc"));

  GenericData.Record original = new GenericData.Record(plainSchema);
  original.put("f", AvroCompatibilityHelper.newEnumSymbol(plainSchema.getField("f").schema(), "B"));

  // Serialize with the namespace-less schema.
  ByteArrayOutputStream sink = new ByteArrayOutputStream();
  GenericDatumWriter datumWriter = new GenericDatumWriter(plainSchema);
  BinaryEncoder binaryEncoder = AvroCompatibilityHelper.newBinaryEncoder(sink);
  //noinspection unchecked
  datumWriter.write(original, binaryEncoder);
  binaryEncoder.flush();
  byte[] serialized = sink.toByteArray();

  GenericDatumReader<GenericData.Record> datumReader = new GenericDatumReader<>(plainSchema, namespacedSchema);
  BinaryDecoder binaryDecoder = DecoderFactory.defaultFactory().createBinaryDecoder(serialized, null);
  try {
    datumReader.read(null, binaryDecoder);
    Assert.fail("deserialization was expected to fail");
  } catch (Exception expected) {
    String failureMessage = expected.getMessage();
    Assert.assertTrue(failureMessage.contains("Found EnumType, expecting com.acme.EnumType"));
  }
}
 
Example 9
Source File: FastSerdeTestsSupport.java    From avro-fastserde with Apache License 2.0 5 votes vote down vote up
/**
 * Reads a single datum from {@code decoder} using a {@link GenericDatumReader} built for {@code schema}.
 *
 * @param schema schema handed to the datum reader
 * @param decoder source of the encoded datum
 * @return the decoded datum
 * @throws RuntimeException wrapping any exception raised while reading
 */
public static <T> T deserializeGeneric(Schema schema, Decoder decoder) {
    final GenericDatumReader<T> reader = new GenericDatumReader<>(schema);
    final T datum;
    try {
        datum = reader.read(null, decoder);
    } catch (Exception failure) {
        throw new RuntimeException(failure);
    }
    return datum;
}
 
Example 10
Source File: ReplicatorKafkaAvroTest.java    From replicator with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a binary-encoded Avro datum into its JSON encoding under the same schema.
 *
 * @param avro binary-encoded datum
 * @param schema schema used for both reading the binary form and writing the JSON form
 * @return the non-pretty-printed JSON encoding, decoded from bytes as UTF-8
 * @throws IOException if decoding or encoding fails
 */
public static String avroToJson(byte[] avro, Schema schema) throws IOException {
    GenericDatumReader<Object> datumReader = new GenericDatumReader<>(schema);
    DatumWriter<Object> datumWriter = new GenericDatumWriter<>(schema);
    ByteArrayOutputStream jsonBytes = new ByteArrayOutputStream();
    // The trailing 'false' turns pretty-printing off.
    JsonEncoder jsonEncoder = EncoderFactory.get().jsonEncoder(schema, jsonBytes, false);
    Decoder binaryDecoder = DecoderFactory.get().binaryDecoder(avro, null);

    datumWriter.write(datumReader.read(null, binaryDecoder), jsonEncoder);
    jsonEncoder.flush();
    jsonBytes.flush();
    return jsonBytes.toString("UTF-8");
}
 
Example 11
Source File: HoodieAvroUtils.java    From hudi with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes binary-encoded bytes into an Avro {@link GenericRecord}.
 *
 * @param bytes binary-encoded record
 * @param schema schema used to read the record
 * @return the decoded record
 * @throws IOException if the record cannot be read
 */
public static GenericRecord bytesToAvro(byte[] bytes, Schema schema) throws IOException {
  final BinaryDecoder binaryDecoder = DecoderFactory.get().binaryDecoder(bytes, reuseDecoder.get());
  // Store the decoder so the next call passes it back to the factory as the reuse candidate.
  reuseDecoder.set(binaryDecoder);
  return new GenericDatumReader<GenericRecord>(schema).read(null, binaryDecoder);
}
 
Example 12
Source File: HoodieAvroUtils.java    From hudi with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes JSON-encoded bytes into an Avro {@link GenericRecord}.
 *
 * @param bytes JSON-encoded record
 * @param schema schema used by both the JSON decoder and the datum reader
 * @return the decoded record
 * @throws IOException if the decoder cannot be created or the record cannot be read
 */
public static GenericRecord jsonBytesToAvro(byte[] bytes, Schema schema) throws IOException {
  final JsonDecoder decoder = DecoderFactory.get().jsonDecoder(schema, new ByteArrayInputStream(bytes));
  final GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);
  return datumReader.read(null, decoder);
}
 
Example 13
Source File: HoodieAvroDataBlock.java    From hudi with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes the block content into a list of {@link IndexedRecord}s and stores it in {@code this.records}.
 *
 * <p>Layout as read here: an int version, then (only when the version reports a record count) an int
 * number of records, then for each record an int length followed by that many bytes of encoded record.
 * Records are read with the writer schema taken from the block header and resolved against
 * {@code schema}; when {@code schema} is {@code null} it is set to the writer schema.
 *
 * @throws IOException if reading the header fields or decoding a record fails
 */
@Override
protected void deserializeRecords() throws IOException {
  // The stream is only used to read the int fields and to track the current offset into the content;
  // the record bytes themselves are decoded straight from the content array below.
  SizeAwareDataInputStream dis =
      new SizeAwareDataInputStream(new DataInputStream(new ByteArrayInputStream(getContent().get())));

  // 1. Read version for this data block
  int version = dis.readInt();
  HoodieAvroDataBlockVersion logBlockVersion = new HoodieAvroDataBlockVersion(version);

  // Get schema from the header
  Schema writerSchema = new Schema.Parser().parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));

  // If readerSchema was not present, use writerSchema
  if (schema == null) {
    schema = writerSchema;
  }

  GenericDatumReader<IndexedRecord> reader = new GenericDatumReader<>(writerSchema, schema);
  // 2. Get the total records
  // Stays 0 when the block version carries no record count, in which case no records are read.
  int totalRecords = 0;
  if (logBlockVersion.hasRecordCount()) {
    totalRecords = dis.readInt();
  }
  List<IndexedRecord> records = new ArrayList<>(totalRecords);

  // 3. Read the content
  for (int i = 0; i < totalRecords; i++) {
    int recordLength = dis.readInt();
    // Decode from the content array, starting at the offset the stream has reached so far.
    // The previously cached decoder is passed to the factory as the reuse candidate.
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(getContent().get(), dis.getNumberOfBytesRead(),
        recordLength, decoderCache.get());
    decoderCache.set(decoder);
    IndexedRecord record = reader.read(null, decoder);
    records.add(record);
    // The decoder did not consume from the stream, so advance it past this record manually.
    dis.skipBytes(recordLength);
  }
  dis.close();
  this.records = records;
  // Free up content to be GC'd, deflate
  deflate();
}
 
Example 14
Source File: RegistryAvroDeserializationSchema.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes one message whose writer schema is obtained from the input stream via the schema coder.
 *
 * @param message bytes placed into the input stream before decoding
 * @return the record read with the writer schema and resolved against the reader schema
 * @throws IOException if reading the schema or decoding the record fails
 */
@Override
public T deserialize(byte[] message) throws IOException {
	checkAvroInitialized();
	getInputStream().setBuffer(message);

	// The schema coder reads the writer schema from the stream before the decoder reads the record.
	final Schema actualSchema = schemaCoder.readSchema(getInputStream());
	final Schema expectedSchema = getReaderSchema();

	final GenericDatumReader<T> reader = getDatumReader();
	reader.setSchema(actualSchema);
	reader.setExpected(expectedSchema);

	final T record = reader.read(null, getDecoder());
	return record;
}
 
Example 15
Source File: AvroSerDeFactory.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Decodes a binary-encoded Avro datum of type {@code T} from a byte array.
 *
 * @param bytes binary-encoded datum
 * @param schema schema used to read the datum
 * @return the decoded datum
 * @throws IOException if the datum cannot be read
 */
private static <T> T genericRecordFromBytes(byte[] bytes, Schema schema) throws IOException {
  final BinaryDecoder decoder = DecoderFactory.defaultFactory().createBinaryDecoder(bytes, null);
  final GenericDatumReader<T> datumReader = new GenericDatumReader<>(schema);
  final T datum = datumReader.read(null, decoder);
  return datum;
}
 
Example 16
Source File: TestAvroRelConversion.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Reads one binary-encoded Avro datum of type {@code T} out of {@code bytes}.
 *
 * @param bytes binary-encoded datum
 * @param schema schema the datum reader is built with
 * @return the decoded datum
 * @throws IOException if decoding fails
 */
private static <T> T genericRecordFromBytes(byte[] bytes, Schema schema) throws IOException {
  final BinaryDecoder source = DecoderFactory.defaultFactory().createBinaryDecoder(bytes, null);
  return new GenericDatumReader<T>(schema).read(null, source);
}
 
Example 17
Source File: TestWriteAvroResultWithoutSchema.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Reads a single binary-encoded record from the stream using the supplied schema.
 *
 * @param in stream holding the binary-encoded record
 * @param schema schema used to read the record
 * @return the decoded record
 * @throws IOException if the record cannot be read
 */
@Override
protected GenericRecord readRecord(final InputStream in, final Schema schema) throws IOException {
    final BinaryDecoder source = DecoderFactory.get().binaryDecoder(in, null);
    return new GenericDatumReader<GenericRecord>(schema).read(null, source);
}
 
Example 18
Source File: AvroUtilsTest.java    From incubator-gobblin with Apache License 2.0 4 votes vote down vote up
/**
 * Decorates a flat record with a nested "metadata" record field, then verifies the decorated record
 * both directly and after a binary serialization round trip.
 */
@Test
public void testDecorateRecordWithNestedField() throws IOException {
  // Flat input record: one int field (carrying a custom property) and one string field.
  Schema inputRecordSchema = SchemaBuilder.record("test").fields()
          .name("integer1")
          .prop("innerProp", "innerVal")
          .type().intBuilder().endInt().noDefault()
          .requiredString("string1")
          .endRecord();

  GenericRecord inputRecord = new GenericData.Record(inputRecordSchema);
  inputRecord.put("integer1", 10);
  inputRecord.put("string1", "hello");

  // Nested record that will be attached as the "metadata" field.
  Schema nestedFieldSchema = SchemaBuilder.builder().record("metadata")
          .fields()
          .requiredString("source")
          .requiredLong("timestamp")
          .endRecord();

  Schema.Field nestedField = new Schema.Field("metadata", nestedFieldSchema, "I am a nested field", null);

  Schema outputRecordSchema = AvroUtils.decorateRecordSchema(inputRecordSchema, Collections.singletonList(nestedField));

  Map<String, Object> newFields = new HashMap<>();
  GenericData.Record metadataRecord = new GenericData.Record(nestedFieldSchema);
  metadataRecord.put("source", "oracle");
  metadataRecord.put("timestamp", 1234L);
  newFields.put("metadata", metadataRecord);

  GenericRecord outputRecord = AvroUtils.decorateRecord(inputRecord, newFields, outputRecordSchema);
  Assert.assertEquals(outputRecord.get("integer1"), 10);
  Assert.assertEquals(outputRecord.get("string1"), "hello");
  Assert.assertEquals(outputRecord.get("metadata"), metadataRecord);


  // Test that serializing and deserializing this record works.
  ByteArrayOutputStream serializedOut = new ByteArrayOutputStream(1000);
  Encoder encoder = EncoderFactory.get().binaryEncoder(serializedOut, null);
  GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(outputRecordSchema);
  datumWriter.write(outputRecord, encoder);
  encoder.flush();
  serializedOut.close();

  ByteArrayInputStream serializedIn = new ByteArrayInputStream(serializedOut.toByteArray());
  Decoder decoder = DecoderFactory.get().binaryDecoder(serializedIn, null);
  GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>(outputRecordSchema);
  GenericRecord roundTripped = datumReader.read(null, decoder);
  Assert.assertEquals(roundTripped.get("integer1"), 10);
  Assert.assertEquals(roundTripped.get("string1").toString(), "hello"); //extra toString: avro returns Utf8
  Assert.assertEquals(roundTripped.get("metadata"), metadataRecord);
}
 
Example 19
Source File: AvroUtils.java    From incubator-gobblin with Apache License 2.0 4 votes vote down vote up
/**
 * Deserialize a {@link GenericRecord} from a byte array. This method is not intended for high performance:
 * it builds a new decoder and a new datum reader on every call.
 *
 * @param serializedRecord binary-encoded record
 * @param schema schema used to read the record
 * @return the decoded record
 * @throws IOException if the record cannot be read
 */
public static GenericRecord slowDeserializeGenericRecord(byte[] serializedRecord, Schema schema) throws IOException {
  final Decoder source = DecoderFactory.get().binaryDecoder(serializedRecord, null);
  return new GenericDatumReader<GenericRecord>(schema).read(null, source);
}
 
Example 20
Source File: AvroUtils.java    From brooklin with BSD 2-Clause "Simplified" License 2 votes vote down vote up
/**
 * Decode and deserialize a Json byte array into an instance of an Avro record.
 *
 * @param schema schema describing the expected information of the bytes
 * @param bytes Json string in bytes to decode; interpreted as UTF-8
 * @param reuse instance handed to the datum reader as its reuse argument
 * @return the instance returned by the datum reader
 * @throws IOException if the decoder cannot be created or the record cannot be read
 */
public static <T> T decodeJsonAsAvroGenericRecord(Schema schema, byte[] bytes, T reuse) throws IOException {
  final String json = new String(bytes, StandardCharsets.UTF_8);
  final JsonDecoder decoder = DecoderFactory.get().jsonDecoder(schema, json);
  final GenericDatumReader<T> datumReader = new GenericDatumReader<>(schema);
  return datumReader.read(reuse, decoder);
}