org.apache.avro.io.DecoderFactory Java Examples

The following examples show how to use org.apache.avro.io.DecoderFactory. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FastSpecificSerializerGeneratorTest.java    From avro-fastserde with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes {@code data} with a freshly generated fast serializer for {@code schema}
 * and returns a binary decoder positioned over the produced bytes.
 *
 * @param data   the value to serialize
 * @param schema the schema handed to the serializer generator
 * @return a binary decoder over the serialized form of {@code data}
 * @throws RuntimeException wrapping any exception raised while generating or running the serializer
 */
private <T> Decoder serializeSpecificFast(T data, Schema schema) {
    final ByteArrayOutputStream sink = new ByteArrayOutputStream();
    final BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder(sink, null);

    try {
        final FastSerializer<T> serializer =
                new FastSpecificSerializerGenerator<T>(schema, tempDir, classLoader, null)
                        .generateSerializer();
        serializer.serialize(data, encoder);
        encoder.flush();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    final byte[] serialized = sink.toByteArray();
    return DecoderFactory.get().binaryDecoder(serialized, null);
}
 
Example #2
Source File: TestIOUtils.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Reads every record of a JSON-encoded Avro data file as a {@link GenericRecord}.
 *
 * @param jsonDataPath path of the file holding the JSON-encoded records
 * @param schemaPath   path of the Avro schema file used to decode the records
 * @return the decoded records, in file order
 * @throws Exception if the schema cannot be parsed or a record cannot be decoded
 */
public static List<GenericRecord> readAllRecords(String jsonDataPath, String schemaPath)
    throws Exception {
  final File dataFile = new File(jsonDataPath);
  final File avscFile = new File(schemaPath);
  final Schema schema = new Schema.Parser().parse(avscFile);
  final GenericDatumReader<GenericRecord> recordReader = new GenericDatumReader<>(schema);

  final List<GenericRecord> result = new ArrayList<>();
  try (InputStream jsonStream = new FileInputStream(dataFile)) {
    final Decoder jsonDecoder = DecoderFactory.get().jsonDecoder(schema, jsonStream);
    // The loop has no exit condition of its own: it ends when the reader signals end of input.
    for (;;) {
      final GenericRecord next = recordReader.read(null, jsonDecoder);
      result.add(next);
    }
  } catch (EOFException endOfInput) {
    // expected: every record has been consumed
  }

  return result;
}
 
Example #3
Source File: AvroSerializer.java    From flume-elasticsearch-sink with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the Avro binary body of an event to JSON content.
 *
 * @param event the event whose body holds the Avro binary data
 * @return a builder holding the JSON form of the decoded record, or {@code null} when no
 *         datum reader is configured or the body could not be decoded
 */
@Override
public XContentBuilder serialize(Event event) {
    XContentBuilder builder = null;
    try {
        if (datumReader != null) {
            Decoder decoder = new DecoderFactory().binaryDecoder(event.getBody(), null);
            GenericRecord data = datumReader.read(null, decoder);
            logger.trace("Record in event " + data);
            // try-with-resources: the parser is closed even when copying the structure fails
            try (XContentParser parser = XContentFactory
                    .xContent(XContentType.JSON)
                    .createParser(NamedXContentRegistry.EMPTY,
                            DeprecationHandler.THROW_UNSUPPORTED_OPERATION,
                            data.toString())) {
                builder = jsonBuilder().copyCurrentStructure(parser);
            }
        } else {
            logger.error("Schema File is not configured");
        }
    } catch (IOException e) {
        // Deliberately best-effort: a bad record is logged and null is returned.
        logger.error("Exception in parsing avro format data but continuing serialization to process further records",
                e.getMessage(), e);
    }
    return builder;
}
 
Example #4
Source File: YarnClusterSubmissionFromCS.java    From reef with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@code YarnClusterSubmissionFromCS} from two JSON-encoded Avro streams.
 *
 * @param appInputStream stream holding the JSON form of {@code AvroYarnAppSubmissionParameters}
 * @param jobInputStream stream holding the JSON form of {@code AvroYarnClusterJobSubmissionParameters}
 * @return the submission object created from both decoded parameter records
 * @throws IOException if either stream cannot be decoded
 */
static YarnClusterSubmissionFromCS readYarnClusterSubmissionFromCSFromInputStream(
    final InputStream appInputStream, final InputStream jobInputStream) throws IOException {
  final DecoderFactory decoders = DecoderFactory.get();

  // Application-level parameters are decoded first, then the job-level ones.
  final SpecificDatumReader<AvroYarnAppSubmissionParameters> appParamsReader =
      new SpecificDatumReader<>(AvroYarnAppSubmissionParameters.class);
  final JsonDecoder appJson =
      decoders.jsonDecoder(AvroYarnAppSubmissionParameters.getClassSchema(), appInputStream);
  final AvroYarnAppSubmissionParameters appParams = appParamsReader.read(null, appJson);

  final SpecificDatumReader<AvroYarnClusterJobSubmissionParameters> jobParamsReader =
      new SpecificDatumReader<>(AvroYarnClusterJobSubmissionParameters.class);
  final JsonDecoder jobJson =
      decoders.jsonDecoder(AvroYarnClusterJobSubmissionParameters.getClassSchema(), jobInputStream);
  final AvroYarnClusterJobSubmissionParameters jobParams = jobParamsReader.read(null, jobJson);

  return new YarnClusterSubmissionFromCS(appParams, jobParams);
}
 
Example #5
Source File: AvroSource.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a block over {@code data}, preparing a reader and a binary decoder for it.
 *
 * @param data               the block bytes; handed to the decoder directly when {@code codec}
 *                           is the null codec, otherwise through {@code decodeAsInputStream}
 * @param numRecords         stored as the block's record count
 * @param mode               supplies the optional reader schema string and creates the reader
 * @param writerSchemaString writer schema; must not be null
 * @param codec              codec name compared against {@code DataFileConstants.NULL_CODEC}
 * @throws IOException declared by the constructor; presumably raised while wrapping compressed data
 */
AvroBlock(byte[] data, long numRecords, Mode<T> mode, String writerSchemaString, String codec)
    throws IOException {
  this.mode = mode;
  this.numRecords = numRecords;
  checkNotNull(writerSchemaString, "writerSchemaString");

  // The reader schema falls back to the writer schema when the mode does not specify one.
  final Schema writerSchema = internOrParseSchemaString(writerSchemaString);
  final Schema readerSchema =
      internOrParseSchemaString(
          MoreObjects.firstNonNull(mode.readerSchemaString, writerSchemaString));
  this.reader = mode.createReader(writerSchema, readerSchema);

  // Avro can read from a byte[] using a more efficient implementation, so uncompressed data is
  // passed in directly; anything else goes through a decoding input stream.
  final boolean uncompressed = codec.equals(DataFileConstants.NULL_CODEC);
  this.decoder =
      uncompressed
          ? DecoderFactory.get().binaryDecoder(data, null)
          : DecoderFactory.get().binaryDecoder(decodeAsInputStream(data, codec), null);
}
 
Example #6
Source File: KafkaAvroJobStatusMonitor.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes a raw message into a {@code GobblinTrackingEvent} and parses its job status.
 *
 * @param message the raw message; schema versioning information is read from its start
 * @return the parsed state, or {@code null} when the message cannot be decoded
 * @throws IOException if reading the schema versioning information fails
 */
@Override
public org.apache.gobblin.configuration.State parseJobStatus(byte[] message)
    throws IOException {
  final InputStream messageStream = new ByteArrayInputStream(message);
  schemaVersionWriter.readSchemaVersioningInformation(new DataInputStream(messageStream));
  final Decoder avroDecoder = DecoderFactory.get().binaryDecoder(messageStream, this.decoder.get());

  try {
    final GobblinTrackingEvent event = this.reader.get().read(null, avroDecoder);
    return parseJobStatus(event);
  } catch (AvroRuntimeException | IOException exc) {
    this.messageParseFailures.mark();
    // The stack trace is only logged while the five-minute failure rate is below 1.
    final boolean failuresAreRare = this.messageParseFailures.getFiveMinuteRate() < 1;
    if (failuresAreRare) {
      log.warn("Unable to decode input message.", exc);
    } else {
      log.warn("Unable to decode input message.");
    }
    return null;
  }
}
 
Example #7
Source File: AvroUtils.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Re-reads an Avro record under a different schema.
 *
 * @param record    the record to convert; returned unchanged when its schema equals {@code newSchema}
 * @param newSchema the target schema, used as reader schema with {@code record.getSchema()} as
 *                  writer schema, so it must be compatible in that role
 * @return the record itself, or a new record read with {@code newSchema}
 * @throws IOException if the conversion fails; the message names both schemas
 */
public static GenericRecord convertRecordSchema(GenericRecord record, Schema newSchema) throws IOException {
  final Schema currentSchema = record.getSchema();
  if (currentSchema.equals(newSchema)) {
    return record;
  }

  try {
    // Round trip: serialize with the current schema, then decode while resolving to the new one.
    final byte[] serialized = recordToByteArray(record);
    final DatumReader<GenericRecord> resolvingReader = new GenericDatumReader<>(currentSchema, newSchema);
    final BinaryDecoder binaryDecoder = new DecoderFactory().binaryDecoder(serialized, null);
    return resolvingReader.read(null, binaryDecoder);
  } catch (IOException e) {
    final String message =
        String.format("Cannot convert avro record to new schema. Original schema = %s, new schema = %s",
            currentSchema, newSchema);
    throw new IOException(message, e);
  }
}
 
Example #8
Source File: AvroTestUtil.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts that reflection derives the expected schema for {@code type} and that
 * {@code value} survives a binary write/read round trip under that schema.
 *
 * @param value  the object to round-trip
 * @param type   the type whose reflected schema is checked
 * @param schema the expected schema, as a JSON string
 * @throws Exception if schema derivation, encoding or decoding fails
 */
public static void testReflect(Object value, Type type, String schema)
  throws Exception {

  // the reflected schema must match the expected one
  Schema reflected = ReflectData.get().getSchema(type);
  assertEquals(Schema.parse(schema), reflected);

  // encode the value ...
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  new ReflectDatumWriter<Object>(reflected)
    .write(value, EncoderFactory.get().directBinaryEncoder(buffer, null));

  // ... and decode it again; the result must equal the input
  byte[] encoded = buffer.toByteArray();
  Object decoded =
    new ReflectDatumReader<Object>(reflected)
      .read(null, DecoderFactory.get().binaryDecoder(encoded, null));
  assertEquals(value, decoded);
}
 
Example #9
Source File: FastSpecificSerializerGeneratorTest.java    From avro-util with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Serializes {@code data} with a generated fast serializer for {@code schema} and
 * returns a binary decoder over the resulting bytes.
 *
 * @param data   the value to serialize
 * @param schema the schema handed to the serializer generator
 * @return a binary decoder over the serialized form of {@code data}
 * @throws RuntimeException wrapping any exception raised while generating or running the serializer
 */
public <T> Decoder dataAsDecoder(T data, Schema schema) {
  final ByteArrayOutputStream sink = new ByteArrayOutputStream();
  final Encoder encoder = AvroCompatibilityHelper.newBinaryEncoder(sink, true, null);

  try {
    final FastSerializer<T> serializer =
        new FastSpecificSerializerGenerator<T>(schema, tempDir, classLoader, null).generateSerializer();
    serializer.serialize(data, encoder);
    encoder.flush();
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  final byte[] serialized = sink.toByteArray();
  return DecoderFactory.defaultFactory().createBinaryDecoder(serialized, null);
}
 
Example #10
Source File: AvroDeserializationSchema.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Sets up the Avro reader state on first use; does nothing once {@code datumReader} is set.
 *
 * <p>Specific record classes get a {@code SpecificDatumReader} and the schema of
 * {@code recordClazz}; every other class gets a {@code GenericDatumReader} over the schema
 * parsed from {@code schemaString}. Both use the current thread's context class loader.
 */
void checkAvroInitialized() {
	if (datumReader != null) {
		return;
	}

	final ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
	final boolean specificRecord = SpecificRecord.class.isAssignableFrom(recordClazz);
	if (!specificRecord) {
		this.reader = new Schema.Parser().parse(schemaString);
		final GenericData genericData = new GenericData(contextLoader);
		this.datumReader = new GenericDatumReader<>(null, this.reader, genericData);
	} else {
		final SpecificData specificData = new SpecificData(contextLoader);
		this.datumReader = new SpecificDatumReader<>(specificData);
		this.reader = specificData.getSchema(recordClazz);
	}

	// The decoder is bound to the mutable input stream created here.
	this.inputStream = new MutableByteArrayInputStream();
	this.decoder = DecoderFactory.get().binaryDecoder(this.inputStream, null);
}
 
Example #11
Source File: AvroDeserializationSchema.java    From jMetalSP with MIT License 5 votes vote down vote up
/**
 * Decodes one Avro binary message using the schema file at {@code path}.
 *
 * @param message the Avro binary payload
 * @return the decoded record
 * @throws RuntimeException wrapping any failure while parsing the schema or decoding
 */
@Override
public T deserialize(byte[] message) {
    ensureInitialized();
    try {
        // NOTE(review): the schema file is parsed again for every message and a fresh decoder
        // is created each time; caching both looks possible but needs class-level changes.
        final Schema schema = new Schema.Parser().parse(new File(path));
        final SpecificDatumReader<T> datumReader = new SpecificDatumReader<>(schema);
        decoder = DecoderFactory.get().binaryDecoder(message, null);
        return datumReader.read(null, decoder);
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
}
 
Example #12
Source File: KinesisInputRuntime.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes the bytes of the current element into a {@code GenericRecord} and emits it.
 *
 * <p>The schema and datum reader are created from {@code schemaStr} the first time this runs;
 * the decoder field is passed back to the factory for reuse on every call.
 *
 * @param c the process context supplying the element and receiving the output
 * @throws IOException if the element bytes cannot be decoded
 */
@DoFn.ProcessElement
public void processElement(ProcessContext c) throws IOException {
    final boolean firstElement = (schema == null);
    if (firstElement) {
        schema = new Schema.Parser().parse(schemaStr);
        datumReader = new GenericDatumReader<GenericRecord>(schema);
    }

    final byte[] payload = c.element().getDataAsBytes();
    decoder = DecoderFactory.get().binaryDecoder(payload, decoder);
    c.output(datumReader.read(null, decoder));
}
 
Example #13
Source File: KafkaAvroSerDesApp.java    From registry with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes a JSON-encoded Avro value.
 *
 * @param jsonString the JSON text to decode
 * @param schema     the schema used for both the decoder and the datum reader
 * @return the decoded value; for a schema of type STRING, its {@code toString()} form
 * @throws Exception if the JSON cannot be decoded against {@code schema}
 */
private Object jsonToAvro(String jsonString, Schema schema) throws Exception {
    final DatumReader<Object> datumReader = new GenericDatumReader<>(schema);
    final Object decoded = datumReader.read(null, DecoderFactory.get().jsonDecoder(schema, jsonString));

    // STRING-typed values are returned in their toString() form instead of the decoded object.
    return Schema.Type.STRING == schema.getType() ? decoded.toString() : decoded;
}
 
Example #14
Source File: GoogleCloudPubSubFlusherTest.java    From divolte-collector with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that the published message body decodes directly as a record of
 * {@code MINIMAL_SCHEMA} carrying the expected party id, session id and counter.
 */
@Test
public void testMessageBodyIsNakedAvroRecord() throws IOException {
    processSingleMessage();
    final ByteString payload = getFirstPublishedMessage().getData();

    final DatumReader<GenericRecord> recordReader = new GenericDatumReader<>(MINIMAL_SCHEMA);
    final GenericRecord decoded =
            recordReader.read(null, DecoderFactory.get().binaryDecoder(payload.newInput(), null));

    assertEquals(partyId.orElseThrow(IllegalStateException::new).toString(),
                 decoded.get("partyId").toString());
    assertEquals(sessionId.orElseThrow(IllegalStateException::new).toString(),
                 decoded.get("sessionId").toString());
    assertEquals(0L, decoded.get("counter"));
}
 
Example #15
Source File: EventReader.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Create a new Event Reader
 * @param in
 * @throws IOException
 */
@SuppressWarnings("deprecation")
public EventReader(DataInputStream in) throws IOException {
  this.in = in;
  this.version = in.readLine();
  
  if (!EventWriter.VERSION.equals(version)) {
    throw new IOException("Incompatible event log version: "+version);
  }

  Schema myschema = new SpecificData(Event.class.getClassLoader()).getSchema(Event.class);
  this.schema = Schema.parse(in.readLine());
  this.reader = new SpecificDatumReader(schema, myschema);
  this.decoder = DecoderFactory.get().jsonDecoder(schema, in);
}
 
Example #16
Source File: FastSerdeTestsSupport.java    From avro-fastserde with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code record} with a {@code SpecificDatumWriter} for {@code schema} and returns
 * a binary decoder over the written bytes.
 *
 * @param record the value to serialize
 * @param schema the schema the writer is created with
 * @return a binary decoder over the serialized form of {@code record}
 * @throws RuntimeException wrapping any exception raised while writing
 */
public static <T> Decoder serializeSpecific(T record, Schema schema) {
    final ByteArrayOutputStream sink = new ByteArrayOutputStream();
    final BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder(sink, null);

    try {
        new SpecificDatumWriter<T>(schema).write(record, encoder);
        encoder.flush();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    final byte[] serialized = sink.toByteArray();
    return DecoderFactory.get().binaryDecoder(serialized, null);
}
 
Example #17
Source File: EventReader.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Create a new Event Reader
 * @param in
 * @throws IOException
 */
@SuppressWarnings("deprecation")
public EventReader(DataInputStream in) throws IOException {
  this.in = in;
  this.version = in.readLine();
  
  if (!EventWriter.VERSION.equals(version)) {
    throw new IOException("Incompatible event log version: "+version);
  }

  Schema myschema = new SpecificData(Event.class.getClassLoader()).getSchema(Event.class);
  this.schema = Schema.parse(in.readLine());
  this.reader = new SpecificDatumReader(schema, myschema);
  this.decoder = DecoderFactory.get().jsonDecoder(schema, in);
}
 
Example #18
Source File: DocumentInterceptor.java    From examples with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes the value of {@code cell} into a {@code Document}.
 *
 * @param cell  the cell whose value bytes hold the Avro binary data
 * @param reuse an instance handed to the reader for reuse
 * @return whatever the reader returns for the decoded value
 * @throws IOException if the value cannot be decoded
 */
protected Document cellToAvro(Cell cell, Document reuse) throws IOException {
  // Copy just the value slice out of the cell's backing array before decoding it.
  final byte[] cellValue =
      Bytes.copy(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
  decoder = DecoderFactory.get().binaryDecoder(cellValue, decoder);
  return reader.read(reuse, decoder);
}
 
Example #19
Source File: Person.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes Avro binary bytes into a {@code Person}.
 *
 * @param record the Avro binary form of a record of {@code schema}
 * @return the person built from the decoded record via {@code fromAvroRecord}
 * @throws IOException if the bytes cannot be decoded
 */
public static Person desFromAvroBytes(byte[] record) throws IOException {
    // No decoder is available for reuse here, so null is passed and a new one is created.
    final BinaryDecoder binaryDecoder = DecoderFactory.get().binaryDecoder(record, null);
    final GenericRecord decoded = new GenericDatumReader<GenericRecord>(schema).read(null, binaryDecoder);
    return fromAvroRecord(decoded);
}
 
Example #20
Source File: FastSerdeTestsSupport.java    From avro-fastserde with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code data} with a {@code GenericDatumWriter} for {@code schema} and returns
 * a binary decoder over the written bytes.
 *
 * @param data   the value to serialize
 * @param schema the schema the writer is created with
 * @return a binary decoder over the serialized form of {@code data}
 * @throws RuntimeException wrapping any exception raised while writing
 */
public static <T> Decoder serializeGeneric(T data, Schema schema) {
    final ByteArrayOutputStream sink = new ByteArrayOutputStream();
    final BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder(sink, null);

    try {
        new GenericDatumWriter<T>(schema).write(data, encoder);
        encoder.flush();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    final byte[] serialized = sink.toByteArray();
    return DecoderFactory.get().binaryDecoder(serialized, null);
}
 
Example #21
Source File: OrcTestTools.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
/**
 * Opens a JSON-encoded Avro resource and exposes its records as an {@link OrcRowIterator}.
 * Each record is read lazily and converted with {@code getAvroWritable}.
 *
 * <p>The Avro schema is looked up as {@code schema.avsc} two directory levels above
 * {@code file}; if that lookup yields {@code null}, the {@code schema.avsc} next to the
 * file's parent directory is used instead.
 *
 * @param typeInfo The ORC schema in {@link TypeInfo} format.
 * @param file The resource name of the JSON data, in String format.
 * @return an iterator over the {@link Writable} rows; iteration ends at the first
 *         {@link IOException} raised while reading or converting a record
 */
private OrcRowIterator readRecordsFromJsonInputStream(TypeInfo typeInfo, String file) throws IOException {

  final InputStream jsonStream = OrcTestTools.class.getClassLoader().getResourceAsStream(file);

  // This getParent.getParent is dirty due to we need to simulate multiple-partitions scenarios in iTest.
  final File dataFile = new File(file);
  final String partitionedSchemaName =
      new File(dataFile.getParentFile().getParent(), "schema.avsc").toString();

  final Schema partitionedSchema = readAvscSchema(partitionedSchemaName, OrcTestTools.class);
  final Schema avroSchema;
  if (partitionedSchema != null) {
    avroSchema = partitionedSchema;
  } else {
    avroSchema = readAvscSchema(new File(dataFile.getParent(), "schema.avsc").toString(), OrcTestTools.class);
  }

  final GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>(avroSchema);
  final Decoder jsonDecoder = DecoderFactory.get().jsonDecoder(avroSchema, jsonStream);

  return new OrcRowIterator(typeInfo, new AbstractIterator<Writable>() {
    @Override
    protected Writable computeNext() {
      try {
        final GenericRecord next = datumReader.read(null, jsonDecoder);
        return getAvroWritable(next, avroSchema);
      } catch (IOException e) {
        // Any read failure is treated as the end of the data: close the stream and stop.
        try {
          jsonStream.close();
        } catch (IOException ioec) {
          log.warn("Failed to read record from inputstream, will close it immediately", ioec);
        }
        endOfData();
        return null;
      }
    }
  });
}
 
Example #22
Source File: GenericSchemaDecoder.java    From DBus with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes the actual data wrapped by the generic schema.
 *
 * @param schema  the schema object used to read the records
 * @param payload the buffer holding the actual data
 * @param start   offset into {@code payload} handed to the binary decoder
 * @param len     number of bytes handed to the binary decoder
 * @return every record read until the decoder reports its end, as a {@code List<GenericRecord>}
 * @throws Exception if a record cannot be decoded
 */
public List<GenericRecord> decode(Schema schema, byte[] payload, int start, int len) throws Exception {
    // Building this message converts the whole payload to a String, so only do it when
    // trace logging is actually enabled.
    if (logger.isTraceEnabled()) {
        logger.trace("Schema:" + schema.toString() + " schema payload:" + new String(payload, "utf-8"));
    }
    List<GenericRecord> list = new LinkedList<>();
    DatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(payload, start, len, null);
    // The slice may hold several records back to back; read until it is exhausted.
    while (!decoder.isEnd()) {
        list.add(reader.read(null, decoder));
    }
    return list;
}
 
Example #23
Source File: BytesToAvroConverter.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
/**
 * Parses one Avro binary record with the configured record reader.
 *
 * @param outputSchema not used by this method
 * @param inputRecord  the Avro binary bytes to parse
 * @param workUnit     not used by this method
 * @return a single-element collection holding the parsed record
 * @throws DataConversionException if the bytes cannot be parsed
 */
@Override
public Iterable<GenericRecord> convertRecord(Schema outputSchema, byte[] inputRecord, WorkUnitState workUnit)
    throws DataConversionException {
  Preconditions.checkNotNull(recordReader, "Must have called convertSchema!");

  // Hand the cached decoder (if any) to the factory for reuse.
  final BinaryDecoder reusable = decoderCache.get();
  final BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(inputRecord, reusable);

  final GenericRecord parsedRecord;
  try {
    parsedRecord = recordReader.read(null, decoder);
  } catch (IOException e) {
    throw new DataConversionException("Error parsing record", e);
  }

  // Only a decoder that parsed successfully is stored back for the next call.
  decoderCache.set(decoder);
  return Collections.singleton(parsedRecord);
}
 
Example #24
Source File: AvroReader.java    From pulsar with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes one Avro binary value from a slice of {@code bytes}.
 *
 * @param bytes  the buffer holding the encoded value
 * @param offset start of the slice within {@code bytes}
 * @param length number of bytes in the slice
 * @return the decoded value
 * @throws SchemaSerializationException if the slice cannot be decoded
 */
@Override
public T read(byte[] bytes, int offset, int length) {
    try {
        BinaryDecoder decoderFromCache = decoders.get();
        BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(bytes, offset, length, decoderFromCache);
        if (decoderFromCache == null) {
            // First use: remember the new decoder so later calls can reuse it.
            decoders.set(decoder);
        }
        // The decoder is already positioned on the slice; asking the factory to set it up a
        // second time only repeated the same initialisation.
        return reader.read(null, decoder);
    } catch (IOException e) {
        throw new SchemaSerializationException(e);
    }
}
 
Example #25
Source File: AvroRowDeserializationSchema.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an Avro deserialization schema from an Avro schema string.
 *
 * <p>The schema string must convert to row type information; the parsed schema backs a
 * generic record, a generic datum reader and a binary decoder over a mutable input stream.
 *
 * @param avroSchemaString Avro schema string to deserialize Avro's record to Flink's row
 */
public AvroRowDeserializationSchema(String avroSchemaString) {
	Preconditions.checkNotNull(avroSchemaString, "Avro schema must not be null.");
	final TypeInformation<?> convertedType = AvroSchemaConverter.convertToTypeInfo(avroSchemaString);
	Preconditions.checkArgument(convertedType instanceof RowTypeInfo, "Row type information expected.");

	// No record class in this variant: everything is driven by the schema string.
	this.recordClazz = null;
	this.typeInfo = (RowTypeInfo) convertedType;
	this.schemaString = avroSchemaString;

	this.schema = new Schema.Parser().parse(avroSchemaString);
	this.record = new GenericData.Record(this.schema);
	this.datumReader = new GenericDatumReader<>(this.schema);

	this.inputStream = new MutableByteArrayInputStream();
	this.decoder = DecoderFactory.get().binaryDecoder(this.inputStream, null);
}
 
Example #26
Source File: AVROIntermediateDataFormat.java    From sqoop-on-spark with Apache License 2.0 5 votes vote down vote up
/**
 * {@inheritDoc}
 *
 * <p>Reads one generic record of {@code avroSchema} from the input and stores it in
 * {@code data}.
 */
@Override
public void read(DataInput in) throws IOException {
  // NOTE(review): the cast assumes the DataInput is also an InputStream; any other
  // implementation fails here with a ClassCastException — confirm against callers.
  final InputStream stream = (InputStream) in;
  final Decoder binaryDecoder = DecoderFactory.get().binaryDecoder(stream, null);
  final DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(avroSchema);
  data = datumReader.read(null, binaryDecoder);
}
 
Example #27
Source File: FastSerdeTestsSupport.java    From avro-util with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Writes {@code record} with a {@code SpecificDatumWriter} for {@code schema} and returns
 * a binary decoder over the written bytes.
 *
 * @param record the value to serialize
 * @param schema the schema the writer is created with
 * @return a binary decoder over the serialized form of {@code record}
 * @throws RuntimeException wrapping any exception raised while writing
 */
public static <T> Decoder specificDataAsDecoder(T record, Schema schema) {
  final ByteArrayOutputStream sink = new ByteArrayOutputStream();
  final Encoder encoder = AvroCompatibilityHelper.newBinaryEncoder(sink, true, null);

  try {
    new SpecificDatumWriter<T>(schema).write(record, encoder);
    encoder.flush();
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  final byte[] serialized = sink.toByteArray();
  return DecoderFactory.defaultFactory().createBinaryDecoder(serialized, null);
}
 
Example #28
Source File: Person.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes Avro binary bytes into a {@code Person}.
 *
 * @param record the Avro binary form of a record of {@code schema}
 * @return the person built from the decoded record via {@code fromAvroRecord}
 * @throws IOException if the bytes cannot be decoded
 */
public static Person desFromAvroBytes(byte[] record) throws IOException {
    // No decoder is available for reuse here, so null is passed and a new one is created.
    final BinaryDecoder binaryDecoder = DecoderFactory.get().binaryDecoder(record, null);
    final GenericRecord decoded = new GenericDatumReader<GenericRecord>(schema).read(null, binaryDecoder);
    return fromAvroRecord(decoded);
}
 
Example #29
Source File: AvroJobSpecDeserializer.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes an {@code AvroJobSpec} from a message.
 *
 * @param topic not used by this method
 * @param data  the message bytes; schema versioning information is read from their start,
 *              the remainder is decoded as Avro binary
 * @return the decoded job spec
 * @throws RuntimeException if the message cannot be decoded; the underlying
 *         {@link IOException} is attached as the cause
 */
@Override
public AvroJobSpec deserialize(String topic, byte[] data) {
  try (InputStream is = new ByteArrayInputStream(data)) {
    _versionWriter.readSchemaVersioningInformation(new DataInputStream(is));

    Decoder decoder = DecoderFactory.get().binaryDecoder(is, _decoder);

    return _reader.read(null, decoder);
  } catch (IOException e) {
    // Keep the cause so the actual decoding failure stays visible in the stack trace.
    throw new RuntimeException("Could not decode message", e);
  }
}
 
Example #30
Source File: JsonToAvroConverter.java    From celos with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a JSON-encoded Avro fixture file into an Avro data file held in memory.
 *
 * @param tr the test run, passed to the schema creator to obtain the schema
 * @param ff the fixture file whose content holds the JSON-encoded records
 * @return a fixture file whose content is the Avro data file with every record read from {@code ff}
 * @throws Exception if the schema cannot be parsed or a record cannot be read or written
 */
@Override
public FixFile convert(TestRun tr, FixFile ff) throws Exception {
    Schema schema = new Schema.Parser().parse(schemaCreator.create(tr).getContent());
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    // Both the input stream and the data file writer are closed on every path.
    try (InputStream input = ff.getContent();
         DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>())) {
        DatumReader<Object> reader = new GenericDatumReader<>(schema);
        DataInputStream din = new DataInputStream(input);
        writer.create(schema, baos);
        Decoder decoder = DecoderFactory.get().jsonDecoder(schema, din);
        while (true) {
            Object datum;
            try {
                datum = reader.read(null, decoder);
            } catch (EOFException eofe) {
                // end of the JSON input: every record has been copied
                break;
            }
            writer.append(datum);
        }
        writer.flush();
    }
    return new FixFile(new ByteArrayInputStream(baos.toByteArray()));
}