org.apache.avro.io.BinaryDecoder Java Examples

The following examples show how to use org.apache.avro.io.BinaryDecoder. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SecurityTokensReader.java    From reef with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes every security token stored in the tokens file and registers each
 * one with the supplied user credentials.
 * @param ugi user's credentials that receive the decoded tokens.
 * @throws IOException if the tokens file cannot be opened or decoded.
 */
void addTokensFromFile(final UserGroupInformation ugi) throws IOException {
  LOG.log(Level.FINE, "Reading security tokens from file: {0}", this.securityTokensFile);

  try (FileInputStream tokensInput = new FileInputStream(securityTokensFile)) {
    final BinaryDecoder tokenDecoder = decoderFactory.binaryDecoder(tokensInput, null);

    // Keep decoding tokens until the decoder reports the end of its input.
    while (!tokenDecoder.isEnd()) {
      final SecurityToken avroToken = tokenDatumReader.read(null, tokenDecoder);

      // Translate the decoded token's fields into the arguments of a YARN token.
      final byte[] identifier = avroToken.getKey().array();
      final byte[] password = avroToken.getPassword().array();
      final Text kind = new Text(avroToken.getKind().toString());
      final Text service = new Text(avroToken.getService().toString());
      final Token<TokenIdentifier> yarnToken = new Token<>(identifier, password, kind, service);

      LOG.log(Level.FINE, "addToken for {0}", yarnToken.getKind());

      ugi.addToken(yarnToken);
    }
  }
}
 
Example #2
Source File: LaserFeatureListenser.java    From laser with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes the binary payload of {@code message} into a {@link B5MEvent} and
 * passes the event to every registered consumer. Any exception raised while
 * decoding or dispatching is printed to standard error and not rethrown.
 *
 * @param message the incoming message whose data holds an encoded B5MEvent
 */
public synchronized void recieveMessages(Message message) {
	final DatumReader<B5MEvent> eventReader = new SpecificDatumReader<B5MEvent>(B5MEvent.SCHEMA$);
	final B5MEvent event = new B5MEvent();
	final byte[] payload = message.getData();
	final BinaryDecoder payloadDecoder = DecoderFactory.get().binaryDecoder(payload, null);

	try {
		// Populate the pre-allocated event, then fan it out to the consumers.
		eventReader.read(event, payloadDecoder);
		for (LaserMessageConsumer target : this.consumer) {
			target.write(event);
		}
	} catch (Exception failure) {
		failure.printStackTrace();
	}
}
 
Example #3
Source File: KafkaAvroJobMonitor.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the monitor and prepares a per-thread Avro decoder and datum reader.
 *
 * @param topic passed through to the superclass constructor
 * @param catalog passed through to the superclass constructor
 * @param config passed through to the superclass constructor
 * @param schema schema handed to each thread's {@link SpecificDatumReader}
 * @param versionWriter stored for later use by this monitor
 */
public KafkaAvroJobMonitor(String topic, MutableJobCatalog catalog, Config config, Schema schema,
    SchemaVersionWriter<?> versionWriter) {
  super(topic, catalog, config);

  this.schema = schema;
  this.versionWriter = versionWriter;

  // Each thread's decoder starts out bound to an empty stream.
  this.decoder = new ThreadLocal<BinaryDecoder>() {
    @Override
    protected BinaryDecoder initialValue() {
      return DecoderFactory.get().binaryDecoder(new ByteArrayInputStream(new byte[0]), null);
    }
  };

  // Each thread gets its own reader built from this monitor's schema.
  this.reader = new ThreadLocal<SpecificDatumReader<T>>() {
    @Override
    protected SpecificDatumReader<T> initialValue() {
      return new SpecificDatumReader<>(KafkaAvroJobMonitor.this.schema);
    }
  };
}
 
Example #4
Source File: AvroUtils.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Change the schema of an Avro record by serializing it with its own schema and
 * reading the bytes back with {@code newSchema} as the reader schema.
 * @param record The Avro record whose schema is to be changed.
 * @param newSchema The target schema. It must be compatible as reader schema with record.getSchema() as writer schema.
 * @return the same record if its schema already equals {@code newSchema}; otherwise a new Avro record
 *         with the new schema.
 * @throws IOException if conversion failed; the original failure is attached as the cause.
 */
public static GenericRecord convertRecordSchema(GenericRecord record, Schema newSchema) throws IOException {
  if (record.getSchema().equals(newSchema)) {
    return record;
  }

  try {
    // Use the shared default factory rather than allocating a new DecoderFactory on every call.
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(recordToByteArray(record), null);
    // Writer schema = the record's current schema, reader schema = the requested one.
    DatumReader<GenericRecord> reader = new GenericDatumReader<>(record.getSchema(), newSchema);
    return reader.read(null, decoder);
  } catch (IOException e) {
    throw new IOException(
        String.format("Cannot convert avro record to new schema. Original schema = %s, new schema = %s",
            record.getSchema(), newSchema),
        e);
  }
}
 
Example #5
Source File: CassandraKeyComparator.java    From hdfs2cass with Apache License 2.0 6 votes vote down vote up
/**
 * Compares two serialized keys by decoding the leading bytes field of each
 * byte range and delegating to the {@code ByteBuffer} comparison.
 *
 * @param o1 array holding the first serialized key
 * @param s1 start offset of the first key within {@code o1}
 * @param l1 length of the first key
 * @param o2 array holding the second serialized key
 * @param s2 start offset of the second key within {@code o2}
 * @param l2 length of the second key
 * @return the result of comparing the two decoded byte buffers
 */
@Override
public int compare(byte[] o1, int s1, int l1, byte[] o2, int s2, int l2) {
  try {
    final BinaryDecoder firstDecoder = DECODER_FACTORY.binaryDecoder(o1, s1, l1, null);
    final ByteBuffer leftKey = firstDecoder.readBytes(null);

    // The decoder instance is offered for reuse, but each key gets a fresh
    // byte buffer because DecoratedKey stores a reference to it.
    final BinaryDecoder secondDecoder = DECODER_FACTORY.binaryDecoder(o2, s2, l2, firstDecoder);
    final ByteBuffer rightKey = secondDecoder.readBytes(null);

    return compare(leftKey, rightKey);
  } catch (final IOException decodeFailure) {
    throw Throwables.propagate(decodeFailure);
  }
}
 
Example #6
Source File: DoctorKafkaActionsServlet.java    From doctorkafka with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the retrieved action-report messages to {@code writer} as a JSON array,
 * iterating the retrieved list in reverse order. Each element carries the
 * {@code date}, {@code clusterName} and {@code description} of one decoded
 * {@code OperatorAction}. Records that fail to decode are logged and skipped.
 *
 * @param writer destination for the rendered JSON array
 * @param params request parameters; not used by this method
 */
@Override
public void renderJSON(PrintWriter writer, Map<String, String> params) {
  JsonArray json = new JsonArray();
  // The reader depends only on the schema, so build it once instead of once per record.
  SpecificDatumReader<OperatorAction> reader = new SpecificDatumReader<>(operatorActionSchema);

  for (ConsumerRecord<byte[], byte[]> record : Lists.reverse(retrieveActionReportMessages())) {
    try {
      JsonObject jsonRecord = new JsonObject();
      BinaryDecoder binaryDecoder = avroDecoderFactory.binaryDecoder(record.value(), null);

      OperatorAction result = new OperatorAction();
      reader.read(result, binaryDecoder);

      jsonRecord.add("date", gson.toJsonTree(new Date(result.getTimestamp())));
      jsonRecord.add("clusterName", gson.toJsonTree(result.getClusterName()));
      jsonRecord.add("description", gson.toJsonTree(result.getDescription()));
      json.add(jsonRecord);
    } catch (Exception e) {
      // Best effort: one undecodable record must not prevent rendering the rest.
      LOG.info("Fail to decode an message", e);
    }
  }
  writer.print(json);
}
 
Example #7
Source File: AvroCompatibilityHelperBinaryCodecsTest.java    From avro-util with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Runs an encode/decode cycle for every combination of encoder reuse, decoder
 * reuse, buffered-vs-direct encoder and buffered-vs-direct decoder. The four
 * references are shared across all cycles.
 */
@Test
public void testBinaryCodecs() throws Exception {
  final AtomicReference<BinaryEncoder> bufferedEncoderRef = new AtomicReference<>(null);
  final AtomicReference<BinaryEncoder> directEncoderRef = new AtomicReference<>(null);
  final AtomicReference<BinaryDecoder> bufferedDecoderRef = new AtomicReference<>(null);
  final AtomicReference<BinaryDecoder> directDecoderRef = new AtomicReference<>(null);

  // The reuse flags must start with false; the buffered flags start with true.
  final boolean[] falseThenTrue = {false, true};
  final boolean[] trueThenFalse = {true, false};

  for (boolean reuseEncoder : falseThenTrue) {
    for (boolean reuseDecoder : falseThenTrue) {
      for (boolean useBufferedEncoder : trueThenFalse) {
        for (boolean useBufferedDecoder : trueThenFalse) {
          runBinaryEncodeDecodeCycle(
              bufferedEncoderRef, directEncoderRef, bufferedDecoderRef, directDecoderRef,
              reuseEncoder, reuseDecoder, useBufferedEncoder, useBufferedDecoder
          );
        }
      }
    }
  }
}
 
Example #8
Source File: GenericSchemaDecoder.java    From DBus with Apache License 2.0 6 votes vote down vote up
/**
 * Unwraps the generic-schema envelopes contained in {@code input}. Every
 * decoded record contributes one bean carrying its table name, schema id,
 * offset and payload bytes.
 *
 * @param input serialized envelope data
 * @return the unwrapped beans, in the order they were decoded
 * @throws Exception if decoding a record or reading one of its fields fails
 */
public List<GenericData> unwrap(byte[] input) throws Exception {
    final BinaryDecoder decoder = getBinaryDecoder(input);
    final List<GenericData> unwrapped = new LinkedList<>();
    // Decode records until the decoder reports that the input is exhausted.
    while (!decoder.isEnd()) {
        final GenericRecord envelope = datumReader.read(null, decoder);
        final GenericData bean = new GenericData();
        bean.setTableName(((Utf8) envelope.get(GenericData.TABLE_NAME)).toString());
        bean.setSchemaId((Integer) envelope.get(GenericData.SCHEMA_ID));
        bean.setOffset(((Utf8) envelope.get(GenericData.OFFSET)).toString());
        bean.setPayload(((ByteBuffer) envelope.get(GenericData.PAYLOAD)).array());
        unwrapped.add(bean);
    }
    logger.trace("message count:" + unwrapped.size());
    return unwrapped;
}
 
Example #9
Source File: GenericSchemaDecoder.java    From DBus with Apache License 2.0 6 votes vote down vote up
/**
 * Unwraps the generic-schema envelopes contained in {@code input}. Every
 * decoded record contributes one bean carrying its table name, schema hash
 * and payload bytes.
 *
 * @param input serialized envelope data
 * @return the unwrapped beans, in the order they were decoded
 * @throws Exception if decoding a record or reading one of its fields fails
 */
public List<GenericData> unwrap(byte[] input) throws Exception {
    final BinaryDecoder decoder = getBinaryDecoder(input);
    final List<GenericData> unwrapped = new LinkedList<>();
    // Decode records until the decoder reports that the input is exhausted.
    while (!decoder.isEnd()) {
        final GenericRecord envelope = datumReader.read(null, decoder);
        final GenericData bean = new GenericData();
        bean.setTableName(((Utf8) envelope.get(GenericData.TABLE_NAME)).toString());
        bean.setSchemaHash((Integer) envelope.get(GenericData.SCHEMA_HASH));
        bean.setPayload(((ByteBuffer) envelope.get(GenericData.PAYLOAD)).array());
        unwrapped.add(bean);
    }
    logger.trace("message count:" + unwrapped.size());
    return unwrapped;
}
 
Example #10
Source File: IcebergDecoder.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes one datum from {@code stream}.
 *
 * @param stream source of the encoded datum
 * @param reuse instance offered to the reader for reuse
 * @return the datum produced by the reader
 * @throws AvroRuntimeException if reading from the stream fails
 */
@Override
public D decode(InputStream stream, D reuse) {
  // Offer the decoder held in DECODER for reuse, then store whichever decoder the factory returned.
  final BinaryDecoder directDecoder = DecoderFactory.get().directBinaryDecoder(stream, DECODER.get());
  DECODER.set(directDecoder);

  final D datum;
  try {
    datum = reader.read(reuse, directDecoder);
  } catch (IOException readFailure) {
    throw new AvroRuntimeException("Decoding datum failed", readFailure);
  }
  return datum;
}
 
Example #11
Source File: OperatorUtil.java    From doctorkafka with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes the value of a consumer record into a {@code BrokerStats} instance.
 *
 * @param record consumer record whose value holds the encoded stats
 * @return the populated stats, or {@code null} if decoding failed for any reason
 */
public static BrokerStats deserializeBrokerStats(ConsumerRecord<byte[], byte[]> record) {
  try {
    final BinaryDecoder valueDecoder = avroDecoderFactory.binaryDecoder(record.value(), null);
    final BrokerStats decoded = new BrokerStats();
    // The reader fills the instance passed in; that same instance is returned.
    brokerStatsReader.read(decoded, valueDecoder);
    return decoded;
  } catch (Exception failure) {
    LOG.debug("Fail to decode an message", failure);
    return null;
  }
}
 
Example #12
Source File: TestWriteAvroResultWithoutSchema.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Reads exactly {@code recordCount} records, encoded back to back, from {@code in}.
 *
 * @param in stream positioned at the first encoded record
 * @param schema schema used to decode each record
 * @param recordCount number of records to read
 * @return the decoded records in stream order
 * @throws IOException if a record cannot be read
 */
@Override
protected List<GenericRecord> readRecords(final InputStream in, final Schema schema, final int recordCount) throws IOException {
    final BinaryDecoder streamDecoder = DecoderFactory.get().binaryDecoder(in, null);
    final GenericDatumReader<GenericRecord> recordReader = new GenericDatumReader<>(schema);

    final List<GenericRecord> decoded = new ArrayList<>();
    int alreadyRead = 0;
    while (alreadyRead < recordCount) {
        decoded.add(recordReader.read(null, streamDecoder));
        alreadyRead++;
    }

    return decoded;
}
 
Example #13
Source File: BytesToAvroConverter.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
/**
 * Parses one serialized record into a {@link GenericRecord}.
 *
 * @param outputSchema not consulted by this method
 * @param inputRecord the encoded record bytes
 * @param workUnit not consulted by this method
 * @return a single-element collection holding the parsed record
 * @throws DataConversionException if the bytes cannot be parsed
 */
@Override
public Iterable<GenericRecord> convertRecord(Schema outputSchema, byte[] inputRecord, WorkUnitState workUnit)
    throws DataConversionException {
  Preconditions.checkNotNull(recordReader, "Must have called convertSchema!");

  // Offer the cached decoder for reuse.
  final BinaryDecoder reusableDecoder = DecoderFactory.get().binaryDecoder(inputRecord, decoderCache.get());
  final GenericRecord parsedRecord;
  try {
    parsedRecord = recordReader.read(null, reusableDecoder);
  } catch (IOException parseFailure) {
    throw new DataConversionException("Error parsing record", parseFailure);
  }

  // The decoder is only cached after a successful read.
  decoderCache.set(reusableDecoder);
  return Collections.singleton(parsedRecord);
}
 
Example #14
Source File: Person.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Deserializes a {@code Person} from its binary-encoded form.
 *
 * @param record the encoded bytes of one record
 * @return the person built from the decoded record
 * @throws IOException if the bytes cannot be decoded
 */
public static Person desFromAvroBytes(byte[] record) throws IOException {
    final DatumReader<GenericRecord> personReader = new GenericDatumReader<GenericRecord>(schema);
    // There is no earlier decoder to reuse, so pass null.
    final BinaryDecoder bytesDecoder = DecoderFactory.get().binaryDecoder(record, null);
    return fromAvroRecord(personReader.read(null, bytesDecoder));
}
 
Example #15
Source File: Person.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Deserializes a {@code Person} from its binary-encoded form.
 *
 * @param record the encoded bytes of one record
 * @return the person built from the decoded record
 * @throws IOException if the bytes cannot be decoded
 */
public static Person desFromAvroBytes(byte[] record) throws IOException {
    final DatumReader<GenericRecord> personReader = new GenericDatumReader<GenericRecord>(schema);
    // There is no earlier decoder to reuse, so pass null.
    final BinaryDecoder bytesDecoder = DecoderFactory.get().binaryDecoder(record, null);
    return fromAvroRecord(personReader.read(null, bytesDecoder));
}
 
Example #16
Source File: Person.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Deserializes a {@code Person} from its binary-encoded form.
 *
 * @param record the encoded bytes of one record
 * @return the person built from the decoded record
 * @throws IOException if the bytes cannot be decoded
 */
public static Person desFromAvroBytes(byte[] record) throws IOException {
    final DatumReader<GenericRecord> personReader = new GenericDatumReader<GenericRecord>(schema);
    // There is no earlier decoder to reuse, so pass null.
    final BinaryDecoder bytesDecoder = DecoderFactory.get().binaryDecoder(record, null);
    return fromAvroRecord(personReader.read(null, bytesDecoder));
}
 
Example #17
Source File: TestLog4jAppenderWithAvro.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Logs an {@code AppEvent} through the configured appender, takes the resulting
 * event from the channel and checks that its body decodes back to the same
 * message and that its headers carry the schema literal but no schema URL.
 */
@Test
public void testAvroReflect() throws IOException {
  loadProperties("flume-log4jtest-avro-reflect.properties");
  PropertyConfigurator.configure(props);
  final Logger log4jLogger = LogManager.getLogger(TestLog4jAppenderWithAvro.class);
  final String expectedMessage = "This is log message number " + String.valueOf(0);

  final AppEvent sentEvent = new AppEvent();
  sentEvent.setMessage(expectedMessage);
  log4jLogger.info(sentEvent);

  final Transaction tx = ch.getTransaction();
  tx.begin();
  final Event takenEvent = ch.take();
  Assert.assertNotNull(takenEvent);

  final Schema expectedSchema = ReflectData.get().getSchema(sentEvent.getClass());

  // Decode the event body and compare it with what was logged.
  final ReflectDatumReader<AppEvent> reflectReader = new ReflectDatumReader<AppEvent>(AppEvent.class);
  final BinaryDecoder bodyDecoder = DecoderFactory.get().binaryDecoder(takenEvent.getBody(), null);
  final AppEvent decodedEvent = reflectReader.read(null, bodyDecoder);
  Assert.assertEquals(expectedMessage, decodedEvent.getMessage());

  final Map<String, String> headers = takenEvent.getHeaders();

  Assert.assertNull(headers.get(Log4jAvroHeaders.MESSAGE_ENCODING.toString()));

  Assert.assertNull("Schema URL should not be set",
      headers.get(Log4jAvroHeaders.AVRO_SCHEMA_URL.toString()));
  Assert.assertEquals("Schema string should be set", expectedSchema.toString(),
      headers.get(Log4jAvroHeaders.AVRO_SCHEMA_LITERAL.toString()));

  tx.commit();
  tx.close();
}
 
Example #18
Source File: TestFlumeFailoverTarget.java    From datacollector with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the test records through a Flume target configured for Avro output
 * without an embedded schema, then decodes every event taken from the channel
 * with the known schema and verifies the three resulting records.
 */
@Test
public void testWriteAvroRecordsDropSchema() throws InterruptedException, StageException, IOException {

  final DataGeneratorFormatConfig formatConfig = new DataGeneratorFormatConfig();
  formatConfig.avroSchema = SdcAvroTestUtil.AVRO_SCHEMA1;
  formatConfig.avroSchemaSource = INLINE;
  formatConfig.includeSchema = false;
  formatConfig.avroCompression = AvroCompression.NULL;
  final FlumeTarget flumeTarget = FlumeTestUtil.createFlumeTarget(
    FlumeTestUtil.createDefaultFlumeConfig(port, false),
    DataFormat.AVRO,
    formatConfig
  );
  final TargetRunner runner = new TargetRunner.Builder(FlumeDTarget.class, flumeTarget).build();

  runner.runInit();
  final List<Record> inputRecords = SdcAvroTestUtil.getRecords1();
  runner.runWrite(inputRecords);
  runner.runDestroy();

  final List<GenericRecord> decodedRecords = new ArrayList<>();
  // The reader is created without a schema and given one explicitly afterwards.
  final DatumReader<GenericRecord> recordReader = new GenericDatumReader<>();
  recordReader.setSchema(new Schema.Parser().parse(SdcAvroTestUtil.AVRO_SCHEMA1));

  final Transaction tx = ch.getTransaction();
  tx.begin();
  // Drain the channel: take() returning null ends the loop.
  for (Event event = ch.take(); event != null; event = ch.take()) {
    final BinaryDecoder bodyDecoder = DecoderFactory.get().binaryDecoder(event.getBody(), null);
    decodedRecords.add(recordReader.read(null, bodyDecoder));
  }
  tx.commit();
  tx.close();

  Assert.assertEquals(3, decodedRecords.size());
  SdcAvroTestUtil.compare1(decodedRecords);
}
 
Example #19
Source File: HoodieAvroDataBlock.java    From hudi with Apache License 2.0 5 votes vote down vote up
/**
 * Parses this block's content into a list of records and stores it in {@code this.records}.
 *
 * <p>The content is read in this order: an int version, an int record count (only when the
 * version reports that it has one), and then, per record, an int length followed by that many
 * bytes holding the encoded record.
 *
 * @throws IOException if reading from the content stream or decoding a record fails
 */
@Override
protected void deserializeRecords() throws IOException {
  SizeAwareDataInputStream dis =
      new SizeAwareDataInputStream(new DataInputStream(new ByteArrayInputStream(getContent().get())));

  // 1. Read version for this data block
  int version = dis.readInt();
  HoodieAvroDataBlockVersion logBlockVersion = new HoodieAvroDataBlockVersion(version);

  // Get schema from the header
  Schema writerSchema = new Schema.Parser().parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));

  // If readerSchema was not present, use writerSchema
  if (schema == null) {
    schema = writerSchema;
  }

  // Records are decoded with the writer schema and resolved to the reader schema.
  GenericDatumReader<IndexedRecord> reader = new GenericDatumReader<>(writerSchema, schema);
  // 2. Get the total records
  // Stays 0 (so nothing is read below) when this version carries no record count.
  int totalRecords = 0;
  if (logBlockVersion.hasRecordCount()) {
    totalRecords = dis.readInt();
  }
  List<IndexedRecord> records = new ArrayList<>(totalRecords);

  // 3. Read the content
  for (int i = 0; i < totalRecords; i++) {
    int recordLength = dis.readInt();
    // Decode straight from the content array, starting at the number of bytes the stream has
    // read so far and spanning recordLength bytes; the cached decoder is offered for reuse.
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(getContent().get(), dis.getNumberOfBytesRead(),
        recordLength, decoderCache.get());
    decoderCache.set(decoder);
    IndexedRecord record = reader.read(null, decoder);
    records.add(record);
    // The decoder did not consume from dis, so advance the stream past this record manually.
    dis.skipBytes(recordLength);
  }
  dis.close();
  this.records = records;
  // Free up content to be GC'd, deflate
  deflate();
}
 
Example #20
Source File: AvroGenericDeserializer.java    From avro-util with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Reads one object from {@code decoder}.
 *
 * @param reuseRecord instance offered to the reader for reuse
 * @param decoder decoder positioned at the encoded object
 * @return the object produced by the reader
 * @throws RuntimeException wrapping any exception raised while reading
 */
public V deserialize(V reuseRecord, BinaryDecoder decoder) throws Exception {
  final V datum;
  try {
    datum = datumReader.read(reuseRecord, decoder);
  } catch (Exception cause) {
    throw new RuntimeException("Could not deserialize bytes back into Avro object", cause);
  }
  return datum;
}
 
Example #21
Source File: GeoWaveAvroFeatureUtils.java    From geowave with Apache License 2.0 5 votes vote down vote up
/**
 * Deserializes a byte array into an AvroSimpleFeature.
 *
 * @param avroData serialized bytes of an AvroSimpleFeature
 * @param avroObjectToReuse null or an AvroSimpleFeature instance to be re-used. If null, a new
 *        object is allocated.
 * @return the AvroSimpleFeature returned by the reader after parsing avroData
 * @throws IOException if the bytes cannot be decoded
 */
private static AvroSimpleFeature deserializeASF(
    final byte[] avroData,
    AvroSimpleFeature avroObjectToReuse) throws IOException {
  final BinaryDecoder featureDecoder = DECODER_FACTORY.binaryDecoder(avroData, null);
  final AvroSimpleFeature target =
      (avroObjectToReuse != null) ? avroObjectToReuse : new AvroSimpleFeature();

  // The shared reader is pointed at the target's schema before every read.
  DATUM_READER.setSchema(target.getSchema());
  return DATUM_READER.read(target, featureDecoder);
}
 
Example #22
Source File: GenericSchemaDecoder.java    From DBus with Apache License 2.0 5 votes vote down vote up
/**
 * Parses the actual data wrapped by the generic schema.
 *
 * @param schema  schema object used to decode the records
 * @param payload array holding the actual data
 * @param start   offset in {@code payload} at which decoding starts
 * @param len     number of bytes available to the decoder
 * @return List<GenericRecord> decoded from the given range, in order
 * @throws Exception if a record cannot be decoded
 */
public List<GenericRecord> decode(Schema schema, byte[] payload, int start, int len) throws Exception {
    logger.trace("Schema:" + schema.toString() + " schema payload:" + new String(payload, "utf-8"));
    final List<GenericRecord> decoded = new LinkedList<>();
    final DatumReader<GenericRecord> recordReader = new GenericDatumReader<>(schema);
    final BinaryDecoder rangeDecoder = DecoderFactory.get().binaryDecoder(payload, start, len, null);
    // Keep reading until the decoder has consumed the whole range.
    while (!rangeDecoder.isEnd()) {
        final GenericRecord next = recordReader.read(null, rangeDecoder);
        decoded.add(next);
    }
    return decoded;
}
 
Example #23
Source File: GenericSchemaDecoder.java    From DBus with Apache License 2.0 5 votes vote down vote up
/**
     * Parses the actual data wrapped by the generic schema.
     *
     * <p>The payload is read as one byte compared against {@code Constants.MAGIC_BYTE}, an int
     * schema id, and then a single encoded record. The schema is looked up in the schema
     * registry by that id; if the lookup fails, the error is logged and the {@code schema}
     * argument is used instead.
     *
     * @param schema  schema object used when the registry lookup fails
     * @param payload actual data, including the leading byte and schema id
     * @return List<GenericRecord> containing the single decoded record
     * @throws Exception if the record cannot be decoded
     */
    public List<GenericRecord> decode(Schema schema, byte[] payload) throws Exception {
        logger.trace("Schema:" + schema.toString() + " schema payload:" + new String(payload, "utf-8"));

        final ByteBuffer header = ByteBuffer.wrap(payload);

        // A wrong magic byte is only reported; decoding continues regardless.
        final byte magic = header.get();
        if (magic != Constants.MAGIC_BYTE) {
            logger.error("Unknown magic byte!");
        }

        final int schemaId = header.getInt();

        try {
            schema = schemaRegistry.getById(schemaId);
        } catch (RestClientException e) {
            logger.error("Schema Registry RestClientException: " + e);
        }

        // 5 = the magic byte plus the int schema id that precede the record.
        final int bodyLength = header.limit() - 5;
        final int bodyStart = header.position() + header.arrayOffset();

        logger.debug("Schema:" + schema.toString() + " schema payload:" + new String(payload, "utf-8"));
        final List<GenericRecord> decoded = new LinkedList<>();
        final DatumReader<GenericRecord> recordReader = new GenericDatumReader<>(schema);
        final BinaryDecoder bodyDecoder =
                DecoderFactory.get().binaryDecoder(header.array(), bodyStart, bodyLength, null);

        decoded.add(recordReader.read(null, bodyDecoder));

        return decoded;
    }
 
Example #24
Source File: TestLog4jAppenderWithAvro.java    From mt-flume with Apache License 2.0 5 votes vote down vote up
/**
 * Logs an {@code AppEvent} through the configured appender, takes the resulting
 * event from the channel and checks that its body decodes back to the same
 * message and that its headers carry the schema literal but no schema URL.
 */
@Test
public void testAvroReflect() throws IOException {
  loadProperties("flume-log4jtest-avro-reflect.properties");
  PropertyConfigurator.configure(props);
  final Logger log4jLogger = LogManager.getLogger(TestLog4jAppenderWithAvro.class);
  final String expectedMessage = "This is log message number " + String.valueOf(0);

  final AppEvent sentEvent = new AppEvent();
  sentEvent.setMessage(expectedMessage);
  log4jLogger.info(sentEvent);

  final Transaction tx = ch.getTransaction();
  tx.begin();
  final Event takenEvent = ch.take();
  Assert.assertNotNull(takenEvent);

  final Schema expectedSchema = ReflectData.get().getSchema(sentEvent.getClass());

  // Decode the event body and compare it with what was logged.
  final ReflectDatumReader<AppEvent> reflectReader = new ReflectDatumReader<AppEvent>(AppEvent.class);
  final BinaryDecoder bodyDecoder = DecoderFactory.get().binaryDecoder(takenEvent.getBody(), null);
  final AppEvent decodedEvent = reflectReader.read(null, bodyDecoder);
  Assert.assertEquals(expectedMessage, decodedEvent.getMessage());

  final Map<String, String> headers = takenEvent.getHeaders();

  Assert.assertNull(headers.get(Log4jAvroHeaders.MESSAGE_ENCODING.toString()));

  Assert.assertNull("Schema URL should not be set",
      headers.get(Log4jAvroHeaders.AVRO_SCHEMA_URL.toString()));
  Assert.assertEquals("Schema string should be set", expectedSchema.toString(),
      headers.get(Log4jAvroHeaders.AVRO_SCHEMA_LITERAL.toString()));

  tx.commit();
  tx.close();
}
 
Example #25
Source File: GenericSchemaDecoder.java    From DBus with Apache License 2.0 5 votes vote down vote up
/**
 * Parses the actual data wrapped by the generic schema.
 *
 * @param schema  schema object used to decode the records
 * @param payload actual data
 * @return List<GenericRecord> decoded from the payload, in order
 * @throws Exception if a record cannot be decoded
 */
public List<GenericRecord> decode(Schema schema, byte[] payload) throws Exception {
    logger.trace("Schema:" + schema.toString() + " schema payload:" + new String(payload, "utf-8"));
    final List<GenericRecord> decoded = new LinkedList<>();
    final DatumReader<GenericRecord> recordReader = new GenericDatumReader<>(schema);
    final BinaryDecoder payloadDecoder = getBinaryDecoder(payload);
    // Keep reading until the decoder has consumed the whole payload.
    while (!payloadDecoder.isEnd()) {
        final GenericRecord next = recordReader.read(null, payloadDecoder);
        decoded.add(next);
    }
    return decoded;
}
 
Example #26
Source File: ImportTransform.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the file named by the element's value, checks the Avro file signature and scans the
 * header metadata map for the schema entry. Outputs the element's key paired with the schema
 * text, or with {@code null} if the metadata holds no schema entry.
 *
 * @param c the process context supplying the input element and receiving the output
 * @throws RuntimeException wrapping any {@link IOException} raised while reading the file,
 *         including a missing file signature
 */
@ProcessElement
public void processElement(ProcessContext c) {
  final KV<String, String> element = c.element();
  final String filePath = element.getValue();

  String schemaText = null;
  final ResourceId fileResource = FileSystems.matchNewResource(filePath, false);
  try (InputStream fileStream = Channels.newInputStream(FileSystems.open(fileResource))) {
    final BinaryDecoder headerDecoder = DecoderFactory.get().binaryDecoder(fileStream, null);
    final byte[] signature = new byte[DataFileConstants.MAGIC.length];
    headerDecoder.readFixed(signature);
    if (!Arrays.equals(signature, DataFileConstants.MAGIC)) {
      throw new IOException("Missing Avro file signature: " + filePath);
    }

    // Walk the metadata map block by block until the schema entry turns up.
    ByteBuffer valueBuffer = ByteBuffer.allocate(512);
    long entriesInBlock = headerDecoder.readMapStart();
    while (entriesInBlock > 0 && schemaText == null) {
      for (long entry = 0; entry < entriesInBlock; entry++) {
        final String key = headerDecoder.readString();
        // readBytes() clears the buffer and returns a buffer where:
        // - position is the start of the bytes read
        // - limit is the end of the bytes read
        valueBuffer = headerDecoder.readBytes(valueBuffer);
        final byte[] valueBytes = new byte[valueBuffer.remaining()];
        valueBuffer.get(valueBytes);
        if (key.equals(DataFileConstants.SCHEMA)) {
          schemaText = new String(valueBytes, "UTF-8");
          break;
        }
      }
      entriesInBlock = headerDecoder.mapNext();
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  c.output(KV.of(element.getKey(), schemaText));
}
 
Example #27
Source File: NamespaceValidationTest.java    From avro-util with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Under any Avro runtime later than 1.4, a record written with the namespace-less schema must
 * fail to deserialize when the reader schema declares a namespace. Skipped on other runtimes.
 */
@Test
public void testModernAvroValidatesNamespaces() throws Exception {
  final AvroVersion detectedVersion = AvroCompatibilityHelper.getRuntimeAvroVersion();
  if (!detectedVersion.laterThan(AvroVersion.AVRO_1_4)) {
    throw new SkipException("only supported under modern avro. runtime version detected as " + detectedVersion);
  }
  final String namespacedAvsc = TestUtil.load("HasNamespace.avsc");
  final Schema namespaced = Schema.parse(namespacedAvsc);
  final String bareAvsc = TestUtil.load("HasNoNamespace.avsc");
  final Schema bare = Schema.parse(bareAvsc);

  final GenericData.Record original = new GenericData.Record(bare);
  original.put("f", AvroCompatibilityHelper.newEnumSymbol(bare.getField("f").schema(), "B"));

  // Serialize the record with the namespace-less schema.
  final ByteArrayOutputStream sink = new ByteArrayOutputStream();
  final GenericDatumWriter writer = new GenericDatumWriter(bare);
  final BinaryEncoder encoder = AvroCompatibilityHelper.newBinaryEncoder(sink);
  //noinspection unchecked
  writer.write(original, encoder);
  encoder.flush();
  final byte[] serialized = sink.toByteArray();

  // Read it back with the namespaced schema as the reader schema.
  final GenericDatumReader<GenericData.Record> reader = new GenericDatumReader<>(bare, namespaced);
  final BinaryDecoder decoder = DecoderFactory.defaultFactory().createBinaryDecoder(serialized, null);
  try {
    reader.read(null, decoder);
    Assert.fail("deserialization was expected to fail");
  } catch (Exception expected) {
    Assert.assertTrue(expected.getMessage().contains("Found EnumType, expecting com.acme.EnumType"));
  }
}
 
Example #28
Source File: NamespaceValidationTest.java    From avro-util with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Under Avro 1.4, a record written with the namespace-less schema deserializes successfully
 * even though the reader schema declares a namespace. Skipped on every other runtime.
 */
@Test
public void testAvro14DoesntValidateNamespace() throws Exception {
  final AvroVersion detectedVersion = AvroCompatibilityHelper.getRuntimeAvroVersion();
  if (detectedVersion != AvroVersion.AVRO_1_4) {
    throw new SkipException("only supported under " + AvroVersion.AVRO_1_4 + ". runtime version detected as " + detectedVersion);
  }
  final String namespacedAvsc = TestUtil.load("HasNamespace.avsc");
  final Schema namespaced = Schema.parse(namespacedAvsc);
  final String bareAvsc = TestUtil.load("HasNoNamespace.avsc");
  final Schema bare = Schema.parse(bareAvsc);

  final GenericData.Record original = new GenericData.Record(bare);
  original.put("f", AvroCompatibilityHelper.newEnumSymbol(bare.getField("f").schema(), "B"));

  // Serialize the record with the namespace-less schema.
  final ByteArrayOutputStream sink = new ByteArrayOutputStream();
  final GenericDatumWriter writer = new GenericDatumWriter(bare);
  final BinaryEncoder encoder = AvroCompatibilityHelper.newBinaryEncoder(sink);
  //noinspection unchecked
  writer.write(original, encoder);
  encoder.flush();
  final byte[] serialized = sink.toByteArray();

  // Read it back with the namespaced schema as the reader schema.
  final GenericDatumReader<GenericData.Record> reader = new GenericDatumReader<>(bare, namespaced);
  final BinaryDecoder decoder = DecoderFactory.defaultFactory().createBinaryDecoder(serialized, null);
  final GenericData.Record roundTripped = reader.read(null, decoder);

  final String symbol = String.valueOf(roundTripped.get("f"));
  Assert.assertEquals(symbol, "B");
}
 
Example #29
Source File: AvroGenericDeserializer.java    From avro-util with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Reads objects from {@code decoder} until it reports the end of its input.
 *
 * @param decoder decoder positioned at the first encoded object
 * @return the decoded objects, in the order they were read
 * @throws RuntimeException wrapping any exception raised while reading
 */
public Iterable<V> deserializeObjects(BinaryDecoder decoder) throws Exception {
  // Diamond instead of a raw ArrayList, so the list is type-checked as List<V>.
  List<V> objects = new ArrayList<>();
  try {
    while (!decoder.isEnd()) {
      objects.add(datumReader.read(null, decoder));
    }
  } catch (Exception e) {
    throw new RuntimeException("Could not deserialize bytes back into Avro objects", e);
  }

  return objects;
}
 
Example #30
Source File: OnrampImplTest.java    From data-highway with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that {@code value} starts with a zero byte followed by the int 1, and that the
 * remaining bytes decode to a record whose field {@code f} has the expected string form.
 *
 * @param value the full message bytes, header included
 * @param expected expected string form of field {@code f}
 * @throws IOException if the record bytes cannot be decoded
 */
private void assertRecord(byte[] value, String expected) throws IOException {
  final ByteBuffer header = ByteBuffer.wrap(value);

  assertThat(header.get(), is((byte) 0));
  assertThat(header.getInt(), is(1));

  // Whatever follows the five header bytes is the encoded record.
  final int bodyOffset = header.position();
  final int bodyLength = header.remaining();
  final BinaryDecoder bodyDecoder = DecoderFactory.get().binaryDecoder(value, bodyOffset, bodyLength, null);
  final GenericDatumReader<Record> recordReader = new GenericDatumReader<Record>(SCHEMA);
  final Record decoded = recordReader.read(null, bodyDecoder);
  assertThat(decoded.get("f").toString(), is(expected));
}