Java Code Examples for org.apache.avro.generic.GenericDatumReader

The following examples show how to use org.apache.avro.generic.GenericDatumReader. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: data-highway   Source File: AvroRecordWriterTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void typical() throws Exception {
  // Two-field schema: required long "id" plus required string "name".
  Schema schema = SchemaBuilder
      .builder()
      .record("record")
      .fields()
      .requiredLong("id")
      .requiredString("name")
      .endRecord();
  Record value = new GenericRecordBuilder(schema).set("id", 1L).set("name", "hello").build();

  // Write the single record to an in-memory Avro container file.
  ByteArrayOutputStream output = new ByteArrayOutputStream();
  RecordWriter writer = new Factory(CodecFactory.nullCodec()).create(schema, output);
  writer.write(value);
  writer.close();

  // Read it back and verify the round trip yields exactly that one record.
  DatumReader<Record> datumReader = new GenericDatumReader<>(schema);
  DataFileReader<Record> dataFileReader =
      new DataFileReader<>(new SeekableByteArrayInput(output.toByteArray()), datumReader);
  assertThat(dataFileReader.next(), is(value));
  assertThat(dataFileReader.hasNext(), is(false));
  dataFileReader.close();
}
 
Example 2
Source Project: incubator-gobblin   Source File: TestHelper.java    License: Apache License 2.0 6 votes vote down vote up
/** Asserts the Avro file contains exactly the three expected user records, in order. */
public static void assertGenericRecords(File outputAvroFile, Schema schema) throws IOException {
  String[] expectedNames = {"Alyssa", "Ben", "Charlie"};
  try (DataFileReader<GenericRecord> reader =
      new DataFileReader<>(outputAvroFile, new GenericDatumReader<GenericRecord>(schema))) {
    Iterator<GenericRecord> it = reader.iterator();
    for (String expectedName : expectedNames) {
      Assert.assertEquals(it.next().get("name").toString(), expectedName);
    }
    // No trailing records allowed.
    Assert.assertFalse(it.hasNext());
  }
}
 
Example 3
Source Project: tajo   Source File: AvroScanner.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Initializes the AvroScanner.
 */
@Override
public void init() throws IOException {
  // No explicit projection requested: project every column of the table schema.
  if (targets == null) {
    targets = schema.toArray();
  }
  prepareProjection(targets);
  // Output tuple sized to the projected columns only.
  outTuple = new VTuple(projectionMap.length);

  // Resolve the Avro schema from table meta/config and cache its field list.
  Schema avroSchema = AvroUtil.getAvroSchema(meta, conf);
  avroFields = avroSchema.getFields();

  // Open the fragment's file through a Hadoop-backed seekable input.
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(avroSchema);
  SeekableInput input = new FsInput(fragment.getPath(), conf);
  dataFileReader = new DataFileReader<>(input, datumReader);
  super.init();
}
 
Example 4
Source Project: kite   Source File: AvroMorphlineTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs the morphline over the sample tweet container file three times
 * (exercising object/buffer reuse) and compares every collected record
 * against the records read straight from the Avro file.
 */
private void runTweetContainer(String morphlineConfigFile, String[] fieldNames) throws Exception {
  File file = new File(RESOURCES_DIR + "/test-documents/sample-statuses-20120906-141433-medium.avro");
  morphline = createMorphline(morphlineConfigFile);
  for (int j = 0; j < 3; j++) { // also test reuse of objects and low level avro buffers
    Record record = new Record();
    byte[] body = Files.toByteArray(file);
    record.put(Fields.ATTACHMENT_BODY, body);
    collector.reset();
    startSession();
    Notifications.notifyBeginTransaction(morphline);
    assertTrue(morphline.process(record));
    assertEquals(1, collector.getNumStartEvents());
    assertEquals(2104, collector.getRecords().size());

    // FIX: use parameterized types instead of raw ones, and close the reader —
    // the original leaked a file handle on every loop iteration.
    try (FileReader<GenericData.Record> reader =
        new DataFileReader<>(file, new GenericDatumReader<GenericData.Record>())) {
      int i = 0;
      while (reader.hasNext()) {
        Record actual = collector.getRecords().get(i);
        GenericData.Record expected = reader.next();
        assertTweetEquals(expected, actual, fieldNames, i);
        i++;
      }
      // Every collected record must have been matched against the file.
      assertEquals(collector.getRecords().size(), i);
    }
  }
}
 
Example 5
Source Project: nifi   Source File: TestSplitAvro.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testRecordSplitDatafileOutputWithoutMetadata() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new SplitAvro());
    runner.setProperty(SplitAvro.TRANSFER_METADATA, "false");

    runner.enqueue(users.toByteArray());
    runner.run();

    // 100 input records become 100 single-record splits; the original passes through.
    runner.assertTransferCount(SplitAvro.REL_SPLIT, 100);
    runner.assertTransferCount(SplitAvro.REL_ORIGINAL, 1);
    runner.assertTransferCount(SplitAvro.REL_FAILURE, 0);

    runner.getFlowFilesForRelationship(SplitAvro.REL_ORIGINAL).get(0).assertAttributeEquals(FRAGMENT_COUNT.key(), "100");
    final List<MockFlowFile> splits = runner.getFlowFilesForRelationship(SplitAvro.REL_SPLIT);
    checkDataFileSplitSize(splits, 1, false);

    // With TRANSFER_METADATA disabled, none of the custom meta keys may survive.
    for (final MockFlowFile split : splits) {
        try (final ByteArrayInputStream in = new ByteArrayInputStream(split.toByteArray());
             final DataFileStream<GenericRecord> reader = new DataFileStream<>(in, new GenericDatumReader<GenericRecord>())) {
            for (final String metaKey : new String[]{META_KEY1, META_KEY2, META_KEY3}) {
                Assert.assertFalse(reader.getMetaKeys().contains(metaKey));
            }
        }
    }
}
 
Example 6
Source Project: mt-flume   Source File: AvroEventDeserializer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Opens the Avro container on the resettable input, positions the reader at
 * the nearest sync point to the remembered offset, and prepares the
 * writer/encoder pair plus the schema fingerprint.
 */
private void initialize() throws IOException, NoSuchAlgorithmException {
  // Bridge the resettable input into Avro's seekable interface; remember the
  // current position, read the header at 0, then sync back to that position.
  SeekableResettableInputBridge in = new SeekableResettableInputBridge(ris);
  long pos = in.tell();
  in.seek(0L);
  fileReader = new DataFileReader<GenericRecord>(in,
      new GenericDatumReader<GenericRecord>());
  fileReader.sync(pos);

  schema = fileReader.getSchema();
  // FIX: parameterized the raw GenericDatumWriter to match the reader side.
  datumWriter = new GenericDatumWriter<GenericRecord>(schema);
  out = new ByteArrayOutputStream();
  encoder = EncoderFactory.get().binaryEncoder(out, encoder);

  // Fingerprint the schema (CRC-64-AVRO) and cache its hex form.
  schemaHash = SchemaNormalization.parsingFingerprint("CRC-64-AVRO", schema);
  schemaHashString = Hex.encodeHexString(schemaHash);
}
 
Example 7
/**
 * Reads the first record of the nested.avro fixture, converts it through
 * {@link AvroToRestJsonEntryConverter}, and compares the result with the
 * expected REST entry (resource path exactly, JSON body leniently).
 */
private void testConversion(RestEntry<JsonObject> expected, WorkUnitState actualWorkUnitState) throws DataConversionException, IOException, JSONException {
  Schema schema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/nested.avsc"));
  GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);

  File tmp = File.createTempFile(this.getClass().getSimpleName(), null);
  tmp.deleteOnExit();
  try {
    FileUtils.copyInputStreamToFile(getClass().getResourceAsStream("/converter/nested.avro"), tmp);
    // FIX: try-with-resources guarantees the reader is closed even when the
    // conversion or an assertion throws (the original leaked it on failure).
    try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(tmp, datumReader)) {
      GenericRecord avroRecord = dataFileReader.next();

      AvroToRestJsonEntryConverter converter = new AvroToRestJsonEntryConverter();
      RestEntry<JsonObject> actual = converter.convertRecord(null, avroRecord, actualWorkUnitState).iterator().next();

      Assert.assertEquals(actual.getResourcePath(), expected.getResourcePath());
      // Lenient JSON comparison: field order does not matter.
      JSONAssert.assertEquals(expected.getRestEntryVal().toString(), actual.getRestEntryVal().toString(), false);

      converter.close();
    }
  } finally {
    tmp.delete();
  }
}
 
Example 8
Source Project: celos   Source File: AvroToJsonConverter.java    License: Apache License 2.0 6 votes vote down vote up
@Override
public FixFile convert(TestRun testRun, FixFile ff) throws IOException {
    byte[] bytes = IOUtils.toByteArray(ff.getContent());
    // Empty fixture: nothing to convert, hand it back untouched.
    if (bytes.length == 0) {
        return ff;
    }
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    GenericDatumReader<Object> reader = new GenericDatumReader<>();
    // try-with-resources replaces the original try/finally close.
    try (FileReader<Object> fileReader =
            DataFileReader.openReader(new SeekableByteArrayInput(bytes), reader)) {
        // Re-encode every datum as JSON using the schema embedded in the file.
        Schema schema = fileReader.getSchema();
        DatumWriter<Object> writer = new GenericDatumWriter<>(schema);
        JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, os);
        for (Object datum : fileReader) {
            writer.write(datum, encoder);
        }
        encoder.flush();
    }
    return new FixFile(new ByteArrayInputStream(os.toByteArray()));
}
 
Example 9
Source Project: xml-avro   Source File: Converter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads the first "Element" record from the given Avro file, unwraps it into
 * a DOM tree, and writes the resulting XML document to {@code xmlFile}.
 */
public static void avroToXml(File avroFile, File xmlFile) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(protocol.getType("Element"));
    GenericRecord record;
    // FIX: the original never closed the DataFileReader, leaking the file handle.
    try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(avroFile, datumReader)) {
        record = dataFileReader.next();
    }

    Document doc;
    try {
        doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
    } catch (ParserConfigurationException e) {
        throw new RuntimeException(e);
    }

    Element el = unwrapElement(record, doc);
    doc.appendChild(el);

    saveDocument(doc, xmlFile);
}
 
Example 10
Source Project: localization_nifi   Source File: TestSplitAvro.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies each split flow file is a valid Avro container holding exactly
 * {@code expectedRecordsPerSplit} records, optionally checking metadata.
 */
private void checkDataFileSplitSize(List<MockFlowFile> flowFiles, int expectedRecordsPerSplit, boolean checkMetadata) throws IOException {
    for (final MockFlowFile flowFile : flowFiles) {
        try (final ByteArrayInputStream in = new ByteArrayInputStream(flowFile.toByteArray());
            final DataFileStream<GenericRecord> reader = new DataFileStream<>(in, new GenericDatumReader<GenericRecord>())) {

            // Count records, reusing one instance across next() calls.
            GenericRecord reuse = null;
            int recordCount = 0;
            while (reader.hasNext()) {
                reuse = reader.next(reuse);
                Assert.assertNotNull(reuse.get("name"));
                Assert.assertNotNull(reuse.get("favorite_number"));
                recordCount++;
            }
            assertEquals(expectedRecordsPerSplit, recordCount);

            if (checkMetadata) {
                // Metadata written by the processor must round-trip intact.
                assertEquals(META_VALUE1, reader.getMetaString(META_KEY1));
                assertEquals(META_VALUE2, reader.getMetaLong(META_KEY2));
                assertEquals(META_VALUE3, new String(reader.getMeta(META_KEY3), "UTF-8"));
            }
        }
    }
}
 
Example 11
Source Project: datafu   Source File: AvroDateRangeMetadata.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads the date range from the metadata stored in an Avro file.
 *
 * @param fs file system to access path
 * @param path path to get date range for
 * @return date range
 * @throws IOException IOException
 */
public static DateRange getOutputFileDateRange(FileSystem fs, Path path) throws IOException
{
  // Inspect the first non-hidden file under the given path; the range lives
  // in the container-file metadata, not in the records themselves.
  path = fs.listStatus(path, PathUtils.nonHiddenPathFilter)[0].getPath();
  try (FSDataInputStream dataInputStream = fs.open(path);
       DataFileStream<GenericRecord> dataFileStream =
           new DataFileStream<GenericRecord>(dataInputStream, new GenericDatumReader<GenericRecord>()))
  {
    long startMillis = Long.parseLong(dataFileStream.getMetaString(METADATA_DATE_START));
    long endMillis = Long.parseLong(dataFileStream.getMetaString(METADATA_DATE_END));
    return new DateRange(new Date(startMillis), new Date(endMillis));
  }
}
 
Example 12
Source Project: nifi   Source File: TestSelectHive3QL.java    License: Apache License 2.0 6 votes vote down vote up
/** Counts the records in the Avro container carried by the given stream. */
private long getNumberOfRecordsFromStream(InputStream in) throws IOException {
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) {
        long count = 0;
        // Reuse a single record instance across next() calls to avoid
        // allocating (and garbage collecting) one object per item.
        GenericRecord reuse = null;
        while (dataFileReader.hasNext()) {
            reuse = dataFileReader.next(reuse);
            count++;
        }
        return count;
    }
}
 
Example 13
Source Project: incubator-pinot   Source File: MapOutputValue.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Decodes a serialized value of the form
 * [int nameLen][name bytes][int recordLen][avro record bytes]
 * back into a {@link MapOutputValue}, looking the schema up by name.
 */
public static MapOutputValue fromBytes(byte[] bytes, Map<String, Schema> schemaMap)
    throws IOException {
  DataInputStream dataInputStream = new DataInputStream(new ByteArrayInputStream(bytes));
  int length = dataInputStream.readInt();
  byte[] sourceNameBytes = new byte[length];
  // FIX: read() may return fewer bytes than requested; readFully() guarantees
  // the buffer is filled (or throws EOFException on truncated input).
  dataInputStream.readFully(sourceNameBytes);
  // NOTE(review): uses the platform default charset, mirroring the writer
  // side as-is — consider standardizing both ends on UTF-8.
  String schemaName = new String(sourceNameBytes);

  int recordDataLength = dataInputStream.readInt();

  byte[] recordBytes = new byte[recordDataLength];
  dataInputStream.readFully(recordBytes);
  Schema schema = schemaMap.get(schemaName);
  GenericRecord record = new GenericData.Record(schema);
  // Reuse the cached decoder instance across calls.
  binaryDecoder = DecoderFactory.get().binaryDecoder(recordBytes, binaryDecoder);
  GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>(schema);
  gdr.read(record, binaryDecoder);
  return new MapOutputValue(schemaName, record);
}
 
Example 14
Source Project: incubator-gobblin   Source File: AvroUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Get the latest avro schema for a directory
 * @param directory the input dir that contains avro files
 * @param fs the {@link FileSystem} for the given directory.
 * @param latest true to return latest schema, false to return oldest schema
 * @return the latest/oldest schema in the directory
 * @throws IOException
 */
public static Schema getDirectorySchema(Path directory, FileSystem fs, boolean latest) throws IOException {
  Schema result = null;
  try (Closer closer = Closer.create()) {
    List<FileStatus> candidates = getDirectorySchemaHelper(directory, fs);
    if (candidates == null || candidates.isEmpty()) {
      LOG.warn("There is no previous avro file in the directory: " + directory);
    } else {
      // Index 0 maps to "latest" — presumably the helper sorts newest-first;
      // confirm against getDirectorySchemaHelper.
      FileStatus chosen = latest ? candidates.get(0) : candidates.get(candidates.size() - 1);
      LOG.debug("Path to get the avro schema: " + chosen);
      FsInput input = new FsInput(chosen.getPath(), fs.getConf());
      GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>();
      // Closer guarantees the DataFileReader is closed when this block exits.
      result = closer.register(new DataFileReader<>(input, reader)).getSchema();
    }
  } catch (IOException ioe) {
    throw new IOException("Cannot get the schema for directory " + directory, ioe);
  }
  return result;
}
 
Example 15
Source Project: DBus   Source File: OracleGenericSchemaDecoder.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Pre-loads the generic wrapper schema plus the built-in DBus table schemas
 * (and their hashes), then builds the shared datum reader/writer pair.
 */
private void initDecoder() {
    try {
        genericSchema = OracleGenericSchemaProvider.getInstance().getSchema("generic_wrapper.avsc");

        fullPullSchema = OracleGenericSchemaProvider.getInstance().getSchema("DBUS.DB_FULL_PULL_REQUESTS.avsc");
        fullPullHash = OracleGenericSchemaProvider.getInstance().getSchemaHash("DBUS.DB_FULL_PULL_REQUESTS.avsc");

        syncEventSchema = OracleGenericSchemaProvider.getInstance().getSchema("DBUS.META_SYNC_EVENT.avsc");
        syncEventHash = OracleGenericSchemaProvider.getInstance().getSchemaHash("DBUS.META_SYNC_EVENT.avsc");

        heartbeatSchema = OracleGenericSchemaProvider.getInstance().getSchema("DBUS.DB_HEARTBEAT_MONITOR.avsc");
        heartbeatHash = OracleGenericSchemaProvider.getInstance().getSchemaHash("DBUS.DB_HEARTBEAT_MONITOR.avsc");

        datumReader = new GenericDatumReader<>(genericSchema);
        datumWriter = new GenericDatumWriter<>(genericSchema);
    } catch (Exception e) {
        // FIX: dropped the redundant e.printStackTrace(); logger.error(msg, e)
        // already records the full stack trace.
        logger.error("OracleGenericSchemaDecoder Initialization Error!", e);
    }
}
 
Example 16
Source Project: hiped2   Source File: StockMessageDecoder.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Decodes a raw Avro-binary payload into a Stock record wrapped for Camus.
 *
 * @throws RuntimeException (with the IOException as cause) when decoding fails
 */
@Override
public CamusWrapper<GenericData.Record> decode(byte[] bytes) {

  DatumReader<GenericData.Record> reader = new GenericDatumReader<GenericData.Record>(Stock.SCHEMA$);

  try {
    GenericData.Record record = reader.read(null, factory.binaryDecoder(bytes, null));
    System.out.println("Decoded " + record);
    return new CamusWrapper<GenericData.Record>(record);
  } catch (IOException e) {
    // FIX: removed the redundant printStackTrace(); the cause is preserved
    // in the rethrown RuntimeException and will be logged by the caller.
    throw new RuntimeException("Got IO exception", e);
  }
}
 
Example 17
Source Project: datacollector   Source File: TestAvroDataGenerator.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testAvroGeneratorDecimalType() throws Exception {
  Map<String, Field> map = new LinkedHashMap<>();
  map.put("decimal", Field.create(Field.Type.DECIMAL, BigDecimal.valueOf(1.5)));
  Record record = RecordCreator.create();
  record.set(Field.create(map));

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataGenerator gen = new AvroDataOutputStreamGenerator(
    false,
    baos,
    COMPRESSION_CODEC_DEFAULT,
    DECIMAL_SCHEMA,
    new HashMap<String, Object>(),
    null,
    null,
    0
  );
  gen.write(record);
  gen.close();

  //reader schema must be extracted from the data file
  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
  // FIX: try-with-resources closes the reader (the original leaked it).
  try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
      new SeekableByteArrayInput(baos.toByteArray()), reader)) {
    Assert.assertTrue(dataFileReader.hasNext());
    GenericRecord readRecord = dataFileReader.next();

    // 0x0F == unscaled 15, i.e. 1.5 at scale 1 (per DECIMAL_SCHEMA — confirm scale).
    Assert.assertArrayEquals(new byte[] {0x0F}, ((ByteBuffer) readRecord.get("decimal")).array());
    Assert.assertFalse(dataFileReader.hasNext());
  }
}
 
Example 18
Source Project: datacollector   Source File: TestAvroDataGenerator.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testAvroGeneratorListMapType() throws Exception {
  LinkedHashMap<String, Field> linkedHashMap = new LinkedHashMap<>();
  linkedHashMap.put("name", Field.create("Jon Natkins"));
  linkedHashMap.put("age", Field.create(29));
  linkedHashMap.put("emails", Field.create(ImmutableList.of(Field.create("[email protected]"))));
  linkedHashMap.put("boss", Field.create(Field.Type.MAP, null));
  Field listMapField = Field.createListMap(linkedHashMap);
  Record record = RecordCreator.create();
  record.set(listMapField);

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataGenerator gen = new AvroDataOutputStreamGenerator(
      false,
      baos,
      COMPRESSION_CODEC_DEFAULT,
      SCHEMA,
      new HashMap<String, Object>(),
      null,
      null,
      0
  );
  gen.write(record);
  gen.close();

  //reader schema must be extracted from the data file
  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
  // FIX: try-with-resources closes the reader (the original leaked it).
  try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
      new SeekableByteArrayInput(baos.toByteArray()), reader)) {
    Assert.assertTrue(dataFileReader.hasNext());
    GenericRecord readRecord = dataFileReader.next();

    Assert.assertEquals("Jon Natkins", readRecord.get("name").toString());
    Assert.assertEquals(29, readRecord.get("age"));
    Assert.assertFalse(dataFileReader.hasNext());
  }
}
 
Example 19
Source Project: pulsar   Source File: GenericAvroReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a reader that decodes Avro payloads into generic records.
 *
 * @param writerSchema schema the data was written with; may be null, in which
 *        case records are decoded with the reader schema alone
 * @param readerSchema schema used to shape the decoded records
 * @param schemaVersion opaque schema-version bytes carried alongside the data
 */
public GenericAvroReader(Schema writerSchema, Schema readerSchema, byte[] schemaVersion) {
    this.schema = readerSchema;
    // Pre-compute the (name, position) field list exposed to callers.
    this.fields = schema.getFields()
            .stream()
            .map(f -> new Field(f.name(), f.pos()))
            .collect(Collectors.toList());
    this.schemaVersion = schemaVersion;
    // With both schemas present Avro performs writer->reader schema resolution.
    if (writerSchema == null) {
        this.reader = new GenericDatumReader<>(readerSchema);
    } else {
        this.reader = new GenericDatumReader<>(writerSchema, readerSchema);
    }
    this.byteArrayOutputStream = new ByteArrayOutputStream();
    this.encoder = EncoderFactory.get().binaryEncoder(this.byteArrayOutputStream, encoder);

    // Optional offset property on the schema overrides where decoding starts;
    // defaults to 0. NOTE(review): exact consumer of this offset is outside
    // this snippet — confirm against GenericAvroSchema.
    if (schema.getObjectProp(GenericAvroSchema.OFFSET_PROP) != null) {
        this.offset = Integer.parseInt(schema.getObjectProp(GenericAvroSchema.OFFSET_PROP).toString());
    } else {
        this.offset = 0;
    }

}
 
Example 20
Source Project: kareldb   Source File: KafkaValueDeserializer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Deserializes a versioned Avro payload into the map of versioned values.
 * Returns null for a null payload (Kafka tombstone convention).
 */
@Override
public NavigableMap<Long, VersionedValue> deserialize(String topic, byte[] payload) throws SerializationException {
    if (payload == null) {
        return null;
    }
    try {
        ByteBuffer buffer = getByteBuffer(payload);
        // Schema version is encoded right after whatever prefix getByteBuffer consumed.
        int version = buffer.getInt();
        // NOTE(review): the "- 1" presumably accounts for a 1-byte magic prefix
        // consumed inside getByteBuffer — confirm against the serializer.
        int length = buffer.limit() - 1 - VERSION_SIZE;
        int start = buffer.position() + buffer.arrayOffset();
        // Readers are cached per schema version to avoid re-parsing schemas.
        DatumReader<GenericArray<GenericRecord>> reader = readers.get(version);
        if (reader == null) {
            KafkaSchema schema = (KafkaSchema) table.getSchema();
            KafkaSchemaValue schemaValue = schema.getSchemaValue(table.getName(), version);
            Schema writerSchema = AvroUtils.parseSchema(schemaValue.getSchema());
            Pair<Schema, Schema> schemas = getKeyValueSchemas(writerSchema);
            Schema valueSchema = schemas.right;
            // Resolve from the historical writer (value) schema to the current one.
            reader = new GenericDatumReader<>(valueSchema, avroSchema, KafkaTable.GENERIC);
            readers.put(version, reader);
        }
        GenericArray<GenericRecord> array = reader.read(
            null, decoderFactory.binaryDecoder(buffer.array(), start, length, null));
        return toValue(array);
    } catch (IOException | RuntimeException e) {
        // avro deserialization may throw AvroRuntimeException, NullPointerException, etc
        LOG.error("Error deserializing Avro value " + e.getMessage());
        throw new SerializationException("Error deserializing Avro value", e);
    }
}
 
Example 21
Source Project: datafu   Source File: PartitionCollapsingJoinTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Loads (memberId -> impression/click counts) from every Avro output file
 * under the given timestamped directory, asserting each member appears once.
 */
private HashMap<Long,ImpressionClick> loadOutputCounts(String timestamp) throws IOException
{
  HashMap<Long,ImpressionClick> counts = new HashMap<Long,ImpressionClick>();
  FileSystem fs = getFileSystem();
  Assert.assertTrue(fs.exists(new Path(_outputPath, timestamp)));
  for (FileStatus stat : fs.globStatus(new Path(_outputPath, timestamp + "/*.avro")))
  {
    _log.info(String.format("found: %s (%d bytes)", stat.getPath(), stat.getLen()));
    FSDataInputStream is = fs.open(stat.getPath());
    DataFileStream<GenericRecord> stream =
        new DataFileStream<GenericRecord>(is, new GenericDatumReader<GenericRecord>());
    try
    {
      while (stream.hasNext())
      {
        GenericRecord r = stream.next();
        GenericRecord key = (GenericRecord) r.get("key");
        GenericRecord value = (GenericRecord) r.get("value");
        Long memberId = (Long) key.get("id");
        // Each member must appear at most once across the output files.
        Assert.assertFalse(counts.containsKey(memberId));
        ImpressionClick data = new ImpressionClick();
        data.clicks = (Integer) value.get("clicks");
        data.impressions = (Integer) value.get("impressions");
        counts.put(memberId, data);
      }
    }
    finally
    {
      stream.close();
    }
  }
  return counts;
}
 
Example 22
Source Project: HBase-ToHDFS   Source File: AvroReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * CLI entry point: prints the schema of an Avro data file and dumps up to
 * {@code max.lines.to.read.optional} records (default: all).
 */
public static void main(String[] args) throws IOException {
  // FIX: the original printed usage on zero args but then fell through to
  // args[0]/args[1], throwing ArrayIndexOutOfBoundsException. Bail out when
  // either mandatory argument is missing.
  if (args.length < 2) {
    System.out.println("AvroReader {dataFile} {schemaFile} {max.lines.to.read.optional}");
    return;
  }

  String dataFile = args[0];
  String schemaFile = args[1];
  int recordsToRead = Integer.MAX_VALUE;
  if (args.length > 2) {
    recordsToRead = Integer.parseInt(args[2]);
  }

  Schema.Parser parser = new Schema.Parser();
  Configuration config = new Configuration();
  FileSystem fs = FileSystem.get(config);

  Schema schema = parser.parse(fs.open(new Path(schemaFile)));

  Path dataFilePath = new Path(dataFile);
  FileStatus fileStatus = fs.getFileStatus(dataFilePath);

  AvroFSInput input = new AvroFSInput(fs.open(dataFilePath), fileStatus.getLen());

  DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
  // FIX: close the reader when done (the original leaked it).
  try (DataFileReader<GenericRecord> dataFileReader =
      new DataFileReader<GenericRecord>(input, datumReader)) {
    System.out.println("Schema: " + dataFileReader.getSchema());
    System.out.println();
    int counter = 0;
    while (dataFileReader.hasNext() && counter++ < recordsToRead) {
      GenericRecord r = dataFileReader.next();
      System.out.println(counter + " : " + r);
    }
  }
}
 
Example 23
Source Project: mt-flume   Source File: TestHDFSEventSink.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads every Avro output file matching the prefix, removes each decoded
 * event body from {@code bodies}, and asserts all expected bodies were found.
 */
private void verifyOutputAvroFiles(FileSystem fs, Configuration conf, String dir, String prefix, List<String> bodies) throws IOException {
  int found = 0;
  int expected = bodies.size();
  for(String outputFile : getAllFiles(dir)) {
    String name = (new File(outputFile)).getName();
    if(name.startsWith(prefix)) {
      // FIX: try-with-resources closes both stream and input even when
      // decoding throws (the original leaked them on any mid-loop exception).
      try (FSDataInputStream input = fs.open(new Path(outputFile));
           DataFileStream<GenericRecord> avroStream =
               new DataFileStream<GenericRecord>(input, new GenericDatumReader<GenericRecord>())) {
        GenericRecord record = new GenericData.Record(avroStream.getSchema());
        while (avroStream.hasNext()) {
          avroStream.next(record);
          ByteBuffer body = (ByteBuffer) record.get("body");
          CharsetDecoder decoder = Charsets.UTF_8.newDecoder();
          String bodyStr = decoder.decode(body).toString();
          LOG.debug("Removing event: {}", bodyStr);
          bodies.remove(bodyStr);
          found++;
        }
      }
    }
  }
  Assert.assertTrue("Found = " + found + ", Expected = "  +
      expected + ", Left = " + bodies.size() + " " + bodies,
        bodies.size() == 0);
}
 
Example 24
Source Project: hudi   Source File: HoodieAvroDataBlock.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Decodes this log block's byte content into Avro records: reads the block
 * version, resolves writer schema from the block header (falling back to it
 * as the reader schema when none was set), then decodes each length-prefixed
 * record directly from the backing byte array.
 */
@Override
protected void deserializeRecords() throws IOException {
  SizeAwareDataInputStream dis =
      new SizeAwareDataInputStream(new DataInputStream(new ByteArrayInputStream(getContent().get())));

  // 1. Read version for this data block
  int version = dis.readInt();
  HoodieAvroDataBlockVersion logBlockVersion = new HoodieAvroDataBlockVersion(version);

  // Get schema from the header
  Schema writerSchema = new Schema.Parser().parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));

  // If readerSchema was not present, use writerSchema
  if (schema == null) {
    schema = writerSchema;
  }

  // Resolving reader: decodes bytes written with writerSchema into records
  // shaped by the (possibly different) reader schema.
  GenericDatumReader<IndexedRecord> reader = new GenericDatumReader<>(writerSchema, schema);
  // 2. Get the total records
  int totalRecords = 0;
  if (logBlockVersion.hasRecordCount()) {
    totalRecords = dis.readInt();
  }
  List<IndexedRecord> records = new ArrayList<>(totalRecords);

  // 3. Read the content
  // Each record is laid out as [int length][avro bytes]. The decoder reads
  // straight from the backing content array at the stream's current offset,
  // so skipBytes() must advance the stream past the record afterwards.
  for (int i = 0; i < totalRecords; i++) {
    int recordLength = dis.readInt();
    // Reuse a cached decoder to avoid per-record allocation.
    // NOTE(review): decoderCache semantics inferred from get()/set() usage — confirm.
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(getContent().get(), dis.getNumberOfBytesRead(),
        recordLength, decoderCache.get());
    decoderCache.set(decoder);
    IndexedRecord record = reader.read(null, decoder);
    records.add(record);
    dis.skipBytes(recordLength);
  }
  dis.close();
  this.records = records;
  // Free up content to be GC'd, deflate
  deflate();
}
 
Example 25
Source Project: registry   Source File: DefaultAvroSerDesHandler.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Picks a datum reader for the given schemas: a SpecificDatumReader when
 * specific-record reading is requested (deriving the reader schema from the
 * writer schema when absent), otherwise a GenericDatumReader.
 */
private DatumReader getDatumReader(Schema writerSchema, Schema readerSchema, boolean useSpecificAvroReader) {
    if (!useSpecificAvroReader) {
        return readerSchema == null
                ? new GenericDatumReader(writerSchema)
                : new GenericDatumReader(writerSchema, readerSchema);
    }
    Schema effectiveReaderSchema =
            readerSchema == null ? this.getReaderSchema(writerSchema) : readerSchema;
    return new SpecificDatumReader(writerSchema, effectiveReaderSchema);
}
 
Example 26
Source Project: kite   Source File: DataModelUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Get the DatumReader for the given type.
 *
 * @param <E> The entity type
 * @param type The Java class of the entity type
 * @param writerSchema The {@link Schema} for entities
 * @return The DatumReader for the given type
 */
@SuppressWarnings("unchecked")
public static <E> DatumReader<E> getDatumReaderForType(Class<E> type, Schema writerSchema) {
  Schema readerSchema = getReaderSchema(type, writerSchema);
  GenericData dataModel = getDataModelForType(type);
  // Reflect is tested before Specific — presumably because ReflectData
  // specializes SpecificData; confirm against the Avro class hierarchy.
  if (dataModel instanceof ReflectData) {
    return new ReflectDatumReader<E>(writerSchema, readerSchema, (ReflectData) dataModel);
  }
  if (dataModel instanceof SpecificData) {
    return new SpecificDatumReader<E>(writerSchema, readerSchema, (SpecificData) dataModel);
  }
  return new GenericDatumReader<E>(writerSchema, readerSchema, dataModel);
}
 
Example 27
Source Project: datacollector   Source File: TestAvroDataGenerator.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testSchemaInHeader() throws Exception {
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataGenerator gen = new AvroDataOutputStreamGenerator(
    true,
    baos,
    COMPRESSION_CODEC_DEFAULT,
    null,
    null,
    null,
    null,
    0
  );
  Record record = createRecord();
  // Schema travels on the record header instead of the generator config.
  record.getHeader().setAttribute(BaseAvroDataGenerator.AVRO_SCHEMA_HEADER, AVRO_SCHEMA);
  gen.write(record);
  gen.close();

  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
  // FIX: try-with-resources closes the reader (the original leaked it).
  try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
      new SeekableByteArrayInput(baos.toByteArray()), reader)) {
    Assert.assertTrue(dataFileReader.hasNext());
    GenericRecord readRecord = dataFileReader.next();

    Assert.assertEquals("hari", readRecord.get("name").toString());
    Assert.assertEquals(3100, readRecord.get("age"));
    Assert.assertFalse(dataFileReader.hasNext());
  }
}
 
Example 28
Source Project: kite   Source File: AvroEntitySerDe.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a datum reader resolving from the written (writer) schema to the
 * in-memory schema; specific vs generic per the {@code specific} flag.
 */
private DatumReader<Object> buildDatumReader(Schema schema,
    Schema writtenSchema) {
  return specific
      ? new SpecificDatumReader<Object>(writtenSchema, schema)
      : new GenericDatumReader<Object>(writtenSchema, schema);
}
 
Example 29
Source Project: components   Source File: Person.java    License: Apache License 2.0 5 votes vote down vote up
/** Decodes a raw (container-less) Avro binary payload into a Person. */
public static Person desFromAvroBytes(byte[] record) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(record, null);
    GenericRecord avroValue = datumReader.read(null, decoder);
    return fromAvroRecord(avroValue);
}
 
Example 30
Source Project: incubator-pinot   Source File: ClusterTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the consumer: the Avro schema is taken from the data file's own
 * header, then the record extractor and datum reader are built from it.
 */
@Override
public void init(Map<String, String> props, Set<String> fieldsToRead, String topicName)
    throws Exception {
  // Load Avro schema (try-with-resources closes the stream once the schema
  // has been captured).
  try (DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile)) {
    _avroSchema = reader.getSchema();
  }
  _recordExtractor = new AvroRecordExtractor();
  _recordExtractor.init(fieldsToRead, null);
  _reader = new GenericDatumReader<>(_avroSchema);
}