org.apache.avro.file.DataFileReader Java Examples
The following examples show how to use
org.apache.avro.file.DataFileReader.
The source file, originating project, and license are noted above each example.
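To set the stage for the project-specific examples, here is a minimal, self-contained sketch of the typical DataFileReader read loop. The file name users.avro is a hypothetical placeholder, not taken from any example below; no schema needs to be supplied, because DataFileReader reads the writer's schema from the Avro file header.

import java.io.File;
import java.io.IOException;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;

public class ReadAvroFileSketch {
  public static void main(String[] args) throws IOException {
    // With no schema argument, GenericDatumReader uses the writer schema stored in the file.
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (DataFileReader<GenericRecord> fileReader =
        new DataFileReader<>(new File("users.avro"), datumReader)) { // hypothetical input file
      System.out.println("Schema: " + fileReader.getSchema());
      GenericRecord record = null;
      while (fileReader.hasNext()) {
        // Passing the previous record lets Avro reuse it instead of allocating a new one.
        record = fileReader.next(record);
        System.out.println(record);
      }
    }
  }
}

Most of the examples below are variations on this loop; they differ mainly in where the bytes come from (a local File, an FsInput or AvroFSInput on HDFS, or a SeekableByteArrayInput over an in-memory buffer) and in which DatumReader implementation (generic, specific, or reflect) decodes each record.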
Example #1
Source File: LobAvroImportTestCase.java From aliyun-maxcompute-data-collectors with Apache License 2.0
/**
 * Import blob data that is smaller than inline lob limit. Blob data
 * should be saved as Avro bytes.
 * @throws IOException
 * @throws SQLException
 */
public void testBlobAvroImportInline() throws IOException, SQLException {
  String [] types = { getBlobType() };
  String expectedVal = "This is short BLOB data";
  String [] vals = { getBlobInsertStr(expectedVal) };

  createTableWithColTypes(types, vals);

  runImport(getArgv());

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  GenericRecord record = reader.next();

  // Verify that blob data is imported as Avro bytes.
  ByteBuffer buf = (ByteBuffer) record.get(getColName(0));
  String returnVal = new String(buf.array());

  assertEquals(getColName(0), expectedVal, returnVal);
}
Example #2
Source File: AvroInputFormat.java From flink with Apache License 2.0
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
  DatumReader<E> datumReader;

  if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
    datumReader = new GenericDatumReader<E>();
  } else {
    datumReader = org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)
      ? new SpecificDatumReader<E>(avroValueType)
      : new ReflectDatumReader<E>(avroValueType);
  }

  if (LOG.isInfoEnabled()) {
    LOG.info("Opening split {}", split);
  }

  SeekableInput in = new FSDataInputStreamWrapper(stream,
    split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen());
  DataFileReader<E> dataFileReader = (DataFileReader) DataFileReader.openReader(in, datumReader);

  if (LOG.isDebugEnabled()) {
    LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
  }

  end = split.getStart() + split.getLength();
  recordsReadSinceLastSync = 0;

  return dataFileReader;
}
Example #3
Source File: AvroOutputFormatTest.java From flink with Apache License 2.0
@Test
public void testGenericRecord() throws IOException {
  final Path outputPath = new Path(File.createTempFile("avro-output-file", "generic.avro").getAbsolutePath());
  final AvroOutputFormat<GenericRecord> outputFormat = new AvroOutputFormat<>(outputPath, GenericRecord.class);
  Schema schema = new Schema.Parser().parse("{\"type\":\"record\", \"name\":\"user\", \"fields\": [{\"name\":\"user_name\", \"type\":\"string\"}, {\"name\":\"favorite_number\", \"type\":\"int\"}, {\"name\":\"favorite_color\", \"type\":\"string\"}]}");
  outputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE);
  outputFormat.setSchema(schema);
  output(outputFormat, schema);

  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(new File(outputPath.getPath()), reader);

  while (dataFileReader.hasNext()) {
    GenericRecord record = dataFileReader.next();
    assertEquals(record.get("user_name").toString(), "testUser");
    assertEquals(record.get("favorite_number"), 1);
    assertEquals(record.get("favorite_color").toString(), "blue");
  }

  // cleanup
  FileSystem fs = FileSystem.getLocalFileSystem();
  fs.delete(outputPath, false);
}
Example #4
Source File: Purge.java From Cubert with Apache License 2.0
private DataFileReader<GenericRecord> createDataFileReader(String filename, boolean localFS) throws IOException {
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> dataFileReader;
  if (localFS) {
    dataFileReader = new DataFileReader<GenericRecord>(new File(filename), datumReader);
  } else {
    Path path = new Path(filename);
    SeekableInput input = new FsInput(path, conf);
    dataFileReader = new DataFileReader<GenericRecord>(input, datumReader);
  }
  return dataFileReader;
}
Example #5
Source File: AvroOutputFormatTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testGenericRecord() throws IOException {
  final Path outputPath = new Path(File.createTempFile("avro-output-file", "generic.avro").getAbsolutePath());
  final AvroOutputFormat<GenericRecord> outputFormat = new AvroOutputFormat<>(outputPath, GenericRecord.class);
  Schema schema = new Schema.Parser().parse("{\"type\":\"record\", \"name\":\"user\", \"fields\": [{\"name\":\"user_name\", \"type\":\"string\"}, {\"name\":\"favorite_number\", \"type\":\"int\"}, {\"name\":\"favorite_color\", \"type\":\"string\"}]}");
  outputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE);
  outputFormat.setSchema(schema);
  output(outputFormat, schema);

  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(new File(outputPath.getPath()), reader);

  while (dataFileReader.hasNext()) {
    GenericRecord record = dataFileReader.next();
    assertEquals(record.get("user_name").toString(), "testUser");
    assertEquals(record.get("favorite_number"), 1);
    assertEquals(record.get("favorite_color").toString(), "blue");
  }

  // cleanup
  FileSystem fs = FileSystem.getLocalFileSystem();
  fs.delete(outputPath, false);
}
Example #6
Source File: AvroToOrcRecordConverter.java From datacollector with Apache License 2.0
public void convert(SeekableInput avroInputFile, Path orcOutputFile) throws IOException {
  DatumReader<GenericRecord> reader = new GenericDatumReader<>();
  try (FileReader<GenericRecord> fileReader = DataFileReader.openReader(avroInputFile, reader)) {
    Schema avroSchema = fileReader.getSchema();

    initializeWriter(avroSchema, orcOutputFile);

    while (fileReader.hasNext()) {
      GenericRecord record = fileReader.next();
      addAvroRecord(record);
    }

    closeWriter();
  }
}
Example #7
Source File: AvroUtils.java From incubator-gobblin with Apache License 2.0
/**
 * Get the latest avro schema for a directory
 * @param directory the input dir that contains avro files
 * @param fs the {@link FileSystem} for the given directory.
 * @param latest true to return latest schema, false to return oldest schema
 * @return the latest/oldest schema in the directory
 * @throws IOException
 */
public static Schema getDirectorySchema(Path directory, FileSystem fs, boolean latest) throws IOException {
  Schema schema = null;
  try (Closer closer = Closer.create()) {
    List<FileStatus> files = getDirectorySchemaHelper(directory, fs);
    if (files == null || files.size() == 0) {
      LOG.warn("There is no previous avro file in the directory: " + directory);
    } else {
      FileStatus file = latest ? files.get(0) : files.get(files.size() - 1);
      LOG.debug("Path to get the avro schema: " + file);

      FsInput fi = new FsInput(file.getPath(), fs.getConf());
      GenericDatumReader<GenericRecord> genReader = new GenericDatumReader<>();
      schema = closer.register(new DataFileReader<>(fi, genReader)).getSchema();
    }
  } catch (IOException ioe) {
    throw new IOException("Cannot get the schema for directory " + directory, ioe);
  }
  return schema;
}
Example #8
Source File: TestAvroEventSerializer.java From mt-flume with Apache License 2.0
public void validateAvroFile(File file) throws IOException {
  // read the events back using GenericRecord
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> fileReader = new DataFileReader<GenericRecord>(file, reader);
  GenericRecord record = new GenericData.Record(fileReader.getSchema());
  int numEvents = 0;
  while (fileReader.hasNext()) {
    fileReader.next(record);
    String bodyStr = record.get("message").toString();
    System.out.println(bodyStr);
    numEvents++;
  }
  fileReader.close();
  Assert.assertEquals("Should have found a total of 3 events", 3, numEvents);
}
Example #9
Source File: FileSystemDatasetReader.java From kite with Apache License 2.0
@Override
public void initialize() {
  Preconditions.checkState(state.equals(ReaderWriterState.NEW),
    "A reader may not be opened more than once - current state:%s", state);

  LOG.debug("Opening reader on path:{}", path);

  try {
    reader = new DataFileReader<E>(
      new AvroFSInput(fileSystem.open(path), fileSystem.getFileStatus(path).getLen()),
      DataModelUtil.getDatumReaderForType(type, schema));
  } catch (IOException e) {
    throw new DatasetIOException("Unable to create reader path:" + path, e);
  }

  state = ReaderWriterState.OPEN;
}
Example #10
Source File: AvroMorphlineTest.java From kite with Apache License 2.0
private void runTweetContainer(String morphlineConfigFile, String[] fieldNames) throws Exception {
  File file = new File(RESOURCES_DIR + "/test-documents/sample-statuses-20120906-141433-medium.avro");
  morphline = createMorphline(morphlineConfigFile);
  for (int j = 0; j < 3; j++) { // also test reuse of objects and low level avro buffers
    Record record = new Record();
    byte[] body = Files.toByteArray(file);
    record.put(Fields.ATTACHMENT_BODY, body);

    collector.reset();
    startSession();
    Notifications.notifyBeginTransaction(morphline);
    assertTrue(morphline.process(record));
    assertEquals(1, collector.getNumStartEvents());
    assertEquals(2104, collector.getRecords().size());

    FileReader<GenericData.Record> reader = new DataFileReader(file, new GenericDatumReader());
    int i = 0;
    while (reader.hasNext()) {
      Record actual = collector.getRecords().get(i);
      GenericData.Record expected = reader.next();
      assertTweetEquals(expected, actual, fieldNames, i);
      i++;
    }
    assertEquals(collector.getRecords().size(), i);
  }
}
Example #11
Source File: AvroToRestJsonEntryConverterTest.java From incubator-gobblin with Apache License 2.0
private void testConversion(RestEntry<JsonObject> expected, WorkUnitState actualWorkUnitState)
    throws DataConversionException, IOException, JSONException {
  Schema schema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/nested.avsc"));
  GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);

  File tmp = File.createTempFile(this.getClass().getSimpleName(), null);
  tmp.deleteOnExit();
  try {
    FileUtils.copyInputStreamToFile(getClass().getResourceAsStream("/converter/nested.avro"), tmp);

    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(tmp, datumReader);
    GenericRecord avroRecord = dataFileReader.next();

    AvroToRestJsonEntryConverter converter = new AvroToRestJsonEntryConverter();
    RestEntry<JsonObject> actual = converter.convertRecord(null, avroRecord, actualWorkUnitState).iterator().next();

    Assert.assertEquals(actual.getResourcePath(), expected.getResourcePath());
    JSONAssert.assertEquals(expected.getRestEntryVal().toString(), actual.getRestEntryVal().toString(), false);

    converter.close();
    dataFileReader.close();
  } finally {
    if (tmp != null) {
      tmp.delete();
    }
  }
}
Example #12
Source File: Display.java From hadoop with Apache License 2.0
public AvroFileInputStream(FileStatus status) throws IOException {
  pos = 0;
  buffer = new byte[0];
  GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
  FileContext fc = FileContext.getFileContext(new Configuration());
  fileReader = DataFileReader.openReader(new AvroFSInput(fc, status.getPath()), reader);
  Schema schema = fileReader.getSchema();
  writer = new GenericDatumWriter<Object>(schema);
  output = new ByteArrayOutputStream();
  JsonGenerator generator = new JsonFactory().createJsonGenerator(output, JsonEncoding.UTF8);
  MinimalPrettyPrinter prettyPrinter = new MinimalPrettyPrinter();
  prettyPrinter.setRootValueSeparator(System.getProperty("line.separator"));
  generator.setPrettyPrinter(prettyPrinter);
  encoder = EncoderFactory.get().jsonEncoder(schema, generator);
}
Example #13
Source File: Converter.java From xml-avro with Apache License 2.0
public static void avroToXml(File avroFile, File xmlFile) throws IOException {
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(protocol.getType("Element"));
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(avroFile, datumReader);
  GenericRecord record = dataFileReader.next();

  Document doc;
  try {
    doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
  } catch (ParserConfigurationException e) {
    throw new RuntimeException(e);
  }

  Element el = unwrapElement(record, doc);
  doc.appendChild(el);
  saveDocument(doc, xmlFile);
}
Example #14
Source File: TestAvroImport.java From aliyun-maxcompute-data-collectors with Apache License 2.0
public void testFirstUnderscoreInColumnName() throws IOException {
  String [] names = { "_NAME" };
  String [] types = { "INT" };
  String [] vals = { "1987" };
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  Schema schema = reader.getSchema();
  assertEquals(Schema.Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());

  checkField(fields.get(0), "__NAME", Type.INT);

  GenericRecord record1 = reader.next();
  assertEquals("__NAME", 1987, record1.get("__NAME"));
}
Example #15
Source File: TestAvroImport.java From aliyun-maxcompute-data-collectors with Apache License 2.0
public void testOverrideTypeMapping() throws IOException {
  String [] types = { "INT" };
  String [] vals = { "10" };
  createTableWithColTypes(types, vals);

  String [] extraArgs = { "--map-column-java", "DATA_COL0=String" };

  runImport(getOutputArgv(true, extraArgs));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  Schema schema = reader.getSchema();
  assertEquals(Schema.Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());

  checkField(fields.get(0), "DATA_COL0", Schema.Type.STRING);

  GenericRecord record1 = reader.next();
  assertEquals("DATA_COL0", new Utf8("10"), record1.get("DATA_COL0"));
}
Example #16
Source File: AvroInputFormat.java From stratosphere with Apache License 2.0
@Override
public void open(FileInputSplit split) throws IOException {
  super.open(split);

  DatumReader<E> datumReader;
  if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
    datumReader = new SpecificDatumReader<E>(avroValueType);
  } else {
    datumReader = new ReflectDatumReader<E>(avroValueType);
  }

  LOG.info("Opening split " + split);

  SeekableInput in = new FSDataInputStreamWrapper(stream, (int) split.getLength());

  dataFileReader = DataFileReader.openReader(in, datumReader);
  dataFileReader.sync(split.getStart());
}
Example #17
Source File: AvroScanner.java From tajo with Apache License 2.0
/**
 * Initializes the AvroScanner.
 */
@Override
public void init() throws IOException {
  if (targets == null) {
    targets = schema.toArray();
  }
  prepareProjection(targets);
  outTuple = new VTuple(projectionMap.length);

  Schema avroSchema = AvroUtil.getAvroSchema(meta, conf);
  avroFields = avroSchema.getFields();

  DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(avroSchema);
  SeekableInput input = new FsInput(fragment.getPath(), conf);
  dataFileReader = new DataFileReader<>(input, datumReader);
  super.init();
}
Example #18
Source File: AvroToDdlTool.java From DataflowTemplates with Apache License 2.0
public static void main(String[] args) throws IOException {
  if (args.length == 0) {
    System.out.println("Please specify the avro files");
    System.exit(1);
  }
  List<Schema> schemaList = new ArrayList<>();
  for (String filePath : args) {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    DataFileReader<GenericRecord> dataFileReader =
        new DataFileReader<>(new File(filePath), datumReader);
    Schema schema = dataFileReader.getSchema();
    System.out.println(schema.toString(true));
    schemaList.add(schema);
  }
  Ddl ddl = new AvroSchemaToDdlConverter().toDdl(schemaList);
  ddl.prettyPrint(System.out);
}
Example #19
Source File: AvroFileReader.java From streamx with Apache License 2.0
@Override
public Schema getSchema(Configuration conf, Path path) throws IOException {
  SeekableInput input = new FsInput(path, conf);
  DatumReader<Object> reader = new GenericDatumReader<>();
  FileReader<Object> fileReader = DataFileReader.openReader(input, reader);

  org.apache.avro.Schema schema = fileReader.getSchema();
  fileReader.close();
  return avroData.toConnectSchema(schema);
}
Example #20
Source File: Purge.java From Cubert with Apache License 2.0
private void purge(String src, String dst) throws IOException {
  DataFileReader<GenericRecord> dataFileReader = createDataFileReader(src, false);
  DataFileWriter<GenericRecord> writer = createDataFileWriter(dataFileReader);

  numRecords = 0;
  recordsPurged = 0;
  remainingRecords = 0;

  // Copy
  while (dataFileReader.hasNext()) {
    numRecords++;
    GenericRecord record = dataFileReader.next();
    if (record == null) {
      continue;
    }

    Number column = (Number) record.get(columnName);
    if ((column == null) || (!membersToPurge.contains(column.intValue()))) {
      remainingRecords++;
      writer.append(record);
    }
  }

  recordsPurged = numRecords - remainingRecords;
  writer.close();
  dataFileReader.close();
}
Example #21
Source File: TestMergeContent.java From nifi with Apache License 2.0
private Map<String, GenericRecord> getGenericRecordMap(byte[] data, Schema schema, String key) throws IOException {
  // create a reader for the merged content
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);
  SeekableByteArrayInput input = new SeekableByteArrayInput(data);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(input, datumReader);

  // read all the records into a map to verify all the records are there
  Map<String, GenericRecord> records = new HashMap<>();
  while (dataFileReader.hasNext()) {
    GenericRecord user = dataFileReader.next();
    records.put(user.get(key).toString(), user);
  }
  return records;
}
Example #22
Source File: FileFlusherLocalHdfsTest.java From divolte-collector with Apache License 2.0
private DataFileReader<Record> readAvroFile(final Schema schema, final File file) {
  final DatumReader<Record> dr = new GenericDatumReader<>(schema);
  try {
    return new DataFileReader<>(file, dr);
  } catch (final IOException e) {
    throw new UncheckedIOException(e);
  }
}
Example #23
Source File: QueryGenerator.java From incubator-pinot with Apache License 2.0
/**
 * Helper method to read in an Avro file and add data to the storage.
 *
 * @param avroFile Avro file.
 */
private void addAvroData(File avroFile) {
  // Read in records and update the values stored.
  GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  try (DataFileReader<GenericRecord> fileReader = new DataFileReader<>(avroFile, datumReader)) {
    for (GenericRecord genericRecord : fileReader) {
      for (String columnName : _columnNames) {
        Set<String> values = _columnToValueSet.get(columnName);

        // Turn the Avro value into a valid SQL String token.
        Object avroValue = genericRecord.get(columnName);
        if (avroValue != null) {
          Integer storedMaxNumElements = _multiValueColumnMaxNumElements.get(columnName);
          if (storedMaxNumElements != null) {
            // Multi-value column
            GenericData.Array array = (GenericData.Array) avroValue;
            int numElements = array.size();
            if (storedMaxNumElements < numElements) {
              _multiValueColumnMaxNumElements.put(columnName, numElements);
            }
            for (Object element : array) {
              storeAvroValueIntoValueSet(values, element);
            }
          } else {
            // Single-value column
            storeAvroValueIntoValueSet(values, avroValue);
          }
        }
      }
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
Example #24
Source File: ProtoGetSchemaTool.java From gcs-tools with Apache License 2.0
@Override
public int run(InputStream in, PrintStream out, PrintStream err, List<String> args) throws Exception {
  if (args.size() != 1) {
    err.println("Expected 1 argument: input_file");
    return 1;
  }
  DataFileReader<Void> reader = new DataFileReader<>(
      Util.openSeekableFromFS(args.get(0)),
      new GenericDatumReader<Void>());
  out.println(reader.getMetaString("protobuf.generic.schema"));
  return 0;
}
Example #25
Source File: TimelineMetadataUtils.java From hudi with Apache License 2.0
public static <T extends SpecificRecordBase> T deserializeAvroMetadata(byte[] bytes, Class<T> clazz)
    throws IOException {
  DatumReader<T> reader = new SpecificDatumReader<>(clazz);
  FileReader<T> fileReader = DataFileReader.openReader(new SeekableByteArrayInput(bytes), reader);
  ValidationUtils.checkArgument(fileReader.hasNext(), "Could not deserialize metadata of type " + clazz);
  return fileReader.next();
}
Example #26
Source File: FakeJobService.java From beam with Apache License 2.0
private List<TableRow> readAvroTableRows(String filename, TableSchema tableSchema) throws IOException {
  List<TableRow> tableRows = Lists.newArrayList();
  FileReader<GenericRecord> dfr =
      DataFileReader.openReader(new File(filename), new GenericDatumReader<>());

  while (dfr.hasNext()) {
    GenericRecord record = dfr.next(null);
    tableRows.add(BigQueryUtils.convertGenericRecordToTableRow(record, tableSchema));
  }
  return tableRows;
}
Example #27
Source File: AvroUtils.java From incubator-gobblin with Apache License 2.0
/**
 * Get Avro schema from an Avro data file.
 */
public static Schema getSchemaFromDataFile(Path dataFile, FileSystem fs) throws IOException {
  try (SeekableInput sin = new FsInput(dataFile, fs.getConf());
      DataFileReader<GenericRecord> reader = new DataFileReader<>(sin, new GenericDatumReader<GenericRecord>())) {
    return reader.getSchema();
  }
}
Example #28
Source File: TestAvroDataGenerator.java From datacollector with Apache License 2.0
@Test
public void testSchemaInHeader() throws Exception {
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataGenerator gen = new AvroDataOutputStreamGenerator(
    true,
    baos,
    COMPRESSION_CODEC_DEFAULT,
    null,
    null,
    null,
    null,
    0
  );
  Record record = createRecord();
  record.getHeader().setAttribute(BaseAvroDataGenerator.AVRO_SCHEMA_HEADER, AVRO_SCHEMA);
  gen.write(record);
  gen.close();

  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
    new SeekableByteArrayInput(baos.toByteArray()), reader);

  Assert.assertTrue(dataFileReader.hasNext());
  GenericRecord readRecord = dataFileReader.next();

  Assert.assertEquals("hari", readRecord.get("name").toString());
  Assert.assertEquals(3100, readRecord.get("age"));
  Assert.assertFalse(dataFileReader.hasNext());
}