org.apache.avro.io.DatumReader Java Examples
The following examples show how to use org.apache.avro.io.DatumReader. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
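Before the project examples, a quick orientation: a DatumReader deserializes individual datums, while a container-file reader such as DataFileReader or DataFileStream handles the file framing and supplies the writer schema from the file header. A minimal, self-contained sketch of that core pattern (the file name users.avro is hypothetical):

import java.io.File;
import java.io.IOException;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;

public class MinimalDatumReaderExample {
    public static void main(String[] args) throws IOException {
        // No schema is passed: the reader picks up the writer schema from the file header.
        DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
        try (DataFileReader<GenericRecord> fileReader =
                 new DataFileReader<>(new File("users.avro"), datumReader)) {
            GenericRecord record = null;
            while (fileReader.hasNext()) {
                record = fileReader.next(record); // reuse the record to cut allocations
                System.out.println(record);
            }
        }
    }
}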
Example #1
Source File: TestSelectHive3QL.java From nifi with Apache License 2.0
private long getNumberOfRecordsFromStream(InputStream in) throws IOException {
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) {
        GenericRecord record = null;
        long recordsFromStream = 0;
        while (dataFileReader.hasNext()) {
            // Reuse record object by passing it to next(). This saves us from
            // allocating and garbage collecting many objects for files with
            // many items.
            record = dataFileReader.next(record);
            recordsFromStream += 1;
        }
        return recordsFromStream;
    }
}
Example #2
Source File: AvroEntitySerDe.java From kite with Apache License 2.0
@Override
public Object deserializeColumnValueFromBytes(String fieldName, byte[] bytes) {
    Field field = avroSchema.getAvroSchema().getField(fieldName);
    DatumReader<Object> datumReader = fieldDatumReaders.get(fieldName);
    if (field == null) {
        throw new ValidationException("Invalid field name " + fieldName
            + " for schema " + avroSchema.toString());
    }
    if (datumReader == null) {
        throw new ValidationException("No datum reader for field name: " + fieldName);
    }

    ByteArrayInputStream byteIn = new ByteArrayInputStream(bytes);
    Decoder decoder = getColumnDecoder(field.schema(), byteIn);
    return AvroUtils.readAvroEntity(decoder, datumReader);
}
Example #3
Source File: AvroEntitySerDe.java From kite with Apache License 2.0
private void initKACRecordDatumMaps(String fieldName, Schema fieldSchema, Schema writtenFieldSchema) {
    Map<String, DatumReader<Object>> recordFieldReaderMap = new HashMap<String, DatumReader<Object>>();
    Map<String, DatumWriter<Object>> recordFieldWriterMap = new HashMap<String, DatumWriter<Object>>();
    kacRecordDatumReaders.put(fieldName, recordFieldReaderMap);
    kacRecordDatumWriters.put(fieldName, recordFieldWriterMap);
    for (Field recordField : fieldSchema.getFields()) {
        Field writtenRecordField = writtenFieldSchema.getField(recordField.name());
        if (writtenRecordField == null) {
            continue;
        }
        recordFieldReaderMap.put(recordField.name(),
            buildDatumReader(recordField.schema(), writtenRecordField.schema()));
        recordFieldWriterMap.put(recordField.name(),
            buildDatumWriter(recordField.schema()));
    }
}
Example #4
Source File: AvroInputFormat.java From flink with Apache License 2.0
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
    DatumReader<E> datumReader;

    if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
        datumReader = new GenericDatumReader<E>();
    } else {
        datumReader = org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)
            ? new SpecificDatumReader<E>(avroValueType)
            : new ReflectDatumReader<E>(avroValueType);
    }

    if (LOG.isInfoEnabled()) {
        LOG.info("Opening split {}", split);
    }

    SeekableInput in = new FSDataInputStreamWrapper(stream,
        split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen());
    DataFileReader<E> dataFileReader = (DataFileReader) DataFileReader.openReader(in, datumReader);

    if (LOG.isDebugEnabled()) {
        LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
    }

    end = split.getStart() + split.getLength();
    recordsReadSinceLastSync = 0;

    return dataFileReader;
}
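The dispatch above is the standard way to pick a DatumReader implementation for an arbitrary value class: GenericDatumReader yields loosely typed GenericRecords, SpecificDatumReader materializes Avro-generated classes, and ReflectDatumReader maps data onto plain POJOs via reflection. A distilled sketch of just that choice (the class and method names here are illustrative, not Flink's):

import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.reflect.ReflectDatumReader;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificRecordBase;

public class ReaderChoice {
    public static <E> DatumReader<E> forType(Class<E> type) {
        if (GenericRecord.class == type) {
            return new GenericDatumReader<>();     // schema comes from the container file
        }
        return SpecificRecordBase.class.isAssignableFrom(type)
            ? new SpecificDatumReader<>(type)      // Avro-generated classes
            : new ReflectDatumReader<>(type);      // plain POJOs via reflection
    }
}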
Example #5
Source File: PentahoAvroInputFormat.java From pentaho-hadoop-shims with Apache License 2.0
private DataFileStream<GenericRecord> createDataFileStream() throws Exception {
    DatumReader<GenericRecord> datumReader;
    if ( useFieldAsInputStream ) {
        datumReader = new GenericDatumReader<GenericRecord>();
        inputStream.reset();
        return new DataFileStream<GenericRecord>( inputStream, datumReader );
    }
    if ( schemaFileName != null && schemaFileName.length() > 0 ) {
        Schema schema = new Schema.Parser().parse( KettleVFS.getInputStream( schemaFileName, variableSpace ) );
        datumReader = new GenericDatumReader<GenericRecord>( schema );
    } else {
        datumReader = new GenericDatumReader<GenericRecord>();
    }
    FileObject fileObject = KettleVFS.getFileObject( fileName, variableSpace );
    if ( fileObject.isFile() ) {
        this.inputStream = fileObject.getContent().getInputStream();
        return new DataFileStream<>( inputStream, datumReader );
    } else {
        FileObject[] avroFiles = fileObject.findFiles( new FileExtensionSelector( "avro" ) );
        if ( !Utils.isEmpty( avroFiles ) ) {
            this.inputStream = avroFiles[ 0 ].getContent().getInputStream();
            return new DataFileStream<>( inputStream, datumReader );
        }
        return null;
    }
}
Example #6
Source File: AvroStreamingFileSinkITCase.java From flink with Apache License 2.0
private static <T> void validateResults(File folder, DatumReader<T> datumReader, List<T> expected) throws Exception {
    File[] buckets = folder.listFiles();
    assertNotNull(buckets);
    assertEquals(1, buckets.length);

    File[] partFiles = buckets[0].listFiles();
    assertNotNull(partFiles);
    assertEquals(2, partFiles.length);

    for (File partFile : partFiles) {
        assertTrue(partFile.length() > 0);

        final List<T> fileContent = readAvroFile(partFile, datumReader);
        assertEquals(expected, fileContent);
    }
}
Example #7
Source File: QueryDatabaseTableTest.java From nifi with Apache License 2.0
private long getNumberOfRecordsFromStream(InputStream in) throws IOException {
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) {
        GenericRecord record = null;
        long recordsFromStream = 0;
        while (dataFileReader.hasNext()) {
            // Reuse record object by passing it to next(). This saves us from
            // allocating and garbage collecting many objects for files with
            // many items.
            record = dataFileReader.next(record);
            recordsFromStream += 1;
        }
        return recordsFromStream;
    }
}
Example #8
Source File: KafkaAvroMessageDecoder.java From HiveKa with Apache License 2.0
public AvroGenericRecordWritable decode(byte[] payload) {
    try {
        MessageDecoderHelper helper = new MessageDecoderHelper(registry, topicName, payload).invoke();
        DatumReader<Record> reader = new GenericDatumReader<Record>(helper.getTargetSchema());

        log.debug("Trying to read kafka payload");
        log.debug("buffer: " + helper.getBuffer());
        log.debug("start: " + helper.getStart());
        log.debug("length: " + helper.getLength());
        log.debug("target schema: " + helper.getTargetSchema());
        log.debug("schema: " + helper.getSchema());

        GenericRecord record = reader.read(null,
            decoderFactory.binaryDecoder(helper.getBuffer().array(), helper.getStart(), helper.getLength(), null));
        log.debug("Read kafka payload as " + record);

        AvroGenericRecordWritable grw = new AvroGenericRecordWritable(record);
        grw.setFileSchema(latestSchema);
        return grw;
    } catch (IOException e) {
        throw new MessageDecoderException(e);
    }
}
Example #9
Source File: TestFlumeEventAvroEventSerializer.java From mt-flume with Apache License 2.0
public void validateAvroFile(File file) throws IOException {
    // read the events back using GenericRecord
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> fileReader = new DataFileReader<GenericRecord>(file, reader);
    GenericRecord record = new GenericData.Record(fileReader.getSchema());
    int numEvents = 0;
    while (fileReader.hasNext()) {
        fileReader.next(record);
        ByteBuffer body = (ByteBuffer) record.get("body");
        CharsetDecoder decoder = Charsets.UTF_8.newDecoder();
        String bodyStr = decoder.decode(body).toString();
        System.out.println(bodyStr);
        numEvents++;
    }
    fileReader.close();
    Assert.assertEquals("Should have found a total of 3 events", 3, numEvents);
}
Example #10
Source File: Purge.java From Cubert with Apache License 2.0
private DataFileReader<GenericRecord> createDataFileReader(String filename, boolean localFS) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> dataFileReader;

    if (localFS) {
        dataFileReader = new DataFileReader<GenericRecord>(new File(filename), datumReader);
    } else {
        Path path = new Path(filename);
        SeekableInput input = new FsInput(path, conf);
        dataFileReader = new DataFileReader<GenericRecord>(input, datumReader);
    }

    return dataFileReader;
}
Example #11
Source File: ReadActivityFile.java From big-data-lite with MIT License
/**
 * Reads the avro file
 * @throws IOException
 */
private void readFile() throws IOException {
    // Deserialize Activities from disk
    File file = new File(filename);
    DatumReader<Activity> activityDatumReader = new SpecificDatumReader<Activity>(Activity.class);
    DataFileReader<Activity> dataFileReader = new DataFileReader<Activity>(file, activityDatumReader);

    Activity activity = null;
    int i = 0;
    while (dataFileReader.hasNext() && i < numrecs) {
        i++;
        activity = dataFileReader.next(activity);
        System.out.println(activity);
    }
}
Example #12
Source File: Converter.java From xml-avro with Apache License 2.0
public static void avroToXml(File avroFile, File xmlFile) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(protocol.getType("Element"));
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(avroFile, datumReader);
    GenericRecord record = dataFileReader.next();

    Document doc;
    try {
        doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
    } catch (ParserConfigurationException e) {
        throw new RuntimeException(e);
    }

    Element el = unwrapElement(record, doc);
    doc.appendChild(el);

    saveDocument(doc, xmlFile);
}
Example #13
Source File: WholeFileTransformerProcessor.java From datacollector with Apache License 2.0
/**
 * Return the Avro file reader
 * @param is the {@link java.io.InputStream} input stream of the source Avro file
 * @param sourceFileName the source Avro file name
 */
private DataFileStream<GenericRecord> getFileReader(InputStream is, String sourceFileName) throws StageException {
    try {
        DatumReader<GenericRecord> reader = new GenericDatumReader<>();
        DataFileStream<GenericRecord> fileReader = new DataFileStream<>(is, reader);
        return fileReader;
    } catch (IOException ex) {
        throw new TransformerStageCheckedException(Errors.CONVERT_11, sourceFileName, ex);
    }
}
Example #14
Source File: AvroFileReader.java From streamx with Apache License 2.0
@Override
public Collection<Object> readData(Configuration conf, Path path) throws IOException {
    ArrayList<Object> collection = new ArrayList<>();
    SeekableInput input = new FsInput(path, conf);
    DatumReader<Object> reader = new GenericDatumReader<>();
    FileReader<Object> fileReader = DataFileReader.openReader(input, reader);
    for (Object object : fileReader) {
        collection.add(object);
    }
    fileReader.close();
    return collection;
}
Example #15
Source File: AvroIterable.java From iceberg with Apache License 2.0
AvroIterable(InputFile file, DatumReader<D> reader, Long start, Long length, boolean reuseContainers) {
    this.file = file;
    this.reader = reader;
    this.start = start;
    this.end = start != null ? start + length : null;
    this.reuseContainers = reuseContainers;
}
Example #16
Source File: AvroUtils.java From Cubert with Apache License 2.0
public static Schema getSchema(SeekableInput input) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(input, datumReader);
    Schema schema = dataFileReader.getSchema();

    if (PadDefaultNullsToSchema) {
        // a list of "cloned" fields, with optional default value set to null
        ArrayList<Field> paddedFields = new ArrayList<Field>();

        for (Field field : schema.getFields()) {
            // should this field be padded?
            boolean needsNullPadding =
                (field.schema() != null) // the field has nested schema
                    && (field.schema().getType().equals(Type.UNION)) // the nested schema is UNION
                    && (field.schema().getTypes().get(0).getType().equals(Type.NULL)); // the first element of union is NULL type

            JsonNode defValue = needsNullPadding ? NullNode.getInstance() : field.defaultValue();

            Field f = new Field(field.name(), field.schema(), field.doc(), defValue);
            paddedFields.add(f);
        }

        schema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.isError());
        schema.setFields(paddedFields);
    }

    return schema;
}
Example #17
Source File: TestConvertAvroToORC.java From nifi with Apache License 2.0
@Test
public void test_onTrigger_routing_to_failure_fixed_type() throws Exception {
    String testString = "Hello!";
    GenericData.Record record = TestNiFiOrcUtils.buildAvroRecordWithFixed(testString);

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);
    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test.avro");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_FAILURE, 1);

    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_FAILURE).get(0);
    assertEquals("test.avro", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));

    final InputStream in = new ByteArrayInputStream(resultFlowFile.toByteArray());
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) {
        assertTrue(dataFileReader.hasNext());
        GenericRecord testedRecord = dataFileReader.next();

        assertNotNull(testedRecord.get("fixed"));
        assertArrayEquals(testString.getBytes(StandardCharsets.UTF_8),
            ((GenericData.Fixed) testedRecord.get("fixed")).bytes());
    }
}
Example #18
Source File: TestDataModelUtil.java From kite with Apache License 2.0
@Test
public void testGetDatumReaderForGenericType() {
    Class<GenericData.Record> type = GenericData.Record.class;
    Schema writerSchema = StandardEvent.getClassSchema();
    DatumReader result = DataModelUtil.getDatumReaderForType(type, writerSchema);
    assertEquals(GenericDatumReader.class, result.getClass());
}
Example #19
Source File: AvroReflectSerialization.java From big-c with Apache License 2.0
@InterfaceAudience.Private
@Override
public DatumReader getReader(Class<Object> clazz) {
    try {
        return new ReflectDatumReader(clazz);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Example #20
Source File: RegistrylessAvroConverter.java From registryless-avro-converter with Apache License 2.0
@Override
public SchemaAndValue toConnectData(String topic, byte[] value) {
    DatumReader<GenericRecord> datumReader;
    if (avroSchema != null) {
        datumReader = new GenericDatumReader<>(avroSchema);
    } else {
        datumReader = new GenericDatumReader<>();
    }
    GenericRecord instance = null;

    try (
        SeekableByteArrayInput sbai = new SeekableByteArrayInput(value);
        DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(sbai, datumReader);
    ) {
        instance = dataFileReader.next(instance);

        if (instance == null) {
            logger.warn("Instance was null");
        }

        if (avroSchema != null) {
            return avroDataHelper.toConnectData(avroSchema, instance);
        } else {
            return avroDataHelper.toConnectData(instance.getSchema(), instance);
        }
    } catch (IOException ioe) {
        // String.format is static, so "...".format(topic) would silently use the topic
        // as the format string; call it explicitly instead.
        throw new DataException(String.format("Failed to deserialize Avro data from topic %s", topic), ioe);
    }
}
Example #21
Source File: AvroFileReaderWriterFactory.java From secor with Apache License 2.0
public AvroFileReader(LogFilePath logFilePath, CompressionCodec codec) throws IOException {
    file = new File(logFilePath.getLogFilePath());
    file.getParentFile().mkdirs();
    topic = logFilePath.getTopic();

    Schema schema = schemaRegistry.getSchema(topic);
    DatumReader datumReader = new SpecificDatumReader(schema);
    try {
        reader = new DataFileReader(file, datumReader);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    offset = logFilePath.getOffset();
}
Example #22
Source File: FastStringableTest.java From avro-util with BSD 2-Clause "Simplified" License
private <T> T readWithSlowAvro(Schema readerSchema, Schema writerSchema, Decoder decoder, boolean specific) {
    DatumReader<T> datumReader;
    if (specific) {
        datumReader = new SpecificDatumReader<>(writerSchema, readerSchema);
    } else {
        datumReader = new GenericDatumReader<>(writerSchema, readerSchema);
    }
    try {
        return datumReader.read(null, decoder);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}
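The two-schema constructors above are Avro's schema-resolution hook: bytes encoded with the writer schema are projected onto the reader schema at read time. A minimal, self-contained round trip (the User schema here is invented for illustration) showing a defaulted field being filled in:

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;

public class SchemaResolutionSketch {
    public static void main(String[] args) throws IOException {
        Schema writerSchema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"User\",\"fields\":"
            + "[{\"name\":\"name\",\"type\":\"string\"}]}");
        Schema readerSchema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"User\",\"fields\":"
            + "[{\"name\":\"name\",\"type\":\"string\"},"
            + "{\"name\":\"age\",\"type\":\"int\",\"default\":0}]}");

        // Encode a record with the writer schema.
        GenericRecord written = new GenericData.Record(writerSchema);
        written.put("name", "alice");
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(writerSchema);
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
        writer.write(written, encoder);
        encoder.flush();

        // Decode with both schemas; "age" is absent from the bytes, so its default is used.
        DatumReader<GenericRecord> reader = new GenericDatumReader<>(writerSchema, readerSchema);
        Decoder decoder = DecoderFactory.get().binaryDecoder(out.toByteArray(), null);
        GenericRecord read = reader.read(null, decoder);
        System.out.println(read); // {"name": "alice", "age": 0}
    }
}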
Example #23
Source File: AvroCodec.java From schema-evolution-samples with Apache License 2.0
private DatumReader getDatumReader(Class<?> type, Schema writer) {
    DatumReader reader = null;
    if (SpecificRecord.class.isAssignableFrom(type)) {
        reader = new SpecificDatumReader<>(writer, getReaderSchema(writer));
    } else if (GenericRecord.class.isAssignableFrom(type)) {
        reader = new GenericDatumReader<>(writer, getReaderSchema(writer));
    } else {
        reader = new ReflectDatumReader<>(writer, getReaderSchema(writer));
    }
    return reader;
}
Example #24
Source File: AvroDataFileParser.java From datacollector with Apache License 2.0
public AvroDataFileParser(ProtoConfigurableEntity.Context context, Schema schema, File file,
    String readerOffset, int maxObjectLength, boolean skipUnionIndexes) throws IOException {
    this.context = context;
    this.file = file;
    this.skipUnionIndexes = skipUnionIndexes;

    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema, schema, GenericData.get());
    sin = new SeekableOverrunFileInputStream(new FileInputStream(file), maxObjectLength, true);
    dataFileReader = new DataFileReader<>(sin, datumReader);

    if (readerOffset != null && !readerOffset.isEmpty() && !"0".equals(readerOffset)) {
        String[] split = readerOffset.split(OFFSET_SEPARATOR);
        if (split.length == 3) {
            // split[0] is the file name
            previousSync = Long.parseLong(split[1]);
            recordCount = Long.parseLong(split[2]);
            seekToOffset();
        } else if (split.length == 2) {
            previousSync = Long.parseLong(split[0]);
            recordCount = Long.parseLong(split[1]);
            seekToOffset();
        } else {
            throw new IllegalArgumentException(Utils.format("Invalid offset {}", readerOffset));
        }
    } else {
        recordCount = 0;
        previousSync = dataFileReader.previousSync();
    }
}
Example #25
Source File: AvroRecordInputFormatTest.java From Flink-CEPplus with Apache License 2.0
/**
 * This test validates proper serialization with specific (generated POJO) types.
 */
@Test
public void testDeserializeToSpecificType() throws IOException {
    DatumReader<User> datumReader = new SpecificDatumReader<>(userSchema);

    try (FileReader<User> dataFileReader = DataFileReader.openReader(testFile, datumReader)) {
        User rec = dataFileReader.next();

        // check if record has been read correctly
        assertNotNull(rec);
        assertEquals("name not equal", TEST_NAME, rec.get("name").toString());
        assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), rec.get("type_enum").toString());

        // now serialize it with our framework:
        ExecutionConfig ec = new ExecutionConfig();
        TypeInformation<User> te = TypeExtractor.createTypeInfo(User.class);

        assertEquals(AvroTypeInfo.class, te.getClass());
        TypeSerializer<User> tser = te.createSerializer(ec);

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try (DataOutputViewStreamWrapper outView = new DataOutputViewStreamWrapper(out)) {
            tser.serialize(rec, outView);
        }

        User newRec;
        try (DataInputViewStreamWrapper inView = new DataInputViewStreamWrapper(
                new ByteArrayInputStream(out.toByteArray()))) {
            newRec = tser.deserialize(inView);
        }

        // check if it is still the same
        assertNotNull(newRec);
        assertEquals("name not equal", TEST_NAME, newRec.getName().toString());
        assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), newRec.getTypeEnum().toString());
    }
}
Example #26
Source File: AvroDeSerealizer.java From tutorials with MIT License
public AvroHttpRequest deSerealizeAvroHttpRequestJSON(byte[] data) {
    DatumReader<AvroHttpRequest> reader = new SpecificDatumReader<>(AvroHttpRequest.class);
    Decoder decoder = null;
    try {
        decoder = DecoderFactory.get()
            .jsonDecoder(AvroHttpRequest.getClassSchema(), new String(data));
        return reader.read(null, decoder);
    } catch (IOException e) {
        logger.error("Deserialization error: " + e.getMessage());
    }
    return null;
}
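The jsonDecoder used above has a matching encoder, so the whole round trip can be sketched with a generic record instead of the project's generated AvroHttpRequest class (the Ping schema below is invented for illustration):

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;

public class JsonRoundTrip {
    public static void main(String[] args) throws IOException {
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"Ping\",\"fields\":"
            + "[{\"name\":\"id\",\"type\":\"long\"}]}");

        GenericRecord ping = new GenericData.Record(schema);
        ping.put("id", 42L);

        // Encode to Avro's JSON encoding...
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
        Encoder encoder = EncoderFactory.get().jsonEncoder(schema, out);
        writer.write(ping, encoder);
        encoder.flush();

        // ...and decode it back, mirroring the example above.
        DatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
        Decoder decoder = DecoderFactory.get().jsonDecoder(schema, out.toString("UTF-8"));
        System.out.println(reader.read(null, decoder)); // {"id": 42}
    }
}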
Example #27
Source File: GoogleCloudPubSubFlusherTest.java From divolte-collector with Apache License 2.0
@Test
public void testMessageBodyIsNakedAvroRecord() throws IOException {
    processSingleMessage();
    final PubsubMessage deliveredMessage = getFirstPublishedMessage();
    final ByteString body = deliveredMessage.getData();

    final DatumReader<GenericRecord> reader = new GenericDatumReader<>(MINIMAL_SCHEMA);
    final Decoder decoder = DecoderFactory.get().binaryDecoder(body.newInput(), null);
    final GenericRecord record = reader.read(null, decoder);

    assertEquals(partyId.orElseThrow(IllegalStateException::new).toString(), record.get("partyId").toString());
    assertEquals(sessionId.orElseThrow(IllegalStateException::new).toString(), record.get("sessionId").toString());
    assertEquals(0L, record.get("counter"));
}
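Note that, unlike the container-file examples above, the Pub/Sub payload here is a naked binary record: no schema or sync markers travel with the bytes, so producer and consumer must agree on MINIMAL_SCHEMA out of band.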
Example #28
Source File: AvroUtilities.java From pxf with Apache License 2.0
private static Schema readSchemaFromAvroDataSource(Configuration configuration, String dataSource) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    FsInput inStream = new FsInput(new Path(dataSource), configuration);

    try (DataFileReader<GenericRecord> fileReader = new DataFileReader<>(inStream, datumReader)) {
        return fileReader.getSchema();
    }
}
Example #29
Source File: TestAvroExtractor.java From incubator-gobblin with Apache License 2.0
public static List<GenericRecord> getRecordFromFile(String path) throws IOException {
    Configuration config = new Configuration();
    SeekableInput input = new FsInput(new Path(path), config);
    DatumReader<GenericRecord> reader1 = new GenericDatumReader<>();
    FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader1);
    List<GenericRecord> records = new ArrayList<>();
    for (GenericRecord datum : fileReader) {
        records.add(datum);
    }
    fileReader.close();
    return records;
}