org.apache.avro.generic.GenericDatumWriter Java Examples
The following examples show how to use
org.apache.avro.generic.GenericDatumWriter.
Each example notes its original project and source file.
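Before working through the examples, it may help to see the core pattern nearly all of them share: construct a GenericDatumWriter for a schema, obtain an Encoder from EncoderFactory, write the record, and flush. The sketch below is illustrative only; the class name, schema literal, and field names are assumptions made for the sake of a self-contained example and are not taken from the projects listed here.

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.EncoderFactory;

public class GenericDatumWriterSketch {

    // Illustrative schema; any record schema works the same way.
    private static final Schema SCHEMA = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
            + "{\"name\":\"name\",\"type\":\"string\"},"
            + "{\"name\":\"favorite_number\",\"type\":[\"int\",\"null\"]}]}");

    public static byte[] serialize(GenericRecord record) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        // The writer converts the in-memory record into Avro's binary encoding.
        DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(record.getSchema());
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
        writer.write(record, encoder);
        encoder.flush(); // the encoder buffers; flush before reading the bytes
        return out.toByteArray();
    }

    public static void main(String[] args) throws IOException {
        GenericRecord user = new GenericData.Record(SCHEMA);
        user.put("name", "Alyssa");
        user.put("favorite_number", 256);
        System.out.println(serialize(user).length + " bytes");
    }
}

The same writer also plugs into DataFileWriter when an Avro container file (rather than a bare binary or JSON payload) is needed, as several of the examples below demonstrate.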
Example #1
Source File: JsonUtils.java From localization_nifi with Apache License 2.0
/**
 * Writes the provided {@link GenericRecord} into the provided
 * {@link OutputStream} as JSON.
 */
public static void write(GenericRecord record, OutputStream out) {
    try {
        DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(record.getSchema());
        JsonEncoder encoder = EncoderFactory.get().jsonEncoder(record.getSchema(), out);
        writer.write(record, encoder);
        encoder.flush();
    } catch (Exception e) {
        throw new IllegalStateException("Failed to write GenericRecord", e);
    }
}
Example #2
Source File: TestConvertAvroToJSON.java From localization_nifi with Apache License 2.0
@Test
public void testSingleAvroMessage() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ConvertAvroToJSON());
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));

    final GenericRecord user1 = new GenericData.Record(schema);
    user1.put("name", "Alyssa");
    user1.put("favorite_number", 256);

    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    final ByteArrayOutputStream out1 = AvroTestUtil.serializeAvroRecord(schema, datumWriter, user1);

    runner.enqueue(out1.toByteArray());
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToJSON.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ConvertAvroToJSON.REL_SUCCESS).get(0);
    out.assertContentEquals("{\"name\": \"Alyssa\", \"favorite_number\": 256, \"favorite_color\": null}");
}
Example #3
Source File: AzureBlobAvroWriter.java From samza with Apache License 2.0
@VisibleForTesting
byte[] encodeRecord(IndexedRecord record) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    Schema schema = record.getSchema();
    try {
        EncoderFactory encoderfactory = new EncoderFactory();
        BinaryEncoder encoder = encoderfactory.binaryEncoder(out, null);
        DatumWriter<IndexedRecord> writer;
        if (record instanceof SpecificRecord) {
            writer = new SpecificDatumWriter<>(schema);
        } else {
            writer = new GenericDatumWriter<>(schema);
        }
        writer.write(record, encoder);
        encoder.flush(); // encoder may buffer
    } catch (Exception e) {
        throw new SamzaException("Unable to serialize Avro record using schema within the record: " + schema.toString(), e);
    }
    return out.toByteArray();
}
Example #4
Source File: ParquetReader.java From reef with Apache License 2.0
/**
 * Serializes Avro data to an in-memory ByteBuffer.
 * @return A ByteBuffer that contains Avro data.
 * @throws IOException if the parquet file couldn't be parsed correctly.
 */
public ByteBuffer serializeToByteBuffer() throws IOException {
    final ByteArrayOutputStream stream = new ByteArrayOutputStream();
    final Encoder encoder = EncoderFactory.get().binaryEncoder(stream, null);
    final DatumWriter writer = new GenericDatumWriter<GenericRecord>();
    writer.setSchema(createAvroSchema());
    final AvroParquetReader<GenericRecord> reader = createAvroReader();

    GenericRecord record = reader.read();
    while (record != null) {
        writer.write(record, encoder);
        record = reader.read();
    }

    try {
        reader.close();
    } catch (IOException ex) {
        LOG.log(Level.SEVERE, ex.getMessage());
        throw ex;
    }

    encoder.flush();
    final ByteBuffer buf = ByteBuffer.wrap(stream.toByteArray());
    buf.order(ByteOrder.LITTLE_ENDIAN);
    return buf;
}
Example #5
Source File: TestConvertAvroToORC.java From nifi with Apache License 2.0
@Test
public void test_onTrigger_routing_to_failure_null_type() throws Exception {
    String testString = "Hello World";
    GenericData.Record record = TestNiFiOrcUtils.buildAvroRecordWithNull(testString);

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);
    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test.avro");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS test_record (string STRING, null BOOLEAN) STORED AS ORC",
            resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
}
Example #6
Source File: TestConvertAvroToJSON.java From nifi with Apache License 2.0
@Test
public void testZeroRecords() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ConvertAvroToJSON());
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));

    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    final ByteArrayOutputStream out1 = serializeAvroRecord(schema, datumWriter);
    runner.enqueue(out1.toByteArray());
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToJSON.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ConvertAvroToJSON.REL_SUCCESS).get(0);
    out.assertContentEquals("{}");
}
Example #7
Source File: OracleGenericSchemaDecoder.java From DBus with Apache License 2.0
private void initDecoder() {
    try {
        genericSchema = OracleGenericSchemaProvider.getInstance().getSchema("generic_wrapper.avsc");
        fullPullSchema = OracleGenericSchemaProvider.getInstance().getSchema("DBUS.DB_FULL_PULL_REQUESTS.avsc");
        fullPullHash = OracleGenericSchemaProvider.getInstance().getSchemaHash("DBUS.DB_FULL_PULL_REQUESTS.avsc");
        syncEventSchema = OracleGenericSchemaProvider.getInstance().getSchema("DBUS.META_SYNC_EVENT.avsc");
        syncEventHash = OracleGenericSchemaProvider.getInstance().getSchemaHash("DBUS.META_SYNC_EVENT.avsc");
        heartbeatSchema = OracleGenericSchemaProvider.getInstance().getSchema("DBUS.DB_HEARTBEAT_MONITOR.avsc");
        heartbeatHash = OracleGenericSchemaProvider.getInstance().getSchemaHash("DBUS.DB_HEARTBEAT_MONITOR.avsc");

        datumReader = new GenericDatumReader<>(genericSchema);
        datumWriter = new GenericDatumWriter<>(genericSchema);
    } catch (Exception e) {
        logger.error("OracleGenericSchemaDecoder Initialization Error!", e);
        e.printStackTrace();
    }
}
Example #8
Source File: DatasetContentWriter.java From components with Apache License 2.0
private Consumer<IndexedRecord> getWritingConsumer(Encoder[] encoder) {
    return new Consumer<IndexedRecord>() {

        GenericDatumWriter<IndexedRecord> writer = null;

        @Override
        public void accept(IndexedRecord ir) {
            if (writer == null) {
                writer = new GenericDatumWriter<>(ir.getSchema());
                try {
                    if (json) {
                        encoder[0] = EncoderFactory.get().jsonEncoder(ir.getSchema(), output);
                    } else {
                        encoder[0] = EncoderFactory.get().binaryEncoder(output, null);
                    }
                } catch (IOException ioe) {
                    throw new RuntimeException(ioe);
                }
            }
            writeIndexedRecord(writer, encoder[0], ir);
        }
    };
}
Example #9
Source File: AvroKeyValueFileWrite.java From hiped2 with Apache License 2.0
public static void writeToAvro(File inputFile, OutputStream outputStream) throws IOException {
    DataFileWriter<GenericRecord> writer =
            new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>());
    writer.setCodec(CodecFactory.snappyCodec());
    writer.create(SCHEMA, outputStream);

    for (Stock stock : AvroStockUtils.fromCsvFile(inputFile)) {
        AvroKeyValue<CharSequence, Stock> record =
                new AvroKeyValue<CharSequence, Stock>(new GenericData.Record(SCHEMA));
        record.setKey(stock.getSymbol());
        record.setValue(stock);
        writer.append(record.get());
    }

    IOUtils.closeStream(writer);
    IOUtils.closeStream(outputStream);
}
Example #10
Source File: TransformTest.java From schema-registry-transfer-smt with Apache License 2.0
private ByteArrayOutputStream encodeAvroObject(org.apache.avro.Schema schema, int sourceId, Object datum) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    out.write(MAGIC_BYTE);
    out.write(ByteBuffer.allocate(ID_SIZE).putInt(sourceId).array());

    EncoderFactory encoderFactory = EncoderFactory.get();
    BinaryEncoder encoder = encoderFactory.directBinaryEncoder(out, null);
    Object value = datum instanceof NonRecordContainer
            ? ((NonRecordContainer) datum).getValue()
            : datum;

    DatumWriter<Object> writer = new GenericDatumWriter<>(schema);
    writer.write(value, encoder);
    encoder.flush();

    return out;
}
Example #11
Source File: TestConvertAvroToJSON.java From localization_nifi with Apache License 2.0
@Test
public void testZeroRecords_wrapSingleRecord() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ConvertAvroToJSON());
    runner.setProperty(ConvertAvroToJSON.WRAP_SINGLE_RECORD, Boolean.toString(true));
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));

    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    final ByteArrayOutputStream out1 = serializeAvroRecord(schema, datumWriter);
    runner.enqueue(out1.toByteArray());
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToJSON.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ConvertAvroToJSON.REL_SUCCESS).get(0);
    out.assertContentEquals("[{}]");
}
Example #12
Source File: Display.java From big-c with Apache License 2.0
public AvroFileInputStream(FileStatus status) throws IOException {
    pos = 0;
    buffer = new byte[0];
    GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
    FileContext fc = FileContext.getFileContext(new Configuration());
    fileReader = DataFileReader.openReader(new AvroFSInput(fc, status.getPath()), reader);
    Schema schema = fileReader.getSchema();
    writer = new GenericDatumWriter<Object>(schema);
    output = new ByteArrayOutputStream();
    JsonGenerator generator = new JsonFactory().createJsonGenerator(output, JsonEncoding.UTF8);
    MinimalPrettyPrinter prettyPrinter = new MinimalPrettyPrinter();
    prettyPrinter.setRootValueSeparator(System.getProperty("line.separator"));
    generator.setPrettyPrinter(prettyPrinter);
    encoder = EncoderFactory.get().jsonEncoder(schema, generator);
}
Example #13
Source File: RedshiftIT.java From digdag with Apache License 2.0
private byte[] avroTestData(List<Schema.Field> fields, List<Map<String, Object>> records) throws IOException {
    Schema schema = Schema.createRecord("testdata", null, null, false);
    schema.setFields(fields);

    ByteArrayOutputStream out = new ByteArrayOutputStream();
    GenericDatumWriter<GenericData.Record> datum = new GenericDatumWriter<>(schema);
    DataFileWriter<GenericData.Record> writer = new DataFileWriter<>(datum);
    writer.create(schema, out);
    for (Map<String, Object> record : records) {
        GenericData.Record r = new GenericData.Record(schema);
        for (Map.Entry<String, Object> item : record.entrySet()) {
            r.put(item.getKey(), item.getValue());
        }
        writer.append(r);
    }
    writer.close();

    return out.toByteArray();
}
Example #14
Source File: TestConvertAvroToJSON.java From localization_nifi with Apache License 2.0
@Test
public void testSingleAvroMessage_wrapSingleMessage_noContainer() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ConvertAvroToJSON());
    // Verify we do not wrap output for a single record if not configured to use a container
    runner.setProperty(ConvertAvroToJSON.CONTAINER_OPTIONS, ConvertAvroToJSON.CONTAINER_NONE);
    runner.setProperty(ConvertAvroToJSON.WRAP_SINGLE_RECORD, Boolean.toString(true));
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));

    final GenericRecord user1 = new GenericData.Record(schema);
    user1.put("name", "Alyssa");
    user1.put("favorite_number", 256);

    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    final ByteArrayOutputStream out1 = AvroTestUtil.serializeAvroRecord(schema, datumWriter, user1);

    runner.enqueue(out1.toByteArray());
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToJSON.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ConvertAvroToJSON.REL_SUCCESS).get(0);
    out.assertContentEquals("{\"name\": \"Alyssa\", \"favorite_number\": 256, \"favorite_color\": null}");
}
Example #15
Source File: TestExtractAvroMetadata.java From localization_nifi with Apache License 2.0
@Test
public void testExtractionWithCodec() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ExtractAvroMetadata());
    runner.setProperty(ExtractAvroMetadata.METADATA_KEYS, AVRO_CODEC_ATTR); // test dynamic attribute avro.codec

    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array.avsc"));
    final GenericData.Array<String> data = new GenericData.Array<>(schema, Arrays.asList("one", "two", "three"));
    final DatumWriter<GenericData.Array<String>> datumWriter = new GenericDatumWriter<>(schema);

    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    final DataFileWriter<GenericData.Array<String>> dataFileWriter = new DataFileWriter<>(datumWriter);
    dataFileWriter.setCodec(CodecFactory.deflateCodec(1));
    dataFileWriter.create(schema, out);
    dataFileWriter.append(data);
    dataFileWriter.close();

    runner.enqueue(out.toByteArray());
    runner.run();

    runner.assertAllFlowFilesTransferred(ExtractAvroMetadata.REL_SUCCESS, 1);

    final MockFlowFile flowFile = runner.getFlowFilesForRelationship(ExtractAvroMetadata.REL_SUCCESS).get(0);
    flowFile.assertAttributeEquals("avro.codec", "deflate");
}
Example #16
Source File: AVROIntermediateDataFormat.java From sqoop-on-spark with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public void write(DataOutput out) throws IOException {
    // do we need to write the schema?
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(avroSchema);
    BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder((DataOutputStream) out, null);
    writer.write(data, encoder);
}
Example #17
Source File: AvroEventSerializer.java From Transwarp-Sample-Code with MIT License
private void initialize(Event event) throws IOException {
    Schema schema = null;
    String schemaUrl = event.getHeaders().get(AVRO_SCHEMA_URL_HEADER);
    if (schemaUrl != null) {
        schema = schemaCache.get(schemaUrl);
        if (schema == null) {
            schema = loadFromUrl(schemaUrl);
            schemaCache.put(schemaUrl, schema);
        }
    }
    if (schema == null) {
        String schemaString = event.getHeaders().get(AVRO_SCHEMA_LITERAL_HEADER);
        if (schemaString == null) {
            throw new FlumeException("Could not find schema for event " + event);
        }
        schema = new Schema.Parser().parse(schemaString);
    }

    writer = new GenericDatumWriter<Object>(schema);
    dataFileWriter = new DataFileWriter<Object>(writer);

    dataFileWriter.setSyncInterval(syncIntervalBytes);

    try {
        CodecFactory codecFactory = CodecFactory.fromString(compressionCodec);
        dataFileWriter.setCodec(codecFactory);
    } catch (AvroRuntimeException e) {
        logger.warn("Unable to instantiate avro codec with name (" + compressionCodec
                + "). Compression disabled. Exception follows.", e);
    }

    dataFileWriter.create(schema, out);
}
Example #18
Source File: Purge.java From Cubert with Apache License 2.0
private DataFileWriter<GenericRecord> createDataFileWriter(DataFileReader<GenericRecord> dataFileReader)
        throws IllegalArgumentException, IOException {
    Schema schema = dataFileReader.getSchema();
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(datumWriter);

    // Get the codec of the reader
    String codecStr = dataFileReader.getMetaString(DataFileConstants.CODEC);
    int level = conf.getInt("avro.mapred.deflate.level", 1);
    String codecName = conf.get("avro.output.codec", codecStr);
    CodecFactory factory = codecName.equals("deflate")
            ? CodecFactory.deflateCodec(level)
            : CodecFactory.fromString(codecName);

    // Set the codec of the writer
    writer.setCodec(factory);

    writer.setSyncInterval(conf.getInt("avro.mapred.sync.interval",
            Math.max(conf.getInt("io.file.buffer.size", 16000), 16000)));

    writer.create(schema,
            new Path(tempFileName).getFileSystem(conf).create(new Path(tempFileName)));

    return writer;
}
Example #19
Source File: AvroCodec.java From schema-evolution-samples with Apache License 2.0
private DatumWriter getDatumWriter(Class<?> type, Schema schema) {
    DatumWriter writer = null;
    logger.debug("Finding correct DatumWriter for type {}", type.getName());
    if (SpecificRecord.class.isAssignableFrom(type)) {
        writer = new SpecificDatumWriter<>(schema);
    } else if (GenericRecord.class.isAssignableFrom(type)) {
        writer = new GenericDatumWriter<>(schema);
    } else {
        writer = new ReflectDatumWriter<>(schema);
    }
    logger.debug("DatumWriter of type {} selected", writer.getClass().getName());
    return writer;
}
Example #20
Source File: SparkVerifierTest.java From tablasco with Apache License 2.0
private static void writeAvroData(List<GenericRecord> data, File avroFile) throws IOException {
    FileUtils.forceMkdir(avroFile.getParentFile());
    Schema schema = data.get(0).getSchema();
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter);
    dataFileWriter.create(schema, avroFile);
    for (GenericRecord genericRecord : data) {
        dataFileWriter.append(genericRecord);
    }
    dataFileWriter.close();
}
Example #21
Source File: PartitionCollapsingExecutionPlannerTests.java From datafu with Apache License 2.0
private void createOutput(DateRange dateRange) throws IOException {
    DataFileWriter<GenericRecord> dataWriter;
    OutputStream outputStream;

    Path path = new Path(_outputPath, PathUtils.datedPathFormat.format(dateRange.getEndDate()));

    Schema ouputSchema = Schemas.createRecordSchema(PartitionCollapsingTests.class, "Output",
            new Field("id", Schema.create(Type.LONG), "ID", null));

    outputStream = getFileSystem().create(new Path(path, "part-00000.avro"));

    GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>();
    dataWriter = new DataFileWriter<GenericRecord>(writer);

    dataWriter.setMeta(AvroDateRangeMetadata.METADATA_DATE_START,
            Long.toString(dateRange.getBeginDate().getTime()));
    dataWriter.setMeta(AvroDateRangeMetadata.METADATA_DATE_END,
            Long.toString(dateRange.getEndDate().getTime()));

    dataWriter.create(ouputSchema, outputStream);

    // empty file

    dataWriter.close();
    outputStream.close();

    dataWriter = null;
    outputStream = null;
}
Example #22
Source File: AvroCoder.java From beam with Apache License 2.0
protected AvroCoder(Class<T> type, Schema schema) {
    this.type = type;
    this.schemaSupplier = new SerializableSchemaSupplier(schema);
    typeDescriptor = TypeDescriptor.of(type);
    nonDeterministicReasons = new AvroDeterminismChecker().check(TypeDescriptor.of(type), schema);

    // Decoder and Encoder start off null for each thread. They are allocated and potentially
    // reused inside encode/decode.
    this.decoder = new EmptyOnDeserializationThreadLocal<>();
    this.encoder = new EmptyOnDeserializationThreadLocal<>();

    this.reflectData = Suppliers.memoize(new SerializableReflectDataSupplier(getType()));

    // Reader and writer are allocated once per thread per Coder
    this.reader = new EmptyOnDeserializationThreadLocal<DatumReader<T>>() {
        private final AvroCoder<T> myCoder = AvroCoder.this;

        @Override
        public DatumReader<T> initialValue() {
            return myCoder.getType().equals(GenericRecord.class)
                    ? new GenericDatumReader<>(myCoder.getSchema())
                    : new ReflectDatumReader<>(
                            myCoder.getSchema(), myCoder.getSchema(), myCoder.reflectData.get());
        }
    };

    this.writer = new EmptyOnDeserializationThreadLocal<DatumWriter<T>>() {
        private final AvroCoder<T> myCoder = AvroCoder.this;

        @Override
        public DatumWriter<T> initialValue() {
            return myCoder.getType().equals(GenericRecord.class)
                    ? new GenericDatumWriter<>(myCoder.getSchema())
                    : new ReflectDatumWriter<>(myCoder.getSchema(), myCoder.reflectData.get());
        }
    };
}
Example #23
Source File: AvroConsoleProducer.java From HiveKa with Apache License 2.0
public static byte[] serializeAvro(Schema schema, GenericRecord event) throws IOException {
    ByteArrayOutputStream stream = new ByteArrayOutputStream();
    BinaryEncoder binaryEncoder = EncoderFactory.get().binaryEncoder(stream, null);
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
    datumWriter.write(event, binaryEncoder);
    binaryEncoder.flush();
    IOUtils.closeQuietly(stream);

    return stream.toByteArray();
}
Example #24
Source File: TestConvertAvroToJSON.java From localization_nifi with Apache License 2.0
@Test
public void testSingleSchemalessAvroMessage_wrapSingleMessage() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ConvertAvroToJSON());
    runner.setProperty(ConvertAvroToJSON.CONTAINER_OPTIONS, ConvertAvroToJSON.CONTAINER_ARRAY);
    runner.setProperty(ConvertAvroToJSON.WRAP_SINGLE_RECORD, Boolean.toString(true));
    Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));
    String stringSchema = schema.toString();
    runner.setProperty(ConvertAvroToJSON.SCHEMA, stringSchema);

    final GenericRecord user1 = new GenericData.Record(schema);
    user1.put("name", "Alyssa");
    user1.put("favorite_number", 256);

    final ByteArrayOutputStream out1 = new ByteArrayOutputStream();
    final BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out1, null);
    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    datumWriter.write(user1, encoder);
    encoder.flush();
    out1.flush();
    byte[] test = out1.toByteArray();
    runner.enqueue(test);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToJSON.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ConvertAvroToJSON.REL_SUCCESS).get(0);
    out.assertContentEquals("[{\"name\": \"Alyssa\", \"favorite_number\": 256, \"favorite_color\": null}]");
}
Example #25
Source File: AvroHdfsFileWriter.java From ml-ease with Apache License 2.0
public AvroHdfsFileWriter(JobConf conf, String path, Schema schema) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    FSDataOutputStream out = fs.create(new Path(path));
    DatumWriter<T> writer = new GenericDatumWriter<T>(schema);
    _recordWriter = new DataFileWriter<T>(writer);
    _recordWriter.create(schema, out);
}
Example #26
Source File: RecordBenchmarkBase.java From avro-fastserde with Apache License 2.0
@Setup
public void init() throws Exception {
    final GenericDatumWriter<GenericData.Record> datumWriter = new GenericDatumWriter<>(specificRecordSchema);
    for (int i = 0; i < 1000; i++) {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        Encoder encoder = EncoderFactory.get().binaryEncoder(baos, null);

        genericRecords.add(FastSerdeBenchmarkSupport.generateRandomRecordData(specificRecordSchema));
        specificRecords
                .add(FastSerdeBenchmarkSupport.toSpecificRecord(genericRecords.get(genericRecords.size() - 1)));

        datumWriter.write(genericRecords.get(genericRecords.size() - 1), encoder);
        encoder.flush();

        recordBytes.add(baos.toByteArray());
    }

    fastGenericDatumReader = new FastGenericDatumReader<>(specificRecordSchema, cache);
    fastGenericDatumWriter = new FastGenericDatumWriter<>(specificRecordSchema, cache);

    genericDatumReader = new GenericDatumReader<>(specificRecordSchema);
    genericDatumWriter = new GenericDatumWriter<>(specificRecordSchema);

    fastSpecificDatumReader = new FastSpecificDatumReader<>(specificRecordSchema, cache);
    fastSpecificDatumWriter = new FastSpecificDatumWriter<>(specificRecordSchema, cache);

    specificDatumReader = new SpecificDatumReader<>(specificRecordSchema);
    specificDatumWriter = new SpecificDatumWriter<>(specificRecordSchema);
}
Example #27
Source File: AvroCompactionTaskTest.java From incubator-gobblin with Apache License 2.0
public void createAvroFileWithRepeatingRecords(File file, GenericRecord r, int count, Optional<Schema> schema) throws IOException {
    DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>());
    writer.create(schema.isPresent() ? schema.get() : getSchema(), new FileOutputStream(file));
    for (int i = 0; i < count; ++i) {
        writer.append(r);
    }
    writer.close();
}
Example #28
Source File: BaseProducer.java From HiveKa with Apache License 2.0
public static byte[] serializeAvro(Schema schema, GenericRecord event) throws IOException {
    ByteArrayOutputStream stream = new ByteArrayOutputStream();
    BinaryEncoder binaryEncoder = EncoderFactory.get().binaryEncoder(stream, null);
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
    datumWriter.write(event, binaryEncoder);
    binaryEncoder.flush();
    IOUtils.closeQuietly(stream);

    return stream.toByteArray();
}
Example #29
Source File: DefaultAvroDatumProvider.java From apicurio-registry with Apache License 2.0
@Override
public DatumWriter<T> createDatumWriter(T data, Schema schema) {
    if (data instanceof SpecificRecord) {
        return new SpecificDatumWriter<>(schema);
    } else {
        return new GenericDatumWriter<>(schema);
    }
}
Example #30
Source File: AvroTeeWriter.java From Cubert with Apache License 2.0
@Override
public void open(Configuration conf, JsonNode json, BlockSchema schema, Path root, String filename)
        throws IOException {
    Path teePath = new Path(root, filename + ".avro");
    FileSystem fs = FileSystem.get(conf);

    Schema avroSchema = AvroUtils.convertFromBlockSchema("record", schema);
    GenericDatumWriter<Object> datumWriter = new PigAvroDatumWriter(avroSchema);
    dataFileWriter = new DataFileWriter<Object>(datumWriter);

    // if compression is requested, set the proper compression codec
    if (PhaseContext.getConf().getBoolean("mapred.output.compress", false)) {
        int level = conf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = conf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC)
                ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        dataFileWriter.setCodec(factory);
    }

    dataFileWriter.create(avroSchema, fs.create(teePath));
}