org.apache.avro.generic.GenericDatumWriter Java Examples

The following examples show how to use org.apache.avro.generic.GenericDatumWriter. They are drawn from open-source projects; the source file, project, and license are noted above each example.
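Before the project examples, here is a minimal, self-contained sketch of the basic pattern, written for this page rather than taken from a project: a GenericDatumWriter serializes a GenericRecord into Avro binary through an Encoder, and a GenericDatumReader reads it back. The inline User schema is invented for illustration.

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;

public class GenericDatumWriterQuickStart {
    public static void main(String[] args) throws IOException {
        // Hypothetical record schema, inlined for illustration.
        Schema schema = new Schema.Parser().parse(
                "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
                        + "{\"name\":\"name\",\"type\":\"string\"},"
                        + "{\"name\":\"favorite_number\",\"type\":\"int\"}]}");

        GenericRecord user = new GenericData.Record(schema);
        user.put("name", "Alyssa");
        user.put("favorite_number", 256);

        // Serialize: GenericDatumWriter plus a binary Encoder.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
        new GenericDatumWriter<GenericRecord>(schema).write(user, encoder);
        encoder.flush(); // binaryEncoder buffers; flush before using the bytes

        // Read it back to verify the round trip.
        GenericRecord copy = new GenericDatumReader<GenericRecord>(schema).read(
                null, DecoderFactory.get().binaryDecoder(out.toByteArray(), null));
        System.out.println(copy); // {"name": "Alyssa", "favorite_number": 256}
    }
}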
Example #1
Source File: JsonUtils.java    From localization_nifi with Apache License 2.0
/**
 * Writes provided {@link GenericRecord} into the provided
 * {@link OutputStream} as JSON.
 */
public static void write(GenericRecord record, OutputStream out) {
    try {
        DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(record.getSchema());
        JsonEncoder encoder = EncoderFactory.get().jsonEncoder(record.getSchema(), out);
        writer.write(record, encoder);
        encoder.flush();
    } catch (Exception e) {
        throw new IllegalStateException("Failed to write GenericRecord", e);
    }
}
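A short usage sketch for the helper above (not from the original project), assuming the JsonUtils class is on the classpath; the one-field schema is made up for the demo.

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

public class JsonUtilsDemo {
    public static void main(String[] args) {
        // Hypothetical one-field schema, invented for this demo.
        Schema schema = new Schema.Parser().parse(
                "{\"type\":\"record\",\"name\":\"User\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"}]}");
        GenericRecord user = new GenericData.Record(schema);
        user.put("name", "Alyssa");
        JsonUtils.write(user, System.out); // should print {"name":"Alyssa"}
    }
}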
 
Example #2
Source File: TestConvertAvroToJSON.java    From localization_nifi with Apache License 2.0
@Test
public void testSingleAvroMessage() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ConvertAvroToJSON());
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));

    final GenericRecord user1 = new GenericData.Record(schema);
    user1.put("name", "Alyssa");
    user1.put("favorite_number", 256);

    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    final ByteArrayOutputStream out1 = AvroTestUtil.serializeAvroRecord(schema, datumWriter, user1);
    runner.enqueue(out1.toByteArray());

    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToJSON.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ConvertAvroToJSON.REL_SUCCESS).get(0);
    out.assertContentEquals("{\"name\": \"Alyssa\", \"favorite_number\": 256, \"favorite_color\": null}");
}
 
Example #3
Source File: AzureBlobAvroWriter.java    From samza with Apache License 2.0
@VisibleForTesting
byte[] encodeRecord(IndexedRecord record) {
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  Schema schema = record.getSchema();
  try {
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
    DatumWriter<IndexedRecord> writer;
    if (record instanceof SpecificRecord) {
      writer = new SpecificDatumWriter<>(schema);
    } else {
      writer = new GenericDatumWriter<>(schema);
    }
    writer.write(record, encoder);
    encoder.flush(); //encoder may buffer
  } catch (Exception e) {
    throw new SamzaException("Unable to serialize Avro record using schema within the record: " + schema.toString(), e);
  }
  return out.toByteArray();
}
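Note the dispatch on the record's runtime type: instances of generated classes (SpecificRecord) get a SpecificDatumWriter, while everything else falls back to GenericDatumWriter. Example #19 and Example #29 below apply the same pattern, with Example #19 adding a ReflectDatumWriter fallback for plain Java objects.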
 
Example #4
Source File: ParquetReader.java    From reef with Apache License 2.0
/**
 * Serialize Avro data to an in-memory ByteBuffer.
 * @return A ByteBuffer that contains avro data.
 * @throws IOException if the parquet file couldn't be parsed correctly.
 */
public ByteBuffer serializeToByteBuffer() throws IOException {
  final ByteArrayOutputStream stream = new ByteArrayOutputStream();
  final Encoder encoder = EncoderFactory.get().binaryEncoder(stream, null);
  final DatumWriter<GenericRecord> writer = new GenericDatumWriter<>();
  writer.setSchema(createAvroSchema());
  final AvroParquetReader<GenericRecord> reader = createAvroReader();

  GenericRecord record = reader.read();
  while (record != null) {
    writer.write(record, encoder);
    record = reader.read();
  }

  try {
    reader.close();
  } catch (IOException ex){
    LOG.log(Level.SEVERE, ex.getMessage());
    throw ex;
  }

  encoder.flush();
  final ByteBuffer buf = ByteBuffer.wrap(stream.toByteArray());
  buf.order(ByteOrder.LITTLE_ENDIAN);
  return buf;
}
 
Example #5
Source File: TestConvertAvroToORC.java    From nifi with Apache License 2.0
@Test
public void test_onTrigger_routing_to_failure_null_type() throws Exception {
    String testString = "Hello World";
    GenericData.Record record = TestNiFiOrcUtils.buildAvroRecordWithNull(testString);

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);
    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test.avro");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS test_record (string STRING, null BOOLEAN) STORED AS ORC",
            resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
}
 
Example #6
Source File: TestConvertAvroToJSON.java    From nifi with Apache License 2.0
@Test
public void testZeroRecords() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ConvertAvroToJSON());
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));


    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    final ByteArrayOutputStream out1 = serializeAvroRecord(schema, datumWriter);
    runner.enqueue(out1.toByteArray());

    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToJSON.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ConvertAvroToJSON.REL_SUCCESS).get(0);
    out.assertContentEquals("{}");

}
 
Example #7
Source File: OracleGenericSchemaDecoder.java    From DBus with Apache License 2.0
private void initDecoder() {
    try {
        genericSchema = OracleGenericSchemaProvider.getInstance().getSchema("generic_wrapper.avsc");

        fullPullSchema = OracleGenericSchemaProvider.getInstance().getSchema("DBUS.DB_FULL_PULL_REQUESTS.avsc");
        fullPullHash = OracleGenericSchemaProvider.getInstance().getSchemaHash("DBUS.DB_FULL_PULL_REQUESTS.avsc");

        syncEventSchema = OracleGenericSchemaProvider.getInstance().getSchema("DBUS.META_SYNC_EVENT.avsc");
        syncEventHash = OracleGenericSchemaProvider.getInstance().getSchemaHash("DBUS.META_SYNC_EVENT.avsc");

        heartbeatSchema = OracleGenericSchemaProvider.getInstance().getSchema("DBUS.DB_HEARTBEAT_MONITOR.avsc");
        heartbeatHash = OracleGenericSchemaProvider.getInstance().getSchemaHash("DBUS.DB_HEARTBEAT_MONITOR.avsc");

        datumReader = new GenericDatumReader<>(genericSchema);
        datumWriter = new GenericDatumWriter<>(genericSchema);
    } catch (Exception e) {
        logger.error("OracleGenericSchemaDecoder Initialization Error!", e);
    }
}
 
Example #8
Source File: DatasetContentWriter.java    From components with Apache License 2.0
private Consumer<IndexedRecord> getWritingConsumer(Encoder[] encoder) {
    return new Consumer<IndexedRecord>() {

        GenericDatumWriter<IndexedRecord> writer = null;

        @Override
        public void accept(IndexedRecord ir) {
            if (writer == null) {
                writer = new GenericDatumWriter<>(ir.getSchema());
                try {
                    if (json) {
                        encoder[0] = EncoderFactory.get().jsonEncoder(ir.getSchema(), output);
                    } else {
                        encoder[0] = EncoderFactory.get().binaryEncoder(output, null);
                    }
                } catch (IOException ioe) {
                    throw new RuntimeException(ioe);
                }

            }
            writeIndexedRecord(writer, encoder[0], ir);
        }
    };
}
 
Example #9
Source File: AvroKeyValueFileWrite.java    From hiped2 with Apache License 2.0
public static void writeToAvro(File inputFile, OutputStream outputStream)
    throws IOException {

  DataFileWriter<GenericRecord> writer =
      new DataFileWriter<GenericRecord>(
          new GenericDatumWriter<GenericRecord>());

  writer.setCodec(CodecFactory.snappyCodec());
  writer.create(SCHEMA, outputStream);

  for (Stock stock : AvroStockUtils.fromCsvFile(inputFile)) {

    AvroKeyValue<CharSequence, Stock> record
        = new AvroKeyValue<CharSequence, Stock>(new GenericData.Record(SCHEMA));
    record.setKey(stock.getSymbol());
    record.setValue(stock);

    writer.append(record.get());
  }

  IOUtils.closeStream(writer);
  IOUtils.closeStream(outputStream);
}
 
Example #10
Source File: TransformTest.java    From schema-registry-transfer-smt with Apache License 2.0
private ByteArrayOutputStream encodeAvroObject(org.apache.avro.Schema schema, int sourceId, Object datum) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();

    // Wire-format header: one magic byte followed by the 4-byte schema ID (big-endian).
    out.write(MAGIC_BYTE);
    out.write(ByteBuffer.allocate(ID_SIZE).putInt(sourceId).array());

    EncoderFactory encoderFactory = EncoderFactory.get();
    BinaryEncoder encoder = encoderFactory.directBinaryEncoder(out, null);
    Object value = datum instanceof NonRecordContainer
            ? ((NonRecordContainer) datum).getValue()
            : datum;
    DatumWriter<Object> writer = new GenericDatumWriter<>(schema);
    writer.write(value, encoder);
    encoder.flush();

    return out;
}
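For context, a sketch of the reverse operation (not part of the original test), assuming the same framing of one magic byte plus a 4-byte schema ID; in a real Schema Registry client the ID would be used to look up the writer schema rather than passing it in.

import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.io.DecoderFactory;

// Hypothetical counterpart to encodeAvroObject(...) above.
private Object decodeAvroObject(Schema writerSchema, byte[] payload) throws IOException {
    ByteBuffer buf = ByteBuffer.wrap(payload);
    buf.get();    // skip the magic byte
    buf.getInt(); // schema ID; a registry client would resolve the schema from this
    GenericDatumReader<Object> reader = new GenericDatumReader<>(writerSchema);
    return reader.read(null, DecoderFactory.get().binaryDecoder(
            payload, buf.position(), payload.length - buf.position(), null));
}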
 
Example #11
Source File: TestConvertAvroToJSON.java    From localization_nifi with Apache License 2.0
@Test
public void testZeroRecords_wrapSingleRecord() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ConvertAvroToJSON());
    runner.setProperty(ConvertAvroToJSON.WRAP_SINGLE_RECORD, Boolean.toString(true));
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));


    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    final ByteArrayOutputStream out1 = serializeAvroRecord(schema, datumWriter);
    runner.enqueue(out1.toByteArray());

    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToJSON.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ConvertAvroToJSON.REL_SUCCESS).get(0);
    out.assertContentEquals("[{}]");

}
 
Example #12
Source File: Display.java    From big-c with Apache License 2.0
public AvroFileInputStream(FileStatus status) throws IOException {
  pos = 0;
  buffer = new byte[0];
  GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
  FileContext fc = FileContext.getFileContext(new Configuration());
  fileReader =
    DataFileReader.openReader(new AvroFSInput(fc, status.getPath()),reader);
  Schema schema = fileReader.getSchema();
  writer = new GenericDatumWriter<Object>(schema);
  output = new ByteArrayOutputStream();
  JsonGenerator generator =
    new JsonFactory().createJsonGenerator(output, JsonEncoding.UTF8);
  MinimalPrettyPrinter prettyPrinter = new MinimalPrettyPrinter();
  prettyPrinter.setRootValueSeparator(System.getProperty("line.separator"));
  generator.setPrettyPrinter(prettyPrinter);
  encoder = EncoderFactory.get().jsonEncoder(schema, generator);
}
 
Example #13
Source File: RedshiftIT.java    From digdag with Apache License 2.0
private byte[] avroTestData(List<Schema.Field> fields, List<Map<String, Object>> records)
        throws IOException
{
    Schema schema = Schema.createRecord("testdata", null, null, false);
    schema.setFields(fields);

    ByteArrayOutputStream out = new ByteArrayOutputStream();
    GenericDatumWriter<GenericData.Record> datumWriter = new GenericDatumWriter<>(schema);
    DataFileWriter<GenericData.Record> writer = new DataFileWriter<>(datumWriter);
    writer.create(schema, out);
    for (Map<String, Object> record : records) {
        GenericData.Record r = new GenericData.Record(schema);
        for (Map.Entry<String, Object> item : record.entrySet()) {
            r.put(item.getKey(), item.getValue());
        }
        writer.append(r);
    }
    writer.close();

    return out.toByteArray();
}
 
Example #14
Source File: TestConvertAvroToJSON.java    From localization_nifi with Apache License 2.0
@Test
public void testSingleAvroMessage_wrapSingleMessage_noContainer() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ConvertAvroToJSON());
    // Verify we do not wrap output for a single record if not configured to use a container
    runner.setProperty(ConvertAvroToJSON.CONTAINER_OPTIONS, ConvertAvroToJSON.CONTAINER_NONE);
    runner.setProperty(ConvertAvroToJSON.WRAP_SINGLE_RECORD, Boolean.toString(true));
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));

    final GenericRecord user1 = new GenericData.Record(schema);
    user1.put("name", "Alyssa");
    user1.put("favorite_number", 256);

    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    final ByteArrayOutputStream out1 = AvroTestUtil.serializeAvroRecord(schema, datumWriter, user1);
    runner.enqueue(out1.toByteArray());

    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToJSON.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ConvertAvroToJSON.REL_SUCCESS).get(0);
    out.assertContentEquals("{\"name\": \"Alyssa\", \"favorite_number\": 256, \"favorite_color\": null}");
}
 
Example #15
Source File: TestExtractAvroMetadata.java    From localization_nifi with Apache License 2.0
@Test
public void testExtractionWithCodec() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ExtractAvroMetadata());
    runner.setProperty(ExtractAvroMetadata.METADATA_KEYS, AVRO_CODEC_ATTR); // test dynamic attribute avro.codec

    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array.avsc"));

    final GenericData.Array<String> data = new GenericData.Array<>(schema, Arrays.asList("one", "two", "three"));
    final DatumWriter<GenericData.Array<String>> datumWriter = new GenericDatumWriter<>(schema);

    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    final DataFileWriter<GenericData.Array<String>> dataFileWriter = new DataFileWriter<>(datumWriter);
    dataFileWriter.setCodec(CodecFactory.deflateCodec(1));
    dataFileWriter.create(schema, out);
    dataFileWriter.append(data);
    dataFileWriter.close();

    runner.enqueue(out.toByteArray());
    runner.run();

    runner.assertAllFlowFilesTransferred(ExtractAvroMetadata.REL_SUCCESS, 1);

    final MockFlowFile flowFile = runner.getFlowFilesForRelationship(ExtractAvroMetadata.REL_SUCCESS).get(0);
    flowFile.assertAttributeEquals("avro.codec", "deflate");
}
 
Example #16
Source File: AVROIntermediateDataFormat.java    From sqoop-on-spark with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public void write(DataOutput out) throws IOException {
  // do we need to write the schema?
  DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(avroSchema);
  BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder((DataOutputStream) out, null);
  writer.write(data, encoder);
}
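Unlike the buffered binaryEncoder used in most of the examples here, directBinaryEncoder does no internal buffering, so each write goes straight to the underlying DataOutput and the absence of a flush() call is harmless; with binaryEncoder, flushing before consuming the bytes is mandatory (see the "encoder may buffer" comment in Example #3).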
 
Example #17
Source File: AvroEventSerializer.java    From Transwarp-Sample-Code with MIT License
private void initialize(Event event) throws IOException {
  Schema schema = null;
  String schemaUrl = event.getHeaders().get(AVRO_SCHEMA_URL_HEADER);
  if (schemaUrl != null) {
    schema = schemaCache.get(schemaUrl);
    if (schema == null) {
      schema = loadFromUrl(schemaUrl);
      schemaCache.put(schemaUrl, schema);
    }
  }
  if (schema == null) {
    String schemaString = event.getHeaders().get(AVRO_SCHEMA_LITERAL_HEADER);
    if (schemaString == null) {
      throw new FlumeException("Could not find schema for event " + event);
    }
    schema = new Schema.Parser().parse(schemaString);
  }

  writer = new GenericDatumWriter<Object>(schema);
  dataFileWriter = new DataFileWriter<Object>(writer);

  dataFileWriter.setSyncInterval(syncIntervalBytes);

  try {
    CodecFactory codecFactory = CodecFactory.fromString(compressionCodec);
    dataFileWriter.setCodec(codecFactory);
  } catch (AvroRuntimeException e) {
    logger.warn("Unable to instantiate avro codec with name (" +
        compressionCodec + "). Compression disabled. Exception follows.", e);
  }

  dataFileWriter.create(schema, out);
}
 
Example #18
Source File: Purge.java    From Cubert with Apache License 2.0
private DataFileWriter<GenericRecord> createDataFileWriter(DataFileReader<GenericRecord> dataFileReader) throws IllegalArgumentException,
        IOException
{
    Schema schema = dataFileReader.getSchema();
    DatumWriter<GenericRecord> datumWriter =
            new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> writer =
            new DataFileWriter<GenericRecord>(datumWriter);

    // Get the codec of the reader
    String codecStr = dataFileReader.getMetaString(DataFileConstants.CODEC);
    int level = conf.getInt("avro.mapred.deflate.level", 1);
    String codecName = conf.get("avro.output.codec", codecStr);
    CodecFactory factory =
            codecName.equals("deflate") ? CodecFactory.deflateCodec(level)
                    : CodecFactory.fromString(codecName);

    // Set the codec of the writer
    writer.setCodec(factory);

    writer.setSyncInterval(conf.getInt("avro.mapred.sync.interval",
                                       Math.max(conf.getInt("io.file.buffer.size",
                                                            16000), 16000)));

    writer.create(schema,
                  new Path(tempFileName).getFileSystem(conf)
                                        .create(new Path(tempFileName)));
    return writer;
}
 
Example #19
Source File: AvroCodec.java    From schema-evolution-samples with Apache License 2.0
private DatumWriter getDatumWriter(Class<?> type, Schema schema){
	DatumWriter writer = null;
	logger.debug("Finding correct DatumWriter for type {}",type.getName());
	if(SpecificRecord.class.isAssignableFrom(type)){
		writer = new SpecificDatumWriter<>(schema);
	}else if(GenericRecord.class.isAssignableFrom(type)){
		writer = new GenericDatumWriter<>(schema);
	}else{
		writer = new ReflectDatumWriter<>(schema);
	}
	logger.debug("DatumWriter of type {} selected",writer.getClass().getName());
	return writer;
}
 
Example #20
Source File: SparkVerifierTest.java    From tablasco with Apache License 2.0
private static void writeAvroData(List<GenericRecord> data, File avroFile) throws IOException
{
    FileUtils.forceMkdir(avroFile.getParentFile());
    Schema schema = data.get(0).getSchema();
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter);
    dataFileWriter.create(schema, avroFile);
    for (GenericRecord genericRecord : data)
    {
        dataFileWriter.append(genericRecord);
    }
    dataFileWriter.close();
}
 
Example #21
Source File: PartitionCollapsingExecutionPlannerTests.java    From datafu with Apache License 2.0
private void createOutput(DateRange dateRange) throws IOException
{
  DataFileWriter<GenericRecord> dataWriter;
  OutputStream outputStream;
  
  Path path = new Path(_outputPath,PathUtils.datedPathFormat.format(dateRange.getEndDate()));
  
  Schema outputSchema = Schemas.createRecordSchema(PartitionCollapsingTests.class, "Output",
                                            new Field("id", Schema.create(Type.LONG), "ID", null));
  
  outputStream = getFileSystem().create(new Path(path, "part-00000.avro"));
  
  GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>();
  dataWriter = new DataFileWriter<GenericRecord>(writer);      
  
  dataWriter.setMeta(AvroDateRangeMetadata.METADATA_DATE_START,
                     Long.toString(dateRange.getBeginDate().getTime()));
  
  dataWriter.setMeta(AvroDateRangeMetadata.METADATA_DATE_END,
                     Long.toString(dateRange.getEndDate().getTime()));
  
  dataWriter.create(outputSchema, outputStream);
      
  // empty file
  
  dataWriter.close();
  outputStream.close();
  dataWriter = null;
  outputStream = null; 
}
 
Example #22
Source File: AvroCoder.java    From beam with Apache License 2.0
protected AvroCoder(Class<T> type, Schema schema) {
  this.type = type;
  this.schemaSupplier = new SerializableSchemaSupplier(schema);
  typeDescriptor = TypeDescriptor.of(type);
  nonDeterministicReasons = new AvroDeterminismChecker().check(TypeDescriptor.of(type), schema);

  // Decoder and Encoder start off null for each thread. They are allocated and potentially
  // reused inside encode/decode.
  this.decoder = new EmptyOnDeserializationThreadLocal<>();
  this.encoder = new EmptyOnDeserializationThreadLocal<>();

  this.reflectData = Suppliers.memoize(new SerializableReflectDataSupplier(getType()));

  // Reader and writer are allocated once per thread per Coder
  this.reader =
      new EmptyOnDeserializationThreadLocal<DatumReader<T>>() {
        private final AvroCoder<T> myCoder = AvroCoder.this;

        @Override
        public DatumReader<T> initialValue() {
          return myCoder.getType().equals(GenericRecord.class)
              ? new GenericDatumReader<>(myCoder.getSchema())
              : new ReflectDatumReader<>(
                  myCoder.getSchema(), myCoder.getSchema(), myCoder.reflectData.get());
        }
      };

  this.writer =
      new EmptyOnDeserializationThreadLocal<DatumWriter<T>>() {
        private final AvroCoder<T> myCoder = AvroCoder.this;

        @Override
        public DatumWriter<T> initialValue() {
          return myCoder.getType().equals(GenericRecord.class)
              ? new GenericDatumWriter<>(myCoder.getSchema())
              : new ReflectDatumWriter<>(myCoder.getSchema(), myCoder.reflectData.get());
        }
      };
}
 
Example #23
Source File: AvroConsoleProducer.java    From HiveKa with Apache License 2.0
public static byte[] serializeAvro(Schema schema, GenericRecord event) throws IOException {
  ByteArrayOutputStream stream = new ByteArrayOutputStream();
  BinaryEncoder binaryEncoder = EncoderFactory.get().binaryEncoder(stream, null);
  DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
  datumWriter.write(event, binaryEncoder);
  binaryEncoder.flush();
  IOUtils.closeQuietly(stream);


  return stream.toByteArray();
}
 
Example #24
Source File: TestConvertAvroToJSON.java    From localization_nifi with Apache License 2.0
@Test
public void testSingleSchemalessAvroMessage_wrapSingleMessage() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ConvertAvroToJSON());
    runner.setProperty(ConvertAvroToJSON.CONTAINER_OPTIONS, ConvertAvroToJSON.CONTAINER_ARRAY);
    runner.setProperty(ConvertAvroToJSON.WRAP_SINGLE_RECORD, Boolean.toString(true));
    Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));
    String stringSchema = schema.toString();
    runner.setProperty(ConvertAvroToJSON.SCHEMA, stringSchema);

    final GenericRecord user1 = new GenericData.Record(schema);
    user1.put("name", "Alyssa");
    user1.put("favorite_number", 256);

    final ByteArrayOutputStream out1 = new ByteArrayOutputStream();
    final BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out1, null);
    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    datumWriter.write(user1, encoder);

    encoder.flush();
    out1.flush();
    byte[] test = out1.toByteArray();
    runner.enqueue(test);

    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToJSON.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ConvertAvroToJSON.REL_SUCCESS).get(0);
    out.assertContentEquals("[{\"name\": \"Alyssa\", \"favorite_number\": 256, \"favorite_color\": null}]");
}
 
Example #25
Source File: AvroHdfsFileWriter.java    From ml-ease with Apache License 2.0
public AvroHdfsFileWriter(JobConf conf, String path, Schema schema) throws IOException
{
  FileSystem fs = FileSystem.get(conf);
  FSDataOutputStream out = fs.create(new Path(path));
  DatumWriter<T> writer = new GenericDatumWriter<T>(schema);
  _recordWriter = new DataFileWriter<T>(writer);
  _recordWriter.create(schema, out);
}
 
Example #26
Source File: RecordBenchmarkBase.java    From avro-fastserde with Apache License 2.0
@Setup
public void init() throws Exception {
    final GenericDatumWriter<GenericData.Record> datumWriter = new GenericDatumWriter<>(specificRecordSchema);
    for (int i = 0; i < 1000; i++) {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        Encoder encoder = EncoderFactory.get().binaryEncoder(baos, null);

        genericRecords.add(FastSerdeBenchmarkSupport.generateRandomRecordData(specificRecordSchema));
        specificRecords
                .add(FastSerdeBenchmarkSupport.toSpecificRecord(genericRecords.get(genericRecords.size() - 1)));

        datumWriter.write(genericRecords.get(genericRecords.size() - 1), encoder);
        encoder.flush();

        recordBytes.add(baos.toByteArray());
    }
    fastGenericDatumReader = new FastGenericDatumReader<>(
            specificRecordSchema, cache);
    fastGenericDatumWriter = new FastGenericDatumWriter<>(specificRecordSchema, cache);

    genericDatumReader = new GenericDatumReader<>(specificRecordSchema);
    genericDatumWriter = new GenericDatumWriter<>(specificRecordSchema);

    fastSpecificDatumReader = new FastSpecificDatumReader<>(
            specificRecordSchema, cache);
    fastSpecificDatumWriter = new FastSpecificDatumWriter<>(specificRecordSchema, cache);

    specificDatumReader = new SpecificDatumReader<>(specificRecordSchema);
    specificDatumWriter = new SpecificDatumWriter<>(specificRecordSchema);
}
 
Example #27
Source File: AvroCompactionTaskTest.java    From incubator-gobblin with Apache License 2.0
public void createAvroFileWithRepeatingRecords(File file, GenericRecord r, int count, Optional<Schema> schema) throws IOException {
    DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>());
    writer.create(schema.isPresent() ? schema.get() : getSchema(), new FileOutputStream(file));
    for (int i = 0; i < count; ++i) {
      writer.append(r);
    }
    writer.close();
}
 
Example #28
Source File: BaseProducer.java    From HiveKa with Apache License 2.0
public static byte[] serializeAvro(Schema schema, GenericRecord event) throws IOException {
  ByteArrayOutputStream stream = new ByteArrayOutputStream();
  BinaryEncoder binaryEncoder = EncoderFactory.get().binaryEncoder(stream, null);
  DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
  datumWriter.write(event, binaryEncoder);
  binaryEncoder.flush();
  IOUtils.closeQuietly(stream);


  return stream.toByteArray();
}
 
Example #29
Source File: DefaultAvroDatumProvider.java    From apicurio-registry with Apache License 2.0
@Override
public DatumWriter<T> createDatumWriter(T data, Schema schema) {
    if (data instanceof SpecificRecord) {
        return new SpecificDatumWriter<>(schema);
    } else {
        return new GenericDatumWriter<>(schema);
    }
}
 
Example #30
Source File: AvroTeeWriter.java    From Cubert with Apache License 2.0
@Override
public void open(Configuration conf,
                 JsonNode json,
                 BlockSchema schema,
                 Path root,
                 String filename) throws IOException
{
    Path teePath = new Path(root, filename + ".avro");
    FileSystem fs = FileSystem.get(conf);

    Schema avroSchema = AvroUtils.convertFromBlockSchema("record", schema);

    GenericDatumWriter<Object> datumWriter =
            new PigAvroDatumWriter(avroSchema);
    dataFileWriter = new DataFileWriter<Object>(datumWriter);

    // if compression is requested, set the proper compression codec
    if (PhaseContext.getConf().getBoolean("mapred.output.compress", false))
    {
        int level = conf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = conf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory =
                codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level)
                        : CodecFactory.fromString(codecName);
        dataFileWriter.setCodec(factory);
    }

    dataFileWriter.create(avroSchema, fs.create(teePath));
}