Java Code Examples for org.apache.avro.generic.GenericDatumWriter

The following examples show how to use org.apache.avro.generic.GenericDatumWriter. These examples are extracted from open source projects; where available, the originating project, source file, and license are noted above each example.
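
Most of the examples share one basic pattern: construct a GenericDatumWriter for a Schema, obtain an Encoder from EncoderFactory, write the datum, and flush the encoder. The following minimal round trip illustrates that pattern together with the matching GenericDatumReader; it is a sketch for orientation only, and the inline schema, class name, and field values are illustrative rather than taken from any project below.

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;

public class GenericDatumWriterRoundTrip {
  public static void main(String[] args) throws IOException {
    // Illustrative schema: a record with a single string field.
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"User\",\"fields\":"
            + "[{\"name\":\"name\",\"type\":\"string\"}]}");

    GenericRecord user = new GenericData.Record(schema);
    user.put("name", "Alyssa");

    // Serialize: GenericDatumWriter plus a BinaryEncoder.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
    new GenericDatumWriter<GenericRecord>(schema).write(user, encoder);
    encoder.flush(); // binaryEncoder buffers, so flush before using the bytes

    // Deserialize: the matching GenericDatumReader plus a BinaryDecoder.
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(out.toByteArray(), null);
    GenericRecord copy =
        new GenericDatumReader<GenericRecord>(schema).read(null, decoder);
    System.out.println(copy); // prints {"name": "Alyssa"}
  }
}

As Example 1 also notes, the encoder returned by binaryEncoder() buffers internally, so flush() is required before the output bytes are consumed.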
Example 1
Source Project: samza   Source File: AzureBlobAvroWriter.java    License: Apache License 2.0
@VisibleForTesting
byte[] encodeRecord(IndexedRecord record) {
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  Schema schema = record.getSchema();
  try {
    EncoderFactory encoderFactory = EncoderFactory.get();
    BinaryEncoder encoder = encoderFactory.binaryEncoder(out, null);
    DatumWriter<IndexedRecord> writer;
    if (record instanceof SpecificRecord) {
      writer = new SpecificDatumWriter<>(schema);
    } else {
      writer = new GenericDatumWriter<>(schema);
    }
    writer.write(record, encoder);
    encoder.flush(); //encoder may buffer
  } catch (Exception e) {
    throw new SamzaException("Unable to serialize Avro record using schema within the record: " + schema.toString(), e);
  }
  return out.toByteArray();
}
 
Example 2
Source Project: localization_nifi   Source File: JsonUtils.java    License: Apache License 2.0
/**
 * Writes the provided {@link GenericRecord} to the provided
 * {@link OutputStream} as JSON.
 */
public static void write(GenericRecord record, OutputStream out) {
    try {
        DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(record.getSchema());
        JsonEncoder encoder = EncoderFactory.get().jsonEncoder(record.getSchema(), out);
        writer.write(record, encoder);
        encoder.flush();
    } catch (Exception e) {
        throw new IllegalStateException("Failed to read GenericRecord", e);
    }
}
 
Example 3
private ByteArrayOutputStream encodeAvroObject(org.apache.avro.Schema schema, int sourceId, Object datum) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();

    out.write(MAGIC_BYTE);
    out.write(ByteBuffer.allocate(ID_SIZE).putInt(sourceId).array());

    EncoderFactory encoderFactory = EncoderFactory.get();
    BinaryEncoder encoder = encoderFactory.directBinaryEncoder(out, null);
    Object value = datum instanceof NonRecordContainer
            ? ((NonRecordContainer) datum).getValue()
            : datum;
    DatumWriter<Object> writer = new GenericDatumWriter<>(schema);
    writer.write(value, encoder);
    encoder.flush();

    return out;
}
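
The layout written above is the Confluent-style wire format: one magic byte, a four-byte big-endian schema id, then the Avro binary payload. The following is a hedged sketch of the matching decode step, not code from the same project; MAGIC_BYTE is assumed to match the constant above, ID_SIZE is assumed to be 4, and the schema is passed in directly rather than resolved from a registry.

private Object decodeAvroObject(byte[] payload, org.apache.avro.Schema schema) throws IOException {
    ByteBuffer buffer = ByteBuffer.wrap(payload);
    if (buffer.get() != MAGIC_BYTE) {
        throw new IOException("Unknown magic byte");
    }
    int sourceId = buffer.getInt(); // would normally drive a schema-registry lookup

    // Decode the remaining bytes as Avro binary with the writer's schema.
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(
            payload, buffer.position(), buffer.remaining(), null);
    return new GenericDatumReader<>(schema).read(null, decoder);
}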
 
Example 4
Source Project: localization_nifi   Source File: TestExtractAvroMetadata.java    License: Apache License 2.0
@Test
public void testExtractionWithCodec() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ExtractAvroMetadata());
    runner.setProperty(ExtractAvroMetadata.METADATA_KEYS, AVRO_CODEC_ATTR); // test dynamic attribute avro.codec

    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array.avsc"));

    final GenericData.Array<String> data = new GenericData.Array<>(schema, Arrays.asList("one", "two", "three"));
    final DatumWriter<GenericData.Array<String>> datumWriter = new GenericDatumWriter<>(schema);

    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    final DataFileWriter<GenericData.Array<String>> dataFileWriter = new DataFileWriter<>(datumWriter);
    dataFileWriter.setCodec(CodecFactory.deflateCodec(1));
    dataFileWriter.create(schema, out);
    dataFileWriter.append(data);
    dataFileWriter.close();

    runner.enqueue(out.toByteArray());
    runner.run();

    runner.assertAllFlowFilesTransferred(ExtractAvroMetadata.REL_SUCCESS, 1);

    final MockFlowFile flowFile = runner.getFlowFilesForRelationship(ExtractAvroMetadata.REL_SUCCESS).get(0);
    flowFile.assertAttributeEquals("avro.codec", "deflate");
}
 
Example 5
Source Project: localization_nifi   Source File: TestConvertAvroToJSON.java    License: Apache License 2.0
@Test
public void testSingleAvroMessage() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ConvertAvroToJSON());
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));

    final GenericRecord user1 = new GenericData.Record(schema);
    user1.put("name", "Alyssa");
    user1.put("favorite_number", 256);

    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    final ByteArrayOutputStream out1 = AvroTestUtil.serializeAvroRecord(schema, datumWriter, user1);
    runner.enqueue(out1.toByteArray());

    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToJSON.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ConvertAvroToJSON.REL_SUCCESS).get(0);
    out.assertContentEquals("{\"name\": \"Alyssa\", \"favorite_number\": 256, \"favorite_color\": null}");
}
 
Example 6
Source Project: localization_nifi   Source File: TestConvertAvroToJSON.java    License: Apache License 2.0
@Test
public void testSingleAvroMessage_wrapSingleMessage_noContainer() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ConvertAvroToJSON());
    // Verify we do not wrap output for a single record if not configured to use a container
    runner.setProperty(ConvertAvroToJSON.CONTAINER_OPTIONS, ConvertAvroToJSON.CONTAINER_NONE);
    runner.setProperty(ConvertAvroToJSON.WRAP_SINGLE_RECORD, Boolean.toString(true));
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));

    final GenericRecord user1 = new GenericData.Record(schema);
    user1.put("name", "Alyssa");
    user1.put("favorite_number", 256);

    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    final ByteArrayOutputStream out1 = AvroTestUtil.serializeAvroRecord(schema, datumWriter, user1);
    runner.enqueue(out1.toByteArray());

    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToJSON.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ConvertAvroToJSON.REL_SUCCESS).get(0);
    out.assertContentEquals("{\"name\": \"Alyssa\", \"favorite_number\": 256, \"favorite_color\": null}");
}
 
Example 7
Source Project: digdag   Source File: RedshiftIT.java    License: Apache License 2.0
private byte[] avroTestData(List<Schema.Field> fields, List<Map<String, Object>> records)
        throws IOException
{
    Schema schema = Schema.createRecord("testdata", null, null, false);
    schema.setFields(fields);

    ByteArrayOutputStream out = new ByteArrayOutputStream();
    GenericDatumWriter<GenericData.Record> datum = new GenericDatumWriter<>(schema);
    DataFileWriter<GenericData.Record> writer = new DataFileWriter<>(datum);
    writer.create(schema, out);
    for (Map<String, Object> record : records) {
        GenericData.Record r = new GenericData.Record(schema);
        for (Map.Entry<String, Object> item : record.entrySet()) {
            r.put(item.getKey(), item.getValue());
        }
        writer.append(r);
    }
    writer.close();

    return out.toByteArray();
}
 
Example 8
Source Project: big-c   Source File: Display.java    License: Apache License 2.0
public AvroFileInputStream(FileStatus status) throws IOException {
  pos = 0;
  buffer = new byte[0];
  GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
  FileContext fc = FileContext.getFileContext(new Configuration());
  fileReader =
    DataFileReader.openReader(new AvroFSInput(fc, status.getPath()),reader);
  Schema schema = fileReader.getSchema();
  writer = new GenericDatumWriter<Object>(schema);
  output = new ByteArrayOutputStream();
  JsonGenerator generator =
    new JsonFactory().createJsonGenerator(output, JsonEncoding.UTF8);
  MinimalPrettyPrinter prettyPrinter = new MinimalPrettyPrinter();
  prettyPrinter.setRootValueSeparator(System.getProperty("line.separator"));
  generator.setPrettyPrinter(prettyPrinter);
  encoder = EncoderFactory.get().jsonEncoder(schema, generator);
}
 
Example 9
Source Project: localization_nifi   Source File: TestConvertAvroToJSON.java    License: Apache License 2.0
@Test
public void testZeroRecords_wrapSingleRecord() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ConvertAvroToJSON());
    runner.setProperty(ConvertAvroToJSON.WRAP_SINGLE_RECORD, Boolean.toString(true));
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));


    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    final ByteArrayOutputStream out1 = serializeAvroRecord(schema, datumWriter);
    runner.enqueue(out1.toByteArray());

    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToJSON.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ConvertAvroToJSON.REL_SUCCESS).get(0);
    out.assertContentEquals("[{}]");

}
 
Example 10
Source Project: reef   Source File: ParquetReader.java    License: Apache License 2.0
/**
 * Serializes Avro data to an in-memory ByteBuffer.
 * @return A ByteBuffer that contains Avro data.
 * @throws IOException if the parquet file couldn't be parsed correctly.
 */
public ByteBuffer serializeToByteBuffer() throws IOException {
  final ByteArrayOutputStream stream = new ByteArrayOutputStream();
  final Encoder encoder = EncoderFactory.get().binaryEncoder(stream, null);
  final DatumWriter<GenericRecord> writer = new GenericDatumWriter<>();
  writer.setSchema(createAvroSchema());
  final AvroParquetReader<GenericRecord> reader = createAvroReader();

  GenericRecord record = reader.read();
  while (record != null) {
    writer.write(record, encoder);
    record = reader.read();
  }

  try {
    reader.close();
  } catch (IOException ex) {
    LOG.log(Level.SEVERE, ex.getMessage());
    throw ex;
  }

  encoder.flush();
  final ByteBuffer buf = ByteBuffer.wrap(stream.toByteArray());
  buf.order(ByteOrder.LITTLE_ENDIAN);
  return buf;
}
 
Example 11
Source Project: components   Source File: DatasetContentWriter.java    License: Apache License 2.0
private Consumer<IndexedRecord> getWritingConsumer(Encoder[] encoder) {
    return new Consumer<IndexedRecord>() {

        GenericDatumWriter<IndexedRecord> writer = null;

        @Override
        public void accept(IndexedRecord ir) {
            if (writer == null) {
                writer = new GenericDatumWriter<>(ir.getSchema());
                try {
                    if (json) {
                        encoder[0] = EncoderFactory.get().jsonEncoder(ir.getSchema(), output);
                    } else {
                        encoder[0] = EncoderFactory.get().binaryEncoder(output, null);
                    }
                } catch (IOException ioe) {
                    throw new RuntimeException(ioe);
                }

            }
            writeIndexedRecord(writer, encoder[0], ir);
        }
    };
}
 
Example 12
Source Project: hiped2   Source File: AvroKeyValueFileWrite.java    License: Apache License 2.0
public static void writeToAvro(File inputFile, OutputStream outputStream)
    throws IOException {

  DataFileWriter<GenericRecord> writer =
      new DataFileWriter<GenericRecord>(
          new GenericDatumWriter<GenericRecord>());

  writer.setCodec(CodecFactory.snappyCodec());
  writer.create(SCHEMA, outputStream);

  for (Stock stock : AvroStockUtils.fromCsvFile(inputFile)) {

    AvroKeyValue<CharSequence, Stock> record
        = new AvroKeyValue<CharSequence, Stock>(new GenericData.Record(SCHEMA));
    record.setKey(stock.getSymbol());
    record.setValue(stock);

    writer.append(record.get());
  }

  IOUtils.closeStream(writer);
  IOUtils.closeStream(outputStream);
}
 
Example 13
Source Project: DBus   Source File: OracleGenericSchemaDecoder.java    License: Apache License 2.0
private void initDecoder() {
    try {
        genericSchema = OracleGenericSchemaProvider.getInstance().getSchema("generic_wrapper.avsc");

        fullPullSchema = OracleGenericSchemaProvider.getInstance().getSchema("DBUS.DB_FULL_PULL_REQUESTS.avsc");
        fullPullHash = OracleGenericSchemaProvider.getInstance().getSchemaHash("DBUS.DB_FULL_PULL_REQUESTS.avsc");

        syncEventSchema = OracleGenericSchemaProvider.getInstance().getSchema("DBUS.META_SYNC_EVENT.avsc");
        syncEventHash = OracleGenericSchemaProvider.getInstance().getSchemaHash("DBUS.META_SYNC_EVENT.avsc");

        heartbeatSchema = OracleGenericSchemaProvider.getInstance().getSchema("DBUS.DB_HEARTBEAT_MONITOR.avsc");
        heartbeatHash = OracleGenericSchemaProvider.getInstance().getSchemaHash("DBUS.DB_HEARTBEAT_MONITOR.avsc");

        datumReader = new GenericDatumReader<>(genericSchema);
        datumWriter = new GenericDatumWriter<>(genericSchema);
    } catch (Exception e) {
        logger.error("OracleGenericSchemaDecoder Initialization Error!", e);
    }
}
 
Example 14
Source Project: nifi   Source File: TestConvertAvroToORC.java    License: Apache License 2.0
@Test
public void test_onTrigger_routing_to_failure_null_type() throws Exception {
    String testString = "Hello World";
    GenericData.Record record = TestNiFiOrcUtils.buildAvroRecordWithNull(testString);

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);
    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test.avro");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS test_record (string STRING, null BOOLEAN) STORED AS ORC",
            resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
}
 
Example 15
Source Project: nifi   Source File: TestConvertAvroToJSON.java    License: Apache License 2.0
@Test
public void testZeroRecords() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ConvertAvroToJSON());
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc"));


    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    final ByteArrayOutputStream out1 = serializeAvroRecord(schema, datumWriter);
    runner.enqueue(out1.toByteArray());

    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToJSON.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ConvertAvroToJSON.REL_SUCCESS).get(0);
    out.assertContentEquals("{}");

}
 
Example 16
@Override
public DatumWriter<T> createDatumWriter(T data, Schema schema) {
    if (data instanceof SpecificRecord) {
        return new SpecificDatumWriter<>(schema);
    } else {
        return new GenericDatumWriter<>(schema);
    }
}
 
Example 17
Source Project: hiped2   Source File: AvroTextMapReduce.java    License: Apache License 2.0
public static void writeLinesBytesFile(OutputStream os)
    throws IOException {
  DatumWriter<ByteBuffer>
      writer = new GenericDatumWriter<ByteBuffer>();
  DataFileWriter<ByteBuffer> out =
      new DataFileWriter<ByteBuffer>(writer);
  out.create(Schema.create(Schema.Type.BYTES), os);
  for (String line : LINES) {
    out.append(ByteBuffer.wrap(line.getBytes("UTF-8")));
  }
  out.close();
}
 
Example 18
Source Project: datacollector   Source File: TestAvroDataFileParser.java    License: Apache License 2.0
@Test
public void testIncorrectOffset() throws Exception {
  File avroDataFile = SdcAvroTestUtil.createAvroDataFile();
  avroDataFile.delete();
  Schema schema = new Schema.Parser().parse(AVRO_SCHEMA);
  DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
  DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter);
  dataFileWriter.create(schema, avroDataFile);
  for (int i = 0; i < 5; i++) {
    GenericRecord r = new GenericData.Record(schema);
    r.put("name", NAMES[i % NAMES.length]);
    r.put("id", i);
    dataFileWriter.setSyncInterval(1073741824);
    dataFileWriter.append(r);
    dataFileWriter.sync();
  }
  dataFileWriter.flush();
  dataFileWriter.close();
  DataParserFactoryBuilder dataParserFactoryBuilder = new DataParserFactoryBuilder(getContext(),
    DataParserFormat.AVRO);
  DataParserFactory factory = dataParserFactoryBuilder
      .setMaxDataLen(1024 * 1024)
      .setOverRunLimit(1000 * 1000)
      .setConfig(SCHEMA_SOURCE_KEY, SOURCE)
      .build();
  DataParser dataParser = factory.getParser(avroDataFile, null);
  Map<String, Record> records = new HashMap<>();
  Record record;
  while((record = dataParser.parse()) != null) {
    records.put(dataParser.getOffset(), record);
  }
  Assert.assertEquals(String.valueOf(records), 5, records.size());
  Assert.assertEquals(0, records.get("141::1").get("/id").getValueAsInteger());
  Assert.assertEquals(1, records.get("166::1").get("/id").getValueAsInteger());
  Assert.assertEquals(2, records.get("190::1").get("/id").getValueAsInteger());
  Assert.assertEquals(3, records.get("215::1").get("/id").getValueAsInteger());
  Assert.assertEquals(4, records.get("239::1").get("/id").getValueAsInteger());
}
 
Example 19
Source Project: data-highway   Source File: DataDeserializerGdprTest.java    License: Apache License 2.0
private byte[] toAvroBinary(Schema schema, Object value, int version) {
  try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
    baos.write(0x0);
    baos.write(Ints.toByteArray(version));
    DatumWriter<Object> writer = new GenericDatumWriter<>(schema);
    Encoder encoder = EncoderFactory.get().directBinaryEncoder(baos, null);
    writer.write(value, encoder);
    encoder.flush();
    return baos.toByteArray();
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
 
Example 20
private File createAvroFile()
    throws Exception {
  org.apache.avro.Schema avroSchema = org.apache.avro.Schema.createRecord("myRecord", null, null, false);
  List<Field> fields = Arrays.asList(new Field("myMapStr", org.apache.avro.Schema.create(Type.STRING), null, null),
      new Field("complexMapStr", org.apache.avro.Schema.create(Type.STRING), null, null));
  avroSchema.setFields(fields);

  File avroFile = new File(_tempDir, "data.avro");
  try (DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(new GenericDatumWriter<>(avroSchema))) {
    fileWriter.create(avroSchema, avroFile);
    for (int i = 0; i < NUM_TOTAL_DOCS; i++) {
      Map<String, String> map = new HashMap<>();
      map.put("k1", "value-k1-" + i);
      map.put("k2", "value-k2-" + i);
      GenericData.Record record = new GenericData.Record(avroSchema);
      record.put("myMapStr", JsonUtils.objectToString(map));

      Map<String, Object> complexMap = new HashMap<>();
      complexMap.put("k1", "value-k1-" + i);
      complexMap.put("k2", "value-k2-" + i);
      complexMap.put("k3", Arrays.asList("value-k3-0-" + i, "value-k3-1-" + i, "value-k3-2-" + i));
      complexMap.put("k4", ImmutableMap
          .of("k4-k1", "value-k4-k1-" + i, "k4-k2", "value-k4-k2-" + i, "k4-k3", "value-k4-k3-" + i, "met", i));
      record.put("complexMapStr", JsonUtils.objectToString(complexMap));
      fileWriter.append(record);
      sortedSequenceIds.add(String.valueOf(i));
    }
  }
  Collections.sort(sortedSequenceIds);

  return avroFile;
}
 
Example 21
Source Project: data-highway   Source File: OfframpIntegrationTest.java    License: Apache License 2.0
private static byte[] encode(Record record) throws IOException {
  try (ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
    Encoder encoder = EncoderFactory.get().directBinaryEncoder(buffer, null);
    DatumWriter<Object> writer = new GenericDatumWriter<>(record.getSchema());
    buffer.write(0x00);
    buffer.write(Ints.toByteArray(1));
    writer.write(record, encoder);
    encoder.flush();
    return buffer.toByteArray();
  }
}
 
Example 22
Source Project: presto   Source File: TestKafkaAvroSmokeTest.java    License: Apache License 2.0
private static byte[] convertRecordToAvro(Schema schema, Map<String, Object> values)
{
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    GenericData.Record record = new GenericData.Record(schema);
    values.forEach(record::put);
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
        dataFileWriter.create(schema, outputStream);
        dataFileWriter.append(record);
    }
    catch (IOException e) {
        throw new UncheckedIOException("Failed to convert to Avro.", e);
    }
    return outputStream.toByteArray();
}
 
Example 23
Source Project: flink   Source File: AvroWriters.java    License: Apache License 2.0
/**
 * Creates an {@link AvroWriterFactory} that accepts and writes Avro generic types.
 * The Avro writers will use the given schema to build and write the records.
 *
 * @param schema The schema of the generic type.
 */
public static AvroWriterFactory<GenericRecord> forGenericRecord(Schema schema) {
	String schemaString = schema.toString();
	AvroBuilder<GenericRecord> builder = (out) -> createAvroDataFileWriter(
		schemaString,
		GenericDatumWriter::new,
		out);
	return new AvroWriterFactory<>(builder);
}
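
For context, a factory built this way is normally handed to Flink's StreamingFileSink rather than used directly. Below is a minimal sketch, assuming Flink's streaming and Avro format modules are on the classpath; the output path is a placeholder.

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;

public class GenericRecordSinkSketch {
	public static void attach(DataStream<GenericRecord> stream, Schema schema) {
		// Bulk-encode each part file as an Avro container file using the factory above.
		StreamingFileSink<GenericRecord> sink = StreamingFileSink
			.forBulkFormat(new Path("/tmp/avro-out"), AvroWriters.forGenericRecord(schema))
			.build();
		stream.addSink(sink);
	}
}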
 
Example 24
Source Project: presto   Source File: TestAvroDecoder.java    License: Apache License 2.0
private static GenericData.Record buildAvroRecord(Schema schema, ByteArrayOutputStream outputStream, Map<String, Object> values)
{
    GenericData.Record record = new GenericData.Record(schema);
    values.forEach(record::put);
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
        dataFileWriter.create(schema, outputStream);
        dataFileWriter.append(record);
    }
    catch (IOException e) {
        throw new RuntimeException("Failed to convert to Avro.", e);
    }
    return record;
}
 
Example 25
Source Project: kareldb   Source File: KafkaValueSerializer.java    License: Apache License 2.0
@Override
public void configure(Map<String, ?> configs, boolean isKey) {
    table = (KafkaTable) configs.get("table");
    avroSchema = (Schema) configs.get("avroSchema");
    version = (Integer) configs.get("version");
    writer = new GenericDatumWriter<>(avroSchema, KafkaTable.GENERIC);
}
 
Example 26
Source Project: spork   Source File: AvroRecordWriter.java    License: Apache License 2.0
public void prepareToWrite(Schema s) throws IOException {
  if (s == null) {
    throw new IOException(
        this.getClass().getName() + ".prepareToWrite called with null schema");
  }
  schema = s;
  DatumWriter<GenericData.Record> datumWriter =
      new GenericDatumWriter<GenericData.Record>(s);
  writer = new DataFileWriter<GenericData.Record>(datumWriter);
  configureDataFileWriter(writer, new JobConf(conf));
  writer.create(s, out.getFileSystem(conf).create(out));

}
 
Example 27
Source Project: kite   Source File: AvroEntitySerDe.java    License: Apache License 2.0
private DatumWriter<Object> buildDatumWriter(Schema schema) {
  if (specific) {
    return new SpecificDatumWriter<Object>(schema);
  } else {
    return new GenericDatumWriter<Object>(schema);
  }
}
 
Example 28
Source Project: incubator-gobblin   Source File: FsAuditSink.java    License: Apache License 2.0
public FsAuditSink(Config config, ValueAuditRuntimeMetadata auditMetadata) throws IOException {

    this.auditDirPath = new Path(ConfigUtils.getString(config, FS_SINK_AUDIT_OUTPUT_PATH_KEY, FS_SINK_AUDIT_OUTPUT_DEFAULT_PATH));
    this.fs = this.auditDirPath.getFileSystem(new Configuration());
    this.auditMetadata = auditMetadata;
    this.auditFileOutputStream = closer.register(fs.create(getAuditFilePath()));
    DataFileWriter<GenericRecord> dataFileWriter = this.closer.register(new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>()));
    this.writer = this.closer.register(dataFileWriter.create(this.auditMetadata.getTableMetadata().getTableSchema(), this.auditFileOutputStream));
  }
 
Example 29
Source Project: parquet-mr   Source File: TestStringBehavior.java    License: Apache License 2.0
@Before
public void writeDataFiles() throws IOException {
  // convert BIG_DECIMAL to string by hand so generic can write it
  GenericRecord record = new GenericRecordBuilder(SCHEMA)
      .set("default_class", "default")
      .set("string_class", "string")
      .set("stringable_class", BIG_DECIMAL.toString())
      .set("default_map", ImmutableMap.of("default_key", 34))
      .set("string_map", ImmutableMap.of("string_key", 35))
      .set("stringable_map", ImmutableMap.of(BIG_DECIMAL.toString(), 36))
      .build();

  File file = temp.newFile("parquet");
  file.delete();
  file.deleteOnExit();

  parquetFile = new Path(file.getPath());
  try(ParquetWriter<GenericRecord> parquet = AvroParquetWriter
      .<GenericRecord>builder(parquetFile)
      .withDataModel(GenericData.get())
      .withSchema(SCHEMA)
      .build()) {
    parquet.write(record);
  }

  avroFile = temp.newFile("avro");
  avroFile.delete();
  avroFile.deleteOnExit();
  try(DataFileWriter<GenericRecord> avro = new DataFileWriter<GenericRecord>(
    new GenericDatumWriter<>(SCHEMA)).create(SCHEMA, avroFile)) {
    avro.append(record);
  }
}
 
Example 30
Source Project: javabase   Source File: AvroSupport.java    License: Apache License 2.0
public static byte[] dataToByteArray(Schema schema, GenericRecord datum) throws IOException {
    GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    try {
        Encoder e = EncoderFactory.get().binaryEncoder(os, null);
        writer.write(datum, e);
        e.flush();
        byte[] byteData = os.toByteArray();
        return byteData;
    } finally {
        os.close();
    }
}