Java Code Examples for org.apache.avro.file.DataFileWriter

The following examples show how to use org.apache.avro.file.DataFileWriter. They are extracted from open source projects; the originating project, source file, and license are noted above each example.
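Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the typical DataFileWriter workflow: build a DatumWriter for a schema, create the container file, append records, and close. The User schema and the users.avro path are placeholders for illustration.

import java.io.File;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumWriter;

public class DataFileWriterBasicExample {
  public static void main(String[] args) throws IOException {
    // Placeholder schema with a single string field; substitute your own.
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"User\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"}]}");

    GenericRecord user = new GenericData.Record(schema);
    user.put("name", "alice");

    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
      dataFileWriter.setCodec(CodecFactory.deflateCodec(6));  // optional block compression
      dataFileWriter.create(schema, new File("users.avro"));  // writes the container header
      dataFileWriter.append(user);                            // one append call per record
    }                                                         // close() flushes the final block
  }
}
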
Example 1
Source Project: datafu   Source File: DailyTrackingWriter.java    License: Apache License 2.0
public void open(int year, int month, int day) throws IOException
{
  if (_dataWriter != null)
  {
    throw new RuntimeException("Already have data writer");
  }

  Path dailyPath = _outputPath;
  Path path = new Path(dailyPath,String.format("%04d/%02d/%02d",year,month,day));
  
  _outputStream = _fs.create(new Path(path, "part-00000.avro"));
  
  GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>();
  _dataWriter = new DataFileWriter<GenericRecord>(writer);        
  _dataWriter.create(_schema, _outputStream);
}
 
Example 2
Source Project: presto   Source File: AvroRecordWriter.java    License: Apache License 2.0
public AvroRecordWriter(Path path, JobConf jobConf, boolean isCompressed, Properties properties)
        throws IOException
{
    Schema schema;
    try {
        schema = AvroSerdeUtils.determineSchemaOrThrowException(jobConf, properties);
    }
    catch (AvroSerdeException e) {
        throw new IOException(e);
    }
    GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(genericDatumWriter);

    if (isCompressed) {
        int level = jobConf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC)
                ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        dataFileWriter.setCodec(factory);
    }

    outputStream = path.getFileSystem(jobConf).create(path);
    dataFileWriter.create(schema, outputStream);
    delegate = new AvroGenericRecordWriter(dataFileWriter);
}
 
Example 3
Source Project: mt-flume   Source File: AbstractAvroEventSerializer.java    License: Apache License 2.0
@Override
public void configure(Context context) {

  int syncIntervalBytes =
      context.getInteger(SYNC_INTERVAL_BYTES, DEFAULT_SYNC_INTERVAL_BYTES);
  String compressionCodec =
      context.getString(COMPRESSION_CODEC, DEFAULT_COMPRESSION_CODEC);

  writer = new ReflectDatumWriter<T>(getSchema());
  dataFileWriter = new DataFileWriter<T>(writer);

  dataFileWriter.setSyncInterval(syncIntervalBytes);

  try {
    CodecFactory codecFactory = CodecFactory.fromString(compressionCodec);
    dataFileWriter.setCodec(codecFactory);
  } catch (AvroRuntimeException e) {
    logger.warn("Unable to instantiate avro codec with name (" +
        compressionCodec + "). Compression disabled. Exception follows.", e);
  }
}
 
Example 4
Source Project: incubator-gobblin   Source File: FsSpecProducer.java    License: Apache License 2.0
private void writeAvroJobSpec(AvroJobSpec jobSpec) throws IOException {
  DatumWriter<AvroJobSpec> datumWriter = new SpecificDatumWriter<>(AvroJobSpec.SCHEMA$);
  DataFileWriter<AvroJobSpec> dataFileWriter = new DataFileWriter<>(datumWriter);

  Path jobSpecPath = new Path(this.specConsumerPath, jobSpec.getUri());

  //Write the new JobSpec to a temporary path first.
  Path tmpDir = new Path(this.specConsumerPath, "_tmp");
  if (!fs.exists(tmpDir)) {
    fs.mkdirs(tmpDir);
  }

  Path tmpJobSpecPath = new Path(tmpDir, jobSpec.getUri());

  OutputStream out = fs.create(tmpJobSpecPath);

  dataFileWriter.create(AvroJobSpec.SCHEMA$, out);
  dataFileWriter.append(jobSpec);
  dataFileWriter.close();

  //Rename the JobSpec from temporary to final location.
  HadoopUtils.renamePath(fs, tmpJobSpecPath, jobSpecPath, true);
}
 
Example 5
Source Project: localization_nifi   Source File: TestExtractAvroMetadata.java    License: Apache License 2.0
@Test
public void testExtractionWithCodec() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ExtractAvroMetadata());
    runner.setProperty(ExtractAvroMetadata.METADATA_KEYS, AVRO_CODEC_ATTR); // test dynamic attribute avro.codec

    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array.avsc"));

    final GenericData.Array<String> data = new GenericData.Array<>(schema, Arrays.asList("one", "two", "three"));
    final DatumWriter<GenericData.Array<String>> datumWriter = new GenericDatumWriter<>(schema);

    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    final DataFileWriter<GenericData.Array<String>> dataFileWriter = new DataFileWriter<>(datumWriter);
    dataFileWriter.setCodec(CodecFactory.deflateCodec(1));
    dataFileWriter.create(schema, out);
    dataFileWriter.append(data);
    dataFileWriter.close();

    runner.enqueue(out.toByteArray());
    runner.run();

    runner.assertAllFlowFilesTransferred(ExtractAvroMetadata.REL_SUCCESS, 1);

    final MockFlowFile flowFile = runner.getFlowFilesForRelationship(ExtractAvroMetadata.REL_SUCCESS).get(0);
    flowFile.assertAttributeEquals("avro.codec", "deflate");
}
 
Example 6
Source Project: localization_nifi   Source File: PutHiveStreaming.java    License: Apache License 2.0
private void appendRecordsToFlowFile(ProcessSession session,
                                     List<HiveStreamingRecord> records,
                                     AtomicReference<FlowFile> appendFlowFile,
                                     DataFileWriter<GenericRecord> avroWriter,
                                     DataFileStream<GenericRecord> reader) throws IOException {

    appendFlowFile.set(session.append(appendFlowFile.get(), (out) -> {

        try (DataFileWriter<GenericRecord> writer = avroWriter.create(reader.getSchema(), out)) {
            for (HiveStreamingRecord sRecord : records) {
                writer.append(sRecord.getRecord());
            }
            writer.flush();
        }
    }));
}
 
Example 7
Source Project: mt-flume   Source File: TestAvroEventDeserializer.java    License: Apache License 2.0
private File newTestFile(boolean deleteOnExit) throws IOException {
  File tempFile = File.createTempFile("testDirectFile", "tmp");
  if (deleteOnExit) {
    tempFile.deleteOnExit();
  }

  DataFileWriter<GenericRecord> writer =
      new DataFileWriter<GenericRecord>(
          new GenericDatumWriter<GenericRecord>(schema));
  writer.create(schema, tempFile);
  GenericRecordBuilder recordBuilder;
  recordBuilder = new GenericRecordBuilder(schema);
  recordBuilder.set("foo", "bar");
  GenericRecord record = recordBuilder.build();
  writer.append(record);
  writer.sync();
  recordBuilder = new GenericRecordBuilder(schema);
  recordBuilder.set("foo", "baz");
  record = recordBuilder.build();
  writer.append(record);
  writer.sync();
  writer.flush();
  writer.close();

  return tempFile;
}
 
Example 8
Source Project: nifi   Source File: PutHiveStreaming.java    License: Apache License 2.0
private byte[] initAvroWriter(ProcessSession session, String codec, DataFileStream<GenericRecord> reader,
                                     DataFileWriter<GenericRecord> writer, AtomicReference<FlowFile> flowFileRef) {

    writer.setCodec(CodecFactory.fromString(codec));
    // Transfer metadata (this is a subset of the incoming file)
    for (String metaKey : reader.getMetaKeys()) {
        if (!RESERVED_METADATA.contains(metaKey)) {
            writer.setMeta(metaKey, reader.getMeta(metaKey));
        }
    }

    final ByteArrayOutputStream avroHeader = new ByteArrayOutputStream();
    flowFileRef.set(session.append(flowFileRef.get(), (out) -> {
        // Create writer so that records can be appended later.
        writer.create(reader.getSchema(), avroHeader);
        writer.close();

        final byte[] header = avroHeader.toByteArray();
        out.write(header);
    }));

    // Capture the Avro header byte array that is just written to the FlowFile.
    // This is needed when Avro records are appended to the same FlowFile.
    return avroHeader.toByteArray();
}
 
Example 9
Source Project: dbeam   Source File: JdbcAvroIO.java    License: Apache License 2.0
@SuppressWarnings("deprecation") // uses internal test functionality.
@Override
protected void prepareWrite(WritableByteChannel channel) throws Exception {
  logger.info("jdbcavroio : Preparing write...");
  connection = jdbcAvroArgs.jdbcConnectionConfiguration().createConnection();
  Void destination = getDestination();
  Schema schema = dynamicDestinations.getSchema(destination);
  dataFileWriter =
      new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))
          .setCodec(jdbcAvroArgs.getCodecFactory())
          .setSyncInterval(syncInterval);
  dataFileWriter.setMeta("created_by", this.getClass().getCanonicalName());
  this.countingOutputStream = new CountingOutputStream(Channels.newOutputStream(channel));
  dataFileWriter.create(schema, this.countingOutputStream);
  logger.info("jdbcavroio : Write prepared");
}
 
Example 10
Source Project: tajo   Source File: AvroAppender.java    License: Apache License 2.0
/**
 * Initializes the Appender.
 */
public void init() throws IOException {
  FileSystem fs = path.getFileSystem(conf);

  FSDataOutputStream outputStream = fs.create(path, false);

  avroSchema = AvroUtil.getAvroSchema(meta, conf);
  avroFields = avroSchema.getFields();

  DatumWriter<GenericRecord> datumWriter =
          new GenericDatumWriter<>(avroSchema);
  dataFileWriter = new DataFileWriter<>(datumWriter);
  dataFileWriter.create(avroSchema, outputStream);

  if (tableStatsEnabled) {
    this.stats = new TableStatistics(schema, columnStatsEnabled);
  }
  super.init();
}
 
Example 11
@Override
public IPentahoRecordWriter createRecordWriter() throws Exception {
  validate();
  if ( fields == null || StringUtils.isEmpty( nameSpace ) || StringUtils.isEmpty( recordName ) || StringUtils
    .isEmpty( outputFilename ) ) {
    throw new Exception(
      "Invalid state.  One of the following required fields is null:  'nameSpace', 'recordNum', or 'outputFileName" );
  }
  Schema schema = getSchema();
  writeAvroSchemaToFile( schemaFilename );
  DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>( schema );
  DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>( datumWriter );
  dataFileWriter.setCodec( codecFactory );
  dataFileWriter.create( schema, KettleVFS.getOutputStream( outputFilename, variableSpace, false ) );
  return new PentahoAvroRecordWriter( dataFileWriter, schema, fields );
}
 
Example 12
Source Project: nifi   Source File: TestConvertAvroToORC.java    License: Apache License 2.0
@Test
public void test_onTrigger_routing_to_failure_null_type() throws Exception {
    String testString = "Hello World";
    GenericData.Record record = TestNiFiOrcUtils.buildAvroRecordWithNull(testString);

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);
    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test.avro");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS test_record (string STRING, null BOOLEAN) STORED AS ORC",
            resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
}
 
Example 13
Source Project: Cubert   Source File: AvroUtils.java    License: Apache License 2.0
public static void createFileIfNotExists(BlockSchema fileSchema, String path) throws IOException
{
    Configuration conf = new JobConf();
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(new Path(path)))
        return;

    Schema avroSchema = convertFromBlockSchema("CUBERT_MV_RECORD", fileSchema);
    System.out.println("Creating avro file with schema = " + avroSchema);
    GenericDatumWriter<GenericRecord> datumWriter =
            new GenericDatumWriter<GenericRecord>(avroSchema);
    DataFileWriter<GenericRecord> writer =
            new DataFileWriter<GenericRecord>(datumWriter);

    FSDataOutputStream fout =
            FileSystem.create(fs,
                              new Path(path),
                              new FsPermission(FsAction.ALL,
                                               FsAction.READ_EXECUTE,
                                               FsAction.READ_EXECUTE));
    writer.create(avroSchema, fout);
    writer.flush();
    writer.close();

}
 
Example 14
Source Project: samza   Source File: TestAzureBlobAvroWriter.java    License: Apache License 2.0
@Before
public void setup() throws Exception {
  threadPool = new ThreadPoolExecutor(1, 1, 60,  TimeUnit.SECONDS, new LinkedBlockingDeque<>());
  ome = createOME("Topic1");

  encodedRecord = new byte[100];
  BlobContainerAsyncClient mockContainerAsyncClient = PowerMockito.mock(BlobContainerAsyncClient.class);
  mockDataFileWriter = mock(DataFileWriter.class);
  mockAzureBlobOutputStream = mock(AzureBlobOutputStream.class);
  mockBlockBlobAsyncClient = PowerMockito.mock(BlockBlobAsyncClient.class);
  when(mockBlockBlobAsyncClient.getBlobUrl()).thenReturn("https://samza.blob.core.windows.net/fake-blob-url");

  mockCompression = CompressionFactory.getInstance().getCompression(CompressionType.GZIP);
  azureBlobAvroWriter =
      spy(new AzureBlobAvroWriter(mockContainerAsyncClient, mock(AzureBlobWriterMetrics.class), threadPool, THRESHOLD,
          60000, "test", mockDataFileWriter, mockAzureBlobOutputStream, mockBlockBlobAsyncClient,
          blobMetadataGeneratorFactory, blobMetadataGeneratorConfig, STREAM_NAME,
          Long.MAX_VALUE, Long.MAX_VALUE, mockCompression, false)); // keeping blob size and number of records unlimited
  doReturn(encodedRecord).when(azureBlobAvroWriter).encodeRecord((IndexedRecord) ome.getMessage());
}
 
Example 15
Source Project: digdag   Source File: RedshiftIT.java    License: Apache License 2.0
private byte[] avroTestData(List<Schema.Field> fields, List<Map<String, Object>> records)
        throws IOException
{
    Schema schema = Schema.createRecord("testdata", null, null, false);
    schema.setFields(fields);

    ByteArrayOutputStream out = new ByteArrayOutputStream();
    GenericDatumWriter<GenericData.Record> datum = new GenericDatumWriter<>(schema);
    DataFileWriter<GenericData.Record> writer = new DataFileWriter<>(datum);
    writer.create(schema, out);
    for (Map<String, Object> record : records) {
        GenericData.Record r = new GenericData.Record(schema);
        for (Map.Entry<String, Object> item : record.entrySet()) {
            r.put(item.getKey(), item.getValue());
        }
        writer.append(r);
    }
    writer.close();

    return out.toByteArray();
}
 
Example 16
Source Project: nifi   Source File: PutHiveStreaming.java    License: Apache License 2.0
private void appendAvroRecords(ProcessSession session, byte[] avroHeader, DataFileWriter<GenericRecord> writer,
                               AtomicReference<FlowFile> flowFileRef, List<HiveStreamingRecord> hRecords) {

    flowFileRef.set(session.append(flowFileRef.get(), (out) -> {
        if (hRecords != null) {
            // Initialize the writer again as append mode, so that Avro header is written only once.
            writer.appendTo(new SeekableByteArrayInput(avroHeader), out);
            try {
                for (HiveStreamingRecord hRecord : hRecords) {
                    writer.append(hRecord.getRecord());
                }
            } catch (IOException ioe) {
                // The records were put to Hive Streaming successfully, but there was an error while writing the
                // Avro records to the flow file. Log as an error and move on.
                logger.error("Error writing Avro records (which were sent successfully to Hive Streaming) to the flow file, " + ioe, ioe);
            }
        }
        writer.close();
    }));
}
 
Example 17
Source Project: geowave   Source File: StageRunData.java    License: Apache License 2.0
private synchronized DataFileWriter getDataWriterCreateIfNull(
    final String typeName,
    final GeoWaveAvroFormatPlugin plugin) {
  if (!cachedWriters.containsKey(typeName)) {
    FSDataOutputStream out = null;
    final DataFileWriter dfw = new DataFileWriter(new GenericDatumWriter());
    cachedWriters.put(typeName, dfw);
    dfw.setCodec(CodecFactory.snappyCodec());
    try {
      // TODO: we should probably clean up the type name to make it
      // HDFS path safe in case there are invalid characters
      // also, if a file already exists do we want to delete it or
      // append to it?
      out = fs.create(new Path(hdfsBaseDirectory, typeName));
      dfw.create(plugin.getAvroSchema(), out);

    } catch (final IOException e) {
      LOGGER.error("Unable to create output stream", e);
      // cache a null value so we don't continually try to recreate
      cachedWriters.put(typeName, null);
      return null;
    }
  }
  return cachedWriters.get(typeName);
}
 
Example 18
Source Project: presto   Source File: TestKafkaAvroSmokeTest.java    License: Apache License 2.0
private static byte[] convertRecordToAvro(Schema schema, Map<String, Object> values)
{
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    GenericData.Record record = new GenericData.Record(schema);
    values.forEach(record::put);
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
        dataFileWriter.create(schema, outputStream);
        dataFileWriter.append(record);
    }
    catch (IOException e) {
        throw new UncheckedIOException("Failed to convert to Avro.", e);
    }
    return outputStream.toByteArray();
}
 
Example 19
Source Project: reef   Source File: AvroClassHierarchySerializer.java    License: Apache License 2.0
@Override
public void toFile(final ClassHierarchy classHierarchy, final File file) throws IOException {
  final AvroNode avroNode = toAvro(classHierarchy);
  final DatumWriter<AvroNode> avroNodeWriter = new SpecificDatumWriter<>(AvroNode.class);
  try (DataFileWriter<AvroNode> dataFileWriter = new DataFileWriter<>(avroNodeWriter)) {
    dataFileWriter.create(avroNode.getSchema(), file);
    dataFileWriter.append(avroNode);
  }
}
 
Example 20
Source Project: mt-flume   Source File: AvroEventSerializer.java    License: Apache License 2.0
private void initialize(Event event) throws IOException {
  Schema schema = null;
  String schemaUrl = event.getHeaders().get(AVRO_SCHEMA_URL_HEADER);
  if (schemaUrl != null) {
    schema = schemaCache.get(schemaUrl);
    if (schema == null) {
      schema = loadFromUrl(schemaUrl);
      schemaCache.put(schemaUrl, schema);
    }
  }
  if (schema == null) {
    String schemaString = event.getHeaders().get(AVRO_SCHEMA_LITERAL_HEADER);
    if (schemaString == null) {
      throw new FlumeException("Could not find schema for event " + event);
    }
    schema = new Schema.Parser().parse(schemaString);
  }

  writer = new GenericDatumWriter<Object>(schema);
  dataFileWriter = new DataFileWriter<Object>(writer);

  dataFileWriter.setSyncInterval(syncIntervalBytes);

  try {
    CodecFactory codecFactory = CodecFactory.fromString(compressionCodec);
    dataFileWriter.setCodec(codecFactory);
  } catch (AvroRuntimeException e) {
    logger.warn("Unable to instantiate avro codec with name (" +
        compressionCodec + "). Compression disabled. Exception follows.", e);
  }

  dataFileWriter.create(schema, out);
}
 
Example 21
Source Project: incubator-gobblin   Source File: AvroHdfsDataWriter.java    License: Apache License 2.0
/**
 * Create a new {@link DataFileWriter} for writing Avro records.
 *
 * @param codecFactory a {@link CodecFactory} object for building the compression codec
 * @throws IOException if there is something wrong creating a new {@link DataFileWriter}
 */
private DataFileWriter<GenericRecord> createDataFileWriter(CodecFactory codecFactory) throws IOException {
  @SuppressWarnings("resource")
  DataFileWriter<GenericRecord> writer = new DataFileWriter<>(this.datumWriter);
  writer.setCodec(codecFactory);

  // Open the file and return the DataFileWriter
  return writer.create(this.schema, this.stagingFileOutputStream);
}
 
Example 22
Source Project: hiped2   Source File: AvroTextMapReduce.java    License: Apache License 2.0
public static void writeLinesBytesFile(OutputStream os)
    throws IOException {
  DatumWriter<ByteBuffer>
      writer = new GenericDatumWriter<ByteBuffer>();
  DataFileWriter<ByteBuffer> out =
      new DataFileWriter<ByteBuffer>(writer);
  out.create(Schema.create(Schema.Type.BYTES), os);
  for (String line : LINES) {
    out.append(ByteBuffer.wrap(line.getBytes("UTF-8")));
  }
  out.close();
}
 
Example 23
Source Project: kite   Source File: AvroAppender.java    License: Apache License 2.0
@Override
public void append(E entity) throws IOException {
  try {
    dataFileWriter.append(entity);
  } catch (DataFileWriter.AppendWriteException e) {
    throw new DatasetRecordException("Failed to append record", e);
  }
}
 
Example 24
Source Project: Cubert   Source File: AvroTeeWriter.java    License: Apache License 2.0
@Override
public void open(Configuration conf,
                 JsonNode json,
                 BlockSchema schema,
                 Path root,
                 String filename) throws IOException
{
    Path teePath = new Path(root, filename + ".avro");
    FileSystem fs = FileSystem.get(conf);

    Schema avroSchema = AvroUtils.convertFromBlockSchema("record", schema);

    GenericDatumWriter<Object> datumWriter =
            new PigAvroDatumWriter(avroSchema);
    dataFileWriter = new DataFileWriter<Object>(datumWriter);

    // if compression is requested, set the proper compression codec
    if (PhaseContext.getConf().getBoolean("mapred.output.compress", false))
    {
        int level = conf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = conf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory =
                codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level)
                        : CodecFactory.fromString(codecName);
        dataFileWriter.setCodec(factory);
    }

    dataFileWriter.create(avroSchema, fs.create(teePath));
}
 
Example 25
Source Project: localization_nifi   Source File: TestUtil.java    License: Apache License 2.0
private static byte[] bytesFor(List<Record> records) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    DataFileWriter<Record> writer = new DataFileWriter<>(
            AvroUtil.newDatumWriter(records.get(0).getSchema(), Record.class));
    writer.setCodec(CodecFactory.snappyCodec());
    writer = writer.create(records.get(0).getSchema(), out);

    for (Record record : records) {
        writer.append(record);
    }

    writer.flush();

    return out.toByteArray();
}
 
Example 26
Source Project: localization_nifi   Source File: TestMergeContent.java    License: Apache License 2.0
private ByteArrayOutputStream serializeAvroRecord(Schema schema, GenericRecord user2, DatumWriter<GenericRecord> datumWriter) throws IOException {
    ByteArrayOutputStream out2 = new ByteArrayOutputStream();
    DataFileWriter<GenericRecord> dataFileWriter2 = new DataFileWriter<GenericRecord>(datumWriter);
    dataFileWriter2.create(schema, out2);
    dataFileWriter2.append(user2);
    dataFileWriter2.close();
    return out2;
}
 
Example 27
Source Project: geowave   Source File: StageRunData.java    License: Apache License 2.0
public synchronized void close() {
  for (final DataFileWriter dfw : cachedWriters.values()) {
    try {
      dfw.close();
    } catch (final IOException e) {
      LOGGER.warn("Unable to close sequence file stream", e);
    }
  }
  cachedWriters.clear();
}
 
Example 28
Source Project: localization_nifi   Source File: SplitAvro.java    License: Apache License 2.0
@Override
public void init(final DataFileStream<GenericRecord> reader, final String codec, final OutputStream out) throws IOException {
    writer = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>());

    if (transferMetadata) {
        for (String metaKey : reader.getMetaKeys()) {
            if (!RESERVED_METADATA.contains(metaKey)) {
                writer.setMeta(metaKey, reader.getMeta(metaKey));
            }
        }
    }

    writer.setCodec(CodecFactory.fromString(codec));
    writer.create(reader.getSchema(), out);
}
 
Example 29
Source Project: localization_nifi   Source File: TestExtractAvroMetadata.java    License: Apache License 2.0
@Test
public void testExtractionWithNonRecordSchema() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ExtractAvroMetadata());
    runner.setProperty(ExtractAvroMetadata.COUNT_ITEMS, "true");

    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array.avsc"));

    final GenericData.Array<String> data = new GenericData.Array<>(schema, Arrays.asList("one", "two", "three"));
    final DatumWriter<GenericData.Array<String>> datumWriter = new GenericDatumWriter<>(schema);

    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    final DataFileWriter<GenericData.Array<String>> dataFileWriter = new DataFileWriter<>(datumWriter);
    dataFileWriter.create(schema, out);
    dataFileWriter.append(data);
    dataFileWriter.append(data);
    dataFileWriter.close();

    runner.enqueue(out.toByteArray());
    runner.run();

    runner.assertAllFlowFilesTransferred(ExtractAvroMetadata.REL_SUCCESS, 1);

    final MockFlowFile flowFile = runner.getFlowFilesForRelationship(ExtractAvroMetadata.REL_SUCCESS).get(0);
    flowFile.assertAttributeExists(ExtractAvroMetadata.SCHEMA_FINGERPRINT_ATTR);
    flowFile.assertAttributeEquals(ExtractAvroMetadata.SCHEMA_TYPE_ATTR, Schema.Type.ARRAY.getName());
    flowFile.assertAttributeEquals(ExtractAvroMetadata.SCHEMA_NAME_ATTR, "array");
    flowFile.assertAttributeEquals(ExtractAvroMetadata.ITEM_COUNT_ATTR, "2"); // number of arrays, not elements
}
 
Example 30
Source Project: localization_nifi   Source File: AvroTestUtil.java    License: Apache License 2.0
public static ByteArrayOutputStream serializeAvroRecord(final Schema schema, final DatumWriter<GenericRecord> datumWriter, final GenericRecord... users) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
        dataFileWriter.create(schema, out);
        for (final GenericRecord user : users) {
            dataFileWriter.append(user);
        }
    }
    return out;
}