Java Code Examples for org.apache.avro.file.DataFileWriter#setMeta()

The following examples show how to use org.apache.avro.file.DataFileWriter#setMeta(). Each example is taken from an open source project; the source file, project, and license are noted above it.
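Before the project examples, here is a minimal, self-contained sketch of the API itself (the class name, file name, and metadata keys are illustrative). It writes an Avro container file with custom header metadata using all three setMeta() overloads, then reads the values back. Note that all metadata must be set before create(); once the writer is open, setMeta() throws an exception.

import java.io.File;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class SetMetaDemo {
  public static void main(String[] args) throws IOException {
    Schema schema = SchemaBuilder.record("Example").fields()
        .requiredString("name").endRecord();

    File file = new File("example.avro");
    try (DataFileWriter<GenericRecord> writer =
             new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
      // All metadata must be set before create(); setMeta() has three
      // overloads, for String, long, and byte[] values.
      writer.setMeta("created.by", "SetMetaDemo");        // String
      writer.setMeta("row.count.hint", 1L);               // long
      writer.setMeta("raw.bytes", new byte[] {1, 2, 3});  // byte[]
      writer.create(schema, file);

      GenericRecord record = new GenericData.Record(schema);
      record.put("name", "avro");
      writer.append(record);
    }

    // Metadata lives in the file header and can be read back without
    // scanning any records.
    try (DataFileReader<GenericRecord> reader =
             new DataFileReader<>(file, new GenericDatumReader<GenericRecord>())) {
      System.out.println(reader.getMetaString("created.by"));   // SetMetaDemo
      System.out.println(reader.getMetaLong("row.count.hint")); // 1
      System.out.println(reader.getMeta("raw.bytes").length);   // 3
    }
  }
}
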
Example 1
Source File: AvroFileAppender.java    From iceberg with Apache License 2.0
@SuppressWarnings("unchecked")
private static <D> DataFileWriter<D> newAvroWriter(
    Schema schema, OutputFile file, Function<Schema, DatumWriter<?>> createWriterFunc,
    CodecFactory codec, Map<String, String> metadata) throws IOException {
  DataFileWriter<D> writer = new DataFileWriter<>(
      (DatumWriter<D>) createWriterFunc.apply(schema));

  writer.setCodec(codec);

  // Copy caller-supplied metadata into the file header;
  // setMeta() must be called before create().
  for (Map.Entry<String, String> entry : metadata.entrySet()) {
    writer.setMeta(entry.getKey(), entry.getValue());
  }

  // TODO: support overwrite
  return writer.create(schema, file.create());
}
 
Example 2
Source File: AvroIO.java    From beam with Apache License 2.0
@Override
public void open(WritableByteChannel channel) throws IOException {
  this.schema = new Schema.Parser().parse(getJsonSchema());
  DataFileWriter<?> writer;
  if (getRecordFormatter() == null) {
    writer = reflectWriter = new DataFileWriter<>(new ReflectDatumWriter<>(schema));
  } else {
    writer = genericWriter = new DataFileWriter<>(new GenericDatumWriter<>(schema));
  }
  writer.setCodec(getCodec().getCodec());
  for (Map.Entry<String, Object> entry : getMetadata().entrySet()) {
    Object v = entry.getValue();
    if (v instanceof String) {
      writer.setMeta(entry.getKey(), (String) v);
    } else if (v instanceof Long) {
      writer.setMeta(entry.getKey(), (Long) v);
    } else if (v instanceof byte[]) {
      writer.setMeta(entry.getKey(), (byte[]) v);
    } else {
      throw new IllegalStateException(
          "Metadata value type must be one of String, Long, or byte[]. Found "
              + v.getClass().getSimpleName());
    }
  }
  writer.create(schema, Channels.newOutputStream(channel));
}
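The open() method above is the sink-side half of Beam's AvroIO. On the pipeline side, the metadata map is typically supplied through AvroIO.Write#withMetadata(), which accepts exactly the three value types checked above. A hypothetical usage fragment (MyRecord, the output path, and the keys are illustrative):

import java.util.HashMap;
import java.util.Map;

import org.apache.beam.sdk.io.AvroIO;

// Assuming a PCollection<MyRecord> named records:
Map<String, Object> metadata = new HashMap<>();
metadata.put("created.by", "my-pipeline");    // String
metadata.put("row.count.hint", 42L);          // Long
metadata.put("raw.bytes", new byte[] {1, 2}); // byte[]

records.apply(AvroIO.write(MyRecord.class)
    .to("/tmp/out/records")
    .withMetadata(metadata));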
 
Example 3
Source File: AvroAsJsonOutputFormat.java    From iow-hadoop-streaming with Apache License 2.0
static <K> void configureDataFileWriter(DataFileWriter<K> writer,
    JobConf job) throws UnsupportedEncodingException {

    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
                org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ?
            CodecFactory.deflateCodec(level) : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    writer.setSyncInterval(job.getInt(org.apache.avro.mapred.AvroOutputFormat.SYNC_INTERVAL_KEY,
            DEFAULT_SYNC_INTERVAL));

    // copy metadata from job
    for (Map.Entry<String,String> e : job) {
        if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()),e.getValue());
        if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                   URLDecoder.decode(e.getValue(), "ISO-8859-1")
                   .getBytes("ISO-8859-1"));
    }
}
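Examples 3, 6, and 7 all copy writer metadata out of the Hadoop job configuration using the AvroJob.TEXT_PREFIX and AvroJob.BINARY_PREFIX key conventions. For context, here is a hypothetical job setup that would feed the loop above (key names and values are illustrative); note that binary values are round-tripped through ISO-8859-1 so arbitrary bytes survive the String-typed configuration:

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;

import org.apache.avro.mapred.AvroJob;
import org.apache.hadoop.mapred.JobConf;

public class AvroMetaJobSetup {
    static JobConf withAvroMetadata() throws UnsupportedEncodingException {
        JobConf job = new JobConf();

        // Text metadata: configureDataFileWriter() strips TEXT_PREFIX and
        // calls setMeta(String, String) with the remainder of the key.
        job.set(AvroJob.TEXT_PREFIX + "created.by", "my-etl-job");

        // Binary metadata: URL-encode the bytes as ISO-8859-1 (the exact
        // inverse of the decode above); the writer then calls
        // setMeta(String, byte[]).
        byte[] raw = {0x00, 0x01, (byte) 0xFF};
        job.set(AvroJob.BINARY_PREFIX + "raw.key",
                URLEncoder.encode(new String(raw, "ISO-8859-1"), "ISO-8859-1"));

        return job;
    }
}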
 
Example 4
Source File: PutHiveStreaming.java    From nifi with Apache License 2.0
private byte[] initAvroWriter(ProcessSession session, String codec, DataFileStream<GenericRecord> reader,
                                     DataFileWriter<GenericRecord> writer, AtomicReference<FlowFile> flowFileRef) {

    writer.setCodec(CodecFactory.fromString(codec));
    // Transfer metadata from the incoming file, skipping Avro's reserved keys
    // (so the copy is a subset of the incoming file's metadata)
    for (String metaKey : reader.getMetaKeys()) {
        if (!RESERVED_METADATA.contains(metaKey)) {
            writer.setMeta(metaKey, reader.getMeta(metaKey));
        }
    }

    final ByteArrayOutputStream avroHeader = new ByteArrayOutputStream();
    flowFileRef.set(session.append(flowFileRef.get(), (out) -> {
        // Create writer so that records can be appended later.
        writer.create(reader.getSchema(), avroHeader);
        writer.close();

        final byte[] header = avroHeader.toByteArray();
        out.write(header);
    }));

    // Capture the Avro header byte array that was just written to the FlowFile.
    // It is needed when further Avro records are appended to the same FlowFile.
    return avroHeader.toByteArray();
}
 
Example 5
Source File: AvroFileAppender.java    From iceberg with Apache License 2.0
@SuppressWarnings("unchecked")
private static <D> DataFileWriter<D> newAvroWriter(
    Schema schema, PositionOutputStream stream, Function<Schema, DatumWriter<?>> createWriterFunc,
    CodecFactory codec, Map<String, String> metadata) throws IOException {
  DataFileWriter<D> writer = new DataFileWriter<>(
      (DatumWriter<D>) createWriterFunc.apply(schema));

  writer.setCodec(codec);

  for (Map.Entry<String, String> entry : metadata.entrySet()) {
    writer.setMeta(entry.getKey(), entry.getValue());
  }

  return writer.create(schema, stream);
}
 
Example 6
Source File: AvroOutputFormat.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
static <T> void configureDataFileWriter(DataFileWriter<T> writer,
  TaskAttemptContext context) throws UnsupportedEncodingException {
  if (FileOutputFormat.getCompressOutput(context)) {
    int level = context.getConfiguration()
      .getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
    String codecName = context.getConfiguration()
      .get(org.apache.avro.mapred.AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
    CodecFactory factory =
      codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level)
        : CodecFactory.fromString(codecName);
    writer.setCodec(factory);
  }

  writer.setSyncInterval(context.getConfiguration()
    .getInt(SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL));

  // copy metadata from job
  for (Map.Entry<String, String> e : context.getConfiguration()) {
    if (e.getKey().startsWith(org.apache.avro.mapred.AvroJob.TEXT_PREFIX)) {
      writer.setMeta(e.getKey()
        .substring(org.apache.avro.mapred.AvroJob.TEXT_PREFIX.length()),
        e.getValue());
    }
    if (e.getKey().startsWith(org.apache.avro.mapred.AvroJob.BINARY_PREFIX)) {
      writer.setMeta(e.getKey()
        .substring(org.apache.avro.mapred.AvroJob.BINARY_PREFIX.length()),
        URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }
  }
}
 
Example 7
Source File: AvroRecordWriter.java    From spork with Apache License 2.0
static void configureDataFileWriter(DataFileWriter<GenericData.Record> writer,
    JobConf job) throws UnsupportedEncodingException {
  if (FileOutputFormat.getCompressOutput(job)) {
    int level = job.getInt(DEFLATE_LEVEL_KEY,
        DEFAULT_DEFLATE_LEVEL);
    String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
    CodecFactory factory = codecName.equals(DEFLATE_CODEC)
      ? CodecFactory.deflateCodec(level)
      : CodecFactory.fromString(codecName);
    writer.setCodec(factory);
  }

  // Use the larger of the configured sync interval and io.file.buffer.size,
  // since core-default.xml defaults io.file.buffer.size to only 4K
  writer.setSyncInterval(job.getInt(SYNC_INTERVAL_KEY, Math.max(
          job.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL), DEFAULT_SYNC_INTERVAL)));

  // copy metadata from job
  for (Map.Entry<String,String> e : job) {
    if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
      writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()),
                     e.getValue());
    if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
      writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                     URLDecoder.decode(e.getValue(), "ISO-8859-1")
                     .getBytes("ISO-8859-1"));
  }
}
 
Example 8
Source File: PartitionCollapsingExecutionPlannerTests.java    From datafu with Apache License 2.0
private void createOutput(DateRange dateRange) throws IOException
{
  DataFileWriter<GenericRecord> dataWriter;
  OutputStream outputStream;
  
  Path path = new Path(_outputPath, PathUtils.datedPathFormat.format(dateRange.getEndDate()));
  
  Schema outputSchema = Schemas.createRecordSchema(PartitionCollapsingTests.class, "Output",
                                            new Field("id", Schema.create(Type.LONG), "ID", null));
  
  outputStream = getFileSystem().create(new Path(path, "part-00000.avro"));
  
  GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>();
  dataWriter = new DataFileWriter<GenericRecord>(writer);      
  
  dataWriter.setMeta(AvroDateRangeMetadata.METADATA_DATE_START,
                     Long.toString(dateRange.getBeginDate().getTime()));
  
  dataWriter.setMeta(AvroDateRangeMetadata.METADATA_DATE_END,
                     Long.toString(dateRange.getEndDate().getTime()));
  
  dataWriter.create(outputSchema, outputStream);
      
  // intentionally an empty file: header and metadata only, no records
  
  dataWriter.close();
  outputStream.close();
  dataWriter = null;
  outputStream = null; 
}
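A consumer of files written this way can recover the date range from the file header without scanning any records. A minimal sketch, assuming the same AvroDateRangeMetadata constants used in the test above:

import java.io.File;
import java.io.IOException;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class DateRangeReader {
  // AvroDateRangeMetadata is the datafu class used in Example 8; import it
  // from wherever it lives in your datafu version.
  static long[] readDateRange(File avroFile) throws IOException {
    try (DataFileReader<GenericRecord> reader =
             new DataFileReader<>(avroFile, new GenericDatumReader<GenericRecord>())) {
      long start = Long.parseLong(
          reader.getMetaString(AvroDateRangeMetadata.METADATA_DATE_START));
      long end = Long.parseLong(
          reader.getMetaString(AvroDateRangeMetadata.METADATA_DATE_END));
      return new long[] {start, end};  // begin/end as epoch millis, as written above
    }
  }
}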