Java Code Examples for org.apache.avro.file.DataFileWriter#setSyncInterval()

The following examples show how to use org.apache.avro.file.DataFileWriter#setSyncInterval() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AvroAsJsonOutputFormat.java    From iow-hadoop-streaming with Apache License 2.0 6 votes vote down vote up
static <K> void configureDataFileWriter(DataFileWriter<K> writer,
    JobConf job) throws UnsupportedEncodingException {

    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
                org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ?
            CodecFactory.deflateCodec(level) : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    writer.setSyncInterval(job.getInt(org.apache.avro.mapred.AvroOutputFormat.SYNC_INTERVAL_KEY,
            DEFAULT_SYNC_INTERVAL));

    // copy metadata from job
    for (Map.Entry<String,String> e : job) {
        if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()),e.getValue());
        if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                   URLDecoder.decode(e.getValue(), "ISO-8859-1")
                   .getBytes("ISO-8859-1"));
    }
}
 
Example 2
Source File: AvroOutputFormat.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 5 votes vote down vote up
static <T> void configureDataFileWriter(DataFileWriter<T> writer,
  TaskAttemptContext context) throws UnsupportedEncodingException {
  if (FileOutputFormat.getCompressOutput(context)) {
    int level = context.getConfiguration()
      .getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
    String codecName = context.getConfiguration()
      .get(org.apache.avro.mapred.AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
    CodecFactory factory =
      codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level)
        : CodecFactory.fromString(codecName);
    writer.setCodec(factory);
  }

  writer.setSyncInterval(context.getConfiguration()
    .getInt(SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL));

  // copy metadata from job
  for (Map.Entry<String, String> e : context.getConfiguration()) {
    if (e.getKey().startsWith(org.apache.avro.mapred.AvroJob.TEXT_PREFIX)) {
      writer.setMeta(e.getKey()
        .substring(org.apache.avro.mapred.AvroJob.TEXT_PREFIX.length()),
        e.getValue());
    }
    if (e.getKey().startsWith(org.apache.avro.mapred.AvroJob.BINARY_PREFIX)) {
      writer.setMeta(e.getKey()
        .substring(org.apache.avro.mapred.AvroJob.BINARY_PREFIX.length()),
        URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }
  }
}
 
Example 3
Source File: TestAvroDataFileParser.java    From datacollector with Apache License 2.0 5 votes vote down vote up
@Test
public void testIncorrectOffset() throws Exception {
  File avroDataFile = SdcAvroTestUtil.createAvroDataFile();
  avroDataFile.delete();
  Schema schema = new Schema.Parser().parse(AVRO_SCHEMA);
  DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
  DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter);
  dataFileWriter.create(schema, avroDataFile);
  for (int i = 0; i < 5; i++) {
    GenericRecord r = new GenericData.Record(schema);
    r.put("name", NAMES[i % NAMES.length]);
    r.put("id", i);
    dataFileWriter.setSyncInterval(1073741824);
    dataFileWriter.append(r);
    dataFileWriter.sync();
  }
  dataFileWriter.flush();
  dataFileWriter.close();
  DataParserFactoryBuilder dataParserFactoryBuilder = new DataParserFactoryBuilder(getContext(),
    DataParserFormat.AVRO);
  DataParserFactory factory = dataParserFactoryBuilder
      .setMaxDataLen(1024 * 1024)
      .setOverRunLimit(1000 * 1000)
      .setConfig(SCHEMA_SOURCE_KEY, SOURCE)
      .build();
  DataParser dataParser = factory.getParser(avroDataFile, null);
  Map<String, Record> records = new HashMap<>();
  Record record;
  while((record = dataParser.parse()) != null) {
    records.put(dataParser.getOffset(), record);
  }
  Assert.assertEquals(String.valueOf(records), 5, records.size());
  Assert.assertEquals(0, records.get("141::1").get("/id").getValueAsInteger());
  Assert.assertEquals(1, records.get("166::1").get("/id").getValueAsInteger());
  Assert.assertEquals(2, records.get("190::1").get("/id").getValueAsInteger());
  Assert.assertEquals(3, records.get("215::1").get("/id").getValueAsInteger());
  Assert.assertEquals(4, records.get("239::1").get("/id").getValueAsInteger());
}
 
Example 4
Source File: Purge.java    From Cubert with Apache License 2.0 5 votes vote down vote up
private DataFileWriter<GenericRecord> createDataFileWriter(DataFileReader<GenericRecord> dataFileReader) throws IllegalArgumentException,
        IOException
{
    Schema schema = dataFileReader.getSchema();
    DatumWriter<GenericRecord> datumWriter =
            new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> writer =
            new DataFileWriter<GenericRecord>(datumWriter);

    // Get the codec of the reader
    String codecStr = dataFileReader.getMetaString(DataFileConstants.CODEC);
    int level = conf.getInt("avro.mapred.deflate.level", 1);
    String codecName = conf.get("avro.output.codec", codecStr);
    CodecFactory factory =
            codecName.equals("deflate") ? CodecFactory.deflateCodec(level)
                    : CodecFactory.fromString(codecName);

    // Set the codec of the writer
    writer.setCodec(factory);

    writer.setSyncInterval(conf.getInt("avro.mapred.sync.interval",
                                       Math.max(conf.getInt("io.file.buffer.size",
                                                            16000), 16000)));

    writer.create(schema,
                  new Path(tempFileName).getFileSystem(conf)
                                        .create(new Path(tempFileName)));
    return writer;
}
 
Example 5
Source File: PigAvroOutputFormat.java    From Cubert with Apache License 2.0 5 votes vote down vote up
@Override
public RecordWriter<NullWritable, Object> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {

    if (schema == null)
        throw new IOException("Must provide a schema");

    Configuration conf = context.getConfiguration();

    DataFileWriter<Object> writer = new DataFileWriter<Object>(new PigAvroDatumWriter(schema));

    if (FileOutputFormat.getCompressOutput(context)) {
        int level = conf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = conf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC)
            ? CodecFactory.deflateCodec(level)
            : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    // Do max as core-default.xml has io.file.buffer.size as 4K
    writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY, Math.max(
            conf.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL), DEFAULT_SYNC_INTERVAL)));

    Path path = getDefaultWorkFile(context, EXT);
    writer.create(schema, path.getFileSystem(conf).create(path));
    return new PigAvroRecordWriter(writer);
}
 
Example 6
Source File: AvroRecordWriter.java    From spork with Apache License 2.0 5 votes vote down vote up
static void configureDataFileWriter(DataFileWriter<GenericData.Record> writer,
    JobConf job) throws UnsupportedEncodingException {
  if (FileOutputFormat.getCompressOutput(job)) {
    int level = job.getInt(DEFLATE_LEVEL_KEY,
        DEFAULT_DEFLATE_LEVEL);
    String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
    CodecFactory factory = codecName.equals(DEFLATE_CODEC)
      ? CodecFactory.deflateCodec(level)
      : CodecFactory.fromString(codecName);
    writer.setCodec(factory);
  }

  // Do max as core-default.xml has io.file.buffer.size as 4K
  writer.setSyncInterval(job.getInt(SYNC_INTERVAL_KEY, Math.max(
          job.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL), DEFAULT_SYNC_INTERVAL)));

  // copy metadata from job
  for (Map.Entry<String,String> e : job) {
    if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
      writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()),
                     e.getValue());
    if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
      writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                     URLDecoder.decode(e.getValue(), "ISO-8859-1")
                     .getBytes("ISO-8859-1"));
  }
}
 
Example 7
Source File: PigAvroOutputFormat.java    From spork with Apache License 2.0 5 votes vote down vote up
@Override
public RecordWriter<NullWritable, Object> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {

    if (schema == null)
        throw new IOException("Must provide a schema");

    Configuration conf = context.getConfiguration();

    DataFileWriter<Object> writer = new DataFileWriter<Object>(new PigAvroDatumWriter(schema));

    if (FileOutputFormat.getCompressOutput(context)) {
        int level = conf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = conf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC)
            ? CodecFactory.deflateCodec(level)
            : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    // Do max as core-default.xml has io.file.buffer.size as 4K
    writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY, Math.max(
            conf.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL), DEFAULT_SYNC_INTERVAL)));

    Path path = getDefaultWorkFile(context, EXT);
    writer.create(schema, path.getFileSystem(conf).create(path));
    return new PigAvroRecordWriter(writer);
}