Java Code Examples for org.apache.avro.file.CodecFactory#fromString()

The following examples show how to use org.apache.avro.file.CodecFactory#fromString(). Each example is drawn from an open-source project; the source file, project, and license are noted above it.
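
Before the project examples, here is a minimal, self-contained sketch of the basic pattern: resolve a codec by name with CodecFactory.fromString() and attach it to a DataFileWriter before the file is created. The inline schema, output file name, and codec name below are hypothetical placeholders rather than code from any of the projects that follow; fromString() throws AvroRuntimeException for an unrecognized codec name, which the sketch catches in order to fall back to no compression.

import java.io.File;
import java.io.IOException;

import org.apache.avro.AvroRuntimeException;
import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class CodecFactoryFromStringSketch {
    public static void main(String[] args) throws IOException {
        // Hypothetical schema, purely for illustration.
        Schema schema = new Schema.Parser().parse(
                "{\"type\":\"record\",\"name\":\"Example\","
                + "\"fields\":[{\"name\":\"value\",\"type\":\"string\"}]}");

        DataFileWriter<GenericRecord> writer =
                new DataFileWriter<>(new GenericDatumWriter<>(schema));

        // Codec names accepted by fromString() include "null", "deflate",
        // "snappy", "bzip2", and "xz"; "snappy" additionally requires
        // snappy-java on the classpath.
        String codecName = "snappy";
        try {
            writer.setCodec(CodecFactory.fromString(codecName));
        } catch (AvroRuntimeException e) {
            // Unknown codec name: disable compression rather than fail.
            writer.setCodec(CodecFactory.nullCodec());
        }

        // setCodec() must be called before create(); the writer is Closeable.
        try (DataFileWriter<GenericRecord> out =
                writer.create(schema, new File("example.avro"))) {
            GenericRecord record = new GenericData.Record(schema);
            record.put("value", "hello");
            out.append(record);
        }
    }
}

As the examples below show, a common variant of this pattern special-cases "deflate" (and sometimes "xz") so that a configured compression level can be passed to CodecFactory.deflateCodec(int) or CodecFactory.xzCodec(int), while all other names are handed to fromString().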
Example 1
Source File: AvroRecordWriter.java    From presto with Apache License 2.0
public AvroRecordWriter(Path path, JobConf jobConf, boolean isCompressed, Properties properties)
        throws IOException
{
    Schema schema;
    try {
        schema = AvroSerdeUtils.determineSchemaOrThrowException(jobConf, properties);
    }
    catch (AvroSerdeException e) {
        throw new IOException(e);
    }
    GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(genericDatumWriter);

    if (isCompressed) {
        int level = jobConf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC)
                ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        dataFileWriter.setCodec(factory);
    }

    outputStream = path.getFileSystem(jobConf).create(path);
    dataFileWriter.create(schema, outputStream);
    delegate = new AvroGenericRecordWriter(dataFileWriter);
}
 
Example 2
Source File: AvroKeyValueSinkWriter.java    From Flink-CEPplus with Apache License 2.0
private CodecFactory getCompressionCodec(Map<String, String> conf) {
	if (getBoolean(conf, CONF_COMPRESS, false)) {
		int deflateLevel = getInt(conf, CONF_DEFLATE_LEVEL, CodecFactory.DEFAULT_DEFLATE_LEVEL);
		int xzLevel = getInt(conf, CONF_XZ_LEVEL, CodecFactory.DEFAULT_XZ_LEVEL);

		String outputCodec = conf.get(CONF_COMPRESS_CODEC);

		if (DataFileConstants.DEFLATE_CODEC.equals(outputCodec)) {
			return CodecFactory.deflateCodec(deflateLevel);
		} else if (DataFileConstants.XZ_CODEC.equals(outputCodec)) {
			return CodecFactory.xzCodec(xzLevel);
		} else {
			return CodecFactory.fromString(outputCodec);
		}
	}
	return CodecFactory.nullCodec();
}
 
Example 3
Source File: AvroKeyValueSinkWriter.java    From flink with Apache License 2.0
private CodecFactory getCompressionCodec(Map<String, String> conf) {
	if (getBoolean(conf, CONF_COMPRESS, false)) {
		int deflateLevel = getInt(conf, CONF_DEFLATE_LEVEL, CodecFactory.DEFAULT_DEFLATE_LEVEL);
		int xzLevel = getInt(conf, CONF_XZ_LEVEL, CodecFactory.DEFAULT_XZ_LEVEL);

		String outputCodec = conf.get(CONF_COMPRESS_CODEC);

		if (DataFileConstants.DEFLATE_CODEC.equals(outputCodec)) {
			return CodecFactory.deflateCodec(deflateLevel);
		} else if (DataFileConstants.XZ_CODEC.equals(outputCodec)) {
			return CodecFactory.xzCodec(xzLevel);
		} else {
			return CodecFactory.fromString(outputCodec);
		}
	}
	return CodecFactory.nullCodec();
}
 
Example 4
Source File: AbstractAvroEventSerializer.java    From mt-flume with Apache License 2.0
@Override
public void configure(Context context) {

  int syncIntervalBytes =
      context.getInteger(SYNC_INTERVAL_BYTES, DEFAULT_SYNC_INTERVAL_BYTES);
  String compressionCodec =
      context.getString(COMPRESSION_CODEC, DEFAULT_COMPRESSION_CODEC);

  writer = new ReflectDatumWriter<T>(getSchema());
  dataFileWriter = new DataFileWriter<T>(writer);

  dataFileWriter.setSyncInterval(syncIntervalBytes);

  try {
    CodecFactory codecFactory = CodecFactory.fromString(compressionCodec);
    dataFileWriter.setCodec(codecFactory);
  } catch (AvroRuntimeException e) {
    logger.warn("Unable to instantiate avro codec with name (" +
        compressionCodec + "). Compression disabled. Exception follows.", e);
  }
}
 
Example 5
Source File: SerializableAvroCodecFactory.java    From beam with Apache License 2.0
@Override
public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
  final String codecStr = in.readUTF();

  switch (codecStr) {
    case NULL_CODEC:
    case SNAPPY_CODEC:
    case BZIP2_CODEC:
      codecFactory = CodecFactory.fromString(codecStr);
      return;
  }

  Matcher deflateMatcher = deflatePattern.matcher(codecStr);
  if (deflateMatcher.find()) {
    codecFactory = CodecFactory.deflateCodec(Integer.parseInt(deflateMatcher.group("level")));
    return;
  }

  Matcher xzMatcher = xzPattern.matcher(codecStr);
  if (xzMatcher.find()) {
    codecFactory = CodecFactory.xzCodec(Integer.parseInt(xzMatcher.group("level")));
    return;
  }

  throw new IllegalStateException(codecStr + " is not supported");
}
 
Example 6
Source File: AvroAsJsonOutputFormat.java    From iow-hadoop-streaming with Apache License 2.0
static <K> void configureDataFileWriter(DataFileWriter<K> writer,
    JobConf job) throws UnsupportedEncodingException {

    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
                org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ?
            CodecFactory.deflateCodec(level) : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    writer.setSyncInterval(job.getInt(org.apache.avro.mapred.AvroOutputFormat.SYNC_INTERVAL_KEY,
            DEFAULT_SYNC_INTERVAL));

    // copy metadata from job
    for (Map.Entry<String,String> e : job) {
        if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()),e.getValue());
        if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                   URLDecoder.decode(e.getValue(), "ISO-8859-1")
                   .getBytes("ISO-8859-1"));
    }
}
 
Example 7
Source File: WriteAvroToByteArrayBuilder.java    From kite with Apache License 2.0
public WriteAvroToByteArray(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);      
  this.format = new Validator<Format>().validateEnum(
      config,
      getConfigs().getString(config, "format", Format.container.toString()),
      Format.class);
  
  String codec = getConfigs().getString(config, "codec", null);
  if (codec == null) {
    this.codecFactory = null;
  } else {
    this.codecFactory = CodecFactory.fromString(codec);
  }
  
  Config metadataConfig = getConfigs().getConfig(config, "metadata", ConfigFactory.empty());
  for (Map.Entry<String, Object> entry : new Configs().getEntrySet(metadataConfig)) {
    this.metadata.put(entry.getKey(), entry.getValue().toString());
  }
  
  validateArguments();
}
 
Example 8
Source File: AvroEventSerializer.java    From mt-flume with Apache License 2.0
private void initialize(Event event) throws IOException {
  Schema schema = null;
  String schemaUrl = event.getHeaders().get(AVRO_SCHEMA_URL_HEADER);
  if (schemaUrl != null) {
    schema = schemaCache.get(schemaUrl);
    if (schema == null) {
      schema = loadFromUrl(schemaUrl);
      schemaCache.put(schemaUrl, schema);
    }
  }
  if (schema == null) {
    String schemaString = event.getHeaders().get(AVRO_SCHEMA_LITERAL_HEADER);
    if (schemaString == null) {
      throw new FlumeException("Could not find schema for event " + event);
    }
    schema = new Schema.Parser().parse(schemaString);
  }

  writer = new GenericDatumWriter<Object>(schema);
  dataFileWriter = new DataFileWriter<Object>(writer);

  dataFileWriter.setSyncInterval(syncIntervalBytes);

  try {
    CodecFactory codecFactory = CodecFactory.fromString(compressionCodec);
    dataFileWriter.setCodec(codecFactory);
  } catch (AvroRuntimeException e) {
    logger.warn("Unable to instantiate avro codec with name (" +
        compressionCodec + "). Compression disabled. Exception follows.", e);
  }

  dataFileWriter.create(schema, out);
}
 
Example 9
Source File: AvroConfiguration.java    From data-highway with Apache License 2.0
@Bean
CodecFactory codecFactory(
    @Value("${avroCodec.name:deflate}") String codecName,
    @Value("${avroCodec.level:3}") String compressionLevel) {
  switch (codecName) {
  case DEFLATE_CODEC:
    return CodecFactory.deflateCodec(level(compressionLevel, DEFAULT_DEFLATE_LEVEL));
  case XZ_CODEC:
    return CodecFactory.xzCodec(level(compressionLevel, DEFAULT_XZ_LEVEL));
  default:
    return CodecFactory.fromString(codecName);
  }
}
 
Example 10
Source File: PigAvroOutputFormat.java    From spork with Apache License 2.0
@Override
public RecordWriter<NullWritable, Object> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {

    if (schema == null)
        throw new IOException("Must provide a schema");

    Configuration conf = context.getConfiguration();

    DataFileWriter<Object> writer = new DataFileWriter<Object>(new PigAvroDatumWriter(schema));

    if (FileOutputFormat.getCompressOutput(context)) {
        int level = conf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = conf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC)
            ? CodecFactory.deflateCodec(level)
            : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    // Do max as core-default.xml has io.file.buffer.size as 4K
    writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY, Math.max(
            conf.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL), DEFAULT_SYNC_INTERVAL)));

    Path path = getDefaultWorkFile(context, EXT);
    writer.create(schema, path.getFileSystem(conf).create(path));
    return new PigAvroRecordWriter(writer);
}
 
Example 11
Source File: AvroRecordWriter.java    From spork with Apache License 2.0
static void configureDataFileWriter(DataFileWriter<GenericData.Record> writer,
    JobConf job) throws UnsupportedEncodingException {
  if (FileOutputFormat.getCompressOutput(job)) {
    int level = job.getInt(DEFLATE_LEVEL_KEY,
        DEFAULT_DEFLATE_LEVEL);
    String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
    CodecFactory factory = codecName.equals(DEFLATE_CODEC)
      ? CodecFactory.deflateCodec(level)
      : CodecFactory.fromString(codecName);
    writer.setCodec(factory);
  }

  // Do max as core-default.xml has io.file.buffer.size as 4K
  writer.setSyncInterval(job.getInt(SYNC_INTERVAL_KEY, Math.max(
          job.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL), DEFAULT_SYNC_INTERVAL)));

  // copy metadata from job
  for (Map.Entry<String,String> e : job) {
    if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
      writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()),
                     e.getValue());
    if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
      writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                     URLDecoder.decode(e.getValue(), "ISO-8859-1")
                     .getBytes("ISO-8859-1"));
  }
}
 
Example 12
Source File: AvroFileReaderWriterFactory.java    From secor with Apache License 2.0
private CodecFactory getCodecFactory(CompressionCodec codec) {
    CompressionCodecName codecName = CompressionCodecName
            .fromCompressionCodec(codec != null ? codec.getClass() : null);
    try {
        return CodecFactory.fromString(codecName.name().toLowerCase());
    } catch (AvroRuntimeException e) {
        LOG.error("Error creating codec factory", e);
    }
    return CodecFactory.fromString("null");
}
 
Example 13
Source File: WriterUtils.java    From incubator-gobblin with Apache License 2.0
/**
 * Creates a {@link CodecFactory} based on the specified codec name and deflate level. If codecName is absent, then
 * a {@link CodecFactory#deflateCodec(int)} is returned. Otherwise the codecName is converted into a
 * {@link CodecFactory} via the {@link CodecFactory#fromString(String)} method.
 *
 * @param codecName the name of the codec to use (e.g. deflate, snappy, xz, etc.).
 * @param deflateLevel must be an integer from [0-9], and is only applicable if the codecName is "deflate".
 * @return a {@link CodecFactory}.
 */
public static CodecFactory getCodecFactory(Optional<String> codecName, Optional<String> deflateLevel) {
  if (!codecName.isPresent()) {
    return CodecFactory.deflateCodec(ConfigurationKeys.DEFAULT_DEFLATE_LEVEL);
  } else if (codecName.get().equalsIgnoreCase(DataFileConstants.DEFLATE_CODEC)) {
    if (!deflateLevel.isPresent()) {
      return CodecFactory.deflateCodec(ConfigurationKeys.DEFAULT_DEFLATE_LEVEL);
    }
    return CodecFactory.deflateCodec(Integer.parseInt(deflateLevel.get()));
  } else {
    return CodecFactory.fromString(codecName.get().toLowerCase());
  }
}
 
Example 14
Source File: AvroTeeWriter.java    From Cubert with Apache License 2.0
@Override
public void open(Configuration conf,
                 JsonNode json,
                 BlockSchema schema,
                 Path root,
                 String filename) throws IOException
{
    Path teePath = new Path(root, filename + ".avro");
    FileSystem fs = FileSystem.get(conf);

    Schema avroSchema = AvroUtils.convertFromBlockSchema("record", schema);

    GenericDatumWriter<Object> datumWriter =
            new PigAvroDatumWriter(avroSchema);
    dataFileWriter = new DataFileWriter<Object>(datumWriter);

    // if compression is requested, set the proper compression codec
    if (PhaseContext.getConf().getBoolean("mapred.output.compress", false))
    {
        int level = conf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = conf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory =
                codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level)
                        : CodecFactory.fromString(codecName);
        dataFileWriter.setCodec(factory);
    }

    dataFileWriter.create(avroSchema, fs.create(teePath));
}
 
Example 15
Source File: PigAvroOutputFormat.java    From Cubert with Apache License 2.0
@Override
public RecordWriter<NullWritable, Object> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {

    if (schema == null)
        throw new IOException("Must provide a schema");

    Configuration conf = context.getConfiguration();

    DataFileWriter<Object> writer = new DataFileWriter<Object>(new PigAvroDatumWriter(schema));

    if (FileOutputFormat.getCompressOutput(context)) {
        int level = conf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = conf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC)
            ? CodecFactory.deflateCodec(level)
            : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    // Do max as core-default.xml has io.file.buffer.size as 4K
    writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY, Math.max(
            conf.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL), DEFAULT_SYNC_INTERVAL)));

    Path path = getDefaultWorkFile(context, EXT);
    writer.create(schema, path.getFileSystem(conf).create(path));
    return new PigAvroRecordWriter(writer);
}
 
Example 16
Source File: Purge.java    From Cubert with Apache License 2.0
private DataFileWriter<GenericRecord> createDataFileWriter(DataFileReader<GenericRecord> dataFileReader) throws IllegalArgumentException,
        IOException
{
    Schema schema = dataFileReader.getSchema();
    DatumWriter<GenericRecord> datumWriter =
            new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> writer =
            new DataFileWriter<GenericRecord>(datumWriter);

    // Get the codec of the reader
    String codecStr = dataFileReader.getMetaString(DataFileConstants.CODEC);
    int level = conf.getInt("avro.mapred.deflate.level", 1);
    String codecName = conf.get("avro.output.codec", codecStr);
    CodecFactory factory =
            codecName.equals("deflate") ? CodecFactory.deflateCodec(level)
                    : CodecFactory.fromString(codecName);

    // Set the codec of the writer
    writer.setCodec(factory);

    writer.setSyncInterval(conf.getInt("avro.mapred.sync.interval",
                                       Math.max(conf.getInt("io.file.buffer.size",
                                                            16000), 16000)));

    writer.create(schema,
                  new Path(tempFileName).getFileSystem(conf)
                                        .create(new Path(tempFileName)));
    return writer;
}
 
Example 17
Source File: SerializableAvroCodecFactoryTest.java    From beam with Apache License 2.0
@Test
public void testDefaultCodecsIn() throws Exception {
  for (String codec : avroCodecs) {
    SerializableAvroCodecFactory codecFactory =
        new SerializableAvroCodecFactory(CodecFactory.fromString(codec));

    assertEquals(CodecFactory.fromString(codec).toString(), codecFactory.getCodec().toString());
  }
}
 
Example 18
Source File: AvroEventSerializer.java    From Transwarp-Sample-Code with MIT License
private void initialize(Event event) throws IOException {
  Schema schema = null;
  String schemaUrl = event.getHeaders().get(AVRO_SCHEMA_URL_HEADER);
  if (schemaUrl != null) {
    schema = schemaCache.get(schemaUrl);
    if (schema == null) {
      schema = loadFromUrl(schemaUrl);
      schemaCache.put(schemaUrl, schema);
    }
  }
  if (schema == null) {
    String schemaString = event.getHeaders().get(AVRO_SCHEMA_LITERAL_HEADER);
    if (schemaString == null) {
      throw new FlumeException("Could not find schema for event " + event);
    }
    schema = new Schema.Parser().parse(schemaString);
  }

  writer = new GenericDatumWriter<Object>(schema);
  dataFileWriter = new DataFileWriter<Object>(writer);

  dataFileWriter.setSyncInterval(syncIntervalBytes);

  try {
    CodecFactory codecFactory = CodecFactory.fromString(compressionCodec);
    dataFileWriter.setCodec(codecFactory);
  } catch (AvroRuntimeException e) {
    logger.warn("Unable to instantiate avro codec with name (" +
        compressionCodec + "). Compression disabled. Exception follows.", e);
  }

  dataFileWriter.create(schema, out);
}
 
Example 19
Source File: AvroOutputFormat.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
static <T> void configureDataFileWriter(DataFileWriter<T> writer,
  TaskAttemptContext context) throws UnsupportedEncodingException {
  if (FileOutputFormat.getCompressOutput(context)) {
    int level = context.getConfiguration()
      .getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
    String codecName = context.getConfiguration()
      .get(org.apache.avro.mapred.AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
    CodecFactory factory =
      codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level)
        : CodecFactory.fromString(codecName);
    writer.setCodec(factory);
  }

  writer.setSyncInterval(context.getConfiguration()
    .getInt(SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL));

  // copy metadata from job
  for (Map.Entry<String, String> e : context.getConfiguration()) {
    if (e.getKey().startsWith(org.apache.avro.mapred.AvroJob.TEXT_PREFIX)) {
      writer.setMeta(e.getKey()
        .substring(org.apache.avro.mapred.AvroJob.TEXT_PREFIX.length()),
        e.getValue());
    }
    if (e.getKey().startsWith(org.apache.avro.mapred.AvroJob.BINARY_PREFIX)) {
      writer.setMeta(e.getKey()
        .substring(org.apache.avro.mapred.AvroJob.BINARY_PREFIX.length()),
        URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }
  }
}