org.apache.avro.file.CodecFactory Java Examples

The following examples show how to use org.apache.avro.file.CodecFactory. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
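CodecFactory selects the compression codec that DataFileWriter applies to the blocks of an Avro container file. It provides factory methods such as snappyCodec(), deflateCodec(level), bzip2Codec(), xzCodec(level) and nullCodec(), plus fromString(name) for resolving a codec by its name; the chosen codec must be passed to DataFileWriter.setCodec() before create() is called. Before the project examples, here is a minimal, self-contained sketch of the pattern (the schema, field name, and output file are placeholders, not taken from any of the projects below):

import java.io.File;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class CodecFactoryExample {
  public static void main(String[] args) throws IOException {
    // Placeholder schema with a single string field.
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Example\","
            + "\"fields\":[{\"name\":\"value\",\"type\":\"string\"}]}");

    GenericRecord record = new GenericData.Record(schema);
    record.put("value", "hello");

    try (DataFileWriter<GenericRecord> writer =
             new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
      // The codec must be set before create(); Snappy requires snappy-java on the classpath.
      writer.setCodec(CodecFactory.snappyCodec());
      writer.create(schema, new File("example.avro"));
      writer.append(record);
    }
  }
}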
Example #1
Source File: AvroKeyValueFileWrite.java    From hiped2 with Apache License 2.0
public static void writeToAvro(File inputFile, OutputStream outputStream)
    throws IOException {

  DataFileWriter<GenericRecord> writer =
      new DataFileWriter<GenericRecord>(
          new GenericDatumWriter<GenericRecord>());

  writer.setCodec(CodecFactory.snappyCodec());
  writer.create(SCHEMA, outputStream);

  for (Stock stock : AvroStockUtils.fromCsvFile(inputFile)) {

    AvroKeyValue<CharSequence, Stock> record
        = new AvroKeyValue<CharSequence, Stock>(new GenericData.Record(SCHEMA));
    record.setKey(stock.getSymbol());
    record.setValue(stock);

    writer.append(record.get());
  }

  IOUtils.closeStream(writer);
  IOUtils.closeStream(outputStream);
}
 
Example #2
Source File: AvroKeyValueSinkWriter.java    From Flink-CEPplus with Apache License 2.0
AvroKeyValueWriter(Schema keySchema, Schema valueSchema,
		CodecFactory compressionCodec, OutputStream outputStream,
		int syncInterval) throws IOException {
	// Create the generic record schema for the key/value pair.
	mKeyValuePairSchema = AvroKeyValue
			.getSchema(keySchema, valueSchema);

	// Create an Avro container file and a writer to it.
	DatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<GenericRecord>(
			mKeyValuePairSchema);
	mAvroFileWriter = new DataFileWriter<GenericRecord>(
			genericDatumWriter);
	mAvroFileWriter.setCodec(compressionCodec);
	mAvroFileWriter.setSyncInterval(syncInterval);
	mAvroFileWriter.create(mKeyValuePairSchema, outputStream);

	// Create a reusable output record.
	mOutputRecord = new AvroKeyValue<Object, Object>(
			new GenericData.Record(mKeyValuePairSchema));
}
 
Example #3
Source File: AvroAppender.java    From kite with Apache License 2.0
private CodecFactory getCodecFactory() {
  switch (compressionType) {
    case Snappy:
      return CodecFactory.snappyCodec();

    case Deflate:
      return CodecFactory.deflateCodec(9);

    case Bzip2:
      return CodecFactory.bzip2Codec();

    default:
      throw new IllegalArgumentException(String.format(
          "Unsupported compression format %s. Supported formats: %s",
          compressionType.getName(), Arrays.toString(
              Formats.AVRO.getSupportedCompressionTypes().toArray())));
  }
}
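Note that the same codecs can also be obtained by name: CodecFactory.fromString("deflate") returns a deflate codec at the default level, and fromString likewise understands "snappy", "bzip2", "xz" and "null" (assuming the corresponding codec libraries are on the classpath). A brief illustrative sketch:

// Resolving a codec by name versus by explicit factory method (names chosen for illustration).
CodecFactory byName  = CodecFactory.fromString("deflate");  // default deflate level
CodecFactory byLevel = CodecFactory.deflateCodec(9);        // explicit level, 9 = maximum compression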
 
Example #4
Source File: AvroKeyValueSinkWriter.java    From Flink-CEPplus with Apache License 2.0
@Override
@SuppressWarnings("deprecation")
public void open(FileSystem fs, Path path) throws IOException {
	super.open(fs, path);

	try {
		CodecFactory compressionCodec = getCompressionCodec(properties);
		Schema keySchema = Schema.parse(properties.get(CONF_OUTPUT_KEY_SCHEMA));
		Schema valueSchema = Schema.parse(properties.get(CONF_OUTPUT_VALUE_SCHEMA));
		keyValueWriter = new AvroKeyValueWriter<K, V>(
			keySchema,
			valueSchema,
			compressionCodec,
			getStream());
	} finally {
		if (keyValueWriter == null) {
			close();
		}
	}
}
 
Example #5
Source File: AvroKeyValueSinkWriter.java    From Flink-CEPplus with Apache License 2.0
private CodecFactory getCompressionCodec(Map<String, String> conf) {
	if (getBoolean(conf, CONF_COMPRESS, false)) {
		int deflateLevel = getInt(conf, CONF_DEFLATE_LEVEL, CodecFactory.DEFAULT_DEFLATE_LEVEL);
		int xzLevel = getInt(conf, CONF_XZ_LEVEL, CodecFactory.DEFAULT_XZ_LEVEL);

		String outputCodec = conf.get(CONF_COMPRESS_CODEC);

		if (DataFileConstants.DEFLATE_CODEC.equals(outputCodec)) {
			return CodecFactory.deflateCodec(deflateLevel);
		} else if (DataFileConstants.XZ_CODEC.equals(outputCodec)) {
			return CodecFactory.xzCodec(xzLevel);
		} else {
			return CodecFactory.fromString(outputCodec);
		}
	}
	return CodecFactory.nullCodec();
}
 
Example #6
Source File: AvroKeyValueSinkWriter.java    From flink with Apache License 2.0
AvroKeyValueWriter(Schema keySchema, Schema valueSchema,
		CodecFactory compressionCodec, OutputStream outputStream,
		int syncInterval) throws IOException {
	// Create the generic record schema for the key/value pair.
	mKeyValuePairSchema = AvroKeyValue
			.getSchema(keySchema, valueSchema);

	// Create an Avro container file and a writer to it.
	DatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<GenericRecord>(
			mKeyValuePairSchema);
	mAvroFileWriter = new DataFileWriter<GenericRecord>(
			genericDatumWriter);
	mAvroFileWriter.setCodec(compressionCodec);
	mAvroFileWriter.setSyncInterval(syncInterval);
	mAvroFileWriter.create(mKeyValuePairSchema, outputStream);

	// Create a reusable output record.
	mOutputRecord = new AvroKeyValue<Object, Object>(
			new GenericData.Record(mKeyValuePairSchema));
}
 
Example #7
Source File: AvroKeyValueSinkWriter.java    From flink with Apache License 2.0
private CodecFactory getCompressionCodec(Map<String, String> conf) {
	if (getBoolean(conf, CONF_COMPRESS, false)) {
		int deflateLevel = getInt(conf, CONF_DEFLATE_LEVEL, CodecFactory.DEFAULT_DEFLATE_LEVEL);
		int xzLevel = getInt(conf, CONF_XZ_LEVEL, CodecFactory.DEFAULT_XZ_LEVEL);

		String outputCodec = conf.get(CONF_COMPRESS_CODEC);

		if (DataFileConstants.DEFLATE_CODEC.equals(outputCodec)) {
			return CodecFactory.deflateCodec(deflateLevel);
		} else if (DataFileConstants.XZ_CODEC.equals(outputCodec)) {
			return CodecFactory.xzCodec(xzLevel);
		} else {
			return CodecFactory.fromString(outputCodec);
		}
	}
	return CodecFactory.nullCodec();
}
 
Example #8
Source File: AvroKeyValueSinkWriter.java    From flink with Apache License 2.0
@Override
@SuppressWarnings("deprecation")
public void open(FileSystem fs, Path path) throws IOException {
	super.open(fs, path);

	try {
		CodecFactory compressionCodec = getCompressionCodec(properties);
		Schema keySchema = Schema.parse(properties.get(CONF_OUTPUT_KEY_SCHEMA));
		Schema valueSchema = Schema.parse(properties.get(CONF_OUTPUT_VALUE_SCHEMA));
		keyValueWriter = new AvroKeyValueWriter<K, V>(
			keySchema,
			valueSchema,
			compressionCodec,
			getStream());
	} finally {
		if (keyValueWriter == null) {
			close();
		}
	}
}
 
Example #9
Source File: AvroKeyValueSinkWriter.java    From flink with Apache License 2.0
AvroKeyValueWriter(Schema keySchema, Schema valueSchema,
		CodecFactory compressionCodec, OutputStream outputStream,
		int syncInterval) throws IOException {
	// Create the generic record schema for the key/value pair.
	mKeyValuePairSchema = AvroKeyValue
			.getSchema(keySchema, valueSchema);

	// Create an Avro container file and a writer to it.
	DatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<GenericRecord>(
			mKeyValuePairSchema);
	mAvroFileWriter = new DataFileWriter<GenericRecord>(
			genericDatumWriter);
	mAvroFileWriter.setCodec(compressionCodec);
	mAvroFileWriter.setSyncInterval(syncInterval);
	mAvroFileWriter.create(mKeyValuePairSchema, outputStream);

	// Create a reusable output record.
	mOutputRecord = new AvroKeyValue<Object, Object>(
			new GenericData.Record(mKeyValuePairSchema));
}
 
Example #10
Source File: AvroKeyValueSinkWriter.java    From flink with Apache License 2.0
@Override
@SuppressWarnings("deprecation")
public void open(FileSystem fs, Path path) throws IOException {
	super.open(fs, path);

	try {
		CodecFactory compressionCodec = getCompressionCodec(properties);
		Schema keySchema = Schema.parse(properties.get(CONF_OUTPUT_KEY_SCHEMA));
		Schema valueSchema = Schema.parse(properties.get(CONF_OUTPUT_VALUE_SCHEMA));
		keyValueWriter = new AvroKeyValueWriter<K, V>(
			keySchema,
			valueSchema,
			compressionCodec,
			getStream());
	} finally {
		if (keyValueWriter == null) {
			close();
		}
	}
}
 
Example #11
Source File: WriteAvroToByteArrayBuilder.java    From kite with Apache License 2.0
public WriteAvroToByteArray(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);      
  this.format = new Validator<Format>().validateEnum(
      config,
      getConfigs().getString(config, "format", Format.container.toString()),
      Format.class);
  
  String codec = getConfigs().getString(config, "codec", null);
  if (codec == null) {
    this.codecFactory = null;
  } else {
    this.codecFactory = CodecFactory.fromString(codec);
  }
  
  Config metadataConfig = getConfigs().getConfig(config, "metadata", ConfigFactory.empty());
  for (Map.Entry<String, Object> entry : new Configs().getEntrySet(metadataConfig)) {
    this.metadata.put(entry.getKey(), entry.getValue().toString());
  }
  
  validateArguments();
}
 
Example #12
Source File: AvroStockFileWrite.java    From hiped2 with Apache License 2.0
public static void writeToAvro(File inputFile, OutputStream outputStream)
    throws IOException {

  DataFileWriter<Stock> writer =
      new DataFileWriter<Stock>(
          new SpecificDatumWriter<Stock>());

  writer.setCodec(CodecFactory.snappyCodec());
  writer.create(Stock.SCHEMA$, outputStream);

  for (Stock stock : AvroStockUtils.fromCsvFile(inputFile)) {
    writer.append(stock);
  }

  IOUtils.closeStream(writer);
  IOUtils.closeStream(outputStream);
}
 
Example #13
Source File: AbstractKiteConvertProcessor.java    From localization_nifi with Apache License 2.0
protected CodecFactory getCodecFactory(String property) {
    CodecType type = CodecType.valueOf(property);
    switch (type) {
    case BZIP2:
        return CodecFactory.bzip2Codec();
    case DEFLATE:
        return CodecFactory.deflateCodec(CodecFactory.DEFAULT_DEFLATE_LEVEL);
    case NONE:
        return CodecFactory.nullCodec();
    case LZO:
        return CodecFactory.xzCodec(CodecFactory.DEFAULT_XZ_LEVEL);
    case SNAPPY:
    default:
        return CodecFactory.snappyCodec();
    }
}
 
Example #14
Source File: AvroUtil.java    From nifi with Apache License 2.0
public static CodecFactory getCodecFactory(String property) {
    CodecType type = CodecType.valueOf(property);
    switch (type) {
        case BZIP2:
            return CodecFactory.bzip2Codec();
        case DEFLATE:
            return CodecFactory.deflateCodec(CodecFactory.DEFAULT_DEFLATE_LEVEL);
        case LZO:
            return CodecFactory.xzCodec(CodecFactory.DEFAULT_XZ_LEVEL);
        case SNAPPY:
            return CodecFactory.snappyCodec();
        case NONE:
        default:
            return CodecFactory.nullCodec();
    }
}
 
Example #15
Source File: AvroRecordSetWriter.java    From nifi with Apache License 2.0
private CodecFactory getCodecFactory(String property) {
    CodecType type = CodecType.valueOf(property);
    switch (type) {
    case BZIP2:
        return CodecFactory.bzip2Codec();
    case DEFLATE:
        return CodecFactory.deflateCodec(CodecFactory.DEFAULT_DEFLATE_LEVEL);
    case LZO:
        return CodecFactory.xzCodec(CodecFactory.DEFAULT_XZ_LEVEL);
    case SNAPPY:
        return CodecFactory.snappyCodec();
    case NONE:
    default:
        return CodecFactory.nullCodec();
    }
}
 
Example #16
Source File: AvroAsJsonOutputFormat.java    From iow-hadoop-streaming with Apache License 2.0
static <K> void configureDataFileWriter(DataFileWriter<K> writer,
    JobConf job) throws UnsupportedEncodingException {

    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
                org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ?
            CodecFactory.deflateCodec(level) : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    writer.setSyncInterval(job.getInt(org.apache.avro.mapred.AvroOutputFormat.SYNC_INTERVAL_KEY,
            DEFAULT_SYNC_INTERVAL));

    // copy metadata from job
    for (Map.Entry<String,String> e : job) {
        if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()),e.getValue());
        if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                   URLDecoder.decode(e.getValue(), "ISO-8859-1")
                   .getBytes("ISO-8859-1"));
    }
}
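For context, the codec and deflate level read above are plain JobConf entries. A sketch of how a driver might set them, using the key constants from org.apache.avro.mapred (the level value 6 is illustrative):

JobConf job = new JobConf();
FileOutputFormat.setCompressOutput(job, true);                             // enable compressed output
job.set(AvroJob.OUTPUT_CODEC, DataFileConstants.DEFLATE_CODEC);            // "deflate"
job.setInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY, 6);  // deflate level (illustrative)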
 
Example #17
Source File: AbstractAvroEventSerializer.java    From mt-flume with Apache License 2.0
@Override
public void configure(Context context) {

  int syncIntervalBytes =
      context.getInteger(SYNC_INTERVAL_BYTES, DEFAULT_SYNC_INTERVAL_BYTES);
  String compressionCodec =
      context.getString(COMPRESSION_CODEC, DEFAULT_COMPRESSION_CODEC);

  writer = new ReflectDatumWriter<T>(getSchema());
  dataFileWriter = new DataFileWriter<T>(writer);

  dataFileWriter.setSyncInterval(syncIntervalBytes);

  try {
    CodecFactory codecFactory = CodecFactory.fromString(compressionCodec);
    dataFileWriter.setCodec(codecFactory);
  } catch (AvroRuntimeException e) {
    logger.warn("Unable to instantiate avro codec with name (" +
        compressionCodec + "). Compression disabled. Exception follows.", e);
  }
}
 
Example #18
Source File: AvroFileAppender.java    From iceberg with Apache License 2.0
@SuppressWarnings("unchecked")
private static <D> DataFileWriter<D> newAvroWriter(
    Schema schema, OutputFile file, Function<Schema, DatumWriter<?>> createWriterFunc,
    CodecFactory codec, Map<String, String> metadata) throws IOException {
  DataFileWriter<D> writer = new DataFileWriter<>(
      (DatumWriter<D>) createWriterFunc.apply(schema));

  writer.setCodec(codec);

  for (Map.Entry<String, String> entry : metadata.entrySet()) {
    writer.setMeta(entry.getKey(), entry.getValue());
  }

  // TODO: support overwrite
  return writer.create(schema, file.create());
}
 
Example #19
Source File: AvroIOTest.java    From beam with Apache License 2.0
@Test
public void testWriteDisplayData() {
  AvroIO.Write<GenericClass> write =
      AvroIO.write(GenericClass.class)
          .to("/foo")
          .withShardNameTemplate("-SS-of-NN-")
          .withSuffix("bar")
          .withNumShards(100)
          .withCodec(CodecFactory.deflateCodec(6));

  DisplayData displayData = DisplayData.from(write);

  assertThat(displayData, hasDisplayItem("filePrefix", "/foo"));
  assertThat(displayData, hasDisplayItem("shardNameTemplate", "-SS-of-NN-"));
  assertThat(displayData, hasDisplayItem("fileSuffix", "bar"));
  assertThat(
      displayData,
      hasDisplayItem(
          "schema",
          "{\"type\":\"record\",\"name\":\"GenericClass\",\"namespace\":\"org.apache.beam.sdk.io"
              + ".AvroIOTest$\",\"fields\":[{\"name\":\"intField\",\"type\":\"int\"},"
              + "{\"name\":\"stringField\",\"type\":\"string\"}]}"));
  assertThat(displayData, hasDisplayItem("numShards", 100));
  assertThat(displayData, hasDisplayItem("codec", CodecFactory.deflateCodec(6).toString()));
}
 
Example #20
Source File: CsvToAvro.java    From java-docs-samples with Apache License 2.0
public static void runCsvToAvro(SampleOptions options)
    throws IOException, IllegalArgumentException {
  FileSystems.setDefaultPipelineOptions(options);

  // Get Avro Schema
  String schemaJson = getSchema(options.getAvroSchema());
  Schema schema = new Schema.Parser().parse(schemaJson);

  // Check schema field types before starting the Dataflow job
  checkFieldTypes(schema);

  // Create the Pipeline object with the options we defined above.
  Pipeline pipeline = Pipeline.create(options);

  // Convert CSV to Avro
  pipeline.apply("Read CSV files", TextIO.read().from(options.getInputFile()))
      .apply("Convert CSV to Avro formatted data",
          ParDo.of(new ConvertCsvToAvro(schemaJson, options.getCsvDelimiter())))
      .setCoder(AvroCoder.of(GenericRecord.class, schema))
      .apply("Write Avro formatted data", AvroIO.writeGenericRecords(schemaJson)
          .to(options.getOutput()).withCodec(CodecFactory.snappyCodec()).withSuffix(".avro"));

  // Run the pipeline.
  pipeline.run().waitUntilFinish();
}
 
Example #21
Source File: Hdfs.java    From pxf with Apache License 2.0
@Override
public void writeAvroFile(String pathToFile, String schemaName,
                          String codecName, IAvroSchema[] data)
        throws Exception {
    Path path = getDatapath(pathToFile);
    OutputStream outStream = fs.create(path, true, bufferSize,
            replicationSize, blockSize);
    Schema schema = new Schema.Parser().parse(new FileInputStream(
            schemaName));
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(
            schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(
            writer);
    if (!StringUtils.isEmpty(codecName)) {
        dataFileWriter.setCodec(CodecFactory.fromString(codecName));
    }

    dataFileWriter.create(schema, outStream);

    for (IAvroSchema iAvroSchema : data) {
        GenericRecord datum = iAvroSchema.serialize();
        dataFileWriter.append(datum);
    }
    dataFileWriter.close();
}
 
Example #22
Source File: TestExtractAvroMetadata.java    From localization_nifi with Apache License 2.0
@Test
public void testExtractionWithCodec() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ExtractAvroMetadata());
    runner.setProperty(ExtractAvroMetadata.METADATA_KEYS, AVRO_CODEC_ATTR); // test dynamic attribute avro.codec

    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array.avsc"));

    final GenericData.Array<String> data = new GenericData.Array<>(schema, Arrays.asList("one", "two", "three"));
    final DatumWriter<GenericData.Array<String>> datumWriter = new GenericDatumWriter<>(schema);

    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    final DataFileWriter<GenericData.Array<String>> dataFileWriter = new DataFileWriter<>(datumWriter);
    dataFileWriter.setCodec(CodecFactory.deflateCodec(1));
    dataFileWriter.create(schema, out);
    dataFileWriter.append(data);
    dataFileWriter.close();

    runner.enqueue(out.toByteArray());
    runner.run();

    runner.assertAllFlowFilesTransferred(ExtractAvroMetadata.REL_SUCCESS, 1);

    final MockFlowFile flowFile = runner.getFlowFilesForRelationship(ExtractAvroMetadata.REL_SUCCESS).get(0);
    flowFile.assertAttributeEquals("avro.codec", "deflate");
}
 
Example #23
Source File: StageRunData.java    From geowave with Apache License 2.0
private synchronized DataFileWriter getDataWriterCreateIfNull(
    final String typeName,
    final GeoWaveAvroFormatPlugin plugin) {
  if (!cachedWriters.containsKey(typeName)) {
    FSDataOutputStream out = null;
    final DataFileWriter dfw = new DataFileWriter(new GenericDatumWriter());
    cachedWriters.put(typeName, dfw);
    dfw.setCodec(CodecFactory.snappyCodec());
    try {
      // TODO: we should probably clean up the type name to make it
      // HDFS path safe in case there are invalid characters
      // also, if a file already exists do we want to delete it or
      // append to it?
      out = fs.create(new Path(hdfsBaseDirectory, typeName));
      dfw.create(plugin.getAvroSchema(), out);

    } catch (final IOException e) {
      LOGGER.error("Unable to create output stream", e);
      // cache a null value so we don't continually try to recreate
      cachedWriters.put(typeName, null);
      return null;
    }
  }
  return cachedWriters.get(typeName);
}
 
Example #24
Source File: SerializableAvroCodecFactoryTest.java    From beam with Apache License 2.0
@Test
public void testDefaultCodecsSerDe() throws Exception {
  for (String codec : avroCodecs) {
    SerializableAvroCodecFactory codecFactory =
        new SerializableAvroCodecFactory(CodecFactory.fromString(codec));

    SerializableAvroCodecFactory serdeC = SerializableUtils.clone(codecFactory);

    assertEquals(CodecFactory.fromString(codec).toString(), serdeC.getCodec().toString());
  }
}
 
Example #25
Source File: JdbcCommon.java    From nifi with Apache License 2.0
private AvroConversionOptions(String recordName, int maxRows, boolean convertNames, boolean useLogicalTypes,
        int defaultPrecision, int defaultScale, CodecFactory codec) {
    this.recordName = recordName;
    this.maxRows = maxRows;
    this.convertNames = convertNames;
    this.useLogicalTypes = useLogicalTypes;
    this.defaultPrecision = defaultPrecision;
    this.defaultScale = defaultScale;
    this.codec = codec;
}
 
Example #26
Source File: SerializableAvroCodecFactoryTest.java    From beam with Apache License 2.0
@Test
public void testDefaultCodecsIn() throws Exception {
  for (String codec : avroCodecs) {
    SerializableAvroCodecFactory codecFactory =
        new SerializableAvroCodecFactory(CodecFactory.fromString(codec));

    assertEquals(CodecFactory.fromString(codec).toString(), codecFactory.getCodec().toString());
  }
}
 
Example #27
Source File: SerializableAvroCodecFactoryTest.java    From beam with Apache License 2.0
@Test
public void testDeflateCodecSerDeWithLevels() throws Exception {
  for (int i = 0; i < 10; ++i) {
    SerializableAvroCodecFactory codecFactory =
        new SerializableAvroCodecFactory(CodecFactory.deflateCodec(i));

    SerializableAvroCodecFactory serdeC = SerializableUtils.clone(codecFactory);

    assertEquals(CodecFactory.deflateCodec(i).toString(), serdeC.getCodec().toString());
  }
}
 
Example #28
Source File: ConstantAvroDestination.java    From beam with Apache License 2.0
public ConstantAvroDestination(
    FilenamePolicy filenamePolicy,
    Schema schema,
    Map<String, Object> metadata,
    CodecFactory codec,
    SerializableFunction<UserT, OutputT> formatFunction) {
  this.filenamePolicy = filenamePolicy;
  this.schema = Suppliers.compose(new SchemaFunction(), Suppliers.ofInstance(schema.toString()));
  this.metadata = metadata;
  this.codec = new SerializableAvroCodecFactory(codec);
  this.formatFunction = formatFunction;
}
 
Example #29
Source File: AvroRecordWriter.java    From spork with Apache License 2.0
static void configureDataFileWriter(DataFileWriter<GenericData.Record> writer,
    JobConf job) throws UnsupportedEncodingException {
  if (FileOutputFormat.getCompressOutput(job)) {
    int level = job.getInt(DEFLATE_LEVEL_KEY,
        DEFAULT_DEFLATE_LEVEL);
    String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
    CodecFactory factory = codecName.equals(DEFLATE_CODEC)
      ? CodecFactory.deflateCodec(level)
      : CodecFactory.fromString(codecName);
    writer.setCodec(factory);
  }

  // Do max as core-default.xml has io.file.buffer.size as 4K
  writer.setSyncInterval(job.getInt(SYNC_INTERVAL_KEY, Math.max(
          job.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL), DEFAULT_SYNC_INTERVAL)));

  // copy metadata from job
  for (Map.Entry<String,String> e : job) {
    if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
      writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()),
                     e.getValue());
    if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
      writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                     URLDecoder.decode(e.getValue(), "ISO-8859-1")
                     .getBytes("ISO-8859-1"));
  }
}
 
Example #30
Source File: AvroSink.java    From beam with Apache License 2.0
@SuppressWarnings("deprecation") // uses internal test functionality.
@Override
protected void prepareWrite(WritableByteChannel channel) throws Exception {
  DestinationT destination = getDestination();
  CodecFactory codec = dynamicDestinations.getCodec(destination);
  Schema schema = dynamicDestinations.getSchema(destination);
  Map<String, Object> metadata = dynamicDestinations.getMetadata(destination);

  DatumWriter<OutputT> datumWriter =
      genericRecords ? new GenericDatumWriter<>(schema) : new ReflectDatumWriter<>(schema);
  dataFileWriter = new DataFileWriter<>(datumWriter).setCodec(codec);
  for (Map.Entry<String, Object> entry : metadata.entrySet()) {
    Object v = entry.getValue();
    if (v instanceof String) {
      dataFileWriter.setMeta(entry.getKey(), (String) v);
    } else if (v instanceof Long) {
      dataFileWriter.setMeta(entry.getKey(), (Long) v);
    } else if (v instanceof byte[]) {
      dataFileWriter.setMeta(entry.getKey(), (byte[]) v);
    } else {
      throw new IllegalStateException(
          "Metadata value type must be one of String, Long, or byte[]. Found "
              + v.getClass().getSimpleName());
    }
  }
  dataFileWriter.create(schema, Channels.newOutputStream(channel));
}