Java Code Examples for org.apache.avro.reflect.ReflectDatumWriter

The following examples show how to use org.apache.avro.reflect.ReflectDatumWriter. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: hadoop   Source File: AvroTestUtil.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a value survives an Avro reflect serialization round trip.
 *
 * <p>First asserts that the schema derived by reflection from {@code type} equals the
 * schema parsed from the {@code schema} JSON string. Then writes {@code value} with a
 * {@link ReflectDatumWriter}, reads it back with a {@link ReflectDatumReader} and asserts
 * that the decoded object equals the original.
 *
 * @param value the object to serialize and compare after the round trip
 * @param type the Java type the reflected schema is derived from
 * @param schema the expected schema as a JSON string
 * @throws Exception if schema parsing, encoding or decoding fails
 */
public static void testReflect(Object value, Type type, String schema)
  throws Exception {

  // check that schema matches expected
  Schema s = ReflectData.get().getSchema(type);
  // Schema.parse(String) is deprecated; Schema.Parser is its replacement.
  assertEquals(new Schema.Parser().parse(schema), s);

  // check that value is serialized correctly
  ReflectDatumWriter<Object> writer = new ReflectDatumWriter<Object>(s);
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  writer.write(value, EncoderFactory.get().directBinaryEncoder(out, null));

  // decode the bytes again using the same schema and compare
  ReflectDatumReader<Object> reader = new ReflectDatumReader<Object>(s);
  Object after =
    reader.read(null,
                DecoderFactory.get().binaryDecoder(out.toByteArray(), null));
  assertEquals(value, after);
}
 
Example 2
Source Project: big-c   Source File: AvroTestUtil.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a value survives an Avro reflect serialization round trip.
 *
 * <p>First asserts that the schema derived by reflection from {@code type} equals the
 * schema parsed from the {@code schema} JSON string. Then writes {@code value} with a
 * {@link ReflectDatumWriter}, reads it back with a {@link ReflectDatumReader} and asserts
 * that the decoded object equals the original.
 *
 * @param value the object to serialize and compare after the round trip
 * @param type the Java type the reflected schema is derived from
 * @param schema the expected schema as a JSON string
 * @throws Exception if schema parsing, encoding or decoding fails
 */
public static void testReflect(Object value, Type type, String schema)
  throws Exception {

  // check that schema matches expected
  Schema s = ReflectData.get().getSchema(type);
  // Schema.parse(String) is deprecated; Schema.Parser is its replacement.
  assertEquals(new Schema.Parser().parse(schema), s);

  // check that value is serialized correctly
  ReflectDatumWriter<Object> writer = new ReflectDatumWriter<Object>(s);
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  writer.write(value, EncoderFactory.get().directBinaryEncoder(out, null));

  // decode the bytes again using the same schema and compare
  ReflectDatumReader<Object> reader = new ReflectDatumReader<Object>(s);
  Object after =
    reader.read(null,
                DecoderFactory.get().binaryDecoder(out.toByteArray(), null));
  assertEquals(value, after);
}
 
Example 3
Source Project: beam   Source File: AvroIO.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Parses the JSON schema, creates the Avro container-file writer, applies codec and
 * metadata, and starts writing to the given channel.
 *
 * @param channel the channel the Avro container file is written to
 * @throws IOException if the container file cannot be created
 * @throws IllegalStateException if a metadata value is not a String, Long or byte[]
 */
@Override
public void open(WritableByteChannel channel) throws IOException {
  this.schema = new Schema.Parser().parse(getJsonSchema());

  // A reflect writer is used when no record formatter is set, a generic one otherwise.
  final DataFileWriter<?> fileWriter;
  if (getRecordFormatter() != null) {
    genericWriter = new DataFileWriter<>(new GenericDatumWriter<>(schema));
    fileWriter = genericWriter;
  } else {
    reflectWriter = new DataFileWriter<>(new ReflectDatumWriter<>(schema));
    fileWriter = reflectWriter;
  }
  fileWriter.setCodec(getCodec().getCodec());

  // setMeta is overloaded, so dispatch on the runtime type of each metadata value.
  for (Map.Entry<String, Object> metaEntry : getMetadata().entrySet()) {
    final Object metaValue = metaEntry.getValue();
    if (metaValue instanceof String) {
      fileWriter.setMeta(metaEntry.getKey(), (String) metaValue);
      continue;
    }
    if (metaValue instanceof Long) {
      fileWriter.setMeta(metaEntry.getKey(), (Long) metaValue);
      continue;
    }
    if (metaValue instanceof byte[]) {
      fileWriter.setMeta(metaEntry.getKey(), (byte[]) metaValue);
      continue;
    }
    throw new IllegalStateException(
        "Metadata value type must be one of String, Long, or byte[]. Found "
            + metaValue.getClass().getSimpleName());
  }

  fileWriter.create(schema, Channels.newOutputStream(channel));
}
 
Example 4
Source Project: datafu   Source File: AvroKeyWithMetadataRecordWriter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a record writer backed by an Avro container file.
 *
 * <p>Every configuration entry whose key starts with {@code TEXT_PREFIX} is copied into
 * the file metadata, with the prefix stripped from the key.
 *
 * @param writerSchema The writer schema for the records in the Avro container file.
 * @param compressionCodec A compression codec factory for the Avro container file.
 * @param outputStream The output stream to write the Avro container file to.
 * @param conf the configuration scanned for metadata entries
 * @throws IOException If the record writer cannot be opened.
 */
public AvroKeyWithMetadataRecordWriter(Schema writerSchema, CodecFactory compressionCodec,
    OutputStream outputStream, Configuration conf) throws IOException {
  // Set up the container-file writer before any metadata is attached.
  mAvroFileWriter = new DataFileWriter<T>(new ReflectDatumWriter<T>(writerSchema));
  mAvroFileWriter.setCodec(compressionCodec);

  for (Entry<String,String> property : conf) {
    String propertyName = property.getKey();
    if (!propertyName.startsWith(TEXT_PREFIX)) {
      continue;
    }
    String metaKey = propertyName.substring(TEXT_PREFIX.length());
    mAvroFileWriter.setMeta(metaKey, property.getValue());
  }

  mAvroFileWriter.create(writerSchema, outputStream);
}
 
Example 5
Source Project: mt-flume   Source File: AbstractAvroEventSerializer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the reflect datum writer and the data file writer from the context settings.
 *
 * <p>Reads the sync interval and compression codec name (with defaults) from
 * {@code context}. If the codec cannot be instantiated, a warning is logged and no
 * codec is set on the writer.
 *
 * @param context the configuration context to read settings from
 */
@Override
public void configure(Context context) {

  final int syncInterval =
      context.getInteger(SYNC_INTERVAL_BYTES, DEFAULT_SYNC_INTERVAL_BYTES);
  final String codecName =
      context.getString(COMPRESSION_CODEC, DEFAULT_COMPRESSION_CODEC);

  writer = new ReflectDatumWriter<T>(getSchema());
  dataFileWriter = new DataFileWriter<T>(writer);
  dataFileWriter.setSyncInterval(syncInterval);

  try {
    dataFileWriter.setCodec(CodecFactory.fromString(codecName));
  } catch (AvroRuntimeException e) {
    // Best effort: carry on without setting a codec.
    logger.warn("Unable to instantiate avro codec with name (" +
        codecName + "). Compression disabled. Exception follows.", e);
  }
}
 
Example 6
Source Project: mt-flume   Source File: Log4jAppender.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Serializes a datum to Avro binary using a reflect writer.
 *
 * <p>The stream, writer and encoder are cached and only rebuilt when the datum schema
 * differs from the cached one.
 *
 * @param datum the object to serialize
 * @param datumSchema the schema to serialize the object with
 * @return the serialized bytes
 * @throws FlumeException wrapping any {@link IOException} raised while writing
 */
private byte[] serialize(Object datum, Schema datumSchema) throws FlumeException {
  boolean cacheUsable = schema != null && datumSchema.equals(schema);
  if (!cacheUsable) {
    schema = datumSchema;
    out = new ByteArrayOutputStream();
    writer = new ReflectDatumWriter<Object>(schema);
    encoder = EncoderFactory.get().binaryEncoder(out, null);
  }

  // Discard bytes left over from the previous call.
  out.reset();
  try {
    writer.write(datum, encoder);
    encoder.flush();
  } catch (IOException e) {
    throw new FlumeException(e);
  }
  return out.toByteArray();
}
 
Example 7
Source Project: kite   Source File: Log4jAppender.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Serializes a datum to Avro binary using a reflect writer.
 *
 * <p>The stream, writer and encoder are cached and only rebuilt when the datum schema
 * differs from the cached one.
 *
 * @param datum the object to serialize
 * @param datumSchema the schema to serialize the object with
 * @return the serialized bytes
 * @throws FlumeException wrapping any {@link IOException} raised while writing
 */
private byte[] serialize(Object datum, Schema datumSchema) throws FlumeException {
  boolean cacheUsable = schema != null && datumSchema.equals(schema);
  if (!cacheUsable) {
    schema = datumSchema;
    out = new ByteArrayOutputStream();
    writer = new ReflectDatumWriter<Object>(schema);
    encoder = EncoderFactory.get().binaryEncoder(out, null);
  }

  // Discard bytes left over from the previous call.
  out.reset();
  try {
    writer.write(datum, encoder);
    encoder.flush();
  } catch (IOException e) {
    throw new FlumeException(e);
  }
  return out.toByteArray();
}
 
Example 8
Source Project: Flink-CEPplus   Source File: AvroFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds an {@link AvroFactory} for a type handled through Avro reflection.
 *
 * <p>The schema is derived from {@code type} using a {@link ReflectData} bound to
 * {@code cl}. The reader is given {@code previousSchema} as its first schema, falling
 * back to the newly derived schema when none is present.
 *
 * @param type the class to derive the schema from
 * @param cl the class loader handed to {@link ReflectData}
 * @param previousSchema the earlier schema, if any
 * @return a factory bundling the reflect data, schema, reader and writer
 */
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
private static <T> AvroFactory<T> fromReflective(Class<T> type, ClassLoader cl, Optional<Schema> previousSchema) {
	final ReflectData reflectData = new ReflectData(cl);
	final Schema newSchema = reflectData.getSchema(type);

	final Schema firstReaderSchema = previousSchema.orElse(newSchema);
	final ReflectDatumReader<T> datumReader =
		new ReflectDatumReader<>(firstReaderSchema, newSchema, reflectData);
	final ReflectDatumWriter<T> datumWriter =
		new ReflectDatumWriter<>(newSchema, reflectData);

	return new AvroFactory<>(reflectData, newSchema, datumReader, datumWriter);
}
 
Example 9
Source Project: Flink-CEPplus   Source File: AvroOutputFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Opens the output and creates the Avro data file writer for this task.
 *
 * <p>The datum writer and schema are chosen from {@code avroValueType}: specific records
 * take their schema from a fresh instance, generic records require
 * {@code userDefinedSchema}, and any other type goes through Avro reflection. When a
 * user-defined schema is set, it is always the one the file is created with.
 *
 * @param taskNumber the number of this parallel task
 * @param numTasks the total number of parallel tasks
 * @throws IOException if the output cannot be opened or the file cannot be created
 * @throws IllegalStateException if generic records are written without a user-defined schema
 * @throws RuntimeException if a specific record type cannot be instantiated
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
	super.open(taskNumber, numTasks);

	DatumWriter<E> datumWriter;
	Schema schema;
	if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
		datumWriter = new SpecificDatumWriter<E>(avroValueType);
		try {
			schema = ((org.apache.avro.specific.SpecificRecordBase) avroValueType.newInstance()).getSchema();
		} catch (InstantiationException | IllegalAccessException e) {
			// Same message as before, but chain the cause so the stack trace is not lost.
			throw new RuntimeException(e.getMessage(), e);
		}
	} else if (org.apache.avro.generic.GenericRecord.class.isAssignableFrom(avroValueType)) {
		if (userDefinedSchema == null) {
			throw new IllegalStateException("Schema must be set when using Generic Record");
		}
		datumWriter = new GenericDatumWriter<E>(userDefinedSchema);
		schema = userDefinedSchema;
	} else {
		datumWriter = new ReflectDatumWriter<E>(avroValueType);
		schema = ReflectData.get().getSchema(avroValueType);
	}
	dataFileWriter = new DataFileWriter<E>(datumWriter);
	if (codec != null) {
		dataFileWriter.setCodec(codec.getCodecFactory());
	}
	// A user-defined schema, when present, takes precedence over the derived one.
	if (userDefinedSchema == null) {
		dataFileWriter.create(schema, stream);
	} else {
		dataFileWriter.create(userDefinedSchema, stream);
	}
}
 
Example 10
Source Project: flink   Source File: AvroFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds an {@link AvroFactory} for a type handled through Avro reflection.
 *
 * <p>The schema is derived from {@code type} using a {@link ReflectData} bound to
 * {@code cl}. The reader is given {@code previousSchema} as its first schema, falling
 * back to the newly derived schema when none is present.
 *
 * @param type the class to derive the schema from
 * @param cl the class loader handed to {@link ReflectData}
 * @param previousSchema the earlier schema, if any
 * @return a factory bundling the reflect data, schema, reader and writer
 */
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
private static <T> AvroFactory<T> fromReflective(Class<T> type, ClassLoader cl, Optional<Schema> previousSchema) {
	final ReflectData reflectData = new ReflectData(cl);
	final Schema newSchema = reflectData.getSchema(type);

	final Schema firstReaderSchema = previousSchema.orElse(newSchema);
	final ReflectDatumReader<T> datumReader =
		new ReflectDatumReader<>(firstReaderSchema, newSchema, reflectData);
	final ReflectDatumWriter<T> datumWriter =
		new ReflectDatumWriter<>(newSchema, reflectData);

	return new AvroFactory<>(reflectData, newSchema, datumReader, datumWriter);
}
 
Example 11
Source Project: flink   Source File: AvroOutputFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Opens the output and creates the Avro data file writer for this task.
 *
 * <p>The datum writer and schema are chosen from {@code avroValueType}: specific records
 * take their schema from a fresh instance, generic records require
 * {@code userDefinedSchema}, and any other type goes through Avro reflection. When a
 * user-defined schema is set, it is always the one the file is created with.
 *
 * @param taskNumber the number of this parallel task
 * @param numTasks the total number of parallel tasks
 * @throws IOException if the output cannot be opened or the file cannot be created
 * @throws IllegalStateException if generic records are written without a user-defined schema
 * @throws RuntimeException if a specific record type cannot be instantiated
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
	super.open(taskNumber, numTasks);

	DatumWriter<E> datumWriter;
	Schema schema;
	if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
		datumWriter = new SpecificDatumWriter<E>(avroValueType);
		try {
			schema = ((org.apache.avro.specific.SpecificRecordBase) avroValueType.newInstance()).getSchema();
		} catch (InstantiationException | IllegalAccessException e) {
			// Same message as before, but chain the cause so the stack trace is not lost.
			throw new RuntimeException(e.getMessage(), e);
		}
	} else if (org.apache.avro.generic.GenericRecord.class.isAssignableFrom(avroValueType)) {
		if (userDefinedSchema == null) {
			throw new IllegalStateException("Schema must be set when using Generic Record");
		}
		datumWriter = new GenericDatumWriter<E>(userDefinedSchema);
		schema = userDefinedSchema;
	} else {
		datumWriter = new ReflectDatumWriter<E>(avroValueType);
		schema = ReflectData.get().getSchema(avroValueType);
	}
	dataFileWriter = new DataFileWriter<E>(datumWriter);
	if (codec != null) {
		dataFileWriter.setCodec(codec.getCodecFactory());
	}
	// A user-defined schema, when present, takes precedence over the derived one.
	if (userDefinedSchema == null) {
		dataFileWriter.create(schema, stream);
	} else {
		dataFileWriter.create(userDefinedSchema, stream);
	}
}
 
Example 12
/**
 * Creates a record writer that appends each wrapped datum to an Avro container file.
 *
 * <p>The map output schema is used when the job has no reduce tasks, otherwise the job
 * output schema. The file is created at the default work file path for this task.
 *
 * @param context the task attempt context supplying configuration and paths
 * @return a writer appending the datum of each {@code AvroWrapper} to the file
 * @throws IOException if the output file cannot be created
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(
  TaskAttemptContext context) throws IOException, InterruptedException {

  final Schema outputSchema;
  if (context.getNumReduceTasks() == 0) {
    // Map-only job: the map output is the final output.
    outputSchema = AvroJob.getMapOutputSchema(context.getConfiguration());
  } else {
    outputSchema = AvroJob.getOutputSchema(context.getConfiguration());
  }

  final DataFileWriter<T> fileWriter =
    new DataFileWriter<T>(new ReflectDatumWriter<T>());
  configureDataFileWriter(fileWriter, context);

  Path workFile = getDefaultWorkFile(context, EXT);
  fileWriter.create(outputSchema,
    workFile.getFileSystem(context.getConfiguration()).create(workFile));

  return new RecordWriter<AvroWrapper<T>, NullWritable>() {
    @Override
    public void write(AvroWrapper<T> wrapper, NullWritable ignore)
      throws IOException {
      fileWriter.append(wrapper.datum());
    }

    @Override
    public void close(TaskAttemptContext taskAttemptContext)
      throws IOException, InterruptedException {
      fileWriter.close();
    }
  };
}
 
Example 13
Source Project: schema-evolution-samples   Source File: AvroCodec.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Selects the datum writer matching the given type.
 *
 * <p>Types assignable to {@code SpecificRecord} get a specific writer, types assignable
 * to {@code GenericRecord} get a generic writer, and everything else a reflect writer.
 *
 * @param type the class of the objects to be written
 * @param schema the schema handed to the selected writer
 * @return the datum writer chosen for {@code type}
 */
private DatumWriter getDatumWriter(Class<?> type, Schema schema){
	logger.debug("Finding correct DatumWriter for type {}",type.getName());

	final DatumWriter selected;
	if (SpecificRecord.class.isAssignableFrom(type)) {
		selected = new SpecificDatumWriter<>(schema);
	} else if (GenericRecord.class.isAssignableFrom(type)) {
		selected = new GenericDatumWriter<>(schema);
	} else {
		// Neither specific nor generic: fall back to reflection.
		selected = new ReflectDatumWriter<>(schema);
	}

	logger.debug("DatumWriter of type {} selected",selected.getClass().getName());
	return selected;
}
 
Example 14
Source Project: beam   Source File: AvroCoder.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a coder for the given type and schema.
 *
 * <p>Stores the type, wraps the schema in a {@code SerializableSchemaSupplier}, records
 * the result of the determinism check, and sets up thread-local holders for the decoder,
 * encoder, datum reader and datum writer. The reader and writer are generic ones when the
 * type equals {@code GenericRecord.class}; otherwise they are reflect-based and use the
 * memoized {@code ReflectData}.
 *
 * @param type the class of the elements this coder handles
 * @param schema the Avro schema used for encoding and decoding
 */
protected AvroCoder(Class<T> type, Schema schema) {
  this.type = type;
  this.schemaSupplier = new SerializableSchemaSupplier(schema);
  typeDescriptor = TypeDescriptor.of(type);
  // Result of checking the type/schema pair with AvroDeterminismChecker.
  nonDeterministicReasons = new AvroDeterminismChecker().check(TypeDescriptor.of(type), schema);

  // Decoder and Encoder start off null for each thread. They are allocated and potentially
  // reused inside encode/decode.
  this.decoder = new EmptyOnDeserializationThreadLocal<>();
  this.encoder = new EmptyOnDeserializationThreadLocal<>();

  // Memoized, so the supplier's result is computed once and then reused.
  this.reflectData = Suppliers.memoize(new SerializableReflectDataSupplier(getType()));

  // Reader and writer are allocated once per thread per Coder
  this.reader =
      new EmptyOnDeserializationThreadLocal<DatumReader<T>>() {
        // NOTE(review): explicit reference to the enclosing coder; presumably kept as a
        // field so it travels with this thread-local on serialization — confirm.
        private final AvroCoder<T> myCoder = AvroCoder.this;

        @Override
        public DatumReader<T> initialValue() {
          // Generic reader only for exactly GenericRecord; everything else uses reflection.
          return myCoder.getType().equals(GenericRecord.class)
              ? new GenericDatumReader<>(myCoder.getSchema())
              : new ReflectDatumReader<>(
                  myCoder.getSchema(), myCoder.getSchema(), myCoder.reflectData.get());
        }
      };

  this.writer =
      new EmptyOnDeserializationThreadLocal<DatumWriter<T>>() {
        // NOTE(review): same explicit enclosing-coder reference as in the reader above.
        private final AvroCoder<T> myCoder = AvroCoder.this;

        @Override
        public DatumWriter<T> initialValue() {
          // Mirrors the reader selection: generic for GenericRecord, reflect otherwise.
          return myCoder.getType().equals(GenericRecord.class)
              ? new GenericDatumWriter<>(myCoder.getSchema())
              : new ReflectDatumWriter<>(myCoder.getSchema(), myCoder.reflectData.get());
        }
      };
}
 
Example 15
Source Project: beam   Source File: AvroSink.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the Avro data file writer for the current destination and starts the file.
 *
 * <p>Codec, schema and metadata are looked up from {@code dynamicDestinations}. A generic
 * datum writer is used when {@code genericRecords} is set, a reflect writer otherwise.
 *
 * @param channel the channel the Avro container file is written to
 * @throws Exception if the container file cannot be created
 * @throws IllegalStateException if a metadata value is not a String, Long or byte[]
 */
@SuppressWarnings("deprecation") // uses internal test functionality.
@Override
protected void prepareWrite(WritableByteChannel channel) throws Exception {
  final DestinationT destination = getDestination();
  final CodecFactory codec = dynamicDestinations.getCodec(destination);
  final Schema schema = dynamicDestinations.getSchema(destination);
  final Map<String, Object> metadata = dynamicDestinations.getMetadata(destination);

  final DatumWriter<OutputT> datumWriter;
  if (genericRecords) {
    datumWriter = new GenericDatumWriter<>(schema);
  } else {
    datumWriter = new ReflectDatumWriter<>(schema);
  }
  dataFileWriter = new DataFileWriter<>(datumWriter).setCodec(codec);

  // setMeta is overloaded, so dispatch on the runtime type of each metadata value.
  for (Map.Entry<String, Object> metaEntry : metadata.entrySet()) {
    final Object metaValue = metaEntry.getValue();
    if (metaValue instanceof String) {
      dataFileWriter.setMeta(metaEntry.getKey(), (String) metaValue);
      continue;
    }
    if (metaValue instanceof Long) {
      dataFileWriter.setMeta(metaEntry.getKey(), (Long) metaValue);
      continue;
    }
    if (metaValue instanceof byte[]) {
      dataFileWriter.setMeta(metaEntry.getKey(), (byte[]) metaValue);
      continue;
    }
    throw new IllegalStateException(
        "Metadata value type must be one of String, Long, or byte[]. Found "
            + metaValue.getClass().getSimpleName());
  }

  dataFileWriter.create(schema, Channels.newOutputStream(channel));
}
 
Example 16
Source Project: pulsar   Source File: AvroWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a writer that encodes objects with Avro reflection into an in-memory buffer.
 *
 * @param schema the schema the reflect datum writer is created with
 * @param jsr310ConversionEnabled forwarded to {@code AvroSchema.addLogicalTypeConversions}
 */
public AvroWriter(Schema schema, boolean jsr310ConversionEnabled) {
    // Output buffer and the binary encoder writing into it.
    this.byteArrayOutputStream = new ByteArrayOutputStream();
    this.encoder = EncoderFactory.get().binaryEncoder(this.byteArrayOutputStream, this.encoder);

    // A dedicated ReflectData instance receives the logical type conversions.
    final ReflectData conversionAwareData = new ReflectData();
    AvroSchema.addLogicalTypeConversions(conversionAwareData, jsr310ConversionEnabled);
    this.writer = new ReflectDatumWriter<>(schema, conversionAwareData);
}
 
Example 17
Source Project: flink   Source File: AvroFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds an {@link AvroFactory} for a type handled through Avro reflection.
 *
 * <p>The schema is derived from {@code type} using a {@link ReflectData} bound to
 * {@code cl}. The reader is given {@code previousSchema} as its first schema, falling
 * back to the newly derived schema when none is present.
 *
 * @param type the class to derive the schema from
 * @param cl the class loader handed to {@link ReflectData}
 * @param previousSchema the earlier schema, if any
 * @return a factory bundling the reflect data, schema, reader and writer
 */
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
private static <T> AvroFactory<T> fromReflective(Class<T> type, ClassLoader cl, Optional<Schema> previousSchema) {
	final ReflectData reflectData = new ReflectData(cl);
	final Schema newSchema = reflectData.getSchema(type);

	final Schema firstReaderSchema = previousSchema.orElse(newSchema);
	final ReflectDatumReader<T> datumReader =
		new ReflectDatumReader<>(firstReaderSchema, newSchema, reflectData);
	final ReflectDatumWriter<T> datumWriter =
		new ReflectDatumWriter<>(newSchema, reflectData);

	return new AvroFactory<>(reflectData, newSchema, datumReader, datumWriter);
}
 
Example 18
Source Project: flink   Source File: AvroWriters.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates an {@link AvroWriterFactory} for the given type. The schema is derived from the
 * type by reflection, and the resulting writers use {@link ReflectDatumWriter} with that
 * schema to write the records.
 *
 * @param type The class of the type to write.
 * @return a writer factory whose builder creates reflect-based Avro data file writers
 */
public static <T> AvroWriterFactory<T> forReflectRecord(Class<T> type) {
	// The builder lambda captures the schema in its JSON string form.
	final String reflectedSchemaJson = ReflectData.get().getSchema(type).toString();
	final AvroBuilder<T> writerBuilder = (outputStream) ->
		createAvroDataFileWriter(reflectedSchemaJson, ReflectDatumWriter::new, outputStream);
	return new AvroWriterFactory<>(writerBuilder);
}
 
Example 19
Source Project: flink   Source File: AvroOutputFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Opens the output and creates the Avro data file writer for this task.
 *
 * <p>The datum writer and schema are chosen from {@code avroValueType}: specific records
 * take their schema from a fresh instance, generic records require
 * {@code userDefinedSchema}, and any other type goes through Avro reflection. When a
 * user-defined schema is set, it is always the one the file is created with.
 *
 * @param taskNumber the number of this parallel task
 * @param numTasks the total number of parallel tasks
 * @throws IOException if the output cannot be opened or the file cannot be created
 * @throws IllegalStateException if generic records are written without a user-defined schema
 * @throws RuntimeException if a specific record type cannot be instantiated
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
	super.open(taskNumber, numTasks);

	DatumWriter<E> datumWriter;
	Schema schema;
	if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
		datumWriter = new SpecificDatumWriter<E>(avroValueType);
		try {
			schema = ((org.apache.avro.specific.SpecificRecordBase) avroValueType.newInstance()).getSchema();
		} catch (InstantiationException | IllegalAccessException e) {
			// Same message as before, but chain the cause so the stack trace is not lost.
			throw new RuntimeException(e.getMessage(), e);
		}
	} else if (org.apache.avro.generic.GenericRecord.class.isAssignableFrom(avroValueType)) {
		if (userDefinedSchema == null) {
			throw new IllegalStateException("Schema must be set when using Generic Record");
		}
		datumWriter = new GenericDatumWriter<E>(userDefinedSchema);
		schema = userDefinedSchema;
	} else {
		datumWriter = new ReflectDatumWriter<E>(avroValueType);
		schema = ReflectData.get().getSchema(avroValueType);
	}
	dataFileWriter = new DataFileWriter<E>(datumWriter);
	if (codec != null) {
		dataFileWriter.setCodec(codec.getCodecFactory());
	}
	// A user-defined schema, when present, takes precedence over the derived one.
	if (userDefinedSchema == null) {
		dataFileWriter.create(schema, stream);
	} else {
		dataFileWriter.create(userDefinedSchema, stream);
	}
}
 
Example 20
/**
 * Creates a key/value record writer backed by an Avro container file.
 *
 * <p>The pair schema is built from the writer schemas of the two converters. Every
 * configuration entry whose key starts with {@code TEXT_PREFIX} is copied into the file
 * metadata, with the prefix stripped from the key.
 *
 * @param keyConverter converter supplying the key writer schema
 * @param valueConverter converter supplying the value writer schema
 * @param compressionCodec the codec factory set on the container file writer
 * @param outputStream the stream the container file is written to
 * @param conf the configuration scanned for metadata entries
 * @throws IOException if the container file cannot be created
 */
public AvroKeyValueWithMetadataRecordWriter(AvroDatumConverter<K, ?> keyConverter,
    AvroDatumConverter<V, ?> valueConverter, CodecFactory compressionCodec,
    OutputStream outputStream, Configuration conf) throws IOException {
  // Schema of the generic record holding one key/value pair.
  mKeyValuePairSchema = AvroKeyValue.getSchema(
      keyConverter.getWriterSchema(), valueConverter.getWriterSchema());

  // Container-file writer for those pair records.
  mAvroFileWriter = new DataFileWriter<GenericRecord>(
      new ReflectDatumWriter<GenericRecord>(mKeyValuePairSchema));
  mAvroFileWriter.setCodec(compressionCodec);

  for (Entry<String,String> property : conf) {
    String propertyName = property.getKey();
    if (!propertyName.startsWith(TEXT_PREFIX)) {
      continue;
    }
    String metaKey = propertyName.substring(TEXT_PREFIX.length());
    mAvroFileWriter.setMeta(metaKey, property.getValue());
  }

  mAvroFileWriter.create(mKeyValuePairSchema, outputStream);

  // Remember the converters.
  mKeyConverter = keyConverter;
  mValueConverter = valueConverter;

  // One output record instance, created once here.
  mOutputRecord = new AvroKeyValue<Object, Object>(new GenericData.Record(mKeyValuePairSchema));
}
 
Example 21
Source Project: mt-flume   Source File: TestAvroEventSerializer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Serializes a datum to Avro binary using a reflect writer.
 *
 * @param datum the object to serialize
 * @param schema the schema to serialize the object with
 * @return the serialized bytes
 * @throws IOException if writing or flushing the encoder fails
 */
private byte[] serializeAvro(Object datum, Schema schema) throws IOException {
  // The stream is new and empty, so no reset is needed before writing.
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  ReflectDatumWriter<Object> writer = new ReflectDatumWriter<Object>(schema);
  BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
  writer.write(datum, encoder);
  // Flush so everything the encoder holds reaches the stream before it is read.
  encoder.flush();
  return out.toByteArray();
}
 
Example 22
Source Project: stratosphere   Source File: AvroBaseValue.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the reflect datum writer, creating it on first use from the runtime class of
 * the current datum.
 *
 * @return the cached or newly created writer
 */
private ReflectDatumWriter<T> getWriter() {
	if (this.writer != null) {
		return this.writer;
	}

	@SuppressWarnings("unchecked")
	Class<T> datumClass = (Class<T>) datum().getClass();
	this.writer = new ReflectDatumWriter<T>(datumClass);
	return this.writer;
}
 
Example 23
Source Project: stratosphere   Source File: AvroSerializer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the reflect reader and writer plus the encoder and decoder on first use.
 * Does nothing when the reader is already set.
 */
private final void checkAvroInitialized() {
	if (this.reader != null) {
		return;
	}

	this.reader = new ReflectDatumReader<T>(type);
	this.writer = new ReflectDatumWriter<T>(type);
	this.encoder = new DataOutputEncoder();
	this.decoder = new DataInputDecoder();
}
 
Example 24
Source Project: kite   Source File: AvroAppender.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the reflect-based data file writer and starts an Avro container file at
 * {@code path}, asking the file system to overwrite.
 *
 * @throws IOException if the file cannot be created
 */
@Override
public void open() throws IOException {
  // Writer chain: reflect datum writer wrapped by the container-file writer.
  writer = new ReflectDatumWriter<E>();
  dataFileWriter = new DataFileWriter<E>(writer);
  if (enableCompression) {
    dataFileWriter.setCodec(getCodecFactory());
  }

  // Open the target stream, then start the container file with the schema.
  out = fileSystem.create(path, true);
  dataFileWriter.create(schema, out);
}
 
Example 25
/**
 * Creates a reflect-based datum writer for the given schema.
 *
 * @param data not used by this implementation
 * @param schema the schema the writer is created with
 * @return a new {@link ReflectDatumWriter}
 */
@Override
public DatumWriter<T> createDatumWriter(T data, Schema schema) {
    final DatumWriter<T> reflectWriter = new ReflectDatumWriter<>(schema);
    return reflectWriter;
}
 
Example 26
Source Project: Flink-CEPplus   Source File: AvroExternalJarProgram.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Writes generated {@code MyUser} records to an Avro container file using reflection.
 *
 * @param testFile the file to write the records to
 * @param numRecords the number of records to generate and append
 * @throws IOException if creating the file or appending a record fails
 */
public static void writeTestData(File testFile, int numRecords) throws IOException {
	DatumWriter<MyUser> userDatumWriter = new ReflectDatumWriter<MyUser>(MyUser.class);

	// try-with-resources closes the writer even when create() or append() throws.
	try (DataFileWriter<MyUser> dataFileWriter = new DataFileWriter<MyUser>(userDatumWriter)) {
		dataFileWriter.create(ReflectData.get().getSchema(MyUser.class), testFile);

		Generator generator = new Generator();

		for (int i = 0; i < numRecords; i++) {
			MyUser user = generator.nextUser();
			dataFileWriter.append(user);
		}
	}
}
 
Example 27
Source Project: flink   Source File: AvroExternalJarProgram.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Writes generated {@code MyUser} records to an Avro container file using reflection.
 *
 * @param testFile the file to write the records to
 * @param numRecords the number of records to generate and append
 * @throws IOException if creating the file or appending a record fails
 */
public static void writeTestData(File testFile, int numRecords) throws IOException {
	DatumWriter<MyUser> userDatumWriter = new ReflectDatumWriter<MyUser>(MyUser.class);

	// try-with-resources closes the writer even when create() or append() throws.
	try (DataFileWriter<MyUser> dataFileWriter = new DataFileWriter<MyUser>(userDatumWriter)) {
		dataFileWriter.create(ReflectData.get().getSchema(MyUser.class), testFile);

		Generator generator = new Generator();

		for (int i = 0; i < numRecords; i++) {
			MyUser user = generator.nextUser();
			dataFileWriter.append(user);
		}
	}
}
 
Example 28
Source Project: hadoop   Source File: AvroReflectSerialization.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns a new reflect-based datum writer.
 *
 * @param clazz not used by this implementation
 * @return a new {@link ReflectDatumWriter} created without a schema
 */
@InterfaceAudience.Private
@Override
public DatumWriter getWriter(Class<Object> clazz) {
  DatumWriter reflectWriter = new ReflectDatumWriter();
  return reflectWriter;
}
 
Example 29
Source Project: big-c   Source File: AvroReflectSerialization.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns a new reflect-based datum writer.
 *
 * @param clazz not used by this implementation
 * @return a new {@link ReflectDatumWriter} created without a schema
 */
@InterfaceAudience.Private
@Override
public DatumWriter getWriter(Class<Object> clazz) {
  DatumWriter reflectWriter = new ReflectDatumWriter();
  return reflectWriter;
}
 
Example 30
Source Project: beam   Source File: AvroSourceTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Generates an input Avro file containing the given records in the temporary directory and
 * returns the full path of the file.
 *
 * <p>A generic datum writer is used when the coder's type equals
 * {@code GenericRecord.class}, a reflect writer otherwise. Sync markers are written
 * according to {@code syncBehavior}; the random variant uses a fixed seed.
 *
 * @param filename name of the file to create in the temporary folder
 * @param elems the records to append, in order
 * @param syncBehavior how sync markers are placed between records
 * @param syncInterval record count between syncs (regular) or bound for the random interval
 * @param coder supplies the element type and schema
 * @param codec name of the compression codec passed to {@code CodecFactory.fromString}
 * @return the path of the generated file
 * @throws IOException if the file cannot be created or written
 */
private <T> String generateTestFile(
    String filename,
    List<T> elems,
    SyncBehavior syncBehavior,
    int syncInterval,
    AvroCoder<T> coder,
    String codec)
    throws IOException {
  Random random = new Random(0);
  File tmpFile = tmpFolder.newFile(filename);
  String path = tmpFile.toString();

  DatumWriter<T> datumWriter =
      coder.getType().equals(GenericRecord.class)
          ? new GenericDatumWriter<>(coder.getSchema())
          : new ReflectDatumWriter<>(coder.getSchema());
  // The stream is a managed resource too, so it is closed even if codec setup or
  // create() fails before the writer has been opened on it.
  try (FileOutputStream os = new FileOutputStream(tmpFile);
      DataFileWriter<T> writer = new DataFileWriter<>(datumWriter)) {
    writer.setCodec(CodecFactory.fromString(codec));
    writer.create(coder.getSchema(), os);

    int recordIndex = 0;
    int syncIndex = syncBehavior == SyncBehavior.SYNC_RANDOM ? random.nextInt(syncInterval) : 0;

    for (T elem : elems) {
      writer.append(elem);
      recordIndex++;

      switch (syncBehavior) {
        case SYNC_REGULAR:
          if (recordIndex == syncInterval) {
            recordIndex = 0;
            writer.sync();
          }
          break;
        case SYNC_RANDOM:
          if (recordIndex == syncIndex) {
            recordIndex = 0;
            writer.sync();
            syncIndex = random.nextInt(syncInterval);
          }
          break;
        case SYNC_DEFAULT:
        default:
          // No explicit sync calls for the default behavior.
      }
    }
  }
  return path;
}