org.apache.avro.reflect.ReflectDatumWriter Java Examples

The following examples show how to use org.apache.avro.reflect.ReflectDatumWriter. Each example comes from an open-source project; the source file and license are noted above the code.
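
For orientation, here is a minimal round trip using ReflectDatumWriter together with ReflectDatumReader. This is a sketch rather than code from one of the projects below: the User POJO is hypothetical, and the schema is derived from the class via ReflectData.

import java.io.ByteArrayOutputStream;
import org.apache.avro.Schema;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.reflect.ReflectData;
import org.apache.avro.reflect.ReflectDatumReader;
import org.apache.avro.reflect.ReflectDatumWriter;

public class ReflectRoundTrip {

  public static class User {  // hypothetical POJO
    String name;
    int age;
  }

  public static void main(String[] args) throws Exception {
    // Derive the Avro schema from the Java class via reflection.
    Schema schema = ReflectData.get().getSchema(User.class);

    User user = new User();
    user.name = "alice";
    user.age = 30;

    // Serialize the POJO to Avro binary.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
    new ReflectDatumWriter<User>(schema).write(user, encoder);
    encoder.flush();

    // Read it back with the same schema.
    User copy = new ReflectDatumReader<User>(schema).read(
        null, DecoderFactory.get().binaryDecoder(out.toByteArray(), null));
    System.out.println(copy.name + " " + copy.age);  // prints: alice 30
  }
}
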
Example #1
Source File: AbstractAvroEventSerializer.java    From mt-flume with Apache License 2.0
@Override
public void configure(Context context) {

  int syncIntervalBytes =
      context.getInteger(SYNC_INTERVAL_BYTES, DEFAULT_SYNC_INTERVAL_BYTES);
  String compressionCodec =
      context.getString(COMPRESSION_CODEC, DEFAULT_COMPRESSION_CODEC);

  writer = new ReflectDatumWriter<T>(getSchema());
  dataFileWriter = new DataFileWriter<T>(writer);

  dataFileWriter.setSyncInterval(syncIntervalBytes);

  try {
    CodecFactory codecFactory = CodecFactory.fromString(compressionCodec);
    dataFileWriter.setCodec(codecFactory);
  } catch (AvroRuntimeException e) {
    logger.warn("Unable to instantiate avro codec with name (" +
        compressionCodec + "). Compression disabled. Exception follows.", e);
  }
}
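
Note that setSyncInterval sets the approximate number of uncompressed bytes written between sync markers in the container file, and an unrecognized codec name simply leaves the file uncompressed rather than failing configuration.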
 
Example #2
Source File: AvroKeyWithMetadataRecordWriter.java    From datafu with Apache License 2.0
/**
 * Constructor.
 *
 * @param writerSchema The writer schema for the records in the Avro container file.
 * @param compressionCodec A compression codec factory for the Avro container file.
 * @param outputStream The output stream to write the Avro container file to.
 * @param conf the configuration
 * @throws IOException If the record writer cannot be opened.
 */
public AvroKeyWithMetadataRecordWriter(Schema writerSchema, CodecFactory compressionCodec,
    OutputStream outputStream, Configuration conf) throws IOException {
  // Create an Avro container file and a writer to it.
  mAvroFileWriter = new DataFileWriter<T>(new ReflectDatumWriter<T>(writerSchema));
  mAvroFileWriter.setCodec(compressionCodec);
  
  for (Entry<String,String> e : conf)
  {
    if (e.getKey().startsWith(TEXT_PREFIX))
      mAvroFileWriter.setMeta(e.getKey().substring(TEXT_PREFIX.length()),
                              e.getValue());
  }
  
  mAvroFileWriter.create(writerSchema, outputStream);
}
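
A sketch of constructing this record writer; MyRecord, fileSystem, outputPath, and conf are placeholders, and the null codec stands in for whatever compression the job actually configures.

// Sketch only: all names here are hypothetical.
Schema schema = ReflectData.get().getSchema(MyRecord.class);
AvroKeyWithMetadataRecordWriter<MyRecord> writer = new AvroKeyWithMetadataRecordWriter<>(
    schema, CodecFactory.nullCodec(), fileSystem.create(outputPath), conf);
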
 
Example #3
Source File: Log4jAppender.java    From mt-flume with Apache License 2.0
private byte[] serialize(Object datum, Schema datumSchema) throws FlumeException {
  if (schema == null || !datumSchema.equals(schema)) {
    schema = datumSchema;
    out = new ByteArrayOutputStream();
    writer = new ReflectDatumWriter<Object>(schema);
    encoder = EncoderFactory.get().binaryEncoder(out, null);
  }
  out.reset();
  try {
    writer.write(datum, encoder);
    encoder.flush();
    return out.toByteArray();
  } catch (IOException e) {
    throw new FlumeException(e);
  }
}
 
Example #4
Source File: Log4jAppender.java    From kite with Apache License 2.0
private byte[] serialize(Object datum, Schema datumSchema) throws FlumeException {
  if (schema == null || !datumSchema.equals(schema)) {
    schema = datumSchema;
    out = new ByteArrayOutputStream();
    writer = new ReflectDatumWriter<Object>(schema);
    encoder = EncoderFactory.get().binaryEncoder(out, null);
  }
  out.reset();
  try {
    writer.write(datum, encoder);
    encoder.flush();
    return out.toByteArray();
  } catch (IOException e) {
    throw new FlumeException(e);
  }
}
 
Example #5
Source File: AvroTestUtil.java    From hadoop with Apache License 2.0
public static void testReflect(Object value, Type type, String schema)
  throws Exception {

  // check that schema matches expected
  Schema s = ReflectData.get().getSchema(type);
  assertEquals(Schema.parse(schema), s);

  // check that value is serialized correctly
  ReflectDatumWriter<Object> writer = new ReflectDatumWriter<Object>(s);
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  writer.write(value, EncoderFactory.get().directBinaryEncoder(out, null));
  ReflectDatumReader<Object> reader = new ReflectDatumReader<Object>(s);
  Object after =
    reader.read(null,
                DecoderFactory.get().binaryDecoder(out.toByteArray(), null));
  assertEquals(value, after);
}
 
Example #6
Source File: AvroTestUtil.java    From big-c with Apache License 2.0
public static void testReflect(Object value, Type type, String schema)
  throws Exception {

  // check that schema matches expected
  Schema s = ReflectData.get().getSchema(type);
  assertEquals(Schema.parse(schema), s);

  // check that value is serialized correctly
  ReflectDatumWriter<Object> writer = new ReflectDatumWriter<Object>(s);
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  writer.write(value, EncoderFactory.get().directBinaryEncoder(out, null));
  ReflectDatumReader<Object> reader = new ReflectDatumReader<Object>(s);
  Object after =
    reader.read(null,
                DecoderFactory.get().binaryDecoder(out.toByteArray(), null));
  assertEquals(value, after);
}
 
Example #7
Source File: AvroIO.java    From beam with Apache License 2.0
@Override
public void open(WritableByteChannel channel) throws IOException {
  this.schema = new Schema.Parser().parse(getJsonSchema());
  DataFileWriter<?> writer;
  if (getRecordFormatter() == null) {
    writer = reflectWriter = new DataFileWriter<>(new ReflectDatumWriter<>(schema));
  } else {
    writer = genericWriter = new DataFileWriter<>(new GenericDatumWriter<>(schema));
  }
  writer.setCodec(getCodec().getCodec());
  for (Map.Entry<String, Object> entry : getMetadata().entrySet()) {
    Object v = entry.getValue();
    if (v instanceof String) {
      writer.setMeta(entry.getKey(), (String) v);
    } else if (v instanceof Long) {
      writer.setMeta(entry.getKey(), (Long) v);
    } else if (v instanceof byte[]) {
      writer.setMeta(entry.getKey(), (byte[]) v);
    } else {
      throw new IllegalStateException(
          "Metadata value type must be one of String, Long, or byte[]. Found "
              + v.getClass().getSimpleName());
    }
  }
  writer.create(schema, Channels.newOutputStream(channel));
}
 
Example #8
Source File: AvroCoder.java    From beam with Apache License 2.0
protected AvroCoder(Class<T> type, Schema schema) {
  this.type = type;
  this.schemaSupplier = new SerializableSchemaSupplier(schema);
  typeDescriptor = TypeDescriptor.of(type);
  nonDeterministicReasons = new AvroDeterminismChecker().check(TypeDescriptor.of(type), schema);

  // Decoder and Encoder start off null for each thread. They are allocated and potentially
  // reused inside encode/decode.
  this.decoder = new EmptyOnDeserializationThreadLocal<>();
  this.encoder = new EmptyOnDeserializationThreadLocal<>();

  this.reflectData = Suppliers.memoize(new SerializableReflectDataSupplier(getType()));

  // Reader and writer are allocated once per thread per Coder
  this.reader =
      new EmptyOnDeserializationThreadLocal<DatumReader<T>>() {
        private final AvroCoder<T> myCoder = AvroCoder.this;

        @Override
        public DatumReader<T> initialValue() {
          return myCoder.getType().equals(GenericRecord.class)
              ? new GenericDatumReader<>(myCoder.getSchema())
              : new ReflectDatumReader<>(
                  myCoder.getSchema(), myCoder.getSchema(), myCoder.reflectData.get());
        }
      };

  this.writer =
      new EmptyOnDeserializationThreadLocal<DatumWriter<T>>() {
        private final AvroCoder<T> myCoder = AvroCoder.this;

        @Override
        public DatumWriter<T> initialValue() {
          return myCoder.getType().equals(GenericRecord.class)
              ? new GenericDatumWriter<>(myCoder.getSchema())
              : new ReflectDatumWriter<>(myCoder.getSchema(), myCoder.reflectData.get());
        }
      };
}
 
Example #9
Source File: TestAvroEventSerializer.java    From mt-flume with Apache License 2.0
private byte[] serializeAvro(Object datum, Schema schema) throws IOException {
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  ReflectDatumWriter<Object> writer = new ReflectDatumWriter<Object>(schema);
  BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
  out.reset();
  writer.write(datum, encoder);
  encoder.flush();
  return out.toByteArray();
}
 
Example #10
Source File: AvroKeyValueWithMetadataRecordWriter.java    From datafu with Apache License 2.0
public AvroKeyValueWithMetadataRecordWriter(AvroDatumConverter<K, ?> keyConverter,
    AvroDatumConverter<V, ?> valueConverter, CodecFactory compressionCodec,
    OutputStream outputStream, Configuration conf) throws IOException {
  // Create the generic record schema for the key/value pair.
  mKeyValuePairSchema = AvroKeyValue.getSchema(
      keyConverter.getWriterSchema(), valueConverter.getWriterSchema());

  // Create an Avro container file and a writer to it.
  mAvroFileWriter = new DataFileWriter<GenericRecord>(
      new ReflectDatumWriter<GenericRecord>(mKeyValuePairSchema));
  mAvroFileWriter.setCodec(compressionCodec);
  
  for (Entry<String,String> e : conf)
  {
    if (e.getKey().startsWith(TEXT_PREFIX))
      mAvroFileWriter.setMeta(e.getKey().substring(TEXT_PREFIX.length()),
                              e.getValue());
  }
  
  mAvroFileWriter.create(mKeyValuePairSchema, outputStream);

  // Keep a reference to the converters.
  mKeyConverter = keyConverter;
  mValueConverter = valueConverter;

  // Create a reusable output record.
  mOutputRecord = new AvroKeyValue<Object, Object>(new GenericData.Record(mKeyValuePairSchema));
}
 
Example #11
Source File: AvroOutputFormat.java    From flink with Apache License 2.0
@Override
public void open(int taskNumber, int numTasks) throws IOException {
	super.open(taskNumber, numTasks);

	DatumWriter<E> datumWriter;
	Schema schema;
	if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
		datumWriter = new SpecificDatumWriter<E>(avroValueType);
		try {
			schema = ((org.apache.avro.specific.SpecificRecordBase) avroValueType.newInstance()).getSchema();
		} catch (InstantiationException | IllegalAccessException e) {
			throw new RuntimeException(e.getMessage());
		}
	} else if (org.apache.avro.generic.GenericRecord.class.isAssignableFrom(avroValueType)) {
		if (userDefinedSchema == null) {
			throw new IllegalStateException("Schema must be set when using Generic Record");
		}
		datumWriter = new GenericDatumWriter<E>(userDefinedSchema);
		schema = userDefinedSchema;
	} else {
		datumWriter = new ReflectDatumWriter<E>(avroValueType);
		schema = ReflectData.get().getSchema(avroValueType);
	}
	dataFileWriter = new DataFileWriter<E>(datumWriter);
	if (codec != null) {
		dataFileWriter.setCodec(codec.getCodecFactory());
	}
	if (userDefinedSchema == null) {
		dataFileWriter.create(schema, stream);
	} else {
		dataFileWriter.create(userDefinedSchema, stream);
	}
}
 
Example #12
Source File: AvroWriters.java    From flink with Apache License 2.0
/**
 * Creates an {@link AvroWriterFactory} for the given type. The Avro writers will
 * use reflection to create the schema for the type and use that schema to write
 * the records.
 *
 * @param type The class of the type to write.
 */
public static <T> AvroWriterFactory<T> forReflectRecord(Class<T> type) {
	String schemaString = ReflectData.get().getSchema(type).toString();
	AvroBuilder<T> builder = (out) -> createAvroDataFileWriter(
		schemaString,
		ReflectDatumWriter::new,
		out);
	return new AvroWriterFactory<>(builder);
}
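
A sketch of how such a factory is typically wired into Flink's bulk-format StreamingFileSink; MyEvent is a hypothetical POJO, the output path is a placeholder, and stream is assumed to be an existing DataStream<MyEvent>.

// Sketch only: wiring the reflect-based writer factory into a sink.
// MyEvent and the path are placeholders, not part of the original code.
StreamingFileSink<MyEvent> sink = StreamingFileSink
	.forBulkFormat(new Path("/tmp/avro-out"), AvroWriters.forReflectRecord(MyEvent.class))
	.build();
stream.addSink(sink);
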
 
Example #13
Source File: AvroFactory.java    From flink with Apache License 2.0
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
private static <T> AvroFactory<T> fromReflective(Class<T> type, ClassLoader cl, Optional<Schema> previousSchema) {
	ReflectData reflectData = new ReflectData(cl);
	Schema newSchema = reflectData.getSchema(type);

	return new AvroFactory<>(
		reflectData,
		newSchema,
		new ReflectDatumReader<>(previousSchema.orElse(newSchema), newSchema, reflectData),
		new ReflectDatumWriter<>(newSchema, reflectData)
	);
}
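
Note the argument order here: ReflectDatumReader takes the writer schema first (the schema the data was written with, i.e. the previous schema when one exists) and the reader schema second, which is what lets Avro resolve the two when the class has evolved.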
 
Example #14
Source File: AvroWriter.java    From pulsar with Apache License 2.0
public AvroWriter(Schema schema, boolean jsr310ConversionEnabled) {
    this.byteArrayOutputStream = new ByteArrayOutputStream();
    this.encoder = EncoderFactory.get().binaryEncoder(this.byteArrayOutputStream, this.encoder);
    ReflectData reflectData = new ReflectData();
    AvroSchema.addLogicalTypeConversions(reflectData, jsr310ConversionEnabled);
    this.writer = new ReflectDatumWriter<>(schema, reflectData);
}
 
Example #15
Source File: AvroBaseValue.java    From stratosphere with Apache License 2.0
private ReflectDatumWriter<T> getWriter() {
	if (this.writer == null) {
		@SuppressWarnings("unchecked")
		Class<T> clazz = (Class<T>) datum().getClass();
		this.writer = new ReflectDatumWriter<T>(clazz);
	}
	return this.writer;
}
 
Example #16
Source File: AvroSink.java    From beam with Apache License 2.0
@SuppressWarnings("deprecation") // uses internal test functionality.
@Override
protected void prepareWrite(WritableByteChannel channel) throws Exception {
  DestinationT destination = getDestination();
  CodecFactory codec = dynamicDestinations.getCodec(destination);
  Schema schema = dynamicDestinations.getSchema(destination);
  Map<String, Object> metadata = dynamicDestinations.getMetadata(destination);

  DatumWriter<OutputT> datumWriter =
      genericRecords ? new GenericDatumWriter<>(schema) : new ReflectDatumWriter<>(schema);
  dataFileWriter = new DataFileWriter<>(datumWriter).setCodec(codec);
  for (Map.Entry<String, Object> entry : metadata.entrySet()) {
    Object v = entry.getValue();
    if (v instanceof String) {
      dataFileWriter.setMeta(entry.getKey(), (String) v);
    } else if (v instanceof Long) {
      dataFileWriter.setMeta(entry.getKey(), (Long) v);
    } else if (v instanceof byte[]) {
      dataFileWriter.setMeta(entry.getKey(), (byte[]) v);
    } else {
      throw new IllegalStateException(
          "Metadata value type must be one of String, Long, or byte[]. Found "
              + v.getClass().getSimpleName());
    }
  }
  dataFileWriter.create(schema, Channels.newOutputStream(channel));
}
 
Example #17
Source File: AvroCodec.java    From schema-evolution-samples with Apache License 2.0
private DatumWriter getDatumWriter(Class<?> type, Schema schema){
	DatumWriter writer = null;
	logger.debug("Finding correct DatumWriter for type {}",type.getName());
	if(SpecificRecord.class.isAssignableFrom(type)){
		writer = new SpecificDatumWriter<>(schema);
	}else if(GenericRecord.class.isAssignableFrom(type)){
		writer = new GenericDatumWriter<>(schema);
	}else{
		writer = new ReflectDatumWriter<>(schema);
	}
	logger.debug("DatumWriter of type {} selected",writer.getClass().getName());
	return writer;
}
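
For symmetry, a sketch of a matching reader-side selection; this method is not part of the original class, and its name and shape are assumptions, but it mirrors the SpecificRecord → GenericRecord → reflect precedence above.

// Sketch only: hypothetical reader-side counterpart to getDatumWriter.
private DatumReader getDatumReader(Class<?> type, Schema writerSchema, Schema readerSchema){
	if(SpecificRecord.class.isAssignableFrom(type)){
		return new SpecificDatumReader<>(writerSchema, readerSchema);
	}else if(GenericRecord.class.isAssignableFrom(type)){
		return new GenericDatumReader<>(writerSchema, readerSchema);
	}
	return new ReflectDatumReader<>(writerSchema, readerSchema);
}
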
 
Example #18
Source File: AvroSerializer.java    From stratosphere with Apache License 2.0
private final void checkAvroInitialized() {
	if (this.reader == null) {
		this.reader = new ReflectDatumReader<T>(type);
		this.writer = new ReflectDatumWriter<T>(type);
		this.encoder = new DataOutputEncoder();
		this.decoder = new DataInputDecoder();
	}
}
 
Example #19
Source File: AvroAppender.java    From kite with Apache License 2.0
@Override
public void open() throws IOException {
  writer = new ReflectDatumWriter<E>();
  dataFileWriter = new DataFileWriter<E>(writer);

  if (enableCompression) {
    dataFileWriter.setCodec(getCodecFactory());
  }

  out = fileSystem.create(path, true);
  dataFileWriter.create(schema, out);
}
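
Here the no-argument ReflectDatumWriter starts without a schema; DataFileWriter.create(schema, out) supplies it to the underlying datum writer before any records are appended.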
 
Example #20
Source File: AvroOutputFormat.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(
  TaskAttemptContext context) throws IOException, InterruptedException {

  boolean isMapOnly = context.getNumReduceTasks() == 0;
  Schema schema =
    isMapOnly ? AvroJob.getMapOutputSchema(context.getConfiguration())
      : AvroJob.getOutputSchema(context.getConfiguration());

  final DataFileWriter<T> WRITER =
    new DataFileWriter<T>(new ReflectDatumWriter<T>());

  configureDataFileWriter(WRITER, context);

  Path path = getDefaultWorkFile(context, EXT);
  WRITER.create(schema,
    path.getFileSystem(context.getConfiguration()).create(path));

  return new RecordWriter<AvroWrapper<T>, NullWritable>() {
    @Override
    public void write(AvroWrapper<T> wrapper, NullWritable ignore)
      throws IOException {
      WRITER.append(wrapper.datum());
    }

    @Override
    public void close(TaskAttemptContext taskAttemptContext)
      throws IOException, InterruptedException {
      WRITER.close();
    }
  };
}
 
Example #21
Source File: AvroOutputFormat.java    From flink with Apache License 2.0
@Override
public void open(int taskNumber, int numTasks) throws IOException {
	super.open(taskNumber, numTasks);

	DatumWriter<E> datumWriter;
	Schema schema;
	if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
		datumWriter = new SpecificDatumWriter<E>(avroValueType);
		try {
			schema = ((org.apache.avro.specific.SpecificRecordBase) avroValueType.newInstance()).getSchema();
		} catch (InstantiationException | IllegalAccessException e) {
			throw new RuntimeException(e.getMessage());
		}
	} else if (org.apache.avro.generic.GenericRecord.class.isAssignableFrom(avroValueType)) {
		if (userDefinedSchema == null) {
			throw new IllegalStateException("Schema must be set when using Generic Record");
		}
		datumWriter = new GenericDatumWriter<E>(userDefinedSchema);
		schema = userDefinedSchema;
	} else {
		datumWriter = new ReflectDatumWriter<E>(avroValueType);
		schema = ReflectData.get().getSchema(avroValueType);
	}
	dataFileWriter = new DataFileWriter<E>(datumWriter);
	if (codec != null) {
		dataFileWriter.setCodec(codec.getCodecFactory());
	}
	if (userDefinedSchema == null) {
		dataFileWriter.create(schema, stream);
	} else {
		dataFileWriter.create(userDefinedSchema, stream);
	}
}
 
Example #22
Source File: AvroFactory.java    From flink with Apache License 2.0
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
private static <T> AvroFactory<T> fromReflective(Class<T> type, ClassLoader cl, Optional<Schema> previousSchema) {
	ReflectData reflectData = new ReflectData(cl);
	Schema newSchema = reflectData.getSchema(type);

	return new AvroFactory<>(
		reflectData,
		newSchema,
		new ReflectDatumReader<>(previousSchema.orElse(newSchema), newSchema, reflectData),
		new ReflectDatumWriter<>(newSchema, reflectData)
	);
}
 
Example #23
Source File: AvroOutputFormat.java    From Flink-CEPplus with Apache License 2.0
@Override
public void open(int taskNumber, int numTasks) throws IOException {
	super.open(taskNumber, numTasks);

	DatumWriter<E> datumWriter;
	Schema schema;
	if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
		datumWriter = new SpecificDatumWriter<E>(avroValueType);
		try {
			schema = ((org.apache.avro.specific.SpecificRecordBase) avroValueType.newInstance()).getSchema();
		} catch (InstantiationException | IllegalAccessException e) {
			throw new RuntimeException(e.getMessage());
		}
	} else if (org.apache.avro.generic.GenericRecord.class.isAssignableFrom(avroValueType)) {
		if (userDefinedSchema == null) {
			throw new IllegalStateException("Schema must be set when using Generic Record");
		}
		datumWriter = new GenericDatumWriter<E>(userDefinedSchema);
		schema = userDefinedSchema;
	} else {
		datumWriter = new ReflectDatumWriter<E>(avroValueType);
		schema = ReflectData.get().getSchema(avroValueType);
	}
	dataFileWriter = new DataFileWriter<E>(datumWriter);
	if (codec != null) {
		dataFileWriter.setCodec(codec.getCodecFactory());
	}
	if (userDefinedSchema == null) {
		dataFileWriter.create(schema, stream);
	} else {
		dataFileWriter.create(userDefinedSchema, stream);
	}
}
 
Example #24
Source File: AvroFactory.java    From Flink-CEPplus with Apache License 2.0
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
private static <T> AvroFactory<T> fromReflective(Class<T> type, ClassLoader cl, Optional<Schema> previousSchema) {
	ReflectData reflectData = new ReflectData(cl);
	Schema newSchema = reflectData.getSchema(type);

	return new AvroFactory<>(
		reflectData,
		newSchema,
		new ReflectDatumReader<>(previousSchema.orElse(newSchema), newSchema, reflectData),
		new ReflectDatumWriter<>(newSchema, reflectData)
	);
}
 
Example #25
Source File: AvroExternalJarProgram.java    From stratosphere with Apache License 2.0
public static void writeTestData(File testFile, int numRecords) throws IOException {

	DatumWriter<MyUser> userDatumWriter = new ReflectDatumWriter<MyUser>(MyUser.class);
	DataFileWriter<MyUser> dataFileWriter = new DataFileWriter<MyUser>(userDatumWriter);

	dataFileWriter.create(ReflectData.get().getSchema(MyUser.class), testFile);

	Generator generator = new Generator();

	for (int i = 0; i < numRecords; i++) {
		MyUser user = generator.nextUser();
		dataFileWriter.append(user);
	}

	dataFileWriter.close();
}
 
Example #26
Source File: SimpleAvroArchiveBuilder.java    From occurrence with Apache License 2.0
/**
 * Merges the content of sourceFS:sourcePath into targetFS:outputPath in a file called downloadKey.avro.
 */
public static void mergeToSingleAvro(final FileSystem sourceFS, FileSystem targetFS, String sourcePath,
                                     String targetPath, String downloadKey) throws IOException {

  Path outputPath = new Path(targetPath, downloadKey + AVRO_EXTENSION);

  ReflectDatumWriter<GenericContainer> rdw = new ReflectDatumWriter<>(GenericContainer.class);
  ReflectDatumReader<GenericContainer> rdr = new ReflectDatumReader<>(GenericContainer.class);
  boolean created = false; // becomes true once the output container file is created

  try (
    FSDataOutputStream zipped = targetFS.create(outputPath, true);
    DataFileWriter<GenericContainer> dfw = new DataFileWriter<>(rdw)
  ) {

    final Path inputPath = new Path(sourcePath);

    FileStatus[] hdfsFiles = sourceFS.listStatus(inputPath);

    for (FileStatus fs : hdfsFiles) {
      try (InputStream is = sourceFS.open(fs.getPath());
          DataFileStream<GenericContainer> dfs = new DataFileStream<>(is, rdr)) {
        if (!created) {
          dfw.setCodec(CodecFactory.deflateCodec(-1));
          dfw.setFlushOnEveryBlock(false);
          dfw.create(dfs.getSchema(), zipped);
          created = true;
        }

        dfw.appendAllFrom(dfs, false);
      }

    }

    dfw.flush();
    dfw.close();
    zipped.flush();

  } catch (Exception ex) {
    LOG.error("Error combining Avro files", ex);
    throw Throwables.propagate(ex);
  }
}
 
Example #27
Source File: ReflectAvroDatumProvider.java    From apicurio-registry with Apache License 2.0
@Override
public DatumWriter<T> createDatumWriter(T data, Schema schema) {
    return new ReflectDatumWriter<>(schema);
}
 
Example #28
Source File: AvroExternalJarProgram.java    From flink with Apache License 2.0
public static void writeTestData(File testFile, int numRecords) throws IOException {

	DatumWriter<MyUser> userDatumWriter = new ReflectDatumWriter<MyUser>(MyUser.class);
	DataFileWriter<MyUser> dataFileWriter = new DataFileWriter<MyUser>(userDatumWriter);

	dataFileWriter.create(ReflectData.get().getSchema(MyUser.class), testFile);

	Generator generator = new Generator();

	for (int i = 0; i < numRecords; i++) {
		MyUser user = generator.nextUser();
		dataFileWriter.append(user);
	}

	dataFileWriter.close();
}
 
Example #29
Source File: TestAvroSchemaHandler.java    From pulsar with Apache License 2.0
@Test
public void testAvroSchemaHandler() throws IOException {
    List<PulsarColumnHandle> columnHandles = new ArrayList<>();
    RawMessage message = mock(RawMessage.class);
    Schema schema1 = ReflectData.AllowNull.get().getSchema(Foo1.class);
    PulsarSqlSchemaInfoProvider pulsarSqlSchemaInfoProvider = mock(PulsarSqlSchemaInfoProvider.class);
    AvroSchemaHandler avroSchemaHandler = new AvroSchemaHandler(pulsarSqlSchemaInfoProvider,
            StructSchema.parseSchemaInfo(SchemaDefinition.builder().withPojo(Foo2.class).build(), SchemaType.AVRO), columnHandles);
    byte[] schemaVersion = new byte[8]; // array elements default to zero
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(byteArrayOutputStream, null);
    ReflectDatumWriter<Foo1> writer = new ReflectDatumWriter<>(schema1);
    Foo1 foo1 = new Foo1();
    foo1.setField1("value1");
    foo1.setBar(new Bar());
    foo1.getBar().setField1("value1");
    writer.write(foo1, encoder);
    encoder.flush();
    when(message.getSchemaVersion()).thenReturn(schemaVersion);
    byte[] bytes = byteArrayOutputStream.toByteArray();

    when(message.getData()).thenReturn(ByteBufAllocator.DEFAULT
            .buffer(bytes.length, bytes.length).writeBytes(byteArrayOutputStream.toByteArray()));
    when(pulsarSqlSchemaInfoProvider.getSchemaByVersion(any()))
            .thenReturn(completedFuture(StructSchema.parseSchemaInfo(SchemaDefinition.builder()
                    .withPojo(Foo1.class).build(), SchemaType.AVRO)));

    Object object = ((GenericAvroRecord)avroSchemaHandler.deserialize(message.getData(),
            message.getSchemaVersion())).getField("field1");
    Assert.assertEquals(foo1.field1, (String)object);
    String[] fields = new String[2];
    fields[0] = "bar";
    fields[1] = "field1";
    PulsarColumnHandle pulsarColumnHandle = new PulsarColumnHandle("test",
            "bar.field1",
            BigintType.BIGINT,
            true,
            true,
            fields,
            new Integer[5],
            null);
    columnHandles.add(pulsarColumnHandle);
    when(message.getData()).thenReturn(ByteBufAllocator.DEFAULT
            .buffer(bytes.length, bytes.length).writeBytes(byteArrayOutputStream.toByteArray()));
    object = avroSchemaHandler.extractField(0, avroSchemaHandler.deserialize(message.getData(),
            message.getSchemaVersion()));
    Assert.assertEquals(foo1.bar.field1, (String)object);
}
 
Example #30
Source File: AvroSourceTest.java    From beam with Apache License 2.0
/**
 * Generates an input Avro file containing the given records in the temporary directory and
 * returns the full path of the file.
 */
private <T> String generateTestFile(
    String filename,
    List<T> elems,
    SyncBehavior syncBehavior,
    int syncInterval,
    AvroCoder<T> coder,
    String codec)
    throws IOException {
  Random random = new Random(0);
  File tmpFile = tmpFolder.newFile(filename);
  String path = tmpFile.toString();

  FileOutputStream os = new FileOutputStream(tmpFile);
  DatumWriter<T> datumWriter =
      coder.getType().equals(GenericRecord.class)
          ? new GenericDatumWriter<>(coder.getSchema())
          : new ReflectDatumWriter<>(coder.getSchema());
  try (DataFileWriter<T> writer = new DataFileWriter<>(datumWriter)) {
    writer.setCodec(CodecFactory.fromString(codec));
    writer.create(coder.getSchema(), os);

    int recordIndex = 0;
    int syncIndex = syncBehavior == SyncBehavior.SYNC_RANDOM ? random.nextInt(syncInterval) : 0;

    for (T elem : elems) {
      writer.append(elem);
      recordIndex++;

      switch (syncBehavior) {
        case SYNC_REGULAR:
          if (recordIndex == syncInterval) {
            recordIndex = 0;
            writer.sync();
          }
          break;
        case SYNC_RANDOM:
          if (recordIndex == syncIndex) {
            recordIndex = 0;
            writer.sync();
            syncIndex = random.nextInt(syncInterval);
          }
          break;
        case SYNC_DEFAULT:
        default:
      }
    }
  }
  return path;
}