org.apache.avro.reflect.ReflectData Java Examples

The following examples show how to use org.apache.avro.reflect.ReflectData. Each example is taken from an open source project; the originating project and source file are noted above it.
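
A minimal, self-contained sketch of the pattern most of the examples below build on: derive an Avro Schema from a plain Java class with ReflectData, then round-trip an instance with ReflectDatumWriter and ReflectDatumReader. The User class is a hypothetical POJO invented for this sketch, not taken from any of the projects listed.

import java.io.ByteArrayOutputStream;

import org.apache.avro.Schema;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.reflect.ReflectData;
import org.apache.avro.reflect.ReflectDatumReader;
import org.apache.avro.reflect.ReflectDatumWriter;

public class ReflectDataRoundTrip {

  // Hypothetical POJO; any class with a no-arg constructor and simple fields works.
  public static class User {
    String name;
    int age;
  }

  public static void main(String[] args) throws Exception {
    // Derive the Avro schema from the class via reflection.
    Schema schema = ReflectData.get().getSchema(User.class);

    User user = new User();
    user.name = "alice";
    user.age = 30;

    // Serialize with a ReflectDatumWriter.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
    new ReflectDatumWriter<User>(schema).write(user, encoder);
    encoder.flush();

    // Deserialize with a ReflectDatumReader.
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(out.toByteArray(), null);
    User copy = new ReflectDatumReader<User>(schema).read(null, decoder);
    System.out.println(copy.name + " / " + copy.age);
  }
}
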
Example #1
Source File: PubsubIO.java    From beam with Apache License 2.0
/**
 * Returns a {@link PTransform} that continuously reads binary encoded Avro messages of the
 * specified type.
 *
 * <p>Beam will infer a Beam schema from the Avro schema. This allows the output to be used by SQL
 * and by the schema-transform library.
 */
@Experimental(Kind.SCHEMAS)
public static <T> Read<T> readAvrosWithBeamSchema(Class<T> clazz) {
  if (clazz.equals(GenericRecord.class)) {
    throw new IllegalArgumentException("For GenericRecord, please call readAvroGenericRecords");
  }
  org.apache.avro.Schema avroSchema = ReflectData.get().getSchema(clazz);
  AvroCoder<T> coder = AvroCoder.of(clazz);
  Schema schema = AvroUtils.getSchema(clazz, null);
  return Read.newBuilder(parsePayloadUsingCoder(coder))
      .setCoder(
          SchemaCoder.of(
              schema,
              TypeDescriptor.of(clazz),
              AvroUtils.getToRowFunction(clazz, avroSchema),
              AvroUtils.getFromRowFunction(clazz)))
      .build();
}
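
As a rough usage sketch (not part of the Beam source above), the transform is typically applied in a pipeline like this; MyEvent and the subscription path are placeholders invented for illustration:

// Hypothetical usage; MyEvent and the subscription name are placeholders.
Pipeline pipeline = Pipeline.create();
PCollection<MyEvent> events =
    pipeline.apply(
        PubsubIO.readAvrosWithBeamSchema(MyEvent.class)
            .fromSubscription("projects/my-project/subscriptions/my-subscription"));
// Because the output carries a Beam schema, it can feed schema-aware transforms
// such as SqlTransform without further conversion.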
 
Example #2
Source File: CSVAppender.java    From kite with Apache License 2.0
/**
 * Returns the value serialized as the first matching schema type, or null.
 *
 * Note that the value may be null even if the schema does not allow the
 * value to be null.
 *
 * @param value a value
 * @param schema a Schema
 * @return a String representation of the value according to the Schema type
 */
private static String valueString(Object value, Schema schema) {
  if (value == null || schema.getType() == Schema.Type.NULL) {
    return null;
  }

  switch (schema.getType()) {
    case BOOLEAN:
    case FLOAT:
    case DOUBLE:
    case INT:
    case LONG:
    case STRING:
      return value.toString();
    case ENUM:
      // serialize as the ordinal from the schema
      return String.valueOf(schema.getEnumOrdinal(value.toString()));
    case UNION:
      int index = ReflectData.get().resolveUnion(schema, value);
      return valueString(value, schema.getTypes().get(index));
    default:
      // FIXED, BYTES, MAP, ARRAY, RECORD are not supported
      throw new DatasetOperationException(
          "Unsupported field type:" + schema.getType());
  }
}
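
The UNION case above relies on resolveUnion, which ReflectData inherits from GenericData, to pick the branch that matches the value. A standalone sketch with a hand-built union schema (not from the kite source) shows what the call returns:

import java.util.Arrays;

import org.apache.avro.Schema;
import org.apache.avro.reflect.ReflectData;

public class ResolveUnionSketch {
  public static void main(String[] args) {
    // Union of null, string and int.
    Schema union = Schema.createUnion(Arrays.asList(
        Schema.create(Schema.Type.NULL),
        Schema.create(Schema.Type.STRING),
        Schema.create(Schema.Type.INT)));

    // resolveUnion returns the index of the branch matching the value.
    int stringBranch = ReflectData.get().resolveUnion(union, "hello"); // 1
    int intBranch = ReflectData.get().resolveUnion(union, 42);         // 2
    int nullBranch = ReflectData.get().resolveUnion(union, null);      // 0

    System.out.println(union.getTypes().get(stringBranch)); // "string"
    System.out.println(stringBranch + " " + intBranch + " " + nullBranch);
  }
}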
 
Example #3
Source File: ParquetStreamingFileSinkITCase.java    From flink with Apache License 2.0
@Test
public void testWriteParquetAvroReflect() throws Exception {

	final File folder = TEMPORARY_FOLDER.newFolder();

	final List<Datum> data = Arrays.asList(
			new Datum("a", 1), new Datum("b", 2), new Datum("c", 3));

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.enableCheckpointing(100);

	DataStream<Datum> stream = env.addSource(
			new FiniteTestSource<>(data), TypeInformation.of(Datum.class));

	stream.addSink(
			StreamingFileSink.forBulkFormat(
					Path.fromLocalFile(folder),
					ParquetAvroWriters.forReflectRecord(Datum.class))
					.build());

	env.execute();

	validateResults(folder, ReflectData.get(), data);
}
 
Example #4
Source File: AvroCodecTests.java    From schema-evolution-samples with Apache License 2.0
@Test
public void dynamicReflectEncoderReflectDecoder() throws Exception{
	Schema schema = ReflectData.get().getSchema(Account.class);
	SchemaRegistryClient client = mock(SchemaRegistryClient.class);
	when(client.register(any())).thenReturn(10);
	when(client.fetch(eq(10))).thenReturn(schema);
	AvroCodec codec = new AvroCodec();
	AvroCodecProperties properties = new AvroCodecProperties();
	properties.setDynamicSchemaGenerationEnabled(true);
	codec.setProperties(properties);
	codec.setSchemaRegistryClient(client);
	codec.setResolver(new PathMatchingResourcePatternResolver(new AnnotationConfigApplicationContext()));
	codec.init();
	Account account = new Account();
	account.setCreatedAt(System.currentTimeMillis());
	account.setId(1L);
	byte[] results = codec.encode(account);
	Account decoded = codec.decode(results,Account.class);
	Assert.assertEquals(account.getId(), decoded.getId());
}
 
Example #5
Source File: TestDataModelUtil.java    From kite with Apache License 2.0
@Test
public void testCreateRecord() {
  assertNull("createRecord should not create Specific instances",
      DataModelUtil.createRecord(StandardEvent.class, StandardEvent.getClassSchema()));

  assertNull("createRecord should not create Reflect instances",
      DataModelUtil.createRecord(ReflectStandardEvent.class,
          ReflectData.get().getSchema(ReflectStandardEvent.class)));

  assertNotNull("createRecord should create Generic instances",
      DataModelUtil.createRecord(GenericData.Record.class,
          StandardEvent.getClassSchema()));

  assertEquals("createRecord did not return the expected class",
      TestGenericRecord.class,
      DataModelUtil.createRecord(TestGenericRecord.class,
          StandardEvent.getClassSchema()).getClass());
}
 
Example #6
Source File: AvroSchemaTest.java    From pulsar with Apache License 2.0
@Test
public void discardBufferIfBadAvroData() {
    AvroWriter<NasaMission> avroWriter = new AvroWriter<>(
            ReflectData.AllowNull.get().getSchema(NasaMission.class));

    NasaMission badNasaMissionData = new NasaMission();
    badNasaMissionData.setId(1);
    // Set null in a non-null field. The Java setter accepts it, but the Avro encoder will fail when writing.
    badNasaMissionData.setName(null);

    // Because the data does not conform to the schema, expect a serialization exception
    Assert.assertThrows(SchemaSerializationException.class, () -> avroWriter.write(badNasaMissionData));

    // Get the internal encoder using Powermock's Whitebox
    BinaryEncoder encoder = Whitebox.getInternalState(avroWriter, "encoder");

    // Assert that the buffer position is reset to zero
    Assert.assertEquals(((BufferedBinaryEncoder)encoder).bytesBuffered(), 0);
}
 
Example #7
Source File: AvroTestUtil.java    From hadoop with Apache License 2.0
public static void testReflect(Object value, Type type, String schema)
  throws Exception {

  // check that schema matches expected
  Schema s = ReflectData.get().getSchema(type);
  assertEquals(Schema.parse(schema), s);

  // check that value is serialized correctly
  ReflectDatumWriter<Object> writer = new ReflectDatumWriter<Object>(s);
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  writer.write(value, EncoderFactory.get().directBinaryEncoder(out, null));
  ReflectDatumReader<Object> reader = new ReflectDatumReader<Object>(s);
  Object after =
    reader.read(null,
                DecoderFactory.get().binaryDecoder(out.toByteArray(), null));
  assertEquals(value, after);
}
 
Example #8
Source File: TestReflectReadWrite.java    From parquet-mr with Apache License 2.0
private Path writePojosToParquetFile( int num, CompressionCodecName compression,
                                      boolean enableDictionary) throws IOException {
  File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
  tmp.deleteOnExit();
  tmp.delete();
  Path path = new Path(tmp.getPath());

  Pojo object = getPojo();

  Schema schema = ReflectData.get().getSchema(object.getClass());
  try(ParquetWriter<Pojo> writer = AvroParquetWriter.<Pojo>builder(path)
      .withSchema(schema)
      .withCompressionCodec(compression)
      .withDataModel(ReflectData.get())
      .withDictionaryEncoding(enableDictionary)
      .build()) {
    for (int i = 0; i < num; i++) {
      writer.write(object);
    }
  }
  return path;
}
 
Example #9
Source File: AvroSchema.java    From pulsar with Apache License 2.0
public static void addLogicalTypeConversions(ReflectData reflectData, boolean jsr310ConversionEnabled) {
    reflectData.addLogicalTypeConversion(new Conversions.DecimalConversion());
    reflectData.addLogicalTypeConversion(new TimeConversions.DateConversion());
    reflectData.addLogicalTypeConversion(new TimeConversions.TimeMillisConversion());
    reflectData.addLogicalTypeConversion(new TimeConversions.TimeMicrosConversion());
    reflectData.addLogicalTypeConversion(new TimeConversions.TimestampMicrosConversion());
    if (jsr310ConversionEnabled) {
        reflectData.addLogicalTypeConversion(new TimeConversions.TimestampMillisConversion());
    } else {
        try {
            Class.forName("org.joda.time.DateTime");
            reflectData.addLogicalTypeConversion(new JodaTimeConversions.TimestampConversion());
        } catch (ClassNotFoundException e) {
            // Skip if the joda-time dependency is not provided.
        }
    }
}
 
Example #11
Source File: TestFileSystemDatasetRepository.java    From kite with Apache License 2.0
@Test
public void testReadNullsWithPrimitivesAllowNullSchema() {
  final String name = "allowNullPrimitives";
  try {
    repo.create(NAMESPACE, name, new DatasetDescriptor.Builder()
        .schema(ReflectData.AllowNull.get().getSchema(ObjectPoJo.class))
        .build(), ObjectPoJo.class);

    // should load the dataset because PrimitivePoJo can be used to write
    final Dataset<PrimitivePoJo> dataset = repo.load(
        NAMESPACE, name, PrimitivePoJo.class);
    TestHelpers.assertThrows("AllowNull primitives cannot read nullable type",
        IncompatibleSchemaException.class, new Runnable() {
          @Override
          public void run() {
            dataset.newReader();
          }
        });

  } catch (RuntimeException e) {
    throw e;
  } finally {
    repo.delete(NAMESPACE, name);
  }
}
 
Example #12
Source File: AvroSourceTest.java    From beam with Apache License 2.0
@Test
public void testCreationWithSchema() throws Exception {
  List<Bird> expected = createRandomRecords(100);
  String filename =
      generateTestFile(
          "tmp.avro",
          expected,
          SyncBehavior.SYNC_DEFAULT,
          0,
          AvroCoder.of(Bird.class),
          DataFileConstants.NULL_CODEC);

  // Create a source with a schema object
  Schema schema = ReflectData.get().getSchema(Bird.class);
  AvroSource<GenericRecord> source = AvroSource.from(filename).withSchema(schema);
  List<GenericRecord> records = SourceTestUtils.readFromSource(source, null);
  assertEqualsWithGeneric(expected, records);

  // Create a source with a JSON schema
  String schemaString = ReflectData.get().getSchema(Bird.class).toString();
  source = AvroSource.from(filename).withSchema(schemaString);
  records = SourceTestUtils.readFromSource(source, null);
  assertEqualsWithGeneric(expected, records);
}
 
Example #13
Source File: ParquetIOTest.java    From beam with Apache License 2.0
@Test(expected = org.apache.beam.sdk.Pipeline.PipelineExecutionException.class)
public void testWriteAndReadUsingReflectDataSchemaWithoutDataModelThrowsException() {
  Schema testRecordSchema = ReflectData.get().getSchema(TestRecord.class);

  List<GenericRecord> records = generateGenericRecords(1000);
  mainPipeline
      .apply(Create.of(records).withCoder(AvroCoder.of(testRecordSchema)))
      .apply(
          FileIO.<GenericRecord>write()
              .via(ParquetIO.sink(testRecordSchema))
              .to(temporaryFolder.getRoot().getAbsolutePath()));
  mainPipeline.run().waitUntilFinish();

  PCollection<GenericRecord> readBack =
      readPipeline.apply(
          ParquetIO.read(testRecordSchema)
              .from(temporaryFolder.getRoot().getAbsolutePath() + "/*"));

  PAssert.that(readBack).containsInAnyOrder(records);
  readPipeline.run().waitUntilFinish();
}
 
Example #14
Source File: ParquetIOTest.java    From beam with Apache License 2.0
@Test
public void testWriteAndReadUsingReflectDataSchemaWithDataModel() {
  Schema testRecordSchema = ReflectData.get().getSchema(TestRecord.class);

  List<GenericRecord> records = generateGenericRecords(1000);
  mainPipeline
      .apply(Create.of(records).withCoder(AvroCoder.of(testRecordSchema)))
      .apply(
          FileIO.<GenericRecord>write()
              .via(ParquetIO.sink(testRecordSchema))
              .to(temporaryFolder.getRoot().getAbsolutePath()));
  mainPipeline.run().waitUntilFinish();

  PCollection<GenericRecord> readBack =
      readPipeline.apply(
          ParquetIO.read(testRecordSchema)
              .withAvroDataModel(GenericData.get())
              .from(temporaryFolder.getRoot().getAbsolutePath() + "/*"));

  PAssert.that(readBack).containsInAnyOrder(records);
  readPipeline.run().waitUntilFinish();
}
 
Example #15
Source File: ParquetStreamingFileSinkITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testWriteParquetAvroReflect() throws Exception {

	final File folder = TEMPORARY_FOLDER.newFolder();

	final List<Datum> data = Arrays.asList(
			new Datum("a", 1), new Datum("b", 2), new Datum("c", 3));

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.enableCheckpointing(100);

	DataStream<Datum> stream = env.addSource(
			new FiniteTestSource<>(data), TypeInformation.of(Datum.class));

	stream.addSink(
			StreamingFileSink.forBulkFormat(
					Path.fromLocalFile(folder),
					ParquetAvroWriters.forReflectRecord(Datum.class))
					.build());

	env.execute();

	validateResults(folder, ReflectData.get(), data);
}
 
Example #16
Source File: TestSchemaCommandCluster.java    From kite with Apache License 2.0
@Test
public void testObjSchemaToHDFSFile() throws Exception {
  Schema schema = ReflectData.get().getSchema(User.class);

  String hdfsSchemaPath = "hdfs:/tmp/schemas/obj.avsc";
  ObjectSchemaCommand command = new ObjectSchemaCommand(console);
  command.setConf(getConfiguration());
  command.classNames = Lists.newArrayList("org.kitesdk.cli.example.User");
  command.outputPath = hdfsSchemaPath;
  int rc = command.run();
  Assert.assertEquals("Should return success code", 0, rc);
  String fileContent = CharStreams.toString(
      new InputStreamReader(getDFS().open(new Path(hdfsSchemaPath)), "utf8"));
  Assert.assertTrue("File should contain pretty printed schema",
      TestUtil.matchesSchema(schema).matches(fileContent));
  verifyNoMoreInteractions(console);
}
 
Example #17
Source File: SimpleSchemaTest.java    From pulsar with Apache License 2.0
@Test
public void newProducerWithMultipleSchemaDisabled() throws Exception {
    String topic = "my-property/my-ns/schema-test";
    AvroWriter<V1Data> v1DataAvroWriter = new AvroWriter<>(
            ReflectData.AllowNull.get().getSchema(V1Data.class));
    try (Producer<byte[]> p = pulsarClient.newProducer()
                                          .topic(topic)
                                          .enableMultiSchema(false).create()) {
        Assert.assertThrows(InvalidMessageException.class,
                () -> p.newMessage(Schema.AUTO_PRODUCE_BYTES(Schema.AVRO(V1Data.class)))
                       .value(v1DataAvroWriter.write(new V1Data(0))).send());
    }
}
 
Example #18
Source File: AvroSchemaGenRelConverter.java    From samza with Apache License 2.0
private Schema computePayloadSchema(String streamName, SamzaSqlRelMessage relMessage) {
  SamzaSqlRelRecord relRecord = relMessage.getSamzaSqlRelRecord();
  List<Schema.Field> keyFields = new ArrayList<>();
  List<String> fieldNames = relRecord.getFieldNames();
  List<Object> values = relRecord.getFieldValues();

  for (int index = 0; index < fieldNames.size(); index++) {
    if (fieldNames.get(index).equals(SamzaSqlRelMessage.KEY_NAME) || values.get(index) == null) {
      continue;
    }

    Object value = values.get(index);
    Schema avroType;
    if (value instanceof GenericData.Record) {
      avroType = ((GenericData.Record) value).getSchema();
    } else {
      avroType = ReflectData.get().getSchema(value.getClass());
    }
    keyFields.add(new Schema.Field(fieldNames.get(index), avroType, "", null));
  }

  Schema ks = Schema.createRecord(streamName, "", streamName + "_namespace", false);
  ks.setFields(keyFields);
  String schemaStr = ks.toString();
  Schema schema;
  // See whether we already have a Schema object corresponding to this schema string and reuse it.
  // CachedSchemaRegistryClient doesn't like it if we recreate schema objects.
  if (schemas.containsKey(schemaStr)) {
    schema = schemas.get(schemaStr);
  } else {
    schema = Schema.parse(schemaStr);
    schemas.put(schemaStr, schema);
  }

  return schema;
}
 
Example #19
Source File: StructSchema.java    From pulsar with Apache License 2.0
protected static Schema extractAvroSchema(SchemaDefinition schemaDefinition, Class pojo) {
    try {
        return parseAvroSchema(pojo.getDeclaredField("SCHEMA$").get(null).toString());
    } catch (NoSuchFieldException | IllegalAccessException | IllegalArgumentException ignored) {
        return schemaDefinition.getAlwaysAllowNull() ? ReflectData.AllowNull.get().getSchema(pojo)
            : ReflectData.get().getSchema(pojo);
    }
}
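
The fallback above chooses between the two reflect data models. A small sketch using a hypothetical Address POJO (not part of the Pulsar source) shows the difference by printing both generated schemas:

import org.apache.avro.Schema;
import org.apache.avro.reflect.ReflectData;

public class AllowNullSketch {

  // Hypothetical POJO used only for this illustration.
  public static class Address {
    String street;
    int zip;
  }

  public static void main(String[] args) {
    // Default reflect model: reference-typed fields are required (non-nullable).
    Schema strict = ReflectData.get().getSchema(Address.class);

    // AllowNull model: reference-typed fields become unions with null; primitives stay as-is.
    Schema nullable = ReflectData.AllowNull.get().getSchema(Address.class);

    System.out.println(strict.toString(true));
    System.out.println(nullable.toString(true));
    // In the AllowNull schema, "street" becomes ["null", "string"], which is why the
    // Pulsar code uses it when alwaysAllowNull is set on the SchemaDefinition.
  }
}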
 
Example #20
Source File: AvroWriter.java    From pulsar with Apache License 2.0
public AvroWriter(Schema schema, boolean jsr310ConversionEnabled) {
    this.byteArrayOutputStream = new ByteArrayOutputStream();
    this.encoder = EncoderFactory.get().binaryEncoder(this.byteArrayOutputStream, this.encoder);
    ReflectData reflectData = new ReflectData();
    AvroSchema.addLogicalTypeConversions(reflectData, jsr310ConversionEnabled);
    this.writer = new ReflectDatumWriter<>(schema, reflectData);
}
 
Example #21
Source File: TestDataModelUtil.java    From kite with Apache License 2.0
@Test
public void testGetReaderSchemaForReflect() {
  Class<String> type = String.class;
  Schema writerSchema = ReflectData.get().getSchema(String.class);
  Schema expResult = writerSchema;
  Schema result = DataModelUtil.getReaderSchema(type, writerSchema);
  assertEquals(expResult, result);
}
 
Example #22
Source File: AvroWriters.java    From flink with Apache License 2.0
/**
 * Creates an {@link AvroWriterFactory} for the given type. The Avro writers will
 * use reflection to create the schema for the type and use that schema to write
 * the records.
 *
 * @param type The class of the type to write.
 */
public static <T> AvroWriterFactory<T> forReflectRecord(Class<T> type) {
	String schemaString = ReflectData.get().getSchema(type).toString();
	AvroBuilder<T> builder = (out) -> createAvroDataFileWriter(
		schemaString,
		ReflectDatumWriter::new,
		out);
	return new AvroWriterFactory<>(builder);
}
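
A hedged usage sketch (mirroring the Parquet sink examples earlier, with the same Datum POJO assumed) of how a factory built this way is plugged into a streaming file sink:

// Usage sketch; assumes a DataStream<Datum> named "stream" and a local output folder.
stream.addSink(
	StreamingFileSink.forBulkFormat(
			Path.fromLocalFile(folder),
			AvroWriters.forReflectRecord(Datum.class))
			.build());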
 
Example #23
Source File: RecordBuilder.java    From parquet-mr with Apache License 2.0
@SuppressWarnings("unchecked")
private E newRecordInstance() {
  if (recordClass != GenericData.Record.class && !recordClass.isInterface()) {
    E record = (E) ReflectData.newInstance(recordClass, schema);
    if (record != null) {
      return record;
    }
  }
  return (E) new GenericData.Record(schema);
}
 
Example #24
Source File: TestReflectLogicalTypes.java    From parquet-mr with Apache License 2.0
@Test
public void testWriteUUIDArray() throws IOException {
  Schema uuidArraySchema = SchemaBuilder.record(RecordWithUUIDArray.class.getName())
      .fields()
      .name("uuids").type().array().items().stringType().noDefault()
      .endRecord();
  LogicalTypes.uuid().addToSchema(
      uuidArraySchema.getField("uuids").schema().getElementType());

  Schema stringArraySchema = SchemaBuilder.record("RecordWithUUIDArray")
      .fields()
      .name("uuids").type().array().items().stringType().noDefault()
      .endRecord();
  stringArraySchema.getField("uuids").schema()
      .addProp(SpecificData.CLASS_PROP, List.class.getName());

  UUID u1 = UUID.randomUUID();
  UUID u2 = UUID.randomUUID();

  GenericRecord expected = new GenericData.Record(stringArraySchema);
  List<String> uuids = new ArrayList<String>();
  uuids.add(u1.toString());
  uuids.add(u2.toString());
  expected.put("uuids", uuids);

  RecordWithUUIDArray r = new RecordWithUUIDArray();
  r.uuids = new UUID[] {u1, u2};

  File test = write(REFLECT, uuidArraySchema, r);

  Assert.assertEquals("Should read UUIDs as Strings",
      expected,
      read(ReflectData.get(), stringArraySchema, test).get(0));
}
 
Example #25
Source File: TestReflectLogicalTypes.java    From parquet-mr with Apache License 2.0
@Test
public void testWriteUUIDWithParquetUUID() throws IOException {
  Schema uuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName())
      .fields().requiredString("uuid").endRecord();
  LogicalTypes.uuid().addToSchema(uuidSchema.getField("uuid").schema());

  UUID u1 = UUID.randomUUID();
  UUID u2 = UUID.randomUUID();

  RecordWithUUID r1 = new RecordWithUUID();
  r1.uuid = u1;
  RecordWithUUID r2 = new RecordWithUUID();
  r2.uuid = u2;

  List<RecordWithStringUUID> expected = Arrays.asList(
      new RecordWithStringUUID(), new RecordWithStringUUID());
  expected.get(0).uuid = u1.toString();
  expected.get(1).uuid = u2.toString();

  File test = write(
      AvroTestUtil.conf(AvroWriteSupport.WRITE_PARQUET_UUID, true),
      REFLECT, uuidSchema, r1, r2);

  Assert.assertEquals("Should read UUID objects",
      Arrays.asList(r1, r2),
      read(REFLECT, uuidSchema, test));

  Schema uuidStringSchema = SchemaBuilder.record(RecordWithStringUUID.class.getName())
      .fields().requiredString("uuid").endRecord();
  LogicalTypes.uuid().addToSchema(uuidStringSchema.getField("uuid").schema());
  Assert.assertEquals("Should read uuid as Strings",
      expected,
      read(ReflectData.get(), uuidStringSchema, test));
}
 
Example #26
Source File: TestReflectLogicalTypes.java    From parquet-mr with Apache License 2.0
@Test
public void testWriteUUIDMissingLogicalType() throws IOException {
  Schema uuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName())
      .fields().requiredString("uuid").endRecord();
  LogicalTypes.uuid().addToSchema(uuidSchema.getField("uuid").schema());

  UUID u1 = UUID.randomUUID();
  UUID u2 = UUID.randomUUID();

  RecordWithUUID r1 = new RecordWithUUID();
  r1.uuid = u1;
  RecordWithUUID r2 = new RecordWithUUID();
  r2.uuid = u2;

  List<RecordWithStringUUID> expected = Arrays.asList(
      new RecordWithStringUUID(), new RecordWithStringUUID());
  expected.get(0).uuid = u1.toString();
  expected.get(1).uuid = u2.toString();

  // write without using REFLECT, which has the logical type
  File test = write(uuidSchema, r1, r2);

  // verify that the field's type overrides the logical type
  Schema uuidStringSchema = SchemaBuilder
      .record(RecordWithStringUUID.class.getName())
      .fields().requiredString("uuid").endRecord();

  Assert.assertEquals("Should read uuid as String without UUID conversion",
      expected,
      read(REFLECT, uuidStringSchema, test));

  Assert.assertEquals("Should read uuid as String without UUID logical type",
      expected,
      read(ReflectData.get(), uuidStringSchema, test)
      );
}
 
Example #27
Source File: DataModelUtil.java    From kite with Apache License 2.0
/**
 * Get the writer schema based on the given type and dataset schema.
 *
 * @param <E> The entity type
 * @param type The Java class of the entity type
 * @param schema The {@link Schema} for the entity
 * @return The writer schema based on the given type and dataset schema
 */
public static <E> Schema getWriterSchema(Class<E> type, Schema schema) {
  Schema writerSchema = schema;
  GenericData dataModel = getDataModelForType(type);
  if (dataModel instanceof AllowNulls) {
    // assume fields are non-null by default to avoid schema conflicts
    dataModel = ReflectData.get();
  }

  if (dataModel instanceof SpecificData) {
    writerSchema = ((SpecificData)dataModel).getSchema(type);
  }

  return writerSchema;
}
 
Example #28
Source File: TestStringBehavior.java    From parquet-mr with Apache License 2.0
@Test
public void testReflectJavaClass() throws IOException {
  Schema reflectSchema = ReflectData.get()
      .getSchema(ReflectRecordJavaClass.class);
  System.err.println("Schema: " + reflectSchema.toString(true));
  ReflectRecordJavaClass avroRecord;
  try(DataFileReader<ReflectRecordJavaClass> avro =
        new DataFileReader<>(avroFile,
          new ReflectDatumReader<>(reflectSchema))) {
    avroRecord = avro.next();
  }

  ReflectRecordJavaClass parquetRecord;
  Configuration conf = new Configuration();
  conf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, false);
  AvroReadSupport.setAvroDataSupplier(conf, ReflectDataSupplier.class);
  AvroReadSupport.setAvroReadSchema(conf, reflectSchema);
  AvroReadSupport.setRequestedProjection(conf, reflectSchema);
  try(ParquetReader<ReflectRecordJavaClass> parquet = AvroParquetReader
      .<ReflectRecordJavaClass>builder(parquetFile)
      .withConf(conf)
      .build()) {
    parquetRecord = parquet.read();
  }

  // Avro uses String even if CharSequence is set
  Assert.assertEquals("Avro default string class should be String",
      String.class, avroRecord.default_class.getClass());
  Assert.assertEquals("Parquet default string class should be String",
      String.class, parquetRecord.default_class.getClass());

  Assert.assertEquals("Avro stringable class should be BigDecimal",
      BigDecimal.class, avroRecord.stringable_class.getClass());
  Assert.assertEquals("Parquet stringable class should be BigDecimal",
      BigDecimal.class, parquetRecord.stringable_class.getClass());
  Assert.assertEquals("Should have the correct BigDecimal value",
      BIG_DECIMAL, parquetRecord.stringable_class);
}
 
Example #29
Source File: TestLog4jAppenderWithAvro.java    From mt-flume with Apache License 2.0
@Test
public void testAvroReflect() throws IOException {
  loadProperties("flume-log4jtest-avro-reflect.properties");
  PropertyConfigurator.configure(props);
  Logger logger = LogManager.getLogger(TestLog4jAppenderWithAvro.class);
  String msg = "This is log message number " + String.valueOf(0);

  AppEvent appEvent = new AppEvent();
  appEvent.setMessage(msg);

  logger.info(appEvent);

  Transaction transaction = ch.getTransaction();
  transaction.begin();
  Event event = ch.take();
  Assert.assertNotNull(event);

  Schema schema = ReflectData.get().getSchema(appEvent.getClass());

  ReflectDatumReader<AppEvent> reader = new ReflectDatumReader<AppEvent>(AppEvent.class);
  BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(event.getBody(), null);
  AppEvent recordFromEvent = reader.read(null, decoder);
  Assert.assertEquals(msg, recordFromEvent.getMessage());

  Map<String, String> hdrs = event.getHeaders();

  Assert.assertNull(hdrs.get(Log4jAvroHeaders.MESSAGE_ENCODING.toString()));

  Assert.assertNull("Schema URL should not be set",
      hdrs.get(Log4jAvroHeaders.AVRO_SCHEMA_URL.toString()));
  Assert.assertEquals("Schema string should be set", schema.toString(),
      hdrs.get(Log4jAvroHeaders.AVRO_SCHEMA_LITERAL.toString()));

  transaction.commit();
  transaction.close();

}
 
Example #30
Source File: TestReflectLogicalTypes.java    From parquet-mr with Apache License 2.0
@Test
public void testWriteUUID() throws IOException {
  Schema uuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName())
      .fields().requiredString("uuid").endRecord();
  LogicalTypes.uuid().addToSchema(uuidSchema.getField("uuid").schema());

  UUID u1 = UUID.randomUUID();
  UUID u2 = UUID.randomUUID();

  RecordWithUUID r1 = new RecordWithUUID();
  r1.uuid = u1;
  RecordWithUUID r2 = new RecordWithUUID();
  r2.uuid = u2;

  List<RecordWithStringUUID> expected = Arrays.asList(
      new RecordWithStringUUID(), new RecordWithStringUUID());
  expected.get(0).uuid = u1.toString();
  expected.get(1).uuid = u2.toString();

  File test = write(REFLECT, uuidSchema, r1, r2);

  // verify that the field's type overrides the logical type
  Schema uuidStringSchema = SchemaBuilder
      .record(RecordWithStringUUID.class.getName())
      .fields().requiredString("uuid").endRecord();

  Assert.assertEquals("Should read uuid as String without UUID conversion",
      expected,
      read(REFLECT, uuidStringSchema, test));

  LogicalTypes.uuid().addToSchema(uuidStringSchema.getField("uuid").schema());
  Assert.assertEquals("Should read uuid as String without UUID logical type",
      expected,
      read(ReflectData.get(), uuidStringSchema, test));
}