Java Code Examples for org.apache.avro.mapred.AvroValue#datum()

The following examples show how to use org.apache.avro.mapred.AvroValue#datum() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AvroKeyDedupReducer.java    From incubator-gobblin with Apache License 2.0 5 votes vote down vote up
@Override
public int compare(AvroValue<GenericRecord> o1, AvroValue<GenericRecord> o2) {
  GenericRecord record1 = o1.datum();
  GenericRecord record2 = o2.datum();
  for (String deltaFieldName : this.deltaSchemaProvider.getDeltaFieldNames(record1)) {
    if (record1.get(deltaFieldName).equals(record2.get(deltaFieldName))) {
      continue;
    }
    return ((Comparable) record1.get(deltaFieldName)).compareTo(record2.get(deltaFieldName));
  }

  return 0;
}
 
Example 2
Source File: TransformPhaseJob.java    From incubator-pinot with Apache License 2.0 5 votes vote down vote up
@Override
public void reduce(IntWritable key, Iterable<AvroValue<GenericRecord>> values, Context context)
    throws IOException, InterruptedException {
  for (AvroValue<GenericRecord> value : values) {
    GenericRecord record = value.datum();
    context.write(new AvroKey<GenericRecord>(record), NullWritable.get());
  }
}
 
Example 3
Source File: KeyDedupReducerTest.java    From incubator-gobblin with Apache License 2.0 4 votes vote down vote up
@Test
public void testAvroReduce()
    throws IOException, InterruptedException {
  Schema keySchema = new Schema.Parser().parse(AVRO_KEY_SCHEMA);
  GenericRecordBuilder keyRecordBuilder = new GenericRecordBuilder(keySchema.getField("key").schema());
  keyRecordBuilder.set("partitionKey", 1);
  keyRecordBuilder.set("environment", "test");
  keyRecordBuilder.set("subKey", "2");
  GenericRecord record = keyRecordBuilder.build();
  keyRecordBuilder = new GenericRecordBuilder(keySchema);
  keyRecordBuilder.set("key", record);
  GenericRecord keyRecord = keyRecordBuilder.build();

  // Test reducer with delta field "scn"
  Schema fullSchema = new Schema.Parser().parse(AVRO_FULL_SCHEMA);
  AvroValue<GenericRecord> fullRecord1 = new AvroValue<>();
  AvroValue<GenericRecord> fullRecord2 = new AvroValue<>();
  AvroValue<GenericRecord> fullRecord3 = new AvroValue<>();
  AvroValue<GenericRecord> fullRecord4 = new AvroValue<>();

  GenericRecordBuilder fullRecordBuilder1 = new GenericRecordBuilder(fullSchema);
  fullRecordBuilder1.set("key", record);
  fullRecordBuilder1.set("scn", 123);
  fullRecordBuilder1.set("scn2", 100);
  fullRecord1.datum(fullRecordBuilder1.build());
  fullRecordBuilder1.set("scn", 125);
  fullRecordBuilder1.set("scn2", 1);
  fullRecord2.datum(fullRecordBuilder1.build());
  fullRecordBuilder1.set("scn", 124);
  fullRecordBuilder1.set("scn2", 10);
  fullRecord3.datum(fullRecordBuilder1.build());
  fullRecordBuilder1.set("scn", 122);
  fullRecordBuilder1.set("scn2", 1000);
  fullRecord4.datum(fullRecordBuilder1.build());

  Configuration conf = mock(Configuration.class);
  when(conf.get(AvroKeyDedupReducer.DELTA_SCHEMA_PROVIDER))
      .thenReturn(FieldAttributeBasedDeltaFieldsProvider.class.getName());
  when(conf.get(FieldAttributeBasedDeltaFieldsProvider.ATTRIBUTE_FIELD)).thenReturn("attributes_json");

  when(conf.get(FieldAttributeBasedDeltaFieldsProvider.DELTA_PROP_NAME,
      FieldAttributeBasedDeltaFieldsProvider.DEFAULT_DELTA_PROP_NAME))
      .thenReturn(FieldAttributeBasedDeltaFieldsProvider.DEFAULT_DELTA_PROP_NAME);
  RecordKeyDedupReducerBase<AvroKey<GenericRecord>, AvroValue<GenericRecord>,
      AvroKey<GenericRecord>, NullWritable> reducer = new AvroKeyDedupReducer();

  WrappedReducer.Context reducerContext = mock(WrappedReducer.Context.class);
  when(reducerContext.getConfiguration()).thenReturn(conf);
  Counter moreThan1Counter = new GenericCounter();
  when(reducerContext.getCounter(RecordKeyDedupReducerBase.EVENT_COUNTER.MORE_THAN_1)).thenReturn(moreThan1Counter);

  Counter dedupedCounter = new GenericCounter();
  when(reducerContext.getCounter(RecordKeyDedupReducerBase.EVENT_COUNTER.DEDUPED)).thenReturn(dedupedCounter);

  Counter recordCounter = new GenericCounter();
  when(reducerContext.getCounter(RecordKeyDedupReducerBase.EVENT_COUNTER.RECORD_COUNT)).thenReturn(recordCounter);
  reducer.setup(reducerContext);

  doNothing().when(reducerContext).write(any(AvroKey.class), any(NullWritable.class));
  List<AvroValue<GenericRecord>> valueIterable =
      Lists.newArrayList(fullRecord1, fullRecord2, fullRecord3, fullRecord4);

  AvroKey<GenericRecord> key = new AvroKey<>();
  key.datum(keyRecord);
  reducer.reduce(key, valueIterable, reducerContext);
  Assert.assertEquals(reducer.getOutKey().datum(), fullRecord2.datum());

  // Test reducer without delta field
  Configuration conf2 = mock(Configuration.class);
  when(conf2.get(AvroKeyDedupReducer.DELTA_SCHEMA_PROVIDER)).thenReturn(null);
  when(reducerContext.getConfiguration()).thenReturn(conf2);
  RecordKeyDedupReducerBase<AvroKey<GenericRecord>, AvroValue<GenericRecord>,
      AvroKey<GenericRecord>, NullWritable> reducer2 = new AvroKeyDedupReducer();
  reducer2.setup(reducerContext);
  reducer2.reduce(key, valueIterable, reducerContext);
  Assert.assertEquals(reducer2.getOutKey().datum(), fullRecord1.datum());

  // Test reducer with compound delta key.
  Schema fullSchema2 = new Schema.Parser().parse(AVRO_FULL_SCHEMA_WITH_TWO_DELTA_FIELDS);
  GenericRecordBuilder fullRecordBuilder2 = new GenericRecordBuilder(fullSchema2);
  fullRecordBuilder2.set("key", record);
  fullRecordBuilder2.set("scn", 123);
  fullRecordBuilder2.set("scn2", 100);
  fullRecord1.datum(fullRecordBuilder2.build());
  fullRecordBuilder2.set("scn", 125);
  fullRecordBuilder2.set("scn2", 1000);
  fullRecord2.datum(fullRecordBuilder2.build());
  fullRecordBuilder2.set("scn", 126);
  fullRecordBuilder2.set("scn2", 1000);
  fullRecord3.datum(fullRecordBuilder2.build());
  fullRecordBuilder2.set("scn", 130);
  fullRecordBuilder2.set("scn2", 100);
  fullRecord4.datum(fullRecordBuilder2.build());
  List<AvroValue<GenericRecord>> valueIterable2 =
      Lists.newArrayList(fullRecord1, fullRecord2, fullRecord3, fullRecord4);
  reducer.reduce(key, valueIterable2, reducerContext);
  Assert.assertEquals(reducer.getOutKey().datum(), fullRecord3.datum());

}
 
Example 4
Source File: GenericPartitioner.java    From incubator-pinot with Apache License 2.0 4 votes vote down vote up
@Override
public int getPartition(T genericRecordAvroKey, AvroValue<GenericRecord> genericRecordAvroValue, int numPartitions) {
  final GenericRecord inputRecord = genericRecordAvroValue.datum();
  final Object partitionColumnValue = inputRecord.get(_partitionColumn);
  return _partitionFunction.getPartition(partitionColumnValue);
}