Java Code Examples for org.apache.avro.mapred.AvroKey#datum()

The following examples show how to use org.apache.avro.mapred.AvroKey#datum(). All of them are taken from open-source projects; the originating source file and license are noted above each example.
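AvroKey is a thin, mutable wrapper from Avro's Hadoop MapReduce integration: the zero-argument datum() returns the wrapped object, while the one-argument datum(T) replaces it. A minimal sketch of the two overloads (roundTrip is a hypothetical helper, not part of the API):

import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroKey;

// datum() doubles as getter and setter on the AvroWrapper base class.
static GenericRecord roundTrip(GenericRecord record) {
  AvroKey<GenericRecord> key = new AvroKey<>();  // empty wrapper
  key.datum(record);                             // setter: wrap the record
  return key.datum();                            // getter: unwrap it again
}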
Example 1
Source File: RegressionAdmmTrain.java    From ml-ease with Apache License 2.0
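Here datum() unwraps the Integer map key inside a custom partitioner, which validates that the key falls within [0, numPartitions-1] before using it directly as the partition index.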
@Override
public int getPartition(AvroKey<Integer> key,
                        AvroValue<RegressionPrepareOutput> value,
                        int numPartitions)
{
  Integer keyInt = key.datum();
  if (keyInt < 0 || keyInt >= numPartitions)
  {
    throw new RuntimeException("Map key is wrong! key has to be in the range of [0,numPartitions-1].");
  }
  return keyInt;
}
 
Example 2
Source File: SegmentPreprocessingMapper.java    From incubator-pinot with Apache License 2.0
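This Pinot preprocessing mapper calls record.datum() to inspect the time column of the wrapped GenericRecord, then unwraps it again to build the output key (sorted column and/or hashcode) before re-emitting the record.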
@Override
public void map(AvroKey<GenericRecord> record, NullWritable value, final Context context)
    throws IOException, InterruptedException {

  if (_isAppend) {
    // Normalize time column value and check against sample value
    String timeColumnValue = record.datum().get(_timeColumn).toString();
    String normalizedTimeColumnValue = _normalizedDateSegmentNameGenerator.getNormalizedDate(timeColumnValue);

    if (!normalizedTimeColumnValue.equals(_sampleNormalizedTimeColumnValue) && _firstInstanceOfMismatchedTime) {
      _firstInstanceOfMismatchedTime = false;
      // TODO: Create a custom exception and gracefully catch this exception outside, changing what the path to input
      // into segment creation should be
      LOGGER.warn("This segment contains multiple time units. Sample is {}, current is {}",
          _sampleNormalizedTimeColumnValue, normalizedTimeColumnValue);
    }
  }

  final GenericRecord inputRecord = record.datum();
  final Schema schema = inputRecord.getSchema();
  Preconditions.checkArgument(_outputSchema.equals(schema), "The schema of all avro files should be the same!");

  GenericRecord outputKey = new GenericData.Record(_outputKeySchema);
  if (_sortedColumn == null) {
    outputKey.put("hashcode", inputRecord.hashCode());
  } else if (_enablePartitioning) {
    outputKey.put(_sortedColumn, inputRecord.get(_sortedColumn));
  } else {
    outputKey.put(_sortedColumn, inputRecord.get(_sortedColumn));
    outputKey.put("hashcode", inputRecord.hashCode());
  }

  try {
    context.write(new AvroKey<>(outputKey), new AvroValue<>(inputRecord));
  } catch (Exception e) {
    LOGGER.error("Exception when writing context on mapper!");
    throw e;
  }
}
 
Example 3
Source File: TransformPhaseJob.java    From incubator-pinot with Apache License 2.0
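This mapper unwraps the incoming record with datum(), runs it through a transformation UDF, and distributes non-null results across reducers in round-robin fashion.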
@Override
public void map(AvroKey<GenericRecord> recordWrapper, NullWritable value, Context context)
    throws IOException, InterruptedException {
  GenericRecord record = recordWrapper.datum();
  GenericRecord outputRecord = transformUDF.transformRecord(sourceName, record);

  if (outputRecord != null) {
    IntWritable key = new IntWritable(reducerKey);
    reducerKey = (reducerKey == numReducers) ? (1) : (reducerKey + 1);
    context.write(key, new AvroValue<GenericRecord>(outputRecord));
  }
}
 
Example 4
Source File: AggregationPhaseJob.java    From incubator-pinot with Apache License 2.0
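Here the unwrapped record is split into dimensions, metrics, and a time bucket, which are then serialized into custom key and value writables for the aggregation phase.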
@Override
public void map(AvroKey<GenericRecord> record, NullWritable value, Context context) throws IOException, InterruptedException {

  // input record
  GenericRecord inputRecord = record.datum();

  // dimensions
  List<Object> dimensions = new ArrayList<>();
  for (String dimension : dimensionNames) {
    Object dimensionValue = ThirdeyeAvroUtils.getDimensionFromRecord(inputRecord, dimension);
    dimensions.add(dimensionValue);
  }

  // metrics
  Number[] metrics = new Number[numMetrics];
  for (int i = 0; i < numMetrics; i++) {
    Number metricValue = ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, metricNames.get(i), metricTypes.get(i));
    metrics[i] = metricValue;
  }

  // time
  long timeValue = ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, timeColumnName).longValue();
  long inputTimeMillis = inputGranularity.toMillis(timeValue);
  long bucketTime = aggregateGranularity.convertToUnit(inputTimeMillis);

  AggregationPhaseMapOutputKey keyWrapper = new AggregationPhaseMapOutputKey(bucketTime, dimensions, dimensionTypes);
  byte[] keyBytes = keyWrapper.toBytes();
  keyWritable.set(keyBytes, 0, keyBytes.length);

  AggregationPhaseMapOutputValue valWrapper = new AggregationPhaseMapOutputValue(metrics, metricTypes);
  byte[] valBytes = valWrapper.toBytes();
  valWritable.set(valBytes, 0, valBytes.length);

  numRecords++;
  context.write(keyWritable, valWritable);
}
 
Example 5
Source File: TopKPhaseJob.java    From incubator-pinot with Apache License 2.0
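This mapper reads the metric values from the unwrapped record once, then emits that value payload under one key per dimension, plus a final catch-all (ALL, ALL) key.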
@Override
public void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
    throws IOException, InterruptedException {

  // input record
  GenericRecord inputRecord = key.datum();

  // read metrics
  Number[] metricValues = new Number[numMetrics];
  for (int i = 0; i < numMetrics; i++) {
    String metricName = metricNames.get(i);
    Number metricValue = ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, metricName);
    metricValues[i] = metricValue;
  }
  TopKPhaseMapOutputValue valWrapper = new TopKPhaseMapOutputValue(metricValues, metricTypes);
  byte[] valBytes = valWrapper.toBytes();
  valWritable.set(valBytes, 0, valBytes.length);

  // read dimensions
  for (int i = 0; i < dimensionNames.size(); i++) {
    String dimensionName = dimensionNames.get(i);
    DimensionType dimensionType = dimensionTypes.get(i);
    Object dimensionValue = ThirdeyeAvroUtils.getDimensionFromRecord(inputRecord, dimensionName);

    TopKPhaseMapOutputKey keyWrapper = new TopKPhaseMapOutputKey(dimensionName, dimensionValue, dimensionType);
    byte[] keyBytes = keyWrapper.toBytes();
    keyWritable.set(keyBytes, 0, keyBytes.length);
    context.write(keyWritable, valWritable);
  }
  TopKPhaseMapOutputKey allKeyWrapper = new TopKPhaseMapOutputKey(TOPK_ALL_DIMENSION_NAME, TOPK_ALL_DIMENSION_VALUE, DimensionType.STRING);
  byte[] allKeyBytes = allKeyWrapper.toBytes();
  keyWritable.set(allKeyBytes, 0, allKeyBytes.length);
  context.write(keyWritable, valWritable);
}
 
Example 6
Source File: JoinPhaseJob.java    From incubator-pinot with Apache License 2.0
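The join mapper unwraps the record, derives a join key from it, and emits the serialized record keyed by that join key, skipping records whose key is INVALID.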
@Override
public void map(AvroKey<GenericRecord> recordWrapper, NullWritable value, Context context)
    throws IOException, InterruptedException {
  GenericRecord record = recordWrapper.datum();
  MapOutputValue mapOutputValue = new MapOutputValue(record.getSchema().getName(), record);
  String joinKeyValue = joinKeyExtractor.extractJoinKey(sourceName, record);
  LOGGER.info("Join Key:{}", joinKeyValue);

  if (!"INVALID".equals(joinKeyValue)) {
    context.write(new BytesWritable(joinKeyValue.getBytes()),
        new BytesWritable(mapOutputValue.toBytes()));
  }
}
 
Example 7
Source File: OSMNodeMapper.java    From geowave with Apache License 2.0
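This GeoWave mapper unwraps an Avro-encoded OSM node and writes its coordinates, metadata, and tags into an Accumulo Mutation.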
@Override
public void map(final AvroKey<AvroNode> key, final NullWritable value, final Context context)
    throws IOException, InterruptedException {

  final AvroNode node = key.datum();
  final AvroPrimitive p = node.getCommon();

  final Mutation m = new Mutation(getIdHash(p.getId()));
  // Mutation m = new Mutation(_longWriter.writeField(p.getId()));
  // Mutation m = new Mutation(p.getId().toString());

  put(m, ColumnFamily.NODE, ColumnQualifier.ID, p.getId());
  put(m, ColumnFamily.NODE, ColumnQualifier.LONGITUDE, node.getLongitude());
  put(m, ColumnFamily.NODE, ColumnQualifier.LATITUDE, node.getLatitude());

  if (!Long.valueOf(0).equals(p.getVersion())) {
    put(m, ColumnFamily.NODE, ColumnQualifier.VERSION, p.getVersion());
  }

  if (!Long.valueOf(0).equals(p.getTimestamp())) {
    put(m, ColumnFamily.NODE, ColumnQualifier.TIMESTAMP, p.getTimestamp());
  }

  if (!Long.valueOf(0).equals(p.getChangesetId())) {
    put(m, ColumnFamily.NODE, ColumnQualifier.CHANGESET, p.getChangesetId());
  }

  if (!Long.valueOf(0).equals(p.getUserId())) {
    put(m, ColumnFamily.NODE, ColumnQualifier.USER_ID, p.getUserId());
  }

  put(m, ColumnFamily.NODE, ColumnQualifier.USER_TEXT, p.getUserName());
  put(m, ColumnFamily.NODE, ColumnQualifier.OSM_VISIBILITY, p.getVisible());

  for (final Map.Entry<String, String> kvp : p.getTags().entrySet()) {
    put(m, ColumnFamily.NODE, kvp.getKey(), kvp.getValue());
  }
  context.write(_tableName, m);
}
 
Example 8
Source File: LazyAvroKeyWrapper.java    From components with Apache License 2.0
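This coder uses the one-argument datum(...) overload as a setter, populating a fresh AvroKey with a datum decoded from the input stream.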
@Override
public AvroKey<DatumT> decode(InputStream inStream) throws IOException {
    AvroKey<DatumT> key = new AvroKey<>();
    key.datum(datumCoder.decode(inStream));
    return key;
}
 
Example 9
Source File: KeyDedupReducerTest.java    From incubator-gobblin with Apache License 2.0
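This Gobblin test also relies on the datum(...) setter to populate AvroKey and AvroValue instances, and on the datum() getter to assert which record the dedup reducer kept.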
@Test
public void testAvroReduce()
    throws IOException, InterruptedException {
  Schema keySchema = new Schema.Parser().parse(AVRO_KEY_SCHEMA);
  GenericRecordBuilder keyRecordBuilder = new GenericRecordBuilder(keySchema.getField("key").schema());
  keyRecordBuilder.set("partitionKey", 1);
  keyRecordBuilder.set("environment", "test");
  keyRecordBuilder.set("subKey", "2");
  GenericRecord record = keyRecordBuilder.build();
  keyRecordBuilder = new GenericRecordBuilder(keySchema);
  keyRecordBuilder.set("key", record);
  GenericRecord keyRecord = keyRecordBuilder.build();

  // Test reducer with delta field "scn"
  Schema fullSchema = new Schema.Parser().parse(AVRO_FULL_SCHEMA);
  AvroValue<GenericRecord> fullRecord1 = new AvroValue<>();
  AvroValue<GenericRecord> fullRecord2 = new AvroValue<>();
  AvroValue<GenericRecord> fullRecord3 = new AvroValue<>();
  AvroValue<GenericRecord> fullRecord4 = new AvroValue<>();

  GenericRecordBuilder fullRecordBuilder1 = new GenericRecordBuilder(fullSchema);
  fullRecordBuilder1.set("key", record);
  fullRecordBuilder1.set("scn", 123);
  fullRecordBuilder1.set("scn2", 100);
  fullRecord1.datum(fullRecordBuilder1.build());
  fullRecordBuilder1.set("scn", 125);
  fullRecordBuilder1.set("scn2", 1);
  fullRecord2.datum(fullRecordBuilder1.build());
  fullRecordBuilder1.set("scn", 124);
  fullRecordBuilder1.set("scn2", 10);
  fullRecord3.datum(fullRecordBuilder1.build());
  fullRecordBuilder1.set("scn", 122);
  fullRecordBuilder1.set("scn2", 1000);
  fullRecord4.datum(fullRecordBuilder1.build());

  Configuration conf = mock(Configuration.class);
  when(conf.get(AvroKeyDedupReducer.DELTA_SCHEMA_PROVIDER))
      .thenReturn(FieldAttributeBasedDeltaFieldsProvider.class.getName());
  when(conf.get(FieldAttributeBasedDeltaFieldsProvider.ATTRIBUTE_FIELD)).thenReturn("attributes_json");

  when(conf.get(FieldAttributeBasedDeltaFieldsProvider.DELTA_PROP_NAME,
      FieldAttributeBasedDeltaFieldsProvider.DEFAULT_DELTA_PROP_NAME))
      .thenReturn(FieldAttributeBasedDeltaFieldsProvider.DEFAULT_DELTA_PROP_NAME);
  RecordKeyDedupReducerBase<AvroKey<GenericRecord>, AvroValue<GenericRecord>,
      AvroKey<GenericRecord>, NullWritable> reducer = new AvroKeyDedupReducer();

  WrappedReducer.Context reducerContext = mock(WrappedReducer.Context.class);
  when(reducerContext.getConfiguration()).thenReturn(conf);
  Counter moreThan1Counter = new GenericCounter();
  when(reducerContext.getCounter(RecordKeyDedupReducerBase.EVENT_COUNTER.MORE_THAN_1)).thenReturn(moreThan1Counter);

  Counter dedupedCounter = new GenericCounter();
  when(reducerContext.getCounter(RecordKeyDedupReducerBase.EVENT_COUNTER.DEDUPED)).thenReturn(dedupedCounter);

  Counter recordCounter = new GenericCounter();
  when(reducerContext.getCounter(RecordKeyDedupReducerBase.EVENT_COUNTER.RECORD_COUNT)).thenReturn(recordCounter);
  reducer.setup(reducerContext);

  doNothing().when(reducerContext).write(any(AvroKey.class), any(NullWritable.class));
  List<AvroValue<GenericRecord>> valueIterable =
      Lists.newArrayList(fullRecord1, fullRecord2, fullRecord3, fullRecord4);

  AvroKey<GenericRecord> key = new AvroKey<>();
  key.datum(keyRecord);
  reducer.reduce(key, valueIterable, reducerContext);
  Assert.assertEquals(reducer.getOutKey().datum(), fullRecord2.datum());

  // Test reducer without delta field
  Configuration conf2 = mock(Configuration.class);
  when(conf2.get(AvroKeyDedupReducer.DELTA_SCHEMA_PROVIDER)).thenReturn(null);
  when(reducerContext.getConfiguration()).thenReturn(conf2);
  RecordKeyDedupReducerBase<AvroKey<GenericRecord>, AvroValue<GenericRecord>,
      AvroKey<GenericRecord>, NullWritable> reducer2 = new AvroKeyDedupReducer();
  reducer2.setup(reducerContext);
  reducer2.reduce(key, valueIterable, reducerContext);
  Assert.assertEquals(reducer2.getOutKey().datum(), fullRecord1.datum());

  // Test reducer with compound delta key.
  Schema fullSchema2 = new Schema.Parser().parse(AVRO_FULL_SCHEMA_WITH_TWO_DELTA_FIELDS);
  GenericRecordBuilder fullRecordBuilder2 = new GenericRecordBuilder(fullSchema2);
  fullRecordBuilder2.set("key", record);
  fullRecordBuilder2.set("scn", 123);
  fullRecordBuilder2.set("scn2", 100);
  fullRecord1.datum(fullRecordBuilder2.build());
  fullRecordBuilder2.set("scn", 125);
  fullRecordBuilder2.set("scn2", 1000);
  fullRecord2.datum(fullRecordBuilder2.build());
  fullRecordBuilder2.set("scn", 126);
  fullRecordBuilder2.set("scn2", 1000);
  fullRecord3.datum(fullRecordBuilder2.build());
  fullRecordBuilder2.set("scn", 130);
  fullRecordBuilder2.set("scn2", 100);
  fullRecord4.datum(fullRecordBuilder2.build());
  List<AvroValue<GenericRecord>> valueIterable2 =
      Lists.newArrayList(fullRecord1, fullRecord2, fullRecord3, fullRecord4);
  reducer.reduce(key, valueIterable2, reducerContext);
  Assert.assertEquals(reducer.getOutKey().datum(), fullRecord3.datum());

}
 
Example 10
Source File: DerivedColumnTransformationPhaseJob.java    From incubator-pinot with Apache License 2.0
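This mapper unwraps the input record and builds a derived output record, applying whitelist substitutions and appending top-k columns for configured dimensions.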
@Override
public void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
    throws IOException, InterruptedException {

  // input record
  GenericRecord inputRecord = key.datum();

  // output record
  GenericRecord outputRecord = new Record(outputSchema);

  // dimensions
  for (int i = 0; i < dimensionsNames.size(); i++) {

    String dimensionName = dimensionsNames.get(i);
    DimensionType dimensionType = dimensionsTypes.get(i);
    Object dimensionValue = ThirdeyeAvroUtils.getDimensionFromRecord(inputRecord, dimensionName);
    String dimensionValueStr = String.valueOf(dimensionValue);

    // add original dimension value with whitelist applied
    Object whitelistDimensionValue = dimensionValue;
    if (whitelist != null) {
      List<String> whitelistDimensions = whitelist.get(dimensionName);
      if (CollectionUtils.isNotEmpty(whitelistDimensions)) {
        // whitelist config exists for this dimension but value not present in whitelist
        if (!whitelistDimensions.contains(dimensionValueStr)) {
          whitelistDimensionValue = dimensionType.getValueFromString(nonWhitelistValueMap.get(dimensionName));
        }
      }
    }
    outputRecord.put(dimensionName, whitelistDimensionValue);

    // add column for topk, if topk config exists for that column, plus any whitelist values
    if (topKDimensionsMap.containsKey(dimensionName)) {
      Set<String> topKDimensionValues = topKDimensionsMap.get(dimensionName);
      // if topk config exists for that dimension
      if (CollectionUtils.isNotEmpty(topKDimensionValues)) {
        String topkDimensionName = dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
        Object topkDimensionValue = dimensionValue;
        // topk config exists for this dimension, but value not present in topk or whitelist
        if (!topKDimensionValues.contains(dimensionValueStr) &&
            (whitelist == null || whitelist.get(dimensionName) == null
            || !whitelist.get(dimensionName).contains(dimensionValueStr))) {
          topkDimensionValue = ThirdEyeConstants.OTHER;
        }
        outputRecord.put(topkDimensionName, String.valueOf(topkDimensionValue));
      }
    }
  }

  // metrics
  for (int i = 0; i < metricNames.size(); i++) {
    String metricName = metricNames.get(i);
    MetricType metricType = metricTypes.get(i);
    outputRecord.put(metricName, ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, metricName, metricType));
  }

  // time
  outputRecord.put(timeColumnName, ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, timeColumnName));

  AvroKey<GenericRecord> outputKey = new AvroKey<GenericRecord>(outputRecord);
  avroMultipleOutputs.write(outputKey, NullWritable.get(), inputFileName);
}
 
Example 11
Source File: TopkPhaseTest.java    From incubator-pinot with Apache License 2.0
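This MRUnit test wraps each generated record via the datum(...) setter before feeding it to the map driver, then checks the per-dimension record counts and the resulting top-k file.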
@Test
public void testTopKColumnTransformationPhase() throws Exception {

  int recordCount = 0;
  List<GenericRecord> inputRecords = generateTestMapperData();
  for (GenericRecord record : inputRecords) {
    AvroKey<GenericRecord> inKey = new AvroKey<GenericRecord>();
    inKey.datum(record);
    mapDriver.addInput(new Pair<AvroKey<GenericRecord>, NullWritable>(inKey, NullWritable.get()));
    recordCount++;
  }

  List<Pair<BytesWritable, BytesWritable>> result = mapDriver.run();
  // for each record, we emit
  // one record per dimension
  // and one record for ALL,ALL
  Assert.assertEquals("Incorrect number of records emitted by mapper", recordCount * (3 + 1), result.size());

  Map<String, Integer> counts = new HashMap<>();
  for (Pair<BytesWritable, BytesWritable> pair : result) {
    TopKPhaseMapOutputKey key = TopKPhaseMapOutputKey.fromBytes(pair.getFirst().getBytes());
    String dimensionName = key.getDimensionName();
    Integer count = counts.get(dimensionName);
    if (count == null) {
      count = 0;
    }
    counts.put(dimensionName, count + 1);
  }
  Assert.assertEquals("Incorrect number of records emitted from map", 2, (int) counts.get("d1"));
  Assert.assertEquals("Incorrect number of records emitted from map", 2, (int) counts.get("d2"));
  Assert.assertEquals("Incorrect number of records emitted from map", 2, (int) counts.get("d3"));
  Assert.assertEquals("Incorrect number of records emitted from map", 2, (int) counts.get("0"));

  List<Pair<BytesWritable, List<BytesWritable>>> reduceInput = generateTestReduceData(result);
  reduceDriver.addAll(reduceInput);
  reduceDriver.run();

  File topKFile = new File(outputPath, ThirdEyeConstants.TOPK_VALUES_FILE);
  Assert.assertTrue("Topk file failed to generate!", topKFile.exists());
  TopKDimensionValues topk = OBJECT_MAPPER.readValue(new FileInputStream(topKFile), TopKDimensionValues.class);
  Map<String, Set<String>> topkMap = topk.getTopKDimensions();
  Assert.assertEquals("Incorrect topk object", topkMap.size(), 1);
  Set<String> expected = new HashSet<>();
  expected.add("501");
  Assert.assertEquals("Incorrect topk values in topk object", expected, topkMap.get("d2"));
  Assert.assertEquals("Incorrect whitelist values in topk object", null, topkMap.get("d3"));
}
 
Example 12
Source File: DerivedColumnNoTransformationTest.java    From incubator-pinot with Apache License 2.0
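A test-only variant of the derived-column mapper: the record is unwrapped with datum(), whitelist and top-k handling is applied, and the result is written as a new AvroKey.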
@Override
public void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
    throws IOException, InterruptedException {

  // input record
  GenericRecord inputRecord = key.datum();

  // output record
  GenericRecord outputRecord = new Record(outputSchema);

  // dimensions
  for (int i = 0; i < dimensionsNames.size(); i++) {
    String dimensionName = dimensionsNames.get(i);
    DimensionType dimensionType = dimensionTypes.get(i);
    Object dimensionValue = ThirdeyeAvroUtils.getDimensionFromRecord(inputRecord, dimensionName);
    String dimensionValueStr = String.valueOf(dimensionValue);

    // add original dimension value with whitelist applied
    Object whitelistDimensionValue = dimensionValue;
    if (whitelist != null) {
      List<String> whitelistDimensions = whitelist.get(dimensionName);
      if (CollectionUtils.isNotEmpty(whitelistDimensions)) {
        // whitelist config exists for this dimension but value not present in whitelist
        if (!whitelistDimensions.contains(dimensionValueStr)) {
          whitelistDimensionValue = dimensionType.getValueFromString(nonWhitelistValueMap.get(dimensionName));
        }
      }
    }
    outputRecord.put(dimensionName, whitelistDimensionValue);

    // add column for topk, if topk config exists for that column
    if (topKDimensionsMap.containsKey(dimensionName)) {
      Set<String> topKDimensionValues = topKDimensionsMap.get(dimensionName);
      // if topk config exists for that dimension
      if (CollectionUtils.isNotEmpty(topKDimensionValues)) {
        String topkDimensionName = dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
        Object topkDimensionValue = dimensionValue;
        // topk config exists for this dimension, but value not present in topk
        if (!topKDimensionValues.contains(dimensionValueStr) &&
            (whitelist == null || whitelist.get(dimensionName) == null || !whitelist.get(dimensionName).contains(dimensionValueStr))) {
          topkDimensionValue = ThirdEyeConstants.OTHER;
        }
        outputRecord.put(topkDimensionName, topkDimensionValue);
      }
    }
  }

  // metrics
  for (int i = 0; i < metricNames.size(); i++) {
    String metricName = metricNames.get(i);
    MetricType metricType = metricTypes.get(i);
    outputRecord.put(metricName, ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, metricName, metricType));
  }

  // time
  outputRecord.put(timeColumnName, ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, timeColumnName));

  AvroKey<GenericRecord> outputKey = new AvroKey<GenericRecord>(outputRecord);
  context.write(outputKey, NullWritable.get());
}
 
Example 13
Source File: DerivedColumnTransformationTest.java    From incubator-pinot with Apache License 2.0
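Another test variant of the same mapper; it differs from the previous example mainly in writing metrics without explicit metric types.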
@Override
public void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
    throws IOException, InterruptedException {

  // input record
  GenericRecord inputRecord = key.datum();

  // output record
  GenericRecord outputRecord = new Record(outputSchema);

  // dimensions
  for (int i = 0; i < dimensionsNames.size(); i++) {
    String dimensionName = dimensionsNames.get(i);
    DimensionType dimensionType = dimensionTypes.get(i);
    Object dimensionValue = ThirdeyeAvroUtils.getDimensionFromRecord(inputRecord, dimensionName);
    String dimensionValueStr = String.valueOf(dimensionValue);

    // add original dimension value with whitelist applied
    Object whitelistDimensionValue = dimensionValue;
    if (whitelist != null) {
      List<String> whitelistDimensions = whitelist.get(dimensionName);
      if (CollectionUtils.isNotEmpty(whitelistDimensions)) {
        // whitelist config exists for this dimension but value not present in whitelist
        if (!whitelistDimensions.contains(dimensionValueStr)) {
          whitelistDimensionValue = dimensionType.getValueFromString(nonWhitelistValueMap.get(dimensionName));
        }
      }
    }
    outputRecord.put(dimensionName, whitelistDimensionValue);

    // add column for topk, if topk config exists for that column
    if (topKDimensionsMap.containsKey(dimensionName)) {
      Set<String> topKDimensionValues = topKDimensionsMap.get(dimensionName);
      // if topk config exists for that dimension
      if (CollectionUtils.isNotEmpty(topKDimensionValues)) {
        String topkDimensionName = dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
        Object topkDimensionValue = dimensionValue;
        // topk config exists for this dimension, but value not present in topk
        if (!topKDimensionValues.contains(dimensionValueStr) &&
            (whitelist == null || whitelist.get(dimensionName) == null || !whitelist.get(dimensionName).contains(dimensionValueStr))) {
          topkDimensionValue = ThirdEyeConstants.OTHER;
        }
        outputRecord.put(topkDimensionName, String.valueOf(topkDimensionValue));
      }
    }
  }

  // metrics
  for (String metric : metricNames) {
    outputRecord.put(metric, ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, metric));
  }

  // time
  outputRecord.put(timeColumnName, ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, timeColumnName));

  AvroKey<GenericRecord> outputKey = new AvroKey<GenericRecord>(outputRecord);
  context.write(outputKey, NullWritable.get());
}
 
Example 14
Source File: OSMWayMapper.java    From geowave with Apache License 2.0
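The way mapper mirrors the node mapper above, additionally storing the way's node references as an AvroLongArray.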
@Override
public void map(final AvroKey<AvroWay> key, final NullWritable value, final Context context)
    throws IOException, InterruptedException {

  final AvroWay way = key.datum();
  final AvroPrimitive p = way.getCommon();

  final Mutation m = new Mutation(getIdHash(p.getId()));
  // Mutation m = new Mutation(_longWriter.writeField(p.getId()));
  // Mutation m = new Mutation(p.getId().toString());

  put(m, ColumnFamily.WAY, ColumnQualifier.ID, p.getId());

  final AvroLongArray lr = new AvroLongArray();
  lr.setIds(way.getNodes());

  put(m, ColumnFamily.WAY, ColumnQualifier.REFERENCES, lr);

  if (!Long.valueOf(0).equals(p.getVersion())) {
    put(m, ColumnFamily.WAY, ColumnQualifier.VERSION, p.getVersion());
  }

  if (!Long.valueOf(0).equals(p.getTimestamp())) {
    put(m, ColumnFamily.WAY, ColumnQualifier.TIMESTAMP, p.getTimestamp());
  }

  if (!Long.valueOf(0).equals(p.getChangesetId())) {
    put(m, ColumnFamily.WAY, ColumnQualifier.CHANGESET, p.getChangesetId());
  }

  if (!Long.valueOf(0).equals(p.getUserId())) {
    put(m, ColumnFamily.WAY, ColumnQualifier.USER_ID, p.getUserId());
  }

  put(m, ColumnFamily.WAY, ColumnQualifier.USER_TEXT, p.getUserName());
  put(m, ColumnFamily.WAY, ColumnQualifier.OSM_VISIBILITY, p.getVisible());

  for (final Map.Entry<String, String> kvp : p.getTags().entrySet()) {
    put(m, ColumnFamily.WAY, kvp.getKey(), kvp.getValue());
  }

  context.write(_tableName, m);
}
 
Example 15
Source File: OSMRelationMapper.java    From geowave with Apache License 2.0
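The relation mapper follows the same pattern, emitting one column per relation member (role, member id, and member type) alongside the common fields.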
@Override
public void map(final AvroKey<AvroRelation> key, final NullWritable value, final Context context)
    throws IOException, InterruptedException {

  final AvroRelation relation = key.datum();
  final AvroPrimitive p = relation.getCommon();

  final Mutation m = new Mutation(getIdHash(p.getId()));
  // Mutation m = new Mutation(_longWriter.writeField(p.getId()));
  // Mutation m = new Mutation(p.getId().toString());

  put(m, ColumnFamily.RELATION, ColumnQualifier.ID, p.getId());

  int i = 0;
  for (final AvroRelationMember rm : relation.getMembers()) {
    put(
        m,
        ColumnFamily.RELATION,
        ColumnQualifier.getRelationMember(ColumnQualifier.REFERENCE_ROLEID_PREFIX, i),
        rm.getRole());
    put(
        m,
        ColumnFamily.RELATION,
        ColumnQualifier.getRelationMember(ColumnQualifier.REFERENCE_MEMID_PREFIX, i),
        rm.getMember());
    put(
        m,
        ColumnFamily.RELATION,
        ColumnQualifier.getRelationMember(ColumnQualifier.REFERENCE_TYPE_PREFIX, i),
        rm.getMemberType().toString());
    i++;
  }

  if (!Long.valueOf(0).equals(p.getVersion())) {
    put(m, ColumnFamily.RELATION, ColumnQualifier.VERSION, p.getVersion());
  }

  if (!Long.valueOf(0).equals(p.getTimestamp())) {
    put(m, ColumnFamily.RELATION, ColumnQualifier.TIMESTAMP, p.getTimestamp());
  }

  if (!Long.valueOf(0).equals(p.getChangesetId())) {
    put(m, ColumnFamily.RELATION, ColumnQualifier.CHANGESET, p.getChangesetId());
  }

  if (!Long.valueOf(0).equals(p.getUserId())) {
    put(m, ColumnFamily.RELATION, ColumnQualifier.USER_ID, p.getUserId());
  }

  put(m, ColumnFamily.RELATION, ColumnQualifier.USER_TEXT, p.getUserName());
  put(m, ColumnFamily.RELATION, ColumnQualifier.OSM_VISIBILITY, p.getVisible());

  for (final Map.Entry<String, String> kvp : p.getTags().entrySet()) {
    put(m, ColumnFamily.RELATION, kvp.getKey(), kvp.getValue());
  }

  context.write(_tableName, m);
}
 
Example 16
Source File: CassandraKeyComparator.java    From hdfs2cass with Apache License 2.0
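This comparator unwraps both ByteBuffer keys with datum() and delegates to a ByteBuffer comparison.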
@Override
public int compare(AvroKey<ByteBuffer> o1, AvroKey<ByteBuffer> o2) {
  final ByteBuffer key1 = o1.datum();
  final ByteBuffer key2 = o2.datum();
  return compare(key1, key2);
}