org.apache.avro.mapred.AvroKey Java Examples

The following examples show how to use org.apache.avro.mapred.AvroKey. Each example is taken from an open-source project; the source file, project, and license are noted above it.
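Before the project examples, here is a minimal self-contained sketch — not taken from any of the projects below, with illustrative class names — showing the two roles AvroKey plays throughout: it is registered as a key class on the job (with its Avro schema supplied separately through AvroJob), and it is a thin wrapper whose datum() method exposes the underlying Avro record inside a mapper.

import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.avro.mapreduce.AvroKeyOutputFormat;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;

public class AvroKeySketch {

  // With AvroKeyInputFormat, each record arrives already wrapped in an AvroKey;
  // datum() unwraps the underlying Avro object.
  public static class IdentityMapper
      extends Mapper<AvroKey<GenericRecord>, NullWritable, AvroKey<GenericRecord>, NullWritable> {
    @Override
    protected void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
        throws IOException, InterruptedException {
      context.write(new AvroKey<>(key.datum()), NullWritable.get());
    }
  }

  public static void configure(Job job, Schema schema) {
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    job.setMapperClass(IdentityMapper.class);
    job.setMapOutputKeyClass(AvroKey.class);
    job.setMapOutputValueClass(NullWritable.class);
    // AvroKey carries no schema of its own; registering schemas through AvroJob
    // also installs the Avro serialization that Hadoop needs for AvroKey.
    AvroJob.setInputKeySchema(job, schema);
    AvroJob.setMapOutputKeySchema(job, schema);
    AvroJob.setOutputKeySchema(job, schema);
  }
}

The org.apache.avro.mapreduce variants of AvroJob and AvroKeyInputFormat used here are the new-API counterparts that most of the examples below rely on.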
Example #1
Source File: HiveIncrPullSource.java    From hudi with Apache License 2.0
@Override
protected InputBatch<JavaRDD<GenericRecord>> fetchNewData(Option<String> lastCheckpointStr, long sourceLimit) {
  try {
    // find the source commit to pull
    Option<String> commitToPull = findCommitToPull(lastCheckpointStr);

    if (!commitToPull.isPresent()) {
      return new InputBatch<>(Option.empty(), lastCheckpointStr.isPresent() ? lastCheckpointStr.get() : "");
    }

    // read the files out.
    List<FileStatus> commitDeltaFiles = Arrays.asList(fs.listStatus(new Path(incrPullRootPath, commitToPull.get())));
    String pathStr = commitDeltaFiles.stream().map(f -> f.getPath().toString()).collect(Collectors.joining(","));
    JavaPairRDD<AvroKey, NullWritable> avroRDD = sparkContext.newAPIHadoopFile(pathStr, AvroKeyInputFormat.class,
        AvroKey.class, NullWritable.class, sparkContext.hadoopConfiguration());
    return new InputBatch<>(Option.of(avroRDD.keys().map(r -> ((GenericRecord) r.datum()))),
        String.valueOf(commitToPull.get()));
  } catch (IOException ioe) {
    throw new HoodieIOException("Unable to read from source from checkpoint: " + lastCheckpointStr, ioe);
  }
}
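Note that each AvroKey is unwrapped to its GenericRecord datum within the same transformation chain. This matters because Hadoop input formats may reuse record objects across calls, and Spark's documentation advises converting or copying such records before caching or shuffling them.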
 
Example #2
Source File: CassandraPartitioner.java    From hdfs2cass with Apache License 2.0
@Override
public int getPartition(AvroKey<ByteBuffer> key, Object value, int numReducers) {
  if (distributeRandomly) {
    return reducers.get(random.nextInt(reducers.size()));
  }

  final Token token = partitioner.getToken(key.datum());
  BigInteger bigIntToken;
  if (token instanceof BigIntegerToken) {
    bigIntToken = ((BigIntegerToken) token).token.abs();
  } else if (token instanceof LongToken) {
    bigIntToken = BigInteger.valueOf(((LongToken) token).token).add(MURMUR3_SCALE);
  } else {
    throw new RuntimeException("Invalid partitioner Token type. Only BigIntegerToken and LongToken supported");
  }
  return reducers.get(bigIntToken.divide(rangePerReducer).intValue());
}
 
Example #3
Source File: AvroKeyMapper.java    From incubator-gobblin with Apache License 2.0
@Override
protected void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
    throws IOException, InterruptedException {
  if (context.getNumReduceTasks() == 0) {
    context.write(key, NullWritable.get());
  } else {
    populateComparableKeyRecord(key.datum(), this.outKey.datum());
    this.outValue.datum(key.datum());
    try {
      context.write(this.outKey, this.outValue);
    } catch (AvroRuntimeException e) {
      final Path[] paths = ((CombineFileSplit) context.getInputSplit()).getPaths();
      throw new IOException("Unable to process paths " + StringUtils.join(paths, ','), e);
    }
  }
  context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1);
}
 
Example #4
Source File: VectorExportMapper.java    From geowave with Apache License 2.0
@Override
protected void map(
    final GeoWaveInputKey key,
    final SimpleFeature value,
    final Mapper<GeoWaveInputKey, SimpleFeature, AvroKey<AvroSimpleFeatureCollection>, NullWritable>.Context context)
    throws IOException, InterruptedException {
  AvroSFCWriter avroWriter = adapterIdToAvroWriterMap.get(key.getInternalAdapterId());
  if (avroWriter == null) {
    avroWriter = new AvroSFCWriter(value.getFeatureType(), batchSize);
    adapterIdToAvroWriterMap.put(key.getInternalAdapterId(), avroWriter);
  }
  final AvroSimpleFeatureCollection retVal = avroWriter.write(value);
  if (retVal != null) {
    outKey.datum(retVal);
    context.write(outKey, outVal);
  }
}
 
Example #5
Source File: RegressionNaiveTrain.java    From ml-ease with Apache License 2.0
@Override
public int getPartition(AvroKey<String> key,
                        AvroValue<Integer> value,
                        int numPartitions)
{
  String k = key.datum().toString();
  if (_partitionIdMap != null && _partitionIdMap.containsKey(k))
  {
    int partitionId = _partitionIdMap.get(k);
    return partitionId % numPartitions;
  }
  // Mask the sign bit instead of Math.abs(): Math.abs(Integer.MIN_VALUE) is
  // still negative and would produce an invalid partition number.
  return (k.hashCode() & Integer.MAX_VALUE) % numPartitions;
}
 
Example #6
Source File: HadoopSegmentPreprocessingJob.java    From incubator-pinot with Apache License 2.0
private void setHadoopJobConfigs(Job job, int numInputPaths) {
  job.getConfiguration().set(JobContext.JOB_NAME, this.getClass().getName());
  // Turn this on so that class paths specified by the user take precedence.
  job.getConfiguration().set(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, "true");
  // Turn this off since we don't need the empty _SUCCESS marker file in the output directory
  job.getConfiguration().set(FileOutputCommitter.SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, "false");

  job.setJarByClass(HadoopSegmentPreprocessingJob.class);

  String hadoopTokenFileLocation = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION);
  if (hadoopTokenFileLocation != null) {
    job.getConfiguration().set(MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY, hadoopTokenFileLocation);
  }

  // Mapper configs.
  job.setMapperClass(SegmentPreprocessingMapper.class);
  job.setMapOutputKeyClass(AvroKey.class);
  job.setMapOutputValueClass(AvroValue.class);
  job.getConfiguration().setInt(JobContext.NUM_MAPS, numInputPaths);

  // Reducer configs.
  job.setReducerClass(SegmentPreprocessingReducer.class);
  job.setOutputKeyClass(AvroKey.class);
  job.setOutputValueClass(NullWritable.class);
}
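One detail this method relies on but does not show: when AvroKey and AvroValue are set as output classes, the corresponding Avro schemas must also be registered on the job, or Hadoop has no serializer for them at runtime. A sketch of the missing calls, assuming keySchema and valueSchema are the schemas the mapper actually emits (the real job derives them from its configuration elsewhere):

import org.apache.avro.Schema;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.hadoop.mapreduce.Job;

final class PreprocessingSchemaSetup {
  // Hypothetical helper; the schemas must match the records the mapper emits.
  static void registerSchemas(Job job, Schema keySchema, Schema valueSchema) {
    AvroJob.setMapOutputKeySchema(job, keySchema);     // pairs with AvroKey.class
    AvroJob.setMapOutputValueSchema(job, valueSchema); // pairs with AvroValue.class
    AvroJob.setOutputKeySchema(job, keySchema);        // pairs with the reducer's AvroKey output
  }
}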
 
Example #7
Source File: TransformPhaseJob.java    From incubator-pinot with Apache License 2.0
@Override
public void reduce(IntWritable key, Iterable<AvroValue<GenericRecord>> values, Context context)
    throws IOException, InterruptedException {
  for (AvroValue<GenericRecord> value : values) {
    GenericRecord record = value.datum();
    context.write(new AvroKey<GenericRecord>(record), NullWritable.get());
  }
}
 
Example #8
Source File: CompactionAvroJobConfigurator.java    From incubator-gobblin with Apache License 2.0
protected void configureReducer(Job job) throws IOException {
  job.setOutputFormatClass(AvroKeyCompactorOutputFormat.class);
  job.setReducerClass(AvroKeyDedupReducer.class);
  job.setOutputKeyClass(AvroKey.class);
  job.setOutputValueClass(NullWritable.class);
  setNumberOfReducers(job);
}
 
Example #9
Source File: TransformPhaseJob.java    From incubator-pinot with Apache License 2.0
@Override
public void map(AvroKey<GenericRecord> recordWrapper, NullWritable value, Context context)
    throws IOException, InterruptedException {
  GenericRecord record = recordWrapper.datum();
  GenericRecord outputRecord = transformUDF.transformRecord(sourceName, record);

  if (outputRecord != null) {

    IntWritable key = new IntWritable(reducerKey);
    reducerKey = (reducerKey == numReducers) ? (1) : (reducerKey + 1);
    context.write(key, new AvroValue<GenericRecord>(outputRecord));
  }

}
 
Example #10
Source File: CombineAvroKeyInputFormat.java    From incubator-pinot with Apache License 2.0
@Override
public RecordReader<AvroKey<T>, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException {
  Class cls = AvroKeyRecordReaderWrapper.class;

  return new CombineFileRecordReader<>((CombineFileSplit) split, context,
      (Class<? extends RecordReader<AvroKey<T>, NullWritable>>) cls);
}
 
Example #11
Source File: SegmentPreprocessingMapper.java    From incubator-pinot with Apache License 2.0
@Override
public void map(AvroKey<GenericRecord> record, NullWritable value, final Context context)
    throws IOException, InterruptedException {

  if (_isAppend) {
    // Normalize time column value and check against sample value
    String timeColumnValue = record.datum().get(_timeColumn).toString();
    String normalizedTimeColumnValue = _normalizedDateSegmentNameGenerator.getNormalizedDate(timeColumnValue);

    if (!normalizedTimeColumnValue.equals(_sampleNormalizedTimeColumnValue) && _firstInstanceOfMismatchedTime) {
      _firstInstanceOfMismatchedTime = false;
      // TODO: Create a custom exception and gracefully catch this exception outside, changing what the path to input
      // into segment creation should be
      LOGGER.warn("This segment contains multiple time units. Sample is {}, current is {}",
          _sampleNormalizedTimeColumnValue, normalizedTimeColumnValue);
    }
  }

  final GenericRecord inputRecord = record.datum();
  final Schema schema = inputRecord.getSchema();
  Preconditions.checkArgument(_outputSchema.equals(schema), "The schema of all avro files should be the same!");

  GenericRecord outputKey = new GenericData.Record(_outputKeySchema);
  if (_sortedColumn == null) {
    outputKey.put("hashcode", inputRecord.hashCode());
  } else if (_enablePartitioning) {
    outputKey.put(_sortedColumn, inputRecord.get(_sortedColumn));
  } else {
    outputKey.put(_sortedColumn, inputRecord.get(_sortedColumn));
    outputKey.put("hashcode", inputRecord.hashCode());
  }

  try {
    context.write(new AvroKey<>(outputKey), new AvroValue<>(inputRecord));
  } catch (Exception e) {
    LOGGER.error("Exception when writing context on mapper!");
    throw e;
  }
}
 
Example #12
Source File: SegmentPreprocessingReducer.java    From incubator-pinot with Apache License 2.0
@Override
public void reduce(final T inputRecord, final Iterable<AvroValue<GenericRecord>> values, final Context context)
    throws IOException, InterruptedException {
  for (final AvroValue<GenericRecord> value : values) {
    String fileName = generateFileName();
    _multipleOutputs.write(new AvroKey<>(value.datum()), NullWritable.get(), fileName);
  }
}
 
Example #13
Source File: SimpleAvroJob.java    From datafu with Apache License 2.0
@Override
protected void reduce(AvroKey<GenericRecord> key,
                      Iterable<AvroValue<GenericRecord>> values,
                      Context context) throws IOException, InterruptedException
{
  long count = 0L;
  for (AvroValue<GenericRecord> value : values)
  {
    count += (Long)value.datum().get("count");
  }
  output.put("id", key.datum().get("id"));
  output.put("count",count);
  context.write(new AvroKey<GenericRecord>(output), null);
}
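A null output value works when the output format is AvroKeyOutputFormat (or a variant of it), since its record writer appends only the key's datum to the Avro container file and ignores the value.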
 
Example #14
Source File: PartitioningCombiner.java    From datafu with Apache License 2.0
@SuppressWarnings("unchecked")
public void reduce(Object keyObj,
                    Iterable<Object> values,
                    ReduceContext<Object,Object,Object,Object> context) throws IOException, InterruptedException
{           
  Accumulator<GenericRecord,GenericRecord> acc = getAccumulator();
      
  if (acc == null)
  {
    throw new RuntimeException("No accumulator set for combiner!");
  }
  
  // Reset the accumulator's internal state before accumulating this key's values.
  acc.cleanup();
                  
  long accumulatedCount = 0;    
  for (Object valueObj : values)
  {       
    AvroValue<GenericRecord> value = (AvroValue<GenericRecord>)valueObj;
    acc.accumulate(value.datum());
    accumulatedCount++;
  }
  
  if (accumulatedCount > 0)
  {
    GenericRecord intermediateValue = acc.getFinal();
    if (intermediateValue != null)
    {
      context.write((AvroKey<GenericRecord>)keyObj,new AvroValue<GenericRecord>(intermediateValue));
    }
  }
}
 
Example #15
Source File: AvroKeyValueIdentityMapper.java    From datafu with Apache License 2.0
@Override
protected void map(Object keyObj, Object valueObj, Context context) throws java.io.IOException, java.lang.InterruptedException
{
  @SuppressWarnings("unchecked")
  GenericRecord input = ((AvroKey<GenericRecord>)keyObj).datum();
  GenericRecord key = (GenericRecord)input.get("key");
  GenericRecord value = (GenericRecord)input.get("value");
  context.write(new AvroKey<GenericRecord>(key),new AvroValue<GenericRecord>(value));
}
 
Example #16
Source File: CollapsingMapper.java    From datafu with Apache License 2.0
public void collect(GenericRecord key, GenericRecord value) throws IOException, InterruptedException
{        
  if (key == null)
  {
    throw new RuntimeException("key is null");
  }
  if (value == null)
  {
    throw new RuntimeException("value is null");
  }
  getContext().write(new AvroKey<GenericRecord>(key), new AvroValue<GenericRecord>(value));
}
 
Example #17
Source File: AvroHdfsFileSource.java    From components with Apache License 2.0
private AvroHdfsFileSource(UgiDoAs doAs, String filepattern, LazyAvroCoder<?> lac, ExtraHadoopConfiguration extraConfig,
        SerializableSplit serializableSplit) {
    super(doAs, filepattern, (Class) AvroKeyInputFormat.class, AvroKey.class, NullWritable.class, extraConfig,
            serializableSplit);
    this.lac = lac;
    setDefaultCoder(LazyAvroKeyWrapper.of(lac), WritableCoder.of(NullWritable.class));
}
 
Example #18
Source File: TimePartitioner.java    From datafu with Apache License 2.0
@Override
public int getPartition(AvroKey<GenericRecord> key, AvroValue<GenericRecord> value, int numReduceTasks)
{
  if (numReduceTasks != this.numReducers)
  {
    throw new RuntimeException("numReduceTasks " + numReduceTasks + " does not match expected " + this.numReducers);
  }
  
  Long time = (Long)key.datum().get("time");
  if (time == null)
  {
    throw new RuntimeException("time is null");
  }
  
  List<Integer> partitions = this.partitionMapping.get(time);

  if (partitions == null)
  {
    throw new RuntimeException("Couldn't find partition for " + time);
  }
  
  GenericRecord extractedKey = (GenericRecord)key.datum().get("value");
  
  if (extractedKey == null)
  {
    throw new RuntimeException("extracted key is null");
  }
  
  int partitionIndex = (extractedKey.hashCode() & Integer.MAX_VALUE) % partitions.size();
  
  return partitions.get(partitionIndex);
}
 
Example #19
Source File: AggregationPhaseJob.java    From incubator-pinot with Apache License 2.0
@Override
public void map(AvroKey<GenericRecord> record, NullWritable value, Context context) throws IOException, InterruptedException {

  // input record
  GenericRecord inputRecord = record.datum();

  // dimensions
  List<Object> dimensions = new ArrayList<>();
  for (String dimension : dimensionNames) {
    Object dimensionValue = ThirdeyeAvroUtils.getDimensionFromRecord(inputRecord, dimension);
    dimensions.add(dimensionValue);
  }

  // metrics
  Number[] metrics = new Number[numMetrics];
  for (int i = 0; i < numMetrics; i++) {
    Number metricValue = ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, metricNames.get(i), metricTypes.get(i));
    metrics[i] = metricValue;
  }

  // time
  long timeValue = ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, timeColumnName).longValue();
  long inputTimeMillis = inputGranularity.toMillis(timeValue);
  long bucketTime = aggregateGranularity.convertToUnit(inputTimeMillis);

  AggregationPhaseMapOutputKey keyWrapper = new AggregationPhaseMapOutputKey(bucketTime, dimensions, dimensionTypes);
  byte[] keyBytes = keyWrapper.toBytes();
  keyWritable.set(keyBytes, 0, keyBytes.length);

  AggregationPhaseMapOutputValue valWrapper = new AggregationPhaseMapOutputValue(metrics, metricTypes);
  byte[] valBytes = valWrapper.toBytes();
  valWritable.set(valBytes, 0, valBytes.length);

  numRecords++;
  context.write(keyWritable, valWritable);
}
 
Example #20
Source File: TopKPhaseJob.java    From incubator-pinot with Apache License 2.0
@Override
public void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
    throws IOException, InterruptedException {

  // input record
  GenericRecord inputRecord = key.datum();

  // read metrics
  Number[] metricValues = new Number[numMetrics];
  for (int i = 0; i < numMetrics; i++) {
    String metricName = metricNames.get(i);
    Number metricValue = ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, metricName);
    metricValues[i] = metricValue;
  }
  TopKPhaseMapOutputValue valWrapper = new TopKPhaseMapOutputValue(metricValues, metricTypes);
  byte[] valBytes = valWrapper.toBytes();
  valWritable.set(valBytes, 0, valBytes.length);

  // read dimensions
  for (int i = 0; i < dimensionNames.size(); i++) {
    String dimensionName = dimensionNames.get(i);
    DimensionType dimensionType = dimensionTypes.get(i);
    Object dimensionValue = ThirdeyeAvroUtils.getDimensionFromRecord(inputRecord, dimensionName);

    TopKPhaseMapOutputKey keyWrapper = new TopKPhaseMapOutputKey(dimensionName, dimensionValue, dimensionType);
    byte[] keyBytes = keyWrapper.toBytes();
    keyWritable.set(keyBytes, 0, keyBytes.length);
    context.write(keyWritable, valWritable);
  }
  TopKPhaseMapOutputKey allKeyWrapper = new TopKPhaseMapOutputKey(TOPK_ALL_DIMENSION_NAME, TOPK_ALL_DIMENSION_VALUE, DimensionType.STRING);
  byte[] allKeyBytes = allKeyWrapper.toBytes();
  keyWritable.set(allKeyBytes, 0, allKeyBytes.length);
  context.write(keyWritable, valWritable);
}
 
Example #21
Source File: JoinPhaseJob.java    From incubator-pinot with Apache License 2.0
@Override
public void map(AvroKey<GenericRecord> recordWrapper, NullWritable value, Context context)
    throws IOException, InterruptedException {
  GenericRecord record = recordWrapper.datum();
  MapOutputValue mapOutputValue = new MapOutputValue(record.getSchema().getName(), record);
  String joinKeyValue = joinKeyExtractor.extractJoinKey(sourceName, record);
  LOGGER.info("Join Key:{}", joinKeyValue);

  if (!"INVALID".equals(joinKeyValue)) {
    context.write(new BytesWritable(joinKeyValue.getBytes()),
        new BytesWritable(mapOutputValue.toBytes()));
  }
}
 
Example #22
Source File: JoinPhaseJob.java    From incubator-pinot with Apache License 2.0
@Override
public void reduce(BytesWritable joinKeyWritable, Iterable<BytesWritable> recordBytesWritable,
    Context context) throws IOException, InterruptedException {
  Map<String, List<GenericRecord>> joinInput = new HashMap<String, List<GenericRecord>>();
  for (BytesWritable writable : recordBytesWritable) {

    byte[] bytes = writable.copyBytes();
    MapOutputValue mapOutputValue = MapOutputValue.fromBytes(bytes, schemaMap);
    String schemaName = mapOutputValue.getSchemaName();
    if (!joinInput.containsKey(schemaName)) {
      joinInput.put(schemaName, new ArrayList<GenericRecord>());
    }
    joinInput.get(schemaName).add(mapOutputValue.getRecord());
  }

  int[] exists = new int[sourceNames.size()];
  for (int i = 0; i < sourceNames.size(); i++) {
    String source = sourceNames.get(i);
    if (joinInput.containsKey(source)) {
      exists[i] = 1;
    } else {
      exists[i] = 0;
    }
  }
  String counterName = Arrays.toString(exists);
  if (!countersMap.containsKey(counterName)) {
    countersMap.put(counterName, new AtomicInteger(0));
  }
  countersMap.get(counterName).incrementAndGet();
  // invoke the udf and pass in the join data
  List<GenericRecord> outputRecords =
      joinUDF.performJoin(new String(joinKeyWritable.copyBytes()), joinInput);
  if (outputRecords != null) {
    for (GenericRecord outputRecord : outputRecords) {
      context.write(new AvroKey<GenericRecord>(outputRecord), NullWritable.get());
    }
  }
}
 
Example #23
Source File: VectorExportMapper.java    From geowave with Apache License 2.0
@Override
protected void setup(
    final Mapper<GeoWaveInputKey, SimpleFeature, AvroKey<AvroSimpleFeatureCollection>, NullWritable>.Context context)
    throws IOException, InterruptedException {
  super.setup(context);
  batchSize =
      context.getConfiguration().getInt(
          VectorMRExportJobRunner.BATCH_SIZE_KEY,
          VectorExportOptions.DEFAULT_BATCH_SIZE);
}
 
Example #24
Source File: VectorExportMapper.java    From geowave with Apache License 2.0
@Override
protected void cleanup(
    final Mapper<GeoWaveInputKey, SimpleFeature, AvroKey<AvroSimpleFeatureCollection>, NullWritable>.Context context)
    throws IOException, InterruptedException {
  super.cleanup(context);
  writeRemainingAvroBatches(context);
}
 
Example #25
Source File: VectorExportMapper.java    From geowave with Apache License 2.0
private void writeRemainingAvroBatches(
    final Mapper<GeoWaveInputKey, SimpleFeature, AvroKey<AvroSimpleFeatureCollection>, NullWritable>.Context context)
    throws IOException, InterruptedException {
  for (final AvroSFCWriter writer : adapterIdToAvroWriterMap.values()) {
    if (writer.avList.size() > 0) {
      writer.simpleFeatureCollection.setSimpleFeatureCollection(writer.avList);
      outKey.datum(writer.simpleFeatureCollection);
      context.write(outKey, outVal);
    }
  }
}
 
Example #26
Source File: OSMNodeMapper.java    From geowave with Apache License 2.0
@Override
public void map(final AvroKey<AvroNode> key, final NullWritable value, final Context context)
    throws IOException, InterruptedException {

  final AvroNode node = key.datum();
  final AvroPrimitive p = node.getCommon();

  final Mutation m = new Mutation(getIdHash(p.getId()));
  // Mutation m = new Mutation(_longWriter.writeField(p.getId()));
  // Mutation m = new Mutation(p.getId().toString());

  put(m, ColumnFamily.NODE, ColumnQualifier.ID, p.getId());
  put(m, ColumnFamily.NODE, ColumnQualifier.LONGITUDE, node.getLongitude());
  put(m, ColumnFamily.NODE, ColumnQualifier.LATITUDE, node.getLatitude());

  if (!Long.valueOf(0).equals(p.getVersion())) {
    put(m, ColumnFamily.NODE, ColumnQualifier.VERSION, p.getVersion());
  }

  if (!Long.valueOf(0).equals(p.getTimestamp())) {
    put(m, ColumnFamily.NODE, ColumnQualifier.TIMESTAMP, p.getTimestamp());
  }

  if (!Long.valueOf(0).equals(p.getChangesetId())) {
    put(m, ColumnFamily.NODE, ColumnQualifier.CHANGESET, p.getChangesetId());
  }

  if (!Long.valueOf(0).equals(p.getUserId())) {
    put(m, ColumnFamily.NODE, ColumnQualifier.USER_ID, p.getUserId());
  }

  put(m, ColumnFamily.NODE, ColumnQualifier.USER_TEXT, p.getUserName());
  put(m, ColumnFamily.NODE, ColumnQualifier.OSM_VISIBILITY, p.getVisible());

  for (final Map.Entry<String, String> kvp : p.getTags().entrySet()) {
    put(m, ColumnFamily.NODE, kvp.getKey(), kvp.getValue().toString());
  }
  context.write(_tableName, m);
}
 
Example #27
Source File: IngestMapper.java    From geowave with Apache License 2.0
@Override
protected void map(
    final AvroKey key,
    final NullWritable value,
    final org.apache.hadoop.mapreduce.Mapper.Context context)
    throws IOException, InterruptedException {
  try (CloseableIterator<GeoWaveData> data =
      ingestWithMapper.toGeoWaveData(key.datum(), indexNames, globalVisibility)) {
    while (data.hasNext()) {
      final GeoWaveData d = data.next();
      context.write(new GeoWaveOutputKey<>(d), d.getValue());
    }
  }
}
 
Example #28
Source File: IntermediateKeyValueMapper.java    From geowave with Apache License 2.0
@Override
protected void map(
    final AvroKey key,
    final NullWritable value,
    final org.apache.hadoop.mapreduce.Mapper.Context context)
    throws IOException, InterruptedException {
  try (CloseableIterator<KeyValueData<WritableComparable<?>, Writable>> data =
      ingestWithReducer.toIntermediateMapReduceData(key.datum())) {
    while (data.hasNext()) {
      final KeyValueData<WritableComparable<?>, Writable> d = data.next();
      context.write(d.getKey(), d.getValue());
    }
  }
}
 
Example #29
Source File: TestMapReduceHBase.java    From kite with Apache License 2.0
@Override
protected void reduce(AvroKey<GenericData.Record> key,
    Iterable<NullWritable> values,
    Context context)
    throws IOException, InterruptedException {
  context.write(key.datum(), null);
}
 
Example #30
Source File: TestMapReduceHBase.java    From kite with Apache License 2.0
@Test
@SuppressWarnings("deprecation")
public void testJobEmptyView() throws Exception {
  Job job = new Job(HBaseTestUtils.getConf());

  String datasetName = tableName + ".TestGenericEntity";

  Dataset<GenericRecord> inputDataset = repo.create("default", "in",
      new DatasetDescriptor.Builder()
          .schemaLiteral(testGenericEntity).build());

  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaLiteral(testGenericEntity)
      .build();
  Dataset<GenericRecord> outputDataset = repo.create("default", datasetName, descriptor);

  DatasetKeyInputFormat.configure(job).readFrom(inputDataset);

  job.setMapperClass(AvroKeyWrapperMapper.class);
  job.setMapOutputKeyClass(AvroKey.class);
  job.setMapOutputValueClass(NullWritable.class);
  AvroJob.setMapOutputKeySchema(job, new Schema.Parser().parse(testGenericEntity));

  job.setReducerClass(AvroKeyWrapperReducer.class);
  job.setOutputKeyClass(GenericData.Record.class);
  job.setOutputValueClass(Void.class);
  AvroJob.setOutputKeySchema(job, new Schema.Parser().parse(testGenericEntity));

  DatasetKeyOutputFormat.configure(job).writeTo(outputDataset);

  Assert.assertTrue(job.waitForCompletion(true));
}