org.apache.hadoop.mapred.RawKeyValueIterator Java Examples

The following examples show how to use org.apache.hadoop.mapred.RawKeyValueIterator. Each snippet is taken from an open-source project; the source file and license are noted above each example.
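For orientation, RawKeyValueIterator is the interface the reduce side uses to pull sorted, already-serialized key/value pairs out of the shuffle/merge phase: next() advances to the next pair, getKey()/getValue() expose the raw bytes as DataInputBuffer objects, and getProgress() reports merge progress. The sketch below is a minimal, hypothetical implementation over an in-memory list of pre-serialized pairs; the class and field names are illustrative only, but the five overridden methods match the interface exactly as it appears in the examples that follow.

import java.io.IOException;
import java.util.Iterator;
import java.util.Map;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.mapred.RawKeyValueIterator;
import org.apache.hadoop.util.Progress;

// Minimal sketch: serves already-serialized key/value pairs held in memory.
public class InMemoryRawKeyValueIterator implements RawKeyValueIterator {
  private final Iterator<Map.Entry<byte[], byte[]>> entries;
  private final Progress progress = new Progress();
  private final DataInputBuffer key = new DataInputBuffer();
  private final DataInputBuffer value = new DataInputBuffer();

  public InMemoryRawKeyValueIterator(Iterable<Map.Entry<byte[], byte[]>> pairs) {
    this.entries = pairs.iterator();
  }

  @Override
  public boolean next() throws IOException {
    if (!entries.hasNext()) {
      return false;
    }
    Map.Entry<byte[], byte[]> e = entries.next();
    // Point the reusable buffers at the raw bytes of the current pair.
    key.reset(e.getKey(), e.getKey().length);
    value.reset(e.getValue(), e.getValue().length);
    return true;
  }

  @Override
  public DataInputBuffer getKey() throws IOException {
    return key;
  }

  @Override
  public DataInputBuffer getValue() throws IOException {
    return value;
  }

  @Override
  public Progress getProgress() {
    return progress;
  }

  @Override
  public void close() throws IOException {
    // Nothing to release for an in-memory source.
  }
}

In practice such an iterator is handed to a ReduceContextImpl (or wrapped around a TezRawKeyValueIterator, as Examples #13–#16 below do) so that a Reducer or combiner can consume the sorted stream.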
Example #1
Source File: ReduceContext.java    From hadoop-gpu with Apache License 2.0
public ReduceContext(Configuration conf, TaskAttemptID taskid,
                     RawKeyValueIterator input, 
                     Counter inputCounter,
                     RecordWriter<KEYOUT,VALUEOUT> output,
                     OutputCommitter committer,
                     StatusReporter reporter,
                     RawComparator<KEYIN> comparator,
                     Class<KEYIN> keyClass,
                     Class<VALUEIN> valueClass
                     ) throws InterruptedException, IOException{
  super(conf, taskid, output, committer, reporter);
  this.input = input;
  this.inputCounter = inputCounter;
  this.comparator = comparator;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keyDeserializer = serializationFactory.getDeserializer(keyClass);
  this.keyDeserializer.open(buffer);
  this.valueDeserializer = serializationFactory.getDeserializer(valueClass);
  this.valueDeserializer.open(buffer);
  hasMore = input.next();
}
 
Example #2
Source File: MergeManagerImpl.java    From big-c with Apache License 2.0
private void combineAndSpill(
    RawKeyValueIterator kvIter,
    Counters.Counter inCounter) throws IOException {
  JobConf job = jobConf;
  Reducer combiner = ReflectionUtils.newInstance(combinerClass, job);
  Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
  Class<V> valClass = (Class<V>) job.getMapOutputValueClass();
  RawComparator<K> comparator = 
    (RawComparator<K>)job.getCombinerKeyGroupingComparator();
  try {
    CombineValuesIterator values = new CombineValuesIterator(
        kvIter, comparator, keyClass, valClass, job, Reporter.NULL,
        inCounter);
    while (values.more()) {
      combiner.reduce(values.getKey(), values, combineCollector,
                      Reporter.NULL);
      values.nextKey();
    }
  } finally {
    combiner.close();
  }
}
 
Example #3
Source File: MergeManagerImpl.java    From big-c with Apache License 2.0
@Override
public RawKeyValueIterator close() throws Throwable {
  // Wait for on-going merges to complete
  if (memToMemMerger != null) { 
    memToMemMerger.close();
  }
  inMemoryMerger.close();
  onDiskMerger.close();
  
  List<InMemoryMapOutput<K, V>> memory = 
    new ArrayList<InMemoryMapOutput<K, V>>(inMemoryMergedMapOutputs);
  inMemoryMergedMapOutputs.clear();
  memory.addAll(inMemoryMapOutputs);
  inMemoryMapOutputs.clear();
  List<CompressAwarePath> disk = new ArrayList<CompressAwarePath>(onDiskMapOutputs);
  onDiskMapOutputs.clear();
  return finalMerge(jobConf, rfs, memory, disk);
}
 
Example #4
Source File: ReduceContext.java    From RDFS with Apache License 2.0
public ReduceContext(Configuration conf, TaskAttemptID taskid,
                     RawKeyValueIterator input, 
                     Counter inputKeyCounter,
                     Counter inputValueCounter,
                     RecordWriter<KEYOUT,VALUEOUT> output,
                     OutputCommitter committer,
                     StatusReporter reporter,
                     RawComparator<KEYIN> comparator,
                     Class<KEYIN> keyClass,
                     Class<VALUEIN> valueClass
                     ) throws InterruptedException, IOException{
  super(conf, taskid, output, committer, reporter);
  this.input = input;
  this.inputKeyCounter = inputKeyCounter;
  this.inputValueCounter = inputValueCounter;
  this.comparator = comparator;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keyDeserializer = serializationFactory.getDeserializer(keyClass);
  this.keyDeserializer.open(buffer);
  this.valueDeserializer = serializationFactory.getDeserializer(valueClass);
  this.valueDeserializer.open(buffer);
  hasMore = input.next();
}
 
Example #5
Source File: MergeManagerImpl.java    From hadoop with Apache License 2.0
private void combineAndSpill(
    RawKeyValueIterator kvIter,
    Counters.Counter inCounter) throws IOException {
  JobConf job = jobConf;
  Reducer combiner = ReflectionUtils.newInstance(combinerClass, job);
  Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
  Class<V> valClass = (Class<V>) job.getMapOutputValueClass();
  RawComparator<K> comparator = 
    (RawComparator<K>)job.getCombinerKeyGroupingComparator();
  try {
    CombineValuesIterator values = new CombineValuesIterator(
        kvIter, comparator, keyClass, valClass, job, Reporter.NULL,
        inCounter);
    while (values.more()) {
      combiner.reduce(values.getKey(), values, combineCollector,
                      Reporter.NULL);
      values.nextKey();
    }
  } finally {
    combiner.close();
  }
}
 
Example #6
Source File: MergeManagerImpl.java    From hadoop with Apache License 2.0
@Override
public RawKeyValueIterator close() throws Throwable {
  // Wait for on-going merges to complete
  if (memToMemMerger != null) { 
    memToMemMerger.close();
  }
  inMemoryMerger.close();
  onDiskMerger.close();
  
  List<InMemoryMapOutput<K, V>> memory = 
    new ArrayList<InMemoryMapOutput<K, V>>(inMemoryMergedMapOutputs);
  inMemoryMergedMapOutputs.clear();
  memory.addAll(inMemoryMapOutputs);
  inMemoryMapOutputs.clear();
  List<CompressAwarePath> disk = new ArrayList<CompressAwarePath>(onDiskMapOutputs);
  onDiskMapOutputs.clear();
  return finalMerge(jobConf, rfs, memory, disk);
}
 
Example #7
Source File: Reducer.java    From hadoop-gpu with Apache License 2.0
public Context(Configuration conf, TaskAttemptID taskid,
               RawKeyValueIterator input, 
               Counter inputCounter,
               RecordWriter<KEYOUT,VALUEOUT> output,
               OutputCommitter committer,
               StatusReporter reporter,
               RawComparator<KEYIN> comparator,
               Class<KEYIN> keyClass,
               Class<VALUEIN> valueClass
               ) throws IOException, InterruptedException {
  super(conf, taskid, input, inputCounter, output, committer, reporter, 
        comparator, keyClass, valueClass);
}
 
Example #8
Source File: MergeManagerImpl.java    From hadoop with Apache License 2.0
@Override
public void merge(List<InMemoryMapOutput<K, V>> inputs) throws IOException {
  if (inputs == null || inputs.size() == 0) {
    return;
  }

  TaskAttemptID dummyMapId = inputs.get(0).getMapId(); 
  List<Segment<K, V>> inMemorySegments = new ArrayList<Segment<K, V>>();
  long mergeOutputSize = 
    createInMemorySegments(inputs, inMemorySegments, 0);
  int noInMemorySegments = inMemorySegments.size();
  
  InMemoryMapOutput<K, V> mergedMapOutputs = 
    unconditionalReserve(dummyMapId, mergeOutputSize, false);
  
  Writer<K, V> writer = 
    new InMemoryWriter<K, V>(mergedMapOutputs.getArrayStream());
  
  LOG.info("Initiating Memory-to-Memory merge with " + noInMemorySegments +
           " segments of total-size: " + mergeOutputSize);

  RawKeyValueIterator rIter = 
    Merger.merge(jobConf, rfs,
                 (Class<K>)jobConf.getMapOutputKeyClass(),
                 (Class<V>)jobConf.getMapOutputValueClass(),
                 inMemorySegments, inMemorySegments.size(),
                 new Path(reduceId.toString()),
                 (RawComparator<K>)jobConf.getOutputKeyComparator(),
                 reporter, null, null, null);
  Merger.writeFile(rIter, writer, reporter, jobConf);
  writer.close();

  LOG.info(reduceId +  
           " Memory-to-Memory merge of the " + noInMemorySegments +
           " files in-memory complete.");

  // Note the output of the merge
  closeInMemoryMergedFile(mergedMapOutputs);
}
 
Example #9
Source File: Reducer.java    From RDFS with Apache License 2.0
public Context(Configuration conf, TaskAttemptID taskid,
               RawKeyValueIterator input, 
               Counter inputKeyCounter,
               Counter inputValueCounter,
               RecordWriter<KEYOUT,VALUEOUT> output,
               OutputCommitter committer,
               StatusReporter reporter,
               RawComparator<KEYIN> comparator,
               Class<KEYIN> keyClass,
               Class<VALUEIN> valueClass
               ) throws IOException, InterruptedException {
  super(conf, taskid, input, inputKeyCounter, inputValueCounter,
        output, committer, reporter, 
        comparator, keyClass, valueClass);
}
 
Example #10
Source File: ReduceContextImpl.java    From big-c with Apache License 2.0
public ReduceContextImpl(Configuration conf, TaskAttemptID taskid,
                         RawKeyValueIterator input, 
                         Counter inputKeyCounter,
                         Counter inputValueCounter,
                         RecordWriter<KEYOUT,VALUEOUT> output,
                         OutputCommitter committer,
                         StatusReporter reporter,
                         RawComparator<KEYIN> comparator,
                         Class<KEYIN> keyClass,
                         Class<VALUEIN> valueClass
                        ) throws InterruptedException, IOException{
  super(conf, taskid, output, committer, reporter);
  this.input = input;
  this.inputKeyCounter = inputKeyCounter;
  this.inputValueCounter = inputValueCounter;
  this.comparator = comparator;
  this.serializationFactory = new SerializationFactory(conf);
  this.keyDeserializer = serializationFactory.getDeserializer(keyClass);
  this.keyDeserializer.open(buffer);
  this.valueDeserializer = serializationFactory.getDeserializer(valueClass);
  this.valueDeserializer.open(buffer);
  hasMore = input.next();
  this.keyClass = keyClass;
  this.valueClass = valueClass;
  this.conf = conf;
  this.taskid = taskid;
}
 
Example #11
Source File: MergeManagerImpl.java    From big-c with Apache License 2.0
@Override
public void merge(List<InMemoryMapOutput<K, V>> inputs) throws IOException {
  if (inputs == null || inputs.size() == 0) {
    return;
  }

  TaskAttemptID dummyMapId = inputs.get(0).getMapId(); 
  List<Segment<K, V>> inMemorySegments = new ArrayList<Segment<K, V>>();
  long mergeOutputSize = 
    createInMemorySegments(inputs, inMemorySegments, 0);
  int noInMemorySegments = inMemorySegments.size();
  
  InMemoryMapOutput<K, V> mergedMapOutputs = 
    unconditionalReserve(dummyMapId, mergeOutputSize, false);
  
  Writer<K, V> writer = 
    new InMemoryWriter<K, V>(mergedMapOutputs.getArrayStream());
  
  LOG.info("Initiating Memory-to-Memory merge with " + noInMemorySegments +
           " segments of total-size: " + mergeOutputSize);

  RawKeyValueIterator rIter = 
    Merger.merge(jobConf, rfs,
                 (Class<K>)jobConf.getMapOutputKeyClass(),
                 (Class<V>)jobConf.getMapOutputValueClass(),
                 inMemorySegments, inMemorySegments.size(),
                 new Path(reduceId.toString()),
                 (RawComparator<K>)jobConf.getOutputKeyComparator(),
                 reporter, null, null, null);
  Merger.writeFile(rIter, writer, reporter, jobConf);
  writer.close();

  LOG.info(reduceId +  
           " Memory-to-Memory merge of the " + noInMemorySegments +
           " files in-memory complete.");

  // Note the output of the merge
  closeInMemoryMergedFile(mergedMapOutputs);
}
 
Example #12
Source File: ReduceContextImpl.java    From hadoop with Apache License 2.0
public ReduceContextImpl(Configuration conf, TaskAttemptID taskid,
                         RawKeyValueIterator input, 
                         Counter inputKeyCounter,
                         Counter inputValueCounter,
                         RecordWriter<KEYOUT,VALUEOUT> output,
                         OutputCommitter committer,
                         StatusReporter reporter,
                         RawComparator<KEYIN> comparator,
                         Class<KEYIN> keyClass,
                         Class<VALUEIN> valueClass
                        ) throws InterruptedException, IOException{
  super(conf, taskid, output, committer, reporter);
  this.input = input;
  this.inputKeyCounter = inputKeyCounter;
  this.inputValueCounter = inputValueCounter;
  this.comparator = comparator;
  this.serializationFactory = new SerializationFactory(conf);
  this.keyDeserializer = serializationFactory.getDeserializer(keyClass);
  this.keyDeserializer.open(buffer);
  this.valueDeserializer = serializationFactory.getDeserializer(valueClass);
  this.valueDeserializer.open(buffer);
  hasMore = input.next();
  this.keyClass = keyClass;
  this.valueClass = valueClass;
  this.conf = conf;
  this.taskid = taskid;
}
 
Example #13
Source File: MRCombiner.java    From tez with Apache License 2.0
private static <KEYIN, VALUEIN, KEYOUT, VALUEOUT> org.apache.hadoop.mapreduce.Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context createReduceContext(
    Configuration conf,
    TaskAttemptID mrTaskAttemptID,
    final TezRawKeyValueIterator rawIter,
    Counter combineInputRecordsCounter,
    Counter combineOutputRecordsCounter,
    RecordWriter<KEYOUT, VALUEOUT> recordWriter,
    MRTaskReporter reporter,
    RawComparator<KEYIN> comparator,
    Class<KEYIN> keyClass,
    Class<VALUEIN> valClass) throws InterruptedException, IOException {

  RawKeyValueIterator r = new RawKeyValueIterator() {

    @Override
    public boolean next() throws IOException {
      return rawIter.next();
    }

    @Override
    public DataInputBuffer getValue() throws IOException {
      return rawIter.getValue();
    }

    @Override
    public Progress getProgress() {
      return rawIter.getProgress();
    }

    @Override
    public DataInputBuffer getKey() throws IOException {
      return rawIter.getKey();
    }

    @Override
    public void close() throws IOException {
      rawIter.close();
    }
  };

  ReduceContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> rContext = new ReduceContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT>(
      conf, mrTaskAttemptID, r, null,
      combineInputRecordsCounter, recordWriter, null, reporter, comparator,
      keyClass, valClass);

  org.apache.hadoop.mapreduce.Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context reducerContext = new WrappedReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>()
      .getReducerContext(rContext);
  return reducerContext;
}
 
Example #14
Source File: MRTask.java    From tez with Apache License 2.0
protected static <INKEY,INVALUE,OUTKEY,OUTVALUE>
org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE>.Context
createReduceContext(org.apache.hadoop.mapreduce.Reducer
                      <INKEY,INVALUE,OUTKEY,OUTVALUE> reducer,
                    Configuration job,
                    TaskAttemptID taskId,
                    final TezRawKeyValueIterator rIter,
                    org.apache.hadoop.mapreduce.Counter inputKeyCounter,
                    org.apache.hadoop.mapreduce.Counter inputValueCounter,
                    org.apache.hadoop.mapreduce.RecordWriter<OUTKEY,OUTVALUE> output,
                    org.apache.hadoop.mapreduce.OutputCommitter committer,
                    org.apache.hadoop.mapreduce.StatusReporter reporter,
                    RawComparator<INKEY> comparator,
                    Class<INKEY> keyClass, Class<INVALUE> valueClass
) throws IOException, InterruptedException {
  RawKeyValueIterator r =
      new RawKeyValueIterator() {

        @Override
        public boolean next() throws IOException {
          return rIter.next();
        }

        @Override
        public DataInputBuffer getValue() throws IOException {
          return rIter.getValue();
        }

        @Override
        public Progress getProgress() {
          return rIter.getProgress();
        }

        @Override
        public DataInputBuffer getKey() throws IOException {
          return rIter.getKey();
        }

        @Override
        public void close() throws IOException {
          rIter.close();
        }
      };
  org.apache.hadoop.mapreduce.ReduceContext<INKEY, INVALUE, OUTKEY, OUTVALUE>
  reduceContext =
    new ReduceContextImpl<INKEY, INVALUE, OUTKEY, OUTVALUE>(
        job,
        taskId,
        r,
        inputKeyCounter,
        inputValueCounter,
        output,
        committer,
        reporter,
        comparator,
        keyClass,
        valueClass);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Using key class: " + keyClass
        + ", valueClass: " + valueClass);
  }

  org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE>.Context
      reducerContext =
        new WrappedReducer<INKEY, INVALUE, OUTKEY, OUTVALUE>().getReducerContext(
            reduceContext);

  return reducerContext;
}
 
Example #15
Source File: MRCombiner.java    From incubator-tez with Apache License 2.0
private static <KEYIN, VALUEIN, KEYOUT, VALUEOUT> org.apache.hadoop.mapreduce.Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context createReduceContext(
    Configuration conf,
    TaskAttemptID mrTaskAttemptID,
    final TezRawKeyValueIterator rawIter,
    Counter combineInputKeyCounter,
    Counter combineInputValueCounter,
    RecordWriter<KEYOUT, VALUEOUT> recordWriter,
    MRTaskReporter reporter,
    RawComparator<KEYIN> comparator,
    Class<KEYIN> keyClass,
    Class<VALUEIN> valClass) throws InterruptedException, IOException {

  RawKeyValueIterator r = new RawKeyValueIterator() {

    @Override
    public boolean next() throws IOException {
      return rawIter.next();
    }

    @Override
    public DataInputBuffer getValue() throws IOException {
      return rawIter.getValue();
    }

    @Override
    public Progress getProgress() {
      return rawIter.getProgress();
    }

    @Override
    public DataInputBuffer getKey() throws IOException {
      return rawIter.getKey();
    }

    @Override
    public void close() throws IOException {
      rawIter.close();
    }
  };

  ReduceContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> rContext = new ReduceContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT>(
      conf, mrTaskAttemptID, r, combineInputKeyCounter,
      combineInputValueCounter, recordWriter, null, reporter, comparator,
      keyClass, valClass);

  org.apache.hadoop.mapreduce.Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context reducerContext = new WrappedReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>()
      .getReducerContext(rContext);
  return reducerContext;
}
 
Example #16
Source File: MRTask.java    From incubator-tez with Apache License 2.0
protected static <INKEY,INVALUE,OUTKEY,OUTVALUE>
org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE>.Context
createReduceContext(org.apache.hadoop.mapreduce.Reducer
                      <INKEY,INVALUE,OUTKEY,OUTVALUE> reducer,
                    Configuration job,
                    TaskAttemptID taskId,
                    final TezRawKeyValueIterator rIter,
                    org.apache.hadoop.mapreduce.Counter inputKeyCounter,
                    org.apache.hadoop.mapreduce.Counter inputValueCounter,
                    org.apache.hadoop.mapreduce.RecordWriter<OUTKEY,OUTVALUE> output,
                    org.apache.hadoop.mapreduce.OutputCommitter committer,
                    org.apache.hadoop.mapreduce.StatusReporter reporter,
                    RawComparator<INKEY> comparator,
                    Class<INKEY> keyClass, Class<INVALUE> valueClass
) throws IOException, InterruptedException {
  RawKeyValueIterator r =
      new RawKeyValueIterator() {

        @Override
        public boolean next() throws IOException {
          return rIter.next();
        }

        @Override
        public DataInputBuffer getValue() throws IOException {
          return rIter.getValue();
        }

        @Override
        public Progress getProgress() {
          return rIter.getProgress();
        }

        @Override
        public DataInputBuffer getKey() throws IOException {
          return rIter.getKey();
        }

        @Override
        public void close() throws IOException {
          rIter.close();
        }
      };
  org.apache.hadoop.mapreduce.ReduceContext<INKEY, INVALUE, OUTKEY, OUTVALUE>
  reduceContext =
    new ReduceContextImpl<INKEY, INVALUE, OUTKEY, OUTVALUE>(
        job,
        taskId,
        r,
        inputKeyCounter,
        inputValueCounter,
        output,
        committer,
        reporter,
        comparator,
        keyClass,
        valueClass);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Using key class: " + keyClass
        + ", valueClass: " + valueClass);
  }

  org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE>.Context
      reducerContext =
        new WrappedReducer<INKEY, INVALUE, OUTKEY, OUTVALUE>().getReducerContext(
            reduceContext);

  return reducerContext;
}
 
Example #17
Source File: TestGridMixClasses.java    From big-c with Apache License 2.0
@Test (timeout=3000)
public void testSleepReducer() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(FileOutputFormat.COMPRESS, true);

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  TaskAttemptID taskId = new TaskAttemptID();

  RawKeyValueIterator input = new FakeRawKeyValueReducerIterator();

  Counter counter = new GenericCounter();
  Counter inputValueCounter = new GenericCounter();
  RecordWriter<NullWritable, NullWritable> output = new LoadRecordReduceWriter();

  OutputCommitter committer = new CustomOutputCommitter();

  StatusReporter reporter = new DummyReporter();
  RawComparator<GridmixKey> comparator = new FakeRawComparator();

  ReduceContext<GridmixKey, NullWritable, NullWritable, NullWritable> reducecontext = new ReduceContextImpl<GridmixKey, NullWritable, NullWritable, NullWritable>(
          conf, taskId, input, counter, inputValueCounter, output, committer,
          reporter, comparator, GridmixKey.class, NullWritable.class);
  org.apache.hadoop.mapreduce.Reducer<GridmixKey, NullWritable, NullWritable, NullWritable>.Context context = new WrappedReducer<GridmixKey, NullWritable, NullWritable, NullWritable>()
          .getReducerContext(reducecontext);

  SleepReducer test = new SleepReducer();
  long start = System.currentTimeMillis();
  test.setup(context);
  long sleeper = context.getCurrentKey().getReduceOutputBytes();
  // status has been changed
  assertEquals("Sleeping... " + sleeper + " ms left", context.getStatus());
  // should sleep 0.9 sec

  assertTrue(System.currentTimeMillis() >= (start + sleeper));
  test.cleanup(context);
  // status has been changed again

  assertEquals("Slept for " + sleeper, context.getStatus());

}
 
Example #18
Source File: TestGridMixClasses.java    From big-c with Apache License 2.0
@Test (timeout=3000)
public void testLoadJobLoadReducer() throws Exception {
  LoadJob.LoadReducer test = new LoadJob.LoadReducer();

  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(FileOutputFormat.COMPRESS, true);

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  TaskAttemptID taskid = new TaskAttemptID();

  RawKeyValueIterator input = new FakeRawKeyValueIterator();

  Counter counter = new GenericCounter();
  Counter inputValueCounter = new GenericCounter();
  LoadRecordWriter output = new LoadRecordWriter();

  OutputCommitter committer = new CustomOutputCommitter();

  StatusReporter reporter = new DummyReporter();
  RawComparator<GridmixKey> comparator = new FakeRawComparator();

  ReduceContext<GridmixKey, GridmixRecord, NullWritable, GridmixRecord> reduceContext = new ReduceContextImpl<GridmixKey, GridmixRecord, NullWritable, GridmixRecord>(
          conf, taskid, input, counter, inputValueCounter, output, committer,
          reporter, comparator, GridmixKey.class, GridmixRecord.class);
  // read for previous data
  reduceContext.nextKeyValue();
  org.apache.hadoop.mapreduce.Reducer<GridmixKey, GridmixRecord, NullWritable, GridmixRecord>.Context context = new WrappedReducer<GridmixKey, GridmixRecord, NullWritable, GridmixRecord>()
          .getReducerContext(reduceContext);

  // test.setup(context);
  test.run(context);
  // 9 records have been read (-1 for the previously consumed one)
  assertEquals(9, counter.getValue());
  assertEquals(10, inputValueCounter.getValue());
  assertEquals(1, output.getData().size());
  GridmixRecord record = output.getData().values().iterator()
          .next();

  assertEquals(1593, record.getSize());
}
 
Example #19
Source File: TestMerger.java    From big-c with Apache License 2.0
@SuppressWarnings( { "unchecked" })
public void testMergeShouldReturnProperProgress(
    List<Segment<Text, Text>> segments) throws IOException {
  Path tmpDir = new Path("localpath");
  Class<Text> keyClass = (Class<Text>) jobConf.getMapOutputKeyClass();
  Class<Text> valueClass = (Class<Text>) jobConf.getMapOutputValueClass();
  RawComparator<Text> comparator = jobConf.getOutputKeyComparator();
  Counter readsCounter = new Counter();
  Counter writesCounter = new Counter();
  Progress mergePhase = new Progress();
  RawKeyValueIterator mergeQueue = Merger.merge(conf, fs, keyClass,
      valueClass, segments, 2, tmpDir, comparator, getReporter(),
      readsCounter, writesCounter, mergePhase);
  final float epsilon = 0.00001f;

  // Reading 6 keys total, 3 each in 2 segments, so each key read moves the
  // progress forward 1/6th of the way. Initially the first keys from each
  // segment have been read as part of the merge setup, so progress = 2/6.
  Assert.assertEquals(2/6.0f, mergeQueue.getProgress().get(), epsilon);

  // The first next() returns one of the keys already read during merge setup
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(2/6.0f, mergeQueue.getProgress().get(), epsilon);

  // Subsequent next() calls should read one key and move progress
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(3/6.0f, mergeQueue.getProgress().get(), epsilon);
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(4/6.0f, mergeQueue.getProgress().get(), epsilon);

  // At this point we've exhausted all of the keys in one segment
  // so getting the next key will return the already cached key from the
  // other segment
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(4/6.0f, mergeQueue.getProgress().get(), epsilon);

  // Subsequent next() calls should read one key and move progress
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(5/6.0f, mergeQueue.getProgress().get(), epsilon);
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), epsilon);

  // Now there should be no more input
  Assert.assertFalse(mergeQueue.next());
  Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), epsilon);
  Assert.assertTrue(mergeQueue.getKey() == null);
  Assert.assertEquals(0, mergeQueue.getValue().getData().length);
}
 
Example #20
Source File: MergeManagerImpl.java    From big-c with Apache License 2.0
@Override
public void merge(List<InMemoryMapOutput<K,V>> inputs) throws IOException {
  if (inputs == null || inputs.size() == 0) {
    return;
  }
  
  //name this output file same as the name of the first file that is 
  //there in the current list of inmem files (this is guaranteed to
  //be absent on the disk currently. So we don't overwrite a prev. 
  //created spill). Also we need to create the output file now since
  //it is not guaranteed that this file will be present after merge
  //is called (we delete empty files as soon as we see them
  //in the merge method)

  //figure out the mapId 
  TaskAttemptID mapId = inputs.get(0).getMapId();
  TaskID mapTaskId = mapId.getTaskID();

  List<Segment<K, V>> inMemorySegments = new ArrayList<Segment<K, V>>();
  long mergeOutputSize = 
    createInMemorySegments(inputs, inMemorySegments,0);
  int noInMemorySegments = inMemorySegments.size();

  Path outputPath = 
    mapOutputFile.getInputFileForWrite(mapTaskId,
                                       mergeOutputSize).suffix(
                                           Task.MERGED_OUTPUT_PREFIX);

  FSDataOutputStream out = CryptoUtils.wrapIfNecessary(jobConf, rfs.create(outputPath));
  Writer<K, V> writer = new Writer<K, V>(jobConf, out,
      (Class<K>) jobConf.getMapOutputKeyClass(),
      (Class<V>) jobConf.getMapOutputValueClass(), codec, null, true);

  RawKeyValueIterator rIter = null;
  CompressAwarePath compressAwarePath;
  try {
    LOG.info("Initiating in-memory merge with " + noInMemorySegments + 
             " segments...");
    
    rIter = Merger.merge(jobConf, rfs,
                         (Class<K>)jobConf.getMapOutputKeyClass(),
                         (Class<V>)jobConf.getMapOutputValueClass(),
                         inMemorySegments, inMemorySegments.size(),
                         new Path(reduceId.toString()),
                         (RawComparator<K>)jobConf.getOutputKeyComparator(),
                         reporter, spilledRecordsCounter, null, null);
    
    if (null == combinerClass) {
      Merger.writeFile(rIter, writer, reporter, jobConf);
    } else {
      combineCollector.setWriter(writer);
      combineAndSpill(rIter, reduceCombineInputCounter);
    }
    writer.close();
    compressAwarePath = new CompressAwarePath(outputPath,
        writer.getRawLength(), writer.getCompressedLength());

    LOG.info(reduceId +  
        " Merge of the " + noInMemorySegments +
        " files in-memory complete." +
        " Local file is " + outputPath + " of size " + 
        localFS.getFileStatus(outputPath).getLen());
  } catch (IOException e) { 
    //make sure that we delete the ondisk file that we created 
    //earlier when we invoked cloneFileAttributes
    localFS.delete(outputPath, true);
    throw e;
  }

  // Note the output of the merge
  closeOnDiskFile(compressAwarePath);
}
 
Example #21
Source File: TestShufflePlugin.java    From big-c with Apache License 2.0
@Override
public RawKeyValueIterator run() throws java.io.IOException, java.lang.InterruptedException{
  return null;
}
 
Example #22
Source File: Shuffle.java    From big-c with Apache License 2.0
@Override
public RawKeyValueIterator run() throws IOException, InterruptedException {
  // Scale the maximum events we fetch per RPC call to mitigate OOM issues
  // on the ApplicationMaster when a thundering herd of reducers fetch events
  // TODO: This should not be necessary after HADOOP-8942
  int eventsPerReducer = Math.max(MIN_EVENTS_TO_FETCH,
      MAX_RPC_OUTSTANDING_EVENTS / jobConf.getNumReduceTasks());
  int maxEventsToFetch = Math.min(MAX_EVENTS_TO_FETCH, eventsPerReducer);

  // Start the map-completion events fetcher thread
  final EventFetcher<K,V> eventFetcher = 
    new EventFetcher<K,V>(reduceId, umbilical, scheduler, this,
        maxEventsToFetch);
  eventFetcher.start();
  
  // Start the map-output fetcher threads
  boolean isLocal = localMapFiles != null;
  final int numFetchers = isLocal ? 1 :
    jobConf.getInt(MRJobConfig.SHUFFLE_PARALLEL_COPIES, 5);
  Fetcher<K,V>[] fetchers = new Fetcher[numFetchers];
  if (isLocal) {
    fetchers[0] = new LocalFetcher<K, V>(jobConf, reduceId, scheduler,
        merger, reporter, metrics, this, reduceTask.getShuffleSecret(),
        localMapFiles);
    fetchers[0].start();
  } else {
    for (int i=0; i < numFetchers; ++i) {
      fetchers[i] = new Fetcher<K,V>(jobConf, reduceId, scheduler, merger, 
                                     reporter, metrics, this, 
                                     reduceTask.getShuffleSecret());
      fetchers[i].start();
    }
  }
  
  // Wait for shuffle to complete successfully
  while (!scheduler.waitUntilDone(PROGRESS_FREQUENCY)) {
    reporter.progress();
    
    synchronized (this) {
      if (throwable != null) {
        throw new ShuffleError("error in shuffle in " + throwingThreadName,
                               throwable);
      }
    }
  }

  // Stop the event-fetcher thread
  eventFetcher.shutDown();
  
  // Stop the map-output fetcher threads
  for (Fetcher<K,V> fetcher : fetchers) {
    fetcher.shutDown();
  }
  
  // stop the scheduler
  scheduler.close();

  copyPhase.complete(); // copy is already complete
  taskStatus.setPhase(TaskStatus.Phase.SORT);
  reduceTask.statusUpdate(umbilical);

  // Finish the on-going merges...
  RawKeyValueIterator kvIter = null;
  try {
    kvIter = merger.close();
  } catch (Throwable e) {
    throw new ShuffleError("Error while doing final merge " , e);
  }

  // Sanity check
  synchronized (this) {
    if (throwable != null) {
      throw new ShuffleError("error in shuffle in " + throwingThreadName,
                             throwable);
    }
  }
  
  return kvIter;
}
 
Example #23
Source File: MergeManagerImpl.java    From big-c with Apache License 2.0
public RawKVIteratorReader(RawKeyValueIterator kvIter, long size)
    throws IOException {
  super(null, null, size, null, spilledRecordsCounter);
  this.kvIter = kvIter;
}
 
Example #24
Source File: MergeManagerImpl.java    From big-c with Apache License 2.0
@Override
public void merge(List<CompressAwarePath> inputs) throws IOException {
  // sanity check
  if (inputs == null || inputs.isEmpty()) {
    LOG.info("No ondisk files to merge...");
    return;
  }
  
  long approxOutputSize = 0;
  int bytesPerSum = 
    jobConf.getInt("io.bytes.per.checksum", 512);
  
  LOG.info("OnDiskMerger: We have  " + inputs.size() + 
           " map outputs on disk. Triggering merge...");
  
  // 1. Prepare the list of files to be merged. 
  for (CompressAwarePath file : inputs) {
    approxOutputSize += localFS.getFileStatus(file).getLen();
  }

  // add the checksum length
  approxOutputSize += 
    ChecksumFileSystem.getChecksumLength(approxOutputSize, bytesPerSum);

  // 2. Start the on-disk merge process
  Path outputPath = 
    localDirAllocator.getLocalPathForWrite(inputs.get(0).toString(), 
        approxOutputSize, jobConf).suffix(Task.MERGED_OUTPUT_PREFIX);

  FSDataOutputStream out = CryptoUtils.wrapIfNecessary(jobConf, rfs.create(outputPath));
  Writer<K, V> writer = new Writer<K, V>(jobConf, out,
      (Class<K>) jobConf.getMapOutputKeyClass(),
      (Class<V>) jobConf.getMapOutputValueClass(), codec, null, true);

  RawKeyValueIterator iter  = null;
  CompressAwarePath compressAwarePath;
  Path tmpDir = new Path(reduceId.toString());
  try {
    iter = Merger.merge(jobConf, rfs,
                        (Class<K>) jobConf.getMapOutputKeyClass(),
                        (Class<V>) jobConf.getMapOutputValueClass(),
                        codec, inputs.toArray(new Path[inputs.size()]), 
                        true, ioSortFactor, tmpDir, 
                        (RawComparator<K>) jobConf.getOutputKeyComparator(), 
                        reporter, spilledRecordsCounter, null, 
                        mergedMapOutputsCounter, null);

    Merger.writeFile(iter, writer, reporter, jobConf);
    writer.close();
    compressAwarePath = new CompressAwarePath(outputPath,
        writer.getRawLength(), writer.getCompressedLength());
  } catch (IOException e) {
    localFS.delete(outputPath, true);
    throw e;
  }

  closeOnDiskFile(compressAwarePath);

  LOG.info(reduceId +
      " Finished merging " + inputs.size() + 
      " map output files on disk of total-size " + 
      approxOutputSize + "." + 
      " Local output file is " + outputPath + " of size " +
      localFS.getFileStatus(outputPath).getLen());
}
 
Example #25
Source File: TestGridMixClasses.java    From hadoop with Apache License 2.0
@Test (timeout=3000)
public void testSleepReducer() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(FileOutputFormat.COMPRESS, true);

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  TaskAttemptID taskId = new TaskAttemptID();

  RawKeyValueIterator input = new FakeRawKeyValueReducerIterator();

  Counter counter = new GenericCounter();
  Counter inputValueCounter = new GenericCounter();
  RecordWriter<NullWritable, NullWritable> output = new LoadRecordReduceWriter();

  OutputCommitter committer = new CustomOutputCommitter();

  StatusReporter reporter = new DummyReporter();
  RawComparator<GridmixKey> comparator = new FakeRawComparator();

  ReduceContext<GridmixKey, NullWritable, NullWritable, NullWritable> reducecontext = new ReduceContextImpl<GridmixKey, NullWritable, NullWritable, NullWritable>(
          conf, taskId, input, counter, inputValueCounter, output, committer,
          reporter, comparator, GridmixKey.class, NullWritable.class);
  org.apache.hadoop.mapreduce.Reducer<GridmixKey, NullWritable, NullWritable, NullWritable>.Context context = new WrappedReducer<GridmixKey, NullWritable, NullWritable, NullWritable>()
          .getReducerContext(reducecontext);

  SleepReducer test = new SleepReducer();
  long start = System.currentTimeMillis();
  test.setup(context);
  long sleeper = context.getCurrentKey().getReduceOutputBytes();
  // status has been changed
  assertEquals("Sleeping... " + sleeper + " ms left", context.getStatus());
  // should sleep 0.9 sec

  assertTrue(System.currentTimeMillis() >= (start + sleeper));
  test.cleanup(context);
  // status has been changed again

  assertEquals("Slept for " + sleeper, context.getStatus());

}
 
Example #26
Source File: TestGridMixClasses.java    From hadoop with Apache License 2.0
@Test (timeout=3000)
public void testLoadJobLoadReducer() throws Exception {
  LoadJob.LoadReducer test = new LoadJob.LoadReducer();

  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(FileOutputFormat.COMPRESS, true);

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  TaskAttemptID taskid = new TaskAttemptID();

  RawKeyValueIterator input = new FakeRawKeyValueIterator();

  Counter counter = new GenericCounter();
  Counter inputValueCounter = new GenericCounter();
  LoadRecordWriter output = new LoadRecordWriter();

  OutputCommitter committer = new CustomOutputCommitter();

  StatusReporter reporter = new DummyReporter();
  RawComparator<GridmixKey> comparator = new FakeRawComparator();

  ReduceContext<GridmixKey, GridmixRecord, NullWritable, GridmixRecord> reduceContext = new ReduceContextImpl<GridmixKey, GridmixRecord, NullWritable, GridmixRecord>(
          conf, taskid, input, counter, inputValueCounter, output, committer,
          reporter, comparator, GridmixKey.class, GridmixRecord.class);
  // read for previous data
  reduceContext.nextKeyValue();
  org.apache.hadoop.mapreduce.Reducer<GridmixKey, GridmixRecord, NullWritable, GridmixRecord>.Context context = new WrappedReducer<GridmixKey, GridmixRecord, NullWritable, GridmixRecord>()
          .getReducerContext(reduceContext);

  // test.setup(context);
  test.run(context);
  // 9 records have been read (-1 for the previously consumed one)
  assertEquals(9, counter.getValue());
  assertEquals(10, inputValueCounter.getValue());
  assertEquals(1, output.getData().size());
  GridmixRecord record = output.getData().values().iterator()
          .next();

  assertEquals(1593, record.getSize());
}
 
Example #27
Source File: TestMerger.java    From hadoop with Apache License 2.0
@SuppressWarnings( { "unchecked" })
public void testMergeShouldReturnProperProgress(
    List<Segment<Text, Text>> segments) throws IOException {
  Path tmpDir = new Path("localpath");
  Class<Text> keyClass = (Class<Text>) jobConf.getMapOutputKeyClass();
  Class<Text> valueClass = (Class<Text>) jobConf.getMapOutputValueClass();
  RawComparator<Text> comparator = jobConf.getOutputKeyComparator();
  Counter readsCounter = new Counter();
  Counter writesCounter = new Counter();
  Progress mergePhase = new Progress();
  RawKeyValueIterator mergeQueue = Merger.merge(conf, fs, keyClass,
      valueClass, segments, 2, tmpDir, comparator, getReporter(),
      readsCounter, writesCounter, mergePhase);
  final float epsilon = 0.00001f;

  // Reading 6 keys total, 3 each in 2 segments, so each key read moves the
  // progress forward 1/6th of the way. Initially the first keys from each
  // segment have been read as part of the merge setup, so progress = 2/6.
  Assert.assertEquals(2/6.0f, mergeQueue.getProgress().get(), epsilon);

  // The first next() returns one of the keys already read during merge setup
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(2/6.0f, mergeQueue.getProgress().get(), epsilon);

  // Subsequent next() calls should read one key and move progress
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(3/6.0f, mergeQueue.getProgress().get(), epsilon);
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(4/6.0f, mergeQueue.getProgress().get(), epsilon);

  // At this point we've exhausted all of the keys in one segment
  // so getting the next key will return the already cached key from the
  // other segment
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(4/6.0f, mergeQueue.getProgress().get(), epsilon);

  // Subsequent next() calls should read one key and move progress
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(5/6.0f, mergeQueue.getProgress().get(), epsilon);
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), epsilon);

  // Now there should be no more input
  Assert.assertFalse(mergeQueue.next());
  Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), epsilon);
  Assert.assertTrue(mergeQueue.getKey() == null);
  Assert.assertEquals(0, mergeQueue.getValue().getData().length);
}
 
Example #28
Source File: TestShufflePlugin.java    From hadoop with Apache License 2.0
@Override
public RawKeyValueIterator run() throws java.io.IOException, java.lang.InterruptedException{
  return null;
}
 
Example #29
Source File: Shuffle.java    From hadoop with Apache License 2.0
@Override
public RawKeyValueIterator run() throws IOException, InterruptedException {
  // Scale the maximum events we fetch per RPC call to mitigate OOM issues
  // on the ApplicationMaster when a thundering herd of reducers fetch events
  // TODO: This should not be necessary after HADOOP-8942
  int eventsPerReducer = Math.max(MIN_EVENTS_TO_FETCH,
      MAX_RPC_OUTSTANDING_EVENTS / jobConf.getNumReduceTasks());
  int maxEventsToFetch = Math.min(MAX_EVENTS_TO_FETCH, eventsPerReducer);

  // Start the map-completion events fetcher thread
  final EventFetcher<K,V> eventFetcher = 
    new EventFetcher<K,V>(reduceId, umbilical, scheduler, this,
        maxEventsToFetch);
  eventFetcher.start();
  
  // Start the map-output fetcher threads
  boolean isLocal = localMapFiles != null;
  final int numFetchers = isLocal ? 1 :
    jobConf.getInt(MRJobConfig.SHUFFLE_PARALLEL_COPIES, 5);
  Fetcher<K,V>[] fetchers = new Fetcher[numFetchers];
  if (isLocal) {
    fetchers[0] = new LocalFetcher<K, V>(jobConf, reduceId, scheduler,
        merger, reporter, metrics, this, reduceTask.getShuffleSecret(),
        localMapFiles);
    fetchers[0].start();
  } else {
    for (int i=0; i < numFetchers; ++i) {
      fetchers[i] = new Fetcher<K,V>(jobConf, reduceId, scheduler, merger, 
                                     reporter, metrics, this, 
                                     reduceTask.getShuffleSecret());
      fetchers[i].start();
    }
  }
  
  // Wait for shuffle to complete successfully
  while (!scheduler.waitUntilDone(PROGRESS_FREQUENCY)) {
    reporter.progress();
    
    synchronized (this) {
      if (throwable != null) {
        throw new ShuffleError("error in shuffle in " + throwingThreadName,
                               throwable);
      }
    }
  }

  // Stop the event-fetcher thread
  eventFetcher.shutDown();
  
  // Stop the map-output fetcher threads
  for (Fetcher<K,V> fetcher : fetchers) {
    fetcher.shutDown();
  }
  
  // stop the scheduler
  scheduler.close();

  copyPhase.complete(); // copy is already complete
  taskStatus.setPhase(TaskStatus.Phase.SORT);
  reduceTask.statusUpdate(umbilical);

  // Finish the on-going merges...
  RawKeyValueIterator kvIter = null;
  try {
    kvIter = merger.close();
  } catch (Throwable e) {
    throw new ShuffleError("Error while doing final merge " , e);
  }

  // Sanity check
  synchronized (this) {
    if (throwable != null) {
      throw new ShuffleError("error in shuffle in " + throwingThreadName,
                             throwable);
    }
  }
  
  return kvIter;
}
 
Example #30
Source File: MergeManagerImpl.java    From hadoop with Apache License 2.0
public RawKVIteratorReader(RawKeyValueIterator kvIter, long size)
    throws IOException {
  super(null, null, size, null, spilledRecordsCounter);
  this.kvIter = kvIter;
}