Java Code Examples for org.apache.hadoop.util.QuickSort

The following examples show how to use org.apache.hadoop.util.QuickSort. These examples are extracted from open source projects; the originating project, source file, and license are noted above each example.
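QuickSort is Hadoop's IndexedSorter implementation. Rather than sorting a collection directly, it sorts any structure that exposes compare(i, j) and swap(i, j) through the org.apache.hadoop.util.IndexedSortable interface, which is what lets the projects below sort index arrays and serialized buffers without materializing records. The call sort(s, p, r) sorts the half-open index range [p, r). Before the project examples, here is a minimal, self-contained usage sketch; the IntArraySortable class is illustrative and not taken from any of the projects below.

import org.apache.hadoop.util.IndexedSortable;
import org.apache.hadoop.util.QuickSort;

/** Minimal IndexedSortable over a plain int array. */
public class IntArraySortable implements IndexedSortable {
  private final int[] data;

  public IntArraySortable(int[] data) {
    this.data = data;
  }

  @Override
  public int compare(int i, int j) {
    // Order elements i and j; QuickSort only ever sees indices.
    return Integer.compare(data[i], data[j]);
  }

  @Override
  public void swap(int i, int j) {
    int tmp = data[i];
    data[i] = data[j];
    data[j] = tmp;
  }

  public static void main(String[] args) {
    int[] values = {5, 3, 8, 1, 9, 2};
    // Sort the half-open range [0, values.length), as in the examples below.
    new QuickSort().sort(new IntArraySortable(values), 0, values.length);
    System.out.println(java.util.Arrays.toString(values)); // [1, 2, 3, 5, 8, 9]
  }
}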
Example 1
Source Project: hadoop   Source File: TeraInputFormat.java    License: Apache License 2.0
/**
 * Find the split points for a given sample. The sample keys are sorted
 * and down sampled to find even split points for the partitions. The
 * returned keys should be the start of their respective partitions.
 * @param numPartitions the desired number of partitions
 * @return an array of size numPartitions - 1 that holds the split points
 */
Text[] createPartitions(int numPartitions) {
  int numRecords = records.size();
  System.out.println("Making " + numPartitions + " from " + numRecords + 
                     " sampled records");
  if (numPartitions > numRecords) {
    throw new IllegalArgumentException
      ("Requested more partitions than input keys (" + numPartitions +
       " > " + numRecords + ")");
  }
  new QuickSort().sort(this, 0, records.size());
  float stepSize = numRecords / (float) numPartitions;
  Text[] result = new Text[numPartitions-1];
  for(int i=1; i < numPartitions; ++i) {
    result[i-1] = records.get(Math.round(stepSize * i));
  }
  return result;
}
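In this example the enclosing class itself implements IndexedSortable, so sort(this, 0, records.size()) permutes the sampled records in place. As a worked illustration of the split-point arithmetic (not part of the original source): with 10 sampled records and 4 partitions, stepSize is 2.5 and the returned split points are the records at indices Math.round(2.5f) = 3, Math.round(5.0f) = 5, and Math.round(7.5f) = 8.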
 
Example 2
Source Project: pravega-samples   Source File: TeraInputFormat.java    License: Apache License 2.0
/**
 * Find the split points for a given sample. The sample keys are sorted
 * and down sampled to find even split points for the partitions. The
 * returned keys should be the start of their respective partitions.
 * @param numPartitions the desired number of partitions
 * @return an array of size numPartitions - 1 that holds the split points
 */
Text[] createPartitions(int numPartitions) {
  int numRecords = records.size();
  System.out.println("Making " + numPartitions + " from " + numRecords + 
                     " sampled records");
  if (numPartitions > numRecords) {
    throw new IllegalArgumentException
      ("Requested more partitions than input keys (" + numPartitions +
       " > " + numRecords + ")");
  }
  new QuickSort().sort(this, 0, records.size());
  float stepSize = numRecords / (float) numPartitions;
  Text[] result = new Text[numPartitions-1];
  for(int i=1; i < numPartitions; ++i) {
    result[i-1] = records.get(Math.round(stepSize * i));
  }
  return result;
}
 
Example 3
Source Project: dremio-oss   Source File: QuickSorterTemplate.java    License: Apache License 2.0
@Override
public SelectionVector4 getFinalSort(BufferAllocator allocator, int targetBatchSize){
  Stopwatch watch = Stopwatch.createStarted();

  intVector.setValueCount(totalCount);
  QuickSort qs = new QuickSort();
  if (totalCount > 0) {
    qs.sort(this, 0, totalCount);
  }

  SelectionVector4 finalSortedSV4 = new SelectionVector4(allocator.buffer(totalCount * 4), totalCount, targetBatchSize);
  for (int i = 0; i < totalCount; i++) {
    finalSortedSV4.set(i, intVector.get(i));
  }

  logger.debug("Took {} us to final sort {} records in {} batches",
    watch.elapsed(TimeUnit.MICROSECONDS), totalCount, hyperBatch.size());

  return finalSortedSV4;
}
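Here the quicksort orders an auxiliary intVector of record indices rather than the records themselves; the sorted indices are then copied into a SelectionVector4 whose backing buffer is sized at four bytes per entry (allocator.buffer(totalCount * 4)).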
 
Example 4
Source Project: big-c   Source File: TeraInputFormat.java    License: Apache License 2.0
/**
 * Find the split points for a given sample. The sample keys are sorted
 * and down sampled to find even split points for the partitions. The
 * returned keys should be the start of their respective partitions.
 * @param numPartitions the desired number of partitions
 * @return an array of size numPartitions - 1 that holds the split points
 */
Text[] createPartitions(int numPartitions) {
  int numRecords = records.size();
  System.out.println("Making " + numPartitions + " from " + numRecords + 
                     " sampled records");
  if (numPartitions > numRecords) {
    throw new IllegalArgumentException
      ("Requested more partitions than input keys (" + numPartitions +
       " > " + numRecords + ")");
  }
  new QuickSort().sort(this, 0, records.size());
  float stepSize = numRecords / (float) numPartitions;
  Text[] result = new Text[numPartitions-1];
  for(int i=1; i < numPartitions; ++i) {
    result[i-1] = records.get(Math.round(stepSize * i));
  }
  return result;
}
 
Example 5
Source Project: RDFS   Source File: TeraInputFormat.java    License: Apache License 2.0
/**
 * Find the split points for a given sample. The sample keys are sorted
 * and down sampled to find even split points for the partitions. The
 * returned keys should be the start of their respective partitions.
 * @param numPartitions the desired number of partitions
 * @return an array of size numPartitions - 1 that holds the split points
 */
Text[] createPartitions(int numPartitions) {
  int numRecords = records.size();
  System.out.println("Making " + numPartitions + " from " + numRecords + 
                     " records");
  if (numPartitions > numRecords) {
    throw new IllegalArgumentException
      ("Requested more partitions than input keys (" + numPartitions +
       " > " + numRecords + ")");
  }
  new QuickSort().sort(this, 0, records.size());
  float stepSize = numRecords / (float) numPartitions;
  System.out.println("Step size is " + stepSize);
  Text[] result = new Text[numPartitions-1];
  for(int i=1; i < numPartitions; ++i) {
    result[i-1] = records.get(Math.round(stepSize * i));
  }
  return result;
}
 
Example 6
Source Project: incubator-tez   Source File: TeraInputFormat.java    License: Apache License 2.0
/**
 * Find the split points for a given sample. The sample keys are sorted
 * and down sampled to find even split points for the partitions. The
 * returned keys should be the start of their respective partitions.
 * @param numPartitions the desired number of partitions
 * @return an array of size numPartitions - 1 that holds the split points
 */
Text[] createPartitions(int numPartitions) {
  int numRecords = records.size();
  System.out.println("Making " + numPartitions + " from " + numRecords + 
                     " sampled records");
  if (numPartitions > numRecords) {
    throw new IllegalArgumentException
      ("Requested more partitions than input keys (" + numPartitions +
       " > " + numRecords + ")");
  }
  new QuickSort().sort(this, 0, records.size());
  float stepSize = numRecords / (float) numPartitions;
  Text[] result = new Text[numPartitions-1];
  for(int i=1; i < numPartitions; ++i) {
    result[i-1] = records.get(Math.round(stepSize * i));
  }
  return result;
}
 
Example 7
Source Project: hadoop-book   Source File: TeraInputFormat.java    License: Apache License 2.0
/**
 * Find the split points for a given sample. The sample keys are sorted
 * and down sampled to find even split points for the partitions. The
 * returned keys should be the start of their respective partitions.
 *
 * @param numPartitions the desired number of partitions
 * @return an array of size numPartitions - 1 that holds the split
 * points
 */
Text[] createPartitions(int numPartitions) {
    int numRecords = records.size();
    System.out.println("Making " + numPartitions + " from " + numRecords
            + " records");
    if (numPartitions > numRecords) {
        throw new IllegalArgumentException("Requested more partitions than input keys (" + numPartitions
                + " > " + numRecords + ")");
    }
    new QuickSort().sort(this, 0, records.size());
    float stepSize = numRecords / (float) numPartitions;
    System.out.println("Step size is " + stepSize);
    Text[] result = new Text[numPartitions - 1];
    for (int i = 1; i < numPartitions; ++i) {
        result[i - 1] = records.get(Math.round(stepSize * i));
    }
    return result;
}
 
Example 8
Source Project: hadoop-gpu   Source File: TeraInputFormat.java    License: Apache License 2.0
/**
 * Find the split points for a given sample. The sample keys are sorted
 * and down sampled to find even split points for the partitions. The
 * returned keys should be the start of their respective partitions.
 * @param numPartitions the desired number of partitions
 * @return an array of size numPartitions - 1 that holds the split points
 */
Text[] createPartitions(int numPartitions) {
  int numRecords = records.size();
  System.out.println("Making " + numPartitions + " from " + numRecords + 
                     " records");
  if (numPartitions > numRecords) {
    throw new IllegalArgumentException
      ("Requested more partitions than input keys (" + numPartitions +
       " > " + numRecords + ")");
  }
  new QuickSort().sort(this, 0, records.size());
  float stepSize = numRecords / (float) numPartitions;
  System.out.println("Step size is " + stepSize);
  Text[] result = new Text[numPartitions-1];
  for(int i=1; i < numPartitions; ++i) {
    result[i-1] = records.get(Math.round(stepSize * i));
  }
  return result;
}
 
Example 9
Source Project: Bats   Source File: SortTemplate.java    License: Apache License 2.0
@Override
public void sort(SelectionVector4 vector4, VectorContainer container){
  Stopwatch watch = Stopwatch.createStarted();
  QuickSort qs = new QuickSort();
  qs.sort(this, 0, vector4.getTotalCount());
  logger.debug("Took {} us to sort {} records", watch.elapsed(TimeUnit.MICROSECONDS), vector4.getTotalCount());
}
 
Example 10
Source Project: Bats   Source File: SingleBatchSorterTemplate.java    License: Apache License 2.0
@Override
public void sort(SelectionVector2 vector2){
  QuickSort qs = new QuickSort();
  Stopwatch watch = Stopwatch.createStarted();
  if (vector2.getCount() > 0) {
    qs.sort(this, 0, vector2.getCount());
  }
  logger.debug("Took {} us to sort {} records", watch.elapsed(TimeUnit.MICROSECONDS), vector2.getCount());
}
 
Example 11
Source Project: dremio-oss   Source File: SingleBatchSorterTemplate.java    License: Apache License 2.0
@Override
public void sort(SelectionVector2 vector2){
  QuickSort qs = new QuickSort();
  Stopwatch watch = Stopwatch.createStarted();
  if (vector2.getCount() > 0) {
    qs.sort(this, 0, vector2.getCount());
  }
  logger.debug("Took {} us to sort {} records", watch.elapsed(TimeUnit.MICROSECONDS), vector2.getCount());
}
 
Example 12
Source Project: dremio-oss   Source File: SortTest.java    License: Apache License 2.0
public long doSort(){
  QuickSort qs = new QuickSort();
  ByteSortable b = new ByteSortable();
  long nano = System.nanoTime();
  qs.sort(b, 0, RECORD_COUNT);
  return System.nanoTime() - nano;
}
 
Example 13
Source Project: tajo   Source File: VectorizedSorter.java    License: Apache License 2.0
@Override
public Iterable<Tuple> sort() {
  new QuickSort().sort(this, 0, mappings.length);
  return new Iterable<Tuple>() {
    @Override
    public Iterator<Tuple> iterator() {
      return new Iterator<Tuple>() {
        int index;
        public boolean hasNext() { return index < mappings.length; }
        public Tuple next() { return tuples[mappings[index++]]; }
        public void remove() { throw new TajoRuntimeException(new UnsupportedException()); }
      };
    }
  };
}
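This sorter permutes only the mappings index array; the tuples array is never moved. The returned iterator then reads each tuple through the sorted indices (tuples[mappings[index++]]), which keeps the sort cheap when tuples are large.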
 
Example 14
Source Project: RDFS   Source File: BasicReducePartition.java    License: Apache License 2.0
protected void sortMemBlock(MemoryBlock memBlock) {
  if (memBlock.currentPtr <= 0) {
    return;
  }
  // quick sort the offsets
  OffsetSortable sortableObj = new OffsetSortable(memBlock, kvbuffer);
  QuickSort quickSort = new QuickSort();
  quickSort.sort(sortableObj, 0, memBlock.currentPtr);
}
 
Example 15
Source Project: Bats   Source File: SampleSortTemplate.java    License: Apache License 2.0
@Override
public void sort(SelectionVector2 vector2, VectorContainer container){
  QuickSort qs = new QuickSort();
  qs.sort(this, 0, vector2.getCount());
}
 
Example 16
Source Project: Cubert   Source File: LookUpTable.java    License: Apache License 2.0
private void buildTable() throws IOException
{
    QuickSort quickSort = new QuickSort();

    long start, end;

    /* Sort the offsets array */
    start = System.currentTimeMillis();
    if (offsetArr.length > 1)
    {
        quickSort.sort(sortable, 0, offsetArr.length);
    }
    end = System.currentTimeMillis();
    print.f("LookUpTable: Sorted %d entries in %d ms", offsetArr.length, (end - start));

    /* Fill in the HashCode array */
    start = System.currentTimeMillis();
    int prevHashCode = -1;
    Tuple prevTuple = newTuple();
    Tuple t = newTuple();

    for (int i = 0; i < offsetArr.length; ++i)
    {
        t = store.getTuple(offsetArr[i], t);
        int hashCode = tupleHashCode(t);
        if (prevHashCode != hashCode)
        {
            hashCodeArr[hashCode] = i;
            prevHashCode = hashCode;
        }

        if (i == 0 || !compareKeys(prevTuple, t))
        {
            offsetArr[i] = offsetArr[i] | SIGNBIT;
        }

        /* Object Reuse: Swap the tuples instead of creating new ones */
        Tuple temp = t;
        t = prevTuple;
        prevTuple = temp;
    }
    end = System.currentTimeMillis();
    print.f("LookUpTable: Created HashCode Array for %d entries in %d ms", offsetArr.length, (end - start));
}
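After the offsets are quicksorted, the loop builds two auxiliary structures: hashCodeArr maps each hash code to the first sorted position where it occurs, and the sign bit on offsetArr[i] marks the first entry of each distinct key group (assuming compareKeys returns true for equal keys between adjacent sorted tuples).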
 
Example 17
Source Project: RDFS   Source File: MapTask.java    License: Apache License 2.0
@SuppressWarnings("unchecked")
public MapOutputBuffer(TaskUmbilicalProtocol umbilical, JobConf job,
                       TaskReporter reporter
                       ) throws IOException, ClassNotFoundException {
  this.job = job;
  this.reporter = reporter;
  localFs = FileSystem.getLocal(job);
  partitions = job.getNumReduceTasks();
   
  rfs = ((LocalFileSystem)localFs).getRaw();

  indexCacheList = new ArrayList<SpillRecord>();
  
  spillSortCounters = new MapSpillSortCounters(reporter);
  
  //sanity checks
  final float spillper = job.getFloat("io.sort.spill.percent",(float)0.8);
  final float recper = job.getFloat("io.sort.record.percent",(float)0.05);
  boolean localMode = job.get("mapred.job.tracker", "local").equals("local");
  int sortmb = job.getInt("io.sort.mb", 100);
  if (localMode) {
    sortmb = job.getInt("io.sort.mb.localmode", 100);
  }
  if (spillper > (float)1.0 || spillper < (float)0.0) {
    throw new IOException("Invalid \"io.sort.spill.percent\": " + spillper);
  }
  if (recper > (float)1.0 || recper < (float)0.01) {
    throw new IOException("Invalid \"io.sort.record.percent\": " + recper);
  }
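  // The mask check below requires sortmb to fit in 11 bits (0..2047 MB),
  // ensuring maxMemUsage = sortmb << 20 cannot overflow a positive int.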
  if ((sortmb & 0x7FF) != sortmb) {
    throw new IOException("Invalid \"io.sort.mb\": " + sortmb);
  }
  sorter = ReflectionUtils.newInstance(
        job.getClass("map.sort.class", QuickSort.class, IndexedSorter.class), job);
  LOG.info("io.sort.mb = " + sortmb);
  // buffers and accounting
  int maxMemUsage = sortmb << 20;
  int recordCapacity = (int)(maxMemUsage * recper);
  recordCapacity -= recordCapacity % RECSIZE;
  kvbuffer = new byte[maxMemUsage - recordCapacity];
  bufvoid = kvbuffer.length;
  recordCapacity /= RECSIZE;
  kvoffsets = new int[recordCapacity];
  kvindices = new int[recordCapacity * ACCTSIZE];
  softBufferLimit = (int)(kvbuffer.length * spillper);
  softRecordLimit = (int)(kvoffsets.length * spillper);
  LOG.info("data buffer = " + softBufferLimit + "/" + kvbuffer.length);
  LOG.info("record buffer = " + softRecordLimit + "/" + kvoffsets.length);
  // k/v serialization
  comparator = job.getOutputKeyComparator();
  keyClass = (Class<K>)job.getMapOutputKeyClass();
  valClass = (Class<V>)job.getMapOutputValueClass();
  serializationFactory = new SerializationFactory(job);
  keySerializer = serializationFactory.getSerializer(keyClass);
  keySerializer.open(bb);
  valSerializer = serializationFactory.getSerializer(valClass);
  valSerializer.open(bb);
  // counters
  mapOutputByteCounter = reporter.getCounter(MAP_OUTPUT_BYTES);
  mapOutputRecordCounter = reporter.getCounter(MAP_OUTPUT_RECORDS);
  Counters.Counter combineInputCounter = 
    reporter.getCounter(COMBINE_INPUT_RECORDS);
  combineOutputCounter = reporter.getCounter(COMBINE_OUTPUT_RECORDS);
  // compression
  if (job.getCompressMapOutput()) {
    Class<? extends CompressionCodec> codecClass =
      job.getMapOutputCompressorClass(DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job);
  }
  // combiner
  combinerRunner = CombinerRunner.create(job, getTaskID(), 
                                         combineInputCounter,
                                         reporter, null);
  if (combinerRunner != null) {
    combineCollector= new CombineOutputCollector<K,V>(combineOutputCounter);
  } else {
    combineCollector = null;
  }
  minSpillsForCombine = job.getInt("min.num.spills.for.combine", 3);
  spillThread.setDaemon(true);
  spillThread.setName("SpillThread");
  spillLock.lock();
  try {
    spillThread.start();
    while (!spillThreadRunning) {
      spillDone.await();
    }
  } catch (InterruptedException e) {
    throw (IOException)new IOException("Spill thread failed to initialize"
        ).initCause(sortSpillException);
  } finally {
    spillLock.unlock();
  }
  if (sortSpillException != null) {
    throw (IOException)new IOException("Spill thread failed to initialize"
        ).initCause(sortSpillException);
  }
}
 
Example 18
Source Project: incubator-tez   Source File: ExternalSorter.java    License: Apache License 2.0
public ExternalSorter(TezOutputContext outputContext, Configuration conf, int numOutputs,
    long initialMemoryAvailable) throws IOException {
  this.outputContext = outputContext;
  this.conf = conf;
  this.partitions = numOutputs;

  rfs = ((LocalFileSystem)FileSystem.getLocal(this.conf)).getRaw();

  int assignedMb = (int) (initialMemoryAvailable >> 20);
  if (assignedMb <= 0) {
    if (initialMemoryAvailable > 0) { // Rounded down to 0MB - may be > 0 && < 1MB
      this.availableMemoryMb = 1;
      LOG.warn("initialAvailableMemory: " + initialMemoryAvailable
          + " is too low. Rounding to 1 MB");
    } else {
      throw new RuntimeException("InitialMemoryAssigned is <= 0: " + initialMemoryAvailable);
    }
  } else {
    this.availableMemoryMb = assignedMb;
  }

  // sorter
  sorter = ReflectionUtils.newInstance(this.conf.getClass(
      TezJobConfig.TEZ_RUNTIME_INTERNAL_SORTER_CLASS, QuickSort.class,
      IndexedSorter.class), this.conf);

  comparator = ConfigUtils.getIntermediateOutputKeyComparator(this.conf);

  // k/v serialization
  keyClass = ConfigUtils.getIntermediateOutputKeyClass(this.conf);
  valClass = ConfigUtils.getIntermediateOutputValueClass(this.conf);
  serializationFactory = new SerializationFactory(this.conf);
  keySerializer = serializationFactory.getSerializer(keyClass);
  valSerializer = serializationFactory.getSerializer(valClass);

  //    counters    
  mapOutputByteCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES);
  mapOutputRecordCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_RECORDS);
  outputBytesWithOverheadCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
  fileOutputByteCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
  spilledRecordsCounter = outputContext.getCounters().findCounter(TaskCounter.SPILLED_RECORDS);
  additionalSpillBytesWritten = outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
  additionalSpillBytesRead = outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
  numAdditionalSpills = outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);

  // compression
  if (ConfigUtils.shouldCompressIntermediateOutput(this.conf)) {
    Class<? extends CompressionCodec> codecClass =
        ConfigUtils.getIntermediateOutputCompressorClass(this.conf, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, this.conf);
  } else {
    codec = null;
  }

  this.ifileReadAhead = this.conf.getBoolean(
      TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD,
      TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT);
  if (this.ifileReadAhead) {
    this.ifileReadAheadLength = conf.getInt(
        TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES,
        TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT);
  } else {
    this.ifileReadAheadLength = 0;
  }
  this.ifileBufferSize = conf.getInt("io.file.buffer.size",
      TezJobConfig.TEZ_RUNTIME_IFILE_BUFFER_SIZE_DEFAULT);

  
  // Task outputs
  mapOutputFile = TezRuntimeUtils.instantiateTaskOutputManager(conf, outputContext);
  
  LOG.info("Instantiating Partitioner: [" + conf.get(TezJobConfig.TEZ_RUNTIME_PARTITIONER_CLASS) + "]");
  this.conf.setInt(TezRuntimeFrameworkConfigs.TEZ_RUNTIME_NUM_EXPECTED_PARTITIONS, this.partitions);
  this.partitioner = TezRuntimeUtils.instantiatePartitioner(this.conf);
  this.combiner = TezRuntimeUtils.instantiateCombiner(this.conf, outputContext);
}
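Unlike the fixed new QuickSort() calls above, the sorter here is pluggable: TezJobConfig.TEZ_RUNTIME_INTERNAL_SORTER_CLASS is resolved through ReflectionUtils with QuickSort.class as the default, mirroring the map.sort.class lookup in Examples 17 and 19.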
 
Example 19
Source Project: hadoop-gpu   Source File: MapTask.java    License: Apache License 2.0
@SuppressWarnings("unchecked")
public MapOutputBuffer(TaskUmbilicalProtocol umbilical, JobConf job,
                       TaskReporter reporter
                       ) throws IOException, ClassNotFoundException {
  this.job = job;
  this.reporter = reporter;
  localFs = FileSystem.getLocal(job);
  partitions = job.getNumReduceTasks();
   
  rfs = ((LocalFileSystem)localFs).getRaw();

  indexCacheList = new ArrayList<SpillRecord>();
  
  //sanity checks
  final float spillper = job.getFloat("io.sort.spill.percent",(float)0.8);
  final float recper = job.getFloat("io.sort.record.percent",(float)0.05);
  final int sortmb = job.getInt("io.sort.mb", 100);
  if (spillper > (float)1.0 || spillper < (float)0.0) {
    throw new IOException("Invalid \"io.sort.spill.percent\": " + spillper);
  }
  if (recper > (float)1.0 || recper < (float)0.01) {
    throw new IOException("Invalid \"io.sort.record.percent\": " + recper);
  }
  if ((sortmb & 0x7FF) != sortmb) {
    throw new IOException("Invalid \"io.sort.mb\": " + sortmb);
  }
  sorter = ReflectionUtils.newInstance(
        job.getClass("map.sort.class", QuickSort.class, IndexedSorter.class), job);
  LOG.info("io.sort.mb = " + sortmb);
  // buffers and accounting
  int maxMemUsage = sortmb << 20;
  int recordCapacity = (int)(maxMemUsage * recper);
  recordCapacity -= recordCapacity % RECSIZE;
  kvbuffer = new byte[maxMemUsage - recordCapacity];
  bufvoid = kvbuffer.length;
  recordCapacity /= RECSIZE;
  kvoffsets = new int[recordCapacity];
  kvindices = new int[recordCapacity * ACCTSIZE];
  softBufferLimit = (int)(kvbuffer.length * spillper);
  softRecordLimit = (int)(kvoffsets.length * spillper);
  LOG.info("data buffer = " + softBufferLimit + "/" + kvbuffer.length);
  LOG.info("record buffer = " + softRecordLimit + "/" + kvoffsets.length);
  // k/v serialization
  comparator = job.getOutputKeyComparator();
  keyClass = (Class<K>)job.getMapOutputKeyClass();
  valClass = (Class<V>)job.getMapOutputValueClass();
  serializationFactory = new SerializationFactory(job);
  keySerializer = serializationFactory.getSerializer(keyClass);
  keySerializer.open(bb);
  valSerializer = serializationFactory.getSerializer(valClass);
  valSerializer.open(bb);
  // counters
  mapOutputByteCounter = reporter.getCounter(MAP_OUTPUT_BYTES);
  mapOutputRecordCounter = reporter.getCounter(MAP_OUTPUT_RECORDS);
  Counters.Counter combineInputCounter = 
    reporter.getCounter(COMBINE_INPUT_RECORDS);
  combineOutputCounter = reporter.getCounter(COMBINE_OUTPUT_RECORDS);
  // compression
  if (job.getCompressMapOutput()) {
    Class<? extends CompressionCodec> codecClass =
      job.getMapOutputCompressorClass(DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job);
  }
  // combiner
  combinerRunner = CombinerRunner.create(job, getTaskID(), 
                                         combineInputCounter,
                                         reporter, null);
  if (combinerRunner != null) {
    combineCollector= new CombineOutputCollector<K,V>(combineOutputCounter);
  } else {
    combineCollector = null;
  }
  minSpillsForCombine = job.getInt("min.num.spills.for.combine", 3);
  spillThread.setDaemon(true);
  spillThread.setName("SpillThread");
  spillLock.lock();
  try {
    spillThread.start();
    while (!spillThreadRunning) {
      spillDone.await();
    }
  } catch (InterruptedException e) {
    throw (IOException)new IOException("Spill thread failed to initialize"
        ).initCause(sortSpillException);
  } finally {
    spillLock.unlock();
  }
  if (sortSpillException != null) {
    throw (IOException)new IOException("Spill thread failed to initialize"
        ).initCause(sortSpillException);
  }
}