* Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *     http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * See the License for the specific language governing permissions and
 * limitations under the License.
package org.apache.hadoop.mapred;

import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.OutputBuffer;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.SequenceFile.ValueBytes;
import org.apache.hadoop.util.Progress;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.io.SequenceFile.Sorter.RawKeyValueIterator;
import org.apache.hadoop.util.Progressable;

/** This class implements the sort interface using primitive int arrays as 
 * the data structures (that is why this class is called 'BasicType'SorterBase)
abstract class BasicTypeSorterBase implements BufferSorter {
  protected OutputBuffer keyValBuffer; //the buffer used for storing
  protected int[] startOffsets; //the array used to store the start offsets of
                                //keys in keyValBuffer
  protected int[] keyLengths; //the array used to store the lengths of
  protected int[] valueLengths; //the array used to store the value lengths 
  protected int[] pointers; //the array of startOffsets's indices. This will
                            //be sorted at the end to contain a sorted array of
                            //indices to offsets
  protected RawComparator comparator; //the comparator for the map output
  protected int count; //the number of key/values
  //the overhead of the arrays in memory 
  //12 => 4 for keyoffsets, 4 for keylengths, 4 for valueLengths, and
  //4 for indices into startOffsets array in the
  //pointers array (ignored the partpointers list itself)
  static private final int BUFFERED_KEY_VAL_OVERHEAD = 16;
  static private final int INITIAL_ARRAY_SIZE = 5;
  //we maintain the max lengths of the key/val that we encounter.  During 
  //iteration of the sorted results, we will create a DataOutputBuffer to
  //return the keys. The max size of the DataOutputBuffer will be the max
  //keylength that we encounter. Expose this value to model memory more
  private int maxKeyLength = 0;
  private int maxValLength = 0;

  //Reference to the Progressable object for sending KeepAlive
  protected Progressable reporter;

  //Implementation of methods of the SorterBase interface
  public void configure(JobConf conf) {
    comparator = conf.getOutputKeyComparator();
  public void setProgressable(Progressable reporter) {
    this.reporter = reporter;  

  public void addKeyValue(int recordOffset, int keyLength, int valLength) {
    //Add the start offset of the key in the startOffsets array and the
    //length in the keyLengths array.
    if (startOffsets == null || count == startOffsets.length)
    startOffsets[count] = recordOffset;
    keyLengths[count] = keyLength;
    if (keyLength > maxKeyLength) {
      maxKeyLength = keyLength;
    if (valLength > maxValLength) {
      maxValLength = valLength;
    valueLengths[count] = valLength;
    pointers[count] = count;

  public void setInputBuffer(OutputBuffer buffer) {
    //store a reference to the keyValBuffer that we need to read during sort
    this.keyValBuffer = buffer;

  public long getMemoryUtilized() {
    //the total length of the arrays + the max{Key,Val}Length (this will be the 
    //max size of the DataOutputBuffers during the iteration of the sorted
    if (startOffsets != null) {
      return (startOffsets.length) * BUFFERED_KEY_VAL_OVERHEAD + 
              maxKeyLength + maxValLength;
    else { //nothing from this yet
      return 0;

  public abstract RawKeyValueIterator sort();
  public void close() {
    //set count to 0; also, we don't reuse the arrays since we want to maintain
    //consistency in the memory model
    count = 0;
    startOffsets = null;
    keyLengths = null;
    valueLengths = null;
    pointers = null;
    maxKeyLength = 0;
    maxValLength = 0;
    //release the large key-value buffer so that the GC, if necessary,
    //can collect it away
    keyValBuffer = null;
  private void grow() {
    int currLength = 0;
    if (startOffsets != null) {
      currLength = startOffsets.length;
    int newLength = (int)(currLength * 1.1) + 1;
    startOffsets = grow(startOffsets, newLength);
    keyLengths = grow(keyLengths, newLength);
    valueLengths = grow(valueLengths, newLength);
    pointers = grow(pointers, newLength);
  private int[] grow(int[] old, int newLength) {
    int[] result = new int[newLength];
    if(old != null) { 
      System.arraycopy(old, 0, result, 0, old.length);
    return result;
} //BasicTypeSorterBase

//Implementation of methods of the RawKeyValueIterator interface. These
//methods must be invoked to iterate over key/vals after sort is done.
class MRSortResultIterator implements RawKeyValueIterator {
  private int count;
  private int[] pointers;
  private int[] startOffsets;
  private int[] keyLengths;
  private int[] valLengths;
  private int currStartOffsetIndex;
  private int currIndexInPointers;
  private OutputBuffer keyValBuffer;
  private DataOutputBuffer key = new DataOutputBuffer();
  private InMemUncompressedBytes value = new InMemUncompressedBytes();
  public MRSortResultIterator(OutputBuffer keyValBuffer, 
                              int []pointers, int []startOffsets,
                              int []keyLengths, int []valLengths) {
    this.count = pointers.length;
    this.pointers = pointers;
    this.startOffsets = startOffsets;
    this.keyLengths = keyLengths;
    this.valLengths = valLengths;
    this.keyValBuffer = keyValBuffer;
  public Progress getProgress() {
    return null;
  public DataOutputBuffer getKey() throws IOException {
    int currKeyOffset = startOffsets[currStartOffsetIndex];
    int currKeyLength = keyLengths[currStartOffsetIndex];
    //reuse the same key
    key.write(keyValBuffer.getData(), currKeyOffset, currKeyLength);
    return key;

  public ValueBytes getValue() throws IO