org.apache.hadoop.mapred.IFile.Writer Java Examples

The following examples show how to use org.apache.hadoop.mapred.IFile.Writer. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestCombineOutputCollector.java    From hadoop with Apache License 2.0 6 votes vote down vote up
@Test
public void testCustomCollect() throws Throwable {
  //mock creation
  TaskReporter mockTaskReporter = mock(TaskReporter.class);

  @SuppressWarnings("unchecked")
  Writer<String, Integer> mockWriter = mock(Writer.class);

  Configuration conf = new Configuration();
  conf.set(MRJobConfig.COMBINE_RECORDS_BEFORE_PROGRESS, "2");
  
  coc = new CombineOutputCollector<String, Integer>(outCounter, mockTaskReporter, conf);
  coc.setWriter(mockWriter);
  verify(mockTaskReporter, never()).progress();

  coc.collect("dummy", 1);
  verify(mockTaskReporter, never()).progress();
  
  coc.collect("dummy", 2);
  verify(mockTaskReporter, times(1)).progress();
}
 
Example #2
Source File: TestCombineOutputCollector.java    From hadoop with Apache License 2.0 6 votes vote down vote up
@Test
public void testDefaultCollect() throws Throwable {
  //mock creation
  TaskReporter mockTaskReporter = mock(TaskReporter.class);

  @SuppressWarnings("unchecked")
  Writer<String, Integer> mockWriter = mock(Writer.class);

  Configuration conf = new Configuration();
  
  coc = new CombineOutputCollector<String, Integer>(outCounter, mockTaskReporter, conf);
  coc.setWriter(mockWriter);
  verify(mockTaskReporter, never()).progress();

  for(int i = 0; i < Task.DEFAULT_COMBINE_RECORDS_BEFORE_PROGRESS; i++) {
  	coc.collect("dummy", i);
  }
  verify(mockTaskReporter, times(1)).progress();
  for(int i = 0; i < Task.DEFAULT_COMBINE_RECORDS_BEFORE_PROGRESS; i++) {
  	coc.collect("dummy", i);
  }
  verify(mockTaskReporter, times(2)).progress();
}
 
Example #3
Source File: TestCombineOutputCollector.java    From big-c with Apache License 2.0 6 votes vote down vote up
@Test
public void testDefaultCollect() throws Throwable {
  //mock creation
  TaskReporter mockTaskReporter = mock(TaskReporter.class);

  @SuppressWarnings("unchecked")
  Writer<String, Integer> mockWriter = mock(Writer.class);

  Configuration conf = new Configuration();
  
  coc = new CombineOutputCollector<String, Integer>(outCounter, mockTaskReporter, conf);
  coc.setWriter(mockWriter);
  verify(mockTaskReporter, never()).progress();

  for(int i = 0; i < Task.DEFAULT_COMBINE_RECORDS_BEFORE_PROGRESS; i++) {
  	coc.collect("dummy", i);
  }
  verify(mockTaskReporter, times(1)).progress();
  for(int i = 0; i < Task.DEFAULT_COMBINE_RECORDS_BEFORE_PROGRESS; i++) {
  	coc.collect("dummy", i);
  }
  verify(mockTaskReporter, times(2)).progress();
}
 
Example #4
Source File: TestCombineOutputCollector.java    From big-c with Apache License 2.0 6 votes vote down vote up
@Test
public void testCustomCollect() throws Throwable {
  //mock creation
  TaskReporter mockTaskReporter = mock(TaskReporter.class);

  @SuppressWarnings("unchecked")
  Writer<String, Integer> mockWriter = mock(Writer.class);

  Configuration conf = new Configuration();
  conf.set(MRJobConfig.COMBINE_RECORDS_BEFORE_PROGRESS, "2");
  
  coc = new CombineOutputCollector<String, Integer>(outCounter, mockTaskReporter, conf);
  coc.setWriter(mockWriter);
  verify(mockTaskReporter, never()).progress();

  coc.collect("dummy", 1);
  verify(mockTaskReporter, never()).progress();
  
  coc.collect("dummy", 2);
  verify(mockTaskReporter, times(1)).progress();
}
 
Example #5
Source File: MergeManagerImpl.java    From big-c with Apache License 2.0 5 votes vote down vote up
@Override
public void merge(List<InMemoryMapOutput<K, V>> inputs) throws IOException {
  if (inputs == null || inputs.size() == 0) {
    return;
  }

  TaskAttemptID dummyMapId = inputs.get(0).getMapId(); 
  List<Segment<K, V>> inMemorySegments = new ArrayList<Segment<K, V>>();
  long mergeOutputSize = 
    createInMemorySegments(inputs, inMemorySegments, 0);
  int noInMemorySegments = inMemorySegments.size();
  
  InMemoryMapOutput<K, V> mergedMapOutputs = 
    unconditionalReserve(dummyMapId, mergeOutputSize, false);
  
  Writer<K, V> writer = 
    new InMemoryWriter<K, V>(mergedMapOutputs.getArrayStream());
  
  LOG.info("Initiating Memory-to-Memory merge with " + noInMemorySegments +
           " segments of total-size: " + mergeOutputSize);

  RawKeyValueIterator rIter = 
    Merger.merge(jobConf, rfs,
                 (Class<K>)jobConf.getMapOutputKeyClass(),
                 (Class<V>)jobConf.getMapOutputValueClass(),
                 inMemorySegments, inMemorySegments.size(),
                 new Path(reduceId.toString()),
                 (RawComparator<K>)jobConf.getOutputKeyComparator(),
                 reporter, null, null, null);
  Merger.writeFile(rIter, writer, reporter, jobConf);
  writer.close();

  LOG.info(reduceId +  
           " Memory-to-Memory merge of the " + noInMemorySegments +
           " files in-memory complete.");

  // Note the output of the merge
  closeInMemoryMergedFile(mergedMapOutputs);
}
 
Example #6
Source File: BackupStore.java    From big-c with Apache License 2.0 5 votes vote down vote up
private Writer<K,V> createSpillFile() throws IOException {
  Path tmp =
      new Path(MRJobConfig.OUTPUT + "/backup_" + tid.getId() + "_"
          + (spillNumber++) + ".out");

  LOG.info("Created file: " + tmp);

  file = lDirAlloc.getLocalPathForWrite(tmp.toUri().getPath(), 
      -1, conf);
  FSDataOutputStream out = fs.create(file);
  out = CryptoUtils.wrapIfNecessary(conf, out);
  return new Writer<K, V>(conf, out, null, null, null, null, true);
}
 
Example #7
Source File: Merger.java    From hadoop-gpu with Apache License 2.0 5 votes vote down vote up
public static <K extends Object, V extends Object>
  void writeFile(RawKeyValueIterator records, Writer<K, V> writer, 
                 Progressable progressable, Configuration conf) 
  throws IOException {
    long progressBar = conf.getLong("mapred.merge.recordsBeforeProgress",
        10000);
    long recordCtr = 0;
    while(records.next()) {
      writer.append(records.getKey(), records.getValue());
      
      if (((recordCtr++) % progressBar) == 0) {
        progressable.progress();
      }
    }
}
 
Example #8
Source File: Merger.java    From hadoop with Apache License 2.0 5 votes vote down vote up
public static <K extends Object, V extends Object>
  void writeFile(RawKeyValueIterator records, Writer<K, V> writer, 
                 Progressable progressable, Configuration conf) 
  throws IOException {
    long progressBar = conf.getLong(JobContext.RECORDS_BEFORE_PROGRESS,
        10000);
    long recordCtr = 0;
    while(records.next()) {
      writer.append(records.getKey(), records.getValue());
      
      if (((recordCtr++) % progressBar) == 0) {
        progressable.progress();
      }
    }
}
 
Example #9
Source File: BackupStore.java    From hadoop with Apache License 2.0 5 votes vote down vote up
private Writer<K,V> createSpillFile() throws IOException {
  Path tmp =
      new Path(MRJobConfig.OUTPUT + "/backup_" + tid.getId() + "_"
          + (spillNumber++) + ".out");

  LOG.info("Created file: " + tmp);

  file = lDirAlloc.getLocalPathForWrite(tmp.toUri().getPath(), 
      -1, conf);
  FSDataOutputStream out = fs.create(file);
  out = CryptoUtils.wrapIfNecessary(conf, out);
  return new Writer<K, V>(conf, out, null, null, null, null, true);
}
 
Example #10
Source File: Merger.java    From big-c with Apache License 2.0 5 votes vote down vote up
public static <K extends Object, V extends Object>
  void writeFile(RawKeyValueIterator records, Writer<K, V> writer, 
                 Progressable progressable, Configuration conf) 
  throws IOException {
    long progressBar = conf.getLong(JobContext.RECORDS_BEFORE_PROGRESS,
        10000);
    long recordCtr = 0;
    while(records.next()) {
      writer.append(records.getKey(), records.getValue());
      
      if (((recordCtr++) % progressBar) == 0) {
        progressable.progress();
      }
    }
}
 
Example #11
Source File: ReducePartition.java    From RDFS with Apache License 2.0 5 votes vote down vote up
public IndexRecord spill(JobConf job, FSDataOutputStream out,
    Class<K> keyClass, Class<V> valClass, CompressionCodec codec,
    Counter spillCounter) throws IOException {
  IFile.Writer<K, V> writer = null;
  IndexRecord rec = new IndexRecord();
  long segmentStart = out.getPos();
  try {
    writer = new Writer<K, V>(job, out, keyClass, valClass, codec,
        spillCounter);
    // spill directly
    KeyValueSpillIterator kvSortedArray = this.getKeyValueSpillIterator();
    MemoryBlockIndex memBlkIdx = kvSortedArray.next();
    while (memBlkIdx != null) {
      int pos = memBlkIdx.getIndex();
      MemoryBlock memBlk = memBlkIdx.getMemoryBlock();
      writer.append(kvbuffer, memBlk.offsets[pos], memBlk.keyLenArray[pos],
          memBlk.valueLenArray[pos]);
      memBlkIdx = kvSortedArray.next();
    }
  } finally {
    // close the writer
    if (null != writer)
      writer.close();
  }
  rec.startOffset = segmentStart;
  rec.rawLength = writer.getRawLength();
  rec.partLength = writer.getCompressedLength();
  writer = null;
  return rec;
}
 
Example #12
Source File: MergeManagerImpl.java    From hadoop with Apache License 2.0 5 votes vote down vote up
@Override
public void merge(List<InMemoryMapOutput<K, V>> inputs) throws IOException {
  if (inputs == null || inputs.size() == 0) {
    return;
  }

  TaskAttemptID dummyMapId = inputs.get(0).getMapId(); 
  List<Segment<K, V>> inMemorySegments = new ArrayList<Segment<K, V>>();
  long mergeOutputSize = 
    createInMemorySegments(inputs, inMemorySegments, 0);
  int noInMemorySegments = inMemorySegments.size();
  
  InMemoryMapOutput<K, V> mergedMapOutputs = 
    unconditionalReserve(dummyMapId, mergeOutputSize, false);
  
  Writer<K, V> writer = 
    new InMemoryWriter<K, V>(mergedMapOutputs.getArrayStream());
  
  LOG.info("Initiating Memory-to-Memory merge with " + noInMemorySegments +
           " segments of total-size: " + mergeOutputSize);

  RawKeyValueIterator rIter = 
    Merger.merge(jobConf, rfs,
                 (Class<K>)jobConf.getMapOutputKeyClass(),
                 (Class<V>)jobConf.getMapOutputValueClass(),
                 inMemorySegments, inMemorySegments.size(),
                 new Path(reduceId.toString()),
                 (RawComparator<K>)jobConf.getOutputKeyComparator(),
                 reporter, null, null, null);
  Merger.writeFile(rIter, writer, reporter, jobConf);
  writer.close();

  LOG.info(reduceId +  
           " Memory-to-Memory merge of the " + noInMemorySegments +
           " files in-memory complete.");

  // Note the output of the merge
  closeInMemoryMergedFile(mergedMapOutputs);
}
 
Example #13
Source File: Merger.java    From RDFS with Apache License 2.0 5 votes vote down vote up
public static <K extends Object, V extends Object>
  void writeFile(RawKeyValueIterator records, Writer<K, V> writer, 
                 Progressable progressable, Configuration conf) 
  throws IOException {
    long progressBar = conf.getLong("mapred.merge.recordsBeforeProgress",
        10000);
    long recordCtr = 0;
    while(records.next()) {
      writer.append(records.getKey(), records.getValue());
      
      if (((recordCtr++) % progressBar) == 0) {
        progressable.progress();
      }
    }
}
 
Example #14
Source File: BlockMapOutputBuffer.java    From RDFS with Apache License 2.0 4 votes vote down vote up
public void spillSingleRecord(K key, V value, int part)
    throws IOException {

  ProcResourceValues spillStartProcVals =
      task.getCurrentProcResourceValues();
  long spillStartMilli = System.currentTimeMillis();
  // spill
  FSDataOutputStream out = null;
  long spillBytes = 0;
  try {
    // create spill file
    final SpillRecord spillRec = new SpillRecord(partitions);
    final Path filename =
        task.mapOutputFile.getSpillFileForWrite(getTaskID(),
            numSpills, key.getLength() + value.getLength());
    out = rfs.create(filename);
    IndexRecord rec = new IndexRecord();
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer<K, V> writer = null;
      try {
        long segmentStart = out.getPos();
        // Create a new codec, don't care!
        writer =
            new IFile.Writer<K, V>(job, out, keyClass, valClass,
                codec, task.spilledRecordsCounter);
        if (i == part) {
          final long recordStart = out.getPos();
          writer.append(key, value);
          // Note that our map byte count will not be accurate with
          // compression
          mapOutputByteCounter
              .increment(out.getPos() - recordStart);
        }
        writer.close();

        // record offsets
        rec.startOffset = segmentStart;
        rec.rawLength = writer.getRawLength();
        rec.partLength = writer.getCompressedLength();
        spillBytes += writer.getCompressedLength();
        spillRec.putIndex(rec, i);
        writer = null;
      } catch (IOException e) {
        if (null != writer)
          writer.close();
        throw e;
      }
    }

    if (totalIndexCacheMemory >= INDEX_CACHE_MEMORY_LIMIT) {
      // create spill index file
      Path indexFilename =
          task.mapOutputFile.getSpillIndexFileForWrite(getTaskID(),
              numSpills, partitions
                  * MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillRec.writeToFile(indexFilename, job);
    } else {
      indexCacheList.add(spillRec);
      totalIndexCacheMemory +=
          spillRec.size() * MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }
    
    LOG.info("Finished spill big record " + numBigRecordsSpills);
    ++numBigRecordsSpills;
    ++numSpills;
  } finally {
    if (out != null)
      out.close();
  }

  long spillEndMilli = System.currentTimeMillis();
  ProcResourceValues spillEndProcVals =
      task.getCurrentProcResourceValues();
  mapSpillSortCounter.incCountersPerSpill(spillStartProcVals,
      spillEndProcVals, spillEndMilli - spillStartMilli, spillBytes);
  mapSpillSortCounter.incSpillSingleRecord();
}
 
Example #15
Source File: MergeManagerImpl.java    From big-c with Apache License 2.0 4 votes vote down vote up
@Override
public void merge(List<InMemoryMapOutput<K,V>> inputs) throws IOException {
  if (inputs == null || inputs.size() == 0) {
    return;
  }
  
  //name this output file same as the name of the first file that is 
  //there in the current list of inmem files (this is guaranteed to
  //be absent on the disk currently. So we don't overwrite a prev. 
  //created spill). Also we need to create the output file now since
  //it is not guaranteed that this file will be present after merge
  //is called (we delete empty files as soon as we see them
  //in the merge method)

  //figure out the mapId 
  TaskAttemptID mapId = inputs.get(0).getMapId();
  TaskID mapTaskId = mapId.getTaskID();

  List<Segment<K, V>> inMemorySegments = new ArrayList<Segment<K, V>>();
  long mergeOutputSize = 
    createInMemorySegments(inputs, inMemorySegments,0);
  int noInMemorySegments = inMemorySegments.size();

  Path outputPath = 
    mapOutputFile.getInputFileForWrite(mapTaskId,
                                       mergeOutputSize).suffix(
                                           Task.MERGED_OUTPUT_PREFIX);

  FSDataOutputStream out = CryptoUtils.wrapIfNecessary(jobConf, rfs.create(outputPath));
  Writer<K, V> writer = new Writer<K, V>(jobConf, out,
      (Class<K>) jobConf.getMapOutputKeyClass(),
      (Class<V>) jobConf.getMapOutputValueClass(), codec, null, true);

  RawKeyValueIterator rIter = null;
  CompressAwarePath compressAwarePath;
  try {
    LOG.info("Initiating in-memory merge with " + noInMemorySegments + 
             " segments...");
    
    rIter = Merger.merge(jobConf, rfs,
                         (Class<K>)jobConf.getMapOutputKeyClass(),
                         (Class<V>)jobConf.getMapOutputValueClass(),
                         inMemorySegments, inMemorySegments.size(),
                         new Path(reduceId.toString()),
                         (RawComparator<K>)jobConf.getOutputKeyComparator(),
                         reporter, spilledRecordsCounter, null, null);
    
    if (null == combinerClass) {
      Merger.writeFile(rIter, writer, reporter, jobConf);
    } else {
      combineCollector.setWriter(writer);
      combineAndSpill(rIter, reduceCombineInputCounter);
    }
    writer.close();
    compressAwarePath = new CompressAwarePath(outputPath,
        writer.getRawLength(), writer.getCompressedLength());

    LOG.info(reduceId +  
        " Merge of the " + noInMemorySegments +
        " files in-memory complete." +
        " Local file is " + outputPath + " of size " + 
        localFS.getFileStatus(outputPath).getLen());
  } catch (IOException e) { 
    //make sure that we delete the ondisk file that we created 
    //earlier when we invoked cloneFileAttributes
    localFS.delete(outputPath, true);
    throw e;
  }

  // Note the output of the merge
  closeOnDiskFile(compressAwarePath);
}
 
Example #16
Source File: MergeManagerImpl.java    From big-c with Apache License 2.0 4 votes vote down vote up
@Override
public void merge(List<CompressAwarePath> inputs) throws IOException {
  // sanity check
  if (inputs == null || inputs.isEmpty()) {
    LOG.info("No ondisk files to merge...");
    return;
  }
  
  long approxOutputSize = 0;
  int bytesPerSum = 
    jobConf.getInt("io.bytes.per.checksum", 512);
  
  LOG.info("OnDiskMerger: We have  " + inputs.size() + 
           " map outputs on disk. Triggering merge...");
  
  // 1. Prepare the list of files to be merged. 
  for (CompressAwarePath file : inputs) {
    approxOutputSize += localFS.getFileStatus(file).getLen();
  }

  // add the checksum length
  approxOutputSize += 
    ChecksumFileSystem.getChecksumLength(approxOutputSize, bytesPerSum);

  // 2. Start the on-disk merge process
  Path outputPath = 
    localDirAllocator.getLocalPathForWrite(inputs.get(0).toString(), 
        approxOutputSize, jobConf).suffix(Task.MERGED_OUTPUT_PREFIX);

  FSDataOutputStream out = CryptoUtils.wrapIfNecessary(jobConf, rfs.create(outputPath));
  Writer<K, V> writer = new Writer<K, V>(jobConf, out,
      (Class<K>) jobConf.getMapOutputKeyClass(),
      (Class<V>) jobConf.getMapOutputValueClass(), codec, null, true);

  RawKeyValueIterator iter  = null;
  CompressAwarePath compressAwarePath;
  Path tmpDir = new Path(reduceId.toString());
  try {
    iter = Merger.merge(jobConf, rfs,
                        (Class<K>) jobConf.getMapOutputKeyClass(),
                        (Class<V>) jobConf.getMapOutputValueClass(),
                        codec, inputs.toArray(new Path[inputs.size()]), 
                        true, ioSortFactor, tmpDir, 
                        (RawComparator<K>) jobConf.getOutputKeyComparator(), 
                        reporter, spilledRecordsCounter, null, 
                        mergedMapOutputsCounter, null);

    Merger.writeFile(iter, writer, reporter, jobConf);
    writer.close();
    compressAwarePath = new CompressAwarePath(outputPath,
        writer.getRawLength(), writer.getCompressedLength());
  } catch (IOException e) {
    localFS.delete(outputPath, true);
    throw e;
  }

  closeOnDiskFile(compressAwarePath);

  LOG.info(reduceId +
      " Finished merging " + inputs.size() + 
      " map output files on disk of total-size " + 
      approxOutputSize + "." + 
      " Local output file is " + outputPath + " of size " +
      localFS.getFileStatus(outputPath).getLen());
}
 
Example #17
Source File: Task.java    From RDFS with Apache License 2.0 4 votes vote down vote up
public synchronized void setWriter(Writer<K, V> writer) {
  this.writer = writer;
}
 
Example #18
Source File: MapTask.java    From RDFS with Apache License 2.0 4 votes vote down vote up
/**
 * Handles the degenerate case where serialization fails to fit in
 * the in-memory buffer, so we must spill the record from collect
 * directly to a spill file. Consider this "losing".
 */
private void spillSingleRecord(final K key, final V value,
                               int partition) throws IOException {
  long size = kvbuffer.length + partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  try {
    
    long spillStartMilli = System.currentTimeMillis();
    ProcResourceValues spillStartProcVals = getCurrentProcResourceValues();
    long spillBytes = 0;
    
    // create spill file
    final SpillRecord spillRec = new SpillRecord(partitions);
    final Path filename = mapOutputFile.getSpillFileForWrite(getTaskID(),
        numSpills, size);
    out = rfs.create(filename);
    
    // we don't run the combiner for a single record
    IndexRecord rec = new IndexRecord();
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer<K, V> writer = null;
      try {
        long segmentStart = out.getPos();
        // Create a new codec, don't care!
        writer = new IFile.Writer<K,V>(job, out, keyClass, valClass, codec,
                                        spilledRecordsCounter);

        if (i == partition) {
          final long recordStart = out.getPos();
          writer.append(key, value);
          // Note that our map byte count will not be accurate with
          // compression
          mapOutputByteCounter.increment(out.getPos() - recordStart);
        }
        writer.close();

        // record offsets
        rec.startOffset = segmentStart;
        rec.rawLength = writer.getRawLength();
        rec.partLength = writer.getCompressedLength();
        spillBytes += writer.getCompressedLength();
        spillRec.putIndex(rec, i);

        writer = null;
      } catch (IOException e) {
        if (null != writer) writer.close();
        throw e;
      }
    }
    if (totalIndexCacheMemory >= INDEX_CACHE_MEMORY_LIMIT) {
      // create spill index file
      Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(
          getTaskID(), numSpills,
          partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillRec.writeToFile(indexFilename, job);
    } else {
      indexCacheList.add(spillRec);
      totalIndexCacheMemory +=
        spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }
    
    long spillEndMilli = System.currentTimeMillis();
    ProcResourceValues spillEndProcVals = getCurrentProcResourceValues();        
    spillSortCounters.incCountersPerSpill(spillStartProcVals,
        spillEndProcVals, spillEndMilli - spillStartMilli, spillBytes);
    ++numSpills;
  } finally {
    if (out != null) out.close();
  }
}
 
Example #19
Source File: MapTask.java    From hadoop-gpu with Apache License 2.0 4 votes vote down vote up
/**
 * Handles the degenerate case where serialization fails to fit in
 * the in-memory buffer, so we must spill the record from collect
 * directly to a spill file. Consider this "losing".
 */
private void spillSingleRecord(final K key, final V value,
                               int partition) throws IOException {
  long size = kvbuffer.length + partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  try {
    // create spill file
    final SpillRecord spillRec = new SpillRecord(partitions);
    final Path filename = mapOutputFile.getSpillFileForWrite(getTaskID(),
        numSpills, size);
    out = rfs.create(filename);
    
    // we don't run the combiner for a single record
    IndexRecord rec = new IndexRecord();
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer<K, V> writer = null;
      try {
        long segmentStart = out.getPos();
        // Create a new codec, don't care!
        writer = new IFile.Writer<K,V>(job, out, keyClass, valClass, codec,
                                        spilledRecordsCounter);

        if (i == partition) {
          final long recordStart = out.getPos();
          writer.append(key, value);
          // Note that our map byte count will not be accurate with
          // compression
          mapOutputByteCounter.increment(out.getPos() - recordStart);
        }
        writer.close();

        // record offsets
        rec.startOffset = segmentStart;
        rec.rawLength = writer.getRawLength();
        rec.partLength = writer.getCompressedLength();
        spillRec.putIndex(rec, i);

        writer = null;
      } catch (IOException e) {
        if (null != writer) writer.close();
        throw e;
      }
    }
    if (totalIndexCacheMemory >= INDEX_CACHE_MEMORY_LIMIT) {
      // create spill index file
      Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(
          getTaskID(), numSpills,
          partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillRec.writeToFile(indexFilename, job);
    } else {
      indexCacheList.add(spillRec);
      totalIndexCacheMemory +=
        spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }
    ++numSpills;
  } finally {
    if (out != null) out.close();
  }
}
 
Example #20
Source File: Task.java    From hadoop-gpu with Apache License 2.0 4 votes vote down vote up
public synchronized void setWriter(Writer<K, V> writer) {
  this.writer = writer;
}
 
Example #21
Source File: MapTask.java    From hadoop-gpu with Apache License 2.0 4 votes vote down vote up
private void sortAndSpill() throws IOException, ClassNotFoundException,
                                   InterruptedException {
  //approximate the length of the output file to be the length of the
  //buffer + header lengths for the partitions
  long size = (bufend >= bufstart
      ? bufend - bufstart
      : (bufvoid - bufend) + bufstart) +
              partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  try {
    // create spill file
    final SpillRecord spillRec = new SpillRecord(partitions);
    final Path filename = mapOutputFile.getSpillFileForWrite(getTaskID(),
        numSpills, size);
    out = rfs.create(filename);

    final int endPosition = (kvend > kvstart)
      ? kvend
      : kvoffsets.length + kvend;
    sorter.sort(MapOutputBuffer.this, kvstart, endPosition, reporter);
    int spindex = kvstart;
    IndexRecord rec = new IndexRecord();
    InMemValBytes value = new InMemValBytes();
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer<K, V> writer = null;
      try {
        long segmentStart = out.getPos();
        writer = new Writer<K, V>(job, out, keyClass, valClass, codec,
                                  spilledRecordsCounter);
        if (combinerRunner == null) {
          // spill directly
          DataInputBuffer key = new DataInputBuffer();
          while (spindex < endPosition &&
              kvindices[kvoffsets[spindex % kvoffsets.length]
                        + PARTITION] == i) {
            final int kvoff = kvoffsets[spindex % kvoffsets.length];
            getVBytesForOffset(kvoff, value);
            key.reset(kvbuffer, kvindices[kvoff + KEYSTART],
                      (kvindices[kvoff + VALSTART] - 
                       kvindices[kvoff + KEYSTART]));
            writer.append(key, value);
            ++spindex;
          }
        } else {
          int spstart = spindex;
          while (spindex < endPosition &&
              kvindices[kvoffsets[spindex % kvoffsets.length]
                        + PARTITION] == i) {
            ++spindex;
          }
          // Note: we would likeo to avoid the combiner if we've fewer
          // than some threshold of records for a partition
          if (spstart != spindex) {
            combineCollector.setWriter(writer);
            RawKeyValueIterator kvIter =
              new MRResultIterator(spstart, spindex);
            combinerRunner.combine(kvIter, combineCollector);
          }
        }

        // close the writer
        writer.close();

        // record offsets
        rec.startOffset = segmentStart;
        rec.rawLength = writer.getRawLength();
        rec.partLength = writer.getCompressedLength();
        spillRec.putIndex(rec, i);

        writer = null;
      } finally {
        if (null != writer) writer.close();
      }
    }

    if (totalIndexCacheMemory >= INDEX_CACHE_MEMORY_LIMIT) {
      // create spill index file
      Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(
          getTaskID(), numSpills,
          partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillRec.writeToFile(indexFilename, job);
    } else {
      indexCacheList.add(spillRec);
      totalIndexCacheMemory +=
        spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }
    LOG.info("Finished spill " + numSpills);
    ++numSpills;
  } finally {
    if (out != null) out.close();
  }
}
 
Example #22
Source File: TestPipeApplication.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * test PipesMapRunner    test the transfer data from reader
 *
 * @throws Exception
 */
@Test
public void testRunner() throws Exception {

  // clean old password files
  File[] psw = cleanTokenPasswordFile();
  try {
    RecordReader<FloatWritable, NullWritable> rReader = new ReaderPipesMapRunner();
    JobConf conf = new JobConf();
    conf.set(Submitter.IS_JAVA_RR, "true");
    // for stdour and stderror

    conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskName);

    CombineOutputCollector<IntWritable, Text> output = new CombineOutputCollector<IntWritable, Text>(
            new Counters.Counter(), new Progress());
    FileSystem fs = new RawLocalFileSystem();
    fs.setConf(conf);
    Writer<IntWritable, Text> wr = new Writer<IntWritable, Text>(conf, fs.create(
            new Path(workSpace + File.separator + "outfile")), IntWritable.class,
            Text.class, null, null, true);
    output.setWriter(wr);
    // stub for client
    File fCommand = getFileCommand("org.apache.hadoop.mapred.pipes.PipeApplicationRunnableStub");

    conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
    // token for authorization
    Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>(
            "user".getBytes(), "password".getBytes(), new Text("kind"), new Text(
            "service"));
    TokenCache.setJobToken(token,  conf.getCredentials());
    conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);
    TestTaskReporter reporter = new TestTaskReporter();
    PipesMapRunner<FloatWritable, NullWritable, IntWritable, Text> runner = new PipesMapRunner<FloatWritable, NullWritable, IntWritable, Text>();

    initStdOut(conf);

    runner.configure(conf);
    runner.run(rReader, output, reporter);

    String stdOut = readStdOut(conf);

    // test part of translated data. As common file for client and test -
    // clients stdOut
    // check version
    assertTrue(stdOut.contains("CURRENT_PROTOCOL_VERSION:0"));
    // check key and value classes
    assertTrue(stdOut
            .contains("Key class:org.apache.hadoop.io.FloatWritable"));
    assertTrue(stdOut
            .contains("Value class:org.apache.hadoop.io.NullWritable"));
    // test have sent all data from reader
    assertTrue(stdOut.contains("value:0.0"));
    assertTrue(stdOut.contains("value:9.0"));

  } finally {
    if (psw != null) {
      // remove password files
      for (File file : psw) {
        file.deleteOnExit();
      }
    }

  }
}
 
Example #23
Source File: MapTask.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Handles the degenerate case where serialization fails to fit in
 * the in-memory buffer, so we must spill the record from collect
 * directly to a spill file. Consider this "losing".
 */
private void spillSingleRecord(final K key, final V value,
                               int partition) throws IOException {
  long size = kvbuffer.length + partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  try {
    // create spill file
    final SpillRecord spillRec = new SpillRecord(partitions);
    final Path filename =
        mapOutputFile.getSpillFileForWrite(numSpills, size);
    out = rfs.create(filename);

    // we don't run the combiner for a single record
    IndexRecord rec = new IndexRecord();
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer<K, V> writer = null;
      try {
        long segmentStart = out.getPos();
        // Create a new codec, don't care!
        FSDataOutputStream partitionOut = CryptoUtils.wrapIfNecessary(job, out);
        writer = new IFile.Writer<K,V>(job, partitionOut, keyClass, valClass, codec,
                                        spilledRecordsCounter);

        if (i == partition) {
          final long recordStart = out.getPos();
          writer.append(key, value);
          // Note that our map byte count will not be accurate with
          // compression
          mapOutputByteCounter.increment(out.getPos() - recordStart);
        }
        writer.close();

        // record offsets
        rec.startOffset = segmentStart;
        rec.rawLength = writer.getRawLength() + CryptoUtils.cryptoPadding(job);
        rec.partLength = writer.getCompressedLength() + CryptoUtils.cryptoPadding(job);
        spillRec.putIndex(rec, i);

        writer = null;
      } catch (IOException e) {
        if (null != writer) writer.close();
        throw e;
      }
    }
    if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
      // create spill index file
      Path indexFilename =
          mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
              * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillRec.writeToFile(indexFilename, job);
    } else {
      indexCacheList.add(spillRec);
      totalIndexCacheMemory +=
        spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }
    ++numSpills;
  } finally {
    if (out != null) out.close();
  }
}
 
Example #24
Source File: MapTask.java    From big-c with Apache License 2.0 4 votes vote down vote up
private void sortAndSpill() throws IOException, ClassNotFoundException,
                                   InterruptedException {
  //approximate the length of the output file to be the length of the
  //buffer + header lengths for the partitions
  final long size = distanceTo(bufstart, bufend, bufvoid) +
              partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  try {
    // create spill file
    final SpillRecord spillRec = new SpillRecord(partitions);
    final Path filename =
        mapOutputFile.getSpillFileForWrite(numSpills, size);
    out = rfs.create(filename);

    final int mstart = kvend / NMETA;
    final int mend = 1 + // kvend is a valid record
      (kvstart >= kvend
      ? kvstart
      : kvmeta.capacity() + kvstart) / NMETA;
    sorter.sort(MapOutputBuffer.this, mstart, mend, reporter);
    int spindex = mstart;
    final IndexRecord rec = new IndexRecord();
    final InMemValBytes value = new InMemValBytes();
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer<K, V> writer = null;
      try {
        long segmentStart = out.getPos();
        FSDataOutputStream partitionOut = CryptoUtils.wrapIfNecessary(job, out);
        writer = new Writer<K, V>(job, partitionOut, keyClass, valClass, codec,
                                  spilledRecordsCounter);
        if (combinerRunner == null) {
          // spill directly
          DataInputBuffer key = new DataInputBuffer();
          while (spindex < mend &&
              kvmeta.get(offsetFor(spindex % maxRec) + PARTITION) == i) {
            final int kvoff = offsetFor(spindex % maxRec);
            int keystart = kvmeta.get(kvoff + KEYSTART);
            int valstart = kvmeta.get(kvoff + VALSTART);
            key.reset(kvbuffer, keystart, valstart - keystart);
            getVBytesForOffset(kvoff, value);
            writer.append(key, value);
            ++spindex;
          }
        } else {
          int spstart = spindex;
          while (spindex < mend &&
              kvmeta.get(offsetFor(spindex % maxRec)
                        + PARTITION) == i) {
            ++spindex;
          }
          // Note: we would like to avoid the combiner if we've fewer
          // than some threshold of records for a partition
          if (spstart != spindex) {
            combineCollector.setWriter(writer);
            RawKeyValueIterator kvIter =
              new MRResultIterator(spstart, spindex);
            combinerRunner.combine(kvIter, combineCollector);
          }
        }

        // close the writer
        writer.close();

        // record offsets
        rec.startOffset = segmentStart;
        rec.rawLength = writer.getRawLength() + CryptoUtils.cryptoPadding(job);
        rec.partLength = writer.getCompressedLength() + CryptoUtils.cryptoPadding(job);
        spillRec.putIndex(rec, i);

        writer = null;
      } finally {
        if (null != writer) writer.close();
      }
    }

    if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
      // create spill index file
      Path indexFilename =
          mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
              * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillRec.writeToFile(indexFilename, job);
    } else {
      indexCacheList.add(spillRec);
      totalIndexCacheMemory +=
        spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }
    LOG.info("Finished spill " + numSpills);
    ++numSpills;
  } finally {
    if (out != null) out.close();
  }
}
 
Example #25
Source File: Task.java    From big-c with Apache License 2.0 4 votes vote down vote up
public synchronized void setWriter(Writer<K, V> writer) {
  this.writer = writer;
}
 
Example #26
Source File: TestPipeApplication.java    From big-c with Apache License 2.0 4 votes vote down vote up
public synchronized void setWriter(Writer<K, V> writer) {
  this.writer = writer;
}
 
Example #27
Source File: TestPipeApplication.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * test org.apache.hadoop.mapred.pipes.Application
 * test a internal functions: MessageType.REGISTER_COUNTER,  INCREMENT_COUNTER, STATUS, PROGRESS...
 *
 * @throws Throwable
 */

@Test
public void testApplication() throws Throwable {
  JobConf conf = new JobConf();

  RecordReader<FloatWritable, NullWritable> rReader = new Reader();

  // client for test
  File fCommand = getFileCommand("org.apache.hadoop.mapred.pipes.PipeApplicationStub");

  TestTaskReporter reporter = new TestTaskReporter();

  File[] psw = cleanTokenPasswordFile();
  try {

    conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskName);
    conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());

    // token for authorization
    Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>(
            "user".getBytes(), "password".getBytes(), new Text("kind"), new Text(
            "service"));

    TokenCache.setJobToken(token, conf.getCredentials());
    FakeCollector output = new FakeCollector(new Counters.Counter(),
            new Progress());
    FileSystem fs = new RawLocalFileSystem();
    fs.setConf(conf);
    Writer<IntWritable, Text> wr = new Writer<IntWritable, Text>(conf, fs.create(
            new Path(workSpace.getAbsolutePath() + File.separator + "outfile")),
            IntWritable.class, Text.class, null, null, true);
    output.setWriter(wr);
    conf.set(Submitter.PRESERVE_COMMANDFILE, "true");

    initStdOut(conf);

    Application<WritableComparable<IntWritable>, Writable, IntWritable, Text> application = new Application<WritableComparable<IntWritable>, Writable, IntWritable, Text>(
            conf, rReader, output, reporter, IntWritable.class, Text.class);
    application.getDownlink().flush();

    application.getDownlink().mapItem(new IntWritable(3), new Text("txt"));

    application.getDownlink().flush();

    application.waitForFinish();

    wr.close();

    // test getDownlink().mapItem();
    String stdOut = readStdOut(conf);
    assertTrue(stdOut.contains("key:3"));
    assertTrue(stdOut.contains("value:txt"));

    // reporter test counter, and status should be sended
    // test MessageType.REGISTER_COUNTER and INCREMENT_COUNTER
    assertEquals(1.0, reporter.getProgress(), 0.01);
    assertNotNull(reporter.getCounter("group", "name"));
    // test status MessageType.STATUS
    assertEquals(reporter.getStatus(), "PROGRESS");
    stdOut = readFile(new File(workSpace.getAbsolutePath() + File.separator
            + "outfile"));
    // check MessageType.PROGRESS
    assertEquals(0.55f, rReader.getProgress(), 0.001);
    application.getDownlink().close();
    // test MessageType.OUTPUT
    Entry<IntWritable, Text> entry = output.getCollect().entrySet()
            .iterator().next();
    assertEquals(123, entry.getKey().get());
    assertEquals("value", entry.getValue().toString());
    try {
      // try to abort
      application.abort(new Throwable());
      fail();
    } catch (IOException e) {
      // abort works ?
      assertEquals("pipe child exception", e.getMessage());
    }
  } finally {
    if (psw != null) {
      // remove password files
      for (File file : psw) {
        file.deleteOnExit();
      }
    }
  }
}
 
Example #28
Source File: TestPipeApplication.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * test PipesMapRunner    test the transfer data from reader
 *
 * @throws Exception
 */
@Test
public void testRunner() throws Exception {

  // clean old password files
  File[] psw = cleanTokenPasswordFile();
  try {
    RecordReader<FloatWritable, NullWritable> rReader = new ReaderPipesMapRunner();
    JobConf conf = new JobConf();
    conf.set(Submitter.IS_JAVA_RR, "true");
    // for stdour and stderror

    conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskName);

    CombineOutputCollector<IntWritable, Text> output = new CombineOutputCollector<IntWritable, Text>(
            new Counters.Counter(), new Progress());
    FileSystem fs = new RawLocalFileSystem();
    fs.setConf(conf);
    Writer<IntWritable, Text> wr = new Writer<IntWritable, Text>(conf, fs.create(
            new Path(workSpace + File.separator + "outfile")), IntWritable.class,
            Text.class, null, null, true);
    output.setWriter(wr);
    // stub for client
    File fCommand = getFileCommand("org.apache.hadoop.mapred.pipes.PipeApplicationRunnableStub");

    conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
    // token for authorization
    Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>(
            "user".getBytes(), "password".getBytes(), new Text("kind"), new Text(
            "service"));
    TokenCache.setJobToken(token,  conf.getCredentials());
    conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);
    TestTaskReporter reporter = new TestTaskReporter();
    PipesMapRunner<FloatWritable, NullWritable, IntWritable, Text> runner = new PipesMapRunner<FloatWritable, NullWritable, IntWritable, Text>();

    initStdOut(conf);

    runner.configure(conf);
    runner.run(rReader, output, reporter);

    String stdOut = readStdOut(conf);

    // test part of translated data. As common file for client and test -
    // clients stdOut
    // check version
    assertTrue(stdOut.contains("CURRENT_PROTOCOL_VERSION:0"));
    // check key and value classes
    assertTrue(stdOut
            .contains("Key class:org.apache.hadoop.io.FloatWritable"));
    assertTrue(stdOut
            .contains("Value class:org.apache.hadoop.io.NullWritable"));
    // test have sent all data from reader
    assertTrue(stdOut.contains("value:0.0"));
    assertTrue(stdOut.contains("value:9.0"));

  } finally {
    if (psw != null) {
      // remove password files
      for (File file : psw) {
        file.deleteOnExit();
      }
    }

  }
}
 
Example #29
Source File: MergeManagerImpl.java    From hadoop with Apache License 2.0 4 votes vote down vote up
@Override
public void merge(List<CompressAwarePath> inputs) throws IOException {
  // sanity check
  if (inputs == null || inputs.isEmpty()) {
    LOG.info("No ondisk files to merge...");
    return;
  }
  
  long approxOutputSize = 0;
  int bytesPerSum = 
    jobConf.getInt("io.bytes.per.checksum", 512);
  
  LOG.info("OnDiskMerger: We have  " + inputs.size() + 
           " map outputs on disk. Triggering merge...");
  
  // 1. Prepare the list of files to be merged. 
  for (CompressAwarePath file : inputs) {
    approxOutputSize += localFS.getFileStatus(file).getLen();
  }

  // add the checksum length
  approxOutputSize += 
    ChecksumFileSystem.getChecksumLength(approxOutputSize, bytesPerSum);

  // 2. Start the on-disk merge process
  Path outputPath = 
    localDirAllocator.getLocalPathForWrite(inputs.get(0).toString(), 
        approxOutputSize, jobConf).suffix(Task.MERGED_OUTPUT_PREFIX);

  FSDataOutputStream out = CryptoUtils.wrapIfNecessary(jobConf, rfs.create(outputPath));
  Writer<K, V> writer = new Writer<K, V>(jobConf, out,
      (Class<K>) jobConf.getMapOutputKeyClass(),
      (Class<V>) jobConf.getMapOutputValueClass(), codec, null, true);

  RawKeyValueIterator iter  = null;
  CompressAwarePath compressAwarePath;
  Path tmpDir = new Path(reduceId.toString());
  try {
    iter = Merger.merge(jobConf, rfs,
                        (Class<K>) jobConf.getMapOutputKeyClass(),
                        (Class<V>) jobConf.getMapOutputValueClass(),
                        codec, inputs.toArray(new Path[inputs.size()]), 
                        true, ioSortFactor, tmpDir, 
                        (RawComparator<K>) jobConf.getOutputKeyComparator(), 
                        reporter, spilledRecordsCounter, null, 
                        mergedMapOutputsCounter, null);

    Merger.writeFile(iter, writer, reporter, jobConf);
    writer.close();
    compressAwarePath = new CompressAwarePath(outputPath,
        writer.getRawLength(), writer.getCompressedLength());
  } catch (IOException e) {
    localFS.delete(outputPath, true);
    throw e;
  }

  closeOnDiskFile(compressAwarePath);

  LOG.info(reduceId +
      " Finished merging " + inputs.size() + 
      " map output files on disk of total-size " + 
      approxOutputSize + "." + 
      " Local output file is " + outputPath + " of size " +
      localFS.getFileStatus(outputPath).getLen());
}
 
Example #30
Source File: MergeManagerImpl.java    From hadoop with Apache License 2.0 4 votes vote down vote up
@Override
public void merge(List<InMemoryMapOutput<K,V>> inputs) throws IOException {
  if (inputs == null || inputs.size() == 0) {
    return;
  }
  
  //name this output file same as the name of the first file that is 
  //there in the current list of inmem files (this is guaranteed to
  //be absent on the disk currently. So we don't overwrite a prev. 
  //created spill). Also we need to create the output file now since
  //it is not guaranteed that this file will be present after merge
  //is called (we delete empty files as soon as we see them
  //in the merge method)

  //figure out the mapId 
  TaskAttemptID mapId = inputs.get(0).getMapId();
  TaskID mapTaskId = mapId.getTaskID();

  List<Segment<K, V>> inMemorySegments = new ArrayList<Segment<K, V>>();
  long mergeOutputSize = 
    createInMemorySegments(inputs, inMemorySegments,0);
  int noInMemorySegments = inMemorySegments.size();

  Path outputPath = 
    mapOutputFile.getInputFileForWrite(mapTaskId,
                                       mergeOutputSize).suffix(
                                           Task.MERGED_OUTPUT_PREFIX);

  FSDataOutputStream out = CryptoUtils.wrapIfNecessary(jobConf, rfs.create(outputPath));
  Writer<K, V> writer = new Writer<K, V>(jobConf, out,
      (Class<K>) jobConf.getMapOutputKeyClass(),
      (Class<V>) jobConf.getMapOutputValueClass(), codec, null, true);

  RawKeyValueIterator rIter = null;
  CompressAwarePath compressAwarePath;
  try {
    LOG.info("Initiating in-memory merge with " + noInMemorySegments + 
             " segments...");
    
    rIter = Merger.merge(jobConf, rfs,
                         (Class<K>)jobConf.getMapOutputKeyClass(),
                         (Class<V>)jobConf.getMapOutputValueClass(),
                         inMemorySegments, inMemorySegments.size(),
                         new Path(reduceId.toString()),
                         (RawComparator<K>)jobConf.getOutputKeyComparator(),
                         reporter, spilledRecordsCounter, null, null);
    
    if (null == combinerClass) {
      Merger.writeFile(rIter, writer, reporter, jobConf);
    } else {
      combineCollector.setWriter(writer);
      combineAndSpill(rIter, reduceCombineInputCounter);
    }
    writer.close();
    compressAwarePath = new CompressAwarePath(outputPath,
        writer.getRawLength(), writer.getCompressedLength());

    LOG.info(reduceId +  
        " Merge of the " + noInMemorySegments +
        " files in-memory complete." +
        " Local file is " + outputPath + " of size " + 
        localFS.getFileStatus(outputPath).getLen());
  } catch (IOException e) { 
    //make sure that we delete the ondisk file that we created 
    //earlier when we invoked cloneFileAttributes
    localFS.delete(outputPath, true);
    throw e;
  }

  // Note the output of the merge
  closeOnDiskFile(compressAwarePath);
}