org.apache.hadoop.util.IndexedSorter Java Examples

The following examples show how to use org.apache.hadoop.util.IndexedSorter. All of them are taken from open source projects; the source file and project for each example are noted above it.
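
Every sorter behind this interface operates on an IndexedSortable, which exposes the underlying data only through compare(int, int) and swap(int, int) callbacks. As a minimal, self-contained sketch (not taken from any of the projects below), the following sorts a plain int[] with QuickSort, Hadoop's default IndexedSorter implementation:

import java.util.Arrays;

import org.apache.hadoop.util.IndexedSortable;
import org.apache.hadoop.util.IndexedSorter;
import org.apache.hadoop.util.QuickSort;

public class IntArraySortDemo implements IndexedSortable {
  private final int[] data = {5, 3, 8, 1, 9, 2};

  @Override
  public int compare(int i, int j) {
    // The sorter only ever asks how two positions compare...
    return Integer.compare(data[i], data[j]);
  }

  @Override
  public void swap(int i, int j) {
    // ...and asks us to exchange two positions; it never sees the data itself.
    int tmp = data[i];
    data[i] = data[j];
    data[j] = tmp;
  }

  public static void main(String[] args) {
    IntArraySortDemo demo = new IntArraySortDemo();
    IndexedSorter sorter = new QuickSort();
    sorter.sort(demo, 0, demo.data.length); // sorts the index range [0, length)
    System.out.println(Arrays.toString(demo.data)); // [1, 2, 3, 5, 8, 9]
  }
}
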
Example #1
Source File: PipelinedSorter.java    From incubator-tez with Apache License 2.0
public SpanIterator sort(IndexedSorter sorter, RawComparator comparator) {
  this.comparator = comparator;
  ki = new byte[keymax];
  kj = new byte[keymax];
  LOG.info("begin sorting Span" + index + " (" + length() + ")");
  if (length() > 1) {
    sorter.sort(this, 0, length(), nullProgressable);
  }
  LOG.info("done sorting Span" + index);
  return new SpanIterator(this);
}
 
Example #2
Source File: PipelinedSorter.java    From tez with Apache License 2.0
public SpanIterator sort(IndexedSorter sorter) {
  long start = System.currentTimeMillis();
  if (length() > 1) {
    sorter.sort(this, 0, length(), progressable);
  }
  LOG.info(outputContext.getDestinationVertexName() + ": " + "done sorting span=" + index + ", length=" + length() + ", "
      + "time=" + (System.currentTimeMillis() - start));
  return new SpanIterator((SortSpan)this);
}
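
In both variants the span passes itself to the sorter, which means SortSpan implements IndexedSortable: QuickSort (or whichever IndexedSorter was configured) drives the sort entirely through the span's compare/swap callbacks, and the returned SpanIterator then replays the records in sorted order. The newer tez version additionally measures and logs the sort time per span.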
 
Example #3
Source File: MapTask.java    From RDFS with Apache License 2.0
@SuppressWarnings("unchecked")
public MapOutputBuffer(TaskUmbilicalProtocol umbilical, JobConf job,
                       TaskReporter reporter
                       ) throws IOException, ClassNotFoundException {
  this.job = job;
  this.reporter = reporter;
  localFs = FileSystem.getLocal(job);
  partitions = job.getNumReduceTasks();
   
  rfs = ((LocalFileSystem)localFs).getRaw();

  indexCacheList = new ArrayList<SpillRecord>();
  
  spillSortCounters = new MapSpillSortCounters(reporter);
  
  // sanity checks
  final float spillper = job.getFloat("io.sort.spill.percent", (float) 0.8);
  final float recper = job.getFloat("io.sort.record.percent", (float) 0.05);
  boolean localMode = job.get("mapred.job.tracker", "local").equals("local");
  int sortmb = job.getInt("io.sort.mb", 100);
  if (localMode) {
    sortmb = job.getInt("io.sort.mb.localmode", 100);
  }
  if (spillper > (float)1.0 || spillper < (float)0.0) {
    throw new IOException("Invalid \"io.sort.spill.percent\": " + spillper);
  }
  if (recper > (float)1.0 || recper < (float)0.01) {
    throw new IOException("Invalid \"io.sort.record.percent\": " + recper);
  }
  if ((sortmb & 0x7FF) != sortmb) {
    throw new IOException("Invalid \"io.sort.mb\": " + sortmb);
  }
  sorter = ReflectionUtils.newInstance(
        job.getClass("map.sort.class", QuickSort.class, IndexedSorter.class), job);
  LOG.info("io.sort.mb = " + sortmb);
  // buffers and accounting
  int maxMemUsage = sortmb << 20;
  int recordCapacity = (int)(maxMemUsage * recper);
  recordCapacity -= recordCapacity % RECSIZE;
  kvbuffer = new byte[maxMemUsage - recordCapacity];
  bufvoid = kvbuffer.length;
  recordCapacity /= RECSIZE;
  kvoffsets = new int[recordCapacity];
  kvindices = new int[recordCapacity * ACCTSIZE];
  softBufferLimit = (int)(kvbuffer.length * spillper);
  softRecordLimit = (int)(kvoffsets.length * spillper);
  LOG.info("data buffer = " + softBufferLimit + "/" + kvbuffer.length);
  LOG.info("record buffer = " + softRecordLimit + "/" + kvoffsets.length);
  // k/v serialization
  comparator = job.getOutputKeyComparator();
  keyClass = (Class<K>)job.getMapOutputKeyClass();
  valClass = (Class<V>)job.getMapOutputValueClass();
  serializationFactory = new SerializationFactory(job);
  keySerializer = serializationFactory.getSerializer(keyClass);
  keySerializer.open(bb);
  valSerializer = serializationFactory.getSerializer(valClass);
  valSerializer.open(bb);
  // counters
  mapOutputByteCounter = reporter.getCounter(MAP_OUTPUT_BYTES);
  mapOutputRecordCounter = reporter.getCounter(MAP_OUTPUT_RECORDS);
  Counters.Counter combineInputCounter = 
    reporter.getCounter(COMBINE_INPUT_RECORDS);
  combineOutputCounter = reporter.getCounter(COMBINE_OUTPUT_RECORDS);
  // compression
  if (job.getCompressMapOutput()) {
    Class<? extends CompressionCodec> codecClass =
      job.getMapOutputCompressorClass(DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job);
  }
  // combiner
  combinerRunner = CombinerRunner.create(job, getTaskID(), 
                                         combineInputCounter,
                                         reporter, null);
  if (combinerRunner != null) {
    combineCollector = new CombineOutputCollector<K,V>(combineOutputCounter);
  } else {
    combineCollector = null;
  }
  minSpillsForCombine = job.getInt("min.num.spills.for.combine", 3);
  spillThread.setDaemon(true);
  spillThread.setName("SpillThread");
  spillLock.lock();
  try {
    spillThread.start();
    while (!spillThreadRunning) {
      spillDone.await();
    }
  } catch (InterruptedException e) {
    throw (IOException)new IOException("Spill thread failed to initialize"
        ).initCause(sortSpillException);
  } finally {
    spillLock.unlock();
  }
  if (sortSpillException != null) {
    throw (IOException)new IOException("Spill thread failed to initialize"
        ).initCause(sortSpillException);
  }
}
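
Two details of the sanity checks above are easy to miss. The test (sortmb & 0x7FF) != sortmb accepts only values that fit in 11 bits, i.e. 0 through 2047; since the buffer is later sized as sortmb << 20, 2047 MB is the largest setting whose byte count (2047 << 20 = 2,146,435,072) still fits in a signed 32-bit int. The io.sort.record.percent slice of that budget is then carved out for the kvoffsets/kvindices accounting arrays, and the remainder becomes the raw kvbuffer.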
 
Example #4
Source File: ExternalSorter.java    From incubator-tez with Apache License 2.0
public ExternalSorter(TezOutputContext outputContext, Configuration conf, int numOutputs,
    long initialMemoryAvailable) throws IOException {
  this.outputContext = outputContext;
  this.conf = conf;
  this.partitions = numOutputs;

  rfs = ((LocalFileSystem)FileSystem.getLocal(this.conf)).getRaw();

  int assignedMb = (int) (initialMemoryAvailable >> 20);
  if (assignedMb <= 0) {
    if (initialMemoryAvailable > 0) { // Rounded down to 0MB - may be > 0 && < 1MB
      this.availableMemoryMb = 1;
      LOG.warn("initialAvailableMemory: " + initialMemoryAvailable
          + " is too low. Rounding to 1 MB");
    } else {
      throw new RuntimeException("InitialMemoryAssigned is <= 0: " + initialMemoryAvailable);
    }
  } else {
    this.availableMemoryMb = assignedMb;
  }

  // sorter
  sorter = ReflectionUtils.newInstance(this.conf.getClass(
      TezJobConfig.TEZ_RUNTIME_INTERNAL_SORTER_CLASS, QuickSort.class,
      IndexedSorter.class), this.conf);

  comparator = ConfigUtils.getIntermediateOutputKeyComparator(this.conf);

  // k/v serialization
  keyClass = ConfigUtils.getIntermediateOutputKeyClass(this.conf);
  valClass = ConfigUtils.getIntermediateOutputValueClass(this.conf);
  serializationFactory = new SerializationFactory(this.conf);
  keySerializer = serializationFactory.getSerializer(keyClass);
  valSerializer = serializationFactory.getSerializer(valClass);

  // counters
  mapOutputByteCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES);
  mapOutputRecordCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_RECORDS);
  outputBytesWithOverheadCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
  fileOutputByteCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
  spilledRecordsCounter = outputContext.getCounters().findCounter(TaskCounter.SPILLED_RECORDS);
  additionalSpillBytesWritten = outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
  additionalSpillBytesRead = outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
  numAdditionalSpills = outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);

  // compression
  if (ConfigUtils.shouldCompressIntermediateOutput(this.conf)) {
    Class<? extends CompressionCodec> codecClass =
        ConfigUtils.getIntermediateOutputCompressorClass(this.conf, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, this.conf);
  } else {
    codec = null;
  }

  this.ifileReadAhead = this.conf.getBoolean(
      TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD,
      TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT);
  if (this.ifileReadAhead) {
    this.ifileReadAheadLength = conf.getInt(
        TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES,
        TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT);
  } else {
    this.ifileReadAheadLength = 0;
  }
  this.ifileBufferSize = conf.getInt("io.file.buffer.size",
      TezJobConfig.TEZ_RUNTIME_IFILE_BUFFER_SIZE_DEFAULT);

  
  // Task outputs
  mapOutputFile = TezRuntimeUtils.instantiateTaskOutputManager(conf, outputContext);
  
  LOG.info("Instantiating Partitioner: [" + conf.get(TezJobConfig.TEZ_RUNTIME_PARTITIONER_CLASS) + "]");
  this.conf.setInt(TezRuntimeFrameworkConfigs.TEZ_RUNTIME_NUM_EXPECTED_PARTITIONS, this.partitions);
  this.partitioner = TezRuntimeUtils.instantiatePartitioner(this.conf);
  this.combiner = TezRuntimeUtils.instantiateCombiner(this.conf, outputContext);
}
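
As the ReflectionUtils.newInstance call above shows, the internal sorter is pluggable. A minimal configuration sketch, assuming the org.apache.tez.common.TezJobConfig import path of the incubator releases and Hadoop's stock HeapSort as the replacement:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.HeapSort;
import org.apache.hadoop.util.IndexedSorter;
import org.apache.tez.common.TezJobConfig;

public class TezSorterConfigSketch {
  public static Configuration withHeapSort() {
    Configuration conf = new Configuration();
    // HeapSort ships with Hadoop alongside QuickSort; both implement IndexedSorter.
    conf.setClass(TezJobConfig.TEZ_RUNTIME_INTERNAL_SORTER_CLASS,
        HeapSort.class, IndexedSorter.class);
    return conf;
  }
}
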
 
Example #5
Source File: PipelinedSorter.java    From incubator-tez with Apache License 2.0
public SortTask(SortSpan sortable, IndexedSorter sorter,
    RawComparator comparator) {
  this.sortable = sortable;
  this.sorter = sorter;
  this.comparator = comparator;
}
 
Example #6
Source File: PipelinedSorter.java    From tez with Apache License 2.0
public SortTask(SortSpan sortable, IndexedSorter sorter) {
  this.sortable = sortable;
  this.sorter = sorter;
}
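
Compared with Example #5 from incubator-tez, the RawComparator parameter is gone; judging by the matching sort(IndexedSorter) method in Example #2, the newer Tez resolves the comparator inside the span itself rather than threading it through the SortTask.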
 
Example #7
Source File: MapTask.java    From hadoop-gpu with Apache License 2.0
@SuppressWarnings("unchecked")
public MapOutputBuffer(TaskUmbilicalProtocol umbilical, JobConf job,
                       TaskReporter reporter
                       ) throws IOException, ClassNotFoundException {
  this.job = job;
  this.reporter = reporter;
  localFs = FileSystem.getLocal(job);
  partitions = job.getNumReduceTasks();
   
  rfs = ((LocalFileSystem)localFs).getRaw();

  indexCacheList = new ArrayList<SpillRecord>();
  
  // sanity checks
  final float spillper = job.getFloat("io.sort.spill.percent", (float) 0.8);
  final float recper = job.getFloat("io.sort.record.percent", (float) 0.05);
  final int sortmb = job.getInt("io.sort.mb", 100);
  if (spillper > (float)1.0 || spillper < (float)0.0) {
    throw new IOException("Invalid \"io.sort.spill.percent\": " + spillper);
  }
  if (recper > (float)1.0 || recper < (float)0.01) {
    throw new IOException("Invalid \"io.sort.record.percent\": " + recper);
  }
  if ((sortmb & 0x7FF) != sortmb) {
    throw new IOException("Invalid \"io.sort.mb\": " + sortmb);
  }
  sorter = ReflectionUtils.newInstance(
        job.getClass("map.sort.class", QuickSort.class, IndexedSorter.class), job);
  LOG.info("io.sort.mb = " + sortmb);
  // buffers and accounting
  int maxMemUsage = sortmb << 20;
  int recordCapacity = (int)(maxMemUsage * recper);
  recordCapacity -= recordCapacity % RECSIZE;
  kvbuffer = new byte[maxMemUsage - recordCapacity];
  bufvoid = kvbuffer.length;
  recordCapacity /= RECSIZE;
  kvoffsets = new int[recordCapacity];
  kvindices = new int[recordCapacity * ACCTSIZE];
  softBufferLimit = (int)(kvbuffer.length * spillper);
  softRecordLimit = (int)(kvoffsets.length * spillper);
  LOG.info("data buffer = " + softBufferLimit + "/" + kvbuffer.length);
  LOG.info("record buffer = " + softRecordLimit + "/" + kvoffsets.length);
  // k/v serialization
  comparator = job.getOutputKeyComparator();
  keyClass = (Class<K>)job.getMapOutputKeyClass();
  valClass = (Class<V>)job.getMapOutputValueClass();
  serializationFactory = new SerializationFactory(job);
  keySerializer = serializationFactory.getSerializer(keyClass);
  keySerializer.open(bb);
  valSerializer = serializationFactory.getSerializer(valClass);
  valSerializer.open(bb);
  // counters
  mapOutputByteCounter = reporter.getCounter(MAP_OUTPUT_BYTES);
  mapOutputRecordCounter = reporter.getCounter(MAP_OUTPUT_RECORDS);
  Counters.Counter combineInputCounter = 
    reporter.getCounter(COMBINE_INPUT_RECORDS);
  combineOutputCounter = reporter.getCounter(COMBINE_OUTPUT_RECORDS);
  // compression
  if (job.getCompressMapOutput()) {
    Class<? extends CompressionCodec> codecClass =
      job.getMapOutputCompressorClass(DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job);
  }
  // combiner
  combinerRunner = CombinerRunner.create(job, getTaskID(), 
                                         combineInputCounter,
                                         reporter, null);
  if (combinerRunner != null) {
    combineCollector = new CombineOutputCollector<K,V>(combineOutputCounter);
  } else {
    combineCollector = null;
  }
  minSpillsForCombine = job.getInt("min.num.spills.for.combine", 3);
  spillThread.setDaemon(true);
  spillThread.setName("SpillThread");
  spillLock.lock();
  try {
    spillThread.start();
    while (!spillThreadRunning) {
      spillDone.await();
    }
  } catch (InterruptedException e) {
    throw (IOException)new IOException("Spill thread failed to initialize"
        ).initCause(sortSpillException);
  } finally {
    spillLock.unlock();
  }
  if (sortSpillException != null) {
    throw (IOException)new IOException("Spill thread failed to initialize"
        ).initCause(sortSpillException);
  }
}
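
The map-side sorter is equally pluggable through the map.sort.class key read above. A minimal sketch, again assuming Hadoop's stock HeapSort as the substitute:

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.HeapSort;
import org.apache.hadoop.util.IndexedSorter;

public class MapSortConfigSketch {
  public static JobConf withHeapSort() {
    JobConf job = new JobConf();
    // Read back in MapOutputBuffer via
    // job.getClass("map.sort.class", QuickSort.class, IndexedSorter.class).
    job.setClass("map.sort.class", HeapSort.class, IndexedSorter.class);
    return job;
  }
}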