org.apache.hadoop.mapred.Task Java Examples

The following examples show how to use org.apache.hadoop.mapred.Task. They are taken from open-source projects; each example lists its source file, the project it comes from, and that project's license.
Example #1
Source File: HadoopMapReduceInfoProvider.java    From CloverETL-Engine with GNU Lesser General Public License v2.1
@Override
public List<HadoopCounterGroup> getCounterGroups() {
	List<HadoopCounterGroup> result = new ArrayList<HadoopCounterGroup>();
	result.add(new HadoopCounterGroup(JobInProgress.Counter.values(), "JobInProgressCounters", null));
	result.add(new HadoopCounterGroup(Task.Counter.values(), "TaskCounters", null));
	//TODO: temporary solution for adding a group for the FileSystemCounters enum, as the enum class itself cannot be found
	String fsCountersGroupName = "FileSystemCounters";
	List<HadoopCounterKey> fsCounters = new ArrayList<HadoopCounterKey>();
	fsCounters.add(new HadoopCounterKey("FILE_BYTES_READ", fsCountersGroupName));
	fsCounters.add(new HadoopCounterKey("HDFS_BYTES_READ", fsCountersGroupName));
	fsCounters.add(new HadoopCounterKey("FILE_BYTES_WRITTEN", fsCountersGroupName));
	fsCounters.add(new HadoopCounterKey("HDFS_BYTES_WRITTEN", fsCountersGroupName));
	result.add(new HadoopCounterGroup(fsCounters, fsCountersGroupName, null));
	return result;
}
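
A note on the counter enums used here: JobInProgress.Counter and Task.Counter are the old mapred-era enums; later Hadoop releases expose the same information through org.apache.hadoop.mapreduce.JobCounter and org.apache.hadoop.mapreduce.TaskCounter. A minimal standalone sketch (assuming one of those newer enums is on the classpath) that simply lists the keys such a "TaskCounters" group would be built from:

import org.apache.hadoop.mapreduce.TaskCounter;

public class TaskCounterNames {
  public static void main(String[] args) {
    // Print the task-level counter keys (e.g. MAP_INPUT_RECORDS, SPILLED_RECORDS, ...)
    // that a "TaskCounters" group would be built from on newer Hadoop versions.
    for (TaskCounter counter : TaskCounter.values()) {
      System.out.println(counter.name());
    }
  }
}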
 
Example #2
Source File: ContainerRemoteLaunchEvent.java    From hadoop with Apache License 2.0
public ContainerRemoteLaunchEvent(TaskAttemptId taskAttemptID,
    ContainerLaunchContext containerLaunchContext,
    Container allocatedContainer, Task remoteTask) {
  super(taskAttemptID, allocatedContainer.getId(), StringInterner
    .weakIntern(allocatedContainer.getNodeId().toString()),
    allocatedContainer.getContainerToken(),
    ContainerLauncher.EventType.CONTAINER_REMOTE_LAUNCH);
  this.allocatedContainer = allocatedContainer;
  this.containerLaunchContext = containerLaunchContext;
  this.task = remoteTask;
}
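
The StringInterner.weakIntern call above exists because many launch events carry the same node-id string; interning collapses equal strings into a single canonical instance. A small self-contained illustration of that behaviour (the node id is a made-up value):

import org.apache.hadoop.util.StringInterner;

public class InternDemo {
  public static void main(String[] args) {
    // Two equal-but-distinct String objects come back as the same canonical instance,
    // so repeated launch events do not each hold their own copy of the node id.
    String a = StringInterner.weakIntern(new String("node-1:45454"));
    String b = StringInterner.weakIntern(new String("node-1:45454"));
    System.out.println(a == b); // true while the interned value is strongly reachable
  }
}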
 
Example #3
Source File: TaskAttemptContextImpl.java    From big-c with Apache License 2.0
/**
 * Set the current status of the task to the given string.
 */
@Override
public void setStatus(String status) {
  String normalizedStatus = Task.normalizeStatus(status, conf);
  setStatusString(normalizedStatus);
  reporter.setStatus(normalizedStatus);
}
 
Example #4
Source File: TaskAttemptContextImpl.java    From hadoop with Apache License 2.0
/**
 * Set the current status of the task to the given string.
 */
@Override
public void setStatus(String status) {
  String normalizedStatus = Task.normalizeStatus(status, conf);
  setStatusString(normalizedStatus);
  reporter.setStatus(normalizedStatus);
}
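
Task.normalizeStatus is there to keep the free-form status string bounded before it is reported. A minimal standalone sketch of that idea; the property name and the 512-character default used below are assumptions for illustration, not values read from the Hadoop source:

import org.apache.hadoop.conf.Configuration;

public class StatusNormalizerSketch {
  // Assumed behaviour: null becomes empty, anything longer than the configured
  // limit is truncated. Check Task.normalizeStatus/MRJobConfig in your version.
  static String normalizeStatus(String status, Configuration conf) {
    int maxLen = conf.getInt("mapreduce.task.max.status.length", 512); // assumed key/default
    String s = (status == null) ? "" : status;
    return s.length() > maxLen ? s.substring(0, maxLen) : s;
  }

  public static void main(String[] args) {
    Configuration conf = new Configuration();
    String longStatus = new String(new char[1000]).replace('\0', 'x');
    System.out.println(normalizeStatus(longStatus, conf).length()); // 512 with the assumed default
  }
}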
 
Example #5
Source File: ContainerRemoteLaunchEvent.java    From big-c with Apache License 2.0
public ContainerRemoteLaunchEvent(TaskAttemptId taskAttemptID,
    ContainerLaunchContext containerLaunchContext,
    Container allocatedContainer, Task remoteTask) {
  super(taskAttemptID, allocatedContainer.getId(), StringInterner
    .weakIntern(allocatedContainer.getNodeId().toString()),
    allocatedContainer.getContainerToken(),
    ContainerLauncher.EventType.CONTAINER_REMOTE_LAUNCH);
  this.allocatedContainer = allocatedContainer;
  this.containerLaunchContext = containerLaunchContext;
  this.task = remoteTask;
}
 
Example #6
Source File: TaskAttemptImpl.java    From big-c with Apache License 2.0
static ContainerLaunchContext createContainerLaunchContext(
    Map<ApplicationAccessType, String> applicationACLs,
    Configuration conf, Token<JobTokenIdentifier> jobToken, Task remoteTask,
    final org.apache.hadoop.mapred.JobID oldJobId,
    WrappedJvmID jvmID,
    TaskAttemptListener taskAttemptListener,
    Credentials credentials) {

  synchronized (commonContainerSpecLock) {
    if (commonContainerSpec == null) {
      commonContainerSpec = createCommonContainerLaunchContext(
          applicationACLs, conf, jobToken, oldJobId, credentials);
    }
  }

  // Fill in the fields needed per-container that are missing in the common
  // spec.

  boolean userClassesTakesPrecedence =
    conf.getBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, false);

  // Setup environment by cloning from common env.
  Map<String, String> env = commonContainerSpec.getEnvironment();
  Map<String, String> myEnv = new HashMap<String, String>(env.size());
  myEnv.putAll(env);
  if (userClassesTakesPrecedence) {
    myEnv.put(Environment.CLASSPATH_PREPEND_DISTCACHE.name(), "true");
  }
  MapReduceChildJVM.setVMEnv(myEnv, remoteTask);

  // Set up the launch command
  List<String> commands = MapReduceChildJVM.getVMCommand(
      taskAttemptListener.getAddress(), remoteTask, jvmID);

  // Duplicate the ByteBuffers for access by multiple containers.
  Map<String, ByteBuffer> myServiceData = new HashMap<String, ByteBuffer>();
  for (Entry<String, ByteBuffer> entry : commonContainerSpec
              .getServiceData().entrySet()) {
    myServiceData.put(entry.getKey(), entry.getValue().duplicate());
  }

  // Construct the actual Container
  ContainerLaunchContext container = ContainerLaunchContext.newInstance(
      commonContainerSpec.getLocalResources(), myEnv, commands,
      myServiceData, commonContainerSpec.getTokens().duplicate(),
      applicationACLs);

  return container;
}
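
The entry.getValue().duplicate() calls above matter because the service-data ByteBuffers in the common spec are shared across containers: duplicating gives each container its own position and limit over the same backing bytes, so one container's reads do not disturb another's view. A self-contained illustration of that ByteBuffer behaviour:

import java.nio.ByteBuffer;

public class DuplicateBufferDemo {
  public static void main(String[] args) {
    ByteBuffer shared = ByteBuffer.wrap("service-data".getBytes()); // 12 bytes

    // Same backing bytes, independent position/limit per view.
    ByteBuffer viewA = shared.duplicate();
    ByteBuffer viewB = shared.duplicate();

    viewA.get(new byte[7]);                 // advance viewA only
    System.out.println(viewA.remaining());  // 5
    System.out.println(viewB.remaining());  // 12 -- unaffected by viewA's reads
  }
}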
 
Example #7
Source File: TestShuffleScheduler.java    From big-c with Apache License 2.0
@SuppressWarnings("rawtypes")
@Test
public <K, V> void TestSucceedAndFailedCopyMap() throws Exception {
  JobConf job = new JobConf();
  job.setNumMapTasks(2);
  //mock creation
  TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
  Reporter mockReporter = mock(Reporter.class);
  FileSystem mockFileSystem = mock(FileSystem.class);
  Class<? extends org.apache.hadoop.mapred.Reducer>  combinerClass = job.getCombinerClass();
  @SuppressWarnings("unchecked")  // needed for mock with generic
  CombineOutputCollector<K, V>  mockCombineOutputCollector =
      (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
  org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID =
      mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
  LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
  CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
  Counter mockCounter = mock(Counter.class);
  TaskStatus mockTaskStatus = mock(TaskStatus.class);
  Progress mockProgress = mock(Progress.class);
  MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
  Task mockTask = mock(Task.class);
  @SuppressWarnings("unchecked")
  MapOutput<K, V> output = mock(MapOutput.class);

  ShuffleConsumerPlugin.Context<K, V> context =
      new ShuffleConsumerPlugin.Context<K, V>(
          mockTaskAttemptID, job, mockFileSystem,
          mockUmbilical, mockLocalDirAllocator,
          mockReporter, mockCompressionCodec,
          combinerClass, mockCombineOutputCollector,
          mockCounter, mockCounter, mockCounter,
          mockCounter, mockCounter, mockCounter,
          mockTaskStatus, mockProgress, mockProgress,
          mockTask, mockMapOutputFile, null);
  TaskStatus status = new TaskStatus() {
    @Override
    public boolean getIsMap() {
      return false;
    }
    @Override
    public void addFetchFailedMap(TaskAttemptID mapTaskId) {
    }
  };
  Progress progress = new Progress();
  ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(job,
      status, null, null, progress, context.getShuffledMapsCounter(),
      context.getReduceShuffleBytes(), context.getFailedShuffleCounter());

  MapHost host1 = new MapHost("host1", null);
  TaskAttemptID failedAttemptID = new TaskAttemptID(
      new org.apache.hadoop.mapred.TaskID(
      new JobID("test",0), TaskType.MAP, 0), 0);

  TaskAttemptID succeedAttemptID = new TaskAttemptID(
      new org.apache.hadoop.mapred.TaskID(
      new JobID("test",0), TaskType.MAP, 1), 1);

  // handle output fetch failure for failedAttemptID, part I
  scheduler.hostFailed(host1.getHostName());

  // handle output fetch succeed for succeedAttemptID
  long bytes = (long)500 * 1024 * 1024;
  scheduler.copySucceeded(succeedAttemptID, host1, bytes, 0, 500000, output);

  // handle output fetch failure for failedAttemptID, part II
  // for MAPREDUCE-6361: verify that no NPE is thrown
  scheduler.copyFailed(failedAttemptID, host1, true, false);
}
 
Example #8
Source File: TestShufflePlugin.java    From big-c with Apache License 2.0
@Test
/**
 * A test method verifying the availability and accessibility of the API needed
 * by sub-classes of ShuffleConsumerPlugin
 */
public void testConsumerApi() {

  JobConf jobConf = new JobConf();
  ShuffleConsumerPlugin<K, V> shuffleConsumerPlugin = new TestShuffleConsumerPlugin<K, V>();

  //mock creation
  ReduceTask mockReduceTask = mock(ReduceTask.class);
  TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
  Reporter mockReporter = mock(Reporter.class);
  FileSystem mockFileSystem = mock(FileSystem.class);
  Class<? extends org.apache.hadoop.mapred.Reducer>  combinerClass = jobConf.getCombinerClass();
  @SuppressWarnings("unchecked")  // needed for mock with generic
  CombineOutputCollector<K, V>  mockCombineOutputCollector =
    (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
  org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID =
    mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
  LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
  CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
  Counter mockCounter = mock(Counter.class);
  TaskStatus mockTaskStatus = mock(TaskStatus.class);
  Progress mockProgress = mock(Progress.class);
  MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
  Task mockTask = mock(Task.class);

  try {
    String [] dirs = jobConf.getLocalDirs();
    // verify that these APIs are available through super class handler
    ShuffleConsumerPlugin.Context<K, V> context =
   new ShuffleConsumerPlugin.Context<K, V>(mockTaskAttemptID, jobConf, mockFileSystem,
                                              mockUmbilical, mockLocalDirAllocator,
                                              mockReporter, mockCompressionCodec,
                                              combinerClass, mockCombineOutputCollector,
                                              mockCounter, mockCounter, mockCounter,
                                              mockCounter, mockCounter, mockCounter,
                                              mockTaskStatus, mockProgress, mockProgress,
                                              mockTask, mockMapOutputFile, null);
    shuffleConsumerPlugin.init(context);
    shuffleConsumerPlugin.run();
    shuffleConsumerPlugin.close();
  }
  catch (Exception e) {
    assertTrue("Threw exception:" + e, false);
  }

  // verify that these APIs are available for 3rd party plugins
  mockReduceTask.getTaskID();
  mockReduceTask.getJobID();
  mockReduceTask.getNumMaps();
  mockReduceTask.getPartition();
  mockReporter.progress();
}
 
Example #9
Source File: MergeManagerImpl.java    From big-c with Apache License 2.0
@Override
public void merge(List<CompressAwarePath> inputs) throws IOException {
  // sanity check
  if (inputs == null || inputs.isEmpty()) {
    LOG.info("No ondisk files to merge...");
    return;
  }
  
  long approxOutputSize = 0;
  int bytesPerSum = 
    jobConf.getInt("io.bytes.per.checksum", 512);
  
  LOG.info("OnDiskMerger: We have  " + inputs.size() + 
           " map outputs on disk. Triggering merge...");
  
  // 1. Prepare the list of files to be merged. 
  for (CompressAwarePath file : inputs) {
    approxOutputSize += localFS.getFileStatus(file).getLen();
  }

  // add the checksum length
  approxOutputSize += 
    ChecksumFileSystem.getChecksumLength(approxOutputSize, bytesPerSum);

  // 2. Start the on-disk merge process
  Path outputPath = 
    localDirAllocator.getLocalPathForWrite(inputs.get(0).toString(), 
        approxOutputSize, jobConf).suffix(Task.MERGED_OUTPUT_PREFIX);

  FSDataOutputStream out = CryptoUtils.wrapIfNecessary(jobConf, rfs.create(outputPath));
  Writer<K, V> writer = new Writer<K, V>(jobConf, out,
      (Class<K>) jobConf.getMapOutputKeyClass(),
      (Class<V>) jobConf.getMapOutputValueClass(), codec, null, true);

  RawKeyValueIterator iter  = null;
  CompressAwarePath compressAwarePath;
  Path tmpDir = new Path(reduceId.toString());
  try {
    iter = Merger.merge(jobConf, rfs,
                        (Class<K>) jobConf.getMapOutputKeyClass(),
                        (Class<V>) jobConf.getMapOutputValueClass(),
                        codec, inputs.toArray(new Path[inputs.size()]), 
                        true, ioSortFactor, tmpDir, 
                        (RawComparator<K>) jobConf.getOutputKeyComparator(), 
                        reporter, spilledRecordsCounter, null, 
                        mergedMapOutputsCounter, null);

    Merger.writeFile(iter, writer, reporter, jobConf);
    writer.close();
    compressAwarePath = new CompressAwarePath(outputPath,
        writer.getRawLength(), writer.getCompressedLength());
  } catch (IOException e) {
    localFS.delete(outputPath, true);
    throw e;
  }

  closeOnDiskFile(compressAwarePath);

  LOG.info(reduceId +
      " Finished merging " + inputs.size() + 
      " map output files on disk of total-size " + 
      approxOutputSize + "." + 
      " Local output file is " + outputPath + " of size " +
      localFS.getFileStatus(outputPath).getLen());
}
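
The checksum-length adjustment above pads the size estimate handed to the local dir allocator, because a checksummed local file system keeps roughly one small CRC alongside every io.bytes.per.checksum bytes of data. A back-of-the-envelope sketch of that overhead; the 4-byte-CRC figure and the omission of any header are simplifying assumptions, and the exact formula is whatever ChecksumFileSystem.getChecksumLength() computes:

public class ChecksumOverheadSketch {
  static long approxChecksumLength(long dataSize, int bytesPerSum) {
    long chunks = (dataSize + bytesPerSum - 1) / bytesPerSum; // ceil(dataSize / bytesPerSum)
    return chunks * 4L;                                       // ~4 bytes of CRC per chunk (assumed)
  }

  public static void main(String[] args) {
    long mapOutputBytes = 256L * 1024 * 1024;                        // 256 MB of merged map output
    System.out.println(approxChecksumLength(mapOutputBytes, 512));   // ~2 MB of checksum data
  }
}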
 
Example #10
Source File: MergeManagerImpl.java    From big-c with Apache License 2.0
@Override
public void merge(List<InMemoryMapOutput<K,V>> inputs) throws IOException {
  if (inputs == null || inputs.size() == 0) {
    return;
  }
  
  //name this output file same as the name of the first file that is 
  //there in the current list of inmem files (this is guaranteed to
  //be absent on the disk currently. So we don't overwrite a prev. 
  //created spill). Also we need to create the output file now since
  //it is not guaranteed that this file will be present after merge
  //is called (we delete empty files as soon as we see them
  //in the merge method)

  //figure out the mapId 
  TaskAttemptID mapId = inputs.get(0).getMapId();
  TaskID mapTaskId = mapId.getTaskID();

  List<Segment<K, V>> inMemorySegments = new ArrayList<Segment<K, V>>();
  long mergeOutputSize = 
    createInMemorySegments(inputs, inMemorySegments,0);
  int noInMemorySegments = inMemorySegments.size();

  Path outputPath = 
    mapOutputFile.getInputFileForWrite(mapTaskId,
                                       mergeOutputSize).suffix(
                                           Task.MERGED_OUTPUT_PREFIX);

  FSDataOutputStream out = CryptoUtils.wrapIfNecessary(jobConf, rfs.create(outputPath));
  Writer<K, V> writer = new Writer<K, V>(jobConf, out,
      (Class<K>) jobConf.getMapOutputKeyClass(),
      (Class<V>) jobConf.getMapOutputValueClass(), codec, null, true);

  RawKeyValueIterator rIter = null;
  CompressAwarePath compressAwarePath;
  try {
    LOG.info("Initiating in-memory merge with " + noInMemorySegments + 
             " segments...");
    
    rIter = Merger.merge(jobConf, rfs,
                         (Class<K>)jobConf.getMapOutputKeyClass(),
                         (Class<V>)jobConf.getMapOutputValueClass(),
                         inMemorySegments, inMemorySegments.size(),
                         new Path(reduceId.toString()),
                         (RawComparator<K>)jobConf.getOutputKeyComparator(),
                         reporter, spilledRecordsCounter, null, null);
    
    if (null == combinerClass) {
      Merger.writeFile(rIter, writer, reporter, jobConf);
    } else {
      combineCollector.setWriter(writer);
      combineAndSpill(rIter, reduceCombineInputCounter);
    }
    writer.close();
    compressAwarePath = new CompressAwarePath(outputPath,
        writer.getRawLength(), writer.getCompressedLength());

    LOG.info(reduceId +  
        " Merge of the " + noInMemorySegments +
        " files in-memory complete." +
        " Local file is " + outputPath + " of size " + 
        localFS.getFileStatus(outputPath).getLen());
  } catch (IOException e) { 
    //make sure that we delete the ondisk file that we created 
    //earlier when we invoked cloneFileAttributes
    localFS.delete(outputPath, true);
    throw e;
  }

  // Note the output of the merge
  closeOnDiskFile(compressAwarePath);
}
 
Example #11
Source File: TestTaskImpl.java    From big-c with Apache License 2.0
@Override
protected Task createRemoteTask() {
  return new MockTask(taskType);
}
 
Example #12
Source File: ContainerRemoteLaunchEvent.java    From big-c with Apache License 2.0
public Task getRemoteTask() {
  return this.task;
}
 
Example #13
Source File: TaskAttemptImpl.java    From hadoop with Apache License 2.0
static ContainerLaunchContext createContainerLaunchContext(
    Map<ApplicationAccessType, String> applicationACLs,
    Configuration conf, Token<JobTokenIdentifier> jobToken, Task remoteTask,
    final org.apache.hadoop.mapred.JobID oldJobId,
    WrappedJvmID jvmID,
    TaskAttemptListener taskAttemptListener,
    Credentials credentials) {

  synchronized (commonContainerSpecLock) {
    if (commonContainerSpec == null) {
      commonContainerSpec = createCommonContainerLaunchContext(
          applicationACLs, conf, jobToken, oldJobId, credentials);
    }
  }

  // Fill in the fields needed per-container that are missing in the common
  // spec.

  boolean userClassesTakesPrecedence =
    conf.getBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, false);

  // Setup environment by cloning from common env.
  Map<String, String> env = commonContainerSpec.getEnvironment();
  Map<String, String> myEnv = new HashMap<String, String>(env.size());
  myEnv.putAll(env);
  if (userClassesTakesPrecedence) {
    myEnv.put(Environment.CLASSPATH_PREPEND_DISTCACHE.name(), "true");
  }
  MapReduceChildJVM.setVMEnv(myEnv, remoteTask);

  // Set up the launch command
  List<String> commands = MapReduceChildJVM.getVMCommand(
      taskAttemptListener.getAddress(), remoteTask, jvmID);

  // Duplicate the ByteBuffers for access by multiple containers.
  Map<String, ByteBuffer> myServiceData = new HashMap<String, ByteBuffer>();
  for (Entry<String, ByteBuffer> entry : commonContainerSpec
              .getServiceData().entrySet()) {
    myServiceData.put(entry.getKey(), entry.getValue().duplicate());
  }

  // Construct the actual Container
  ContainerLaunchContext container = ContainerLaunchContext.newInstance(
      commonContainerSpec.getLocalResources(), myEnv, commands,
      myServiceData, commonContainerSpec.getTokens().duplicate(),
      applicationACLs);

  return container;
}
 
Example #14
Source File: TestShuffleScheduler.java    From hadoop with Apache License 2.0
@SuppressWarnings("rawtypes")
@Test
public <K, V> void TestSucceedAndFailedCopyMap() throws Exception {
  JobConf job = new JobConf();
  job.setNumMapTasks(2);
  //mock creation
  TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
  Reporter mockReporter = mock(Reporter.class);
  FileSystem mockFileSystem = mock(FileSystem.class);
  Class<? extends org.apache.hadoop.mapred.Reducer>  combinerClass = job.getCombinerClass();
  @SuppressWarnings("unchecked")  // needed for mock with generic
  CombineOutputCollector<K, V>  mockCombineOutputCollector =
      (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
  org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID =
      mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
  LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
  CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
  Counter mockCounter = mock(Counter.class);
  TaskStatus mockTaskStatus = mock(TaskStatus.class);
  Progress mockProgress = mock(Progress.class);
  MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
  Task mockTask = mock(Task.class);
  @SuppressWarnings("unchecked")
  MapOutput<K, V> output = mock(MapOutput.class);

  ShuffleConsumerPlugin.Context<K, V> context =
      new ShuffleConsumerPlugin.Context<K, V>(
          mockTaskAttemptID, job, mockFileSystem,
          mockUmbilical, mockLocalDirAllocator,
          mockReporter, mockCompressionCodec,
          combinerClass, mockCombineOutputCollector,
          mockCounter, mockCounter, mockCounter,
          mockCounter, mockCounter, mockCounter,
          mockTaskStatus, mockProgress, mockProgress,
          mockTask, mockMapOutputFile, null);
  TaskStatus status = new TaskStatus() {
    @Override
    public boolean getIsMap() {
      return false;
    }
    @Override
    public void addFetchFailedMap(TaskAttemptID mapTaskId) {
    }
  };
  Progress progress = new Progress();
  ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(job,
      status, null, null, progress, context.getShuffledMapsCounter(),
      context.getReduceShuffleBytes(), context.getFailedShuffleCounter());

  MapHost host1 = new MapHost("host1", null);
  TaskAttemptID failedAttemptID = new TaskAttemptID(
      new org.apache.hadoop.mapred.TaskID(
      new JobID("test",0), TaskType.MAP, 0), 0);

  TaskAttemptID succeedAttemptID = new TaskAttemptID(
      new org.apache.hadoop.mapred.TaskID(
      new JobID("test",0), TaskType.MAP, 1), 1);

  // handle output fetch failure for failedAttemptID, part I
  scheduler.hostFailed(host1.getHostName());

  // handle output fetch succeed for succeedAttemptID
  long bytes = (long)500 * 1024 * 1024;
  scheduler.copySucceeded(succeedAttemptID, host1, bytes, 0, 500000, output);

  // handle output fetch failure for failedAttemptID, part II
  // for MAPREDUCE-6361: verify that no NPE is thrown
  scheduler.copyFailed(failedAttemptID, host1, true, false);
}
 
Example #15
Source File: TestShufflePlugin.java    From hadoop with Apache License 2.0
@Test
/**
 * A test method verifying the availability and accessibility of the API needed
 * by sub-classes of ShuffleConsumerPlugin
 */
public void testConsumerApi() {

  JobConf jobConf = new JobConf();
  ShuffleConsumerPlugin<K, V> shuffleConsumerPlugin = new TestShuffleConsumerPlugin<K, V>();

  //mock creation
  ReduceTask mockReduceTask = mock(ReduceTask.class);
  TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
  Reporter mockReporter = mock(Reporter.class);
  FileSystem mockFileSystem = mock(FileSystem.class);
  Class<? extends org.apache.hadoop.mapred.Reducer>  combinerClass = jobConf.getCombinerClass();
  @SuppressWarnings("unchecked")  // needed for mock with generic
  CombineOutputCollector<K, V>  mockCombineOutputCollector =
    (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
  org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID =
    mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
  LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
  CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
  Counter mockCounter = mock(Counter.class);
  TaskStatus mockTaskStatus = mock(TaskStatus.class);
  Progress mockProgress = mock(Progress.class);
  MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
  Task mockTask = mock(Task.class);

  try {
    String [] dirs = jobConf.getLocalDirs();
    // verify that these APIs are available through super class handler
    ShuffleConsumerPlugin.Context<K, V> context =
   new ShuffleConsumerPlugin.Context<K, V>(mockTaskAttemptID, jobConf, mockFileSystem,
                                              mockUmbilical, mockLocalDirAllocator,
                                              mockReporter, mockCompressionCodec,
                                              combinerClass, mockCombineOutputCollector,
                                              mockCounter, mockCounter, mockCounter,
                                              mockCounter, mockCounter, mockCounter,
                                              mockTaskStatus, mockProgress, mockProgress,
                                              mockTask, mockMapOutputFile, null);
    shuffleConsumerPlugin.init(context);
    shuffleConsumerPlugin.run();
    shuffleConsumerPlugin.close();
  }
  catch (Exception e) {
    assertTrue("Threw exception:" + e, false);
  }

  // verify that these APIs are available for 3rd party plugins
  mockReduceTask.getTaskID();
  mockReduceTask.getJobID();
  mockReduceTask.getNumMaps();
  mockReduceTask.getPartition();
  mockReporter.progress();
}
 
Example #16
Source File: MergeManagerImpl.java    From hadoop with Apache License 2.0
@Override
public void merge(List<CompressAwarePath> inputs) throws IOException {
  // sanity check
  if (inputs == null || inputs.isEmpty()) {
    LOG.info("No ondisk files to merge...");
    return;
  }
  
  long approxOutputSize = 0;
  int bytesPerSum = 
    jobConf.getInt("io.bytes.per.checksum", 512);
  
  LOG.info("OnDiskMerger: We have  " + inputs.size() + 
           " map outputs on disk. Triggering merge...");
  
  // 1. Prepare the list of files to be merged. 
  for (CompressAwarePath file : inputs) {
    approxOutputSize += localFS.getFileStatus(file).getLen();
  }

  // add the checksum length
  approxOutputSize += 
    ChecksumFileSystem.getChecksumLength(approxOutputSize, bytesPerSum);

  // 2. Start the on-disk merge process
  Path outputPath = 
    localDirAllocator.getLocalPathForWrite(inputs.get(0).toString(), 
        approxOutputSize, jobConf).suffix(Task.MERGED_OUTPUT_PREFIX);

  FSDataOutputStream out = CryptoUtils.wrapIfNecessary(jobConf, rfs.create(outputPath));
  Writer<K, V> writer = new Writer<K, V>(jobConf, out,
      (Class<K>) jobConf.getMapOutputKeyClass(),
      (Class<V>) jobConf.getMapOutputValueClass(), codec, null, true);

  RawKeyValueIterator iter  = null;
  CompressAwarePath compressAwarePath;
  Path tmpDir = new Path(reduceId.toString());
  try {
    iter = Merger.merge(jobConf, rfs,
                        (Class<K>) jobConf.getMapOutputKeyClass(),
                        (Class<V>) jobConf.getMapOutputValueClass(),
                        codec, inputs.toArray(new Path[inputs.size()]), 
                        true, ioSortFactor, tmpDir, 
                        (RawComparator<K>) jobConf.getOutputKeyComparator(), 
                        reporter, spilledRecordsCounter, null, 
                        mergedMapOutputsCounter, null);

    Merger.writeFile(iter, writer, reporter, jobConf);
    writer.close();
    compressAwarePath = new CompressAwarePath(outputPath,
        writer.getRawLength(), writer.getCompressedLength());
  } catch (IOException e) {
    localFS.delete(outputPath, true);
    throw e;
  }

  closeOnDiskFile(compressAwarePath);

  LOG.info(reduceId +
      " Finished merging " + inputs.size() + 
      " map output files on disk of total-size " + 
      approxOutputSize + "." + 
      " Local output file is " + outputPath + " of size " +
      localFS.getFileStatus(outputPath).getLen());
}
 
Example #17
Source File: MergeManagerImpl.java    From hadoop with Apache License 2.0
@Override
public void merge(List<InMemoryMapOutput<K,V>> inputs) throws IOException {
  if (inputs == null || inputs.size() == 0) {
    return;
  }
  
  //name this output file same as the name of the first file that is 
  //there in the current list of inmem files (this is guaranteed to
  //be absent on the disk currently. So we don't overwrite a prev. 
  //created spill). Also we need to create the output file now since
  //it is not guaranteed that this file will be present after merge
  //is called (we delete empty files as soon as we see them
  //in the merge method)

  //figure out the mapId 
  TaskAttemptID mapId = inputs.get(0).getMapId();
  TaskID mapTaskId = mapId.getTaskID();

  List<Segment<K, V>> inMemorySegments = new ArrayList<Segment<K, V>>();
  long mergeOutputSize = 
    createInMemorySegments(inputs, inMemorySegments,0);
  int noInMemorySegments = inMemorySegments.size();

  Path outputPath = 
    mapOutputFile.getInputFileForWrite(mapTaskId,
                                       mergeOutputSize).suffix(
                                           Task.MERGED_OUTPUT_PREFIX);

  FSDataOutputStream out = CryptoUtils.wrapIfNecessary(jobConf, rfs.create(outputPath));
  Writer<K, V> writer = new Writer<K, V>(jobConf, out,
      (Class<K>) jobConf.getMapOutputKeyClass(),
      (Class<V>) jobConf.getMapOutputValueClass(), codec, null, true);

  RawKeyValueIterator rIter = null;
  CompressAwarePath compressAwarePath;
  try {
    LOG.info("Initiating in-memory merge with " + noInMemorySegments + 
             " segments...");
    
    rIter = Merger.merge(jobConf, rfs,
                         (Class<K>)jobConf.getMapOutputKeyClass(),
                         (Class<V>)jobConf.getMapOutputValueClass(),
                         inMemorySegments, inMemorySegments.size(),
                         new Path(reduceId.toString()),
                         (RawComparator<K>)jobConf.getOutputKeyComparator(),
                         reporter, spilledRecordsCounter, null, null);
    
    if (null == combinerClass) {
      Merger.writeFile(rIter, writer, reporter, jobConf);
    } else {
      combineCollector.setWriter(writer);
      combineAndSpill(rIter, reduceCombineInputCounter);
    }
    writer.close();
    compressAwarePath = new CompressAwarePath(outputPath,
        writer.getRawLength(), writer.getCompressedLength());

    LOG.info(reduceId +  
        " Merge of the " + noInMemorySegments +
        " files in-memory complete." +
        " Local file is " + outputPath + " of size " + 
        localFS.getFileStatus(outputPath).getLen());
  } catch (IOException e) { 
    //make sure that we delete the ondisk file that we created 
    //earlier when we invoked cloneFileAttributes
    localFS.delete(outputPath, true);
    throw e;
  }

  // Note the output of the merge
  closeOnDiskFile(compressAwarePath);
}
 
Example #18
Source File: TestTaskImpl.java    From hadoop with Apache License 2.0
@Override
protected Task createRemoteTask() {
  return new MockTask(taskType);
}
 
Example #19
Source File: ContainerRemoteLaunchEvent.java    From hadoop with Apache License 2.0
public Task getRemoteTask() {
  return this.task;
}
 
Example #20
Source File: TaskAttemptListener.java    From big-c with Apache License 2.0
/**
 * Register a JVM with the listener.  This should be called as soon as a 
 * JVM ID is assigned to a task attempt, before it has been launched.
 * @param task the task itself for this JVM.
 * @param jvmID The ID of the JVM.
 */
void registerPendingTask(Task task, WrappedJvmID jvmID);
 
Example #21
Source File: TaskAttemptListener.java    From hadoop with Apache License 2.0
/**
 * Register a JVM with the listener.  This should be called as soon as a 
 * JVM ID is assigned to a task attempt, before it has been launched.
 * @param task the task itself for this JVM.
 * @param jvmID The ID of the JVM.
 */
void registerPendingTask(Task task, WrappedJvmID jvmID);
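
Tying this back to the TaskAttemptImpl examples earlier: the (jvmID, task) pairing has to be registered before the container is actually started, so the child JVM can reach its task over the umbilical as soon as it comes up. A hypothetical caller-side sketch of that ordering; launchContainer is a made-up stand-in for the real launch step, and the package names follow the Hadoop 2 client-app module:

import org.apache.hadoop.mapred.Task;
import org.apache.hadoop.mapred.WrappedJvmID;
import org.apache.hadoop.mapreduce.v2.app.TaskAttemptListener;

public class PendingTaskRegistration {
  // Register first, launch second, so the umbilical can answer the child JVM
  // as soon as it starts up.
  static void registerThenLaunch(TaskAttemptListener listener, Task remoteTask,
                                 WrappedJvmID jvmID, Runnable launchContainer) {
    listener.registerPendingTask(remoteTask, jvmID);  // must happen before launch
    launchContainer.run();                            // hypothetical launch step
  }
}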
 
Example #22
Source File: TaskAttemptImpl.java    From big-c with Apache License 2.0
protected abstract org.apache.hadoop.mapred.Task createRemoteTask();
 
Example #23
Source File: TaskAttemptImpl.java    From hadoop with Apache License 2.0
protected abstract org.apache.hadoop.mapred.Task createRemoteTask();