Java Code Examples for org.apache.hadoop.mapreduce.v2.app.job.Job#getTasks()

The following examples show how to use org.apache.hadoop.mapreduce.v2.app.job.Job#getTasks(). Each example notes its source file and the project it comes from.
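Before the individual examples, here is the common shape they all share: fetch the task map from a Job, then drill into each Task's attempts. The sketch below is a minimal illustration written for this page, not code from any of the projects listed; the TaskSummary class and its summarize helper are hypothetical, and a real caller would obtain the Job from an MRApp or AppContext as the examples do.

import java.util.Map;

import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
import org.apache.hadoop.mapreduce.v2.app.job.Job;
import org.apache.hadoop.mapreduce.v2.app.job.Task;
import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt;

public class TaskSummary {

  // Hypothetical helper: walk every task of a job and print the state of
  // the task and of each of its attempts.
  static void summarize(Job job) {
    Map<TaskId, Task> tasks = job.getTasks();            // all tasks
    Map<TaskId, Task> maps = job.getTasks(TaskType.MAP); // map tasks only
    System.out.println(maps.size() + " of " + tasks.size() + " tasks are maps");
    for (Task task : tasks.values()) {
      Map<TaskAttemptId, TaskAttempt> attempts = task.getAttempts();
      System.out.println(task.getID() + " [" + task.getState() + "] has "
          + attempts.size() + " attempt(s)");
      for (TaskAttempt attempt : attempts.values()) {
        System.out.println("  " + attempt.getID() + " -> "
            + attempt.getReport().getTaskAttemptState());
      }
    }
  }
}

Because getTasks() returns a Map keyed by TaskId, the single-task tests below typically reach the lone task with tasks.values().iterator().next() rather than by building a key.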
Example 1
Source File: TestTaskAttempt.java    From hadoop with Apache License 2.0
private void testMRAppHistory(MRApp app) throws Exception {
  Configuration conf = new Configuration();
  Job job = app.submit(conf);
  app.waitForState(job, JobState.FAILED);
  Map<TaskId, Task> tasks = job.getTasks();

  Assert.assertEquals("Num tasks is not correct", 1, tasks.size());
  Task task = tasks.values().iterator().next();
  Assert.assertEquals("Task state not correct", TaskState.FAILED, task
      .getReport().getTaskState());
  Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator().next()
      .getAttempts();
  Assert.assertEquals("Num attempts is not correct", 4, attempts.size());

  Iterator<TaskAttempt> it = attempts.values().iterator();
  TaskAttemptReport report = it.next().getReport();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED,
      report.getTaskAttemptState());
  Assert.assertEquals("Diagnostic Information is not Correct",
      "Test Diagnostic Event", report.getDiagnosticInfo());
  report = it.next().getReport();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED,
      report.getTaskAttemptState());
}
 
Example 2
Source File: TestTaskAttempt.java    From big-c with Apache License 2.0
private void testMRAppHistory(MRApp app) throws Exception {
  Configuration conf = new Configuration();
  Job job = app.submit(conf);
  app.waitForState(job, JobState.FAILED);
  Map<TaskId, Task> tasks = job.getTasks();

  Assert.assertEquals("Num tasks is not correct", 1, tasks.size());
  Task task = tasks.values().iterator().next();
  Assert.assertEquals("Task state not correct", TaskState.FAILED, task
      .getReport().getTaskState());
  Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator().next()
      .getAttempts();
  Assert.assertEquals("Num attempts is not correct", 4, attempts.size());

  Iterator<TaskAttempt> it = attempts.values().iterator();
  TaskAttemptReport report = it.next().getReport();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED,
      report.getTaskAttemptState());
  Assert.assertEquals("Diagnostic Information is not Correct",
      "Test Diagnostic Event", report.getDiagnosticInfo());
  report = it.next().getReport();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED,
      report.getTaskAttemptState());
}
 
Example 3
Source File: TestFail.java    From hadoop with Apache License 2.0
@Test
//The first attempt fails and the second attempt succeeds,
//so the job succeeds.
public void testFailTask() throws Exception {
  MRApp app = new MockFirstFailingAttemptMRApp(1, 0);
  Configuration conf = new Configuration();
  // this test requires two task attempts, but uberization overrides max to 1
  conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
  Job job = app.submit(conf);
  app.waitForState(job, JobState.SUCCEEDED);
  Map<TaskId,Task> tasks = job.getTasks();
  Assert.assertEquals("Num tasks is not correct", 1, tasks.size());
  Task task = tasks.values().iterator().next();
  Assert.assertEquals("Task state not correct", TaskState.SUCCEEDED,
      task.getReport().getTaskState());
  Map<TaskAttemptId, TaskAttempt> attempts =
      tasks.values().iterator().next().getAttempts();
  Assert.assertEquals("Num attempts is not correct", 2, attempts.size());
  //one attempt must have failed
  //and the other must have succeeded
  Iterator<TaskAttempt> it = attempts.values().iterator();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED,
      it.next().getReport().getTaskAttemptState());
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.SUCCEEDED,
      it.next().getReport().getTaskAttemptState());
}
 
Example 4
Source File: TestFail.java    From hadoop with Apache License 2.0
@Test
//All Task attempts are timed out, leading to Job failure
public void testTimedOutTask() throws Exception {
  MRApp app = new TimeOutTaskMRApp(1, 0);
  Configuration conf = new Configuration();
  int maxAttempts = 2;
  conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, maxAttempts);
  // disable uberization (requires entire job to be reattempted, so max for
  // subtask attempts is overridden to 1)
  conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
  Job job = app.submit(conf);
  app.waitForState(job, JobState.FAILED);
  Map<TaskId,Task> tasks = job.getTasks();
  Assert.assertEquals("Num tasks is not correct", 1, tasks.size());
  Task task = tasks.values().iterator().next();
  Assert.assertEquals("Task state not correct", TaskState.FAILED,
      task.getReport().getTaskState());
  Map<TaskAttemptId, TaskAttempt> attempts =
      tasks.values().iterator().next().getAttempts();
  Assert.assertEquals("Num attempts is not correct", maxAttempts,
      attempts.size());
  for (TaskAttempt attempt : attempts.values()) {
    Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED,
        attempt.getReport().getTaskAttemptState());
  }
}
 
Example 5
Source File: TestKill.java    From hadoop with Apache License 2.0
@Test
public void testKillJob() throws Exception {
  final CountDownLatch latch = new CountDownLatch(1);
  
  MRApp app = new BlockingMRApp(1, 0, latch);
  //this will start the job, but the job won't complete while the task
  //is blocked
  Job job = app.submit(new Configuration());
  
  //wait for and validate the Job transition to RUNNING
  app.waitForState(job, JobState.RUNNING);
  
  //send the kill signal to Job
  app.getContext().getEventHandler().handle(
      new JobEvent(job.getID(), JobEventType.JOB_KILL));
  
  //unblock Task
  latch.countDown();

  //wait for and validate the Job transition to KILLED
  app.waitForState(job, JobState.KILLED);
  Map<TaskId,Task> tasks = job.getTasks();
  Assert.assertEquals("No of tasks is not correct", 1, 
      tasks.size());
  Task task = tasks.values().iterator().next();
  Assert.assertEquals("Task state not correct", TaskState.KILLED, 
      task.getReport().getTaskState());
  Map<TaskAttemptId, TaskAttempt> attempts = 
    tasks.values().iterator().next().getAttempts();
  Assert.assertEquals("No of attempts is not correct", 1, 
      attempts.size());
  Iterator<TaskAttempt> it = attempts.values().iterator();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.KILLED, 
        it.next().getReport().getTaskAttemptState());
}
 
Example 6
Source File: TestFail.java    From big-c with Apache License 2.0
@Test
public void testTaskFailWithUnusedContainer() throws Exception {
  MRApp app = new MRAppWithFailingTaskAndUnusedContainer();
  Configuration conf = new Configuration();
  int maxAttempts = 1;
  conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, maxAttempts);
  // disable uberization (requires entire job to be reattempted, so max for
  // subtask attempts is overridden to 1)
  conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
  Job job = app.submit(conf);
  app.waitForState(job, JobState.RUNNING);
  Map<TaskId, Task> tasks = job.getTasks();
  Assert.assertEquals("Num tasks is not correct", 1, tasks.size());
  Task task = tasks.values().iterator().next();
  app.waitForState(task, TaskState.SCHEDULED);
  Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator()
      .next().getAttempts();
  Assert.assertEquals("Num attempts is not correct", maxAttempts, attempts
      .size());
  TaskAttempt attempt = attempts.values().iterator().next();
  app.waitForInternalState((TaskAttemptImpl) attempt,
      TaskAttemptStateInternal.ASSIGNED);
  app.getDispatcher().getEventHandler().handle(
      new TaskAttemptEvent(attempt.getID(),
          TaskAttemptEventType.TA_CONTAINER_COMPLETED));
  app.waitForState(job, JobState.FAILED);
}
 
Example 7
Source File: TestFail.java    From hadoop with Apache License 2.0
@Test
public void testTaskFailWithUnusedContainer() throws Exception {
  MRApp app = new MRAppWithFailingTaskAndUnusedContainer();
  Configuration conf = new Configuration();
  int maxAttempts = 1;
  conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, maxAttempts);
  // disable uberization (requires entire job to be reattempted, so max for
  // subtask attempts is overridden to 1)
  conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
  Job job = app.submit(conf);
  app.waitForState(job, JobState.RUNNING);
  Map<TaskId, Task> tasks = job.getTasks();
  Assert.assertEquals("Num tasks is not correct", 1, tasks.size());
  Task task = tasks.values().iterator().next();
  app.waitForState(task, TaskState.SCHEDULED);
  Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator()
      .next().getAttempts();
  Assert.assertEquals("Num attempts is not correct", maxAttempts, attempts
      .size());
  TaskAttempt attempt = attempts.values().iterator().next();
  app.waitForInternalState((TaskAttemptImpl) attempt,
      TaskAttemptStateInternal.ASSIGNED);
  app.getDispatcher().getEventHandler().handle(
      new TaskAttemptEvent(attempt.getID(),
          TaskAttemptEventType.TA_CONTAINER_COMPLETED));
  app.waitForState(job, JobState.FAILED);
}
 
Example 8
Source File: TestAMWebApp.java    From big-c with Apache License 2.0
@Test public void testSingleCounterView() {
  AppContext appContext = new MockAppContext(0, 1, 1, 1);
  Job job = appContext.getAllJobs().values().iterator().next();
  // add a failed task to the job without any counters
  Task failedTask = MockJobs.newTask(job.getID(), 2, 1, true);
  Map<TaskId,Task> tasks = job.getTasks();
  tasks.put(failedTask.getID(), failedTask);
  Map<String, String> params = getJobParams(appContext);
  params.put(AMParams.COUNTER_GROUP, 
      "org.apache.hadoop.mapreduce.FileSystemCounter");
  params.put(AMParams.COUNTER_NAME, "HDFS_WRITE_OPS");
  WebAppTests.testPage(SingleCounterPage.class, AppContext.class,
                       appContext, params);
}
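A note on this example: tasks.put(failedTask.getID(), failedTask) only works because MockAppContext's mock jobs expose their task map as a plain mutable Map. Whether the Map returned by Job#getTasks() may be mutated is implementation-specific, so outside of mock contexts like this one it is safer to treat the returned map as read-only.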
 
Example 9
Source File: TestJobHistoryEvents.java    From big-c with Apache License 2.0
@Test
public void testHistoryEvents() throws Exception {
  Configuration conf = new Configuration();
  MRApp app = new MRAppWithHistory(2, 1, true, this.getClass().getName(), true);
  app.submit(conf);
  Job job = app.getContext().getAllJobs().values().iterator().next();
  JobId jobId = job.getID();
  LOG.info("JOBID is " + TypeConverter.fromYarn(jobId).toString());
  app.waitForState(job, JobState.SUCCEEDED);
  
  //make sure all events are flushed 
  app.waitForState(Service.STATE.STOPPED);
  /*
   * Use HistoryContext to read logged events and verify the number of
   * completed maps.
   */
  HistoryContext context = new JobHistory();
  // test start and stop states
  ((JobHistory)context).init(conf);
  ((JobHistory)context).start();
  Assert.assertTrue(context.getStartTime() > 0);
  Assert.assertEquals(Service.STATE.STARTED,
      ((JobHistory) context).getServiceState());

  // get job before stopping JobHistory
  Job parsedJob = context.getJob(jobId);

  // stop JobHistory
  ((JobHistory)context).stop();
  Assert.assertEquals(Service.STATE.STOPPED,
      ((JobHistory) context).getServiceState());

  Assert.assertEquals("CompletedMaps not correct", 2,
      parsedJob.getCompletedMaps());
  Assert.assertEquals(System.getProperty("user.name"), parsedJob.getUserName());
  
  Map<TaskId, Task> tasks = parsedJob.getTasks();
  Assert.assertEquals("No of tasks not correct", 3, tasks.size());
  for (Task task : tasks.values()) {
    verifyTask(task);
  }
  
  Map<TaskId, Task> maps = parsedJob.getTasks(TaskType.MAP);
  Assert.assertEquals("No of maps not correct", 2, maps.size());
  
  Map<TaskId, Task> reduces = parsedJob.getTasks(TaskType.REDUCE);
  Assert.assertEquals("No of reduces not correct", 1, reduces.size());
  
  
  Assert.assertEquals("CompletedReduce not correct", 1,
      parsedJob.getCompletedReduces());
  
  Assert.assertEquals("Job state not currect", JobState.SUCCEEDED,
      parsedJob.getState());
}
 
Example 10
Source File: DefaultSpeculator.java    From hadoop with Apache License 2.0
private int maybeScheduleASpeculation(TaskType type) {
  int successes = 0;

  long now = clock.getTime();

  ConcurrentMap<JobId, AtomicInteger> containerNeeds
      = type == TaskType.MAP ? mapContainerNeeds : reduceContainerNeeds;

  for (ConcurrentMap.Entry<JobId, AtomicInteger> jobEntry : containerNeeds.entrySet()) {
    // This race condition is okay.  If we skip a speculation attempt that we
    //  should have tried because the event that lowers the number of
    //  containers needed to zero hasn't come through yet, it will next time.
    // Also, if we miss the fact that the number of containers needed was
    //  zero but increased due to a failure it's not too bad to launch one
    //  container prematurely.
    if (jobEntry.getValue().get() > 0) {
      continue;
    }

    int numberSpeculationsAlready = 0;
    int numberRunningTasks = 0;

    // loop through the tasks of the kind
    Job job = context.getJob(jobEntry.getKey());

    Map<TaskId, Task> tasks = job.getTasks(type);

    int numberAllowedSpeculativeTasks
        = (int) Math.max(minimumAllowedSpeculativeTasks,
            proportionTotalTasksSpeculatable * tasks.size());

    TaskId bestTaskID = null;
    long bestSpeculationValue = -1L;

    // this loop is potentially pricey.
    // TODO track the tasks that are potentially worth looking at
    for (Map.Entry<TaskId, Task> taskEntry : tasks.entrySet()) {
      long mySpeculationValue = speculationValue(taskEntry.getKey(), now);

      if (mySpeculationValue == ALREADY_SPECULATING) {
        ++numberSpeculationsAlready;
      }

      if (mySpeculationValue != NOT_RUNNING) {
        ++numberRunningTasks;
      }

      if (mySpeculationValue > bestSpeculationValue) {
        bestTaskID = taskEntry.getKey();
        bestSpeculationValue = mySpeculationValue;
      }
    }
    numberAllowedSpeculativeTasks
        = (int) Math.max(numberAllowedSpeculativeTasks,
            proportionRunningTasksSpeculatable * numberRunningTasks);

    // If we found a speculation target, fire it off
    if (bestTaskID != null
        && numberAllowedSpeculativeTasks > numberSpeculationsAlready) {
      addSpeculativeAttempt(bestTaskID);
      ++successes;
    }
  }

  return successes;
}
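To put numbers on the speculation cap computed above: assuming, purely for illustration, minimumAllowedSpeculativeTasks = 10, proportionTotalTasksSpeculatable = 0.01, and proportionRunningTasksSpeculatable = 0.1, a job with 500 tasks of the given type, 200 of them running, gets max(10, 0.01 * 500) = 10 allowed speculations from the first bound, raised to max(10, 0.1 * 200) = 20 by the running-task adjustment; a speculative attempt then fires only if a best candidate was found and fewer than 20 speculations are already in flight. getTasks(type) supplies the per-type map that both the sizing arithmetic and the candidate scan iterate over.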
 
Example 11
Source File: JobInfo.java    From big-c with Apache License 2.0
/**
 * Go through a job and update the member variables with counts for
 * information to output in the page.
 *
 * @param job
 *          the job to get counts for.
 */
private void countTasksAndAttempts(Job job) {
  numReduces = 0;
  numMaps = 0;
  final Map<TaskId, Task> tasks = job.getTasks();
  if (tasks == null) {
    return;
  }
  for (Task task : tasks.values()) {
    // Attempts counts
    Map<TaskAttemptId, TaskAttempt> attempts = task.getAttempts();
    int successful, failed, killed;
    for (TaskAttempt attempt : attempts.values()) {

      successful = 0;
      failed = 0;
      killed = 0;
      if (TaskAttemptStateUI.NEW.correspondsTo(attempt.getState())) {
        // Do Nothing
      } else if (TaskAttemptStateUI.RUNNING.correspondsTo(attempt.getState())) {
        // Do Nothing
      } else if (TaskAttemptStateUI.SUCCESSFUL.correspondsTo(attempt
          .getState())) {
        ++successful;
      } else if (TaskAttemptStateUI.FAILED.correspondsTo(attempt.getState())) {
        ++failed;
      } else if (TaskAttemptStateUI.KILLED.correspondsTo(attempt.getState())) {
        ++killed;
      }

      switch (task.getType()) {
      case MAP:
        successfulMapAttempts += successful;
        failedMapAttempts += failed;
        killedMapAttempts += killed;
        if (attempt.getState() == TaskAttemptState.SUCCEEDED) {
          numMaps++;
          avgMapTime += (attempt.getFinishTime() - attempt.getLaunchTime());
        }
        break;
      case REDUCE:
        successfulReduceAttempts += successful;
        failedReduceAttempts += failed;
        killedReduceAttempts += killed;
        if (attempt.getState() == TaskAttemptState.SUCCEEDED) {
          numReduces++;
          avgShuffleTime += (attempt.getShuffleFinishTime() - attempt
              .getLaunchTime());
          avgMergeTime += attempt.getSortFinishTime()
              - attempt.getShuffleFinishTime();
          avgReduceTime += (attempt.getFinishTime() - attempt
              .getSortFinishTime());
        }
        break;
      }
    }
  }

  if (numMaps > 0) {
    avgMapTime = avgMapTime / numMaps;
  }

  if (numReduces > 0) {
    avgReduceTime = avgReduceTime / numReduces;
    avgShuffleTime = avgShuffleTime / numReduces;
    avgMergeTime = avgMergeTime / numReduces;
  }
}
 
Example 12
Source File: JobInfo.java    From big-c with Apache License 2.0
/**
 * Go through a job and update the member variables with counts for
 * information to output in the page.
 *
 * @param job
 *          the job to get counts for.
 */
private void countTasksAndAttempts(Job job) {
  final Map<TaskId, Task> tasks = job.getTasks();
  if (tasks == null) {
    return;
  }
  for (Task task : tasks.values()) {
    switch (task.getType()) {
    case MAP:
      // Task counts
      switch (task.getState()) {
      case RUNNING:
        ++this.mapsRunning;
        break;
      case SCHEDULED:
        ++this.mapsPending;
        break;
      default:
        break;
      }
      break;
    case REDUCE:
      // Task counts
      switch (task.getState()) {
      case RUNNING:
        ++this.reducesRunning;
        break;
      case SCHEDULED:
        ++this.reducesPending;
        break;
      default:
        break;
      }
      break;
    default:
      throw new IllegalStateException(
          "Task type is neither map nor reduce: " + task.getType());
    }
    // Attempts counts
    Map<TaskAttemptId, TaskAttempt> attempts = task.getAttempts();
    int newAttempts, running, successful, failed, killed;
    for (TaskAttempt attempt : attempts.values()) {

      newAttempts = 0;
      running = 0;
      successful = 0;
      failed = 0;
      killed = 0;
      if (TaskAttemptStateUI.NEW.correspondsTo(attempt.getState())) {
        ++newAttempts;
      } else if (TaskAttemptStateUI.RUNNING.correspondsTo(attempt.getState())) {
        ++running;
      } else if (TaskAttemptStateUI.SUCCESSFUL.correspondsTo(attempt
          .getState())) {
        ++successful;
      } else if (TaskAttemptStateUI.FAILED.correspondsTo(attempt.getState())) {
        ++failed;
      } else if (TaskAttemptStateUI.KILLED.correspondsTo(attempt.getState())) {
        ++killed;
      }

      switch (task.getType()) {
      case MAP:
        this.newMapAttempts += newAttempts;
        this.runningMapAttempts += running;
        this.successfulMapAttempts += successful;
        this.failedMapAttempts += failed;
        this.killedMapAttempts += killed;
        break;
      case REDUCE:
        this.newReduceAttempts += newAttempts;
        this.runningReduceAttempts += running;
        this.successfulReduceAttempts += successful;
        this.failedReduceAttempts += failed;
        this.killedReduceAttempts += killed;
        break;
      default:
        throw new IllegalStateException("Task type neither map nor reduce: " + 
            task.getType());
      }
    }
  }
}
 
Example 13
Source File: DefaultSpeculator.java    From big-c with Apache License 2.0
private int maybeScheduleASpeculation(TaskType type) {
  int successes = 0;

  long now = clock.getTime();

  ConcurrentMap<JobId, AtomicInteger> containerNeeds
      = type == TaskType.MAP ? mapContainerNeeds : reduceContainerNeeds;

  for (ConcurrentMap.Entry<JobId, AtomicInteger> jobEntry : containerNeeds.entrySet()) {
    // This race condition is okay.  If we skip a speculation attempt that we
    //  should have tried because the event that lowers the number of
    //  containers needed to zero hasn't come through yet, it will next time.
    // Also, if we miss the fact that the number of containers needed was
    //  zero but increased due to a failure it's not too bad to launch one
    //  container prematurely.
    if (jobEntry.getValue().get() > 0) {
      continue;
    }

    int numberSpeculationsAlready = 0;
    int numberRunningTasks = 0;

    // loop through the tasks of the kind
    Job job = context.getJob(jobEntry.getKey());

    Map<TaskId, Task> tasks = job.getTasks(type);

    int numberAllowedSpeculativeTasks
        = (int) Math.max(minimumAllowedSpeculativeTasks,
            proportionTotalTasksSpeculatable * tasks.size());

    TaskId bestTaskID = null;
    long bestSpeculationValue = -1L;

    // this loop is potentially pricey.
    // TODO track the tasks that are potentially worth looking at
    for (Map.Entry<TaskId, Task> taskEntry : tasks.entrySet()) {
      long mySpeculationValue = speculationValue(taskEntry.getKey(), now);

      if (mySpeculationValue == ALREADY_SPECULATING) {
        ++numberSpeculationsAlready;
      }

      if (mySpeculationValue != NOT_RUNNING) {
        ++numberRunningTasks;
      }

      if (mySpeculationValue > bestSpeculationValue) {
        bestTaskID = taskEntry.getKey();
        bestSpeculationValue = mySpeculationValue;
      }
    }
    numberAllowedSpeculativeTasks
        = (int) Math.max(numberAllowedSpeculativeTasks,
            proportionRunningTasksSpeculatable * numberRunningTasks);

    // If we found a speculation target, fire it off
    if (bestTaskID != null
        && numberAllowedSpeculativeTasks > numberSpeculationsAlready) {
      addSpeculativeAttempt(bestTaskID);
      ++successes;
    }
  }

  return successes;
}
 
Example 14
Source File: TestContainerLauncher.java    From big-c with Apache License 2.0
@Test(timeout = 15000)
public void testSlowNM() throws Exception {

  conf = new Configuration();
  int maxAttempts = 1;
  conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, maxAttempts);
  conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
  // set timeout low for the test
  conf.setInt("yarn.rpc.nm-command-timeout", 3000);
  conf.set(YarnConfiguration.IPC_RPC_IMPL, HadoopYarnProtoRPC.class.getName());
  YarnRPC rpc = YarnRPC.create(conf);
  String bindAddr = "localhost:0";
  InetSocketAddress addr = NetUtils.createSocketAddr(bindAddr);
  NMTokenSecretManagerInNM tokenSecretManager =
      new NMTokenSecretManagerInNM();
  MasterKey masterKey = Records.newRecord(MasterKey.class);
  masterKey.setBytes(ByteBuffer.wrap("key".getBytes()));
  tokenSecretManager.setMasterKey(masterKey);
  conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION,
    "token");
  server =
      rpc.getServer(ContainerManagementProtocol.class,
        new DummyContainerManager(), addr, conf, tokenSecretManager, 1);
  server.start();

  MRApp app = new MRAppWithSlowNM(tokenSecretManager);

  try {
    Job job = app.submit(conf);
    app.waitForState(job, JobState.RUNNING);

    Map<TaskId, Task> tasks = job.getTasks();
    Assert.assertEquals("Num tasks is not correct", 1, tasks.size());

    Task task = tasks.values().iterator().next();
    app.waitForState(task, TaskState.SCHEDULED);

    Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator()
        .next().getAttempts();
    Assert.assertEquals("Num attempts is not correct", maxAttempts,
        attempts.size());

    TaskAttempt attempt = attempts.values().iterator().next();
    app.waitForInternalState((TaskAttemptImpl) attempt,
        TaskAttemptStateInternal.ASSIGNED);

    app.waitForState(job, JobState.FAILED);

    String diagnostics = attempt.getDiagnostics().toString();
    LOG.info("attempt.getDiagnostics: " + diagnostics);

    Assert.assertTrue(diagnostics.contains("Container launch failed for "
        + "container_0_0000_01_000000 : "));
    Assert.assertTrue(diagnostics
        .contains("java.net.SocketTimeoutException: 3000 millis timeout while waiting for channel"));
  } finally {
    server.stop();
    app.stop();
  }
}
 
Example 15
Source File: TestKill.java    From big-c with Apache License 2.0
@Test
public void testKillTaskAttempt() throws Exception {
  final CountDownLatch latch = new CountDownLatch(1);
  MRApp app = new BlockingMRApp(2, 0, latch);
  //this will start the job, but the job won't complete while the Task is blocked
  Job job = app.submit(new Configuration());
  
  //wait for and validate the Job transition to RUNNING
  app.waitForState(job, JobState.RUNNING);
  Map<TaskId,Task> tasks = job.getTasks();
  Assert.assertEquals("No of tasks is not correct", 2, 
      tasks.size());
  Iterator<Task> it = tasks.values().iterator();
  Task task1 = it.next();
  Task task2 = it.next();
  
  //wait for the tasks to be scheduled
  app.waitForState(task1, TaskState.SCHEDULED);
  app.waitForState(task2, TaskState.SCHEDULED);
  
  //send the kill signal to the first Task's attempt
  TaskAttempt attempt = task1.getAttempts().values().iterator().next();
  app.getContext().getEventHandler().handle(
        new TaskAttemptEvent(attempt.getID(), TaskAttemptEventType.TA_KILL));
  
  //unblock
  latch.countDown();
  
  //wait for and validate the Job transition to SUCCEEDED;
  //the job will still succeed
  app.waitForState(job, JobState.SUCCEEDED);
  
  //the first Task will have two attempts: the 1st killed, the 2nd succeeded;
  //both Tasks and the Job succeed
  Assert.assertEquals("Task state not correct", TaskState.SUCCEEDED, 
      task1.getReport().getTaskState());
  Assert.assertEquals("Task state not correct", TaskState.SUCCEEDED, 
      task2.getReport().getTaskState());
 
  Map<TaskAttemptId, TaskAttempt> attempts = task1.getAttempts();
  Assert.assertEquals("No of attempts is not correct", 2, 
      attempts.size());
  Iterator<TaskAttempt> iter = attempts.values().iterator();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.KILLED, 
        iter.next().getReport().getTaskAttemptState());
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.SUCCEEDED, 
      iter.next().getReport().getTaskAttemptState());
  
  attempts = task2.getAttempts();
  Assert.assertEquals("No of attempts is not correct", 1, 
      attempts.size());
  iter = attempts.values().iterator();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.SUCCEEDED, 
        iter.next().getReport().getTaskAttemptState());
}
 
Example 16
Source File: JobInfo.java    From hadoop with Apache License 2.0
/**
 * Go through a job and update the member variables with counts for
 * information to output in the page.
 *
 * @param job
 *          the job to get counts for.
 */
private void countTasksAndAttempts(Job job) {
  numReduces = 0;
  numMaps = 0;
  final Map<TaskId, Task> tasks = job.getTasks();
  if (tasks == null) {
    return;
  }
  for (Task task : tasks.values()) {
    // Attempts counts
    Map<TaskAttemptId, TaskAttempt> attempts = task.getAttempts();
    int successful, failed, killed;
    for (TaskAttempt attempt : attempts.values()) {

      successful = 0;
      failed = 0;
      killed = 0;
      if (TaskAttemptStateUI.NEW.correspondsTo(attempt.getState())) {
        // Do Nothing
      } else if (TaskAttemptStateUI.RUNNING.correspondsTo(attempt.getState())) {
        // Do Nothing
      } else if (TaskAttemptStateUI.SUCCESSFUL.correspondsTo(attempt
          .getState())) {
        ++successful;
      } else if (TaskAttemptStateUI.FAILED.correspondsTo(attempt.getState())) {
        ++failed;
      } else if (TaskAttemptStateUI.KILLED.correspondsTo(attempt.getState())) {
        ++killed;
      }

      switch (task.getType()) {
      case MAP:
        successfulMapAttempts += successful;
        failedMapAttempts += failed;
        killedMapAttempts += killed;
        if (attempt.getState() == TaskAttemptState.SUCCEEDED) {
          numMaps++;
          avgMapTime += (attempt.getFinishTime() - attempt.getLaunchTime());
        }
        break;
      case REDUCE:
        successfulReduceAttempts += successful;
        failedReduceAttempts += failed;
        killedReduceAttempts += killed;
        if (attempt.getState() == TaskAttemptState.SUCCEEDED) {
          numReduces++;
          avgShuffleTime += (attempt.getShuffleFinishTime() - attempt
              .getLaunchTime());
          avgMergeTime += attempt.getSortFinishTime()
              - attempt.getShuffleFinishTime();
          avgReduceTime += (attempt.getFinishTime() - attempt
              .getSortFinishTime());
        }
        break;
      }
    }
  }

  if (numMaps > 0) {
    avgMapTime = avgMapTime / numMaps;
  }

  if (numReduces > 0) {
    avgReduceTime = avgReduceTime / numReduces;
    avgShuffleTime = avgShuffleTime / numReduces;
    avgMergeTime = avgMergeTime / numReduces;
  }
}
 
Example 17
Source File: TestTaskAttempt.java    From hadoop with Apache License 2.0
public void verifyMillisCounters(int mapMemMb, int reduceMemMb,
    int minContainerSize) throws Exception {
  Clock actualClock = new SystemClock();
  ControlledClock clock = new ControlledClock(actualClock);
  clock.setTime(10);
  MRApp app =
      new MRApp(1, 1, false, "testSlotMillisCounterUpdate", true, clock);
  Configuration conf = new Configuration();
  conf.setInt(MRJobConfig.MAP_MEMORY_MB, mapMemMb);
  conf.setInt(MRJobConfig.REDUCE_MEMORY_MB, reduceMemMb);
  conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 
    minContainerSize);
  app.setClusterInfo(new ClusterInfo(Resource.newInstance(10240, 1, 0)));

  Job job = app.submit(conf);
  app.waitForState(job, JobState.RUNNING);
  Map<TaskId, Task> tasks = job.getTasks();
  Assert.assertEquals("Num tasks is not correct", 2, tasks.size());
  Iterator<Task> taskIter = tasks.values().iterator();
  Task mTask = taskIter.next();
  app.waitForState(mTask, TaskState.RUNNING);
  Task rTask = taskIter.next();
  app.waitForState(rTask, TaskState.RUNNING);
  Map<TaskAttemptId, TaskAttempt> mAttempts = mTask.getAttempts();
  Assert.assertEquals("Num attempts is not correct", 1, mAttempts.size());
  Map<TaskAttemptId, TaskAttempt> rAttempts = rTask.getAttempts();
  Assert.assertEquals("Num attempts is not correct", 1, rAttempts.size());
  TaskAttempt mta = mAttempts.values().iterator().next();
  TaskAttempt rta = rAttempts.values().iterator().next();
  app.waitForState(mta, TaskAttemptState.RUNNING);
  app.waitForState(rta, TaskAttemptState.RUNNING);

  clock.setTime(11);
  app.getContext()
      .getEventHandler()
      .handle(new TaskAttemptEvent(mta.getID(), TaskAttemptEventType.TA_DONE));
  app.getContext()
      .getEventHandler()
      .handle(new TaskAttemptEvent(rta.getID(), TaskAttemptEventType.TA_DONE));
  app.waitForState(job, JobState.SUCCEEDED);
  Assert.assertEquals(11, mta.getFinishTime());
  Assert.assertEquals(10, mta.getLaunchTime());
  Assert.assertEquals(11, rta.getFinishTime());
  Assert.assertEquals(10, rta.getLaunchTime());
  Counters counters = job.getAllCounters();
  Assert.assertEquals((int) Math.ceil((float) mapMemMb / minContainerSize),
      counters.findCounter(JobCounter.SLOTS_MILLIS_MAPS).getValue());
  Assert.assertEquals((int) Math.ceil((float) reduceMemMb / minContainerSize),
      counters.findCounter(JobCounter.SLOTS_MILLIS_REDUCES).getValue());
  Assert.assertEquals(1,
      counters.findCounter(JobCounter.MILLIS_MAPS).getValue());
  Assert.assertEquals(1,
      counters.findCounter(JobCounter.MILLIS_REDUCES).getValue());
  Assert.assertEquals(mapMemMb,
      counters.findCounter(JobCounter.MB_MILLIS_MAPS).getValue());
  Assert.assertEquals(reduceMemMb,
      counters.findCounter(JobCounter.MB_MILLIS_REDUCES).getValue());
  Assert.assertEquals(1,
      counters.findCounter(JobCounter.VCORES_MILLIS_MAPS).getValue());
  Assert.assertEquals(1,
      counters.findCounter(JobCounter.VCORES_MILLIS_REDUCES).getValue());
}
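The expected counter values follow directly from the controlled clock: each attempt launches at time 10 and finishes at 11, so every *_MILLIS counter covers a 1 ms run. For instance, a hypothetical call verifyMillisCounters(2048, 3072, 1024) (arguments chosen here for illustration only) should see SLOTS_MILLIS_MAPS = ceil(2048 / 1024) * 1 = 2, SLOTS_MILLIS_REDUCES = ceil(3072 / 1024) * 1 = 3, and MB_MILLIS_MAPS = 2048 * 1 = 2048.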
 
Example 18
Source File: TestKill.java    From hadoop with Apache License 2.0
@Test
public void testKillTaskAttempt() throws Exception {
  final CountDownLatch latch = new CountDownLatch(1);
  MRApp app = new BlockingMRApp(2, 0, latch);
  //this will start the job, but the job won't complete while the Task is blocked
  Job job = app.submit(new Configuration());
  
  //wait for and validate the Job transition to RUNNING
  app.waitForState(job, JobState.RUNNING);
  Map<TaskId,Task> tasks = job.getTasks();
  Assert.assertEquals("No of tasks is not correct", 2, 
      tasks.size());
  Iterator<Task> it = tasks.values().iterator();
  Task task1 = it.next();
  Task task2 = it.next();
  
  //wait for the tasks to be scheduled
  app.waitForState(task1, TaskState.SCHEDULED);
  app.waitForState(task2, TaskState.SCHEDULED);
  
  //send the kill signal to the first Task's attempt
  TaskAttempt attempt = task1.getAttempts().values().iterator().next();
  app.getContext().getEventHandler().handle(
        new TaskAttemptEvent(attempt.getID(), TaskAttemptEventType.TA_KILL));
  
  //unblock
  latch.countDown();
  
  //wait for and validate the Job transition to SUCCEEDED;
  //the job will still succeed
  app.waitForState(job, JobState.SUCCEEDED);
  
  //the first Task will have two attempts: the 1st killed, the 2nd succeeded;
  //both Tasks and the Job succeed
  Assert.assertEquals("Task state not correct", TaskState.SUCCEEDED, 
      task1.getReport().getTaskState());
  Assert.assertEquals("Task state not correct", TaskState.SUCCEEDED, 
      task2.getReport().getTaskState());
 
  Map<TaskAttemptId, TaskAttempt> attempts = task1.getAttempts();
  Assert.assertEquals("No of attempts is not correct", 2, 
      attempts.size());
  Iterator<TaskAttempt> iter = attempts.values().iterator();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.KILLED, 
        iter.next().getReport().getTaskAttemptState());
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.SUCCEEDED, 
      iter.next().getReport().getTaskAttemptState());
  
  attempts = task2.getAttempts();
  Assert.assertEquals("No of attempts is not correct", 1, 
      attempts.size());
  iter = attempts.values().iterator();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.SUCCEEDED, 
        iter.next().getReport().getTaskAttemptState());
}
 
Example 19
Source File: TestKill.java    From hadoop with Apache License 2.0
@Test
public void testKillTask() throws Exception {
  final CountDownLatch latch = new CountDownLatch(1);
  MRApp app = new BlockingMRApp(2, 0, latch);
  //this will start the job, but the job won't complete while the Task is blocked
  Job job = app.submit(new Configuration());
  
  //wait for and validate the Job transition to RUNNING
  app.waitForState(job, JobState.RUNNING);
  Map<TaskId,Task> tasks = job.getTasks();
  Assert.assertEquals("No of tasks is not correct", 2, 
      tasks.size());
  Iterator<Task> it = tasks.values().iterator();
  Task task1 = it.next();
  Task task2 = it.next();
  
  //send the kill signal to the first Task
  app.getContext().getEventHandler().handle(
        new TaskEvent(task1.getID(), TaskEventType.T_KILL));
  
  //unblock Task
  latch.countDown();
  
  //wait for and validate the Job transition to SUCCEEDED
  app.waitForState(job, JobState.SUCCEEDED);
  
  //the first Task is killed and the second succeeds;
  //the Job succeeds
  
  Assert.assertEquals("Task state not correct", TaskState.KILLED, 
      task1.getReport().getTaskState());
  Assert.assertEquals("Task state not correct", TaskState.SUCCEEDED, 
      task2.getReport().getTaskState());
  Map<TaskAttemptId, TaskAttempt> attempts = task1.getAttempts();
  Assert.assertEquals("No of attempts is not correct", 1, 
      attempts.size());
  Iterator<TaskAttempt> iter = attempts.values().iterator();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.KILLED, 
        iter.next().getReport().getTaskAttemptState());

  attempts = task2.getAttempts();
  Assert.assertEquals("No of attempts is not correct", 1, 
      attempts.size());
  iter = attempts.values().iterator();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.SUCCEEDED, 
        iter.next().getReport().getTaskAttemptState());
}
 
Example 20
Source File: JobInfo.java    From hadoop with Apache License 2.0
/**
 * Go through a job and update the member variables with counts for
 * information to output in the page.
 *
 * @param job
 *          the job to get counts for.
 */
private void countTasksAndAttempts(Job job) {
  final Map<TaskId, Task> tasks = job.getTasks();
  if (tasks == null) {
    return;
  }
  for (Task task : tasks.values()) {
    switch (task.getType()) {
    case MAP:
      // Task counts
      switch (task.getState()) {
      case RUNNING:
        ++this.mapsRunning;
        break;
      case SCHEDULED:
        ++this.mapsPending;
        break;
      default:
        break;
      }
      break;
    case REDUCE:
      // Task counts
      switch (task.getState()) {
      case RUNNING:
        ++this.reducesRunning;
        break;
      case SCHEDULED:
        ++this.reducesPending;
        break;
      default:
        break;
      }
      break;
    default:
      throw new IllegalStateException(
          "Task type is neither map nor reduce: " + task.getType());
    }
    // Attempts counts
    Map<TaskAttemptId, TaskAttempt> attempts = task.getAttempts();
    int newAttempts, running, successful, failed, killed;
    for (TaskAttempt attempt : attempts.values()) {

      newAttempts = 0;
      running = 0;
      successful = 0;
      failed = 0;
      killed = 0;
      if (TaskAttemptStateUI.NEW.correspondsTo(attempt.getState())) {
        ++newAttempts;
      } else if (TaskAttemptStateUI.RUNNING.correspondsTo(attempt.getState())) {
        ++running;
      } else if (TaskAttemptStateUI.SUCCESSFUL.correspondsTo(attempt
          .getState())) {
        ++successful;
      } else if (TaskAttemptStateUI.FAILED.correspondsTo(attempt.getState())) {
        ++failed;
      } else if (TaskAttemptStateUI.KILLED.correspondsTo(attempt.getState())) {
        ++killed;
      }

      switch (task.getType()) {
      case MAP:
        this.newMapAttempts += newAttempts;
        this.runningMapAttempts += running;
        this.successfulMapAttempts += successful;
        this.failedMapAttempts += failed;
        this.killedMapAttempts += killed;
        break;
      case REDUCE:
        this.newReduceAttempts += newAttempts;
        this.runningReduceAttempts += running;
        this.successfulReduceAttempts += successful;
        this.failedReduceAttempts += failed;
        this.killedReduceAttempts += killed;
        break;
      default:
        throw new IllegalStateException("Task type neither map nor reduce: " + 
            task.getType());
      }
    }
  }
}