Java Code Examples for org.apache.hadoop.mapreduce.v2.app.job.Task#getAttempts()

The following examples show how to use org.apache.hadoop.mapreduce.v2.app.job.Task#getAttempts(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AttemptsPage.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Gathers the attempts of every task of the requested type whose state
 * corresponds to the requested attempt state.
 *
 * The task type and the attempt state are obtained from the values looked
 * up via {@code $(TASK_TYPE)} and {@code $(ATTEMPT_STATE)}.
 *
 * @return the matching attempts; an empty collection when none match
 */
@Override
protected Collection<TaskAttempt> getTaskAttempts() {
  TaskType wantedType = MRApps.taskType($(TASK_TYPE));
  TaskAttemptStateUI wantedState =
      MRApps.taskAttemptState($(ATTEMPT_STATE));

  List<TaskAttempt> matching = new ArrayList<TaskAttempt>();
  for (Task task : super.app.getJob().getTasks(wantedType).values()) {
    for (TaskAttempt candidate : task.getAttempts().values()) {
      // Skip attempts whose state is not the one asked for.
      if (!wantedState.correspondsTo(candidate.getState())) {
        continue;
      }
      matching.add(candidate);
    }
  }
  return matching;
}
 
Example 2
Source File: HsAttemptsPage.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Gathers the attempts of every task of the requested type whose state
 * corresponds to the requested attempt state.
 *
 * The task type and the attempt state are obtained from the values looked
 * up via {@code $(TASK_TYPE)} and {@code $(ATTEMPT_STATE)}.
 *
 * @return the matching attempts; an empty collection when none match
 */
@Override
protected Collection<TaskAttempt> getTaskAttempts() {
  TaskType wantedType = MRApps.taskType($(TASK_TYPE));
  TaskAttemptStateUI wantedState =
      MRApps.taskAttemptState($(ATTEMPT_STATE));

  List<TaskAttempt> matching = new ArrayList<TaskAttempt>();
  for (Task task : app.getJob().getTasks(wantedType).values()) {
    for (TaskAttempt candidate : task.getAttempts().values()) {
      // Skip attempts whose state is not the one asked for.
      if (!wantedState.correspondsTo(candidate.getState())) {
        continue;
      }
      matching.add(candidate);
    }
  }
  return matching;
}
 
Example 3
Source File: HsAttemptsPage.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Gathers the attempts of every task of the requested type whose state
 * corresponds to the requested attempt state.
 *
 * The task type and the attempt state are obtained from the values looked
 * up via {@code $(TASK_TYPE)} and {@code $(ATTEMPT_STATE)}.
 *
 * @return the matching attempts; an empty collection when none match
 */
@Override
protected Collection<TaskAttempt> getTaskAttempts() {
  TaskType wantedType = MRApps.taskType($(TASK_TYPE));
  TaskAttemptStateUI wantedState =
      MRApps.taskAttemptState($(ATTEMPT_STATE));

  List<TaskAttempt> matching = new ArrayList<TaskAttempt>();
  for (Task task : app.getJob().getTasks(wantedType).values()) {
    for (TaskAttempt candidate : task.getAttempts().values()) {
      // Skip attempts whose state is not the one asked for.
      if (!wantedState.correspondsTo(candidate.getState())) {
        continue;
      }
      matching.add(candidate);
    }
  }
  return matching;
}
 
Example 4
Source File: AttemptsPage.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Gathers the attempts of every task of the requested type whose state
 * corresponds to the requested attempt state.
 *
 * The task type and the attempt state are obtained from the values looked
 * up via {@code $(TASK_TYPE)} and {@code $(ATTEMPT_STATE)}.
 *
 * @return the matching attempts; an empty collection when none match
 */
@Override
protected Collection<TaskAttempt> getTaskAttempts() {
  TaskType wantedType = MRApps.taskType($(TASK_TYPE));
  TaskAttemptStateUI wantedState =
      MRApps.taskAttemptState($(ATTEMPT_STATE));

  List<TaskAttempt> matching = new ArrayList<TaskAttempt>();
  for (Task task : super.app.getJob().getTasks(wantedType).values()) {
    for (TaskAttempt candidate : task.getAttempts().values()) {
      // Skip attempts whose state is not the one asked for.
      if (!wantedState.correspondsTo(candidate.getState())) {
        continue;
      }
      matching.add(candidate);
    }
  }
  return matching;
}
 
Example 5
Source File: TestJobHistoryEvents.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that the task is in the SUCCEEDED state and has exactly one
 * attempt, then passes each attempt to {@code verifyAttempt}.
 *
 * @param task the task to check
 */
private void verifyTask(Task task) {
  final int expectedAttemptCount = 1;

  Assert.assertEquals("Task state not currect", TaskState.SUCCEEDED,
      task.getState());

  Map<TaskAttemptId, TaskAttempt> attemptsById = task.getAttempts();
  Assert.assertEquals("No of attempts not correct", expectedAttemptCount,
      attemptsById.size());

  for (TaskAttempt each : attemptsById.values()) {
    verifyAttempt(each);
  }
}
 
Example 6
Source File: TestJobHistoryEvents.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that the task is in the SUCCEEDED state and has exactly one
 * attempt, then passes each attempt to {@code verifyAttempt}.
 *
 * @param task the task to check
 */
private void verifyTask(Task task) {
  final int expectedAttemptCount = 1;

  Assert.assertEquals("Task state not currect", TaskState.SUCCEEDED,
      task.getState());

  Map<TaskAttemptId, TaskAttempt> attemptsById = task.getAttempts();
  Assert.assertEquals("No of attempts not correct", expectedAttemptCount,
      attemptsById.size());

  for (TaskAttempt each : attemptsById.values()) {
    verifyAttempt(each);
  }
}
 
Example 7
Source File: DefaultSpeculator.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Computes a speculation score for one task as of time {@code now}.
 *
 * Returns one of the sentinel constants ON_SCHEDULE, ALREADY_SPECULATING,
 * TOO_NEW, PROGRESS_IS_GOOD, TOO_LATE_TO_SPECULATE or NOT_RUNNING when the
 * corresponding condition below is hit. Otherwise returns
 * {@code estimatedEndTime - estimatedReplacementEndTime} for the single
 * RUNNING/STARTING attempt, i.e. by how much a fresh attempt is estimated
 * to finish earlier than the current one.
 *
 * Side effects: inserts or updates the attempt's entry in
 * runningTaskAttemptStatistics, and may call handleAttempt with a
 * synthesized status when the attempt's statistics have not changed and
 * notHeartbeatedInAWhile(now) reports true.
 *
 * @param taskID the task to evaluate
 * @param now    the current time, in the same units the estimator uses
 *               (presumably milliseconds -- TODO confirm)
 * @return a sentinel constant or the estimated time saved by speculating
 */
private long speculationValue(TaskId taskID, long now) {
  Job job = context.getJob(taskID.getJobId());
  Task task = job.getTask(taskID);
  Map<TaskAttemptId, TaskAttempt> attempts = task.getAttempts();
  // Long.MIN_VALUE marks "threshold not looked up yet" (checked again below).
  long acceptableRuntime = Long.MIN_VALUE;
  long result = Long.MIN_VALUE;

  // For tasks not recorded in mayHaveSpeculated, consult the threshold up
  // front; a threshold of Long.MAX_VALUE short-circuits to ON_SCHEDULE.
  if (!mayHaveSpeculated.contains(taskID)) {
    acceptableRuntime = estimator.thresholdRuntime(taskID);
    if (acceptableRuntime == Long.MAX_VALUE) {
      return ON_SCHEDULE;
    }
  }

  TaskAttemptId runningTaskAttemptID = null;

  int numberRunningAttempts = 0;

  for (TaskAttempt taskAttempt : attempts.values()) {
    // Only RUNNING and STARTING attempts are considered.
    if (taskAttempt.getState() == TaskAttemptState.RUNNING
        || taskAttempt.getState() == TaskAttemptState.STARTING) {
      // A second active attempt means the task is already being speculated.
      if (++numberRunningAttempts > 1) {
        return ALREADY_SPECULATING;
      }
      runningTaskAttemptID = taskAttempt.getID();

      long estimatedRunTime = estimator.estimatedRuntime(runningTaskAttemptID);

      long taskAttemptStartTime
          = estimator.attemptEnrolledTime(runningTaskAttemptID);
      if (taskAttemptStartTime > now) {
        // This background process ran before we could process the task
        //  attempt status change that chronicles the attempt start
        return TOO_NEW;
      }

      long estimatedEndTime = estimatedRunTime + taskAttemptStartTime;

      long estimatedReplacementEndTime
          = now + estimator.estimatedNewAttemptRuntime(taskID);

      float progress = taskAttempt.getProgress();
      TaskAttemptHistoryStatistics data =
          runningTaskAttemptStatistics.get(runningTaskAttemptID);
      if (data == null) {
        // First time this attempt is seen: start tracking its statistics.
        runningTaskAttemptStatistics.put(runningTaskAttemptID,
          new TaskAttemptHistoryStatistics(estimatedRunTime, progress, now));
      } else {
        if (estimatedRunTime == data.getEstimatedRunTime()
            && progress == data.getProgress()) {
          // Current stats are the same as the previously recorded stats
          if (data.notHeartbeatedInAWhile(now)) {
            // Stats have stagnated for a while, simulate heart-beat.
            TaskAttemptStatus taskAttemptStatus = new TaskAttemptStatus();
            taskAttemptStatus.id = runningTaskAttemptID;
            taskAttemptStatus.progress = progress;
            taskAttemptStatus.taskState = taskAttempt.getState();
            // Now simulate the heart-beat
            handleAttempt(taskAttemptStatus);
          }
        } else {
          // Stats have changed - update our data structure
          data.setEstimatedRunTime(estimatedRunTime);
          data.setProgress(progress);
          data.resetHeartBeatTime(now);
        }
      }

      // The estimated end of the current attempt is already in the past.
      if (estimatedEndTime < now) {
        return PROGRESS_IS_GOOD;
      }

      // A replacement would not be estimated to finish any earlier.
      if (estimatedReplacementEndTime >= estimatedEndTime) {
        return TOO_LATE_TO_SPECULATE;
      }

      result = estimatedEndTime - estimatedReplacementEndTime;
    }
  }

  // If we are here, there's at most one RUNNING/STARTING task attempt.
  if (numberRunningAttempts == 0) {
    return NOT_RUNNING;
  }



  // Threshold was not consulted above (task is in mayHaveSpeculated):
  // look it up now and apply the same ON_SCHEDULE short-circuit.
  if (acceptableRuntime == Long.MIN_VALUE) {
    acceptableRuntime = estimator.thresholdRuntime(taskID);
    if (acceptableRuntime == Long.MAX_VALUE) {
      return ON_SCHEDULE;
    }
  }

  return result;
}
 
Example 8
Source File: JobInfo.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Walks every attempt of every task in the job and updates the member
 * variables holding attempt counts and average phase times for the page.
 *
 * numMaps and numReduces are reset before counting; the attempt counters
 * and the avg* fields are accumulated into and are not reset here. When
 * the job has no task map only the two resets take place.
 *
 * @param job
 *          the job to get counts for.
 */
private void countTasksAndAttempts(Job job) {
  numReduces = 0;
  numMaps = 0;
  final Map<TaskId, Task> allTasks = job.getTasks();
  if (allTasks == null) {
    return;
  }
  for (Task task : allTasks.values()) {
    for (TaskAttempt attempt : task.getAttempts().values()) {
      // Per-attempt deltas; at most one of them becomes 1.
      int succeededDelta = 0;
      int failedDelta = 0;
      int killedDelta = 0;

      // NEW and RUNNING attempts contribute nothing to the counters.
      boolean unfinished =
          TaskAttemptStateUI.NEW.correspondsTo(attempt.getState())
          || TaskAttemptStateUI.RUNNING.correspondsTo(attempt.getState());
      if (!unfinished) {
        if (TaskAttemptStateUI.SUCCESSFUL.correspondsTo(attempt.getState())) {
          succeededDelta = 1;
        } else if (TaskAttemptStateUI.FAILED.correspondsTo(
            attempt.getState())) {
          failedDelta = 1;
        } else if (TaskAttemptStateUI.KILLED.correspondsTo(
            attempt.getState())) {
          killedDelta = 1;
        }
      }

      switch (task.getType()) {
      case MAP:
        successfulMapAttempts += succeededDelta;
        failedMapAttempts += failedDelta;
        killedMapAttempts += killedDelta;
        if (attempt.getState() == TaskAttemptState.SUCCEEDED) {
          numMaps++;
          // Map time: launch to finish.
          avgMapTime += (attempt.getFinishTime() - attempt.getLaunchTime());
        }
        break;
      case REDUCE:
        successfulReduceAttempts += succeededDelta;
        failedReduceAttempts += failedDelta;
        killedReduceAttempts += killedDelta;
        if (attempt.getState() == TaskAttemptState.SUCCEEDED) {
          numReduces++;
          // Shuffle: launch to shuffle finish.
          avgShuffleTime += (attempt.getShuffleFinishTime()
              - attempt.getLaunchTime());
          // Merge: shuffle finish to sort finish.
          avgMergeTime += attempt.getSortFinishTime()
              - attempt.getShuffleFinishTime();
          // Reduce: sort finish to finish.
          avgReduceTime += (attempt.getFinishTime()
              - attempt.getSortFinishTime());
        }
        break;
      }
    }
  }

  // Turn the accumulated totals into averages, avoiding division by zero.
  if (numMaps > 0) {
    avgMapTime /= numMaps;
  }

  if (numReduces > 0) {
    avgReduceTime /= numReduces;
    avgShuffleTime /= numReduces;
    avgMergeTime /= numReduces;
  }
}
 
Example 9
Source File: TestTaskAttempt.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a two-task job under a controlled clock and verifies the attempts'
 * launch/finish times and the job's millis counters.
 *
 * The clock reads 10 while the attempts start and 11 when TA_DONE is sent,
 * so each attempt is expected to have run for one clock unit. The first
 * task returned by the task map is treated as the map task and the second
 * as the reduce task.
 *
 * @param mapMemMb         value set for MRJobConfig.MAP_MEMORY_MB
 * @param reduceMemMb      value set for MRJobConfig.REDUCE_MEMORY_MB
 * @param minContainerSize value set for
 *                         YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB
 * @throws Exception if the application fails or a wait does not complete
 */
public void verifyMillisCounters(int mapMemMb, int reduceMemMb,
    int minContainerSize) throws Exception {
  Clock actualClock = new SystemClock();
  ControlledClock clock = new ControlledClock(actualClock);
  clock.setTime(10);
  MRApp app =
      new MRApp(1, 1, false, "testSlotMillisCounterUpdate", true, clock);
  Configuration conf = new Configuration();
  conf.setInt(MRJobConfig.MAP_MEMORY_MB, mapMemMb);
  conf.setInt(MRJobConfig.REDUCE_MEMORY_MB, reduceMemMb);
  conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
    minContainerSize);
  app.setClusterInfo(new ClusterInfo(Resource.newInstance(10240, 1)));

  Job job = app.submit(conf);
  app.waitForState(job, JobState.RUNNING);
  Map<TaskId, Task> tasks = job.getTasks();
  Assert.assertEquals("Num tasks is not correct", 2, tasks.size());
  Iterator<Task> taskIter = tasks.values().iterator();
  Task mTask = taskIter.next();
  app.waitForState(mTask, TaskState.RUNNING);
  Task rTask = taskIter.next();
  app.waitForState(rTask, TaskState.RUNNING);
  Map<TaskAttemptId, TaskAttempt> mAttempts = mTask.getAttempts();
  Assert.assertEquals("Num attempts is not correct", 1, mAttempts.size());
  Map<TaskAttemptId, TaskAttempt> rAttempts = rTask.getAttempts();
  Assert.assertEquals("Num attempts is not correct", 1, rAttempts.size());
  TaskAttempt mta = mAttempts.values().iterator().next();
  TaskAttempt rta = rAttempts.values().iterator().next();
  app.waitForState(mta, TaskAttemptState.RUNNING);
  app.waitForState(rta, TaskAttemptState.RUNNING);

  // Advance the clock by one unit before completing both attempts.
  clock.setTime(11);
  app.getContext()
      .getEventHandler()
      .handle(new TaskAttemptEvent(mta.getID(), TaskAttemptEventType.TA_DONE));
  app.getContext()
      .getEventHandler()
      .handle(new TaskAttemptEvent(rta.getID(), TaskAttemptEventType.TA_DONE));
  app.waitForState(job, JobState.SUCCEEDED);
  // Expected value goes first so a failure reports "expected 11 but was X"
  // rather than the reverse.
  Assert.assertEquals(11, mta.getFinishTime());
  Assert.assertEquals(10, mta.getLaunchTime());
  Assert.assertEquals(11, rta.getFinishTime());
  Assert.assertEquals(10, rta.getLaunchTime());
  Counters counters = job.getAllCounters();
  // Slot-millis expectation: memory divided by the minimum container size,
  // rounded up (times the one elapsed clock unit).
  Assert.assertEquals((int) Math.ceil((float) mapMemMb / minContainerSize),
      counters.findCounter(JobCounter.SLOTS_MILLIS_MAPS).getValue());
  Assert.assertEquals((int) Math.ceil((float) reduceMemMb / minContainerSize),
      counters.findCounter(JobCounter.SLOTS_MILLIS_REDUCES).getValue());
  Assert.assertEquals(1,
      counters.findCounter(JobCounter.MILLIS_MAPS).getValue());
  Assert.assertEquals(1,
      counters.findCounter(JobCounter.MILLIS_REDUCES).getValue());
  Assert.assertEquals(mapMemMb,
      counters.findCounter(JobCounter.MB_MILLIS_MAPS).getValue());
  Assert.assertEquals(reduceMemMb,
      counters.findCounter(JobCounter.MB_MILLIS_REDUCES).getValue());
  Assert.assertEquals(1,
      counters.findCounter(JobCounter.VCORES_MILLIS_MAPS).getValue());
  Assert.assertEquals(1,
      counters.findCounter(JobCounter.VCORES_MILLIS_REDUCES).getValue());
}
 
Example 10
Source File: TestKill.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Kills the first attempt of the first task while the tasks are blocked,
 * then checks that the job and both tasks still succeed: the first task
 * ends with a KILLED attempt followed by a SUCCEEDED one, the second task
 * with a single SUCCEEDED attempt.
 */
@Test
public void testKillTaskAttempt() throws Exception {
  final CountDownLatch latch = new CountDownLatch(1);
  // NOTE(review): the (2, 0) arguments are presumably 2 maps and 0 reduces;
  // confirm against BlockingMRApp. The assertion below expects 2 tasks.
  MRApp app = new BlockingMRApp(2, 0, latch);
  //this will start the job but job won't complete as Task is blocked
  Job job = app.submit(new Configuration());
  
  //wait for the Job to become RUNNING, then validate its tasks
  app.waitForState(job, JobState.RUNNING);
  Map<TaskId,Task> tasks = job.getTasks();
  Assert.assertEquals("No of tasks is not correct", 2, 
      tasks.size());
  Iterator<Task> it = tasks.values().iterator();
  Task task1 = it.next();
  Task task2 = it.next();
  
  //wait for both tasks to reach the SCHEDULED state
  app.waitForState(task1, TaskState.SCHEDULED);
  app.waitForState(task2, TaskState.SCHEDULED);
  
  //send the kill signal to the first Task's attempt
  TaskAttempt attempt = task1.getAttempts().values().iterator().next();
  app.getContext().getEventHandler().handle(
        new TaskAttemptEvent(attempt.getID(), TaskAttemptEventType.TA_KILL));
  
  //unblock
  latch.countDown();
  
  //wait and validate for Job to become SUCCEEDED
  //job will still succeed
  app.waitForState(job, JobState.SUCCEEDED);
  
  //first Task will have two attempts 1st is killed, 2nd Succeeds
  //both Tasks and Job succeeds
  Assert.assertEquals("Task state not correct", TaskState.SUCCEEDED, 
      task1.getReport().getTaskState());
  Assert.assertEquals("Task state not correct", TaskState.SUCCEEDED, 
      task2.getReport().getTaskState());
 
  //the checks below depend on the iteration order of the attempts map:
  //the killed attempt is expected first, the successful one second
  Map<TaskAttemptId, TaskAttempt> attempts = task1.getAttempts();
  Assert.assertEquals("No of attempts is not correct", 2, 
      attempts.size());
  Iterator<TaskAttempt> iter = attempts.values().iterator();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.KILLED, 
        iter.next().getReport().getTaskAttemptState());
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.SUCCEEDED, 
      iter.next().getReport().getTaskAttemptState());
  
  //second Task was untouched: a single successful attempt
  attempts = task2.getAttempts();
  Assert.assertEquals("No of attempts is not correct", 1, 
      attempts.size());
  iter = attempts.values().iterator();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.SUCCEEDED, 
        iter.next().getReport().getTaskAttemptState());
}
 
Example 11
Source File: TestKill.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Kills the first task while the tasks are blocked, then checks that the
 * job still succeeds, the first task and its single attempt are KILLED,
 * and the second task and its single attempt are SUCCEEDED.
 */
@Test
public void testKillTask() throws Exception {
  final CountDownLatch latch = new CountDownLatch(1);
  // NOTE(review): the (2, 0) arguments are presumably 2 maps and 0 reduces;
  // confirm against BlockingMRApp. The assertion below expects 2 tasks.
  MRApp app = new BlockingMRApp(2, 0, latch);
  //this will start the job but job won't complete as Task is blocked
  Job job = app.submit(new Configuration());
  
  //wait for the Job to become RUNNING, then validate its tasks
  app.waitForState(job, JobState.RUNNING);
  Map<TaskId,Task> tasks = job.getTasks();
  Assert.assertEquals("No of tasks is not correct", 2, 
      tasks.size());
  Iterator<Task> it = tasks.values().iterator();
  Task task1 = it.next();
  Task task2 = it.next();
  
  //send the kill signal to the first Task
  app.getContext().getEventHandler().handle(
        new TaskEvent(task1.getID(), TaskEventType.T_KILL));
  
  //unblock Task
  latch.countDown();
  
  //wait and validate for Job to become SUCCEEDED
  app.waitForState(job, JobState.SUCCEEDED);
  
  //first Task is killed and second is Succeeded
  //Job is succeeded
  
  Assert.assertEquals("Task state not correct", TaskState.KILLED, 
      task1.getReport().getTaskState());
  Assert.assertEquals("Task state not correct", TaskState.SUCCEEDED, 
      task2.getReport().getTaskState());
  //the killed Task has exactly one attempt, and it is KILLED as well
  Map<TaskAttemptId, TaskAttempt> attempts = task1.getAttempts();
  Assert.assertEquals("No of attempts is not correct", 1, 
      attempts.size());
  Iterator<TaskAttempt> iter = attempts.values().iterator();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.KILLED, 
        iter.next().getReport().getTaskAttemptState());

  //second Task was untouched: a single successful attempt
  attempts = task2.getAttempts();
  Assert.assertEquals("No of attempts is not correct", 1, 
      attempts.size());
  iter = attempts.values().iterator();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.SUCCEEDED, 
        iter.next().getReport().getTaskAttemptState());
}
 
Example 12
Source File: JobInfo.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Walks the job's tasks and adds to the member counters shown on the page:
 * running/pending tasks per type, and new/running/successful/failed/killed
 * attempts per type.
 *
 * The counters are only incremented here, never reset. Nothing is counted
 * when the job has no task map.
 *
 * @param job
 *          the job to get counts for.
 * @throws IllegalStateException
 *           if a task's type is neither MAP nor REDUCE.
 */
private void countTasksAndAttempts(Job job) {
  final Map<TaskId, Task> allTasks = job.getTasks();
  if (allTasks == null) {
    return;
  }
  for (Task task : allTasks.values()) {
    // Task-level counts: only RUNNING and SCHEDULED tasks are tallied.
    switch (task.getType()) {
    case MAP:
      switch (task.getState()) {
      case RUNNING:
        this.mapsRunning++;
        break;
      case SCHEDULED:
        this.mapsPending++;
        break;
      }
      break;
    case REDUCE:
      switch (task.getState()) {
      case RUNNING:
        this.reducesRunning++;
        break;
      case SCHEDULED:
        this.reducesPending++;
        break;
      }
      break;
    default:
      throw new IllegalStateException(
          "Task type is neither map nor reduce: " + task.getType());
    }

    // Attempt-level counts.
    for (TaskAttempt attempt : task.getAttempts().values()) {
      // Per-attempt deltas; at most one of them becomes 1.
      int newDelta = 0;
      int runningDelta = 0;
      int succeededDelta = 0;
      int failedDelta = 0;
      int killedDelta = 0;

      if (TaskAttemptStateUI.NEW.correspondsTo(attempt.getState())) {
        newDelta = 1;
      } else if (TaskAttemptStateUI.RUNNING.correspondsTo(
          attempt.getState())) {
        runningDelta = 1;
      } else if (TaskAttemptStateUI.SUCCESSFUL.correspondsTo(
          attempt.getState())) {
        succeededDelta = 1;
      } else if (TaskAttemptStateUI.FAILED.correspondsTo(
          attempt.getState())) {
        failedDelta = 1;
      } else if (TaskAttemptStateUI.KILLED.correspondsTo(
          attempt.getState())) {
        killedDelta = 1;
      }

      // Fold the deltas into the counters for this task's type.
      switch (task.getType()) {
      case MAP:
        this.newMapAttempts += newDelta;
        this.runningMapAttempts += runningDelta;
        this.successfulMapAttempts += succeededDelta;
        this.failedMapAttempts += failedDelta;
        this.killedMapAttempts += killedDelta;
        break;
      case REDUCE:
        this.newReduceAttempts += newDelta;
        this.runningReduceAttempts += runningDelta;
        this.successfulReduceAttempts += succeededDelta;
        this.failedReduceAttempts += failedDelta;
        this.killedReduceAttempts += killedDelta;
        break;
      default:
        throw new IllegalStateException("Task type neither map nor reduce: " + 
            task.getType());
      }
    }
  }
}
 
Example 13
Source File: DefaultSpeculator.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Computes a speculation score for one task as of time {@code now}.
 *
 * Returns one of the sentinel constants ON_SCHEDULE, ALREADY_SPECULATING,
 * TOO_NEW, PROGRESS_IS_GOOD, TOO_LATE_TO_SPECULATE or NOT_RUNNING when the
 * corresponding condition below is hit. Otherwise returns
 * {@code estimatedEndTime - estimatedReplacementEndTime} for the single
 * RUNNING/STARTING attempt, i.e. by how much a fresh attempt is estimated
 * to finish earlier than the current one.
 *
 * Side effects: inserts or updates the attempt's entry in
 * runningTaskAttemptStatistics, and may call handleAttempt with a
 * synthesized status when the attempt's statistics have not changed and
 * notHeartbeatedInAWhile(now) reports true.
 *
 * @param taskID the task to evaluate
 * @param now    the current time, in the same units the estimator uses
 *               (presumably milliseconds -- TODO confirm)
 * @return a sentinel constant or the estimated time saved by speculating
 */
private long speculationValue(TaskId taskID, long now) {
  Job job = context.getJob(taskID.getJobId());
  Task task = job.getTask(taskID);
  Map<TaskAttemptId, TaskAttempt> attempts = task.getAttempts();
  // Long.MIN_VALUE marks "threshold not looked up yet" (checked again below).
  long acceptableRuntime = Long.MIN_VALUE;
  long result = Long.MIN_VALUE;

  // For tasks not recorded in mayHaveSpeculated, consult the threshold up
  // front; a threshold of Long.MAX_VALUE short-circuits to ON_SCHEDULE.
  if (!mayHaveSpeculated.contains(taskID)) {
    acceptableRuntime = estimator.thresholdRuntime(taskID);
    if (acceptableRuntime == Long.MAX_VALUE) {
      return ON_SCHEDULE;
    }
  }

  TaskAttemptId runningTaskAttemptID = null;

  int numberRunningAttempts = 0;

  for (TaskAttempt taskAttempt : attempts.values()) {
    // Only RUNNING and STARTING attempts are considered.
    if (taskAttempt.getState() == TaskAttemptState.RUNNING
        || taskAttempt.getState() == TaskAttemptState.STARTING) {
      // A second active attempt means the task is already being speculated.
      if (++numberRunningAttempts > 1) {
        return ALREADY_SPECULATING;
      }
      runningTaskAttemptID = taskAttempt.getID();

      long estimatedRunTime = estimator.estimatedRuntime(runningTaskAttemptID);

      long taskAttemptStartTime
          = estimator.attemptEnrolledTime(runningTaskAttemptID);
      if (taskAttemptStartTime > now) {
        // This background process ran before we could process the task
        //  attempt status change that chronicles the attempt start
        return TOO_NEW;
      }

      long estimatedEndTime = estimatedRunTime + taskAttemptStartTime;

      long estimatedReplacementEndTime
          = now + estimator.estimatedNewAttemptRuntime(taskID);

      float progress = taskAttempt.getProgress();
      TaskAttemptHistoryStatistics data =
          runningTaskAttemptStatistics.get(runningTaskAttemptID);
      if (data == null) {
        // First time this attempt is seen: start tracking its statistics.
        runningTaskAttemptStatistics.put(runningTaskAttemptID,
          new TaskAttemptHistoryStatistics(estimatedRunTime, progress, now));
      } else {
        if (estimatedRunTime == data.getEstimatedRunTime()
            && progress == data.getProgress()) {
          // Current stats are the same as the previously recorded stats
          if (data.notHeartbeatedInAWhile(now)) {
            // Stats have stagnated for a while, simulate heart-beat.
            TaskAttemptStatus taskAttemptStatus = new TaskAttemptStatus();
            taskAttemptStatus.id = runningTaskAttemptID;
            taskAttemptStatus.progress = progress;
            taskAttemptStatus.taskState = taskAttempt.getState();
            // Now simulate the heart-beat
            handleAttempt(taskAttemptStatus);
          }
        } else {
          // Stats have changed - update our data structure
          data.setEstimatedRunTime(estimatedRunTime);
          data.setProgress(progress);
          data.resetHeartBeatTime(now);
        }
      }

      // The estimated end of the current attempt is already in the past.
      if (estimatedEndTime < now) {
        return PROGRESS_IS_GOOD;
      }

      // A replacement would not be estimated to finish any earlier.
      if (estimatedReplacementEndTime >= estimatedEndTime) {
        return TOO_LATE_TO_SPECULATE;
      }

      result = estimatedEndTime - estimatedReplacementEndTime;
    }
  }

  // If we are here, there's at most one RUNNING/STARTING task attempt.
  if (numberRunningAttempts == 0) {
    return NOT_RUNNING;
  }



  // Threshold was not consulted above (task is in mayHaveSpeculated):
  // look it up now and apply the same ON_SCHEDULE short-circuit.
  if (acceptableRuntime == Long.MIN_VALUE) {
    acceptableRuntime = estimator.thresholdRuntime(taskID);
    if (acceptableRuntime == Long.MAX_VALUE) {
      return ON_SCHEDULE;
    }
  }

  return result;
}
 
Example 14
Source File: JobInfo.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Walks every attempt of every task in the job and updates the member
 * variables holding attempt counts and average phase times for the page.
 *
 * numMaps and numReduces are reset before counting; the attempt counters
 * and the avg* fields are accumulated into and are not reset here. When
 * the job has no task map only the two resets take place.
 *
 * @param job
 *          the job to get counts for.
 */
private void countTasksAndAttempts(Job job) {
  numReduces = 0;
  numMaps = 0;
  final Map<TaskId, Task> allTasks = job.getTasks();
  if (allTasks == null) {
    return;
  }
  for (Task task : allTasks.values()) {
    for (TaskAttempt attempt : task.getAttempts().values()) {
      // Per-attempt deltas; at most one of them becomes 1.
      int succeededDelta = 0;
      int failedDelta = 0;
      int killedDelta = 0;

      // NEW and RUNNING attempts contribute nothing to the counters.
      boolean unfinished =
          TaskAttemptStateUI.NEW.correspondsTo(attempt.getState())
          || TaskAttemptStateUI.RUNNING.correspondsTo(attempt.getState());
      if (!unfinished) {
        if (TaskAttemptStateUI.SUCCESSFUL.correspondsTo(attempt.getState())) {
          succeededDelta = 1;
        } else if (TaskAttemptStateUI.FAILED.correspondsTo(
            attempt.getState())) {
          failedDelta = 1;
        } else if (TaskAttemptStateUI.KILLED.correspondsTo(
            attempt.getState())) {
          killedDelta = 1;
        }
      }

      switch (task.getType()) {
      case MAP:
        successfulMapAttempts += succeededDelta;
        failedMapAttempts += failedDelta;
        killedMapAttempts += killedDelta;
        if (attempt.getState() == TaskAttemptState.SUCCEEDED) {
          numMaps++;
          // Map time: launch to finish.
          avgMapTime += (attempt.getFinishTime() - attempt.getLaunchTime());
        }
        break;
      case REDUCE:
        successfulReduceAttempts += succeededDelta;
        failedReduceAttempts += failedDelta;
        killedReduceAttempts += killedDelta;
        if (attempt.getState() == TaskAttemptState.SUCCEEDED) {
          numReduces++;
          // Shuffle: launch to shuffle finish.
          avgShuffleTime += (attempt.getShuffleFinishTime()
              - attempt.getLaunchTime());
          // Merge: shuffle finish to sort finish.
          avgMergeTime += attempt.getSortFinishTime()
              - attempt.getShuffleFinishTime();
          // Reduce: sort finish to finish.
          avgReduceTime += (attempt.getFinishTime()
              - attempt.getSortFinishTime());
        }
        break;
      }
    }
  }

  // Turn the accumulated totals into averages, avoiding division by zero.
  if (numMaps > 0) {
    avgMapTime /= numMaps;
  }

  if (numReduces > 0) {
    avgReduceTime /= numReduces;
    avgShuffleTime /= numReduces;
    avgMergeTime /= numReduces;
  }
}
 
Example 15
Source File: TestTaskAttempt.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a two-task job under a controlled clock and verifies the attempts'
 * launch/finish times and the job's millis counters.
 *
 * The clock reads 10 while the attempts start and 11 when TA_DONE is sent,
 * so each attempt is expected to have run for one clock unit. The first
 * task returned by the task map is treated as the map task and the second
 * as the reduce task.
 *
 * @param mapMemMb         value set for MRJobConfig.MAP_MEMORY_MB
 * @param reduceMemMb      value set for MRJobConfig.REDUCE_MEMORY_MB
 * @param minContainerSize value set for
 *                         YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB
 * @throws Exception if the application fails or a wait does not complete
 */
public void verifyMillisCounters(int mapMemMb, int reduceMemMb,
    int minContainerSize) throws Exception {
  Clock actualClock = new SystemClock();
  ControlledClock clock = new ControlledClock(actualClock);
  clock.setTime(10);
  MRApp app =
      new MRApp(1, 1, false, "testSlotMillisCounterUpdate", true, clock);
  Configuration conf = new Configuration();
  conf.setInt(MRJobConfig.MAP_MEMORY_MB, mapMemMb);
  conf.setInt(MRJobConfig.REDUCE_MEMORY_MB, reduceMemMb);
  conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
    minContainerSize);
  app.setClusterInfo(new ClusterInfo(Resource.newInstance(10240, 1, 0)));

  Job job = app.submit(conf);
  app.waitForState(job, JobState.RUNNING);
  Map<TaskId, Task> tasks = job.getTasks();
  Assert.assertEquals("Num tasks is not correct", 2, tasks.size());
  Iterator<Task> taskIter = tasks.values().iterator();
  Task mTask = taskIter.next();
  app.waitForState(mTask, TaskState.RUNNING);
  Task rTask = taskIter.next();
  app.waitForState(rTask, TaskState.RUNNING);
  Map<TaskAttemptId, TaskAttempt> mAttempts = mTask.getAttempts();
  Assert.assertEquals("Num attempts is not correct", 1, mAttempts.size());
  Map<TaskAttemptId, TaskAttempt> rAttempts = rTask.getAttempts();
  Assert.assertEquals("Num attempts is not correct", 1, rAttempts.size());
  TaskAttempt mta = mAttempts.values().iterator().next();
  TaskAttempt rta = rAttempts.values().iterator().next();
  app.waitForState(mta, TaskAttemptState.RUNNING);
  app.waitForState(rta, TaskAttemptState.RUNNING);

  // Advance the clock by one unit before completing both attempts.
  clock.setTime(11);
  app.getContext()
      .getEventHandler()
      .handle(new TaskAttemptEvent(mta.getID(), TaskAttemptEventType.TA_DONE));
  app.getContext()
      .getEventHandler()
      .handle(new TaskAttemptEvent(rta.getID(), TaskAttemptEventType.TA_DONE));
  app.waitForState(job, JobState.SUCCEEDED);
  // Expected value goes first so a failure reports "expected 11 but was X"
  // rather than the reverse.
  Assert.assertEquals(11, mta.getFinishTime());
  Assert.assertEquals(10, mta.getLaunchTime());
  Assert.assertEquals(11, rta.getFinishTime());
  Assert.assertEquals(10, rta.getLaunchTime());
  Counters counters = job.getAllCounters();
  // Slot-millis expectation: memory divided by the minimum container size,
  // rounded up (times the one elapsed clock unit).
  Assert.assertEquals((int) Math.ceil((float) mapMemMb / minContainerSize),
      counters.findCounter(JobCounter.SLOTS_MILLIS_MAPS).getValue());
  Assert.assertEquals((int) Math.ceil((float) reduceMemMb / minContainerSize),
      counters.findCounter(JobCounter.SLOTS_MILLIS_REDUCES).getValue());
  Assert.assertEquals(1,
      counters.findCounter(JobCounter.MILLIS_MAPS).getValue());
  Assert.assertEquals(1,
      counters.findCounter(JobCounter.MILLIS_REDUCES).getValue());
  Assert.assertEquals(mapMemMb,
      counters.findCounter(JobCounter.MB_MILLIS_MAPS).getValue());
  Assert.assertEquals(reduceMemMb,
      counters.findCounter(JobCounter.MB_MILLIS_REDUCES).getValue());
  Assert.assertEquals(1,
      counters.findCounter(JobCounter.VCORES_MILLIS_MAPS).getValue());
  Assert.assertEquals(1,
      counters.findCounter(JobCounter.VCORES_MILLIS_REDUCES).getValue());
}
 
Example 16
Source File: TestKill.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Kills the first attempt of the first task while the tasks are blocked,
 * then checks that the job and both tasks still succeed: the first task
 * ends with a KILLED attempt followed by a SUCCEEDED one, the second task
 * with a single SUCCEEDED attempt.
 */
@Test
public void testKillTaskAttempt() throws Exception {
  final CountDownLatch latch = new CountDownLatch(1);
  // NOTE(review): the (2, 0) arguments are presumably 2 maps and 0 reduces;
  // confirm against BlockingMRApp. The assertion below expects 2 tasks.
  MRApp app = new BlockingMRApp(2, 0, latch);
  //this will start the job but job won't complete as Task is blocked
  Job job = app.submit(new Configuration());
  
  //wait for the Job to become RUNNING, then validate its tasks
  app.waitForState(job, JobState.RUNNING);
  Map<TaskId,Task> tasks = job.getTasks();
  Assert.assertEquals("No of tasks is not correct", 2, 
      tasks.size());
  Iterator<Task> it = tasks.values().iterator();
  Task task1 = it.next();
  Task task2 = it.next();
  
  //wait for both tasks to reach the SCHEDULED state
  app.waitForState(task1, TaskState.SCHEDULED);
  app.waitForState(task2, TaskState.SCHEDULED);
  
  //send the kill signal to the first Task's attempt
  TaskAttempt attempt = task1.getAttempts().values().iterator().next();
  app.getContext().getEventHandler().handle(
        new TaskAttemptEvent(attempt.getID(), TaskAttemptEventType.TA_KILL));
  
  //unblock
  latch.countDown();
  
  //wait and validate for Job to become SUCCEEDED
  //job will still succeed
  app.waitForState(job, JobState.SUCCEEDED);
  
  //first Task will have two attempts 1st is killed, 2nd Succeeds
  //both Tasks and Job succeeds
  Assert.assertEquals("Task state not correct", TaskState.SUCCEEDED, 
      task1.getReport().getTaskState());
  Assert.assertEquals("Task state not correct", TaskState.SUCCEEDED, 
      task2.getReport().getTaskState());
 
  //the checks below depend on the iteration order of the attempts map:
  //the killed attempt is expected first, the successful one second
  Map<TaskAttemptId, TaskAttempt> attempts = task1.getAttempts();
  Assert.assertEquals("No of attempts is not correct", 2, 
      attempts.size());
  Iterator<TaskAttempt> iter = attempts.values().iterator();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.KILLED, 
        iter.next().getReport().getTaskAttemptState());
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.SUCCEEDED, 
      iter.next().getReport().getTaskAttemptState());
  
  //second Task was untouched: a single successful attempt
  attempts = task2.getAttempts();
  Assert.assertEquals("No of attempts is not correct", 1, 
      attempts.size());
  iter = attempts.values().iterator();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.SUCCEEDED, 
        iter.next().getReport().getTaskAttemptState());
}
 
Example 17
Source File: TestKill.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Kills the first task while the tasks are blocked, then checks that the
 * job still succeeds, the first task and its single attempt are KILLED,
 * and the second task and its single attempt are SUCCEEDED.
 */
@Test
public void testKillTask() throws Exception {
  final CountDownLatch latch = new CountDownLatch(1);
  // NOTE(review): the (2, 0) arguments are presumably 2 maps and 0 reduces;
  // confirm against BlockingMRApp. The assertion below expects 2 tasks.
  MRApp app = new BlockingMRApp(2, 0, latch);
  //this will start the job but job won't complete as Task is blocked
  Job job = app.submit(new Configuration());
  
  //wait for the Job to become RUNNING, then validate its tasks
  app.waitForState(job, JobState.RUNNING);
  Map<TaskId,Task> tasks = job.getTasks();
  Assert.assertEquals("No of tasks is not correct", 2, 
      tasks.size());
  Iterator<Task> it = tasks.values().iterator();
  Task task1 = it.next();
  Task task2 = it.next();
  
  //send the kill signal to the first Task
  app.getContext().getEventHandler().handle(
        new TaskEvent(task1.getID(), TaskEventType.T_KILL));
  
  //unblock Task
  latch.countDown();
  
  //wait and validate for Job to become SUCCEEDED
  app.waitForState(job, JobState.SUCCEEDED);
  
  //first Task is killed and second is Succeeded
  //Job is succeeded
  
  Assert.assertEquals("Task state not correct", TaskState.KILLED, 
      task1.getReport().getTaskState());
  Assert.assertEquals("Task state not correct", TaskState.SUCCEEDED, 
      task2.getReport().getTaskState());
  //the killed Task has exactly one attempt, and it is KILLED as well
  Map<TaskAttemptId, TaskAttempt> attempts = task1.getAttempts();
  Assert.assertEquals("No of attempts is not correct", 1, 
      attempts.size());
  Iterator<TaskAttempt> iter = attempts.values().iterator();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.KILLED, 
        iter.next().getReport().getTaskAttemptState());

  //second Task was untouched: a single successful attempt
  attempts = task2.getAttempts();
  Assert.assertEquals("No of attempts is not correct", 1, 
      attempts.size());
  iter = attempts.values().iterator();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.SUCCEEDED, 
        iter.next().getReport().getTaskAttemptState());
}
 
Example 18
Source File: JobInfo.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Walks the job's tasks and adds to the member counters shown on the page:
 * running/pending tasks per type, and new/running/successful/failed/killed
 * attempts per type.
 *
 * The counters are only incremented here, never reset. Nothing is counted
 * when the job has no task map.
 *
 * @param job
 *          the job to get counts for.
 * @throws IllegalStateException
 *           if a task's type is neither MAP nor REDUCE.
 */
private void countTasksAndAttempts(Job job) {
  final Map<TaskId, Task> allTasks = job.getTasks();
  if (allTasks == null) {
    return;
  }
  for (Task task : allTasks.values()) {
    // Task-level counts: only RUNNING and SCHEDULED tasks are tallied.
    switch (task.getType()) {
    case MAP:
      switch (task.getState()) {
      case RUNNING:
        this.mapsRunning++;
        break;
      case SCHEDULED:
        this.mapsPending++;
        break;
      }
      break;
    case REDUCE:
      switch (task.getState()) {
      case RUNNING:
        this.reducesRunning++;
        break;
      case SCHEDULED:
        this.reducesPending++;
        break;
      }
      break;
    default:
      throw new IllegalStateException(
          "Task type is neither map nor reduce: " + task.getType());
    }

    // Attempt-level counts.
    for (TaskAttempt attempt : task.getAttempts().values()) {
      // Per-attempt deltas; at most one of them becomes 1.
      int newDelta = 0;
      int runningDelta = 0;
      int succeededDelta = 0;
      int failedDelta = 0;
      int killedDelta = 0;

      if (TaskAttemptStateUI.NEW.correspondsTo(attempt.getState())) {
        newDelta = 1;
      } else if (TaskAttemptStateUI.RUNNING.correspondsTo(
          attempt.getState())) {
        runningDelta = 1;
      } else if (TaskAttemptStateUI.SUCCESSFUL.correspondsTo(
          attempt.getState())) {
        succeededDelta = 1;
      } else if (TaskAttemptStateUI.FAILED.correspondsTo(
          attempt.getState())) {
        failedDelta = 1;
      } else if (TaskAttemptStateUI.KILLED.correspondsTo(
          attempt.getState())) {
        killedDelta = 1;
      }

      // Fold the deltas into the counters for this task's type.
      switch (task.getType()) {
      case MAP:
        this.newMapAttempts += newDelta;
        this.runningMapAttempts += runningDelta;
        this.successfulMapAttempts += succeededDelta;
        this.failedMapAttempts += failedDelta;
        this.killedMapAttempts += killedDelta;
        break;
      case REDUCE:
        this.newReduceAttempts += newDelta;
        this.runningReduceAttempts += runningDelta;
        this.successfulReduceAttempts += succeededDelta;
        this.failedReduceAttempts += failedDelta;
        this.killedReduceAttempts += killedDelta;
        break;
      default:
        throw new IllegalStateException("Task type neither map nor reduce: " + 
            task.getType());
      }
    }
  }
}