/***********************************************************************
 	hadoop-gpu
	Authors: Koichi Shirahata, Hitoshi Sato, Satoshi Matsuoka

This software is licensed under Apache License, Version 2.0 (the  "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-------------------------------------------------------------------------
File: TaskInProgress.java
Version: 0.20.1
***********************************************************************/

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred;


import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapred.JobClient.RawSplit;
import org.apache.hadoop.mapred.SortedRanges.Range;
import org.apache.hadoop.net.Node;


/*************************************************************
 * TaskInProgress maintains all the info needed for a
 * Task in the lifetime of its owning Job.  A given Task
 * might be speculatively executed or reexecuted, so we
 * need a level of indirection above the running-id itself.
 * <br>
 * A given TaskInProgress contains multiple taskids,
 * 0 or more of which might be executing at any one time.
 * (That's what allows speculative execution.)  A taskid
 * is now *never* recycled.  A TIP allocates enough taskids
 * to account for all the speculation and failures it will
 * ever have to handle.  Once those are up, the TIP is dead.
 * **************************************************************
 */
class TaskInProgress {
  static final int MAX_TASK_EXECS = 1;
  int maxTaskAttempts = 4;    
  static final double SPECULATIVE_GAP = 0.2;
  static final long SPECULATIVE_LAG = 60 * 1000;
  private static final int NUM_ATTEMPTS_PER_RESTART = 1000;

  public static final Log LOG = LogFactory.getLog(TaskInProgress.class);

  // Defines the TIP
  private String jobFile = null;
  private RawSplit rawSplit;
  private int numMaps;
  private int partition;
  private JobTracker jobtracker;
  private TaskID id;
  private JobInProgress job;

  // Status of the TIP
  private int successEventNumber = -1;
  private int numTaskFailures = 0;
  private int numKilledTasks = 0;
  private double progress = 0;
  private String state = "";
  private long startTime = 0;
  private long execStartTime = 0;
  private long execFinishTime = 0;
  private int completes = 0;
  private boolean failed = false;
  private boolean killed = false;
  private long maxSkipRecords = 0;
  private FailedRanges failedRanges = new FailedRanges();
  private volatile boolean skipping = false;
  private boolean jobCleanup = false; 
  private boolean jobSetup = false;
   
  // The 'next' usable taskid of this tip
  int nextTaskId = 0;
    
  // The taskid that took this TIP to SUCCESS
  private TaskAttemptID successfulTaskId;

  // The first taskid of this tip
  private TaskAttemptID firstTaskId;
  
  // Map from task Id -> TaskTracker Id, contains tasks that are
  // currently runnings
  private TreeMap<TaskAttemptID, String> activeTasks = new TreeMap<TaskAttemptID, String>();
  // All attempt Ids of this TIP
  private TreeSet<TaskAttemptID> tasks = new TreeSet<TaskAttemptID>();
  private JobConf conf;
  private Map<TaskAttemptID,List<String>> taskDiagnosticData =
    new TreeMap<TaskAttemptID,List<String>>();
  /**
   * Map from taskId -> TaskStatus
   */
  private TreeMap<TaskAttemptID,TaskStatus> taskStatuses = 
    new TreeMap<TaskAttemptID,TaskStatus>();

  // Map from taskId -> TaskTracker Id, 
  // contains cleanup attempts and where they ran, if any
  private TreeMap<TaskAttemptID, String> cleanupTasks =
    new TreeMap<TaskAttemptID, String>();

  private TreeSet<String> machinesWhereFailed = new TreeSet<String>();
  private TreeSet<TaskAttemptID> tasksReportedClosed = new TreeSet<TaskAttemptID>();
  
  //list of tasks to kill, <taskid> -> <shouldFail> 
  private TreeMap<TaskAttemptID, Boolean> tasksToKill = new TreeMap<TaskAttemptID, Boolean>();
  
  //task to commit, <taskattemptid>  
  private TaskAttemptID taskToCommit;
  
  private Counters counters = new Counters();
  

  /**
   * Constructor for MapTask
   */
  public TaskInProgress(JobID jobid, String jobFile, 
                        RawSplit rawSplit, 
                        JobTracker jobtracker, JobConf conf, 
                        JobInProgress job, int partition) {
    this.jobFile = jobFile;
    this.rawSplit = rawSplit;
    this.jobtracker = jobtracker;
    this.job = job;
    this.conf = conf;
    this.partition = partition;
    this.maxSkipRecords = SkipBadRecords.getMapperMaxSkipRecords(conf);
    setMaxTaskAttempts();
    init(jobid);
  }
        
  /**
   * Constructor for ReduceTask
   */
  public TaskInProgress(JobID jobid, String jobFile, 
                        int numMaps, 
                        int partition, JobTracker jobtracker, JobConf conf,
                        JobInProgress job) {
    this.jobFile = jobFile;
    this.numMaps = numMaps;
    this.partition = partition;
    this.jobtracker = jobtracker;
    this.job = job;
    this.conf = conf;
    this.maxSkipRecords = SkipBadRecords.getReducerMaxSkipGroups(conf);
    setMaxTaskAttempts();
    init(jobid);
  }
  
  /**
   * Set the max number of attempts before we declare a TIP as "failed"
   */
  private void setMaxTaskAttempts() {
    if (isMapTask()) {
      this.maxTaskAttempts = conf.getMaxMapAttempts();
    } else {
      this.maxTaskAttempts = conf.getMaxReduceAttempts();
    }
  }
    
  /**
   * Return the index of the tip within the job, so 
   * "task_200707121733_1313_0002_m_012345" would return 12345;
   * @return int the tip index
   */
  public int idWithinJob() {
    return partition;
  }    

  public boolean isJobCleanupTask() {
   return jobCleanup;
  }
  
  public void setJobCleanupTask() {
    jobCleanup = true;
  }

  public boolean isJobSetupTask() {
    return jobSetup;
  }
	  
  public void setJobSetupTask() {
    jobSetup = true;
  }

  public boolean isOnlyCommitPending() {
    for (TaskStatus t : taskStatuses.values()) {
      if (t.getRunState() == TaskStatus.State.COMMIT_PENDING) {
        return true;
      }
    }
    return false;
  }
 
  public boolean isCommitPending(TaskAttemptID taskId) {
    TaskStatus t = taskStatuses.get(taskId);
    if (t == null) {
      return false;
    }
    return t.getRunState() ==  TaskStatus.State.COMMIT_PENDING;
  }
  
  /**
   * Initialization common to Map and Reduce
   */
  void init(JobID jobId) {
    this.startTime = System.currentTimeMillis();
    this.id = new TaskID(jobId, isMapTask(), partition);
    this.skipping = startSkipping();
  }

  ////////////////////////////////////
  // Accessors, info, profiles, etc.
  ////////////////////////////////////

  /**
   * Return the start time
   */
  public long getStartTime() {
    return startTime;
  }
  
  /**
   * Return the exec start time
   */
  public long getExecStartTime() {
    return execStartTime;
  }
  
  /**
   * Set the exec start time
   */
  public void setExecStartTime(long startTime) {
    execStartTime = startTime;
  }
  
  /**
   * Return the exec finish time
   */
  public long getExecFinishTime() {
    return execFinishTime;
  }

  /**
   * Set the exec finish time
   */
  public void setExecFinishTime(long finishTime) {
    execFinishTime = finishTime;
    JobHistory.Task.logUpdates(id, execFinishTime); // log the update
  }
  
  /**
   * Return the parent job
   */
  public JobInProgress getJob() {
    return job;
  }
  /**
   * Return an ID for this task, not its component taskid-threads
   */
  public TaskID getTIPId() {
    return this.id;
  }
  /**
   * Whether this is a map task
   */
  public boolean isMapTask() {
    return rawSplit != null;
  }
    
  /**
   * Is the Task associated with taskid is the first attempt of the tip? 
   * @param taskId
   * @return Returns true if the Task is the first attempt of the tip
   */  
  public boolean isFirstAttempt(TaskAttemptID taskId) {
    return firstTaskId == null ? false : firstTaskId.equals(taskId); 
  }
  
  /**
   * Is this tip currently running any tasks?
   * @return true if any tasks are running
   */
  public boolean isRunning() {
    return !activeTasks.isEmpty();
  }
    
  TaskAttemptID getSuccessfulTaskid() {
    return successfulTaskId;
  }
  
  private void setSuccessfulTaskid(TaskAttemptID successfulTaskId) {
    this.successfulTaskId = successfulTaskId; 
  }
  
  private void resetSuccessfulTaskid() {
    this.successfulTaskId = null; 
  }
  
  /**
   * Is this tip complete?
   * 
   * @return <code>true</code> if the tip is complete, else <code>false</code>
   */
  public synchronized boolean isComplete() {
    return (completes > 0);
  }

  /**
   * Is the given taskid the one that took this tip to completion?
   * 
   * @param taskid taskid of attempt to check for completion
   * @return <code>true</code> if taskid is complete, else <code>false</code>
   */
  public boolean isComplete(TaskAttemptID taskid) {
    return ((completes > 0) 
            && taskid.equals(getSuccessfulTaskid()));
  }

  /**
   * Is the tip a failure?
   * 
   * @return <code>true</code> if tip has failed, else <code>false</code>
   */
  public boolean isFailed() {
    return failed;
  }

  /**
   * Number of times the TaskInProgress has failed.
   */
  public int numTaskFailures() {
    return numTaskFailures;
  }

  /**
   * Number of times the TaskInProgress has been killed by the framework.
   */
  public int numKilledTasks() {
    return numKilledTasks;
  }

  /**
   * Get the overall progress (from 0 to 1.0) for this TIP
   */
  public double getProgress() {
    return progress;
  }
    
  /**
   * Get the task's counters
   */
  public Counters getCounters() {
    return counters;
  }

  /**
   * Returns whether a component task-thread should be 
   * closed because the containing JobInProgress has completed
   * or the task is killed by the user
   */
  public boolean shouldClose(TaskAttemptID taskid) {
    /**
     * If the task hasn't been closed yet, and it belongs to a completed
     * TaskInProgress close it.
     * 
     * However, for completed map tasks we do not close the task which
     * actually was the one responsible for _completing_ the TaskInProgress. 
     */
    boolean close = false;
    TaskStatus ts = taskStatuses.get(taskid);
    if ((ts != null) &&
        (!tasksReportedClosed.contains(taskid)) &&
        ((this.failed) ||
        ((job.getStatus().getRunState() != JobStatus.RUNNING &&
         (job.getStatus().getRunState() != JobStatus.PREP))))) {
      tasksReportedClosed.add(taskid);
      close = true;
    } else if (isComplete() && 
               !(isMapTask() && !jobSetup && 
                   !jobCleanup && isComplete(taskid)) &&
               !tasksReportedClosed.contains(taskid)) {
      tasksReportedClosed.add(taskid);
      close = true; 
    } else if (isCommitPending(taskid) && !shouldCommit(taskid) &&
               !tasksReportedClosed.contains(taskid)) {
      tasksReportedClosed.add(taskid);
      close = true; 
    } else {
      close = tasksToKill.keySet().contains(taskid);
    }   
    return close;
  }

  /**
   * Commit this task attempt for the tip. 
   * @param taskid
   */
  public void doCommit(TaskAttemptID taskid) {
    taskToCommit = taskid;
  }

  /**
   * Returns whether the task attempt should be committed or not 
   */
  public boolean shouldCommit(TaskAttemptID taskid) {
    return !isComplete() && isCommitPending(taskid) && 
           taskToCommit.equals(taskid);
  }

  /**
   * Creates a "status report" for this task.  Includes the
   * task ID and overall status, plus reports for all the
   * component task-threads that have ever been started.
   */
  synchronized TaskReport generateSingleReport() {
    ArrayList<String> diagnostics = new ArrayList<String>();
    for (List<String> l : taskDiagnosticData.values()) {
      diagnostics.addAll(l);
    }
    TIPStatus currentStatus = null;
    if (isRunning() && !isComplete()) {
      currentStatus = TIPStatus.RUNNING;
    } else if (isComplete()) {
      currentStatus = TIPStatus.COMPLETE;
    } else if (wasKilled()) {
      currentStatus = TIPStatus.KILLED;
    } else if (isFailed()) {
      currentStatus = TIPStatus.FAILED;
    } else if (!(isComplete() || isRunning() || wasKilled())) {
      currentStatus = TIPStatus.PENDING;
    }

    TaskReport report = new TaskReport
    (getTIPId(), (float)progress, state,
     diagnostics.toArray(new String[diagnostics.size()]),
     currentStatus, execStartTime, execFinishTime, counters, false); 
    if(firstTaskId != null && jobtracker != null && jobtracker.getTaskStatus(firstTaskId) != null) {
//    	LOG.info("AAAA " + jobtracker.getTaskStatus(firstTaskId).runOnGPU());
    	report.setRunOnGPU(jobtracker.getTaskStatus(firstTaskId).runOnGPU());
    }
    
    if (currentStatus == TIPStatus.RUNNING) {
      report.setRunningTaskAttempts(activeTasks.keySet());
    } else if (currentStatus == TIPStatus.COMPLETE) {
      report.setSuccessfulAttempt(getSuccessfulTaskid());
    }
    
    return report;
  }

  /**
   * Get the diagnostic messages for a given task within this tip.
   * 
   * @param taskId the id of the required task
   * @return the list of diagnostics for that task
   */
  synchronized List<String> getDiagnosticInfo(TaskAttemptID taskId) {
    return taskDiagnosticData.get(taskId);
  }
    
  ////////////////////////////////////////////////
  // Update methods, usually invoked by the owning
  // job.
  ////////////////////////////////////////////////
  
  /**
   * Save diagnostic information for a given task.
   * 
   * @param taskId id of the task 
   * @param diagInfo diagnostic information for the task
   */
  public void addDiagnosticInfo(TaskAttemptID taskId, String diagInfo) {
    List<String> diagHistory = taskDiagnosticData.get(taskId);
    if (diagHistory == null) {
      diagHistory = new ArrayList<String>();
      taskDiagnosticData.put(taskId, diagHistory);
    }
    diagHistory.add(diagInfo);
  }
  
  /**
   * A status message from a client has arrived.
   * It updates the status of a single component-thread-task,
   * which might result in an overall TaskInProgress status update.
   * @return has the task changed its state noticably?
   */
  synchronized boolean updateStatus(TaskStatus status) {
    TaskAttemptID taskid = status.getTaskID();
    String diagInfo = status.getDiagnosticInfo();
    TaskStatus oldStatus = taskStatuses.get(taskid);
    boolean changed = true;
    if (diagInfo != null && diagInfo.length() > 0) {
      LOG.info("Error from "+taskid+": "+diagInfo);
      addDiagnosticInfo(taskid, diagInfo);
    }
    
    if(skipping) {
      failedRanges.updateState(status);
    }
    
    if (oldStatus != null) {
      TaskStatus.State oldState = oldStatus.getRunState();
      TaskStatus.State newState = status.getRunState();
          
      // We should never recieve a duplicate success/failure/killed
      // status update for the same taskid! This is a safety check, 
      // and is addressed better at the TaskTracker to ensure this.
      // @see {@link TaskTracker.transmitHeartbeat()}
      if ((newState != TaskStatus.State.RUNNING && 
           newState != TaskStatus.State.COMMIT_PENDING && 
           newState != TaskStatus.State.FAILED_UNCLEAN && 
           newState != TaskStatus.State.KILLED_UNCLEAN && 
           newState != TaskStatus.State.UNASSIGNED) && 
          (oldState == newState)) {
        LOG.warn("Recieved duplicate status update of '" + newState + 
                 "' for '" + taskid + "' of TIP '" + getTIPId() + "'");
        return false;
      }

      // The task is not allowed to move from completed back to running.
      // We have seen out of order status messagesmoving tasks from complete
      // to running. This is a spot fix, but it should be addressed more
      // globally.
      if ((newState == TaskStatus.State.RUNNING || 
          newState == TaskStatus.State.UNASSIGNED) &&
          (oldState == TaskStatus.State.FAILED || 
           oldState == TaskStatus.State.KILLED || 
           oldState == TaskStatus.State.FAILED_UNCLEAN || 
           oldState == TaskStatus.State.KILLED_UNCLEAN || 
           oldState == TaskStatus.State.SUCCEEDED ||
           oldState == TaskStatus.State.COMMIT_PENDING)) {
        return false;
      }
      
      //Do not accept any status once the task is marked FAILED/KILLED
      //This is to handle the case of the JobTracker timing out a task
      //due to launch delay, but the TT comes back with any state or 
      //TT got expired
      if (oldState == TaskStatus.State.FAILED ||
          oldState == TaskStatus.State.KILLED) {
        tasksToKill.put(taskid, true);
        return false;	  
      }
          
      changed = oldState != newState;
    }
    // if task is a cleanup attempt, do not replace the complete status,
    // update only specific fields.
    // For example, startTime should not be updated, 
    // but finishTime has to be updated.
    if (!isCleanupAttempt(taskid)) {
      taskStatuses.put(taskid, status);
    } else {
      taskStatuses.get(taskid).statusUpdate(status.getRunState(),
        status.getProgress(), status.getStateString(), status.getPhase(),
        status.getFinishTime());
    }

    // Recompute progress
    recomputeProgress();
    return changed;
  }

  /**
   * Indicate that one of the taskids in this TaskInProgress
   * has failed.
   */
  public void incompleteSubTask(TaskAttemptID taskid, 
                                JobStatus jobStatus) {
    //
    // Note the failure and its location
    //
    TaskStatus status = taskStatuses.get(taskid);
    String trackerName;
    String trackerHostName = null;
    TaskStatus.State taskState = TaskStatus.State.FAILED;
    if (status != null) {
      trackerName = status.getTaskTracker();
      trackerHostName = 
        JobInProgress.convertTrackerNameToHostName(trackerName);
      // Check if the user manually KILLED/FAILED this task-attempt...
      Boolean shouldFail = tasksToKill.remove(taskid);
      if (shouldFail != null) {
        if (status.getRunState() == TaskStatus.State.FAILED ||
            status.getRunState() == TaskStatus.State.KILLED) {
          taskState = (shouldFail) ? TaskStatus.State.FAILED :
                                     TaskStatus.State.KILLED;
        } else {
          taskState = (shouldFail) ? TaskStatus.State.FAILED_UNCLEAN :
                                     TaskStatus.State.KILLED_UNCLEAN;
          
        }
        status.setRunState(taskState);
        addDiagnosticInfo(taskid, "Task has been " + taskState + " by the user" );
      }
 
      taskState = status.getRunState();
      if (taskState != TaskStatus.State.FAILED && 
          taskState != TaskStatus.State.KILLED &&
          taskState != TaskStatus.State.FAILED_UNCLEAN &&
          taskState != TaskStatus.State.KILLED_UNCLEAN) {
        LOG.info("Task '" + taskid + "' running on '" + trackerName + 
                "' in state: '" + taskState + "' being failed!");
        status.setRunState(TaskStatus.State.FAILED);
        taskState = TaskStatus.State.FAILED;
      }

      // tasktracker went down and failed time was not reported. 
      if (0 == status.getFinishTime()){
        status.setFinishTime(System.currentTimeMillis());
      }
    }

    this.activeTasks.remove(taskid);
    
    // Since we do not fail completed reduces (whose outputs go to hdfs), we 
    // should note this failure only for completed maps, only if this taskid;
    // completed this map. however if the job is done, there is no need to 
    // manipulate completed maps
    if (this.isMapTask() && !jobSetup && !jobCleanup && isComplete(taskid) && 
        jobStatus.getRunState() != JobStatus.SUCCEEDED) {
      this.completes--;
      
      // Reset the successfulTaskId since we don't have a SUCCESSFUL task now
      resetSuccessfulTaskid();
    }

    // Note that there can be failures of tasks that are hosted on a machine 
    // that has not yet registered with restarted jobtracker
    // recalculate the counts only if its a genuine failure
    if (tasks.contains(taskid)) {
      if (taskState == TaskStatus.State.FAILED) {
        numTaskFailures++;
        machinesWhereFailed.add(trackerHostName);
        if(maxSkipRecords>0) {
          //skipping feature enabled
          LOG.debug("TaskInProgress adding" + status.getNextRecordRange());
          failedRanges.add(status.getNextRecordRange());
          skipping = startSkipping();
        }

      } else if (taskState == TaskStatus.State.KILLED) {
        numKilledTasks++;
      }
    }

    if (numTaskFailures >= maxTaskAttempts) {
      LOG.info("TaskInProgress " + getTIPId() + " has failed " + numTaskFailures + " times.");
      kill();
    }
  }
  
  /**
   * Get whether to start skipping mode. 
   */
  private boolean startSkipping() {
    if(maxSkipRecords>0 && 
        numTaskFailures>=SkipBadRecords.getAttemptsToStartSkipping(conf)) {
      return true;
    }
    return false;
  }

  /**
   * Finalize the <b>completed</b> task; note that this might not be the first 
   * task-attempt of the {@link TaskInProgress} and hence might be declared 
   * {@link TaskStatus.State.SUCCEEDED} or {@link TaskStatus.State.KILLED}
   * 
   * @param taskId id of the completed task-attempt
   * @param finalTaskState final {@link TaskStatus.State} of the task-attempt
   */
  private void completedTask(TaskAttemptID taskId, TaskStatus.State finalTaskState) {
    TaskStatus status = taskStatuses.get(taskId);
    status.setRunState(finalTaskState);
    activeTasks.remove(taskId);
  }
  
  /**
   * Indicate that one of the taskids in this already-completed
   * TaskInProgress has successfully completed; hence we mark this
   * taskid as {@link TaskStatus.State.KILLED}. 
   */
  void alreadyCompletedTask(TaskAttemptID taskid) {
    // 'KILL' the task 
    completedTask(taskid, TaskStatus.State.KILLED);
    
    // Note the reason for the task being 'KILLED'
    addDiagnosticInfo(taskid, "Already completed TIP");
    
    LOG.info("Already complete TIP " + getTIPId() + 
             " has completed task " + taskid);
  }

  /**
   * Indicate that one of the taskids in this TaskInProgress
   * has successfully completed!
   */
  public void completed(TaskAttemptID taskid) {
    //
    // Record that this taskid is complete
    //
    completedTask(taskid, TaskStatus.State.SUCCEEDED);
        
    // Note the successful taskid
    setSuccessfulTaskid(taskid);
    
    //
    // Now that the TIP is complete, the other speculative 
    // subtasks will be closed when the owning tasktracker 
    // reports in and calls shouldClose() on this object.
    //

    this.completes++;
    this.execFinishTime = System.currentTimeMillis();
    recomputeProgress();
    
  }

  /**
   * Get the split locations 
   */
  public String[] getSplitLocations() {
    if (isMapTask() && !jobSetup && !jobCleanup) {
      return rawSplit.getLocations();
    }
    return new String[0];
  }
  
  /**
   * Get the Status of the tasks managed by this TIP
   */
  public TaskStatus[] getTaskStatuses() {
    return taskStatuses.values().toArray(new TaskStatus[taskStatuses.size()]);
  }

  /**
   * Get the status of the specified task
   * @param taskid
   * @return
   */
  public TaskStatus getTaskStatus(TaskAttemptID taskid) {
    return taskStatuses.get(taskid);
  }
  /**
   * The TIP's been ordered kill()ed.
   */
  public void kill() {
    if (isComplete() || failed) {
      return;
    }
    this.failed = true;
    killed = true;
    this.execFinishTime = System.currentTimeMillis();
    recomputeProgress();
  }

  /**
   * Was the task killed?
   * @return true if the task killed
   */
  public boolean wasKilled() {
    return killed;
  }
  
  /**
   * Kill the given task
   */
  boolean killTask(TaskAttemptID taskId, boolean shouldFail) {
    TaskStatus st = taskStatuses.get(taskId);
    if(st != null && (st.getRunState() == TaskStatus.State.RUNNING
        || st.getRunState() == TaskStatus.State.COMMIT_PENDING ||
        st.inTaskCleanupPhase() ||
        st.getRunState() == TaskStatus.State.UNASSIGNED)
        && tasksToKill.put(taskId, shouldFail) == null ) {
      String logStr = "Request received to " + (shouldFail ? "fail" : "kill") 
                      + " task '" + taskId + "' by user";
      addDiagnosticInfo(taskId, logStr);
      LOG.info(logStr);
      return true;
    }
    return false;
  }

  /**
   * This method is called whenever there's a status change
   * for one of the TIP's sub-tasks.  It recomputes the overall 
   * progress for the TIP.  We examine all sub-tasks and find 
   * the one that's most advanced (and non-failed).
   */
  void recomputeProgress() {
    if (isComplete()) {
      this.progress = 1;
      // update the counters and the state
      TaskStatus completedStatus = taskStatuses.get(getSuccessfulTaskid());
      this.counters = completedStatus.getCounters();
      this.state = completedStatus.getStateString();
    } else if (failed) {
      this.progress = 0;
      // reset the counters and the state
      this.state = "";
      this.counters = new Counters();
    } else {
      double bestProgress = 0;
      String bestState = "";
      Counters bestCounters = new Counters();
      for (Iterator<TaskAttemptID> it = taskStatuses.keySet().iterator(); it.hasNext();) {
        TaskAttemptID taskid = it.next();
        TaskStatus status = taskStatuses.get(taskid);
        if (status.getRunState() == TaskStatus.State.SUCCEEDED) {
          bestProgress = 1;
          bestState = status.getStateString();
          bestCounters = status.getCounters();
          break;
        } else if (status.getRunState() == TaskStatus.State.COMMIT_PENDING) {
          //for COMMIT_PENDING, we take the last state that we recorded
          //when the task was RUNNING
          bestProgress = this.progress;
          bestState = this.state;
          bestCounters = this.counters;
        } else if (status.getRunState() == TaskStatus.State.RUNNING) {
          if (status.getProgress() >= bestProgress) {
            bestProgress = status.getProgress();
            bestState = status.getStateString();
            if (status.getIncludeCounters()) {
              bestCounters = status.getCounters();
            } else {
              bestCounters = this.counters;
            }
          }
        }
      }
      this.progress = bestProgress;
      this.state = bestState;
      this.counters = bestCounters;
    }
  }

  /////////////////////////////////////////////////
  // "Action" methods that actually require the TIP
  // to do something.
  /////////////////////////////////////////////////

  /**
   * Return whether this TIP still needs to run
   */
  boolean isRunnable() {
    return !failed && (completes == 0);
  }
    
  /**
   * Return whether the TIP has a speculative task to run.  We
   * only launch a speculative task if the current TIP is really
   * far behind, and has been behind for a non-trivial amount of 
   * time.
   */
  boolean hasSpeculativeTask(long currentTime, double averageProgress) {
    //
    // REMIND - mjc - these constants should be examined
    // in more depth eventually...
    //
      
    if (!skipping && activeTasks.size() <= MAX_TASK_EXECS &&
        (averageProgress - progress >= SPECULATIVE_GAP) &&
        (currentTime - startTime >= SPECULATIVE_LAG) 
        && completes == 0 && !isOnlyCommitPending()) {
      return true;
    }
    return false;
  }
    
  /**
   * Return a Task that can be sent to a TaskTracker for execution.
   */
  public Task getTaskToRun(String taskTracker) throws IOException {
    if (0 == execStartTime){
      // assume task starts running now
      execStartTime = System.currentTimeMillis();
    }

    // Create the 'taskid'; do not count the 'killed' tasks against the job!
    TaskAttemptID taskid = null;
    if (nextTaskId < (MAX_TASK_EXECS + maxTaskAttempts + numKilledTasks)) {
      // Make sure that the attempts are unqiue across restarts
      int attemptId = job.getNumRestarts() * NUM_ATTEMPTS_PER_RESTART + nextTaskId;
      taskid = new TaskAttemptID( id, attemptId);
      ++nextTaskId;
    } else {
      LOG.warn("Exceeded limit of " + (MAX_TASK_EXECS + maxTaskAttempts) +
              " (plus " + numKilledTasks + " killed)"  + 
              " attempts for the tip '" + getTIPId() + "'");
      return null;
    }

    return addRunningTask(taskid, taskTracker);
  }
  
  public Task addRunningTask(TaskAttemptID taskid, String taskTracker) {
    return addRunningTask(taskid, taskTracker, false);
  }
  
  /**
   * Adds a previously running task to this tip. This is used in case of 
   * jobtracker restarts.
   */
  public Task addRunningTask(TaskAttemptID taskid, 
                             String taskTracker,
                             boolean taskCleanup) {
    // create the task
    Task t = null;
    if (isMapTask()) {
      LOG.debug("attempt " + numTaskFailures + " sending skippedRecords "
          + failedRanges.getIndicesCount());
      String splitClass = null;
      BytesWritable split;
      if (!jobSetup && !jobCleanup) {
        splitClass = rawSplit.getClassName();
        split = rawSplit.getBytes();
      } else {
        split = new BytesWritable();
      }
      t = new MapTask(jobFile, taskid, partition, splitClass, split);
    } else {
      t = new ReduceTask(jobFile, taskid, partition, numMaps);
    }
    if (jobCleanup) {
      t.setJobCleanupTask();
    }
    if (jobSetup) {
      t.setJobSetupTask();
    }
    if (taskCleanup) {
      t.setTaskCleanupTask();
      t.setState(taskStatuses.get(taskid).getRunState());
      cleanupTasks.put(taskid, taskTracker);
    }
    t.setConf(conf);
    LOG.debug("Launching task with skipRanges:"+failedRanges.getSkipRanges());
    t.setSkipRanges(failedRanges.getSkipRanges());
    t.setSkipping(skipping);
    if(failedRanges.isTestAttempt()) {
      t.setWriteSkipRecs(false);
    }

    activeTasks.put(taskid, taskTracker);
    tasks.add(taskid);

    // Ask JobTracker to note that the task exists
    jobtracker.createTaskEntry(taskid, taskTracker, this);

    // check and set the first attempt
    if (firstTaskId == null) {
      firstTaskId = taskid;
    }
    return t;
  }

  boolean isRunningTask(TaskAttemptID taskid) {
    TaskStatus status = taskStatuses.get(taskid);
    return status != null && status.getRunState() == TaskStatus.State.RUNNING;
  }
  
  boolean isCleanupAttempt(TaskAttemptID taskid) {
    return cleanupTasks.containsKey(taskid);
  }
  
  String machineWhereCleanupRan(TaskAttemptID taskid) {
    return cleanupTasks.get(taskid);
  }
  
  String machineWhereTaskRan(TaskAttemptID taskid) {
    return taskStatuses.get(taskid).getTaskTracker();
  }
    
  boolean wasKilled(TaskAttemptID taskid) {
    return tasksToKill.containsKey(taskid);
  }
  
  /**
   * Has this task already failed on this machine?
   * @param trackerHost The task tracker hostname
   * @return Has it failed?
   */
  public boolean hasFailedOnMachine(String trackerHost) {
    return machinesWhereFailed.contains(trackerHost);
  }
    
  /**
   * Was this task ever scheduled to run on this machine?
   * @param trackerHost The task tracker hostname 
   * @param trackerName The tracker name
   * @return Was task scheduled on the tracker?
   */
  public boolean hasRunOnMachine(String trackerHost, String trackerName) {
    return this.activeTasks.values().contains(trackerName) || 
      hasFailedOnMachine(trackerHost);
  }
  /**
   * Get the number of machines where this task has failed.
   * @return the size of the failed machine set
   */
  public int getNumberOfFailedMachines() {
    return machinesWhereFailed.size();
  }
    
  /**
   * Get the id of this map or reduce task.
   * @return The index of this tip in the maps/reduces lists.
   */
  public int getIdWithinJob() {
    return partition;
  }
    
  /**
   * Set the event number that was raised for this tip
   */
  public void setSuccessEventNumber(int eventNumber) {
    successEventNumber = eventNumber;
  }
       
  /**
   * Get the event number that was raised for this tip
   */
  public int getSuccessEventNumber() {
    return successEventNumber;
  }
  
  /** 
   * Gets the Node list of input split locations sorted in rack order.
   */ 
  public String getSplitNodes() {
    if (!isMapTask() || jobSetup || jobCleanup) {
      return "";
    }
    String[] splits = rawSplit.getLocations();
    Node[] nodes = new Node[splits.length];
    for (int i = 0; i < splits.length; i++) {
      nodes[i] = jobtracker.getNode(splits[i]);
    }
    // sort nodes on rack location
    Arrays.sort(nodes, new Comparator<Node>() {
      public int compare(Node a, Node b) {
        String left = a.getNetworkLocation();
        String right = b.getNetworkLocation();
        return left.compareTo(right);
      }
    }); 
    return nodeToString(nodes);
  }

  private static String nodeToString(Node[] nodes) {
    if (nodes == null || nodes.length == 0) {
      return "";
    }
    StringBuffer ret = new StringBuffer(nodes[0].toString());
    for(int i = 1; i < nodes.length;i++) {
      ret.append(",");
      ret.append(nodes[i].toString());
    }
    return ret.toString();
  }

  public long getMapInputSize() {
    if(isMapTask() && !jobSetup && !jobCleanup) {
      return rawSplit.getDataLength();
    } else {
      return 0;
    }
  }
  
  public void clearSplit() {
    rawSplit.clearBytes();
  }
  
  /**
   * This class keeps the records to be skipped during further executions 
   * based on failed records from all the previous attempts.
   * It also narrow down the skip records if it is more than the 
   * acceptable value by dividing the failed range into half. In this case one 
   * half is executed in the next attempt (test attempt). 
   * In the test attempt, only the test range gets executed, others get skipped. 
   * Based on the success/failure of the test attempt, the range is divided 
   * further.
   */
  private class FailedRanges {
    private SortedRanges skipRanges = new SortedRanges();
    private Divide divide;
    
    synchronized SortedRanges getSkipRanges() {
      if(divide!=null) {
        return divide.skipRange;
      }
      return skipRanges;
    }
    
    synchronized boolean isTestAttempt() {
      return divide!=null;
    }
    
    synchronized long getIndicesCount() {
      if(isTestAttempt()) {
        return divide.skipRange.getIndicesCount();
      }
      return skipRanges.getIndicesCount();
    }
    
    synchronized void updateState(TaskStatus status){
      if (isTestAttempt() && 
          (status.getRunState() == TaskStatus.State.SUCCEEDED)) {
        divide.testPassed = true;
        //since it was the test attempt we need to set it to failed
        //as it worked only on the test range
        status.setRunState(TaskStatus.State.FAILED);
        
      }
    }
    
    synchronized void add(Range failedRange) {
      LOG.warn("FailedRange:"+ failedRange);
      if(divide!=null) {
        LOG.warn("FailedRange:"+ failedRange +"  test:"+divide.test +
            "  pass:"+divide.testPassed);
        if(divide.testPassed) {
          //test range passed
          //other range would be bad. test it
          failedRange = divide.other;
        }
        else {
          //test range failed
          //other range would be good.
          failedRange = divide.test;
        }
        //reset
        divide = null;
      }
      
      if(maxSkipRecords==0 || failedRange.getLength()<=maxSkipRecords) {
        skipRanges.add(failedRange);
      } else {
        //start dividing the range to narrow down the skipped
        //records until maxSkipRecords are met OR all attempts
        //get exhausted
        divide = new Divide(failedRange);
      }
    }
    
    class Divide {
      private final SortedRanges skipRange;
      private final Range test;
      private final Range other;
      private boolean testPassed;
      Divide(Range range){
        long half = range.getLength()/2;
        test = new Range(range.getStartIndex(), half);
        other = new Range(test.getEndIndex(), range.getLength()-half);
        //construct the skip range from the skipRanges
        skipRange = new SortedRanges();
        for(Range r : skipRanges.getRanges()) {
          skipRange.add(r);
        }
        skipRange.add(new Range(0,test.getStartIndex()));
        skipRange.add(new Range(test.getEndIndex(), 
            (Long.MAX_VALUE-test.getEndIndex())));
      }
    }
    
  }

  TreeMap<TaskAttemptID, String> getActiveTasks() {
    return activeTasks;
  }
}