/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred;

import java.io.DataInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.Vector;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.CleanupQueue.PathDeletionContext;
import org.apache.hadoop.mapred.JobHistory.Values;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.server.jobtracker.TaskTracker;
import org.apache.hadoop.metrics.MetricsContext;
import org.apache.hadoop.metrics.MetricsRecord;
import org.apache.hadoop.metrics.MetricsUtil;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.net.NetworkTopology;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.util.StringUtils;

/*************************************************************
 * JobInProgress maintains all the info for keeping
 * a Job on the straight and narrow.  It keeps its JobProfile
 * and its latest JobStatus, plus a set of tables for
 * doing bookkeeping of its Tasks.
 * ***********************************************************
 */
public class JobInProgress extends JobInProgressTraits {
  /**
   * Thrown to signal that a kill was issued to a job while that job was
   * still initializing.
   */
  static class KillInterruptedException extends InterruptedException {
    private static final long serialVersionUID = 1L;

    /**
     * Creates the exception carrying the supplied detail message.
     *
     * @param msg detail message, handed unchanged to
     *            {@link InterruptedException#InterruptedException(String)}
     */
    public KillInterruptedException(String msg) {
      super(msg);
    }
  }

  // Logger for this class, obtained from commons-logging by class.
  static final Log LOG = LogFactory.getLog(JobInProgress.class);
  // Separate logger obtained under the fixed name "Counters".
  static final Log countersLog = LogFactory.getLog("Counters");

  // The job's profile and its latest status (see the class comment).
  JobProfile profile;
  JobStatus status;
  // Paths of the job file and its local copy; both start out as null.
  Path jobFile = null;
  Path localJobFile = null;

  // Number of map / reduce tasks; both start at zero.
  int numMapTasks = 0;
  int numReduceTasks = 0;
  // Memory per map / reduce task.
  // NOTE(review): units are not visible in this part of the file - confirm.
  long memoryPerMap;
  long memoryPerReduce;
  // Slots taken per map / reduce task; default to 1 and are volatile.
  volatile int numSlotsPerMap = 1;
  volatile int numSlotsPerReduce = 1;
  // NOTE(review): presumably the per-tracker failure limit for this job's
  // tasks - confirm against where it is assigned.
  int maxTaskFailuresPerTracker;
  // Accumulated wait times and first-start times for maps / reduces; all
  // volatile and initially zero.
  // NOTE(review): time unit is not visible here - confirm.
  volatile long totalMapWaitTime = 0L;
  volatile long totalReduceWaitTime = 0L;
  volatile long firstMapStartTime = 0;
  volatile long firstReduceStartTime = 0;

  // Counters to track currently running/finished/failed Map/Reduce task-attempts
  // One map counter and one reduce counter per state (running, pending,
  // needed, finished, failed, killed); every counter starts at zero.
  // These are plain ints (not volatile/atomic).
  // NOTE(review): how "pending" differs from "needed" is not visible in this
  // part of the file - confirm against the code that updates them.
  int runningMapTasks = 0;
  int runningReduceTasks = 0;
  int pendingMapTasks = 0;
  int pendingReduceTasks = 0;
  int neededMapTasks = 0;
  int neededReduceTasks = 0;
  int finishedMapTasks = 0;
  int finishedReduceTasks = 0;
  int failedMapTasks = 0;
  int failedReduceTasks = 0;
  int killedMapTasks = 0;
  int killedReduceTasks = 0;

  // Default for the completed-maps threshold used by reduce slowstart.
  // NOTE(review): 0.05f is presumably a fraction (i.e. 5%) despite "PERCENT"
  // in the name - confirm where it is consumed.
  static final float DEFAULT_COMPLETED_MAPS_PERCENT_FOR_REDUCE_SLOWSTART = 0.05f;
  // Starts at zero.
  // NOTE(review): presumably the number of completed maps required before
  // reduces are scheduled - confirm.
  int completedMapsForReduceSlowstart = 0;
  // Both default to 5.
  // NOTE(review): the meaning of the "rushReduce" limits is not evident from
  // this part of the file - confirm against their usage.
  int rushReduceReduces = 5;
  int rushReduceMaps = 5;

  // Speculate when the percentage of the unfinished maps is lower than this.
  // The *_KEY constant is presumably the configuration property that
  // overrides the 0.001F default below - confirm where it is read.
  // NOTE(review): the field name misspells "Unfinished" as "Unfininshed";
  // left unchanged here because other code may reference it.
  public static final String SPECULATIVE_MAP_UNFINISHED_THRESHOLD_KEY =
      "mapred.map.tasks.speculation.unfinished.threshold";
  private float speculativeMapUnfininshedThreshold = 0.001F;

  // Speculate when the percentage of the unfinished reduces is lower than this.
  // Reduce-side counterpart of the map threshold above, with the same
  // 0.001F default and the same misspelling in the field name.
  public static final String SPECULATIVE_REDUCE_UNFINISHED_THRESHOLD_KEY =
      "mapred.reduce.tasks.speculation.unfinished.threshold";
  private float speculativeReduceUnfininshedThreshold = 0.001F;

  // runningMapTasks includes speculative tasks, so speculative tasks are
  // also counted separately in the two per-job counters below.
  int speculativeMapTasks = 0;
  int speculativeReduceTasks = 0;
  // Initially false.
  // NOTE(review): presumably set once this job has been garbage collected /
  // cleaned up - confirm against the code that sets it.
  boolean garbageCollected = false;
  // Static, hence shared by all JobInProgress instances rather than being
  // per-job; held in AtomicIntegers and starting at zero.
  private static AtomicInteger totalSpeculativeMapTasks = new AtomicInteger(0);
  private static AtomicInteger totalSpeculativeReduceTasks =
      new AtomicInteger(0);

  // NOTE(review): presumably the percentage of map / reduce tasks allowed to
  // fail before the job itself is failed - confirm. Both default to zero.
  int mapFailuresPercent = 0;
  int reduceFailuresPercent = 0;
  // Counts of failed map / reduce TIPs (TaskInProgress); start at zero.
  int failedMapTIPs = 0;
  int failedReduceTIPs = 0;
  // Volatile lifecycle flags, all initially false.
  private volatile boolean launchedCleanup = false;
  private volatile boolean launchedSetup = false;
  private volatile boolean jobKilled = false;
  private volatile boolean jobFailed = false;
  // Behaviour switches with no initializer here (so they default to false
  // until assigned elsewhere).
  boolean jobSetupCleanupNeeded;
  boolean jobFinishWhenReducesDone;
  boolean taskCleanupNeeded;

  // Job priority; defaults to NORMAL.
  JobPriority priority = JobPriority.NORMAL;
  // Reference to a JobTracker; not initialized at declaration.
  JobTracker jobtracker;

  // Map of NetworkTopology Node to the list of non-running map TIPs
  // (TIP = TaskInProgress)
  Map<Node, List<TaskInProgress>> nonRunningMapCache;

  // Map of NetworkTopology Node to set of running TIPs
  Map<Node, Set<TaskInProgress>> runningMapCache;

  // A list of non-local non-running maps
  List<TaskInProgress> nonLocalMaps;

  // A set of non-local running maps
  Set<TaskInProgress> nonLocalRunningMaps;

  // A list of non-running reduce TIPs
  List<TaskInProgress> nonRunningReduces;

  // A set of running reduce TIPs
  Set<TaskInProgress> runningReduces;

  // A list of cleanup tasks for the map task attempts, to be launched.
  // Unlike the collections above, this one is created at declaration
  // (as an empty LinkedList).
  List<TaskAttemptID> mapCleanupTasks = new LinkedList<TaskAttemptID>();