/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tez.analyzer;

import static org.junit.Assert.assertTrue;

import java.io.File;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Random;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.namenode.EditLogFileOutputStream;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.tez.analyzer.plugins.CriticalPathAnalyzer;
import org.apache.tez.analyzer.plugins.CriticalPathAnalyzer.CriticalPathDependency;
import org.apache.tez.analyzer.plugins.CriticalPathAnalyzer.CriticalPathStep;
import org.apache.tez.analyzer.plugins.CriticalPathAnalyzer.CriticalPathStep.EntityType;
import org.apache.tez.client.TezClient;
import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TezConstants;
import org.apache.tez.dag.api.client.DAGClient;
import org.apache.tez.dag.api.client.DAGStatus;
import org.apache.tez.dag.history.logging.ats.ATSHistoryLoggingService;
import org.apache.tez.dag.history.logging.impl.SimpleHistoryLoggingService;
import org.apache.tez.dag.records.TaskAttemptTerminationCause;
import org.apache.tez.dag.records.TezDAGID;
import org.apache.tez.history.ATSImportTool;
import org.apache.tez.history.parser.ATSFileParser;
import org.apache.tez.history.parser.SimpleHistoryParser;
import org.apache.tez.history.parser.datamodel.DagInfo;
import org.apache.tez.test.SimpleTestDAG;
import org.apache.tez.test.SimpleTestDAG3Vertices;
import org.apache.tez.test.TestInput;
import org.apache.tez.test.TestProcessor;
import org.apache.tez.test.dag.SimpleReverseVTestDAG;
import org.apache.tez.test.dag.SimpleVTestDAG;
import org.apache.tez.tests.MiniTezClusterWithTimeline;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Joiner;
import com.google.common.collect.Lists;

public class TestAnalyzer {
  private static final Logger LOG = LoggerFactory.getLogger(TestAnalyzer.class);

  private static String TEST_ROOT_DIR =
      "target" + Path.SEPARATOR + TestAnalyzer.class.getName() + "-tmpDir";
  private static String DOWNLOAD_DIR = TEST_ROOT_DIR + Path.SEPARATOR + "download";
  private final static String SIMPLE_HISTORY_DIR = "/tmp/simplehistory/";
  private final static String HISTORY_TXT = "history.txt";

  private static MiniDFSCluster dfsCluster;
  private static MiniTezClusterWithTimeline miniTezCluster;

  private static Configuration conf = new Configuration();
  private static FileSystem fs;
  
  private static TezClient tezSession = null;
  
  private boolean usingATS = true;
  private boolean downloadedSimpleHistoryFile = false;
  private static String yarnTimelineAddress;

  @BeforeClass
  public static void setupClass() throws Exception {
    conf = new Configuration();
    conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_EDITS_NOEDITLOGCHANNELFLUSH, false);
    EditLogFileOutputStream.setShouldSkipFsyncForTesting(true);
    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEST_ROOT_DIR);
    dfsCluster =
        new MiniDFSCluster.Builder(conf).numDataNodes(1).format(true).build();
    fs = dfsCluster.getFileSystem();
    conf.set("fs.defaultFS", fs.getUri().toString());

    setupTezCluster();
  }
  
  @AfterClass
  public static void tearDownClass() throws Exception {
    LOG.info("Stopping mini clusters");
    if (miniTezCluster != null) {
      miniTezCluster.stop();
      miniTezCluster = null;
    }
    if (dfsCluster != null) {
      dfsCluster.shutdown();
      dfsCluster = null;
    }
  }
  
  private CriticalPathAnalyzer setupCPAnalyzer() {
    Configuration analyzerConf = new Configuration(false);
    analyzerConf.setBoolean(CriticalPathAnalyzer.DRAW_SVG, false);
    CriticalPathAnalyzer cp = new CriticalPathAnalyzer();
    cp.setConf(analyzerConf);
    return cp;
  }

  private static void setupTezCluster() throws Exception {
    // make the test run faster by speeding heartbeat frequency
    conf.setInt(YarnConfiguration.RM_NM_HEARTBEAT_INTERVAL_MS, 100);
    conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true);
    conf.setBoolean(TezConfiguration.TEZ_AM_ALLOW_DISABLED_TIMELINE_DOMAINS, true);
    conf.set(TezConfiguration.TEZ_HISTORY_LOGGING_SERVICE_CLASS, ATSHistoryLoggingService
        .class.getName());

    miniTezCluster =
        new MiniTezClusterWithTimeline(TestAnalyzer.class.getName(), 1, 1, 1, true);

    miniTezCluster.init(conf);
    miniTezCluster.start();
    yarnTimelineAddress = miniTezCluster.getConfig().get(YarnConfiguration.TIMELINE_SERVICE_WEBAPP_ADDRESS);
  }

  private TezConfiguration createCommonTezLog() throws Exception {
    TezConfiguration tezConf = new TezConfiguration(miniTezCluster.getConfig());
    
    tezConf.setInt(TezConfiguration.TEZ_AM_RM_HEARTBEAT_INTERVAL_MS_MAX, 100);
    Path remoteStagingDir = dfsCluster.getFileSystem().makeQualified(new Path(TEST_ROOT_DIR, String
        .valueOf(new Random().nextInt(100000))));
    
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR,
        remoteStagingDir.toString());
    tezConf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false);
    
    return tezConf;
  }

  private void createTezSessionATS() throws Exception {
    TezConfiguration tezConf = createCommonTezLog();
    tezConf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true);
    tezConf.set(YarnConfiguration.TIMELINE_SERVICE_WEBAPP_ADDRESS,
        miniTezCluster.getConfig().get(YarnConfiguration.TIMELINE_SERVICE_WEBAPP_ADDRESS));
    tezConf.setBoolean(TezConfiguration.TEZ_AM_ALLOW_DISABLED_TIMELINE_DOMAINS, true);
    tezConf.set(TezConfiguration.TEZ_HISTORY_LOGGING_SERVICE_CLASS,
        ATSHistoryLoggingService.class.getName());
    
    Path remoteStagingDir = dfsCluster.getFileSystem().makeQualified(new Path(TEST_ROOT_DIR, String
        .valueOf(new Random().nextInt(100000))));
    
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR,
        remoteStagingDir.toString());
    tezConf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false);

    tezSession = TezClient.create("TestAnalyzer", tezConf, true);
    tezSession.start();
  }
  
  private void createTezSessionSimpleHistory() throws Exception {
    TezConfiguration tezConf = createCommonTezLog();
    tezConf.set(TezConfiguration.TEZ_HISTORY_LOGGING_SERVICE_CLASS,
        SimpleHistoryLoggingService.class.getName());

    tezConf.set(TezConfiguration.TEZ_SIMPLE_HISTORY_LOGGING_DIR, SIMPLE_HISTORY_DIR);

    Path remoteStagingDir = dfsCluster.getFileSystem().makeQualified(new Path(TEST_ROOT_DIR, String
        .valueOf(new Random().nextInt(100000))));
    
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR,
        remoteStagingDir.toString());
    tezConf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false);

    tezSession = TezClient.create("TestFaultTolerance", tezConf, true);
    tezSession.start();
  }
  
  private StepCheck createStep(String attempt, CriticalPathDependency reason) {
    return createStep(attempt, reason, null, null);
  }
  
  private StepCheck createStep(String attempt, CriticalPathDependency reason, 
      TaskAttemptTerminationCause errCause, List<String> notes) {
    return new StepCheck(attempt, reason, errCause, notes);
  }
  
  private class StepCheck {
    String attempt; // attempt is the TaskAttemptInfo short name with regex
    CriticalPathDependency reason;
    TaskAttemptTerminationCause errCause;
    List<String> notesStr;

    StepCheck(String attempt, CriticalPathDependency reason, 
        TaskAttemptTerminationCause cause, List<String> notes) {
      this.attempt = attempt;
      this.reason = reason;
      this.errCause = cause;
      this.notesStr = notes;
    }
    String getAttemptDetail() {
      return attempt;
    }
    CriticalPathDependency getReason() {
      return reason;
    }
    TaskAttemptTerminationCause getErrCause() {
      return errCause;
    }
    List<String> getNotesStr() {
      return notesStr;
    }
  }
  
  private void runDAG(DAG dag, DAGStatus.State finalState) throws Exception {
    tezSession.waitTillReady();
    LOG.info("ABC Running DAG name: " + dag.getName());
    DAGClient dagClient = tezSession.submitDAG(dag);
    DAGStatus dagStatus = dagClient.getDAGStatus(null);
    while (!dagStatus.isCompleted()) {
      LOG.info("Waiting for dag to complete. Sleeping for 500ms."
          + " DAG name: " + dag.getName()
          + " DAG appContext: " + dagClient.getExecutionContext()
          + " Current state: " + dagStatus.getState());
      Thread.sleep(100);
      dagStatus = dagClient.getDAGStatus(null);
    }

    Assert.assertEquals(finalState, dagStatus.getState());
  }
  
  private void verify(ApplicationId appId, int dagNum, List<StepCheck[]> steps) throws Exception {
    String dagId = TezDAGID.getInstance(appId, dagNum).toString();
    DagInfo dagInfo = getDagInfo(dagId);
    
    verifyCriticalPath(dagInfo, steps);
  }
  
  private DagInfo getDagInfo(String dagId) throws Exception {
    // sleep for a bit to let ATS events be sent from AM
    DagInfo dagInfo = null;
    if (usingATS) {
      //Export the data from ATS
      String[] args = { "--dagId=" + dagId, "--downloadDir=" + DOWNLOAD_DIR, "--yarnTimelineAddress=" + yarnTimelineAddress };
  
      int result = ATSImportTool.process(args);
      assertTrue(result == 0);
  
      //Parse ATS data and verify results
      //Parse downloaded contents
      File downloadedFile = new File(DOWNLOAD_DIR
          + Path.SEPARATOR + dagId + ".zip");
      ATSFileParser parser = new ATSFileParser(Arrays.asList(downloadedFile));
      dagInfo = parser.getDAGData(dagId);
      assertTrue(dagInfo.getDagId().equals(dagId));
    } else {
      if (!downloadedSimpleHistoryFile) {
        downloadedSimpleHistoryFile = true;
        TezDAGID tezDAGID = TezDAGID.fromString(dagId);
        ApplicationAttemptId applicationAttemptId = ApplicationAttemptId.newInstance(tezDAGID
            .getApplicationId(), 1);
        Path historyPath = new Path(miniTezCluster.getConfig().get("fs.defaultFS")
            + SIMPLE_HISTORY_DIR + HISTORY_TXT + "."
            + applicationAttemptId);
        FileSystem fs = historyPath.getFileSystem(miniTezCluster.getConfig());
  
        Path localPath = new Path(DOWNLOAD_DIR, HISTORY_TXT);
        fs.copyToLocalFile(historyPath, localPath);
      }
      //Now parse via SimpleHistory
      File localFile = new File(DOWNLOAD_DIR, HISTORY_TXT);
      SimpleHistoryParser parser = new SimpleHistoryParser(Arrays.asList(localFile));
      dagInfo = parser.getDAGData(dagId);
      assertTrue(dagInfo.getDagId().equals(dagId));
    }
    return dagInfo;
  }
  
  private void verifyCriticalPath(DagInfo dagInfo, List<StepCheck[]> stepsOptions) throws Exception {
    CriticalPathAnalyzer cp = setupCPAnalyzer();
    cp.analyze(dagInfo);
    
    List<CriticalPathStep> criticalPath = cp.getCriticalPath();

    for (CriticalPathStep step : criticalPath) {
      LOG.info("ABC Step: " + step.getType());
      if (step.getType() == EntityType.ATTEMPT) {
        LOG.info("ABC Attempt: " + step.getAttempt().getShortName() 
            + " " + step.getAttempt().getDetailedStatus());
      }
      LOG.info("ABC Reason: " + step.getReason());
      String notes = Joiner.on(";").join(step.getNotes());
      LOG.info("ABC Notes: " + notes);
    }

    boolean foundMatchingLength = false;
    for (StepCheck[] steps : stepsOptions) {
      if (steps.length + 2 == criticalPath.size()) {
        foundMatchingLength = true;
        Assert.assertEquals(CriticalPathStep.EntityType.VERTEX_INIT, criticalPath.get(0).getType());
        Assert.assertEquals(criticalPath.get(1).getAttempt().getShortName(),
            criticalPath.get(0).getAttempt().getShortName());

        for (int i=1; i<criticalPath.size() - 1; ++i) {
          StepCheck check = steps[i-1];
          CriticalPathStep step = criticalPath.get(i);
          Assert.assertEquals(CriticalPathStep.EntityType.ATTEMPT, step.getType());
          Assert.assertTrue(check.getAttemptDetail(), 
              step.getAttempt().getShortName().matches(check.getAttemptDetail()));
          Assert.assertEquals(steps[i-1].getReason(), step.getReason());
          if (check.getErrCause() != null) {
            Assert.assertEquals(check.getErrCause(),
                TaskAttemptTerminationCause.valueOf(step.getAttempt().getTerminationCause()));
          }
          if (check.getNotesStr() != null) {
            String notes = Joiner.on("#").join(step.getNotes());
            for (String note : check.getNotesStr()) {
              Assert.assertTrue(note, notes.contains(notes));
            }
          }
        }
    
        Assert.assertEquals(CriticalPathStep.EntityType.DAG_COMMIT,
            criticalPath.get(criticalPath.size() - 1).getType());
        break;
      }
    }
    
    Assert.assertTrue(foundMatchingLength);
    
  }
  
  @Test (timeout=300000)
  public void testWithATS() throws Exception {
    usingATS = true;
    createTezSessionATS();
    runTests();
  }
  
  @Test (timeout=300000)
  public void testWithSimpleHistory() throws Exception {
    usingATS = false;
    createTezSessionSimpleHistory();
    runTests();
  }
  
  private void runTests() throws Exception {
    ApplicationId appId = tezSession.getAppMasterApplicationId();
    List<List<StepCheck[]>> stepsOptions = Lists.newArrayList();
    // run all test dags
    stepsOptions.add(testAttemptOfDownstreamVertexConnectedWithTwoUpstreamVerticesFailure());
    stepsOptions.add(testInputFailureCausesRerunOfTwoVerticesWithoutExit());
    stepsOptions.add(testMultiVersionInputFailureWithoutExit());
    stepsOptions.add(testCascadingInputFailureWithoutExitSuccess());
    stepsOptions.add(testTaskMultipleFailures());
    stepsOptions.add(testBasicInputFailureWithoutExit());
    stepsOptions.add(testBasicTaskFailure());
    stepsOptions.add(testBasicSuccessScatterGather());
    stepsOptions.add(testMultiVersionInputFailureWithExit());
    stepsOptions.add(testBasicInputFailureWithExit());
    stepsOptions.add(testInputFailureRerunCanSendOutputToTwoDownstreamVertices());
    stepsOptions.add(testCascadingInputFailureWithExitSuccess());
    stepsOptions.add(testInternalPreemption());
    
    // close session to flush
    if (tezSession != null) {
      tezSession.stop();
    }
    Thread.sleep((TezConstants.TEZ_DAG_SLEEP_TIME_BEFORE_EXIT*3)/2);
    
    // verify all dags
    for (int i=0; i<stepsOptions.size(); ++i) {
      verify(appId, i+1, stepsOptions.get(i));
    }    
  }
  
  private List<StepCheck[]> testBasicSuccessScatterGather() throws Exception {
    Configuration testConf = new Configuration(false);
    testConf.setInt(SimpleTestDAG.TEZ_SIMPLE_DAG_NUM_TASKS, 1);
    StepCheck[] check = { 
        createStep("v1 : 000000_0", CriticalPathDependency.INIT_DEPENDENCY),
        createStep("v2 : 000000_0", CriticalPathDependency.DATA_DEPENDENCY)
        };
    DAG dag = SimpleTestDAG.createDAG("testBasicSuccessScatterGather", testConf);
    runDAG(dag, DAGStatus.State.SUCCEEDED);
    return Collections.singletonList(check);
  }
  
  private List<StepCheck[]> testBasicTaskFailure() throws Exception {
    Configuration testConf = new Configuration(false);
    testConf.setInt(SimpleTestDAG.TEZ_SIMPLE_DAG_NUM_TASKS, 1);
    testConf.setBoolean(TestProcessor.getVertexConfName(
        TestProcessor.TEZ_FAILING_PROCESSOR_DO_FAIL, "v1"), true);
    testConf.set(TestProcessor.getVertexConfName(
        TestProcessor.TEZ_FAILING_PROCESSOR_FAILING_TASK_INDEX, "v1"), "0");
    testConf.setInt(TestProcessor.getVertexConfName(
        TestProcessor.TEZ_FAILING_PROCESSOR_FAILING_UPTO_TASK_ATTEMPT, "v1"), 0);

    StepCheck[] check = {
        createStep("v1 : 000000_0", CriticalPathDependency.INIT_DEPENDENCY),
        createStep("v1 : 000000_1", CriticalPathDependency.RETRY_DEPENDENCY),
        createStep("v2 : 000000_0", CriticalPathDependency.DATA_DEPENDENCY),
    };
    DAG dag = SimpleTestDAG.createDAG("testBasicTaskFailure", testConf);
    runDAG(dag, DAGStatus.State.SUCCEEDED);
    return Collections.singletonList(check);
  }
  
  private List<StepCheck[]> testTaskMultipleFailures() throws Exception {
    Configuration testConf = new Configuration(false);
    testConf.setInt(SimpleTestDAG.TEZ_SIMPLE_DAG_NUM_TASKS, 1);
    testConf.setBoolean(TestProcessor.getVertexConfName(
        TestProcessor.TEZ_FAILING_PROCESSOR_DO_FAIL, "v1"), true);
    testConf.set(TestProcessor.getVertexConfName(
        TestProcessor.TEZ_FAILING_PROCESSOR_FAILING_TASK_INDEX, "v1"), "0");
    testConf.setInt(TestProcessor.getVertexConfName(
        TestProcessor.TEZ_FAILING_PROCESSOR_FAILING_UPTO_TASK_ATTEMPT, "v1"), 1);

    StepCheck[] check = {
        createStep("v1 : 000000_0", CriticalPathDependency.INIT_DEPENDENCY),
        createStep("v1 : 000000_1", CriticalPathDependency.RETRY_DEPENDENCY),
        createStep("v1 : 000000_2", CriticalPathDependency.RETRY_DEPENDENCY),
        createStep("v2 : 000000_0", CriticalPathDependency.DATA_DEPENDENCY),
    };    
    
    DAG dag = SimpleTestDAG.createDAG("testTaskMultipleFailures", testConf);
    runDAG(dag, DAGStatus.State.SUCCEEDED);
    return Collections.singletonList(check);
  }
  
  private List<StepCheck[]> testBasicInputFailureWithExit() throws Exception {
    Configuration testConf = new Configuration(false);
    testConf.setInt(SimpleTestDAG.TEZ_SIMPLE_DAG_NUM_TASKS, 1);
    testConf.setBoolean(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_DO_FAIL, "v2"), true);
    testConf.setBoolean(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_DO_FAIL_AND_EXIT, "v2"), true);
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_TASK_INDEX, "v2"), "0");
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_TASK_ATTEMPT, "v2"), "0");
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_INPUT_INDEX, "v2"), "0");

    StepCheck[] check = {
        createStep("v1 : 000000_0", CriticalPathDependency.INIT_DEPENDENCY),
        createStep("v2 : 000000_0", CriticalPathDependency.DATA_DEPENDENCY),
        createStep("v1 : 000000_1", CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY),
        createStep("v2 : 000000_1", CriticalPathDependency.DATA_DEPENDENCY),
      };
    
    DAG dag = SimpleTestDAG.createDAG("testBasicInputFailureWithExit", testConf);
    runDAG(dag, DAGStatus.State.SUCCEEDED);
    return Collections.singletonList(check);
  }
  
  private List<StepCheck[]> testBasicInputFailureWithoutExit() throws Exception {
    Configuration testConf = new Configuration(false);
    testConf.setInt(SimpleTestDAG.TEZ_SIMPLE_DAG_NUM_TASKS, 1);
    testConf.setBoolean(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_DO_FAIL, "v2"), true);
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_TASK_INDEX, "v2"), "0");
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_TASK_ATTEMPT, "v2"), "0");
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_INPUT_INDEX, "v2"), "0");

    StepCheck[] check = {
        createStep("v1 : 000000_0", CriticalPathDependency.INIT_DEPENDENCY),
        createStep("v2 : 000000_0", CriticalPathDependency.DATA_DEPENDENCY),
        createStep("v1 : 000000_1", CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY),
        createStep("v2 : 000000_0", CriticalPathDependency.DATA_DEPENDENCY),
      };

    DAG dag = SimpleTestDAG.createDAG("testBasicInputFailureWithoutExit", testConf);
    runDAG(dag, DAGStatus.State.SUCCEEDED);
    return Collections.singletonList(check);
  }

  private List<StepCheck[]> testMultiVersionInputFailureWithExit() throws Exception {
    Configuration testConf = new Configuration(false);
    testConf.setInt(SimpleTestDAG.TEZ_SIMPLE_DAG_NUM_TASKS, 1);
    testConf.setBoolean(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_DO_FAIL, "v2"), true);
    testConf.setBoolean(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_DO_FAIL_AND_EXIT, "v2"), true);
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_TASK_INDEX, "v2"), "0");
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_TASK_ATTEMPT, "v2"), "0,1");
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_INPUT_INDEX, "v2"), "0");
    testConf.setInt(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_UPTO_INPUT_ATTEMPT, "v2"), 1);

    StepCheck[] check = {
        createStep("v1 : 000000_0", CriticalPathDependency.INIT_DEPENDENCY),
        createStep("v2 : 000000_0", CriticalPathDependency.DATA_DEPENDENCY),
        createStep("v1 : 000000_1", CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY),
        createStep("v2 : 000000_1", CriticalPathDependency.DATA_DEPENDENCY),
        createStep("v1 : 000000_2", CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY),
        createStep("v2 : 000000_2", CriticalPathDependency.DATA_DEPENDENCY),
      };

    DAG dag = SimpleTestDAG.createDAG("testMultiVersionInputFailureWithExit", testConf);
    runDAG(dag, DAGStatus.State.SUCCEEDED);
    return Collections.singletonList(check);
  }

  private List<StepCheck[]> testMultiVersionInputFailureWithoutExit() throws Exception {
    Configuration testConf = new Configuration(false);
    testConf.setInt(SimpleTestDAG.TEZ_SIMPLE_DAG_NUM_TASKS, 1);
    testConf.setBoolean(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_DO_FAIL, "v2"), true);
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_TASK_INDEX, "v2"), "0");
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_TASK_ATTEMPT, "v2"), "0");
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_INPUT_INDEX, "v2"), "0");
    testConf.setInt(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_UPTO_INPUT_ATTEMPT, "v2"), 1);

    StepCheck[] check = {
        createStep("v1 : 000000_0", CriticalPathDependency.INIT_DEPENDENCY),
        createStep("v2 : 000000_0", CriticalPathDependency.DATA_DEPENDENCY),
        createStep("v1 : 000000_1", CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY),
        createStep("v2 : 000000_0", CriticalPathDependency.DATA_DEPENDENCY),
        createStep("v1 : 000000_2", CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY),
        createStep("v2 : 000000_0", CriticalPathDependency.DATA_DEPENDENCY),
      };

    DAG dag = SimpleTestDAG.createDAG("testMultiVersionInputFailureWithoutExit", testConf);
    runDAG(dag, DAGStatus.State.SUCCEEDED);
    return Collections.singletonList(check);
  }
  
  /**
   * Sets configuration for cascading input failure tests that
   * use SimpleTestDAG3Vertices.
   * @param testConf configuration
   * @param failAndExit whether input failure should trigger attempt exit 
   */
  private void setCascadingInputFailureConfig(Configuration testConf, 
                                              boolean failAndExit,
                                              int numTasks) {
    // v2 attempt0 succeeds.
    // v2 all tasks attempt1 input0 fail up to version 0.
    testConf.setInt(SimpleTestDAG3Vertices.TEZ_SIMPLE_DAG_NUM_TASKS, numTasks);
    testConf.setBoolean(TestInput.getVertexConfName(
            TestInput.TEZ_FAILING_INPUT_DO_FAIL, "v2"), true);
    testConf.setBoolean(TestInput.getVertexConfName(
            TestInput.TEZ_FAILING_INPUT_DO_FAIL_AND_EXIT, "v2"), failAndExit);
    testConf.set(TestInput.getVertexConfName(
            TestInput.TEZ_FAILING_INPUT_FAILING_TASK_INDEX, "v2"), "-1");
    testConf.set(TestInput.getVertexConfName(
            TestInput.TEZ_FAILING_INPUT_FAILING_TASK_ATTEMPT, "v2"), "1");
    testConf.set(TestInput.getVertexConfName(
            TestInput.TEZ_FAILING_INPUT_FAILING_INPUT_INDEX, "v2"), "0");
    testConf.setInt(TestInput.getVertexConfName(
            TestInput.TEZ_FAILING_INPUT_FAILING_UPTO_INPUT_ATTEMPT, "v2"),
            0);

    //v3 task0 attempt0 all inputs fails up to version 0.
    testConf.setBoolean(TestInput.getVertexConfName(
            TestInput.TEZ_FAILING_INPUT_DO_FAIL, "v3"), true);
    testConf.setBoolean(TestInput.getVertexConfName(
            TestInput.TEZ_FAILING_INPUT_DO_FAIL_AND_EXIT, "v3"), failAndExit);
    testConf.set(TestInput.getVertexConfName(
            TestInput.TEZ_FAILING_INPUT_FAILING_TASK_INDEX, "v3"), "0");
    testConf.set(TestInput.getVertexConfName(
            TestInput.TEZ_FAILING_INPUT_FAILING_TASK_ATTEMPT, "v3"), "0");
    testConf.set(TestInput.getVertexConfName(
            TestInput.TEZ_FAILING_INPUT_FAILING_INPUT_INDEX, "v3"), "-1");
    testConf.setInt(TestInput.getVertexConfName(
            TestInput.TEZ_FAILING_INPUT_FAILING_UPTO_INPUT_ATTEMPT, "v3"),
            0);
  }
  
  /**
   * Test cascading input failure without exit. Expecting success.
   * v1 -- v2 -- v3
   * v3 all-tasks attempt0 input0 fails. Wait. Triggering v2 rerun.
   * v2 task0 attempt1 input0 fails. Wait. Triggering v1 rerun.
   * v1 attempt1 rerun and succeeds. v2 accepts v1 attempt1 output. v2 attempt1 succeeds.
   * v3 attempt0 accepts v2 attempt1 output.
   * 
   * AM vertex succeeded order is v1, v2, v1, v2, v3.
   * @throws Exception
   */
  private List<StepCheck[]> testCascadingInputFailureWithoutExitSuccess() throws Exception {
    Configuration testConf = new Configuration(false);
    setCascadingInputFailureConfig(testConf, false, 1);

    StepCheck[] check = {
        createStep("v1 : 000000_0", CriticalPathDependency.INIT_DEPENDENCY),
        createStep("v2 : 000000_0", CriticalPathDependency.DATA_DEPENDENCY),
        createStep("v3 : 000000_0", CriticalPathDependency.DATA_DEPENDENCY),
        createStep("v2 : 000000_1", CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY),
        createStep("v1 : 000000_1", CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY),
        createStep("v2 : 000000_1", CriticalPathDependency.DATA_DEPENDENCY),
        createStep("v3 : 000000_0", CriticalPathDependency.DATA_DEPENDENCY),
    };
    
    DAG dag = SimpleTestDAG3Vertices.createDAG(
              "testCascadingInputFailureWithoutExitSuccess", testConf);
    runDAG(dag, DAGStatus.State.SUCCEEDED);
    return Collections.singletonList(check);
  }
  
  /**
   * Test cascading input failure with exit. Expecting success.
   * v1 -- v2 -- v3
   * v3 all-tasks attempt0 input0 fails. v3 attempt0 exits. Triggering v2 rerun.
   * v2 task0 attempt1 input0 fails. v2 attempt1 exits. Triggering v1 rerun.
   * v1 attempt1 rerun and succeeds. v2 accepts v1 attempt1 output. v2 attempt2 succeeds.
   * v3 attempt1 accepts v2 attempt2 output.
   * 
   * AM vertex succeeded order is v1, v2, v3, v1, v2, v3.
   * @throws Exception
   */
  private List<StepCheck[]> testCascadingInputFailureWithExitSuccess() throws Exception {
    Configuration testConf = new Configuration(false);
    setCascadingInputFailureConfig(testConf, true, 1);
    
    StepCheck[] check = {
        createStep("v1 : 000000_0", CriticalPathDependency.INIT_DEPENDENCY),
        createStep("v2 : 000000_0", CriticalPathDependency.DATA_DEPENDENCY),
        createStep("v3 : 000000_0", CriticalPathDependency.DATA_DEPENDENCY),
        createStep("v2 : 000000_1", CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY),
        createStep("v1 : 000000_1", CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY),
        createStep("v2 : 000000_2", CriticalPathDependency.DATA_DEPENDENCY),
        createStep("v3 : 000000_1", CriticalPathDependency.DATA_DEPENDENCY),
      };

    DAG dag = SimpleTestDAG3Vertices.createDAG(
              "testCascadingInputFailureWithExitSuccess", testConf);
    runDAG(dag, DAGStatus.State.SUCCEEDED);
    return Collections.singletonList(check);
  }
  
  /**
   * 1 NM is running and can run 4 containers based on YARN mini cluster defaults and 
   * Tez defaults for AM/task memory
   * v3 task0 reports read errors against both tasks of v2. This re-starts both of them.
   * Now all 4 slots are occupied 1 AM + 3 tasks
   * Now retries of v2 report read error against 1 task of v1. That re-starts.
   * Retry of v1 task has no space - so it preempts the least priority task (current tez logic)
   * v3 is preempted and re-run. Shows up on critical path as preempted failure.
   * Also v1 retry attempts note show that it caused preemption of v3
   * @throws Exception
   */
  private List<StepCheck[]> testInternalPreemption() throws Exception {
    Configuration testConf = new Configuration(false);
    setCascadingInputFailureConfig(testConf, false, 2);

    StepCheck[] check = {
        createStep("v1 : 00000[01]_0", CriticalPathDependency.INIT_DEPENDENCY),
        createStep("v2 : 00000[01]_0", CriticalPathDependency.DATA_DEPENDENCY),
        createStep("v3 : 000000_0", CriticalPathDependency.DATA_DEPENDENCY, 
            TaskAttemptTerminationCause.INTERNAL_PREEMPTION, null),
        createStep("v2 : 00000[01]_1", CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY),
        createStep("v1 : 000000_1", CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY,
            null, Collections.singletonList("preemption of v3")),
        createStep("v2 : 00000[01]_1", CriticalPathDependency.DATA_DEPENDENCY),
        createStep("v3 : 000000_1", CriticalPathDependency.DATA_DEPENDENCY)
    };
    
    DAG dag = SimpleTestDAG3Vertices.createDAG(
              "testInternalPreemption", testConf);
    runDAG(dag, DAGStatus.State.SUCCEEDED);
    return Collections.singletonList(check);
  }
  
  /**
   * Input failure of v3 causes rerun of both both v1 and v2 vertices. 
   *   v1  v2
   *    \ /
   *    v3
   * 
   * @throws Exception
   */
  private List<StepCheck[]> testInputFailureCausesRerunOfTwoVerticesWithoutExit() throws Exception {
    Configuration testConf = new Configuration(false);
    testConf.setInt(SimpleVTestDAG.TEZ_SIMPLE_V_DAG_NUM_TASKS, 1);
    testConf.setBoolean(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_DO_FAIL, "v3"), true);
    testConf.setBoolean(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_DO_FAIL_AND_EXIT, "v3"), false);
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_TASK_INDEX, "v3"), "0");
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_TASK_ATTEMPT, "v3"), "0");
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_INPUT_INDEX, "v3"), "-1");
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_UPTO_INPUT_ATTEMPT, "v3"), "1");
    
    StepCheck[] check = {
        // use regex for either vertices being possible on the path
        createStep("v[12] : 000000_0", CriticalPathDependency.INIT_DEPENDENCY),
        createStep("v3 : 000000_0", CriticalPathDependency.DATA_DEPENDENCY),
        createStep("v[12] : 000000_[01]", CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY),
        createStep("v3 : 000000_0", CriticalPathDependency.DATA_DEPENDENCY),
        createStep("v[12] : 000000_[012]", CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY),
        createStep("v3 : 000000_0", CriticalPathDependency.DATA_DEPENDENCY),
        createStep("v[12] : 000000_[12]", CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY),
        createStep("v3 : 000000_0", CriticalPathDependency.DATA_DEPENDENCY),
        createStep("v[12] : 000000_2", CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY),
        createStep("v3 : 000000_0", CriticalPathDependency.DATA_DEPENDENCY),
      };

    DAG dag = SimpleVTestDAG.createDAG(
            "testInputFailureCausesRerunOfTwoVerticesWithoutExit", testConf);
    runDAG(dag, DAGStatus.State.SUCCEEDED);
    return Collections.singletonList(check);
  }
  
  /**
   * Downstream(v3) attempt failure of a vertex connected with 
   * 2 upstream vertices.. 
   *   v1  v2
   *    \ /
   *    v3
   * 
   * @throws Exception
   */
  private List<StepCheck[]> testAttemptOfDownstreamVertexConnectedWithTwoUpstreamVerticesFailure() 
      throws Exception {
    Configuration testConf = new Configuration(false);
    testConf.setInt(SimpleVTestDAG.TEZ_SIMPLE_V_DAG_NUM_TASKS, 1);
    testConf.setBoolean(TestProcessor.getVertexConfName(
        TestProcessor.TEZ_FAILING_PROCESSOR_DO_FAIL, "v3"), true);
    testConf.set(TestProcessor.getVertexConfName(
        TestProcessor.TEZ_FAILING_PROCESSOR_FAILING_TASK_INDEX, "v3"), "0");
    testConf.setInt(TestProcessor.getVertexConfName(
        TestProcessor.TEZ_FAILING_PROCESSOR_FAILING_UPTO_TASK_ATTEMPT, "v3"), 1);
    
    StepCheck[] check = {
        // use regex for either vertices being possible on the path
        createStep("v[12] : 000000_0", CriticalPathDependency.INIT_DEPENDENCY),
        createStep("v3 : 000000_0", CriticalPathDependency.DATA_DEPENDENCY),
        createStep("v3 : 000000_1", CriticalPathDependency.RETRY_DEPENDENCY),
        createStep("v3 : 000000_2", CriticalPathDependency.RETRY_DEPENDENCY),
      };

    DAG dag = SimpleVTestDAG.createDAG(
            "testAttemptOfDownstreamVertexConnectedWithTwoUpstreamVerticesFailure", testConf);
    runDAG(dag, DAGStatus.State.SUCCEEDED);
    return Collections.singletonList(check);
  }
    
  /**
   * Input failure of v2,v3 trigger v1 rerun.
   * Both v2 and v3 report error on v1 and dont exit. So one of them triggers next
   * version of v1 and also consume the output of the next version. While the other
   * consumes the output of the next version of v1. 
   * Reruns can send output to 2 downstream vertices. 
   *     v1
   *    /  \
   *   v2   v3 
   * 
   * Also covers multiple consumer vertices report failure against same producer task.
   * @throws Exception
   */
  private List<StepCheck[]> testInputFailureRerunCanSendOutputToTwoDownstreamVertices() throws Exception {
    Configuration testConf = new Configuration(false);
    testConf.setInt(SimpleReverseVTestDAG.TEZ_SIMPLE_REVERSE_V_DAG_NUM_TASKS, 1);
    testConf.setBoolean(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_DO_FAIL, "v2"), true);
    testConf.setBoolean(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_DO_FAIL_AND_EXIT, "v2"), false);
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_TASK_INDEX, "v2"), "-1");
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_TASK_ATTEMPT, "v2"), "0");
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_INPUT_INDEX, "v2"), "-1");
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_UPTO_INPUT_ATTEMPT, "v2"), "0");
    
    testConf.setBoolean(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_DO_FAIL, "v3"), true);
    testConf.setBoolean(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_DO_FAIL_AND_EXIT, "v3"), false);
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_TASK_INDEX, "v3"), "-1");
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_TASK_ATTEMPT, "v3"), "0");
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_INPUT_INDEX, "v3"), "-1");
    testConf.set(TestInput.getVertexConfName(
        TestInput.TEZ_FAILING_INPUT_FAILING_UPTO_INPUT_ATTEMPT, "v3"), "0");
    
    StepCheck[] check = {
        // use regex for either vertices being possible on the path
      createStep("v1 : 000000_0", CriticalPathDependency.INIT_DEPENDENCY),
      createStep("v[23] : 000000_0", CriticalPathDependency.DATA_DEPENDENCY),
      createStep("v1 : 000000_1", CriticalPathDependency.OUTPUT_RECREATE_DEPENDENCY),
      createStep("v[23] : 000000_0", CriticalPathDependency.DATA_DEPENDENCY),
    };
    DAG dag = SimpleReverseVTestDAG.createDAG(
            "testInputFailureRerunCanSendOutputToTwoDownstreamVertices", testConf);
    runDAG(dag, DAGStatus.State.SUCCEEDED);
    return Collections.singletonList(check);
  }
  
}