Java Code Examples for org.apache.tez.dag.api.DAG#create()

The following examples show how to use org.apache.tez.dag.api.DAG#create(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestMockDAGAppMaster.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Submits a single-vertex DAG with 50000 tasks to the mock AM, with the
 * concurrent dispatcher enabled, and expects the DAG to reach SUCCEEDED
 * within the test timeout.
 *
 * @throws Exception if the client cannot be started or the DAG cannot be run
 */
@Ignore
@Test (timeout = 60000)
public void testTaskEventsProcessingSpeed() throws Exception {
  Logger.getRootLogger().setLevel(Level.WARN);
  TezConfiguration tezconf = new TezConfiguration(defaultConf);
  tezconf.setBoolean(TezConfiguration.TEZ_AM_USE_CONCURRENT_DISPATCHER, true);
  MockTezClient tezClient = new MockTezClient("testMockAM", tezconf, true, null, null, null,
      null, false, false, 30, 1000);
  tezClient.start();

  // Stop the client even when submission or the assertion fails, so a failing
  // run does not leave the started client behind.
  try {
    final String vAName = "A";

    DAG dag = DAG.create("testTaskEventsProcessingSpeed");
    Vertex vA = Vertex.create(vAName, ProcessorDescriptor.create("Proc.class"), 50000);
    dag.addVertex(vA);

    MockDAGAppMaster mockApp = tezClient.getLocalClient().getMockApp();
    mockApp.doSleep = false;
    DAGClient dagClient = tezClient.submitDAG(dag);
    DAGStatus status = dagClient.waitForCompletion();
    Assert.assertEquals(DAGStatus.State.SUCCEEDED, status.getState());
  } finally {
    tezClient.stop();
  }
}
 
Example 2
Source File: TestMockDAGAppMaster.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Delivers a scheduling-service error event to the mock AM while a DAG is
 * running, waits until the shutdown handler reports that shutdown was invoked,
 * and then expects the current DAG to still be in the RUNNING state.
 *
 * @throws Exception on client start, submission, or wait failures
 */
@Test (timeout = 10000)
public void testSchedulerErrorHandling() throws Exception {
  MockTezClient client = new MockTezClient("testMockAM", new TezConfiguration(defaultConf),
      true, null, null, null, null);
  client.start();

  MockDAGAppMaster appMaster = client.getLocalClient().getMockApp();
  MockContainerLauncher launcher = appMaster.getContainerLauncher();
  launcher.startScheduling(false);

  DAG dag = DAG.create("testSchedulerErrorHandling");
  dag.addVertex(Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 5));

  client.submitDAG(dag);
  launcher.waitTillContainersLaunched();

  String errorText =
      org.apache.hadoop.util.StringUtils.stringifyException(new RuntimeException("Mock error"));
  appMaster.handle(new DAGAppMasterEventSchedulingServiceError(errorText));

  // Poll until the shutdown handler reports that shutdown was invoked.
  boolean shutdownInvoked = appMaster.getShutdownHandler().wasShutdownInvoked();
  while (!shutdownInvoked) {
    Thread.sleep(100);
    shutdownInvoked = appMaster.getShutdownHandler().wasShutdownInvoked();
  }
  Assert.assertEquals(DAGState.RUNNING, appMaster.getContext().getCurrentDAG().getState());
}
 
Example 3
Source File: TestAMRecovery.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the two-vertex DAG {@code v1 --> v2} used by the recovery tests.
 * <p>
 * v1 is managed by {@code ScheduleControlledVertexManager}; v2 is managed by
 * the supplied {@code vertexManagerClass}. Both plugins receive a user payload
 * created from {@code tezConf} after {@code FAIL_ON_PARTIAL_FINISHED} has been
 * written into it.
 *
 * @param dagName name given to the created DAG
 * @param vertexManagerClass vertex manager plugin class installed on v2
 * @param dmType data movement type of the v1 to v2 edge
 * @param failOnParitialCompleted value stored (as "true"/"false") under
 *        {@code FAIL_ON_PARTIAL_FINISHED} in {@code tezConf}
 * @return the assembled DAG
 * @throws IOException propagated from the payload and descriptor creation calls
 */
private DAG createDAG(String dagName, Class vertexManagerClass, DataMovementType dmType,
    boolean failOnParitialCompleted) throws IOException {
  // String.valueOf(boolean) yields exactly "true" or "false".
  tezConf.set(FAIL_ON_PARTIAL_FINISHED, String.valueOf(failOnParitialCompleted));

  DAG dag = DAG.create(dagName);
  UserPayload emptyPayload = UserPayload.create(null);

  Vertex upstream = Vertex.create("v1", MyProcessor.getProcDesc(), 2);
  VertexManagerPluginDescriptor upstreamManager =
      VertexManagerPluginDescriptor.create(ScheduleControlledVertexManager.class.getName());
  upstream.setVertexManagerPlugin(
      upstreamManager.setUserPayload(TezUtils.createUserPayloadFromConf(tezConf)));

  Vertex downstream = Vertex.create("v2", DoNothingProcessor.getProcDesc(), 2);
  VertexManagerPluginDescriptor downstreamManager =
      VertexManagerPluginDescriptor.create(vertexManagerClass.getName());
  downstream.setVertexManagerPlugin(
      downstreamManager.setUserPayload(TezUtils.createUserPayloadFromConf(tezConf)));

  dag.addVertex(upstream).addVertex(downstream);

  EdgeProperty edgeProperty = EdgeProperty.create(dmType,
      DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(emptyPayload), TestInput.getInputDesc(emptyPayload));
  dag.addEdge(Edge.create(upstream, downstream, edgeProperty));
  return dag;
}
 
Example 4
Source File: SimpleTestDAG.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a two-vertex DAG {@code v1 -> v2} joined by a scatter-gather edge.
 * <p>
 * When {@code conf} is non-null, the per-vertex task count is read from
 * {@code TEZ_SIMPLE_DAG_NUM_TASKS} and the processor/edge payload is built
 * from {@code conf}; otherwise the default task count and a payload created
 * from {@code null} are used.
 *
 * @param name name given to the created DAG
 * @param conf optional configuration; may be {@code null}
 * @return the assembled DAG
 * @throws Exception propagated from payload or descriptor creation
 */
public static DAG createDAG(String name, Configuration conf) throws Exception {
  UserPayload userPayload = UserPayload.create(null);
  int numTasks = TEZ_SIMPLE_DAG_NUM_TASKS_DEFAULT;
  if (conf != null) {
    numTasks = conf.getInt(TEZ_SIMPLE_DAG_NUM_TASKS, TEZ_SIMPLE_DAG_NUM_TASKS_DEFAULT);
    userPayload = TezUtils.createUserPayloadFromConf(conf);
  }

  DAG dag = DAG.create(name);
  Vertex upstream =
      Vertex.create("v1", TestProcessor.getProcDesc(userPayload), numTasks, defaultResource);
  Vertex downstream =
      Vertex.create("v2", TestProcessor.getProcDesc(userPayload), numTasks, defaultResource);
  dag.addVertex(upstream).addVertex(downstream);

  EdgeProperty scatterGather = EdgeProperty.create(
      DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(userPayload),
      TestInput.getInputDesc(userPayload));
  dag.addEdge(Edge.create(upstream, downstream, scatterGather));
  return dag;
}
 
Example 5
Source File: TestTezClientUtils.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that the session token created for an application is shipped in the
 * AM container launch context: the service data must hold exactly one entry,
 * and the token read back from it must carry the same identifier as the token
 * stored in the credentials.
 *
 * @throws IOException propagated from token or submission-context handling
 * @throws YarnException propagated from submission-context creation
 */
@Test(timeout = 5000)
public void testSessionTokenInAmClc() throws IOException, YarnException {
  TezConfiguration conf = new TezConfiguration();
  conf.set(TezConfiguration.TEZ_AM_STAGING_DIR, STAGING_DIR.getAbsolutePath());

  ApplicationId applicationId = ApplicationId.newInstance(1000, 1);
  DAG dag = DAG.create("testdag");
  Vertex testVertex =
      Vertex.create("testVertex", ProcessorDescriptor.create("processorClassname"), 1)
          .setTaskLaunchCmdOpts("initialLaunchOpts");
  dag.addVertex(testVertex);

  Credentials creds = new Credentials();
  JobTokenSecretManager secretManager = new JobTokenSecretManager();
  TezClientUtils.createSessionToken(applicationId.toString(), secretManager, creds);
  Token<JobTokenIdentifier> expectedToken = TokenCache.getSessionToken(creds);
  assertNotNull(expectedToken);

  AMConfiguration amConfiguration =
      new AMConfiguration(conf, new HashMap<String, LocalResource>(), creds);
  ApplicationSubmissionContext submissionContext =
      TezClientUtils.createApplicationSubmissionContext(applicationId, dag, "amName",
          amConfiguration, new HashMap<String, LocalResource>(), creds, false,
          new TezApiVersionInfo(), null, null);

  Map<String, ByteBuffer> serviceData = submissionContext.getAMContainerSpec().getServiceData();
  assertNotNull(serviceData);
  assertEquals(1, serviceData.size());

  // Deserialize the single service-data entry back into a token.
  ByteBuffer serializedToken = serviceData.values().iterator().next();
  DataInputByteBuffer in = new DataInputByteBuffer();
  in.reset(serializedToken);
  Token<JobTokenIdentifier> shippedToken = new Token<JobTokenIdentifier>();
  shippedToken.readFields(in);

  assertTrue(Arrays.equals(expectedToken.getIdentifier(), shippedToken.getIdentifier()));
}
 
Example 6
Source File: TestTezJobs.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a Parent -> Child DAG of {@code FailingAttemptProcessor} vertices with
 * {@code TEZ_VERTEX_FAILURES_MAXPERCENT} set to "50.0f" and a single allowed
 * task attempt, and expects the DAG to finish in the SUCCEEDED state.
 *
 * @throws TezException propagated from client or DAG operations
 * @throws InterruptedException if waiting for completion is interrupted
 * @throws IOException propagated from client or DAG operations
 */
@Test(timeout = 60000)
public void testVertexFailuresMaxPercent() throws TezException, InterruptedException, IOException {
  TezConfiguration conf = new TezConfiguration(mrrTezCluster.getConfig());
  conf.set(TezConfiguration.TEZ_VERTEX_FAILURES_MAXPERCENT, "50.0f");
  conf.setInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, 1);

  TezClient client = TezClient.create("TestVertexFailuresMaxPercent", conf);
  client.start();

  try {
    DAG dag = DAG.create("TestVertexFailuresMaxPercent");
    String processorName = FailingAttemptProcessor.class.getName();
    Vertex parent = Vertex.create("Parent", ProcessorDescriptor.create(processorName), 2);
    Vertex child = Vertex.create("Child", ProcessorDescriptor.create(processorName), 2);

    OrderedPartitionedKVEdgeConfig shuffleConfig = OrderedPartitionedKVEdgeConfig
        .newBuilder(Text.class.getName(), IntWritable.class.getName(),
            HashPartitioner.class.getName())
        .setFromConfiguration(conf)
        .build();

    dag.addVertex(parent);
    dag.addVertex(child);
    dag.addEdge(Edge.create(parent, child, shuffleConfig.createDefaultEdgeProperty()));

    DAGClient dagClient = client.submitDAG(dag);
    dagClient.waitForCompletion();
    DAGStatus.State finalState = dagClient.getDAGStatus(null).getState();
    Assert.assertEquals(DAGStatus.State.SUCCEEDED, finalState);
  } finally {
    client.stop();
  }
}
 
Example 7
Source File: TestPreemption.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a DAG named {@code "test-" + dagCount} (incrementing
 * {@code dagCount}) with two five-task vertices, A and B, connected by one
 * edge of the requested data movement type.
 *
 * @param dmType data movement type of the A to B edge
 * @return the assembled DAG
 */
DAG createDAG(DataMovementType dmType) {
  DAG dag = DAG.create("test-" + dagCount++);
  Vertex source = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 5);
  Vertex sink = Vertex.create("B", ProcessorDescriptor.create("Proc.class"), 5);

  EdgeProperty edgeProperty = EdgeProperty.create(dmType, DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL, OutputDescriptor.create("O.class"),
      InputDescriptor.create("I.class"));
  Edge sourceToSink = Edge.create(source, sink, edgeProperty);

  dag.addVertex(source);
  dag.addVertex(sink);
  dag.addEdge(sourceToSink);
  return dag;
}
 
Example 8
Source File: TestMRRJobsDAGApi.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a single sleep-processor DAG in non-session mode with the AM staging
 * directory placed on the local file system, then checks that the DAG
 * succeeded and that its counters (including the FileSystemCounter group and
 * GC_TIME_MILLIS) are available.
 *
 * @throws Exception on file system, client, or DAG failures
 */
@Test(timeout = 60000)
public void testNonDefaultFSStagingDir() throws Exception {
  SleepProcessorConfig spConf = new SleepProcessorConfig(1);

  DAG dag = DAG.create("TezSleepProcessor");
  Vertex vertex = Vertex.create("SleepVertex", ProcessorDescriptor.create(
          SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 1,
      Resource.newInstance(1024, 1));
  dag.addVertex(vertex);

  TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
  // Random suffix keeps repeated runs from sharing a staging directory.
  Path stagingDir = new Path(TEST_ROOT_DIR, "testNonDefaultFSStagingDir"
      + random.nextInt(100000));
  FileSystem localFs = FileSystem.getLocal(tezConf);
  stagingDir = localFs.makeQualified(stagingDir);
  localFs.mkdirs(stagingDir);
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());

  TezClient tezSession = TezClient.create("TezSleepProcessor", tezConf, false);
  tezSession.start();

  // Stop the client even when submission, polling, or an assertion fails.
  try {
    DAGClient dagClient = tezSession.submitDAG(dag);

    DAGStatus dagStatus = dagClient.getDAGStatus(null);
    while (!dagStatus.isCompleted()) {
      LOG.info("Waiting for job to complete. Sleeping for 500ms." + " Current state: "
          + dagStatus.getState());
      Thread.sleep(500L);
      dagStatus = dagClient.getDAGStatus(null);
    }
    // Fetch the final status again, this time asking for counters.
    dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));

    assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState());
    assertNotNull(dagStatus.getDAGCounters());
    assertNotNull(dagStatus.getDAGCounters().getGroup(FileSystemCounter.class.getName()));
    assertNotNull(dagStatus.getDAGCounters().findCounter(TaskCounter.GC_TIME_MILLIS));
    ExampleDriver.printDAGStatus(dagClient, new String[] { "SleepVertex" }, true, true);
  } finally {
    tezSession.stop();
  }
}
 
Example 9
Source File: SixLevelsFailingDAG.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the DAG by assigning a new {@code DAG} to the {@code dag} member and
 * delegating the graph construction to {@code addDAGVerticesAndEdges()}.
 * <p>
 * NOTE(review): {@code payload} and {@code dag} are not declared in this
 * method; they are presumably static fields of the enclosing class, which
 * would make this method unsafe to call concurrently. Confirm against the
 * class.
 *
 * @param name name given to the created DAG
 * @param conf optional configuration; when non-null it replaces {@code payload}
 * @return the value of {@code dag} after vertices and edges have been added
 * @throws Exception propagated from payload creation or graph construction
 */
public static DAG createDAG(String name, Configuration conf) throws Exception {
    final boolean hasConf = conf != null;
    if (hasConf) {
      payload = TezUtils.createUserPayloadFromConf(conf);
    }

    dag = DAG.create(name);
    addDAGVerticesAndEdges();
    return dag;
}
 
Example 10
Source File: SimpleReverseVTestDAG.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a three-vertex DAG in which v1 feeds both v2 and v3 through
 * scatter-gather edges.
 * <p>
 * When {@code conf} is non-null, the per-vertex task count is read from
 * {@code TEZ_SIMPLE_REVERSE_V_DAG_NUM_TASKS} and the payload is built from
 * {@code conf}; otherwise the default task count and a payload created from
 * {@code null} are used.
 *
 * @param name name given to the created DAG
 * @param conf optional configuration; may be {@code null}
 * @return the assembled DAG
 * @throws Exception propagated from payload or descriptor creation
 */
public static DAG createDAG(String name, Configuration conf) throws Exception {
  UserPayload userPayload = UserPayload.create(null);
  int numTasks = TEZ_SIMPLE_REVERSE_V_DAG_NUM_TASKS_DEFAULT;
  if (conf != null) {
    numTasks = conf.getInt(TEZ_SIMPLE_REVERSE_V_DAG_NUM_TASKS,
        TEZ_SIMPLE_REVERSE_V_DAG_NUM_TASKS_DEFAULT);
    userPayload = TezUtils.createUserPayloadFromConf(conf);
  }

  DAG dag = DAG.create(name);
  Vertex root =
      Vertex.create("v1", TestProcessor.getProcDesc(userPayload), numTasks, defaultResource);
  Vertex leftLeaf =
      Vertex.create("v2", TestProcessor.getProcDesc(userPayload), numTasks, defaultResource);
  Vertex rightLeaf =
      Vertex.create("v3", TestProcessor.getProcDesc(userPayload), numTasks, defaultResource);
  dag.addVertex(root).addVertex(leftLeaf).addVertex(rightLeaf);

  // One freshly created edge property per edge, v1 -> v2 first, then v1 -> v3.
  for (Vertex leaf : new Vertex[] {leftLeaf, rightLeaf}) {
    EdgeProperty scatterGather = EdgeProperty.create(
        DataMovementType.SCATTER_GATHER,
        DataSourceType.PERSISTED,
        SchedulingType.SEQUENTIAL,
        TestOutput.getOutputDesc(userPayload),
        TestInput.getInputDesc(userPayload));
    dag.addEdge(Edge.create(root, leaf, scatterGather));
  }
  return dag;
}
 
Example 11
Source File: TestGroupConstParallelTez.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the plan in a container node keyed "DAGName", starts a "test" script
 * state that records the node, and returns an empty DAG named after the
 * node's operator key.
 *
 * @param tezPlan plan wrapped into the container node
 * @param pc not used by this method
 * @return a new DAG whose name is the node's operator key string
 */
private DAG getTezDAG(TezOperPlan tezPlan, PigContext pc) {
    TezPlanContainerNode planNode =
            new TezPlanContainerNode(OperatorKey.genOpKey("DAGName"), tezPlan);

    TezScriptState state = new TezScriptState("test");
    ScriptState.start(state);
    state.setDAGScriptInfo(planNode);

    return DAG.create(planNode.getOperatorKey().toString());
}
 
Example 12
Source File: TezJobCompiler.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a DAG named after the plan node's operator key, gives it fresh
 * credentials, and lets a {@code TezDagBuilder} visit the node's operator plan
 * to populate it.
 *
 * @param tezPlanNode node supplying the DAG name and the operator plan
 * @param localResources local resources handed to the DAG builder
 * @return the DAG after the builder has visited the plan
 * @throws IOException propagated from the DAG builder
 * @throws YarnException propagated from the DAG builder
 */
public DAG buildDAG(TezPlanContainerNode tezPlanNode, Map<String, LocalResource> localResources)
        throws IOException, YarnException {
    String dagName = tezPlanNode.getOperatorKey().toString();
    DAG dag = DAG.create(dagName);
    dag.setCredentials(new Credentials());

    new TezDagBuilder(pigContext, tezPlanNode.getTezOperPlan(), dag, localResources).visit();
    return dag;
}
 
Example 13
Source File: TestTezClientUtils.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * With {@code TEZ_AM_LOG_LEVEL} set to "WARN", checks that the single AM
 * launch command contains the expected log4j system properties and that no
 * {@code TEZ_CONTAINER_LOG_PARAMS} entry is placed in the AM environment.
 *
 * @throws IOException propagated from token or submission-context handling
 * @throws YarnException propagated from submission-context creation
 */
@Test(timeout = 5000)
public void testAMLoggingOptsSimple() throws IOException, YarnException {
  TezConfiguration conf = new TezConfiguration();
  conf.set(TezConfiguration.TEZ_AM_LOG_LEVEL, "WARN");
  conf.set(TezConfiguration.TEZ_AM_STAGING_DIR, STAGING_DIR.getAbsolutePath());

  ApplicationId applicationId = ApplicationId.newInstance(1000, 1);
  Credentials creds = new Credentials();
  JobTokenSecretManager secretManager = new JobTokenSecretManager();
  TezClientUtils.createSessionToken(applicationId.toString(), secretManager, creds);

  DAG dag = DAG.create("testdag");
  Vertex testVertex =
      Vertex.create("testVertex", ProcessorDescriptor.create("processorClassname"), 1)
          .setTaskLaunchCmdOpts("initialLaunchOpts");
  dag.addVertex(testVertex);

  AMConfiguration amConfiguration =
      new AMConfiguration(conf, new HashMap<String, LocalResource>(), creds);
  ApplicationSubmissionContext submissionContext =
      TezClientUtils.createApplicationSubmissionContext(applicationId, dag, "amName",
          amConfiguration, new HashMap<String, LocalResource>(), creds, false,
          new TezApiVersionInfo(), null, null);

  // Fragments that must all appear inside the one AM launch command.
  List<String> expectedFragments = new LinkedList<String>();
  expectedFragments.add("-Dlog4j.configuratorClass=org.apache.tez.common.TezLog4jConfigurator");
  expectedFragments.add("-Dlog4j.configuration=" + TezConstants.TEZ_CONTAINER_LOG4J_PROPERTIES_FILE);
  expectedFragments.add("-D" + YarnConfiguration.YARN_APP_CONTAINER_LOG_DIR + "=" +
      ApplicationConstants.LOG_DIR_EXPANSION_VAR);
  expectedFragments.add("-D" + TezConstants.TEZ_ROOT_LOGGER_NAME + "=" + "WARN" + "," +
      TezConstants.TEZ_CONTAINER_LOGGER_NAME);

  List<String> launchCommands = submissionContext.getAMContainerSpec().getCommands();
  assertEquals(1, launchCommands.size());
  for (String fragment : expectedFragments) {
    assertTrue(launchCommands.get(0).contains(fragment));
  }

  Map<String, String> amEnvironment = submissionContext.getAMContainerSpec().getEnvironment();
  assertNull(amEnvironment.get(TezConstants.TEZ_CONTAINER_LOG_PARAMS));
}
 
Example 14
Source File: TestATSHistoryWithACLs.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Test Disable Logging for all dags in a session
 * due to failure to create domain in session start.
 * <p>
 * Two sleep-processor DAGs, each with its own DAG-level view ACLs, are
 * submitted to one session. The first is asserted to succeed; the second is
 * only waited on until it completes.
 *
 * @throws Exception on file system, client, or DAG failures
 */
@Test (timeout=50000)
public void testDisableSessionLogging() throws Exception {
  String viewAcls = "nobody nobody_group";
  SleepProcessorConfig spConf = new SleepProcessorConfig(1);

  DAG dag = DAG.create("TezSleepProcessor");
  Vertex vertex = Vertex.create("SleepVertex", ProcessorDescriptor.create(
          SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 1,
          Resource.newInstance(256, 1));
  dag.addVertex(vertex);
  DAGAccessControls accessControls = new DAGAccessControls();
  accessControls.setUsersWithViewACLs(Collections.singleton("nobody2"));
  accessControls.setGroupsWithViewACLs(Collections.singleton("nobody_group2"));
  dag.setAccessControls(accessControls);

  TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
  tezConf.set(TezConfiguration.TEZ_AM_VIEW_ACLS, viewAcls);
  tezConf.set(TezConfiguration.TEZ_HISTORY_LOGGING_SERVICE_CLASS,
      ATSHistoryLoggingService.class.getName());
  Path remoteStagingDir = remoteFs.makeQualified(new Path("/tmp", String.valueOf(random
      .nextInt(100000))));
  remoteFs.mkdirs(remoteStagingDir);
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString());

  TezClient tezSession = TezClient.create("TezSleepProcessor", tezConf, true);
  tezSession.start();

  // Stop the session even when a submission, the polling, or the assertion
  // fails, so a failing run does not leave the session running.
  try {
    //////submit first dag //////
    DAGClient dagClient = tezSession.submitDAG(dag);
    DAGStatus dagStatus = dagClient.getDAGStatus(null);
    while (!dagStatus.isCompleted()) {
      LOG.info("Waiting for job to complete. Sleeping for 500ms." + " Current state: "
          + dagStatus.getState());
      Thread.sleep(500L);
      dagStatus = dagClient.getDAGStatus(null);
    }
    assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState());

    //////submit second dag//////
    DAG dag2 = DAG.create("TezSleepProcessor2");
    vertex = Vertex.create("SleepVertex", ProcessorDescriptor.create(
          SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 1,
       Resource.newInstance(256, 1));
    dag2.addVertex(vertex);
    accessControls = new DAGAccessControls();
    accessControls.setUsersWithViewACLs(Collections.singleton("nobody3"));
    accessControls.setGroupsWithViewACLs(Collections.singleton("nobody_group3"));
    dag2.setAccessControls(accessControls);
    dagClient = tezSession.submitDAG(dag2);
    dagStatus = dagClient.getDAGStatus(null);
    // The second DAG is only waited on; its final state is not asserted.
    while (!dagStatus.isCompleted()) {
      LOG.info("Waiting for job to complete. Sleeping for 500ms." + " Current state: "
                + dagStatus.getState());
      Thread.sleep(500L);
      dagStatus = dagClient.getDAGStatus(null);
    }
  } finally {
    tezSession.stop();
  }
}
 
Example 15
Source File: TopK.java    From sequenceiq-samples with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the "topk" DAG: a tokenizer vertex reading {@code inputPath}, a sum
 * vertex with {@code numPartitions} tasks, and a single-task writer vertex
 * writing to {@code outputPath}.
 *
 * @param tezConf configuration copied into the MR input and output builders
 * @param inputPath path read through {@code TextInputFormat}
 * @param outputPath path written through {@code TextOutputFormat}
 * @param columnIndex integer string placed in the tokenizer's payload
 * @param top integer string (K) placed in the sum and writer payloads
 * @param numPartitions integer string giving the sum vertex parallelism
 * @return the assembled DAG
 * @throws IOException propagated from payload or descriptor creation
 */
private DAG createDAG(TezConfiguration tezConf, String inputPath, String outputPath,
        String columnIndex, String top, String numPartitions) throws IOException {

    DataSourceDescriptor source = MRInput
            .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath)
            .build();
    DataSinkDescriptor sink = MROutput
            .createConfigBuilder(new Configuration(tezConf), TextOutputFormat.class, outputPath)
            .build();

    ProcessorDescriptor tokenizerProcessor =
            ProcessorDescriptor.create(TokenProcessor.class.getName())
                    .setUserPayload(createPayload(Integer.valueOf(columnIndex)));
    Vertex tokenizer = Vertex.create(TOKENIZER, tokenizerProcessor).addDataSource(INPUT, source);

    int topK = Integer.valueOf(top);
    ProcessorDescriptor sumProcessor =
            ProcessorDescriptor.create(SumProcessor.class.getName())
                    .setUserPayload(createPayload(topK));
    Vertex sum = Vertex.create(SUM, sumProcessor, Integer.valueOf(numPartitions));

    // parallelism must be set to 1 as the writer needs to see the global picture of
    // the data set
    // multiple tasks from the writer will result in multiple list of the top K
    // elements as all task will take the partitioned data's top K element
    ProcessorDescriptor writerProcessor =
            ProcessorDescriptor.create(Writer.class.getName())
                    .setUserPayload(createPayload(topK));
    Vertex writer = Vertex.create(WRITER, writerProcessor, 1).addDataSink(OUTPUT, sink);

    OrderedPartitionedKVEdgeConfig tokenizerToSum = OrderedPartitionedKVEdgeConfig
            .newBuilder(Text.class.getName(), IntWritable.class.getName(),
                    HashPartitioner.class.getName())
            .build();
    UnorderedKVEdgeConfig sumToWriter = UnorderedKVEdgeConfig
            .newBuilder(IntWritable.class.getName(), Text.class.getName())
            .build();

    DAG dag = DAG.create("topk");
    dag.addVertex(tokenizer);
    dag.addVertex(sum);
    dag.addVertex(writer);
    dag.addEdge(Edge.create(tokenizer, sum, tokenizerToSum.createDefaultEdgeProperty()));
    return dag.addEdge(
            Edge.create(sum, writer, sumToWriter.createDefaultBroadcastEdgeProperty()));
}
 
Example 16
Source File: TestSpeculation.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Test single task speculation.
 * <p>
 * For each configured value of
 * {@code TEZ_AM_LEGACY_SPECULATIVE_SINGLE_TASK_VERTEX_TIMEOUT}, a one-task DAG
 * is run and the number of attempts of that task is compared with the expected
 * count. When more than one attempt is expected, attempt 1 must be the
 * successful one and attempt 0 must carry the
 * {@code TERMINATED_EFFECTIVE_SPECULATION} termination cause.
 *
 * @throws Exception the exception
 */
@Retry
@Test (timeout = 10000)
public void testSingleTaskSpeculation() throws Exception {
  // Map<Timeout conf value, expected number of tasks>
  Map<Long, Integer> confToExpected = new HashMap<Long, Integer>();
  confToExpected.put(Long.MAX_VALUE >> 1, 1); // Really long time to speculate
  confToExpected.put(100L, 2);
  confToExpected.put(-1L, 1); // Don't speculate
  defaultConf.setLong(TezConfiguration.TEZ_AM_SOONEST_RETRY_AFTER_NO_SPECULATE, 50);
  // Each iteration reconfigures defaultConf, creates a fresh session, and stops it at the end.
  for(Map.Entry<Long, Integer> entry : confToExpected.entrySet()) {
    defaultConf.setLong(
            TezConfiguration.TEZ_AM_LEGACY_SPECULATIVE_SINGLE_TASK_VERTEX_TIMEOUT,
            entry.getKey());

    DAG dag = DAG.create("test");
    Vertex vA = Vertex.create("A",
            ProcessorDescriptor.create("Proc.class"),
            1);
    dag.addVertex(vA);

    MockTezClient tezClient = createTezSession();

    DAGClient dagClient = tezClient.submitDAG(dag);
    DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG();
    TezVertexID vertexId = TezVertexID.getInstance(dagImpl.getID(), 0);
    // original attempt is killed and speculative one is successful
    TezTaskAttemptID killedTaId =
        TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 0);
    TezTaskAttemptID successTaId =
        TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 1);
    Thread.sleep(200);
    // cause speculation trigger
    mockLauncher.setStatusUpdatesForTask(killedTaId, 100);

    mockLauncher.startScheduling(true);
    dagClient.waitForCompletion();
    Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState());
    Task task = dagImpl.getTask(killedTaId.getTaskID());
    Assert.assertEquals(entry.getValue().intValue(), task.getAttempts().size());
    if (entry.getValue() > 1) {
      Assert.assertEquals(successTaId, task.getSuccessfulAttempt().getID());
      TaskAttempt killedAttempt = task.getAttempt(killedTaId);
      // NOTE(review): the boolean returned by contains(...) is discarded, so this
      // line verifies nothing. It probably needs an Assert.assertTrue(...) wrapper;
      // confirm the exact diagnostic text emitted for the killed attempt first.
      Joiner.on(",").join(killedAttempt.getDiagnostics()).contains("Killed as speculative attempt");
      Assert.assertEquals(TaskAttemptTerminationCause.TERMINATED_EFFECTIVE_SPECULATION,
              killedAttempt.getTerminationCause());
    }
    tezClient.stop();
  }
}
 
Example 17
Source File: SimpleTestDAG.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a six-vertex DAG for testing vertex topological ordering. The
 * vertices are deliberately added out of topological order.
 * <pre>
 *  v1  v2
 *   \  /
 *    v3
 *   /  \
 *  v4  v5
 *   \  /
 *    v6
 * </pre>
 * When {@code conf} is non-null, the per-vertex task count is read from
 * {@code TEZ_SIMPLE_DAG_NUM_TASKS} and the payload is built from {@code conf};
 * otherwise the default task count and a payload created from {@code null}
 * are used.
 *
 * @param name name given to the created DAG
 * @param conf optional configuration; may be {@code null}
 * @return the assembled DAG
 * @throws Exception propagated from payload or descriptor creation
 */
public static DAG createDAGForVertexOrder(String name, Configuration conf) throws Exception {
  UserPayload userPayload = UserPayload.create(null);
  int numTasks = TEZ_SIMPLE_DAG_NUM_TASKS_DEFAULT;
  if (conf != null) {
    numTasks = conf.getInt(TEZ_SIMPLE_DAG_NUM_TASKS, TEZ_SIMPLE_DAG_NUM_TASKS_DEFAULT);
    userPayload = TezUtils.createUserPayloadFromConf(conf);
  }
  DAG dag = DAG.create(name);

  Vertex v1 = Vertex.create("v1", TestProcessor.getProcDesc(userPayload), numTasks, defaultResource);
  Vertex v2 = Vertex.create("v2", TestProcessor.getProcDesc(userPayload), numTasks, defaultResource);
  Vertex v3 = Vertex.create("v3", TestProcessor.getProcDesc(userPayload), numTasks, defaultResource);
  Vertex v4 = Vertex.create("v4", TestProcessor.getProcDesc(userPayload), numTasks, defaultResource);
  Vertex v5 = Vertex.create("v5", TestProcessor.getProcDesc(userPayload), numTasks, defaultResource);
  Vertex v6 = Vertex.create("v6", TestProcessor.getProcDesc(userPayload), numTasks, defaultResource);

  // add vertex not in the topological order, since we are using this dag for testing vertex topological order
  Vertex[] insertionOrder = {v4, v5, v6, v1, v2, v3};
  for (Vertex vertex : insertionOrder) {
    dag.addVertex(vertex);
  }

  // {source, destination} pairs, in the order the edges are added.
  Vertex[][] links = {
      {v1, v3}, {v2, v3},
      {v3, v4}, {v3, v5},
      {v4, v6}, {v5, v6},
  };
  for (Vertex[] link : links) {
    // Every edge gets its own, identically configured edge property.
    EdgeProperty scatterGather = EdgeProperty.create(
        DataMovementType.SCATTER_GATHER,
        DataSourceType.PERSISTED,
        SchedulingType.SEQUENTIAL,
        TestOutput.getOutputDesc(userPayload),
        TestInput.getInputDesc(userPayload));
    dag.addEdge(Edge.create(link[0], link[1], scatterGather));
  }

  return dag;
}
 
Example 18
Source File: TestMRRJobsDAGApi.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a two-task sleep-processor DAG in non-session mode with
 * {@code TEZ_SIMPLE_HISTORY_LOGGING_DIR} pointing at a local directory, then
 * checks that the DAG succeeded and that a non-empty file whose name starts
 * with {@code SimpleHistoryLoggingService.LOG_FILE_NAME_PREFIX} exists there.
 *
 * @throws IOException on file system failures
 * @throws InterruptedException if polling for completion is interrupted
 * @throws TezException propagated from client or DAG operations
 * @throws ClassNotFoundException declared by the original test signature
 * @throws YarnException declared by the original test signature
 */
@Test(timeout = 60000)
public void testHistoryLogging() throws IOException,
    InterruptedException, TezException, ClassNotFoundException, YarnException {
  SleepProcessorConfig spConf = new SleepProcessorConfig(1);

  DAG dag = DAG.create("TezSleepProcessorHistoryLogging");
  Vertex vertex = Vertex.create("SleepVertex", ProcessorDescriptor.create(
          SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 2,
      Resource.newInstance(1024, 1));
  dag.addVertex(vertex);

  TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
  Path remoteStagingDir = remoteFs.makeQualified(new Path("/tmp", String.valueOf(random
      .nextInt(100000))));
  remoteFs.mkdirs(remoteStagingDir);
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString());

  FileSystem localFs = FileSystem.getLocal(tezConf);
  Path historyLogDir = new Path(TEST_ROOT_DIR, "testHistoryLogging");
  localFs.mkdirs(historyLogDir);

  tezConf.set(TezConfiguration.TEZ_SIMPLE_HISTORY_LOGGING_DIR,
      localFs.makeQualified(historyLogDir).toString());

  tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, false);
  TezClient tezSession = TezClient.create("TezSleepProcessorHistoryLogging", tezConf);
  tezSession.start();

  // Stop the client even when submission, polling, or an assertion fails.
  try {
    DAGClient dagClient = tezSession.submitDAG(dag);

    DAGStatus dagStatus = dagClient.getDAGStatus(null);
    while (!dagStatus.isCompleted()) {
      LOG.info("Waiting for job to complete. Sleeping for 500ms." + " Current state: "
          + dagStatus.getState());
      Thread.sleep(500L);
      dagStatus = dagClient.getDAGStatus(null);
    }
    assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState());

    // Pick the first regular file carrying the history log file name prefix.
    FileStatus historyLogFileStatus = null;
    for (FileStatus fileStatus : localFs.listStatus(historyLogDir)) {
      if (fileStatus.isDirectory()) {
        continue;
      }
      Path p = fileStatus.getPath();
      if (p.getName().startsWith(SimpleHistoryLoggingService.LOG_FILE_NAME_PREFIX)) {
        historyLogFileStatus = fileStatus;
        break;
      }
    }
    Assert.assertNotNull(historyLogFileStatus);
    Assert.assertTrue(historyLogFileStatus.getLen() > 0);
  } finally {
    tezSession.stop();
  }
}
 
Example 19
Source File: TestSpeculation.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Test basic speculation per vertex conf.
 * <p>
 * Vertex A has {@code TEZ_AM_SPECULATION_ENABLED} set to "false" through its
 * per-vertex configuration, while vertex B is left untouched. Status updates
 * are arranged for the first attempt of both vertices, and the test then
 * expects speculation to be counted on B and not on A.
 *
 * @throws Exception the exception
 */
@Retry
@Test (timeout=10000)
public void testBasicSpeculationPerVertexConf() throws Exception {
  DAG dag = DAG.create("test");
  String vNameNoSpec = "A";
  String vNameSpec = "B";
  Vertex vA = Vertex.create(vNameNoSpec, ProcessorDescriptor.create("Proc.class"), 5);
  Vertex vB = Vertex.create(vNameSpec, ProcessorDescriptor.create("Proc.class"), 5);
  vA.setConf(TezConfiguration.TEZ_AM_SPECULATION_ENABLED, "false");
  dag.addVertex(vA);
  dag.addVertex(vB);
  // min/max src fraction is set to 1. So vertices will run sequentially
  dag.addEdge(
      Edge.create(vA, vB,
          EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED,
              SchedulingType.SEQUENTIAL, OutputDescriptor.create("O"),
              InputDescriptor.create("I"))));

  MockTezClient tezClient = createTezSession();

  DAGClient dagClient = tezClient.submitDAG(dag);
  DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG();
  TezVertexID vertexId = dagImpl.getVertex(vNameSpec).getVertexId();
  TezVertexID vertexIdNoSpec = dagImpl.getVertex(vNameNoSpec).getVertexId();
  // original attempt is killed and speculative one is successful
  TezTaskAttemptID killedTaId =
      TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 0);
  TezTaskAttemptID noSpecTaId = TezTaskAttemptID
      .getInstance(TezTaskID.getInstance(vertexIdNoSpec, 0), 0);

  // cause speculation trigger for both
  mockLauncher.setStatusUpdatesForTask(killedTaId, 100);
  mockLauncher.setStatusUpdatesForTask(noSpecTaId, 100);

  mockLauncher.startScheduling(true);
  org.apache.tez.dag.app.dag.Vertex vSpec = dagImpl.getVertex(vertexId);
  org.apache.tez.dag.app.dag.Vertex vNoSpec = dagImpl.getVertex(vertexIdNoSpec);
  // Wait until the speculator has triggered at least one speculation on vB.
  // If that never happens, this loop only ends through the JUnit timeout,
  // which fails the test.
  do {
    Thread.sleep(100);
  } while (vSpec.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS)
      .getValue() <= 0);
  dagClient.waitForCompletion();
  // speculation for vB (speculation left enabled) but not for vA (disabled above)
  Assert.assertTrue("Num Speculations is not higher than 0",
      vSpec.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS)
          .getValue() > 0);
  Assert.assertEquals(0,
      vNoSpec.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS)
          .getValue());

  tezClient.stop();
}
 
Example 20
Source File: YARNRunner.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Translates the per-stage configurations of an MR job into a linear DAG: one
 * vertex per stage, with each consecutive pair of stages joined by an ordered,
 * partitioned key-value edge configured from the upstream stage's
 * configuration.
 * <p>
 * The DAG is named after {@code MRJobConfig.JOB_NAME} of the first stage,
 * falling back to {@code YarnConfiguration.DEFAULT_APPLICATION_NAME}.
 *
 * @param fs file system passed to the input-split location lookup
 * @param jobId job id passed to the input-split location lookup
 * @param stageConfs one configuration per stage; index 0 is read unconditionally
 * @param jobSubmitDir submit directory passed to the input-split location lookup
 * @param ts not used by this method
 * @param jobLocalResources local resources handed to each stage's vertex
 * @return the assembled DAG
 * @throws IOException propagated from the location lookup or vertex creation
 */
private DAG createDAG(FileSystem fs, JobID jobId, Configuration[] stageConfs,
    String jobSubmitDir, Credentials ts,
    Map<String, LocalResource> jobLocalResources) throws IOException {

  final int stageCount = stageConfs.length;
  String dagName = stageConfs[0].get(MRJobConfig.JOB_NAME,
      YarnConfiguration.DEFAULT_APPLICATION_NAME);
  DAG dag = DAG.create(dagName);

  LOG.info("Number of stages: " + stageCount);

  List<TaskLocationHint> mapLocations =
      getMapLocationHintsFromInputSplits(jobId, fs, stageConfs[0], jobSubmitDir);
  // Only the first stage receives location hints; later stages are given null.
  List<TaskLocationHint> reduceLocations = null;

  Vertex[] stageVertices = new Vertex[stageCount];
  for (int stage = 0; stage < stageCount; stage++) {
    List<TaskLocationHint> locations = (stage == 0) ? mapLocations : reduceLocations;
    stageVertices[stage] = createVertexForStage(stageConfs[stage], jobLocalResources,
        locations, stage, stageCount);
  }

  for (int stage = 0; stage < stageVertices.length; stage++) {
    dag.addVertex(stageVertices[stage]);
    if (stage == 0) {
      // The first stage has no upstream vertex to connect to.
      continue;
    }

    Configuration upstreamConf = stageConfs[stage - 1];

    // Set edge conf based on Input conf (compression etc properties for MapReduce are
    // w.r.t Outputs - MAP_OUTPUT_COMPRESS for example)
    // NOTE(review): upstreamConf is null-checked here but dereferenced
    // unconditionally just below; confirm whether a null stage conf can occur.
    Map<String, String> partitionerConf = null;
    if (upstreamConf != null) {
      partitionerConf = Maps.newHashMap();
      for (Map.Entry<String, String> property : upstreamConf) {
        partitionerConf.put(property.getKey(), property.getValue());
      }
    }

    String keyClassName = upstreamConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS);
    String valueClassName = upstreamConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS);
    OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
        .newBuilder(keyClassName, valueClassName, MRPartitioner.class.getName(), partitionerConf)
        .setFromConfigurationUnfiltered(upstreamConf)
        .configureInput().useLegacyInput().done()
        .build();

    dag.addEdge(Edge.create(stageVertices[stage - 1], stageVertices[stage],
        edgeConf.createDefaultEdgeProperty()));
  }
  return dag;
}