Java Code Examples for org.apache.tez.dag.api.DAG#addVertex()

The following examples show how to use org.apache.tez.dag.api.DAG#addVertex(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestMockDAGAppMaster.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Submits a DAG with a single vertex "A" of 50000 tasks to the mock AM, with
 * {@code TEZ_AM_USE_CONCURRENT_DISPATCHER} switched on, and expects the DAG to finish in
 * the {@code SUCCEEDED} state. The test is currently disabled through {@code @Ignore}.
 *
 * @throws Exception if any client or AM call fails
 */
@Ignore
@Test (timeout = 60000)
public void testTaskEventsProcessingSpeed() throws Exception {
  // Keep logging quiet for the duration of this high-volume run.
  Logger.getRootLogger().setLevel(Level.WARN);

  TezConfiguration conf = new TezConfiguration(defaultConf);
  conf.setBoolean(TezConfiguration.TEZ_AM_USE_CONCURRENT_DISPATCHER, true);
  MockTezClient client = new MockTezClient("testMockAM", conf, true, null, null, null,
      null, false, false, 30, 1000);
  client.start();

  DAG speedDag = DAG.create("testTaskEventsProcessingSpeed");
  speedDag.addVertex(Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 50000));

  MockDAGAppMaster appMaster = client.getLocalClient().getMockApp();
  appMaster.doSleep = false;

  DAGStatus finalStatus = client.submitDAG(speedDag).waitForCompletion();
  Assert.assertEquals(DAGStatus.State.SUCCEEDED, finalStatus.getState());
  client.stop();
}
 
Example 2
Source File: TestMockDAGAppMaster.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Submits a five-task DAG, waits until containers are launched, then hands the mock AM a
 * scheduling-service error event and polls until the AM's shutdown handler reports that
 * shutdown was invoked. At that point the current DAG is expected to still be
 * {@code RUNNING}.
 *
 * @throws Exception if any client or AM call fails
 */
@Test (timeout = 10000)
public void testSchedulerErrorHandling() throws Exception {
  TezConfiguration conf = new TezConfiguration(defaultConf);
  MockTezClient client = new MockTezClient("testMockAM", conf, true, null, null, null, null);
  client.start();

  MockDAGAppMaster appMaster = client.getLocalClient().getMockApp();
  MockContainerLauncher launcher = appMaster.getContainerLauncher();
  launcher.startScheduling(false);

  DAG errorDag = DAG.create("testSchedulerErrorHandling");
  errorDag.addVertex(Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 5));

  client.submitDAG(errorDag);
  launcher.waitTillContainersLaunched();

  // Inject the failure as a stringified stack trace of a mock exception.
  String mockErrorTrace =
      org.apache.hadoop.util.StringUtils.stringifyException(new RuntimeException("Mock error"));
  appMaster.handle(new DAGAppMasterEventSchedulingServiceError(mockErrorTrace));

  // Poll every 100 ms; the JUnit timeout bounds this wait.
  boolean shutdownSeen = appMaster.getShutdownHandler().wasShutdownInvoked();
  while (!shutdownSeen) {
    Thread.sleep(100);
    shutdownSeen = appMaster.getShutdownHandler().wasShutdownInvoked();
  }
  Assert.assertEquals(DAGState.RUNNING, appMaster.getContext().getCurrentDAG().getState());
}
 
Example 3
Source File: TestTezJobs.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a one-task {@code SleepProcessor} DAG through a {@code TezClient} configured from
 * {@code mrrTezCluster} and checks the outcome.
 *
 * <p>The DAG status is polled every 500 ms until it reports completion. The test then
 * expects the {@code SUCCEEDED} state and non-null DAG counters, including the
 * {@code FileSystemCounter} group and the {@code GC_TIME_MILLIS} counter.
 *
 * @throws TezException propagated from the Tez client calls
 * @throws IOException propagated from file-system or client calls
 * @throws InterruptedException if interrupted while sleeping between status polls
 */
@Test(timeout = 60000)
public void testSleepJob() throws TezException, IOException, InterruptedException {
  SleepProcessorConfig spConf = new SleepProcessorConfig(1);

  DAG dag = new DAG("TezSleepProcessor");
  Vertex vertex = new Vertex("SleepVertex", new ProcessorDescriptor(
      SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 1,
      Resource.newInstance(1024, 1));
  dag.addVertex(vertex);

  // Use a randomly named staging directory under /tmp on the remote file system.
  TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
  Path remoteStagingDir = remoteFs.makeQualified(new Path("/tmp", String.valueOf(random
      .nextInt(100000))));
  remoteFs.mkdirs(remoteStagingDir);
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString());

  TezClient tezSession = new TezClient("TezSleepProcessor", tezConf, false);
  try {
    tezSession.start();

    DAGClient dagClient = tezSession.submitDAG(dag);

    DAGStatus dagStatus = dagClient.getDAGStatus(null);
    while (!dagStatus.isCompleted()) {
      LOG.info("Waiting for job to complete. Sleeping for 500ms." + " Current state: "
          + dagStatus.getState());
      Thread.sleep(500L);
      dagStatus = dagClient.getDAGStatus(null);
    }
    // Fetch the final status again, this time asking for counters.
    dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));

    assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState());
    assertNotNull(dagStatus.getDAGCounters());
    assertNotNull(dagStatus.getDAGCounters().getGroup(FileSystemCounter.class.getName()));
    assertNotNull(dagStatus.getDAGCounters().findCounter(TaskCounter.GC_TIME_MILLIS));
    ExampleDriver.printDAGStatus(dagClient, new String[] { "SleepVertex" }, true, true);
  } finally {
    // Stop the client even when an assertion or client call above fails, so a failing
    // test does not leave it running.
    tezSession.stop();
  }
}
 
Example 4
Source File: TezClient.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * API to help pre-allocate containers in session mode. In non-session mode
 * this is ignored (a warning is logged and the method returns). The
 * pre-allocated containers may be re-used by subsequent job DAGs to improve
 * performance.
 * The preWarm vertex should be configured and setup exactly
 * like the other vertices in the job DAGs so that the pre-allocated
 * containers may be re-used by the subsequent DAGs to improve performance.
 * The processor for the preWarmVertex may be used to pre-warm the containers
 * by pre-loading classes etc. It should be short-running so that pre-warming
 * does not block real execution. Users can specify their custom processors or
 * use the PreWarmProcessor from the runtime library.
 * The parallelism of the preWarmVertex will determine the number of preWarmed
 * containers.
 * Pre-warming is best efforts and among other factors is limited by the free
 * resources on the cluster. The method waits up to the specified timeout for
 * the session to become ready; if it is still not ready after the wait
 * period, a {@code SessionNotReady} exception is thrown and no DAG is
 * submitted.
 *
 * @param preWarmVertex the vertex to submit as the only vertex of the pre-warm DAG
 * @param timeout maximum time to wait for the session to become ready
 * @param unit time unit of {@code timeout}
 * @throws TezException if the session is not ready within the timeout
 *         ({@code SessionNotReady}) or the pre-warm DAG cannot be submitted
 * @throws IOException if the thread is interrupted while waiting for the AM,
 *         or an I/O error occurs during submission
 */
@Unstable
public synchronized void preWarm(PreWarmVertex preWarmVertex,
    long timeout, TimeUnit unit)
    throws TezException, IOException {
  if (!isSession) {
    // do nothing for non session mode. This is there to let the code
    // work correctly in both modes
    LOG.warn("preWarm is not supported in non-session mode, " +
        "please use session-mode of TezClient");
    return;
  }

  verifySessionStateForSubmission();

  // Each pre-warm DAG gets a unique name derived from a running counter.
  DAG dag = org.apache.tez.dag.api.DAG.create(TezConstants.TEZ_PREWARM_DAG_NAME_PREFIX + "_"
      + preWarmDAGCounter++);
  dag.addVertex(preWarmVertex);

  boolean isReady;
  try {
    isReady = waitTillReady(timeout, unit);
  } catch (InterruptedException e) {
    // Restore the interrupt flag so callers further up can still observe the
    // interruption after it has been translated into an IOException.
    Thread.currentThread().interrupt();
    throw new IOException("Interrupted while waiting for AM to become " +
        "available", e);
  }
  if (!isReady) {
    throw new SessionNotReady("Tez AM not ready, could not submit DAG");
  }
  prewarmDagClient = submitDAG(dag);
}
 
Example 5
Source File: TestMockDAGAppMaster.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that one and the same {@code DAG} object, carrying local resources at both DAG
 * and vertex level, can be submitted twice: once through each of two independently
 * created, started and stopped mock clients. Both runs are expected to end in
 * {@code SUCCEEDED}.
 *
 * @throws Exception if any client call fails
 */
@Test (timeout = 10000)
public void testMultipleSubmissions() throws Exception {
  // Local resource attached to the DAG as a whole.
  Map<String, LocalResource> dagResources = Maps.newHashMap();
  dagResources.put("LR1",
      LocalResource.newInstance(URL.newInstance("file", "localhost", 0, "/test"),
          LocalResourceType.FILE, LocalResourceVisibility.PUBLIC, 1, 1));

  // Local resource attached to the single vertex only.
  Map<String, LocalResource> vertexResources = Maps.newHashMap();
  vertexResources.put("LR2",
      LocalResource.newInstance(URL.newInstance("file", "localhost", 0, "/test1"),
          LocalResourceType.FILE, LocalResourceVisibility.PUBLIC, 1, 1));

  DAG sharedDag = DAG.create("test").addTaskLocalFiles(dagResources);
  sharedDag.addVertex(Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 5)
      .addTaskLocalFiles(vertexResources));

  TezConfiguration conf = new TezConfiguration(defaultConf);

  // First pass is the initial submission; the second pass submits the same DAG again to
  // verify it can be done.
  for (int submission = 0; submission < 2; submission++) {
    MockTezClient client = new MockTezClient("testMockAM", conf, true, null, null, null, null);
    client.start();
    DAGClient submitted = client.submitDAG(sharedDag);
    submitted.waitForCompletion();
    Assert.assertEquals(DAGStatus.State.SUCCEEDED, submitted.getDAGStatus(null).getState());
    client.stop();
  }
}
 
Example 6
Source File: TestMRRJobsDAGApi.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a one-task {@code SleepProcessor} DAG through a {@code TezClient} configured from
 * {@code mrrTezCluster} and checks the outcome.
 *
 * <p>The DAG status is polled every 500 ms until it reports completion. The test then
 * expects the {@code SUCCEEDED} state and non-null DAG counters, including the
 * {@code FileSystemCounter} group and the {@code GC_TIME_MILLIS} counter.
 *
 * @throws TezException propagated from the Tez client calls
 * @throws IOException propagated from file-system or client calls
 * @throws InterruptedException if interrupted while sleeping between status polls
 */
@Test(timeout = 60000)
public void testSleepJob() throws TezException, IOException, InterruptedException {
  SleepProcessorConfig spConf = new SleepProcessorConfig(1);

  DAG dag = DAG.create("TezSleepProcessor");
  Vertex vertex = Vertex.create("SleepVertex", ProcessorDescriptor.create(
          SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 1,
      Resource.newInstance(1024, 1));
  dag.addVertex(vertex);

  // Use a randomly named staging directory under /tmp on the remote file system.
  TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
  Path remoteStagingDir = remoteFs.makeQualified(new Path("/tmp", String.valueOf(random
      .nextInt(100000))));
  remoteFs.mkdirs(remoteStagingDir);
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString());

  TezClient tezSession = TezClient.create("TezSleepProcessor", tezConf, false);
  try {
    tezSession.start();

    DAGClient dagClient = tezSession.submitDAG(dag);

    DAGStatus dagStatus = dagClient.getDAGStatus(null);
    while (!dagStatus.isCompleted()) {
      LOG.info("Waiting for job to complete. Sleeping for 500ms." + " Current state: "
          + dagStatus.getState());
      Thread.sleep(500L);
      dagStatus = dagClient.getDAGStatus(null);
    }
    // Fetch the final status again, this time asking for counters.
    dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));

    assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState());
    assertNotNull(dagStatus.getDAGCounters());
    assertNotNull(dagStatus.getDAGCounters().getGroup(FileSystemCounter.class.getName()));
    assertNotNull(dagStatus.getDAGCounters().findCounter(TaskCounter.GC_TIME_MILLIS));
    ExampleDriver.printDAGStatus(dagClient, new String[] { "SleepVertex" }, true, true);
  } finally {
    // Stop the client even when an assertion or client call above fails, so a failing
    // test does not leave it running.
    tezSession.stop();
  }
}
 
Example 7
Source File: JoinDataGen.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the "JoinDataGen" DAG: a single "datagen" vertex running {@code GenDataProcessor}
 * with {@code numTasks} tasks and three {@code MROutput} text data sinks (stream, hash and
 * expected output).
 *
 * @param tezConf configuration copied into each data sink's output configuration
 * @param largeOutPath destination of the sink named {@code STREAM_OUTPUT_NAME}
 * @param smallOutPath destination of the sink named {@code HASH_OUTPUT_NAME}
 * @param expectedOutputPath destination of the sink named {@code EXPECTED_OUTPUT_NAME}
 * @param numTasks parallelism of the vertex; also the divisor for the per-task sizes
 * @param largeOutSize total size requested for the large output
 * @param smallOutSize total size requested for the small output
 * @return the DAG containing the single data-generation vertex
 * @throws IOException declared by the signature; propagated from the calls made here
 */
private DAG createDag(TezConfiguration tezConf, Path largeOutPath, Path smallOutPath,
    Path expectedOutputPath, int numTasks, long largeOutSize, long smallOutSize)
    throws IOException {

  // Integer division: every task gets an equal share of the requested totals.
  long perTaskLargeSize = largeOutSize / numTasks;
  long perTaskSmallSize = smallOutSize / numTasks;

  DAG dataGenDag = DAG.create("JoinDataGen");

  UserPayload processorPayload = UserPayload.create(ByteBuffer.wrap(
      GenDataProcessor.createConfiguration(perTaskLargeSize, perTaskSmallSize)));
  ProcessorDescriptor genDataProcessor =
      ProcessorDescriptor.create(GenDataProcessor.class.getName())
          .setUserPayload(processorPayload);
  Vertex dataGenVertex = Vertex.create("datagen", genDataProcessor, numTasks);

  dataGenVertex.addDataSink(STREAM_OUTPUT_NAME,
      MROutput.createConfigBuilder(new Configuration(tezConf),
          TextOutputFormat.class, largeOutPath.toUri().toString()).build());
  dataGenVertex.addDataSink(HASH_OUTPUT_NAME,
      MROutput.createConfigBuilder(new Configuration(tezConf),
          TextOutputFormat.class, smallOutPath.toUri().toString()).build());
  dataGenVertex.addDataSink(EXPECTED_OUTPUT_NAME,
      MROutput.createConfigBuilder(new Configuration(tezConf),
          TextOutputFormat.class, expectedOutputPath.toUri().toString()).build());

  dataGenDag.addVertex(dataGenVertex);
  return dataGenDag;
}
 
Example 8
Source File: TestTezClientUtils.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an application submission context for a one-vertex DAG with the AM log level set
 * to "WARN" and checks that the single AM launch command contains the expected log4j
 * {@code -D} options, and that the AM environment has no entry for
 * {@code TEZ_CONTAINER_LOG_PARAMS}.
 *
 * @throws IOException propagated from the client utility calls
 * @throws YarnException propagated from the client utility calls
 */
@Test(timeout = 5000)
public void testAMLoggingOptsSimple() throws IOException, YarnException {
  TezConfiguration conf = new TezConfiguration();
  conf.set(TezConfiguration.TEZ_AM_LOG_LEVEL, "WARN");
  conf.set(TezConfiguration.TEZ_AM_STAGING_DIR, STAGING_DIR.getAbsolutePath());

  ApplicationId applicationId = ApplicationId.newInstance(1000, 1);
  Credentials creds = new Credentials();
  JobTokenSecretManager secretManager = new JobTokenSecretManager();
  TezClientUtils.createSessionToken(applicationId.toString(), secretManager, creds);

  DAG testDag = DAG.create("testdag");
  Vertex onlyVertex =
      Vertex.create("testVertex", ProcessorDescriptor.create("processorClassname"), 1)
          .setTaskLaunchCmdOpts("initialLaunchOpts");
  testDag.addVertex(onlyVertex);

  AMConfiguration amConfiguration =
      new AMConfiguration(conf, new HashMap<String, LocalResource>(), creds);
  ApplicationSubmissionContext submissionContext =
      TezClientUtils.createApplicationSubmissionContext(applicationId, testDag, "amName",
          amConfiguration, new HashMap<String, LocalResource>(), creds, false,
          new TezApiVersionInfo(), null, null);

  // Options that must each appear somewhere in the AM launch command line.
  String configuratorOpt =
      "-Dlog4j.configuratorClass=org.apache.tez.common.TezLog4jConfigurator";
  String configurationOpt =
      "-Dlog4j.configuration=" + TezConstants.TEZ_CONTAINER_LOG4J_PROPERTIES_FILE;
  String logDirOpt = "-D" + YarnConfiguration.YARN_APP_CONTAINER_LOG_DIR + "=" +
      ApplicationConstants.LOG_DIR_EXPANSION_VAR;
  String rootLoggerOpt = "-D" + TezConstants.TEZ_ROOT_LOGGER_NAME + "=" + "WARN" + "," +
      TezConstants.TEZ_CONTAINER_LOGGER_NAME;

  List<String> expectedOpts = new LinkedList<String>();
  expectedOpts.add(configuratorOpt);
  expectedOpts.add(configurationOpt);
  expectedOpts.add(logDirOpt);
  expectedOpts.add(rootLoggerOpt);

  List<String> amCommands = submissionContext.getAMContainerSpec().getCommands();
  assertEquals(1, amCommands.size());
  for (String opt : expectedOpts) {
    assertTrue(amCommands.get(0).contains(opt));
  }

  Map<String, String> amEnvironment = submissionContext.getAMContainerSpec().getEnvironment();
  assertNull(amEnvironment.get(TezConstants.TEZ_CONTAINER_LOG_PARAMS));
}
 
Example 9
Source File: IntersectDataGen.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the "IntersectDataGen" DAG: a single "datagen" vertex running
 * {@code GenDataProcessor} with {@code numTasks} tasks and three {@code MROutput} outputs
 * (stream, hash and expected output), each registered with {@code MROutputCommitter}.
 *
 * @param tezConf configuration used for the output payloads and the vertex resource
 * @param largeOutPath destination of the output named {@code STREAM_OUTPUT_NAME}
 * @param smallOutPath destination of the output named {@code HASH_OUTPUT_NAME}
 * @param expectedOutputPath destination of the output named {@code EXPECTED_OUTPUT_NAME}
 * @param numTasks parallelism of the vertex; also the divisor for the per-task sizes
 * @param largeOutSize total size requested for the large output
 * @param smallOutSize total size requested for the small output
 * @return the DAG containing the single data-generation vertex
 * @throws IOException declared by the signature; propagated from the calls made here
 */
private DAG createDag(TezConfiguration tezConf, Path largeOutPath, Path smallOutPath,
    Path expectedOutputPath, int numTasks, long largeOutSize, long smallOutSize)
    throws IOException {

  // Integer division: every task gets an equal share of the requested totals.
  long perTaskLargeSize = largeOutSize / numTasks;
  long perTaskSmallSize = smallOutSize / numTasks;

  DAG dataGenDag = new DAG("IntersectDataGen");

  byte[] largePayload = createPayloadForOutput(largeOutPath, tezConf);
  byte[] smallPayload = createPayloadForOutput(smallOutPath, tezConf);
  byte[] expectedPayload = createPayloadForOutput(expectedOutputPath, tezConf);

  ProcessorDescriptor genDataProcessor =
      new ProcessorDescriptor(GenDataProcessor.class.getName()).setUserPayload(
          GenDataProcessor.createConfiguration(perTaskLargeSize, perTaskSmallSize));
  Vertex dataGenVertex = new Vertex("datagen", genDataProcessor, numTasks,
      MRHelpers.getMapResource(tezConf));

  OutputDescriptor largeOutput =
      new OutputDescriptor(MROutput.class.getName()).setUserPayload(largePayload);
  dataGenVertex.addOutput(STREAM_OUTPUT_NAME, largeOutput, MROutputCommitter.class);

  OutputDescriptor smallOutput =
      new OutputDescriptor(MROutput.class.getName()).setUserPayload(smallPayload);
  dataGenVertex.addOutput(HASH_OUTPUT_NAME, smallOutput, MROutputCommitter.class);

  OutputDescriptor expectedOutput =
      new OutputDescriptor(MROutput.class.getName()).setUserPayload(expectedPayload);
  dataGenVertex.addOutput(EXPECTED_OUTPUT_NAME, expectedOutput, MROutputCommitter.class);

  dataGenDag.addVertex(dataGenVertex);
  return dataGenDag;
}
 
Example 10
Source File: TestTezJobs.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a one-task {@code SleepProcessor} DAG with the AM staging directory placed on the
 * local file system (qualified through {@code FileSystem.getLocal}) instead of
 * {@code remoteFs}, and checks the outcome.
 *
 * <p>The DAG status is polled every 500 ms until it reports completion. The test then
 * expects the {@code SUCCEEDED} state and non-null DAG counters, including the
 * {@code FileSystemCounter} group and the {@code GC_TIME_MILLIS} counter.
 *
 * @throws Exception if any file-system or client call fails
 */
@Test
public void testNonDefaultFSStagingDir() throws Exception {
  SleepProcessorConfig spConf = new SleepProcessorConfig(1);

  DAG dag = new DAG("TezSleepProcessor");
  Vertex vertex = new Vertex("SleepVertex", new ProcessorDescriptor(
      SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 1,
      Resource.newInstance(1024, 1));
  dag.addVertex(vertex);

  // Randomly suffixed staging directory under TEST_ROOT_DIR on the local file system.
  TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
  Path stagingDir = new Path(TEST_ROOT_DIR, "testNonDefaultFSStagingDir"
      + random.nextInt(100000));
  FileSystem localFs = FileSystem.getLocal(tezConf);
  stagingDir = localFs.makeQualified(stagingDir);
  localFs.mkdirs(stagingDir);
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());

  TezClient tezSession = new TezClient("TezSleepProcessor", tezConf, false);
  try {
    tezSession.start();

    DAGClient dagClient = tezSession.submitDAG(dag);

    DAGStatus dagStatus = dagClient.getDAGStatus(null);
    while (!dagStatus.isCompleted()) {
      LOG.info("Waiting for job to complete. Sleeping for 500ms." + " Current state: "
          + dagStatus.getState());
      Thread.sleep(500L);
      dagStatus = dagClient.getDAGStatus(null);
    }
    // Fetch the final status again, this time asking for counters.
    dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));

    assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState());
    assertNotNull(dagStatus.getDAGCounters());
    assertNotNull(dagStatus.getDAGCounters().getGroup(FileSystemCounter.class.getName()));
    assertNotNull(dagStatus.getDAGCounters().findCounter(TaskCounter.GC_TIME_MILLIS));
    ExampleDriver.printDAGStatus(dagClient, new String[] { "SleepVertex" }, true, true);
  } finally {
    // Stop the client even when an assertion or client call above fails, so a failing
    // test does not leave it running.
    tezSession.stop();
  }
}
 
Example 11
Source File: YARNRunner.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a sequence of per-stage configurations into a linear Tez DAG: one vertex per
 * stage, with each consecutive pair of vertices joined by an ordered, partitioned
 * key-value edge configured from the upstream stage's configuration.
 *
 * @param fs file system handed to the input-split location lookup
 * @param jobId job id handed to the input-split location lookup
 * @param stageConfs one configuration per stage; element 0 supplies the job name
 * @param jobSubmitDir job submit directory handed to the input-split location lookup
 * @param ts credentials; not used inside this method
 * @param jobLocalResources local resources handed to every vertex that is created
 * @return the assembled DAG
 * @throws IOException declared by the signature; propagated from the calls made here
 */
private DAG createDAG(FileSystem fs, JobID jobId, Configuration[] stageConfs,
    String jobSubmitDir, Credentials ts,
    Map<String, LocalResource> jobLocalResources) throws IOException {

  String dagName = stageConfs[0].get(MRJobConfig.JOB_NAME,
      YarnConfiguration.DEFAULT_APPLICATION_NAME);
  DAG result = new DAG(dagName);

  LOG.info("Number of stages: " + stageConfs.length);

  List<TaskLocationHint> firstStageLocations =
      getMapLocationHintsFromInputSplits(jobId, fs, stageConfs[0], jobSubmitDir);
  // No location hints are computed for the later stages.
  List<TaskLocationHint> laterStageLocations = null;

  // Create every vertex up front, before any of them is added to the DAG.
  Vertex[] stageVertices = new Vertex[stageConfs.length];
  for (int stage = 0; stage < stageConfs.length; stage++) {
    List<TaskLocationHint> locations =
        (stage == 0) ? firstStageLocations : laterStageLocations;
    stageVertices[stage] = createVertexForStage(stageConfs[stage], jobLocalResources,
        locations, stage, stageConfs.length);
  }

  for (int stage = 0; stage < stageVertices.length; stage++) {
    result.addVertex(stageVertices[stage]);
    if (stage == 0) {
      // The first vertex has no upstream vertex to connect to.
      continue;
    }
    // Set edge conf based on Input conf (compression etc properties for MapReduce are
    // w.r.t Outputs - MAP_OUTPUT_COMPRESS for example)
    Configuration upstreamConf = stageConfs[stage - 1];
    OrderedPartitionedKVEdgeConfigurer edgeConfigurer =
        OrderedPartitionedKVEdgeConfigurer.newBuilder(
                upstreamConf.get(TezJobConfig.TEZ_RUNTIME_KEY_CLASS),
                upstreamConf.get(TezJobConfig.TEZ_RUNTIME_VALUE_CLASS),
                MRPartitioner.class.getName(), upstreamConf)
            .configureInput().useLegacyInput().done()
            .setFromConfiguration(upstreamConf).build();
    result.addEdge(new Edge(stageVertices[stage - 1], stageVertices[stage],
        edgeConfigurer.createDefaultEdgeProperty()));
  }
  return result;
}
 
Example 12
Source File: TestATSHistoryWithACLs.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a {@code SleepProcessor} DAG that carries its own view ACLs ("nobody2" /
 * "nobody_group2") on top of the AM-level view ACLs ("nobody nobody_group"), with the ATS
 * history logging service configured, and then checks the timeline domains: the
 * application-level domain is verified against the AM ACLs only, and the domain with the
 * "_1" suffix against the union of AM and DAG ACLs.
 *
 * @throws Exception if any client, file-system or verification call fails
 */
@Test (timeout=50000)
public void testDAGACls() throws Exception {
  final String amViewAcls = "nobody nobody_group";
  ApplicationId appId;
  TezClient session = null;
  try {
    SleepProcessorConfig sleepConf = new SleepProcessorConfig(1);

    DAG aclDag = DAG.create("TezSleepProcessor");
    aclDag.addVertex(Vertex.create("SleepVertex",
        ProcessorDescriptor.create(SleepProcessor.class.getName())
            .setUserPayload(sleepConf.toUserPayload()),
        1, Resource.newInstance(256, 1)));

    // DAG-specific view ACLs, in addition to the AM-level ones configured below.
    DAGAccessControls dagAcls = new DAGAccessControls();
    dagAcls.setUsersWithViewACLs(Collections.singleton("nobody2"));
    dagAcls.setGroupsWithViewACLs(Collections.singleton("nobody_group2"));
    aclDag.setAccessControls(dagAcls);

    TezConfiguration conf = new TezConfiguration(mrrTezCluster.getConfig());
    conf.set(TezConfiguration.TEZ_AM_VIEW_ACLS, amViewAcls);
    conf.set(TezConfiguration.TEZ_HISTORY_LOGGING_SERVICE_CLASS,
        ATSHistoryLoggingService.class.getName());
    Path stagingDir = remoteFs.makeQualified(
        new Path("/tmp", String.valueOf(random.nextInt(100000))));
    remoteFs.mkdirs(stagingDir);
    conf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());

    session = TezClient.create("TezSleepProcessor", conf, true);
    session.start();

    appId = session.getAppMasterApplicationId();

    DAGClient dagClient = session.submitDAG(aclDag);

    // Poll every 500 ms until the DAG reports completion.
    DAGStatus status;
    while (!(status = dagClient.getDAGStatus(null)).isCompleted()) {
      LOG.info("Waiting for job to complete. Sleeping for 500ms." + " Current state: "
          + status.getState());
      Thread.sleep(500L);
    }
    assertEquals(DAGStatus.State.SUCCEEDED, status.getState());
  } finally {
    if (session != null) {
      session.stop();
    }
  }

  String appDomainId = ATSHistoryACLPolicyManager.DOMAIN_ID_PREFIX + appId.toString();
  TimelineDomain domain = getDomain(appDomainId);
  verifyDomainACLs(domain,
      Collections.singleton("nobody"), Collections.singleton("nobody_group"));

  domain = getDomain(ATSHistoryACLPolicyManager.DOMAIN_ID_PREFIX
      + appId.toString() + "_1");
  verifyDomainACLs(domain,
      Sets.newHashSet("nobody", "nobody2"),
      Sets.newHashSet("nobody_group", "nobody_group2"));

  verifyEntityDomains(appId, false);
}
 
Example 13
Source File: TestPipelinedShuffle.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Builds and runs the "pipelinedShuffleTest" DAG: two single-task {@code DataGenerator}
 * mapper vertices, each connected to one single-task {@code SimpleReduceProcessor} reducer
 * vertex through an ordered, partitioned key-value edge configured from {@code tezConf}.
 *
 * <p>After the DAG completes, the DAG counters are printed and the
 * {@code SHUFFLE_CHUNK_COUNT} counter is asserted to be greater than 10. The Tez client is
 * always stopped before this method returns or throws.
 *
 * @param args command-line arguments; not used
 * @return 0 if the DAG finished in the {@code SUCCEEDED} state, -1 otherwise
 * @throws Exception if any client call fails
 */
@Override
public int run(String[] args) throws Exception {
  this.tezConf = new TezConfiguration(getConf());
  String dagName = "pipelinedShuffleTest";
  DAG dag = DAG.create(dagName);

  Vertex m1_Vertex = Vertex.create("mapper1",
      ProcessorDescriptor.create(DataGenerator.class.getName()), 1);

  Vertex m2_Vertex = Vertex.create("mapper2",
      ProcessorDescriptor.create(DataGenerator.class.getName()), 1);

  Vertex reducerVertex = Vertex.create("reducer",
      ProcessorDescriptor.create(SimpleReduceProcessor.class.getName()), 1);

  Edge mapper1_to_reducer = Edge.create(m1_Vertex, reducerVertex,
      OrderedPartitionedKVEdgeConfig
          .newBuilder(Text.class.getName(), Text.class.getName(),
              HashPartitioner.class.getName())
          .setFromConfiguration(tezConf).build().createDefaultEdgeProperty());

  Edge mapper2_to_reducer = Edge.create(m2_Vertex, reducerVertex,
      OrderedPartitionedKVEdgeConfig
          .newBuilder(Text.class.getName(), Text.class.getName(),
              HashPartitioner.class.getName())
          .setFromConfiguration(tezConf).build().createDefaultEdgeProperty());

  dag.addVertex(m1_Vertex);
  dag.addVertex(m2_Vertex);
  dag.addVertex(reducerVertex);

  dag.addEdge(mapper1_to_reducer).addEdge(mapper2_to_reducer);

  TezClient client = TezClient.create(dagName, tezConf);
  try {
    client.start();
    client.waitTillReady();

    DAGClient dagClient = client.submitDAG(dag);
    Set<StatusGetOpts> getOpts = Sets.newHashSet();
    getOpts.add(StatusGetOpts.GET_COUNTERS);

    DAGStatus dagStatus = dagClient.waitForCompletionWithStatusUpdates(getOpts);

    System.out.println(dagStatus.getDAGCounters());
    TezCounters counters = dagStatus.getDAGCounters();

    // Ensure that more than 10 spills were there in this job.
    assertTrue(counters.findCounter(TaskCounter.SHUFFLE_CHUNK_COUNT).getValue() > 10);

    if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {
      System.out.println("DAG diagnostics: " + dagStatus.getDiagnostics());
      return -1;
    }
    return 0;
  } finally {
    // Stop the client on every exit path (success, failure return, or exception) so it
    // is never left running.
    client.stop();
  }
}
 
Example 14
Source File: YARNRunner.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a sequence of per-stage configurations into a linear Tez DAG: one vertex per
 * stage, with each consecutive pair of vertices joined by an ordered, partitioned
 * key-value edge configured from the upstream stage's configuration.
 *
 * @param fs file system handed to the input-split location lookup
 * @param jobId job id handed to the input-split location lookup
 * @param stageConfs one configuration per stage; element 0 supplies the job name
 * @param jobSubmitDir job submit directory handed to the input-split location lookup
 * @param ts credentials; not used inside this method
 * @param jobLocalResources local resources handed to every vertex that is created
 * @return the assembled DAG
 * @throws IOException declared by the signature; propagated from the calls made here
 */
private DAG createDAG(FileSystem fs, JobID jobId, Configuration[] stageConfs,
    String jobSubmitDir, Credentials ts,
    Map<String, LocalResource> jobLocalResources) throws IOException {

  String jobName = stageConfs[0].get(MRJobConfig.JOB_NAME,
      YarnConfiguration.DEFAULT_APPLICATION_NAME);
  DAG dag = DAG.create(jobName);

  LOG.info("Number of stages: " + stageConfs.length);

  List<TaskLocationHint> mapInputLocations =
      getMapLocationHintsFromInputSplits(
          jobId, fs, stageConfs[0], jobSubmitDir);
  // Never assigned a non-null value: stages after the first get no location hints.
  List<TaskLocationHint> reduceInputLocations = null;

  // Create every vertex up front, before any of them is added to the DAG.
  Vertex[] vertices = new Vertex[stageConfs.length];
  for (int i = 0; i < stageConfs.length; i++) {
    vertices[i] = createVertexForStage(stageConfs[i], jobLocalResources,
        i == 0 ? mapInputLocations : reduceInputLocations, i,
        stageConfs.length);
  }

  for (int i = 0; i < vertices.length; i++) {
    dag.addVertex(vertices[i]);
    if (i > 0) {
      // Set edge conf based on Input conf (compression etc properties for MapReduce are
      // w.r.t Outputs - MAP_OUTPUT_COMPRESS for example)
      // Copy every entry of the upstream stage's configuration into a plain map that is
      // passed to the edge builder as the partitioner configuration.
      // NOTE(review): this null check is inconsistent with the unconditional
      // stageConfs[i - 1].get(...) calls just below (and stageConfs[0].get(...) at the
      // top of the method), which would throw NullPointerException for a null entry.
      // Confirm whether null stage configurations are actually possible here.
      Map<String, String> partitionerConf = null;
      if (stageConfs[i-1] != null) {
        partitionerConf = Maps.newHashMap();
        for (Map.Entry<String, String> entry : stageConfs[i - 1]) {
          partitionerConf.put(entry.getKey(), entry.getValue());
        }
      }
      OrderedPartitionedKVEdgeConfig edgeConf =
          OrderedPartitionedKVEdgeConfig.newBuilder(stageConfs[i - 1].get(
                  TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS),
              stageConfs[i - 1].get(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS),
              MRPartitioner.class.getName(), partitionerConf)
              .setFromConfigurationUnfiltered(stageConfs[i-1])
              .configureInput().useLegacyInput().done()
              .build();
      Edge edge = Edge.create(vertices[i - 1], vertices[i], edgeConf.createDefaultEdgeProperty());
      dag.addEdge(edge);
    }

  }
  return dag;
}
 
Example 15
Source File: TestSpeculation.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Tests speculation for a vertex with a single task.
 *
 * <p>For each configured value of
 * {@code TEZ_AM_LEGACY_SPECULATIVE_SINGLE_TASK_VERTEX_TIMEOUT}, a one-task DAG is run and
 * the number of attempts of that task is compared with the expected count. Where more than
 * one attempt is expected, the second attempt must be the successful one and the first
 * attempt must have terminated with {@code TERMINATED_EFFECTIVE_SPECULATION}.
 *
 * @throws Exception if any client or AM call fails
 */
@Retry
@Test (timeout = 10000)
public void testSingleTaskSpeculation() throws Exception {
  // Map<Timeout conf value, expected number of attempts of the single task>
  Map<Long, Integer> confToExpected = new HashMap<Long, Integer>();
  confToExpected.put(Long.MAX_VALUE >> 1, 1); // Really long time to speculate
  confToExpected.put(100L, 2);
  confToExpected.put(-1L, 1); // Don't speculate
  defaultConf.setLong(TezConfiguration.TEZ_AM_SOONEST_RETRY_AFTER_NO_SPECULATE, 50);
  for(Map.Entry<Long, Integer> entry : confToExpected.entrySet()) {
    defaultConf.setLong(
            TezConfiguration.TEZ_AM_LEGACY_SPECULATIVE_SINGLE_TASK_VERTEX_TIMEOUT,
            entry.getKey());

    DAG dag = DAG.create("test");
    Vertex vA = Vertex.create("A",
            ProcessorDescriptor.create("Proc.class"),
            1);
    dag.addVertex(vA);

    // A fresh session is created (and stopped at the end) for every configuration value.
    MockTezClient tezClient = createTezSession();

    DAGClient dagClient = tezClient.submitDAG(dag);
    DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG();
    TezVertexID vertexId = TezVertexID.getInstance(dagImpl.getID(), 0);
    // original attempt is killed and speculative one is successful
    TezTaskAttemptID killedTaId =
        TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 0);
    TezTaskAttemptID successTaId =
        TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 1);
    Thread.sleep(200);
    // cause speculation trigger
    mockLauncher.setStatusUpdatesForTask(killedTaId, 100);

    mockLauncher.startScheduling(true);
    dagClient.waitForCompletion();
    Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState());
    Task task = dagImpl.getTask(killedTaId.getTaskID());
    Assert.assertEquals(entry.getValue().intValue(), task.getAttempts().size());
    if (entry.getValue() > 1) {
      Assert.assertEquals(successTaId, task.getSuccessfulAttempt().getID());
      TaskAttempt killedAttempt = task.getAttempt(killedTaId);
      // NOTE(review): the boolean result of contains(...) below is discarded, so this
      // statement verifies nothing. It was presumably meant to be wrapped in
      // Assert.assertTrue(...); confirm the exact diagnostic text produced for the killed
      // attempt before turning it into an assertion.
      Joiner.on(",").join(killedAttempt.getDiagnostics()).contains("Killed as speculative attempt");
      Assert.assertEquals(TaskAttemptTerminationCause.TERMINATED_EFFECTIVE_SPECULATION,
              killedAttempt.getTerminationCause());
    }
    tezClient.stop();
  }
}
 
Example 16
Source File: TestMockDAGAppMaster.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a single-vertex DAG of 10000 tasks against the mock AM with a counters delegate
 * that returns, for every task, six distinct counters each incremented 15 times. Expects
 * the DAG to succeed and to expose non-null aggregated counters, then calls
 * {@code checkMemory}. The test is currently disabled through {@code @Ignore}.
 *
 * @throws Exception if any client or AM call fails
 */
@Ignore
@Test (timeout = 60000)
public void testBasicCounterMemory() throws Exception {
  Logger.getRootLogger().setLevel(Level.WARN);

  TezConfiguration conf = new TezConfiguration(defaultConf);
  MockTezClient client = new MockTezClient("testMockAM", conf, true, null, null, null,
      null, false, false);
  client.start();

  DAG counterDag = DAG.create("testBasicCounterMemory");
  counterDag.addVertex(Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 10000));

  MockDAGAppMaster appMaster = client.getLocalClient().getMockApp();
  MockContainerLauncher launcher = appMaster.getContainerLauncher();
  launcher.startScheduling(false);

  appMaster.countersDelegate = new CountersDelegate() {
    @Override
    public TezCounters getCounters(TaskSpec taskSpec) {
      final String groupSuffix = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
      final String counterSuffix = "abcdefghijklmnopqrstuvwxyz";
      TezCounters taskCounters = new TezCounters();
      for (int group = 0; group < 6; group++) {
        // The same (group, counter) pair is looked up and incremented 15 times.
        for (int hit = 0; hit < 15; hit++) {
          taskCounters.findCounter(group + groupSuffix, group + counterSuffix).increment(1);
        }
      }
      return taskCounters;
    }
  };
  appMaster.doSleep = false;

  DAGClient dagClient = client.submitDAG(counterDag);
  launcher.waitTillContainersLaunched();
  // Grab the DAG implementation while it is current, before scheduling is released.
  DAGImpl runningDag = (DAGImpl) appMaster.getContext().getCurrentDAG();
  launcher.startScheduling(true);

  DAGStatus finalStatus = dagClient.waitForCompletion();
  Assert.assertEquals(DAGStatus.State.SUCCEEDED, finalStatus.getState());

  TezCounters aggregated = runningDag.getAllCounters();
  Assert.assertNotNull(aggregated);
  checkMemory(counterDag.getName(), appMaster);
  client.stop();
}
 
Example 17
Source File: TestSpeculation.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that speculation can be switched off for an individual vertex.
 *
 * <p>The DAG has two five-task vertices joined by a scatter-gather edge: vertex "A" sets
 * {@code TEZ_AM_SPECULATION_ENABLED} to "false", vertex "B" leaves it untouched. Status
 * updates are set for the first attempt of task 0 in both vertices; the test expects
 * {@code NUM_SPECULATIONS} to become positive for "B" and to stay 0 for "A".
 *
 * @throws Exception if any client or AM call fails
 */
@Retry
@Test (timeout=10000)
public void testBasicSpeculationPerVertexConf() throws Exception {
  DAG dag = DAG.create("test");
  String vNameNoSpec = "A";
  String vNameSpec = "B";
  // NOTE(review): speculatorSleepTime is never read in this method.
  String speculatorSleepTime = "50";
  Vertex vA = Vertex.create(vNameNoSpec, ProcessorDescriptor.create("Proc.class"), 5);
  Vertex vB = Vertex.create(vNameSpec, ProcessorDescriptor.create("Proc.class"), 5);
  // Speculation is disabled on vA only; vB keeps the session's setting.
  vA.setConf(TezConfiguration.TEZ_AM_SPECULATION_ENABLED, "false");
  dag.addVertex(vA);
  dag.addVertex(vB);
  // min/max src fraction is set to 1. So vertices will run sequentially
  dag.addEdge(
      Edge.create(vA, vB,
          EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED,
              SchedulingType.SEQUENTIAL, OutputDescriptor.create("O"),
              InputDescriptor.create("I"))));

  MockTezClient tezClient = createTezSession();

  DAGClient dagClient = tezClient.submitDAG(dag);
  DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG();
  TezVertexID vertexId = dagImpl.getVertex(vNameSpec).getVertexId();
  TezVertexID vertexIdNoSpec = dagImpl.getVertex(vNameNoSpec).getVertexId();
  // original attempt is killed and speculative one is successful
  TezTaskAttemptID killedTaId =
      TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 0);
  TezTaskAttemptID noSpecTaId = TezTaskAttemptID
      .getInstance(TezTaskID.getInstance(vertexIdNoSpec, 0), 0);

  // cause speculation trigger for both
  mockLauncher.setStatusUpdatesForTask(killedTaId, 100);
  mockLauncher.setStatusUpdatesForTask(noSpecTaId, 100);

  mockLauncher.startScheduling(true);
  org.apache.tez.dag.app.dag.Vertex vSpec = dagImpl.getVertex(vertexId);
  org.apache.tez.dag.app.dag.Vertex vNoSpec = dagImpl.getVertex(vertexIdNoSpec);
  // Wait enough time to give chance for the speculator to trigger
  // speculation on VB.
  // If speculation never triggers, this loop keeps polling until the JUnit
  // timeout fails the test.
  do {
    Thread.sleep(100);
  } while (vSpec.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS)
      .getValue() <= 0);
  dagClient.waitForCompletion();
  // speculation for vB (vSpec) but not for vA (vNoSpec)
  Assert.assertTrue("Num Speculations is not higher than 0",
      vSpec.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS)
          .getValue() > 0);
  Assert.assertEquals(0,
      vNoSpec.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS)
          .getValue());

  tezClient.stop();
}
 
Example 18
Source File: TestATSHistoryWithACLs.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a single-vertex sleep DAG through a Tez client configured with view ACLs and the
 * ATS history logging service, waits for the DAG to succeed, and then verifies the timeline
 * domain ACLs and the entity domains recorded for the application.
 *
 * @throws Exception if cluster setup, DAG submission or any verification step fails
 */
@Test (timeout=50000)
public void testSimpleAMACls() throws Exception {
  final String viewAcls = "nobody nobody_group";
  ApplicationId applicationId;
  TezClient tezSession = null;
  try {
    // One-task DAG whose only vertex runs SleepProcessor.
    SleepProcessorConfig sleepConf = new SleepProcessorConfig(1);
    DAG dag = DAG.create("TezSleepProcessor");
    ProcessorDescriptor sleepProcessor =
        ProcessorDescriptor.create(SleepProcessor.class.getName())
            .setUserPayload(sleepConf.toUserPayload());
    dag.addVertex(
        Vertex.create("SleepVertex", sleepProcessor, 1, Resource.newInstance(256, 1)));

    // Client configuration: view ACLs, ATS history logging, and a staging directory
    // under /tmp whose name comes from random.nextInt.
    TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
    tezConf.set(TezConfiguration.TEZ_AM_VIEW_ACLS, viewAcls);
    tezConf.set(TezConfiguration.TEZ_HISTORY_LOGGING_SERVICE_CLASS,
        ATSHistoryLoggingService.class.getName());
    String stagingDirName = String.valueOf(random.nextInt(100000));
    Path remoteStagingDir = remoteFs.makeQualified(new Path("/tmp", stagingDirName));
    remoteFs.mkdirs(remoteStagingDir);
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString());

    tezSession = TezClient.create("TezSleepProcessor", tezConf, true);
    tezSession.start();

    // Captured inside the try so it is available for the checks after the client stops.
    applicationId = tezSession.getAppMasterApplicationId();

    DAGClient dagClient = tezSession.submitDAG(dag);

    // Poll the DAG status every 500ms until it reports completion.
    DAGStatus dagStatus;
    while (!(dagStatus = dagClient.getDAGStatus(null)).isCompleted()) {
      LOG.info("Waiting for job to complete. Sleeping for 500ms." + " Current state: "
          + dagStatus.getState());
      Thread.sleep(500L);
    }
    assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState());
  } finally {
    // Stop the client even when submission or the status assertion fails.
    if (tezSession != null) {
      tezSession.stop();
    }
  }

  // The domain id is the ACL policy manager's prefix followed by the application id.
  String domainId = ATSHistoryACLPolicyManager.DOMAIN_ID_PREFIX + applicationId.toString();
  TimelineDomain timelineDomain = getDomain(domainId);
  verifyDomainACLs(timelineDomain,
      Collections.singleton("nobody"), Collections.singleton("nobody_group"));

  verifyEntityDomains(applicationId, true);
}
 
Example 19
Source File: TestTezClientUtils.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Checks how a TEZ_AM_LOG_LEVEL value made of a root level followed by per-logger levels
 * shows up in the application submission context: the single AM launch command must contain
 * the log4j options with the root level, and the TEZ_CONTAINER_LOG_PARAMS environment entry
 * must equal the per-logger part.
 *
 * @throws IOException   declared by the signature; propagated from the code under test
 * @throws YarnException declared by the signature; propagated from the code under test
 */
@Test(timeout = 5000)
public void testAMLoggingOptsPerLogger() throws IOException, YarnException {

  final String rootLevel = "WARN";
  final String perLoggerLevels = "org.apache.hadoop.ipc=DEBUG;org.apache.hadoop.security=DEBUG";

  TezConfiguration tezConf = new TezConfiguration();
  tezConf.set(TezConfiguration.TEZ_AM_LOG_LEVEL, rootLevel + ";" + perLoggerLevels);
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, STAGING_DIR.getAbsolutePath());

  ApplicationId appId = ApplicationId.newInstance(1000, 1);
  Credentials credentials = new Credentials();
  JobTokenSecretManager jobTokenSecretManager = new JobTokenSecretManager();
  TezClientUtils.createSessionToken(appId.toString(), jobTokenSecretManager, credentials);

  // Minimal DAG: one vertex with one task and its own launch command options.
  DAG dag = DAG.create("testdag");
  Vertex testVertex =
      Vertex.create("testVertex", ProcessorDescriptor.create("processorClassname"), 1)
          .setTaskLaunchCmdOpts("initialLaunchOpts");
  dag.addVertex(testVertex);

  Map<String, LocalResource> amLocalResources = new HashMap<String, LocalResource>();
  AMConfiguration amConf = new AMConfiguration(tezConf, amLocalResources, credentials);
  Map<String, LocalResource> submissionLocalResources = new HashMap<String, LocalResource>();
  ApplicationSubmissionContext appSubmissionContext =
      TezClientUtils.createApplicationSubmissionContext(appId, dag, "amName", amConf,
          submissionLocalResources, credentials, false, new TezApiVersionInfo(),
          null, null);

  // Fragments that must each appear somewhere in the AM launch command.
  List<String> expectedCommands = new LinkedList<String>();
  expectedCommands.add("-Dlog4j.configuratorClass=org.apache.tez.common.TezLog4jConfigurator");
  expectedCommands.add(
      "-Dlog4j.configuration=" + TezConstants.TEZ_CONTAINER_LOG4J_PROPERTIES_FILE);
  expectedCommands.add("-D" + YarnConfiguration.YARN_APP_CONTAINER_LOG_DIR + "="
      + ApplicationConstants.LOG_DIR_EXPANSION_VAR);
  expectedCommands.add("-D" + TezConstants.TEZ_ROOT_LOGGER_NAME + "=" + rootLevel + ","
      + TezConstants.TEZ_CONTAINER_LOGGER_NAME);

  List<String> commands = appSubmissionContext.getAMContainerSpec().getCommands();
  assertEquals(1, commands.size());
  String amCommand = commands.get(0);
  for (String expectedFragment : expectedCommands) {
    assertTrue(amCommand.contains(expectedFragment));
  }

  // Only the per-logger part is expected in the environment entry.
  Map<String, String> environment = appSubmissionContext.getAMContainerSpec().getEnvironment();
  assertEquals(perLoggerLevels, environment.get(TezConstants.TEZ_CONTAINER_LOG_PARAMS));
}
 
Example 20
Source File: TestMockDAGAppMaster.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that TEZ_AM_VERTEX_MAX_TASK_CONCURRENCY is honored: a 20-task vertex is run with
 * a limit of 5 while a container delegate counts launches minus stops, and the test fails if
 * that count ever exceeds the limit.
 *
 * <p>The client is stopped in a {@code finally} block so that a failed assertion or an
 * exception during submission does not leave the started client running.
 *
 * @throws Exception if the client cannot be started or the DAG cannot be submitted/awaited
 */
@Test (timeout = 100000)
public void testConcurrencyLimit() throws Exception {
  // the test relies on local mode behavior of launching a new container per task.
  // so task concurrency == container concurrency
  TezConfiguration tezconf = new TezConfiguration(defaultConf);

  final int concurrencyLimit = 5;
  MockTezClient tezClient = new MockTezClient("testMockAM", tezconf, true, null, null, null,
      null, false, false, concurrencyLimit*4, 1000);

  tezClient.start();
  try {
    MockDAGAppMaster mockApp = tezClient.getLocalClient().getMockApp();
    MockContainerLauncher mockLauncher = mockApp.getContainerLauncher();
    // Hold scheduling until the counting delegate is installed and the DAG is built.
    mockLauncher.startScheduling(false);

    final AtomicInteger concurrency = new AtomicInteger(0);
    final AtomicBoolean exceededConcurrency = new AtomicBoolean(false);
    mockApp.containerDelegate = new ContainerDelegate() {
      @Override
      public void stop(ContainerStopRequest event) {
        concurrency.decrementAndGet();
      }
      @Override
      public void launch(ContainerLaunchRequest event) {
        // Count of containers launched and not yet stopped, including this one.
        int running = concurrency.incrementAndGet();
        if (running > concurrencyLimit) {
          // Latched: once set it is never reset, so a single violation fails the test.
          exceededConcurrency.set(true);
        }
        System.out.println("Launched: " + running);
      }
    };
    DAG dag = DAG.create("testConcurrencyLimit");
    Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 20).setConf(
        TezConfiguration.TEZ_AM_VERTEX_MAX_TASK_CONCURRENCY, String.valueOf(concurrencyLimit));
    dag.addVertex(vA);

    mockLauncher.startScheduling(true);
    DAGClient dagClient = tezClient.submitDAG(dag);
    dagClient.waitForCompletion();
    Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState());
    Assert.assertFalse(exceededConcurrency.get());
  } finally {
    // Always release the started client, including on assertion failure.
    tezClient.stop();
  }
}