org.apache.tez.dag.api.ProcessorDescriptor Java Examples

The following examples show how to use org.apache.tez.dag.api.ProcessorDescriptor. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TaskSpec.java    From incubator-tez with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a task specification from its individual parts.
 *
 * <p>All arguments except {@code groupInputSpecList} are passed through
 * {@code checkNotNull} before any field is assigned.
 *
 * @param taskAttemptID identifier of the task attempt
 * @param dagName name of the DAG; stored via {@code StringInterner.weakIntern}
 * @param vertexName name of the vertex; stored via {@code StringInterner.weakIntern}
 * @param processorDescriptor descriptor of the processor
 * @param inputSpecList input specifications
 * @param outputSpecList output specifications
 * @param groupInputSpecList grouped input specifications; may be {@code null}
 */
public TaskSpec(
    TezTaskAttemptID taskAttemptID,
    String dagName,
    String vertexName,
    ProcessorDescriptor processorDescriptor,
    List<InputSpec> inputSpecList,
    List<OutputSpec> outputSpecList,
    @Nullable List<GroupInputSpec> groupInputSpecList) {
  // Argument checks come first, in declaration order.
  checkNotNull(taskAttemptID, "taskAttemptID is null");
  checkNotNull(dagName, "dagName is null");
  checkNotNull(vertexName, "vertexName is null");
  checkNotNull(processorDescriptor, "processorDescriptor is null");
  checkNotNull(inputSpecList, "inputSpecList is null");
  checkNotNull(outputSpecList, "outputSpecList is null");

  // Plain references are stored as given.
  this.taskAttemptId = taskAttemptID;
  this.processorDescriptor = processorDescriptor;
  this.inputSpecList = inputSpecList;
  this.outputSpecList = outputSpecList;
  this.groupInputSpecList = groupInputSpecList;

  // Names go through the weak interner before being stored.
  this.dagName = StringInterner.weakIntern(dagName);
  this.vertexName = StringInterner.weakIntern(vertexName);
}
 
Example #2
Source File: TestTaskExecution.java    From incubator-tez with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link TezTaskRunner} for one task attempt that runs {@code TestProcessor}
 * with no inputs and no outputs.
 *
 * <p>The runner's single local directory is a path under {@code workDir} named with a
 * random UUID.
 *
 * @param appId application id at the root of the DAG/vertex/task/attempt id chain
 * @param umbilical umbilical handed to the runner
 * @param taskReporter reporter handed to the runner
 * @param executor executor handed to the runner
 * @param processorConf bytes set as the processor descriptor's user payload
 * @return the newly created runner
 * @throws IOException propagated from the calls made while assembling the runner
 */
private TezTaskRunner createTaskRunner(ApplicationId appId, TezTaskUmbilicalForTest umbilical,
    TaskReporter taskReporter, ListeningExecutorService executor, byte[] processorConf)
    throws IOException {
  TezConfiguration runnerConf = new TezConfiguration(defaultConf);
  UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
  Path uniqueDir = new Path(workDir, UUID.randomUUID().toString());
  String[] localDirs = { uniqueDir.toString() };

  // Every level of the id hierarchy uses 1 as its index.
  TezTaskAttemptID attemptId = TezTaskAttemptID.getInstance(
      TezTaskID.getInstance(
          TezVertexID.getInstance(TezDAGID.getInstance(appId, 1), 1),
          1),
      1);

  ProcessorDescriptor testProcessor = new ProcessorDescriptor(TestProcessor.class.getName())
      .setUserPayload(processorConf);
  TaskSpec spec = new TaskSpec(attemptId, "dagName", "vertexName", testProcessor,
      new ArrayList<InputSpec>(), new ArrayList<OutputSpec>(), null);

  return new TezTaskRunner(runnerConf, currentUser, localDirs, spec, umbilical, 1,
      new HashMap<String, ByteBuffer>(), HashMultimap.<String, String> create(), taskReporter,
      executor);
}
 
Example #3
Source File: TestTezClient.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Submits the same DAG three times and expects every submission to be rejected with a
 * {@code TezUncheckedException} whose message mentions the conflicting GC options set on
 * both vertices.
 *
 * <p>The client is stopped in a {@code finally} block so that a failed assertion does not
 * leave a started client behind.
 */
@Test(timeout = 5000)
public void testClientResubmit() throws Exception {
  TezClientForTest client = configureAndCreateTezClient(null, true, null);
  client.start();
  try {
    Map<String, LocalResource> lrDAG = Collections.singletonMap("LR1",
        LocalResource.newInstance(
            URL.newInstance("file", "localhost", 0, "/test1"),
            LocalResourceType.FILE,
            LocalResourceVisibility.PUBLIC, 1, 1));
    // Both vertices request two different collectors at once, which is what the
    // submission is expected to reject.
    Vertex vertex1 = Vertex.create("Vertex1", ProcessorDescriptor.create("P1"), 1,
        Resource.newInstance(1, 1));
    vertex1.setTaskLaunchCmdOpts("-XX:+UseParallelGC -XX:+UseG1GC");
    Vertex vertex2 = Vertex.create("Vertex2", ProcessorDescriptor.create("P2"), 1,
        Resource.newInstance(1, 1));
    vertex2.setTaskLaunchCmdOpts("-XX:+UseParallelGC -XX:+UseG1GC");
    DAG dag = DAG.create("DAG").addVertex(vertex1).addVertex(vertex2).addTaskLocalFiles(lrDAG);
    for (int i = 0; i < 3; ++i) {
      try {
        client.submitDAG(dag);
        Assert.fail("Expected TezUncheckedException here.");
      } catch (TezUncheckedException ex) {
        Assert.assertTrue(ex.getMessage().contains("Invalid/conflicting GC options found"));
      }
    }
  } finally {
    // Always release the client, even when an assertion above fails.
    client.stop();
  }
}
 
Example #4
Source File: TestMockDAGAppMaster.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a single-vertex DAG with 50000 tasks against the mock application master, with the
 * concurrent dispatcher enabled and mock sleeping disabled, and expects the DAG to succeed.
 */
@Ignore
@Test (timeout = 60000)
public void testTaskEventsProcessingSpeed() throws Exception {
  Logger.getRootLogger().setLevel(Level.WARN);

  TezConfiguration conf = new TezConfiguration(defaultConf);
  conf.setBoolean(TezConfiguration.TEZ_AM_USE_CONCURRENT_DISPATCHER, true);
  MockTezClient client = new MockTezClient("testMockAM", conf, true, null, null, null,
      null, false, false, 30, 1000);
  client.start();

  // One vertex named "A" carrying all of the tasks.
  Vertex bigVertex = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 50000);
  DAG dag = DAG.create("testTaskEventsProcessingSpeed");
  dag.addVertex(bigVertex);

  MockDAGAppMaster appMaster = client.getLocalClient().getMockApp();
  appMaster.doSleep = false;

  DAGStatus finalStatus = client.submitDAG(dag).waitForCompletion();
  Assert.assertEquals(DAGStatus.State.SUCCEEDED, finalStatus.getState());
  client.stop();
}
 
Example #5
Source File: TezProcessorContextImpl.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the processor-side task context.
 *
 * <p>Most arguments are forwarded unchanged to the superclass constructor; the counter
 * passed up is obtained from {@code runtimeTask.addAndGetTezCounter(vertexName)}.
 *
 * @param userPayload payload exposed by this context; may be {@code null}
 * @param inputReadyTracker tracker kept by this context; must not be {@code null}
 * @throws NullPointerException if {@code inputReadyTracker} is {@code null}
 */
public TezProcessorContextImpl(Configuration conf, String[] workDirs, int appAttemptNumber,
    TezUmbilical tezUmbilical, String dagName, String vertexName,
    int vertexParallelism, TezTaskAttemptID taskAttemptID,
    @Nullable UserPayload userPayload, LogicalIOProcessorRuntimeTask runtimeTask,
    Map<String, ByteBuffer> serviceConsumerMetadata,
    Map<String, String> auxServiceEnv, MemoryDistributor memDist,
    ProcessorDescriptor processorDescriptor, InputReadyTracker inputReadyTracker, ObjectRegistry objectRegistry,
    ExecutionContext ExecutionContext, long memAvailable, TezExecutors sharedExecutor) {
  super(
      conf, workDirs, appAttemptNumber, dagName, vertexName, vertexParallelism, taskAttemptID,
      runtimeTask.addAndGetTezCounter(vertexName), runtimeTask, tezUmbilical,
      serviceConsumerMetadata, auxServiceEnv, memDist, processorDescriptor, objectRegistry,
      ExecutionContext, memAvailable, sharedExecutor);
  // Validate and store in one step; requireNonNull returns its argument.
  this.inputReadyTracker = Objects.requireNonNull(inputReadyTracker, "inputReadyTracker is null");
  this.userPayload = userPayload;
  // Events from this context are tagged as coming from the processor.
  this.sourceInfo =
      new EventMetaData(EventProducerConsumerType.PROCESSOR, taskVertexName, "", taskAttemptID);
}
 
Example #6
Source File: TestContainerReuse.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a task-attempt launch request for the scheduler, using a fixed task spec
 * ("dagName"/"vertexName", one input and one output).
 *
 * <p>A location hint is attached when at least one of {@code hosts} or {@code racks} is
 * non-null. Whichever array is missing is treated as an empty set.
 *
 * @param taID id of the task attempt to launch
 * @param ta the task attempt
 * @param capability resources requested for the attempt
 * @param hosts preferred hosts, or {@code null}
 * @param racks preferred racks, or {@code null}
 * @param priority priority whose numeric value is placed in the request
 * @param containerContext container context for the request
 * @return the launch request event
 */
private AMSchedulerEventTALaunchRequest createLaunchRequestEvent(
  TezTaskAttemptID taID, TaskAttempt ta, Resource capability,
  String[] hosts, String[] racks, Priority priority,
  ContainerContext containerContext) {
  TaskLocationHint locationHint = null;
  if (hosts != null || racks != null) {
    // The guard lets exactly one array be null. Handing a null array to the varargs
    // Sets.newHashSet(...) would throw a NullPointerException, so use an empty set instead.
    Set<String> hostsSet = (hosts == null) ? Sets.<String>newHashSet() : Sets.newHashSet(hosts);
    Set<String> racksSet = (racks == null) ? Sets.<String>newHashSet() : Sets.newHashSet(racks);
    locationHint = TaskLocationHint.createTaskLocationHint(hostsSet, racksSet);
  }
  AMSchedulerEventTALaunchRequest lr = new AMSchedulerEventTALaunchRequest(
    taID, capability, new TaskSpec(taID, "dagName", "vertexName", -1,
      ProcessorDescriptor.create("processorClassName"),
    Collections.singletonList(new InputSpec("vertexName",
        InputDescriptor.create("inputClassName"), 1)),
    Collections.singletonList(new OutputSpec("vertexName",
        OutputDescriptor.create("outputClassName"), 1)), null, null), ta, locationHint,
    priority.getPriority(), containerContext, 0, 0, 0);
  return lr;
}
 
Example #7
Source File: TestMockDAGAppMaster.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Sends a scheduling-service error to the mock application master while a DAG's containers
 * are launched, waits for the shutdown handler to be invoked, and checks that the current
 * DAG is still reported as RUNNING.
 */
@Test (timeout = 10000)
public void testSchedulerErrorHandling() throws Exception {
  TezConfiguration conf = new TezConfiguration(defaultConf);
  MockTezClient client = new MockTezClient("testMockAM", conf, true, null, null, null, null);
  client.start();

  MockDAGAppMaster appMaster = client.getLocalClient().getMockApp();
  MockContainerLauncher launcher = appMaster.getContainerLauncher();
  launcher.startScheduling(false);

  Vertex vertexA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 5);
  DAG dag = DAG.create("testSchedulerErrorHandling");
  dag.addVertex(vertexA);

  client.submitDAG(dag);
  launcher.waitTillContainersLaunched();

  // Inject the failure as a stringified exception.
  String mockError =
      org.apache.hadoop.util.StringUtils.stringifyException(new RuntimeException("Mock error"));
  appMaster.handle(new DAGAppMasterEventSchedulingServiceError(mockError));

  // Poll until the shutdown handler has been triggered.
  while (!appMaster.getShutdownHandler().wasShutdownInvoked()) {
    Thread.sleep(100);
  }
  Assert.assertEquals(DAGState.RUNNING, appMaster.getContext().getCurrentDAG().getState());
}
 
Example #8
Source File: TezProcessorContextImpl.java    From incubator-tez with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the processor-side task context.
 *
 * <p>Most arguments are forwarded unchanged to the superclass constructor.
 *
 * @param userPayload raw payload bytes, converted with
 *     {@code DagTypeConverters.convertToTezUserPayload}; may be {@code null}
 * @param inputReadyTracker tracker kept by this context; checked with {@code checkNotNull}
 */
public TezProcessorContextImpl(Configuration conf, String[] workDirs, int appAttemptNumber,
    TezUmbilical tezUmbilical, String dagName, String vertexName,
    TezTaskAttemptID taskAttemptID, TezCounters counters,
    @Nullable byte[] userPayload, RuntimeTask runtimeTask,
    Map<String, ByteBuffer> serviceConsumerMetadata,
    Map<String, String> auxServiceEnv, MemoryDistributor memDist,
    ProcessorDescriptor processorDescriptor, InputReadyTracker inputReadyTracker) {
  super(
      conf, workDirs, appAttemptNumber, dagName, vertexName, taskAttemptID, counters,
      runtimeTask, tezUmbilical, serviceConsumerMetadata, auxServiceEnv, memDist,
      processorDescriptor);
  // The tracker is the only argument validated at this level.
  checkNotNull(inputReadyTracker, "inputReadyTracker is null");
  this.inputReadyTracker = inputReadyTracker;
  this.userPayload = DagTypeConverters.convertToTezUserPayload(userPayload);
  // Events from this context are tagged as coming from the processor.
  this.sourceInfo =
      new EventMetaData(EventProducerConsumerType.PROCESSOR, taskVertexName, "", taskAttemptID);
}
 
Example #9
Source File: TestTaskErrorsUsingLocalMode.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a one-task DAG whose processor is configured for a fatal failure, then checks that
 * the DAG ends in FAILED state with exactly one failed task attempt on the vertex.
 */
@Test(timeout = 20000)
public void testFatalErrorReported() throws IOException, TezException, InterruptedException {
  TezClient client = getTezClient("testFatalErrorReported");
  DAGClient dagClient = null;
  try {
    FailingProcessor.configureForFatalFail();

    ProcessorDescriptor failingProcessor =
        ProcessorDescriptor.create(FailingProcessor.class.getName());
    Vertex failingVertex = Vertex.create(VERTEX_NAME, failingProcessor, 1);
    DAG dag = DAG.create("testFatalErrorReportedDag").addVertex(failingVertex);

    dagClient = client.submitDAG(dag);
    dagClient.waitForCompletion();

    assertEquals(DAGStatus.State.FAILED, dagClient.getDAGStatus(null).getState());
    assertEquals(1,
        dagClient.getVertexStatus(VERTEX_NAME, null).getProgress().getFailedTaskAttemptCount());
  } finally {
    // The DAG client only exists once submission succeeded.
    if (dagClient != null) {
      dagClient.close();
    }
    client.stop();
  }
}
 
Example #10
Source File: TopKDataGen.java    From sequenceiq-samples with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the "TopK DataGen" DAG: a single "datagen" vertex that runs
 * {@code GenDataProcessor} and writes text output to {@code outPath}.
 *
 * @param tezConf base configuration copied for the output
 * @param outPath destination of the generated data
 * @param outSize total output size, split evenly (integer division) across the tasks
 * @param extraColumns passed through to {@code GenDataProcessor.createConfiguration}
 * @param numTasks parallelism of the vertex
 * @return the assembled DAG
 * @throws IOException propagated from building the processor payload or the output
 */
private DAG createDag(TezConfiguration tezConf, Path outPath, long outSize, int extraColumns, int numTasks)
        throws IOException {
    long outSizePerTask = outSize / numTasks;

    UserPayload genDataPayload = UserPayload.create(
            ByteBuffer.wrap(GenDataProcessor.createConfiguration(outSizePerTask, extraColumns)));
    ProcessorDescriptor genDataProcessor = ProcessorDescriptor
            .create(GenDataProcessor.class.getName())
            .setUserPayload(genDataPayload);

    Vertex dataGenVertex = Vertex.create("datagen", genDataProcessor, numTasks);
    dataGenVertex.addDataSink(OUTPUT,
            MROutput.createConfigBuilder(new Configuration(tezConf),
                    TextOutputFormat.class, outPath.toUri().toString()).build());

    DAG dataGenDag = DAG.create("TopK DataGen");
    dataGenDag.addVertex(dataGenVertex);
    return dataGenDag;
}
 
Example #11
Source File: TestLogicalIOProcessorRuntimeTask.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code TaskSpec} whose processor descriptor, input specs and output specs come
 * from this class's {@code createProcessorDescriptor}, {@code createInputSpecList} and
 * {@code createOutputSpecList} helpers; the last two constructor arguments are {@code null}.
 */
private TaskSpec createTaskSpec(TezTaskAttemptID taskAttemptID,
    String dagName, String vertexName, int parallelism,
    String processorClassname, String outputClassName) {
  ProcessorDescriptor descriptor = createProcessorDescriptor(processorClassname);
  return new TaskSpec(
      taskAttemptID,
      dagName,
      vertexName,
      parallelism,
      descriptor,
      createInputSpecList(),
      createOutputSpecList(outputClassName),
      null,
      null);
}
 
Example #12
Source File: MapUtils.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code LogicalIOProcessorRuntimeTask} that runs {@code MapProcessor}.
 *
 * <p>The job configuration's input format is set to {@code SequenceFileInputFormat} before
 * it is turned into the processor's user payload. A freshly constructed job token is
 * registered as metadata for the shuffle handler service.
 *
 * @param workDir the task's only working directory
 * @param jobConf job configuration; modified by this method
 * @param mapId task index used for the mock task attempt id
 * @return the runtime task
 */
public static LogicalIOProcessorRuntimeTask createLogicalTask(FileSystem fs, Path workDir,
    JobConf jobConf, int mapId, Path mapInput,
    TezUmbilical umbilical, String dagName,
    String vertexName, List<InputSpec> inputSpecs,
    List<OutputSpec> outputSpecs) throws Exception {
  // Must happen before the configuration is serialized into the payload below.
  jobConf.setInputFormat(SequenceFileInputFormat.class);

  ProcessorDescriptor mapProcessor = new ProcessorDescriptor(MapProcessor.class.getName())
      .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf));

  Token<JobTokenIdentifier> emptyShuffleToken = new Token<JobTokenIdentifier>();

  TaskSpec spec = new TaskSpec(TezTestUtils.getMockTaskAttemptId(0, 0, mapId, 0),
      dagName, vertexName, mapProcessor, inputSpecs, outputSpecs, null);

  Map<String, ByteBuffer> serviceMetadata = new HashMap<String, ByteBuffer>();
  serviceMetadata.put(ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID,
      ShuffleUtils.convertJobTokenToBytes(emptyShuffleToken));

  String[] workDirs = new String[] {workDir.toString()};
  return new LogicalIOProcessorRuntimeTask(spec, 0, jobConf, workDirs, umbilical,
      serviceMetadata, HashMultimap.<String, String>create());
}
 
Example #13
Source File: TaskSpec.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Populates this task spec from {@code in}.
 *
 * <p>Read order: task attempt id, DAG name, vertex name, vertex parallelism, processor
 * descriptor, counted input specs, counted output specs, an optional counted list of group
 * input specs (preceded by a presence flag), and an optional configuration (preceded by a
 * presence flag).
 *
 * <p>When a presence flag is {@code false} the corresponding field is reset to
 * {@code null}, so reusing an instance for a second {@code readFields} call cannot leak
 * values from the previous record.
 *
 * @param in stream to read from
 * @throws IOException if reading from {@code in} fails
 */
@Override
public void readFields(DataInput in) throws IOException {
  taskAttemptId = TezTaskAttemptID.readTezTaskAttemptID(in);
  dagName = StringInterner.weakIntern(in.readUTF());
  vertexName = StringInterner.weakIntern(in.readUTF());
  vertexParallelism = in.readInt();
  // TODO TEZ-305 convert this to PB
  processorDescriptor = new ProcessorDescriptor();
  processorDescriptor.readFields(in);
  int numInputSpecs = in.readInt();
  inputSpecList = new ArrayList<InputSpec>(numInputSpecs);
  for (int i = 0; i < numInputSpecs; i++) {
    InputSpec inputSpec = new InputSpec();
    inputSpec.readFields(in);
    inputSpecList.add(inputSpec);
  }
  int numOutputSpecs = in.readInt();
  outputSpecList = new ArrayList<OutputSpec>(numOutputSpecs);
  for (int i = 0; i < numOutputSpecs; i++) {
    OutputSpec outputSpec = new OutputSpec();
    outputSpec.readFields(in);
    outputSpecList.add(outputSpec);
  }
  boolean hasGroupInputs = in.readBoolean();
  if (hasGroupInputs) {
    int numGroups = in.readInt();
    groupInputSpecList = Lists.newArrayListWithCapacity(numGroups);
    for (int i = 0; i < numGroups; ++i) {
      GroupInputSpec group = new GroupInputSpec();
      group.readFields(in);
      groupInputSpecList.add(group);
    }
  } else {
    // Drop any list left over from a previous readFields call on this instance.
    groupInputSpecList = null;
  }
  boolean hasVertexConf = in.readBoolean();
  if (hasVertexConf) {
    taskConf = new Configuration(false);
    taskConf.readFields(in);
  } else {
    // Same reasoning: an absent configuration must not inherit a stale one.
    taskConf = null;
  }
}
 
Example #14
Source File: TaskAttemptImpl.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Assembles the {@code TaskSpec} for this attempt from its vertex: DAG name, vertex name,
 * processor descriptor, and the input, output and group-input specs for this task's index.
 *
 * @return a new task spec carrying this attempt's id
 */
TaskSpec createRemoteTaskSpec() {
  Vertex owner = getVertex();
  ProcessorDescriptor processor = owner.getProcessorDescriptor();
  // Spec lists are looked up per task index.
  int taskIndex = getTaskID().getId();
  return new TaskSpec(
      getID(),
      owner.getDAG().getName(),
      owner.getName(),
      processor,
      owner.getInputSpecList(taskIndex),
      owner.getOutputSpecList(taskIndex),
      owner.getGroupInputSpecList(taskIndex));
}
 
Example #15
Source File: WordCount.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the two-vertex "WordCount" DAG: a "tokenizer" vertex reading text from
 * {@code inputPath} and a "summer" vertex writing text to {@code outputPath}, joined by an
 * ordered, partitioned key-value edge keyed by {@code Text} with {@code IntWritable} values.
 *
 * @param tezConf base configuration copied for the input and the output
 * @param inputPath value set for {@code FileInputFormat.INPUT_DIR}
 * @param outputPath value set for {@code FileOutputFormat.OUTDIR}
 * @return the assembled DAG
 * @throws IOException propagated from creating the input/output payloads
 */
private DAG createDAG(FileSystem fs, TezConfiguration tezConf,
    Map<String, LocalResource> localResources, Path stagingDir,
    String inputPath, String outputPath) throws IOException {

  // Source side: MRInput over the given input directory.
  Configuration mrInputConf = new Configuration(tezConf);
  mrInputConf.set(FileInputFormat.INPUT_DIR, inputPath);
  InputDescriptor inputDescriptor = new InputDescriptor(MRInput.class.getName())
      .setUserPayload(
          MRInput.createUserPayload(mrInputConf, TextInputFormat.class.getName(), true, true));

  // Sink side: MROutput into the given output directory.
  Configuration mrOutputConf = new Configuration(tezConf);
  mrOutputConf.set(FileOutputFormat.OUTDIR, outputPath);
  OutputDescriptor outputDescriptor = new OutputDescriptor(MROutput.class.getName())
      .setUserPayload(
          MROutput.createUserPayload(mrOutputConf, TextOutputFormat.class.getName(), true));

  // Parallelism of -1 for the tokenizer, a single task for the summer.
  Vertex tokenizer = new Vertex("tokenizer",
      new ProcessorDescriptor(TokenProcessor.class.getName()),
      -1, MRHelpers.getMapResource(tezConf));
  tokenizer.addInput("MRInput", inputDescriptor, MRInputAMSplitGenerator.class);

  Vertex summer = new Vertex("summer",
      new ProcessorDescriptor(SumProcessor.class.getName()),
      1, MRHelpers.getReduceResource(tezConf));
  summer.addOutput("MROutput", outputDescriptor, MROutputCommitter.class);

  OrderedPartitionedKVEdgeConfigurer shuffleEdge = OrderedPartitionedKVEdgeConfigurer
      .newBuilder(Text.class.getName(), IntWritable.class.getName(),
          HashPartitioner.class.getName(), null)
      .build();

  DAG wordCount = new DAG("WordCount");
  wordCount.addVertex(tokenizer);
  wordCount.addVertex(summer);
  wordCount.addEdge(new Edge(tokenizer, summer, shuffleEdge.createDefaultEdgeProperty()));
  return wordCount;
}
 
Example #16
Source File: TaskSpec.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Populates this task spec from {@code in}.
 *
 * <p>Read order: task attempt id, DAG name, vertex name, processor descriptor, counted
 * input specs, counted output specs, and an optional counted list of group input specs
 * (preceded by a presence flag).
 *
 * <p>When the presence flag is {@code false} the group list is reset to {@code null}, so
 * reusing an instance for a second {@code readFields} call cannot leak the previous
 * record's groups.
 *
 * @param in stream to read from
 * @throws IOException if reading from {@code in} fails
 */
@Override
public void readFields(DataInput in) throws IOException {
  taskAttemptId = TezTaskAttemptID.readTezTaskAttemptID(in);
  dagName = StringInterner.weakIntern(in.readUTF());
  vertexName = StringInterner.weakIntern(in.readUTF());
  // TODO TEZ-305 convert this to PB
  processorDescriptor = new ProcessorDescriptor();
  processorDescriptor.readFields(in);
  int numInputSpecs = in.readInt();
  inputSpecList = new ArrayList<InputSpec>(numInputSpecs);
  for (int i = 0; i < numInputSpecs; i++) {
    InputSpec inputSpec = new InputSpec();
    inputSpec.readFields(in);
    inputSpecList.add(inputSpec);
  }
  int numOutputSpecs = in.readInt();
  outputSpecList = new ArrayList<OutputSpec>(numOutputSpecs);
  for (int i = 0; i < numOutputSpecs; i++) {
    OutputSpec outputSpec = new OutputSpec();
    outputSpec.readFields(in);
    outputSpecList.add(outputSpec);
  }
  boolean hasGroupInputs = in.readBoolean();
  if (hasGroupInputs) {
    int numGroups = in.readInt();
    groupInputSpecList = Lists.newArrayListWithCapacity(numGroups);
    for (int i = 0; i < numGroups; ++i) {
      GroupInputSpec group = new GroupInputSpec();
      group.readFields(in);
      groupInputSpecList.add(group);
    }
  } else {
    // Drop any list left over from a previous readFields call on this instance.
    groupInputSpecList = null;
  }
}
 
Example #17
Source File: TestMemoryWithEvents.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Memory test for root-input events: two unconnected vertices "A" and "B", where "A" has a
 * data source initialized by {@code SimulationInitializer}. Delegates to
 * {@code testMemory(dag, false)}.
 */
@Ignore
@Test (timeout = 600000)
public void testMemoryRootInputEvents() throws Exception {
  Vertex sourceVertex = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), numTasks);
  Vertex otherVertex = Vertex.create("B", ProcessorDescriptor.create("Proc.class"), numTasks);

  // Only "A" receives the root input.
  sourceVertex.addDataSource("Input", DataSourceDescriptor.create(
      InputDescriptor.create("In"),
      InputInitializerDescriptor.create(SimulationInitializer.class.getName()),
      null));

  DAG dag = DAG.create("testMemoryRootInputEvents");
  dag.addVertex(sourceVertex).addVertex(otherVertex);
  testMemory(dag, false);
}
 
Example #18
Source File: LogicalIOProcessorRuntimeTask.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Instantiates the processor class named by the descriptor and returns it as a
 * {@code LogicalIOProcessor}.
 *
 * @param processorDescriptor descriptor supplying the processor class name
 * @return the new processor instance
 * @throws TezUncheckedException if the instance is not a {@code LogicalIOProcessor}
 */
private LogicalIOProcessor createProcessor(
    ProcessorDescriptor processorDescriptor) {
  Processor instance =
      ReflectionUtils.createClazzInstance(processorDescriptor.getClassName());
  if (instance instanceof LogicalIOProcessor) {
    return (LogicalIOProcessor) instance;
  }
  // Anything else cannot be run by this task type.
  throw new TezUncheckedException(instance.getClass().getName()
      + " is not a sub-type of LogicalIOProcessor."
      + " Only LogicalIOProcessor sub-types supported by LogicalIOProcessorRuntimeTask.");
}
 
Example #19
Source File: TestMemoryWithEvents.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Memory test for a broadcast edge: vertices "A" and "B" connected by a BROADCAST,
 * PERSISTED, SEQUENTIAL edge. Delegates to {@code testMemory(dag, true)}.
 */
@Ignore
@Test (timeout = 600000)
public void testMemoryBroadcast() throws Exception {
  Vertex producer = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), numTasks);
  Vertex consumer = Vertex.create("B", ProcessorDescriptor.create("Proc.class"), numTasks);

  EdgeProperty broadcast = EdgeProperty.create(
      DataMovementType.BROADCAST,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      OutputDescriptor.create("Out"),
      InputDescriptor.create("In"));

  DAG dag = DAG.create("testMemoryBroadcast");
  dag.addVertex(producer);
  dag.addVertex(consumer);
  dag.addEdge(Edge.create(producer, consumer, broadcast));
  testMemory(dag, true);
}
 
Example #20
Source File: TestExternalTezServices.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Submits a three-task {@code SleepProcessor} vertex under the instrumented-failures DAG
 * name on the external-service push execution context, and expects the DAG to succeed with
 * exactly one failed and one rejected task attempt.
 */
private void runExceptionSimulation() throws IOException, TezException, InterruptedException {
  DAG dag = DAG.create(ContainerRunnerImpl.DAG_NAME_INSTRUMENTED_FAILURES);

  ProcessorDescriptor sleepProcessor = ProcessorDescriptor.create(SleepProcessor.class.getName());
  Vertex sleepVertex = Vertex.create("Vertex1", sleepProcessor, 3);
  sleepVertex.setExecutionContext(EXECUTION_CONTEXT_EXT_SERVICE_PUSH);
  dag.addVertex(sleepVertex);

  DAGClient client = extServiceTestHelper.getSharedTezClient().submitDAG(dag);
  DAGStatus finalStatus = client.waitForCompletion();

  assertEquals(DAGStatus.State.SUCCEEDED, finalStatus.getState());
  assertEquals(1, finalStatus.getDAGProgress().getFailedTaskAttemptCount());
  assertEquals(1, finalStatus.getDAGProgress().getRejectedTaskAttemptCount());
}
 
Example #21
Source File: TestTezClient.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Submits one DAG to a first client and then the same DAG object to a second client.
 *
 * @param isSession whether the first client is created in session mode
 */
public void testMultipleSubmissionsJob(boolean isSession) throws Exception {
  TezClientForTest firstClient = configureAndCreateTezClient(
      new HashMap<String, LocalResource>(), isSession, null);
  when(firstClient.mockYarnClient.getApplicationReport(firstClient.mockAppId)
      .getYarnApplicationState())
      .thenReturn(YarnApplicationState.RUNNING);
  firstClient.start();

  // One local resource attached at DAG level, another at vertex level.
  LocalResource dagResource = LocalResource.newInstance(
      URL.newInstance("file", "localhost", 0, "/test"), LocalResourceType.FILE,
      LocalResourceVisibility.PUBLIC, 1, 1);
  Map<String, LocalResource> dagResources = Collections.singletonMap("LR1", dagResource);

  LocalResource vertexResource = LocalResource.newInstance(
      URL.newInstance("file", "localhost", 0, "/test1"), LocalResourceType.FILE,
      LocalResourceVisibility.PUBLIC, 1, 1);
  Map<String, LocalResource> vertexResources = Collections.singletonMap("LR2", vertexResource);

  Vertex vertex = Vertex
      .create("Vertex", ProcessorDescriptor.create("P"), 1, Resource.newInstance(1, 1))
      .addTaskLocalFiles(vertexResources);
  DAG dag = DAG.create("DAG").addVertex(vertex).addTaskLocalFiles(dagResources);

  // the dag resource will be added to the vertex once
  firstClient.submitDAG(dag);

  TezClientForTest secondClient = configureAndCreateTezClient();
  when(secondClient.mockYarnClient.getApplicationReport(secondClient.mockAppId)
      .getYarnApplicationState())
      .thenReturn(YarnApplicationState.RUNNING);
  secondClient.start();

  // verify resubmission of same dag to new client (simulates submission error resulting in the
  // creation of a new client and resubmission of the DAG)
  secondClient.submitDAG(dag);

  firstClient.stop();
  secondClient.stop();
}
 
Example #22
Source File: JoinDataGen.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the "JoinDataGen" DAG: a single "datagen" vertex running {@code GenDataProcessor}
 * with three text data sinks (stream, hash and expected output).
 *
 * @param tezConf base configuration copied for each sink
 * @param largeOutPath destination of the stream output
 * @param smallOutPath destination of the hash output
 * @param expectedOutputPath destination of the expected output
 * @param numTasks parallelism of the vertex; also the divisor for the per-task sizes
 * @param largeOutSize total size of the large output
 * @param smallOutSize total size of the small output
 * @return the assembled DAG
 * @throws IOException propagated from building the processor payload or the sinks
 */
private DAG createDag(TezConfiguration tezConf, Path largeOutPath, Path smallOutPath,
    Path expectedOutputPath, int numTasks, long largeOutSize, long smallOutSize)
    throws IOException {
  // Integer division: each task generates an equal share.
  long largePerTask = largeOutSize / numTasks;
  long smallPerTask = smallOutSize / numTasks;

  UserPayload genDataPayload = UserPayload.create(
      ByteBuffer.wrap(GenDataProcessor.createConfiguration(largePerTask, smallPerTask)));
  ProcessorDescriptor genDataProcessor = ProcessorDescriptor
      .create(GenDataProcessor.class.getName())
      .setUserPayload(genDataPayload);

  Vertex dataGenVertex = Vertex.create("datagen", genDataProcessor, numTasks);
  dataGenVertex.addDataSink(STREAM_OUTPUT_NAME,
      MROutput.createConfigBuilder(new Configuration(tezConf),
          TextOutputFormat.class, largeOutPath.toUri().toString()).build());
  dataGenVertex.addDataSink(HASH_OUTPUT_NAME,
      MROutput.createConfigBuilder(new Configuration(tezConf),
          TextOutputFormat.class, smallOutPath.toUri().toString()).build());
  dataGenVertex.addDataSink(EXPECTED_OUTPUT_NAME,
      MROutput.createConfigBuilder(new Configuration(tezConf),
          TextOutputFormat.class, expectedOutputPath.toUri().toString()).build());

  DAG dataGenDag = DAG.create("JoinDataGen");
  dataGenDag.addVertex(dataGenVertex);
  return dataGenDag;
}
 
Example #23
Source File: CartesianProduct.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the "CrossProduct" DAG: two source vertices, each running {@code TokenProcessor}
 * over the same fake data source, feed a third {@code JoinProcessor} vertex through custom
 * edges managed by {@code CartesianProductEdgeManager}.
 *
 * <p>The same user payload, derived from a {@code CartesianProductConfig} over
 * {@code sourceVertices}, is given to both the vertex manager plugin and the edge manager.
 *
 * @param tezConf configuration used when serializing the cartesian product config
 * @return the assembled DAG
 * @throws IOException propagated from the calls made while building the DAG
 */
private DAG createDAG(TezConfiguration tezConf) throws IOException {
  // Shared data source for both input vertices.
  InputDescriptor fakeInput = InputDescriptor.create(FakeInput.class.getName());
  InputInitializerDescriptor fakeInitializer =
    InputInitializerDescriptor.create(FakeInputInitializer.class.getName());
  DataSourceDescriptor source = DataSourceDescriptor.create(fakeInput, fakeInitializer, null);

  Vertex first = Vertex.create(VERTEX1, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  first.addDataSource(INPUT, source);
  Vertex second = Vertex.create(VERTEX2, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  second.addDataSource(INPUT, source);

  // Data sink for the joining vertex.
  OutputDescriptor fakeOutput = OutputDescriptor.create(FakeOutput.class.getName());
  OutputCommitterDescriptor fakeCommitter =
    OutputCommitterDescriptor.create(FakeOutputCommitter.class.getName());
  DataSinkDescriptor sink = DataSinkDescriptor.create(fakeOutput, fakeCommitter, null);

  CartesianProductConfig productConfig =
    new CartesianProductConfig(Arrays.asList(sourceVertices));
  UserPayload productPayload = productConfig.toUserPayload(tezConf);

  Vertex joiner = Vertex.create(VERTEX3, ProcessorDescriptor.create(JoinProcessor.class.getName()));
  joiner.addDataSink(OUTPUT, sink);
  joiner.setVertexManagerPlugin(
    VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName())
                                 .setUserPayload(productPayload));

  EdgeManagerPluginDescriptor edgeManager =
    EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName());
  edgeManager.setUserPayload(productPayload);
  UnorderedPartitionedKVEdgeConfig kvEdge =
    UnorderedPartitionedKVEdgeConfig.newBuilder(Text.class.getName(), IntWritable.class.getName(),
      RoundRobinPartitioner.class.getName()).build();
  EdgeProperty customEdge = kvEdge.createDefaultCustomEdgeProperty(edgeManager);

  DAG crossProduct = DAG.create("CrossProduct");
  crossProduct.addVertex(first).addVertex(second).addVertex(joiner);
  crossProduct.addEdge(Edge.create(first, joiner, customEdge));
  crossProduct.addEdge(Edge.create(second, joiner, customEdge));
  return crossProduct;
}
 
Example #24
Source File: RPCLoadGen.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the "RPCLoadGen" DAG: a single "RPCLoadVertex" running {@code RPCSleepProcessor}
 * with the payload from {@code createUserPayload}; the local resources map passed to that
 * call is attached to the vertex as task-local files.
 *
 * @param numTasks parallelism of the vertex
 * @return the assembled DAG
 * @throws IOException propagated from {@code createUserPayload}
 */
private DAG createDAG(TezConfiguration conf, int numTasks, int maxSleepTimeMillis,
                      int payloadSize, String mode) throws IOException {
  // Handed to createUserPayload before being attached to the vertex.
  Map<String, LocalResource> taskLocalFiles = new HashMap<String, LocalResource>();
  UserPayload loadPayload =
      createUserPayload(conf, maxSleepTimeMillis, payloadSize, mode, taskLocalFiles);

  ProcessorDescriptor sleepProcessor = ProcessorDescriptor
      .create(RPCSleepProcessor.class.getName())
      .setUserPayload(loadPayload);
  Vertex loadVertex = Vertex.create("RPCLoadVertex", sleepProcessor, numTasks)
      .addTaskLocalFiles(taskLocalFiles);

  return DAG.create("RPCLoadGen").addVertex(loadVertex);
}
 
Example #25
Source File: TestMRRJobsDAGApi.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a single-task {@code SleepProcessor} DAG in a non-session client, polls until the
 * DAG completes, and checks the final state and that counters are available.
 *
 * <p>The client is stopped in a {@code finally} block so that a failed assertion or a
 * failing status call does not leave a started client behind.
 */
@Test(timeout = 60000)
public void testSleepJob() throws TezException, IOException, InterruptedException {
  SleepProcessorConfig spConf = new SleepProcessorConfig(1);

  DAG dag = DAG.create("TezSleepProcessor");
  Vertex vertex = Vertex.create("SleepVertex", ProcessorDescriptor.create(
          SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 1,
      Resource.newInstance(1024, 1));
  dag.addVertex(vertex);

  TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
  Path remoteStagingDir = remoteFs.makeQualified(new Path("/tmp", String.valueOf(random
      .nextInt(100000))));
  remoteFs.mkdirs(remoteStagingDir);
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString());

  TezClient tezSession = TezClient.create("TezSleepProcessor", tezConf, false);
  tezSession.start();
  try {
    DAGClient dagClient = tezSession.submitDAG(dag);

    DAGStatus dagStatus = dagClient.getDAGStatus(null);
    while (!dagStatus.isCompleted()) {
      LOG.info("Waiting for job to complete. Sleeping for 500ms." + " Current state: "
          + dagStatus.getState());
      Thread.sleep(500L);
      dagStatus = dagClient.getDAGStatus(null);
    }
    // Fetch the status once more, this time asking for counters.
    dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));

    assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState());
    assertNotNull(dagStatus.getDAGCounters());
    assertNotNull(dagStatus.getDAGCounters().getGroup(FileSystemCounter.class.getName()));
    assertNotNull(dagStatus.getDAGCounters().findCounter(TaskCounter.GC_TIME_MILLIS));
    ExampleDriver.printDAGStatus(dagClient, new String[] { "SleepVertex" }, true, true);
  } finally {
    // Always release the client, even when something above throws.
    tezSession.stop();
  }
}
 
Example #26
Source File: TestATSHistoryWithMiniCluster.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a single-task {@code SleepProcessor} DAG in session mode with
 * {@code ATSHistoryLoggingService} as the history logger and disabled timeline domains
 * allowed, then expects the DAG to succeed.
 */
@Test (timeout=50000)
public void testDisabledACls() throws Exception {
  TezClient session = null;
  try {
    SleepProcessorConfig sleepConfig = new SleepProcessorConfig(1);
    ProcessorDescriptor sleepProcessor = ProcessorDescriptor
        .create(SleepProcessor.class.getName())
        .setUserPayload(sleepConfig.toUserPayload());

    DAG dag = DAG.create("TezSleepProcessor");
    dag.addVertex(
        Vertex.create("SleepVertex", sleepProcessor, 1, Resource.newInstance(256, 1)));

    TezConfiguration conf = new TezConfiguration(mrrTezCluster.getConfig());
    conf.setBoolean(TezConfiguration.TEZ_AM_ALLOW_DISABLED_TIMELINE_DOMAINS, true);
    conf.set(TezConfiguration.TEZ_HISTORY_LOGGING_SERVICE_CLASS,
        ATSHistoryLoggingService.class.getName());

    // Staging directory under /tmp with a random numeric name.
    String stagingName = String.valueOf(random.nextInt(100000));
    Path stagingDir = remoteFs.makeQualified(new Path("/tmp", stagingName));
    remoteFs.mkdirs(stagingDir);
    conf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());

    session = TezClient.create("TezSleepProcessor", conf, true);
    session.start();

    DAGClient dagClient = session.submitDAG(dag);

    // Poll every 500 ms until the DAG reports completion.
    DAGStatus status;
    for (status = dagClient.getDAGStatus(null);
        !status.isCompleted();
        status = dagClient.getDAGStatus(null)) {
      LOG.info("Waiting for job to complete. Sleeping for 500ms." + " Current state: "
          + status.getState());
      Thread.sleep(500L);
    }
    Assert.assertEquals(DAGStatus.State.SUCCEEDED, status.getState());
  } finally {
    if (session != null) {
      session.stop();
    }
  }
}
 
Example #27
Source File: TestMockDAGAppMaster.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * With {@code recoveryFatalError} set on the mock application master, runs a five-task DAG
 * and expects the DAG to succeed while the application master ends in FAILED state with a
 * diagnostic about the recovery error.
 */
@Test(timeout = 5000)
public void testDAGFinishedRecoveryError() throws Exception {
  TezConfiguration conf = new TezConfiguration(defaultConf);
  MockTezClient client = new MockTezClient("testMockAM", conf, true, null, null, null, null);
  client.start();

  MockDAGAppMaster appMaster = client.getLocalClient().getMockApp();
  appMaster.recoveryFatalError = true;
  MockContainerLauncher launcher = appMaster.getContainerLauncher();
  launcher.startScheduling(true);

  Vertex vertexA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 5);
  DAG dag = DAG.create("test");
  dag.addVertex(vertexA);

  client.submitDAG(dag).waitForCompletion();

  // Poll until the shutdown handler has been triggered.
  while (!appMaster.getShutdownHandler().wasShutdownInvoked()) {
    Thread.sleep(100);
  }

  Assert.assertEquals(DAGState.SUCCEEDED, appMaster.getContext().getCurrentDAG().getState());
  Assert.assertEquals(DAGAppMasterState.FAILED, appMaster.getState());
  String diagnostics = StringUtils.join(appMaster.getDiagnostics(),",");
  Assert.assertTrue(diagnostics
      .contains("Recovery had a fatal error, shutting down session after" +
            " DAG completion"));
}
 
Example #28
Source File: TaskSpec.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Convenience constructor: forwards every argument unchanged to the eight-argument constructor
 * and passes {@code null} for that constructor's trailing parameter.
 *
 * @param dagName forwarded as-is
 * @param vertexName forwarded as-is
 * @param vertexParallelism forwarded as-is
 * @param processorDescriptor forwarded as-is
 * @param inputSpecList forwarded as-is
 * @param outputSpecList forwarded as-is
 * @param groupInputSpecList forwarded as-is; annotated {@code @Nullable}
 */
public TaskSpec(
    String dagName,
    String vertexName,
    int vertexParallelism,
    ProcessorDescriptor processorDescriptor,
    List<InputSpec> inputSpecList,
    List<OutputSpec> outputSpecList,
    @Nullable List<GroupInputSpec> groupInputSpecList) {
  this(
      dagName,
      vertexName,
      vertexParallelism,
      processorDescriptor,
      inputSpecList,
      outputSpecList,
      groupInputSpecList,
      null);
}
 
Example #29
Source File: TestTezJobs.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Submits a two-vertex DAG (Parent -> Child, both using {@code FailingAttemptProcessor}) with
 * {@code TEZ_VERTEX_FAILURES_MAXPERCENT} set to "50.0f" and a single allowed task attempt, and
 * expects the DAG to finish in the SUCCEEDED state.
 */
@Test(timeout = 60000)
public void testVertexFailuresMaxPercent() throws TezException, InterruptedException, IOException {

  TezConfiguration conf = new TezConfiguration(mrrTezCluster.getConfig());
  conf.set(TezConfiguration.TEZ_VERTEX_FAILURES_MAXPERCENT, "50.0f");
  conf.setInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, 1);

  TezClient client = TezClient.create("TestVertexFailuresMaxPercent", conf);
  client.start();

  try {
    DAG dag = DAG.create("TestVertexFailuresMaxPercent");

    // Both vertices run the same processor class; each gets its own descriptor instance.
    String processorClassName = FailingAttemptProcessor.class.getName();
    Vertex parent = Vertex.create("Parent", ProcessorDescriptor.create(processorClassName), 2);
    Vertex child = Vertex.create("Child", ProcessorDescriptor.create(processorClassName), 2);

    OrderedPartitionedKVEdgeConfig orderedEdgeConfig = OrderedPartitionedKVEdgeConfig
        .newBuilder(
            Text.class.getName(),
            IntWritable.class.getName(),
            HashPartitioner.class.getName())
        .setFromConfiguration(conf)
        .build();

    dag.addVertex(parent)
        .addVertex(child)
        .addEdge(Edge.create(parent, child, orderedEdgeConfig.createDefaultEdgeProperty()));

    DAGClient dagClient = client.submitDAG(dag);
    dagClient.waitForCompletion();

    DAGStatus finalStatus = dagClient.getDAGStatus(null);
    Assert.assertEquals(DAGStatus.State.SUCCEEDED, finalStatus.getState());
  } finally {
    client.stop();
  }
}
 
Example #30
Source File: TestTezClientUtils.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an application submission context with {@code TEZ_AM_LOG_LEVEL} set to "WARN" and
 * checks that the single AM launch command contains the expected log4j options, and that the
 * AM environment has no {@code TEZ_CONTAINER_LOG_PARAMS} entry.
 */
@Test(timeout = 5000)
public void testAMLoggingOptsSimple() throws IOException, YarnException {

  TezConfiguration conf = new TezConfiguration();
  conf.set(TezConfiguration.TEZ_AM_LOG_LEVEL, "WARN");
  conf.set(TezConfiguration.TEZ_AM_STAGING_DIR, STAGING_DIR.getAbsolutePath());

  ApplicationId applicationId = ApplicationId.newInstance(1000, 1);
  Credentials creds = new Credentials();
  JobTokenSecretManager secretManager = new JobTokenSecretManager();
  TezClientUtils.createSessionToken(applicationId.toString(), secretManager, creds);

  DAG dag = DAG.create("testdag");
  Vertex testVertex =
      Vertex.create("testVertex", ProcessorDescriptor.create("processorClassname"), 1)
          .setTaskLaunchCmdOpts("initialLaunchOpts");
  dag.addVertex(testVertex);

  AMConfiguration amConfiguration =
      new AMConfiguration(conf, new HashMap<String, LocalResource>(), creds);
  ApplicationSubmissionContext submissionContext =
      TezClientUtils.createApplicationSubmissionContext(
          applicationId, dag, "amName", amConfiguration,
          new HashMap<String, LocalResource>(), creds, false, new TezApiVersionInfo(),
          null, null);

  // Options that must each appear somewhere in the AM launch command.
  List<String> expectedOpts = new LinkedList<String>();
  expectedOpts.add("-Dlog4j.configuratorClass=org.apache.tez.common.TezLog4jConfigurator");
  expectedOpts.add("-Dlog4j.configuration=" + TezConstants.TEZ_CONTAINER_LOG4J_PROPERTIES_FILE);
  expectedOpts.add("-D" + YarnConfiguration.YARN_APP_CONTAINER_LOG_DIR + "=" +
      ApplicationConstants.LOG_DIR_EXPANSION_VAR);
  expectedOpts.add("-D" + TezConstants.TEZ_ROOT_LOGGER_NAME + "=" + "WARN" + "," +
      TezConstants.TEZ_CONTAINER_LOGGER_NAME);

  // Exactly one launch command is expected; every option is checked against it.
  List<String> amCommands = submissionContext.getAMContainerSpec().getCommands();
  assertEquals(1, amCommands.size());
  String launchCommand = amCommands.get(0);
  for (String opt : expectedOpts) {
    assertTrue(launchCommand.contains(opt));
  }

  Map<String, String> amEnv = submissionContext.getAMContainerSpec().getEnvironment();
  assertNull(amEnv.get(TezConstants.TEZ_CONTAINER_LOG_PARAMS));
}