Java Code Examples for org.apache.tez.dag.api.EdgeProperty.DataMovementType#SCATTER_GATHER

The following examples show how to use org.apache.tez.dag.api.EdgeProperty.DataMovementType#SCATTER_GATHER. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestDAGVerify.java    From incubator-tez with Apache License 2.0 6 votes vote down vote up
/**
 * Registers an extra output on v1 whose name ("v2") matches the vertex that v1
 * is also connected to through an edge. The test passes only if an
 * {@code IllegalStateException} is thrown.
 */
@Test(expected = IllegalStateException.class, timeout = 5000)
public void testOutputAndOutputVertexNameCollision() {
  Vertex producer = new Vertex("v1", new ProcessorDescriptor("MapProcessor"),
      dummyTaskCount, dummyTaskResource);
  Vertex consumer = new Vertex("v2", new ProcessorDescriptor("MapProcessor"),
      dummyTaskCount, dummyTaskResource);

  // The output name deliberately collides with the downstream vertex name.
  producer.addOutput("v2", new OutputDescriptor());

  EdgeProperty shuffleProperty = new EdgeProperty(
      DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      new OutputDescriptor("dummy output class"),
      new InputDescriptor("dummy input class"));
  Edge link = new Edge(producer, consumer, shuffleProperty);

  DAG graph = new DAG("testDag");
  graph.addVertex(producer);
  graph.addVertex(consumer);
  graph.addEdge(link);
  graph.verify();
}
 
Example 2
Source File: TestDAGVerify.java    From incubator-tez with Apache License 2.0 6 votes vote down vote up
/**
 * Registers an extra input on v2 whose name ("v1") matches the vertex that
 * feeds v2 through an edge. The test passes only if an
 * {@code IllegalStateException} is thrown.
 */
@Test(expected = IllegalStateException.class, timeout = 5000)
public void testInputAndInputVertexNameCollision() {
  Vertex upstream = new Vertex("v1", new ProcessorDescriptor("MapProcessor"),
      dummyTaskCount, dummyTaskResource);
  Vertex downstream = new Vertex("v2", new ProcessorDescriptor("MapProcessor"),
      dummyTaskCount, dummyTaskResource);

  // The input name deliberately collides with the upstream vertex name.
  downstream.addInput("v1", new InputDescriptor(), null);

  EdgeProperty shuffleProperty = new EdgeProperty(
      DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      new OutputDescriptor("dummy output class"),
      new InputDescriptor("dummy input class"));
  Edge link = new Edge(upstream, downstream, shuffleProperty);

  DAG graph = new DAG("testDag");
  graph.addVertex(upstream);
  graph.addVertex(downstream);
  graph.addEdge(link);
  graph.verify();
}
 
Example 3
Source File: ShuffleVertexManagerBase.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Processes a CONFIGURED notification for one of the tracked source vertices.
 *
 * <p>Marks the source as configured, records its task count, adds that count to
 * the bipartite total when the connecting edge is SCATTER_GATHER, and then
 * re-evaluates pending tasks.
 *
 * @param stateUpdate the notification; must report {@code VertexState.CONFIGURED}
 *     for a vertex present in {@code srcVertexInfo}
 */
private void handleVertexStateUpdate(VertexStateUpdate stateUpdate) {
  final String sourceName = stateUpdate.getVertexName();
  final String ownName = getContext().getVertexName();

  Preconditions.checkArgument(VertexState.CONFIGURED == stateUpdate.getVertexState(),
      "Received incorrect state notification : " + stateUpdate.getVertexState()
          + " for vertex: " + sourceName + " in vertex: " + ownName);
  Preconditions.checkArgument(srcVertexInfo.containsKey(sourceName),
      "Received incorrect vertex notification : " + stateUpdate.getVertexState()
          + " for vertex: " + sourceName + " in vertex: " + ownName);

  SourceVertexInfo sourceInfo = srcVertexInfo.get(sourceName);
  // A source vertex must be reported as configured at most once.
  Preconditions.checkState(!sourceInfo.vertexIsConfigured);
  sourceInfo.vertexIsConfigured = true;
  sourceInfo.numTasks = getContext().getVertexNumTasks(sourceName);

  boolean isBipartite =
      sourceInfo.edgeProperty.getDataMovementType() == DataMovementType.SCATTER_GATHER;
  if (isBipartite) {
    totalNumBipartiteSourceTasks += sourceInfo.numTasks;
  }

  LOG.info(
      "Received configured notification : {} for vertex: {} in vertex: {} numBipartiteSourceTasks: {}",
      stateUpdate.getVertexState(), sourceName, ownName, totalNumBipartiteSourceTasks);
  processPendingTasks(null);
}
 
Example 4
Source File: ShuffleVertexManagerBase.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Records the completion of a source task and re-evaluates pending tasks.
 *
 * <p>Repeated notifications for the same task index are counted only once.
 * Only completions arriving over SCATTER_GATHER edges increase
 * {@code numBipartiteSourceTasksCompleted}.
 *
 * @param attempt the completed source task attempt
 */
@Override
public synchronized void onSourceTaskCompleted(TaskAttemptIdentifier attempt) {
  final String sourceName = attempt.getTaskIdentifier().getVertexIdentifier().getName();
  final int taskIndex = attempt.getTaskIdentifier().getIdentifier();
  final SourceVertexInfo source = srcVertexInfo.get(sourceName);

  // The task index can only be range-checked once the source's task count is known.
  if (source.vertexIsConfigured) {
    Preconditions.checkState(taskIndex < source.numTasks,
        "Received completion for srcTaskId " + taskIndex + " but Vertex: " + sourceName +
        " has only " + source.numTasks + " tasks");
  }

  // The bit set doubles as the duplicate-notification filter.
  final BitSet finishedTasks = source.finishedTaskSet;
  final boolean firstNotification = !finishedTasks.get(taskIndex);
  if (firstNotification) {
    finishedTasks.set(taskIndex);
    if (DataMovementType.SCATTER_GATHER == source.edgeProperty.getDataMovementType()) {
      numBipartiteSourceTasksCompleted++;
    }
  }

  processPendingTasks(attempt);
}
 
Example 5
Source File: TestDAGVerify.java    From incubator-tez with Apache License 2.0 6 votes vote down vote up
/**
 * Connects two vertices with a SCATTER_GATHER edge whose data source type is
 * EPHEMERAL and scheduling type is SEQUENTIAL. The test passes only if an
 * {@code IllegalStateException} is thrown.
 */
@Test(expected = IllegalStateException.class, timeout = 5000)
public void testVerify3() {
  Vertex source = new Vertex("v1", new ProcessorDescriptor(dummyProcessorClassName),
      dummyTaskCount, dummyTaskResource);
  Vertex sink = new Vertex("v2", new ProcessorDescriptor("MapProcessor"),
      dummyTaskCount, dummyTaskResource);

  EdgeProperty ephemeralShuffle = new EdgeProperty(
      DataMovementType.SCATTER_GATHER,
      DataSourceType.EPHEMERAL,
      SchedulingType.SEQUENTIAL,
      new OutputDescriptor(dummyOutputClassName),
      new InputDescriptor(dummyInputClassName));
  Edge link = new Edge(source, sink, ephemeralShuffle);

  DAG graph = new DAG("testDag");
  graph.addVertex(source);
  graph.addVertex(sink);
  graph.addEdge(link);
  graph.verify();
}
 
Example 6
Source File: TezEdgeDescriptor.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an edge descriptor with an empty combine plan, configured by
 * default as a shuffle edge: SCATTER_GATHER movement, PERSISTED data source,
 * SEQUENTIAL scheduling, ordered/partitioned output paired with
 * ordered/grouped input, and no partitioner.
 */
public TezEdgeDescriptor() {
    // Shuffle-edge defaults.
    this.dataMovementType = DataMovementType.SCATTER_GATHER;
    this.dataSourceType = DataSourceType.PERSISTED;
    this.schedulingType = SchedulingType.SEQUENTIAL;
    this.outputClassName = OrderedPartitionedKVOutput.class.getName();
    this.inputClassName = OrderedGroupedKVInput.class.getName();
    this.partitionerClass = null;

    this.combinePlan = new PhysicalPlan();
}
 
Example 7
Source File: TezCompilerUtil.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Configures an edge for value-only tuple output.
 *
 * <p>The movement type is always stored on the edge. BROADCAST and ONE_TO_ONE
 * select the unordered KV output/input pair. SCATTER_GATHER selects the
 * unordered partitioned output with unordered input and a round-robin
 * partitioner. Any other movement type leaves the class names untouched.
 * The intermediate key/value classes are set in every case.
 *
 * @param edge the edge descriptor to modify in place
 * @param dataMovementType the movement type to apply to the edge
 */
public static void configureValueOnlyTupleOutput(TezEdgeDescriptor edge, DataMovementType dataMovementType) {
    final boolean unpartitioned = dataMovementType == DataMovementType.BROADCAST
            || dataMovementType == DataMovementType.ONE_TO_ONE;
    final boolean partitioned = dataMovementType == DataMovementType.SCATTER_GATHER;

    edge.dataMovementType = dataMovementType;

    if (unpartitioned) {
        edge.outputClassName = UnorderedKVOutput.class.getName();
        edge.inputClassName = UnorderedKVInput.class.getName();
    } else if (partitioned) {
        edge.outputClassName = UnorderedPartitionedKVOutput.class.getName();
        edge.inputClassName = UnorderedKVInput.class.getName();
        edge.partitionerClass = RoundRobinPartitioner.class;
    }

    edge.setIntermediateOutputKeyClass(POValueOutputTez.EmptyWritable.class.getName());
    edge.setIntermediateOutputValueClass(TUPLE_CLASS);
}
 
Example 8
Source File: TestDAGVerify.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Adds two SCATTER_GATHER edges between the same pair of vertices (v1 to v2),
 * plus an unconnected vertex v3, and asserts that building and verifying the
 * DAG raises no {@code IllegalStateException}.
 */
@Test(timeout = 5000)
public void BinaryOutput() {
  IllegalStateException caught = null;
  try {
    Vertex source = new Vertex("v1", new ProcessorDescriptor("MapProcessor"),
        dummyTaskCount, dummyTaskResource);
    Vertex sink = new Vertex("v2", new ProcessorDescriptor("MapProcessor"),
        dummyTaskCount, dummyTaskResource);
    Vertex unconnected = new Vertex("v3", new ProcessorDescriptor("MapProcessor"),
        dummyTaskCount, dummyTaskResource);

    EdgeProperty firstProperty = new EdgeProperty(
        DataMovementType.SCATTER_GATHER,
        DataSourceType.PERSISTED,
        SchedulingType.SEQUENTIAL,
        new OutputDescriptor("dummy output class"),
        new InputDescriptor("dummy input class"));
    Edge firstEdge = new Edge(source, sink, firstProperty);

    EdgeProperty secondProperty = new EdgeProperty(
        DataMovementType.SCATTER_GATHER,
        DataSourceType.PERSISTED,
        SchedulingType.SEQUENTIAL,
        new OutputDescriptor("dummy output class"),
        new InputDescriptor("dummy input class"));
    Edge secondEdge = new Edge(source, sink, secondProperty);

    DAG graph = new DAG("testDag");
    graph.addVertex(source);
    graph.addVertex(sink);
    graph.addVertex(unconnected);
    graph.addEdge(firstEdge);
    graph.addEdge(secondEdge);
    graph.verify();
  } catch (IllegalStateException e) {
    caught = e;
  }
  Assert.assertNull(caught);
}
 
Example 9
Source File: TestDAGVerify.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Feeds a single vertex (v3) from two different source vertices (v1 and v2)
 * over SCATTER_GATHER edges. The test passes if building and verifying the
 * DAG throws nothing.
 */
@Test(timeout = 5000)
public void BinaryInputAllowed() {
  Vertex leftSource = new Vertex("v1", new ProcessorDescriptor("MapProcessor"),
      dummyTaskCount, dummyTaskResource);
  Vertex rightSource = new Vertex("v2", new ProcessorDescriptor("MapProcessor"),
      dummyTaskCount, dummyTaskResource);
  Vertex reducer = new Vertex("v3", new ProcessorDescriptor("ReduceProcessor"),
      dummyTaskCount, dummyTaskResource);

  EdgeProperty leftProperty = new EdgeProperty(
      DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      new OutputDescriptor("dummy output class"),
      new InputDescriptor("dummy input class"));
  Edge leftEdge = new Edge(leftSource, reducer, leftProperty);

  EdgeProperty rightProperty = new EdgeProperty(
      DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      new OutputDescriptor("dummy output class"),
      new InputDescriptor("dummy input class"));
  Edge rightEdge = new Edge(rightSource, reducer, rightProperty);

  DAG graph = new DAG("testDag");
  graph.addVertex(leftSource);
  graph.addVertex(rightSource);
  graph.addVertex(reducer);
  graph.addEdge(leftEdge);
  graph.addEdge(rightEdge);
  graph.verify();
}
 
Example 10
Source File: DagTypeConverters.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Maps a DAG-plan data movement type onto the API-level {@code DataMovementType}.
 *
 * @param type the plan-level movement type
 * @return the matching API enum constant
 * @throws IllegalArgumentException if {@code type} is not ONE_TO_ONE, BROADCAST
 *     or SCATTER_GATHER
 */
public static DataMovementType convertFromDAGPlan(PlanEdgeDataMovementType type){
  final DataMovementType converted;
  switch (type) {
    case ONE_TO_ONE:
      converted = DataMovementType.ONE_TO_ONE;
      break;
    case BROADCAST:
      converted = DataMovementType.BROADCAST;
      break;
    case SCATTER_GATHER:
      converted = DataMovementType.SCATTER_GATHER;
      break;
    default:
      throw new IllegalArgumentException("unknown 'dataMovementType': " + type);
  }
  return converted;
}
 
Example 11
Source File: DagTypeConverters.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Maps a DAG-plan data movement type onto the API-level {@code DataMovementType}.
 *
 * @param type the plan-level movement type
 * @return the matching API enum constant
 * @throws IllegalArgumentException if {@code type} is not ONE_TO_ONE, BROADCAST,
 *     SCATTER_GATHER or CUSTOM
 */
public static DataMovementType convertFromDAGPlan(PlanEdgeDataMovementType type){
  final DataMovementType converted;
  switch (type) {
    case ONE_TO_ONE:
      converted = DataMovementType.ONE_TO_ONE;
      break;
    case BROADCAST:
      converted = DataMovementType.BROADCAST;
      break;
    case SCATTER_GATHER:
      converted = DataMovementType.SCATTER_GATHER;
      break;
    case CUSTOM:
      converted = DataMovementType.CUSTOM;
      break;
    default:
      throw new IllegalArgumentException("unknown 'dataMovementType': " + type);
  }
  return converted;
}
 
Example 12
Source File: TestDAGVerify.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * Builds two overlapping vertex groups, {v1, v2} and {v2, v3}, connects them
 * to v4 and v5 respectively through group input edges, and checks the DAG
 * after {@code verify()}: the group placeholders and group edges are gone,
 * the members are wired directly to the destination vertices, and the
 * group-level output and group inputs are attached as asserted below.
 */
@Test(timeout = 5000)
public void testVertexGroup() {
  Vertex vertex1 = new Vertex("v1", new ProcessorDescriptor("Processor"),
      dummyTaskCount, dummyTaskResource);
  Vertex vertex2 = new Vertex("v2", new ProcessorDescriptor("Processor"),
      dummyTaskCount, dummyTaskResource);
  Vertex vertex3 = new Vertex("v3", new ProcessorDescriptor("Processor"),
      dummyTaskCount, dummyTaskResource);
  Vertex vertex4 = new Vertex("v4", new ProcessorDescriptor("Processor"),
      dummyTaskCount, dummyTaskResource);
  Vertex vertex5 = new Vertex("v5", new ProcessorDescriptor("Processor"),
      dummyTaskCount, dummyTaskResource);

  DAG graph = new DAG("testDag");

  // First group: v1 + v2, carrying one shared output.
  String firstGroupName = "uv12";
  VertexGroup group12 = graph.createVertexGroup(firstGroupName, vertex1, vertex2);
  OutputDescriptor sharedOutput = new OutputDescriptor();
  group12.addOutput("uvOut", sharedOutput, null);

  // Second group: v2 + v3 (v2 belongs to both groups).
  String secondGroupName = "uv23";
  VertexGroup group23 = graph.createVertexGroup(secondGroupName, vertex2, vertex3);

  EdgeProperty propertyFor12 = new EdgeProperty(
      DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      new OutputDescriptor("dummy output class"),
      new InputDescriptor("dummy input class"));
  GroupInputEdge groupEdge12 = new GroupInputEdge(group12, vertex4, propertyFor12,
      new InputDescriptor("dummy input class"));

  EdgeProperty propertyFor23 = new EdgeProperty(
      DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      new OutputDescriptor("dummy output class"),
      new InputDescriptor("dummy input class"));
  GroupInputEdge groupEdge23 = new GroupInputEdge(group23, vertex5, propertyFor23,
      new InputDescriptor("dummy input class"));

  graph.addVertex(vertex1);
  graph.addVertex(vertex2);
  graph.addVertex(vertex3);
  graph.addVertex(vertex4);
  graph.addVertex(vertex5);
  graph.addEdge(groupEdge12);
  graph.addEdge(groupEdge23);
  graph.verify();

  // Group placeholders and the group edges themselves must no longer be in the DAG.
  Assert.assertNull(graph.getVertex(group12.getGroupName()));
  Assert.assertNull(graph.getVertex(group23.getGroupName()));
  Assert.assertFalse(graph.edges.contains(groupEdge12));
  Assert.assertFalse(graph.edges.contains(groupEdge23));

  // Members of the first group each received the group-level output.
  Assert.assertEquals(1, vertex1.getOutputs().size());
  Assert.assertEquals(1, vertex2.getOutputs().size());
  Assert.assertEquals(sharedOutput, vertex1.getOutputs().get(0).getDescriptor());
  Assert.assertEquals(sharedOutput, vertex2.getOutputs().get(0).getDescriptor());

  // Outgoing connections: v1 -> v4, v3 -> v5, v2 -> {v4, v5}.
  Assert.assertEquals(1, vertex1.getOutputVertices().size());
  Assert.assertEquals(1, vertex3.getOutputVertices().size());
  Assert.assertEquals(2, vertex2.getOutputVertices().size());
  Assert.assertTrue(vertex1.getOutputVertices().contains(vertex4));
  Assert.assertTrue(vertex3.getOutputVertices().contains(vertex5));
  Assert.assertTrue(vertex2.getOutputVertices().contains(vertex4));
  Assert.assertTrue(vertex2.getOutputVertices().contains(vertex5));

  // Incoming connections: v4 <- {v1, v2}, v5 <- {v2, v3}.
  Assert.assertEquals(2, vertex4.getInputVertices().size());
  Assert.assertTrue(vertex4.getInputVertices().contains(vertex1));
  Assert.assertTrue(vertex4.getInputVertices().contains(vertex2));
  Assert.assertEquals(2, vertex5.getInputVertices().size());
  Assert.assertTrue(vertex5.getInputVertices().contains(vertex2));
  Assert.assertTrue(vertex5.getInputVertices().contains(vertex3));

  // Each destination records exactly one group input, keyed by group name.
  Assert.assertEquals(1, vertex4.getGroupInputs().size());
  Assert.assertTrue(vertex4.getGroupInputs().containsKey(firstGroupName));
  Assert.assertEquals(1, vertex5.getGroupInputs().size());
  Assert.assertTrue(vertex5.getGroupInputs().containsKey(secondGroupName));
  Assert.assertEquals(2, graph.vertexGroups.size());
}
 
Example 13
Source File: TestDAGVerify.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a four-vertex graph with edges v1->v2, v2->v3, v2->v4 and v4->v1
 * (so v1, v2 and v4 form a loop) and asserts that {@code verify()} throws an
 * {@code IllegalStateException} whose message starts with
 * "DAG contains a cycle".
 */
@Test(timeout = 5000)
public void testCycle1() {
  IllegalStateException caught = null;

  Vertex vertex1 = new Vertex("v1", new ProcessorDescriptor("MapProcessor"),
      dummyTaskCount, dummyTaskResource);
  Vertex vertex2 = new Vertex("v2", new ProcessorDescriptor("MapProcessor"),
      dummyTaskCount, dummyTaskResource);
  Vertex vertex3 = new Vertex("v3", new ProcessorDescriptor("MapProcessor"),
      dummyTaskCount, dummyTaskResource);
  Vertex vertex4 = new Vertex("v4", new ProcessorDescriptor("MapProcessor"),
      dummyTaskCount, dummyTaskResource);

  EdgeProperty property12 = new EdgeProperty(
      DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      new OutputDescriptor("dummy output class"),
      new InputDescriptor("dummy input class"));
  Edge edge12 = new Edge(vertex1, vertex2, property12);

  EdgeProperty property23 = new EdgeProperty(
      DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      new OutputDescriptor("dummy output class"),
      new InputDescriptor("dummy input class"));
  Edge edge23 = new Edge(vertex2, vertex3, property23);

  EdgeProperty property24 = new EdgeProperty(
      DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      new OutputDescriptor("dummy output class"),
      new InputDescriptor("dummy input class"));
  Edge edge24 = new Edge(vertex2, vertex4, property24);

  // This edge closes the loop v1 -> v2 -> v4 -> v1.
  EdgeProperty property41 = new EdgeProperty(
      DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      new OutputDescriptor("dummy output class"),
      new InputDescriptor("dummy input class"));
  Edge edge41 = new Edge(vertex4, vertex1, property41);

  DAG graph = new DAG("testDag");
  graph.addVertex(vertex1);
  graph.addVertex(vertex2);
  graph.addVertex(vertex3);
  graph.addVertex(vertex4);
  graph.addEdge(edge12);
  graph.addEdge(edge23);
  graph.addEdge(edge24);
  graph.addEdge(edge41);

  try {
    graph.verify();
  } catch (IllegalStateException e) {
    caught = e;
  }

  Assert.assertNotNull(caught);
  System.out.println(caught.getMessage());
  Assert.assertTrue(caught.getMessage().startsWith("DAG contains a cycle"));
}
 
Example 14
Source File: ShuffleVertexManagerBase.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Called when this vertex starts. Registers every input vertex as a tracked
 * source, then replays the state updates and vertex-manager events buffered
 * so far, processes the supplied source-task completions, and finally
 * evaluates the pending tasks.
 *
 * @param completions source task attempts already completed; may be {@code null}
 */
@Override
public synchronized void onVertexStarted(List<TaskAttemptIdentifier> completions) {
  // Edges are inspected only now because they may not have been defined earlier.
  Map<String, EdgeProperty> inputEdges = getContext().getInputVertexEdgeProperties();
  for (Map.Entry<String, EdgeProperty> input : inputEdges.entrySet()) {
    String sourceName = input.getKey();
    EdgeProperty sourceEdge = input.getValue();

    srcVertexInfo.put(sourceName, createSourceVertexInfo(sourceEdge,
        getContext().getVertexNumTasks(getContext().getVertexName())));
    // TODO what if derived class has already called this
    // Subscribe to CONFIGURED notifications from this source vertex.
    getContext().registerForVertexStateUpdates(sourceName,
        EnumSet.of(VertexState.CONFIGURED));
    if (DataMovementType.SCATTER_GATHER == sourceEdge.getDataMovementType()) {
      bipartiteSources++;
    }
  }
  onVertexStartedCheck();

  // Replay the state updates that were buffered.
  for (VertexStateUpdate bufferedUpdate : pendingStateUpdates) {
    handleVertexStateUpdate(bufferedUpdate);
  }
  pendingStateUpdates.clear();

  // Track the tasks in this vertex.
  updatePendingTasks();

  // Replay the vertex-manager events that were buffered.
  for (VertexManagerEvent bufferedEvent : pendingVMEvents) {
    handleVertexManagerEvent(bufferedEvent);
  }
  pendingVMEvents.clear();

  LOG.info("OnVertexStarted vertex: {} with {} source tasks and {} pending tasks",
      getContext().getVertexName(), totalNumBipartiteSourceTasks, totalTasksToSchedule);

  if (completions != null) {
    for (TaskAttemptIdentifier completed : completions) {
      onSourceTaskCompleted(completed);
    }
  }
  onVertexStartedDone.set(true);
  // Covers the special case when the source has 0 tasks or min fraction == 0.
  processPendingTasks(null);
}
 
Example 15
Source File: TestDAGPlan.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * Attaches credentials holding two tokens to a two-vertex DAG, builds the DAG
 * plan, and checks that the plan carries a credentials binary from which both
 * tokens can be read back.
 */
@Test (timeout=5000)
public void testCredentialsSerde() {
  DAG graph = new DAG("testDag");

  ProcessorDescriptor firstProcessor = new ProcessorDescriptor("processor1")
      .setUserPayload("processor1Bytes".getBytes());
  ProcessorDescriptor secondProcessor = new ProcessorDescriptor("processor2")
      .setUserPayload("processor2Bytes".getBytes());

  Vertex source = new Vertex("v1", firstProcessor, 10, Resource.newInstance(1024, 1));
  Vertex sink = new Vertex("v2", secondProcessor, 1, Resource.newInstance(1024, 1));
  source.setTaskLaunchCmdOpts("")
      .setTaskEnvironment(new HashMap<String, String>())
      .setTaskLocalFiles(new HashMap<String, LocalResource>());
  sink.setTaskLaunchCmdOpts("")
      .setTaskEnvironment(new HashMap<String, String>())
      .setTaskLocalFiles(new HashMap<String, LocalResource>());

  InputDescriptor edgeInput = new InputDescriptor("input")
      .setUserPayload("inputBytes".getBytes());
  OutputDescriptor edgeOutput = new OutputDescriptor("output")
      .setUserPayload("outputBytes".getBytes());
  EdgeProperty shuffleProperty = new EdgeProperty(
      DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      edgeOutput,
      edgeInput);
  Edge link = new Edge(source, sink, shuffleProperty);

  graph.addVertex(source).addVertex(sink).addEdge(link);

  // Two tokens registered under distinct aliases.
  Credentials credentials = new Credentials();
  Token<TokenIdentifier> firstToken = new Token<TokenIdentifier>();
  Token<TokenIdentifier> secondToken = new Token<TokenIdentifier>();
  credentials.addToken(new Text("Token1"), firstToken);
  credentials.addToken(new Text("Token2"), secondToken);
  graph.setCredentials(credentials);

  DAGPlan plan = graph.createDag(new TezConfiguration());
  assertTrue(plan.hasCredentialsBinary());

  Credentials restored =
      DagTypeConverters.convertByteStringToCredentials(plan.getCredentialsBinary());

  assertEquals(2, restored.numberOfTokens());
  assertNotNull(restored.getToken(new Text("Token1")));
  assertNotNull(restored.getToken(new Text("Token2")));
}
 
Example 16
Source File: TestDAGPlan.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a two-vertex DAG whose processor, input and output descriptors carry
 * user payloads, converts it to a DAG plan, and checks that class names and
 * payload bytes are present both in the plan and in the edge property rebuilt
 * from that plan.
 */
@Test(timeout = 5000)
public void testUserPayloadSerde() {
  DAG graph = new DAG("testDag");

  ProcessorDescriptor firstProcessor = new ProcessorDescriptor("processor1")
      .setUserPayload("processor1Bytes".getBytes());
  ProcessorDescriptor secondProcessor = new ProcessorDescriptor("processor2")
      .setUserPayload("processor2Bytes".getBytes());

  Vertex source = new Vertex("v1", firstProcessor, 10, Resource.newInstance(1024, 1));
  Vertex sink = new Vertex("v2", secondProcessor, 1, Resource.newInstance(1024, 1));
  source.setTaskLaunchCmdOpts("")
      .setTaskEnvironment(new HashMap<String, String>())
      .setTaskLocalFiles(new HashMap<String, LocalResource>());
  sink.setTaskLaunchCmdOpts("")
      .setTaskEnvironment(new HashMap<String, String>())
      .setTaskLocalFiles(new HashMap<String, LocalResource>());

  InputDescriptor edgeInput = new InputDescriptor("input")
      .setUserPayload("inputBytes".getBytes());
  OutputDescriptor edgeOutput = new OutputDescriptor("output")
      .setUserPayload("outputBytes".getBytes());
  EdgeProperty shuffleProperty = new EdgeProperty(
      DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      edgeOutput,
      edgeInput);
  Edge link = new Edge(source, sink, shuffleProperty);

  graph.addVertex(source).addVertex(sink).addEdge(link);

  DAGPlan plan = graph.createDag(new TezConfiguration());

  assertEquals(2, plan.getVertexCount());
  assertEquals(1, plan.getEdgeCount());

  VertexPlan sourcePlan = plan.getVertex(0);
  VertexPlan sinkPlan = plan.getVertex(1);
  EdgePlan linkPlan = plan.getEdge(0);

  // Processor descriptors as stored in the plan.
  assertEquals("processor1Bytes",
      new String(sourcePlan.getProcessorDescriptor().getUserPayload().toByteArray()));
  assertEquals("processor1", sourcePlan.getProcessorDescriptor().getClassName());

  assertEquals("processor2Bytes",
      new String(sinkPlan.getProcessorDescriptor().getUserPayload().toByteArray()));
  assertEquals("processor2", sinkPlan.getProcessorDescriptor().getClassName());

  // Edge endpoints as stored in the plan.
  assertEquals("inputBytes",
      new String(linkPlan.getEdgeDestination().getUserPayload().toByteArray()));
  assertEquals("input", linkPlan.getEdgeDestination().getClassName());

  assertEquals("outputBytes",
      new String(linkPlan.getEdgeSource().getUserPayload().toByteArray()));
  assertEquals("output", linkPlan.getEdgeSource().getClassName());

  // Edge property rebuilt from the plan.
  EdgeProperty rebuilt =
      DagTypeConverters.createEdgePropertyMapFromDAGPlan(plan.getEdgeList().get(0));

  byte[] inputPayload = rebuilt.getEdgeDestination().getUserPayload();
  assertEquals("inputBytes", new String(inputPayload));
  assertEquals("input", rebuilt.getEdgeDestination().getClassName());

  byte[] outputPayload = rebuilt.getEdgeSource().getUserPayload();
  assertEquals("outputBytes", new String(outputPayload));
  assertEquals("output", rebuilt.getEdgeSource().getClassName());
}
 
Example 17
Source File: TestEdge.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * Sends data-movement events over a SCATTER_GATHER edge with one source task
 * and five destination tasks, first as a single composite event and then as
 * one plain event per destination. In both cases the mocked event handler
 * must be invoked once per destination task, and the captured events are
 * checked with {@code verifyEvents}.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test (timeout = 5000)
public void testCompositeEventHandling() {
  EventHandler handler = mock(EventHandler.class);
  EdgeProperty shuffleProperty = new EdgeProperty(
      DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      mock(OutputDescriptor.class),
      mock(InputDescriptor.class));
  Edge edge = new Edge(shuffleProperty, handler);

  TezVertexID sourceVertexId = createVertexID(1);
  TezVertexID targetVertexId = createVertexID(2);
  LinkedHashMap<TezTaskID, Task> sourceTasks = mockTasks(sourceVertexId, 1);
  LinkedHashMap<TezTaskID, Task> targetTasks = mockTasks(targetVertexId, 5);

  TezTaskID onlySourceTask = sourceTasks.keySet().iterator().next();

  Vertex sourceVertex = mockVertex("src", sourceVertexId, sourceTasks);
  Vertex targetVertex = mockVertex("dest", targetVertexId, targetTasks);

  edge.setSourceVertex(sourceVertex);
  edge.setDestinationVertex(targetVertex);
  edge.initialize();

  // The second argument (2) equals the version set on the events below
  // (annotated "AttemptNum" there) - presumably the attempt number.
  TezTaskAttemptID sourceAttempt = createTAIDForTest(onlySourceTask, 2);

  EventMetaData sourceMeta = new EventMetaData(
      EventProducerConsumerType.OUTPUT, "consumerVertex", "producerVertex", sourceAttempt);

  // Round 1: a single composite event.
  CompositeDataMovementEvent composite =
      new CompositeDataMovementEvent(0, targetTasks.size(), "bytes".getBytes());
  composite.setVersion(2); // AttemptNum
  // The event is set up to look as it would once the Vertex is done with it.
  TezEvent compositeEnvelope = new TezEvent(composite, sourceMeta);

  edge.sendTezEventToDestinationTasks(compositeEnvelope);

  ArgumentCaptor<Event> compositeCapture = ArgumentCaptor.forClass(Event.class);
  verify(handler, times(targetTasks.size())).handle(compositeCapture.capture());
  verifyEvents(compositeCapture.getAllValues(), sourceAttempt, targetTasks);

  // Round 2: the same verification with one regular DataMovementEvent per destination.
  reset(handler);
  for (int index = 0; index < targetTasks.size(); index++) {
    DataMovementEvent single = new DataMovementEvent(index, "bytes".getBytes());
    single.setVersion(2);
    edge.sendTezEventToDestinationTasks(new TezEvent(single, sourceMeta));
  }

  ArgumentCaptor<Event> singleCapture = ArgumentCaptor.forClass(Event.class);
  verify(handler, times(targetTasks.size())).handle(singleCapture.capture());
  verifyEvents(singleCapture.getAllValues(), sourceAttempt, targetTasks);
}
 
Example 18
Source File: VertexImpl.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * Prepares this vertex for initialization.
 *
 * <p>In order: records the init timestamps, builds the group input specs for
 * vertex groups that have a merged input targeting this vertex, determines
 * which root inputs declare an initializer, rejects the unsupported
 * combination of a SCATTER_GATHER source edge with initializer-backed root
 * inputs, assigns and initializes the vertex manager, and finally resolves
 * and validates the task count.
 *
 * @param event when non-null, the init timestamps, additional root inputs and
 *     task count are taken from it; when null they come from the clock, the
 *     existing {@code rootInputDescriptors} and the vertex plan
 *     (presumably the non-null case is a recovery/replay path - TODO confirm)
 * @return {@code VertexState.INITED} on success; otherwise
 *     {@code VertexState.FAILED}, either directly or as the result of
 *     {@code finished(VertexState.FAILED)}
 */
private VertexState setupVertex(VertexInitializedEvent event) {

    if (event == null) {
      initTimeRequested = clock.getTime();
    } else {
      initTimeRequested = event.getInitRequestedTime();
      initedTime = event.getInitedTime();
    }

    // VertexManager needs to be setup before attempting to Initialize any
    // Inputs - since events generated by them will be routed to the
    // VertexManager for handling.

    // Collect one GroupInputSpec per vertex group that has a merged input
    // registered under this vertex's name.
    if (dagVertexGroups != null && !dagVertexGroups.isEmpty()) {
      List<GroupInputSpec> groupSpecList = Lists.newLinkedList();
      for (VertexGroupInfo groupInfo : dagVertexGroups.values()) {
        if (groupInfo.edgeMergedInputs.containsKey(getName())) {
          InputDescriptor mergedInput = groupInfo.edgeMergedInputs.get(getName());
          groupSpecList.add(new GroupInputSpec(groupInfo.groupName,
              Lists.newLinkedList(groupInfo.groupMembers), mergedInput));
        }
      }
      // groupInputSpecList is only assigned when at least one spec was found.
      if (!groupSpecList.isEmpty()) {
        groupInputSpecList = groupSpecList;
      }
    }

    // Check if any inputs need initializers
    if (event != null) {
      // With an event, the root inputs are replaced by the event's additional
      // inputs and no initializer bookkeeping is done in this branch.
      this.rootInputDescriptors = event.getAdditionalInputs();
    } else {
      if (rootInputDescriptors != null) {
        LOG.info("Root Inputs exist for Vertex: " + getName() + " : "
            + rootInputDescriptors);
        for (RootInputLeafOutputDescriptor<InputDescriptor> input : rootInputDescriptors.values()) {
          if (input.getInitializerClassName() != null) {
            // Lazily create the set so it stays null when no input has an initializer.
            if (inputsWithInitializers == null) {
              inputsWithInitializers = Sets.newHashSet();
            }
            inputsWithInitializers.add(input.getEntityName());
            LOG.info("Starting root input initializer for input: "
                + input.getEntityName() + ", with class: ["
                + input.getInitializerClassName() + "]");
          }
        }
      }
    }

    // A vertex is treated as "bipartite" if any incoming edge is SCATTER_GATHER.
    boolean hasBipartite = false;
    if (sourceVertices != null) {
      for (Edge edge : sourceVertices.values()) {
        if (edge.getEdgeProperty().getDataMovementType() == DataMovementType.SCATTER_GATHER) {
          hasBipartite = true;
          break;
        }
      }
    }

    if (hasBipartite && inputsWithInitializers != null) {
      LOG.fatal("A vertex with an Initial Input and a Shuffle Input are not supported at the moment");
      // NOTE(review): here a non-null event returns FAILED directly and a null
      // event goes through finished(); the task-count failure path below does
      // the opposite. Confirm which pairing is intended.
      if (event != null) {
        return VertexState.FAILED;
      } else {
        return finished(VertexState.FAILED);
      }
    }

    assignVertexManager();

    vertexManager.initialize();

    // Setup tasks early if possible. If the VertexManager is not being used
    // to set parallelism, sending events to Tasks is safe (and less confusing
    // then relying on tasks to be created after TaskEvents are generated).
    // For VertexManagers setting parallelism, the setParallelism call needs
    // to be inline.
    if (event != null) {
      numTasks = event.getNumTasks();
    } else {
      numTasks = getVertexPlan().getTaskConfig().getNumTasks();
    }

    // Equivalent to numTasks < -1: only -1 and non-negative counts are accepted
    // (presumably -1 means "not yet determined" - TODO confirm).
    if (!(numTasks == -1 || numTasks >= 0)) {
      addDiagnostic("Invalid task count for vertex"
          + ", numTasks=" + numTasks);
      trySetTerminationCause(VertexTerminationCause.INVALID_NUM_OF_TASKS);
      if (event != null) {
        abortVertex(VertexStatus.State.FAILED);
        return finished(VertexState.FAILED);
      } else {
        return VertexState.FAILED;
      }
    }

    checkTaskLimits();
    return VertexState.INITED;
  }
 
Example 19
Source File: ShuffleVertexManager.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * Initializes the manager from the configuration carried in the context's
 * user payload.
 *
 * <p>Reads the slow-start min/max source-completion fractions, the
 * auto-parallelism flag, the desired per-task input size and the minimum task
 * parallelism (clamped to at least 1), then registers every input vertex that
 * is connected through a SCATTER_GATHER edge as a bipartite source.
 *
 * @param context the plugin context providing the user payload and input edges
 * @throws TezUncheckedException if the payload cannot be turned into a
 *     configuration, or if no SCATTER_GATHER input exists
 * @throws IllegalArgumentException if the min fraction is negative or the max
 *     fraction is below the min fraction
 */
@Override
public void initialize(VertexManagerPluginContext context) {
  final Configuration conf;
  try {
    conf = TezUtils.createConfFromUserPayload(context.getUserPayload());
  } catch (IOException e) {
    throw new TezUncheckedException(e);
  }

  this.context = context;

  this.slowStartMinSrcCompletionFraction = conf.getFloat(
      ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION,
      ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION_DEFAULT);
  this.slowStartMaxSrcCompletionFraction = conf.getFloat(
      ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION,
      ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION_DEFAULT);

  // Kept as the original "<" comparisons (not negated ">=") so that the
  // outcome for NaN values stays the same.
  final boolean invalidFractions = slowStartMinSrcCompletionFraction < 0
      || slowStartMaxSrcCompletionFraction < slowStartMinSrcCompletionFraction;
  if (invalidFractions) {
    throw new IllegalArgumentException(
        "Invalid values for slowStartMinSrcCompletionFraction"
            + "/slowStartMaxSrcCompletionFraction. Min cannot be < 0 and "
            + "max cannot be < min.");
  }

  this.enableAutoParallelism = conf.getBoolean(
      ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL,
      ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL_DEFAULT);
  this.desiredTaskInputDataSize = conf.getLong(
      ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE,
      ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE_DEFAULT);

  final int configuredMinParallelism = conf.getInt(
      ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_MIN_TASK_PARALLELISM,
      ShuffleVertexManager.TEZ_AM_SHUFFLE_VERTEX_MANAGER_MIN_TASK_PARALLELISM_DEFAULT);
  // Never go below a parallelism of 1, whatever the configuration says.
  this.minTaskParallelism = Math.max(1, configuredMinParallelism);

  final String settings = "Shuffle Vertex Manager: settings"
      + " minFrac:" + slowStartMinSrcCompletionFraction
      + " maxFrac:" + slowStartMaxSrcCompletionFraction
      + " auto:" + enableAutoParallelism
      + " desiredTaskIput:" + desiredTaskInputDataSize
      + " minTasks:" + minTaskParallelism;
  LOG.info(settings);

  final Map<String, EdgeProperty> inputEdges = context.getInputVertexEdgeProperties();
  for (Map.Entry<String, EdgeProperty> input : inputEdges.entrySet()) {
    if (input.getValue().getDataMovementType() != DataMovementType.SCATTER_GATHER) {
      continue;
    }
    bipartiteSources.put(input.getKey(), new HashSet<Integer>());
  }
  if (bipartiteSources.isEmpty()) {
    throw new TezUncheckedException("Atleast 1 bipartite source should exist");
  }
  // Source tasks are not tracked here, since those tasks may themselves be
  // dynamically changed as the DAG progresses.
}
 
Example 20
Source File: TezOperDependencyParallelismEstimator.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Estimates the parallelism of {@code tezOper} from the effective parallelism
 * of its predecessors.
 *
 * <p>Resolution order:
 * <ol>
 *   <li>vertex groups are not estimated and yield -1;</li>
 *   <li>for a non-intermediate reducer, an explicitly requested parallelism wins;</li>
 *   <li>a previously computed estimate is reused;</li>
 *   <li>otherwise the (optionally factor-scaled) parallelism of every
 *       predecessor connected through a SCATTER_GATHER or ONE_TO_ONE edge is
 *       summed, rounded up and capped.</li>
 * </ol>
 *
 * <p>Side effect: {@code maxTaskCount} is refreshed from {@code conf} on every
 * call that gets past the vertex-group check.
 *
 * @param plan the operator plan containing {@code tezOper}
 * @param tezOper the operator whose parallelism is being estimated
 * @param conf the configuration supplying the maximum reducer count
 * @return the estimated parallelism, or -1 for a vertex group
 * @throws IOException if the operator has no predecessors, or if a relevant
 *     predecessor has an effective parallelism of -1
 */
@Override
public int estimateParallelism(TezOperPlan plan, TezOperator tezOper, Configuration conf) throws IOException {

    if (tezOper.isVertexGroup()) {
        return -1;
    }

    boolean intermediateReducer = TezCompilerUtil.isIntermediateReducer(tezOper);

    // TODO: If map opts and reduce opts are same estimate higher parallelism
    // for tasks based on the count of number of map tasks else be conservative as now
    maxTaskCount = conf.getInt(PigReducerEstimator.MAX_REDUCER_COUNT_PARAM,
            PigReducerEstimator.DEFAULT_MAX_REDUCER_COUNT_PARAM);

    // If parallelism is set explicitly, respect it
    if (!intermediateReducer && tezOper.getRequestedParallelism()!=-1) {
        return tezOper.getRequestedParallelism();
    }

    // If we have already estimated parallelism, use that one
    if (tezOper.getEstimatedParallelism()!=-1) {
        return tezOper.getEstimatedParallelism();
    }

    List<TezOperator> preds = plan.getPredecessors(tezOper);
    if (preds==null) {
        throw new IOException("Cannot estimate parallelism for source vertex");
    }

    double estimatedParallelism = 0;

    for (Entry<OperatorKey, TezEdgeDescriptor> entry : tezOper.inEdges.entrySet()) {
        TezOperator pred = getPredecessorWithKey(plan, tezOper, entry.getKey().toString());

        // Don't include broadcast edge, broadcast edge is used for
        // replicated join (covered in TezParallelismFactorVisitor.visitFRJoin)
        // and sample/scalar (does not impact parallelism)
        if (entry.getValue().dataMovementType==DataMovementType.SCATTER_GATHER ||
                entry.getValue().dataMovementType==DataMovementType.ONE_TO_ONE) {
            double predParallelism = pred.getEffectiveParallelism();
            if (predParallelism==-1) {
                // Name the predecessor that actually lacks a parallelism; the
                // message previously repeated tezOper's own key here.
                throw new IOException("Cannot estimate parallelism for " + tezOper.getOperatorKey().toString()
                        + ", effective parallelism for predecessor " + pred.getOperatorKey().toString()
                        + " is -1");
            }

            //For cases like Union we can just limit to sum of pred vertices parallelism
            boolean applyFactor = !tezOper.isUnion();
            if (pred.plan!=null && applyFactor) { // pred.plan can be null if it is a VertexGroup
                TezParallelismFactorVisitor parallelismFactorVisitor = new TezParallelismFactorVisitor(pred.plan, tezOper.getOperatorKey().toString());
                parallelismFactorVisitor.visit();
                predParallelism = predParallelism * parallelismFactorVisitor.getFactor();
            }
            estimatedParallelism += predParallelism;
        }
    }

    int roundedEstimatedParallelism = (int)Math.ceil(estimatedParallelism);

    if (intermediateReducer && tezOper.isOverrideIntermediateParallelism()) {
        // Estimated reducers should not be more than the configured limit
        roundedEstimatedParallelism = Math.min(roundedEstimatedParallelism, maxTaskCount);
        // The user-specified value is the operator's requested parallelism when
        // set, otherwise pc.defaultParallel (which may itself be -1).
        int userSpecifiedParallelism = pc.defaultParallel;
        if (tezOper.getRequestedParallelism() != -1) {
            userSpecifiedParallelism = tezOper.getRequestedParallelism();
        }
        int intermediateParallelism = Math.max(userSpecifiedParallelism, roundedEstimatedParallelism);
        if (userSpecifiedParallelism != -1 &&
                (intermediateParallelism > 200 && intermediateParallelism > (2 * userSpecifiedParallelism))) {
            // Estimated reducers shall not be more than 2x of requested parallelism
            // if greater than 200 and we are overriding user specified values
            intermediateParallelism = 2 * userSpecifiedParallelism;
        }
        roundedEstimatedParallelism = intermediateParallelism;
    } else {
        roundedEstimatedParallelism = Math.min(roundedEstimatedParallelism, maxTaskCount);
    }

    return roundedEstimatedParallelism;
}