org.apache.tez.dag.api.OutputDescriptor Java Examples

The following examples show how to use org.apache.tez.dag.api.OutputDescriptor. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: VertexImpl.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Replaces this vertex's additional outputs with the ones described by {@code outputs}.
 *
 * <p>Both {@code additionalOutputs} and {@code outputCommitters} are re-created, sized for the
 * number of outputs supplied; only {@code additionalOutputs} is populated here. For every
 * output the name is registered through {@code addIO}, its descriptor (and, when present, its
 * committer descriptor) is converted from the DAG plan, and an {@link OutputSpec} whose third
 * constructor argument is {@code 0} is appended to {@code additionalOutputSpecs}.
 *
 * @param outputs the leaf-output protos taken from the DAG plan
 */
@Override
public void setAdditionalOutputs(List<RootInputLeafOutputProto> outputs) {
  final int outputCount = outputs.size();
  LOG.info("Setting " + outputCount + " additional outputs for vertex " + this.logIdentifier);
  this.additionalOutputs = Maps.newHashMapWithExpectedSize(outputCount);
  this.outputCommitters = Maps.newHashMapWithExpectedSize(outputCount);

  for (RootInputLeafOutputProto proto : outputs) {
    final String outputName = proto.getName();
    addIO(outputName);

    final OutputDescriptor descriptor =
        DagTypeConverters.convertOutputDescriptorFromDAGPlan(proto.getIODescriptor());

    // The committer is optional in the plan; a missing one is recorded as null.
    OutputCommitterDescriptor committerDescriptor = null;
    if (proto.hasControllerDescriptor()) {
      committerDescriptor = DagTypeConverters
          .convertOutputCommitterDescriptorFromDAGPlan(proto.getControllerDescriptor());
    }

    this.additionalOutputs.put(outputName,
        new RootInputLeafOutput<OutputDescriptor, OutputCommitterDescriptor>(
            outputName, descriptor, committerDescriptor));
    additionalOutputSpecs.add(new OutputSpec(outputName, descriptor, 0));
  }
}
 
Example #2
Source File: TezOutputContextImpl.java    From incubator-tez with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the context handed to a single output of a task.
 *
 * <p>Most arguments are forwarded unchanged to the superclass constructor; the counters are
 * first passed through {@code wrapCounters} together with the task and destination vertex
 * names. The raw payload bytes are converted with
 * {@code DagTypeConverters.convertToTezUserPayload}, and an {@link EventMetaData} of type
 * {@code OUTPUT} is built from the task vertex name, destination vertex name and attempt id.
 *
 * @param destinationVertexName name of the vertex this output feeds; must not be null
 * @param outputIndex index of this output, stored as given
 * @param userPayload raw payload bytes for the output; may be null
 */
@Private
public TezOutputContextImpl(Configuration conf, String[] workDirs, int appAttemptNumber,
    TezUmbilical tezUmbilical, String dagName,
    String taskVertexName,
    String destinationVertexName,
    TezTaskAttemptID taskAttemptID, TezCounters counters, int outputIndex,
    @Nullable byte[] userPayload, RuntimeTask runtimeTask,
    Map<String, ByteBuffer> serviceConsumerMetadata,
    Map<String, String> auxServiceEnv, MemoryDistributor memDist,
    OutputDescriptor outputDescriptor) {
  super(conf, workDirs, appAttemptNumber, dagName, taskVertexName, taskAttemptID,
      wrapCounters(counters, taskVertexName, destinationVertexName, conf),
      runtimeTask, tezUmbilical, serviceConsumerMetadata,
      auxServiceEnv, memDist, outputDescriptor);
  // outputIndex is a primitive int: boxing it for a null check could never fail, so the
  // former checkNotNull(outputIndex, ...) was dead code and has been dropped.
  checkNotNull(destinationVertexName, "destinationVertexName is null");
  this.userPayload = DagTypeConverters.convertToTezUserPayload(userPayload);
  this.outputIndex = outputIndex;
  this.destinationVertexName = destinationVertexName;
  this.sourceInfo = new EventMetaData(EventProducerConsumerType.OUTPUT,
      taskVertexName, destinationVertexName, taskAttemptID);
}
 
Example #3
Source File: TestContainerReuse.java    From incubator-tez with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a launch-request event for the given task attempt, using fixed placeholder names
 * for the DAG, vertex, processor, input and output.
 *
 * <p>A {@link TaskLocationHint} is attached only when at least one of {@code hosts} or
 * {@code racks} is non-null. Either array may be null on its own; a null array is treated as
 * an empty set.
 *
 * @param hosts preferred hosts, or null
 * @param racks preferred racks, or null
 * @return the assembled launch request
 */
private AMSchedulerEventTALaunchRequest createLaunchRequestEvent(
  TezTaskAttemptID taID, TaskAttempt ta, Resource capability,
  String[] hosts, String[] racks, Priority priority,
  ContainerContext containerContext) {
  TaskLocationHint locationHint = null;
  if (hosts != null || racks != null) {
    // Sets.newHashSet(E...) dereferences the array, so a lone null array used to NPE here.
    Set<String> hostsSet = hosts == null ? Sets.<String>newHashSet() : Sets.newHashSet(hosts);
    Set<String> racksSet = racks == null ? Sets.<String>newHashSet() : Sets.newHashSet(racks);
    locationHint = new TaskLocationHint(hostsSet, racksSet);
  }
  AMSchedulerEventTALaunchRequest lr = new AMSchedulerEventTALaunchRequest(
    taID, capability, new TaskSpec(taID, "dagName", "vertexName",
    new ProcessorDescriptor("processorClassName"),
    Collections.singletonList(new InputSpec("vertexName",
      new InputDescriptor("inputClassName"), 1)),
    Collections.singletonList(new OutputSpec("vertexName",
      new OutputDescriptor("outputClassName"), 1)), null), ta, locationHint,
    priority, containerContext);
  return lr;
}
 
Example #4
Source File: VertexImpl.java    From incubator-tez with Apache License 2.0 6 votes vote down vote up
/**
 * Replaces this vertex's additional outputs with the ones described by {@code outputs}.
 *
 * <p>{@code additionalOutputs} and {@code outputCommitters} are re-created, sized for the
 * number of outputs supplied; only {@code additionalOutputs} is filled in by this method.
 * Each output also contributes an {@link OutputSpec} (third constructor argument {@code 0})
 * to {@code additionalOutputSpecs}.
 *
 * @param outputs the leaf-output protos taken from the DAG plan
 */
@Override
public void setAdditionalOutputs(List<RootInputLeafOutputProto> outputs) {
  LOG.info("setting additional outputs for vertex " + this.vertexName);
  final int expectedSize = outputs.size();
  this.additionalOutputs = Maps.newHashMapWithExpectedSize(expectedSize);
  this.outputCommitters = Maps.newHashMapWithExpectedSize(expectedSize);

  for (RootInputLeafOutputProto proto : outputs) {
    final String outputName = proto.getName();
    final OutputDescriptor descriptor =
        DagTypeConverters.convertOutputDescriptorFromDAGPlan(proto.getEntityDescriptor());

    // The initializer class is optional; a missing one is recorded as null.
    String initializerClassName = null;
    if (proto.hasInitializerClassName()) {
      initializerClassName = proto.getInitializerClassName();
    }

    this.additionalOutputs.put(outputName,
        new RootInputLeafOutputDescriptor<OutputDescriptor>(
            outputName, descriptor, initializerClassName));
    additionalOutputSpecs.add(new OutputSpec(outputName, descriptor, 0));
  }
}
 
Example #5
Source File: TestEdge.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Routes an input-read-error event through an edge backed by
 * {@code CustomEdgeManagerWithInvalidReturnValue} (configured with {@code (1,1,0,1)}) and
 * expects an {@link AMUserCodeException} whose cause mentions
 * "ConsumerTaskNum must be positive".
 */
@Test(timeout = 5000)
public void testInvalidConsumerNumber() throws Exception {
  final EventHandler eventHandler = mock(EventHandler.class);
  final EdgeProperty edgeProperty = EdgeProperty.create(
      EdgeManagerPluginDescriptor.create(CustomEdgeManagerWithInvalidReturnValue.class.getName())
        .setUserPayload(new CustomEdgeManagerWithInvalidReturnValue.EdgeManagerConfig(1,1,0,1).toUserPayload()),
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      OutputDescriptor.create(""),
      InputDescriptor.create(""));
  final Edge edge = new Edge(edgeProperty, eventHandler, new TezConfiguration());

  final TezVertexID sourceVertexId = createVertexID(1);
  final TezVertexID destVertexId = createVertexID(2);
  edge.setSourceVertex(mockVertex("v1", sourceVertexId, new LinkedHashMap<TezTaskID, Task>()));
  edge.setDestinationVertex(mockVertex("v2", destVertexId, new LinkedHashMap<TezTaskID, Task>()));
  edge.initialize();

  try {
    final TezTaskAttemptID readerAttemptId =
        TezTaskAttemptID.getInstance(TezTaskID.getInstance(destVertexId, 1), 1);
    final EventMetaData readerInfo =
        new EventMetaData(EventProducerConsumerType.INPUT, "v2", "v1", readerAttemptId);
    final TezEvent readErrorEvent =
        new TezEvent(InputReadErrorEvent.create("diag", 0, 1), readerInfo);
    edge.sendTezEventToSourceTasks(readErrorEvent);
    // Reaching this line means the invalid edge-manager result went undetected.
    Assert.fail();
  } catch (AMUserCodeException expected) {
    expected.printStackTrace();
    assertTrue(expected.getCause().getMessage().contains("ConsumerTaskNum must be positive"));
  }
}
 
Example #6
Source File: TestMockDAGAppMaster.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a three-vertex DAG in which {@code v1} and {@code v2} form the vertex group
 * {@code uv12} that feeds {@code v3} over a scatter-gather group edge.
 *
 * <p>The group and {@code v3} each get a data sink whose committer descriptor comes from
 * {@code createOutputCommitterDesc}, driven by the respective flag.
 *
 * @param dagName name given to the DAG
 * @param uv12CommitFail flag passed to {@code createOutputCommitterDesc} for the group sink
 * @param v3CommitFail flag passed to {@code createOutputCommitterDesc} for the v3 sink
 * @return the assembled DAG
 */
private DAG createDAG(String dagName, boolean uv12CommitFail, boolean v3CommitFail) {
  final DAG dag = DAG.create(dagName);
  final Vertex first = Vertex.create("v1", ProcessorDescriptor.create("Proc"), 1);
  final Vertex second = Vertex.create("v2", ProcessorDescriptor.create("Proc"), 1);
  final Vertex third = Vertex.create("v3", ProcessorDescriptor.create("Proc"), 1);
  final VertexGroup group = dag.createVertexGroup("uv12", first, second);

  // Sink shared by the members of the vertex group.
  final DataSinkDescriptor groupSink = DataSinkDescriptor.create(
      OutputDescriptor.create("dummy output"), createOutputCommitterDesc(uv12CommitFail), null);
  group.addDataSink("uv12Out", groupSink);

  // Sink owned by the downstream vertex.
  final DataSinkDescriptor thirdSink = DataSinkDescriptor.create(
      OutputDescriptor.create("dummy output"), createOutputCommitterDesc(v3CommitFail), null);
  third.addDataSink("v3Out", thirdSink);

  final EdgeProperty groupEdgeProperty = EdgeProperty.create(
      DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      OutputDescriptor.create("dummy output class"),
      InputDescriptor.create("dummy input class"));
  final GroupInputEdge groupEdge = GroupInputEdge.create(
      group, third, groupEdgeProperty, InputDescriptor.create("merge.class"));

  dag.addVertex(first)
    .addVertex(second)
    .addVertex(third)
    .addEdge(groupEdge);
  return dag;
}
 
Example #7
Source File: TestContainerReuse.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a launch-request event for the given task attempt, using fixed placeholder names
 * for the DAG, vertex, processor, input and output.
 *
 * <p>A {@link TaskLocationHint} is attached only when at least one of {@code hosts} or
 * {@code racks} is non-null. Either array may be null on its own; a null array is treated as
 * an empty set.
 *
 * @param hosts preferred hosts, or null
 * @param racks preferred racks, or null
 * @return the assembled launch request
 */
private AMSchedulerEventTALaunchRequest createLaunchRequestEvent(
  TezTaskAttemptID taID, TaskAttempt ta, Resource capability,
  String[] hosts, String[] racks, Priority priority,
  ContainerContext containerContext) {
  TaskLocationHint locationHint = null;
  if (hosts != null || racks != null) {
    // Sets.newHashSet(E...) dereferences the array, so a lone null array used to NPE here.
    Set<String> hostsSet = hosts == null ? Sets.<String>newHashSet() : Sets.newHashSet(hosts);
    Set<String> racksSet = racks == null ? Sets.<String>newHashSet() : Sets.newHashSet(racks);
    locationHint = TaskLocationHint.createTaskLocationHint(hostsSet, racksSet);
  }
  AMSchedulerEventTALaunchRequest lr = new AMSchedulerEventTALaunchRequest(
    taID, capability, new TaskSpec(taID, "dagName", "vertexName", -1,
      ProcessorDescriptor.create("processorClassName"),
    Collections.singletonList(new InputSpec("vertexName",
        InputDescriptor.create("inputClassName"), 1)),
    Collections.singletonList(new OutputSpec("vertexName",
        OutputDescriptor.create("outputClassName"), 1)), null, null), ta, locationHint,
    priority.getPriority(), containerContext, 0, 0, 0);
  return lr;
}
 
Example #8
Source File: OutputCommitterContextImpl.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a committer context from the identifying details of an output.
 *
 * @param applicationId id of the application; must not be null
 * @param dagAttemptNumber attempt number of the DAG, stored as given
 * @param dagName name of the DAG; must not be null
 * @param vertexName name of the vertex; must not be null
 * @param output the output (descriptor plus committer descriptor); must not be null
 * @param vertexIdx index of the vertex, stored as given
 * @throws NullPointerException if any of the reference arguments listed above is null
 */
public OutputCommitterContextImpl(ApplicationId applicationId,
    int dagAttemptNumber,
    String dagName,
    String vertexName,
    RootInputLeafOutput<OutputDescriptor, OutputCommitterDescriptor> output,
    int vertexIdx) {
  // Validate and assign in one step; the checks still fire in the same order as before.
  this.applicationId = Objects.requireNonNull(applicationId, "applicationId is null");
  this.dagAttemptNumber = dagAttemptNumber;
  this.dagName = Objects.requireNonNull(dagName, "dagName is null");
  this.vertexName = Objects.requireNonNull(vertexName, "vertexName is null");
  this.output = Objects.requireNonNull(output, "output is null");
  this.vertexIdx = vertexIdx;
}
 
Example #9
Source File: TestEdge.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Requests the destination spec from an edge backed by
 * {@code CustomEdgeManagerWithInvalidReturnValue} (configured with {@code (-1,1,1,1)}) and
 * expects an {@link AMUserCodeException} whose cause mentions
 * "PhysicalInputCount should not be negative".
 */
@Test(timeout = 5000)
public void testInvalidPhysicalInputCount() throws Exception {
  final EventHandler eventHandler = mock(EventHandler.class);
  final EdgeProperty edgeProperty = EdgeProperty.create(
      EdgeManagerPluginDescriptor.create(CustomEdgeManagerWithInvalidReturnValue.class.getName())
        .setUserPayload(new CustomEdgeManagerWithInvalidReturnValue.EdgeManagerConfig(-1,1,1,1).toUserPayload()),
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      OutputDescriptor.create(""),
      InputDescriptor.create(""));
  final Edge edge = new Edge(edgeProperty, eventHandler, new TezConfiguration());

  final TezVertexID sourceVertexId = createVertexID(1);
  final TezVertexID destVertexId = createVertexID(2);
  edge.setSourceVertex(mockVertex("v1", sourceVertexId, new LinkedHashMap<TezTaskID, Task>()));
  edge.setDestinationVertex(mockVertex("v2", destVertexId, new LinkedHashMap<TezTaskID, Task>()));
  edge.initialize();

  try {
    edge.getDestinationSpec(0);
    // Reaching this line means the invalid edge-manager result went undetected.
    Assert.fail();
  } catch (AMUserCodeException expected) {
    expected.printStackTrace();
    assertTrue(expected.getCause().getMessage().contains("PhysicalInputCount should not be negative"));
  }
}
 
Example #10
Source File: TestMemoryWithEvents.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code testMemory} on a two-vertex DAG (A to B) joined by a ONE_TO_ONE edge.
 * The test is currently disabled via {@code @Ignore}.
 */
@Ignore
@Test (timeout = 600000)
public void testMemoryOneToOne() throws Exception {
  final DAG dag = DAG.create("testMemoryOneToOne");
  final Vertex producer = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), numTasks);
  final Vertex consumer = Vertex.create("B", ProcessorDescriptor.create("Proc.class"), numTasks);

  final EdgeProperty oneToOne = EdgeProperty.create(DataMovementType.ONE_TO_ONE,
      DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
      OutputDescriptor.create("Out"), InputDescriptor.create("In"));

  dag.addVertex(producer)
      .addVertex(consumer)
      .addEdge(Edge.create(producer, consumer, oneToOne));
  testMemory(dag, true);
}
 
Example #11
Source File: TestRootInputVertexManager.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a completion passed to {@code onVertexStarted} is counted by the manager:
 * {@code numSourceTasksCompleted} is 0 after creation and 1 after the vertex starts with one
 * already-completed source attempt.
 */
@Test
public void testTezDrainCompletionsOnVertexStart() throws IOException {
  final Configuration configuration = new Configuration();
  final HashMap<String, EdgeProperty> inputEdges =
      new HashMap<String, EdgeProperty>();
  final String sourceVertexName = "Vertex1";
  final EdgeProperty broadcastEdge = EdgeProperty.create(
      EdgeProperty.DataMovementType.BROADCAST,
      EdgeProperty.DataSourceType.PERSISTED,
      EdgeProperty.SchedulingType.SEQUENTIAL,
      OutputDescriptor.create("out"),
      InputDescriptor.create("in"));

  // Stub the plugin context: one source vertex with three tasks.
  final VertexManagerPluginContext context =
      mock(VertexManagerPluginContext.class);
  when(context.getVertexStatistics(any(String.class)))
      .thenReturn(mock(VertexStatistics.class));
  when(context.getInputVertexEdgeProperties())
      .thenReturn(inputEdges);
  when(context.getVertexNumTasks(sourceVertexName)).thenReturn(3);

  inputEdges.put(sourceVertexName, broadcastEdge);

  // Nothing is counted right after initialization.
  final RootInputVertexManager manager =
      createRootInputVertexManager(configuration, context, 0.1f, 0.1f);
  Assert.assertEquals(0, manager.numSourceTasksCompleted);

  // A completion delivered with the start notification must be counted.
  manager.onVertexStarted(Collections.singletonList(
    createTaskAttemptIdentifier(sourceVertexName, 0)));
  Assert.assertEquals(1, manager.numSourceTasksCompleted);
}
 
Example #12
Source File: TestMROutputLegacy.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes an {@code MROutputLegacy} from an old-API {@code JobConf} with
 * {@code IS_MAP_PROCESSOR} set to false, and checks that only the old-API members are
 * populated and that the committer is the mapred {@code FileOutputCommitter}.
 */
@Test (timeout = 5000)
public void testOldAPI_MR() throws Exception {
  final JobConf jobConf = new JobConf();
  jobConf.setOutputKeyClass(NullWritable.class);
  jobConf.setOutputValueClass(Text.class);
  jobConf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);
  org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(
      jobConf, new Path(TEST_DIR.getAbsolutePath()));
  // the output is attached to reducer
  jobConf.setBoolean(MRConfig.IS_MAP_PROCESSOR, false);

  final UserPayload payload = TezUtils.createUserPayloadFromConf(jobConf);
  final OutputDescriptor outputDesc =
      OutputDescriptor.create(MROutputLegacy.class.getName()).setUserPayload(payload);
  final OutputCommitterDescriptor committerDesc =
      OutputCommitterDescriptor.create(MROutputCommitter.class.getName());
  final DataSinkDescriptor sink = DataSinkDescriptor.create(outputDesc, committerDesc, null);

  final OutputContext context =
      createMockOutputContext(sink.getOutputDescriptor().getUserPayload());
  final MROutputLegacy legacyOutput = new MROutputLegacy(context, 2);
  legacyOutput.initialize();

  assertEquals(false, legacyOutput.useNewApi);
  assertEquals(org.apache.hadoop.mapred.SequenceFileOutputFormat.class,
      legacyOutput.oldOutputFormat.getClass());
  assertNull(legacyOutput.newOutputFormat);
  assertEquals(NullWritable.class, legacyOutput.oldApiTaskAttemptContext.getOutputKeyClass());
  assertEquals(Text.class, legacyOutput.oldApiTaskAttemptContext.getOutputValueClass());
  assertNull(legacyOutput.newApiTaskAttemptContext);
  assertNotNull(legacyOutput.oldRecordWriter);
  assertNull(legacyOutput.newRecordWriter);
  assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class,
      legacyOutput.committer.getClass());
}
 
Example #13
Source File: TestMROutputLegacy.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes an {@code MROutputLegacy} from an old-API {@code JobConf} with
 * {@code IS_MAP_PROCESSOR} set to true, and checks that only the old-API members are
 * populated and that the committer is the mapred {@code FileOutputCommitter}.
 */
@Test (timeout = 5000)
public void testOldAPI_MapperOnly() throws Exception {
  final JobConf jobConf = new JobConf();
  jobConf.setOutputKeyClass(NullWritable.class);
  jobConf.setOutputValueClass(Text.class);
  jobConf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);
  org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(
      jobConf, new Path(TEST_DIR.getAbsolutePath()));
  // the output is attached to mapper
  jobConf.setBoolean(MRConfig.IS_MAP_PROCESSOR, true);

  final UserPayload payload = TezUtils.createUserPayloadFromConf(jobConf);
  final OutputDescriptor outputDesc =
      OutputDescriptor.create(MROutputLegacy.class.getName()).setUserPayload(payload);
  final OutputCommitterDescriptor committerDesc =
      OutputCommitterDescriptor.create(MROutputCommitter.class.getName());
  final DataSinkDescriptor sink = DataSinkDescriptor.create(outputDesc, committerDesc, null);

  final OutputContext context =
      createMockOutputContext(sink.getOutputDescriptor().getUserPayload());
  final MROutputLegacy legacyOutput = new MROutputLegacy(context, 2);
  legacyOutput.initialize();

  assertEquals(false, legacyOutput.useNewApi);
  assertEquals(org.apache.hadoop.mapred.SequenceFileOutputFormat.class,
      legacyOutput.oldOutputFormat.getClass());
  assertNull(legacyOutput.newOutputFormat);
  assertEquals(NullWritable.class, legacyOutput.oldApiTaskAttemptContext.getOutputKeyClass());
  assertEquals(Text.class, legacyOutput.oldApiTaskAttemptContext.getOutputValueClass());
  assertNull(legacyOutput.newApiTaskAttemptContext);
  assertNotNull(legacyOutput.oldRecordWriter);
  assertNull(legacyOutput.newRecordWriter);
  assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class,
      legacyOutput.committer.getClass());
}
 
Example #14
Source File: UnorderedUnpartitionedKVEdgeConfigurer.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Convenience factory for an edge property driven by a caller-supplied edge manager.
 *
 * <p>The edge uses a PERSISTED data source and SEQUENTIAL scheduling; the output and input
 * descriptors are built from this configurer's class names and payloads.
 *
 * @param edgeManagerDescriptor the custom edge specification; must not be null
 * @return an {@link org.apache.tez.dag.api.EdgeProperty} instance
 */
public EdgeProperty createDefaultCustomEdgeProperty(EdgeManagerDescriptor edgeManagerDescriptor) {
  Preconditions.checkNotNull(edgeManagerDescriptor, "EdgeManagerDescriptor cannot be null");
  final OutputDescriptor edgeOutput =
      new OutputDescriptor(getOutputClassName()).setUserPayload(getOutputPayload());
  final InputDescriptor edgeInput =
      new InputDescriptor(getInputClassName()).setUserPayload(getInputPayload());
  return new EdgeProperty(edgeManagerDescriptor,
      EdgeProperty.DataSourceType.PERSISTED,
      EdgeProperty.SchedulingType.SEQUENTIAL,
      edgeOutput,
      edgeInput);
}
 
Example #15
Source File: LogicalIOProcessorRuntimeTask.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Reflectively instantiates the output class named by the spec's descriptor, using its
 * {@code (OutputContext, int)} constructor with the spec's physical edge count.
 *
 * @param outputSpec spec supplying the descriptor and the physical edge count
 * @param outputContext context passed to the output's constructor
 * @return the new output, as a {@link LogicalOutput}
 * @throws TezException declared by this method (propagated from the reflective creation)
 * @throws TezUncheckedException if the created object is not a {@link LogicalOutput}
 */
private LogicalOutput createOutput(OutputSpec outputSpec, OutputContext outputContext) throws TezException {
  final String outputClassName = outputSpec.getOutputDescriptor().getClassName();
  final Output created = ReflectionUtils.createClazzInstance(outputClassName,
      new Class[]{OutputContext.class, int.class},
      new Object[]{outputContext, outputSpec.getPhysicalEdgeCount()});

  if (created instanceof LogicalOutput) {
    return (LogicalOutput) created;
  }
  throw new TezUncheckedException(created.getClass().getName()
      + " is not a sub-type of LogicalOutput."
      + " Only LogicalOutput sub-types supported by LogicalIOProcessor.");
}
 
Example #16
Source File: TestHistoryEventProtoConverter.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a {@code VertexConfigurationDoneEvent} carrying one custom edge property and
 * checks the common proto fields plus the NUM_TASKS and UPDATED_EDGE_MANAGERS event data.
 */
@Test(timeout = 5000)
public void testConvertVertexReconfigreDoneEvent() {
  final EdgeProperty customEdge = EdgeProperty.create(
      EdgeManagerPluginDescriptor.create("a.class").setHistoryText("text"),
      DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
      OutputDescriptor.create("Out"), InputDescriptor.create("In"));
  final Map<String, EdgeProperty> sourceEdges = new HashMap<String, EdgeProperty>();
  sourceEdges.put("a", customEdge);

  final VertexConfigurationDoneEvent configDone =
      new VertexConfigurationDoneEvent(tezVertexID, 0L, 1, null, sourceEdges, null, true);
  final HistoryEventProto converted = converter.convert(configDone);

  assertCommon(converted, HistoryEventType.VERTEX_CONFIGURE_DONE, 0L,
      EntityTypes.TEZ_VERTEX_ID, null, null, 2);
  assertEventData(converted, ATSConstants.NUM_TASKS, "1");
  assertEventData(converted, ATSConstants.UPDATED_EDGE_MANAGERS, null);

  // Disabled assertions on the edge-manager details, kept for reference.
  /*
  Map<String, Object> updatedEdgeMgrs = (Map<String, Object>)
      evt.getEventInfo().get(ATSConstants.UPDATED_EDGE_MANAGERS);
  Assert.assertEquals(1, updatedEdgeMgrs.size());
  Assert.assertTrue(updatedEdgeMgrs.containsKey("a"));
  Map<String, Object> updatedEdgeMgr = (Map<String, Object>) updatedEdgeMgrs.get("a");

  Assert.assertEquals(DataMovementType.CUSTOM.name(),
      updatedEdgeMgr.get(DAGUtils.DATA_MOVEMENT_TYPE_KEY));
  Assert.assertEquals("In", updatedEdgeMgr.get(DAGUtils.EDGE_DESTINATION_CLASS_KEY));
  Assert.assertEquals("a.class", updatedEdgeMgr.get(DAGUtils.EDGE_MANAGER_CLASS_KEY));
  */
}
 
Example #17
Source File: TestInputReadyVertexManager.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Drives an {@code InputReadyVertexManager} with one scatter-gather source vertex of three
 * tasks and checks that the managed vertex's two tasks are scheduled only once all three
 * source completions have been reported.
 */
@Test (timeout=5000)
public void testBasicScatterGather() throws Exception {
  final HashMap<String, EdgeProperty> inputEdges =
      new HashMap<String, EdgeProperty>();
  final String sourceVertexName = "Vertex1";
  final EdgeProperty scatterGatherEdge = EdgeProperty.create(
      EdgeProperty.DataMovementType.SCATTER_GATHER,
      EdgeProperty.DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      OutputDescriptor.create("out"),
      InputDescriptor.create("in"));

  final String managedVertexName = "Vertex";

  // Stub the plugin context: managed vertex has 2 tasks, its source has 3.
  final VertexManagerPluginContext context = mock(VertexManagerPluginContext.class);
  when(context.getInputVertexEdgeProperties()).thenReturn(inputEdges);
  when(context.getVertexName()).thenReturn(managedVertexName);
  when(context.getVertexNumTasks(managedVertexName)).thenReturn(2);
  when(context.getVertexNumTasks(sourceVertexName)).thenReturn(3);
  inputEdges.put(sourceVertexName, scatterGatherEdge);

  final InputReadyVertexManager manager = new InputReadyVertexManager(context);
  manager.initialize();
  verify(context, times(1)).vertexReconfigurationPlanned();

  // source vertex configured
  manager.onVertexStateUpdated(new VertexStateUpdate(sourceVertexName, VertexState.CONFIGURED));
  verify(context, times(1)).doneReconfiguringVertex();
  verify(context, times(0)).scheduleTasks(requestCaptor.capture());

  // then own vertex started, with source attempt 0 already complete
  manager.onVertexStarted(Collections.singletonList(
      TestShuffleVertexManager.createTaskAttemptIdentifier(sourceVertexName, 0)));
  manager.onSourceTaskCompleted(
      TestShuffleVertexManager.createTaskAttemptIdentifier(sourceVertexName, 1));
  verify(context, times(0)).scheduleTasks(anyList());

  // the last source completion triggers scheduling
  manager.onSourceTaskCompleted(
      TestShuffleVertexManager.createTaskAttemptIdentifier(sourceVertexName, 2));
  verify(context, times(1)).scheduleTasks(requestCaptor.capture());
  Assert.assertEquals(2, requestCaptor.getValue().size());
}
 
Example #18
Source File: TestMemoryWithEvents.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code testMemory} on a two-vertex DAG (A to B) joined by a SCATTER_GATHER edge.
 * The test is currently disabled via {@code @Ignore}.
 */
@Ignore
@Test (timeout = 600000)
public void testMemoryScatterGather() throws Exception {
  final DAG dag = DAG.create("testMemoryScatterGather");
  final Vertex producer = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), numTasks);
  final Vertex consumer = Vertex.create("B", ProcessorDescriptor.create("Proc.class"), numTasks);

  final EdgeProperty scatterGather = EdgeProperty.create(DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
      OutputDescriptor.create("Out"), InputDescriptor.create("In"));

  dag.addVertex(producer)
      .addVertex(consumer)
      .addEdge(Edge.create(producer, consumer, scatterGather));
  testMemory(dag, true);
}
 
Example #19
Source File: OutputSpec.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Restores this spec from {@code in}, reading in order: the destination vertex name (UTF,
 * then weakly interned), the physical edge count (int), and the output descriptor.
 *
 * @param in stream to read the serialized fields from
 * @throws IOException if reading from {@code in} fails
 */
@Override
public void readFields(DataInput in) throws IOException {
  final String vertexName = in.readUTF();
  this.destinationVertexName = StringInterner.weakIntern(vertexName);
  this.physicalEdgeCount = in.readInt();
  // The descriptor deserializes itself from the remainder of the stream.
  this.outputDescriptor = new OutputDescriptor();
  this.outputDescriptor.readFields(in);
}
 
Example #20
Source File: TestMemoryDistributor.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Requests 10000, 10000 and 5000 units from a distributor whose JVM memory is 10000 and
 * checks that the allocations are scaled down to 2800, 2800 and 1400.
 */
@Test(timeout = 5000)
public void testScalingNoProcessor() {
  final MemoryDistributor distributor = new MemoryDistributor(2, 1, conf);

  distributor.setJvmMemory(10000L);

  // Request 1: first input
  final MemoryUpdateCallbackForTest firstInputCallback = new MemoryUpdateCallbackForTest();
  final TezInputContext firstInputContext = createTestInputContext();
  final InputDescriptor firstInputDesc = createTestInputDescriptor();
  distributor.requestMemory(10000, firstInputCallback, firstInputContext, firstInputDesc);

  // Request 2: second input
  final MemoryUpdateCallbackForTest secondInputCallback = new MemoryUpdateCallbackForTest();
  final TezInputContext secondInputContext = createTestInputContext();
  final InputDescriptor secondInputDesc = createTestInputDescriptor();
  distributor.requestMemory(10000, secondInputCallback, secondInputContext, secondInputDesc);

  // Request 3: the output
  final MemoryUpdateCallbackForTest outputCallback = new MemoryUpdateCallbackForTest();
  final TezOutputContext outputContext = createTestOutputContext();
  final OutputDescriptor outputDesc = createTestOutputDescriptor();
  distributor.requestMemory(5000, outputCallback, outputContext, outputDesc);

  distributor.makeInitialAllocations();

  // Total available: 70% of 10K = 7000
  // Requested 10K + 10K + 5K, so each share is scaled to 2800, 2800, 1400.
  assertEquals(2800, firstInputCallback.assigned);
  assertEquals(2800, secondInputCallback.assigned);
  assertEquals(1400, outputCallback.assigned);
}
 
Example #21
Source File: TestMemoryDistributor.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Same scenario as the unconfigured scaling test, but with
 * {@code TEZ_RUNTIME_SCALE_TASK_MEMORY_RESERVE_FRACTION} set to 0.5: requests of 10000,
 * 10000 and 5000 against a JVM memory of 10000 must be scaled to 2000, 2000 and 1000.
 */
@Test(timeout = 5000)
public void testReserveFractionConfigured() {
  // Copy of the shared configuration with the reserve fraction overridden.
  final Configuration reserveConf = new Configuration(this.conf);
  reserveConf.setDouble(TezJobConfig.TEZ_RUNTIME_SCALE_TASK_MEMORY_RESERVE_FRACTION, 0.5d);
  final MemoryDistributor distributor = new MemoryDistributor(2, 1, reserveConf);

  distributor.setJvmMemory(10000L);

  // Request 1: first input
  final MemoryUpdateCallbackForTest firstInputCallback = new MemoryUpdateCallbackForTest();
  final TezInputContext firstInputContext = createTestInputContext();
  final InputDescriptor firstInputDesc = createTestInputDescriptor();
  distributor.requestMemory(10000, firstInputCallback, firstInputContext, firstInputDesc);

  // Request 2: second input
  final MemoryUpdateCallbackForTest secondInputCallback = new MemoryUpdateCallbackForTest();
  final TezInputContext secondInputContext = createTestInputContext();
  final InputDescriptor secondInputDesc = createTestInputDescriptor();
  distributor.requestMemory(10000, secondInputCallback, secondInputContext, secondInputDesc);

  // Request 3: the output
  final MemoryUpdateCallbackForTest outputCallback = new MemoryUpdateCallbackForTest();
  final TezOutputContext outputContext = createTestOutputContext();
  final OutputDescriptor outputDesc = createTestOutputDescriptor();
  distributor.requestMemory(5000, outputCallback, outputContext, outputDesc);

  distributor.makeInitialAllocations();

  // Total available: 50% of 10K = 5000
  // Requested 10K + 10K + 5K, so each share is scaled to 2000, 2000, 1000.
  assertEquals(2000, firstInputCallback.assigned);
  assertEquals(2000, secondInputCallback.assigned);
  assertEquals(1000, outputCallback.assigned);
}
 
Example #22
Source File: ProtoConverters.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Rebuilds an {@link OutputSpec} from its proto form.
 *
 * @param outputSpecProto proto carrying the connected vertex name, the physical edge count
 *     and, optionally, the IO descriptor
 * @return the spec; its descriptor is null when the proto has no IO descriptor
 */
public static OutputSpec getOutputSpecFromProto(IOSpecProto outputSpecProto) {
  final OutputDescriptor descriptor = outputSpecProto.hasIoDescriptor()
      ? DagTypeConverters.convertOutputDescriptorFromDAGPlan(outputSpecProto.getIoDescriptor())
      : null;
  return new OutputSpec(
      outputSpecProto.getConnectedVertexName(),
      descriptor,
      outputSpecProto.getPhysicalEdgeCount());
}
 
Example #23
Source File: CartesianProduct.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the "CrossProduct" DAG: two token vertices, each reading from a fake data source,
 * feed a join vertex that writes to a fake data sink.
 *
 * <p>The join vertex is managed by {@code CartesianProductVertexManager}, and both edges
 * share one custom edge property managed by {@code CartesianProductEdgeManager}. The
 * manager plugins receive the same payload, derived from a {@code CartesianProductConfig}
 * over {@code sourceVertices}.
 *
 * @param tezConf configuration used when serializing the cartesian-product config
 * @return the assembled DAG
 * @throws IOException declared by this method (e.g. from payload serialization)
 */
private DAG createDAG(TezConfiguration tezConf) throws IOException {
  // Source side: both token vertices share one data source descriptor.
  final InputDescriptor fakeInput = InputDescriptor.create(FakeInput.class.getName());
  final InputInitializerDescriptor fakeInitializer =
    InputInitializerDescriptor.create(FakeInputInitializer.class.getName());
  final DataSourceDescriptor source =
    DataSourceDescriptor.create(fakeInput, fakeInitializer, null);

  final Vertex firstTokenVertex =
    Vertex.create(VERTEX1, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  firstTokenVertex.addDataSource(INPUT, source);
  final Vertex secondTokenVertex =
    Vertex.create(VERTEX2, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  secondTokenVertex.addDataSource(INPUT, source);

  // Sink side: output plus committer for the join vertex.
  final OutputDescriptor fakeOutput = OutputDescriptor.create(FakeOutput.class.getName());
  final OutputCommitterDescriptor fakeCommitter =
    OutputCommitterDescriptor.create(FakeOutputCommitter.class.getName());
  final DataSinkDescriptor sink =
    DataSinkDescriptor.create(fakeOutput, fakeCommitter, null);

  // Payload shared by the vertex manager and the edge manager.
  final CartesianProductConfig productConfig =
    new CartesianProductConfig(Arrays.asList(sourceVertices));
  final UserPayload productPayload = productConfig.toUserPayload(tezConf);

  final Vertex joinVertex =
    Vertex.create(VERTEX3, ProcessorDescriptor.create(JoinProcessor.class.getName()));
  joinVertex.addDataSink(OUTPUT, sink);
  joinVertex.setVertexManagerPlugin(
    VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName())
                                 .setUserPayload(productPayload));

  final EdgeManagerPluginDescriptor edgeManager =
    EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName());
  edgeManager.setUserPayload(productPayload);
  final UnorderedPartitionedKVEdgeConfig kvEdgeConfig =
    UnorderedPartitionedKVEdgeConfig.newBuilder(Text.class.getName(), IntWritable.class.getName(),
      RoundRobinPartitioner.class.getName()).build();
  final EdgeProperty crossEdge = kvEdgeConfig.createDefaultCustomEdgeProperty(edgeManager);

  return DAG.create("CrossProduct")
    .addVertex(firstTokenVertex)
    .addVertex(secondTokenVertex)
    .addVertex(joinVertex)
    .addEdge(Edge.create(firstTokenVertex, joinVertex, crossEdge))
    .addEdge(Edge.create(secondTokenVertex, joinVertex, crossEdge));
}
 
Example #24
Source File: TestDAGRecovery2.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Attaches a data sink with a {@code FailingOutputCommitter} (configured with {@code true})
 * to vertex "v3" of a simple V-shaped test DAG and expects the DAG to end in
 * {@code State.FAILED}.
 */
@Test(timeout=120000)
public void testFailingCommitter() throws Exception {
  final DAG dag = SimpleVTestDAG.createDAG("FailingCommitterDAG", null);

  final OutputDescriptor noOpOutput =
      OutputDescriptor.create(MultiAttemptDAG.NoOpOutput.class.getName());
  final byte[] committerConfig =
      new MultiAttemptDAG.FailingOutputCommitter.FailingOutputCommitterConfig(true)
          .toUserPayload();
  noOpOutput.setUserPayload(UserPayload.create(ByteBuffer.wrap(committerConfig)));

  final OutputCommitterDescriptor failingCommitter = OutputCommitterDescriptor.create(
      MultiAttemptDAG.FailingOutputCommitter.class.getName());
  final DataSinkDescriptor failingSink =
      DataSinkDescriptor.create(noOpOutput, failingCommitter, null);
  dag.getVertex("v3").addDataSink("FailingOutput", failingSink);

  runDAGAndVerify(dag, State.FAILED);
}
 
Example #25
Source File: TestOutput.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an {@link OutputDescriptor} for {@code TestOutput}.
 *
 * @param payload payload to attach to the descriptor; when null, no payload is set
 * @return the new descriptor
 */
public static OutputDescriptor getOutputDesc(UserPayload payload) {
  final OutputDescriptor descriptor = OutputDescriptor.create(TestOutput.class.getName());
  if (payload == null) {
    return descriptor;
  }
  descriptor.setUserPayload(payload);
  return descriptor;
}
 
Example #26
Source File: TestMemoryDistributor.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Requests 10000, 10000 and 5000 units from a distributor whose JVM memory is 10000 and
 * checks that the allocations are scaled down to 2800, 2800 and 1400.
 */
@Test(timeout = 5000)
public void testScalingNoProcessor() throws TezException {
  final MemoryDistributor distributor = new MemoryDistributor(2, 1, conf);

  distributor.setJvmMemory(10000L);

  // Request 1: first input
  final MemoryUpdateCallbackForTest firstInputCallback = new MemoryUpdateCallbackForTest();
  final InputContext firstInputContext = createTestInputContext();
  final InputDescriptor firstInputDesc = createTestInputDescriptor();
  distributor.requestMemory(10000, firstInputCallback, firstInputContext, firstInputDesc);

  // Request 2: second input
  final MemoryUpdateCallbackForTest secondInputCallback = new MemoryUpdateCallbackForTest();
  final InputContext secondInputContext = createTestInputContext();
  final InputDescriptor secondInputDesc = createTestInputDescriptor();
  distributor.requestMemory(10000, secondInputCallback, secondInputContext, secondInputDesc);

  // Request 3: the output
  final MemoryUpdateCallbackForTest outputCallback = new MemoryUpdateCallbackForTest();
  final OutputContext outputContext = createTestOutputContext();
  final OutputDescriptor outputDesc = createTestOutputDescriptor();
  distributor.requestMemory(5000, outputCallback, outputContext, outputDesc);

  distributor.makeInitialAllocations();

  // Total available: 70% of 10K = 7000
  // Requested 10K + 10K + 5K, so each share is scaled to 2800, 2800, 1400.
  assertEquals(2800, firstInputCallback.assigned);
  assertEquals(2800, secondInputCallback.assigned);
  assertEquals(1400, outputCallback.assigned);
}
 
Example #27
Source File: UnorderedPartitionedKVEdgeConfig.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Convenience factory for an edge property driven by a caller-supplied edge manager.
 *
 * <p>The edge uses a PERSISTED data source and SEQUENTIAL scheduling; the output and input
 * descriptors are built from this config's class names and payloads. The history text of
 * the result is filled in via {@code Utils.setEdgePropertyHistoryText}.
 *
 * @param edgeManagerDescriptor the custom edge specification; must not be null
 * @return an {@link org.apache.tez.dag.api.EdgeProperty} instance
 */
public EdgeProperty createDefaultCustomEdgeProperty(EdgeManagerPluginDescriptor edgeManagerDescriptor) {
  Objects.requireNonNull(edgeManagerDescriptor, "EdgeManagerDescriptor cannot be null");
  final OutputDescriptor edgeOutput =
      OutputDescriptor.create(getOutputClassName()).setUserPayload(getOutputPayload());
  final InputDescriptor edgeInput =
      InputDescriptor.create(getInputClassName()).setUserPayload(getInputPayload());
  final EdgeProperty customEdge = EdgeProperty.create(edgeManagerDescriptor,
      EdgeProperty.DataSourceType.PERSISTED,
      EdgeProperty.SchedulingType.SEQUENTIAL,
      edgeOutput,
      edgeInput);
  Utils.setEdgePropertyHistoryText(this, customEdge);
  return customEdge;
}
 
Example #28
Source File: UnorderedKVEdgeConfig.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Convenience factory for the typical broadcast usage of this edge.
 *
 * <p>The returned property uses {@code DataMovementType.BROADCAST},
 * {@code EdgeProperty.DataSourceType.PERSISTED} and
 * {@code EdgeProperty.SchedulingType.SEQUENTIAL}. If custom edge properties are required,
 * use the methods that expose the relevant payloads instead.
 *
 * @return an {@link org.apache.tez.dag.api.EdgeProperty} instance
 */
public EdgeProperty createDefaultBroadcastEdgeProperty() {
  final OutputDescriptor edgeOutput =
      OutputDescriptor.create(getOutputClassName()).setUserPayload(getOutputPayload());
  final InputDescriptor edgeInput =
      InputDescriptor.create(getInputClassName()).setUserPayload(getInputPayload());
  final EdgeProperty broadcastEdge = EdgeProperty.create(
      EdgeProperty.DataMovementType.BROADCAST,
      EdgeProperty.DataSourceType.PERSISTED,
      EdgeProperty.SchedulingType.SEQUENTIAL,
      edgeOutput,
      edgeInput);
  Utils.setEdgePropertyHistoryText(this, broadcastEdge);
  return broadcastEdge;
}
 
Example #29
Source File: UnorderedKVEdgeConfig.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Convenience factory for the typical one-to-one usage of this edge.
 *
 * <p>The returned property uses {@code DataMovementType.ONE_TO_ONE},
 * {@code EdgeProperty.DataSourceType.PERSISTED} and
 * {@code EdgeProperty.SchedulingType.SEQUENTIAL}. If custom edge properties are required,
 * use the methods that expose the relevant payloads instead.
 *
 * @return an {@link org.apache.tez.dag.api.EdgeProperty} instance
 */
public EdgeProperty createDefaultOneToOneEdgeProperty() {
  final OutputDescriptor edgeOutput =
      OutputDescriptor.create(getOutputClassName()).setUserPayload(getOutputPayload());
  final InputDescriptor edgeInput =
      InputDescriptor.create(getInputClassName()).setUserPayload(getInputPayload());
  final EdgeProperty oneToOneEdge = EdgeProperty.create(
      EdgeProperty.DataMovementType.ONE_TO_ONE,
      EdgeProperty.DataSourceType.PERSISTED,
      EdgeProperty.SchedulingType.SEQUENTIAL,
      edgeOutput,
      edgeInput);
  Utils.setEdgePropertyHistoryText(this, oneToOneEdge);
  return oneToOneEdge;
}
 
Example #30
Source File: UnorderedKVEdgeConfig.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Convenience factory for an edge property driven by a caller-supplied edge manager.
 *
 * <p>The edge uses a PERSISTED data source and SEQUENTIAL scheduling; the output and input
 * descriptors are built from this config's class names and payloads. The history text of
 * the result is filled in via {@code Utils.setEdgePropertyHistoryText}.
 *
 * @param edgeManagerDescriptor the custom edge specification; must not be null
 * @return an {@link org.apache.tez.dag.api.EdgeProperty} instance
 */
public EdgeProperty createDefaultCustomEdgeProperty(EdgeManagerPluginDescriptor edgeManagerDescriptor) {
  Objects.requireNonNull(edgeManagerDescriptor, "EdgeManagerDescriptor cannot be null");
  final OutputDescriptor edgeOutput =
      OutputDescriptor.create(getOutputClassName()).setUserPayload(getOutputPayload());
  final InputDescriptor edgeInput =
      InputDescriptor.create(getInputClassName()).setUserPayload(getInputPayload());
  final EdgeProperty customEdge = EdgeProperty.create(edgeManagerDescriptor,
      EdgeProperty.DataSourceType.PERSISTED,
      EdgeProperty.SchedulingType.SEQUENTIAL,
      edgeOutput,
      edgeInput);
  Utils.setEdgePropertyHistoryText(this, customEdge);
  return customEdge;
}