Java Code Examples for org.apache.tez.dag.api.OutputDescriptor#create()

The following examples show how to use org.apache.tez.dag.api.OutputDescriptor#create(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: CartesianProduct.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Assembles the "CrossProduct" DAG: VERTEX1 and VERTEX2 each read the same fake data source
 * and feed VERTEX3, which writes to a fake data sink. VERTEX3's vertex manager and the custom
 * edge manager on both edges share one user payload produced from a
 * {@link CartesianProductConfig} built over {@code sourceVertices}.
 *
 * @param tezConf configuration handed to {@code CartesianProductConfig#toUserPayload}
 * @return the DAG containing the three vertices and the two edges into VERTEX3
 * @throws IOException declared by this method; presumably raised while building the payload
 */
private DAG createDAG(TezConfiguration tezConf) throws IOException {
  // One data source descriptor is reused by both upstream vertices.
  DataSourceDescriptor sourceDescriptor = DataSourceDescriptor.create(
    InputDescriptor.create(FakeInput.class.getName()),
    InputInitializerDescriptor.create(FakeInputInitializer.class.getName()),
    null);

  Vertex v1 = Vertex.create(VERTEX1, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  v1.addDataSource(INPUT, sourceDescriptor);
  Vertex v2 = Vertex.create(VERTEX2, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  v2.addDataSource(INPUT, sourceDescriptor);

  DataSinkDescriptor sinkDescriptor = DataSinkDescriptor.create(
    OutputDescriptor.create(FakeOutput.class.getName()),
    OutputCommitterDescriptor.create(FakeOutputCommitter.class.getName()),
    null);

  // The same payload configures both the vertex manager and the edge manager.
  UserPayload cartesianPayload =
    new CartesianProductConfig(Arrays.asList(sourceVertices)).toUserPayload(tezConf);

  VertexManagerPluginDescriptor vertexManagerDescriptor =
    VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName())
                                 .setUserPayload(cartesianPayload);
  Vertex v3 = Vertex.create(VERTEX3, ProcessorDescriptor.create(JoinProcessor.class.getName()));
  v3.addDataSink(OUTPUT, sinkDescriptor);
  v3.setVertexManagerPlugin(vertexManagerDescriptor);

  EdgeManagerPluginDescriptor edgeManagerDescriptor =
    EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName());
  edgeManagerDescriptor.setUserPayload(cartesianPayload);
  EdgeProperty customEdge = UnorderedPartitionedKVEdgeConfig
    .newBuilder(Text.class.getName(), IntWritable.class.getName(),
      RoundRobinPartitioner.class.getName())
    .build()
    .createDefaultCustomEdgeProperty(edgeManagerDescriptor);

  DAG dag = DAG.create("CrossProduct");
  dag.addVertex(v1);
  dag.addVertex(v2);
  dag.addVertex(v3);
  dag.addEdge(Edge.create(v1, v3, customEdge));
  dag.addEdge(Edge.create(v2, v3, customEdge));
  return dag;
}
 
Example 2
Source File: TestDAGRecovery2.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Attaches a data sink named "FailingOutput" to vertex "v3" of a {@code SimpleVTestDAG},
 * using a {@code FailingOutputCommitter} whose config is constructed with {@code true},
 * and verifies that the DAG run ends in {@code State.FAILED}.
 */
@Test(timeout=120000)
public void testFailingCommitter() throws Exception {
  DAG failingDag = SimpleVTestDAG.createDAG("FailingCommitterDAG", null);

  // The committer config travels as the user payload of the output descriptor.
  byte[] committerConfigBytes =
      new MultiAttemptDAG.FailingOutputCommitter.FailingOutputCommitterConfig(true)
          .toUserPayload();
  OutputDescriptor outputDescriptor =
      OutputDescriptor.create(MultiAttemptDAG.NoOpOutput.class.getName());
  outputDescriptor.setUserPayload(UserPayload.create(ByteBuffer.wrap(committerConfigBytes)));

  OutputCommitterDescriptor committerDescriptor = OutputCommitterDescriptor.create(
      MultiAttemptDAG.FailingOutputCommitter.class.getName());
  DataSinkDescriptor failingSink =
      DataSinkDescriptor.create(outputDescriptor, committerDescriptor, null);

  failingDag.getVertex("v3").addDataSink("FailingOutput", failingSink);
  runDAGAndVerify(failingDag, State.FAILED);
}
 
Example 3
Source File: TestOutput.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an {@link OutputDescriptor} for {@code TestOutput}.
 *
 * @param payload user payload to attach to the descriptor; when {@code null} no payload is set
 * @return the newly created descriptor
 */
public static OutputDescriptor getOutputDesc(UserPayload payload) {
  OutputDescriptor descriptor = OutputDescriptor.create(TestOutput.class.getName());
  if (payload == null) {
    // Nothing to attach; hand back the bare descriptor.
    return descriptor;
  }
  descriptor.setUserPayload(payload);
  return descriptor;
}
 
Example 4
Source File: TezDagBuilder.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Return EdgeProperty that connects two vertices.
 *
 * <p>The edge descriptor is looked up in {@code to.inEdges} under {@code from}'s operator key.
 * A configuration is derived from the PigContext properties, filled in with the combiner,
 * key/value/comparator/partitioner classes and sort settings taken from the edge and the
 * operators, and then attached as user payload (plus history text) to both the input and the
 * output descriptor of the edge.
 *
 * @param from operator on the producing side of the edge
 * @param to operator on the consuming side of the edge; its {@code inEdges} map supplies the
 *        edge descriptor
 * @return EdgeProperty built from the edge's data source type, scheduling type and the
 *         configured output/input descriptors
 * @throws IOException if building or serializing the edge configuration fails
 */
private EdgeProperty newEdge(TezOperator from, TezOperator to)
        throws IOException {
    TezEdgeDescriptor edge = to.inEdges.get(from.getOperatorKey());
    PhysicalPlan combinePlan = edge.combinePlan;

    InputDescriptor in = InputDescriptor.create(edge.inputClassName);
    OutputDescriptor out = OutputDescriptor.create(edge.outputClassName);

    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties(), false);
    if (!combinePlan.isEmpty()) {
        addCombiner(combinePlan, to, conf);
    }

    List<POLocalRearrangeTez> lrs = PlanHelper.getPhysicalOperators(from.plan,
            POLocalRearrangeTez.class);

    // Only the first local rearrange whose output key targets 'to' contributes key settings.
    for (POLocalRearrangeTez lr : lrs) {
        if (lr.getOutputKey().equals(to.getOperatorKey().toString())) {
            byte keyType = lr.getKeyType();
            setIntermediateOutputKeyValue(keyType, conf, to, lr.isConnectedToPackage());
            // In case of secondary key sort, main key type is the actual key type
            conf.set("pig.reduce.key.type", Byte.toString(lr.getMainKeyType()));
            break;
        }
    }

    // Default partitioner; left untouched if the configuration already names one.
    conf.setIfUnset(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS,
            MRPartitioner.class.getName());

    // Explicit per-edge overrides take precedence over whatever was set above.
    if (edge.getIntermediateOutputKeyClass() != null) {
        conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS,
                edge.getIntermediateOutputKeyClass());
    }

    if (edge.getIntermediateOutputValueClass() != null) {
        conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS,
                edge.getIntermediateOutputValueClass());
    }

    if (edge.getIntermediateOutputKeyComparatorClass() != null) {
        conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_COMPARATOR_CLASS,
                edge.getIntermediateOutputKeyComparatorClass());
    }

    conf.setBoolean(MRConfiguration.MAPPER_NEW_API, true);
    conf.set("pig.pigContext", ObjectSerializer.serialize(pc));
    // Set once here; nothing below overwrites this key, so it is not serialized a second time.
    conf.set("udf.import.list",
            ObjectSerializer.serialize(PigContext.getPackageImportList()));

    if (to.isGlobalSort() || to.isLimitAfterSort()) {
        conf.set("pig.sortOrder",
                ObjectSerializer.serialize(to.getSortOrder()));
    }

    if (edge.isUseSecondaryKey()) {
        conf.set("pig.secondarySortOrder",
                ObjectSerializer.serialize(edge.getSecondarySortOrder()));
        conf.set(org.apache.hadoop.mapreduce.MRJobConfig.PARTITIONER_CLASS_ATTR,
                SecondaryKeyPartitioner.class.getName());
        // These needs to be on the vertex as well for POShuffleTezLoad to pick it up.
        // Tez framework also expects this to be per vertex and not edge. IFile.java picks
        // up keyClass and valueClass from vertex config. TODO - check with Tez folks
        // In MR - job.setSortComparatorClass() or MRJobConfig.KEY_COMPARATOR
        conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_COMPARATOR_CLASS,
                PigSecondaryKeyComparator.class.getName());
        // In MR - job.setOutputKeyClass() or MRJobConfig.OUTPUT_KEY_CLASS
        conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, NullableTuple.class.getName());
        setGroupingComparator(conf, PigSecondaryKeyGroupComparator.class.getName());
    }

    // An edge-specific partitioner overrides the secondary-key partitioner set above.
    if (edge.partitionerClass != null) {
        conf.set(org.apache.hadoop.mapreduce.MRJobConfig.PARTITIONER_CLASS_ATTR,
                edge.partitionerClass.getName());
    }

    MRToTezHelper.processMRSettings(conf, globalConf);

    // Both ends of the edge receive a payload built from the same final configuration.
    String historyString = convertToHistoryText("", conf);
    in.setUserPayload(TezUtils.createUserPayloadFromConf(conf)).setHistoryText(historyString);
    out.setUserPayload(TezUtils.createUserPayloadFromConf(conf)).setHistoryText(historyString);

    if (edge.dataMovementType != DataMovementType.BROADCAST
            && to.getEstimatedParallelism() != -1
            && (to.isGlobalSort() || to.isSkewedJoin())) {
        // Use custom edge
        // NOTE(review): the edge manager descriptor is null here; presumably it is
        // supplied elsewhere before the DAG runs - confirm.
        return EdgeProperty.create((EdgeManagerPluginDescriptor)null,
                edge.dataSourceType, edge.schedulingType, out, in);
    }

    return EdgeProperty.create(edge.dataMovementType, edge.dataSourceType,
            edge.schedulingType, out, in);
}
 
Example 5
Source File: TestTaskSpec.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Writes a {@code TaskSpec} to an in-memory stream, reads it back into a fresh instance and
 * checks that the DAG name, vertex name, vertex parallelism, input/output counts, the first
 * input's source vertex, the first output's destination vertex and the "foo" task-conf entry
 * match, and that the group inputs are null after the round trip.
 */
@Test (timeout = 5000)
public void testSerDe() throws IOException {
  // A typed null so the ByteBuffer overload of UserPayload.create is selected.
  ByteBuffer emptyPayload = null;
  ProcessorDescriptor processorDescriptor = ProcessorDescriptor.create("proc")
      .setUserPayload(UserPayload.create(emptyPayload))
      .setHistoryText("historyText");

  List<InputSpec> inputs = new ArrayList<>();
  inputs.add(new InputSpec("src1", InputDescriptor.create("inputClass"), 10));
  List<OutputSpec> outputs = new ArrayList<>();
  outputs.add(new OutputSpec("dest1", OutputDescriptor.create("outputClass"), 999));
  List<GroupInputSpec> groupInputs = null;

  Configuration taskConf = new Configuration(false);
  taskConf.set("foo", "bar");

  TezDAGID dagId = TezDAGID.getInstance("1234", 1, 1);
  TezVertexID vertexId = TezVertexID.getInstance(dagId, 1);
  TezTaskID taskId = TezTaskID.getInstance(vertexId, 1);
  TezTaskAttemptID attemptId = TezTaskAttemptID.getInstance(taskId, 1);
  TaskSpec original = new TaskSpec(attemptId, "dagName", "vName", -1, processorDescriptor,
      inputs, outputs, groupInputs, taskConf);

  // Serialize into memory ...
  ByteArrayOutputStream sink = new ByteArrayOutputStream();
  original.write(new DataOutputStream(sink));

  // ... and read the bytes back into an empty spec.
  TaskSpec restored = new TaskSpec();
  restored.readFields(new DataInputStream(new ByteArrayInputStream(sink.toByteArray())));

  Assert.assertEquals(original.getDAGName(), restored.getDAGName());
  Assert.assertEquals(original.getVertexName(), restored.getVertexName());
  Assert.assertEquals(original.getVertexParallelism(), restored.getVertexParallelism());
  Assert.assertEquals(original.getInputs().size(), restored.getInputs().size());
  Assert.assertEquals(original.getOutputs().size(), restored.getOutputs().size());
  Assert.assertNull(restored.getGroupInputs());

  String expectedSource = original.getInputs().get(0).getSourceVertexName();
  Assert.assertEquals(expectedSource, restored.getInputs().get(0).getSourceVertexName());
  String expectedDestination = original.getOutputs().get(0).getDestinationVertexName();
  Assert.assertEquals(expectedDestination,
      restored.getOutputs().get(0).getDestinationVertexName());

  Assert.assertEquals(taskConf.get("foo"), restored.getTaskConf().get("foo"));
}
 
Example 6
Source File: TestLogicalIOProcessorRuntimeTask.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a list holding a single {@code OutputSpec} named "outedge" whose descriptor is
 * created from the given class name and whose third constructor argument is 1.
 *
 * @param outputClassName class name passed to {@code OutputDescriptor.create}
 * @return a list (from {@code Lists.newArrayList}) containing that one output spec
 */
private List<OutputSpec> createOutputSpecList(String outputClassName) {
  return Lists.newArrayList(
      new OutputSpec("outedge", OutputDescriptor.create(outputClassName), 1));
}