Java Code Examples for org.apache.tez.dag.api.EdgeProperty.DataMovementType#BROADCAST

The following examples show how to use org.apache.tez.dag.api.EdgeProperty.DataMovementType#BROADCAST. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestDAGVerify.java    From incubator-tez with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a two-vertex DAG whose only edge uses BROADCAST data movement and
 * runs {@code DAG.verify()} on it. The test declares no expected exception,
 * so it passes only when verification completes without throwing.
 */
@Test(timeout = 5000)
public void testVerifyBroadcast() {
  Vertex producer = new Vertex(
      "v1",
      new ProcessorDescriptor(dummyProcessorClassName),
      dummyTaskCount,
      dummyTaskResource);
  Vertex consumer = new Vertex(
      "v2",
      new ProcessorDescriptor("MapProcessor"),
      dummyTaskCount,
      dummyTaskResource);

  // The edge configuration under test: broadcast movement, persisted
  // source, sequential scheduling.
  EdgeProperty broadcastProperty = new EdgeProperty(
      DataMovementType.BROADCAST,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      new OutputDescriptor(dummyOutputClassName),
      new InputDescriptor(dummyInputClassName));
  Edge broadcastEdge = new Edge(producer, consumer, broadcastProperty);

  DAG dag = new DAG("testDag");
  dag.addVertex(producer);
  dag.addVertex(consumer);
  dag.addEdge(broadcastEdge);
  dag.verify();
}
 
Example 2
Source File: TezCompilerUtil.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Records the data movement type on the edge and selects matching
 * input/output classes for it.
 *
 * <ul>
 *   <li>BROADCAST or ONE_TO_ONE: {@code UnorderedKVOutput} / {@code UnorderedKVInput}.</li>
 *   <li>SCATTER_GATHER: {@code UnorderedPartitionedKVOutput} / {@code UnorderedKVInput},
 *       with {@code RoundRobinPartitioner} as the partitioner.</li>
 *   <li>Any other value (including {@code null}): input, output and partitioner
 *       are left untouched.</li>
 * </ul>
 *
 * The intermediate key and value classes are set in every case.
 *
 * @param edge the edge descriptor to configure; modified in place
 * @param dataMovementType the data movement type to store on the edge
 */
static public void configureValueOnlyTupleOutput(TezEdgeDescriptor edge, DataMovementType dataMovementType) {
    edge.dataMovementType = dataMovementType;

    final boolean partitioned = dataMovementType == DataMovementType.SCATTER_GATHER;
    final boolean unpartitioned = dataMovementType == DataMovementType.BROADCAST
            || dataMovementType == DataMovementType.ONE_TO_ONE;

    if (unpartitioned || partitioned) {
        // Both families read through the same unordered input; only the
        // output side differs.
        edge.inputClassName = UnorderedKVInput.class.getName();
        edge.outputClassName = partitioned
                ? UnorderedPartitionedKVOutput.class.getName()
                : UnorderedKVOutput.class.getName();
    }
    if (partitioned) {
        edge.partitionerClass = RoundRobinPartitioner.class;
    }

    edge.setIntermediateOutputKeyClass(POValueOutputTez.EmptyWritable.class.getName());
    edge.setIntermediateOutputValueClass(TUPLE_CLASS);
}
 
Example 3
Source File: DagTypeConverters.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Maps a DAG-plan data movement type onto the corresponding API-level
 * {@code DataMovementType} constant of the same name.
 *
 * @param type the plan-level value to convert
 * @return the matching {@code DataMovementType}
 * @throws IllegalArgumentException if {@code type} is not one of the values handled here
 */
public static DataMovementType convertFromDAGPlan(PlanEdgeDataMovementType type){
  final DataMovementType converted;
  switch (type) {
    case ONE_TO_ONE:
      converted = DataMovementType.ONE_TO_ONE;
      break;
    case BROADCAST:
      converted = DataMovementType.BROADCAST;
      break;
    case SCATTER_GATHER:
      converted = DataMovementType.SCATTER_GATHER;
      break;
    default:
      throw new IllegalArgumentException("unknown 'dataMovementType': " + type);
  }
  return converted;
}
 
Example 4
Source File: DagTypeConverters.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Maps a DAG-plan data movement type onto the corresponding API-level
 * {@code DataMovementType} constant of the same name.
 *
 * @param type the plan-level value to convert
 * @return the matching {@code DataMovementType}
 * @throws IllegalArgumentException if {@code type} is not one of the values handled here
 */
public static DataMovementType convertFromDAGPlan(PlanEdgeDataMovementType type){
  final DataMovementType converted;
  switch (type) {
    case ONE_TO_ONE:
      converted = DataMovementType.ONE_TO_ONE;
      break;
    case BROADCAST:
      converted = DataMovementType.BROADCAST;
      break;
    case SCATTER_GATHER:
      converted = DataMovementType.SCATTER_GATHER;
      break;
    case CUSTOM:
      converted = DataMovementType.CUSTOM;
      break;
    default:
      throw new IllegalArgumentException("unknown 'dataMovementType': " + type);
  }
  return converted;
}
 
Example 5
Source File: TezDagBuilder.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Return EdgeProperty that connects two vertices.
 *
 * <p>Looks up the edge descriptor stored on {@code to} for {@code from},
 * assembles a {@link Configuration} for it, attaches that configuration as
 * the user payload of both the input and the output descriptor, and wraps
 * them in an {@link EdgeProperty}.
 *
 * <p>The order of the {@code conf.set(...)} calls matters: several keys are
 * written more than once and the last write wins.
 *
 * @param from the operator producing the data; its plan is scanned for
 *        {@code POLocalRearrangeTez} operators targeting {@code to}
 * @param to the operator consuming the data; its {@code inEdges} map must
 *        contain an entry for {@code from}
 * @return EdgeProperty built with a null edge manager descriptor when the
 *         edge is not BROADCAST, the estimated parallelism of {@code to} is
 *         known, and {@code to} is a global sort or skewed join; otherwise
 *         one built from the edge's own data movement type
 * @throws IOException propagated from the serialization/configuration
 *         helpers called here
 */
private EdgeProperty newEdge(TezOperator from, TezOperator to)
        throws IOException {
    TezEdgeDescriptor edge = to.inEdges.get(from.getOperatorKey());
    PhysicalPlan combinePlan = edge.combinePlan;

    InputDescriptor in = InputDescriptor.create(edge.inputClassName);
    OutputDescriptor out = OutputDescriptor.create(edge.outputClassName);

    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties(), false);
    if (!combinePlan.isEmpty()) {
        addCombiner(combinePlan, to, conf);
    }

    List<POLocalRearrangeTez> lrs = PlanHelper.getPhysicalOperators(from.plan,
            POLocalRearrangeTez.class);

    // Only the first local rearrange that outputs to `to` determines the key type.
    for (POLocalRearrangeTez lr : lrs) {
        if (lr.getOutputKey().equals(to.getOperatorKey().toString())) {
            byte keyType = lr.getKeyType();
            setIntermediateOutputKeyValue(keyType, conf, to, lr.isConnectedToPackage());
            // In case of secondary key sort, main key type is the actual key type
            conf.set("pig.reduce.key.type", Byte.toString(lr.getMainKeyType()));
            break;
        }
    }

    // Default partitioner; left alone if the key already has a value.
    conf.setIfUnset(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS,
            MRPartitioner.class.getName());

    // Explicit per-edge overrides of key/value/comparator classes.
    if (edge.getIntermediateOutputKeyClass() != null) {
        conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS,
                edge.getIntermediateOutputKeyClass());
    }

    if (edge.getIntermediateOutputValueClass() != null) {
        conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS,
                edge.getIntermediateOutputValueClass());
    }

    if (edge.getIntermediateOutputKeyComparatorClass() != null) {
        conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_COMPARATOR_CLASS,
                edge.getIntermediateOutputKeyComparatorClass());
    }

    conf.setBoolean(MRConfiguration.MAPPER_NEW_API, true);
    conf.set("pig.pigContext", ObjectSerializer.serialize(pc));
    // Serialized exactly once; nothing below writes this key again.
    conf.set("udf.import.list",
            ObjectSerializer.serialize(PigContext.getPackageImportList()));

    if (to.isGlobalSort() || to.isLimitAfterSort()) {
        conf.set("pig.sortOrder",
                ObjectSerializer.serialize(to.getSortOrder()));
    }

    if (edge.isUseSecondaryKey()) {
        conf.set("pig.secondarySortOrder",
                ObjectSerializer.serialize(edge.getSecondarySortOrder()));
        conf.set(org.apache.hadoop.mapreduce.MRJobConfig.PARTITIONER_CLASS_ATTR,
                SecondaryKeyPartitioner.class.getName());
        // These needs to be on the vertex as well for POShuffleTezLoad to pick it up.
        // Tez framework also expects this to be per vertex and not edge. IFile.java picks
        // up keyClass and valueClass from vertex config. TODO - check with Tez folks
        // In MR - job.setSortComparatorClass() or MRJobConfig.KEY_COMPARATOR
        // Note: this replaces any comparator class written from the edge above.
        conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_COMPARATOR_CLASS,
                PigSecondaryKeyComparator.class.getName());
        // In MR - job.setOutputKeyClass() or MRJobConfig.OUTPUT_KEY_CLASS
        // Note: this replaces any key class written from the edge above.
        conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, NullableTuple.class.getName());
        setGroupingComparator(conf, PigSecondaryKeyGroupComparator.class.getName());
    }

    // An explicit partitioner on the edge wins over the secondary-key
    // partitioner, because it is written to the same key afterwards.
    if (edge.partitionerClass != null) {
        conf.set(org.apache.hadoop.mapreduce.MRJobConfig.PARTITIONER_CLASS_ATTR,
                edge.partitionerClass.getName());
    }

    MRToTezHelper.processMRSettings(conf, globalConf);

    // Input and output each get their own payload built from the same conf.
    String historyString = convertToHistoryText("", conf);
    in.setUserPayload(TezUtils.createUserPayloadFromConf(conf)).setHistoryText(historyString);
    out.setUserPayload(TezUtils.createUserPayloadFromConf(conf)).setHistoryText(historyString);

    if (edge.dataMovementType != DataMovementType.BROADCAST
            && to.getEstimatedParallelism() != -1
            && (to.isGlobalSort() || to.isSkewedJoin())) {
        // Use custom edge
        // NOTE(review): the edge manager descriptor is deliberately null here;
        // presumably the real one is supplied later — confirm against callers.
        return EdgeProperty.create((EdgeManagerPluginDescriptor) null,
                edge.dataSourceType, edge.schedulingType, out, in);
    }

    return EdgeProperty.create(edge.dataMovementType, edge.dataSourceType,
            edge.schedulingType, out, in);
}