Java Code Examples for org.apache.tez.runtime.library.common.ConfigUtils#getIntermediateInputKeyComparator()

The following examples show how to use org.apache.tez.runtime.library.common.ConfigUtils#getIntermediateInputKeyComparator(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: OrderedGroupedKVInput.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the {@code ValuesIterator} stored in {@code vIter}.
 *
 * <p>The key comparator, key class and value class are all resolved from
 * {@code conf} through {@code ConfigUtils}; the chosen types are logged
 * together with the source vertex name before the iterator is built over
 * {@code rawIter}.
 *
 * @throws IOException declared by this method's signature
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
protected synchronized void createValuesIterator()
    throws IOException {
  // Not used by ReduceProcessor
  final RawComparator keyComparator = ConfigUtils.getIntermediateInputKeyComparator(conf);
  final Class<?> intermediateKeyClass = ConfigUtils.getIntermediateInputKeyClass(conf);
  final Class<?> intermediateValueClass = ConfigUtils.getIntermediateInputValueClass(conf);

  // Record which types were picked up from the configuration.
  final String sourceVertex = getContext().getSourceVertexName();
  final String message = sourceVertex + ": creating ValuesIterator with comparator="
      + keyComparator.getClass().getName()
      + ", keyClass=" + intermediateKeyClass.getName()
      + ", valClass=" + intermediateValueClass.getName();
  LOG.info(message);

  vIter = new ValuesIterator(
      rawIter,
      keyComparator,
      intermediateKeyClass,
      intermediateValueClass,
      conf,
      inputKeyCounter,
      inputValueCounter);
}
 
Example 2
Source File: WeightedRangePartitionerTez.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes this partitioner from {@code PigProcessor.sampleMap}.
 *
 * <p>If no sample map is available, a warning is logged and the partitioner
 * is marked as initialized without any further setup. Otherwise the quantiles
 * list is converted via {@code convertToArray}, one
 * {@code DiscreteProbabilitySampleGenerator} is registered in
 * {@code weightedParts} per weighted-parts entry, and the key comparator is
 * resolved from {@code job}.
 *
 * @throws RuntimeException wrapping any exception raised while reading the sample map
 */
@Override
public void init() {
    if (PigProcessor.sampleMap == null) {
        LOG.warn("Quantiles map is empty");
        inited = true;
        return;
    }

    // We've collected sampleMap in PigProcessor
    final Map<String, Object> quantileMap = PigProcessor.sampleMap;

    final long startMillis = System.currentTimeMillis();
    try {
        final DataBag quantiles = (DataBag) quantileMap.get(FindQuantiles.QUANTILES_LIST);
        final InternalMap weightedEntries =
                (InternalMap) quantileMap.get(FindQuantiles.WEIGHTED_PARTS);
        estimatedNumPartitions = (Integer)quantileMap.get(PigProcessor.ESTIMATED_NUM_PARALLELISM);
        convertToArray(quantiles);

        for (Entry<Object, Object> weightedEntry : weightedEntries.entrySet()) {
            // The key is a sample item which repeats; the value carries its probability vector.
            final Tuple repeatedSample = (Tuple) weightedEntry.getKey();
            final float[] probabilities = getProbVec((Tuple) weightedEntry.getValue());
            weightedParts.put(
                    getPigNullableWritable(repeatedSample),
                    new DiscreteProbabilitySampleGenerator(probabilities));
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    comparator = ConfigUtils.getIntermediateInputKeyComparator(job);

    // Elapsed wall-clock time in milliseconds since the sample map was picked up.
    final long elapsedMillis = System.currentTimeMillis() - startMillis;
    LOG.info("Initialized WeightedRangePartitionerTez. Time taken: " + elapsedMillis);
    inited = true;
}
 
Example 3
Source File: ShuffledMergedInput.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the {@code ValuesIterator} stored in {@code vIter}.
 *
 * <p>The iterator reads from {@code rawIter} and uses the key comparator,
 * key class and value class that {@code ConfigUtils} resolves from
 * {@code conf}.
 *
 * @throws IOException declared by this method's signature
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
protected synchronized void createValuesIterator()
    throws IOException {
  // Not used by ReduceProcessor
  final RawComparator keyComparator =
      (RawComparator) ConfigUtils.getIntermediateInputKeyComparator(conf);
  final Class<?> intermediateKeyClass = ConfigUtils.getIntermediateInputKeyClass(conf);
  final Class<?> intermediateValueClass = ConfigUtils.getIntermediateInputValueClass(conf);

  vIter = new ValuesIterator(
      rawIter,
      keyComparator,
      intermediateKeyClass,
      intermediateValueClass,
      conf,
      inputKeyCounter,
      inputValueCounter);
}
 
Example 4
Source File: MRCombiner.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a combiner from the configuration carried in the task context's user payload.
 *
 * <p>When the context is a {@code TezOutputContext}, the key class, value class
 * and comparator are the intermediate <em>output</em> settings; otherwise the
 * intermediate <em>input</em> settings are used and the context is treated as
 * a {@code TezInputContext}.
 *
 * @param taskContext the context this combiner runs in; asserted to be either a
 *                    {@code TezInputContext} or a {@code TezOutputContext}
 * @throws IOException declared by this constructor's signature
 */
public MRCombiner(TezTaskContext taskContext) throws IOException {
  this.conf = TezUtils.createConfFromUserPayload(taskContext.getUserPayload());

  final boolean outputSide = taskContext instanceof TezOutputContext;
  assert outputSide || taskContext instanceof TezInputContext;

  if (outputSide) {
    this.keyClass = ConfigUtils.getIntermediateOutputKeyClass(conf);
    this.valClass = ConfigUtils.getIntermediateOutputValueClass(conf);
    this.comparator = ConfigUtils.getIntermediateOutputKeyComparator(conf);
    this.reporter = new MRTaskReporter((TezOutputContext) taskContext);
  } else {
    this.keyClass = ConfigUtils.getIntermediateInputKeyClass(conf);
    this.valClass = ConfigUtils.getIntermediateInputValueClass(conf);
    this.comparator = ConfigUtils.getIntermediateInputKeyComparator(conf);
    this.reporter = new MRTaskReporter((TezInputContext) taskContext);
  }

  this.useNewApi = ConfigUtils.useNewApi(conf);

  // NOTE(review): combineInputValueCounter is bound to COMBINE_OUTPUT_RECORDS;
  // confirm against the field's usage that this pairing is intended.
  combineInputKeyCounter =
      taskContext.getCounters().findCounter(TaskCounter.COMBINE_INPUT_RECORDS);
  combineInputValueCounter =
      taskContext.getCounters().findCounter(TaskCounter.COMBINE_OUTPUT_RECORDS);

  // Task id: cluster timestamp as identifier string, application id number,
  // MAP/REDUCE chosen by the IS_MAP_PROCESSOR flag, and the task index.
  final boolean isMap = conf.getBoolean(MRConfig.IS_MAP_PROCESSOR, false);
  final TaskID mrTaskId = new TaskID(
      String.valueOf(taskContext.getApplicationId().getClusterTimestamp()),
      taskContext.getApplicationId().getId(),
      isMap ? TaskType.MAP : TaskType.REDUCE,
      taskContext.getTaskIndex());
  this.mrTaskAttemptID = new TaskAttemptID(mrTaskId, taskContext.getTaskAttemptNumber());

  LOG.info("Using combineKeyClass: " + keyClass
      + ", combineValueClass: " + valClass
      + ", combineComparator: " + comparator
      + ", useNewApi: " + useNewApi);
}
 
Example 5
Source File: MRCombiner.java    From tez with Apache License 2.0 5 votes vote down vote up
public MRCombiner(TaskContext taskContext) throws IOException {
  final Configuration userConf = TezUtils.createConfFromUserPayload(taskContext.getUserPayload());
  useNewApi = ConfigUtils.useNewApi(userConf);
  if (useNewApi) {
    conf = new JobConf(userConf);
  } else {
    conf = userConf;
  }

  assert(taskContext instanceof InputContext || taskContext instanceof OutputContext);
  if (taskContext instanceof OutputContext) {
    this.keyClass = ConfigUtils.getIntermediateOutputKeyClass(conf);
    this.valClass = ConfigUtils.getIntermediateOutputValueClass(conf);
    this.comparator = ConfigUtils.getIntermediateOutputKeyComparator(conf);
    this.reporter = new MRTaskReporter((OutputContext)taskContext);
  } else {
    this.keyClass = ConfigUtils.getIntermediateInputKeyClass(conf);
    this.valClass = ConfigUtils.getIntermediateInputValueClass(conf);
    this.comparator = ConfigUtils.getIntermediateInputKeyComparator(conf);
    this.reporter = new MRTaskReporter((InputContext)taskContext);
  }

  combineInputRecordsCounter = taskContext.getCounters().findCounter(TaskCounter.COMBINE_INPUT_RECORDS);
  combineOutputRecordsCounter = taskContext.getCounters().findCounter(TaskCounter.COMBINE_OUTPUT_RECORDS);
  
  boolean isMap = conf.getBoolean(MRConfig.IS_MAP_PROCESSOR,false);
  this.mrTaskAttemptID = new TaskAttemptID(
      new TaskID(String.valueOf(taskContext.getApplicationId()
          .getClusterTimestamp()), taskContext.getApplicationId().getId(),
          isMap ? TaskType.MAP : TaskType.REDUCE,
          taskContext.getTaskIndex()), taskContext.getTaskAttemptNumber());
  
  LOG.info("Using combineKeyClass: " + keyClass + ", combineValueClass: " + valClass + ", combineComparator: " +comparator + ", useNewApi: " + useNewApi);
}
 
Example 6
Source File: ShuffledMergedInput.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the comparator that {@code ConfigUtils} resolves for intermediate
 * input keys from {@code conf}.
 *
 * @return the intermediate input key comparator, as a raw {@code RawComparator}
 */
@SuppressWarnings("rawtypes")
public RawComparator getInputKeyComparator() {
  final RawComparator keyComparator =
      (RawComparator) ConfigUtils.getIntermediateInputKeyComparator(conf);
  return keyComparator;
}
 
Example 7
Source File: LocalShuffle.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
public LocalShuffle(TezInputContext inputContext, Configuration conf, int numInputs) throws IOException {
   this.inputContext = inputContext;
   this.conf = conf;
   this.numInputs = numInputs;
   
   this.keyClass = ConfigUtils.getIntermediateInputKeyClass(conf);
   this.valClass = ConfigUtils.getIntermediateInputValueClass(conf);
   this.comparator = ConfigUtils.getIntermediateInputKeyComparator(conf);
   
   this.sortFactor =
       conf.getInt(
           TezJobConfig.TEZ_RUNTIME_IO_SORT_FACTOR, 
           TezJobConfig.TEZ_RUNTIME_IO_SORT_FACTOR_DEFAULT);
   
   this.rfs = FileSystem.getLocal(conf).getRaw();

   this.spilledRecordsCounter = inputContext.getCounters().findCounter(TaskCounter.SPILLED_RECORDS);
   
// compression
   if (ConfigUtils.isIntermediateInputCompressed(conf)) {
     Class<? extends CompressionCodec> codecClass =
         ConfigUtils.getIntermediateInputCompressorClass(conf, DefaultCodec.class);
     this.codec = ReflectionUtils.newInstance(codecClass, conf);
   } else {
     this.codec = null;
   }
   this.ifileReadAhead = conf.getBoolean(
       TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD,
       TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT);
   if (this.ifileReadAhead) {
     this.ifileReadAheadLength = conf.getInt(
         TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES,
         TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT);
   } else {
     this.ifileReadAheadLength = 0;
   }
   this.ifileBufferSize = conf.getInt("io.file.buffer.size",
       TezJobConfig.TEZ_RUNTIME_IFILE_BUFFER_SIZE_DEFAULT);
   
   // Always local
   this.mapOutputFile = new TezLocalTaskOutputFiles(conf, inputContext.getUniqueIdentifier());
 }
 
Example 8
Source File: OrderedGroupedKVInput.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the comparator that {@code ConfigUtils} resolves for intermediate
 * input keys from {@code conf}.
 *
 * @return the intermediate input key comparator, as a raw {@code RawComparator}
 */
@SuppressWarnings("rawtypes")
public RawComparator getInputKeyComparator() {
  final RawComparator keyComparator =
      (RawComparator) ConfigUtils.getIntermediateInputKeyComparator(conf);
  return keyComparator;
}