Java Code Examples for org.apache.tez.runtime.library.common.ConfigUtils#getIntermediateOutputValueClass()

The following examples show how to use org.apache.tez.runtime.library.common.ConfigUtils#getIntermediateOutputValueClass(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: FileBasedKVWriter.java, from incubator-tez (Apache License 2.0); 5 votes
public FileBasedKVWriter(TezOutputContext outputContext, Configuration conf) throws IOException {
  this.conf = conf;

  this.outputRecordsCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_RECORDS);
  this.outputBytesCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES);
  this.outputBytesCounterWithOverhead = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
  this.outputMaterializedBytesCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);

  this.rfs = ((LocalFileSystem) FileSystem.getLocal(this.conf)).getRaw();

  // Setup serialization
  keyClass = ConfigUtils.getIntermediateOutputKeyClass(this.conf);
  valClass = ConfigUtils.getIntermediateOutputValueClass(this.conf);

  // Setup compression
  if (ConfigUtils.shouldCompressIntermediateOutput(this.conf)) {
    Class<? extends CompressionCodec> codecClass = ConfigUtils
        .getIntermediateOutputCompressorClass(this.conf, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, this.conf);
  } else {
    codec = null;
  }

  this.ouputFileManager = TezRuntimeUtils.instantiateTaskOutputManager(conf,
      outputContext);
  LOG.info("Created KVWriter -> " + "compressionCodec: " + (codec == null ? "NoCompressionCodec"
      : codec.getClass().getName()));

  this.outputPath = ouputFileManager.getOutputFileForWrite();
  LOG.info("Writing data file: " + outputPath);

  // TODO NEWTEZ Consider making the buffer size configurable. Also consider
  // setting up an in-memory buffer which is occasionally flushed to disk so
  // that the output does not block.

  // TODO NEWTEZ maybe use appropriate counter
  this.writer = new IFile.Writer(conf, rfs, outputPath, keyClass, valClass,
      codec, null, outputBytesCounter);
}
 
Example 2
Source File: MRCombiner.java, from incubator-tez (Apache License 2.0); 5 votes
/**
 * Sets up a combiner from the user payload carried by the given task context.
 *
 * <p>When the context is a {@code TezOutputContext} the intermediate
 * <em>output</em> key class, value class and key comparator are read from the
 * configuration; otherwise the intermediate <em>input</em> variants are used.
 *
 * <p>Note: the {@code assert} only verifies the context type when assertions
 * are enabled. With assertions disabled, any context that is not a
 * {@code TezOutputContext} takes the input branch and is cast to
 * {@code TezInputContext}.
 *
 * @param taskContext the input or output context this combiner runs under
 * @throws IOException declared by this constructor; presumably raised while
 *                     decoding the user payload (not visible from here)
 */
public MRCombiner(TezTaskContext taskContext) throws IOException {
  this.conf = TezUtils.createConfFromUserPayload(taskContext.getUserPayload());

  final boolean onOutputSide = taskContext instanceof TezOutputContext;
  assert(onOutputSide || taskContext instanceof TezInputContext);

  if (!onOutputSide) {
    // Input side: use the intermediate input settings.
    this.keyClass = ConfigUtils.getIntermediateInputKeyClass(conf);
    this.valClass = ConfigUtils.getIntermediateInputValueClass(conf);
    this.comparator = ConfigUtils.getIntermediateInputKeyComparator(conf);
    this.reporter = new MRTaskReporter((TezInputContext)taskContext);
  } else {
    // Output side: use the intermediate output settings.
    this.keyClass = ConfigUtils.getIntermediateOutputKeyClass(conf);
    this.valClass = ConfigUtils.getIntermediateOutputValueClass(conf);
    this.comparator = ConfigUtils.getIntermediateOutputKeyComparator(conf);
    this.reporter = new MRTaskReporter((TezOutputContext)taskContext);
  }

  this.useNewApi = ConfigUtils.useNewApi(conf);

  combineInputKeyCounter =
      taskContext.getCounters().findCounter(TaskCounter.COMBINE_INPUT_RECORDS);
  combineInputValueCounter =
      taskContext.getCounters().findCounter(TaskCounter.COMBINE_OUTPUT_RECORDS);

  // Build the MapReduce task attempt id from the application id, task index
  // and attempt number; the task type follows the IS_MAP_PROCESSOR flag.
  final boolean mapSide = conf.getBoolean(MRConfig.IS_MAP_PROCESSOR,false);
  final TaskID mrTaskId = new TaskID(
      String.valueOf(taskContext.getApplicationId().getClusterTimestamp()),
      taskContext.getApplicationId().getId(),
      mapSide ? TaskType.MAP : TaskType.REDUCE,
      taskContext.getTaskIndex());
  this.mrTaskAttemptID = new TaskAttemptID(mrTaskId, taskContext.getTaskAttemptNumber());

  LOG.info("Using combineKeyClass: " + keyClass
      + ", combineValueClass: " + valClass
      + ", combineComparator: " +comparator
      + ", useNewApi: " + useNewApi);
}
 
Example 3
Source File: MRCombiner.java, from tez (Apache License 2.0); 5 votes
public MRCombiner(TaskContext taskContext) throws IOException {
  final Configuration userConf = TezUtils.createConfFromUserPayload(taskContext.getUserPayload());
  useNewApi = ConfigUtils.useNewApi(userConf);
  if (useNewApi) {
    conf = new JobConf(userConf);
  } else {
    conf = userConf;
  }

  assert(taskContext instanceof InputContext || taskContext instanceof OutputContext);
  if (taskContext instanceof OutputContext) {
    this.keyClass = ConfigUtils.getIntermediateOutputKeyClass(conf);
    this.valClass = ConfigUtils.getIntermediateOutputValueClass(conf);
    this.comparator = ConfigUtils.getIntermediateOutputKeyComparator(conf);
    this.reporter = new MRTaskReporter((OutputContext)taskContext);
  } else {
    this.keyClass = ConfigUtils.getIntermediateInputKeyClass(conf);
    this.valClass = ConfigUtils.getIntermediateInputValueClass(conf);
    this.comparator = ConfigUtils.getIntermediateInputKeyComparator(conf);
    this.reporter = new MRTaskReporter((InputContext)taskContext);
  }

  combineInputRecordsCounter = taskContext.getCounters().findCounter(TaskCounter.COMBINE_INPUT_RECORDS);
  combineOutputRecordsCounter = taskContext.getCounters().findCounter(TaskCounter.COMBINE_OUTPUT_RECORDS);
  
  boolean isMap = conf.getBoolean(MRConfig.IS_MAP_PROCESSOR,false);
  this.mrTaskAttemptID = new TaskAttemptID(
      new TaskID(String.valueOf(taskContext.getApplicationId()
          .getClusterTimestamp()), taskContext.getApplicationId().getId(),
          isMap ? TaskType.MAP : TaskType.REDUCE,
          taskContext.getTaskIndex()), taskContext.getTaskAttemptNumber());
  
  LOG.info("Using combineKeyClass: " + keyClass + ", combineValueClass: " + valClass + ", combineComparator: " +comparator + ", useNewApi: " + useNewApi);
}
 
Example 4
Source File: ExternalSorter.java, from incubator-tez (Apache License 2.0); 4 votes
/**
 * Initializes the state shared by external sorters: memory budget, sorter
 * implementation, key comparator, serializers, counters, compression codec,
 * IFile read-ahead and buffer settings, the task output manager, the
 * partitioner and the combiner.
 *
 * <p>The memory budget is the given byte count converted to whole megabytes.
 * A positive amount below one megabyte is rounded up to 1 MB with a warning.
 *
 * <p>Side effect: the number of outputs is written into the configuration
 * under {@code TEZ_RUNTIME_NUM_EXPECTED_PARTITIONS} before the partitioner is
 * instantiated.
 *
 * @param outputContext context that supplies the counters and is handed to the
 *                      output manager and combiner factories
 * @param conf configuration read for all settings above, and modified as
 *             described
 * @param numOutputs number of partitions
 * @param initialMemoryAvailable memory assigned to the sorter, in bytes
 * @throws IOException declared by this constructor; the throwing calls are not
 *                     identifiable from here
 * @throws RuntimeException if {@code initialMemoryAvailable} is zero or negative
 */
public ExternalSorter(TezOutputContext outputContext, Configuration conf, int numOutputs,
    long initialMemoryAvailable) throws IOException {
  this.outputContext = outputContext;
  this.conf = conf;
  this.partitions = numOutputs;

  final LocalFileSystem localFs = (LocalFileSystem)FileSystem.getLocal(this.conf);
  rfs = localFs.getRaw();

  // Bytes -> whole megabytes.
  // NOTE(review): the int cast is applied after the shift, so byte counts of
  // 2^51 or more would wrap; confirm such values cannot reach this point.
  final int wholeMb = (int) (initialMemoryAvailable >> 20);
  if (wholeMb > 0) {
    this.availableMemoryMb = wholeMb;
  } else if (initialMemoryAvailable > 0) {
    // Rounded down to 0MB - may be > 0 && < 1MB
    this.availableMemoryMb = 1;
    LOG.warn("initialAvailableMemory: " + initialMemoryAvailable
        + " is too low. Rounding to 1 MB");
  } else {
    throw new RuntimeException("InitialMemoryAssigned is <= 0: " + initialMemoryAvailable);
  }

  // Sorter implementation: configurable, QuickSort when nothing is configured.
  final Class<? extends IndexedSorter> sorterClass = this.conf.getClass(
      TezJobConfig.TEZ_RUNTIME_INTERNAL_SORTER_CLASS, QuickSort.class, IndexedSorter.class);
  sorter = ReflectionUtils.newInstance(sorterClass, this.conf);

  comparator = ConfigUtils.getIntermediateOutputKeyComparator(this.conf);

  // Key/value serialization.
  keyClass = ConfigUtils.getIntermediateOutputKeyClass(this.conf);
  valClass = ConfigUtils.getIntermediateOutputValueClass(this.conf);
  serializationFactory = new SerializationFactory(this.conf);
  keySerializer = serializationFactory.getSerializer(keyClass);
  valSerializer = serializationFactory.getSerializer(valClass);

  // Counters.
  mapOutputByteCounter =
      outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES);
  mapOutputRecordCounter =
      outputContext.getCounters().findCounter(TaskCounter.OUTPUT_RECORDS);
  outputBytesWithOverheadCounter =
      outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
  fileOutputByteCounter =
      outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
  spilledRecordsCounter =
      outputContext.getCounters().findCounter(TaskCounter.SPILLED_RECORDS);
  additionalSpillBytesWritten =
      outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
  additionalSpillBytesRead =
      outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
  numAdditionalSpills =
      outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);

  // Compression: a codec instance when enabled in the configuration, otherwise null.
  codec = ConfigUtils.shouldCompressIntermediateOutput(this.conf)
      ? ReflectionUtils.newInstance(
          ConfigUtils.getIntermediateOutputCompressorClass(this.conf, DefaultCodec.class),
          this.conf)
      : null;

  // IFile read-ahead: the length is only read when read-ahead is switched on.
  this.ifileReadAhead = this.conf.getBoolean(
      TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD,
      TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT);
  this.ifileReadAheadLength = this.ifileReadAhead
      ? conf.getInt(
          TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES,
          TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT)
      : 0;
  this.ifileBufferSize = conf.getInt("io.file.buffer.size",
      TezJobConfig.TEZ_RUNTIME_IFILE_BUFFER_SIZE_DEFAULT);

  // Task outputs.
  mapOutputFile = TezRuntimeUtils.instantiateTaskOutputManager(conf, outputContext);

  final String partitionerClassName = conf.get(TezJobConfig.TEZ_RUNTIME_PARTITIONER_CLASS);
  LOG.info("Instantiating Partitioner: [" + partitionerClassName + "]");
  // Publish the partition count in the configuration before creating the partitioner.
  this.conf.setInt(
      TezRuntimeFrameworkConfigs.TEZ_RUNTIME_NUM_EXPECTED_PARTITIONS, this.partitions);
  this.partitioner = TezRuntimeUtils.instantiatePartitioner(this.conf);
  this.combiner = TezRuntimeUtils.instantiateCombiner(this.conf, outputContext);
}
 
Example 5
Source File: BaseUnorderedPartitionedKVWriter.java, from incubator-tez (Apache License 2.0); 4 votes
@SuppressWarnings("unchecked")
public BaseUnorderedPartitionedKVWriter(TezOutputContext outputContext, Configuration conf, int numOutputs) {
  this.outputContext = outputContext;
  this.conf = conf;
  this.numPartitions = numOutputs;
  
  // k/v serialization
  keyClass = ConfigUtils.getIntermediateOutputKeyClass(this.conf);
  valClass = ConfigUtils.getIntermediateOutputValueClass(this.conf);
  serializationFactory = new SerializationFactory(this.conf);
  keySerializer = serializationFactory.getSerializer(keyClass);
  valSerializer = serializationFactory.getSerializer(valClass);
  
  outputRecordBytesCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES);
  outputRecordsCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_RECORDS);
  outputBytesWithOverheadCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
  fileOutputBytesCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
  spilledRecordsCounter = outputContext.getCounters().findCounter(TaskCounter.SPILLED_RECORDS);
  additionalSpillBytesWritternCounter = outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
  additionalSpillBytesReadCounter = outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
  numAdditionalSpillsCounter = outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
  
  // compression
  if (ConfigUtils.shouldCompressIntermediateOutput(this.conf)) {
    Class<? extends CompressionCodec> codecClass =
        ConfigUtils.getIntermediateOutputCompressorClass(this.conf, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, this.conf);
  } else {
    codec = null;
  }
  
  this.ifileReadAhead = this.conf.getBoolean(
      TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD,
      TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT);
  if (this.ifileReadAhead) {
    this.ifileReadAheadLength = conf.getInt(
        TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES,
        TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT);
  } else {
    this.ifileReadAheadLength = 0;
  }
  this.ifileBufferSize = conf.getInt("io.file.buffer.size",
      TezJobConfig.TEZ_RUNTIME_IFILE_BUFFER_SIZE_DEFAULT);
  
  LOG.info("Instantiating Partitioner: [" + conf.get(TezJobConfig.TEZ_RUNTIME_PARTITIONER_CLASS) + "]");
  try {
    this.partitioner = TezRuntimeUtils.instantiatePartitioner(this.conf);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  outputFileHandler = TezRuntimeUtils.instantiateTaskOutputManager(conf, outputContext);
}