Java Code Examples for org.apache.tez.runtime.library.common.ConfigUtils#getIntermediateInputCompressorClass()

The following examples show how to use org.apache.tez.runtime.library.common.ConfigUtils#getIntermediateInputCompressorClass(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: ShuffledUnorderedKVInput.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * Starts this input if it has not been started yet; calls made after a successful
 * start return immediately.
 *
 * <p>Reads the codec, read-ahead and buffer-size settings from {@code conf}, builds the
 * input allocator, shuffle manager and input event handler, runs the shuffle manager,
 * creates the key-value reader, and finally dispatches any events that were queued
 * before the input was started.
 *
 * @throws IOException if one of the setup steps fails with an I/O error
 */
@Override
public synchronized void start() throws IOException {
  if (isStarted.get()) {
    // Already started: nothing left to do.
    return;
  }

  // ---- Initial configuration ----
  memoryUpdateCallbackHandler.validateUpdateReceived();

  // A codec is only instantiated when intermediate input compression is enabled.
  CompressionCodec inputCodec = null;
  if (ConfigUtils.isIntermediateInputCompressed(conf)) {
    Class<? extends CompressionCodec> inputCodecClass =
        ConfigUtils.getIntermediateInputCompressorClass(conf, DefaultCodec.class);
    inputCodec = ReflectionUtils.newInstance(inputCodecClass, conf);
  }

  boolean readAheadEnabled = conf.getBoolean(
      TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD,
      TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT);
  // The read-ahead length is only looked up when read-ahead is on; otherwise it is 0.
  int readAheadLength = readAheadEnabled
      ? conf.getInt(
          TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES,
          TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT)
      : 0;
  int bufferSize = conf.getInt(
      "io.file.buffer.size",
      TezJobConfig.TEZ_RUNTIME_IFILE_BUFFER_SIZE_DEFAULT);

  this.inputManager = new SimpleFetchedInputAllocator(
      getContext().getUniqueIdentifier(),
      conf,
      getContext().getTotalMemoryAvailableToTask(),
      memoryUpdateCallbackHandler.getMemoryAssigned());

  this.shuffleManager = new ShuffleManager(
      getContext(), conf, getNumPhysicalInputs(), bufferSize,
      readAheadEnabled, readAheadLength, inputCodec, inputManager);

  this.inputEventHandler = new ShuffleInputEventHandlerImpl(
      getContext(), shuffleManager, inputManager, inputCodec,
      readAheadEnabled, readAheadLength);
  // ---- End of initial configuration ----

  this.shuffleManager.run();
  this.kvReader = createReader(
      inputRecordCounter, inputCodec, bufferSize, readAheadEnabled, readAheadLength);

  // Hand over whatever events were queued while the input was not yet started.
  List<Event> queuedEvents = new LinkedList<Event>();
  pendingEvents.drainTo(queuedEvents);
  if (!queuedEvents.isEmpty()) {
    LOG.info("NoAutoStart delay in processing first event: "
        + (System.currentTimeMillis() - firstEventReceivedTime));
    inputEventHandler.handleEvents(queuedEvents);
  }

  isStarted.set(true);
}
 
Example 2
Source File: LocalShuffle.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
public LocalShuffle(TezInputContext inputContext, Configuration conf, int numInputs) throws IOException {
   this.inputContext = inputContext;
   this.conf = conf;
   this.numInputs = numInputs;
   
   this.keyClass = ConfigUtils.getIntermediateInputKeyClass(conf);
   this.valClass = ConfigUtils.getIntermediateInputValueClass(conf);
   this.comparator = ConfigUtils.getIntermediateInputKeyComparator(conf);
   
   this.sortFactor =
       conf.getInt(
           TezJobConfig.TEZ_RUNTIME_IO_SORT_FACTOR, 
           TezJobConfig.TEZ_RUNTIME_IO_SORT_FACTOR_DEFAULT);
   
   this.rfs = FileSystem.getLocal(conf).getRaw();

   this.spilledRecordsCounter = inputContext.getCounters().findCounter(TaskCounter.SPILLED_RECORDS);
   
// compression
   if (ConfigUtils.isIntermediateInputCompressed(conf)) {
     Class<? extends CompressionCodec> codecClass =
         ConfigUtils.getIntermediateInputCompressorClass(conf, DefaultCodec.class);
     this.codec = ReflectionUtils.newInstance(codecClass, conf);
   } else {
     this.codec = null;
   }
   this.ifileReadAhead = conf.getBoolean(
       TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD,
       TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT);
   if (this.ifileReadAhead) {
     this.ifileReadAheadLength = conf.getInt(
         TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES,
         TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT);
   } else {
     this.ifileReadAheadLength = 0;
   }
   this.ifileBufferSize = conf.getInt("io.file.buffer.size",
       TezJobConfig.TEZ_RUNTIME_IFILE_BUFFER_SIZE_DEFAULT);
   
   // Always local
   this.mapOutputFile = new TezLocalTaskOutputFiles(conf, inputContext.getUniqueIdentifier());
 }
 
Example 3
Source File: UnorderedKVInput.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Starts this input if it has not been started yet; calls made after a successful
 * start return immediately.
 *
 * <p>Reads the codec, composite-fetch, read-ahead and buffer-size settings from
 * {@code conf}, builds the input allocator, shuffle manager and input event handler,
 * runs the shuffle manager, creates the key-value reader, and finally dispatches any
 * events that were queued before the input was started.
 *
 * @throws IOException if one of the setup steps fails with an I/O error
 */
@Override
public synchronized void start() throws IOException {
  if (isStarted.get()) {
    // Already started: nothing left to do.
    return;
  }

  // ---- Initial configuration ----
  memoryUpdateCallbackHandler.validateUpdateReceived();

  // A codec is only instantiated when intermediate input compression is enabled.
  CompressionCodec inputCodec = null;
  if (ConfigUtils.isIntermediateInputCompressed(conf)) {
    Class<? extends CompressionCodec> inputCodecClass =
        ConfigUtils.getIntermediateInputCompressorClass(conf, DefaultCodec.class);
    inputCodec = ReflectionUtils.newInstance(inputCodecClass, conf);
  }

  boolean useCompositeFetch = ShuffleUtils.isTezShuffleHandler(conf);
  boolean readAheadEnabled = conf.getBoolean(
      TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD,
      TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT);
  // The read-ahead length is only looked up when read-ahead is on; otherwise it is 0.
  int readAheadLength = readAheadEnabled
      ? conf.getInt(
          TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_BYTES,
          TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT)
      : 0;
  int bufferSize = conf.getInt(
      "io.file.buffer.size",
      TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_BUFFER_SIZE_DEFAULT);

  this.inputManager = new SimpleFetchedInputAllocator(
      TezUtilsInternal.cleanVertexName(getContext().getSourceVertexName()),
      getContext().getUniqueIdentifier(),
      getContext().getDagIdentifier(),
      conf,
      getContext().getTotalMemoryAvailableToTask(),
      memoryUpdateCallbackHandler.getMemoryAssigned());

  this.shuffleManager = new ShuffleManager(
      getContext(), conf, getNumPhysicalInputs(), bufferSize,
      readAheadEnabled, readAheadLength, inputCodec, inputManager);

  this.inputEventHandler = new ShuffleInputEventHandlerImpl(
      getContext(), shuffleManager, inputManager, inputCodec,
      readAheadEnabled, readAheadLength, useCompositeFetch);
  // ---- End of initial configuration ----

  this.shuffleManager.run();
  this.kvReader = createReader(
      inputRecordCounter, inputCodec, bufferSize, readAheadEnabled, readAheadLength);

  // Hand over whatever events were queued while the input was not yet started.
  List<Event> queuedEvents = new LinkedList<Event>();
  pendingEvents.drainTo(queuedEvents);
  if (!queuedEvents.isEmpty()) {
    if (LOG.isDebugEnabled()) {
      LOG.debug(getContext().getSourceVertexName() + ": " + "NoAutoStart delay in processing first event: "
          + (System.currentTimeMillis() - firstEventReceivedTime));
    }
    inputEventHandler.handleEvents(queuedEvents);
  }

  isStarted.set(true);
}
 
Example 4
Source File: Shuffle.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a Shuffle: reads the compression and read-ahead settings from {@code conf},
 * instantiates the combiner, creates the {@code MergeManager} and
 * {@code ShuffleScheduler}, sets up the input event handler, and prepares a
 * single-threaded daemon executor together with the callable that runs the shuffle.
 *
 * <p>Note that {@code this} is handed to both the merge manager and the scheduler
 * before the constructor has finished.
 *
 * @param inputContext context used for the source vertex name and counter lookups;
 *     also passed on to the collaborators created here
 * @param conf configuration the codec and read-ahead settings are read from
 * @param numInputs number of inputs; logged and passed to the scheduler
 * @param initialMemoryAvailable passed through to the merge manager
 * @throws IOException if one of the setup steps fails with an I/O error
 */
public Shuffle(InputContext inputContext, Configuration conf, int numInputs,
    long initialMemoryAvailable) throws IOException {
  this.inputContext = inputContext;
  this.conf = conf;

  // Cleaned-up source vertex name, used as a prefix in logs and in the thread name below.
  this.srcNameTrimmed = TezUtilsInternal.cleanVertexName(inputContext.getSourceVertexName());
  

  // A codec is only instantiated when intermediate input compression is enabled.
  if (ConfigUtils.isIntermediateInputCompressed(conf)) {
    Class<? extends CompressionCodec> codecClass =
        ConfigUtils.getIntermediateInputCompressorClass(conf, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, conf);
    // Work around needed for HADOOP-12191. Avoids the native initialization synchronization race
    codec.getDecompressorType();
  } else {
    codec = null;
  }
  this.ifileReadAhead = conf.getBoolean(
      TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD,
      TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT);
  // The read-ahead length is only looked up when read-ahead is on; otherwise it is 0.
  if (this.ifileReadAhead) {
    this.ifileReadAheadLength = conf.getInt(
        TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_BYTES,
        TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT);
  } else {
    this.ifileReadAheadLength = 0;
  }
  
  Combiner combiner = TezRuntimeUtils.instantiateCombiner(conf, inputContext);
  
  FileSystem localFS = FileSystem.getLocal(this.conf);
  LocalDirAllocator localDirAllocator = 
      new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);

  // TODO TEZ Get rid of Map / Reduce references.
  TezCounter spilledRecordsCounter =
      inputContext.getCounters().findCounter(TaskCounter.SPILLED_RECORDS);
  TezCounter reduceCombineInputCounter =
      inputContext.getCounters().findCounter(TaskCounter.COMBINE_INPUT_RECORDS);
  TezCounter mergedMapOutputsCounter =
      inputContext.getCounters().findCounter(TaskCounter.MERGED_MAP_OUTPUTS);

  LOG.info(srcNameTrimmed + ": " + "Shuffle assigned with " + numInputs + " inputs" + ", codec: "
      + (codec == null ? "None" : codec.getClass().getName())
      + ", ifileReadAhead: " + ifileReadAhead);

  // Captured once so the scheduler below receives the same start timestamp.
  startTime = System.currentTimeMillis();
  merger = new MergeManager(
      this.conf,
      localFS,
      localDirAllocator,
      inputContext,
      combiner,
      spilledRecordsCounter,
      reduceCombineInputCounter,
      mergedMapOutputsCounter,
      this,
      initialMemoryAvailable,
      codec,
      ifileReadAhead,
      ifileReadAheadLength);

  // NOTE(review): merger is passed twice; presumably it fills two distinct roles in the
  // scheduler's constructor - confirm against ShuffleScheduler.
  scheduler = new ShuffleScheduler(
        this.inputContext,
        this.conf,
        numInputs,
        this,
        merger,
        merger,
        startTime,
        codec,
        ifileReadAhead,
        ifileReadAheadLength,
        srcNameTrimmed);

  this.mergePhaseTime = inputContext.getCounters().findCounter(TaskCounter.MERGE_PHASE_TIME);
  this.shufflePhaseTime = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_PHASE_TIME);



  eventHandler= new ShuffleInputEventHandlerOrderedGrouped(
      inputContext,
      scheduler,
      ShuffleUtils.isTezShuffleHandler(conf));
  
  // Single daemon thread, named after the source vertex, wrapped below as a listening executor.
  ExecutorService rawExecutor = Executors.newFixedThreadPool(1, new ThreadFactoryBuilder()
      .setDaemon(true).setNameFormat("ShuffleAndMergeRunner {" + srcNameTrimmed + "}").build());


  executor = MoreExecutors.listeningDecorator(rawExecutor);
  runShuffleCallable = new RunShuffleCallable();
}