org.apache.tez.common.counters.TaskCounter Java Examples

Source File: TestHistoryParser.java From tez with Apache License 2.0

6 votes

/**
 * Checks that two history entities agree on a fixed set of task counters.
 *
 * <p>Every counter is looked up by its enum name on both entities and the resulting pair is
 * passed to {@code isCounterSame}. Each counter is compared exactly once.
 *
 * @param info1 first entity whose counters are read
 * @param info2 second entity whose counters are read
 */
private void isCountersSame(BaseInfo info1, BaseInfo info2) {
  final TaskCounter[] countersToCompare = {
      TaskCounter.ADDITIONAL_SPILL_COUNT,
      TaskCounter.SPILLED_RECORDS,
      TaskCounter.OUTPUT_RECORDS,
      TaskCounter.OUTPUT_BYTES,
      TaskCounter.REDUCE_INPUT_GROUPS,
      TaskCounter.REDUCE_INPUT_RECORDS
  };

  for (TaskCounter counter : countersToCompare) {
    final String counterName = counter.name();
    isCounterSame(info1.getCounter(counterName), info2.getCounter(counterName));
  }
}

Source File: ShuffledMergedInput.java From incubator-tez with Apache License 2.0

6 votes

/**
 * Builds the configuration from the user payload, requests initial memory and looks up the
 * reduce-input counters.
 *
 * <p>When there are no physical inputs, a zero-byte memory request is made, the input is
 * marked started and ready, and nothing else is set up.
 *
 * @return an empty event list in every case
 * @throws IOException declared by the method signature
 */
@Override
public synchronized List<Event> initialize() throws IOException {
  this.conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload());

  final boolean nothingToFetch = this.getNumPhysicalInputs() == 0;
  if (nothingToFetch) {
    // The memory request is still issued, with a size of zero and no callback.
    getContext().requestInitialMemory(0L, null);
    isStarted.set(true);
    getContext().inputIsReady();
    final String sourceVertex = getContext().getSourceVertexName();
    LOG.info("input fetch not required since there are 0 physical inputs for input vertex: "
        + sourceVertex);
    return Collections.emptyList();
  }

  final long taskMemory = getContext().getTotalMemoryAvailableToTask();
  final long memoryNeeded = Shuffle.getInitialMemoryRequirement(conf, taskMemory);
  memoryUpdateCallbackHandler = new MemoryUpdateCallbackHandler();
  getContext().requestInitialMemory(memoryNeeded, memoryUpdateCallbackHandler);

  inputKeyCounter =
      getContext().getCounters().findCounter(TaskCounter.REDUCE_INPUT_GROUPS);
  inputValueCounter =
      getContext().getCounters().findCounter(TaskCounter.REDUCE_INPUT_RECORDS);
  conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, getContext().getWorkDirs());

  return Collections.emptyList();
}

Source File: ShuffledUnorderedKVInput.java From incubator-tez with Apache License 2.0

6 votes

/**
 * Builds the configuration from the user payload, requests initial memory and looks up the
 * input-records counter.
 *
 * <p>Fails fast if the number of physical inputs is still {@code -1}. With zero physical
 * inputs the input is marked started and ready and the remaining setup is skipped.
 *
 * @return an empty event list in every case
 * @throws Exception declared by the method signature
 */
@Override
public synchronized List<Event> initialize() throws Exception {
  Preconditions.checkArgument(getNumPhysicalInputs() != -1, "Number of Inputs has not been set");
  this.conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload());

  if (getNumPhysicalInputs() == 0) {
    // The memory request is still issued, with a size of zero and no callback.
    getContext().requestInitialMemory(0L, null);
    isStarted.set(true);
    getContext().inputIsReady();
    final String sourceVertex = getContext().getSourceVertexName();
    LOG.info("input fetch not required since there are 0 physical inputs for input vertex: "
        + sourceVertex);
    return Collections.emptyList();
  }

  final long memoryNeeded = getInitialMemoryReq();
  memoryUpdateCallbackHandler = new MemoryUpdateCallbackHandler();
  getContext().requestInitialMemory(memoryNeeded, memoryUpdateCallbackHandler);

  conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, getContext().getWorkDirs());
  inputRecordCounter =
      getContext().getCounters().findCounter(TaskCounter.INPUT_RECORDS_PROCESSED);
  return Collections.emptyList();
}

Source File: TestUnorderedKVReader.java From tez with Apache License 2.0

6 votes

/**
 * Verifies that {@code reader.next()} throws {@code IOInterruptedException} when the mocked
 * shuffle manager throws {@code InterruptedException} from {@code getNextInput()}.
 */
@Test(timeout = 5000)
public void testInterruptOnNext() throws IOException, InterruptedException {
  final ShuffleManager interruptingManager = mock(ShuffleManager.class);

  // Every request for the next fetched input is answered with an interrupt.
  doThrow(new InterruptedException()).when(interruptingManager).getNextInput();

  final TezCounters taskCounters = new TezCounters();
  final TezCounter recordsProcessed =
      taskCounters.findCounter(TaskCounter.INPUT_RECORDS_PROCESSED);
  final UnorderedKVReader<Text, Text> kvReader = new UnorderedKVReader<Text, Text>(
      interruptingManager, defaultConf, null, false, -1, -1,
      recordsProcessed, mock(InputContext.class));

  try {
    kvReader.next();
    fail("No data available to reader. Should not be able to access any record");
  } catch (IOInterruptedException expected) {
    // This is the only acceptable outcome; any other exception propagates and fails the test.
  }
}

Source File: TaskCounterUpdater.java From incubator-tez with Apache License 2.0

6 votes

/**
 * Refreshes the heap-usage counter and, when a process tree is available, the CPU,
 * physical-memory and virtual-memory task counters.
 */
void updateResourceCounters() {
  // The heap counter does not depend on the process tree, so it is always refreshed.
  updateHeapUsageCounter();

  if (pTree != null) {
    pTree.updateProcessTree();
    final long cumulativeCpu = pTree.getCumulativeCpuTime();
    final long physicalMemory = pTree.getCumulativeRssmem();
    final long virtualMemory = pTree.getCumulativeVmem();

    // Subtract the CPU time recorded at initialisation (earlier use under JVM reuse).
    final long cpuSinceInit = cumulativeCpu - initCpuCumulativeTime;

    tezCounters.findCounter(TaskCounter.CPU_MILLISECONDS).setValue(cpuSinceInit);
    tezCounters.findCounter(TaskCounter.PHYSICAL_MEMORY_BYTES).setValue(physicalMemory);
    tezCounters.findCounter(TaskCounter.VIRTUAL_MEMORY_BYTES).setValue(virtualMemory);
  }
}

Source File: UnorderedKVOutput.java From tez with Apache License 2.0

6 votes

/**
 * Closes the output and reports its size and record count to the statistics reporter.
 *
 * <p>A started output returns whatever the writer's {@code close()} produces. An output that
 * was never started logs a warning and gets its events from
 * {@code ShuffleUtils.generateEventsForNonStartedOutput}.
 *
 * @return the events produced by either path
 * @throws Exception declared by the method signature
 */
@Override
public synchronized List<Event> close() throws Exception {
  final List<Event> events;
  if (!isStarted.get()) {
    LOG.warn(getContext().getDestinationVertexName() +
        ": Attempting to close output {} of type {} before it was started. Generating empty events",
        getContext().getDestinationVertexName(), this.getClass().getSimpleName());
    events = new LinkedList<Event>();
    ShuffleUtils.generateEventsForNonStartedOutput(
        events, getNumPhysicalOutputs(), getContext(),
        false, false, TezCommonUtils.newBestCompressionDeflater());
  } else {
    //TODO: Do we need to support sending payloads via events?
    events = kvWriter.close();
    kvWriter = null;
  }

  // Safe for non-started outputs too: a counter that did not exist yet is created with value 0.
  final long bytesWritten =
      getContext().getCounters().findCounter(TaskCounter.OUTPUT_BYTES).getValue();
  getContext().getStatisticsReporter().reportDataSize(bytesWritten);
  final long recordsWritten =
      getContext().getCounters().findCounter(TaskCounter.OUTPUT_RECORDS).getValue();
  getContext().getStatisticsReporter().reportItemsProcessed(recordsWritten);

  return events;
}

Source File: UnorderedPartitionedKVOutput.java From tez with Apache License 2.0

6 votes

/**
 * Closes the partitioned output and reports its size and record count to the statistics
 * reporter.
 *
 * <p>A started output returns whatever the writer's {@code close()} produces. An output that
 * was never started logs a warning and gets its events from
 * {@code ShuffleUtils.generateEventsForNonStartedOutput}.
 *
 * @return the events produced by either path
 * @throws Exception declared by the method signature
 */
@Override
public synchronized List<Event> close() throws Exception {
  final List<Event> events;
  if (!isStarted.get()) {
    LOG.warn(getContext().getDestinationVertexName() +
        ": Attempting to close output {} of type {} before it was started. Generating empty events",
        getContext().getDestinationVertexName(), this.getClass().getSimpleName());
    events = new LinkedList<Event>();
    ShuffleUtils.generateEventsForNonStartedOutput(
        events, getNumPhysicalOutputs(), getContext(),
        false, true, TezCommonUtils.newBestCompressionDeflater());
  } else {
    events = kvWriter.close();
    kvWriter = null;
  }

  // Safe for non-started outputs too: a counter that did not exist yet is created with value 0.
  final long bytesWritten =
      getContext().getCounters().findCounter(TaskCounter.OUTPUT_BYTES).getValue();
  getContext().getStatisticsReporter().reportDataSize(bytesWritten);
  final long recordsWritten =
      getContext().getCounters().findCounter(TaskCounter.OUTPUT_RECORDS).getValue();
  getContext().getStatisticsReporter().reportItemsProcessed(recordsWritten);

  return events;
}

Source File: OrderedGroupedKVInput.java From tez with Apache License 2.0

6 votes

/**
 * Releases the raw iterator and the shuffle, then reports the decompressed shuffle bytes and
 * the reduce-input record count to the statistics reporter.
 *
 * @return an empty event list
 * @throws IOException declared by the method signature
 */
@Override
public synchronized List<Event> close() throws IOException {
  if (this.getNumPhysicalInputs() != 0) {
    if (rawIter != null) {
      rawIter.close();
    }
  }
  if (shuffle != null) {
    shuffle.shutdown();
  }

  final long decompressedBytes = getContext().getCounters()
      .findCounter(TaskCounter.SHUFFLE_BYTES_DECOMPRESSED)
      .getValue();
  getContext().getStatisticsReporter().reportDataSize(decompressedBytes);

  final long recordsRead = getContext().getCounters()
      .findCounter(TaskCounter.REDUCE_INPUT_RECORDS)
      .getValue();
  getContext().getStatisticsReporter().reportItemsProcessed(recordsRead);

  return Collections.emptyList();
}

Source File: UnorderedKVInput.java From tez with Apache License 2.0

6 votes

/**
 * Builds the configuration from the context, requests initial memory and looks up the
 * input-records counter.
 *
 * <p>Fails fast if the number of physical inputs is still {@code -1}. With zero physical
 * inputs the input is marked started and ready and the remaining setup is skipped.
 *
 * @return an empty event list in every case
 * @throws Exception declared by the method signature
 */
@Override
public synchronized List<Event> initialize() throws Exception {
  Preconditions.checkArgument(getNumPhysicalInputs() != -1, "Number of Inputs has not been set");
  this.conf = TezUtils.createConfFromBaseConfAndPayload(getContext());

  if (getNumPhysicalInputs() == 0) {
    // The memory request is still issued, with a size of zero and no callback.
    getContext().requestInitialMemory(0L, null);
    isStarted.set(true);
    getContext().inputIsReady();
    final String sourceVertex = getContext().getSourceVertexName();
    LOG.info("input fetch not required since there are 0 physical inputs for input vertex: "
        + sourceVertex);
    return Collections.emptyList();
  }

  final long memoryNeeded = getInitialMemoryReq();
  memoryUpdateCallbackHandler = new MemoryUpdateCallbackHandler();
  getContext().requestInitialMemory(memoryNeeded, memoryUpdateCallbackHandler);

  conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, getContext().getWorkDirs());
  inputRecordCounter =
      getContext().getCounters().findCounter(TaskCounter.INPUT_RECORDS_PROCESSED);
  return Collections.emptyList();
}

Source File: UnorderedKVInput.java From tez with Apache License 2.0

6 votes

/**
 * Logs final progress, shuts the shuffle manager down and reports the decompressed shuffle
 * bytes and the processed record count to the statistics reporter.
 *
 * @return {@code null}; this method produces no events
 * @throws Exception declared by the method signature
 */
@Override
public synchronized List<Event> close() throws Exception {
  if (inputEventHandler != null) {
    inputEventHandler.logProgress(true);
  }
  if (shuffleManager != null) {
    shuffleManager.shutdown();
  }

  final long decompressedBytes = getContext().getCounters()
      .findCounter(TaskCounter.SHUFFLE_BYTES_DECOMPRESSED)
      .getValue();
  getContext().getStatisticsReporter().reportDataSize(decompressedBytes);

  final long recordsRead = getContext().getCounters()
      .findCounter(TaskCounter.INPUT_RECORDS_PROCESSED)
      .getValue();
  getContext().getStatisticsReporter().reportItemsProcessed(recordsRead);

  return null;
}

Source File: TestPipelinedSorter.java From tez with Apache License 2.0

6 votes

/**
 * Runs a {@code PipelinedSorter} with final merge disabled and zero keys written, and checks
 * that no final output file was produced.
 */
@Test
public void testEmptyDataWithPipelinedShuffle() throws IOException {
  numOutputs = 1;
  initialAvailableMem = 1 * 1024 * 1024;
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SORTER_MIN_BLOCK_SIZE_IN_MB, 1);

  final PipelinedSorter emptySorter =
      new PipelinedSorter(outputContext, conf, numOutputs, initialAvailableMem);
  writeData(emptySorter, 0, 1 << 20);

  // With final merge disabled the final output file is never populated.
  assertTrue(emptySorter.finalOutputFile == null);

  // NOTE(review): the check sorter.getNumSpills() == numShuffleChunks.getValue() is disabled
  // for this case; the counter lookup is kept as-is. Confirm whether it should be re-enabled.
  TezCounter numShuffleChunks =
      outputContext.getCounters().findCounter(TaskCounter.SHUFFLE_CHUNK_COUNT);

  // Switch final merge back on in the shared configuration.
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true);
}

Source File: TestPipelinedSorter.java From tez with Apache License 2.0

6 votes

/**
 * Runs a {@code PipelinedSorter} with final merge disabled and five keys of 1 MiB each, then
 * checks that no final output file exists and that the shuffle-chunk counter equals the
 * sorter's spill count.
 */
@Test
public void testExceedsKVWithPipelinedShuffle() throws IOException {
  numOutputs = 1;
  initialAvailableMem = 1 * 1024 * 1024;
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SORTER_MIN_BLOCK_SIZE_IN_MB, 1);

  final PipelinedSorter pipelinedSorter =
      new PipelinedSorter(outputContext, conf, numOutputs, initialAvailableMem);
  writeData(pipelinedSorter, 5, 1 << 20);

  // With final merge disabled the final output file is never populated.
  assertTrue(pipelinedSorter.finalOutputFile == null);

  final TezCounter shuffleChunkCounter =
      outputContext.getCounters().findCounter(TaskCounter.SHUFFLE_CHUNK_COUNT);
  assertTrue(pipelinedSorter.getNumSpills() == shuffleChunkCounter.getValue());

  // Switch final merge back on in the shared configuration.
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true);
}

Source File: TaskCounterUpdater.java From tez with Apache License 2.0

6 votes

/**
 * Refreshes the heap-usage counter and, when a process tree is available, the CPU,
 * physical-memory and virtual-memory task counters.
 */
void updateResourceCounters() {
  // The heap counter does not depend on the process tree, so it is always refreshed.
  updateHeapUsageCounter();

  if (pTree != null) {
    pTree.updateProcessTree();
    final long cumulativeCpu = pTree.getCumulativeCpuTime();
    final long physicalMemory = pTree.getRssMemorySize();
    final long virtualMemory = pTree.getVirtualMemorySize();

    // Subtract the CPU time recorded at initialisation (earlier use under JVM reuse).
    final long cpuSinceInit = cumulativeCpu - initCpuCumulativeTime;

    tezCounters.findCounter(TaskCounter.CPU_MILLISECONDS).setValue(cpuSinceInit);
    tezCounters.findCounter(TaskCounter.PHYSICAL_MEMORY_BYTES).setValue(physicalMemory);
    tezCounters.findCounter(TaskCounter.VIRTUAL_MEMORY_BYTES).setValue(virtualMemory);
  }
}

Source File: MRInput.java From incubator-tez with Apache License 2.0

5 votes

/**
 * Creates the record reader ({@code mrReader}) while holding {@code rrLock}.
 *
 * <p>When split information arrives via events, the reader is created without a split. When
 * it does not, this task's split is read from disk first and handed to the reader. In both
 * cases {@code useNewApi} selects between {@code MRReaderMapReduce} and
 * {@code MRReaderMapred}.
 *
 * @throws IOException declared by the method signature
 */
@Private
void initializeInternal() throws IOException {
  // Primarily for visibility
  rrLock.lock();
  try {
    if (!splitInfoViaEvents) {
      // Locate this task's entry in the split meta information.
      final TaskSplitMetaInfo taskMetaInfo =
          MRInputUtils.readSplits(jobConf)[getContext().getTaskIndex()];
      final TaskSplitIndex splitIndex = new TaskSplitIndex(
          taskMetaInfo.getSplitLocation(), taskMetaInfo.getStartOffset());

      if (!useNewApi) {
        final org.apache.hadoop.mapred.InputSplit mapredSplit =
            MRInputUtils.getOldSplitDetailsFromDisk(splitIndex, jobConf,
                getContext().getCounters().findCounter(TaskCounter.SPLIT_RAW_BYTES));
        mrReader = new MRReaderMapred(
            jobConf, mapredSplit, getContext().getCounters(), inputRecordCounter);
      } else {
        final org.apache.hadoop.mapreduce.InputSplit mapreduceSplit =
            MRInputUtils.getNewSplitDetailsFromDisk(splitIndex, jobConf,
                getContext().getCounters().findCounter(TaskCounter.SPLIT_RAW_BYTES));
        mrReader = new MRReaderMapReduce(
            jobConf,
            mapreduceSplit,
            getContext().getCounters(),
            inputRecordCounter,
            getContext().getApplicationId().getClusterTimestamp(),
            getContext().getTaskVertexIndex(),
            getContext().getApplicationId().getId(),
            getContext().getTaskIndex(),
            getContext().getTaskAttemptNumber());
      }
    } else if (useNewApi) {
      mrReader = new MRReaderMapReduce(
          jobConf,
          getContext().getCounters(),
          inputRecordCounter,
          getContext().getApplicationId().getClusterTimestamp(),
          getContext().getTaskVertexIndex(),
          getContext().getApplicationId().getId(),
          getContext().getTaskIndex(),
          getContext().getTaskAttemptNumber());
    } else {
      mrReader = new MRReaderMapred(jobConf, getContext().getCounters(), inputRecordCounter);
    }
  } finally {
    rrLock.unlock();
  }
  LOG.info("Initialzed MRInput: " + getContext().getSourceVertexName());
}

Source File: GcTimeUpdater.java From incubator-tez with Apache License 2.0

5 votes

/**
 * Adds the value of {@code getElapsedGc()} to the {@code GC_TIME_MILLIS} counter.
 *
 * <p>Does nothing when no counters object is set or the counter lookup returns {@code null}.
 */
void incrementGcCounter() {
  if (counters != null) {
    final TezCounter gcTimeCounter = counters.findCounter(TaskCounter.GC_TIME_MILLIS);
    if (gcTimeCounter != null) {
      gcTimeCounter.increment(getElapsedGc());
    }
  }
}

Source File: OrderedGroupedKVInput.java From tez with Apache License 2.0

5 votes

/**
 * Builds the configuration from the context, requests initial memory and looks up the
 * reduce-input and shuffled-inputs counters.
 *
 * <p>When there are no physical inputs, a zero-byte memory request is made, the input is
 * marked started and ready, and nothing else is set up.
 *
 * @return an empty event list in every case
 * @throws IOException declared by the method signature
 */
@Override
public synchronized List<Event> initialize() throws IOException {
  this.conf = TezUtils.createConfFromBaseConfAndPayload(getContext());

  final boolean nothingToFetch = this.getNumPhysicalInputs() == 0;
  if (nothingToFetch) {
    // The memory request is still issued, with a size of zero and no callback.
    getContext().requestInitialMemory(0L, null);
    isStarted.set(true);
    getContext().inputIsReady();
    final String sourceVertex = getContext().getSourceVertexName();
    LOG.info("input fetch not required since there are 0 physical inputs for input vertex: "
        + sourceVertex);
    return Collections.emptyList();
  }

  final long taskMemory = getContext().getTotalMemoryAvailableToTask();
  final long memoryNeeded = Shuffle.getInitialMemoryRequirement(conf, taskMemory);
  memoryUpdateCallbackHandler = new MemoryUpdateCallbackHandler();
  getContext().requestInitialMemory(memoryNeeded, memoryUpdateCallbackHandler);

  inputKeyCounter =
      getContext().getCounters().findCounter(TaskCounter.REDUCE_INPUT_GROUPS);
  inputValueCounter =
      getContext().getCounters().findCounter(TaskCounter.REDUCE_INPUT_RECORDS);
  shuffledInputs =
      getContext().getCounters().findCounter(TaskCounter.NUM_SHUFFLED_INPUTS);
  conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, getContext().getWorkDirs());

  return Collections.emptyList();
}

Source File: MRInputBase.java From incubator-tez with Apache License 2.0

5 votes

/**
 * Parses the MR input payload, builds the {@code JobConf} and records the task-attempt
 * identity in it.
 *
 * <p>Rejects payloads that carry split information. Also looks up the input-records counter
 * and reads whether the new mapper API is in use.
 *
 * @return {@code null}; this method produces no events
 * @throws IOException declared by the method signature
 */
public List<Event> initialize() throws IOException {
  getContext().requestInitialMemory(0L, null); // mandatory call

  final MRRuntimeProtos.MRInputUserPayloadProto payload =
      MRHelpers.parseMRInputPayload(getContext().getUserPayload());
  Preconditions.checkArgument(!payload.hasSplits(),
      "Split information not expected in " + this.getClass().getName());

  this.jobConf = new JobConf(
      MRHelpers.createConfFromByteString(payload.getConfigurationBytes()));
  // Add tokens to the jobConf - in case they are accessed within the RR / IF
  jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());

  final TaskID mapTaskId = new TaskID(
      Long.toString(getContext().getApplicationId().getClusterTimestamp()),
      getContext().getApplicationId().getId(),
      TaskType.MAP,
      getContext().getTaskIndex());
  final TaskAttemptID attemptId =
      new TaskAttemptID(mapTaskId, getContext().getTaskAttemptNumber());

  jobConf.set(MRJobConfig.TASK_ATTEMPT_ID, attemptId.toString());
  jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, getContext().getDAGAttemptNumber());

  inputRecordCounter =
      getContext().getCounters().findCounter(TaskCounter.INPUT_RECORDS_PROCESSED);

  useNewApi = jobConf.getUseNewMapper();
  return null;
}

Source File: MultiMRInput.java From tez with Apache License 2.0

5 votes

/**
 * Closes every reader and reports the processed record count to the statistics reporter.
 *
 * @return {@code null}; this method produces no events
 * @throws Exception declared by the method signature
 */
@Override
public List<Event> close() throws Exception {
  for (MRReader openReader : readers) {
    openReader.close();
  }

  final long recordsRead = getContext().getCounters()
      .findCounter(TaskCounter.INPUT_RECORDS_PROCESSED)
      .getValue();
  getContext().getStatisticsReporter().reportItemsProcessed(recordsRead);

  return null;
}

Source File: TaskImpl.java From tez with Apache License 2.0

5 votes

/**
 * Schedules a speculative (redundant) attempt for the task, unless one of the skip
 * conditions below applies.
 *
 * <p>Speculation is skipped when any attempt has already succeeded, when no attempt is
 * unfinished, or when a commit attempt has been recorded. The {@code NUM_SPECULATIONS}
 * counter and the "Scheduling a redundant attempt" log line are only produced when an
 * attempt is actually scheduled, so skipped requests are not counted.
 *
 * @param task the task to speculate on
 * @param event the triggering event; not read by this method
 */
@Override
public void transition(TaskImpl task, TaskEvent event) {
  TaskAttempt earliestUnfinishedAttempt = null;
  for (TaskAttempt ta : task.attempts.values()) {
    if (!ta.isFinished()) {
      // Keeps the last unfinished attempt seen in iteration order.
      // NOTE(review): the name implies "oldest running attempt" — confirm that the iteration
      // order of task.attempts guarantees this.
      earliestUnfinishedAttempt = ta;
      if (ta.getNodeId() != null) {
        task.nodesWithRunningAttempts.add(ta.getNodeId());
      }
    } else {
      if (TaskAttemptState.SUCCEEDED.equals(ta.getState())) {
        LOG.info("Ignore speculation scheduling for task {} since it has succeeded with attempt {}.",
            task.getTaskId(), ta.getID());
        return;
      }
    }
  }
  if (earliestUnfinishedAttempt == null) {
    // no running (or SUCCEEDED) task attempt at this moment, no need to schedule speculative attempt either
    LOG.info("Ignore speculation scheduling since there is no running attempt on task {}.", task.getTaskId());
    return;
  }
  if (task.commitAttempt != null) {
    LOG.info("Ignore speculation scheduling for task {} since commit has started with commitAttempt {}.",
        task.getTaskId(), task.commitAttempt);
    return;
  }

  // All skip conditions passed: only now log and count the speculation.
  LOG.info("Scheduling a redundant attempt for task " + task.taskId);
  task.counters.findCounter(TaskCounter.NUM_SPECULATIONS).increment(1);
  task.addAndScheduleAttempt(earliestUnfinishedAttempt.getID());
}

Source File: ShuffleUtils.java From tez with Apache License 2.0

5 votes

/**
 * Builds a {@code VertexManagerEvent} carrying the output size, record count and optional
 * partition statistics for the given output.
 *
 * @param context output context supplying the counters and the destination vertex name
 * @param sizePerPartition per-partition sizes; when {@code null} or empty no partition
 *     statistics are attached
 * @param reportDetailedPartitionStats whether to attach detailed statistics instead of the
 *     compressed bitmap form
 * @param deflater deflater used to compress the serialized bitmap
 * @return the event addressed to the destination vertex
 * @throws IOException declared by the method signature
 */
public static VertexManagerEvent generateVMEvent(OutputContext context,
    long[] sizePerPartition, boolean reportDetailedPartitionStats, Deflater deflater)
        throws IOException {
  final ShuffleUserPayloads.VertexManagerEventPayloadProto.Builder payload =
      ShuffleUserPayloads.VertexManagerEventPayloadProto.newBuilder();

  // In pipelined shuffle, multiple events add up to the final output size.
  // This is needed for auto-reduce parallelism to work properly.
  final long outputBytes =
      context.getCounters().findCounter(TaskCounter.OUTPUT_BYTES).getValue();
  payload.setOutputSize(outputBytes);

  final long regularRecords =
      context.getCounters().findCounter(TaskCounter.OUTPUT_RECORDS).getValue();
  final long largeRecords =
      context.getCounters().findCounter(TaskCounter.OUTPUT_LARGE_RECORDS).getValue();
  payload.setNumRecord(regularRecords + largeRecords);

  // Partition statistics are attached only when sizes were supplied.
  final boolean hasPartitionSizes = sizePerPartition != null && sizePerPartition.length > 0;
  if (hasPartitionSizes) {
    if (!reportDetailedPartitionStats) {
      final RoaringBitmap bitmap = getPartitionStatsForPhysicalOutput(sizePerPartition);
      final DataOutputBuffer serialized = new DataOutputBuffer();
      bitmap.serialize(serialized);
      final ByteString compressed =
          TezCommonUtils.compressByteArrayToByteString(serialized.getData(), deflater);
      payload.setPartitionStats(compressed);
    } else {
      payload.setDetailedPartitionStats(
          getDetailedPartitionStatsForPhysicalOutput(sizePerPartition));
    }
  }

  return VertexManagerEvent.create(
      context.getDestinationVertexName(),
      payload.build().toByteString().asReadOnlyByteBuffer());
}

Source File: ExternalSorter.java From tez with Apache License 2.0

5 votes

/**
 * Reports the current {@code OUTPUT_BYTES} and {@code OUTPUT_RECORDS} counter values to the
 * statistics reporter.
 */
protected synchronized void reportStatistics() {
  // Safe for non-started outputs too: a counter that did not exist yet is created with value 0.
  final long bytesWritten = outputContext.getCounters()
      .findCounter(TaskCounter.OUTPUT_BYTES)
      .getValue();
  statsReporter.reportDataSize(bytesWritten);

  final long recordsWritten = outputContext.getCounters()
      .findCounter(TaskCounter.OUTPUT_RECORDS)
      .getValue();
  statsReporter.reportItemsProcessed(recordsWritten);
}

Source File: TezVertexStats.java From spork with Apache License 2.0

5 votes

/**
 * Adds one {@code InputStats} entry to {@code inputs} for every load file spec.
 *
 * <p>Record and byte counts default to {@code -1} and are replaced by the
 * {@code INPUT_RECORDS_PROCESSED} and HDFS-bytes-read counter values when those are present.
 * A present record count is also added to the map or reduce input-record total, depending on
 * {@code isMapOpts}. Does nothing when {@code loads} is {@code null}.
 */
public void addInputStatistics() {
    if (loads == null) {
        return;
    }

    for (FileSpec load : loads) {
        long recordCount = -1;
        long bytesRead = -1;
        final String loadFileName = load.getFileName();

        if (counters != null) {
            final String recordsKey = TaskCounter.INPUT_RECORDS_PROCESSED.name();
            final Map<String, Long> taskGroup = counters.get(TASK_COUNTER_GROUP);
            final Long processed = (taskGroup == null) ? null : taskGroup.get(recordsKey);
            if (processed != null) {
                recordCount = processed;
                if (!this.isMapOpts) {
                    reduceInputRecords += recordCount;
                } else {
                    mapInputRecords += recordCount;
                }
            }

            final Map<String, Long> fsGroup = counters.get(FS_COUNTER_GROUP);
            final Long hdfsRead =
                    (fsGroup == null) ? null : fsGroup.get(PigStatsUtil.HDFS_BYTES_READ);
            if (hdfsRead != null) {
                bytesRead = hdfsRead;
            }
        }

        final InputStats stats = new InputStats(loadFileName, bytesRead,
                recordCount, (state == JobState.SUCCESS));
        stats.setConf(conf);
        inputs.add(stats);
    }
}

Source File: FileBasedKVWriter.java From incubator-tez with Apache License 2.0

5 votes

public FileBasedKVWriter(TezOutputContext outputContext, Configuration conf) throws IOException {
  this.conf = conf;

  this.outputRecordsCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_RECORDS);
  this.outputBytesCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES);
  this.outputBytesCounterWithOverhead = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
  this.outputMaterializedBytesCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);

  this.rfs = ((LocalFileSystem) FileSystem.getLocal(this.conf)).getRaw();

  // Setup serialization
  keyClass = ConfigUtils.getIntermediateOutputKeyClass(this.conf);
  valClass = ConfigUtils.getIntermediateOutputValueClass(this.conf);

  // Setup compression
  if (ConfigUtils.shouldCompressIntermediateOutput(this.conf)) {
    Class<? extends CompressionCodec> codecClass = ConfigUtils
        .getIntermediateOutputCompressorClass(this.conf, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, this.conf);
  } else {
    codec = null;
  }

  this.ouputFileManager = TezRuntimeUtils.instantiateTaskOutputManager(conf,
      outputContext);
  LOG.info("Created KVWriter -> " + "compressionCodec: " + (codec == null ? "NoCompressionCodec"
      : codec.getClass().getName()));

  this.outputPath = ouputFileManager.getOutputFileForWrite();
  LOG.info("Writing data file: " + outputPath);

  // TODO NEWTEZ Consider making the buffer size configurable. Also consider
  // setting up an in-memory buffer which is occasionally flushed to disk so
  // that the output does not block.

  // TODO NEWTEZ maybe use appropriate counter
  this.writer = new IFile.Writer(conf, rfs, outputPath, keyClass, valClass,
      codec, null, outputBytesCounter);
}

Source File: TestTezJobs.java From incubator-tez with Apache License 2.0

5 votes

/**
 * Runs a single-vertex sleep DAG with the AM staging directory placed on the local file
 * system and checks that the DAG succeeds and reports counters.
 *
 * <p>The Tez session is stopped in a {@code finally} block so that a failed assertion or a
 * failing status call does not leave the started session behind.
 *
 * @throws Exception declared by the method signature
 */
@Test
public void testNonDefaultFSStagingDir() throws Exception {
  SleepProcessorConfig spConf = new SleepProcessorConfig(1);

  DAG dag = new DAG("TezSleepProcessor");
  Vertex vertex = new Vertex("SleepVertex", new ProcessorDescriptor(
      SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 1,
      Resource.newInstance(1024, 1));
  dag.addVertex(vertex);

  // Use a randomly suffixed staging directory qualified against the local file system.
  TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
  Path stagingDir = new Path(TEST_ROOT_DIR, "testNonDefaultFSStagingDir"
      + random.nextInt(100000));
  FileSystem localFs = FileSystem.getLocal(tezConf);
  stagingDir = localFs.makeQualified(stagingDir);
  localFs.mkdirs(stagingDir);
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());

  TezClient tezSession = new TezClient("TezSleepProcessor", tezConf, false);
  tezSession.start();
  try {
    DAGClient dagClient = tezSession.submitDAG(dag);

    // Poll until the DAG reaches a completed state.
    DAGStatus dagStatus = dagClient.getDAGStatus(null);
    while (!dagStatus.isCompleted()) {
      LOG.info("Waiting for job to complete. Sleeping for 500ms." + " Current state: "
          + dagStatus.getState());
      Thread.sleep(500L);
      dagStatus = dagClient.getDAGStatus(null);
    }
    // Fetch the final status again, this time asking for counters.
    dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));

    assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState());
    assertNotNull(dagStatus.getDAGCounters());
    assertNotNull(dagStatus.getDAGCounters().getGroup(FileSystemCounter.class.getName()));
    assertNotNull(dagStatus.getDAGCounters().findCounter(TaskCounter.GC_TIME_MILLIS));
    ExampleDriver.printDAGStatus(dagClient, new String[] { "SleepVertex" }, true, true);
  } finally {
    tezSession.stop();
  }
}

Source File: TestPipelinedSorter.java From tez with Apache License 2.0

5 votes

/**
 * Writes generated data through a {@code PipelinedSorter} and verifies partition stats,
 * counters, output permissions, the output data and progress notification.
 *
 * @param partitions number of outputs the sorter is created with
 * @param numKeys number of keys passed to {@code writeData}
 * @param keySize key size passed to {@code writeData}
 * @param initialAvailableMem memory handed to the sorter
 * @param minBlockSize minimum block size in bytes; shifted right by 20 bits before being
 *     stored in the MB-based setting
 * @throws IOException declared by the method signature
 */
public void basicTest(int partitions, int numKeys, int keySize,
    long initialAvailableMem, int minBlockSize) throws IOException {
  this.numOutputs = partitions;
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SORTER_MIN_BLOCK_SIZE_IN_MB,
      minBlockSize >> 20);

  final PipelinedSorter pipelinedSorter =
      new PipelinedSorter(this.outputContext, conf, numOutputs, initialAvailableMem);
  writeData(pipelinedSorter, numKeys, keySize);

  // Partition stats must be available whenever reporting is enabled in the configuration.
  final String statsSetting = conf.get(
      TezRuntimeConfiguration.TEZ_RUNTIME_REPORT_PARTITION_STATS,
      TezRuntimeConfiguration.TEZ_RUNTIME_REPORT_PARTITION_STATS_DEFAULT);
  final ReportPartitionStats statsMode = ReportPartitionStats.fromString(statsSetting);
  if (statsMode.isEnabled()) {
    assertTrue(pipelinedSorter.getPartitionStats() != null);
  }

  verifyCounters(pipelinedSorter, outputContext);
  verifyOutputPermissions(outputContext.getUniqueIdentifier());

  final Path finalOutput = pipelinedSorter.finalOutputFile;
  final FileSystem outputFs = finalOutput.getFileSystem(conf);
  final TezCounter physicalBytes =
      outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
  // Verify the written dataset, but only when something was physically written.
  if (physicalBytes.getValue() > 0) {
    final IFile.Reader outputReader =
        new IFile.Reader(outputFs, finalOutput, null, null, null, false, -1, 4096);
    verifyData(outputReader);
    outputReader.close();
  }

  // Progress must have been reported at least once.
  verify(outputContext, atLeastOnce()).notifyProgress();
}

Source File: TestPipelinedSorter.java From tez with Apache License 2.0

5 votes

/**
 * Asserts that the spill-related counters are consistent with the sorter's spill count and
 * its final-merge setting, and that the output byte counters are non-negative.
 *
 * @param sorter sorter whose spill count and final-merge setting are checked
 * @param context output context holding the counters
 */
private void verifyCounters(PipelinedSorter sorter, OutputContext context) {
  final TezCounter shuffleChunks =
      context.getCounters().findCounter(TaskCounter.SHUFFLE_CHUNK_COUNT);
  final TezCounter extraSpills =
      context.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
  final TezCounter extraSpillBytesWritten =
      context.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
  final TezCounter extraSpillBytesRead =
      context.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);

  if (!sorter.isFinalMergeEnabled()) {
    // Without a final merge: no additional spills, and one shuffle chunk per spill.
    assertTrue(extraSpills.getValue() == 0);
    assertTrue(sorter.getNumSpills() == shuffleChunks.getValue());
    assertTrue(extraSpillBytesRead.getValue() == 0);
    assertTrue(extraSpillBytesWritten.getValue() == 0);
  } else {
    // With a final merge: every spill but one is additional, and one chunk is served.
    assertTrue(extraSpills.getValue() == (sorter.getNumSpills() - 1));
    assertTrue(shuffleChunks.getValue() == 1);
    if (sorter.getNumSpills() > 1) {
      assertTrue(extraSpillBytesRead.getValue() > 0);
      assertTrue(extraSpillBytesWritten.getValue() > 0);
    }
  }

  final TezCounter physicalBytes =
      context.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
  assertTrue(physicalBytes.getValue() >= 0);

  final TezCounter bytesWithOverhead =
      context.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
  assertTrue(bytesWithOverhead.getValue() >= 0);
}

Source File: TestDefaultSorter.java From tez with Apache License 2.0

5 votes

/**
 * Asserts that the spill-related counters are consistent with the sorter's spill count and
 * its final-merge setting, that the output byte counters are non-negative, and that
 * progress was reported at least once.
 *
 * @param sorter sorter whose spill count and final-merge setting are checked
 * @param context output context holding the counters
 */
private void verifyCounters(DefaultSorter sorter, OutputContext context) {
  final TezCounter shuffleChunks =
      context.getCounters().findCounter(TaskCounter.SHUFFLE_CHUNK_COUNT);
  final TezCounter extraSpills =
      context.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
  final TezCounter extraSpillBytesWritten =
      context.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
  final TezCounter extraSpillBytesRead =
      context.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);

  if (!sorter.isFinalMergeEnabled()) {
    // Without a final merge: no additional spills, and one shuffle chunk per spill.
    assertTrue(extraSpills.getValue() == 0);
    assertTrue(sorter.getNumSpills() == shuffleChunks.getValue());
    assertTrue(extraSpillBytesRead.getValue() == 0);
    assertTrue(extraSpillBytesWritten.getValue() == 0);
  } else {
    // With a final merge: every spill but one is additional, and one chunk is served.
    assertTrue(extraSpills.getValue() == (sorter.getNumSpills() - 1));
    assertTrue(shuffleChunks.getValue() == 1);
    if (sorter.getNumSpills() > 1) {
      assertTrue(extraSpillBytesRead.getValue() > 0);
      assertTrue(extraSpillBytesWritten.getValue() > 0);
    }
  }

  final TezCounter physicalBytes =
      context.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
  assertTrue(physicalBytes.getValue() >= 0);

  final TezCounter bytesWithOverhead =
      context.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
  assertTrue(bytesWithOverhead.getValue() >= 0);

  verify(context, atLeastOnce()).notifyProgress();
}

Source File: TestMultiMRInput.java From tez with Apache License 2.0

5 votes

/**
 * Drains every key/value reader of the input and checks the records, the progress
 * notifications, the split-length counter and the number of readers.
 *
 * <p>Each record read must match (and is removed from) {@code data}. After a reader is
 * exhausted, one more {@code next()} call must throw an {@code IOException} whose message
 * contains the usage hint.
 *
 * @param input input whose readers are drained
 * @param data expected records; entries are removed as they are matched
 * @param expectedReaderCounts expected number of readers
 * @param inputBytes expected value of the {@code INPUT_SPLIT_LENGTH_BYTES} counter
 * @throws Exception declared by the method signature
 */
private void assertReaders(MultiMRInput input, LinkedHashMap<LongWritable, Text> data,
    int expectedReaderCounts, long inputBytes) throws Exception {
  int readersSeen = 0;
  int recordsSeen = 0;
  for (KeyValueReader kvReader : input.getKeyValueReaders()) {
    readersSeen++;
    while (kvReader.next()) {
      recordsSeen++;
      // Expected notifyProgress() calls so far: records read plus readers seen, minus one.
      verify(input.getContext(), times(recordsSeen + readersSeen - 1)).notifyProgress();
      if (data.isEmpty()) {
        fail("Found more records than expected");
      }
      final Object currentKey = kvReader.getCurrentKey();
      final Object currentValue = kvReader.getCurrentValue();
      assertEquals(currentValue, data.remove(currentKey));
    }

    try {
      kvReader.next(); // reading past the end must throw
      fail();
    } catch (IOException expected) {
      assertTrue(expected.getMessage().contains("For usage, please refer to"));
    }
  }

  final long splitLengthBytes = input.getContext().getCounters()
      .findCounter(TaskCounter.INPUT_SPLIT_LENGTH_BYTES)
      .getValue();
  assertEquals(inputBytes, splitLengthBytes);
  assertEquals(expectedReaderCounts, readersSeen);
}

Source File: MRInput.java From tez with Apache License 2.0

5 votes

/**
 * Closes the record reader and reports the processed record count to the statistics
 * reporter.
 *
 * @return {@code null}; this method produces no events
 * @throws IOException declared by the method signature
 */
@Override
public List<Event> close() throws IOException {
  mrReader.close();

  final long recordsRead = getContext().getCounters()
      .findCounter(TaskCounter.INPUT_RECORDS_PROCESSED)
      .getValue();
  getContext().getStatisticsReporter().reportItemsProcessed(recordsRead);

  return null;
}

Source File: MROutput.java From tez with Apache License 2.0

5 votes

/**
 * Flushes the output, logs that it was closed and reports the output record count to the
 * statistics reporter.
 *
 * @return {@code null}; this method produces no events
 * @throws IOException declared by the method signature
 */
@Override
public synchronized List<Event> close() throws IOException {
  flush();

  final String destinationVertex = getContext().getDestinationVertexName();
  LOG.info(destinationVertex + " closed");

  final long recordsWritten = getContext().getCounters()
      .findCounter(TaskCounter.OUTPUT_RECORDS)
      .getValue();
  getContext().getStatisticsReporter().reportItemsProcessed(recordsWritten);

  return null;
}

org.apache.tez.common.counters.TaskCounter Java Examples