org.apache.tez.common.counters.TaskCounter Java Examples

The following examples show how to use org.apache.tez.common.counters.TaskCounter. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: TestHistoryParser.java From tez with Apache License 2.0

6 votes

/**
 * Compares a fixed set of task counters between two parsed history entities.
 *
 * <p>Each counter is looked up by its enum name on both entities and the pair
 * is handed to {@code isCounterSame}. Every counter is listed exactly once.
 *
 * @param info1 first entity to read counters from
 * @param info2 second entity to read counters from
 */
private void isCountersSame(BaseInfo info1, BaseInfo info2) {
  final TaskCounter[] comparedCounters = {
      TaskCounter.ADDITIONAL_SPILL_COUNT,
      TaskCounter.SPILLED_RECORDS,
      TaskCounter.OUTPUT_RECORDS,
      TaskCounter.OUTPUT_BYTES,
      TaskCounter.REDUCE_INPUT_GROUPS,
      TaskCounter.REDUCE_INPUT_RECORDS
  };

  for (TaskCounter counter : comparedCounters) {
    String counterName = counter.name();
    isCounterSame(info1.getCounter(counterName), info2.getCounter(counterName));
  }
}

Example #2

Source File: ShuffledMergedInput.java From incubator-tez with Apache License 2.0

6 votes

/**
 * Prepares this input: builds the configuration from the user payload, issues
 * the initial memory request and looks up the reduce-input counters.
 *
 * <p>With zero physical inputs the method requests no memory, marks the input
 * as started and ready, and returns immediately.
 *
 * @return always an empty event list
 * @throws IOException propagated from the calls made during initialization
 */
@Override
public synchronized List<Event> initialize() throws IOException {
  this.conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload());

  final boolean nothingToFetch = this.getNumPhysicalInputs() == 0;
  if (nothingToFetch) {
    // Nothing will ever be fetched, so the input is ready right away.
    getContext().requestInitialMemory(0L, null);
    isStarted.set(true);
    getContext().inputIsReady();
    LOG.info("input fetch not required since there are 0 physical inputs for input vertex: "
        + getContext().getSourceVertexName());
    return Collections.emptyList();
  }

  // The requirement is computed before the callback handler is created.
  final long shuffleMemoryNeeded = Shuffle.getInitialMemoryRequirement(
      conf, getContext().getTotalMemoryAvailableToTask());
  this.memoryUpdateCallbackHandler = new MemoryUpdateCallbackHandler();
  getContext().requestInitialMemory(shuffleMemoryNeeded, memoryUpdateCallbackHandler);

  this.inputKeyCounter =
      getContext().getCounters().findCounter(TaskCounter.REDUCE_INPUT_GROUPS);
  this.inputValueCounter =
      getContext().getCounters().findCounter(TaskCounter.REDUCE_INPUT_RECORDS);
  this.conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, getContext().getWorkDirs());

  return Collections.emptyList();
}

Example #3

Source File: ShuffledUnorderedKVInput.java From incubator-tez with Apache License 2.0

6 votes

/**
 * Prepares this unordered input: validates that the number of physical inputs
 * has been set, builds the configuration from the user payload, issues the
 * initial memory request and looks up the processed-records counter.
 *
 * <p>With zero physical inputs the method requests no memory, marks the input
 * as started and ready, and returns before touching the local-dirs setting or
 * the counter.
 *
 * @return always an empty event list
 * @throws Exception propagated from the calls made during initialization
 */
@Override
public synchronized List<Event> initialize() throws Exception {
  Preconditions.checkArgument(getNumPhysicalInputs() != -1, "Number of Inputs has not been set");
  this.conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload());

  if (getNumPhysicalInputs() == 0) {
    getContext().requestInitialMemory(0L, null);
    isStarted.set(true);
    getContext().inputIsReady();
    LOG.info("input fetch not required since there are 0 physical inputs for input vertex: "
        + getContext().getSourceVertexName());
    return Collections.emptyList();
  }

  // At least one physical input: ask for memory and register the callback.
  final long memoryRequest = getInitialMemoryReq();
  memoryUpdateCallbackHandler = new MemoryUpdateCallbackHandler();
  this.getContext().requestInitialMemory(memoryRequest, memoryUpdateCallbackHandler);

  this.conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, getContext().getWorkDirs());
  this.inputRecordCounter =
      getContext().getCounters().findCounter(TaskCounter.INPUT_RECORDS_PROCESSED);
  return Collections.emptyList();
}

Example #4

Source File: TestUnorderedKVReader.java From tez with Apache License 2.0

6 votes

/**
 * Checks that an {@code InterruptedException} thrown by the shuffle manager
 * while the reader asks for its next input reaches the caller of
 * {@code next()} as an {@code IOInterruptedException}.
 */
@Test(timeout = 5000)
public void testInterruptOnNext() throws IOException, InterruptedException {
  // A shuffle manager that is interrupted whenever the next input is requested.
  ShuffleManager interruptedManager = mock(ShuffleManager.class);
  doThrow(new InterruptedException()).when(interruptedManager).getNextInput();

  TezCounters taskCounters = new TezCounters();
  TezCounter recordsProcessed = taskCounters.findCounter(TaskCounter.INPUT_RECORDS_PROCESSED);
  UnorderedKVReader<Text, Text> kvReader = new UnorderedKVReader<Text, Text>(
      interruptedManager, defaultConf, null, false, -1, -1,
      recordsProcessed, mock(InputContext.class));

  try {
    kvReader.next();
    fail("No data available to reader. Should not be able to access any record");
  } catch (IOInterruptedException expected) {
    // This is the only acceptable outcome; anything else fails the test.
  }
}

Example #5

Source File: TaskCounterUpdater.java From incubator-tez with Apache License 2.0

6 votes

/**
 * Refreshes the heap-usage counter and, when a process tree is available,
 * the CPU, physical-memory and virtual-memory task counters.
 */
void updateResourceCounters() {
  // Heap usage does not depend on the process tree.
  updateHeapUsageCounter();

  // Everything else comes from the ResourceCalculatorPlugin process tree.
  if (pTree != null) {
    pTree.updateProcessTree();
    final long cumulativeCpu = pTree.getCumulativeCpuTime();
    final long rssBytes = pTree.getCumulativeRssmem();
    final long virtualBytes = pTree.getCumulativeVmem();

    // Subtract the CPU time consumed previously by JVM reuse.
    final long taskCpu = cumulativeCpu - initCpuCumulativeTime;
    tezCounters.findCounter(TaskCounter.CPU_MILLISECONDS).setValue(taskCpu);
    tezCounters.findCounter(TaskCounter.PHYSICAL_MEMORY_BYTES).setValue(rssBytes);
    tezCounters.findCounter(TaskCounter.VIRTUAL_MEMORY_BYTES).setValue(virtualBytes);
  }
}

Example #6

Source File: UnorderedKVOutput.java From tez with Apache License 2.0

6 votes

/**
 * Closes this output and reports its size statistics.
 *
 * <p>If the output was started, the writer is closed and its events are
 * returned. Otherwise a warning is logged and events for a non-started output
 * are generated through {@code ShuffleUtils}. In both cases the current
 * {@code OUTPUT_BYTES} and {@code OUTPUT_RECORDS} counter values are passed to
 * the statistics reporter.
 *
 * @return the events produced while closing
 * @throws Exception propagated from the writer or from event generation
 */
@Override
public synchronized List<Event> close() throws Exception {
  List<Event> returnEvents;
  if (isStarted.get()) {
    //TODO: Do we need to support sending payloads via events?
    returnEvents = kvWriter.close();
    kvWriter = null;
  } else {
    // The vertex name is passed as an argument rather than concatenated into
    // the format string, so braces in the name are never read as placeholders.
    String destinationVertex = getContext().getDestinationVertexName();
    LOG.warn(
        "{}: Attempting to close output {} of type {} before it was started. Generating empty events",
        destinationVertex, destinationVertex, this.getClass().getSimpleName());
    returnEvents = new LinkedList<Event>();
    ShuffleUtils
        .generateEventsForNonStartedOutput(returnEvents, getNumPhysicalOutputs(), getContext(),
            false, false, TezCommonUtils.newBestCompressionDeflater());
  }

  // This works for non-started outputs since new counters will be created with an initial value of 0
  long outputSize = getContext().getCounters().findCounter(TaskCounter.OUTPUT_BYTES).getValue();
  getContext().getStatisticsReporter().reportDataSize(outputSize);
  long outputRecords = getContext().getCounters()
      .findCounter(TaskCounter.OUTPUT_RECORDS).getValue();
  getContext().getStatisticsReporter().reportItemsProcessed(outputRecords);

  return returnEvents;
}

Example #7

Source File: UnorderedPartitionedKVOutput.java From tez with Apache License 2.0

6 votes

/**
 * Closes this partitioned output and reports its size statistics.
 *
 * <p>If the output was started, the writer is closed and its events are
 * returned. Otherwise a warning is logged and events for a non-started output
 * are generated through {@code ShuffleUtils}. In both cases the current
 * {@code OUTPUT_BYTES} and {@code OUTPUT_RECORDS} counter values are passed to
 * the statistics reporter.
 *
 * @return the events produced while closing
 * @throws Exception propagated from the writer or from event generation
 */
@Override
public synchronized List<Event> close() throws Exception {
  List<Event> returnEvents;
  if (isStarted.get()) {
    returnEvents = kvWriter.close();
    kvWriter = null;
  } else {
    // The vertex name is passed as an argument rather than concatenated into
    // the format string, so braces in the name are never read as placeholders.
    String destinationVertex = getContext().getDestinationVertexName();
    LOG.warn(
        "{}: Attempting to close output {} of type {} before it was started. Generating empty events",
        destinationVertex, destinationVertex, this.getClass().getSimpleName());
    returnEvents = new LinkedList<Event>();
    ShuffleUtils
        .generateEventsForNonStartedOutput(returnEvents, getNumPhysicalOutputs(), getContext(),
            false, true, TezCommonUtils.newBestCompressionDeflater());
  }

  // This works for non-started outputs since new counters will be created with an initial value of 0
  long outputSize = getContext().getCounters().findCounter(TaskCounter.OUTPUT_BYTES).getValue();
  getContext().getStatisticsReporter().reportDataSize(outputSize);
  long outputRecords = getContext().getCounters()
      .findCounter(TaskCounter.OUTPUT_RECORDS).getValue();
  getContext().getStatisticsReporter().reportItemsProcessed(outputRecords);

  return returnEvents;
}

Example #8

Source File: OrderedGroupedKVInput.java From tez with Apache License 2.0

6 votes

/**
 * Closes the raw iterator (when there are physical inputs and one exists),
 * shuts down the shuffle if present, and passes the decompressed shuffle byte
 * count and the reduce input record count to the statistics reporter.
 *
 * @return always an empty event list
 * @throws IOException propagated from the calls made while closing
 */
@Override
public synchronized List<Event> close() throws IOException {
  final boolean iteratorOpen = this.getNumPhysicalInputs() != 0 && rawIter != null;
  if (iteratorOpen) {
    rawIter.close();
  }
  if (shuffle != null) {
    shuffle.shutdown();
  }

  // Publish the final counter values as task statistics.
  long decompressedBytes = getContext().getCounters()
      .findCounter(TaskCounter.SHUFFLE_BYTES_DECOMPRESSED)
      .getValue();
  getContext().getStatisticsReporter().reportDataSize(decompressedBytes);

  long recordsConsumed = getContext().getCounters()
      .findCounter(TaskCounter.REDUCE_INPUT_RECORDS)
      .getValue();
  getContext().getStatisticsReporter().reportItemsProcessed(recordsConsumed);

  return Collections.emptyList();
}

Example #9

Source File: UnorderedKVInput.java From tez with Apache License 2.0

6 votes

/**
 * Prepares this unordered input: validates that the number of physical inputs
 * has been set, builds the configuration from the base configuration and
 * payload, issues the initial memory request and looks up the
 * processed-records counter.
 *
 * <p>With zero physical inputs the method requests no memory, marks the input
 * as started and ready, and returns before touching the local-dirs setting or
 * the counter.
 *
 * @return always an empty event list
 * @throws Exception propagated from the calls made during initialization
 */
@Override
public synchronized List<Event> initialize() throws Exception {
  Preconditions.checkArgument(getNumPhysicalInputs() != -1, "Number of Inputs has not been set");
  this.conf = TezUtils.createConfFromBaseConfAndPayload(getContext());

  if (getNumPhysicalInputs() == 0) {
    getContext().requestInitialMemory(0L, null);
    isStarted.set(true);
    getContext().inputIsReady();
    LOG.info("input fetch not required since there are 0 physical inputs for input vertex: "
        + getContext().getSourceVertexName());
    return Collections.emptyList();
  }

  // At least one physical input: ask for memory and register the callback.
  final long memoryRequest = getInitialMemoryReq();
  memoryUpdateCallbackHandler = new MemoryUpdateCallbackHandler();
  this.getContext().requestInitialMemory(memoryRequest, memoryUpdateCallbackHandler);

  this.conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, getContext().getWorkDirs());
  this.inputRecordCounter =
      getContext().getCounters().findCounter(TaskCounter.INPUT_RECORDS_PROCESSED);
  return Collections.emptyList();
}

Example #10

Source File: UnorderedKVInput.java From tez with Apache License 2.0

6 votes

/**
 * Logs final progress through the input event handler (if any), shuts down
 * the shuffle manager (if any), and passes the decompressed shuffle byte count
 * and the processed input record count to the statistics reporter.
 *
 * @return {@code null}; no events are produced on close
 * @throws Exception propagated from the calls made while closing
 */
@Override
public synchronized List<Event> close() throws Exception {
  if (this.inputEventHandler != null) {
    this.inputEventHandler.logProgress(true);
  }
  if (this.shuffleManager != null) {
    this.shuffleManager.shutdown();
  }

  // Publish the final counter values as task statistics.
  long decompressedBytes = getContext().getCounters()
      .findCounter(TaskCounter.SHUFFLE_BYTES_DECOMPRESSED)
      .getValue();
  getContext().getStatisticsReporter().reportDataSize(decompressedBytes);

  long recordsConsumed = getContext().getCounters()
      .findCounter(TaskCounter.INPUT_RECORDS_PROCESSED)
      .getValue();
  getContext().getStatisticsReporter().reportItemsProcessed(recordsConsumed);

  return null;
}

Example #11

Source File: TestPipelinedSorter.java From tez with Apache License 2.0

6 votes

/**
 * Runs the pipelined sorter with final merge disabled and a key count of
 * zero, and checks that no final output file is set on the sorter.
 */
@Test
public void testEmptyDataWithPipelinedShuffle() throws IOException {
  this.numOutputs = 1;
  this.initialAvailableMem = 1 * 1024 * 1024;
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SORTER_MIN_BLOCK_SIZE_IN_MB, 1);
  PipelinedSorter sorter =
      new PipelinedSorter(this.outputContext, conf, numOutputs, initialAvailableMem);

  writeData(sorter, 0, 1 << 20);

  // With final merge disabled the final output file is not populated.
  assertTrue(sorter.finalOutputFile == null);
  TezCounter numShuffleChunks =
      outputContext.getCounters().findCounter(TaskCounter.SHUFFLE_CHUNK_COUNT);
  // The spill-count comparison below is currently disabled:
  //    assertTrue(sorter.getNumSpills() == numShuffleChunks.getValue());

  // Switch final merge back on for whatever runs next with this conf.
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true);
}

Example #12

Source File: TestPipelinedSorter.java From tez with Apache License 2.0

6 votes

/**
 * Runs the pipelined sorter with final merge disabled, writes five keys, and
 * checks that no final output file is set and that the shuffle chunk counter
 * equals the sorter's spill count.
 */
@Test
public void testExceedsKVWithPipelinedShuffle() throws IOException {
  this.numOutputs = 1;
  this.initialAvailableMem = 1 * 1024 * 1024;
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false);
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SORTER_MIN_BLOCK_SIZE_IN_MB, 1);
  PipelinedSorter pipelinedSorter =
      new PipelinedSorter(this.outputContext, conf, numOutputs, initialAvailableMem);

  writeData(pipelinedSorter, 5, 1 << 20);

  // With final merge disabled the final output file is not populated.
  assertTrue(pipelinedSorter.finalOutputFile == null);
  TezCounter shuffleChunkCount =
      outputContext.getCounters().findCounter(TaskCounter.SHUFFLE_CHUNK_COUNT);
  assertTrue(pipelinedSorter.getNumSpills() == shuffleChunkCount.getValue());

  // Switch final merge back on for whatever runs next with this conf.
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true);
}

Example #13

Source File: TaskCounterUpdater.java From tez with Apache License 2.0

6 votes

/**
 * Refreshes the heap-usage counter and, when a process tree is available,
 * the CPU, physical-memory and virtual-memory task counters.
 */
void updateResourceCounters() {
  // Heap usage does not depend on the process tree.
  updateHeapUsageCounter();

  // Everything else comes from the ResourceCalculatorPlugin process tree.
  if (pTree != null) {
    pTree.updateProcessTree();
    final long cumulativeCpu = pTree.getCumulativeCpuTime();
    final long rssBytes = pTree.getRssMemorySize();
    final long virtualBytes = pTree.getVirtualMemorySize();

    // Subtract the CPU time consumed previously by JVM reuse.
    final long taskCpu = cumulativeCpu - initCpuCumulativeTime;
    tezCounters.findCounter(TaskCounter.CPU_MILLISECONDS).setValue(taskCpu);
    tezCounters.findCounter(TaskCounter.PHYSICAL_MEMORY_BYTES).setValue(rssBytes);
    tezCounters.findCounter(TaskCounter.VIRTUAL_MEMORY_BYTES).setValue(virtualBytes);
  }
}

Example #14

Source File: MRInput.java From incubator-tez with Apache License 2.0

5 votes

/**
 * Creates the record reader for this input while holding {@code rrLock}.
 *
 * <p>When split information does not arrive via events, the split for this
 * task's index is read from disk and handed to the reader. Otherwise the
 * reader is built without a split. {@code useNewApi} selects between the
 * {@code MRReaderMapReduce} and {@code MRReaderMapred} implementations.
 *
 * @throws IOException propagated from reading the splits or creating the reader
 */
@Private
void initializeInternal() throws IOException {
  // The lock is taken primarily for visibility.
  rrLock.lock();
  try {
    if (!splitInfoViaEvents) {
      // Locate this task's split on disk.
      TaskSplitMetaInfo[] splitMetaInfos = MRInputUtils.readSplits(jobConf);
      TaskSplitMetaInfo ownMetaInfo = splitMetaInfos[getContext().getTaskIndex()];
      TaskSplitIndex splitIndex = new TaskSplitIndex(
          ownMetaInfo.getSplitLocation(), ownMetaInfo.getStartOffset());

      if (useNewApi) {
        org.apache.hadoop.mapreduce.InputSplit mapreduceSplit =
            MRInputUtils.getNewSplitDetailsFromDisk(splitIndex, jobConf,
                getContext().getCounters().findCounter(TaskCounter.SPLIT_RAW_BYTES));
        mrReader = new MRReaderMapReduce(jobConf, mapreduceSplit, getContext().getCounters(),
            inputRecordCounter, getContext().getApplicationId().getClusterTimestamp(),
            getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(),
            getContext().getTaskIndex(), getContext().getTaskAttemptNumber());
      } else {
        org.apache.hadoop.mapred.InputSplit mapredSplit =
            MRInputUtils.getOldSplitDetailsFromDisk(splitIndex, jobConf,
                getContext().getCounters().findCounter(TaskCounter.SPLIT_RAW_BYTES));
        mrReader = new MRReaderMapred(jobConf, mapredSplit, getContext().getCounters(),
            inputRecordCounter);
      }
    } else if (useNewApi) {
      // The reader is created without a split in this branch.
      mrReader = new MRReaderMapReduce(jobConf, getContext().getCounters(), inputRecordCounter,
          getContext().getApplicationId().getClusterTimestamp(),
          getContext().getTaskVertexIndex(),
          getContext().getApplicationId().getId(),
          getContext().getTaskIndex(),
          getContext().getTaskAttemptNumber());
    } else {
      mrReader = new MRReaderMapred(jobConf, getContext().getCounters(), inputRecordCounter);
    }
  } finally {
    rrLock.unlock();
  }
  LOG.info("Initialzed MRInput: " + getContext().getSourceVertexName());
}

Example #15

Source File: GcTimeUpdater.java From incubator-tez with Apache License 2.0

5 votes

/**
 * Adds the value of {@code getElapsedGc()} to the {@code GC_TIME_MILLIS}
 * counter. Does nothing when no counters are set or the counter lookup
 * returns {@code null}.
 */
void incrementGcCounter() {
  if (counters != null) {
    TezCounter gcTime = counters.findCounter(TaskCounter.GC_TIME_MILLIS);
    if (gcTime != null) {
      gcTime.increment(getElapsedGc());
    }
  }
}

Example #16

Source File: OrderedGroupedKVInput.java From tez with Apache License 2.0

5 votes

/**
 * Prepares this input: builds the configuration from the base configuration
 * and payload, issues the initial memory request and looks up the
 * reduce-input and shuffled-inputs counters.
 *
 * <p>With zero physical inputs the method requests no memory, marks the input
 * as started and ready, and returns immediately.
 *
 * @return always an empty event list
 * @throws IOException propagated from the calls made during initialization
 */
@Override
public synchronized List<Event> initialize() throws IOException {
  this.conf = TezUtils.createConfFromBaseConfAndPayload(getContext());

  final boolean nothingToFetch = this.getNumPhysicalInputs() == 0;
  if (nothingToFetch) {
    // Nothing will ever be fetched, so the input is ready right away.
    getContext().requestInitialMemory(0L, null);
    isStarted.set(true);
    getContext().inputIsReady();
    LOG.info("input fetch not required since there are 0 physical inputs for input vertex: "
        + getContext().getSourceVertexName());
    return Collections.emptyList();
  }

  // The requirement is computed before the callback handler is created.
  final long shuffleMemoryNeeded = Shuffle.getInitialMemoryRequirement(
      conf, getContext().getTotalMemoryAvailableToTask());
  this.memoryUpdateCallbackHandler = new MemoryUpdateCallbackHandler();
  getContext().requestInitialMemory(shuffleMemoryNeeded, memoryUpdateCallbackHandler);

  this.inputKeyCounter =
      getContext().getCounters().findCounter(TaskCounter.REDUCE_INPUT_GROUPS);
  this.inputValueCounter =
      getContext().getCounters().findCounter(TaskCounter.REDUCE_INPUT_RECORDS);
  this.shuffledInputs =
      getContext().getCounters().findCounter(TaskCounter.NUM_SHUFFLED_INPUTS);
  this.conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, getContext().getWorkDirs());

  return Collections.emptyList();
}

Example #17

Source File: MRInputBase.java From incubator-tez with Apache License 2.0

5 votes

/**
 * Builds the {@code JobConf} for this input from the user payload, stamps it
 * with the task attempt id and application attempt id, and looks up the
 * processed-records counter.
 *
 * @return {@code null}; no events are produced
 * @throws IOException propagated from the calls made during initialization
 * @throws IllegalArgumentException presumably, via {@code Preconditions}, when
 *     the payload carries split information (verify against the Guava version in use)
 */
public List<Event> initialize() throws IOException {
  getContext().requestInitialMemory(0L, null); // mandatory call

  MRRuntimeProtos.MRInputUserPayloadProto userPayload =
      MRHelpers.parseMRInputPayload(getContext().getUserPayload());
  Preconditions.checkArgument(!userPayload.hasSplits(),
      "Split information not expected in " + this.getClass().getName());
  Configuration payloadConf =
      MRHelpers.createConfFromByteString(userPayload.getConfigurationBytes());

  this.jobConf = new JobConf(payloadConf);
  // Tokens are merged into the jobConf in case they are accessed within the RR / IF.
  jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());

  // Assemble the MapReduce-style attempt id from the Tez context.
  TaskID mapTaskId = new TaskID(
      Long.toString(getContext().getApplicationId().getClusterTimestamp()),
      getContext().getApplicationId().getId(),
      TaskType.MAP,
      getContext().getTaskIndex());
  TaskAttemptID attemptId = new TaskAttemptID(mapTaskId, getContext().getTaskAttemptNumber());

  jobConf.set(MRJobConfig.TASK_ATTEMPT_ID, attemptId.toString());
  jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, getContext().getDAGAttemptNumber());

  this.inputRecordCounter =
      getContext().getCounters().findCounter(TaskCounter.INPUT_RECORDS_PROCESSED);

  useNewApi = this.jobConf.getUseNewMapper();
  return null;
}

Example #18

Source File: MultiMRInput.java From tez with Apache License 2.0

5 votes

/**
 * Closes every reader of this input and passes the processed input record
 * count to the statistics reporter.
 *
 * @return {@code null}; no events are produced on close
 * @throws Exception propagated from closing a reader
 */
@Override
public List<Event> close() throws Exception {
  for (MRReader openReader : readers) {
    openReader.close();
  }

  // Publish the final record count as a task statistic.
  long recordsProcessed = getContext().getCounters()
      .findCounter(TaskCounter.INPUT_RECORDS_PROCESSED)
      .getValue();
  getContext().getStatisticsReporter().reportItemsProcessed(recordsProcessed);

  return null;
}

Example #19

Source File: TaskImpl.java From tez with Apache License 2.0

5 votes

/**
 * Handles a request to schedule a redundant (speculative) attempt for a task.
 *
 * <p>The {@code NUM_SPECULATIONS} counter is incremented up front. The request
 * is then ignored when any attempt has already succeeded, when no attempt is
 * unfinished, or when a commit attempt is set. Otherwise a new attempt is
 * scheduled based on an unfinished attempt.
 *
 * @param task the task to speculate on
 * @param event the triggering event (not read here)
 */
@Override
public void transition(TaskImpl task, TaskEvent event) {
  LOG.info("Scheduling a redundant attempt for task " + task.taskId);
  task.counters.findCounter(TaskCounter.NUM_SPECULATIONS).increment(1);

  TaskAttempt unfinishedAttempt = null;
  for (TaskAttempt attempt : task.attempts.values()) {
    if (attempt.isFinished()) {
      if (TaskAttemptState.SUCCEEDED.equals(attempt.getState())) {
        LOG.info("Ignore speculation scheduling for task {} since it has succeeded with attempt {}.",
            task.getTaskId(), attempt.getID());
        return;
      }
      continue;
    }
    // Remember the unfinished attempt; the last one in iteration order wins.
    unfinishedAttempt = attempt;
    if (attempt.getNodeId() != null) {
      task.nodesWithRunningAttempts.add(attempt.getNodeId());
    }
  }

  if (unfinishedAttempt == null) {
    // no running (or SUCCEEDED) task attempt at this moment, no need to schedule speculative attempt either
    LOG.info("Ignore speculation scheduling since there is no running attempt on task {}.", task.getTaskId());
    return;
  }
  if (task.commitAttempt != null) {
    LOG.info("Ignore speculation scheduling for task {} since commit has started with commitAttempt {}.",
        task.getTaskId(), task.commitAttempt);
    return;
  }
  task.addAndScheduleAttempt(unfinishedAttempt.getID());
}

Example #20

Source File: ShuffleUtils.java From tez with Apache License 2.0

5 votes

/**
 * Builds the vertex manager event that carries this output's size, record
 * count and optional per-partition statistics.
 *
 * @param context output context supplying the counters and the destination vertex name
 * @param sizePerPartition per-partition sizes; when {@code null} or empty no
 *     partition statistics are attached
 * @param reportDetailedPartitionStats whether to attach detailed partition
 *     statistics instead of the compressed bitmap form
 * @param deflater deflater used to compress the serialized bitmap
 * @return the event addressed to the context's destination vertex
 * @throws IOException propagated from serializing or compressing the statistics
 */
public static VertexManagerEvent generateVMEvent(OutputContext context,
    long[] sizePerPartition, boolean reportDetailedPartitionStats, Deflater deflater)
        throws IOException {
  ShuffleUserPayloads.VertexManagerEventPayloadProto.Builder vmBuilder =
      ShuffleUserPayloads.VertexManagerEventPayloadProto.newBuilder();

  long outputSize = context.getCounters().
      findCounter(TaskCounter.OUTPUT_BYTES).getValue();

  // Set this information only when required.  In pipelined shuffle,
  // multiple events would end up adding up to final output size.
  // This is needed for auto-reduce parallelism to work properly.
  vmBuilder.setOutputSize(outputSize);
  vmBuilder.setNumRecord(context.getCounters().findCounter(TaskCounter.OUTPUT_RECORDS).getValue()
      + context.getCounters().findCounter(TaskCounter.OUTPUT_LARGE_RECORDS).getValue());

  //set partition stats
  if (sizePerPartition != null && sizePerPartition.length > 0) {
    if (reportDetailedPartitionStats) {
      vmBuilder.setDetailedPartitionStats(
          getDetailedPartitionStatsForPhysicalOutput(sizePerPartition));
    } else {
      RoaringBitmap stats = getPartitionStatsForPhysicalOutput(
          sizePerPartition);
      DataOutputBuffer dout = new DataOutputBuffer();
      stats.serialize(dout);
      // getData() exposes the whole backing array; only the first getLength()
      // bytes were written, so trim before compressing to avoid shipping
      // unused buffer capacity in the event payload.
      byte[] serializedStats = java.util.Arrays.copyOf(dout.getData(), dout.getLength());
      ByteString partitionStatsBytes =
          TezCommonUtils.compressByteArrayToByteString(serializedStats, deflater);
      vmBuilder.setPartitionStats(partitionStatsBytes);
    }
  }

  return VertexManagerEvent.create(
      context.getDestinationVertexName(),
      vmBuilder.build().toByteString().asReadOnlyByteBuffer());
}

Example #21

Source File: ExternalSorter.java From tez with Apache License 2.0

5 votes

/**
 * Passes the current {@code OUTPUT_BYTES} and {@code OUTPUT_RECORDS} counter
 * values to the statistics reporter.
 */
protected synchronized void reportStatistics() {
  // Counters are created with an initial value of 0 on first lookup, so this
  // also works for outputs that were never started.
  long bytesWritten = outputContext.getCounters()
      .findCounter(TaskCounter.OUTPUT_BYTES)
      .getValue();
  statsReporter.reportDataSize(bytesWritten);

  long recordsWritten = outputContext.getCounters()
      .findCounter(TaskCounter.OUTPUT_RECORDS)
      .getValue();
  statsReporter.reportItemsProcessed(recordsWritten);
}

Example #22

Source File: TezVertexStats.java From spork with Apache License 2.0

5 votes

/**
 * Creates one {@code InputStats} entry per load and appends it to
 * {@code inputs}.
 *
 * <p>Record and HDFS byte counts default to -1 and are filled in from the
 * task and file-system counter groups when those values are present. A found
 * record count is also added to the map or reduce input record total,
 * depending on {@code isMapOpts}. Does nothing when there are no loads.
 */
public void addInputStatistics() {
    if (loads == null) {
        return;
    }

    final String recordsKey = TaskCounter.INPUT_RECORDS_PROCESSED.name();
    for (FileSpec load : loads) {
        long records = -1;
        long hdfsBytesRead = -1;
        String filename = load.getFileName();

        if (counters != null) {
            Map<String, Long> taskGroup = counters.get(TASK_COUNTER_GROUP);
            Long processed = (taskGroup == null) ? null : taskGroup.get(recordsKey);
            if (processed != null) {
                records = processed;
                if (this.isMapOpts) {
                    mapInputRecords += records;
                } else {
                    reduceInputRecords += records;
                }
            }

            Map<String, Long> fsGroup = counters.get(FS_COUNTER_GROUP);
            Long bytesRead = (fsGroup == null) ? null : fsGroup.get(PigStatsUtil.HDFS_BYTES_READ);
            if (bytesRead != null) {
                hdfsBytesRead = bytesRead;
            }
        }

        boolean succeeded = (state == JobState.SUCCESS);
        InputStats stats = new InputStats(filename, hdfsBytesRead, records, succeeded);
        stats.setConf(conf);
        inputs.add(stats);
    }
}

Example #23

Source File: FileBasedKVWriter.java From incubator-tez with Apache License 2.0

5 votes

public FileBasedKVWriter(TezOutputContext outputContext, Configuration conf) throws IOException {
  this.conf = conf;

  this.outputRecordsCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_RECORDS);
  this.outputBytesCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES);
  this.outputBytesCounterWithOverhead = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
  this.outputMaterializedBytesCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);

  this.rfs = ((LocalFileSystem) FileSystem.getLocal(this.conf)).getRaw();

  // Setup serialization
  keyClass = ConfigUtils.getIntermediateOutputKeyClass(this.conf);
  valClass = ConfigUtils.getIntermediateOutputValueClass(this.conf);

  // Setup compression
  if (ConfigUtils.shouldCompressIntermediateOutput(this.conf)) {
    Class<? extends CompressionCodec> codecClass = ConfigUtils
        .getIntermediateOutputCompressorClass(this.conf, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, this.conf);
  } else {
    codec = null;
  }

  this.ouputFileManager = TezRuntimeUtils.instantiateTaskOutputManager(conf,
      outputContext);
  LOG.info("Created KVWriter -> " + "compressionCodec: " + (codec == null ? "NoCompressionCodec"
      : codec.getClass().getName()));

  this.outputPath = ouputFileManager.getOutputFileForWrite();
  LOG.info("Writing data file: " + outputPath);

  // TODO NEWTEZ Consider making the buffer size configurable. Also consider
  // setting up an in-memory buffer which is occasionally flushed to disk so
  // that the output does not block.

  // TODO NEWTEZ maybe use appropriate counter
  this.writer = new IFile.Writer(conf, rfs, outputPath, keyClass, valClass,
      codec, null, outputBytesCounter);
}

Example #24

Source File: TestTezJobs.java From incubator-tez with Apache License 2.0

5 votes

/**
 * Runs a single-vertex sleep DAG with the AM staging directory placed on the
 * local file system and checks that the DAG succeeds and exposes DAG counters.
 *
 * <p>The Tez session is stopped in a {@code finally} block so that a failed
 * submission or assertion does not leave the session running.
 */
@Test
public void testNonDefaultFSStagingDir() throws Exception {
  SleepProcessorConfig spConf = new SleepProcessorConfig(1);

  DAG dag = new DAG("TezSleepProcessor");
  Vertex vertex = new Vertex("SleepVertex", new ProcessorDescriptor(
      SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 1,
      Resource.newInstance(1024, 1));
  dag.addVertex(vertex);

  // Point the staging directory at a randomly suffixed local directory.
  TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
  Path stagingDir = new Path(TEST_ROOT_DIR, "testNonDefaultFSStagingDir"
      + random.nextInt(100000));
  FileSystem localFs = FileSystem.getLocal(tezConf);
  stagingDir = localFs.makeQualified(stagingDir);
  localFs.mkdirs(stagingDir);
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());

  TezClient tezSession = new TezClient("TezSleepProcessor", tezConf, false);
  tezSession.start();
  try {
    DAGClient dagClient = tezSession.submitDAG(dag);

    // Poll until the DAG reaches a completed state.
    DAGStatus dagStatus = dagClient.getDAGStatus(null);
    while (!dagStatus.isCompleted()) {
      LOG.info("Waiting for job to complete. Sleeping for 500ms." + " Current state: "
          + dagStatus.getState());
      Thread.sleep(500L);
      dagStatus = dagClient.getDAGStatus(null);
    }
    // Fetch the final status again, this time including counters.
    dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));

    assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState());
    assertNotNull(dagStatus.getDAGCounters());
    assertNotNull(dagStatus.getDAGCounters().getGroup(FileSystemCounter.class.getName()));
    assertNotNull(dagStatus.getDAGCounters().findCounter(TaskCounter.GC_TIME_MILLIS));
    ExampleDriver.printDAGStatus(dagClient, new String[] { "SleepVertex" }, true, true);
  } finally {
    tezSession.stop();
  }
}

Example #25

Source File: TestPipelinedSorter.java From tez with Apache License 2.0

5 votes

/**
 * Writes generated data through a {@code PipelinedSorter} and verifies the
 * counters, output permissions and, when physical output bytes were produced,
 * the data in the final output file.
 *
 * @param partitions number of outputs (partitions) the sorter is created with
 * @param numKeys number of keys passed to {@code writeData}
 * @param keySize key size passed to {@code writeData}
 * @param initialAvailableMem memory handed to the sorter
 * @param minBlockSize minimum block size; shifted right by 20 bits before
 *     being stored in the MB-based setting
 * @throws IOException propagated from the sorter, the file system or the reader
 */
public void basicTest(int partitions, int numKeys, int keySize,
    long initialAvailableMem, int minBlockSize) throws IOException {
  this.numOutputs = partitions; // one output per partition
  conf.setInt(TezRuntimeConfiguration
      .TEZ_RUNTIME_PIPELINED_SORTER_MIN_BLOCK_SIZE_IN_MB, minBlockSize >> 20);
  PipelinedSorter sorter = new PipelinedSorter(this.outputContext, conf, numOutputs,
      initialAvailableMem);

  writeData(sorter, numKeys, keySize);

  //partition stats;
  ReportPartitionStats partitionStats =
      ReportPartitionStats.fromString(conf.get(
      TezRuntimeConfiguration.TEZ_RUNTIME_REPORT_PARTITION_STATS,
      TezRuntimeConfiguration.TEZ_RUNTIME_REPORT_PARTITION_STATS_DEFAULT));
  if (partitionStats.isEnabled()) {
    assertTrue(sorter.getPartitionStats() != null);
  }

  verifyCounters(sorter, outputContext);
  verifyOutputPermissions(outputContext.getUniqueIdentifier());
  Path outputFile = sorter.finalOutputFile;
  FileSystem fs = outputFile.getFileSystem(conf);
  TezCounter finalOutputBytes =
      outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
  if (finalOutputBytes.getValue() > 0) {
    IFile.Reader reader = new IFile.Reader(fs, outputFile, null, null, null, false, -1, 4096);
    try {
      verifyData(reader);
    } finally {
      // Close the reader even when data verification fails.
      reader.close();
    }
  }
  //Verify dataset
  verify(outputContext, atLeastOnce()).notifyProgress();
}

Example #26

Source File: TestPipelinedSorter.java From tez with Apache License 2.0

5 votes

/**
 * Checks the spill-related and output-size counters against the state of the
 * given pipelined sorter.
 *
 * @param sorter sorter whose spill count and final-merge setting are the reference
 * @param context output context holding the counters to check
 */
private void verifyCounters(PipelinedSorter sorter, OutputContext context) {
  TezCounter shuffleChunkCount =
      context.getCounters().findCounter(TaskCounter.SHUFFLE_CHUNK_COUNT);
  TezCounter extraSpills =
      context.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
  TezCounter extraSpillBytesWritten =
      context.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
  TezCounter extraSpillBytesRead =
      context.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);

  if (!sorter.isFinalMergeEnabled()) {
    // Without a final merge there are no additional spills, and the number of
    // files served by the shuffle handler equals the number of spills.
    assertTrue(0 == extraSpills.getValue());
    assertTrue(sorter.getNumSpills() == shuffleChunkCount.getValue());
    assertTrue(extraSpillBytesRead.getValue() == 0);
    assertTrue(extraSpillBytesWritten.getValue() == 0);
  } else {
    // With a final merge every spill but one counts as additional, and the
    // shuffle handler serves a single file.
    assertTrue(extraSpills.getValue() == (sorter.getNumSpills() - 1));
    assertTrue(1 == shuffleChunkCount.getValue());
    if (sorter.getNumSpills() > 1) {
      assertTrue(extraSpillBytesRead.getValue() > 0);
      assertTrue(extraSpillBytesWritten.getValue() > 0);
    }
  }

  TezCounter physicalOutputBytes =
      context.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
  assertTrue(physicalOutputBytes.getValue() >= 0);

  TezCounter outputBytesWithOverhead =
      context.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
  assertTrue(outputBytesWithOverhead.getValue() >= 0);
}

Example #27

Source File: TestDefaultSorter.java From tez with Apache License 2.0

5 votes

/**
 * Checks the spill-related and output-size counters against the state of the
 * given default sorter, and verifies that progress was notified at least once.
 *
 * @param sorter sorter whose spill count and final-merge setting are the reference
 * @param context output context holding the counters to check
 */
private void verifyCounters(DefaultSorter sorter, OutputContext context) {
  TezCounter shuffleChunkCount =
      context.getCounters().findCounter(TaskCounter.SHUFFLE_CHUNK_COUNT);
  TezCounter extraSpills =
      context.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
  TezCounter extraSpillBytesWritten =
      context.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
  TezCounter extraSpillBytesRead =
      context.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);

  if (!sorter.isFinalMergeEnabled()) {
    // Without a final merge there are no additional spills, and the number of
    // files served by the shuffle handler equals the number of spills.
    assertTrue(0 == extraSpills.getValue());
    assertTrue(sorter.getNumSpills() == shuffleChunkCount.getValue());
    assertTrue(extraSpillBytesRead.getValue() == 0);
    assertTrue(extraSpillBytesWritten.getValue() == 0);
  } else {
    // With a final merge every spill but one counts as additional, and the
    // shuffle handler serves a single file.
    assertTrue(extraSpills.getValue() == (sorter.getNumSpills() - 1));
    assertTrue(1 == shuffleChunkCount.getValue());
    if (sorter.getNumSpills() > 1) {
      assertTrue(extraSpillBytesRead.getValue() > 0);
      assertTrue(extraSpillBytesWritten.getValue() > 0);
    }
  }

  TezCounter physicalOutputBytes =
      context.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
  assertTrue(physicalOutputBytes.getValue() >= 0);

  TezCounter outputBytesWithOverhead =
      context.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
  assertTrue(outputBytesWithOverhead.getValue() >= 0);
  verify(context, atLeastOnce()).notifyProgress();
}

Example #28

Source File: TestMultiMRInput.java From tez with Apache License 2.0

5 votes

/**
 * Drains every key/value reader of the input and checks the records, the
 * progress notifications, the split-length counter and the reader count.
 *
 * @param input input whose readers are consumed
 * @param data expected records; each record read is removed from this map
 * @param expectedReaderCounts number of readers the input should expose
 * @param inputBytes expected value of the {@code INPUT_SPLIT_LENGTH_BYTES} counter
 * @throws Exception propagated from the readers or from verification
 */
private void assertReaders(MultiMRInput input, LinkedHashMap<LongWritable, Text> data,
    int expectedReaderCounts, long inputBytes) throws Exception {
  int readersSeen = 0;
  int recordsSeen = 0;
  for (KeyValueReader kvReader : input.getKeyValueReaders()) {
    readersSeen++;
    while (kvReader.next()) {
      recordsSeen++;
      // Expected notification count: records read so far plus readers seen so far, minus one.
      verify(input.getContext(), times(recordsSeen + readersSeen - 1)).notifyProgress();
      if (data.isEmpty()) {
        fail("Found more records than expected");
      }
      Object key = kvReader.getCurrentKey();
      Object value = kvReader.getCurrentValue();
      assertEquals(value, data.remove(key));
    }

    // Reading past the end of an exhausted reader must be rejected.
    try {
      kvReader.next(); //should throw exception
      fail();
    } catch (IOException expected) {
      assertTrue(expected.getMessage().contains("For usage, please refer to"));
    }
  }

  long splitLengthBytes = input.getContext().getCounters()
      .findCounter(TaskCounter.INPUT_SPLIT_LENGTH_BYTES)
      .getValue();
  assertEquals(inputBytes, splitLengthBytes);
  assertEquals(expectedReaderCounts, readersSeen);
}

Example #29

Source File: MRInput.java From tez with Apache License 2.0

5 votes

/**
 * Closes the record reader and passes the processed input record count to
 * the statistics reporter.
 *
 * @return {@code null}; no events are produced on close
 * @throws IOException propagated from closing the reader
 */
@Override
public List<Event> close() throws IOException {
  mrReader.close();

  // Publish the final record count as a task statistic.
  long recordsProcessed = getContext().getCounters()
      .findCounter(TaskCounter.INPUT_RECORDS_PROCESSED)
      .getValue();
  getContext().getStatisticsReporter().reportItemsProcessed(recordsProcessed);

  return null;
}

Example #30

Source File: MROutput.java From tez with Apache License 2.0

5 votes

/**
 * Flushes this output, logs that it was closed, and passes the output record
 * count to the statistics reporter.
 *
 * @return {@code null}; no events are produced on close
 * @throws IOException propagated from flushing
 */
@Override
public synchronized List<Event> close() throws IOException {
  flush();
  LOG.info(getContext().getDestinationVertexName() + " closed");

  // Publish the final record count as a task statistic.
  long recordsWritten = getContext().getCounters()
      .findCounter(TaskCounter.OUTPUT_RECORDS)
      .getValue();
  getContext().getStatisticsReporter().reportItemsProcessed(recordsWritten);

  return null;
}