Java Code Examples for org.apache.hadoop.mapreduce.Job#getTaskReports()

The following examples show how to use org.apache.hadoop.mapreduce.Job#getTaskReports(). Each example is drawn from an open-source project; the source file and license are noted above it.
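Before the project examples, here is a minimal, self-contained sketch of the API; the class name, job name, and elided job setup are hypothetical, and getTaskReports() may return an empty array until tasks have been scheduled:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskReport;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskReportDemo {
  public static void main(String[] args)
      throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    // A real job needs input/output paths, a mapper, and a jar configured
    // before submission; that setup is elided in this sketch.
    Job job = Job.getInstance(conf, "task-report-demo"); // hypothetical name
    job.submit();

    // One TaskReport per map task; pass TaskType.REDUCE for reduce tasks.
    TaskReport[] reports = job.getTaskReports(TaskType.MAP);
    for (TaskReport report : reports) {
      System.out.printf("task %s: %s (%.0f%% complete)%n",
          report.getTaskId(), report.getCurrentStatus(),
          report.getProgress() * 100);
    }
  }
}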
Example 1
Source File: CLI.java    From hadoop with Apache License 2.0
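Hadoop's own job CLI uses getTaskReports() to fetch all tasks of a given type, then filters the reports by task state before printing the attempts for each match.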
/**
 * Display the information about a job's tasks, of a particular type and
 * in a particular state
 * 
 * @param job the job
 * @param type the type of the task (map/reduce/setup/cleanup)
 * @param state the state of the task 
 * (pending/running/completed/failed/killed)
 */
protected void displayTasks(Job job, String type, String state)
    throws IOException, InterruptedException {
  TaskReport[] reports = job.getTaskReports(TaskType.valueOf(
      org.apache.hadoop.util.StringUtils.toUpperCase(type)));
  for (TaskReport report : reports) {
    TIPStatus status = report.getCurrentStatus();
    if ((state.equalsIgnoreCase("pending") && status == TIPStatus.PENDING) ||
        (state.equalsIgnoreCase("running") && status == TIPStatus.RUNNING) ||
        (state.equalsIgnoreCase("completed") && status == TIPStatus.COMPLETE) ||
        (state.equalsIgnoreCase("failed") && status == TIPStatus.FAILED) ||
        (state.equalsIgnoreCase("killed") && status == TIPStatus.KILLED)) {
      printTaskAttempts(report);
    }
  }
}
 
Example 2
Source File: UpdateColumnJob.java    From indexr with Apache License 2.0
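This driver calls getTaskReports(TaskType.MAP) after a failed job to pull each map task's diagnostic messages for error logging.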
public boolean doRun(Config upcolConfig) throws Exception {
    JobConf jobConf = new JobConf(getConf(), UpdateColumnJob.class);
    jobConf.setKeepFailedTaskFiles(false);
    jobConf.setNumReduceTasks(0);
    String jobName = String.format("indexr-upcol-%s-%s-%s",
            upcolConfig.table,
            LocalDateTime.now().format(timeFormatter),
            RandomStringUtils.randomAlphabetic(5));
    jobConf.setJobName(jobName);
    jobConf.set(CONFKEY, JsonUtil.toJson(upcolConfig));
    Path workDir = new Path(jobConf.getWorkingDirectory(), jobName);
    jobConf.setWorkingDirectory(workDir);

    Job job = Job.getInstance(jobConf);
    job.setInputFormatClass(SegmentInputFormat.class);
    job.setMapperClass(UpColSegmentMapper.class);
    job.setJarByClass(UpdateColumnJob.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(UpColSegmentOutputFormat.class);

    job.submit();
    boolean ok = job.waitForCompletion(true);
    if (!ok) {
        TaskReport[] reports = job.getTaskReports(TaskType.MAP);
        if (reports != null) {
            for (TaskReport report : reports) {
                log.error("Error in task [%s] : %s", report.getTaskId(), Arrays.toString(report.getDiagnostics()));
            }
        }
    }
    return ok;
}
 
Example 3
Source File: IndexerJobDriver.java    From incubator-retired-blur with Apache License 2.0
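Here a partitioning job is run to completion, and on success the reduce-task reports are packed into the result object alongside the job counters and shard count.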
private PartitionedInputResult buildPartitionedInputData(String uuid, Path tmpPath, TableDescriptor descriptor,
    List<Path> inprogressPathList, String snapshot, Path fileCachePath) throws IOException, ClassNotFoundException,
    InterruptedException {
  Job job = Job.getInstance(getConf(), "Partitioning data for table [" + descriptor.getName() + "]");
  job.getConfiguration().set(BLUR_UPDATE_ID, uuid);

  // Needed for the bloom filter path information.
  BlurOutputFormat.setTableDescriptor(job, descriptor);
  BlurInputFormat.setLocalCachePath(job, fileCachePath);
  ExistingDataIndexLookupMapper.setSnapshot(job, snapshot);

  for (Path p : inprogressPathList) {
    FileInputFormat.addInputPath(job, p);
  }
  Path outputPath = new Path(tmpPath, UUID.randomUUID().toString());
  job.setJarByClass(getClass());
  job.setMapperClass(LookupBuilderMapper.class);
  job.setReducerClass(LookupBuilderReducer.class);

  int shardCount = descriptor.getShardCount();
  job.setNumReduceTasks(shardCount);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(BooleanWritable.class);
  FileOutputFormat.setOutputPath(job, outputPath);
  if (job.waitForCompletion(true)) {
    return new PartitionedInputResult(outputPath, job.getCounters(), shardCount, job.getTaskReports(TaskType.REDUCE));
  } else {
    throw new IOException("Partitioning failed!");
  }
}
 
Example 4
Source File: TestClientRedirect.java    From hadoop with Apache License 2.0
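This test exercises client redirection between the application master and the history server; getTaskReports(TaskType.MAP) is one of several Job methods invoked while the AM is restarting to verify that no exception is thrown.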
@Test
public void testRedirect() throws Exception {

  Configuration conf = new YarnConfiguration();
  conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
  conf.set(YarnConfiguration.RM_ADDRESS, RMADDRESS);
  conf.set(JHAdminConfig.MR_HISTORY_ADDRESS, HSHOSTADDRESS);

  // Start the RM.
  RMService rmService = new RMService("test");
  rmService.init(conf);
  rmService.start();

  // Start the AM.
  AMService amService = new AMService();
  amService.init(conf);
  amService.start(conf);

  // Start the HS.
  HistoryService historyService = new HistoryService();
  historyService.init(conf);
  historyService.start(conf);

  LOG.info("services started");

  Cluster cluster = new Cluster(conf);
  org.apache.hadoop.mapreduce.JobID jobID =
    new org.apache.hadoop.mapred.JobID("201103121733", 1);
  org.apache.hadoop.mapreduce.Counters counters =
      cluster.getJob(jobID).getCounters();
  validateCounters(counters);
  Assert.assertTrue(amContact);

  LOG.info("Sleeping for 5 seconds before stop for" +
  " the client socket to not get EOF immediately..");
  Thread.sleep(5000);

  //bring down the AM service
  amService.stop();

  LOG.info("Sleeping for 5 seconds after stop for" +
  		" the server to exit cleanly..");
  Thread.sleep(5000);

  amRestarting = true;

  // Same client
  //results are returned from fake (not started job)
  counters = cluster.getJob(jobID).getCounters();
  Assert.assertEquals(0, counters.countCounters());
  Job job = cluster.getJob(jobID);
  org.apache.hadoop.mapreduce.TaskID taskId =
    new org.apache.hadoop.mapreduce.TaskID(jobID, TaskType.MAP, 0);
  TaskAttemptID tId = new TaskAttemptID(taskId, 0);

  //invoke all methods to check that no exception is thrown
  job.killJob();
  job.killTask(tId);
  job.failTask(tId);
  job.getTaskCompletionEvents(0, 100);
  job.getStatus();
  job.getTaskDiagnostics(tId);
  job.getTaskReports(TaskType.MAP);
  job.getTrackingURL();

  amRestarting = false;
  amService = new AMService();
  amService.init(conf);
  amService.start(conf);
  amContact = false; //reset

  counters = cluster.getJob(jobID).getCounters();
  validateCounters(counters);
  Assert.assertTrue(amContact);

  // Stop the AM. It is not even restarting. So it should be treated as
  // completed.
  amService.stop();

  // Same client
  counters = cluster.getJob(jobID).getCounters();
  validateCounters(counters);
  Assert.assertTrue(hsContact);

  rmService.stop();
  historyService.stop();
}
 