org.apache.hadoop.tools.rumen.JobStory Java Examples

The following examples show how to use org.apache.hadoop.tools.rumen.JobStory. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SimulatorJobTracker.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Submits the job identified by {@code jobId}.
 *
 * <p>If the job is already registered in {@code jobs}, its current status is
 * returned and nothing else happens. Otherwise the matching {@code JobStory}
 * is fetched from {@code SimulatorJobCache}, the clock is validated against
 * the story's submission time, and a new {@code SimulatorJobInProgress} is
 * registered through {@code addJob}.
 *
 * @param jobId id of the job to submit
 * @return the status of the already-registered job, or the result of
 *         {@code addJob} for a newly created one
 * @throws IllegalArgumentException if no story is cached for {@code jobId}
 * @throws IOException declared by the overridden method
 */
@Override
public synchronized JobStatus submitJob(JobID jobId) throws IOException {
  final boolean debug = LOG.isDebugEnabled();
  if (debug) {
    LOG.debug("submitJob for jobname = " + jobId);
  }

  if (!jobs.containsKey(jobId)) {
    final JobStory story = SimulatorJobCache.get(jobId);
    if (story == null) {
      throw new IllegalArgumentException("Job not found in SimulatorJobCache: "+jobId);
    }
    validateAndSetClock(story.getSubmissionTime());

    final SimulatorJobInProgress inProgress =
        new SimulatorJobInProgress(jobId, this, this.conf, story);
    return addJob(jobId, inProgress);
  }

  // The job is already known: report its status instead of starting it twice.
  if (debug) {
    LOG.debug("Job '" + jobId.getId() + "' already present ");
  }
  return jobs.get(jobId).getStatus();
}
 
Example #2
Source File: TestGridMixClasses.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises {@code equals} and {@code compareTo} on {@code LoadJob} instances
 * that are built from identical arguments except for the last constructor
 * argument (0 versus 1).
 *
 * @throws Exception if the current user cannot be resolved or a job cannot
 *         be constructed
 */
@Test(timeout = 30000)
public void testCompareGridmixJob() throws Exception {
  final Configuration configuration = new Configuration();
  final Path outputRoot = new Path("target");

  final JobStory story = mock(JobStory.class);
  when(story.getName()).thenReturn("JobName");
  when(story.getJobConf()).thenReturn(new JobConf(configuration));

  final UserGroupInformation user = UserGroupInformation.getCurrentUser();
  final GridmixJob first =
      new LoadJob(configuration, 1000L, story, outputRoot, user, 0);
  final GridmixJob firstTwin =
      new LoadJob(configuration, 1000L, story, outputRoot, user, 0);
  final GridmixJob second =
      new LoadJob(configuration, 1000L, story, outputRoot, user, 1);
  final GridmixJob secondTwin =
      new LoadJob(configuration, 1000L, story, outputRoot, user, 1);

  // Same last argument: the jobs are equal and compare as 0.
  assertTrue(first.equals(firstTwin));
  assertEquals(0, first.compareTo(firstTwin));

  // Different last argument: the jobs are unequal and the first sorts lower.
  assertFalse(first.equals(second));
  assertEquals(-1, first.compareTo(second));
  assertEquals(-1, first.compareTo(secondTwin));
}
 
Example #3
Source File: TestSleepJob.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that every sleep job created while
 * {@code SleepJob.SLEEPJOB_MAPTASK_ONLY} is set to {@code true} reports zero
 * reduce tasks, and that the producer hands out exactly the requested number
 * of jobs.
 *
 * @throws Exception if a job cannot be created, built or called, or if the
 *         producer cannot be closed
 */
@Test  (timeout=600000)
public void testMapTasksOnlySleepJobs() throws Exception {
  final int numJobs = 5;
  Configuration configuration = GridmixTestUtils.mrvl.getConfig();

  DebugJobProducer jobProducer = new DebugJobProducer(numJobs, configuration);
  int seq = 1;
  try {
    configuration.setBoolean(SleepJob.SLEEPJOB_MAPTASK_ONLY, true);

    UserGroupInformation ugi = UserGroupInformation.getLoginUser();
    JobStory story;
    while ((story = jobProducer.getNextJob()) != null) {
      GridmixJob gridmixJob = JobCreator.SLEEPJOB.createGridmixJob(configuration, 0,
              story, new Path("ignored"), ugi, seq++);
      gridmixJob.buildSplits(null);
      Job job = gridmixJob.call();
      assertEquals(0, job.getNumReduceTasks());
    }
  } finally {
    // Close the producer even when job setup or an assertion fails.
    jobProducer.close();
  }
  // seq starts at 1 and is incremented once per produced job.
  assertEquals(numJobs + 1, seq);
}
 
Example #4
Source File: TestSleepJob.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Creates {@code njobs} sleep jobs with
 * {@code JobCreator.SLEEPJOB_RANDOM_LOCATIONS} set to {@code locations} and
 * asserts that every input split reports exactly that many locations.
 *
 * @param locations value stored under {@code SLEEPJOB_RANDOM_LOCATIONS} and
 *        the expected number of locations per split
 * @param njobs number of jobs requested from the {@code DebugJobProducer}
 * @param ugi user the jobs are created for
 * @throws Exception if a job cannot be created, its splits cannot be
 *         computed, or the producer cannot be closed
 */
private void testRandomLocation(int locations, int njobs,
                                UserGroupInformation ugi) throws Exception {
  Configuration configuration = new Configuration();

  DebugJobProducer jobProducer = new DebugJobProducer(njobs, configuration);
  try {
    Configuration jconf = GridmixTestUtils.mrvl.getConfig();
    jconf.setInt(JobCreator.SLEEPJOB_RANDOM_LOCATIONS, locations);

    JobStory story;
    int seq = 1;
    while ((story = jobProducer.getNextJob()) != null) {
      GridmixJob gridmixJob = JobCreator.SLEEPJOB.createGridmixJob(jconf, 0,
              story, new Path("ignored"), ugi, seq++);
      gridmixJob.buildSplits(null);
      List<InputSplit> splits = new SleepJob.SleepInputFormat()
              .getSplits(gridmixJob.getJob());
      for (InputSplit split : splits) {
        assertEquals(locations, split.getLocations().length);
      }
    }
  } finally {
    // Close the producer even when job setup or an assertion fails.
    jobProducer.close();
  }
}
 
Example #5
Source File: TestSleepJob.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that every sleep job created while
 * {@code SleepJob.SLEEPJOB_MAPTASK_ONLY} is set to {@code true} reports zero
 * reduce tasks, and that the producer hands out exactly the requested number
 * of jobs.
 *
 * @throws Exception if a job cannot be created, built or called, or if the
 *         producer cannot be closed
 */
@Test  (timeout=600000)
public void testMapTasksOnlySleepJobs() throws Exception {
  final int numJobs = 5;
  Configuration configuration = GridmixTestUtils.mrvl.getConfig();

  DebugJobProducer jobProducer = new DebugJobProducer(numJobs, configuration);
  int seq = 1;
  try {
    configuration.setBoolean(SleepJob.SLEEPJOB_MAPTASK_ONLY, true);

    UserGroupInformation ugi = UserGroupInformation.getLoginUser();
    JobStory story;
    while ((story = jobProducer.getNextJob()) != null) {
      GridmixJob gridmixJob = JobCreator.SLEEPJOB.createGridmixJob(configuration, 0,
              story, new Path("ignored"), ugi, seq++);
      gridmixJob.buildSplits(null);
      Job job = gridmixJob.call();
      assertEquals(0, job.getNumReduceTasks());
    }
  } finally {
    // Close the producer even when job setup or an assertion fails.
    jobProducer.close();
  }
  // seq starts at 1 and is incremented once per produced job.
  assertEquals(numJobs + 1, seq);
}
 
Example #6
Source File: TestSleepJob.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Creates {@code njobs} sleep jobs with
 * {@code JobCreator.SLEEPJOB_RANDOM_LOCATIONS} set to {@code locations} and
 * asserts that every input split reports exactly that many locations.
 *
 * @param locations value stored under {@code SLEEPJOB_RANDOM_LOCATIONS} and
 *        the expected number of locations per split
 * @param njobs number of jobs requested from the {@code DebugJobProducer}
 * @param ugi user the jobs are created for
 * @throws Exception if a job cannot be created, its splits cannot be
 *         computed, or the producer cannot be closed
 */
private void testRandomLocation(int locations, int njobs,
                                UserGroupInformation ugi) throws Exception {
  Configuration configuration = new Configuration();

  DebugJobProducer jobProducer = new DebugJobProducer(njobs, configuration);
  try {
    Configuration jconf = GridmixTestUtils.mrvl.getConfig();
    jconf.setInt(JobCreator.SLEEPJOB_RANDOM_LOCATIONS, locations);

    JobStory story;
    int seq = 1;
    while ((story = jobProducer.getNextJob()) != null) {
      GridmixJob gridmixJob = JobCreator.SLEEPJOB.createGridmixJob(jconf, 0,
              story, new Path("ignored"), ugi, seq++);
      gridmixJob.buildSplits(null);
      List<InputSplit> splits = new SleepJob.SleepInputFormat()
              .getSplits(gridmixJob.getJob());
      for (InputSplit split : splits) {
        assertEquals(locations, split.getLocations().length);
      }
    }
  } finally {
    // Close the producer even when job setup or an assertion fails.
    jobProducer.close();
  }
}
 
Example #7
Source File: Statistics.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the {@code JobStats} for a job.
 *
 * <p>The map and reduce counts are taken from {@code jobdesc} when it is
 * present; both default to {@code -1} when it is {@code null}.
 *
 * @param job the job the statistics are generated for
 * @param jobdesc the job's description, may be {@code null}
 * @return a new {@code JobStats} built from the two counts and {@code job}
 * @throws IllegalArgumentException if {@code jobdesc} is {@code null} while
 *         the job's sequence id is non-negative
 */
public static JobStats generateJobStats(Job job, JobStory jobdesc) {
  final int sequenceId = GridmixJob.getJobSeqId(job);
  final boolean storyMissing = (jobdesc == null);

  // A job with a non-negative sequence id must come with a description.
  if (storyMissing && sequenceId >= 0) {
    throw new IllegalArgumentException("JobStory not available for job " 
                                       + job.getJobID());
  }

  // Note that the ZombieJob will return a >= 0 value
  final int maps = storyMissing ? -1 : jobdesc.getNumberMaps();
  final int reds = storyMissing ? -1 : jobdesc.getNumberReduces();
  return new JobStats(maps, reds, job);
}
 
Example #8
Source File: TestGridMixClasses.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises {@code equals} and {@code compareTo} on {@code LoadJob} instances
 * that are built from identical arguments except for the last constructor
 * argument (0 versus 1).
 *
 * @throws Exception if the current user cannot be resolved or a job cannot
 *         be constructed
 */
@Test(timeout = 30000)
public void testCompareGridmixJob() throws Exception {
  final Configuration configuration = new Configuration();
  final Path outputRoot = new Path("target");

  final JobStory story = mock(JobStory.class);
  when(story.getName()).thenReturn("JobName");
  when(story.getJobConf()).thenReturn(new JobConf(configuration));

  final UserGroupInformation user = UserGroupInformation.getCurrentUser();
  final GridmixJob first =
      new LoadJob(configuration, 1000L, story, outputRoot, user, 0);
  final GridmixJob firstTwin =
      new LoadJob(configuration, 1000L, story, outputRoot, user, 0);
  final GridmixJob second =
      new LoadJob(configuration, 1000L, story, outputRoot, user, 1);
  final GridmixJob secondTwin =
      new LoadJob(configuration, 1000L, story, outputRoot, user, 1);

  // Same last argument: the jobs are equal and compare as 0.
  assertTrue(first.equals(firstTwin));
  assertEquals(0, first.compareTo(firstTwin));

  // Different last argument: the jobs are unequal and the first sorts lower.
  assertFalse(first.equals(second));
  assertEquals(-1, first.compareTo(second));
  assertEquals(-1, first.compareTo(secondTwin));
}
 
Example #9
Source File: SimulatorJobStoryProducer.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the next job from {@code producer} that passes the simulator's
 * filter. A job is skipped when its outcome is {@code KILLED}, when it
 * reports zero maps, or when it reports zero logged maps.
 *
 * @return the next accepted job, or {@code null} once the producer has no
 *         more jobs
 * @throws IOException if the producer fails to supply the next job
 */
private JobStory getNextJobFiltered() throws IOException {
  ZombieJob candidate;
  while ((candidate = producer.getNextJob()) != null) {
    final boolean rejected =
        candidate.getOutcome() == Pre21JobHistoryConstants.Values.KILLED
            || candidate.getNumberMaps() == 0
            || candidate.getNumLoggedMaps() == 0;
    if (!rejected) {
      return candidate;
    }
  }
  // Producer exhausted without finding another acceptable job.
  return null;
}
 
Example #10
Source File: Statistics.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the {@code JobStats} for a job.
 *
 * <p>The map and reduce counts are taken from {@code jobdesc} when it is
 * present; both default to {@code -1} when it is {@code null}.
 *
 * @param job the job the statistics are generated for
 * @param jobdesc the job's description, may be {@code null}
 * @return a new {@code JobStats} built from the two counts and {@code job}
 * @throws IllegalArgumentException if {@code jobdesc} is {@code null} while
 *         the job's sequence id is non-negative
 */
public static JobStats generateJobStats(Job job, JobStory jobdesc) {
  final int sequenceId = GridmixJob.getJobSeqId(job);
  final boolean storyMissing = (jobdesc == null);

  // A job with a non-negative sequence id must come with a description.
  if (storyMissing && sequenceId >= 0) {
    throw new IllegalArgumentException("JobStory not available for job " 
                                       + job.getJobID());
  }

  // Note that the ZombieJob will return a >= 0 value
  final int maps = storyMissing ? -1 : jobdesc.getNumberMaps();
  final int reds = storyMissing ? -1 : jobdesc.getNumberReduces();
  return new JobStats(maps, reds, job);
}
 
Example #11
Source File: DebugJobProducer.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Hands out a fresh {@code MockJob} while the remaining-job counter is
 * positive, recording each one in {@code submitted}. The counter is
 * decremented on every call.
 *
 * @return a new mock job, or {@code null} once the counter is used up
 * @throws IOException declared by the overridden method
 */
@Override
public JobStory getNextJob() throws IOException {
  if (numJobs.getAndDecrement() <= 0) {
    return null;
  }
  final MockJob next = new MockJob(conf);
  submitted.add(next);
  return next;
}
 
Example #12
Source File: DebugJobProducer.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a producer that hands out {@code numJobs} jobs. Calls
 * {@code MockJob.reset()} before storing any state, and starts with an empty
 * list of submitted jobs.
 *
 * @param numJobs initial value of the remaining-job counter
 * @param conf configuration stored for the jobs this producer creates
 */
public DebugJobProducer(int numJobs, Configuration conf) {
  MockJob.reset();
  this.submitted = new ArrayList<JobStory>();
  this.numJobs = new AtomicInteger(numJobs);
  this.conf = conf;
}
 
Example #13
Source File: JobFactory.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Fetches the next job from {@code jobProducer} and counts it in
 * {@code numJobsInTrace} when one is returned.
 *
 * @return the next job, or {@code null} if the producer returned none
 * @throws IOException if the producer fails to supply the next job
 */
private JobStory getNextJobFromTrace() throws IOException {
  final JobStory next = jobProducer.getNextJob();
  if (next == null) {
    return null;
  }
  numJobsInTrace++;
  return next;
}
 
Example #14
Source File: TestGridmixSubmission.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that {@code js} matches the checked-in WordCount {@code JobStory}.
 * The name, user, queue name, submission time, number of maps and number of
 * reduces are joined with {@code ':'} and compared with the expected string.
 *
 * @param js the candidate JobStory; must not be {@code null}
 */
private void verifyWordCountJobStory(JobStory js) {
  assertNotNull("Null JobStory", js);

  final StringBuilder actual = new StringBuilder();
  actual.append(js.getName()).append(":");
  actual.append(js.getUser()).append(":");
  actual.append(js.getQueueName()).append(":");
  actual.append(js.getSubmissionTime()).append(":");
  actual.append(js.getNumberMaps()).append(":");
  actual.append(js.getNumberReduces());

  final String expected = "WordCount:johndoe:default:1285322645148:3:1";
  assertEquals("Unexpected JobStory", expected, actual.toString());
}
 
Example #15
Source File: JobFactory.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Fetches the next job from {@code jobProducer} and counts it in
 * {@code numJobsInTrace} when one is returned.
 *
 * @return the next job, or {@code null} if the producer returned none
 * @throws IOException if the producer fails to supply the next job
 */
private JobStory getNextJobFromTrace() throws IOException {
  final JobStory next = jobProducer.getNextJob();
  if (next == null) {
    return null;
  }
  numJobsInTrace++;
  return next;
}
 
Example #16
Source File: SleepJob.java    From hadoop with Apache License 2.0 5 votes vote down vote up
public SleepJob(Configuration conf, long submissionMillis, JobStory jobdesc,
    Path outRoot, UserGroupInformation ugi, int seq, int numLocations,
    String[] hosts) throws IOException {
  super(conf, submissionMillis, jobdesc, outRoot, ugi, seq);
  this.fakeLocations = numLocations;
  this.hosts = hosts.clone();
  this.selector = (fakeLocations > 0)? new Selector(hosts.length, (float) fakeLocations
      / hosts.length, rand.get()) : null;
  this.mapTasksOnly = conf.getBoolean(SLEEPJOB_MAPTASK_ONLY, false);
  mapMaxSleepTime = conf.getLong(GRIDMIX_SLEEP_MAX_MAP_TIME, Long.MAX_VALUE);
  reduceMaxSleepTime = conf.getLong(GRIDMIX_SLEEP_MAX_REDUCE_TIME,
      Long.MAX_VALUE);
}
 
Example #17
Source File: DistributedCacheEmulator.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Create the list of unique distributed cache files needed for all the
 * simulated jobs and write the list to a special file.
 *
 * <p>Only jobs whose outcome is {@code SUCCESS} and whose submission time is
 * non-negative contribute to the list. The producer is always closed before
 * this method returns or throws.
 *
 * @param jsp job story producer for the trace
 * @return exit code, as returned by {@code writeDistCacheFilesList()}
 * @throws IOException propagated from the producer or from the helper
 *         methods called here
 */
private int buildDistCacheFilesList(JobStoryProducer jsp) throws IOException {
  try {
    // Read all the jobs from the trace file and build the list of unique
    // distributed cache files.
    JobStory jobStory;
    while ((jobStory = jsp.getNextJob()) != null) {
      if (jobStory.getOutcome() == Pre21JobHistoryConstants.Values.SUCCESS && 
         jobStory.getSubmissionTime() >= 0) {
        updateHDFSDistCacheFilesList(jobStory);
      }
    }
  } finally {
    // Release the trace even when reading or processing a job fails.
    jsp.close();
  }

  return writeDistCacheFilesList();
}
 
Example #18
Source File: DistributedCacheEmulator.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Create the list of unique distributed cache files needed for all the
 * simulated jobs and write the list to a special file.
 *
 * <p>Only jobs whose outcome is {@code SUCCESS} and whose submission time is
 * non-negative contribute to the list. The producer is always closed before
 * this method returns or throws.
 *
 * @param jsp job story producer for the trace
 * @return exit code, as returned by {@code writeDistCacheFilesList()}
 * @throws IOException propagated from the producer or from the helper
 *         methods called here
 */
private int buildDistCacheFilesList(JobStoryProducer jsp) throws IOException {
  try {
    // Read all the jobs from the trace file and build the list of unique
    // distributed cache files.
    JobStory jobStory;
    while ((jobStory = jsp.getNextJob()) != null) {
      if (jobStory.getOutcome() == Pre21JobHistoryConstants.Values.SUCCESS && 
         jobStory.getSubmissionTime() >= 0) {
        updateHDFSDistCacheFilesList(jobStory);
      }
    }
  } finally {
    // Release the trace even when reading or processing a job fails.
    jsp.close();
  }

  return writeDistCacheFilesList();
}
 
Example #19
Source File: SleepJob.java    From big-c with Apache License 2.0 5 votes vote down vote up
public SleepJob(Configuration conf, long submissionMillis, JobStory jobdesc,
    Path outRoot, UserGroupInformation ugi, int seq, int numLocations,
    String[] hosts) throws IOException {
  super(conf, submissionMillis, jobdesc, outRoot, ugi, seq);
  this.fakeLocations = numLocations;
  this.hosts = hosts.clone();
  this.selector = (fakeLocations > 0)? new Selector(hosts.length, (float) fakeLocations
      / hosts.length, rand.get()) : null;
  this.mapTasksOnly = conf.getBoolean(SLEEPJOB_MAPTASK_ONLY, false);
  mapMaxSleepTime = conf.getLong(GRIDMIX_SLEEP_MAX_MAP_TIME, Long.MAX_VALUE);
  reduceMaxSleepTime = conf.getLong(GRIDMIX_SLEEP_MAX_REDUCE_TIME,
      Long.MAX_VALUE);
}
 
Example #20
Source File: DebugJobProducer.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a producer that hands out {@code numJobs} jobs. Calls
 * {@code MockJob.reset()} before storing any state, and starts with an empty
 * list of submitted jobs.
 *
 * @param numJobs initial value of the remaining-job counter
 * @param conf configuration stored for the jobs this producer creates
 */
public DebugJobProducer(int numJobs, Configuration conf) {
  MockJob.reset();
  this.submitted = new ArrayList<JobStory>();
  this.numJobs = new AtomicInteger(numJobs);
  this.conf = conf;
}
 
Example #21
Source File: DebugJobProducer.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Hands out a fresh {@code MockJob} while the remaining-job counter is
 * positive, recording each one in {@code submitted}. The counter is
 * decremented on every call.
 *
 * @return a new mock job, or {@code null} once the counter is used up
 * @throws IOException declared by the overridden method
 */
@Override
public JobStory getNextJob() throws IOException {
  if (numJobs.getAndDecrement() <= 0) {
    return null;
  }
  final MockJob next = new MockJob(conf);
  submitted.add(next);
  return next;
}
 
Example #22
Source File: TestGridmixSubmission.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that {@code js} matches the checked-in WordCount {@code JobStory}.
 * The name, user, queue name, submission time, number of maps and number of
 * reduces are joined with {@code ':'} and compared with the expected string.
 *
 * @param js the candidate JobStory; must not be {@code null}
 */
private void verifyWordCountJobStory(JobStory js) {
  assertNotNull("Null JobStory", js);

  final StringBuilder actual = new StringBuilder();
  actual.append(js.getName()).append(":");
  actual.append(js.getUser()).append(":");
  actual.append(js.getQueueName()).append(":");
  actual.append(js.getSubmissionTime()).append(":");
  actual.append(js.getNumberMaps()).append(":");
  actual.append(js.getNumberReduces());

  final String expected = "WordCount:johndoe:default:1285322645148:3:1";
  assertEquals("Unexpected JobStory", expected, actual.toString());
}
 
Example #23
Source File: SimulatorJobClient.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Produces the initial event list for this job client: a single
 * {@code JobSubmissionEvent} for the first job supplied by
 * {@code jobStoryProducer}.
 *
 * @param when the time at which the first job is expected to be submitted
 * @return a single-element list holding the submission event of the first
 *         job
 * @throws IOException if the producer has no job to offer, if the first
 *         job's submission time differs from {@code when}, or if the
 *         producer fails to supply the job
 */
@Override
public List<SimulatorEvent> init(long when) throws IOException {
  JobStory job = jobStoryProducer.getNextJob();
  if (job == null) {
    // An exhausted producer returns null; fail with a clear message rather
    // than a NullPointerException on the dereference below.
    throw new IOException("No job available from the job story producer; "
        + "cannot schedule the first job at " + when + ".");
  }
  long submissionTime = job.getSubmissionTime();
  if (submissionTime != when) {
    throw new IOException("Inconsistent submission time for the first job: "
        + when + " != " + submissionTime + ".");
  }
  JobSubmissionEvent event = new JobSubmissionEvent(this, when, job);
  return Collections.<SimulatorEvent> singletonList(event);
}
 
Example #24
Source File: SimulatorJobClient.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Stores {@code job} in {@code SimulatorJobCache} and submits it to the job
 * tracker. The id carried by the story is used when present; otherwise a new
 * id is requested from the job tracker. The id is downgraded to
 * {@code org.apache.hadoop.mapred.JobID} for both the cache key and the
 * submission.
 *
 * @param job the job story to submit
 * @return the status returned by the job tracker's {@code submitJob}
 * @throws IOException propagated from the job tracker calls
 * @throws InterruptedException propagated from the job tracker calls
 */
@SuppressWarnings("deprecation")
private JobStatus submitJob(JobStory job)
    throws IOException, InterruptedException {
  // Honor the JobID from the JobStory first; fall back to the JobTracker.
  final JobID storyId = job.getJobID();
  final JobID jobId = (storyId != null) ? storyId : jobTracker.getNewJobId();

  final org.apache.hadoop.mapred.JobID downgraded =
      org.apache.hadoop.mapred.JobID.downgrade(jobId);
  SimulatorJobCache.put(downgraded, job);
  return jobTracker.submitJob(downgraded);
}
 
Example #25
Source File: SimulatorJobStoryProducer.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the next filtered job wrapped in a {@code SimulatorJobStory} whose
 * time is the job's submission time minus {@code relativeTime}.
 *
 * <p>{@code relativeTime} is fixed on the first returned job as that job's
 * submission time minus {@code firstJobStartTime}.
 *
 * @return the wrapped job, or {@code null} when no filtered job is left
 * @throws IOException if fetching the next filtered job fails
 */
@Override
public JobStory getNextJob() throws IOException {
  final JobStory next = getNextJobFiltered();
  if (next != null) {
    if (firstJob) {
      firstJob = false;
      relativeTime = next.getSubmissionTime() - firstJobStartTime;
    }
    final long shiftedTime = next.getSubmissionTime() - relativeTime;
    return new SimulatorJobStory(next, shiftedTime);
  }
  return null;
}
 
Example #26
Source File: TestSimulatorJobClient.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the job at the current {@code index} and advances the index.
 *
 * @return the next job from {@code jobs}, or {@code null} once
 *         {@code index} has reached {@code times.length}
 */
@Override
public JobStory getNextJob() {
  return (index < times.length) ? jobs.get(index++) : null;
}
 
Example #27
Source File: MockSimulatorEngine.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a mock engine with empty job bookkeeping collections.
 *
 * @param nJobs stored as {@code fixedJobs}
 * @param nTrackers unused
 */
public MockSimulatorEngine(int nJobs,
    @SuppressWarnings("unused") int nTrackers) {
  this.fixedJobs = nJobs;
  this.completedJobs = new HashSet<JobID>();
  this.submittedJobs = new HashSet<JobID>();
  this.jobs = new HashMap<JobID, JobStory>();
}
 
Example #28
Source File: GridmixJob.java    From RDFS with Apache License 2.0 5 votes vote down vote up
public GridmixJob(Configuration conf, long submissionMillis,
    JobStory jobdesc, Path outRoot, int seq) throws IOException {
  ((StringBuilder)nameFormat.get().out()).setLength(JOBNAME.length());
  job = new Job(conf, nameFormat.get().format("%05d", seq).toString());
  submissionTimeNanos = TimeUnit.NANOSECONDS.convert(
      submissionMillis, TimeUnit.MILLISECONDS);
  this.jobdesc = jobdesc;
  this.seq = seq;
  outdir = new Path(outRoot, "" + seq);
}
 
Example #29
Source File: JobFactory.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the next job from {@code jobProducer} whose outcome is
 * {@code SUCCESS} and whose submission time is non-negative, skipping all
 * others. The accepted job is wrapped in a {@code FilterJobStory} whose
 * {@code getTaskInfo} wraps the underlying result in a {@code MinTaskInfo}.
 *
 * @return the wrapped job, or {@code null} once the producer has no more
 *         jobs
 * @throws IOException if the producer fails to supply the next job
 */
private JobStory getNextJobFiltered() throws IOException {
  JobStory candidate = jobProducer.getNextJob();
  while (candidate != null
      && (candidate.getOutcome() != Pre21JobHistoryConstants.Values.SUCCESS
          || candidate.getSubmissionTime() < 0)) {
    candidate = jobProducer.getNextJob();
  }

  if (candidate == null) {
    return null;
  }
  return new FilterJobStory(candidate) {
      @Override
      public TaskInfo getTaskInfo(TaskType taskType, int taskNumber) {
        return new MinTaskInfo(this.job.getTaskInfo(taskType, taskNumber));
      }
    };
}
 
Example #30
Source File: DebugJobFactory.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Hands out a fresh {@code MockJob} while the remaining-job counter is
 * positive, recording each one in {@code submitted}. The counter is
 * decremented on every call.
 *
 * @return a new mock job, or {@code null} once the counter is used up
 * @throws IOException declared by the overridden method
 */
@Override
public JobStory getNextJob() throws IOException {
  if (numJobs.getAndDecrement() <= 0) {
    return null;
  }
  final MockJob next = new MockJob(conf);
  submitted.add(next);
  return next;
}