Java Code Examples for org.apache.hadoop.mapreduce.Job#addFileToClassPath()

The following examples show how to use org.apache.hadoop.mapreduce.Job#addFileToClassPath(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestMRAMWithNonNormalizedCapabilities.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a sleep job that requests 700 MB per map task and 1500 MB per
 * reduce task and asserts that it succeeds, to ensure nothing broke after
 * normalization was removed from the MRAM side.
 *
 * The test is skipped (returns silently) when the MR application jar
 * named by {@code MiniMRYarnCluster.APPJAR} does not exist.
 *
 * @throws Exception if job creation, submission or monitoring fails
 */
@Test
public void testJobWithNonNormalizedCapabilities() throws Exception {
  if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
              + " not found. Not running test.");
    return;
  }

  JobConf jobConf = new JobConf(mrCluster.getConfig());
  jobConf.setInt("mapreduce.map.memory.mb", 700);
  // The reduce-side key must be "mapreduce.reduce.memory.mb"; the previous
  // "mapred.reduce.memory.mb" is not a property MapReduce reads, so the
  // reduce request silently stayed at its default value.
  jobConf.setInt("mapreduce.reduce.memory.mb", 1500);

  SleepJob sleepJob = new SleepJob();
  sleepJob.setConf(jobConf);
  Job job = sleepJob.createJob(3, 2, 1000, 1, 500, 1);
  job.setJarByClass(SleepJob.class);
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.submit();
  boolean completed = job.waitForCompletion(true);
  Assert.assertTrue("Job should be completed", completed);
  Assert.assertEquals("Job should be finished successfully", 
                  JobStatus.State.SUCCEEDED, job.getJobState());
}
 
Example 2
Source File: MapReduceJobConfiguration.java    From datawave with Apache License 2.0 6 votes vote down vote up
/**
 * Scans the jar at {@code source} and, for every entry whose full name matches {@code pattern}, copies that entry's bytes to a new file
 * under {@code classpath} on {@code fs} (named after the last '/'-separated segment of the entry name) and adds that file to the job
 * classpath. Entries that do not match are only logged at trace level.
 *
 * @param source
 *            local jar archive to scan
 * @param pattern
 *            pattern an entry name must fully match to be copied
 * @param classpath
 *            directory on {@code fs} that receives the copied entries
 * @param jobId
 *            job identifier, used only in log messages
 * @param job
 *            job whose classpath is extended
 * @param fs
 *            file system the entries are written to
 * @throws IOException
 *             if {@code source} is not a readable regular file, or if reading the archive or writing an entry fails
 */
private void addArchiveFile(File source, Pattern pattern, Path classpath, String jobId, Job job, FileSystem fs) throws IOException {
    if (!(source.isFile() && source.canRead())) {
        throw new IOException(source + " is not a regular file.");
    }
    try (JarInputStream jarInputStream = new JarInputStream(new FileInputStream(source))) {
        JarEntry entry;
        while ((entry = jarInputStream.getNextJarEntry()) != null) {
            String entryName = entry.getName();
            if (!pattern.matcher(entryName).matches()) {
                log.trace("Skipping {} since it does not match the pattern {}", entryName, pattern.pattern());
                continue;
            }

            log.trace("Adding {} to the classpath for job {}", entryName, jobId);
            // Keep only the file-name part of the entry; any directory prefix inside the archive is dropped.
            String outputFileName = entryName.substring(entryName.lastIndexOf('/') + 1);
            Path cachedJarPath = new Path(classpath, outputFileName);
            // The jar stream is positioned on the current entry, so copying it writes exactly that entry's content.
            try (FSDataOutputStream hadoopOutputStream = fs.create(cachedJarPath, false)) {
                ByteStreams.copy(jarInputStream, hadoopOutputStream);
            }
            job.addFileToClassPath(cachedJarPath);
        }
    }
}
 
Example 3
Source File: DistCacheConfigurer.java    From titan1withtp3.1 with Apache License 2.0 6 votes vote down vote up
/**
 * Stages every path returned by {@code getLocalPaths()} via {@code uploadFileIfNecessary} and adds the
 * staged path to the job classpath; also sets the job jar when {@code getMapredJar()} returns one.
 *
 * @param job the job to configure
 * @throws IOException if obtaining a file system or staging a file fails
 */
@Override
public void configure(Job job) throws IOException {

    final Configuration jobConfiguration = job.getConfiguration();
    final FileSystem localFS = FileSystem.getLocal(jobConfiguration);
    final FileSystem jobFS = FileSystem.get(jobConfiguration);

    for (Path localPath : getLocalPaths()) {
        final Path stagedPath = uploadFileIfNecessary(localFS, localPath, jobFS);
        // Deliberately NOT job.addArchiveToClassPath(...): that decompresses the
        // archive and makes Hadoop handle its classfiles individually, which leads
        // to crippling overhead times (10+ seconds) even with the LocalJobRunner,
        // courtesy of o.a.h.yarn.util.FSDownload.changePermissions copying and
        // chmodding each classfile individually.
        // Add the compressed archive as a plain classpath file instead.
        job.addFileToClassPath(stagedPath);
    }

    // A mapred job jar is not really needed here, but setting one
    // suppresses a warning.
    final String mapredJar = getMapredJar();
    if (mapredJar != null) {
        job.setJar(mapredJar);
    }
}
 
Example 4
Source File: TestMiniMRChildTask.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Launches a job built from {@code conf}, asserts that it succeeds and then
 * recursively deletes {@code outDir}.
 *
 * @param conf Configuration of the mapreduce job.
 * @param inDir input path (not used by this method)
 * @param outDir output path; deleted after the job has succeeded
 * @param input Input text (not used by this method)
 * @throws IOException if the job cannot be created or submitted, or the
 *         output directory cannot be deleted
 * @throws InterruptedException if interrupted while waiting for the job
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
public void launchTest(JobConf conf,
                       Path inDir,
                       Path outDir,
                       String input)
throws IOException, InterruptedException, ClassNotFoundException {

  FileSystem outFs = outDir.getFileSystem(conf);
  
  // Launch job with default option for temp dir. 
  // i.e. temp dir is ./tmp 
  Job job = Job.getInstance(conf);
  job.addFileToClassPath(APP_JAR);
  job.setJarByClass(TestMiniMRChildTask.class);
  job.setMaxMapAttempts(1); // speed up failures
  // Wait exactly once: the earlier code called waitForCompletion twice and
  // threw away the first result, monitoring the same job a second time.
  boolean succeeded = job.waitForCompletion(true);
  assertTrue(succeeded);
  outFs.delete(outDir, true);
}
 
Example 5
Source File: TestMRAMWithNonNormalizedCapabilities.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a sleep job that requests 700 MB per map task and 1500 MB per
 * reduce task and asserts that it succeeds, to ensure nothing broke after
 * normalization was removed from the MRAM side.
 *
 * The test is skipped (returns silently) when the MR application jar
 * named by {@code MiniMRYarnCluster.APPJAR} does not exist.
 *
 * @throws Exception if job creation, submission or monitoring fails
 */
@Test
public void testJobWithNonNormalizedCapabilities() throws Exception {
  if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
              + " not found. Not running test.");
    return;
  }

  JobConf jobConf = new JobConf(mrCluster.getConfig());
  jobConf.setInt("mapreduce.map.memory.mb", 700);
  // The reduce-side key must be "mapreduce.reduce.memory.mb"; the previous
  // "mapred.reduce.memory.mb" is not a property MapReduce reads, so the
  // reduce request silently stayed at its default value.
  jobConf.setInt("mapreduce.reduce.memory.mb", 1500);

  SleepJob sleepJob = new SleepJob();
  sleepJob.setConf(jobConf);
  Job job = sleepJob.createJob(3, 2, 1000, 1, 500, 1);
  job.setJarByClass(SleepJob.class);
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.submit();
  boolean completed = job.waitForCompletion(true);
  Assert.assertTrue("Job should be completed", completed);
  Assert.assertEquals("Job should be finished successfully", 
                  JobStatus.State.SUCCEEDED, job.getJobState());
}
 
Example 6
Source File: TestMRJobs.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Submits a map-only job that uses {@code FailingMapper}, asserts that the
 * job does NOT succeed, and checks that the tracking URL ends with the
 * numeric tail of the job id followed by "/".
 *
 * @return the finished (failed) job
 */
protected Job runFailingMapperJob()
    throws IOException, InterruptedException, ClassNotFoundException {
  // Work on a private copy of the cluster configuration.
  final Configuration failConf = new Configuration(mrCluster.getConfig());
  failConf.setInt(MRJobConfig.NUM_MAPS, 1);
  // Reduce the number of attempts.
  failConf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2);

  final Job job = Job.getInstance(failConf);
  job.setJobName("failmapper");
  job.setJarByClass(FailingMapper.class);
  job.setMapperClass(FailingMapper.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(RandomInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  final Path outputDir = new Path(OUTPUT_ROOT_DIR, "failmapper-output");
  FileOutputFormat.setOutputPath(job, outputDir);
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.submit();

  // Capture these right after submission, before waiting for completion.
  final String trackingUrl = job.getTrackingURL();
  final String jobId = job.getJobID().toString();

  Assert.assertFalse(job.waitForCompletion(true));

  final String expectedUrlTail = jobId.substring(jobId.lastIndexOf("_")) + "/";
  Assert.assertTrue(
      "Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId,
      trackingUrl.endsWith(expectedUrlTail));
  return job;
}
 
Example 7
Source File: TestSpeculativeExecution.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Builds and submits a two-reducer job using {@code SpeculativeMapper} and
 * {@code SpeculativeReducer}, with map and reduce speculation switched on
 * or off as requested. The job is only submitted; callers wait for it.
 *
 * @param mapspec whether map-side speculative execution is enabled
 * @param redspec whether reduce-side speculative execution is enabled
 * @return the submitted job
 */
private Job runSpecTest(boolean mapspec, boolean redspec)
    throws IOException, ClassNotFoundException, InterruptedException {

  Path first = createTempFile("specexec_map_input1", "a\nz");
  Path secnd = createTempFile("specexec_map_input2", "a\nz");

  // Copy the cluster configuration rather than mutating the shared
  // instance, so the speculation settings of one run cannot leak into
  // anything else that reads mrCluster.getConfig().
  Configuration conf = new Configuration(mrCluster.getConfig());
  conf.setBoolean(MRJobConfig.MAP_SPECULATIVE,mapspec);
  conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE,redspec);
  conf.setClass(MRJobConfig.MR_AM_TASK_ESTIMATOR,
          TestSpecEstimator.class,
          TaskRuntimeEstimator.class);

  Job job = Job.getInstance(conf);
  job.setJarByClass(TestSpeculativeExecution.class);
  job.setMapperClass(SpeculativeMapper.class);
  job.setReducerClass(SpeculativeReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setNumReduceTasks(2);
  FileInputFormat.setInputPaths(job, first);
  FileInputFormat.addInputPath(job, secnd);
  FileOutputFormat.setOutputPath(job, TEST_OUT_DIR);

  // Delete output directory if it exists.
  try {
    localFs.delete(TEST_OUT_DIR,true);
  } catch (IOException e) {
    // Best-effort cleanup: a failure here is deliberately ignored.
  }

  // Creates the Job Configuration
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.setMaxMapAttempts(2);

  job.submit();

  return job;
}
 
Example 8
Source File: TestMRWithDistributedCache.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a job that exercises the four distributed-cache registration calls
 * (cache file with symlink fragment, classpath file, classpath archive,
 * cache archive) and asserts that the job succeeds.
 *
 * @param conf configuration the job is created from
 */
private void testWithConf(Configuration conf) throws IOException,
    InterruptedException, ClassNotFoundException, URISyntaxException {
  // Temporary input file of length 1.
  final Path first = createTempFile("distributed.first", "x");
  // Three jars produced by makeJar.
  final Path second =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2);
  final Path third =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3);
  final Path fourth =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4);

  final Job job = Job.getInstance(conf);
  job.setMapperClass(DistributedCacheCheckerMapper.class);
  job.setReducerClass(DistributedCacheCheckerReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  FileInputFormat.setInputPaths(job, first);

  // Register the cache entries; the URI fragment names the symlink.
  final URI firstWithSymlink =
      new URI(first.toUri().toString() + "#distributed.first.symlink");
  job.addCacheFile(firstWithSymlink);
  job.addFileToClassPath(second);
  job.addArchiveToClassPath(third);
  job.addCacheArchive(fourth.toUri());
  job.setMaxMapAttempts(1); // speed up failures

  job.submit();
  final boolean succeeded = job.waitForCompletion(false);
  assertTrue(succeeded);
}
 
Example 9
Source File: TestSpeculativeExecution.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Builds and submits a two-reducer job using {@code SpeculativeMapper} and
 * {@code SpeculativeReducer}, with map and reduce speculation switched on
 * or off as requested. The job is only submitted; callers wait for it.
 *
 * @param mapspec whether map-side speculative execution is enabled
 * @param redspec whether reduce-side speculative execution is enabled
 * @return the submitted job
 */
private Job runSpecTest(boolean mapspec, boolean redspec)
    throws IOException, ClassNotFoundException, InterruptedException {

  Path first = createTempFile("specexec_map_input1", "a\nz");
  Path secnd = createTempFile("specexec_map_input2", "a\nz");

  // Copy the cluster configuration rather than mutating the shared
  // instance, so the speculation settings of one run cannot leak into
  // anything else that reads mrCluster.getConfig().
  Configuration conf = new Configuration(mrCluster.getConfig());
  conf.setBoolean(MRJobConfig.MAP_SPECULATIVE,mapspec);
  conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE,redspec);
  conf.setClass(MRJobConfig.MR_AM_TASK_ESTIMATOR,
          TestSpecEstimator.class,
          TaskRuntimeEstimator.class);

  Job job = Job.getInstance(conf);
  job.setJarByClass(TestSpeculativeExecution.class);
  job.setMapperClass(SpeculativeMapper.class);
  job.setReducerClass(SpeculativeReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setNumReduceTasks(2);
  FileInputFormat.setInputPaths(job, first);
  FileInputFormat.addInputPath(job, secnd);
  FileOutputFormat.setOutputPath(job, TEST_OUT_DIR);

  // Delete output directory if it exists.
  try {
    localFs.delete(TEST_OUT_DIR,true);
  } catch (IOException e) {
    // Best-effort cleanup: a failure here is deliberately ignored.
  }

  // Creates the Job Configuration
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.setMaxMapAttempts(2);

  job.submit();

  return job;
}
 
Example 10
Source File: TestMRJobs.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Submits a map-only job that uses {@code FailingMapper}, asserts that the
 * job does NOT succeed, and checks that the tracking URL ends with the
 * numeric tail of the job id followed by "/".
 *
 * @return the finished (failed) job
 */
protected Job runFailingMapperJob()
    throws IOException, InterruptedException, ClassNotFoundException {
  // Work on a private copy of the cluster configuration.
  final Configuration failConf = new Configuration(mrCluster.getConfig());
  failConf.setInt(MRJobConfig.NUM_MAPS, 1);
  // Reduce the number of attempts.
  failConf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2);

  final Job job = Job.getInstance(failConf);
  job.setJobName("failmapper");
  job.setJarByClass(FailingMapper.class);
  job.setMapperClass(FailingMapper.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(RandomInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  final Path outputDir = new Path(OUTPUT_ROOT_DIR, "failmapper-output");
  FileOutputFormat.setOutputPath(job, outputDir);
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.submit();

  // Capture these right after submission, before waiting for completion.
  final String trackingUrl = job.getTrackingURL();
  final String jobId = job.getJobID().toString();

  Assert.assertFalse(job.waitForCompletion(true));

  final String expectedUrlTail = jobId.substring(jobId.lastIndexOf("_")) + "/";
  Assert.assertTrue(
      "Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId,
      trackingUrl.endsWith(expectedUrlTail));
  return job;
}
 
Example 11
Source File: MapReduceJobConfiguration.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Copies the resource at URL {@code source} to {@code destination} on {@code fs} and adds the destination to the job classpath.
 * <p>
 * When the URL stream is a {@link JarInputStream} the content is re-written entry by entry through a {@link JarOutputStream}; otherwise
 * the bytes are copied verbatim. An {@link IOException} whose message ends with "already exists" is treated as "file is already there"
 * and swallowed; in that case the destination is not added to the classpath by this call.
 *
 * @param source
 *            URL (as a string) of the file to copy
 * @param destination
 *            target path on {@code fs}
 * @param jobId
 *            job identifier, used only in log messages
 * @param job
 *            job whose classpath is extended
 * @param fs
 *            file system the file is written to
 * @throws IOException
 *             on any I/O failure other than the destination already existing
 */
protected void addSingleFile(String source, Path destination, String jobId, Job job, FileSystem fs) throws IOException {
    try (FSDataOutputStream hadoopOutputStream = fs.create(destination, false); InputStream urlInputStream = new URL(source).openStream()) {
        
        // Copy raw file to hadoop
        if (!(urlInputStream instanceof JarInputStream)) {
            ByteStreams.copy(urlInputStream, hadoopOutputStream);
        }
        // Copy jar file to hadoop - Wildfly VFS returns files as JarInputStreams
        else {
            JarInputStream jarInputStream = (JarInputStream) urlInputStream;
            try (JarOutputStream jarOutputStream = new JarOutputStream(hadoopOutputStream)) {
                for (JarEntry jarEntry = jarInputStream.getNextJarEntry(); jarEntry != null; jarEntry = jarInputStream.getNextJarEntry()) {
                    // Write a copy of the entry with the compressed size cleared: the size recorded in the source archive need
                    // not match what this stream's deflater produces, and a stale value can make the entry fail to close.
                    JarEntry outputEntry = new JarEntry(jarEntry);
                    outputEntry.setCompressedSize(-1);
                    jarOutputStream.putNextEntry(outputEntry);
                    ByteStreams.copy(jarInputStream, jarOutputStream);
                }
            }
        }
        
        // Add the jar to the job classpath
        log.trace("Adding {} to the classpath for job {}", source, jobId);
        job.addFileToClassPath(destination);
    } catch (IOException e) {
        // If the file already exists, ignore the error. The message may be null, in which case the
        // exception must be rethrown rather than masked by a NullPointerException.
        String message = e.getMessage();
        if (message == null || !message.endsWith("already exists")) {
            throw e;
        }
    }
}
 
Example 12
Source File: TestMRJobs.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a {@code DistributedCacheChecker} job whose job jar is
 * {@code jobJarPath} and which registers a cache file, classpath files, a
 * classpath archive and a cache archive; asserts that the job succeeds and
 * that its tracking URL ends with the numeric tail of the job id plus "/".
 * Returns without running when the MR application jar is missing.
 *
 * @param jobJarPath path set as the job jar
 */
public void _testDistributedCache(String jobJarPath) throws Exception {
  final File appJarFile = new File(MiniMRYarnCluster.APPJAR);
  if (!appJarFile.exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
         + " not found. Not running test.");
    return;
  }

  // Temporary input file of length 1.
  final Path first = createTempFile("distributed.first", "x");
  // Three jars produced by makeJar.
  final Path second =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2);
  final Path third =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3);
  final Path fourth =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4);

  final Job job = Job.getInstance(mrCluster.getConfig());

  // The job jar is a "dummy" jar, used to check that it is extracted
  // properly.
  job.setJar(jobJarPath);
  // Since the job jar is a dummy, the jar that actually contains
  // DistributedCacheChecker has to be put on the classpath explicitly.
  final Path checkerJar =
      new Path(JarFinder.getJar(DistributedCacheChecker.class));
  final Path qualifiedCheckerJar =
      checkerJar.makeQualified(localFs.getUri(), checkerJar.getParent());
  job.addFileToClassPath(qualifiedCheckerJar);

  job.setMapperClass(DistributedCacheChecker.class);
  job.setOutputFormatClass(NullOutputFormat.class);

  FileInputFormat.setInputPaths(job, first);
  // Register the cache entries; the URI fragment names the symlink.
  final URI firstWithSymlink =
      new URI(first.toUri().toString() + "#distributed.first.symlink");
  job.addCacheFile(firstWithSymlink);
  job.addFileToClassPath(second);
  // The AppMaster jar itself
  final Path qualifiedAppJar =
      APP_JAR.makeQualified(localFs.getUri(), APP_JAR.getParent());
  job.addFileToClassPath(qualifiedAppJar);
  job.addArchiveToClassPath(third);
  job.addCacheArchive(fourth.toUri());
  job.setMaxMapAttempts(1); // speed up failures

  job.submit();
  final String trackingUrl = job.getTrackingURL();
  final String jobId = job.getJobID().toString();
  Assert.assertTrue(job.waitForCompletion(false));

  final String expectedUrlTail = jobId.substring(jobId.lastIndexOf("_")) + "/";
  Assert.assertTrue(
      "Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId,
      trackingUrl.endsWith(expectedUrlTail));
}
 
Example 13
Source File: TestMRJobsWithHistoryService.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a sleep job, waits (up to 60 seconds, polling once per second) for
 * its RM application to reach a terminal state, asserts that the state is
 * FINISHED, checks that the counters read before and after the wait are
 * equal, and verifies the job report obtained through the history proxy.
 * Returns without running when the MR application jar is missing.
 */
@Test (timeout = 90000)
public void testJobHistoryData() throws IOException, InterruptedException,
    AvroRemoteException, ClassNotFoundException {
  final File appJarFile = new File(MiniMRYarnCluster.APPJAR);
  if (!appJarFile.exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
        + " not found. Not running test.");
    return;
  }

  // Job with 3 maps and 2 reduces
  final SleepJob sleepJob = new SleepJob();
  sleepJob.setConf(mrCluster.getConfig());
  final Job job = sleepJob.createJob(3, 2, 1000, 1, 500, 1);
  job.setJarByClass(SleepJob.class);
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.waitForCompletion(true);

  // Counters as seen right after the job completed.
  final Counters counterMR = job.getCounters();
  final JobId jobId = TypeConverter.toYarn(job.getJobID());
  final ApplicationId appID = jobId.getAppId();

  // Sleep first, then check; give up (with a warning) after 60 seconds.
  for (int pollElapsed = 1000; ; pollElapsed += 1000) {
    Thread.sleep(1000);

    final boolean terminal = TERMINAL_RM_APP_STATES.contains(
        mrCluster.getResourceManager().getRMContext().getRMApps().get(appID)
            .getState());
    if (terminal) {
      break;
    }

    if (pollElapsed >= 60000) {
      LOG.warn("application did not reach terminal state within 60 seconds");
      break;
    }
  }
  Assert.assertEquals(RMAppState.FINISHED, mrCluster.getResourceManager()
      .getRMContext().getRMApps().get(appID).getState());

  // Counters read again once the application is in its final state.
  final Counters counterHS = job.getCounters();
  //TODO the Assert below worked. need to check
  //Should we compare each field or convert to V2 counter and compare
  LOG.info("CounterHS " + counterHS);
  LOG.info("CounterMR " + counterMR);
  Assert.assertEquals(counterHS, counterMR);

  final HSClientProtocol historyClient = instantiateHistoryProxy();
  final GetJobReportRequest gjReq =
      Records.newRecord(GetJobReportRequest.class);
  gjReq.setJobId(jobId);
  final JobReport jobReport =
      historyClient.getJobReport(gjReq).getJobReport();
  verifyJobReport(jobReport, jobId);
}
 
Example 14
Source File: TestMRJobs.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a {@code RandomTextWriterJob} configured for 3072 total bytes at
 * 1024 bytes per map, asserts that it succeeds, that the tracking URL ends
 * with the numeric tail of the job id plus "/", and that the output
 * directory holds exactly three files besides the success marker.
 * Returns without running when the MR application jar is missing.
 */
@Test (timeout = 60000)
public void testRandomWriter() throws IOException, InterruptedException,
    ClassNotFoundException {

  LOG.info("\n\n\nStarting testRandomWriter().");
  final File appJarFile = new File(MiniMRYarnCluster.APPJAR);
  if (!appJarFile.exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
             + " not found. Not running test.");
    return;
  }

  final RandomTextWriterJob randomWriterJob = new RandomTextWriterJob();
  mrCluster.getConfig().set(RandomTextWriterJob.TOTAL_BYTES, "3072");
  mrCluster.getConfig().set(RandomTextWriterJob.BYTES_PER_MAP, "1024");
  final Job job = randomWriterJob.createJob(mrCluster.getConfig());

  final Path outputDir = new Path(OUTPUT_ROOT_DIR, "random-output");
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setSpeculativeExecution(false);
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.setJarByClass(RandomTextWriterJob.class);
  job.setMaxMapAttempts(1); // speed up failures
  job.submit();

  final String trackingUrl = job.getTrackingURL();
  final String jobId = job.getJobID().toString();
  Assert.assertTrue(job.waitForCompletion(true));
  Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());

  final String expectedUrlTail = jobId.substring(jobId.lastIndexOf("_")) + "/";
  Assert.assertTrue(
      "Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId,
      trackingUrl.endsWith(expectedUrlTail));

  // Make sure there are three files in the output-dir, not counting the
  // success marker file.
  final RemoteIterator<FileStatus> outputFiles =
      FileContext.getFileContext(mrCluster.getConfig()).listStatus(
          outputDir);
  int partFileCount = 0;
  while (outputFiles.hasNext()) {
    final String fileName = outputFiles.next().getPath().getName();
    final boolean isSuccessMarker =
        fileName.equals(FileOutputCommitter.SUCCEEDED_FILE_NAME);
    if (!isSuccessMarker) {
      partFileCount++;
    }
  }
  Assert.assertEquals("Number of part files is wrong!", 3, partFileCount);
  verifyRandomWriterCounters(job);

  // TODO later:  add explicit "isUber()" checks of some sort
}
 
Example 15
Source File: TestMRJobs.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a small sleep job with the MapReduce job classloader enabled and
 * asserts that it succeeds. Returns without running when the MR
 * application jar is missing.
 *
 * @param useCustomClasses when true, {@code CustomOutputFormat} and
 *        {@code CustomSpeculator} are excluded from the system classes and
 *        configured on the job
 */
private void testJobClassloader(boolean useCustomClasses) throws IOException,
    InterruptedException, ClassNotFoundException {
  LOG.info("\n\n\nStarting testJobClassloader()"
      + " useCustomClasses=" + useCustomClasses);

  final File appJarFile = new File(MiniMRYarnCluster.APPJAR);
  if (!appJarFile.exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
             + " not found. Not running test.");
    return;
  }

  final Configuration sleepConf = new Configuration(mrCluster.getConfig());
  // set master address to local to test that local mode applied iff framework == local
  sleepConf.set(MRConfig.MASTER_ADDRESS, "local");
  sleepConf.setBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, true);
  if (useCustomClasses) {
    // To test the AM loading user classes such as the output format class,
    // blacklist them from the system classes. The exclusions are prepended
    // to the defaults because the first match wins.
    final String excludedThenDefaults =
        "-" + CustomOutputFormat.class.getName()
            + ",-" + CustomSpeculator.class.getName()
            + "," + ApplicationClassLoader.SYSTEM_CLASSES_DEFAULT;
    sleepConf.set(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER_SYSTEM_CLASSES,
        excludedThenDefaults);
  }
  sleepConf.set(MRJobConfig.IO_SORT_MB, TEST_IO_SORT_MB);
  final String allLevels = Level.ALL.toString();
  sleepConf.set(MRJobConfig.MR_AM_LOG_LEVEL, allLevels);
  sleepConf.set(MRJobConfig.MAP_LOG_LEVEL, allLevels);
  sleepConf.set(MRJobConfig.REDUCE_LOG_LEVEL, allLevels);
  sleepConf.set(MRJobConfig.MAP_JAVA_OPTS, "-verbose:class");

  final SleepJob sleepJob = new SleepJob();
  sleepJob.setConf(sleepConf);
  final Job job = sleepJob.createJob(1, 1, 10, 1, 10, 1);
  job.setMapperClass(ConfVerificationMapper.class);
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.setJarByClass(SleepJob.class);
  job.setMaxMapAttempts(1); // speed up failures
  if (useCustomClasses) {
    // set custom output format class and speculator class
    job.setOutputFormatClass(CustomOutputFormat.class);
    final Configuration jobConf = job.getConfiguration();
    jobConf.setClass(MRJobConfig.MR_AM_JOB_SPECULATOR, CustomSpeculator.class,
        Speculator.class);
    // speculation needs to be enabled for the speculator to be loaded
    jobConf.setBoolean(MRJobConfig.MAP_SPECULATIVE, true);
  }

  job.submit();
  final boolean succeeded = job.waitForCompletion(true);
  final String failureMessage =
      "Job status: " + job.getStatus().getFailureInfo();
  Assert.assertTrue(failureMessage, succeeded);
}
 
Example 16
Source File: TestMRJobs.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a sleep job with 3 maps and {@code numSleepReducers} reduces,
 * asserts that it succeeds, checks the tracking URL against the job id and
 * verifies counters and task progress. Returns without running when the MR
 * application jar is missing.
 *
 * @param useRemoteJar when true the job jar is referenced through a
 *        {@code viewfs:///jobjars/} link to the directory of the local jar
 *        containing {@code SleepJob}; otherwise it is set by class
 */
private void testSleepJobInternal(boolean useRemoteJar) throws Exception {
  LOG.info("\n\n\nStarting testSleepJob: useRemoteJar=" + useRemoteJar);

  final File appJarFile = new File(MiniMRYarnCluster.APPJAR);
  if (!appJarFile.exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
             + " not found. Not running test.");
    return;
  }

  final Configuration sleepConf = new Configuration(mrCluster.getConfig());
  // set master address to local to test that local mode applied iff framework == local
  sleepConf.set(MRConfig.MASTER_ADDRESS, "local");

  final SleepJob sleepJob = new SleepJob();
  sleepJob.setConf(sleepConf);

  // job with 3 maps (10s) and numReduces reduces (5s), 1 "record" each:
  final Job job = sleepJob.createJob(3, numSleepReducers, 10000, 1, 5000, 1);

  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  if (!useRemoteJar) {
    job.setJarByClass(SleepJob.class);
  } else {
    // Link /jobjars to the directory holding the local jar and refer to the
    // jar through that link.
    final Path localJar = new Path(
        ClassUtil.findContainingJar(SleepJob.class));
    final URI jarDirUri = localFs.makeQualified(localJar.getParent()).toUri();
    ConfigUtil.addLink(job.getConfiguration(), "/jobjars", jarDirUri);
    job.setJar("viewfs:///jobjars/" + localJar.getName());
  }
  job.setMaxMapAttempts(1); // speed up failures
  job.submit();

  final String trackingUrl = job.getTrackingURL();
  final String jobId = job.getJobID().toString();
  Assert.assertTrue(job.waitForCompletion(true));
  Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());

  final String expectedUrlTail = jobId.substring(jobId.lastIndexOf("_")) + "/";
  Assert.assertTrue(
      "Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId,
      trackingUrl.endsWith(expectedUrlTail));
  verifySleepJobCounters(job);
  verifyTaskProgress(job);

  // TODO later:  add explicit "isUber()" checks of some sort (extend
  // JobStatus?)--compare against MRJobConfig.JOB_UBERTASK_ENABLE value
}
 
Example 17
Source File: TestMRJobs.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a {@code DistributedCacheChecker} job whose job jar is
 * {@code jobJarPath} and which registers a cache file, classpath files, a
 * classpath archive and a cache archive; asserts that the job succeeds and
 * that its tracking URL ends with the numeric tail of the job id plus "/".
 * Returns without running when the MR application jar is missing.
 *
 * @param jobJarPath path set as the job jar
 */
public void _testDistributedCache(String jobJarPath) throws Exception {
  final File appJarFile = new File(MiniMRYarnCluster.APPJAR);
  if (!appJarFile.exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
         + " not found. Not running test.");
    return;
  }

  // Temporary input file of length 1.
  final Path first = createTempFile("distributed.first", "x");
  // Three jars produced by makeJar.
  final Path second =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2);
  final Path third =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3);
  final Path fourth =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4);

  final Job job = Job.getInstance(mrCluster.getConfig());

  // The job jar is a "dummy" jar, used to check that it is extracted
  // properly.
  job.setJar(jobJarPath);
  // Since the job jar is a dummy, the jar that actually contains
  // DistributedCacheChecker has to be put on the classpath explicitly.
  final Path checkerJar =
      new Path(JarFinder.getJar(DistributedCacheChecker.class));
  final Path qualifiedCheckerJar =
      checkerJar.makeQualified(localFs.getUri(), checkerJar.getParent());
  job.addFileToClassPath(qualifiedCheckerJar);

  job.setMapperClass(DistributedCacheChecker.class);
  job.setOutputFormatClass(NullOutputFormat.class);

  FileInputFormat.setInputPaths(job, first);
  // Register the cache entries; the URI fragment names the symlink.
  final URI firstWithSymlink =
      new URI(first.toUri().toString() + "#distributed.first.symlink");
  job.addCacheFile(firstWithSymlink);
  job.addFileToClassPath(second);
  // The AppMaster jar itself
  final Path qualifiedAppJar =
      APP_JAR.makeQualified(localFs.getUri(), APP_JAR.getParent());
  job.addFileToClassPath(qualifiedAppJar);
  job.addArchiveToClassPath(third);
  job.addCacheArchive(fourth.toUri());
  job.setMaxMapAttempts(1); // speed up failures

  job.submit();
  final String trackingUrl = job.getTrackingURL();
  final String jobId = job.getJobID().toString();
  Assert.assertTrue(job.waitForCompletion(false));

  final String expectedUrlTail = jobId.substring(jobId.lastIndexOf("_")) + "/";
  Assert.assertTrue(
      "Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId,
      trackingUrl.endsWith(expectedUrlTail));
}
 
Example 18
Source File: TestMRJobs.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a sleep job with 3 maps and {@code numSleepReducers} reduces,
 * asserts that it succeeds, checks the tracking URL against the job id and
 * verifies counters and task progress. Returns without running when the MR
 * application jar is missing.
 *
 * @param useRemoteJar when true the job jar is referenced through a
 *        {@code viewfs:///jobjars/} link to the directory of the local jar
 *        containing {@code SleepJob}; otherwise it is set by class
 */
private void testSleepJobInternal(boolean useRemoteJar) throws Exception {
  LOG.info("\n\n\nStarting testSleepJob: useRemoteJar=" + useRemoteJar);

  final File appJarFile = new File(MiniMRYarnCluster.APPJAR);
  if (!appJarFile.exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
             + " not found. Not running test.");
    return;
  }

  final Configuration sleepConf = new Configuration(mrCluster.getConfig());
  // set master address to local to test that local mode applied iff framework == local
  sleepConf.set(MRConfig.MASTER_ADDRESS, "local");

  final SleepJob sleepJob = new SleepJob();
  sleepJob.setConf(sleepConf);

  // job with 3 maps (10s) and numReduces reduces (5s), 1 "record" each:
  final Job job = sleepJob.createJob(3, numSleepReducers, 10000, 1, 5000, 1);

  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  if (!useRemoteJar) {
    job.setJarByClass(SleepJob.class);
  } else {
    // Link /jobjars to the directory holding the local jar and refer to the
    // jar through that link.
    final Path localJar = new Path(
        ClassUtil.findContainingJar(SleepJob.class));
    final URI jarDirUri = localFs.makeQualified(localJar.getParent()).toUri();
    ConfigUtil.addLink(job.getConfiguration(), "/jobjars", jarDirUri);
    job.setJar("viewfs:///jobjars/" + localJar.getName());
  }
  job.setMaxMapAttempts(1); // speed up failures
  job.submit();

  final String trackingUrl = job.getTrackingURL();
  final String jobId = job.getJobID().toString();
  Assert.assertTrue(job.waitForCompletion(true));
  Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());

  final String expectedUrlTail = jobId.substring(jobId.lastIndexOf("_")) + "/";
  Assert.assertTrue(
      "Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId,
      trackingUrl.endsWith(expectedUrlTail));
  verifySleepJobCounters(job);
  verifyTaskProgress(job);

  // TODO later:  add explicit "isUber()" checks of some sort (extend
  // JobStatus?)--compare against MRJobConfig.JOB_UBERTASK_ENABLE value
}
 
Example 19
Source File: TestMiniMRChildTask.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Configures task environment variables and JVM options for map and reduce
 * tasks, runs a job using {@code EnvCheckMapper}/{@code EnvCheckReducer}
 * and asserts that it succeeds.
 *
 * @param conf job configuration to populate and run
 * @param inDir input path handed to {@code configure}
 * @param outDir output path handed to {@code configure}
 * @param oldConfigs when true, the shared {@code JobConf.MAPRED_TASK_ENV}
 *        and {@code JobConf.MAPRED_TASK_JAVA_OPTS} keys (with
 *        {@code TASK_OPTS_VAL}) are used for both task types instead of
 *        the separate map and reduce keys
 * @throws IOException if the job cannot be configured or submitted
 * @throws InterruptedException if interrupted while waiting for the job
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
void runTestTaskEnv(JobConf conf, Path inDir, Path outDir, boolean oldConfigs) 
throws IOException, InterruptedException, ClassNotFoundException {
  String input = "The input";
  configure(conf, inDir, outDir, input, 
            EnvCheckMapper.class, EnvCheckReducer.class);
  // test 
  //  - new SET of new var (MY_PATH)
  //  - set of old var (LANG)
  //  - append to an old var from modified env (LD_LIBRARY_PATH)
  //  - append to an old var from tt's env (PATH)
  //  - append to a new var (NEW_PATH)
  String mapTaskEnvKey = JobConf.MAPRED_MAP_TASK_ENV;
  // Use the reduce-specific key; this was previously MAPRED_MAP_TASK_ENV by
  // mistake, so the reduce environment was never set through its own key.
  String reduceTaskEnvKey = JobConf.MAPRED_REDUCE_TASK_ENV;
  String mapTaskJavaOptsKey = JobConf.MAPRED_MAP_TASK_JAVA_OPTS;
  String reduceTaskJavaOptsKey = JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS;
  String mapTaskJavaOpts = MAP_OPTS_VAL;
  String reduceTaskJavaOpts = REDUCE_OPTS_VAL;
  conf.setBoolean(OLD_CONFIGS, oldConfigs);
  if (oldConfigs) {
    mapTaskEnvKey = reduceTaskEnvKey = JobConf.MAPRED_TASK_ENV;
    mapTaskJavaOptsKey = reduceTaskJavaOptsKey = JobConf.MAPRED_TASK_JAVA_OPTS;
    mapTaskJavaOpts = reduceTaskJavaOpts = TASK_OPTS_VAL;
  }
  // Map and reduce tasks get the same environment specification; only the
  // variable-reference and path-separator syntax differs by platform.
  final String taskEnv =
      Shell.WINDOWS ? "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=%LD_LIBRARY_PATH%;/tmp,"
          + "PATH=%PATH%;/tmp,NEW_PATH=%NEW_PATH%;/tmp"
          : "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp,"
              + "PATH=$PATH:/tmp,NEW_PATH=$NEW_PATH:/tmp";
  conf.set(mapTaskEnvKey, taskEnv);
  conf.set(reduceTaskEnvKey, taskEnv);
  conf.set("path", System.getenv("PATH"));
  conf.set(mapTaskJavaOptsKey, mapTaskJavaOpts);
  conf.set(reduceTaskJavaOptsKey, reduceTaskJavaOpts);

  Job job = Job.getInstance(conf);
  job.addFileToClassPath(APP_JAR);
  job.setJarByClass(TestMiniMRChildTask.class);
  job.setMaxMapAttempts(1); // speed up failures
  // Wait exactly once: the earlier code called waitForCompletion twice and
  // threw away the first result.
  boolean succeeded = job.waitForCompletion(true);
  assertTrue("The environment checker job failed.", succeeded);
}
 
Example 20
Source File: TestMRJobsWithHistoryService.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a sleep job, waits (up to 60 seconds, polling once per second) for
 * its RM application to reach a terminal state, asserts that the state is
 * FINISHED, checks that the counters read before and after the wait are
 * equal, and verifies the job report obtained through the history proxy.
 * Returns without running when the MR application jar is missing.
 */
@Test (timeout = 90000)
public void testJobHistoryData() throws IOException, InterruptedException,
    AvroRemoteException, ClassNotFoundException {
  final File appJarFile = new File(MiniMRYarnCluster.APPJAR);
  if (!appJarFile.exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
        + " not found. Not running test.");
    return;
  }

  // Job with 3 maps and 2 reduces
  final SleepJob sleepJob = new SleepJob();
  sleepJob.setConf(mrCluster.getConfig());
  final Job job = sleepJob.createJob(3, 2, 1000, 1, 500, 1);
  job.setJarByClass(SleepJob.class);
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.waitForCompletion(true);

  // Counters as seen right after the job completed.
  final Counters counterMR = job.getCounters();
  final JobId jobId = TypeConverter.toYarn(job.getJobID());
  final ApplicationId appID = jobId.getAppId();

  // Sleep first, then check; give up (with a warning) after 60 seconds.
  for (int pollElapsed = 1000; ; pollElapsed += 1000) {
    Thread.sleep(1000);

    final boolean terminal = TERMINAL_RM_APP_STATES.contains(
        mrCluster.getResourceManager().getRMContext().getRMApps().get(appID)
            .getState());
    if (terminal) {
      break;
    }

    if (pollElapsed >= 60000) {
      LOG.warn("application did not reach terminal state within 60 seconds");
      break;
    }
  }
  Assert.assertEquals(RMAppState.FINISHED, mrCluster.getResourceManager()
      .getRMContext().getRMApps().get(appID).getState());

  // Counters read again once the application is in its final state.
  final Counters counterHS = job.getCounters();
  //TODO the Assert below worked. need to check
  //Should we compare each field or convert to V2 counter and compare
  LOG.info("CounterHS " + counterHS);
  LOG.info("CounterMR " + counterMR);
  Assert.assertEquals(counterHS, counterMR);

  final HSClientProtocol historyClient = instantiateHistoryProxy();
  final GetJobReportRequest gjReq =
      Records.newRecord(GetJobReportRequest.class);
  gjReq.setJobId(jobId);
  final JobReport jobReport =
      historyClient.getJobReport(gjReq).getJobReport();
  verifyJobReport(jobReport, jobId);
}