Java Code Examples for org.apache.hadoop.mapreduce.Job#addFileToClassPath()
The following examples show how to use
org.apache.hadoop.mapreduce.Job#addFileToClassPath().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestMRAMWithNonNormalizedCapabilities.java From big-c with Apache License 2.0 | 6 votes |
/** * To ensure nothing broken after we removed normalization * from the MRAM side * @throws Exception */ @Test public void testJobWithNonNormalizedCapabilities() throws Exception { if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) { LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test."); return; } JobConf jobConf = new JobConf(mrCluster.getConfig()); jobConf.setInt("mapreduce.map.memory.mb", 700); jobConf.setInt("mapred.reduce.memory.mb", 1500); SleepJob sleepJob = new SleepJob(); sleepJob.setConf(jobConf); Job job = sleepJob.createJob(3, 2, 1000, 1, 500, 1); job.setJarByClass(SleepJob.class); job.addFileToClassPath(APP_JAR); // The AppMaster jar itself. job.submit(); boolean completed = job.waitForCompletion(true); Assert.assertTrue("Job should be completed", completed); Assert.assertEquals("Job should be finished successfully", JobStatus.State.SUCCEEDED, job.getJobState()); }
Example 2
Source File: MapReduceJobConfiguration.java From datawave with Apache License 2.0 | 6 votes |
private void addArchiveFile(File source, Pattern pattern, Path classpath, String jobId, Job job, FileSystem fs) throws IOException { if (!source.isFile() || !source.canRead()) { throw new IOException(source + " is not a regular file."); } try (JarInputStream jarInputStream = new JarInputStream(new FileInputStream(source))) { // Check each file in the archive, and if it matches the supplied pattern, then copy it to HDFS and add it to the classpath. for (JarEntry jarEntry = jarInputStream.getNextJarEntry(); jarEntry != null; jarEntry = jarInputStream.getNextJarEntry()) { if (pattern.matcher(jarEntry.getName()).matches()) { log.trace("Adding {} to the classpath for job {}", jarEntry.getName(), jobId); int slashIdx = jarEntry.getName().lastIndexOf('/'); String outputFileName = jarEntry.getName().substring(slashIdx + 1); Path cachedJarPath = new Path(classpath, outputFileName); try (FSDataOutputStream hadoopOutputStream = fs.create(cachedJarPath, false)) { ByteStreams.copy(jarInputStream, hadoopOutputStream); } job.addFileToClassPath(cachedJarPath); } else { log.trace("Skipping {} since it does not match the pattern {}", jarEntry.getName(), pattern.pattern()); } } } }
Example 3
Source File: DistCacheConfigurer.java From titan1withtp3.1 with Apache License 2.0 | 6 votes |
@Override public void configure(Job job) throws IOException { Configuration conf = job.getConfiguration(); FileSystem localFS = FileSystem.getLocal(conf); FileSystem jobFS = FileSystem.get(conf); for (Path p : getLocalPaths()) { Path stagedPath = uploadFileIfNecessary(localFS, p, jobFS); // Calling this method decompresses the archive and makes Hadoop // handle its classfiles individually. This leads to crippling // overhead times (10+ seconds) even with the LocalJobRunner // courtesy of o.a.h.yarn.util.FSDownload.changePermissions // copying and chmodding each classfile copy file individually. //job.addArchiveToClassPath(p); // Just add the compressed archive instead: job.addFileToClassPath(stagedPath); } // We don't really need to set a mapred job jar here, // but doing so suppresses a warning String mj = getMapredJar(); if (null != mj) job.setJar(mj); }
Example 4
Source File: TestMiniMRChildTask.java From big-c with Apache License 2.0 | 6 votes |
/** * Launch tests * @param conf Configuration of the mapreduce job. * @param inDir input path * @param outDir output path * @param input Input text * @throws IOException */ public void launchTest(JobConf conf, Path inDir, Path outDir, String input) throws IOException, InterruptedException, ClassNotFoundException { FileSystem outFs = outDir.getFileSystem(conf); // Launch job with default option for temp dir. // i.e. temp dir is ./tmp Job job = Job.getInstance(conf); job.addFileToClassPath(APP_JAR); job.setJarByClass(TestMiniMRChildTask.class); job.setMaxMapAttempts(1); // speed up failures job.waitForCompletion(true); boolean succeeded = job.waitForCompletion(true); assertTrue(succeeded); outFs.delete(outDir, true); }
Example 5
Source File: TestMRAMWithNonNormalizedCapabilities.java From hadoop with Apache License 2.0 | 6 votes |
/** * To ensure nothing broken after we removed normalization * from the MRAM side * @throws Exception */ @Test public void testJobWithNonNormalizedCapabilities() throws Exception { if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) { LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test."); return; } JobConf jobConf = new JobConf(mrCluster.getConfig()); jobConf.setInt("mapreduce.map.memory.mb", 700); jobConf.setInt("mapred.reduce.memory.mb", 1500); SleepJob sleepJob = new SleepJob(); sleepJob.setConf(jobConf); Job job = sleepJob.createJob(3, 2, 1000, 1, 500, 1); job.setJarByClass(SleepJob.class); job.addFileToClassPath(APP_JAR); // The AppMaster jar itself. job.submit(); boolean completed = job.waitForCompletion(true); Assert.assertTrue("Job should be completed", completed); Assert.assertEquals("Job should be finished successfully", JobStatus.State.SUCCEEDED, job.getJobState()); }
Example 6
Source File: TestMRJobs.java From big-c with Apache License 2.0 | 5 votes |
protected Job runFailingMapperJob() throws IOException, InterruptedException, ClassNotFoundException { Configuration myConf = new Configuration(mrCluster.getConfig()); myConf.setInt(MRJobConfig.NUM_MAPS, 1); myConf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2); //reduce the number of attempts Job job = Job.getInstance(myConf); job.setJarByClass(FailingMapper.class); job.setJobName("failmapper"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(RandomInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(FailingMapper.class); job.setNumReduceTasks(0); FileOutputFormat.setOutputPath(job, new Path(OUTPUT_ROOT_DIR, "failmapper-output")); job.addFileToClassPath(APP_JAR); // The AppMaster jar itself. job.submit(); String trackingUrl = job.getTrackingURL(); String jobId = job.getJobID().toString(); boolean succeeded = job.waitForCompletion(true); Assert.assertFalse(succeeded); Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId , trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/")); return job; }
Example 7
Source File: TestSpeculativeExecution.java From big-c with Apache License 2.0 | 5 votes |
private Job runSpecTest(boolean mapspec, boolean redspec) throws IOException, ClassNotFoundException, InterruptedException { Path first = createTempFile("specexec_map_input1", "a\nz"); Path secnd = createTempFile("specexec_map_input2", "a\nz"); Configuration conf = mrCluster.getConfig(); conf.setBoolean(MRJobConfig.MAP_SPECULATIVE,mapspec); conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE,redspec); conf.setClass(MRJobConfig.MR_AM_TASK_ESTIMATOR, TestSpecEstimator.class, TaskRuntimeEstimator.class); Job job = Job.getInstance(conf); job.setJarByClass(TestSpeculativeExecution.class); job.setMapperClass(SpeculativeMapper.class); job.setReducerClass(SpeculativeReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setNumReduceTasks(2); FileInputFormat.setInputPaths(job, first); FileInputFormat.addInputPath(job, secnd); FileOutputFormat.setOutputPath(job, TEST_OUT_DIR); // Delete output directory if it exists. try { localFs.delete(TEST_OUT_DIR,true); } catch (IOException e) { // ignore } // Creates the Job Configuration job.addFileToClassPath(APP_JAR); // The AppMaster jar itself. job.setMaxMapAttempts(2); job.submit(); return job; }
Example 8
Source File: TestMRWithDistributedCache.java From hadoop with Apache License 2.0 | 5 votes |
private void testWithConf(Configuration conf) throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException { // Create a temporary file of length 1. Path first = createTempFile("distributed.first", "x"); // Create two jars with a single file inside them. Path second = makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2); Path third = makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3); Path fourth = makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4); Job job = Job.getInstance(conf); job.setMapperClass(DistributedCacheCheckerMapper.class); job.setReducerClass(DistributedCacheCheckerReducer.class); job.setOutputFormatClass(NullOutputFormat.class); FileInputFormat.setInputPaths(job, first); // Creates the Job Configuration job.addCacheFile( new URI(first.toUri().toString() + "#distributed.first.symlink")); job.addFileToClassPath(second); job.addArchiveToClassPath(third); job.addCacheArchive(fourth.toUri()); job.setMaxMapAttempts(1); // speed up failures job.submit(); assertTrue(job.waitForCompletion(false)); }
Example 9
Source File: TestSpeculativeExecution.java From hadoop with Apache License 2.0 | 5 votes |
private Job runSpecTest(boolean mapspec, boolean redspec) throws IOException, ClassNotFoundException, InterruptedException { Path first = createTempFile("specexec_map_input1", "a\nz"); Path secnd = createTempFile("specexec_map_input2", "a\nz"); Configuration conf = mrCluster.getConfig(); conf.setBoolean(MRJobConfig.MAP_SPECULATIVE,mapspec); conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE,redspec); conf.setClass(MRJobConfig.MR_AM_TASK_ESTIMATOR, TestSpecEstimator.class, TaskRuntimeEstimator.class); Job job = Job.getInstance(conf); job.setJarByClass(TestSpeculativeExecution.class); job.setMapperClass(SpeculativeMapper.class); job.setReducerClass(SpeculativeReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setNumReduceTasks(2); FileInputFormat.setInputPaths(job, first); FileInputFormat.addInputPath(job, secnd); FileOutputFormat.setOutputPath(job, TEST_OUT_DIR); // Delete output directory if it exists. try { localFs.delete(TEST_OUT_DIR,true); } catch (IOException e) { // ignore } // Creates the Job Configuration job.addFileToClassPath(APP_JAR); // The AppMaster jar itself. job.setMaxMapAttempts(2); job.submit(); return job; }
Example 10
Source File: TestMRJobs.java From hadoop with Apache License 2.0 | 5 votes |
protected Job runFailingMapperJob() throws IOException, InterruptedException, ClassNotFoundException { Configuration myConf = new Configuration(mrCluster.getConfig()); myConf.setInt(MRJobConfig.NUM_MAPS, 1); myConf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2); //reduce the number of attempts Job job = Job.getInstance(myConf); job.setJarByClass(FailingMapper.class); job.setJobName("failmapper"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(RandomInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(FailingMapper.class); job.setNumReduceTasks(0); FileOutputFormat.setOutputPath(job, new Path(OUTPUT_ROOT_DIR, "failmapper-output")); job.addFileToClassPath(APP_JAR); // The AppMaster jar itself. job.submit(); String trackingUrl = job.getTrackingURL(); String jobId = job.getJobID().toString(); boolean succeeded = job.waitForCompletion(true); Assert.assertFalse(succeeded); Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId , trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/")); return job; }
Example 11
Source File: MapReduceJobConfiguration.java From datawave with Apache License 2.0 | 5 votes |
protected void addSingleFile(String source, Path destination, String jobId, Job job, FileSystem fs) throws IOException { try (FSDataOutputStream hadoopOutputStream = fs.create(destination, false); InputStream urlInputStream = new URL(source).openStream()) { // Copy raw file to hadoop if (!(urlInputStream instanceof JarInputStream)) { ByteStreams.copy(urlInputStream, hadoopOutputStream); } // Copy jar file to hadoop - Wildfly VFS returns files as JarInputStreams else { JarInputStream jarInputStream = (JarInputStream) urlInputStream; try (JarOutputStream jarOutputStream = new JarOutputStream(hadoopOutputStream)) { for (JarEntry jarEntry = jarInputStream.getNextJarEntry(); jarEntry != null; jarEntry = jarInputStream.getNextJarEntry()) { jarOutputStream.putNextEntry(jarEntry); ByteStreams.copy(urlInputStream, jarOutputStream); } } } // Add the jar to the job classpath log.trace("Adding {} to the classpath for job {}", source, jobId); job.addFileToClassPath(destination); } catch (IOException e) { // If the file already exists, ignore error if (!e.getMessage().endsWith("already exists")) throw e; } }
Example 12
Source File: TestMRJobs.java From hadoop with Apache License 2.0 | 4 votes |
public void _testDistributedCache(String jobJarPath) throws Exception { if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) { LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test."); return; } // Create a temporary file of length 1. Path first = createTempFile("distributed.first", "x"); // Create two jars with a single file inside them. Path second = makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2); Path third = makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3); Path fourth = makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4); Job job = Job.getInstance(mrCluster.getConfig()); // Set the job jar to a new "dummy" jar so we can check that its extracted // properly job.setJar(jobJarPath); // Because the job jar is a "dummy" jar, we need to include the jar with // DistributedCacheChecker or it won't be able to find it Path distributedCacheCheckerJar = new Path( JarFinder.getJar(DistributedCacheChecker.class)); job.addFileToClassPath(distributedCacheCheckerJar.makeQualified( localFs.getUri(), distributedCacheCheckerJar.getParent())); job.setMapperClass(DistributedCacheChecker.class); job.setOutputFormatClass(NullOutputFormat.class); FileInputFormat.setInputPaths(job, first); // Creates the Job Configuration job.addCacheFile( new URI(first.toUri().toString() + "#distributed.first.symlink")); job.addFileToClassPath(second); // The AppMaster jar itself job.addFileToClassPath( APP_JAR.makeQualified(localFs.getUri(), APP_JAR.getParent())); job.addArchiveToClassPath(third); job.addCacheArchive(fourth.toUri()); job.setMaxMapAttempts(1); // speed up failures job.submit(); String trackingUrl = job.getTrackingURL(); String jobId = job.getJobID().toString(); Assert.assertTrue(job.waitForCompletion(false)); Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId , trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/")); }
Example 13
Source File: TestMRJobsWithHistoryService.java From hadoop with Apache License 2.0 | 4 votes |
@Test (timeout = 90000) public void testJobHistoryData() throws IOException, InterruptedException, AvroRemoteException, ClassNotFoundException { if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) { LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test."); return; } SleepJob sleepJob = new SleepJob(); sleepJob.setConf(mrCluster.getConfig()); // Job with 3 maps and 2 reduces Job job = sleepJob.createJob(3, 2, 1000, 1, 500, 1); job.setJarByClass(SleepJob.class); job.addFileToClassPath(APP_JAR); // The AppMaster jar itself. job.waitForCompletion(true); Counters counterMR = job.getCounters(); JobId jobId = TypeConverter.toYarn(job.getJobID()); ApplicationId appID = jobId.getAppId(); int pollElapsed = 0; while (true) { Thread.sleep(1000); pollElapsed += 1000; if (TERMINAL_RM_APP_STATES.contains( mrCluster.getResourceManager().getRMContext().getRMApps().get(appID) .getState())) { break; } if (pollElapsed >= 60000) { LOG.warn("application did not reach terminal state within 60 seconds"); break; } } Assert.assertEquals(RMAppState.FINISHED, mrCluster.getResourceManager() .getRMContext().getRMApps().get(appID).getState()); Counters counterHS = job.getCounters(); //TODO the Assert below worked. need to check //Should we compare each field or convert to V2 counter and compare LOG.info("CounterHS " + counterHS); LOG.info("CounterMR " + counterMR); Assert.assertEquals(counterHS, counterMR); HSClientProtocol historyClient = instantiateHistoryProxy(); GetJobReportRequest gjReq = Records.newRecord(GetJobReportRequest.class); gjReq.setJobId(jobId); JobReport jobReport = historyClient.getJobReport(gjReq).getJobReport(); verifyJobReport(jobReport, jobId); }
Example 14
Source File: TestMRJobs.java From hadoop with Apache License 2.0 | 4 votes |
@Test (timeout = 60000) public void testRandomWriter() throws IOException, InterruptedException, ClassNotFoundException { LOG.info("\n\n\nStarting testRandomWriter()."); if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) { LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test."); return; } RandomTextWriterJob randomWriterJob = new RandomTextWriterJob(); mrCluster.getConfig().set(RandomTextWriterJob.TOTAL_BYTES, "3072"); mrCluster.getConfig().set(RandomTextWriterJob.BYTES_PER_MAP, "1024"); Job job = randomWriterJob.createJob(mrCluster.getConfig()); Path outputDir = new Path(OUTPUT_ROOT_DIR, "random-output"); FileOutputFormat.setOutputPath(job, outputDir); job.setSpeculativeExecution(false); job.addFileToClassPath(APP_JAR); // The AppMaster jar itself. job.setJarByClass(RandomTextWriterJob.class); job.setMaxMapAttempts(1); // speed up failures job.submit(); String trackingUrl = job.getTrackingURL(); String jobId = job.getJobID().toString(); boolean succeeded = job.waitForCompletion(true); Assert.assertTrue(succeeded); Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState()); Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId , trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/")); // Make sure there are three files in the output-dir RemoteIterator<FileStatus> iterator = FileContext.getFileContext(mrCluster.getConfig()).listStatus( outputDir); int count = 0; while (iterator.hasNext()) { FileStatus file = iterator.next(); if (!file.getPath().getName() .equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) { count++; } } Assert.assertEquals("Number of part files is wrong!", 3, count); verifyRandomWriterCounters(job); // TODO later: add explicit "isUber()" checks of some sort }
Example 15
Source File: TestMRJobs.java From hadoop with Apache License 2.0 | 4 votes |
private void testJobClassloader(boolean useCustomClasses) throws IOException, InterruptedException, ClassNotFoundException { LOG.info("\n\n\nStarting testJobClassloader()" + " useCustomClasses=" + useCustomClasses); if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) { LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test."); return; } final Configuration sleepConf = new Configuration(mrCluster.getConfig()); // set master address to local to test that local mode applied iff framework == local sleepConf.set(MRConfig.MASTER_ADDRESS, "local"); sleepConf.setBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, true); if (useCustomClasses) { // to test AM loading user classes such as output format class, we want // to blacklist them from the system classes (they need to be prepended // as the first match wins) String systemClasses = ApplicationClassLoader.SYSTEM_CLASSES_DEFAULT; // exclude the custom classes from system classes systemClasses = "-" + CustomOutputFormat.class.getName() + ",-" + CustomSpeculator.class.getName() + "," + systemClasses; sleepConf.set(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER_SYSTEM_CLASSES, systemClasses); } sleepConf.set(MRJobConfig.IO_SORT_MB, TEST_IO_SORT_MB); sleepConf.set(MRJobConfig.MR_AM_LOG_LEVEL, Level.ALL.toString()); sleepConf.set(MRJobConfig.MAP_LOG_LEVEL, Level.ALL.toString()); sleepConf.set(MRJobConfig.REDUCE_LOG_LEVEL, Level.ALL.toString()); sleepConf.set(MRJobConfig.MAP_JAVA_OPTS, "-verbose:class"); final SleepJob sleepJob = new SleepJob(); sleepJob.setConf(sleepConf); final Job job = sleepJob.createJob(1, 1, 10, 1, 10, 1); job.setMapperClass(ConfVerificationMapper.class); job.addFileToClassPath(APP_JAR); // The AppMaster jar itself. 
job.setJarByClass(SleepJob.class); job.setMaxMapAttempts(1); // speed up failures if (useCustomClasses) { // set custom output format class and speculator class job.setOutputFormatClass(CustomOutputFormat.class); final Configuration jobConf = job.getConfiguration(); jobConf.setClass(MRJobConfig.MR_AM_JOB_SPECULATOR, CustomSpeculator.class, Speculator.class); // speculation needs to be enabled for the speculator to be loaded jobConf.setBoolean(MRJobConfig.MAP_SPECULATIVE, true); } job.submit(); boolean succeeded = job.waitForCompletion(true); Assert.assertTrue("Job status: " + job.getStatus().getFailureInfo(), succeeded); }
Example 16
Source File: TestMRJobs.java From hadoop with Apache License 2.0 | 4 votes |
private void testSleepJobInternal(boolean useRemoteJar) throws Exception { LOG.info("\n\n\nStarting testSleepJob: useRemoteJar=" + useRemoteJar); if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) { LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test."); return; } Configuration sleepConf = new Configuration(mrCluster.getConfig()); // set master address to local to test that local mode applied iff framework == local sleepConf.set(MRConfig.MASTER_ADDRESS, "local"); SleepJob sleepJob = new SleepJob(); sleepJob.setConf(sleepConf); // job with 3 maps (10s) and numReduces reduces (5s), 1 "record" each: Job job = sleepJob.createJob(3, numSleepReducers, 10000, 1, 5000, 1); job.addFileToClassPath(APP_JAR); // The AppMaster jar itself. if (useRemoteJar) { final Path localJar = new Path( ClassUtil.findContainingJar(SleepJob.class)); ConfigUtil.addLink(job.getConfiguration(), "/jobjars", localFs.makeQualified(localJar.getParent()).toUri()); job.setJar("viewfs:///jobjars/" + localJar.getName()); } else { job.setJarByClass(SleepJob.class); } job.setMaxMapAttempts(1); // speed up failures job.submit(); String trackingUrl = job.getTrackingURL(); String jobId = job.getJobID().toString(); boolean succeeded = job.waitForCompletion(true); Assert.assertTrue(succeeded); Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState()); Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId , trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/")); verifySleepJobCounters(job); verifyTaskProgress(job); // TODO later: add explicit "isUber()" checks of some sort (extend // JobStatus?)--compare against MRJobConfig.JOB_UBERTASK_ENABLE value }
Example 17
Source File: TestMRJobs.java From big-c with Apache License 2.0 | 4 votes |
public void _testDistributedCache(String jobJarPath) throws Exception { if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) { LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test."); return; } // Create a temporary file of length 1. Path first = createTempFile("distributed.first", "x"); // Create two jars with a single file inside them. Path second = makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2); Path third = makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3); Path fourth = makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4); Job job = Job.getInstance(mrCluster.getConfig()); // Set the job jar to a new "dummy" jar so we can check that its extracted // properly job.setJar(jobJarPath); // Because the job jar is a "dummy" jar, we need to include the jar with // DistributedCacheChecker or it won't be able to find it Path distributedCacheCheckerJar = new Path( JarFinder.getJar(DistributedCacheChecker.class)); job.addFileToClassPath(distributedCacheCheckerJar.makeQualified( localFs.getUri(), distributedCacheCheckerJar.getParent())); job.setMapperClass(DistributedCacheChecker.class); job.setOutputFormatClass(NullOutputFormat.class); FileInputFormat.setInputPaths(job, first); // Creates the Job Configuration job.addCacheFile( new URI(first.toUri().toString() + "#distributed.first.symlink")); job.addFileToClassPath(second); // The AppMaster jar itself job.addFileToClassPath( APP_JAR.makeQualified(localFs.getUri(), APP_JAR.getParent())); job.addArchiveToClassPath(third); job.addCacheArchive(fourth.toUri()); job.setMaxMapAttempts(1); // speed up failures job.submit(); String trackingUrl = job.getTrackingURL(); String jobId = job.getJobID().toString(); Assert.assertTrue(job.waitForCompletion(false)); Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId , trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/")); }
Example 18
Source File: TestMRJobs.java From big-c with Apache License 2.0 | 4 votes |
private void testSleepJobInternal(boolean useRemoteJar) throws Exception { LOG.info("\n\n\nStarting testSleepJob: useRemoteJar=" + useRemoteJar); if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) { LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test."); return; } Configuration sleepConf = new Configuration(mrCluster.getConfig()); // set master address to local to test that local mode applied iff framework == local sleepConf.set(MRConfig.MASTER_ADDRESS, "local"); SleepJob sleepJob = new SleepJob(); sleepJob.setConf(sleepConf); // job with 3 maps (10s) and numReduces reduces (5s), 1 "record" each: Job job = sleepJob.createJob(3, numSleepReducers, 10000, 1, 5000, 1); job.addFileToClassPath(APP_JAR); // The AppMaster jar itself. if (useRemoteJar) { final Path localJar = new Path( ClassUtil.findContainingJar(SleepJob.class)); ConfigUtil.addLink(job.getConfiguration(), "/jobjars", localFs.makeQualified(localJar.getParent()).toUri()); job.setJar("viewfs:///jobjars/" + localJar.getName()); } else { job.setJarByClass(SleepJob.class); } job.setMaxMapAttempts(1); // speed up failures job.submit(); String trackingUrl = job.getTrackingURL(); String jobId = job.getJobID().toString(); boolean succeeded = job.waitForCompletion(true); Assert.assertTrue(succeeded); Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState()); Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId , trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/")); verifySleepJobCounters(job); verifyTaskProgress(job); // TODO later: add explicit "isUber()" checks of some sort (extend // JobStatus?)--compare against MRJobConfig.JOB_UBERTASK_ENABLE value }
Example 19
Source File: TestMiniMRChildTask.java From hadoop with Apache License 2.0 | 4 votes |
void runTestTaskEnv(JobConf conf, Path inDir, Path outDir, boolean oldConfigs) throws IOException, InterruptedException, ClassNotFoundException { String input = "The input"; configure(conf, inDir, outDir, input, EnvCheckMapper.class, EnvCheckReducer.class); // test // - new SET of new var (MY_PATH) // - set of old var (LANG) // - append to an old var from modified env (LD_LIBRARY_PATH) // - append to an old var from tt's env (PATH) // - append to a new var (NEW_PATH) String mapTaskEnvKey = JobConf.MAPRED_MAP_TASK_ENV; String reduceTaskEnvKey = JobConf.MAPRED_MAP_TASK_ENV; String mapTaskJavaOptsKey = JobConf.MAPRED_MAP_TASK_JAVA_OPTS; String reduceTaskJavaOptsKey = JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS; String mapTaskJavaOpts = MAP_OPTS_VAL; String reduceTaskJavaOpts = REDUCE_OPTS_VAL; conf.setBoolean(OLD_CONFIGS, oldConfigs); if (oldConfigs) { mapTaskEnvKey = reduceTaskEnvKey = JobConf.MAPRED_TASK_ENV; mapTaskJavaOptsKey = reduceTaskJavaOptsKey = JobConf.MAPRED_TASK_JAVA_OPTS; mapTaskJavaOpts = reduceTaskJavaOpts = TASK_OPTS_VAL; } conf.set( mapTaskEnvKey, Shell.WINDOWS ? "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=%LD_LIBRARY_PATH%;/tmp," + "PATH=%PATH%;/tmp,NEW_PATH=%NEW_PATH%;/tmp" : "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp," + "PATH=$PATH:/tmp,NEW_PATH=$NEW_PATH:/tmp"); conf.set( reduceTaskEnvKey, Shell.WINDOWS ? 
"MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=%LD_LIBRARY_PATH%;/tmp," + "PATH=%PATH%;/tmp,NEW_PATH=%NEW_PATH%;/tmp" : "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp," + "PATH=$PATH:/tmp,NEW_PATH=$NEW_PATH:/tmp"); conf.set("path", System.getenv("PATH")); conf.set(mapTaskJavaOptsKey, mapTaskJavaOpts); conf.set(reduceTaskJavaOptsKey, reduceTaskJavaOpts); Job job = Job.getInstance(conf); job.addFileToClassPath(APP_JAR); job.setJarByClass(TestMiniMRChildTask.class); job.setMaxMapAttempts(1); // speed up failures job.waitForCompletion(true); boolean succeeded = job.waitForCompletion(true); assertTrue("The environment checker job failed.", succeeded); }
Example 20
Source File: TestMRJobsWithHistoryService.java From big-c with Apache License 2.0 | 4 votes |
@Test (timeout = 90000) public void testJobHistoryData() throws IOException, InterruptedException, AvroRemoteException, ClassNotFoundException { if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) { LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test."); return; } SleepJob sleepJob = new SleepJob(); sleepJob.setConf(mrCluster.getConfig()); // Job with 3 maps and 2 reduces Job job = sleepJob.createJob(3, 2, 1000, 1, 500, 1); job.setJarByClass(SleepJob.class); job.addFileToClassPath(APP_JAR); // The AppMaster jar itself. job.waitForCompletion(true); Counters counterMR = job.getCounters(); JobId jobId = TypeConverter.toYarn(job.getJobID()); ApplicationId appID = jobId.getAppId(); int pollElapsed = 0; while (true) { Thread.sleep(1000); pollElapsed += 1000; if (TERMINAL_RM_APP_STATES.contains( mrCluster.getResourceManager().getRMContext().getRMApps().get(appID) .getState())) { break; } if (pollElapsed >= 60000) { LOG.warn("application did not reach terminal state within 60 seconds"); break; } } Assert.assertEquals(RMAppState.FINISHED, mrCluster.getResourceManager() .getRMContext().getRMApps().get(appID).getState()); Counters counterHS = job.getCounters(); //TODO the Assert below worked. need to check //Should we compare each field or convert to V2 counter and compare LOG.info("CounterHS " + counterHS); LOG.info("CounterMR " + counterMR); Assert.assertEquals(counterHS, counterMR); HSClientProtocol historyClient = instantiateHistoryProxy(); GetJobReportRequest gjReq = Records.newRecord(GetJobReportRequest.class); gjReq.setJobId(jobId); JobReport jobReport = historyClient.getJobReport(gjReq).getJobReport(); verifyJobReport(jobReport, jobId); }