Java Code Examples for org.apache.hadoop.mapreduce.Job#setMaxMapAttempts()

The following examples show how to use org.apache.hadoop.mapreduce.Job#setMaxMapAttempts() . These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: hadoop   File: TestMiniMRChildTask.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Launch tests 
 * @param conf Configuration of the mapreduce job.
 * @param inDir input path
 * @param outDir output path
 * @param input Input text
 * @throws IOException
 */
public void launchTest(JobConf conf,
                       Path inDir,
                       Path outDir,
                       String input)
throws IOException, InterruptedException, ClassNotFoundException {

  FileSystem outFs = outDir.getFileSystem(conf);
  
  // Launch job with default option for temp dir. 
  // i.e. temp dir is ./tmp 
  Job job = Job.getInstance(conf);
  job.addFileToClassPath(APP_JAR);
  job.setJarByClass(TestMiniMRChildTask.class);
  job.setMaxMapAttempts(1); // speed up failures
  job.waitForCompletion(true);
  boolean succeeded = job.waitForCompletion(true);
  assertTrue(succeeded);
  outFs.delete(outDir, true);
}
 
Example 2
Source Project: tez   File: TestMRRJobs.java    License: Apache License 2.0 5 votes vote down vote up
@Test (timeout = 60000)
public void testFailingJob() throws IOException, InterruptedException,
    ClassNotFoundException {

  LOG.info("\n\n\nStarting testFailingJob().");

  if (!(new File(MiniTezCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniTezCluster.APPJAR
             + " not found. Not running test.");
    return;
  }

  Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());

  MRRSleepJob sleepJob = new MRRSleepJob();
  sleepJob.setConf(sleepConf);

  Job job = sleepJob.createJob(1, 1, 1, 1, 1,
      1, 1, 1, 1, 1);

  job.setJarByClass(MRRSleepJob.class);
  job.setMaxMapAttempts(1); // speed up failures
  job.getConfiguration().setBoolean(MRRSleepJob.MAP_FATAL_ERROR, true);
  job.getConfiguration().set(MRRSleepJob.MAP_ERROR_TASK_IDS, "*");

  job.submit();
  boolean succeeded = job.waitForCompletion(true);
  Assert.assertFalse(succeeded);
  Assert.assertEquals(JobStatus.State.FAILED, job.getJobState());

  // FIXME once counters and task progress can be obtained properly
  // TODO verify failed task diagnostics
}
 
Example 3
Source Project: tez   File: TestMRRJobs.java    License: Apache License 2.0 5 votes vote down vote up
@Test (timeout = 60000)
public void testMRRSleepJob() throws IOException, InterruptedException,
    ClassNotFoundException {
  LOG.info("\n\n\nStarting testMRRSleepJob().");

  if (!(new File(MiniTezCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniTezCluster.APPJAR
             + " not found. Not running test.");
    return;
  }

  Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());

  MRRSleepJob sleepJob = new MRRSleepJob();
  sleepJob.setConf(sleepConf);

  Job job = sleepJob.createJob(1, 1, 1, 1, 1,
      1, 1, 1, 1, 1);

  job.setJarByClass(MRRSleepJob.class);
  job.setMaxMapAttempts(1); // speed up failures
  job.submit();
  String trackingUrl = job.getTrackingURL();
  String jobId = job.getJobID().toString();
  boolean succeeded = job.waitForCompletion(true);
  Assert.assertTrue(succeeded);
  Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
  // There's one bug in YARN that there may be some suffix at the end of trackingURL (YARN-2246)
  // After TEZ-1961, the tracking will change from http://localhost:53419/proxy/application_1430963524753_0005
  // to http://localhost:53419/proxy/application_1430963524753_0005/ui/
  // So here use String#contains to verify.
  Assert.assertTrue("Tracking URL was " + trackingUrl +
                    " but didn't Match Job ID " + jobId ,
        trackingUrl.contains(jobId.substring(jobId.indexOf("_"))));

  // FIXME once counters and task progress can be obtained properly
  // TODO use dag client to test counters and task progress?
  // what about completed jobs?
}
 
Example 4
private void testWithConf(Configuration conf) throws IOException,
    InterruptedException, ClassNotFoundException, URISyntaxException {
  // Create a temporary file of length 1.
  Path first = createTempFile("distributed.first", "x");
  // Create two jars with a single file inside them.
  Path second =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2);
  Path third =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3);
  Path fourth =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4);


  Job job = Job.getInstance(conf);
  job.setMapperClass(DistributedCacheCheckerMapper.class);
  job.setReducerClass(DistributedCacheCheckerReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  FileInputFormat.setInputPaths(job, first);
  // Creates the Job Configuration
  job.addCacheFile(
    new URI(first.toUri().toString() + "#distributed.first.symlink"));
  job.addFileToClassPath(second);
  job.addArchiveToClassPath(third);
  job.addCacheArchive(fourth.toUri());
  job.setMaxMapAttempts(1); // speed up failures

  job.submit();
  assertTrue(job.waitForCompletion(false));
}
 
Example 5
Source Project: big-c   File: TestSpeculativeExecution.java    License: Apache License 2.0 5 votes vote down vote up
private Job runSpecTest(boolean mapspec, boolean redspec)
    throws IOException, ClassNotFoundException, InterruptedException {

  Path first = createTempFile("specexec_map_input1", "a\nz");
  Path secnd = createTempFile("specexec_map_input2", "a\nz");

  Configuration conf = mrCluster.getConfig();
  conf.setBoolean(MRJobConfig.MAP_SPECULATIVE,mapspec);
  conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE,redspec);
  conf.setClass(MRJobConfig.MR_AM_TASK_ESTIMATOR,
          TestSpecEstimator.class,
          TaskRuntimeEstimator.class);

  Job job = Job.getInstance(conf);
  job.setJarByClass(TestSpeculativeExecution.class);
  job.setMapperClass(SpeculativeMapper.class);
  job.setReducerClass(SpeculativeReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setNumReduceTasks(2);
  FileInputFormat.setInputPaths(job, first);
  FileInputFormat.addInputPath(job, secnd);
  FileOutputFormat.setOutputPath(job, TEST_OUT_DIR);

  // Delete output directory if it exists.
  try {
    localFs.delete(TEST_OUT_DIR,true);
  } catch (IOException e) {
    // ignore
  }

  // Creates the Job Configuration
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.setMaxMapAttempts(2);

  job.submit();

  return job;
}
 
Example 6
Source Project: tez   File: TestMRRJobs.java    License: Apache License 2.0 5 votes vote down vote up
@Test (timeout = 60000)
public void testFailingAttempt() throws IOException, InterruptedException,
    ClassNotFoundException {

  LOG.info("\n\n\nStarting testFailingAttempt().");

  if (!(new File(MiniTezCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniTezCluster.APPJAR
             + " not found. Not running test.");
    return;
  }

  Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());

  MRRSleepJob sleepJob = new MRRSleepJob();
  sleepJob.setConf(sleepConf);

  Job job = sleepJob.createJob(1, 1, 1, 1, 1,
      1, 1, 1, 1, 1);

  job.setJarByClass(MRRSleepJob.class);
  job.setMaxMapAttempts(3); // speed up failures
  job.getConfiguration().setBoolean(MRRSleepJob.MAP_THROW_ERROR, true);
  job.getConfiguration().set(MRRSleepJob.MAP_ERROR_TASK_IDS, "0");

  job.submit();
  boolean succeeded = job.waitForCompletion(true);
  Assert.assertTrue(succeeded);
  Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());

  // FIXME once counters and task progress can be obtained properly
  // TODO verify failed task diagnostics
}
 
Example 7
Source Project: incubator-tez   File: TestMRRJobs.java    License: Apache License 2.0 5 votes vote down vote up
@Test (timeout = 60000)
public void testFailingAttempt() throws IOException, InterruptedException,
    ClassNotFoundException {

  LOG.info("\n\n\nStarting testFailingAttempt().");

  if (!(new File(MiniTezCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniTezCluster.APPJAR
             + " not found. Not running test.");
    return;
  }

  Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());

  MRRSleepJob sleepJob = new MRRSleepJob();
  sleepJob.setConf(sleepConf);

  Job job = sleepJob.createJob(1, 1, 1, 1, 1,
      1, 1, 1, 1, 1);

  job.setJarByClass(MRRSleepJob.class);
  job.setMaxMapAttempts(3); // speed up failures
  job.getConfiguration().setBoolean(MRRSleepJob.MAP_THROW_ERROR, true);
  job.getConfiguration().set(MRRSleepJob.MAP_ERROR_TASK_IDS, "0");

  job.submit();
  boolean succeeded = job.waitForCompletion(true);
  Assert.assertTrue(succeeded);
  Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());

  // FIXME once counters and task progress can be obtained properly
  // TODO verify failed task diagnostics
}
 
Example 8
Source Project: incubator-tez   File: TestMRRJobs.java    License: Apache License 2.0 5 votes vote down vote up
@Test (timeout = 60000)
public void testFailingJob() throws IOException, InterruptedException,
    ClassNotFoundException {

  LOG.info("\n\n\nStarting testFailingJob().");

  if (!(new File(MiniTezCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniTezCluster.APPJAR
             + " not found. Not running test.");
    return;
  }

  Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());

  MRRSleepJob sleepJob = new MRRSleepJob();
  sleepJob.setConf(sleepConf);

  Job job = sleepJob.createJob(1, 1, 1, 1, 1,
      1, 1, 1, 1, 1);

  job.setJarByClass(MRRSleepJob.class);
  job.setMaxMapAttempts(1); // speed up failures
  job.getConfiguration().setBoolean(MRRSleepJob.MAP_FATAL_ERROR, true);
  job.getConfiguration().set(MRRSleepJob.MAP_ERROR_TASK_IDS, "*");

  job.submit();
  boolean succeeded = job.waitForCompletion(true);
  Assert.assertFalse(succeeded);
  Assert.assertEquals(JobStatus.State.FAILED, job.getJobState());

  // FIXME once counters and task progress can be obtained properly
  // TODO verify failed task diagnostics
}
 
Example 9
Source Project: hadoop   File: TestMiniMRChildTask.java    License: Apache License 2.0 4 votes vote down vote up
void runTestTaskEnv(JobConf conf, Path inDir, Path outDir, boolean oldConfigs) 
throws IOException, InterruptedException, ClassNotFoundException {
  String input = "The input";
  configure(conf, inDir, outDir, input, 
            EnvCheckMapper.class, EnvCheckReducer.class);
  // test 
  //  - new SET of new var (MY_PATH)
  //  - set of old var (LANG)
  //  - append to an old var from modified env (LD_LIBRARY_PATH)
  //  - append to an old var from tt's env (PATH)
  //  - append to a new var (NEW_PATH)
  String mapTaskEnvKey = JobConf.MAPRED_MAP_TASK_ENV;
  String reduceTaskEnvKey = JobConf.MAPRED_MAP_TASK_ENV;
  String mapTaskJavaOptsKey = JobConf.MAPRED_MAP_TASK_JAVA_OPTS;
  String reduceTaskJavaOptsKey = JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS;
  String mapTaskJavaOpts = MAP_OPTS_VAL;
  String reduceTaskJavaOpts = REDUCE_OPTS_VAL;
  conf.setBoolean(OLD_CONFIGS, oldConfigs);
  if (oldConfigs) {
    mapTaskEnvKey = reduceTaskEnvKey = JobConf.MAPRED_TASK_ENV;
    mapTaskJavaOptsKey = reduceTaskJavaOptsKey = JobConf.MAPRED_TASK_JAVA_OPTS;
    mapTaskJavaOpts = reduceTaskJavaOpts = TASK_OPTS_VAL;
  }
  conf.set(
      mapTaskEnvKey,
      Shell.WINDOWS ? "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=%LD_LIBRARY_PATH%;/tmp,"
          + "PATH=%PATH%;/tmp,NEW_PATH=%NEW_PATH%;/tmp"
          : "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp,"
              + "PATH=$PATH:/tmp,NEW_PATH=$NEW_PATH:/tmp");
  conf.set(
      reduceTaskEnvKey,
      Shell.WINDOWS ? "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=%LD_LIBRARY_PATH%;/tmp,"
          + "PATH=%PATH%;/tmp,NEW_PATH=%NEW_PATH%;/tmp"
          : "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp,"
              + "PATH=$PATH:/tmp,NEW_PATH=$NEW_PATH:/tmp");
  conf.set("path", System.getenv("PATH"));
  conf.set(mapTaskJavaOptsKey, mapTaskJavaOpts);
  conf.set(reduceTaskJavaOptsKey, reduceTaskJavaOpts);

  Job job = Job.getInstance(conf);
  job.addFileToClassPath(APP_JAR);
  job.setJarByClass(TestMiniMRChildTask.class);
  job.setMaxMapAttempts(1); // speed up failures
  job.waitForCompletion(true);
  boolean succeeded = job.waitForCompletion(true);
  assertTrue("The environment checker job failed.", succeeded);
}
 
Example 10
Source Project: hadoop   File: TestMRJobs.java    License: Apache License 2.0 4 votes vote down vote up
private void testSleepJobInternal(boolean useRemoteJar) throws Exception {
  LOG.info("\n\n\nStarting testSleepJob: useRemoteJar=" + useRemoteJar);

  if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
             + " not found. Not running test.");
    return;
  }

  Configuration sleepConf = new Configuration(mrCluster.getConfig());
  // set master address to local to test that local mode applied iff framework == local
  sleepConf.set(MRConfig.MASTER_ADDRESS, "local");	
  
  SleepJob sleepJob = new SleepJob();
  sleepJob.setConf(sleepConf);
 
  // job with 3 maps (10s) and numReduces reduces (5s), 1 "record" each:
  Job job = sleepJob.createJob(3, numSleepReducers, 10000, 1, 5000, 1);

  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  if (useRemoteJar) {
    final Path localJar = new Path(
        ClassUtil.findContainingJar(SleepJob.class));
    ConfigUtil.addLink(job.getConfiguration(), "/jobjars",
        localFs.makeQualified(localJar.getParent()).toUri());
    job.setJar("viewfs:///jobjars/" + localJar.getName());
  } else {
    job.setJarByClass(SleepJob.class);
  }
  job.setMaxMapAttempts(1); // speed up failures
  job.submit();
  String trackingUrl = job.getTrackingURL();
  String jobId = job.getJobID().toString();
  boolean succeeded = job.waitForCompletion(true);
  Assert.assertTrue(succeeded);
  Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
  Assert.assertTrue("Tracking URL was " + trackingUrl +
                    " but didn't Match Job ID " + jobId ,
        trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
  verifySleepJobCounters(job);
  verifyTaskProgress(job);
  
  // TODO later:  add explicit "isUber()" checks of some sort (extend
  // JobStatus?)--compare against MRJobConfig.JOB_UBERTASK_ENABLE value
}
 
Example 11
Source Project: hadoop   File: TestMRJobs.java    License: Apache License 2.0 4 votes vote down vote up
private void testJobClassloader(boolean useCustomClasses) throws IOException,
    InterruptedException, ClassNotFoundException {
  LOG.info("\n\n\nStarting testJobClassloader()"
      + " useCustomClasses=" + useCustomClasses);

  if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
             + " not found. Not running test.");
    return;
  }
  final Configuration sleepConf = new Configuration(mrCluster.getConfig());
  // set master address to local to test that local mode applied iff framework == local
  sleepConf.set(MRConfig.MASTER_ADDRESS, "local");
  sleepConf.setBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, true);
  if (useCustomClasses) {
    // to test AM loading user classes such as output format class, we want
    // to blacklist them from the system classes (they need to be prepended
    // as the first match wins)
    String systemClasses = ApplicationClassLoader.SYSTEM_CLASSES_DEFAULT;
    // exclude the custom classes from system classes
    systemClasses = "-" + CustomOutputFormat.class.getName() + ",-" +
        CustomSpeculator.class.getName() + "," +
        systemClasses;
    sleepConf.set(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER_SYSTEM_CLASSES,
        systemClasses);
  }
  sleepConf.set(MRJobConfig.IO_SORT_MB, TEST_IO_SORT_MB);
  sleepConf.set(MRJobConfig.MR_AM_LOG_LEVEL, Level.ALL.toString());
  sleepConf.set(MRJobConfig.MAP_LOG_LEVEL, Level.ALL.toString());
  sleepConf.set(MRJobConfig.REDUCE_LOG_LEVEL, Level.ALL.toString());
  sleepConf.set(MRJobConfig.MAP_JAVA_OPTS, "-verbose:class");
  final SleepJob sleepJob = new SleepJob();
  sleepJob.setConf(sleepConf);
  final Job job = sleepJob.createJob(1, 1, 10, 1, 10, 1);
  job.setMapperClass(ConfVerificationMapper.class);
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.setJarByClass(SleepJob.class);
  job.setMaxMapAttempts(1); // speed up failures
  if (useCustomClasses) {
    // set custom output format class and speculator class
    job.setOutputFormatClass(CustomOutputFormat.class);
    final Configuration jobConf = job.getConfiguration();
    jobConf.setClass(MRJobConfig.MR_AM_JOB_SPECULATOR, CustomSpeculator.class,
        Speculator.class);
    // speculation needs to be enabled for the speculator to be loaded
    jobConf.setBoolean(MRJobConfig.MAP_SPECULATIVE, true);
  }
  job.submit();
  boolean succeeded = job.waitForCompletion(true);
  Assert.assertTrue("Job status: " + job.getStatus().getFailureInfo(),
      succeeded);
}
 
Example 12
Source Project: hadoop   File: TestMRJobs.java    License: Apache License 2.0 4 votes vote down vote up
@Test (timeout = 60000)
public void testRandomWriter() throws IOException, InterruptedException,
    ClassNotFoundException {
  
  LOG.info("\n\n\nStarting testRandomWriter().");
  if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
             + " not found. Not running test.");
    return;
  }

  RandomTextWriterJob randomWriterJob = new RandomTextWriterJob();
  mrCluster.getConfig().set(RandomTextWriterJob.TOTAL_BYTES, "3072");
  mrCluster.getConfig().set(RandomTextWriterJob.BYTES_PER_MAP, "1024");
  Job job = randomWriterJob.createJob(mrCluster.getConfig());
  Path outputDir = new Path(OUTPUT_ROOT_DIR, "random-output");
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setSpeculativeExecution(false);
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.setJarByClass(RandomTextWriterJob.class);
  job.setMaxMapAttempts(1); // speed up failures
  job.submit();
  String trackingUrl = job.getTrackingURL();
  String jobId = job.getJobID().toString();
  boolean succeeded = job.waitForCompletion(true);
  Assert.assertTrue(succeeded);
  Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
  Assert.assertTrue("Tracking URL was " + trackingUrl +
                    " but didn't Match Job ID " + jobId ,
        trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
  
  // Make sure there are three files in the output-dir
  
  RemoteIterator<FileStatus> iterator =
      FileContext.getFileContext(mrCluster.getConfig()).listStatus(
          outputDir);
  int count = 0;
  while (iterator.hasNext()) {
    FileStatus file = iterator.next();
    if (!file.getPath().getName()
        .equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
      count++;
    }
  }
  Assert.assertEquals("Number of part files is wrong!", 3, count);
  verifyRandomWriterCounters(job);

  // TODO later:  add explicit "isUber()" checks of some sort
}
 
Example 13
Source Project: incubator-tez   File: TestMRRJobs.java    License: Apache License 2.0 4 votes vote down vote up
@Test (timeout = 60000)
public void testMRRSleepJobWithCompression() throws IOException,
    InterruptedException, ClassNotFoundException {
  LOG.info("\n\n\nStarting testMRRSleepJobWithCompression().");

  if (!(new File(MiniTezCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniTezCluster.APPJAR
             + " not found. Not running test.");
    return;
  }

  Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());

  MRRSleepJob sleepJob = new MRRSleepJob();
  sleepJob.setConf(sleepConf);

  Job job = sleepJob.createJob(1, 1, 2, 1, 1,
      1, 1, 1, 1, 1);

  job.setJarByClass(MRRSleepJob.class);
  job.setMaxMapAttempts(1); // speed up failures

  // enable compression
  job.getConfiguration().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  job.getConfiguration().set(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC,
      DefaultCodec.class.getName());

  job.submit();
  String trackingUrl = job.getTrackingURL();
  String jobId = job.getJobID().toString();
  boolean succeeded = job.waitForCompletion(true);
  Assert.assertTrue(succeeded);
  Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
  Assert.assertTrue("Tracking URL was " + trackingUrl +
                    " but didn't Match Job ID " + jobId ,
        trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));

  // FIXME once counters and task progress can be obtained properly
  // TODO use dag client to test counters and task progress?
  // what about completed jobs?

}
 
Example 14
Source Project: big-c   File: TestMiniMRChildTask.java    License: Apache License 2.0 4 votes vote down vote up
void runTestTaskEnv(JobConf conf, Path inDir, Path outDir, boolean oldConfigs) 
throws IOException, InterruptedException, ClassNotFoundException {
  String input = "The input";
  configure(conf, inDir, outDir, input, 
            EnvCheckMapper.class, EnvCheckReducer.class);
  // test 
  //  - new SET of new var (MY_PATH)
  //  - set of old var (LANG)
  //  - append to an old var from modified env (LD_LIBRARY_PATH)
  //  - append to an old var from tt's env (PATH)
  //  - append to a new var (NEW_PATH)
  String mapTaskEnvKey = JobConf.MAPRED_MAP_TASK_ENV;
  String reduceTaskEnvKey = JobConf.MAPRED_MAP_TASK_ENV;
  String mapTaskJavaOptsKey = JobConf.MAPRED_MAP_TASK_JAVA_OPTS;
  String reduceTaskJavaOptsKey = JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS;
  String mapTaskJavaOpts = MAP_OPTS_VAL;
  String reduceTaskJavaOpts = REDUCE_OPTS_VAL;
  conf.setBoolean(OLD_CONFIGS, oldConfigs);
  if (oldConfigs) {
    mapTaskEnvKey = reduceTaskEnvKey = JobConf.MAPRED_TASK_ENV;
    mapTaskJavaOptsKey = reduceTaskJavaOptsKey = JobConf.MAPRED_TASK_JAVA_OPTS;
    mapTaskJavaOpts = reduceTaskJavaOpts = TASK_OPTS_VAL;
  }
  conf.set(
      mapTaskEnvKey,
      Shell.WINDOWS ? "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=%LD_LIBRARY_PATH%;/tmp,"
          + "PATH=%PATH%;/tmp,NEW_PATH=%NEW_PATH%;/tmp"
          : "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp,"
              + "PATH=$PATH:/tmp,NEW_PATH=$NEW_PATH:/tmp");
  conf.set(
      reduceTaskEnvKey,
      Shell.WINDOWS ? "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=%LD_LIBRARY_PATH%;/tmp,"
          + "PATH=%PATH%;/tmp,NEW_PATH=%NEW_PATH%;/tmp"
          : "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp,"
              + "PATH=$PATH:/tmp,NEW_PATH=$NEW_PATH:/tmp");
  conf.set("path", System.getenv("PATH"));
  conf.set(mapTaskJavaOptsKey, mapTaskJavaOpts);
  conf.set(reduceTaskJavaOptsKey, reduceTaskJavaOpts);

  Job job = Job.getInstance(conf);
  job.addFileToClassPath(APP_JAR);
  job.setJarByClass(TestMiniMRChildTask.class);
  job.setMaxMapAttempts(1); // speed up failures
  job.waitForCompletion(true);
  boolean succeeded = job.waitForCompletion(true);
  assertTrue("The environment checker job failed.", succeeded);
}
 
Example 15
Source Project: tez   File: TestMRRJobs.java    License: Apache License 2.0 4 votes vote down vote up
@Test (timeout = 60000)
public void testRandomWriter() throws IOException, InterruptedException,
    ClassNotFoundException {

  LOG.info("\n\n\nStarting testRandomWriter().");
  if (!(new File(MiniTezCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniTezCluster.APPJAR
             + " not found. Not running test.");
    return;
  }

  RandomTextWriterJob randomWriterJob = new RandomTextWriterJob();
  mrrTezCluster.getConfig().set(RandomTextWriterJob.TOTAL_BYTES, "3072");
  mrrTezCluster.getConfig().set(RandomTextWriterJob.BYTES_PER_MAP, "1024");
  Job job = randomWriterJob.createJob(mrrTezCluster.getConfig());
  Path outputDir = new Path(OUTPUT_ROOT_DIR, "random-output");
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setSpeculativeExecution(false);
  job.setJarByClass(RandomTextWriterJob.class);
  job.setMaxMapAttempts(1); // speed up failures
  job.submit();
  String trackingUrl = job.getTrackingURL();
  String jobId = job.getJobID().toString();
  boolean succeeded = job.waitForCompletion(true);
  Assert.assertTrue(succeeded);
  Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
  Assert.assertTrue("Tracking URL was " + trackingUrl +
                    " but didn't Match Job ID " + jobId ,
        trackingUrl.contains(jobId.substring(jobId.indexOf("_"))));

  // Make sure there are three files in the output-dir

  RemoteIterator<FileStatus> iterator =
      FileContext.getFileContext(mrrTezCluster.getConfig()).listStatus(
          outputDir);
  int count = 0;
  while (iterator.hasNext()) {
    FileStatus file = iterator.next();
    if (!file.getPath().getName()
        .equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
      count++;
    }
  }
  Assert.assertEquals("Number of part files is wrong!", 3, count);

}
 
Example 16
Source Project: big-c   File: TestMRJobs.java    License: Apache License 2.0 4 votes vote down vote up
private void testJobClassloader(boolean useCustomClasses) throws IOException,
    InterruptedException, ClassNotFoundException {
  LOG.info("\n\n\nStarting testJobClassloader()"
      + " useCustomClasses=" + useCustomClasses);

  if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
             + " not found. Not running test.");
    return;
  }
  final Configuration sleepConf = new Configuration(mrCluster.getConfig());
  // set master address to local to test that local mode applied iff framework == local
  sleepConf.set(MRConfig.MASTER_ADDRESS, "local");
  sleepConf.setBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, true);
  if (useCustomClasses) {
    // to test AM loading user classes such as output format class, we want
    // to blacklist them from the system classes (they need to be prepended
    // as the first match wins)
    String systemClasses = ApplicationClassLoader.SYSTEM_CLASSES_DEFAULT;
    // exclude the custom classes from system classes
    systemClasses = "-" + CustomOutputFormat.class.getName() + ",-" +
        CustomSpeculator.class.getName() + "," +
        systemClasses;
    sleepConf.set(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER_SYSTEM_CLASSES,
        systemClasses);
  }
  sleepConf.set(MRJobConfig.IO_SORT_MB, TEST_IO_SORT_MB);
  sleepConf.set(MRJobConfig.MR_AM_LOG_LEVEL, Level.ALL.toString());
  sleepConf.set(MRJobConfig.MAP_LOG_LEVEL, Level.ALL.toString());
  sleepConf.set(MRJobConfig.REDUCE_LOG_LEVEL, Level.ALL.toString());
  sleepConf.set(MRJobConfig.MAP_JAVA_OPTS, "-verbose:class");
  final SleepJob sleepJob = new SleepJob();
  sleepJob.setConf(sleepConf);
  final Job job = sleepJob.createJob(1, 1, 10, 1, 10, 1);
  job.setMapperClass(ConfVerificationMapper.class);
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.setJarByClass(SleepJob.class);
  job.setMaxMapAttempts(1); // speed up failures
  if (useCustomClasses) {
    // set custom output format class and speculator class
    job.setOutputFormatClass(CustomOutputFormat.class);
    final Configuration jobConf = job.getConfiguration();
    jobConf.setClass(MRJobConfig.MR_AM_JOB_SPECULATOR, CustomSpeculator.class,
        Speculator.class);
    // speculation needs to be enabled for the speculator to be loaded
    jobConf.setBoolean(MRJobConfig.MAP_SPECULATIVE, true);
  }
  job.submit();
  boolean succeeded = job.waitForCompletion(true);
  Assert.assertTrue("Job status: " + job.getStatus().getFailureInfo(),
      succeeded);
}
 
Example 17
Source Project: big-c   File: TestMRJobs.java    License: Apache License 2.0 4 votes vote down vote up
@Test (timeout = 60000)
public void testRandomWriter() throws IOException, InterruptedException,
    ClassNotFoundException {
  
  LOG.info("\n\n\nStarting testRandomWriter().");
  if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
             + " not found. Not running test.");
    return;
  }

  RandomTextWriterJob randomWriterJob = new RandomTextWriterJob();
  mrCluster.getConfig().set(RandomTextWriterJob.TOTAL_BYTES, "3072");
  mrCluster.getConfig().set(RandomTextWriterJob.BYTES_PER_MAP, "1024");
  Job job = randomWriterJob.createJob(mrCluster.getConfig());
  Path outputDir = new Path(OUTPUT_ROOT_DIR, "random-output");
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setSpeculativeExecution(false);
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.setJarByClass(RandomTextWriterJob.class);
  job.setMaxMapAttempts(1); // speed up failures
  job.submit();
  String trackingUrl = job.getTrackingURL();
  String jobId = job.getJobID().toString();
  boolean succeeded = job.waitForCompletion(true);
  Assert.assertTrue(succeeded);
  Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
  Assert.assertTrue("Tracking URL was " + trackingUrl +
                    " but didn't Match Job ID " + jobId ,
        trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
  
  // Make sure there are three files in the output-dir
  
  RemoteIterator<FileStatus> iterator =
      FileContext.getFileContext(mrCluster.getConfig()).listStatus(
          outputDir);
  int count = 0;
  while (iterator.hasNext()) {
    FileStatus file = iterator.next();
    if (!file.getPath().getName()
        .equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
      count++;
    }
  }
  Assert.assertEquals("Number of part files is wrong!", 3, count);
  verifyRandomWriterCounters(job);

  // TODO later:  add explicit "isUber()" checks of some sort
}
 
Example 18
Source Project: big-c   File: TestMRJobs.java    License: Apache License 2.0 4 votes vote down vote up
public void _testDistributedCache(String jobJarPath) throws Exception {
  if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
         + " not found. Not running test.");
    return;
  }

  // Create a temporary file of length 1.
  Path first = createTempFile("distributed.first", "x");
  // Create two jars with a single file inside them.
  Path second =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2);
  Path third =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3);
  Path fourth =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4);

  Job job = Job.getInstance(mrCluster.getConfig());
  
  // Set the job jar to a new "dummy" jar so we can check that its extracted 
  // properly
  job.setJar(jobJarPath);
  // Because the job jar is a "dummy" jar, we need to include the jar with
  // DistributedCacheChecker or it won't be able to find it
  Path distributedCacheCheckerJar = new Path(
          JarFinder.getJar(DistributedCacheChecker.class));
  job.addFileToClassPath(distributedCacheCheckerJar.makeQualified(
          localFs.getUri(), distributedCacheCheckerJar.getParent()));
  
  job.setMapperClass(DistributedCacheChecker.class);
  job.setOutputFormatClass(NullOutputFormat.class);

  FileInputFormat.setInputPaths(job, first);
  // Creates the Job Configuration
  job.addCacheFile(
      new URI(first.toUri().toString() + "#distributed.first.symlink"));
  job.addFileToClassPath(second);
  // The AppMaster jar itself
  job.addFileToClassPath(
          APP_JAR.makeQualified(localFs.getUri(), APP_JAR.getParent())); 
  job.addArchiveToClassPath(third);
  job.addCacheArchive(fourth.toUri());
  job.setMaxMapAttempts(1); // speed up failures

  job.submit();
  String trackingUrl = job.getTrackingURL();
  String jobId = job.getJobID().toString();
  Assert.assertTrue(job.waitForCompletion(false));
  Assert.assertTrue("Tracking URL was " + trackingUrl +
                    " but didn't Match Job ID " + jobId ,
        trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
}
 
Example 19
Source Project: Halyard   File: HalyardBulkExport.java    License: Apache License 2.0 4 votes vote down vote up
@Override
protected int run(CommandLine cmd) throws Exception {
    if (!cmd.getArgList().isEmpty()) throw new HalyardExport.ExportException("Unknown arguments: " + cmd.getArgList().toString());
    String source = cmd.getOptionValue('s');
    String queryFiles = cmd.getOptionValue('q');
    String target = cmd.getOptionValue('t');
    if (!target.contains("{0}")) {
        throw new HalyardExport.ExportException("Bulk export target must contain '{0}' to be replaced by stripped filename of the actual SPARQL query.");
    }
    getConf().set(SOURCE, source);
    getConf().set(TARGET, target);
    String driver = cmd.getOptionValue('c');
    if (driver != null) {
        getConf().set(JDBC_DRIVER, driver);
    }
    String props[] = cmd.getOptionValues('p');
    if (props != null) {
        for (int i=0; i<props.length; i++) {
            props[i] = Base64.encodeBase64String(props[i].getBytes(StandardCharsets.UTF_8));
        }
        getConf().setStrings(JDBC_PROPERTIES, props);
    }
    if (cmd.hasOption('i')) getConf().set(HalyardBulkUpdate.ELASTIC_INDEX_URL, cmd.getOptionValue('i'));
    TableMapReduceUtil.addDependencyJars(getConf(),
           HalyardExport.class,
           NTriplesUtil.class,
           Rio.class,
           AbstractRDFHandler.class,
           RDFFormat.class,
           RDFParser.class,
           HTable.class,
           HBaseConfiguration.class,
           AuthenticationProtos.class,
           Trace.class,
           Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    String cp = cmd.getOptionValue('l');
    if (cp != null) {
        String jars[] = cp.split(":");
        StringBuilder newCp = new StringBuilder();
        for (int i=0; i<jars.length; i++) {
            if (i > 0) newCp.append(':');
            newCp.append(addTmpFile(jars[i])); //append clappspath entris to tmpfiles and trim paths from the classpath
        }
        getConf().set(JDBC_CLASSPATH, newCp.toString());
    }
    Job job = Job.getInstance(getConf(), "HalyardBulkExport " + source + " -> " + target);
    job.setJarByClass(HalyardBulkExport.class);
    job.setMaxMapAttempts(1);
    job.setMapperClass(BulkExportMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Void.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(QueryInputFormat.class);
    QueryInputFormat.setQueriesFromDirRecursive(job.getConfiguration(), queryFiles, false, 0);
    job.setOutputFormatClass(NullOutputFormat.class);
    TableMapReduceUtil.initCredentials(job);
    if (job.waitForCompletion(true)) {
        LOG.info("Bulk Export Completed..");
        return 0;
    }
    return -1;
}
 
Example 20
Source Project: incubator-tez   File: TestMRRJobs.java    License: Apache License 2.0 4 votes vote down vote up
@Test (timeout = 60000)
public void testRandomWriter() throws IOException, InterruptedException,
    ClassNotFoundException {

  LOG.info("\n\n\nStarting testRandomWriter().");
  if (!(new File(MiniTezCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniTezCluster.APPJAR
             + " not found. Not running test.");
    return;
  }

  RandomTextWriterJob randomWriterJob = new RandomTextWriterJob();
  mrrTezCluster.getConfig().set(RandomTextWriterJob.TOTAL_BYTES, "3072");
  mrrTezCluster.getConfig().set(RandomTextWriterJob.BYTES_PER_MAP, "1024");
  Job job = randomWriterJob.createJob(mrrTezCluster.getConfig());
  Path outputDir = new Path(OUTPUT_ROOT_DIR, "random-output");
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setSpeculativeExecution(false);
  job.setJarByClass(RandomTextWriterJob.class);
  job.setMaxMapAttempts(1); // speed up failures
  job.submit();
  String trackingUrl = job.getTrackingURL();
  String jobId = job.getJobID().toString();
  boolean succeeded = job.waitForCompletion(true);
  Assert.assertTrue(succeeded);
  Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
  Assert.assertTrue("Tracking URL was " + trackingUrl +
                    " but didn't Match Job ID " + jobId ,
        trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));

  // Make sure there are three files in the output-dir

  RemoteIterator<FileStatus> iterator =
      FileContext.getFileContext(mrrTezCluster.getConfig()).listStatus(
          outputDir);
  int count = 0;
  while (iterator.hasNext()) {
    FileStatus file = iterator.next();
    if (!file.getPath().getName()
        .equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
      count++;
    }
  }
  Assert.assertEquals("Number of part files is wrong!", 3, count);

}