org.apache.hadoop.mapreduce.MRJobConfig Java Examples

The following examples show how to use org.apache.hadoop.mapreduce.MRJobConfig. Each example is taken from an open source project; the source file, project, and license are noted above the code.
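Before the individual examples, here is a minimal, self-contained sketch of the pattern they all share: configuring a job through the MRJobConfig constants rather than hard-coded property strings. The job name and option values below are illustrative only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class MRJobConfigDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Prefer the MRJobConfig constants over raw keys such as
    // "mapreduce.map.speculative"; the constants track the framework's property names.
    conf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 4);
    conf.set(MRJobConfig.MAP_JAVA_OPTS, "-Xmx512m");

    Job job = Job.getInstance(conf, "mrjobconfig-demo");
    // The values are visible through the job's configuration.
    System.out.println(job.getConfiguration().get(MRJobConfig.MAP_JAVA_OPTS));
  }
}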
Example #1
Source File: TestJobImpl.java    From hadoop with Apache License 2.0
@Test(timeout=20000)
public void testKilledDuringCommit() throws Exception {
  Configuration conf = new Configuration();
  conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
  AsyncDispatcher dispatcher = new AsyncDispatcher();
  dispatcher.init(conf);
  dispatcher.start();
  CyclicBarrier syncBarrier = new CyclicBarrier(2);
  OutputCommitter committer = new WaitingOutputCommitter(syncBarrier, true);
  CommitterEventHandler commitHandler =
      createCommitterEventHandler(dispatcher, committer);
  commitHandler.init(conf);
  commitHandler.start();

  JobImpl job = createRunningStubbedJob(conf, dispatcher, 2, null);
  completeJobTasks(job);
  assertJobState(job, JobStateInternal.COMMITTING);

  syncBarrier.await();
  job.handle(new JobEvent(job.getID(), JobEventType.JOB_KILL));
  assertJobState(job, JobStateInternal.KILLED);
  dispatcher.stop();
  commitHandler.stop();
}
 
Example #2
Source File: PipeMapper.java    From hadoop with Apache License 2.0
public void configure(JobConf job) {
  super.configure(job);
  // Disable auto-increment of the processed-record counter. For streaming,
  // the number of processed records may be equal to or less than the number
  // of input records.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
  if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
    String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
    ignoreKey = job.getBoolean("stream.map.input.ignoreKey", 
      inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
  }
  
  try {
    mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
    mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
    numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
 
Example #3
Source File: TestCompressionEmulationUtils.java    From hadoop with Apache License 2.0
/**
 * Runs a GridMix data-generation job.
 */
private static void runDataGenJob(Configuration conf, Path tempDir) 
throws IOException, ClassNotFoundException, InterruptedException {
  JobClient client = new JobClient(conf);
  
  // get the local job runner
  conf.setInt(MRJobConfig.NUM_MAPS, 1);
  
  Job job = Job.getInstance(conf);
  
  CompressionEmulationUtil.configure(job);
  job.setInputFormatClass(CustomInputFormat.class);
  
  // set the output path
  FileOutputFormat.setOutputPath(job, tempDir);
  
  // submit and wait for completion
  job.submit();
  int ret = job.waitForCompletion(true) ? 0 : 1;

  assertEquals("Job Failed", 0, ret);
}
 
Example #4
Source File: TestBinaryTokenFile.java    From hadoop with Apache License 2.0
/**
 * Run a distributed job with the -tokenCacheFile option and
 * verify that no exception occurs.
 * @throws IOException
 */
@Test
public void testTokenCacheFile() throws IOException {
  Configuration conf = mrCluster.getConfig();
  createBinaryTokenFile(conf);
  // provide namenodes names for the job to get the delegation tokens for
  final String nnUri = dfsCluster.getURI(0).toString();
  conf.set(MRJobConfig.JOB_NAMENODES, nnUri + "," + nnUri);

  // using argument to pass the file name
  final String[] args = {
      "-tokenCacheFile", binaryTokenFileName.toString(),
      "-m", "1", "-r", "1", "-mt", "1", "-rt", "1"
      };
  int res = -1;
  try {
    res = ToolRunner.run(conf, new SleepJob(), args);
  } catch (Exception e) {
    System.out.println("Job failed with " + e.getLocalizedMessage());
    e.printStackTrace(System.out);
    fail("Job failed");
  }
  assertEquals("dist job res is not 0:", 0, res);
}
 
Example #5
Source File: MRApps.java    From big-c with Apache License 2.0
/**
 * Creates a {@link ApplicationClassLoader} if
 * {@link MRJobConfig#MAPREDUCE_JOB_CLASSLOADER} is set to true, and
 * the APP_CLASSPATH environment variable is set.
 * @param conf
 * @return the created job classloader, or null if the job classloader is not
 * enabled or the APP_CLASSPATH environment variable is not set
 * @throws IOException
 */
public static ClassLoader createJobClassLoader(Configuration conf)
    throws IOException {
  ClassLoader jobClassLoader = null;
  if (conf.getBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, false)) {
    String appClasspath = System.getenv(Environment.APP_CLASSPATH.key());
    if (appClasspath == null) {
      LOG.warn("Not creating job classloader since APP_CLASSPATH is not set.");
    } else {
      LOG.info("Creating job classloader");
      if (LOG.isDebugEnabled()) {
        LOG.debug("APP_CLASSPATH=" + appClasspath);
      }
      String[] systemClasses = getSystemClasses(conf);
      jobClassLoader = createJobClassLoader(appClasspath,
          systemClasses);
    }
  }
  return jobClassLoader;
}
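As a hedged companion to Example #5, the sketch below shows the configuration side that createJobClassLoader checks: enabling the isolated job classloader and, optionally, overriding the system-class list. The system-class prefixes shown are illustrative, not a recommendation.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class JobClassLoaderConfig {
  public static Configuration enableJobClassLoader() {
    Configuration conf = new Configuration();
    // Turn on the isolated job classloader that MRApps.createJobClassLoader() checks for.
    conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, true);
    // Optionally override which class prefixes must always load from the parent
    // (system) classloader; the value below is only an illustration.
    conf.set(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER_SYSTEM_CLASSES,
        "java.,javax.,org.apache.hadoop.");
    return conf;
  }
}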
 
Example #6
Source File: TestFail.java    From big-c with Apache License 2.0
@Test
//All Task attempts are timed out, leading to Job failure
public void testTimedOutTask() throws Exception {
  MRApp app = new TimeOutTaskMRApp(1, 0);
  Configuration conf = new Configuration();
  int maxAttempts = 2;
  conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, maxAttempts);
  // disable uberization (requires entire job to be reattempted, so max for
  // subtask attempts is overridden to 1)
  conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
  Job job = app.submit(conf);
  app.waitForState(job, JobState.FAILED);
  Map<TaskId,Task> tasks = job.getTasks();
  Assert.assertEquals("Num tasks is not correct", 1, tasks.size());
  Task task = tasks.values().iterator().next();
  Assert.assertEquals("Task state not correct", TaskState.FAILED,
      task.getReport().getTaskState());
  Map<TaskAttemptId, TaskAttempt> attempts =
      tasks.values().iterator().next().getAttempts();
  Assert.assertEquals("Num attempts is not correct", maxAttempts,
      attempts.size());
  for (TaskAttempt attempt : attempts.values()) {
    Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED,
        attempt.getReport().getTaskAttemptState());
  }
}
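Example #6 disables uber mode so that each task attempt runs separately; conversely, a job that wants tiny workloads to run inside the ApplicationMaster can enable it. A hedged sketch follows; the threshold values are illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class UberModeConfig {
  public static Configuration enableUberMode() {
    Configuration conf = new Configuration();
    // Let sufficiently small jobs run entirely inside the MR ApplicationMaster.
    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, true);
    // Upper bounds on job size for uber mode; the values here are illustrative.
    conf.setInt(MRJobConfig.JOB_UBERTASK_MAXMAPS, 4);
    conf.setInt(MRJobConfig.JOB_UBERTASK_MAXREDUCES, 1);
    return conf;
  }
}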
 
Example #7
Source File: ClientServiceDelegate.java    From hadoop with Apache License 2.0
public ClientServiceDelegate(Configuration conf, ResourceMgrDelegate rm,
    JobID jobId, MRClientProtocol historyServerProxy) {
  this.conf = new Configuration(conf); // Cloning for modifying.
  // For faster redirects from AM to HS.
  this.conf.setInt(
      CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY,
      this.conf.getInt(MRJobConfig.MR_CLIENT_TO_AM_IPC_MAX_RETRIES,
          MRJobConfig.DEFAULT_MR_CLIENT_TO_AM_IPC_MAX_RETRIES));
  this.conf.setInt(
      CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
      this.conf.getInt(MRJobConfig.MR_CLIENT_TO_AM_IPC_MAX_RETRIES_ON_TIMEOUTS,
          MRJobConfig.DEFAULT_MR_CLIENT_TO_AM_IPC_MAX_RETRIES_ON_TIMEOUTS));
  this.rm = rm;
  this.jobId = jobId;
  this.historyServerProxy = historyServerProxy;
  this.appId = TypeConverter.toYarn(jobId).getAppId();
  notRunningJobs = new HashMap<JobState, HashMap<String, NotRunningJob>>();
}
 
Example #8
Source File: TestSpeculativeExecution.java    From hadoop with Apache License 2.0
public void reduce(Text key, Iterable<IntWritable> values, 
                       Context context) throws IOException, InterruptedException {
  // Make one reducer slower for speculative execution
  TaskAttemptID taid = context.getTaskAttemptID();
  long sleepTime = 100;
  Configuration conf = context.getConfiguration();
  boolean test_speculate_reduce =
            conf.getBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);

  // IF TESTING REDUCE SPECULATIVE EXECUTION:
  //   Make the "*_r_000000_0" attempt take much longer than the others.
  //   When speculative execution is enabled, this should cause the attempt
  //   to be killed and restarted. At that point, the attempt ID will be
  //   "*_r_000000_1", so sleepTime will still remain 100ms.
  if ( (taid.getTaskType() == TaskType.REDUCE) && test_speculate_reduce
        && (taid.getTaskID().getId() == 0) && (taid.getId() == 0)) {
    sleepTime = 10000;
  }
  try{
    Thread.sleep(sleepTime);
  } catch(InterruptedException ie) {
    // Ignore
  }
  context.write(key,new IntWritable(0));
}
 
Example #9
Source File: CombineFileRecordReaderWrapper.java    From big-c with Apache License 2.0
private boolean fileSplitIsValid(TaskAttemptContext context) {
  Configuration conf = context.getConfiguration();
  long offset = conf.getLong(MRJobConfig.MAP_INPUT_START, 0L);
  if (fileSplit.getStart() != offset) {
    return false;
  }
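  // Note (added for clarity): despite its name, MRJobConfig.MAP_INPUT_PATH is the
  // property used for the split length ("mapreduce.map.input.length"), which is why
  // it is compared against fileSplit.getLength() below.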
  long length = conf.getLong(MRJobConfig.MAP_INPUT_PATH, 0L);
  if (fileSplit.getLength() != length) {
    return false;
  }
  String path = conf.get(MRJobConfig.MAP_INPUT_FILE);
  if (!fileSplit.getPath().toString().equals(path)) {
    return false;
  }
  return true;
}
 
Example #10
Source File: PipeReducer.java    From hadoop with Apache License 2.0
public void configure(JobConf job) {
  super.configure(job);
  // Disable auto-increment of the processed-record counter. For streaming,
  // the number of processed records may be equal to or less than the number
  // of input records.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);

  try {
    reduceOutFieldSeparator = job_.get("stream.reduce.output.field.separator", "\t").getBytes("UTF-8");
    reduceInputFieldSeparator = job_.get("stream.reduce.input.field.separator", "\t").getBytes("UTF-8");
    this.numOfReduceOutputKeyFields = job_.getInt("stream.num.reduce.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
 
Example #11
Source File: TestSpeculativeExecution.java    From big-c with Apache License 2.0
public void reduce(Text key, Iterable<IntWritable> values, 
                       Context context) throws IOException, InterruptedException {
  // Make one reducer slower for speculative execution
  TaskAttemptID taid = context.getTaskAttemptID();
  long sleepTime = 100;
  Configuration conf = context.getConfiguration();
  boolean test_speculate_reduce =
            conf.getBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);

  // IF TESTING REDUCE SPECULATIVE EXECUTION:
  //   Make the "*_r_000000_0" attempt take much longer than the others.
  //   When speculative execution is enabled, this should cause the attempt
  //   to be killed and restarted. At that point, the attempt ID will be
  //   "*_r_000000_1", so sleepTime will still remain 100ms.
  if ( (taid.getTaskType() == TaskType.REDUCE) && test_speculate_reduce
        && (taid.getTaskID().getId() == 0) && (taid.getId() == 0)) {
    sleepTime = 10000;
  }
  try{
    Thread.sleep(sleepTime);
  } catch(InterruptedException ie) {
    // Ignore
  }
  context.write(key,new IntWritable(0));
}
 
Example #12
Source File: TestGridmixMemoryEmulation.java    From big-c with Apache License 2.0
/**
 * Test disabled task heap options configuration in {@link GridmixJob}.
 */
@Test
@SuppressWarnings("deprecation")
public void testJavaHeapOptionsDisabled() throws Exception {
  Configuration gridmixConf = new Configuration();
  gridmixConf.setBoolean(GridmixJob.GRIDMIX_TASK_JVM_OPTIONS_ENABLE, false);
  
  // set the default values of simulated job
  gridmixConf.set(MRJobConfig.MAP_JAVA_OPTS, "-Xmx1m");
  gridmixConf.set(MRJobConfig.REDUCE_JAVA_OPTS, "-Xmx2m");
  gridmixConf.set(JobConf.MAPRED_TASK_JAVA_OPTS, "-Xmx3m");
  
  // set the default map and reduce task options for original job
  final JobConf originalConf = new JobConf();
  originalConf.set(MRJobConfig.MAP_JAVA_OPTS, "-Xmx10m");
  originalConf.set(MRJobConfig.REDUCE_JAVA_OPTS, "-Xmx20m");
  originalConf.set(JobConf.MAPRED_TASK_JAVA_OPTS, "-Xmx30m");
  
  // define a mock job
  MockJob story = new MockJob(originalConf) {
    public JobConf getJobConf() {
      return originalConf;
    }
  };
  
  GridmixJob job = new DummyGridmixJob(gridmixConf, story);
  Job simulatedJob = job.getJob();
  Configuration simulatedConf = simulatedJob.getConfiguration();
  
  assertEquals("Map heap options works when disabled!", "-Xmx1m", 
               simulatedConf.get(MRJobConfig.MAP_JAVA_OPTS));
  assertEquals("Reduce heap options works when disabled!", "-Xmx2m", 
               simulatedConf.get(MRJobConfig.REDUCE_JAVA_OPTS));
  assertEquals("Task heap options works when disabled!", "-Xmx3m", 
               simulatedConf.get(JobConf.MAPRED_TASK_JAVA_OPTS));
}
 
Example #13
Source File: TestJobEndNotifier.java    From hadoop with Apache License 2.0
private void testProxyConfiguration(Configuration conf) {
  conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_PROXY, "somehost");
  setConf(conf);
  Assert.assertTrue("Proxy shouldn't be set because port wasn't specified",
    proxyToUse.type() == Proxy.Type.DIRECT);
  conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_PROXY, "somehost:someport");
  setConf(conf);
  Assert.assertTrue("Proxy shouldn't be set because port wasn't numeric",
    proxyToUse.type() == Proxy.Type.DIRECT);
  conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_PROXY, "somehost:1000");
  setConf(conf);
  Assert.assertTrue("Proxy should have been set but wasn't ",
    proxyToUse.toString().equals("HTTP @ somehost:1000"));
  conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_PROXY, "socks@somehost:1000");
  setConf(conf);
  Assert.assertTrue("Proxy should have been socks but wasn't ",
    proxyToUse.toString().equals("SOCKS @ somehost:1000"));
  conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_PROXY, "SOCKS@somehost:1000");
  setConf(conf);
  Assert.assertTrue("Proxy should have been socks but wasn't ",
    proxyToUse.toString().equals("SOCKS @ somehost:1000"));
  conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_PROXY, "sfafn@somehost:1000");
  setConf(conf);
  Assert.assertTrue("Proxy should have been http but wasn't ",
    proxyToUse.toString().equals("HTTP @ somehost:1000"));
  
}
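The proxy key exercised in Example #13 is normally set alongside the notification URL itself. A hedged sketch of that configuration follows; the callback host and proxy are hypothetical, and the $jobId/$jobStatus placeholders are the ones the framework substitutes at notification time.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class JobEndNotificationConfig {
  public static Configuration withNotification() {
    Configuration conf = new Configuration();
    // Callback URL invoked when the job finishes; $jobId and $jobStatus are
    // substituted by the framework. The host below is hypothetical.
    conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_URL,
        "http://callback.example.com/notify?jobid=$jobId&status=$jobStatus");
    // Optional proxy in the host:port (or socks@host:port) form parsed by the test above.
    conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_PROXY, "proxyhost:1000");
    return conf;
  }
}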
 
Example #14
Source File: HadoopArchives.java    From hadoop with Apache License 2.0
public void configure(JobConf conf) {
  this.conf = conf;

  // this is tightly tied to map reduce
  // since it does not expose an api 
  // to get the partition
  partId = conf.getInt(MRJobConfig.TASK_PARTITION, -1);
  // create a file name using the partition
  // we need to write to this directory
  tmpOutputDir = FileOutputFormat.getWorkOutputPath(conf);
  blockSize = conf.getLong(HAR_BLOCKSIZE_LABEL, blockSize);
  // get the output path and write to the tmp 
  // directory 
  partname = "part-" + partId;
  tmpOutput = new Path(tmpOutputDir, partname);
  rootPath = (conf.get(SRC_PARENT_LABEL, null) == null) ? null :
              new Path(conf.get(SRC_PARENT_LABEL));
  if (rootPath == null) {
    throw new RuntimeException("Unable to read parent " +
    		"path for har from config");
  }
  try {
    destFs = tmpOutput.getFileSystem(conf);
    //this was a stale copy
    if (destFs.exists(tmpOutput)) {
      destFs.delete(tmpOutput, false);
    } 
    partStream = destFs.create(tmpOutput, false, conf.getInt("io.file.buffer.size", 4096), 
        destFs.getDefaultReplication(tmpOutput), blockSize);
  } catch(IOException ie) {
    throw new RuntimeException("Unable to open output file " + tmpOutput, ie);
  }
  buffer = new byte[buf_size];
}
 
Example #15
Source File: TestYARNRunner.java    From hadoop with Apache License 2.0
@Test(timeout=20000)
public void testWarnCommandOpts() throws Exception {
  Logger logger = Logger.getLogger(YARNRunner.class);
  
  ByteArrayOutputStream bout = new ByteArrayOutputStream();
  Layout layout = new SimpleLayout();
  Appender appender = new WriterAppender(layout, bout);
  logger.addAppender(appender);
  
  JobConf jobConf = new JobConf();
  
  jobConf.set(MRJobConfig.MR_AM_ADMIN_COMMAND_OPTS, "-Djava.net.preferIPv4Stack=true -Djava.library.path=foo");
  jobConf.set(MRJobConfig.MR_AM_COMMAND_OPTS, "-Xmx1024m -Djava.library.path=bar");
  
  YARNRunner yarnRunner = new YARNRunner(jobConf);
  
  @SuppressWarnings("unused")
  ApplicationSubmissionContext submissionContext =
      buildSubmitContext(yarnRunner, jobConf);
 
  String logMsg = bout.toString();
  assertTrue(logMsg.contains("WARN - Usage of -Djava.library.path in " + 
  		"yarn.app.mapreduce.am.admin-command-opts can cause programs to no " +
      "longer function if hadoop native libraries are used. These values " + 
  		"should be set as part of the LD_LIBRARY_PATH in the app master JVM " +
      "env using yarn.app.mapreduce.am.admin.user.env config settings."));
  assertTrue(logMsg.contains("WARN - Usage of -Djava.library.path in " + 
      "yarn.app.mapreduce.am.command-opts can cause programs to no longer " +
      "function if hadoop native libraries are used. These values should " +
      "be set as part of the LD_LIBRARY_PATH in the app master JVM env " +
      "using yarn.app.mapreduce.am.env config settings."));
}
 
Example #16
Source File: TestMRCJCFileOutputCommitter.java    From big-c with Apache License 2.0
@SuppressWarnings("unchecked")
public void testAbort() throws IOException, InterruptedException {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // do setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeOutput(theRecordWriter, tContext);

  // do abort
  committer.abortTask(tContext);
  File expectedFile = new File(new Path(committer.getWorkPath(), partFile)
      .toString());
  assertFalse("task temp dir still exists", expectedFile.exists());

  committer.abortJob(jContext, JobStatus.State.FAILED);
  expectedFile = new File(new Path(outDir, FileOutputCommitter.PENDING_DIR_NAME)
      .toString());
  assertFalse("job temp dir still exists", expectedFile.exists());
  assertEquals("Output directory not empty", 0, new File(outDir.toString())
      .listFiles().length);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
 
Example #17
Source File: DistributedCache.java    From hadoop with Apache License 2.0
/**
 * Add an archive path to the current set of classpath entries. It adds the
 * archive to cache as well.  Intended to be used by user code.
 *
 * @param archive Path of the archive to be added
 * @param conf Configuration that contains the classpath setting
 * @param fs FileSystem with respect to which {@code archive} should be interpreted.
 */
public static void addArchiveToClassPath
       (Path archive, Configuration conf, FileSystem fs)
    throws IOException {
  String classpath = conf.get(MRJobConfig.CLASSPATH_ARCHIVES);
  conf.set(MRJobConfig.CLASSPATH_ARCHIVES, classpath == null ? archive
           .toString() : classpath + "," + archive.toString());
  URI uri = fs.makeQualified(archive).toUri();

  addCacheArchive(uri, conf);
}
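A brief, hedged usage sketch of the addArchiveToClassPath method shown above. The archive path is hypothetical, and the import assumes the org.apache.hadoop.mapreduce.filecache variant of DistributedCache; newer code would typically use Job#addArchiveToClassPath instead.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.filecache.DistributedCache;

public class ArchiveClassPathExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path archive = new Path("/apps/libs/deps.jar");  // hypothetical path
    DistributedCache.addArchiveToClassPath(archive, conf, fs);
    // The archive is now listed both as a classpath entry and as a cache archive.
    System.out.println(conf.get(MRJobConfig.CLASSPATH_ARCHIVES));
    System.out.println(conf.get(MRJobConfig.CACHE_ARCHIVES));
  }
}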
 
Example #18
Source File: DistributedCache.java    From big-c with Apache License 2.0
/**
 * Add an archive path to the current set of classpath entries. It adds the
 * archive to cache as well.  Intended to be used by user code.
 *
 * @param archive Path of the archive to be added
 * @param conf Configuration that contains the classpath setting
 * @param fs FileSystem with respect to which {@code archive} should be interpreted.
 */
public static void addArchiveToClassPath
       (Path archive, Configuration conf, FileSystem fs)
    throws IOException {
  String classpath = conf.get(MRJobConfig.CLASSPATH_ARCHIVES);
  conf.set(MRJobConfig.CLASSPATH_ARCHIVES, classpath == null ? archive
           .toString() : classpath + "," + archive.toString());
  URI uri = fs.makeQualified(archive).toUri();

  addCacheArchive(uri, conf);
}
 
Example #19
Source File: TestDistCacheEmulation.java    From big-c with Apache License 2.0
/**
 * Configures 5 HDFS-based dist cache files and 1 local-FS-based dist cache
 * file in the given Configuration object <code>conf</code>.
 * 
 * @param conf
 *          configuration where dist cache config properties are to be set
 * @return array of sorted HDFS-based distributed cache file sizes
 * @throws IOException
 */
private long[] configureDummyDistCacheFiles(Configuration conf)
    throws IOException {
  String user = UserGroupInformation.getCurrentUser().getShortUserName();
  conf.set("user.name", user);
  
  // Set some dummy dist cache files in gridmix configuration so that they go
  // into the configuration of JobStory objects.
  String[] distCacheFiles = { "hdfs:///tmp/file1.txt",
      "/tmp/" + user + "/.staging/job_1/file2.txt",
      "hdfs:///user/user1/file3.txt", "/home/user2/file4.txt",
      "subdir1/file5.txt", "subdir2/file6.gz" };

  String[] fileSizes = { "400", "2500", "700", "1200", "1500", "500" };

  String[] visibilities = { "true", "false", "false", "true", "true", "false" };
  String[] timeStamps = { "1234", "2345", "34567", "5434", "125", "134" };

  // DistributedCache.setCacheFiles(fileCaches, conf);
  conf.setStrings(MRJobConfig.CACHE_FILES, distCacheFiles);
  conf.setStrings(MRJobConfig.CACHE_FILES_SIZES, fileSizes);
  conf.setStrings(JobContext.CACHE_FILE_VISIBILITIES, visibilities);
  conf.setStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS, timeStamps);

  // local FS based dist cache file whose path contains <user>/.staging is
  // not created on HDFS. So file size 2500 is not added to sortedFileSizes.
  long[] sortedFileSizes = new long[] { 1500, 1200, 700, 500, 400 };
  return sortedFileSizes;
}
 
Example #20
Source File: MROutputFiles.java    From big-c with Apache License 2.0
/**
 * Create a local reduce input file name.
 *
 * @param mapId a map task id
 * @param size the size of the file
 * @return path
 * @throws IOException
 */
@Override
public Path getInputFileForWrite(org.apache.hadoop.mapreduce.TaskID mapId,
                                 long size)
    throws IOException {
  return lDirAlloc.getLocalPathForWrite(String.format(
      REDUCE_INPUT_FILE_FORMAT_STRING, MRJobConfig.OUTPUT, mapId.getId()),
      size, getConf());
}
 
Example #21
Source File: TestFileOutputCommitter.java    From big-c with Apache License 2.0
private void testMapFileOutputCommitterInternal(int version)
    throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION,
      version);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());    
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  MapFileOutputFormat theOutputFormat = new MapFileOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeMapFileOutput(theRecordWriter, tContext);

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  // validate output
  validateMapFileOutputContent(FileSystem.get(job.getConfiguration()), outDir);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
 
Example #22
Source File: TestSpeculativeExecution.java    From hadoop with Apache License 2.0
private Job runSpecTest(boolean mapspec, boolean redspec)
    throws IOException, ClassNotFoundException, InterruptedException {

  Path first = createTempFile("specexec_map_input1", "a\nz");
  Path secnd = createTempFile("specexec_map_input2", "a\nz");

  Configuration conf = mrCluster.getConfig();
  conf.setBoolean(MRJobConfig.MAP_SPECULATIVE,mapspec);
  conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE,redspec);
  conf.setClass(MRJobConfig.MR_AM_TASK_ESTIMATOR,
          TestSpecEstimator.class,
          TaskRuntimeEstimator.class);

  Job job = Job.getInstance(conf);
  job.setJarByClass(TestSpeculativeExecution.class);
  job.setMapperClass(SpeculativeMapper.class);
  job.setReducerClass(SpeculativeReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setNumReduceTasks(2);
  FileInputFormat.setInputPaths(job, first);
  FileInputFormat.addInputPath(job, secnd);
  FileOutputFormat.setOutputPath(job, TEST_OUT_DIR);

  // Delete output directory if it exists.
  try {
    localFs.delete(TEST_OUT_DIR,true);
  } catch (IOException e) {
    // ignore
  }

  // Creates the Job Configuration
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.setMaxMapAttempts(2);

  job.submit();

  return job;
}
 
Example #23
Source File: DistributedCache.java    From hadoop with Apache License 2.0
/**
 * Get the file entries in classpath as an array of Path.
 * Used by internal DistributedCache code.
 * 
 * @param conf Configuration that contains the classpath setting
 * @deprecated Use {@link JobContext#getFileClassPaths()} instead 
 */
@Deprecated
public static Path[] getFileClassPaths(Configuration conf) {
  ArrayList<String> list = (ArrayList<String>)conf.getStringCollection(
                              MRJobConfig.CLASSPATH_FILES);
  if (list.size() == 0) { 
    return null; 
  }
  Path[] paths = new Path[list.size()];
  for (int i = 0; i < list.size(); i++) {
    paths[i] = new Path(list.get(i));
  }
  return paths;
}
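The method in Example #23 is deprecated in favor of JobContext#getFileClassPaths, as its javadoc notes. A hedged sketch of that route, using Job and a hypothetical jar path:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;

public class FileClassPathExample {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration());
    // Adds the jar to MRJobConfig.CLASSPATH_FILES and to the file cache.
    job.addFileToClassPath(new Path("/apps/libs/util.jar"));  // hypothetical path
    // Reads the classpath entries back via the non-deprecated API.
    for (Path p : job.getFileClassPaths()) {
      System.out.println(p);
    }
  }
}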
 
Example #24
Source File: TestMRApps.java    From big-c with Apache License 2.0
@SuppressWarnings("deprecation")
public void testSetupDistributedCacheConflictsFiles() throws Exception {
  Configuration conf = new Configuration();
  conf.setClass("fs.mockfs.impl", MockFileSystem.class, FileSystem.class);
  
  URI mockUri = URI.create("mockfs://mock/");
  FileSystem mockFs = ((FilterFileSystem)FileSystem.get(mockUri, conf))
      .getRawFileSystem();
  
  URI file = new URI("mockfs://mock/tmp/something.zip#something");
  Path filePath = new Path(file);
  URI file2 = new URI("mockfs://mock/tmp/something.txt#something");
  Path file2Path = new Path(file2);
  
  when(mockFs.resolvePath(filePath)).thenReturn(filePath);
  when(mockFs.resolvePath(file2Path)).thenReturn(file2Path);
  
  DistributedCache.addCacheFile(file, conf);
  DistributedCache.addCacheFile(file2, conf);
  conf.set(MRJobConfig.CACHE_FILE_TIMESTAMPS, "10,11");
  conf.set(MRJobConfig.CACHE_FILES_SIZES, "10,11");
  conf.set(MRJobConfig.CACHE_FILE_VISIBILITIES, "true,true");
  Map<String, LocalResource> localResources = 
    new HashMap<String, LocalResource>();
  MRApps.setupDistributedCache(conf, localResources);
  
  assertEquals(1, localResources.size());
  LocalResource lr = localResources.get("something");
  //First one wins
  assertNotNull(lr);
  assertEquals(10l, lr.getSize());
  assertEquals(10l, lr.getTimestamp());
  assertEquals(LocalResourceType.FILE, lr.getType());
}
 
Example #25
Source File: TestAMWebServicesJobConf.java    From big-c with Apache License 2.0
@Override
protected void configureServlets() {

  Path confPath = new Path(testConfDir.toString(),
      MRJobConfig.JOB_CONF_FILE);
  Configuration config = new Configuration();

  FileSystem localFs;
  try {
    localFs = FileSystem.getLocal(config);
    confPath = localFs.makeQualified(confPath);

    OutputStream out = localFs.create(confPath);
    try {
      conf.writeXml(out);
    } finally {
      out.close();
    }
    if (!localFs.exists(confPath)) {
      fail("error creating config file: " + confPath);
    }

  } catch (IOException e) {
    fail("error creating config file: " + e.getMessage());
  }

  appContext = new MockAppContext(0, 2, 1, confPath);

  bind(JAXBContextResolver.class);
  bind(AMWebServices.class);
  bind(GenericExceptionHandler.class);
  bind(AppContext.class).toInstance(appContext);
  bind(Configuration.class).toInstance(conf);

  serve("/*").with(GuiceContainer.class);
}
 
Example #26
Source File: TestStreamAggregate.java    From hadoop with Apache License 2.0
protected String[] genArgs() {
  return new String[] {
    "-input", INPUT_FILE.getAbsolutePath(),
    "-output", OUTPUT_DIR.getAbsolutePath(),
    "-mapper", map,
    "-reducer", "aggregate",
    "-jobconf", MRJobConfig.PRESERVE_FAILED_TASK_FILES + "=true",
    "-jobconf", "stream.tmpdir="+System.getProperty("test.build.data","/tmp")
  };
}
 
Example #27
Source File: TestTaskHeartbeatHandler.java    From hadoop with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testTimeout() throws InterruptedException {
  EventHandler mockHandler = mock(EventHandler.class);
  Clock clock = new SystemClock();
  TaskHeartbeatHandler hb = new TaskHeartbeatHandler(mockHandler, clock, 1);
  
  
  Configuration conf = new Configuration();
  conf.setInt(MRJobConfig.TASK_TIMEOUT, 10); //10 ms
  conf.setInt(MRJobConfig.TASK_TIMEOUT_CHECK_INTERVAL_MS, 10); //10 ms
  
  hb.init(conf);
  hb.start();
  try {
    ApplicationId appId = ApplicationId.newInstance(0l, 5);
    JobId jobId = MRBuilderUtils.newJobId(appId, 4);
    TaskId tid = MRBuilderUtils.newTaskId(jobId, 3, TaskType.MAP);
    TaskAttemptId taid = MRBuilderUtils.newTaskAttemptId(tid, 2);
    hb.register(taid);
    Thread.sleep(100);
    //Events only happen when the task is canceled
    verify(mockHandler, times(2)).handle(any(Event.class));
  } finally {
    hb.stop();
  }
}
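The timeout keys exercised by Example #27 are the same ones a long-running job can tune in production. A hedged configuration sketch follows; the values are illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class TaskTimeoutConfig {
  public static Configuration withLongerTaskTimeout() {
    Configuration conf = new Configuration();
    // Time (ms) a task may go without reporting progress before it is marked as failed.
    conf.setInt(MRJobConfig.TASK_TIMEOUT, 20 * 60 * 1000);  // 20 minutes, illustrative
    // How often (ms) the ApplicationMaster checks for timed-out tasks.
    conf.setInt(MRJobConfig.TASK_TIMEOUT_CHECK_INTERVAL_MS, 60 * 1000);
    return conf;
  }
}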
 
Example #28
Source File: TestFileOutputCommitter.java    From big-c with Apache License 2.0
public void testInvalidVersionNumber() throws IOException {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 3);
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  try {
    new FileOutputCommitter(outDir, tContext);
    fail("should've thrown an exception!");
  } catch (IOException e) {
    //test passed
  }
}
 
Example #29
Source File: TestJobImpl.java    From hadoop with Apache License 2.0
@Test(timeout=20000)
public void testKilledDuringKillAbort() throws Exception {
  Configuration conf = new Configuration();
  conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
  AsyncDispatcher dispatcher = new AsyncDispatcher();
  dispatcher.init(conf);
  dispatcher.start();
  OutputCommitter committer = new StubbedOutputCommitter() {
    @Override
    public synchronized void abortJob(JobContext jobContext, State state)
        throws IOException {
      while (!Thread.interrupted()) {
        try {
          wait();
        } catch (InterruptedException e) {
        }
      }
    }
  };
  CommitterEventHandler commitHandler =
      createCommitterEventHandler(dispatcher, committer);
  commitHandler.init(conf);
  commitHandler.start();

  JobImpl job = createStubbedJob(conf, dispatcher, 2, null);
  JobId jobId = job.getID();
  job.handle(new JobEvent(jobId, JobEventType.JOB_INIT));
  assertJobState(job, JobStateInternal.INITED);
  job.handle(new JobStartEvent(jobId));
  assertJobState(job, JobStateInternal.SETUP);

  job.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
  assertJobState(job, JobStateInternal.KILL_ABORT);

  job.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
  assertJobState(job, JobStateInternal.KILLED);
  dispatcher.stop();
  commitHandler.stop();
}
 
Example #30
Source File: TaskAttemptImpl.java    From hadoop with Apache License 2.0
private WrappedProgressSplitsBlock getProgressSplitBlock() {
  readLock.lock();
  try {
    if (progressSplitBlock == null) {
      progressSplitBlock = new WrappedProgressSplitsBlock(conf.getInt(
          MRJobConfig.MR_AM_NUM_PROGRESS_SPLITS,
          MRJobConfig.DEFAULT_MR_AM_NUM_PROGRESS_SPLITS));
    }
    return progressSplitBlock;
  } finally {
    readLock.unlock();
  }
}