org.apache.hadoop.mapreduce.MRJobConfig Java Examples
The following examples show how to use org.apache.hadoop.mapreduce.MRJobConfig.
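For orientation, the examples below all follow the same basic pattern: MRJobConfig defines String constants naming MapReduce configuration properties, and callers pass those constants to Configuration getters and setters instead of hard-coding property names. The following is a minimal, hypothetical sketch of that pattern (the class and method names are illustrative, not from any project; the constants used are the same ones that appear in the examples below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class MRJobConfigSketch {
  public static Job buildJob() throws Exception {
    Configuration conf = new Configuration();
    // Typed setters keyed by MRJobConfig constants instead of raw property strings
    conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2);
    conf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
    conf.set(MRJobConfig.MAP_JAVA_OPTS, "-Xmx512m");
    // Reading a value back, supplying a fallback default
    int maxAttempts = conf.getInt(MRJobConfig.MAP_MAX_ATTEMPTS, 4);
    System.out.println("map max attempts = " + maxAttempts);
    return Job.getInstance(conf);
  }
}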
Example #1
Source File: TestJobImpl.java From hadoop with Apache License 2.0
@Test(timeout=20000)
public void testKilledDuringCommit() throws Exception {
  Configuration conf = new Configuration();
  conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
  AsyncDispatcher dispatcher = new AsyncDispatcher();
  dispatcher.init(conf);
  dispatcher.start();
  CyclicBarrier syncBarrier = new CyclicBarrier(2);
  OutputCommitter committer = new WaitingOutputCommitter(syncBarrier, true);
  CommitterEventHandler commitHandler =
      createCommitterEventHandler(dispatcher, committer);
  commitHandler.init(conf);
  commitHandler.start();

  JobImpl job = createRunningStubbedJob(conf, dispatcher, 2, null);
  completeJobTasks(job);
  assertJobState(job, JobStateInternal.COMMITTING);

  syncBarrier.await();
  job.handle(new JobEvent(job.getID(), JobEventType.JOB_KILL));
  assertJobState(job, JobStateInternal.KILLED);
  dispatcher.stop();
  commitHandler.stop();
}
Example #2
Source File: PipeMapper.java From hadoop with Apache License 2.0
public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of
  //processed records could be different(equal or less) than the no of
  //records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
  if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
    String inputFormatClassName =
        job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
    ignoreKey = job.getBoolean("stream.map.input.ignoreKey",
        inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
  }

  try {
    mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
    mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
    numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
Example #3
Source File: TestCompressionEmulationUtils.java From hadoop with Apache License 2.0
/**
 * Runs a GridMix data-generation job.
 */
private static void runDataGenJob(Configuration conf, Path tempDir)
    throws IOException, ClassNotFoundException, InterruptedException {
  JobClient client = new JobClient(conf);

  // get the local job runner
  conf.setInt(MRJobConfig.NUM_MAPS, 1);

  Job job = Job.getInstance(conf);

  CompressionEmulationUtil.configure(job);
  job.setInputFormatClass(CustomInputFormat.class);

  // set the output path
  FileOutputFormat.setOutputPath(job, tempDir);

  // submit and wait for completion
  job.submit();
  int ret = job.waitForCompletion(true) ? 0 : 1;

  assertEquals("Job Failed", 0, ret);
}
Example #4
Source File: TestBinaryTokenFile.java From hadoop with Apache License 2.0
/**
 * run a distributed job with -tokenCacheFile option parameter and
 * verify that no exception happens.
 * @throws IOException
 */
@Test
public void testTokenCacheFile() throws IOException {
  Configuration conf = mrCluster.getConfig();
  createBinaryTokenFile(conf);
  // provide namenodes names for the job to get the delegation tokens for
  final String nnUri = dfsCluster.getURI(0).toString();
  conf.set(MRJobConfig.JOB_NAMENODES, nnUri + "," + nnUri);

  // using argument to pass the file name
  final String[] args = {
      "-tokenCacheFile", binaryTokenFileName.toString(),
      "-m", "1", "-r", "1", "-mt", "1", "-rt", "1"
      };
  int res = -1;
  try {
    res = ToolRunner.run(conf, new SleepJob(), args);
  } catch (Exception e) {
    System.out.println("Job failed with " + e.getLocalizedMessage());
    e.printStackTrace(System.out);
    fail("Job failed");
  }
  assertEquals("dist job res is not 0:", 0, res);
}
Example #5
Source File: MRApps.java From big-c with Apache License 2.0
/**
 * Creates a {@link ApplicationClassLoader} if
 * {@link MRJobConfig#MAPREDUCE_JOB_CLASSLOADER} is set to true, and
 * the APP_CLASSPATH environment variable is set.
 * @param conf
 * @return the created job classloader, or null if the job classloader is not
 *    enabled or the APP_CLASSPATH environment variable is not set
 * @throws IOException
 */
public static ClassLoader createJobClassLoader(Configuration conf)
    throws IOException {
  ClassLoader jobClassLoader = null;
  if (conf.getBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, false)) {
    String appClasspath = System.getenv(Environment.APP_CLASSPATH.key());
    if (appClasspath == null) {
      LOG.warn("Not creating job classloader since APP_CLASSPATH is not set.");
    } else {
      LOG.info("Creating job classloader");
      if (LOG.isDebugEnabled()) {
        LOG.debug("APP_CLASSPATH=" + appClasspath);
      }
      String[] systemClasses = getSystemClasses(conf);
      jobClassLoader = createJobClassLoader(appClasspath, systemClasses);
    }
  }
  return jobClassLoader;
}
Example #6
Source File: TestFail.java From big-c with Apache License 2.0
@Test
//All Task attempts are timed out, leading to Job failure
public void testTimedOutTask() throws Exception {
  MRApp app = new TimeOutTaskMRApp(1, 0);
  Configuration conf = new Configuration();
  int maxAttempts = 2;
  conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, maxAttempts);
  // disable uberization (requires entire job to be reattempted, so max for
  // subtask attempts is overridden to 1)
  conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
  Job job = app.submit(conf);
  app.waitForState(job, JobState.FAILED);
  Map<TaskId,Task> tasks = job.getTasks();
  Assert.assertEquals("Num tasks is not correct", 1, tasks.size());
  Task task = tasks.values().iterator().next();
  Assert.assertEquals("Task state not correct", TaskState.FAILED,
      task.getReport().getTaskState());
  Map<TaskAttemptId, TaskAttempt> attempts =
      tasks.values().iterator().next().getAttempts();
  Assert.assertEquals("Num attempts is not correct", maxAttempts,
      attempts.size());
  for (TaskAttempt attempt : attempts.values()) {
    Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED,
        attempt.getReport().getTaskAttemptState());
  }
}
Example #7
Source File: ClientServiceDelegate.java From hadoop with Apache License 2.0
public ClientServiceDelegate(Configuration conf, ResourceMgrDelegate rm,
    JobID jobId, MRClientProtocol historyServerProxy) {
  this.conf = new Configuration(conf); // Cloning for modifying.
  // For faster redirects from AM to HS.
  this.conf.setInt(
      CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY,
      this.conf.getInt(MRJobConfig.MR_CLIENT_TO_AM_IPC_MAX_RETRIES,
          MRJobConfig.DEFAULT_MR_CLIENT_TO_AM_IPC_MAX_RETRIES));
  this.conf.setInt(
      CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
      this.conf.getInt(MRJobConfig.MR_CLIENT_TO_AM_IPC_MAX_RETRIES_ON_TIMEOUTS,
          MRJobConfig.DEFAULT_MR_CLIENT_TO_AM_IPC_MAX_RETRIES_ON_TIMEOUTS));
  this.rm = rm;
  this.jobId = jobId;
  this.historyServerProxy = historyServerProxy;
  this.appId = TypeConverter.toYarn(jobId).getAppId();
  notRunningJobs = new HashMap<JobState, HashMap<String, NotRunningJob>>();
}
Example #8
Source File: TestSpeculativeExecution.java From hadoop with Apache License 2.0
public void reduce(Text key, Iterable<IntWritable> values, Context context)
    throws IOException, InterruptedException {
  // Make one reducer slower for speculative execution
  TaskAttemptID taid = context.getTaskAttemptID();
  long sleepTime = 100;
  Configuration conf = context.getConfiguration();
  boolean test_speculate_reduce =
      conf.getBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);

  // IF TESTING REDUCE SPECULATIVE EXECUTION:
  //   Make the "*_r_000000_0" attempt take much longer than the others.
  //   When speculative execution is enabled, this should cause the attempt
  //   to be killed and restarted. At that point, the attempt ID will be
  //   "*_r_000000_1", so sleepTime will still remain 100ms.
  if ( (taid.getTaskType() == TaskType.REDUCE) && test_speculate_reduce
        && (taid.getTaskID().getId() == 0) && (taid.getId() == 0) ) {
    sleepTime = 10000;
  }
  try {
    Thread.sleep(sleepTime);
  } catch(InterruptedException ie) {
    // Ignore
  }
  context.write(key, new IntWritable(0));
}
Example #9
Source File: CombineFileRecordReaderWrapper.java From big-c with Apache License 2.0
private boolean fileSplitIsValid(TaskAttemptContext context) {
  Configuration conf = context.getConfiguration();
  long offset = conf.getLong(MRJobConfig.MAP_INPUT_START, 0L);
  if (fileSplit.getStart() != offset) {
    return false;
  }
  long length = conf.getLong(MRJobConfig.MAP_INPUT_PATH, 0L);
  if (fileSplit.getLength() != length) {
    return false;
  }
  String path = conf.get(MRJobConfig.MAP_INPUT_FILE);
  if (!fileSplit.getPath().toString().equals(path)) {
    return false;
  }
  return true;
}
Example #10
Source File: PipeReducer.java From hadoop with Apache License 2.0
public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of
  //processed records could be different(equal or less) than the no of
  //records input.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);

  try {
    reduceOutFieldSeparator = job_.get("stream.reduce.output.field.separator", "\t").getBytes("UTF-8");
    reduceInputFieldSeparator = job_.get("stream.reduce.input.field.separator", "\t").getBytes("UTF-8");
    this.numOfReduceOutputKeyFields = job_.getInt("stream.num.reduce.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
Example #11
Source File: TestSpeculativeExecution.java From big-c with Apache License 2.0
public void reduce(Text key, Iterable<IntWritable> values, Context context)
    throws IOException, InterruptedException {
  // Make one reducer slower for speculative execution
  TaskAttemptID taid = context.getTaskAttemptID();
  long sleepTime = 100;
  Configuration conf = context.getConfiguration();
  boolean test_speculate_reduce =
      conf.getBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);

  // IF TESTING REDUCE SPECULATIVE EXECUTION:
  //   Make the "*_r_000000_0" attempt take much longer than the others.
  //   When speculative execution is enabled, this should cause the attempt
  //   to be killed and restarted. At that point, the attempt ID will be
  //   "*_r_000000_1", so sleepTime will still remain 100ms.
  if ( (taid.getTaskType() == TaskType.REDUCE) && test_speculate_reduce
        && (taid.getTaskID().getId() == 0) && (taid.getId() == 0) ) {
    sleepTime = 10000;
  }
  try {
    Thread.sleep(sleepTime);
  } catch(InterruptedException ie) {
    // Ignore
  }
  context.write(key, new IntWritable(0));
}
Example #12
Source File: TestGridmixMemoryEmulation.java From big-c with Apache License 2.0
/**
 * Test disabled task heap options configuration in {@link GridmixJob}.
 */
@Test
@SuppressWarnings("deprecation")
public void testJavaHeapOptionsDisabled() throws Exception {
  Configuration gridmixConf = new Configuration();
  gridmixConf.setBoolean(GridmixJob.GRIDMIX_TASK_JVM_OPTIONS_ENABLE, false);

  // set the default values of simulated job
  gridmixConf.set(MRJobConfig.MAP_JAVA_OPTS, "-Xmx1m");
  gridmixConf.set(MRJobConfig.REDUCE_JAVA_OPTS, "-Xmx2m");
  gridmixConf.set(JobConf.MAPRED_TASK_JAVA_OPTS, "-Xmx3m");

  // set the default map and reduce task options for original job
  final JobConf originalConf = new JobConf();
  originalConf.set(MRJobConfig.MAP_JAVA_OPTS, "-Xmx10m");
  originalConf.set(MRJobConfig.REDUCE_JAVA_OPTS, "-Xmx20m");
  originalConf.set(JobConf.MAPRED_TASK_JAVA_OPTS, "-Xmx30m");

  // define a mock job
  MockJob story = new MockJob(originalConf) {
    public JobConf getJobConf() {
      return originalConf;
    }
  };

  GridmixJob job = new DummyGridmixJob(gridmixConf, story);
  Job simulatedJob = job.getJob();
  Configuration simulatedConf = simulatedJob.getConfiguration();

  assertEquals("Map heap options works when disabled!", "-Xmx1m",
               simulatedConf.get(MRJobConfig.MAP_JAVA_OPTS));
  assertEquals("Reduce heap options works when disabled!", "-Xmx2m",
               simulatedConf.get(MRJobConfig.REDUCE_JAVA_OPTS));
  assertEquals("Task heap options works when disabled!", "-Xmx3m",
               simulatedConf.get(JobConf.MAPRED_TASK_JAVA_OPTS));
}
Example #13
Source File: TestJobEndNotifier.java From hadoop with Apache License 2.0
private void testProxyConfiguration(Configuration conf) {
  conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_PROXY, "somehost");
  setConf(conf);
  Assert.assertTrue("Proxy shouldn't be set because port wasn't specified",
      proxyToUse.type() == Proxy.Type.DIRECT);
  conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_PROXY, "somehost:someport");
  setConf(conf);
  Assert.assertTrue("Proxy shouldn't be set because port wasn't numeric",
      proxyToUse.type() == Proxy.Type.DIRECT);
  conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_PROXY, "somehost:1000");
  setConf(conf);
  Assert.assertTrue("Proxy should have been set but wasn't ",
      proxyToUse.toString().equals("HTTP @ somehost:1000"));
  conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_PROXY, "socks@somehost:1000");
  setConf(conf);
  Assert.assertTrue("Proxy should have been socks but wasn't ",
      proxyToUse.toString().equals("SOCKS @ somehost:1000"));
  conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_PROXY, "SOCKS@somehost:1000");
  setConf(conf);
  Assert.assertTrue("Proxy should have been socks but wasn't ",
      proxyToUse.toString().equals("SOCKS @ somehost:1000"));
  conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_PROXY, "sfafn@somehost:1000");
  setConf(conf);
  Assert.assertTrue("Proxy should have been http but wasn't ",
      proxyToUse.toString().equals("HTTP @ somehost:1000"));
}
Example #14
Source File: HadoopArchives.java From hadoop with Apache License 2.0
public void configure(JobConf conf) {
  this.conf = conf;

  // this is tightly tied to map reduce
  // since it does not expose an api
  // to get the partition
  partId = conf.getInt(MRJobConfig.TASK_PARTITION, -1);
  // create a file name using the partition
  // we need to write to this directory
  tmpOutputDir = FileOutputFormat.getWorkOutputPath(conf);
  blockSize = conf.getLong(HAR_BLOCKSIZE_LABEL, blockSize);
  // get the output path and write to the tmp
  // directory
  partname = "part-" + partId;
  tmpOutput = new Path(tmpOutputDir, partname);
  rootPath = (conf.get(SRC_PARENT_LABEL, null) == null) ? null :
              new Path(conf.get(SRC_PARENT_LABEL));
  if (rootPath == null) {
    throw new RuntimeException("Unable to read parent " +
        "path for har from config");
  }
  try {
    destFs = tmpOutput.getFileSystem(conf);
    //this was a stale copy
    if (destFs.exists(tmpOutput)) {
      destFs.delete(tmpOutput, false);
    }
    partStream = destFs.create(tmpOutput, false, conf.getInt("io.file.buffer.size", 4096),
                               destFs.getDefaultReplication(tmpOutput), blockSize);
  } catch(IOException ie) {
    throw new RuntimeException("Unable to open output file " + tmpOutput, ie);
  }
  buffer = new byte[buf_size];
}
Example #15
Source File: TestYARNRunner.java From hadoop with Apache License 2.0
@Test(timeout=20000)
public void testWarnCommandOpts() throws Exception {
  Logger logger = Logger.getLogger(YARNRunner.class);

  ByteArrayOutputStream bout = new ByteArrayOutputStream();
  Layout layout = new SimpleLayout();
  Appender appender = new WriterAppender(layout, bout);
  logger.addAppender(appender);

  JobConf jobConf = new JobConf();

  jobConf.set(MRJobConfig.MR_AM_ADMIN_COMMAND_OPTS,
      "-Djava.net.preferIPv4Stack=true -Djava.library.path=foo");
  jobConf.set(MRJobConfig.MR_AM_COMMAND_OPTS, "-Xmx1024m -Djava.library.path=bar");

  YARNRunner yarnRunner = new YARNRunner(jobConf);

  @SuppressWarnings("unused")
  ApplicationSubmissionContext submissionContext =
      buildSubmitContext(yarnRunner, jobConf);

  String logMsg = bout.toString();
  assertTrue(logMsg.contains("WARN - Usage of -Djava.library.path in " +
      "yarn.app.mapreduce.am.admin-command-opts can cause programs to no " +
      "longer function if hadoop native libraries are used. These values " +
      "should be set as part of the LD_LIBRARY_PATH in the app master JVM " +
      "env using yarn.app.mapreduce.am.admin.user.env config settings."));
  assertTrue(logMsg.contains("WARN - Usage of -Djava.library.path in " +
      "yarn.app.mapreduce.am.command-opts can cause programs to no longer " +
      "function if hadoop native libraries are used. These values should " +
      "be set as part of the LD_LIBRARY_PATH in the app master JVM env " +
      "using yarn.app.mapreduce.am.env config settings."));
}
Example #16
Source File: TestMRCJCFileOutputCommitter.java From big-c with Apache License 2.0
@SuppressWarnings("unchecked")
public void testAbort() throws IOException, InterruptedException {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // do setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeOutput(theRecordWriter, tContext);

  // do abort
  committer.abortTask(tContext);
  File expectedFile = new File(new Path(committer.getWorkPath(), partFile)
      .toString());
  assertFalse("task temp dir still exists", expectedFile.exists());

  committer.abortJob(jContext, JobStatus.State.FAILED);
  expectedFile = new File(new Path(outDir, FileOutputCommitter.PENDING_DIR_NAME)
      .toString());
  assertFalse("job temp dir still exists", expectedFile.exists());
  assertEquals("Output directory not empty", 0, new File(outDir.toString())
      .listFiles().length);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
Example #17
Source File: DistributedCache.java From hadoop with Apache License 2.0
/**
 * Add an archive path to the current set of classpath entries. It adds the
 * archive to cache as well. Intended to be used by user code.
 *
 * @param archive Path of the archive to be added
 * @param conf Configuration that contains the classpath setting
 * @param fs FileSystem with respect to which {@code archive} should be interpreted.
 */
public static void addArchiveToClassPath(Path archive, Configuration conf, FileSystem fs)
    throws IOException {
  String classpath = conf.get(MRJobConfig.CLASSPATH_ARCHIVES);
  conf.set(MRJobConfig.CLASSPATH_ARCHIVES, classpath == null ? archive
      .toString() : classpath + "," + archive.toString());
  URI uri = fs.makeQualified(archive).toUri();
  addCacheArchive(uri, conf);
}
Example #18
Source File: DistributedCache.java From big-c with Apache License 2.0
/**
 * Add an archive path to the current set of classpath entries. It adds the
 * archive to cache as well. Intended to be used by user code.
 *
 * @param archive Path of the archive to be added
 * @param conf Configuration that contains the classpath setting
 * @param fs FileSystem with respect to which {@code archive} should be interpreted.
 */
public static void addArchiveToClassPath(Path archive, Configuration conf, FileSystem fs)
    throws IOException {
  String classpath = conf.get(MRJobConfig.CLASSPATH_ARCHIVES);
  conf.set(MRJobConfig.CLASSPATH_ARCHIVES, classpath == null ? archive
      .toString() : classpath + "," + archive.toString());
  URI uri = fs.makeQualified(archive).toUri();
  addCacheArchive(uri, conf);
}
Example #19
Source File: TestDistCacheEmulation.java From big-c with Apache License 2.0
/**
 * Configures 5 HDFS-based dist cache files and 1 local-FS-based dist cache
 * file in the given Configuration object <code>conf</code>.
 *
 * @param conf
 *          configuration where dist cache config properties are to be set
 * @return array of sorted HDFS-based distributed cache file sizes
 * @throws IOException
 */
private long[] configureDummyDistCacheFiles(Configuration conf)
    throws IOException {
  String user = UserGroupInformation.getCurrentUser().getShortUserName();
  conf.set("user.name", user);

  // Set some dummy dist cache files in gridmix configuration so that they go
  // into the configuration of JobStory objects.
  String[] distCacheFiles = { "hdfs:///tmp/file1.txt",
      "/tmp/" + user + "/.staging/job_1/file2.txt",
      "hdfs:///user/user1/file3.txt", "/home/user2/file4.txt",
      "subdir1/file5.txt", "subdir2/file6.gz" };

  String[] fileSizes = { "400", "2500", "700", "1200", "1500", "500" };

  String[] visibilities = { "true", "false", "false", "true", "true", "false" };
  String[] timeStamps = { "1234", "2345", "34567", "5434", "125", "134" };

  // DistributedCache.setCacheFiles(fileCaches, conf);
  conf.setStrings(MRJobConfig.CACHE_FILES, distCacheFiles);
  conf.setStrings(MRJobConfig.CACHE_FILES_SIZES, fileSizes);
  conf.setStrings(JobContext.CACHE_FILE_VISIBILITIES, visibilities);
  conf.setStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS, timeStamps);

  // local FS based dist cache file whose path contains <user>/.staging is
  // not created on HDFS. So file size 2500 is not added to sortedFileSizes.
  long[] sortedFileSizes = new long[] { 1500, 1200, 700, 500, 400 };
  return sortedFileSizes;
}
Example #20
Source File: MROutputFiles.java From big-c with Apache License 2.0
/**
 * Create a local reduce input file name.
 *
 * @param mapId a map task id
 * @param size the size of the file
 * @return path
 * @throws IOException
 */
@Override
public Path getInputFileForWrite(org.apache.hadoop.mapreduce.TaskID mapId,
                                 long size)
    throws IOException {
  return lDirAlloc.getLocalPathForWrite(String.format(
      REDUCE_INPUT_FILE_FORMAT_STRING, MRJobConfig.OUTPUT, mapId.getId()),
      size, getConf());
}
Example #21
Source File: TestFileOutputCommitter.java From big-c with Apache License 2.0
private void testMapFileOutputCommitterInternal(int version)
    throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION,
      version);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  MapFileOutputFormat theOutputFormat = new MapFileOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeMapFileOutput(theRecordWriter, tContext);

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  // validate output
  validateMapFileOutputContent(FileSystem.get(job.getConfiguration()), outDir);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
Example #22
Source File: TestSpeculativeExecution.java From hadoop with Apache License 2.0
private Job runSpecTest(boolean mapspec, boolean redspec)
    throws IOException, ClassNotFoundException, InterruptedException {
  Path first = createTempFile("specexec_map_input1", "a\nz");
  Path secnd = createTempFile("specexec_map_input2", "a\nz");

  Configuration conf = mrCluster.getConfig();
  conf.setBoolean(MRJobConfig.MAP_SPECULATIVE, mapspec);
  conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, redspec);
  conf.setClass(MRJobConfig.MR_AM_TASK_ESTIMATOR,
      TestSpecEstimator.class, TaskRuntimeEstimator.class);

  Job job = Job.getInstance(conf);
  job.setJarByClass(TestSpeculativeExecution.class);
  job.setMapperClass(SpeculativeMapper.class);
  job.setReducerClass(SpeculativeReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setNumReduceTasks(2);
  FileInputFormat.setInputPaths(job, first);
  FileInputFormat.addInputPath(job, secnd);
  FileOutputFormat.setOutputPath(job, TEST_OUT_DIR);

  // Delete output directory if it exists.
  try {
    localFs.delete(TEST_OUT_DIR, true);
  } catch (IOException e) {
    // ignore
  }

  // Creates the Job Configuration
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.setMaxMapAttempts(2);
  job.submit();

  return job;
}
Example #23
Source File: DistributedCache.java From hadoop with Apache License 2.0
/**
 * Get the file entries in classpath as an array of Path.
 * Used by internal DistributedCache code.
 *
 * @param conf Configuration that contains the classpath setting
 * @deprecated Use {@link JobContext#getFileClassPaths()} instead
 */
@Deprecated
public static Path[] getFileClassPaths(Configuration conf) {
  ArrayList<String> list = (ArrayList<String>)conf.getStringCollection(
      MRJobConfig.CLASSPATH_FILES);
  if (list.size() == 0) {
    return null;
  }
  Path[] paths = new Path[list.size()];
  for (int i = 0; i < list.size(); i++) {
    paths[i] = new Path(list.get(i));
  }
  return paths;
}
Example #24
Source File: TestMRApps.java From big-c with Apache License 2.0
@SuppressWarnings("deprecation")
public void testSetupDistributedCacheConflictsFiles() throws Exception {
  Configuration conf = new Configuration();
  conf.setClass("fs.mockfs.impl", MockFileSystem.class, FileSystem.class);

  URI mockUri = URI.create("mockfs://mock/");
  FileSystem mockFs = ((FilterFileSystem)FileSystem.get(mockUri, conf))
      .getRawFileSystem();

  URI file = new URI("mockfs://mock/tmp/something.zip#something");
  Path filePath = new Path(file);
  URI file2 = new URI("mockfs://mock/tmp/something.txt#something");
  Path file2Path = new Path(file2);

  when(mockFs.resolvePath(filePath)).thenReturn(filePath);
  when(mockFs.resolvePath(file2Path)).thenReturn(file2Path);

  DistributedCache.addCacheFile(file, conf);
  DistributedCache.addCacheFile(file2, conf);
  conf.set(MRJobConfig.CACHE_FILE_TIMESTAMPS, "10,11");
  conf.set(MRJobConfig.CACHE_FILES_SIZES, "10,11");
  conf.set(MRJobConfig.CACHE_FILE_VISIBILITIES, "true,true");

  Map<String, LocalResource> localResources =
      new HashMap<String, LocalResource>();
  MRApps.setupDistributedCache(conf, localResources);

  assertEquals(1, localResources.size());
  LocalResource lr = localResources.get("something");
  //First one wins
  assertNotNull(lr);
  assertEquals(10l, lr.getSize());
  assertEquals(10l, lr.getTimestamp());
  assertEquals(LocalResourceType.FILE, lr.getType());
}
Example #25
Source File: TestAMWebServicesJobConf.java From big-c with Apache License 2.0
@Override
protected void configureServlets() {
  Path confPath = new Path(testConfDir.toString(),
      MRJobConfig.JOB_CONF_FILE);
  Configuration config = new Configuration();

  FileSystem localFs;
  try {
    localFs = FileSystem.getLocal(config);
    confPath = localFs.makeQualified(confPath);

    OutputStream out = localFs.create(confPath);
    try {
      conf.writeXml(out);
    } finally {
      out.close();
    }
    if (!localFs.exists(confPath)) {
      fail("error creating config file: " + confPath);
    }
  } catch (IOException e) {
    fail("error creating config file: " + e.getMessage());
  }

  appContext = new MockAppContext(0, 2, 1, confPath);

  bind(JAXBContextResolver.class);
  bind(AMWebServices.class);
  bind(GenericExceptionHandler.class);
  bind(AppContext.class).toInstance(appContext);
  bind(Configuration.class).toInstance(conf);

  serve("/*").with(GuiceContainer.class);
}
Example #26
Source File: TestStreamAggregate.java From hadoop with Apache License 2.0
protected String[] genArgs() {
  return new String[] {
      "-input", INPUT_FILE.getAbsolutePath(),
      "-output", OUTPUT_DIR.getAbsolutePath(),
      "-mapper", map,
      "-reducer", "aggregate",
      "-jobconf", MRJobConfig.PRESERVE_FAILED_TASK_FILES + "=true",
      "-jobconf", "stream.tmpdir=" + System.getProperty("test.build.data", "/tmp")
  };
}
Example #27
Source File: TestTaskHeartbeatHandler.java From hadoop with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testTimeout() throws InterruptedException {
  EventHandler mockHandler = mock(EventHandler.class);
  Clock clock = new SystemClock();
  TaskHeartbeatHandler hb = new TaskHeartbeatHandler(mockHandler, clock, 1);

  Configuration conf = new Configuration();
  conf.setInt(MRJobConfig.TASK_TIMEOUT, 10); //10 ms
  conf.setInt(MRJobConfig.TASK_TIMEOUT_CHECK_INTERVAL_MS, 10); //10 ms

  hb.init(conf);
  hb.start();
  try {
    ApplicationId appId = ApplicationId.newInstance(0l, 5);
    JobId jobId = MRBuilderUtils.newJobId(appId, 4);
    TaskId tid = MRBuilderUtils.newTaskId(jobId, 3, TaskType.MAP);
    TaskAttemptId taid = MRBuilderUtils.newTaskAttemptId(tid, 2);
    hb.register(taid);
    Thread.sleep(100);
    //Events only happen when the task is canceled
    verify(mockHandler, times(2)).handle(any(Event.class));
  } finally {
    hb.stop();
  }
}
Example #28
Source File: TestFileOutputCommitter.java From big-c with Apache License 2.0
public void testInvalidVersionNumber() throws IOException {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 3);
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  try {
    new FileOutputCommitter(outDir, tContext);
    fail("should've thrown an exception!");
  } catch (IOException e) {
    //test passed
  }
}
Example #29
Source File: TestJobImpl.java From hadoop with Apache License 2.0
@Test(timeout=20000)
public void testKilledDuringKillAbort() throws Exception {
  Configuration conf = new Configuration();
  conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
  AsyncDispatcher dispatcher = new AsyncDispatcher();
  dispatcher.init(conf);
  dispatcher.start();
  OutputCommitter committer = new StubbedOutputCommitter() {
    @Override
    public synchronized void abortJob(JobContext jobContext, State state)
        throws IOException {
      while (!Thread.interrupted()) {
        try {
          wait();
        } catch (InterruptedException e) {
        }
      }
    }
  };
  CommitterEventHandler commitHandler =
      createCommitterEventHandler(dispatcher, committer);
  commitHandler.init(conf);
  commitHandler.start();

  JobImpl job = createStubbedJob(conf, dispatcher, 2, null);
  JobId jobId = job.getID();
  job.handle(new JobEvent(jobId, JobEventType.JOB_INIT));
  assertJobState(job, JobStateInternal.INITED);
  job.handle(new JobStartEvent(jobId));
  assertJobState(job, JobStateInternal.SETUP);

  job.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
  assertJobState(job, JobStateInternal.KILL_ABORT);
  job.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
  assertJobState(job, JobStateInternal.KILLED);
  dispatcher.stop();
  commitHandler.stop();
}
Example #30
Source File: TaskAttemptImpl.java From hadoop with Apache License 2.0
private WrappedProgressSplitsBlock getProgressSplitBlock() {
  readLock.lock();
  try {
    if (progressSplitBlock == null) {
      progressSplitBlock = new WrappedProgressSplitsBlock(conf.getInt(
          MRJobConfig.MR_AM_NUM_PROGRESS_SPLITS,
          MRJobConfig.DEFAULT_MR_AM_NUM_PROGRESS_SPLITS));
    }
    return progressSplitBlock;
  } finally {
    readLock.unlock();
  }
}