Java Code Examples for org.apache.hadoop.mapred.JobConf#setInt()

The following examples show how to use org.apache.hadoop.mapred.JobConf#setInt(). Each example is taken from an open-source project; the originating project and source file are noted above each example.
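Before the project examples, here is a minimal, self-contained sketch of the setInt()/getInt() round trip. The configuration keys used here (example.max.retries, example.unset.key) are made-up names for illustration, not keys defined by Hadoop.

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetIntSketch {
  public static void main(String[] args) {
    JobConf conf = new JobConf();

    // Store an int under a configuration key (hypothetical key name).
    conf.setInt("example.max.retries", 3);

    // Read it back; the second argument is the default returned when the key is unset.
    int retries = conf.getInt("example.max.retries", 1);   // 3

    // An unset key falls back to the supplied default.
    int fallback = conf.getInt("example.unset.key", 42);   // 42

    System.out.println(retries + " " + fallback);
  }
}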
Example 1
Source File: TestDatamerge.java    From hadoop with Apache License 2.0
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/"+jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapreduce.join.expr", CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
 
Example 2
Source File: TestMiniCoronaRunJob.java    From RDFS with Apache License 2.0
public void testMemoryLimit() throws Exception {
  LOG.info("Starting testMemoryLimit");
  JobConf conf = new JobConf();
  conf.setInt(CoronaConf.NODE_RESERVED_MEMORY_MB, Integer.MAX_VALUE);
  corona = new MiniCoronaCluster.Builder().conf(conf).numTaskTrackers(2).build();
  final JobConf jobConf = corona.createJobConf();
  long start = System.currentTimeMillis();
  FutureTask<Boolean> task = submitSleepJobFutureTask(jobConf);
  checkTaskNotDone(task, 10);
  NodeManager nm = corona.getClusterManager().getNodeManager();
  nm.getResourceLimit().setNodeReservedMemoryMB(0);
  Assert.assertTrue(task.get());
  long end = System.currentTimeMillis();
  LOG.info("Task Done. Verifying");
  new ClusterManagerMetricsVerifier(corona.getClusterManager(),
      1, 1, 1, 1, 1, 1, 0, 0).verifyAll();
  LOG.info("Time spent for testMemoryLimit:" +
      (end - start));
}
 
Example 3
Source File: MRTask.java    From incubator-tez with Apache License 2.0
public void localizeConfiguration(JobConf jobConf)
    throws IOException, InterruptedException {
  jobConf.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
  jobConf.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
  jobConf.setInt(JobContext.TASK_PARTITION,
      taskAttemptId.getTaskID().getId());
  jobConf.set(JobContext.ID, taskAttemptId.getJobID().toString());
  
  jobConf.setBoolean(MRJobConfig.TASK_ISMAP, isMap);
  
  Path outputPath = FileOutputFormat.getOutputPath(jobConf);
  if (outputPath != null) {
    if ((committer instanceof FileOutputCommitter)) {
      FileOutputFormat.setWorkOutputPath(jobConf, 
        ((FileOutputCommitter)committer).getTaskAttemptPath(taskAttemptContext));
    } else {
      FileOutputFormat.setWorkOutputPath(jobConf, outputPath);
    }
  }
}
 
Example 4
Source File: TeraSort.java    From hadoop-book with Apache License 2.0
public int run(String[] args) throws Exception {
  LOG.info("starting");
  JobConf job = (JobConf) getConf();
  Path inputDir = new Path(args[0]);
  inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
  Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME);
  URI partitionUri = new URI(partitionFile.toString() +
                             "#" + TeraInputFormat.PARTITION_FILENAME);
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraSort");
  job.setJarByClass(TeraSort.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormat(TeraInputFormat.class);
  job.setOutputFormat(TeraOutputFormat.class);
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TeraInputFormat.writePartitionFile(job, partitionFile);
  DistributedCache.addCacheFile(partitionUri, job);
  DistributedCache.createSymlink(job);
  job.setInt("dfs.replication", 1);
  TeraOutputFormat.setFinalSync(job, true);
  JobClient.runJob(job);
  LOG.info("done");
  return 0;
}
 
Example 5
Source File: MRTask.java    From tez with Apache License 2.0
public void localizeConfiguration(JobConf jobConf)
    throws IOException, InterruptedException {
  jobConf.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
  jobConf.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
  jobConf.setInt(JobContext.TASK_PARTITION,
      taskAttemptId.getTaskID().getId());
  jobConf.set(JobContext.ID, taskAttemptId.getJobID().toString());
  
  jobConf.setBoolean(MRJobConfig.TASK_ISMAP, isMap);
  
  Path outputPath = FileOutputFormat.getOutputPath(jobConf);
  if (outputPath != null) {
    if ((committer instanceof FileOutputCommitter)) {
      FileOutputFormat.setWorkOutputPath(jobConf, 
        ((FileOutputCommitter)committer).getTaskAttemptPath(taskAttemptContext));
    } else {
      FileOutputFormat.setWorkOutputPath(jobConf, outputPath);
    }
  }
}
 
Example 6
Source File: TestDeprecatedKeys.java    From incubator-tez with Apache License 2.0
@Test
public void verifyReduceKeyTranslation() {
  JobConf jobConf = new JobConf();

  jobConf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.4f);
  jobConf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, 20000l);
  jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, 2000);
  jobConf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.55f);
  jobConf.setFloat(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, 0.60f);
  jobConf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.22f);
  jobConf.setBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, true);
  jobConf.setFloat(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT, 0.33f);

  MRHelpers.translateVertexConfToTez(jobConf);

  assertEquals(0.4f, jobConf.getFloat(
      TezJobConfig.TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT, 0f), 0.01f);
  assertEquals(20000l, jobConf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, 0));
  assertEquals(2000,
      jobConf.getInt(TezJobConfig.TEZ_RUNTIME_IO_SORT_FACTOR, 0));
  assertEquals(0.55f, jobConf.getFloat(
      TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 0), 0.01f);
  assertEquals(0.60f,
      jobConf.getFloat(TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 0),
      0.01f);
  assertEquals(0.22f,
      jobConf.getFloat(TezJobConfig.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 0),
      0.01f);
  assertEquals(true, jobConf.getBoolean(
      TezJobConfig.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, false));
  assertEquals(0.33f,
      jobConf.getFloat(TezJobConfig.TEZ_RUNTIME_INPUT_BUFFER_PERCENT, 0),
      0.01f);
}
 
Example 7
Source File: AbstractMROldApiSaveTest.java    From elasticsearch-hadoop with Apache License 2.0
@Parameters
public static Collection<Object[]> configs() throws Exception {
    JobConf conf = HdpBootstrap.hadoopConfig();

    conf.setInputFormat(SplittableTextInputFormat.class);
    conf.setOutputFormat(EsOutputFormat.class);
    conf.setReducerClass(IdentityReducer.class);
    HadoopCfgUtils.setGenericOptions(conf);
    conf.setNumMapTasks(2);
    conf.setInt("actual.splits", 2);
    conf.setNumReduceTasks(0);


    JobConf standard = new JobConf(conf);
    standard.setMapperClass(TabMapper.class);
    standard.setMapOutputValueClass(LinkedMapWritable.class);
    standard.set(ConfigurationOptions.ES_INPUT_JSON, "false");
    FileInputFormat.setInputPaths(standard, new Path(MRSuite.testData.sampleArtistsDat(conf)));

    JobConf json = new JobConf(conf);
    json.setMapperClass(IdentityMapper.class);
    json.setMapOutputValueClass(Text.class);
    json.set(ConfigurationOptions.ES_INPUT_JSON, "true");
    FileInputFormat.setInputPaths(json, new Path(MRSuite.testData.sampleArtistsJson(conf)));

    return Arrays.asList(new Object[][] {
            { standard, "" },
            { json, "json-" }
    });
}
 
Example 8
Source File: ValueAggregatorJob.java    From hadoop with Apache License 2.0
public static void setAggregatorDescriptors(JobConf job
    , Class<? extends ValueAggregatorDescriptor>[] descriptors) {
  job.setInt("aggregator.descriptor.num", descriptors.length);
  //specify the aggregator descriptors
  for(int i=0; i< descriptors.length; i++) {
    job.set("aggregator.descriptor." + i, "UserDefined," + descriptors[i].getName());
  }    
}
 
Example 9
Source File: TeraSort.java    From RDFS with Apache License 2.0
public int run(String[] args) throws Exception {
  LOG.info("starting");
  JobConf job = (JobConf) getConf();
  Path inputDir = new Path(args[0]);
  inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
  Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME);
  URI partitionUri = new URI(partitionFile.toString() +
                             "#" + TeraInputFormat.PARTITION_FILENAME);
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraSort");
  job.setJarByClass(TeraSort.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormat(TeraInputFormat.class);
  job.setOutputFormat(TeraOutputFormat.class);
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TeraInputFormat.writePartitionFile(job, partitionFile);
  DistributedCache.addCacheFile(partitionUri, job);
  DistributedCache.createSymlink(job);
  job.setInt("dfs.replication", 1);
  TeraOutputFormat.setFinalSync(job, true);
  long startTime = System.currentTimeMillis();
  JobClient.runJob(job);
  long endTime = System.currentTimeMillis();
  System.out.println((float)(endTime-startTime)/1000);
  LOG.info("done");
  return 0;
}
 
Example 10
Source File: TestMRHelpers.java    From tez with Apache License 2.0
@Test(timeout = 5000)
public void testContainerResourceConstruction() {
  JobConf conf = new JobConf(new Configuration());
  Resource mapResource = MRHelpers.getResourceForMRMapper(conf);
  Resource reduceResource = MRHelpers.getResourceForMRReducer(conf);

  Assert.assertEquals(MRJobConfig.DEFAULT_MAP_CPU_VCORES,
      mapResource.getVirtualCores());
  Assert.assertEquals(MRJobConfig.DEFAULT_MAP_MEMORY_MB,
      mapResource.getMemory());
  Assert.assertEquals(MRJobConfig.DEFAULT_REDUCE_CPU_VCORES,
      reduceResource.getVirtualCores());
  Assert.assertEquals(MRJobConfig.DEFAULT_REDUCE_MEMORY_MB,
      reduceResource.getMemory());

  conf.setInt(MRJobConfig.MAP_CPU_VCORES, 2);
  conf.setInt(MRJobConfig.MAP_MEMORY_MB, 123);
  conf.setInt(MRJobConfig.REDUCE_CPU_VCORES, 20);
  conf.setInt(MRJobConfig.REDUCE_MEMORY_MB, 1234);

  mapResource = MRHelpers.getResourceForMRMapper(conf);
  reduceResource = MRHelpers.getResourceForMRReducer(conf);

  Assert.assertEquals(2, mapResource.getVirtualCores());
  Assert.assertEquals(123, mapResource.getMemory());
  Assert.assertEquals(20, reduceResource.getVirtualCores());
  Assert.assertEquals(1234, reduceResource.getMemory());
}
 
Example 11
Source File: TestEncryptedShuffle.java    From big-c with Apache License 2.0
private void encryptedShuffleWithCerts(boolean useClientCerts)
  throws Exception {
  try {
    Configuration conf = new Configuration();
    String keystoresDir = new File(BASEDIR).getAbsolutePath();
    String sslConfsDir =
      KeyStoreTestUtil.getClasspathDir(TestEncryptedShuffle.class);
    KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfsDir, conf,
                                    useClientCerts);
    conf.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, true);
    startCluster(conf);
    FileSystem fs = FileSystem.get(getJobConf());
    Path inputDir = new Path("input");
    fs.mkdirs(inputDir);
    Writer writer =
      new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
    writer.write("hello");
    writer.close();

    Path outputDir = new Path("output", "output");

    JobConf jobConf = new JobConf(getJobConf());
    jobConf.setInt("mapred.map.tasks", 1);
    jobConf.setInt("mapred.map.max.attempts", 1);
    jobConf.setInt("mapred.reduce.max.attempts", 1);
    jobConf.set("mapred.input.dir", inputDir.toString());
    jobConf.set("mapred.output.dir", outputDir.toString());
    JobClient jobClient = new JobClient(jobConf);
    RunningJob runJob = jobClient.submitJob(jobConf);
    runJob.waitForCompletion();
    Assert.assertTrue(runJob.isComplete());
    Assert.assertTrue(runJob.isSuccessful());
  } finally {
    stopCluster();
  }
}
 
Example 12
Source File: TestEncryptedShuffle.java    From hadoop with Apache License 2.0
private void encryptedShuffleWithCerts(boolean useClientCerts)
  throws Exception {
  try {
    Configuration conf = new Configuration();
    String keystoresDir = new File(BASEDIR).getAbsolutePath();
    String sslConfsDir =
      KeyStoreTestUtil.getClasspathDir(TestEncryptedShuffle.class);
    KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfsDir, conf,
                                    useClientCerts);
    conf.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, true);
    startCluster(conf);
    FileSystem fs = FileSystem.get(getJobConf());
    Path inputDir = new Path("input");
    fs.mkdirs(inputDir);
    Writer writer =
      new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
    writer.write("hello");
    writer.close();

    Path outputDir = new Path("output", "output");

    JobConf jobConf = new JobConf(getJobConf());
    jobConf.setInt("mapred.map.tasks", 1);
    jobConf.setInt("mapred.map.max.attempts", 1);
    jobConf.setInt("mapred.reduce.max.attempts", 1);
    jobConf.set("mapred.input.dir", inputDir.toString());
    jobConf.set("mapred.output.dir", outputDir.toString());
    JobClient jobClient = new JobClient(jobConf);
    RunningJob runJob = jobClient.submitJob(jobConf);
    runJob.waitForCompletion();
    Assert.assertTrue(runJob.isComplete());
    Assert.assertTrue(runJob.isSuccessful());
  } finally {
    stopCluster();
  }
}
 
Example 13
Source File: TestKeyFieldBasedPartitioner.java    From hadoop with Apache License 2.0
/**
 * Tests that the key-field-based partitioner works with an empty key.
 */
@Test
public void testEmptyKey() throws Exception {
  KeyFieldBasedPartitioner<Text, Text> kfbp = 
    new KeyFieldBasedPartitioner<Text, Text>();
  JobConf conf = new JobConf();
  conf.setInt("num.key.fields.for.partition", 10);
  kfbp.configure(conf);
  assertEquals("Empty key should map to 0th partition", 
               0, kfbp.getPartition(new Text(), new Text(), 10));
}
 
Example 14
Source File: MneMapredBufferDataTest.java    From mnemonic with Apache License 2.0
@BeforeClass
public void setUp() throws IOException {
  m_workdir = new Path(
      System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
  m_conf = new JobConf();
  m_rand = Utils.createRandom();
  m_partfns = new ArrayList<String>();

  try {
    m_fs = FileSystem.getLocal(m_conf).getRaw();
    m_fs.delete(m_workdir, true);
    m_fs.mkdirs(m_workdir);
  } catch (IOException e) {
    throw new IllegalStateException("bad fs init", e);
  }

  m_conf.setInt(JobContext.TASK_PARTITION, TASK_PARTITION);
  
  MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString());
  MneConfigHelper.setBaseOutputName(m_conf, null, "mapred-buffer-data");

  MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME);
  MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID);
  MneConfigHelper.setDurableTypes(m_conf,
      MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new DurableType[] {DurableType.BUFFER});
  MneConfigHelper.setEntityFactoryProxies(m_conf,
      MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new Class<?>[] {});
  MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME);
  MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID);
  MneConfigHelper.setMemPoolSize(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, 1024L * 1024 * 1024 * 4);
  MneConfigHelper.setDurableTypes(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new DurableType[] {DurableType.BUFFER});
  MneConfigHelper.setEntityFactoryProxies(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new Class<?>[] {});
}
 
Example 15
Source File: ValueAggregatorJob.java    From RDFS with Apache License 2.0
public static void setAggregatorDescriptors(JobConf job
    , Class<? extends ValueAggregatorDescriptor>[] descriptors) {
  job.setInt("aggregator.descriptor.num", descriptors.length);
  //specify the aggregator descriptors
  for(int i=0; i< descriptors.length; i++) {
    job.set("aggregator.descriptor." + i, "UserDefined," + descriptors[i].getName());
  }    
}
 
Example 16
Source File: TestMapProcessor.java    From tez with Apache License 2.0
@Test(timeout = 30000)
public void testMapProcessorProgress() throws Exception {
  String dagName = "mrdag0";
  String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
  JobConf jobConf = new JobConf(defaultConf);
  setUpJobConf(jobConf);

  MRHelpers.translateMRConfToTez(jobConf);
  jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);

  jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);

  jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, new Path(workDir,
      "localized-resources").toUri().toString());

  Path mapInput = new Path(workDir, "map0");


  MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput, 100000);

  InputSpec mapInputSpec = new InputSpec("NullSrcVertex",
      InputDescriptor.create(MRInputLegacy.class.getName())
          .setUserPayload(UserPayload.create(ByteBuffer.wrap(
              MRRuntimeProtos.MRInputUserPayloadProto.newBuilder()
                  .setConfigurationBytes(TezUtils.createByteStringFromConf
                      (jobConf)).build()
                  .toByteArray()))),
      1);
  OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex",
      OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName())
          .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);

  TezSharedExecutor sharedExecutor = new TezSharedExecutor(jobConf);
  final LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask
      (localFs, workDir, jobConf, 0,
          new Path(workDir, "map0"), new TestUmbilical(), dagName, vertexName,
          Collections.singletonList(mapInputSpec),
          Collections.singletonList(mapOutputSpec), sharedExecutor);

  ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);
  Thread monitorProgress = new Thread(new Runnable() {
    @Override
    public void run() {
      float prog = task.getProgress();
      if (prog > 0.0f && prog < 1.0f) {
        progressUpdate = prog;
      }
    }
  });

  task.initialize();
  scheduler.scheduleAtFixedRate(monitorProgress, 0, 1,
      TimeUnit.MILLISECONDS);
  task.run();
  Assert.assertTrue("Progress Updates should be captured!",
      progressUpdate > 0.0f && progressUpdate < 1.0f);
  task.close();
  sharedExecutor.shutdownNow();
}
 
Example 17
Source File: TestReduceTaskFetchFail.java    From RDFS with Apache License 2.0
@SuppressWarnings("deprecation")
@Test
public void testcheckAndInformJobTracker() throws Exception {
  //mock creation
  TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
  TaskReporter mockTaskReporter = mock(TaskReporter.class);

  JobConf conf = new JobConf();
  conf.setUser("testuser");
  conf.setJobName("testJob");
  conf.setSessionId("testSession");

  TaskAttemptID tid =  new TaskAttemptID();
  TestReduceTask rTask = new TestReduceTask();
  rTask.setConf(conf);

  ReduceTask.ReduceCopier reduceCopier = rTask.new TestReduceCopier(mockUmbilical, conf, mockTaskReporter);
  reduceCopier.checkAndInformJobTracker(1, tid, false);

  verify(mockTaskReporter, never()).progress();

  reduceCopier.checkAndInformJobTracker(10, tid, false);
  verify(mockTaskReporter, times(1)).progress();

  // Test the config setting
  conf.setInt("mapreduce.reduce.shuffle.maxfetchfailures", 3);

  rTask.setConf(conf);
  reduceCopier = rTask.new TestReduceCopier(mockUmbilical, conf, mockTaskReporter);

  reduceCopier.checkAndInformJobTracker(1, tid, false);
  verify(mockTaskReporter, times(1)).progress();

  reduceCopier.checkAndInformJobTracker(3, tid, false);
  verify(mockTaskReporter, times(2)).progress();

  reduceCopier.checkAndInformJobTracker(5, tid, false);
  verify(mockTaskReporter, times(2)).progress();

  reduceCopier.checkAndInformJobTracker(6, tid, false);
  verify(mockTaskReporter, times(3)).progress();

  // test readError and its config
  reduceCopier.checkAndInformJobTracker(7, tid, true);
  verify(mockTaskReporter, times(4)).progress();

  conf.setBoolean("mapreduce.reduce.shuffle.notify.readerror", false);

  rTask.setConf(conf);
  reduceCopier = rTask.new TestReduceCopier(mockUmbilical, conf, mockTaskReporter);

  reduceCopier.checkAndInformJobTracker(7, tid, true);
  verify(mockTaskReporter, times(4)).progress();

}
 
Example 18
Source File: DataFsck.java    From RDFS with Apache License 2.0
List<JobContext> submitJobs(BufferedReader inputReader, int filesPerJob) throws IOException {
  boolean done = false;
  JobClient jClient = new JobClient(createJobConf());
  List<JobContext> submitted = new ArrayList<JobContext>();
  Random rand = new Random();
  do {
    JobConf jobConf = createJobConf();
    final String randomId = Integer.toString(rand.nextInt(Integer.MAX_VALUE), 36);
    Path jobDir = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDir.toString());
    Path log = new Path(jobDir, "_logs");
    FileOutputFormat.setOutputPath(jobConf, log);
    LOG.info("log=" + log);

    // create operation list
    FileSystem fs = jobDir.getFileSystem(jobConf);
    Path opList = new Path(jobDir, "_" + OP_LIST_LABEL);
    jobConf.set(OP_LIST_LABEL, opList.toString());
    int opCount = 0, synCount = 0;
    SequenceFile.Writer opWriter = null;

    try {
      opWriter = SequenceFile.createWriter(fs, jobConf, opList, Text.class,
          Text.class, SequenceFile.CompressionType.NONE);
      String f = null;
      do {
        f = inputReader.readLine();
        if (f == null) {
          done = true;
          break;
        }
        opWriter.append(new Text(f), new Text(f));
        opCount++;
        if (++synCount > SYNC_FILE_MAX) {
          opWriter.sync();
          synCount = 0;
        }
      } while (opCount < filesPerJob);
    } finally {
      if (opWriter != null) {
        opWriter.close();
      }
      fs.setReplication(opList, OP_LIST_REPLICATION); // increase replication for control file
    }

    jobConf.setInt(OP_COUNT_LABEL, opCount);
    RunningJob rJob = jClient.submitJob(jobConf);
    JobContext ctx = new JobContext(rJob, jobConf);
    submitted.add(ctx);
  } while (!done);

  return submitted;
}
 
Example 19
Source File: TableMapReduceUtil.java    From hbase with Apache License 2.0
/**
 * Sets the number of rows to return and cache with each scanner iteration.
 * Higher caching values will enable faster mapreduce jobs at the expense of
 * requiring more heap to contain the cached rows.
 *
 * @param job The current job configuration to adjust.
 * @param batchSize The number of rows to return in batch with each scanner
 * iteration.
 */
public static void setScannerCaching(JobConf job, int batchSize) {
  job.setInt("hbase.client.scanner.caching", batchSize);
}
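A hedged usage sketch of the helper above, with a hypothetical caching size of 500 rows per scanner iteration:

JobConf job = new JobConf();
// Equivalent to job.setInt("hbase.client.scanner.caching", 500);
TableMapReduceUtil.setScannerCaching(job, 500);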
 
Example 20
Source File: HadoopTeraSortTest.java    From ignite with Apache License 2.0
/**
 * Runs the actual TeraSort test job through the Ignite API.
 *
 * @param gzip Whether to use GZIP.
 */
protected final void teraSort(boolean gzip) throws Exception {
    System.out.println("TeraSort ===============================================================");

    getFileSystem().delete(new Path(sortOutDir), true);

    final JobConf jobConf = new JobConf();

    jobConf.setUser(getUser());

    jobConf.set("fs.defaultFS", getFsBase());

    log().info("Desired number of reduces: " + numReduces());

    jobConf.set("mapreduce.job.reduces", String.valueOf(numReduces()));

    log().info("Desired number of maps: " + numMaps());

    final long splitSize = dataSizeBytes() / numMaps();

    log().info("Desired split size: " + splitSize);

    // Force the split to be of the desired size:
    jobConf.set("mapred.min.split.size", String.valueOf(splitSize));
    jobConf.set("mapred.max.split.size", String.valueOf(splitSize));

    jobConf.setBoolean(HadoopJobProperty.SHUFFLE_MAPPER_STRIPED_OUTPUT.propertyName(), true);
    jobConf.setInt(HadoopJobProperty.SHUFFLE_MSG_SIZE.propertyName(), 4096);

    if (gzip)
        jobConf.setBoolean(HadoopJobProperty.SHUFFLE_MSG_GZIP.propertyName(), true);

    jobConf.set(HadoopJobProperty.JOB_PARTIALLY_RAW_COMPARATOR.propertyName(),
        TextPartiallyRawComparator.class.getName());

    Job job = setupConfig(jobConf);

    HadoopJobId jobId = new HadoopJobId(UUID.randomUUID(), 1);

    IgniteInternalFuture<?> fut = grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration(), null));

    fut.get();
}