Java Code Examples for org.apache.hadoop.mapred.JobConf#setInt()

The following examples show how to use org.apache.hadoop.mapred.JobConf#setInt(). Each example is taken from an open-source project; the originating project and source file are noted above each example.
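Before the project examples, here is a minimal, self-contained sketch of the setInt()/getInt() round trip. The configuration keys used here (example.max.retries, example.unset.key) are made-up names for illustration, not keys defined by Hadoop.

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetIntSketch {
  public static void main(String[] args) {
    JobConf conf = new JobConf();

    // Store an int under a configuration key (hypothetical key name).
    conf.setInt("example.max.retries", 3);

    // Read it back; the second argument is the default returned when the key is unset.
    int retries = conf.getInt("example.max.retries", 1);   // 3

    // An unset key falls back to the supplied default.
    int fallback = conf.getInt("example.unset.key", 42);   // 42

    System.out.println(retries + " " + fallback);
  }
}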
Example 1
Source File: TestDatamerge.java    From hadoop with Apache License 2.0
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/"+jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapreduce.join.expr", CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
 
Example 2
Source File: TestMiniCoronaRunJob.java    From RDFS with Apache License 2.0
public void testMemoryLimit() throws Exception {
  LOG.info("Starting testMemoryLimit");
  JobConf conf = new JobConf();
  conf.setInt(CoronaConf.NODE_RESERVED_MEMORY_MB, Integer.MAX_VALUE);
  corona = new MiniCoronaCluster.Builder().conf(conf).numTaskTrackers(2).build();
  final JobConf jobConf = corona.createJobConf();
  long start = System.currentTimeMillis();
  FutureTask<Boolean> task = submitSleepJobFutureTask(jobConf);
  checkTaskNotDone(task, 10);
  NodeManager nm = corona.getClusterManager().getNodeManager();
  nm.getResourceLimit().setNodeReservedMemoryMB(0);
  Assert.assertTrue(task.get());
  long end = System.currentTimeMillis();
  LOG.info("Task Done. Verifying");
  new ClusterManagerMetricsVerifier(corona.getClusterManager(),
      1, 1, 1, 1, 1, 1, 0, 0).verifyAll();
  LOG.info("Time spent for testMemoryLimit:" +
      (end - start));
}
 
Example 3
Source File: MRTask.java    From incubator-tez with Apache License 2.0
public void localizeConfiguration(JobConf jobConf)
    throws IOException, InterruptedException {
  jobConf.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
  jobConf.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
  jobConf.setInt(JobContext.TASK_PARTITION,
      taskAttemptId.getTaskID().getId());
  jobConf.set(JobContext.ID, taskAttemptId.getJobID().toString());
  
  jobConf.setBoolean(MRJobConfig.TASK_ISMAP, isMap);
  
  Path outputPath = FileOutputFormat.getOutputPath(jobConf);
  if (outputPath != null) {
    if ((committer instanceof FileOutputCommitter)) {
      FileOutputFormat.setWorkOutputPath(jobConf, 
        ((FileOutputCommitter)committer).getTaskAttemptPath(taskAttemptContext));
    } else {
      FileOutputFormat.setWorkOutputPath(jobConf, outputPath);
    }
  }
}
 
Example 4
Source File: TeraSort.java    From hadoop-book with Apache License 2.0
public int run(String[] args) throws Exception {
  LOG.info("starting");
  JobConf job = (JobConf) getConf();
  Path inputDir = new Path(args[0]);
  inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
  Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME);
  URI partitionUri = new URI(partitionFile.toString() +
                             "#" + TeraInputFormat.PARTITION_FILENAME);
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraSort");
  job.setJarByClass(TeraSort.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormat(TeraInputFormat.class);
  job.setOutputFormat(TeraOutputFormat.class);
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TeraInputFormat.writePartitionFile(job, partitionFile);
  DistributedCache.addCacheFile(partitionUri, job);
  DistributedCache.createSymlink(job);
  job.setInt("dfs.replication", 1);
  TeraOutputFormat.setFinalSync(job, true);
  JobClient.runJob(job);
  LOG.info("done");
  return 0;
}
 
Example 5
Source File: MRTask.java    From tez with Apache License 2.0
public void localizeConfiguration(JobConf jobConf)
    throws IOException, InterruptedException {
  jobConf.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
  jobConf.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
  jobConf.setInt(JobContext.TASK_PARTITION,
      taskAttemptId.getTaskID().getId());
  jobConf.set(JobContext.ID, taskAttemptId.getJobID().toString());
  
  jobConf.setBoolean(MRJobConfig.TASK_ISMAP, isMap);
  
  Path outputPath = FileOutputFormat.getOutputPath(jobConf);
  if (outputPath != null) {
    if ((committer instanceof FileOutputCommitter)) {
      FileOutputFormat.setWorkOutputPath(jobConf, 
        ((FileOutputCommitter)committer).getTaskAttemptPath(taskAttemptContext));
    } else {
      FileOutputFormat.setWorkOutputPath(jobConf, outputPath);
    }
  }
}
 
Example 6
Source File: TestDeprecatedKeys.java    From incubator-tez with Apache License 2.0
@Test
public void verifyReduceKeyTranslation() {
  JobConf jobConf = new JobConf();

  jobConf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.4f);
  jobConf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, 20000l);
  jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, 2000);
  jobConf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.55f);
  jobConf.setFloat(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, 0.60f);
  jobConf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.22f);
  jobConf.setBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, true);
  jobConf.setFloat(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT, 0.33f);

  MRHelpers.translateVertexConfToTez(jobConf);

  assertEquals(0.4f, jobConf.getFloat(
      TezJobConfig.TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT, 0f), 0.01f);
  assertEquals(20000l, jobConf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, 0));
  assertEquals(2000,
      jobConf.getInt(TezJobConfig.TEZ_RUNTIME_IO_SORT_FACTOR, 0));
  assertEquals(0.55f, jobConf.getFloat(
      TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 0), 0.01f);
  assertEquals(0.60f,
      jobConf.getFloat(TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 0),
      0.01f);
  assertEquals(0.22f,
      jobConf.getFloat(TezJobConfig.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 0),
      0.01f);
  assertEquals(true, jobConf.getBoolean(
      TezJobConfig.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, false));
  assertEquals(0.33f,
      jobConf.getFloat(TezJobConfig.TEZ_RUNTIME_INPUT_BUFFER_PERCENT, 0),
      0.01f);
}
 
Example 7
Source File: AbstractMROldApiSaveTest.java    From elasticsearch-hadoop with Apache License 2.0
@Parameters
public static Collection<Object[]> configs() throws Exception {
    JobConf conf = HdpBootstrap.hadoopConfig();

    conf.setInputFormat(SplittableTextInputFormat.class);
    conf.setOutputFormat(EsOutputFormat.class);
    conf.setReducerClass(IdentityReducer.class);
    HadoopCfgUtils.setGenericOptions(conf);
    conf.setNumMapTasks(2);
    conf.setInt("actual.splits", 2);
    conf.setNumReduceTasks(0);


    JobConf standard = new JobConf(conf);
    standard.setMapperClass(TabMapper.class);
    standard.setMapOutputValueClass(LinkedMapWritable.class);
    standard.set(ConfigurationOptions.ES_INPUT_JSON, "false");
    FileInputFormat.setInputPaths(standard, new Path(MRSuite.testData.sampleArtistsDat(conf)));

    JobConf json = new JobConf(conf);
    json.setMapperClass(IdentityMapper.class);
    json.setMapOutputValueClass(Text.class);
    json.set(ConfigurationOptions.ES_INPUT_JSON, "true");
    FileInputFormat.setInputPaths(json, new Path(MRSuite.testData.sampleArtistsJson(conf)));

    return Arrays.asList(new Object[][] {
            { standard, "" },
            { json, "json-" }
    });
}
 
Example 8
Source File: ValueAggregatorJob.java    From hadoop with Apache License 2.0
public static void setAggregatorDescriptors(JobConf job
    , Class<? extends ValueAggregatorDescriptor>[] descriptors) {
  job.setInt("aggregator.descriptor.num", descriptors.length);
  //specify the aggregator descriptors
  for(int i=0; i< descriptors.length; i++) {
    job.set("aggregator.descriptor." + i, "UserDefined," + descriptors[i].getName());
  }    
}
 
Example 9
Source File: TeraSort.java    From RDFS with Apache License 2.0
public int run(String[] args) throws Exception {
  LOG.info("starting");
  JobConf job = (JobConf) getConf();
  Path inputDir = new Path(args[0]);
  inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
  Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME);
  URI partitionUri = new URI(partitionFile.toString() +
                             "#" + TeraInputFormat.PARTITION_FILENAME);
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraSort");
  job.setJarByClass(TeraSort.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormat(TeraInputFormat.class);
  job.setOutputFormat(TeraOutputFormat.class);
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TeraInputFormat.writePartitionFile(job, partitionFile);
  DistributedCache.addCacheFile(partitionUri, job);
  DistributedCache.createSymlink(job);
  job.setInt("dfs.replication", 1);
  TeraOutputFormat.setFinalSync(job, true);
  long startTime = System.currentTimeMillis();
  JobClient.runJob(job);
  long endTime = System.currentTimeMillis();
  System.out.println((float)(endTime-startTime)/1000);
  LOG.info("done");
  return 0;
}
 
Example 10
Source File: TestMRHelpers.java    From tez with Apache License 2.0
@Test(timeout = 5000)
public void testContainerResourceConstruction() {
  JobConf conf = new JobConf(new Configuration());
  Resource mapResource = MRHelpers.getResourceForMRMapper(conf);
  Resource reduceResource = MRHelpers.getResourceForMRReducer(conf);

  Assert.assertEquals(MRJobConfig.DEFAULT_MAP_CPU_VCORES,
      mapResource.getVirtualCores());
  Assert.assertEquals(MRJobConfig.DEFAULT_MAP_MEMORY_MB,
      mapResource.getMemory());
  Assert.assertEquals(MRJobConfig.DEFAULT_REDUCE_CPU_VCORES,
      reduceResource.getVirtualCores());
  Assert.assertEquals(MRJobConfig.DEFAULT_REDUCE_MEMORY_MB,
      reduceResource.getMemory());

  conf.setInt(MRJobConfig.MAP_CPU_VCORES, 2);
  conf.setInt(MRJobConfig.MAP_MEMORY_MB, 123);
  conf.setInt(MRJobConfig.REDUCE_CPU_VCORES, 20);
  conf.setInt(MRJobConfig.REDUCE_MEMORY_MB, 1234);

  mapResource = MRHelpers.getResourceForMRMapper(conf);
  reduceResource = MRHelpers.getResourceForMRReducer(conf);

  Assert.assertEquals(2, mapResource.getVirtualCores());
  Assert.assertEquals(123, mapResource.getMemory());
  Assert.assertEquals(20, reduceResource.getVirtualCores());
  Assert.assertEquals(1234, reduceResource.getMemory());
}
 
Example 11
Source File: TestEncryptedShuffle.java    From big-c with Apache License 2.0
private void encryptedShuffleWithCerts(boolean useClientCerts)
  throws Exception {
  try {
    Configuration conf = new Configuration();
    String keystoresDir = new File(BASEDIR).getAbsolutePath();
    String sslConfsDir =
      KeyStoreTestUtil.getClasspathDir(TestEncryptedShuffle.class);
    KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfsDir, conf,
                                    useClientCerts);
    conf.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, true);
    startCluster(conf);
    FileSystem fs = FileSystem.get(getJobConf());
    Path inputDir = new Path("input");
    fs.mkdirs(inputDir);
    Writer writer =
      new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
    writer.write("hello");
    writer.close();

    Path outputDir = new Path("output", "output");

    JobConf jobConf = new JobConf(getJobConf());
    jobConf.setInt("mapred.map.tasks", 1);
    jobConf.setInt("mapred.map.max.attempts", 1);
    jobConf.setInt("mapred.reduce.max.attempts", 1);
    jobConf.set("mapred.input.dir", inputDir.toString());
    jobConf.set("mapred.output.dir", outputDir.toString());
    JobClient jobClient = new JobClient(jobConf);
    RunningJob runJob = jobClient.submitJob(jobConf);
    runJob.waitForCompletion();
    Assert.assertTrue(runJob.isComplete());
    Assert.assertTrue(runJob.isSuccessful());
  } finally {
    stopCluster();
  }
}
 
Example 12
Source File: TestEncryptedShuffle.java    From hadoop with Apache License 2.0
private void encryptedShuffleWithCerts(boolean useClientCerts)
  throws Exception {
  try {
    Configuration conf = new Configuration();
    String keystoresDir = new File(BASEDIR).getAbsolutePath();
    String sslConfsDir =
      KeyStoreTestUtil.getClasspathDir(TestEncryptedShuffle.class);
    KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfsDir, conf,
                                    useClientCerts);
    conf.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, true);
    startCluster(conf);
    FileSystem fs = FileSystem.get(getJobConf());
    Path inputDir = new Path("input");
    fs.mkdirs(inputDir);
    Writer writer =
      new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
    writer.write("hello");
    writer.close();

    Path outputDir = new Path("output", "output");

    JobConf jobConf = new JobConf(getJobConf());
    jobConf.setInt("mapred.map.tasks", 1);
    jobConf.setInt("mapred.map.max.attempts", 1);
    jobConf.setInt("mapred.reduce.max.attempts", 1);
    jobConf.set("mapred.input.dir", inputDir.toString());
    jobConf.set("mapred.output.dir", outputDir.toString());
    JobClient jobClient = new JobClient(jobConf);
    RunningJob runJob = jobClient.submitJob(jobConf);
    runJob.waitForCompletion();
    Assert.assertTrue(runJob.isComplete());
    Assert.assertTrue(runJob.isSuccessful());
  } finally {
    stopCluster();
  }
}
 
Example 13
Source File: TestKeyFieldBasedPartitioner.java    From hadoop with Apache License 2.0
/**
 * Tests that the key-field-based partitioner works with an empty key.
 */
@Test
public void testEmptyKey() throws Exception {
  KeyFieldBasedPartitioner<Text, Text> kfbp = 
    new KeyFieldBasedPartitioner<Text, Text>();
  JobConf conf = new JobConf();
  conf.setInt("num.key.fields.for.partition", 10);
  kfbp.configure(conf);
  assertEquals("Empty key should map to 0th partition", 
               0, kfbp.getPartition(new Text(), new Text(), 10));
}
 
Example 14
Source File: MneMapredBufferDataTest.java    From mnemonic with Apache License 2.0
@BeforeClass
public void setUp() throws IOException {
  m_workdir = new Path(
      System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
  m_conf = new JobConf();
  m_rand = Utils.createRandom();
  m_partfns = new ArrayList<String>();

  try {
    m_fs = FileSystem.getLocal(m_conf).getRaw();
    m_fs.delete(m_workdir, true);
    m_fs.mkdirs(m_workdir);
  } catch (IOException e) {
    throw new IllegalStateException("bad fs init", e);
  }

  m_conf.setInt(JobContext.TASK_PARTITION, TASK_PARTITION);
  
  MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString());
  MneConfigHelper.setBaseOutputName(m_conf, null, "mapred-buffer-data");

  MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME);
  MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID);
  MneConfigHelper.setDurableTypes(m_conf,
      MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new DurableType[] {DurableType.BUFFER});
  MneConfigHelper.setEntityFactoryProxies(m_conf,
      MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new Class<?>[] {});
  MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME);
  MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID);
  MneConfigHelper.setMemPoolSize(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, 1024L * 1024 * 1024 * 4);
  MneConfigHelper.setDurableTypes(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new DurableType[] {DurableType.BUFFER});
  MneConfigHelper.setEntityFactoryProxies(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new Class<?>[] {});
}
 
Example 15
Source File: ValueAggregatorJob.java    From RDFS with Apache License 2.0
public static void setAggregatorDescriptors(JobConf job
    , Class<? extends ValueAggregatorDescriptor>[] descriptors) {
  job.setInt("aggregator.descriptor.num", descriptors.length);
  //specify the aggregator descriptors
  for(int i=0; i< descriptors.length; i++) {
    job.set("aggregator.descriptor." + i, "UserDefined," + descriptors[i].getName());
  }    
}
 
Example 16
Source File: TestMapProcessor.java    From tez with Apache License 2.0
@Test(timeout = 30000)
public void testMapProcessorProgress() throws Exception {
  String dagName = "mrdag0";
  String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
  JobConf jobConf = new JobConf(defaultConf);
  setUpJobConf(jobConf);

  MRHelpers.translateMRConfToTez(jobConf);
  jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);

  jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);

  jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, new Path(workDir,
      "localized-resources").toUri().toString());

  Path mapInput = new Path(workDir, "map0");


  MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput, 100000);

  InputSpec mapInputSpec = new InputSpec("NullSrcVertex",
      InputDescriptor.create(MRInputLegacy.class.getName())
          .setUserPayload(UserPayload.create(ByteBuffer.wrap(
              MRRuntimeProtos.MRInputUserPayloadProto.newBuilder()
                  .setConfigurationBytes(TezUtils.createByteStringFromConf
                      (jobConf)).build()
                  .toByteArray()))),
      1);
  OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex",
      OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName())
          .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);

  TezSharedExecutor sharedExecutor = new TezSharedExecutor(jobConf);
  final LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask
      (localFs, workDir, jobConf, 0,
          new Path(workDir, "map0"), new TestUmbilical(), dagName, vertexName,
          Collections.singletonList(mapInputSpec),
          Collections.singletonList(mapOutputSpec), sharedExecutor);

  ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);
  Thread monitorProgress = new Thread(new Runnable() {
    @Override
    public void run() {
      float prog = task.getProgress();
      if (prog > 0.0f && prog < 1.0f) {
        progressUpdate = prog;
      }
    }
  });

  task.initialize();
  scheduler.scheduleAtFixedRate(monitorProgress, 0, 1,
      TimeUnit.MILLISECONDS);
  task.run();
  Assert.assertTrue("Progress Updates should be captured!",
      progressUpdate > 0.0f && progressUpdate < 1.0f);
  task.close();
  sharedExecutor.shutdownNow();
}
 
Example 17
Source File: TestReduceTaskFetchFail.java    From RDFS with Apache License 2.0
@SuppressWarnings("deprecation")
@Test
public void testcheckAndInformJobTracker() throws Exception {
  //mock creation
  TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
  TaskReporter mockTaskReporter = mock(TaskReporter.class);

  JobConf conf = new JobConf();
  conf.setUser("testuser");
  conf.setJobName("testJob");
  conf.setSessionId("testSession");

  TaskAttemptID tid =  new TaskAttemptID();
  TestReduceTask rTask = new TestReduceTask();
  rTask.setConf(conf);

  ReduceTask.ReduceCopier reduceCopier = rTask.new TestReduceCopier(mockUmbilical, conf, mockTaskReporter);
  reduceCopier.checkAndInformJobTracker(1, tid, false);

  verify(mockTaskReporter, never()).progress();

  reduceCopier.checkAndInformJobTracker(10, tid, false);
  verify(mockTaskReporter, times(1)).progress();

  // Test the config setting
  conf.setInt("mapreduce.reduce.shuffle.maxfetchfailures", 3);

  rTask.setConf(conf);
  reduceCopier = rTask.new TestReduceCopier(mockUmbilical, conf, mockTaskReporter);

  reduceCopier.checkAndInformJobTracker(1, tid, false);
  verify(mockTaskReporter, times(1)).progress();

  reduceCopier.checkAndInformJobTracker(3, tid, false);
  verify(mockTaskReporter, times(2)).progress();

  reduceCopier.checkAndInformJobTracker(5, tid, false);
  verify(mockTaskReporter, times(2)).progress();

  reduceCopier.checkAndInformJobTracker(6, tid, false);
  verify(mockTaskReporter, times(3)).progress();

  // test readError and its config
  reduceCopier.checkAndInformJobTracker(7, tid, true);
  verify(mockTaskReporter, times(4)).progress();

  conf.setBoolean("mapreduce.reduce.shuffle.notify.readerror", false);

  rTask.setConf(conf);
  reduceCopier = rTask.new TestReduceCopier(mockUmbilical, conf, mockTaskReporter);

  reduceCopier.checkAndInformJobTracker(7, tid, true);
  verify(mockTaskReporter, times(4)).progress();

}
 
Example 18
Source File: DataFsck.java    From RDFS with Apache License 2.0
List<JobContext> submitJobs(BufferedReader inputReader, int filesPerJob) throws IOException {
  boolean done = false;
  JobClient jClient = new JobClient(createJobConf());
  List<JobContext> submitted = new ArrayList<JobContext>();
  Random rand = new Random();
  do {
    JobConf jobConf = createJobConf();
    final String randomId = Integer.toString(rand.nextInt(Integer.MAX_VALUE), 36);
    Path jobDir = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDir.toString());
    Path log = new Path(jobDir, "_logs");
    FileOutputFormat.setOutputPath(jobConf, log);
    LOG.info("log=" + log);

    // create operation list
    FileSystem fs = jobDir.getFileSystem(jobConf);
    Path opList = new Path(jobDir, "_" + OP_LIST_LABEL);
    jobConf.set(OP_LIST_LABEL, opList.toString());
    int opCount = 0, synCount = 0;
    SequenceFile.Writer opWriter = null;

    try {
      opWriter = SequenceFile.createWriter(fs, jobConf, opList, Text.class,
          Text.class, SequenceFile.CompressionType.NONE);
      String f = null;
      do {
        f = inputReader.readLine();
        if (f == null) {
          done = true;
          break;
        }
        opWriter.append(new Text(f), new Text(f));
        opCount++;
        if (++synCount > SYNC_FILE_MAX) {
          opWriter.sync();
          synCount = 0;
        }
      } while (opCount < filesPerJob);
    } finally {
      if (opWriter != null) {
        opWriter.close();
      }
      fs.setReplication(opList, OP_LIST_REPLICATION); // increase replication for control file
    }

    jobConf.setInt(OP_COUNT_LABEL, opCount);
    RunningJob rJob = jClient.submitJob(jobConf);
    JobContext ctx = new JobContext(rJob, jobConf);
    submitted.add(ctx);
  } while (!done);

  return submitted;
}
 
Example 19
Source File: TableMapReduceUtil.java    From hbase with Apache License 2.0
/**
 * Sets the number of rows to return and cache with each scanner iteration.
 * Higher caching values will enable faster mapreduce jobs at the expense of
 * requiring more heap to contain the cached rows.
 *
 * @param job The current job configuration to adjust.
 * @param batchSize The number of rows to return in batch with each scanner
 * iteration.
 */
public static void setScannerCaching(JobConf job, int batchSize) {
  job.setInt("hbase.client.scanner.caching", batchSize);
}
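A hedged usage sketch of the helper above, with a hypothetical caching size of 500 rows per scanner iteration:

JobConf job = new JobConf();
// Equivalent to job.setInt("hbase.client.scanner.caching", 500);
TableMapReduceUtil.setScannerCaching(job, 500);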
 
Example 20
Source File: HadoopTeraSortTest.java    From ignite with Apache License 2.0
/**
 * Runs the actual TeraSort test job through the Ignite API.
 *
 * @param gzip Whether to use GZIP.
 */
protected final void teraSort(boolean gzip) throws Exception {
    System.out.println("TeraSort ===============================================================");

    getFileSystem().delete(new Path(sortOutDir), true);

    final JobConf jobConf = new JobConf();

    jobConf.setUser(getUser());

    jobConf.set("fs.defaultFS", getFsBase());

    log().info("Desired number of reduces: " + numReduces());

    jobConf.set("mapreduce.job.reduces", String.valueOf(numReduces()));

    log().info("Desired number of maps: " + numMaps());

    final long splitSize = dataSizeBytes() / numMaps();

    log().info("Desired split size: " + splitSize);

    // Force the split to be of the desired size:
    jobConf.set("mapred.min.split.size", String.valueOf(splitSize));
    jobConf.set("mapred.max.split.size", String.valueOf(splitSize));

    jobConf.setBoolean(HadoopJobProperty.SHUFFLE_MAPPER_STRIPED_OUTPUT.propertyName(), true);
    jobConf.setInt(HadoopJobProperty.SHUFFLE_MSG_SIZE.propertyName(), 4096);

    if (gzip)
        jobConf.setBoolean(HadoopJobProperty.SHUFFLE_MSG_GZIP.propertyName(), true);

    jobConf.set(HadoopJobProperty.JOB_PARTIALLY_RAW_COMPARATOR.propertyName(),
        TextPartiallyRawComparator.class.getName());

    Job job = setupConfig(jobConf);

    HadoopJobId jobId = new HadoopJobId(UUID.randomUUID(), 1);

    IgniteInternalFuture<?> fut = grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration(), null));

    fut.get();
}