Java Code Examples for org.apache.hadoop.mapred.JobConf#setLong()

The following examples show how to use org.apache.hadoop.mapred.JobConf#setLong(). They are drawn from a range of open-source projects; the source file and originating project for each example are noted above it.
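As a quick orientation: JobConf extends Configuration, so setLong(String, long) stores a long value under the given key and getLong(String, long) reads it back, falling back to the supplied default when the key is unset. A minimal sketch of the round trip (the key name here is made up for illustration):

JobConf conf = new JobConf();
conf.setLong("example.max.records", 500000L);
// Returns the stored value, or the default (1000L) if the key were unset.
long maxRecords = conf.getLong("example.max.records", 1000L); // 500000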
Example 1
Source File: DFSGeneralTest.java    From RDFS with Apache License 2.0
private void updateJobConf(JobConf conf, Path inputPath, Path outputPath) {
  // set specific job config
  conf.setLong(NUMBER_OF_MAPS_KEY, nmaps);
  conf.setLong(NUMBER_OF_THREADS_KEY, nthreads);
  conf.setInt(BUFFER_SIZE_KEY, buffersize);
  conf.setLong(WRITER_DATARATE_KEY, datarate);
  conf.setLong("mapred.task.timeout", Long.MAX_VALUE);
  conf.set(OUTPUT_DIR_KEY, output);
  
  // set the output and input for the map reduce
  FileInputFormat.setInputPaths(conf, inputPath);
  FileOutputFormat.setOutputPath(conf, outputPath);

  conf.setInputFormat(SequenceFileInputFormat.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  conf.setNumReduceTasks(1);
  conf.setSpeculativeExecution(false);
}
 
Example 2
Source File: TeraValidate.java    From RDFS with Apache License 2.0
public int run(String[] args) throws Exception {
  JobConf job = (JobConf) getConf();
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraValidate");
  job.setJarByClass(TeraValidate.class);
  job.setMapperClass(ValidateMapper.class);
  job.setReducerClass(ValidateReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  // force a single split 
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  job.setInputFormat(TeraInputFormat.class);
  JobClient.runJob(job);
  return 0;
}
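
This example forces a single input split by raising mapred.min.split.size to Long.MAX_VALUE (the identical TeraValidate variants below, and the CompositeInputFormat examples further down, rely on the same trick). In the old-API FileInputFormat the per-file split size is computed roughly as max(minSize, min(goalSize, blockSize)), so a huge minimum collapses each file into one split. A simplified sketch of that rule:

// Simplified form of the old FileInputFormat split-size computation
// (the real code also handles remainders and file boundaries).
static long computeSplitSize(long goalSize, long minSize, long blockSize) {
  return Math.max(minSize, Math.min(goalSize, blockSize));
}
// With minSize == Long.MAX_VALUE the result is Long.MAX_VALUE,
// so every input file is read as a single split.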
 
Example 3
Source File: TeraValidate.java    From hadoop-gpu with Apache License 2.0
public int run(String[] args) throws Exception {
  JobConf job = (JobConf) getConf();
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraValidate");
  job.setJarByClass(TeraValidate.class);
  job.setMapperClass(ValidateMapper.class);
  job.setReducerClass(ValidateReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  // force a single split 
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  job.setInputFormat(TeraInputFormat.class);
  JobClient.runJob(job);
  return 0;
}
 
Example 4
Source File: TeraValidate.java    From hadoop-book with Apache License 2.0
public int run(String[] args) throws Exception {
  JobConf job = (JobConf) getConf();
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraValidate");
  job.setJarByClass(TeraValidate.class);
  job.setMapperClass(ValidateMapper.class);
  job.setReducerClass(ValidateReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  // force a single split 
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  job.setInputFormat(TeraInputFormat.class);
  JobClient.runJob(job);
  return 0;
}
 
Example 5
Source File: MapProcessor.java    From tez with Apache License 2.0
/**
 * Update the job with details about the file split
 * @param job the job configuration to update
 * @param inputSplit the file split
 */
private void updateJobWithSplit(final JobConf job, InputSplit inputSplit) {
  if (inputSplit instanceof FileSplit) {
    FileSplit fileSplit = (FileSplit) inputSplit;
    job.set(JobContext.MAP_INPUT_FILE, fileSplit.getPath().toString());
    job.setLong(JobContext.MAP_INPUT_START, fileSplit.getStart());
    // Despite its name, MAP_INPUT_PATH resolves to "mapreduce.map.input.length",
    // so this line stores the split length in bytes.
    job.setLong(JobContext.MAP_INPUT_PATH, fileSplit.getLength());
  }
  LOG.info("Processing mapred split: " + inputSplit);
}
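
On the task side, an old-API mapper can read these values back from its JobConf; a hedged sketch using the same constants:

// Sketch: recovering the stored split details in a mapper's configure().
public void configure(JobConf job) {
  String file = job.get(JobContext.MAP_INPUT_FILE);          // input path
  long start  = job.getLong(JobContext.MAP_INPUT_START, 0L); // byte offset
  long length = job.getLong(JobContext.MAP_INPUT_PATH, 0L);  // split length
}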
 
Example 6
Source File: TestGroupedSplits.java    From incubator-tez with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test(timeout=10000)
public void testGroupedSplitSize() throws IOException {
  JobConf job = new JobConf(defaultConf);
  InputFormat mockWrappedFormat = mock(InputFormat.class);
  TezGroupedSplitsInputFormat<LongWritable, Text> format =
      new TezGroupedSplitsInputFormat<LongWritable, Text>();
  format.setConf(job);
  format.setInputFormat(mockWrappedFormat);
  
  job.setLong(TezConfiguration.TEZ_AM_GROUPING_SPLIT_MAX_SIZE, 500 * 1000 * 1000L);
  job.setLong(TezConfiguration.TEZ_AM_GROUPING_SPLIT_MIN_SIZE, 50 * 1000 * 1000L);
  InputSplit mockSplit1 = mock(InputSplit.class);
  when(mockSplit1.getLength()).thenReturn(10 * 1000 * 1000L);
  when(mockSplit1.getLocations()).thenReturn(null);
  int numSplits = 100;
  InputSplit[] mockSplits = new InputSplit[numSplits];
  for (int i=0; i<numSplits; i++) {
    mockSplits[i] = mockSplit1;
  }
  when(mockWrappedFormat.getSplits((JobConf)anyObject(), anyInt())).thenReturn(mockSplits);
  
  // desired splits not set. We end up choosing min/max split size based on 
  // total data and num original splits. In this case, min size will be hit
  InputSplit[] splits = format.getSplits(job, 0);
  Assert.assertEquals(25, splits.length);
  
  // split too big. override with max
  format.setDesiredNumberOfSplits(1);
  splits = format.getSplits(job, 0);
  Assert.assertEquals(4, splits.length);
  
  // splits too small. override with min
  format.setDesiredNumberOfSplits(1000);
  splits = format.getSplits(job, 0);
  Assert.assertEquals(25, splits.length);
  
}
 
Example 7
Source File: MapProcessor.java    From incubator-tez with Apache License 2.0
/**
 * Update the job with details about the file split
 * @param job the job configuration to update
 * @param inputSplit the file split
 */
private void updateJobWithSplit(final JobConf job, InputSplit inputSplit) {
  if (inputSplit instanceof FileSplit) {
    FileSplit fileSplit = (FileSplit) inputSplit;
    job.set(JobContext.MAP_INPUT_FILE, fileSplit.getPath().toString());
    job.setLong(JobContext.MAP_INPUT_START, fileSplit.getStart());
    job.setLong(JobContext.MAP_INPUT_PATH, fileSplit.getLength());
  }
  LOG.info("Processing mapred split: " + inputSplit);
}
 
Example 8
Source File: TestDeprecatedKeys.java    From tez with Apache License 2.0
@Test(timeout = 5000)
public void verifyReduceKeyTranslation() {
  JobConf jobConf = new JobConf();

  jobConf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.4f);
  jobConf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, 20000L);
  jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, 2000);
  jobConf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.55f);
  jobConf.setFloat(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, 0.60f);
  jobConf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.22f);
  jobConf.setBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, true);
  jobConf.setFloat(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT, 0.33f);
  jobConf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, false);

  MRHelpers.translateMRConfToTez(jobConf);

  assertEquals(0.4f, jobConf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0f), 0.01f);
  assertEquals(20000L, jobConf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, 0));
  assertEquals(2000,
      jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, 0));
  assertEquals(0.55f, jobConf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 0), 0.01f);
  assertEquals(0.60f,
      jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 0),
      0.01f);
  assertEquals(0.22f,
      jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 0),
      0.01f);
  assertEquals(true, jobConf.getBoolean(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, false));
  assertEquals(0.33f,
      jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0),
      0.01f);
  assertEquals(false, jobConf.getBoolean(TezConfiguration.TEZ_USER_CLASSPATH_FIRST, true));
}
 
Example 9
Source File: MapProcessor.java    From incubator-tez with Apache License 2.0
private void updateJobWithSplit(
    final JobConf job, org.apache.hadoop.mapreduce.InputSplit inputSplit) {
  if (inputSplit instanceof org.apache.hadoop.mapreduce.lib.input.FileSplit) {
    org.apache.hadoop.mapreduce.lib.input.FileSplit fileSplit = 
        (org.apache.hadoop.mapreduce.lib.input.FileSplit) inputSplit;
    job.set(JobContext.MAP_INPUT_FILE, fileSplit.getPath().toString());
    job.setLong(JobContext.MAP_INPUT_START, fileSplit.getStart());
    job.setLong(JobContext.MAP_INPUT_PATH, fileSplit.getLength());
  }
  LOG.info("Processing mapreduce split: " + inputSplit);
}
 
Example 10
Source File: RandomWriter.java    From hadoop-gpu with Apache License 2.0
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each map writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 * 
 * @throws IOException 
 */
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }
  
  Path outDir = new Path(args[0]);
  JobConf job = new JobConf(getConf());
  
  job.setJarByClass(RandomWriter.class);
  job.setJobName("random-writer");
  FileOutputFormat.setOutputPath(job, outDir);
  
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  
  job.setInputFormat(RandomInputFormat.class);
  job.setMapperClass(Map.class);        
  job.setReducerClass(IdentityReducer.class);
  job.setOutputFormat(SequenceFileOutputFormat.class);
  
  JobClient client = new JobClient(job);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10);
  long numBytesToWritePerMap = job.getLong("test.randomwrite.bytes_per_map",
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
    return -2;
  }
  long totalBytesToWrite = job.getLong("test.randomwrite.total_bytes", 
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite);
  }
  
  job.setNumMapTasks(numMaps);
  System.out.println("Running " + numMaps + " maps.");
  
  // reducer NONE
  job.setNumReduceTasks(0);
  
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(job);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + 
                     (endTime.getTime() - startTime.getTime()) /1000 + 
                     " seconds.");
  
  return 0;
}
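
Because the job sizes itself from configuration keys, a smaller run can preset the same keys read by run() above; for example (values arbitrary):

// Sketch: shrink the job by presetting the keys run() reads.
JobConf job = new JobConf();
job.setInt("test.randomwriter.maps_per_host", 2);
job.setLong("test.randomwrite.bytes_per_map", 64L * 1024 * 1024);  // 64 MB
job.setLong("test.randomwrite.total_bytes", 512L * 1024 * 1024);   // 512 MB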
 
Example 11
Source File: CrawlDbReader.java    From nutch-htmlunit with Apache License 2.0
public void processTopNJob(String crawlDb, long topN, float min, String output, Configuration config) throws IOException {

    if (LOG.isInfoEnabled()) {
      LOG.info("CrawlDb topN: starting (topN=" + topN + ", min=" + min + ")");
      LOG.info("CrawlDb db: " + crawlDb);
    }

    Path outFolder = new Path(output);
    Path tempDir =
      new Path(config.get("mapred.temp.dir", ".") +
               "/readdb-topN-temp-"+
               Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    JobConf job = new NutchJob(config);
    job.setJobName("topN prepare " + crawlDb);
    FileInputFormat.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME));
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(CrawlDbTopNMapper.class);
    job.setReducerClass(IdentityReducer.class);

    FileOutputFormat.setOutputPath(job, tempDir);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(FloatWritable.class);
    job.setOutputValueClass(Text.class);

    // XXX hmmm, no setFloat() in the API ... :(
    job.setLong("db.reader.topn.min", Math.round(1000000.0 * min));
    JobClient.runJob(job);

    if (LOG.isInfoEnabled()) {
      LOG.info("CrawlDb topN: collecting topN scores.");
    }
    job = new NutchJob(config);
    job.setJobName("topN collect " + crawlDb);
    job.setLong("db.reader.topn", topN);

    FileInputFormat.addInputPath(job, tempDir);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(CrawlDbTopNReducer.class);

    FileOutputFormat.setOutputPath(job, outFolder);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(FloatWritable.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(1); // create a single file.

    JobClient.runJob(job);
    FileSystem fs = FileSystem.get(config);
    fs.delete(tempDir, true);
    if (LOG.isInfoEnabled()) { LOG.info("CrawlDb topN: done"); }

  }
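
The "XXX ... no setFloat()" comment explains the workaround: the float threshold is stored as a long after multiplying by 1,000,000 (modern Configuration does offer setFloat(), but this code predates it). The task side would undo the scaling along these lines (a sketch, not the actual Nutch mapper):

// Recover the threshold stored by processTopNJob() above.
long scaled = job.getLong("db.reader.topn.min", 0);
float min = scaled / 1000000.0f; // undoes Math.round(1000000.0 * min)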
 
Example 12
Source File: CompositeInputFormat.java    From RDFS with Apache License 2.0
/**
 * Build a CompositeInputSplit from the child InputFormats by assigning the
 * ith split from each child to the ith composite split.
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  setFormat(job);
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  return root.getSplits(job, numSplits);
}
 
Example 13
Source File: TeraGen.java    From RDFS with Apache License 2.0
static void setNumberOfRows(JobConf job, long numRows) {
  job.setLong("terasort.num-rows", numRows);
}
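
TeraGen pairs this setter with a getter over the same key; roughly (a sketch from memory, the default in the real source may differ):

static long getNumberOfRows(JobConf job) {
  return job.getLong("terasort.num-rows", 0);
}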
 
Example 14
Source File: TeraGen.java    From hadoop-book with Apache License 2.0
static void setNumberOfRows(JobConf job, long numRows) {
  job.setLong("terasort.num-rows", numRows);
}
 
Example 15
Source File: DataJoinJob.java    From RDFS with Apache License 2.0
public static JobConf createDataJoinJob(String args[]) throws IOException {

    String inputDir = args[0];
    String outputDir = args[1];
    Class inputFormat = SequenceFileInputFormat.class;
    if (args[2].compareToIgnoreCase("text") != 0) {
      System.out.println("Using SequenceFileInputFormat: " + args[2]);
    } else {
      System.out.println("Using TextInputFormat: " + args[2]);
      inputFormat = TextInputFormat.class;
    }
    int numOfReducers = Integer.parseInt(args[3]);
    Class mapper = getClassByName(args[4]);
    Class reducer = getClassByName(args[5]);
    Class mapoutputValueClass = getClassByName(args[6]);
    Class outputFormat = TextOutputFormat.class;
    Class outputValueClass = Text.class;
    if (args[7].compareToIgnoreCase("text") != 0) {
      System.out.println("Using SequenceFileOutputFormat: " + args[7]);
      outputFormat = SequenceFileOutputFormat.class;
      outputValueClass = getClassByName(args[7]);
    } else {
      System.out.println("Using TextOutputFormat: " + args[7]);
    }
    long maxNumOfValuesPerGroup = 100;
    String jobName = "";
    if (args.length > 8) {
      maxNumOfValuesPerGroup = Long.parseLong(args[8]);
    }
    if (args.length > 9) {
      jobName = args[9];
    }
    Configuration defaults = new Configuration();
    JobConf job = new JobConf(defaults, DataJoinJob.class);
    job.setJobName("DataJoinJob: " + jobName);

    FileSystem fs = FileSystem.get(defaults);
    fs.delete(new Path(outputDir));
    FileInputFormat.setInputPaths(job, inputDir);

    job.setInputFormat(inputFormat);

    job.setMapperClass(mapper);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(outputFormat);
    SequenceFileOutputFormat.setOutputCompressionType(job,
            SequenceFile.CompressionType.BLOCK);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(mapoutputValueClass);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(outputValueClass);
    job.setReducerClass(reducer);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(numOfReducers);
    job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
    return job;
  }
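
For reference, a hypothetical argument vector matching the positional parsing above (paths and class names are placeholders, not real classes):

JobConf job = DataJoinJob.createDataJoinJob(new String[] {
    "/data/in",           // args[0]: input directory
    "/data/out",          // args[1]: output directory
    "seq",                // args[2]: not "text" => SequenceFileInputFormat
    "4",                  // args[3]: number of reducers
    "my.pkg.JoinMapper",  // args[4]: mapper class
    "my.pkg.JoinReducer", // args[5]: reducer class
    "my.pkg.TaggedVal",   // args[6]: map output value class
    "text",               // args[7]: "text" => TextOutputFormat
    "200",                // args[8]: datajoin.maxNumOfValuesPerGroup
    "my-join-job"         // args[9]: job name
});
JobClient.runJob(job);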
 
Example 16
Source File: TestMergeManager.java    From big-c with Apache License 2.0
@Test(timeout=10000)
public void testMemoryMerge() throws Exception {
  final int TOTAL_MEM_BYTES = 10000;
  final int OUTPUT_SIZE = 7950;
  JobConf conf = new JobConf();
  conf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 1.0f);
  conf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, TOTAL_MEM_BYTES);
  conf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.8f);
  conf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.9f);
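  // With these settings the in-memory shuffle ceiling is
  // 10000 * 1.0 * 0.8 = 8000 bytes, so each 7950-byte output fits in
  // memory, while the merge threshold is 10000 * 0.9 = 9000 bytes, so
  // two committed outputs (15900 bytes) trigger an in-memory merge.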
  TestExceptionReporter reporter = new TestExceptionReporter();
  CyclicBarrier mergeStart = new CyclicBarrier(2);
  CyclicBarrier mergeComplete = new CyclicBarrier(2);
  StubbedMergeManager mgr = new StubbedMergeManager(conf, reporter,
      mergeStart, mergeComplete);

  // reserve enough map output to cause a merge when it is committed
  MapOutput<Text, Text> out1 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                    (out1 instanceof InMemoryMapOutput));
  InMemoryMapOutput<Text, Text> mout1 = (InMemoryMapOutput<Text, Text>)out1;
  fillOutput(mout1);
  MapOutput<Text, Text> out2 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                    (out2 instanceof InMemoryMapOutput));
  InMemoryMapOutput<Text, Text> mout2 = (InMemoryMapOutput<Text, Text>)out2;
  fillOutput(mout2);

  // next reservation should be a WAIT
  MapOutput<Text, Text> out3 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertEquals("Should be told to wait", null, out3);

  // trigger the first merge and wait for merge thread to start merging
  // and free enough output to reserve more
  mout1.commit();
  mout2.commit();
  mergeStart.await();

  Assert.assertEquals(1, mgr.getNumMerges());

  // reserve enough map output to cause another merge when committed
  out1 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                     (out1 instanceof InMemoryMapOutput));
  mout1 = (InMemoryMapOutput<Text, Text>)out1;
  fillOutput(mout1);
  out2 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                     (out2 instanceof InMemoryMapOutput));
  mout2 = (InMemoryMapOutput<Text, Text>)out2;
  fillOutput(mout2);

  // next reservation should be null
  out3 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertEquals("Should be told to wait", null, out3);

  // commit output *before* merge thread completes
  mout1.commit();
  mout2.commit();

  // allow the first merge to complete
  mergeComplete.await();

  // start the second merge and verify
  mergeStart.await();
  Assert.assertEquals(2, mgr.getNumMerges());

  // trigger the end of the second merge
  mergeComplete.await();

  Assert.assertEquals(2, mgr.getNumMerges());
  Assert.assertEquals("exception reporter invoked",
      0, reporter.getNumExceptions());
}
 
Example 17
Source File: CompositeInputFormat.java    From big-c with Apache License 2.0
/**
 * Build a CompositeInputSplit from the child InputFormats by assigning the
 * ith split from each child to the ith composite split.
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  setFormat(job);
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  return root.getSplits(job, numSplits);
}
 
Example 18
Source File: CompositeInputFormat.java    From hadoop-gpu with Apache License 2.0
/**
 * Build a CompositeInputSplit from the child InputFormats by assigning the
 * ith split from each child to the ith composite split.
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  setFormat(job);
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  return root.getSplits(job, numSplits);
}
 
Example 19
Source File: TestMergeManager.java    From hadoop with Apache License 2.0
@Test(timeout=10000)
public void testMemoryMerge() throws Exception {
  final int TOTAL_MEM_BYTES = 10000;
  final int OUTPUT_SIZE = 7950;
  JobConf conf = new JobConf();
  conf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 1.0f);
  conf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, TOTAL_MEM_BYTES);
  conf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.8f);
  conf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.9f);
  TestExceptionReporter reporter = new TestExceptionReporter();
  CyclicBarrier mergeStart = new CyclicBarrier(2);
  CyclicBarrier mergeComplete = new CyclicBarrier(2);
  StubbedMergeManager mgr = new StubbedMergeManager(conf, reporter,
      mergeStart, mergeComplete);

  // reserve enough map output to cause a merge when it is committed
  MapOutput<Text, Text> out1 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                    (out1 instanceof InMemoryMapOutput));
  InMemoryMapOutput<Text, Text> mout1 = (InMemoryMapOutput<Text, Text>)out1;
  fillOutput(mout1);
  MapOutput<Text, Text> out2 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                    (out2 instanceof InMemoryMapOutput));
  InMemoryMapOutput<Text, Text> mout2 = (InMemoryMapOutput<Text, Text>)out2;
  fillOutput(mout2);

  // next reservation should be a WAIT
  MapOutput<Text, Text> out3 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertEquals("Should be told to wait", null, out3);

  // trigger the first merge and wait for merge thread to start merging
  // and free enough output to reserve more
  mout1.commit();
  mout2.commit();
  mergeStart.await();

  Assert.assertEquals(1, mgr.getNumMerges());

  // reserve enough map output to cause another merge when committed
  out1 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                     (out1 instanceof InMemoryMapOutput));
  mout1 = (InMemoryMapOutput<Text, Text>)out1;
  fillOutput(mout1);
  out2 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                     (out2 instanceof InMemoryMapOutput));
  mout2 = (InMemoryMapOutput<Text, Text>)out2;
  fillOutput(mout2);

  // next reservation should be null
  out3 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertEquals("Should be told to wait", null, out3);

  // commit output *before* merge thread completes
  mout1.commit();
  mout2.commit();

  // allow the first merge to complete
  mergeComplete.await();

  // start the second merge and verify
  mergeStart.await();
  Assert.assertEquals(2, mgr.getNumMerges());

  // trigger the end of the second merge
  mergeComplete.await();

  Assert.assertEquals(2, mgr.getNumMerges());
  Assert.assertEquals("exception reporter invoked",
      0, reporter.getNumExceptions());
}
 
Example 20
Source File: CompositeInputFormat.java    From hadoop with Apache License 2.0
/**
 * Build a CompositeInputSplit from the child InputFormats by assigning the
 * ith split from each child to the ith composite split.
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  setFormat(job);
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  return root.getSplits(job, numSplits);
}