Java Code Examples for org.apache.hadoop.mapred.JobConf#setLong()

The following examples show how to use org.apache.hadoop.mapred.JobConf#setLong(). They are drawn from a range of open-source projects; the source file and originating project for each example are noted above it.
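As a quick orientation: JobConf extends Configuration, so setLong(String, long) stores a long value under the given key and getLong(String, long) reads it back, falling back to the supplied default when the key is unset. A minimal sketch of the round trip (the key name here is made up for illustration):

JobConf conf = new JobConf();
conf.setLong("example.max.records", 500000L);
// Returns the stored value, or the default (1000L) if the key were unset.
long maxRecords = conf.getLong("example.max.records", 1000L); // 500000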
Example 1
Source File: DFSGeneralTest.java    From RDFS with Apache License 2.0
private void updateJobConf(JobConf conf, Path inputPath, Path outputPath) {
  // set specific job config
  conf.setLong(NUMBER_OF_MAPS_KEY, nmaps);
  conf.setLong(NUMBER_OF_THREADS_KEY, nthreads);
  conf.setInt(BUFFER_SIZE_KEY, buffersize);
  conf.setLong(WRITER_DATARATE_KEY, datarate);
  conf.setLong("mapred.task.timeout", Long.MAX_VALUE);
  conf.set(OUTPUT_DIR_KEY, output);
  
  // set the output and input for the map reduce
  FileInputFormat.setInputPaths(conf, inputPath);
  FileOutputFormat.setOutputPath(conf, outputPath);

  conf.setInputFormat(SequenceFileInputFormat.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  conf.setNumReduceTasks(1);
  conf.setSpeculativeExecution(false);
}
 
Example 2
Source File: TeraValidate.java    From RDFS with Apache License 2.0
public int run(String[] args) throws Exception {
  JobConf job = (JobConf) getConf();
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraValidate");
  job.setJarByClass(TeraValidate.class);
  job.setMapperClass(ValidateMapper.class);
  job.setReducerClass(ValidateReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  // force a single split 
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  job.setInputFormat(TeraInputFormat.class);
  JobClient.runJob(job);
  return 0;
}
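
This example forces a single input split by raising mapred.min.split.size to Long.MAX_VALUE (the identical TeraValidate variants below, and the CompositeInputFormat examples further down, rely on the same trick). In the old-API FileInputFormat the per-file split size is computed roughly as max(minSize, min(goalSize, blockSize)), so a huge minimum collapses each file into one split. A simplified sketch of that rule:

// Simplified form of the old FileInputFormat split-size computation
// (the real code also handles remainders and file boundaries).
static long computeSplitSize(long goalSize, long minSize, long blockSize) {
  return Math.max(minSize, Math.min(goalSize, blockSize));
}
// With minSize == Long.MAX_VALUE the result is Long.MAX_VALUE,
// so every input file is read as a single split.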
 
Example 3
Source File: TeraValidate.java    From hadoop-gpu with Apache License 2.0
public int run(String[] args) throws Exception {
  JobConf job = (JobConf) getConf();
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraValidate");
  job.setJarByClass(TeraValidate.class);
  job.setMapperClass(ValidateMapper.class);
  job.setReducerClass(ValidateReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  // force a single split 
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  job.setInputFormat(TeraInputFormat.class);
  JobClient.runJob(job);
  return 0;
}
 
Example 4
Source File: TeraValidate.java    From hadoop-book with Apache License 2.0
public int run(String[] args) throws Exception {
  JobConf job = (JobConf) getConf();
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraValidate");
  job.setJarByClass(TeraValidate.class);
  job.setMapperClass(ValidateMapper.class);
  job.setReducerClass(ValidateReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  // force a single split 
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  job.setInputFormat(TeraInputFormat.class);
  JobClient.runJob(job);
  return 0;
}
 
Example 5
Source File: MapProcessor.java    From tez with Apache License 2.0
/**
 * Update the job with details about the file split
 * @param job the job configuration to update
 * @param inputSplit the file split
 */
private void updateJobWithSplit(final JobConf job, InputSplit inputSplit) {
  if (inputSplit instanceof FileSplit) {
    FileSplit fileSplit = (FileSplit) inputSplit;
    job.set(JobContext.MAP_INPUT_FILE, fileSplit.getPath().toString());
    job.setLong(JobContext.MAP_INPUT_START, fileSplit.getStart());
    // Despite its name, MAP_INPUT_PATH resolves to "mapreduce.map.input.length",
    // so this line stores the split length in bytes.
    job.setLong(JobContext.MAP_INPUT_PATH, fileSplit.getLength());
  }
  LOG.info("Processing mapred split: " + inputSplit);
}
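
On the task side, an old-API mapper can read these values back from its JobConf; a hedged sketch using the same constants:

// Sketch: recovering the stored split details in a mapper's configure().
public void configure(JobConf job) {
  String file = job.get(JobContext.MAP_INPUT_FILE);          // input path
  long start  = job.getLong(JobContext.MAP_INPUT_START, 0L); // byte offset
  long length = job.getLong(JobContext.MAP_INPUT_PATH, 0L);  // split length
}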
 
Example 6
Source File: TestGroupedSplits.java    From incubator-tez with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test(timeout=10000)
public void testGroupedSplitSize() throws IOException {
  JobConf job = new JobConf(defaultConf);
  InputFormat mockWrappedFormat = mock(InputFormat.class);
  TezGroupedSplitsInputFormat<LongWritable, Text> format =
      new TezGroupedSplitsInputFormat<LongWritable, Text>();
  format.setConf(job);
  format.setInputFormat(mockWrappedFormat);
  
  job.setLong(TezConfiguration.TEZ_AM_GROUPING_SPLIT_MAX_SIZE, 500 * 1000 * 1000L);
  job.setLong(TezConfiguration.TEZ_AM_GROUPING_SPLIT_MIN_SIZE, 50 * 1000 * 1000L);
  InputSplit mockSplit1 = mock(InputSplit.class);
  when(mockSplit1.getLength()).thenReturn(10 * 1000 * 1000L);
  when(mockSplit1.getLocations()).thenReturn(null);
  int numSplits = 100;
  InputSplit[] mockSplits = new InputSplit[numSplits];
  for (int i=0; i<numSplits; i++) {
    mockSplits[i] = mockSplit1;
  }
  when(mockWrappedFormat.getSplits((JobConf)anyObject(), anyInt())).thenReturn(mockSplits);
  
  // desired splits not set. We end up choosing min/max split size based on 
  // total data and num original splits. In this case, min size will be hit
  InputSplit[] splits = format.getSplits(job, 0);
  Assert.assertEquals(25, splits.length);
  
  // split too big. override with max
  format.setDesiredNumberOfSplits(1);
  splits = format.getSplits(job, 0);
  Assert.assertEquals(4, splits.length);
  
  // splits too small. override with min
  format.setDesiredNumberOfSplits(1000);
  splits = format.getSplits(job, 0);
  Assert.assertEquals(25, splits.length);
  
}
 
Example 7
Source File: MapProcessor.java    From incubator-tez with Apache License 2.0
/**
 * Update the job with details about the file split
 * @param job the job configuration to update
 * @param inputSplit the file split
 */
private void updateJobWithSplit(final JobConf job, InputSplit inputSplit) {
  if (inputSplit instanceof FileSplit) {
    FileSplit fileSplit = (FileSplit) inputSplit;
    job.set(JobContext.MAP_INPUT_FILE, fileSplit.getPath().toString());
    job.setLong(JobContext.MAP_INPUT_START, fileSplit.getStart());
    job.setLong(JobContext.MAP_INPUT_PATH, fileSplit.getLength());
  }
  LOG.info("Processing mapred split: " + inputSplit);
}
 
Example 8
Source File: TestDeprecatedKeys.java    From tez with Apache License 2.0
@Test(timeout = 5000)
public void verifyReduceKeyTranslation() {
  JobConf jobConf = new JobConf();

  jobConf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.4f);
  jobConf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, 20000L);
  jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, 2000);
  jobConf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.55f);
  jobConf.setFloat(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, 0.60f);
  jobConf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.22f);
  jobConf.setBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, true);
  jobConf.setFloat(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT, 0.33f);
  jobConf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, false);

  MRHelpers.translateMRConfToTez(jobConf);

  assertEquals(0.4f, jobConf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0f), 0.01f);
  assertEquals(20000L, jobConf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, 0));
  assertEquals(2000,
      jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, 0));
  assertEquals(0.55f, jobConf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 0), 0.01f);
  assertEquals(0.60f,
      jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 0),
      0.01f);
  assertEquals(0.22f,
      jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 0),
      0.01f);
  assertEquals(true, jobConf.getBoolean(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, false));
  assertEquals(0.33f,
      jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0),
      0.01f);
  assertEquals(false, jobConf.getBoolean(TezConfiguration.TEZ_USER_CLASSPATH_FIRST, true));
}
 
Example 9
Source File: MapProcessor.java    From incubator-tez with Apache License 2.0
private void updateJobWithSplit(
    final JobConf job, org.apache.hadoop.mapreduce.InputSplit inputSplit) {
  if (inputSplit instanceof org.apache.hadoop.mapreduce.lib.input.FileSplit) {
    org.apache.hadoop.mapreduce.lib.input.FileSplit fileSplit = 
        (org.apache.hadoop.mapreduce.lib.input.FileSplit) inputSplit;
    job.set(JobContext.MAP_INPUT_FILE, fileSplit.getPath().toString());
    job.setLong(JobContext.MAP_INPUT_START, fileSplit.getStart());
    job.setLong(JobContext.MAP_INPUT_PATH, fileSplit.getLength());
  }
  LOG.info("Processing mapreduce split: " + inputSplit);
}
 
Example 10
Source File: RandomWriter.java    From hadoop-gpu with Apache License 2.0
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each map writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 * 
 * @throws IOException 
 */
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }
  
  Path outDir = new Path(args[0]);
  JobConf job = new JobConf(getConf());
  
  job.setJarByClass(RandomWriter.class);
  job.setJobName("random-writer");
  FileOutputFormat.setOutputPath(job, outDir);
  
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  
  job.setInputFormat(RandomInputFormat.class);
  job.setMapperClass(Map.class);        
  job.setReducerClass(IdentityReducer.class);
  job.setOutputFormat(SequenceFileOutputFormat.class);
  
  JobClient client = new JobClient(job);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10);
  long numBytesToWritePerMap = job.getLong("test.randomwrite.bytes_per_map",
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
    return -2;
  }
  long totalBytesToWrite = job.getLong("test.randomwrite.total_bytes", 
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite);
  }
  
  job.setNumMapTasks(numMaps);
  System.out.println("Running " + numMaps + " maps.");
  
  // reducer NONE
  job.setNumReduceTasks(0);
  
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(job);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + 
                     (endTime.getTime() - startTime.getTime()) /1000 + 
                     " seconds.");
  
  return 0;
}
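
Because the job sizes itself from configuration keys, a smaller run can preset the same keys read by run() above; for example (values arbitrary):

// Sketch: shrink the job by presetting the keys run() reads.
JobConf job = new JobConf();
job.setInt("test.randomwriter.maps_per_host", 2);
job.setLong("test.randomwrite.bytes_per_map", 64L * 1024 * 1024);  // 64 MB
job.setLong("test.randomwrite.total_bytes", 512L * 1024 * 1024);   // 512 MB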
 
Example 11
Source File: CrawlDbReader.java    From nutch-htmlunit with Apache License 2.0
public void processTopNJob(String crawlDb, long topN, float min, String output, Configuration config) throws IOException {

    if (LOG.isInfoEnabled()) {
      LOG.info("CrawlDb topN: starting (topN=" + topN + ", min=" + min + ")");
      LOG.info("CrawlDb db: " + crawlDb);
    }

    Path outFolder = new Path(output);
    Path tempDir =
      new Path(config.get("mapred.temp.dir", ".") +
               "/readdb-topN-temp-"+
               Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    JobConf job = new NutchJob(config);
    job.setJobName("topN prepare " + crawlDb);
    FileInputFormat.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME));
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(CrawlDbTopNMapper.class);
    job.setReducerClass(IdentityReducer.class);

    FileOutputFormat.setOutputPath(job, tempDir);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(FloatWritable.class);
    job.setOutputValueClass(Text.class);

    // XXX hmmm, no setFloat() in the API ... :(
    job.setLong("db.reader.topn.min", Math.round(1000000.0 * min));
    JobClient.runJob(job);

    if (LOG.isInfoEnabled()) {
      LOG.info("CrawlDb topN: collecting topN scores.");
    }
    job = new NutchJob(config);
    job.setJobName("topN collect " + crawlDb);
    job.setLong("db.reader.topn", topN);

    FileInputFormat.addInputPath(job, tempDir);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(CrawlDbTopNReducer.class);

    FileOutputFormat.setOutputPath(job, outFolder);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(FloatWritable.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(1); // create a single file.

    JobClient.runJob(job);
    FileSystem fs = FileSystem.get(config);
    fs.delete(tempDir, true);
    if (LOG.isInfoEnabled()) { LOG.info("CrawlDb topN: done"); }

  }
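
The "XXX ... no setFloat()" comment explains the workaround: the float threshold is stored as a long after multiplying by 1,000,000 (modern Configuration does offer setFloat(), but this code predates it). The task side would undo the scaling along these lines (a sketch, not the actual Nutch mapper):

// Recover the threshold stored by processTopNJob() above.
long scaled = job.getLong("db.reader.topn.min", 0);
float min = scaled / 1000000.0f; // undoes Math.round(1000000.0 * min)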
 
Example 12
Source File: CompositeInputFormat.java    From RDFS with Apache License 2.0
/**
 * Build a CompositeInputSplit from the child InputFormats by assigning the
 * ith split from each child to the ith composite split.
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  setFormat(job);
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  return root.getSplits(job, numSplits);
}
 
Example 13
Source File: TeraGen.java    From RDFS with Apache License 2.0
static void setNumberOfRows(JobConf job, long numRows) {
  job.setLong("terasort.num-rows", numRows);
}
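
TeraGen pairs this setter with a getter over the same key; roughly (a sketch from memory, the default in the real source may differ):

static long getNumberOfRows(JobConf job) {
  return job.getLong("terasort.num-rows", 0);
}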
 
Example 14
Source File: TeraGen.java    From hadoop-book with Apache License 2.0
static void setNumberOfRows(JobConf job, long numRows) {
  job.setLong("terasort.num-rows", numRows);
}
 
Example 15
Source File: DataJoinJob.java    From RDFS with Apache License 2.0
public static JobConf createDataJoinJob(String args[]) throws IOException {

    String inputDir = args[0];
    String outputDir = args[1];
    Class inputFormat = SequenceFileInputFormat.class;
    if (args[2].compareToIgnoreCase("text") != 0) {
      System.out.println("Using SequenceFileInputFormat: " + args[2]);
    } else {
      System.out.println("Using TextInputFormat: " + args[2]);
      inputFormat = TextInputFormat.class;
    }
    int numOfReducers = Integer.parseInt(args[3]);
    Class mapper = getClassByName(args[4]);
    Class reducer = getClassByName(args[5]);
    Class mapoutputValueClass = getClassByName(args[6]);
    Class outputFormat = TextOutputFormat.class;
    Class outputValueClass = Text.class;
    if (args[7].compareToIgnoreCase("text") != 0) {
      System.out.println("Using SequenceFileOutputFormat: " + args[7]);
      outputFormat = SequenceFileOutputFormat.class;
      outputValueClass = getClassByName(args[7]);
    } else {
      System.out.println("Using TextOutputFormat: " + args[7]);
    }
    long maxNumOfValuesPerGroup = 100;
    String jobName = "";
    if (args.length > 8) {
      maxNumOfValuesPerGroup = Long.parseLong(args[8]);
    }
    if (args.length > 9) {
      jobName = args[9];
    }
    Configuration defaults = new Configuration();
    JobConf job = new JobConf(defaults, DataJoinJob.class);
    job.setJobName("DataJoinJob: " + jobName);

    FileSystem fs = FileSystem.get(defaults);
    fs.delete(new Path(outputDir));
    FileInputFormat.setInputPaths(job, inputDir);

    job.setInputFormat(inputFormat);

    job.setMapperClass(mapper);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(outputFormat);
    SequenceFileOutputFormat.setOutputCompressionType(job,
            SequenceFile.CompressionType.BLOCK);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(mapoutputValueClass);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(outputValueClass);
    job.setReducerClass(reducer);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(numOfReducers);
    job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
    return job;
  }
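
For reference, a hypothetical argument vector matching the positional parsing above (paths and class names are placeholders, not real classes):

JobConf job = DataJoinJob.createDataJoinJob(new String[] {
    "/data/in",           // args[0]: input directory
    "/data/out",          // args[1]: output directory
    "seq",                // args[2]: not "text" => SequenceFileInputFormat
    "4",                  // args[3]: number of reducers
    "my.pkg.JoinMapper",  // args[4]: mapper class
    "my.pkg.JoinReducer", // args[5]: reducer class
    "my.pkg.TaggedVal",   // args[6]: map output value class
    "text",               // args[7]: "text" => TextOutputFormat
    "200",                // args[8]: datajoin.maxNumOfValuesPerGroup
    "my-join-job"         // args[9]: job name
});
JobClient.runJob(job);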
 
Example 16
Source File: TestMergeManager.java    From big-c with Apache License 2.0
@Test(timeout=10000)
public void testMemoryMerge() throws Exception {
  final int TOTAL_MEM_BYTES = 10000;
  final int OUTPUT_SIZE = 7950;
  JobConf conf = new JobConf();
  conf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 1.0f);
  conf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, TOTAL_MEM_BYTES);
  conf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.8f);
  conf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.9f);
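  // With these settings the in-memory shuffle ceiling is
  // 10000 * 1.0 * 0.8 = 8000 bytes, so each 7950-byte output fits in
  // memory, while the merge threshold is 10000 * 0.9 = 9000 bytes, so
  // two committed outputs (15900 bytes) trigger an in-memory merge.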
  TestExceptionReporter reporter = new TestExceptionReporter();
  CyclicBarrier mergeStart = new CyclicBarrier(2);
  CyclicBarrier mergeComplete = new CyclicBarrier(2);
  StubbedMergeManager mgr = new StubbedMergeManager(conf, reporter,
      mergeStart, mergeComplete);

  // reserve enough map output to cause a merge when it is committed
  MapOutput<Text, Text> out1 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                    (out1 instanceof InMemoryMapOutput));
  InMemoryMapOutput<Text, Text> mout1 = (InMemoryMapOutput<Text, Text>)out1;
  fillOutput(mout1);
  MapOutput<Text, Text> out2 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                    (out2 instanceof InMemoryMapOutput));
  InMemoryMapOutput<Text, Text> mout2 = (InMemoryMapOutput<Text, Text>)out2;
  fillOutput(mout2);

  // next reservation should be a WAIT
  MapOutput<Text, Text> out3 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertEquals("Should be told to wait", null, out3);

  // trigger the first merge and wait for merge thread to start merging
  // and free enough output to reserve more
  mout1.commit();
  mout2.commit();
  mergeStart.await();

  Assert.assertEquals(1, mgr.getNumMerges());

  // reserve enough map output to cause another merge when committed
  out1 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                     (out1 instanceof InMemoryMapOutput));
  mout1 = (InMemoryMapOutput<Text, Text>)out1;
  fillOutput(mout1);
  out2 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                     (out2 instanceof InMemoryMapOutput));
  mout2 = (InMemoryMapOutput<Text, Text>)out2;
  fillOutput(mout2);

  // next reservation should be null
  out3 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertEquals("Should be told to wait", null, out3);

  // commit output *before* merge thread completes
  mout1.commit();
  mout2.commit();

  // allow the first merge to complete
  mergeComplete.await();

  // start the second merge and verify
  mergeStart.await();
  Assert.assertEquals(2, mgr.getNumMerges());

  // trigger the end of the second merge
  mergeComplete.await();

  Assert.assertEquals(2, mgr.getNumMerges());
  Assert.assertEquals("exception reporter invoked",
      0, reporter.getNumExceptions());
}
 
Example 17
Source File: CompositeInputFormat.java    From big-c with Apache License 2.0
/**
 * Build a CompositeInputSplit from the child InputFormats by assigning the
 * ith split from each child to the ith composite split.
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  setFormat(job);
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  return root.getSplits(job, numSplits);
}
 
Example 18
Source File: CompositeInputFormat.java    From hadoop-gpu with Apache License 2.0
/**
 * Build a CompositeInputSplit from the child InputFormats by assigning the
 * ith split from each child to the ith composite split.
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  setFormat(job);
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  return root.getSplits(job, numSplits);
}
 
Example 19
Source File: TestMergeManager.java    From hadoop with Apache License 2.0
@Test(timeout=10000)
public void testMemoryMerge() throws Exception {
  final int TOTAL_MEM_BYTES = 10000;
  final int OUTPUT_SIZE = 7950;
  JobConf conf = new JobConf();
  conf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 1.0f);
  conf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, TOTAL_MEM_BYTES);
  conf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.8f);
  conf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.9f);
  TestExceptionReporter reporter = new TestExceptionReporter();
  CyclicBarrier mergeStart = new CyclicBarrier(2);
  CyclicBarrier mergeComplete = new CyclicBarrier(2);
  StubbedMergeManager mgr = new StubbedMergeManager(conf, reporter,
      mergeStart, mergeComplete);

  // reserve enough map output to cause a merge when it is committed
  MapOutput<Text, Text> out1 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                    (out1 instanceof InMemoryMapOutput));
  InMemoryMapOutput<Text, Text> mout1 = (InMemoryMapOutput<Text, Text>)out1;
  fillOutput(mout1);
  MapOutput<Text, Text> out2 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                    (out2 instanceof InMemoryMapOutput));
  InMemoryMapOutput<Text, Text> mout2 = (InMemoryMapOutput<Text, Text>)out2;
  fillOutput(mout2);

  // next reservation should be a WAIT
  MapOutput<Text, Text> out3 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertEquals("Should be told to wait", null, out3);

  // trigger the first merge and wait for merge thread to start merging
  // and free enough output to reserve more
  mout1.commit();
  mout2.commit();
  mergeStart.await();

  Assert.assertEquals(1, mgr.getNumMerges());

  // reserve enough map output to cause another merge when committed
  out1 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                     (out1 instanceof InMemoryMapOutput));
  mout1 = (InMemoryMapOutput<Text, Text>)out1;
  fillOutput(mout1);
  out2 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                     (out2 instanceof InMemoryMapOutput));
  mout2 = (InMemoryMapOutput<Text, Text>)out2;
  fillOutput(mout2);

  // next reservation should be null
  out3 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertEquals("Should be told to wait", null, out3);

  // commit output *before* merge thread completes
  mout1.commit();
  mout2.commit();

  // allow the first merge to complete
  mergeComplete.await();

  // start the second merge and verify
  mergeStart.await();
  Assert.assertEquals(2, mgr.getNumMerges());

  // trigger the end of the second merge
  mergeComplete.await();

  Assert.assertEquals(2, mgr.getNumMerges());
  Assert.assertEquals("exception reporter invoked",
      0, reporter.getNumExceptions());
}
 
Example 20
Source File: CompositeInputFormat.java    From hadoop with Apache License 2.0
/**
 * Build a CompositeInputSplit from the child InputFormats by assigning the
 * ith split from each child to the ith composite split.
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  setFormat(job);
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  return root.getSplits(job, numSplits);
}