org.apache.hadoop.mapred.lib.IdentityReducer Java Examples

The following examples show how to use org.apache.hadoop.mapred.lib.IdentityReducer. Vote up the examples you find helpful, or vote down those you don't. Follow the links above each example to visit the original project or source file, and see the sidebar for related API usage.
Example #1
Source File: TestCompressedEmptyMapOutputs.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a "null sort" job: a SinkMapper feeding two IdentityReducers over
 * SequenceFile input/output, with compressed intermediate map outputs.
 *
 * @param jobConf    base job configuration to populate and submit
 * @param sortInput  input path containing SequenceFiles of BytesWritable pairs
 * @param sortOutput destination path for the job output
 * @throws Exception if the job fails to run
 */
private static void runSort(JobConf jobConf, Path sortInput, Path sortOutput) 
throws Exception {
  jobConf.setJobName("null-sorter");

  // I/O formats and key/value types.
  jobConf.setInputFormat(SequenceFileInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);

  // Pipeline: sink the records in the maps, pass them through two reducers.
  jobConf.setMapperClass(SinkMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setNumReduceTasks(2);

  FileInputFormat.setInputPaths(jobConf, sortInput);
  FileOutputFormat.setOutputPath(jobConf, sortOutput);

  // Exercise the compressed intermediate map-output code path.
  jobConf.setCompressMapOutput(true);

  JobClient.runJob(jobConf);
}
 
Example #2
Source File: TestReduceFetch.java    From hadoop-gpu with Apache License 2.0 6 votes vote down vote up
/**
 * Configures and runs a single-reducer job (MapMB mapper, identity reducer)
 * over the fake input format, asserting success and returning its counters.
 * The /out directory is removed afterwards regardless of outcome.
 *
 * @param conf job configuration to populate and submit
 * @return counters of the completed job
 * @throws Exception if submission or cleanup fails
 */
public static Counters runJob(JobConf conf) throws Exception {
  final Path output = new Path("/out");

  conf.setMapperClass(MapMB.class);
  conf.setReducerClass(IdentityReducer.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  conf.setInputFormat(FakeIF.class);
  conf.setNumReduceTasks(1);
  FileInputFormat.setInputPaths(conf, new Path("/in"));
  FileOutputFormat.setOutputPath(conf, output);

  RunningJob job = null;
  try {
    job = JobClient.runJob(conf);
    assertTrue(job.isSuccessful());
  } finally {
    // Clean up the output directory even when the job fails.
    FileSystem fs = dfsCluster.getFileSystem();
    if (fs.exists(output)) {
      fs.delete(output, true);
    }
  }
  return job.getCounters();
}
 
Example #3
Source File: TestMultipleLevelCaching.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Configures and runs a map-only job (IdentityMapper, zero reduces) over
 * non-splitable SequenceFile input, returning the RunningJob handle.
 *
 * @param jobConf    configuration to populate and submit
 * @param inDir      input directory
 * @param outputPath output directory
 * @param numMaps    number of map tasks
 * @param jobName    job name to set
 * @return the RunningJob handle from JobClient.runJob
 * @throws IOException if the job fails
 */
static RunningJob launchJob(JobConf jobConf, Path inDir, Path outputPath,
    int numMaps, String jobName) throws IOException {
  jobConf.setJobName(jobName);

  // One split per file so the requested map count is honored exactly.
  jobConf.setInputFormat(NonSplitableSequenceFileInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);

  FileInputFormat.setInputPaths(jobConf, inDir);
  FileOutputFormat.setOutputPath(jobConf, outputPath);

  // Map-only: with zero reduces the IdentityReducer setting is inert.
  jobConf.setMapperClass(IdentityMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(0);

  jobConf.setJar("build/test/mapred/testjar/testjob.jar");
  return JobClient.runJob(jobConf);
}
 
Example #4
Source File: TestDatamerge.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Runs an "outer" composite join over three Fake_IF inputs with an identity
 * map/reduce, verifying that a join over empty sources completes, then
 * removes the working directory.
 */
public void testEmptyJoin() throws Exception {
  JobConf job = new JobConf();
  Path base = cluster.getFileSystem().makeQualified(new Path("/empty"));
  // NOTE(review): only i0 is qualified under base; i1/i2 are bare relative
  // paths, presumably resolved against the working directory — confirm
  // this asymmetry is intended.
  Path[] src = { new Path(base,"i0"), new Path("i1"), new Path("i2") };
  // Compose the three sources into a single outer-join input expression.
  job.set("mapreduce.join.expr", CompositeInputFormat.compose("outer",
      Fake_IF.class, src));
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(IdentityMapper.class);
  job.setReducerClass(IdentityReducer.class);
  job.setOutputKeyClass(IncomparableKey.class);
  job.setOutputValueClass(NullWritable.class);

  JobClient.runJob(job);
  // Clean up everything under /empty, including the job output.
  base.getFileSystem(job).delete(base, true);
}
 
Example #5
Source File: UtilsForTests.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Submits a job configured with a FailMapper (single map attempt, so the
 * job fails fast) and polls until it completes or ~30 seconds elapse.
 *
 * @param conf   job configuration to populate and submit
 * @param inDir  input directory
 * @param outDir output directory
 * @return the submitted RunningJob (complete unless the wait was interrupted)
 * @throws IOException if submission fails or the job exceeds the timeout
 */
public static RunningJob runJobFail(JobConf conf, Path inDir, Path outDir)
       throws IOException {
  conf.setJobName("test-job-fail");
  conf.setMapperClass(FailMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  // A single attempt so the map failure fails the job immediately.
  conf.setMaxMapAttempts(1);

  RunningJob job = UtilsForTests.runJob(conf, inDir, outDir);
  long sleepCount = 0;
  while (!job.isComplete()) {
    try {
      if (sleepCount > 300) { // 300 * 100ms = 30 seconds
        throw new IOException("Job didn't finish in 30 seconds");
      }
      Thread.sleep(100);
      sleepCount++;
    } catch (InterruptedException e) {
      // Restore the interrupt status instead of swallowing it, then stop waiting.
      Thread.currentThread().interrupt();
      break;
    }
  }

  return job;
}
 
Example #6
Source File: UtilsForTests.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Submits an identity map/reduce job named "test-job-succeed" and polls
 * until it completes or ~30 seconds elapse.
 *
 * @param conf   job configuration to populate and submit
 * @param inDir  input directory
 * @param outDir output directory
 * @return the submitted RunningJob (complete unless the wait was interrupted)
 * @throws IOException if submission fails or the job exceeds the timeout
 */
public static RunningJob runJobSucceed(JobConf conf, Path inDir, Path outDir)
       throws IOException {
  conf.setJobName("test-job-succeed");
  conf.setMapperClass(IdentityMapper.class);
  conf.setReducerClass(IdentityReducer.class);

  RunningJob job = UtilsForTests.runJob(conf, inDir, outDir);
  long sleepCount = 0;
  while (!job.isComplete()) {
    try {
      if (sleepCount > 300) { // 300 * 100ms = 30 seconds
        throw new IOException("Job didn't finish in 30 seconds");
      }
      Thread.sleep(100);
      sleepCount++;
    } catch (InterruptedException e) {
      // Preserve the caller's interrupt status instead of swallowing it.
      Thread.currentThread().interrupt();
      break;
    }
  }

  return job;
}
 
Example #7
Source File: UtilsForTests.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Configures (but does not submit) a "waiting" job: HalfWaitingMapper maps
 * that block on a signal file, identity reducers, over randomly generated
 * input.
 *
 * @param jobConf           configuration to populate
 * @param inDir             input path (recorded, though RandomInputFormat generates the records)
 * @param outputPath        output directory
 * @param numMaps           number of map tasks
 * @param numRed            number of reduce tasks
 * @param jobName           job name to set
 * @param mapSignalFilename signal file the waiting maps poll
 * @param redSignalFilename signal file the waiting reduces poll
 * @throws IOException declared for parity with callers; no I/O is performed here
 */
static void configureWaitingJobConf(JobConf jobConf, Path inDir,
                                    Path outputPath, int numMaps, int numRed,
                                    String jobName, String mapSignalFilename,
                                    String redSignalFilename)
throws IOException {
  jobConf.setJobName(jobName);
  // RandomInputFormat is the effective input format; the earlier redundant
  // setInputFormat(NonSplitableSequenceFileInputFormat.class) call, which
  // was immediately overridden, has been removed as dead configuration.
  jobConf.setInputFormat(RandomInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  FileInputFormat.setInputPaths(jobConf, inDir);
  FileOutputFormat.setOutputPath(jobConf, outputPath);
  jobConf.setMapperClass(UtilsForTests.HalfWaitingMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);
  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numRed);
  jobConf.setJar("build/test/mapred/testjar/testjob.jar");
  // Signal files the waiting mapper/reducer poll before proceeding.
  jobConf.set(getTaskSignalParameter(true), mapSignalFilename);
  jobConf.set(getTaskSignalParameter(false), redSignalFilename);
}
 
Example #8
Source File: UtilsForTests.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Configures (but does not submit) a "waiting" job: HalfWaitingMapper maps
 * that block on a signal file, identity reducers, over randomly generated
 * input.
 *
 * @param jobConf           configuration to populate
 * @param inDir             input path (recorded, though RandomInputFormat generates the records)
 * @param outputPath        output directory
 * @param numMaps           number of map tasks
 * @param numRed            number of reduce tasks
 * @param jobName           job name to set
 * @param mapSignalFilename signal file the waiting maps poll
 * @param redSignalFilename signal file the waiting reduces poll
 * @throws IOException declared for parity with callers; no I/O is performed here
 */
static void configureWaitingJobConf(JobConf jobConf, Path inDir,
                                    Path outputPath, int numMaps, int numRed,
                                    String jobName, String mapSignalFilename,
                                    String redSignalFilename)
throws IOException {
  jobConf.setJobName(jobName);
  // RandomInputFormat is the effective input format; the earlier redundant
  // setInputFormat(NonSplitableSequenceFileInputFormat.class) call, which
  // was immediately overridden, has been removed as dead configuration.
  jobConf.setInputFormat(RandomInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  FileInputFormat.setInputPaths(jobConf, inDir);
  FileOutputFormat.setOutputPath(jobConf, outputPath);
  jobConf.setMapperClass(UtilsForTests.HalfWaitingMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);
  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numRed);
  jobConf.setJar("build/test/mapred/testjar/testjob.jar");
  // Signal files the waiting mapper/reducer poll before proceeding.
  jobConf.set(getTaskSignalParameter(true), mapSignalFilename);
  jobConf.set(getTaskSignalParameter(false), redSignalFilename);
}
 
Example #9
Source File: UtilsForTests.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Submits an identity map/reduce job named "test-job-succeed" and polls
 * until it completes or ~30 seconds elapse.
 *
 * @param conf   job configuration to populate and submit
 * @param inDir  input directory
 * @param outDir output directory
 * @return the submitted RunningJob (complete unless the wait was interrupted)
 * @throws IOException if submission fails or the job exceeds the timeout
 */
public static RunningJob runJobSucceed(JobConf conf, Path inDir, Path outDir)
       throws IOException {
  conf.setJobName("test-job-succeed");
  conf.setMapperClass(IdentityMapper.class);
  conf.setReducerClass(IdentityReducer.class);

  RunningJob job = UtilsForTests.runJob(conf, inDir, outDir);
  long sleepCount = 0;
  while (!job.isComplete()) {
    try {
      if (sleepCount > 300) { // 300 * 100ms = 30 seconds
        throw new IOException("Job didn't finish in 30 seconds");
      }
      Thread.sleep(100);
      sleepCount++;
    } catch (InterruptedException e) {
      // Preserve the caller's interrupt status instead of swallowing it.
      Thread.currentThread().interrupt();
      break;
    }
  }

  return job;
}
 
Example #10
Source File: UtilsForTests.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Submits a job configured with a FailMapper (single map attempt, so the
 * job fails fast) and polls until it completes or ~30 seconds elapse.
 *
 * @param conf   job configuration to populate and submit
 * @param inDir  input directory
 * @param outDir output directory
 * @return the submitted RunningJob (complete unless the wait was interrupted)
 * @throws IOException if submission fails or the job exceeds the timeout
 */
public static RunningJob runJobFail(JobConf conf, Path inDir, Path outDir)
       throws IOException {
  conf.setJobName("test-job-fail");
  conf.setMapperClass(FailMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  // A single attempt so the map failure fails the job immediately.
  conf.setMaxMapAttempts(1);

  RunningJob job = UtilsForTests.runJob(conf, inDir, outDir);
  long sleepCount = 0;
  while (!job.isComplete()) {
    try {
      if (sleepCount > 300) { // 300 * 100ms = 30 seconds
        throw new IOException("Job didn't finish in 30 seconds");
      }
      Thread.sleep(100);
      sleepCount++;
    } catch (InterruptedException e) {
      // Restore the interrupt status instead of swallowing it, then stop waiting.
      Thread.currentThread().interrupt();
      break;
    }
  }

  return job;
}
 
Example #11
Source File: UtilsForTests.java    From hadoop-gpu with Apache License 2.0 6 votes vote down vote up
/**
 * Submits a job with a FailMapper and an IdentityReducer, then polls
 * (without a timeout) until the job completes.
 *
 * @param conf   job configuration to populate and submit
 * @param inDir  input directory
 * @param outDir output directory
 * @return the submitted RunningJob (complete unless the wait was interrupted)
 * @throws IOException if submission fails
 */
static RunningJob runJobFail(JobConf conf, Path inDir, Path outDir)
       throws IOException {
  conf.setJobName("test-job-fail");
  conf.setMapperClass(FailMapper.class);
  conf.setReducerClass(IdentityReducer.class);

  RunningJob job = UtilsForTests.runJob(conf, inDir, outDir);
  while (!job.isComplete()) {
    try {
      Thread.sleep(100);
    } catch (InterruptedException e) {
      // Restore interrupt status before bailing out of the wait loop.
      Thread.currentThread().interrupt();
      break;
    }
  }

  return job;
}
 
Example #12
Source File: TestReduceTaskNoMapOutput.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a 3-map/1-reduce job with the given mapper over the fake input
 * format, asserting success and returning its task-completion events.
 *
 * @param mapperClass mapper implementation to install (raw Class kept for
 *                    source compatibility with existing callers)
 * @param enableNoFetchEmptyMapOutputs value for the
 *        "mapred.enable.no.fetch.map.outputs" flag under test
 * @return task-completion events starting from event index 0
 * @throws Exception if the job fails to run
 */
public static TaskCompletionEvent[] runJob(JobConf conf, Class mapperClass,
                boolean enableNoFetchEmptyMapOutputs) throws Exception {
  conf.setMapperClass(mapperClass);
  conf.setReducerClass(IdentityReducer.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  conf.setNumMapTasks(3);
  conf.setNumReduceTasks(1);
  conf.setInputFormat(FakeIF.class);
  conf.setBoolean("mapred.enable.no.fetch.map.outputs", enableNoFetchEmptyMapOutputs);
  FileInputFormat.setInputPaths(conf, new Path("/in"));
  final Path outp = new Path("/out");
  FileOutputFormat.setOutputPath(conf, outp);

  // Run synchronously; the dead "RunningJob job = null" pre-initialization
  // was removed since runJob either returns a handle or throws.
  RunningJob job = JobClient.runJob(conf);
  assertTrue(job.isSuccessful());
  return job.getTaskCompletionEvents(0);
}
 
Example #13
Source File: TestDatamerge.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Runs an "outer" composite join over three Fake_IF inputs with an identity
 * map/reduce, verifying that a join over empty sources completes, then
 * removes the working directory.
 */
public void testEmptyJoin() throws Exception {
  JobConf job = new JobConf();
  Path base = cluster.getFileSystem().makeQualified(new Path("/empty"));
  // NOTE(review): only i0 is qualified under base; i1/i2 are bare relative
  // paths, presumably resolved against the working directory — confirm
  // this asymmetry is intended.
  Path[] src = { new Path(base,"i0"), new Path("i1"), new Path("i2") };
  // Compose the three sources into a single outer-join input expression.
  job.set("mapreduce.join.expr", CompositeInputFormat.compose("outer",
      Fake_IF.class, src));
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(IdentityMapper.class);
  job.setReducerClass(IdentityReducer.class);
  job.setOutputKeyClass(IncomparableKey.class);
  job.setOutputValueClass(NullWritable.class);

  JobClient.runJob(job);
  // Clean up everything under /empty, including the job output.
  base.getFileSystem(job).delete(base, true);
}
 
Example #14
Source File: TestMiniMRChildTask.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Launches the same job three times to exercise task temp-dir handling:
 * once with the default temp dir (./tmp), once with a relative
 * mapred.child.tmp ("../temp"), and once with an absolute one ("/tmp").
 * The output directory is deleted after each run.
 *
 * @param conf   configuration of the mapreduce job
 * @param inDir  input path
 * @param outDir output path
 * @param input  input text
 * @throws IOException on job failure or filesystem errors
 */
public void launchTest(JobConf conf,
                       Path inDir,
                       Path outDir,
                       String input)
throws IOException {
  configure(conf, inDir, outDir, input, 
            MapClass.class, IdentityReducer.class);

  FileSystem fs = outDir.getFileSystem(conf);

  // Run 1: default temp dir (./tmp).
  JobClient.runJob(conf);
  fs.delete(outDir, true);

  // Run 2: relative path for the task temp dir.
  conf.set("mapred.child.tmp", "../temp");
  JobClient.runJob(conf);
  fs.delete(outDir, true);

  // Run 3: absolute path for the task temp dir.
  conf.set("mapred.child.tmp", "/tmp");
  JobClient.runJob(conf);
  fs.delete(outDir, true);
}
 
Example #15
Source File: UtilsForTests.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Submits a job with a KillMapper, waits for it to reach the RUNNING state,
 * kills it, and then waits until cleanup has begun before returning.
 *
 * @param conf   job configuration to populate and submit
 * @param inDir  input directory
 * @param outDir output directory
 * @return the killed RunningJob handle
 * @throws IOException if submission fails
 */
static RunningJob runJobKill(JobConf conf,  Path inDir, Path outDir)
       throws IOException {

  conf.setJobName("test-job-kill");
  conf.setMapperClass(KillMapper.class);
  conf.setReducerClass(IdentityReducer.class);

  RunningJob job = UtilsForTests.runJob(conf, inDir, outDir);
  // Wait until the job is actually running before killing it.
  while (job.getJobState() != JobStatus.RUNNING) {
    try {
      Thread.sleep(100);
    } catch (InterruptedException e) {
      // Restore interrupt status instead of swallowing it.
      Thread.currentThread().interrupt();
      break;
    }
  }
  job.killJob();
  // Wait for cleanup to start so callers observe the job past the kill point.
  while (job.cleanupProgress() == 0.0f) {
    try {
      Thread.sleep(10);
    } catch (InterruptedException ie) {
      // Restore interrupt status instead of swallowing it.
      Thread.currentThread().interrupt();
      break;
    }
  }

  return job;
}
 
Example #16
Source File: UtilsForTests.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Submits a job with a FailMapper and an IdentityReducer, then polls
 * (without a timeout) until the job completes.
 *
 * @param conf   job configuration to populate and submit
 * @param inDir  input directory
 * @param outDir output directory
 * @return the submitted RunningJob (complete unless the wait was interrupted)
 * @throws IOException if submission fails
 */
static RunningJob runJobFail(JobConf conf, Path inDir, Path outDir)
       throws IOException {
  conf.setJobName("test-job-fail");
  conf.setMapperClass(FailMapper.class);
  conf.setReducerClass(IdentityReducer.class);

  RunningJob job = UtilsForTests.runJob(conf, inDir, outDir);
  while (!job.isComplete()) {
    try {
      Thread.sleep(100);
    } catch (InterruptedException e) {
      // Restore interrupt status before bailing out of the wait loop.
      Thread.currentThread().interrupt();
      break;
    }
  }

  return job;
}
 
Example #17
Source File: TestMiniMRChildTask.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a job with an EnvCheckMapper to verify mapred.child.env handling:
 * setting a brand-new variable, overriding an existing one, and appending
 * to variables from both the modified env and the tasktracker's env.
 *
 * @throws IOException if the job fails to run
 */
void runTestTaskEnv(JobConf conf, Path inDir, Path outDir) throws IOException {
  final String input = "The input";
  configure(conf, inDir, outDir, input, EnvCheckMapper.class, 
      IdentityReducer.class);
  // Cases exercised by the env spec below:
  //  - new SET of new var (MY_PATH)
  //  - set of old var (HOME)
  //  - append to an old var from modified env (LD_LIBRARY_PATH)
  //  - append to an old var from tt's env (PATH)
  //  - append to a new var (NEW_PATH)
  conf.set("mapred.child.env", 
           "MY_PATH=/tmp,HOME=/tmp,LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp,"
           + "PATH=$PATH:/tmp,NEW_PATH=$NEW_PATH:/tmp");
  // Record the launching PATH so the mapper can validate the appends.
  conf.set("path", System.getenv("PATH"));
  RunningJob checker = JobClient.runJob(conf);
  assertTrue("The environment checker job failed.", checker.isSuccessful());
}
 
Example #18
Source File: UtilsForTests.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Configures (but does not submit) a "waiting" job: HalfWaitingMapper maps
 * that block on a signal file, identity reducers, over randomly generated
 * input.
 *
 * @param jobConf           configuration to populate
 * @param inDir             input path (recorded, though RandomInputFormat generates the records)
 * @param outputPath        output directory
 * @param numMaps           number of map tasks
 * @param numRed            number of reduce tasks
 * @param jobName           job name to set
 * @param mapSignalFilename signal file the waiting maps poll
 * @param redSignalFilename signal file the waiting reduces poll
 * @throws IOException declared for parity with callers; no I/O is performed here
 */
static void configureWaitingJobConf(JobConf jobConf, Path inDir,
                                    Path outputPath, int numMaps, int numRed,
                                    String jobName, String mapSignalFilename,
                                    String redSignalFilename)
throws IOException {
  jobConf.setJobName(jobName);
  // RandomInputFormat is the effective input format; the earlier redundant
  // setInputFormat(NonSplitableSequenceFileInputFormat.class) call, which
  // was immediately overridden, has been removed as dead configuration.
  jobConf.setInputFormat(RandomInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  FileInputFormat.setInputPaths(jobConf, inDir);
  FileOutputFormat.setOutputPath(jobConf, outputPath);
  jobConf.setMapperClass(UtilsForTests.HalfWaitingMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);
  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numRed);
  jobConf.setJar("build/test/testjar/testjob.jar");
  // Signal files the waiting mapper/reducer poll before proceeding.
  jobConf.set(getTaskSignalParameter(true), mapSignalFilename);
  jobConf.set(getTaskSignalParameter(false), redSignalFilename);
}
 
Example #19
Source File: TestRackAwareTaskPlacement.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Configures and runs a map-only job (IdentityMapper, zero reduces) over
 * non-splitable SequenceFile input, returning the RunningJob handle.
 *
 * @param jobConf    configuration to populate and submit
 * @param inDir      input directory
 * @param outputPath output directory
 * @param numMaps    number of map tasks
 * @param jobName    job name to set
 * @return the RunningJob handle from JobClient.runJob
 * @throws IOException if the job fails
 */
static RunningJob launchJob(JobConf jobConf, Path inDir, Path outputPath, 
                            int numMaps, String jobName) throws IOException {
  jobConf.setJobName(jobName);

  // One split per file so the requested map count is honored exactly.
  jobConf.setInputFormat(NonSplitableSequenceFileInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);

  FileInputFormat.setInputPaths(jobConf, inDir);
  FileOutputFormat.setOutputPath(jobConf, outputPath);

  // Map-only: with zero reduces the IdentityReducer setting is inert.
  jobConf.setMapperClass(IdentityMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(0);

  jobConf.setJar("build/test/testjar/testjob.jar");
  return JobClient.runJob(jobConf);
}
 
Example #20
Source File: TestDatamerge.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Runs an "outer" composite join over three Fake_IF inputs with an identity
 * map/reduce, verifying that a join over empty sources completes, then
 * removes the working directory. Uses the older "mapred.join.expr" property.
 */
public void testEmptyJoin() throws Exception {
  JobConf job = new JobConf();
  Path base = cluster.getFileSystem().makeQualified(new Path("/empty"));
  // NOTE(review): only i0 is qualified under base; i1/i2 are bare relative
  // paths, presumably resolved against the working directory — confirm
  // this asymmetry is intended.
  Path[] src = { new Path(base,"i0"), new Path("i1"), new Path("i2") };
  // Compose the three sources into a single outer-join input expression.
  job.set("mapred.join.expr", CompositeInputFormat.compose("outer",
      Fake_IF.class, src));
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(IdentityMapper.class);
  job.setReducerClass(IdentityReducer.class);
  job.setOutputKeyClass(IncomparableKey.class);
  job.setOutputValueClass(NullWritable.class);

  JobClient.runJob(job);
  // Clean up everything under /empty, including the job output.
  base.getFileSystem(job).delete(base, true);
}
 
Example #21
Source File: UtilsForTests.java    From hadoop-gpu with Apache License 2.0 6 votes vote down vote up
/**
 * Submits an identity map/reduce job named "test-job-succeed" and polls
 * (without a timeout) until it completes.
 *
 * @param conf   job configuration to populate and submit
 * @param inDir  input directory
 * @param outDir output directory
 * @return the submitted RunningJob (complete unless the wait was interrupted)
 * @throws IOException if submission fails
 */
static RunningJob runJobSucceed(JobConf conf, Path inDir, Path outDir)
       throws IOException {
  conf.setJobName("test-job-succeed");
  conf.setMapperClass(IdentityMapper.class);
  conf.setReducerClass(IdentityReducer.class);

  RunningJob job = UtilsForTests.runJob(conf, inDir, outDir);
  while (!job.isComplete()) {
    try {
      Thread.sleep(100);
    } catch (InterruptedException e) {
      // Restore interrupt status before bailing out of the wait loop.
      Thread.currentThread().interrupt();
      break;
    }
  }

  return job;
}
 
Example #22
Source File: TestMultipleLevelCaching.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Configures and runs a map-only job (IdentityMapper, zero reduces) over
 * non-splitable SequenceFile input, returning the RunningJob handle.
 *
 * @param jobConf    configuration to populate and submit
 * @param inDir      input directory
 * @param outputPath output directory
 * @param numMaps    number of map tasks
 * @param jobName    job name to set
 * @return the RunningJob handle from JobClient.runJob
 * @throws IOException if the job fails
 */
static RunningJob launchJob(JobConf jobConf, Path inDir, Path outputPath,
    int numMaps, String jobName) throws IOException {
  jobConf.setJobName(jobName);

  // One split per file so the requested map count is honored exactly.
  jobConf.setInputFormat(NonSplitableSequenceFileInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);

  FileInputFormat.setInputPaths(jobConf, inDir);
  FileOutputFormat.setOutputPath(jobConf, outputPath);

  // Map-only: with zero reduces the IdentityReducer setting is inert.
  jobConf.setMapperClass(IdentityMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(0);

  jobConf.setJar("build/test/mapred/testjar/testjob.jar");
  return JobClient.runJob(jobConf);
}
 
Example #23
Source File: TestMRAppWithCombiner.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a 5-map/2-reduce identity job with a reporter-updating combiner
 * (MyCombinerToCheckReporter) and drives it through the shared runJob
 * helper, which performs the success check.
 */
@Test
public void testCombinerShouldUpdateTheReporter() throws Exception {
  final int mapCount = 5;
  final int reduceCount = 2;

  JobConf conf = new JobConf(mrCluster.getConfig());
  String workDir = mrCluster.getTestWorkDir().getAbsolutePath();
  Path in = new Path(workDir, "testCombinerShouldUpdateTheReporter-in");
  Path out = new Path(workDir, "testCombinerShouldUpdateTheReporter-out");
  createInputOutPutFolder(in, out, mapCount);

  conf.setJobName("test-job-with-combiner");
  conf.setMapperClass(IdentityMapper.class);
  conf.setCombinerClass(MyCombinerToCheckReporter.class);
  conf.setReducerClass(IdentityReducer.class);
  DistributedCache.addFileToClassPath(TestMRJobs.APP_JAR, conf);
  conf.setOutputCommitter(CustomOutputCommitter.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(conf, in);
  FileOutputFormat.setOutputPath(conf, out);
  conf.setNumMapTasks(mapCount);
  conf.setNumReduceTasks(reduceCount);

  runJob(conf);
}
 
Example #24
Source File: TestTaskFail.java    From hadoop-gpu with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the given input text to a single part file, configures a map-only
 * job with the failing MapperClass (speculation disabled), and submits it
 * asynchronously, returning the RunningJob handle without waiting.
 *
 * @param conf   job configuration to populate and submit
 * @param inDir  input directory to create and populate
 * @param outDir output directory (cleared first)
 * @param input  text written as the job input
 * @return the submitted (not yet complete) RunningJob
 * @throws IOException if directory creation or submission fails
 */
public RunningJob launchJob(JobConf conf,
                            Path inDir,
                            Path outDir,
                            String input) 
throws IOException {
  // Prepare the filesystems: clear stale output and create the input dir.
  FileSystem inFs = inDir.getFileSystem(conf);
  FileSystem outFs = outDir.getFileSystem(conf);
  outFs.delete(outDir, true);
  if (!inFs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }

  // Write the input text into a single part file.
  DataOutputStream file = inFs.create(new Path(inDir, "part-0"));
  file.writeBytes(input);
  file.close();

  // Configure the mapred job: failing mapper, no reduces, no speculation.
  conf.setMapperClass(MapperClass.class);        
  conf.setReducerClass(IdentityReducer.class);
  conf.setNumReduceTasks(0);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setSpeculativeExecution(false);
  String TEST_ROOT_DIR = new Path(System.getProperty("test.build.data",
                                  "/tmp")).toString().replace(' ', '+');
  conf.set("test.build.data", TEST_ROOT_DIR);
  // Submit without blocking and hand back the handle.
  return new JobClient(conf).submitJob(conf);
}
 
Example #25
Source File: TestMROldApiJobs.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Runs an identity map/reduce job named "test-job-succeed" with one map and
 * one reduce via the shared runJob helper, asserting that it succeeds.
 *
 * @param conf   job configuration to populate and submit
 * @param inDir  input directory
 * @param outDir output directory
 * @throws IOException          if the job fails to run
 * @throws InterruptedException if the wait inside runJob is interrupted
 */
public static void runJobSucceed(JobConf conf, Path inDir, Path outDir)
       throws IOException, InterruptedException {
  conf.setJobName("test-job-succeed");
  conf.setMapperClass(IdentityMapper.class);
  conf.setReducerClass(IdentityReducer.class);

  // One map, one reduce; runJob blocks until the job finishes.
  boolean success = runJob(conf, inDir, outDir, 1 , 1);
  Assert.assertTrue("Job expected to succeed failed", success);
}
 
Example #26
Source File: PipelineTest.java    From hiped2 with Apache License 2.0 5 votes vote down vote up
/**
 * Wires two identity map/reduce stages into a fresh PipelineMapReduceDriver
 * before each test.
 */
@Before
public void setUp() {
  driver = new PipelineMapReduceDriver<Text, Text, Text, Text>();

  // Stage 1: identity pass-through.
  mapper1 = new IdentityMapper<Text, Text>();
  reducer1 = new IdentityReducer<Text, Text>();
  driver.addMapReduce(new Pair<Mapper, Reducer>(mapper1, reducer1));

  // Stage 2: identity pass-through.
  mapper2 = new IdentityMapper<Text, Text>();
  reducer2 = new IdentityReducer<Text, Text>();
  driver.addMapReduce(new Pair<Mapper, Reducer>(mapper2, reducer2));
}
 
Example #27
Source File: TestMapRed.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that NullWritable keys survive a map/reduce round trip: writes
 * eight Text records keyed by NullWritable into a SequenceFile, runs
 * NullMapper + IdentityReducer, and checks the output records come back in
 * order.
 */
public void testNullKeys() throws Exception {
  JobConf conf = new JobConf(TestMapRed.class);
  FileSystem fs = FileSystem.getLocal(conf);
  Path testdir = new Path(
      System.getProperty("test.build.data","/tmp")).makeQualified(fs);
  fs.delete(testdir, true);
  Path inFile = new Path(testdir, "nullin/blah");
  // Input: NullWritable keys, Text values, uncompressed SequenceFile.
  SequenceFile.Writer w = SequenceFile.createWriter(fs, conf, inFile,
      NullWritable.class, Text.class, SequenceFile.CompressionType.NONE);
  Text t = new Text();
  t.set("AAAAAAAAAAAAAA"); w.append(NullWritable.get(), t);
  t.set("BBBBBBBBBBBBBB"); w.append(NullWritable.get(), t);
  t.set("CCCCCCCCCCCCCC"); w.append(NullWritable.get(), t);
  t.set("DDDDDDDDDDDDDD"); w.append(NullWritable.get(), t);
  t.set("EEEEEEEEEEEEEE"); w.append(NullWritable.get(), t);
  t.set("FFFFFFFFFFFFFF"); w.append(NullWritable.get(), t);
  t.set("GGGGGGGGGGGGGG"); w.append(NullWritable.get(), t);
  t.set("HHHHHHHHHHHHHH"); w.append(NullWritable.get(), t);
  w.close();
  FileInputFormat.setInputPaths(conf, inFile);
  FileOutputFormat.setOutputPath(conf, new Path(testdir, "nullout"));
  conf.setMapperClass(NullMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  conf.setOutputKeyClass(NullWritable.class);
  conf.setOutputValueClass(Text.class);
  conf.setInputFormat(SequenceFileInputFormat.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  // Single reducer so all records land in one part file, in one sorted run.
  conf.setNumReduceTasks(1);

  JobClient.runJob(conf);

  // NOTE(review): the reader is never closed — consider try/finally.
  SequenceFile.Reader r = new SequenceFile.Reader(fs,
      new Path(testdir, "nullout/part-00000"), conf);
  // Expected values advance one letter per record: AAAA..., BBBB..., etc.
  String m = "AAAAAAAAAAAAAA";
  for (int i = 1; r.next(NullWritable.get(), t); ++i) {
    assertTrue(t.toString() + " doesn't match " + m, m.equals(t.toString()));
    // Shift every character of the expectation forward by one letter.
    m = m.replace((char)('A' + i - 1), (char)('A' + i));
  }
}
 
Example #28
Source File: TestJobCleanup.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Populates the given JobConf with an identity map/reduce job over
 * line-oriented text input.
 *
 * @param jc      configuration to populate (not submitted here)
 * @param jobName job name to set
 * @param maps    number of map tasks
 * @param reds    number of reduce tasks
 * @param outDir  output directory
 */
private void configureJob(JobConf jc, String jobName, int maps, int reds,
    Path outDir) {
  jc.setJobName(jobName);

  // Identity pipeline over text lines (LongWritable offsets, Text values).
  jc.setMapperClass(IdentityMapper.class);
  jc.setReducerClass(IdentityReducer.class);
  jc.setInputFormat(TextInputFormat.class);
  jc.setOutputKeyClass(LongWritable.class);
  jc.setOutputValueClass(Text.class);

  // inDir is a field of the enclosing test class.
  FileInputFormat.setInputPaths(jc, inDir);
  FileOutputFormat.setOutputPath(jc, outDir);

  jc.setNumMapTasks(maps);
  jc.setNumReduceTasks(reds);
}
 
Example #29
Source File: TestJobCleanup.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a one-map/one-reduce identity job on a single-tracker MiniMRCluster
 * and asserts that the task tracker's local job directory is gone after the
 * job completes, i.e. job-local files were cleaned up.
 */
public void testJobDirctoryCleanup() throws Exception {
  try {
    conf = new JobConf();
    FileSystem fileSys = FileSystem.get(conf);
    fileSys.delete(new Path(TEST_ROOT_DIR), true);
    // One tracker, one local dir, a single "host1" tracker host.
    cluster = new MiniMRCluster(1, "file:///", 1, null, new String[] {"host1"}, conf);
    JobConf jc = cluster.createJobConf();
    jc.setJobName("TestJob");
    Path inDir = new Path(TEST_ROOT_DIR, "test-input");
    Path outDir = new Path(TEST_ROOT_DIR, "test-output");
    String input = "Test\n";
    // Write a one-line input file.
    DataOutputStream file = fileSys.create(new Path(inDir, "part-" + 0));
    file.writeBytes(input);
    file.close();
    FileInputFormat.setInputPaths(jc, inDir);
    FileOutputFormat.setOutputPath(jc, outDir);
    jc.setInputFormat(TextInputFormat.class);
    jc.setOutputKeyClass(LongWritable.class);
    jc.setOutputValueClass(Text.class);
    jc.setMapperClass(IdentityMapper.class);
    jc.setReducerClass(IdentityReducer.class);
    jc.setNumMapTasks(1);
    jc.setNumReduceTasks(1);
    JobClient jobClient = new JobClient(jc);
    RunningJob job = jobClient.submitJob(jc);
    JobID jobId = job.getID();
    job.waitForCompletion();
    // NOTE(review): the return value is discarded — presumably kept for a
    // side effect or left over from an edit; confirm this call is needed.
    cluster.getTaskTrackerRunner(0).getTaskTracker();
    String subdir = TaskTracker.getLocalJobDir(jobId.toString());
    File dir = new File(cluster.getTaskTrackerLocalDir(0) + "/" + subdir);
    // File.list() returns null when the path does not exist (or is not a
    // directory), which is the expected post-cleanup state here.
    assertEquals(null, dir.list());
  } finally {
    // Always tear the mini cluster down, even on assertion failure.
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example #30
Source File: ThreadedMapBenchmark.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Generate input data for the benchmark by running a map-only random-writer
 * job (numMapsPerHost maps per task tracker) into INPUT_DIR.
 *
 * @param dataSizePerMap  data to generate per map, in MB
 * @param numSpillsPerMap number of spills per map (only logged here)
 * @param numMapsPerHost  number of maps per task tracker host
 * @param masterConf      base configuration to derive the job from
 * @throws Exception if the generation job fails
 */
public static void generateInputData(int dataSizePerMap, 
                                     int numSpillsPerMap, 
                                     int numMapsPerHost, 
                                     JobConf masterConf) 
throws Exception { 
  JobConf job = new JobConf(masterConf, ThreadedMapBenchmark.class);
  job.setJobName("threaded-map-benchmark-random-writer");
  job.setJarByClass(ThreadedMapBenchmark.class);
  job.setInputFormat(UtilsForTests.RandomInputFormat.class);
  job.setOutputFormat(SequenceFileOutputFormat.class);
  
  job.setMapperClass(Map.class);
  job.setReducerClass(IdentityReducer.class);
  
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  
  JobClient client = new JobClient(job);
  ClusterStatus cluster = client.getClusterStatus();
  // Widen to long before multiplying: the all-int product overflows for
  // large per-map sizes or tracker counts.
  long totalDataSize = (long) dataSizePerMap * numMapsPerHost 
                       * cluster.getTaskTrackers();
  // MB -> bytes with a long multiplier so sizes >= 2048 MB don't overflow int.
  job.set("test.tmb.bytes_per_map", 
          String.valueOf(dataSizePerMap * 1024L * 1024));
  job.setNumReduceTasks(0); // none reduce
  job.setNumMapTasks(numMapsPerHost * cluster.getTaskTrackers());
  FileOutputFormat.setOutputPath(job, INPUT_DIR);
  
  FileSystem fs = FileSystem.get(job);
  fs.delete(BASE_DIR, true);
  
  LOG.info("Generating random input for the benchmark");
  LOG.info("Total data : " + totalDataSize + " mb");
  LOG.info("Data per map: " + dataSizePerMap + " mb");
  LOG.info("Number of spills : " + numSpillsPerMap);
  LOG.info("Number of maps per host : " + numMapsPerHost);
  LOG.info("Number of hosts : " + cluster.getTaskTrackers());
  
  JobClient.runJob(job); // generates the input for the benchmark
}