org.apache.hadoop.mapred.lib.IdentityReducer Java Examples
The following examples show how to use
org.apache.hadoop.mapred.lib.IdentityReducer.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestCompressedEmptyMapOutputs.java From RDFS with Apache License 2.0 | 6 votes |
private static void runSort(JobConf jobConf, Path sortInput, Path sortOutput) throws Exception { // Set up the job jobConf.setJobName("null-sorter"); jobConf.setMapperClass(SinkMapper.class); jobConf.setReducerClass(IdentityReducer.class); jobConf.setNumReduceTasks(2); jobConf.setInputFormat(SequenceFileInputFormat.class); jobConf.setOutputFormat(SequenceFileOutputFormat.class); jobConf.setOutputKeyClass(BytesWritable.class); jobConf.setOutputValueClass(BytesWritable.class); FileInputFormat.setInputPaths(jobConf, sortInput); FileOutputFormat.setOutputPath(jobConf, sortOutput); // Compress the intermediate map-outputs! jobConf.setCompressMapOutput(true); // Run the job JobClient.runJob(jobConf); }
Example #2
Source File: TestReduceFetch.java From hadoop-gpu with Apache License 2.0 | 6 votes |
/**
 * Runs a single-reducer job ({@code MapMB} mapper, {@code IdentityReducer})
 * over the {@code FakeIF} input format, asserts that it succeeded and returns
 * its counters. The output directory "/out" is deleted from the cluster file
 * system afterwards, whether or not the job ran cleanly.
 *
 * @param conf job configuration to populate and run
 * @return the counters of the finished job
 * @throws Exception if the job or the cleanup fails
 */
public static Counters runJob(JobConf conf) throws Exception {
  conf.setMapperClass(MapMB.class);
  conf.setReducerClass(IdentityReducer.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  conf.setNumReduceTasks(1);
  conf.setInputFormat(FakeIF.class);

  FileInputFormat.setInputPaths(conf, new Path("/in"));
  final Path outp = new Path("/out");
  FileOutputFormat.setOutputPath(conf, outp);

  RunningJob finished = null;
  try {
    finished = JobClient.runJob(conf);
    assertTrue(finished.isSuccessful());
  } finally {
    // Always remove the job output so later runs start clean.
    FileSystem clusterFs = dfsCluster.getFileSystem();
    if (clusterFs.exists(outp)) {
      clusterFs.delete(outp, true);
    }
  }
  return finished.getCounters();
}
Example #3
Source File: TestMultipleLevelCaching.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Configures an identity map job over sequence files and runs it.
 *
 * <p>A reducer class is configured, but the number of reduce tasks is set to
 * zero.
 *
 * @param jobConf    job configuration to populate and run
 * @param inDir      input directory
 * @param outputPath output directory
 * @param numMaps    number of map tasks to request
 * @param jobName    name given to the job
 * @return the handle returned by {@code JobClient.runJob}
 * @throws IOException if running the job fails
 */
static RunningJob launchJob(JobConf jobConf, Path inDir, Path outputPath,
    int numMaps, String jobName) throws IOException {
  jobConf.setJobName(jobName);

  // Input/output formats and locations.
  jobConf.setInputFormat(NonSplitableSequenceFileInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  FileInputFormat.setInputPaths(jobConf, inDir);
  FileOutputFormat.setOutputPath(jobConf, outputPath);

  // Pass-through mapper and reducer with raw byte keys and values.
  jobConf.setMapperClass(IdentityMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);

  // Task counts: caller-chosen maps, no reduces.
  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(0);

  jobConf.setJar("build/test/mapred/testjar/testjob.jar");

  RunningJob launched = JobClient.runJob(jobConf);
  return launched;
}
Example #4
Source File: TestDatamerge.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Runs an "outer" composite join over three {@code Fake_IF} sources with the
 * identity mapper and reducer, then deletes the base directory recursively.
 *
 * @throws Exception if the job or the cleanup fails
 */
public void testEmptyJoin() throws Exception {
  JobConf joinConf = new JobConf();
  Path root = cluster.getFileSystem().makeQualified(new Path("/empty"));

  // Only the first source is rooted under the base directory; the other two
  // are given as bare relative paths.
  Path[] sources = {
    new Path(root, "i0"),
    new Path("i1"),
    new Path("i2")
  };
  String joinExpr = CompositeInputFormat.compose("outer", Fake_IF.class, sources);
  joinConf.set("mapreduce.join.expr", joinExpr);

  joinConf.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(joinConf, new Path(root, "out"));
  joinConf.setMapperClass(IdentityMapper.class);
  joinConf.setReducerClass(IdentityReducer.class);
  joinConf.setOutputKeyClass(IncomparableKey.class);
  joinConf.setOutputValueClass(NullWritable.class);

  JobClient.runJob(joinConf);
  root.getFileSystem(joinConf).delete(root, true);
}
Example #5
Source File: UtilsForTests.java From hadoop with Apache License 2.0 | 6 votes |
public static RunningJob runJobFail(JobConf conf, Path inDir, Path outDir) throws IOException { conf.setJobName("test-job-fail"); conf.setMapperClass(FailMapper.class); conf.setReducerClass(IdentityReducer.class); conf.setMaxMapAttempts(1); RunningJob job = UtilsForTests.runJob(conf, inDir, outDir); long sleepCount = 0; while (!job.isComplete()) { try { if (sleepCount > 300) { // 30 seconds throw new IOException("Job didn't finish in 30 seconds"); } Thread.sleep(100); sleepCount++; } catch (InterruptedException e) { break; } } return job; }
Example #6
Source File: UtilsForTests.java From hadoop with Apache License 2.0 | 6 votes |
public static RunningJob runJobSucceed(JobConf conf, Path inDir, Path outDir) throws IOException { conf.setJobName("test-job-succeed"); conf.setMapperClass(IdentityMapper.class); conf.setReducerClass(IdentityReducer.class); RunningJob job = UtilsForTests.runJob(conf, inDir, outDir); long sleepCount = 0; while (!job.isComplete()) { try { if (sleepCount > 300) { // 30 seconds throw new IOException("Job didn't finish in 30 seconds"); } Thread.sleep(100); sleepCount++; } catch (InterruptedException e) { break; } } return job; }
Example #7
Source File: UtilsForTests.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Populates a job configuration for a "waiting" job: a
 * {@code HalfWaitingMapper} paired with {@code IdentityReducer}, with the map
 * and reduce signal file names stored under the keys returned by
 * {@code getTaskSignalParameter}.
 *
 * @param jobConf           configuration to populate
 * @param inDir             input directory
 * @param outputPath        output directory
 * @param numMaps           number of map tasks
 * @param numRed            number of reduce tasks
 * @param jobName           name given to the job
 * @param mapSignalFilename value stored under the map signal parameter
 * @param redSignalFilename value stored under the reduce signal parameter
 * @throws IOException if setting the paths fails
 */
static void configureWaitingJobConf(JobConf jobConf, Path inDir,
    Path outputPath, int numMaps, int numRed, String jobName,
    String mapSignalFilename, String redSignalFilename) throws IOException {
  jobConf.setJobName(jobName);

  // NOTE(review): the input format is set twice in this method; presumably
  // the later RandomInputFormat call is the one that takes effect -- confirm.
  jobConf.setInputFormat(NonSplitableSequenceFileInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);

  FileInputFormat.setInputPaths(jobConf, inDir);
  FileOutputFormat.setOutputPath(jobConf, outputPath);

  jobConf.setMapperClass(UtilsForTests.HalfWaitingMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);
  jobConf.setInputFormat(RandomInputFormat.class);

  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numRed);
  jobConf.setJar("build/test/mapred/testjar/testjob.jar");

  // Signal files, keyed separately for maps (true) and reduces (false).
  String mapSignalKey = getTaskSignalParameter(true);
  jobConf.set(mapSignalKey, mapSignalFilename);
  String reduceSignalKey = getTaskSignalParameter(false);
  jobConf.set(reduceSignalKey, redSignalFilename);
}
Example #8
Source File: UtilsForTests.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Fills in a job configuration for a job that waits on signal files: mapper
 * {@code HalfWaitingMapper}, reducer {@code IdentityReducer}, byte-array
 * output keys and values, and the two signal file names stored under the keys
 * returned by {@code getTaskSignalParameter}.
 *
 * @param jobConf           configuration to populate
 * @param inDir             input directory
 * @param outputPath        output directory
 * @param numMaps           number of map tasks
 * @param numRed            number of reduce tasks
 * @param jobName           name given to the job
 * @param mapSignalFilename value stored under the map signal parameter
 * @param redSignalFilename value stored under the reduce signal parameter
 * @throws IOException if setting the paths fails
 */
static void configureWaitingJobConf(JobConf jobConf, Path inDir,
    Path outputPath, int numMaps, int numRed, String jobName,
    String mapSignalFilename, String redSignalFilename) throws IOException {
  jobConf.setJobName(jobName);

  // NOTE(review): setInputFormat is invoked twice below; presumably only the
  // second (RandomInputFormat) value survives -- confirm before removing one.
  jobConf.setInputFormat(NonSplitableSequenceFileInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  FileInputFormat.setInputPaths(jobConf, inDir);
  FileOutputFormat.setOutputPath(jobConf, outputPath);

  jobConf.setMapperClass(UtilsForTests.HalfWaitingMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);

  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);

  jobConf.setInputFormat(RandomInputFormat.class);

  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numRed);

  jobConf.setJar("build/test/mapred/testjar/testjob.jar");

  final String signalKeyForMaps = getTaskSignalParameter(true);
  jobConf.set(signalKeyForMaps, mapSignalFilename);
  final String signalKeyForReduces = getTaskSignalParameter(false);
  jobConf.set(signalKeyForReduces, redSignalFilename);
}
Example #9
Source File: UtilsForTests.java From big-c with Apache License 2.0 | 6 votes |
public static RunningJob runJobSucceed(JobConf conf, Path inDir, Path outDir) throws IOException { conf.setJobName("test-job-succeed"); conf.setMapperClass(IdentityMapper.class); conf.setReducerClass(IdentityReducer.class); RunningJob job = UtilsForTests.runJob(conf, inDir, outDir); long sleepCount = 0; while (!job.isComplete()) { try { if (sleepCount > 300) { // 30 seconds throw new IOException("Job didn't finish in 30 seconds"); } Thread.sleep(100); sleepCount++; } catch (InterruptedException e) { break; } } return job; }
Example #10
Source File: UtilsForTests.java From big-c with Apache License 2.0 | 6 votes |
public static RunningJob runJobFail(JobConf conf, Path inDir, Path outDir) throws IOException { conf.setJobName("test-job-fail"); conf.setMapperClass(FailMapper.class); conf.setReducerClass(IdentityReducer.class); conf.setMaxMapAttempts(1); RunningJob job = UtilsForTests.runJob(conf, inDir, outDir); long sleepCount = 0; while (!job.isComplete()) { try { if (sleepCount > 300) { // 30 seconds throw new IOException("Job didn't finish in 30 seconds"); } Thread.sleep(100); sleepCount++; } catch (InterruptedException e) { break; } } return job; }
Example #11
Source File: UtilsForTests.java From hadoop-gpu with Apache License 2.0 | 6 votes |
/**
 * Submits a job named "test-job-fail" that uses {@code FailMapper} and waits,
 * polling every 100 ms, until it reports completion.
 *
 * <p>There is no upper bound on the wait. If the waiting thread is
 * interrupted, polling stops, the thread's interrupt status is restored and
 * the job handle is returned as-is, possibly still incomplete.
 *
 * @param conf   job configuration to populate and submit
 * @param inDir  input directory passed to {@code UtilsForTests.runJob}
 * @param outDir output directory passed to {@code UtilsForTests.runJob}
 * @return the handle of the submitted job
 * @throws IOException if submitting or querying the job fails
 */
static RunningJob runJobFail(JobConf conf, Path inDir, Path outDir)
    throws IOException {
  conf.setJobName("test-job-fail");
  conf.setMapperClass(FailMapper.class);
  conf.setReducerClass(IdentityReducer.class);

  RunningJob job = UtilsForTests.runJob(conf, inDir, outDir);
  while (!job.isComplete()) {
    try {
      Thread.sleep(100);
    } catch (InterruptedException e) {
      // Re-assert the interrupt so the caller can react to it.
      Thread.currentThread().interrupt();
      break;
    }
  }
  return job;
}
Example #12
Source File: TestReduceTaskNoMapOutput.java From RDFS with Apache License 2.0 | 6 votes |
/**
 * Runs a job with three map tasks and one reduce task using the supplied
 * mapper and {@code IdentityReducer}, asserts that it succeeded and returns
 * its task completion events starting from index 0.
 *
 * @param conf                         job configuration to populate and run
 * @param mapperClass                  mapper class to install
 * @param enableNoFetchEmptyMapOutputs value stored under
 *                                     "mapred.enable.no.fetch.map.outputs"
 * @return the task completion events of the finished job
 * @throws Exception if the job fails to run
 */
public static TaskCompletionEvent[] runJob(JobConf conf, Class mapperClass,
    boolean enableNoFetchEmptyMapOutputs) throws Exception {
  conf.setMapperClass(mapperClass);
  conf.setReducerClass(IdentityReducer.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  conf.setNumMapTasks(3);
  conf.setNumReduceTasks(1);

  conf.setInputFormat(FakeIF.class);
  conf.setBoolean("mapred.enable.no.fetch.map.outputs",
      enableNoFetchEmptyMapOutputs);

  FileInputFormat.setInputPaths(conf, new Path("/in"));
  final Path outp = new Path("/out");
  FileOutputFormat.setOutputPath(conf, outp);

  RunningJob completed = JobClient.runJob(conf);
  assertTrue(completed.isSuccessful());
  return completed.getTaskCompletionEvents(0);
}
Example #13
Source File: TestDatamerge.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Builds an "outer" composite join expression over three {@code Fake_IF}
 * sources, runs it through the identity mapper and reducer, and finally
 * removes the base directory recursively.
 *
 * @throws Exception if the job or the cleanup fails
 */
public void testEmptyJoin() throws Exception {
  JobConf emptyJoinJob = new JobConf();
  Path baseDir = cluster.getFileSystem().makeQualified(new Path("/empty"));

  // The first input lives under the base directory; the remaining two are
  // plain relative paths.
  Path first = new Path(baseDir, "i0");
  Path second = new Path("i1");
  Path third = new Path("i2");
  Path[] inputs = { first, second, third };

  emptyJoinJob.set("mapreduce.join.expr",
      CompositeInputFormat.compose("outer", Fake_IF.class, inputs));
  emptyJoinJob.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(emptyJoinJob, new Path(baseDir, "out"));

  emptyJoinJob.setMapperClass(IdentityMapper.class);
  emptyJoinJob.setReducerClass(IdentityReducer.class);
  emptyJoinJob.setOutputKeyClass(IncomparableKey.class);
  emptyJoinJob.setOutputValueClass(NullWritable.class);

  JobClient.runJob(emptyJoinJob);
  baseDir.getFileSystem(emptyJoinJob).delete(baseDir, true);
}
Example #14
Source File: TestMiniMRChildTask.java From RDFS with Apache License 2.0 | 6 votes |
/** * Launch tests * @param conf Configuration of the mapreduce job. * @param inDir input path * @param outDir output path * @param input Input text * @throws IOException */ public void launchTest(JobConf conf, Path inDir, Path outDir, String input) throws IOException { configure(conf, inDir, outDir, input, MapClass.class, IdentityReducer.class); FileSystem outFs = outDir.getFileSystem(conf); // Launch job with default option for temp dir. // i.e. temp dir is ./tmp JobClient.runJob(conf); outFs.delete(outDir, true); // Launch job by giving relative path to temp dir. conf.set("mapred.child.tmp", "../temp"); JobClient.runJob(conf); outFs.delete(outDir, true); // Launch job by giving absolute path to temp dir conf.set("mapred.child.tmp", "/tmp"); JobClient.runJob(conf); outFs.delete(outDir, true); }
Example #15
Source File: UtilsForTests.java From RDFS with Apache License 2.0 | 6 votes |
/**
 * Submits a job named "test-job-kill" that uses {@code KillMapper}, waits
 * until it reaches the RUNNING state, kills it, and then waits until its
 * cleanup progress is no longer zero.
 *
 * <p>An interruption during either wait ends that wait only; the remaining
 * steps still run. The interrupt is remembered and the thread's interrupt
 * status is restored before this method exits, so that the caller can observe
 * it.
 *
 * @param conf   job configuration to populate and submit
 * @param inDir  input directory passed to {@code UtilsForTests.runJob}
 * @param outDir output directory passed to {@code UtilsForTests.runJob}
 * @return the handle of the killed job
 * @throws IOException if submitting, querying or killing the job fails
 */
static RunningJob runJobKill(JobConf conf, Path inDir, Path outDir)
    throws IOException {
  conf.setJobName("test-job-kill");
  conf.setMapperClass(KillMapper.class);
  conf.setReducerClass(IdentityReducer.class);

  RunningJob job = UtilsForTests.runJob(conf, inDir, outDir);

  // The flag is restored only at the end: restoring it right away would make
  // the second sleep fail immediately and skip the wait for cleanup.
  boolean interrupted = false;
  try {
    while (job.getJobState() != JobStatus.RUNNING) {
      try {
        Thread.sleep(100);
      } catch (InterruptedException e) {
        interrupted = true;
        break;
      }
    }

    job.killJob();

    while (job.cleanupProgress() == 0.0f) {
      try {
        Thread.sleep(10);
      } catch (InterruptedException ie) {
        interrupted = true;
        break;
      }
    }
  } finally {
    if (interrupted) {
      Thread.currentThread().interrupt();
    }
  }
  return job;
}
Example #16
Source File: UtilsForTests.java From RDFS with Apache License 2.0 | 6 votes |
/**
 * Submits a job named "test-job-fail" that uses {@code FailMapper} and polls
 * every 100 ms until the job reports completion.
 *
 * <p>The wait has no timeout. An interruption stops the polling; the
 * interrupt status is then restored on the current thread and the job handle
 * is returned even though the job may not have finished.
 *
 * @param conf   job configuration to populate and submit
 * @param inDir  input directory passed to {@code UtilsForTests.runJob}
 * @param outDir output directory passed to {@code UtilsForTests.runJob}
 * @return the handle of the submitted job
 * @throws IOException if submitting or querying the job fails
 */
static RunningJob runJobFail(JobConf conf, Path inDir, Path outDir)
    throws IOException {
  conf.setJobName("test-job-fail");
  conf.setMapperClass(FailMapper.class);
  conf.setReducerClass(IdentityReducer.class);

  RunningJob job = UtilsForTests.runJob(conf, inDir, outDir);
  while (!job.isComplete()) {
    try {
      Thread.sleep(100);
    } catch (InterruptedException e) {
      // Do not lose the interruption; hand it back to the caller.
      Thread.currentThread().interrupt();
      break;
    }
  }
  return job;
}
Example #17
Source File: TestMiniMRChildTask.java From RDFS with Apache License 2.0 | 6 votes |
void runTestTaskEnv(JobConf conf, Path inDir, Path outDir) throws IOException { String input = "The input"; configure(conf, inDir, outDir, input, EnvCheckMapper.class, IdentityReducer.class); // test // - new SET of new var (MY_PATH) // - set of old var (HOME) // - append to an old var from modified env (LD_LIBRARY_PATH) // - append to an old var from tt's env (PATH) // - append to a new var (NEW_PATH) conf.set("mapred.child.env", "MY_PATH=/tmp,HOME=/tmp,LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp," + "PATH=$PATH:/tmp,NEW_PATH=$NEW_PATH:/tmp"); conf.set("path", System.getenv("PATH")); RunningJob job = JobClient.runJob(conf); assertTrue("The environment checker job failed.", job.isSuccessful()); }
Example #18
Source File: UtilsForTests.java From RDFS with Apache License 2.0 | 6 votes |
/**
 * Sets up a configuration for a waiting job: {@code HalfWaitingMapper} with
 * {@code IdentityReducer}, sequence-file output of {@code BytesWritable}
 * pairs, and the signal file names stored under the keys returned by
 * {@code getTaskSignalParameter}.
 *
 * @param jobConf           configuration to populate
 * @param inDir             input directory
 * @param outputPath        output directory
 * @param numMaps           number of map tasks
 * @param numRed            number of reduce tasks
 * @param jobName           name given to the job
 * @param mapSignalFilename value stored under the map signal parameter
 * @param redSignalFilename value stored under the reduce signal parameter
 * @throws IOException if setting the paths fails
 */
static void configureWaitingJobConf(JobConf jobConf, Path inDir,
    Path outputPath, int numMaps, int numRed, String jobName,
    String mapSignalFilename, String redSignalFilename) throws IOException {
  jobConf.setJobName(jobName);

  // NOTE(review): two setInputFormat calls appear in this method; presumably
  // the RandomInputFormat one further down overrides this one -- confirm.
  jobConf.setInputFormat(NonSplitableSequenceFileInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);

  FileInputFormat.setInputPaths(jobConf, inDir);
  FileOutputFormat.setOutputPath(jobConf, outputPath);

  jobConf.setMapperClass(UtilsForTests.HalfWaitingMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);
  jobConf.setInputFormat(RandomInputFormat.class);

  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numRed);
  jobConf.setJar("build/test/testjar/testjob.jar");

  // One signal file name for the maps, one for the reduces.
  String keyForMaps = getTaskSignalParameter(true);
  jobConf.set(keyForMaps, mapSignalFilename);
  String keyForReduces = getTaskSignalParameter(false);
  jobConf.set(keyForReduces, redSignalFilename);
}
Example #19
Source File: TestRackAwareTaskPlacement.java From RDFS with Apache License 2.0 | 6 votes |
/**
 * Configures an identity job over sequence files and runs it.
 *
 * <p>{@code IdentityReducer} is installed, yet the reduce-task count is set
 * to zero.
 *
 * @param jobConf    job configuration to populate and run
 * @param inDir      input directory
 * @param outputPath output directory
 * @param numMaps    number of map tasks to request
 * @param jobName    name given to the job
 * @return the handle returned by {@code JobClient.runJob}
 * @throws IOException if running the job fails
 */
static RunningJob launchJob(JobConf jobConf, Path inDir, Path outputPath,
    int numMaps, String jobName) throws IOException {
  jobConf.setJobName(jobName);

  jobConf.setInputFormat(NonSplitableSequenceFileInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);

  FileInputFormat.setInputPaths(jobConf, inDir);
  FileOutputFormat.setOutputPath(jobConf, outputPath);

  jobConf.setMapperClass(IdentityMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);

  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);

  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(0);

  jobConf.setJar("build/test/testjar/testjob.jar");

  RunningJob handle = JobClient.runJob(jobConf);
  return handle;
}
Example #20
Source File: TestDatamerge.java From RDFS with Apache License 2.0 | 6 votes |
/**
 * Composes an "outer" join over three {@code Fake_IF} inputs, stores it under
 * "mapred.join.expr", runs the job with identity mapper and reducer, and then
 * deletes the base directory recursively.
 *
 * @throws Exception if the job or the cleanup fails
 */
public void testEmptyJoin() throws Exception {
  JobConf conf = new JobConf();
  Path basePath = cluster.getFileSystem().makeQualified(new Path("/empty"));

  // Input "i0" is resolved against the base path; "i1" and "i2" are left as
  // relative paths.
  Path[] joinInputs = new Path[] {
    new Path(basePath, "i0"),
    new Path("i1"),
    new Path("i2"),
  };
  String expression =
      CompositeInputFormat.compose("outer", Fake_IF.class, joinInputs);
  conf.set("mapred.join.expr", expression);
  conf.setInputFormat(CompositeInputFormat.class);

  Path outputDir = new Path(basePath, "out");
  FileOutputFormat.setOutputPath(conf, outputDir);

  conf.setMapperClass(IdentityMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  conf.setOutputKeyClass(IncomparableKey.class);
  conf.setOutputValueClass(NullWritable.class);

  JobClient.runJob(conf);
  basePath.getFileSystem(conf).delete(basePath, true);
}
Example #21
Source File: UtilsForTests.java From hadoop-gpu with Apache License 2.0 | 6 votes |
/**
 * Submits an identity map/reduce job named "test-job-succeed" and polls every
 * 100 ms until the job reports completion.
 *
 * <p>The wait has no timeout. An interruption stops the polling; the
 * interrupt status is then restored on the current thread and the job handle
 * is returned even though the job may not have finished.
 *
 * @param conf   job configuration to populate and submit
 * @param inDir  input directory passed to {@code UtilsForTests.runJob}
 * @param outDir output directory passed to {@code UtilsForTests.runJob}
 * @return the handle of the submitted job
 * @throws IOException if submitting or querying the job fails
 */
static RunningJob runJobSucceed(JobConf conf, Path inDir, Path outDir)
    throws IOException {
  conf.setJobName("test-job-succeed");
  conf.setMapperClass(IdentityMapper.class);
  conf.setReducerClass(IdentityReducer.class);

  RunningJob job = UtilsForTests.runJob(conf, inDir, outDir);
  while (!job.isComplete()) {
    try {
      Thread.sleep(100);
    } catch (InterruptedException e) {
      // Do not lose the interruption; hand it back to the caller.
      Thread.currentThread().interrupt();
      break;
    }
  }
  return job;
}
Example #22
Source File: TestMultipleLevelCaching.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Prepares a pass-through job (identity mapper and reducer, byte-array keys
 * and values, sequence-file input and output) and runs it.
 *
 * <p>The number of reduce tasks is fixed at zero.
 *
 * @param jobConf    job configuration to populate and run
 * @param inDir      input directory
 * @param outputPath output directory
 * @param numMaps    number of map tasks to request
 * @param jobName    name given to the job
 * @return the handle returned by {@code JobClient.runJob}
 * @throws IOException if running the job fails
 */
static RunningJob launchJob(JobConf jobConf, Path inDir, Path outputPath,
    int numMaps, String jobName) throws IOException {
  jobConf.setJobName(jobName);

  // Formats first, then the concrete locations.
  jobConf.setInputFormat(NonSplitableSequenceFileInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  FileInputFormat.setInputPaths(jobConf, inDir);
  FileOutputFormat.setOutputPath(jobConf, outputPath);

  // Identity processing of raw bytes.
  jobConf.setMapperClass(IdentityMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);

  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(0);
  jobConf.setJar("build/test/mapred/testjar/testjob.jar");

  RunningJob running = JobClient.runJob(jobConf);
  return running;
}
Example #23
Source File: TestMRAppWithCombiner.java From big-c with Apache License 2.0 | 5 votes |
@Test public void testCombinerShouldUpdateTheReporter() throws Exception { JobConf conf = new JobConf(mrCluster.getConfig()); int numMaps = 5; int numReds = 2; Path in = new Path(mrCluster.getTestWorkDir().getAbsolutePath(), "testCombinerShouldUpdateTheReporter-in"); Path out = new Path(mrCluster.getTestWorkDir().getAbsolutePath(), "testCombinerShouldUpdateTheReporter-out"); createInputOutPutFolder(in, out, numMaps); conf.setJobName("test-job-with-combiner"); conf.setMapperClass(IdentityMapper.class); conf.setCombinerClass(MyCombinerToCheckReporter.class); //conf.setJarByClass(MyCombinerToCheckReporter.class); conf.setReducerClass(IdentityReducer.class); DistributedCache.addFileToClassPath(TestMRJobs.APP_JAR, conf); conf.setOutputCommitter(CustomOutputCommitter.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Text.class); FileInputFormat.setInputPaths(conf, in); FileOutputFormat.setOutputPath(conf, out); conf.setNumMapTasks(numMaps); conf.setNumReduceTasks(numReds); runJob(conf); }
Example #24
Source File: TestTaskFail.java From hadoop-gpu with Apache License 2.0 | 5 votes |
public RunningJob launchJob(JobConf conf, Path inDir, Path outDir, String input) throws IOException { // set up the input file system and write input text. FileSystem inFs = inDir.getFileSystem(conf); FileSystem outFs = outDir.getFileSystem(conf); outFs.delete(outDir, true); if (!inFs.mkdirs(inDir)) { throw new IOException("Mkdirs failed to create " + inDir.toString()); } { // write input into input file DataOutputStream file = inFs.create(new Path(inDir, "part-0")); file.writeBytes(input); file.close(); } // configure the mapred Job conf.setMapperClass(MapperClass.class); conf.setReducerClass(IdentityReducer.class); conf.setNumReduceTasks(0); FileInputFormat.setInputPaths(conf, inDir); FileOutputFormat.setOutputPath(conf, outDir); conf.setSpeculativeExecution(false); String TEST_ROOT_DIR = new Path(System.getProperty("test.build.data", "/tmp")).toString().replace(' ', '+'); conf.set("test.build.data", TEST_ROOT_DIR); // return the RunningJob handle. return new JobClient(conf).submitJob(conf); }
Example #25
Source File: TestMROldApiJobs.java From hadoop with Apache License 2.0 | 5 votes |
public static void runJobSucceed(JobConf conf, Path inDir, Path outDir) throws IOException, InterruptedException { conf.setJobName("test-job-succeed"); conf.setMapperClass(IdentityMapper.class); //conf.setJar(new File(MiniMRYarnCluster.APPJAR).getAbsolutePath()); conf.setReducerClass(IdentityReducer.class); boolean success = runJob(conf, inDir, outDir, 1 , 1); Assert.assertTrue("Job expected to succeed failed", success); }
Example #26
Source File: PipelineTest.java From hiped2 with Apache License 2.0 | 5 votes |
/**
 * Builds a two-stage pipeline driver in which each stage pairs an
 * {@code IdentityMapper} with an {@code IdentityReducer} over
 * {@code Text} keys and values.
 */
@Before
public void setUp() {
  // Stage one.
  mapper1 = new IdentityMapper<Text, Text>();
  reducer1 = new IdentityReducer<Text, Text>();

  // Stage two.
  mapper2 = new IdentityMapper<Text, Text>();
  reducer2 = new IdentityReducer<Text, Text>();

  driver = new PipelineMapReduceDriver<Text, Text, Text, Text>();

  Pair<Mapper, Reducer> firstStage =
      new Pair<Mapper, Reducer>(mapper1, reducer1);
  driver.addMapReduce(firstStage);

  Pair<Mapper, Reducer> secondStage =
      new Pair<Mapper, Reducer>(mapper2, reducer2);
  driver.addMapReduce(secondStage);
}
Example #27
Source File: TestMapRed.java From RDFS with Apache License 2.0 | 5 votes |
/**
 * Writes eight records with {@code NullWritable} keys to a sequence file,
 * runs them through {@code NullMapper} and {@code IdentityReducer} with a
 * single reduce task, and checks that the output values come back in the
 * order "AAAAAAAAAAAAAA", "BBBBBBBBBBBBBB", and so on.
 *
 * <p>Both the sequence-file writer and the reader are closed in
 * {@code finally} blocks so that a failing append or assertion does not leak
 * an open file handle.
 *
 * @throws Exception if file setup, the job, or the verification fails
 */
public void testNullKeys() throws Exception {
  JobConf conf = new JobConf(TestMapRed.class);
  FileSystem fs = FileSystem.getLocal(conf);
  Path testdir = new Path(
      System.getProperty("test.build.data", "/tmp")).makeQualified(fs);
  fs.delete(testdir, true);
  Path inFile = new Path(testdir, "nullin/blah");

  String[] values = {
    "AAAAAAAAAAAAAA", "BBBBBBBBBBBBBB", "CCCCCCCCCCCCCC", "DDDDDDDDDDDDDD",
    "EEEEEEEEEEEEEE", "FFFFFFFFFFFFFF", "GGGGGGGGGGGGGG", "HHHHHHHHHHHHHH"
  };

  Text t = new Text();
  SequenceFile.Writer w = SequenceFile.createWriter(fs, conf, inFile,
      NullWritable.class, Text.class, SequenceFile.CompressionType.NONE);
  try {
    for (String value : values) {
      t.set(value);
      w.append(NullWritable.get(), t);
    }
  } finally {
    w.close();
  }

  FileInputFormat.setInputPaths(conf, inFile);
  FileOutputFormat.setOutputPath(conf, new Path(testdir, "nullout"));
  conf.setMapperClass(NullMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  conf.setOutputKeyClass(NullWritable.class);
  conf.setOutputValueClass(Text.class);
  conf.setInputFormat(SequenceFileInputFormat.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  conf.setNumReduceTasks(1);

  JobClient.runJob(conf);

  SequenceFile.Reader r = new SequenceFile.Reader(fs,
      new Path(testdir, "nullout/part-00000"), conf);
  try {
    String m = "AAAAAAAAAAAAAA";
    for (int i = 1; r.next(NullWritable.get(), t); ++i) {
      assertTrue(t.toString() + " doesn't match " + m, m.equals(t.toString()));
      // Advance the expected value to the next letter ("AAA.." -> "BBB..").
      m = m.replace((char) ('A' + i - 1), (char) ('A' + i));
    }
  } finally {
    r.close();
  }
}
Example #28
Source File: TestJobCleanup.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Populates a job configuration for an identity map/reduce job that reads
 * text input from the enclosing scope's {@code inDir} and writes to
 * {@code outDir}.
 *
 * @param jc      configuration to populate
 * @param jobName name given to the job
 * @param maps    number of map tasks
 * @param reds    number of reduce tasks
 * @param outDir  output directory
 */
private void configureJob(JobConf jc, String jobName, int maps, int reds,
    Path outDir) {
  jc.setJobName(jobName);

  // Record format and types.
  jc.setInputFormat(TextInputFormat.class);
  jc.setOutputKeyClass(LongWritable.class);
  jc.setOutputValueClass(Text.class);

  // Locations.
  FileInputFormat.setInputPaths(jc, inDir);
  FileOutputFormat.setOutputPath(jc, outDir);

  // Pass-through processing.
  jc.setMapperClass(IdentityMapper.class);
  jc.setReducerClass(IdentityReducer.class);

  // Parallelism.
  jc.setNumMapTasks(maps);
  jc.setNumReduceTasks(reds);
}
Example #29
Source File: TestJobCleanup.java From RDFS with Apache License 2.0 | 5 votes |
/**
 * Starts a one-node mini MR cluster on the local file system, runs a small
 * identity job to completion, and asserts that listing the job's local
 * directory on the task tracker yields {@code null}.
 *
 * <p>The input file stream is closed in a {@code finally} block so that a
 * failed write does not leak it. The cluster is shut down in all cases.
 *
 * @throws Exception if cluster setup, the job, or the verification fails
 */
public void testJobDirctoryCleanup() throws Exception {
  try {
    conf = new JobConf();
    FileSystem fileSys = FileSystem.get(conf);
    fileSys.delete(new Path(TEST_ROOT_DIR), true);
    cluster = new MiniMRCluster(1, "file:///", 1, null,
        new String[] {"host1"}, conf);

    JobConf jc = cluster.createJobConf();
    jc.setJobName("TestJob");
    Path inDir = new Path(TEST_ROOT_DIR, "test-input");
    Path outDir = new Path(TEST_ROOT_DIR, "test-output");

    String input = "Test\n";
    DataOutputStream file = fileSys.create(new Path(inDir, "part-" + 0));
    try {
      file.writeBytes(input);
    } finally {
      file.close();
    }

    FileInputFormat.setInputPaths(jc, inDir);
    FileOutputFormat.setOutputPath(jc, outDir);
    jc.setInputFormat(TextInputFormat.class);
    jc.setOutputKeyClass(LongWritable.class);
    jc.setOutputValueClass(Text.class);
    jc.setMapperClass(IdentityMapper.class);
    jc.setReducerClass(IdentityReducer.class);
    jc.setNumMapTasks(1);
    jc.setNumReduceTasks(1);

    JobClient jobClient = new JobClient(jc);
    RunningJob job = jobClient.submitJob(jc);
    JobID jobId = job.getID();
    job.waitForCompletion();

    // NOTE(review): the result of this call is unused; kept because it may
    // be relied on for its side effects -- confirm before removing.
    cluster.getTaskTrackerRunner(0).getTaskTracker();

    String subdir = TaskTracker.getLocalJobDir(jobId.toString());
    File dir = new File(cluster.getTaskTrackerLocalDir(0) + "/" + subdir);
    // File.list() returns null when the path does not denote a directory.
    assertEquals(null, dir.list());
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
Example #30
Source File: ThreadedMapBenchmark.java From RDFS with Apache License 2.0 | 5 votes |
/** * Generate input data for the benchmark */ public static void generateInputData(int dataSizePerMap, int numSpillsPerMap, int numMapsPerHost, JobConf masterConf) throws Exception { JobConf job = new JobConf(masterConf, ThreadedMapBenchmark.class); job.setJobName("threaded-map-benchmark-random-writer"); job.setJarByClass(ThreadedMapBenchmark.class); job.setInputFormat(UtilsForTests.RandomInputFormat.class); job.setOutputFormat(SequenceFileOutputFormat.class); job.setMapperClass(Map.class); job.setReducerClass(IdentityReducer.class); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(BytesWritable.class); JobClient client = new JobClient(job); ClusterStatus cluster = client.getClusterStatus(); long totalDataSize = dataSizePerMap * numMapsPerHost * cluster.getTaskTrackers(); job.set("test.tmb.bytes_per_map", String.valueOf(dataSizePerMap * 1024 * 1024)); job.setNumReduceTasks(0); // none reduce job.setNumMapTasks(numMapsPerHost * cluster.getTaskTrackers()); FileOutputFormat.setOutputPath(job, INPUT_DIR); FileSystem fs = FileSystem.get(job); fs.delete(BASE_DIR, true); LOG.info("Generating random input for the benchmark"); LOG.info("Total data : " + totalDataSize + " mb"); LOG.info("Data per map: " + dataSizePerMap + " mb"); LOG.info("Number of spills : " + numSpillsPerMap); LOG.info("Number of maps per host : " + numMapsPerHost); LOG.info("Number of hosts : " + cluster.getTaskTrackers()); JobClient.runJob(job); // generates the input for the benchmark }