Java Code Examples for org.apache.hadoop.mapred.JobConf#setNumReduceTasks()
The following examples show how to use org.apache.hadoop.mapred.JobConf#setNumReduceTasks().
Each example is taken from an open-source project; the project name and license are noted above the code.
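Before the project examples, a minimal, self-contained sketch may help show where setNumReduceTasks() fits in the old (org.apache.hadoop.mapred) API. The job name and input/output paths here are hypothetical placeholders, and the identity mapper and reducer simply copy records through; it is a sketch of the call, not code from any of the projects below.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class NumReduceTasksSketch {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(NumReduceTasksSketch.class);
    job.setJobName("num-reduce-tasks-sketch");   // hypothetical job name

    // Default TextInputFormat: keys are byte offsets, values are lines.
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(IdentityReducer.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Fix the number of reduce tasks, and therefore the number of output
    // partitions. Passing 0 instead makes the job map-only (no shuffle/sort).
    job.setNumReduceTasks(4);

    JobClient.runJob(job);
  }
}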
Example 1
Source File: NNBench.java From hadoop with Apache License 2.0 | 8 votes |
/**
 * Run the test
 *
 * @throws IOException on error
 */
public static void runTests() throws IOException {
  config.setLong("io.bytes.per.checksum", bytesPerChecksum);

  JobConf job = new JobConf(config, NNBench.class);

  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);

  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);

  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);

  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);

  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
Example 2
Source File: TeraGen.java From hadoop-gpu with Apache License 2.0 | 6 votes |
/**
 * @param args the cli arguments
 */
public int run(String[] args) throws IOException {
  JobConf job = (JobConf) getConf();
  setNumberOfRows(job, Long.parseLong(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraGen");
  job.setJarByClass(TeraGen.class);
  job.setMapperClass(SortGenMapper.class);
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormat(RangeInputFormat.class);
  job.setOutputFormat(TeraOutputFormat.class);
  JobClient.runJob(job);
  return 0;
}
Example 3
Source File: SliveTest.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Sets up a job conf for the given job using the given config object. Ensures
 * that the correct input format is set, the mapper and reducer classes, and
 * the input and output key and value classes, along with any other job
 * configuration.
 *
 * @param config
 * @return JobConf representing the job to be run
 * @throws IOException
 */
private JobConf getJob(ConfigExtractor config) throws IOException {
  JobConf job = new JobConf(config.getConfig(), SliveTest.class);
  job.setInputFormat(DummyInputFormat.class);
  FileOutputFormat.setOutputPath(job, config.getOutputPath());
  job.setMapperClass(SliveMapper.class);
  job.setPartitionerClass(SlivePartitioner.class);
  job.setReducerClass(SliveReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormat(TextOutputFormat.class);
  TextOutputFormat.setCompressOutput(job, false);
  job.setNumReduceTasks(config.getReducerAmount());
  job.setNumMapTasks(config.getMapAmount());
  return job;
}
Example 4
Source File: JobControlTestUtils.java From RDFS with Apache License 2.0 | 6 votes |
/**
 * Creates a simple copy job.
 *
 * @param indirs List of input directories.
 * @param outdir Output directory.
 * @return JobConf initialised for a simple copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
static JobConf createCopyJob(List<Path> indirs, Path outdir) throws Exception {
  Configuration defaults = new Configuration();
  JobConf theJob = new JobConf(defaults, TestJobControl.class);
  theJob.setJobName("DataMoveJob");

  FileInputFormat.setInputPaths(theJob, indirs.toArray(new Path[0]));
  theJob.setMapperClass(DataCopy.class);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  theJob.setReducerClass(DataCopy.class);
  theJob.setNumMapTasks(12);
  theJob.setNumReduceTasks(4);
  return theJob;
}
Example 5
Source File: TeraValidate.java From hadoop-book with Apache License 2.0 | 6 votes |
public int run(String[] args) throws Exception {
  JobConf job = (JobConf) getConf();
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraValidate");
  job.setJarByClass(TeraValidate.class);
  job.setMapperClass(ValidateMapper.class);
  job.setReducerClass(ValidateReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  // force a single split
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  job.setInputFormat(TeraInputFormat.class);
  JobClient.runJob(job);
  return 0;
}
Example 6
Source File: TestTableMapReduceUtil.java From hbase with Apache License 2.0 | 6 votes |
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceEvaluation() throws Exception {
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  try {
    jobConf.setJobName("process row task");
    jobConf.setNumReduceTasks(1);
    TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
        ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class,
        jobConf);
    TableMapReduceUtil.initTableReduceJob(TABLE_NAME,
        ClassificatorRowReduce.class, jobConf);
    RunningJob job = JobClient.runJob(jobConf);
    assertTrue(job.isSuccessful());
  } finally {
    if (jobConf != null)
      FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
  }
}
Example 7
Source File: TestTableMapReduceUtil.java From hbase with Apache License 2.0 | 6 votes |
/**
 * Check that the given number of reduce tasks for the given job configuration
 * does not exceed the number of regions for the given table.
 */
@Test
public void shouldNumberOfReduceTaskNotExceedNumberOfRegionsForGivenTable()
    throws IOException {
  Assert.assertNotNull(presidentsTable);
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  TableMapReduceUtil.setNumReduceTasks(TABLE_NAME, jobConf);
  TableMapReduceUtil.limitNumReduceTasks(TABLE_NAME, jobConf);
  TableMapReduceUtil.setScannerCaching(jobConf, 100);
  assertEquals(1, jobConf.getNumReduceTasks());
  assertEquals(100, jobConf.getInt("hbase.client.scanner.caching", 0));

  jobConf.setNumReduceTasks(10);
  TableMapReduceUtil.setNumMapTasks(TABLE_NAME, jobConf);
  TableMapReduceUtil.limitNumReduceTasks(TABLE_NAME, jobConf);
  assertEquals(1, jobConf.getNumReduceTasks());
}
Example 8
Source File: TeraGen.java From RDFS with Apache License 2.0 | 6 votes |
/**
 * @param args the cli arguments
 */
public int run(String[] args) throws IOException {
  JobConf job = (JobConf) getConf();
  setNumberOfRows(job, Long.parseLong(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraGen");
  job.setJarByClass(TeraGen.class);
  job.setMapperClass(SortGenMapper.class);
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormat(RangeInputFormat.class);
  job.setOutputFormat(TeraOutputFormat.class);
  JobClient.runJob(job);
  return 0;
}
Example 9
Source File: DistCp.java From RDFS with Apache License 2.0 | 6 votes |
private static JobConf createJobConfForCopyByChunk(Configuration conf) {
  JobConf jobconf = new JobConf(conf, DistCp.class);
  jobconf.setJobName(NAME);

  // turn off speculative execution, because DFS doesn't handle
  // multiple writers to the same file.
  jobconf.setMapSpeculativeExecution(false);

  jobconf.setOutputKeyClass(Text.class);
  jobconf.setOutputValueClass(Text.class);

  jobconf.setInputFormat(CopyByChunkInputFormat.class);
  jobconf.setMapperClass(CopyFilesByChunkMapper.class);
  jobconf.setNumReduceTasks(0);
  return jobconf;
}
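Several of the examples on this page (TeraGen, DistCp, DistCh, UpdateColumnJob) pass 0 to setNumReduceTasks() so the job runs map-only. The skeleton below isolates that pattern under stated assumptions: the class name, job name, and the use of IdentityMapper are placeholders for illustration, not code from those projects.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityMapper;

public class MapOnlyJobSketch {
  static JobConf createMapOnlyConf(Configuration conf) {
    JobConf jobconf = new JobConf(conf, MapOnlyJobSketch.class);
    jobconf.setJobName("map-only-sketch");   // hypothetical job name

    // Speculative execution is commonly disabled when mappers write output
    // files directly, so two attempts never write to the same destination.
    jobconf.setMapSpeculativeExecution(false);

    jobconf.setOutputKeyClass(Text.class);
    jobconf.setOutputValueClass(Text.class);
    jobconf.setMapperClass(IdentityMapper.class);

    // Zero reduce tasks: map output goes straight to the OutputFormat and
    // the shuffle and sort phases are skipped entirely.
    jobconf.setNumReduceTasks(0);
    return jobconf;
  }
}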
Example 10
Source File: UtilsForTests.java From RDFS with Apache License 2.0 | 6 votes |
/**
 * Configure a waiting job
 */
static void configureWaitingJobConf(JobConf jobConf, Path inDir,
                                    Path outputPath, int numMaps, int numRed,
                                    String jobName, String mapSignalFilename,
                                    String redSignalFilename)
    throws IOException {
  jobConf.setJobName(jobName);
  jobConf.setInputFormat(NonSplitableSequenceFileInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  FileInputFormat.setInputPaths(jobConf, inDir);
  FileOutputFormat.setOutputPath(jobConf, outputPath);
  jobConf.setMapperClass(UtilsForTests.HalfWaitingMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);
  jobConf.setInputFormat(RandomInputFormat.class);
  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numRed);
  jobConf.setJar("build/test/testjar/testjob.jar");
  jobConf.set(getTaskSignalParameter(true), mapSignalFilename);
  jobConf.set(getTaskSignalParameter(false), redSignalFilename);
}
Example 11
Source File: TestMROldApiJobs.java From hadoop with Apache License 2.0 | 5 votes |
static boolean runJob(JobConf conf, Path inDir, Path outDir, int numMaps,
                      int numReds) throws IOException, InterruptedException {

  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outDir)) {
    fs.delete(outDir, true);
  }
  if (!fs.exists(inDir)) {
    fs.mkdirs(inDir);
  }
  String input = "The quick brown fox\n" + "has many silly\n"
      + "red fox sox\n";
  for (int i = 0; i < numMaps; ++i) {
    DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
    file.writeBytes(input);
    file.close();
  }

  DistributedCache.addFileToClassPath(TestMRJobs.APP_JAR, conf, fs);
  conf.setOutputCommitter(CustomOutputCommitter.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setNumMapTasks(numMaps);
  conf.setNumReduceTasks(numReds);

  JobClient jobClient = new JobClient(conf);
  RunningJob job = jobClient.submitJob(conf);
  return jobClient.monitorAndPrintJob(conf, job);
}
Example 12
Source File: DistCh.java From hadoop with Apache License 2.0 | 5 votes |
private static JobConf createJobConf(Configuration conf) {
  JobConf jobconf = new JobConf(conf, DistCh.class);
  jobconf.setJobName(NAME);
  jobconf.setMapSpeculativeExecution(false);

  jobconf.setInputFormat(ChangeInputFormat.class);
  jobconf.setOutputKeyClass(Text.class);
  jobconf.setOutputValueClass(Text.class);

  jobconf.setMapperClass(ChangeFilesMapper.class);
  jobconf.setNumReduceTasks(0);
  return jobconf;
}
Example 13
Source File: UpdateColumnJob.java From indexr with Apache License 2.0 | 5 votes |
public boolean doRun(Config upcolConfig) throws Exception {
  JobConf jobConf = new JobConf(getConf(), UpdateColumnJob.class);
  jobConf.setKeepFailedTaskFiles(false);
  jobConf.setNumReduceTasks(0);
  String jobName = String.format("indexr-upcol-%s-%s-%s",
      upcolConfig.table,
      LocalDateTime.now().format(timeFormatter),
      RandomStringUtils.randomAlphabetic(5));
  jobConf.setJobName(jobName);
  jobConf.set(CONFKEY, JsonUtil.toJson(upcolConfig));
  Path workDir = new Path(jobConf.getWorkingDirectory(), jobName);
  jobConf.setWorkingDirectory(workDir);

  Job job = Job.getInstance(jobConf);
  job.setInputFormatClass(SegmentInputFormat.class);
  job.setMapperClass(UpColSegmentMapper.class);
  job.setJarByClass(UpdateColumnJob.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setMapSpeculativeExecution(false);
  job.setOutputFormatClass(UpColSegmentOutputFormat.class);

  job.submit();
  boolean ok = job.waitForCompletion(true);
  if (!ok) {
    TaskReport[] reports = job.getTaskReports(TaskType.MAP);
    if (reports != null) {
      for (TaskReport report : reports) {
        log.error("Error in task [%s] : %s", report.getTaskId(),
            Arrays.toString(report.getDiagnostics()));
      }
    }
  }
  return ok;
}
Example 14
Source File: TestMapProcessor.java From incubator-tez with Apache License 2.0 | 5 votes |
public void setUpJobConf(JobConf job) {
  job.set(TezRuntimeFrameworkConfigs.LOCAL_DIRS, workDir.toString());
  job.set(MRConfig.LOCAL_DIR, workDir.toString());
  job.setClass(
      Constants.TEZ_RUNTIME_TASK_OUTPUT_MANAGER,
      TezLocalTaskOutputFiles.class,
      TezTaskOutput.class);
  job.set(TezJobConfig.TEZ_RUNTIME_PARTITIONER_CLASS,
      MRPartitioner.class.getName());
  job.setNumReduceTasks(1);
}
Example 15
Source File: TestMapProcessor.java From tez with Apache License 2.0 | 5 votes |
public void setUpJobConf(JobConf job) {
  job.set(TezRuntimeFrameworkConfigs.LOCAL_DIRS, workDir.toString());
  job.set(MRConfig.LOCAL_DIR, workDir.toString());
  job.setClass(
      Constants.TEZ_RUNTIME_TASK_OUTPUT_MANAGER,
      TezTaskOutputFiles.class,
      TezTaskOutput.class);
  job.set(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS,
      MRPartitioner.class.getName());
  job.setNumReduceTasks(1);
}
Example 16
Source File: CloudBurst.java From emr-sample-apps with Apache License 2.0 | 4 votes |
public static RunningJob alignall(String refpath, String qrypath,
    String outpath, int MIN_READ_LEN, int MAX_READ_LEN, int K,
    int ALLOW_DIFFERENCES, boolean FILTER_ALIGNMENTS, int NUM_MAP_TASKS,
    int NUM_REDUCE_TASKS, int BLOCK_SIZE, int REDUNDANCY)
    throws IOException, Exception {
  int SEED_LEN  = MIN_READ_LEN / (K + 1);
  int FLANK_LEN = MAX_READ_LEN - SEED_LEN + K;

  System.out.println("refpath: " + refpath);
  System.out.println("qrypath: " + qrypath);
  System.out.println("outpath: " + outpath);
  System.out.println("MIN_READ_LEN: " + MIN_READ_LEN);
  System.out.println("MAX_READ_LEN: " + MAX_READ_LEN);
  System.out.println("K: " + K);
  System.out.println("SEED_LEN: " + SEED_LEN);
  System.out.println("FLANK_LEN: " + FLANK_LEN);
  System.out.println("ALLOW_DIFFERENCES: " + ALLOW_DIFFERENCES);
  System.out.println("FILTER_ALIGNMENTS: " + FILTER_ALIGNMENTS);
  System.out.println("NUM_MAP_TASKS: " + NUM_MAP_TASKS);
  System.out.println("NUM_REDUCE_TASKS: " + NUM_REDUCE_TASKS);
  System.out.println("BLOCK_SIZE: " + BLOCK_SIZE);
  System.out.println("REDUNDANCY: " + REDUNDANCY);

  JobConf conf = new JobConf(MerReduce.class);
  conf.setJobName("CloudBurst");
  conf.setNumMapTasks(NUM_MAP_TASKS);
  conf.setNumReduceTasks(NUM_REDUCE_TASKS);

  FileInputFormat.addInputPath(conf, new Path(refpath));
  FileInputFormat.addInputPath(conf, new Path(qrypath));

  conf.set("refpath", refpath);
  conf.set("qrypath", qrypath);
  conf.set("MIN_READ_LEN", Integer.toString(MIN_READ_LEN));
  conf.set("MAX_READ_LEN", Integer.toString(MAX_READ_LEN));
  conf.set("K", Integer.toString(K));
  conf.set("SEED_LEN", Integer.toString(SEED_LEN));
  conf.set("FLANK_LEN", Integer.toString(FLANK_LEN));
  conf.set("ALLOW_DIFFERENCES", Integer.toString(ALLOW_DIFFERENCES));
  conf.set("BLOCK_SIZE", Integer.toString(BLOCK_SIZE));
  conf.set("REDUNDANCY", Integer.toString(REDUNDANCY));
  conf.set("FILTER_ALIGNMENTS", (FILTER_ALIGNMENTS ? "1" : "0"));

  conf.setMapperClass(MapClass.class);
  conf.setInputFormat(SequenceFileInputFormat.class);
  conf.setMapOutputKeyClass(BytesWritable.class);
  conf.setMapOutputValueClass(BytesWritable.class);

  conf.setReducerClass(ReduceClass.class);
  conf.setOutputKeyClass(IntWritable.class);
  conf.setOutputValueClass(BytesWritable.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);

  Path oPath = new Path(outpath);
  FileOutputFormat.setOutputPath(conf, oPath);
  System.err.println("  Removing old results");
  FileSystem.get(conf).delete(oPath);

  RunningJob rj = JobClient.runJob(conf);
  System.err.println("CloudBurst Finished");
  return rj;
}
Example 17
Source File: RandomTextWriter.java From hadoop-gpu with Apache License 2.0 | 4 votes |
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    return printUsage();
  }

  JobConf job = new JobConf(getConf());

  job.setJarByClass(RandomTextWriter.class);
  job.setJobName("random-text-writer");

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  job.setInputFormat(RandomWriter.RandomInputFormat.class);
  job.setMapperClass(Map.class);

  JobClient client = new JobClient(job);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = job.getInt("test.randomtextwrite.maps_per_host", 10);
  long numBytesToWritePerMap =
      job.getLong("test.randomtextwrite.bytes_per_map", 1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have test.randomtextwrite.bytes_per_map set to 0");
    return -2;
  }
  long totalBytesToWrite = job.getLong("test.randomtextwrite.total_bytes",
      numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    job.setLong("test.randomtextwrite.bytes_per_map", totalBytesToWrite);
  }

  Class<? extends OutputFormat> outputFormatClass =
      SequenceFileOutputFormat.class;
  List<String> otherArgs = new ArrayList<String>();
  for (int i = 0; i < args.length; ++i) {
    try {
      if ("-outFormat".equals(args[i])) {
        outputFormatClass =
            Class.forName(args[++i]).asSubclass(OutputFormat.class);
      } else {
        otherArgs.add(args[i]);
      }
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " + args[i-1]);
      return printUsage(); // exits
    }
  }

  job.setOutputFormat(outputFormatClass);
  FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));

  job.setNumMapTasks(numMaps);
  System.out.println("Running " + numMaps + " maps.");

  // reducer NONE
  job.setNumReduceTasks(0);

  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(job);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " +
      (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");

  return 0;
}
Example 18
Source File: FreeGenerator.java From anthelion with Apache License 2.0 | 4 votes |
public int run(String[] args) throws Exception {
  if (args.length < 2) {
    System.err.println("Usage: FreeGenerator <inputDir> <segmentsDir> [-filter] [-normalize]");
    System.err.println("\tinputDir\tinput directory containing one or more input files.");
    System.err.println("\t\tEach text file contains a list of URLs, one URL per line");
    System.err.println("\tsegmentsDir\toutput directory, where new segment will be created");
    System.err.println("\t-filter\trun current URLFilters on input URLs");
    System.err.println("\t-normalize\trun current URLNormalizers on input URLs");
    return -1;
  }
  boolean filter = false;
  boolean normalize = false;
  if (args.length > 2) {
    for (int i = 2; i < args.length; i++) {
      if (args[i].equals("-filter")) {
        filter = true;
      } else if (args[i].equals("-normalize")) {
        normalize = true;
      } else {
        LOG.error("Unknown argument: " + args[i] + ", exiting ...");
        return -1;
      }
    }
  }

  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("FreeGenerator: starting at " + sdf.format(start));

  JobConf job = new NutchJob(getConf());
  job.setBoolean(FILTER_KEY, filter);
  job.setBoolean(NORMALIZE_KEY, normalize);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  job.setInputFormat(TextInputFormat.class);
  job.setMapperClass(FG.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Generator.SelectorEntry.class);
  job.setPartitionerClass(URLPartitioner.class);
  job.setReducerClass(FG.class);
  String segName = Generator.generateSegmentName();
  job.setNumReduceTasks(job.getNumMapTasks());
  job.setOutputFormat(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(CrawlDatum.class);
  job.setOutputKeyComparatorClass(Generator.HashComparator.class);
  FileOutputFormat.setOutputPath(job, new Path(args[1],
      new Path(segName, CrawlDatum.GENERATE_DIR_NAME)));
  try {
    JobClient.runJob(job);
  } catch (Exception e) {
    LOG.error("FAILED: " + StringUtils.stringifyException(e));
    return -1;
  }
  long end = System.currentTimeMillis();
  LOG.info("FreeGenerator: finished at " + sdf.format(end) + ", elapsed: "
      + TimingUtil.elapsedTime(start, end));
  return 0;
}
Example 19
Source File: IndexUpdater.java From RDFS with Apache License 2.0 | 4 votes |
JobConf createJob(Configuration conf, Path[] inputPaths, Path outputPath,
    int numMapTasks, Shard[] shards) throws IOException {
  // set the starting generation for each shard
  // when a reduce task fails, a new reduce task
  // has to know where to re-start
  setShardGeneration(conf, shards);

  // iconf.set sets properties in conf
  IndexUpdateConfiguration iconf = new IndexUpdateConfiguration(conf);
  Shard.setIndexShards(iconf, shards);

  // MapTask.MapOutputBuffer uses "io.sort.mb" to decide its max buffer size
  // (max buffer size = 1/2 * "io.sort.mb").
  // Here we halve "io.sort.mb" because we use the other half of the memory
  // to build an intermediate form/index in the Combiner.
  iconf.setIOSortMB(iconf.getIOSortMB() / 2);

  // create the job configuration
  JobConf jobConf = new JobConf(conf, IndexUpdater.class);
  jobConf.setJobName(this.getClass().getName() + "_"
      + System.currentTimeMillis());

  // provided by application
  FileInputFormat.setInputPaths(jobConf, inputPaths);
  FileOutputFormat.setOutputPath(jobConf, outputPath);

  jobConf.setNumMapTasks(numMapTasks);

  // already set shards
  jobConf.setNumReduceTasks(shards.length);

  jobConf.setInputFormat(iconf.getIndexInputFormatClass());

  Path[] inputs = FileInputFormat.getInputPaths(jobConf);
  StringBuilder buffer = new StringBuilder(inputs[0].toString());
  for (int i = 1; i < inputs.length; i++) {
    buffer.append(",");
    buffer.append(inputs[i].toString());
  }
  LOG.info("mapred.input.dir = " + buffer.toString());
  LOG.info("mapred.output.dir = "
      + FileOutputFormat.getOutputPath(jobConf).toString());
  LOG.info("mapred.map.tasks = " + jobConf.getNumMapTasks());
  LOG.info("mapred.reduce.tasks = " + jobConf.getNumReduceTasks());
  LOG.info(shards.length + " shards = " + iconf.getIndexShards());
  // better if we don't create the input format instance
  LOG.info("mapred.input.format.class = "
      + jobConf.getInputFormat().getClass().getName());

  // set by the system
  jobConf.setMapOutputKeyClass(IndexUpdateMapper.getMapOutputKeyClass());
  jobConf.setMapOutputValueClass(IndexUpdateMapper.getMapOutputValueClass());
  jobConf.setOutputKeyClass(IndexUpdateReducer.getOutputKeyClass());
  jobConf.setOutputValueClass(IndexUpdateReducer.getOutputValueClass());

  jobConf.setMapperClass(IndexUpdateMapper.class);
  jobConf.setPartitionerClass(IndexUpdatePartitioner.class);
  jobConf.setCombinerClass(IndexUpdateCombiner.class);
  jobConf.setReducerClass(IndexUpdateReducer.class);

  jobConf.setOutputFormat(IndexUpdateOutputFormat.class);

  return jobConf;
}
Example 20
Source File: RandomTextWriter.java From RDFS with Apache License 2.0 | 4 votes |
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    return printUsage();
  }

  JobConf job = new JobConf(getConf());

  job.setJarByClass(RandomTextWriter.class);
  job.setJobName("random-text-writer");

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  job.setInputFormat(RandomWriter.RandomInputFormat.class);
  job.setMapperClass(Map.class);

  JobClient client = new JobClient(job);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = job.getInt("test.randomtextwrite.maps_per_host", 10);
  long numBytesToWritePerMap =
      job.getLong("test.randomtextwrite.bytes_per_map", 1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have test.randomtextwrite.bytes_per_map set to 0");
    return -2;
  }
  long totalBytesToWrite = job.getLong("test.randomtextwrite.total_bytes",
      numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    job.setLong("test.randomtextwrite.bytes_per_map", totalBytesToWrite);
  }

  Class<? extends OutputFormat> outputFormatClass =
      SequenceFileOutputFormat.class;
  List<String> otherArgs = new ArrayList<String>();
  for (int i = 0; i < args.length; ++i) {
    try {
      if ("-outFormat".equals(args[i])) {
        outputFormatClass =
            Class.forName(args[++i]).asSubclass(OutputFormat.class);
      } else {
        otherArgs.add(args[i]);
      }
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " + args[i-1]);
      return printUsage(); // exits
    }
  }

  job.setOutputFormat(outputFormatClass);
  FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));

  job.setNumMapTasks(numMaps);
  System.out.println("Running " + numMaps + " maps.");

  // reducer NONE
  job.setNumReduceTasks(0);

  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(job);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " +
      (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");

  return 0;
}