package ssdut.training.mapreduce.topten; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; public class TopTenJob { public static void main(String[] args) throws Exception { String namenode_ip = "192.168.17.10"; String hdfs = "hdfs://" + namenode_ip + ":9000"; Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfs); conf.set("mapreduce.app-submission.cross-platform", "true"); String jobName = "TopTenJob"; Job job = Job.getInstance(conf, jobName); job.setJarByClass(TopTenJob.class); job.setJar("export\\TopTen.jar"); job.setMapperClass(TopTenMapper.class); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(Text.class); job.setReducerClass(TopTenReducer.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setNumReduceTasks(1); //计算最终TopN,只能运行一个Reduce任务 String dataDir = "/expr/topten/data"; String outputDir = "/expr/topten/output"; Path inPath = new Path(hdfs + dataDir); Path outPath = new Path(hdfs + outputDir); FileInputFormat.addInputPath(job, inPath); FileOutputFormat.setOutputPath(job, outPath); FileSystem fs = FileSystem.get(conf); if(fs.exists(outPath)) { fs.delete(outPath, true); } System.out.println( "Job: " + jobName + " is running..."); if(job.waitForCompletion(true)) { System.out.println("success!"); System.exit(0); } else { System.out.println("failed!"); System.exit(1); } } }