Java Code Examples for org.apache.hadoop.mapreduce.Job#setJar()

The following examples show how to use org.apache.hadoop.mapreduce.Job#setJar(). Each example comes from an open-source project; the source file, project, and license are noted above each listing.
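As a quick orientation: setJarByClass() asks Hadoop to locate the jar that contains a given class, while setJar() names a client-side jar file explicitly. Both set the same underlying job-jar property, so when both are called the later call wins, which is why many examples below call setJar() after setJarByClass(). A minimal sketch of the pattern (the jar path and the WordCount class are placeholders, not taken from any project here):

// Minimal sketch: configure which jar is shipped with the job.
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "example");
job.setJarByClass(WordCount.class);   // let Hadoop find the jar containing WordCount
job.setJar("target/wordcount.jar");   // or name a specific local jar; this later call takes precedence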
Example 1
Source File: DistCacheConfigurer.java    From titan1withtp3.1 with Apache License 2.0
@Override
public void configure(Job job) throws IOException {

    Configuration conf = job.getConfiguration();
    FileSystem localFS = FileSystem.getLocal(conf);
    FileSystem jobFS = FileSystem.get(conf);

    for (Path p : getLocalPaths()) {
        Path stagedPath = uploadFileIfNecessary(localFS, p, jobFS);
        // Calling this method decompresses the archive and makes Hadoop
        // handle its classfiles individually.  This leads to crippling
        // overhead times (10+ seconds) even with the LocalJobRunner
        // courtesy of o.a.h.yarn.util.FSDownload.changePermissions
        // copying and chmodding each classfile copy file individually.
        //job.addArchiveToClassPath(p);
        // Just add the compressed archive instead:
        job.addFileToClassPath(stagedPath);
    }

    // We don't really need to set a mapred job jar here,
    // but doing so suppresses a warning
    String mj = getMapredJar();
    if (null != mj)
        job.setJar(mj);
}
 
Example 2
Source File: Step2.java    From MapReduce-Demo with MIT License
public static boolean run(Configuration config, Map<String, String> paths) 
		throws IOException, ClassNotFoundException, InterruptedException {
	String jobName = "step2";
	Job job = Job.getInstance(config, jobName);
	job.setJarByClass(Step2.class);
	job.setJar("export\\ItemCF.jar");
	job.setMapperClass(Step2_Mapper.class);
	job.setReducerClass(Step2_Reducer.class);
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(Text.class);

	Path inPath = new Path(paths.get("Step2Input"));
	Path outpath = new Path(paths.get("Step2Output"));
	FileInputFormat.addInputPath(job, inPath);
	FileOutputFormat.setOutputPath(job, outpath);		
	FileSystem fs = FileSystem.get(config);
	if (fs.exists(outpath)) {
		fs.delete(outpath, true);
	}
	
	return job.waitForCompletion(true);	
}
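Note that the MapReduce-Demo examples hard-code a Windows-style relative path (export\\ItemCF.jar), which only resolves when the driver runs from the project root on Windows. A more portable variant (a sketch, not from the project) builds the path with java.io.File:

// Build the local jar path without hard-coded backslashes.
job.setJar(new java.io.File("export", "ItemCF.jar").getPath());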
 
Example 3
Source File: JobControlRun.java    From MapReduce-Demo with MIT License
public static void main(String[] args) throws IOException {
	String namenode_ip = "192.168.17.10";
	String hdfs = "hdfs://" + namenode_ip + ":9000";			
	Configuration conf = new Configuration();		
	conf.set("fs.defaultFS", hdfs);
	conf.set("mapreduce.app-submission.cross-platform", "true");
	
	Job job1 = Job.getInstance(conf,"Decompose");
	job1.setJarByClass(JobControlRun.class);
	job1.setJar("export\\mutualFriend.jar");
	job1.setMapperClass(DecomposeFriendsMapper.class);
	job1.setReducerClass(DecomposeFriendsReducer.class);
	job1.setOutputKeyClass(Text.class);
	job1.setOutputValueClass(Text.class);
	
	Path input = new Path(hdfs+"/workspace/mutualFriends/data");
	Path output1 = new Path(hdfs+"/workspace/mutualFriends/output_Dec");
	FileInputFormat.addInputPath(job1, input);
	FileOutputFormat.setOutputPath(job1, output1);
	FileSystem fs = FileSystem.get(conf);
	if (fs.exists(output1)) {
		fs.delete(output1, true);
		System.out.println("Output directory deleted");// oddly, this message prints only once
	}
	// ControlledJob wraps a job for use with JobControl
	ControlledJob ctrJob1=new ControlledJob(conf);
	ctrJob1.setJob(job1);// add job1 to its control wrapper
	
	Job job2 = Job.getInstance(conf, "Merge");
	job2.setJarByClass(JobControlRun.class);
	job2.setJar("export\\mutualFriend.jar");
	job2.setMapperClass(MergeFriendsMapper.class);
	job2.setReducerClass(MergeFriendsReducer.class);
	job2.setOutputKeyClass(Text.class);
	job2.setOutputValueClass(Text.class);
	
	Path input2 = new Path(hdfs+"/workspace/mutualFriends/output_Dec");
	Path output2 = new Path(hdfs+"/workspace/mutualFriends/output_Meg");
	FileInputFormat.addInputPath(job2, input2);
	FileOutputFormat.setOutputPath(job2, output2);
	if (fs.exists(output2)) {
		fs.delete(output2, true);
	}
	ControlledJob ctrJob2 = new ControlledJob(conf);
	ctrJob2.setJob(job2);// add job2 to its control wrapper
	
	// Declare the dependency: job2 runs only after job1 completes
	ctrJob2.addDependingJob(ctrJob1);
	
	// The master controller that monitors and schedules job1 and job2
	JobControl jobControl=new JobControl("JobControl");
	jobControl.addJob(ctrJob1);
	jobControl.addJob(ctrJob2);
	// Start the controller thread
	Thread T=new Thread(jobControl);
	T.start();
	while(true){
		if(jobControl.allFinished()){// wait until all jobs have finished
			System.out.println(jobControl.getSuccessfulJobList());// print info about the successful jobs
			jobControl.stop();
			break;
		}
	}
	/**
	 * The printed control info looks like this:
	 * [job name:	Decompose
		job id:	JobControl0
		job state:	SUCCESS
		job mapred id:	job_local445604445_0001
		job message:	just initialized
		job has no depending job:	
		, job name:	Merge
		job id:	JobControl1
		job state:	SUCCESS
		job mapred id:	job_local1897659504_0002
		job message:	just initialized
		job has 1 dependeng jobs:
			 depending job 0:	Decompose
		]
	 */
}
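One caveat: the while(true) loop above busy-waits on allFinished(), keeping a CPU core spinning until both jobs complete. A gentler polling loop (a sketch; the 500 ms interval is an arbitrary choice) sleeps between checks:

// Poll JobControl without spinning; allFinished(), getSuccessfulJobList(),
// and stop() are the same JobControl calls used above.
while (!jobControl.allFinished()) {
    try {
        Thread.sleep(500); // arbitrary poll interval
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        break;
    }
}
System.out.println(jobControl.getSuccessfulJobList());
jobControl.stop();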
 
Example 4
Source File: MedianStdDevJob.java    From MapReduce-Demo with MIT License
public static void main(String[] args) throws Exception {
	//1. Configure HDFS connection settings
	String namenode_ip = "192.168.17.10";
	String hdfs = "hdfs://" + namenode_ip + ":9000";			
	Configuration conf = new Configuration();
	conf.set("fs.defaultFS", hdfs);
	conf.set("mapreduce.app-submission.cross-platform", "true");

	//2. Configure the MapReduce job
	String jobName = "MedianStdDevJob";					// job name
	Job job = Job.getInstance(conf, jobName);
	job.setJarByClass(MedianStdDevJob.class);			// driver class used at runtime
	job.setJar("export\\MedianStdDevJob.jar");			// local jar to submit
	job.setMapperClass(MedianStdDevMapper.class);		// Mapper class
	job.setMapOutputKeyClass(IntWritable.class);		// Mapper output key type
	job.setMapOutputValueClass(IntWritable.class);		// Mapper output value type
	job.setReducerClass(MedianStdDevReducer.class);		// Reducer class
	job.setOutputKeyClass(IntWritable.class);			// reducer output key type
	job.setOutputValueClass(MedianStdDevTuple.class); 	// reducer output value type
	
	//3. Set the job input and output paths
	String dataDir = "/expr/medianstddev/data";			// input data directory
	String outputDir = "/expr/medianstddev/output";		// output directory
	Path inPath = new Path(hdfs + dataDir);
	Path outPath = new Path(hdfs + outputDir);
	FileInputFormat.addInputPath(job, inPath);
	FileOutputFormat.setOutputPath(job, outPath);
	FileSystem fs = FileSystem.get(conf);
	if(fs.exists(outPath)) {
		fs.delete(outPath, true);
	}
	
	//4. Run the job
	System.out.println("Job: " + jobName + " is running...");
	if(job.waitForCompletion(true)) {
		System.out.println("success!");
		System.exit(0);
	} else {
		System.out.println("failed!");
		System.exit(1);
	}
}
 
Example 5
Source File: TestMRJobs.java    From hadoop with Apache License 2.0
public void _testDistributedCache(String jobJarPath) throws Exception {
  if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
         + " not found. Not running test.");
    return;
  }

  // Create a temporary file of length 1.
  Path first = createTempFile("distributed.first", "x");
  // Create two jars with a single file inside them.
  Path second =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2);
  Path third =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3);
  Path fourth =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4);

  Job job = Job.getInstance(mrCluster.getConfig());
  
  // Set the job jar to a new "dummy" jar so we can check that it's extracted
  // properly
  job.setJar(jobJarPath);
  // Because the job jar is a "dummy" jar, we need to include the jar with
  // DistributedCacheChecker or it won't be able to find it
  Path distributedCacheCheckerJar = new Path(
          JarFinder.getJar(DistributedCacheChecker.class));
  job.addFileToClassPath(distributedCacheCheckerJar.makeQualified(
          localFs.getUri(), distributedCacheCheckerJar.getParent()));
  
  job.setMapperClass(DistributedCacheChecker.class);
  job.setOutputFormatClass(NullOutputFormat.class);

  FileInputFormat.setInputPaths(job, first);
  // Creates the Job Configuration
  job.addCacheFile(
      new URI(first.toUri().toString() + "#distributed.first.symlink"));
  job.addFileToClassPath(second);
  // The AppMaster jar itself
  job.addFileToClassPath(
          APP_JAR.makeQualified(localFs.getUri(), APP_JAR.getParent())); 
  job.addArchiveToClassPath(third);
  job.addCacheArchive(fourth.toUri());
  job.setMaxMapAttempts(1); // speed up failures

  job.submit();
  String trackingUrl = job.getTrackingURL();
  String jobId = job.getJobID().toString();
  Assert.assertTrue(job.waitForCompletion(false));
  Assert.assertTrue("Tracking URL was " + trackingUrl +
                    " but didn't Match Job ID " + jobId ,
        trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
}
 
Example 6
Source File: MultInput.java    From MapReduce-Demo with MIT License
public static void main(String[] args) throws Exception {		
	//1. Configure HDFS connection settings
	String namenode_ip = "192.168.17.10";
	String hdfs = "hdfs://" + namenode_ip + ":9000";			
	Configuration conf = new Configuration();
	conf.set("fs.defaultFS", hdfs);
	conf.set("mapreduce.app-submission.cross-platform", "true");

	//2. Configure the MapReduce job
	String jobName = "MultInput";					// job name
	Job job = Job.getInstance(conf, jobName);
	job.setJarByClass(MultInput.class);				// driver class used at runtime
	job.setJar("export\\MultInput.jar");			// local jar to submit
	job.setMapperClass(MultInputMapper.class);		// Mapper class
	job.setMapOutputKeyClass(Text.class);			// Mapper output key type
	job.setMapOutputValueClass(IntWritable.class);	// Mapper output value type
	job.setReducerClass(MultInputReducer.class);	// Reducer class
	job.setOutputKeyClass(Text.class);				// reducer output key type
	job.setOutputValueClass(IntWritable.class); 	// reducer output value type
	
	//3. Set the job input and output paths	
	// Option 1: FileInputFormat.addInputPath()
	FileInputFormat.addInputPath(job, new Path(hdfs+"/expr/multinput/data/txt1"));// input directory 1
	FileInputFormat.addInputPath(job, new Path(hdfs+"/expr/multinput/data/txt2"));// input directory 2
	
	// Option 2: FileInputFormat.addInputPaths()
	//FileInputFormat.addInputPaths(job, String.join(",", hdfs+"/expr/multinput/data/txt1", hdfs+"/expr/multinput/data/txt2"));
	
	// Option 3: FileInputFormat.setInputPaths()
	//FileInputFormat.setInputPaths(job, String.join(",", hdfs+"/expr/multinput/data/txt1", hdfs+"/expr/multinput/data/txt2") );
	
	Path outPath = new Path(hdfs + "/expr/multinput/output");		// output directory
	FileOutputFormat.setOutputPath(job, outPath);
	FileSystem fs = FileSystem.get(conf);
	if(fs.exists(outPath)) {
		fs.delete(outPath, true);
	}
	
	//4. Run the job
	System.out.println("Job: " + jobName + " is running...");
	if(job.waitForCompletion(true)) {
		System.out.println("success!");
		System.exit(0);
	} else {
		System.out.println("failed!");
		System.exit(1);
	}
}
 
Example 7
Source File: Missed.java    From MapReduce-Demo with MIT License
public static void main(String[] args) throws Exception {		
	//1. Configure HDFS connection settings
	String namenode_ip = "192.168.17.10";
	String hdfs = "hdfs://" + namenode_ip + ":9000";			
	Configuration conf = new Configuration();
	conf.set("fs.defaultFS", hdfs);
	conf.set("mapreduce.app-submission.cross-platform", "true");

	//2. Configure the MapReduce job
	String jobName = "Missed";						// job name
	Job job = Job.getInstance(conf, jobName);
	job.setJarByClass(Missed.class);				// driver class used at runtime
	job.setJar("export\\Missed.jar");				// local jar to submit
	job.setMapperClass(MissedMapper.class);			// Mapper class
	job.setMapOutputKeyClass(Text.class);			// Mapper output key type
	job.setMapOutputValueClass(NullWritable.class);	// Mapper output value type
	job.setReducerClass(MissedReducer.class);		// Reducer class		
	// Define the named output's name, output format, key type, and value type
	MultipleOutputs.addNamedOutput(job, "missed", TextOutputFormat.class, Text.class, NullWritable.class);
	
	//3. Set the job input and output paths
	String dataDir = "/expr/weblog/data";			// input data directory	
	String outputDir = "/expr/weblog/output2";		// output directory
	Path inPath = new Path(hdfs + dataDir);
	Path outPath = new Path(hdfs + outputDir);
	FileInputFormat.addInputPath(job, inPath);
	FileOutputFormat.setOutputPath(job, outPath);
	FileSystem fs = FileSystem.get(conf);
	if(fs.exists(outPath)) {
		fs.delete(outPath, true);
	}
	
	//4. Run the job
	System.out.println("Job: " + jobName + " is running...");
	if(job.waitForCompletion(true)) {
		System.out.println("success!");
		System.exit(0);
	} else {
		System.out.println("failed!");
		System.exit(1);
	}
}
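MissedReducer itself is not shown, but with MultipleOutputs the reducer typically opens the named output in setup(), writes through it, and closes it in cleanup(). A sketch of that pattern, with the class body assumed here rather than taken from the project (the types match the job configuration above):

public static class MissedReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
    private MultipleOutputs<Text, NullWritable> mos;

    @Override
    protected void setup(Context context) {
        mos = new MultipleOutputs<>(context);
    }

    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        // Route records to the "missed" named output defined in the driver.
        mos.write("missed", key, NullWritable.get());
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        mos.close(); // flush and close all named outputs
    }
}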
 
Example 8
Source File: DateGroup.java    From MapReduce-Demo with MIT License
public static void main(String[] args) throws Exception {		
	//1. Configure HDFS connection settings
	String namenode_ip = "192.168.17.10";
	String hdfs = "hdfs://" + namenode_ip + ":9000";			
	Configuration conf = new Configuration();
	conf.set("fs.defaultFS", hdfs);
	conf.set("mapreduce.app-submission.cross-platform", "true");

	//2. Configure the MapReduce job
	String jobName = "DateGroup";					// job name
	Job job = Job.getInstance(conf, jobName);
	job.setJarByClass(DateGroup.class);				// driver class used at runtime
	job.setJar("export\\DateGroup.jar");			// local jar to submit
	job.setMapperClass(DateGroupMapper.class);		// Mapper class
	job.setMapOutputKeyClass(Text.class);			// Mapper output key type
	job.setMapOutputValueClass(IntWritable.class);	// Mapper output value type
	job.setReducerClass(DateGroupReducer.class);	// Reducer class
	job.setOutputKeyClass(Text.class);				// reducer output key type
	job.setOutputValueClass(Text.class); 			// reducer output value type
	
	//3. Set the job input and output paths
	String dataDir = "/expr/datecount/data";			// input data directory	
	String outputDir = "/expr/datecount/output_group";	// output directory
	Path inPath = new Path(hdfs + dataDir);
	Path outPath = new Path(hdfs + outputDir);
	FileInputFormat.addInputPath(job, inPath);
	FileOutputFormat.setOutputPath(job, outPath);
	FileSystem fs = FileSystem.get(conf);
	if(fs.exists(outPath)) {
		fs.delete(outPath, true);
	}
	
	//4. Run the job
	System.out.println("Job: " + jobName + " is running...");
	if(job.waitForCompletion(true)) {
		System.out.println("success!");
		System.exit(0);
	} else {
		System.out.println("failed!");
		System.exit(1);
	}
}
 
Example 9
Source File: FlowCount.java    From MapReduce-Demo with MIT License
public static void main(String[] args) throws Exception {		
	//1. Configure HDFS connection settings
	String namenode_ip = "192.168.17.10";
	String hdfs = "hdfs://" + namenode_ip + ":9000";			
	Configuration conf = new Configuration();
	conf.set("fs.defaultFS", hdfs);
	conf.set("mapreduce.app-submission.cross-platform", "true");

	//2. Configure the MapReduce job
	String jobName = "FlowCount";					// job name
	Job job = Job.getInstance(conf, jobName);
	job.setJarByClass(FlowCount.class);				// driver class used at runtime
	job.setJar("export\\FlowCount.jar");			// local jar to submit
	job.setMapperClass(FlowCountMapper.class);		// Mapper class
	job.setMapOutputKeyClass(Text.class);			// Mapper output key type
	job.setMapOutputValueClass(IntWritable.class);	// Mapper output value type
	job.setReducerClass(FlowCountReducer.class);	// Reducer class
	job.setOutputKeyClass(Text.class);				// reducer output key type
	job.setOutputValueClass(IntWritable.class); 	// reducer output value type
	
	//3. Set the job input and output paths
	String dataDir = "/expr/weblog/data";			// input data directory	
	String outputDir = "/expr/weblog/output1";		// output directory
	Path inPath = new Path(hdfs + dataDir);
	Path outPath = new Path(hdfs + outputDir);
	FileInputFormat.addInputPath(job, inPath);
	FileOutputFormat.setOutputPath(job, outPath);
	FileSystem fs = FileSystem.get(conf);
	if(fs.exists(outPath)) {
		fs.delete(outPath, true);
	}
	
	//4. Run the job
	System.out.println("Job: " + jobName + " is running...");
	if(job.waitForCompletion(true)) {
		System.out.println("success!");
		System.exit(0);
	} else {
		System.out.println("failed!");
		System.exit(1);
	}
}
 
Example 10
Source File: TestMRJobs.java    From hadoop with Apache License 2.0
private void testSleepJobInternal(boolean useRemoteJar) throws Exception {
  LOG.info("\n\n\nStarting testSleepJob: useRemoteJar=" + useRemoteJar);

  if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
             + " not found. Not running test.");
    return;
  }

  Configuration sleepConf = new Configuration(mrCluster.getConfig());
  // set master address to local to test that local mode applied iff framework == local
  sleepConf.set(MRConfig.MASTER_ADDRESS, "local");	
  
  SleepJob sleepJob = new SleepJob();
  sleepJob.setConf(sleepConf);
 
  // job with 3 maps (10s) and numReduces reduces (5s), 1 "record" each:
  Job job = sleepJob.createJob(3, numSleepReducers, 10000, 1, 5000, 1);

  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  if (useRemoteJar) {
    final Path localJar = new Path(
        ClassUtil.findContainingJar(SleepJob.class));
    ConfigUtil.addLink(job.getConfiguration(), "/jobjars",
        localFs.makeQualified(localJar.getParent()).toUri());
    job.setJar("viewfs:///jobjars/" + localJar.getName());
  } else {
    job.setJarByClass(SleepJob.class);
  }
  job.setMaxMapAttempts(1); // speed up failures
  job.submit();
  String trackingUrl = job.getTrackingURL();
  String jobId = job.getJobID().toString();
  boolean succeeded = job.waitForCompletion(true);
  Assert.assertTrue(succeeded);
  Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
  Assert.assertTrue("Tracking URL was " + trackingUrl +
                    " but didn't Match Job ID " + jobId ,
        trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
  verifySleepJobCounters(job);
  verifyTaskProgress(job);
  
  // TODO later:  add explicit "isUber()" checks of some sort (extend
  // JobStatus?)--compare against MRJobConfig.JOB_UBERTASK_ENABLE value
}
 
Example 11
Source File: TempSort.java    From MapReduce-Demo with MIT License
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		String hdfs = "hdfs://192.168.17.10:9000";
		Configuration conf = new Configuration();
		conf.set("fs.defaultFS", hdfs);
		conf.set("mapreduce.app-submission.cross-platform", "true");
		// Configure the job
		String jobName = "TempSort";
		Job job = Job.getInstance(conf, jobName);
		job.setJarByClass(TempSort.class);
		job.setJar("export\\TempSort.jar");
		// Map
		job.setMapperClass(TempSortMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);
		// Reduce
		job.setReducerClass(TempSortReducer.class);
		// job-wide output types
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		// Sort
//		job.setSortComparatorClass(MySort.class);
		// Partition
		job.setPartitionerClass(YearPartitioner.class);
		job.setNumReduceTasks(3);
		//3. Set the job input and output paths
		String dataDir = "/expr/test/data";	// input data directory	
		String outputDir = "/expr/test/output";				// output directory
		Path inPath = new Path(hdfs + dataDir);
		Path outPath = new Path(hdfs + outputDir);
		FileInputFormat.addInputPath(job, inPath);
		FileOutputFormat.setOutputPath(job, outPath);
		FileSystem fs = FileSystem.get(conf);
		if(fs.exists(outPath)) {
			fs.delete(outPath, true);
		}
		
		//4. Run the job
		System.out.println("Job: " + jobName + " is running...");
		if(job.waitForCompletion(true)) {
			System.out.println("success!");
			System.exit(0);
		} else {
			System.out.println("failed!");
			System.exit(1);
		}
	}
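YearPartitioner is a project-specific class that is not shown. As a sketch of what such a partitioner might look like, assuming map output keys begin with a four-digit year (both the key layout and the class body are assumptions, not the project's code):

// Routes each record to a reducer based on the year prefix of its key.
public static class YearPartitioner extends Partitioner<Text, IntWritable> {
    @Override
    public int getPartition(Text key, IntWritable value, int numPartitions) {
        // Assumes keys like "2015-07-01 ..."; numPartitions comes from setNumReduceTasks().
        int year = Integer.parseInt(key.toString().substring(0, 4));
        return year % numPartitions;
    }
}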
 
Example 12
Source File: KeyValueInput.java    From MapReduce-Demo with MIT License
public static void main(String[] args) throws Exception {		
	//1. Configure HDFS connection settings
	String namenode_ip = "192.168.17.10";
	String hdfs = "hdfs://" + namenode_ip + ":9000";			
	Configuration conf = new Configuration();
	conf.set("fs.defaultFS", hdfs);
	conf.set("mapreduce.app-submission.cross-platform", "true");
	conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", ":");	// key/value separator for input lines
	
	//2. Configure the MapReduce job
	String jobName = "KeyValueInput";					// job name
	Job job = Job.getInstance(conf, jobName);
	job.setJarByClass(KeyValueInput.class);				// driver class used at runtime
	job.setJar("export\\KeyValueInput.jar");			// local jar to submit
	job.setMapperClass(KeyValueInputMapper.class);		// Mapper class
	job.setMapOutputKeyClass(Text.class);				// Mapper output key type
	job.setMapOutputValueClass(IntWritable.class);		// Mapper output value type
	job.setReducerClass(KeyValueInputReducer.class);	// Reducer class
	job.setOutputKeyClass(Text.class);					// reducer output key type
	job.setOutputValueClass(IntWritable.class); 		// reducer output value type
	
	job.setInputFormatClass(KeyValueTextInputFormat.class);	// input format class
	
	//3. Set the job input and output paths
	String dataDir = "/expr/kvinput/data";			// input data directory	
	String outputDir = "/expr/kvinput/output";		// output directory
	Path inPath = new Path(hdfs + dataDir);
	Path outPath = new Path(hdfs + outputDir);
	FileInputFormat.addInputPath(job, inPath);
	FileOutputFormat.setOutputPath(job, outPath);
	FileSystem fs = FileSystem.get(conf);
	if(fs.exists(outPath)) {
		fs.delete(outPath, true);
	}
	
	//4. Run the job
	System.out.println("Job: " + jobName + " is running...");
	if(job.waitForCompletion(true)) {
		System.out.println("success!");
		System.exit(0);
	} else {
		System.out.println("failed!");
		System.exit(1);
	}
}
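With KeyValueTextInputFormat and the ':' separator configured above, each input line is split at the first ':' into a Text key and a Text value, so the mapper's input types are Text/Text. A sketch of a mapper with the matching signature, assuming the value part is an integer count (the project's actual KeyValueInputMapper may do something different):

// For a line like "apple:3", the mapper receives key = "apple", value = "3".
public static class KeyValueInputMapper extends Mapper<Text, Text, Text, IntWritable> {
    @Override
    protected void map(Text key, Text value, Context context)
            throws IOException, InterruptedException {
        context.write(key, new IntWritable(Integer.parseInt(value.toString())));
    }
}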
 
Example 13
Source File: DateSortAsc.java    From MapReduce-Demo with MIT License
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
	// 1. Configure HDFS connection settings
	String namenode_ip = "192.168.17.10";
	String hdfs = "hdfs://" + namenode_ip + ":9000";
	Configuration conf = new Configuration();
	conf.set("fs.defaultFS", hdfs);
	conf.set("mapreduce.app-submission.cross-platform", "true");

	// 2. Configure the MapReduce job
	String jobName = "DateSortAsc"; // job name
	Job job = Job.getInstance(conf, jobName);
	job.setJarByClass(DateSortAsc.class); // driver class used at runtime
	job.setJar("export\\DateSortAsc.jar"); // local jar to submit
	
	job.setMapperClass(SortMapper.class); // Mapper class
	job.setMapOutputKeyClass(IntWritable.class); // Mapper output key type
	job.setMapOutputValueClass(Text.class); // Mapper output value type
	
	job.setReducerClass(SortReducer.class); // Reducer class
	job.setOutputKeyClass(Text.class); // reducer output key type
	job.setOutputValueClass(IntWritable.class); // reducer output value type

	// 3. Set the job input and output paths
	String dataDir = "/workspace/dateSort/data"; // input data directory
	String outputDir = "/workspace/dateSort/output"; // output directory
	Path inPath = new Path(hdfs + dataDir);
	Path outPath = new Path(hdfs + outputDir);
	FileInputFormat.addInputPath(job, inPath);
	FileOutputFormat.setOutputPath(job, outPath);
	FileSystem fs = FileSystem.get(conf);
	if (fs.exists(outPath)) {
		fs.delete(outPath, true);
	}

	// 4. Run the job
	System.out.println("Job: " + jobName + " is running...");
	if (job.waitForCompletion(true)) {
		System.out.println("success!");
		System.exit(0);
	} else {
		System.out.println("failed!");
		System.exit(1);
	}
}
 
Example 14
Source File: DateGroup2.java    From MapReduce-Demo with MIT License
public static void main(String[] args) throws Exception {		
	//1. Configure HDFS connection settings
	String namenode_ip = "192.168.17.10";
	String hdfs = "hdfs://" + namenode_ip + ":9000";			
	Configuration conf = new Configuration();
	conf.set("fs.defaultFS", hdfs);
	conf.set("mapreduce.app-submission.cross-platform", "true");

	//2. Configure the MapReduce job
	String jobName = "DateGroup2";					// job name
	Job job = Job.getInstance(conf, jobName);
	job.setJarByClass(DateGroup2.class);			// driver class used at runtime
	job.setJar("export\\DateGroup2.jar");			// local jar to submit
	job.setMapperClass(DateGroup2Mapper.class);		// Mapper class
	job.setMapOutputKeyClass(Text.class);			// Mapper output key type
	job.setMapOutputValueClass(IntWritable.class);	// Mapper output value type
	job.setReducerClass(DateGroup2Reducer.class);	// Reducer class
	job.setOutputKeyClass(Text.class);				// reducer output key type
	job.setOutputValueClass(Text.class); 			// reducer output value type
	job.setGroupingComparatorClass(MyGroup.class);	// custom grouping comparator class
	//3. Set the job input and output paths
	String dataDir = "/expr/datecount/data";			// input data directory	
	String outputDir = "/expr/datecount/output_group2";	// output directory
	Path inPath = new Path(hdfs + dataDir);
	Path outPath = new Path(hdfs + outputDir);
	FileInputFormat.addInputPath(job, inPath);
	FileOutputFormat.setOutputPath(job, outPath);
	FileSystem fs = FileSystem.get(conf);
	if(fs.exists(outPath)) {
		fs.delete(outPath, true);
	}
	
	//4. Run the job
	System.out.println("Job: " + jobName + " is running...");
	if(job.waitForCompletion(true)) {
		System.out.println("success!");
		System.exit(0);
	} else {
		System.out.println("failed!");
		System.exit(1);
	}
}
 
Example 15
Source File: FlowStatistics.java    From MapReduce-Demo with MIT License
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		// Configure HDFS connection settings
		String namenode_ip = "192.168.17.10";
		String hdfs = "hdfs://"+namenode_ip+":9000";
		Configuration conf = new Configuration();
		conf.set("fs.defaultFS", hdfs);
		conf.set("mapreduce.app-submission.cross-platform", "true");
		
		// Configure the job
		String jobName = "FlowStatistics";
		Job job = Job.getInstance(conf, jobName);
		job.setJarByClass(FlowStatistics.class);
		job.setJar("export\\FlowStatistics.jar");
		// Map
		job.setMapperClass(FlowMapper.class);// variant 1
//		job.setMapperClass(FlowWritableMapper.class);
		// can be omitted here because they match the Reducer's output types
//		job.setMapOutputKeyClass(Text.class);
//		job.setMapOutputValueClass(FlowWritable.class);
		// Reduce
		job.setReducerClass(FlowReducer.class);// variant 1
//		job.setReducerClass(FlowWritableReducer.class);
		// output key/value types
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);// variant 1
//		job.setOutputValueClass(FlowWritable.class);
		
		// Set the job input and output paths
		String dataDir = "/workspace/flowStatistics/data";
		String outputDir = "/workspace/flowStatistics/output";
		Path inPath = new Path(hdfs+dataDir);
		Path outPath = new Path(hdfs+outputDir);
		FileInputFormat.addInputPath(job, inPath);
		FileOutputFormat.setOutputPath(job, outPath);
		FileSystem fs = FileSystem.get(conf);
		if(fs.exists(outPath)) {
			fs.delete(outPath, true);
		}
		
		// Run the job
		System.out.println("Job: " + jobName + " is running...");
		if(job.waitForCompletion(true)) {
			System.out.println("success!");
			System.exit(0);
		} else {
			System.out.println("failed!");
			System.exit(1);
		}
	}
 
Example 16
Source File: DatePartition.java    From MapReduce-Demo with MIT License
public static void main(String[] args) throws Exception {		
		//1. Configure HDFS connection settings
		String namenode_ip = "192.168.17.10";
		String hdfs = "hdfs://" + namenode_ip + ":9000";			
		Configuration conf = new Configuration();
		conf.set("fs.defaultFS", hdfs);
		conf.set("mapreduce.app-submission.cross-platform", "true");

		//2. Configure the MapReduce job
		String jobName = "DatePartition";					// job name
		Job job = Job.getInstance(conf, jobName);
		job.setJarByClass(DatePartition.class);				// driver class used at runtime
		job.setJar("export\\DatePartition.jar");			// local jar to submit
//		Map
		job.setMapperClass(DatePartitionMapper.class);		// Mapper class
		job.setMapOutputKeyClass(Text.class);				// Mapper output key type
		job.setMapOutputValueClass(IntWritable.class);		// Mapper output value type
//		Reduce
		job.setReducerClass(DatePartitionReducer.class);	// Reducer class
//		Job-wide
		job.setOutputKeyClass(Text.class);					// reducer output key type
		job.setOutputValueClass(IntWritable.class);			// reducer output value type
//		Partition
		job.setPartitionerClass(YearPartitioner.class);		// custom partitioner
		job.setNumReduceTasks(10); 	// number of reduce tasks; passed to Partitioner.getPartition() as its numPartitions argument
		
		//3. Set the job input and output paths
		String dataDir = "/expr/datecount/data";				// input data directory	
		String outputDir = "/expr/datecount/output_partition";	// output directory
		Path inPath = new Path(hdfs + dataDir);
		Path outPath = new Path(hdfs + outputDir);
		FileInputFormat.addInputPath(job, inPath);
		FileOutputFormat.setOutputPath(job, outPath);
		FileSystem fs = FileSystem.get(conf);
		if(fs.exists(outPath)) {
			fs.delete(outPath, true);
		}
		
		//4. Run the job
		System.out.println("Job: " + jobName + " is running...");
		if(job.waitForCompletion(true)) {
			System.out.println("success!");
			System.exit(0);
		} else {
			System.out.println("failed!");
			System.exit(1);
		}
	}
 
Example 17
Source File: FlowPartition.java    From MapReduce-Demo with MIT License
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
	// Configure HDFS connection settings
	String namenode_ip = "192.168.17.10";
	String hdfs = "hdfs://" + namenode_ip + ":9000";
	Configuration conf = new Configuration();
	conf.set("fs.defaultFS", hdfs);
	conf.set("mapreduce.app-submission.cross-platform", "true");

	// Configure the job
	String jobName = "FlowPartition";
	Job job = Job.getInstance(conf, jobName);
	job.setJarByClass(FlowPartition.class);
	job.setJar("export\\FlowPartition.jar");
	// Map
	job.setMapperClass(FlowPartitionMapper.class);
	// Reduce
	job.setReducerClass(FlowPartitionReducer.class);
	// output key/value types
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(FlowWritable.class);
	// Set the partitioner class and the number of reducers
	job.setPartitionerClass(PhoneNumberPartitioner.class);
	job.setNumReduceTasks(4);

	// Set the job input and output paths
	String dataDir = "/workspace/flowStatistics/data";
	String outputDir = "/workspace/flowStatistics/output_partitions";
	Path inPath = new Path(hdfs + dataDir);
	Path outPath = new Path(hdfs + outputDir);
	FileInputFormat.addInputPath(job, inPath);
	FileOutputFormat.setOutputPath(job, outPath);
	FileSystem fs = FileSystem.get(conf);
	if (fs.exists(outPath)) {
		fs.delete(outPath, true);
	}

	// Run the job
	System.out.println("Job: " + jobName + " is running...");
	if (job.waitForCompletion(true)) {
		System.out.println("success!");
		System.exit(0);
	} else {
		System.out.println("failed!");
		System.exit(1);
	}
}
 
Example 18
Source File: FixedLengthInput2.java    From MapReduce-Demo with MIT License
public static void main(String[] args) throws Exception {		
	//1. Configure HDFS connection settings
	String namenode_ip = "192.168.17.10";
	String hdfs = "hdfs://" + namenode_ip + ":9000";			
	Configuration conf = new Configuration();
	conf.set("fs.defaultFS", hdfs);
	conf.set("mapreduce.app-submission.cross-platform", "true");
	conf.setInt(FixedLengthInputFormat.FIXED_RECORD_LENGTH, 13);
	
	//2. Configure the MapReduce job
	String jobName = "FixedLengthInput2";					// job name
	Job job = Job.getInstance(conf, jobName);
	job.setJarByClass(FixedLengthInput2.class);				// driver class used at runtime
	job.setJar("export\\FixedLengthInput2.jar");			// local jar to submit
	job.setMapperClass(FixedLengthInput2Mapper.class);		// Mapper class
	job.setMapOutputKeyClass(Text.class);					// Mapper output key type
	job.setMapOutputValueClass(IntWritable.class);			// Mapper output value type
	job.setReducerClass(FixedLengthInput2Reducer.class);	// Reducer class
	job.setOutputKeyClass(Text.class);						// reducer output key type
	job.setOutputValueClass(IntWritable.class); 			// reducer output value type
	
	job.setInputFormatClass(FixedLengthInputFormat.class);	// input format class
	
	//3. Set the job input and output paths
	String dataDir = "/expr/fixedinput/data";			// input data directory	
	String outputDir = "/expr/fixedinput/output";		// output directory
	Path inPath = new Path(hdfs + dataDir);
	Path outPath = new Path(hdfs + outputDir);
	FileInputFormat.addInputPath(job, inPath);
	FileOutputFormat.setOutputPath(job, outPath);
	FileSystem fs = FileSystem.get(conf);
	if(fs.exists(outPath)) {
		fs.delete(outPath, true);
	}
	
	//4. Run the job
	System.out.println("Job: " + jobName + " is running...");
	if(job.waitForCompletion(true)) {
		System.out.println("success!");
		System.exit(0);
	} else {
		System.out.println("failed!");
		System.exit(1);
	}
}
 
Example 19
Source File: MergeJob.java    From MapReduce-Demo with MIT License
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
	// 1. Configure HDFS connection settings
	String namenode_ip = "192.168.17.10";
	String hdfs = "hdfs://" + namenode_ip + ":9000";
	Configuration conf = new Configuration();
	conf.set("fs.defaultFS", hdfs);
	conf.set("mapreduce.app-submission.cross-platform", "true");

	// 2. Configure the MapReduce job
	String jobName = "MergeMultipleFiles"; // job name
	Job job = Job.getInstance(conf, jobName);
	job.setJarByClass(MultiInOutput.class); // driver class used at runtime
	job.setJar("export\\MergeMultipleFiles.jar"); // local jar to submit
	job.setMapOutputKeyClass(Text.class); // Mapper output key type
	job.setMapOutputValueClass(BytesWritable.class); // Mapper output value type
	job.setMapperClass(MergeMapper.class);
	// input format
	job.setInputFormatClass(MyInputFormat.class);
	// write the output as a SequenceFile
	job.setOutputFormatClass(SequenceFileOutputFormat.class);

	// Set the job input and output paths
	String inputDir = "/workspace/mergeFiles/data";
	String outputDir = "/workspace/mergeFiles/output"; // output directory
	Path outPath = new Path(hdfs + outputDir);
	Path inputPath = new Path(hdfs+inputDir);
	FileInputFormat.setInputPaths(job, inputPath);
	FileOutputFormat.setOutputPath(job, outPath);
	FileSystem fs = FileSystem.get(conf);
	if (fs.exists(outPath)) {
		fs.delete(outPath, true);
	}

	// Run the job
	System.out.println("Job: " + jobName + " is running...");
	if (job.waitForCompletion(true)) {
		System.out.println("success!");
		System.exit(0);
	} else {
		System.out.println("failed!");
		System.exit(1);
	}
}
 
Example 20
Source File: MyWordCount.java    From BigDataArchitect with Apache License 2.0
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration(true);

        GenericOptionsParser parser = new GenericOptionsParser(conf, args);  // parses -D and other generic options straight into conf; the remaining args are returned
        String[] othargs = parser.getRemainingArgs();

        // tell the framework the job is submitted from a Windows client (cross-platform)
        conf.set("mapreduce.app-submission.cross-platform","true");

//        conf.set("mapreduce.framework.name","local");
//        System.out.println(conf.get("mapreduce.framework.name"));

        Job job = Job.getInstance(conf);

//        FileInputFormat.setMinInputSplitSize(job,2222);
//        job.setInputFormatClass(ooxx.class);

        job.setJar("C:\\Users\\admin\\IdeaProjects\\msbhadoop\\target\\hadoop-hdfs-1.0-0.1.jar");
        // this must always be set
        job.setJarByClass(MyWordCount.class);

        job.setJobName("mashibing");

        Path infile = new Path(othargs[0]);
        TextInputFormat.addInputPath(job, infile);

        Path outfile = new Path(othargs[1]);
        if (outfile.getFileSystem(conf).exists(outfile)) outfile.getFileSystem(conf).delete(outfile, true);
        TextOutputFormat.setOutputPath(job, outfile);

        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setReducerClass(MyReducer.class);

//        job.setNumReduceTasks(2);
        // Submit the job, then poll for progress until the job is complete
        job.waitForCompletion(true);

    }
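MyMapper and MyReducer are referenced but not shown. A minimal word-count pair with type parameters that match the driver above might look like this (a sketch under that assumption, not the project's actual classes; imports come from java.util and org.apache.hadoop.io / org.apache.hadoop.mapreduce):

public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private final static IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split each line into tokens and emit (word, 1).
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            context.write(word, ONE);
        }
    }
}

public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum the counts for each word.
        int sum = 0;
        for (IntWritable v : values) {
            sum += v.get();
        }
        context.write(key, new IntWritable(sum));
    }
}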