// Java source code of Step4

package ssdut.training.mapreduce.itemcf;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

//同显矩阵*评分矩阵，计算评分单项
public class Step4 {
	public static boolean run(Configuration config, Map<String, String> paths) 
			throws IOException, ClassNotFoundException, InterruptedException {
		String jobName = "step4";
		Job job = Job.getInstance(config, jobName);
		job.setJarByClass(Step4.class);
		job.setJar("export\\ItemCF.jar");
		job.setMapperClass(Step4_Mapper.class);
		job.setReducerClass(Step4_Reducer.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);

		Path[] inPaths = new Path[] { 
				new Path(paths.get("Step4Input1")),
				new Path(paths.get("Step4Input2")) };		
		Path outpath = new Path(paths.get("Step4Output"));
		FileInputFormat.setInputPaths(job, inPaths);
		FileOutputFormat.setOutputPath(job, outpath);		
		FileSystem fs = FileSystem.get(config);
		if (fs.exists(outpath)) {
			fs.delete(outpath, true);
		}
		
		return job.waitForCompletion(true);
	}

	static class Step4_Mapper extends Mapper<LongWritable, Text, Text, Text> {
		private String flag;	//保存Map输入数据来自于哪个目录（output2或ouput3），用于判断数据是同现矩阵还是评分矩阵

		protected void setup(Context context) throws IOException, InterruptedException {
			FileSplit split = (FileSplit) context.getInputSplit();	//根据上下文获取输入分片对象
			flag = split.getPath().getParent().getName();			//获取输入分片所属的目录名称
		}

		protected void map(LongWritable key, Text value, Context context) 
				throws IOException, InterruptedException {
			String[] strs = Pattern.compile("[\t,]").split(value.toString());
			if (flag.equals("output3")) {				//输入的是同现矩阵，strs格式："i100:i105 1"
				String[] items = strs[0].split(":");	
				String itemID1 = items[0];				//第一个商品id  "i100"
				String itemID2 = items[1];				//第二个商品id	 "i105"
				String num = strs[1];					//两件商品的同现次数    "1"
				
				Text k = new Text(itemID1);
				Text v = new Text("A:" + itemID2 + "," + num);	//格式："A:i105,1"
				context.write(k, v);							//格式："i100	A:i105,1"
				
			} else if (flag.equals("output2")) {	//输入的是评分矩阵，strs格式："u14 i100:1 i25:1"
				String userID = strs[0];
				for (int i = 1; i < strs.length; i++) {
					String[] vector = strs[i].split(":");	//i100:1
					String itemID = vector[0];
					String score = vector[1];
					Text k = new Text(itemID);				
					Text v = new Text("B:" + userID + "," + score);	//格式："B:u14,1"
					context.write(k, v);							//格式："i100 B:u14,1" 和 "i25 B:u14,1"
				}
			}
		}
	}

	static class Step4_Reducer extends Reducer<Text, Text, Text, Text> {
		protected void reduce(Text key, Iterable<Text> values, Context context)
				throws IOException, InterruptedException {
			Map<String, Integer> mapA = new HashMap<String, Integer>();
			Map<String, Integer> mapB = new HashMap<String, Integer>();
			//reduce输入格式："i100  A:i105,1  A:i107,2  B:u14,1  B:u22,3"
			for (Text val : values) {	//将AB格式的输入分别放入HashMap中
				String str = val.toString();
				if (str.startsWith("A:")) {			//str格式："A:i105,1"
					String[] kv = Pattern.compile("[\t,]").split(str.substring(2));
					mapA.put(kv[0], Integer.parseInt(kv[1]));
				} else if (str.startsWith("B:")) {	//str格式："B:u14,1"
					String[] kv = Pattern.compile("[\t,]").split(str.substring(2));
					mapB.put(kv[0], Integer.parseInt(kv[1]));
				}
			}
			double result = 0;
			Iterator<String> itera = mapA.keySet().iterator();		//根据mapA中key键(itemID)生成迭代器对象
			while (itera.hasNext()) {
				String mapka = itera.next();							//获得itemID
				int num = mapA.get(mapka).intValue();				//根据itemID从mapA获取同现次数
				
				Iterator<String> iterb = mapB.keySet().iterator();	//根据mapB中key键生成迭代器对象
				while (iterb.hasNext()) {
					String mapkb = iterb.next();					//userID
					int score = mapB.get(mapkb).intValue();			//根据userID从mapB中获取用户行为评分
					
					result = num * score;							//矩阵相乘，计算评分
					context.write(new Text(mapkb), new Text(mapka + "," + result));	//输出 key："userID" value:"itemID,result"
				}
			}
		}
	}
}