package org.tinylcy.similarity;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.tinylcy.driver.ItemBasedCFDriver;
import org.tinylcy.hdfs.HDFS;

import java.io.IOException;
import java.util.regex.Pattern;

/**
 * Step 5 of the similarity computation: counts how many items each user likes.
 *
 * <ul>
 *   <li>Input: the output of Step 2 (the user-item inverted table). Each line is
 *       split on ',' and ':'; the first token is parsed as the integer user id and
 *       every remaining token is counted as one item.</li>
 *   <li>Only a mapper is configured; no reducer class is set on the job.</li>
 *   <li>Output: one record per input line, user id and item count, written through
 *       {@link TextOutputFormat} with ':' configured as the key/value separator.</li>
 *   <li>The counts are used later when computing the similarity between items.</li>
 * </ul>
 */
public class CalculateSimilarityStep5 {

    /** Splits an input line on either ',' or ':'. Compiled once and shared. */
    private static final Pattern DELIMITER = Pattern.compile("[,:]");

    /**
     * Emits {@code (userId, itemCount)} for every non-blank input line.
     */
    public static class Step5_Mapper extends Mapper<LongWritable, Text, IntWritable, IntWritable> {

        // Reused across map() calls to avoid allocating a new writable per record.
        private final IntWritable k = new IntWritable();
        private final IntWritable v = new IntWritable();

        /**
         * Parses one line of the user-item table and writes the user's item count.
         *
         * @param key     input key supplied by the input format (not used)
         * @param value   one line of text; the first delimited token must be an integer user id
         * @param context sink for the {@code (userId, itemCount)} pair
         * @throws IOException           if writing to the context fails
         * @throws InterruptedException  if the task is interrupted while writing
         * @throws NumberFormatException if the first token of a non-blank line is not an integer
         */
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            // A blank line carries no user id; skipping it avoids failing the whole
            // task with a NumberFormatException from parseInt("").
            if (line.trim().isEmpty()) {
                return;
            }

            String[] tokens = DELIMITER.split(line);
            k.set(Integer.parseInt(tokens[0]));
            // Every token after the user id is counted as one item.
            v.set(tokens.length - 1);
            context.write(k, v);
        }
    }

    /**
     * Configures and runs the Step 5 job, blocking until it finishes.
     *
     * <p>The input and output locations are read from {@code ItemBasedCFDriver.path}
     * under the keys {@code "step5InputPath"} and {@code "step5OutputPath"}.
     *
     * @throws IOException            if job setup or file-system access fails, or if the
     *                                job completes unsuccessfully
     * @throws ClassNotFoundException propagated from {@link Job#waitForCompletion(boolean)}
     * @throws InterruptedException   if the thread is interrupted while waiting for the job
     */
    public static void run() throws IOException, ClassNotFoundException, InterruptedException {
        String inputPath = ItemBasedCFDriver.path.get("step5InputPath");
        String outputPath = ItemBasedCFDriver.path.get("step5OutputPath");

        Configuration conf = new Configuration();
        // Write "key:value" instead of the default separator of the text output format.
        conf.set("mapred.textoutputformat.separator", ":");

        Job job = Job.getInstance(conf);

        // NOTE(review): presumably removes any output left by a previous run so the
        // job can create the output directory again -- confirm against HDFS.rmr.
        HDFS hdfs = new HDFS(conf);
        hdfs.rmr(outputPath);

        job.setMapperClass(Step5_Mapper.class);
        job.setJarByClass(CalculateSimilarityStep5.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // waitForCompletion reports failure through its return value; surface it
        // instead of letting later steps run against missing or partial output.
        if (!job.waitForCompletion(true)) {
            throw new IOException(
                    "Step 5 job failed: input=" + inputPath + ", output=" + outputPath);
        }
    }
}