package com.ganqiang.recsys.cf.old;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.VarLongWritable;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;

/**
 * Step 1 of the collaborative-filtering pipeline: formats the raw user rating
 * data into one item vector per user.
 *
 * <p>Input lines have the form {@code userId,itemId,rating}. The output is a
 * sequence file keyed by user ID whose value is that user's item vector, e.g.
 *
 * <pre>
 * 1  {101:5.0,103:2.5,102:3.0}
 * 2  {101:2.0,103:5.0,104:2.0,102:2.5}
 * </pre>
 */
public class Step1 {

    /** Default HDFS location of the raw rating lines. */
    public static final String INPUT_PATH = "hdfs://localhost:9000/input/first";

    /** Default HDFS location the per-user vectors are written to. */
    public static final String OUTPUT_PATH = "hdfs://localhost:9000/output/first";

    /** Counter group used by this job. */
    private static final String COUNTER_GROUP = "Step1";

    /** Counter incremented for every input line the mapper has to skip. */
    private static final String SKIPPED_LINES = "SKIPPED_LINES";

    /**
     * Configures and runs the job, exiting with status 1 if it fails.
     *
     * <p>Generic Hadoop options (such as {@code -D key=value}) are applied to the
     * configuration. If exactly two arguments remain afterwards they are used as
     * the input and output paths; otherwise {@link #INPUT_PATH} and
     * {@link #OUTPUT_PATH} are used.
     *
     * @param args optional generic Hadoop options followed by
     *             {@code <inputPath> <outputPath>}
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        Configuration conf1 = new Configuration();

        // Must run before the Job is created: the Job takes its settings from conf1.
        String[] remaining = new GenericOptionsParser(conf1, args).getRemainingArgs();
        String inputPath = INPUT_PATH;
        String outputPath = OUTPUT_PATH;
        if (remaining.length == 2) {
            inputPath = remaining[0];
            outputPath = remaining[1];
        }

        // NOTE(review): on newer Hadoop releases Job.getInstance(conf, name) is the
        // non-deprecated factory; confirm the cluster version before switching.
        Job job1 = new Job(conf1, "step1");
        job1.setOutputFormatClass(SequenceFileOutputFormat.class);
        job1.setNumReduceTasks(1);
        job1.setJarByClass(Step1.class);

        job1.setMapperClass(WikiMapper1.class);
        job1.setMapOutputKeyClass(VarLongWritable.class);
        job1.setMapOutputValueClass(LongAndFloat.class);

        job1.setReducerClass(WiKiReducer1.class);
        job1.setOutputKeyClass(VarLongWritable.class);
        job1.setOutputValueClass(VectorWritable.class);

        FileInputFormat.addInputPath(job1, new Path(inputPath));
        SequenceFileOutputFormat.setOutputPath(job1, new Path(outputPath));

        if (!job1.waitForCompletion(true)) {
            System.exit(1);
        }
    }

    /**
     * Parses one {@code userId,itemId,rating} line and emits
     * {@code userId -> (itemId, rating)}.
     */
    public static class WikiMapper1
            extends Mapper<LongWritable, Text, VarLongWritable, LongAndFloat> {

        /**
         * Emits one (user, item/rating) pair for a well-formed input line.
         *
         * <p>Lines that do not have exactly three comma-separated fields, or whose
         * fields are not numeric, are skipped and counted in the
         * {@code Step1/SKIPPED_LINES} job counter instead of failing the task.
         *
         * @param key     the input key supplied by the input format (unused)
         * @param value   the raw text line
         * @param context the task context used to emit output and update counters
         * @throws IOException          if writing the output record fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] info = value.toString().split(",");
            if (info.length != 3) {
                context.getCounter(COUNTER_GROUP, SKIPPED_LINES).increment(1);
                return;
            }

            VarLongWritable userID = new VarLongWritable();
            LongWritable itemID = new LongWritable();
            FloatWritable itemValue = new FloatWritable();
            try {
                // Long.parseLong rejects surrounding whitespace, so trim each field first.
                userID.set(Long.parseLong(info[0].trim()));
                itemID.set(Long.parseLong(info[1].trim()));
                itemValue.set(Float.parseFloat(info[2].trim()));
            } catch (NumberFormatException e) {
                // A header row or corrupt record should not abort the whole job.
                context.getCounter(COUNTER_GROUP, SKIPPED_LINES).increment(1);
                return;
            }

            context.write(userID, new LongAndFloat(itemID, itemValue));
        }
    }

    /**
     * Collects all (item, rating) pairs of one user into a single sparse vector.
     */
    public static class WiKiReducer1
            extends Reducer<VarLongWritable, LongAndFloat, VarLongWritable, VectorWritable> {

        /**
         * Builds the user's item vector, indexed by item ID, and emits it.
         *
         * <p>Item IDs are used as vector indices and therefore must parse as an
         * {@code int}; {@link Integer#parseInt(String)} throws
         * {@link NumberFormatException} otherwise. A later {@code set} for the same
         * index overwrites an earlier one.
         *
         * @param userID    the user whose preferences are being grouped
         * @param itemPrefs the (item, rating) pairs emitted for this user
         * @param context   the task context used to emit the vector
         * @throws IOException          if writing the output record fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void reduce(VarLongWritable userID, Iterable<LongAndFloat> itemPrefs,
                Context context) throws IOException, InterruptedException {
            Vector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 10);
            for (LongAndFloat itemPref : itemPrefs) {
                // The accessor types of LongAndFloat are declared elsewhere, so the
                // values are read through their string form, as before.
                int itemIndex = Integer.parseInt(itemPref.getFirst().toString());
                float rating = Float.parseFloat(itemPref.getSecond().toString());
                userVector.set(itemIndex, rating);
            }
            context.write(userID, new VectorWritable(userVector));
        }
    }
}