The implementation has two parts: the Mapper and the Reducer.
/**
 * @author YangXin
 * @info Mapper for computing item co-occurrence
 */
package unitSix;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.math.VarLongWritable;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;

public class UserVectorToCooccurrenceMapper extends Mapper<VarLongWritable, VectorWritable, IntWritable, IntWritable> {

    @Override
    public void map(VarLongWritable userID, VectorWritable userVector, Context context)
            throws IOException, InterruptedException {
        Iterator<Vector.Element> it = userVector.get().nonZeroes().iterator();
        // Nested loop over the non-zero elements, i.e. every pair of items this user has interacted with
        while (it.hasNext()) {
            int index1 = it.next().index();
            Iterator<Vector.Element> it2 = userVector.get().nonZeroes().iterator();
            while (it2.hasNext()) {
                int index2 = it2.next().index();
                // Emit the pair of item indices (including self-pairs, which land on the matrix diagonal)
                context.write(new IntWritable(index1), new IntWritable(index2));
            }
        }
    }
}
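For concreteness, here is a small stand-alone sketch (not part of the original post) of what this mapper emits. For a user vector with non-zero entries at item indices 2, 5 and 9, the nested iteration produces all nine ordered pairs, including self-pairs such as (2, 2). The class name is made up purely for illustration.

// Illustration only: prints the item-index pairs the mapper would emit for one toy user vector.
package unitSix;

import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;

public class CooccurrencePairsDemo {
    public static void main(String[] args) {
        // A toy user vector: the user has interacted with items 2, 5 and 9
        Vector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
        userVector.set(2, 1.0);
        userVector.set(5, 3.0);
        userVector.set(9, 5.0);

        // Same nested iteration as the mapper: every ordered pair of non-zero indices
        for (Vector.Element e1 : userVector.nonZeroes()) {
            for (Vector.Element e2 : userVector.nonZeroes()) {
                System.out.println("(" + e1.index() + ", " + e2.index() + ")");
            }
        }
    }
}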
/**
 * @author YangXin
 * @info Reducer for computing co-occurrence with Mahout
 */
package unitSix;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;

public class UserVectorToCooccurrenceReducer extends Reducer<IntWritable, IntWritable, IntWritable, VectorWritable> {

    @Override
    public void reduce(IntWritable itemIndex1, Iterable<IntWritable> itemIndex2s, Context context)
            throws IOException, InterruptedException {
        // One sparse row of the co-occurrence matrix, keyed by itemIndex1
        Vector cooccurrenceRow = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
        for (IntWritable intWritable : itemIndex2s) {
            int itemIndex2 = intWritable.get();
            // Count how many times itemIndex2 co-occurred with itemIndex1 across all users
            cooccurrenceRow.set(itemIndex2, cooccurrenceRow.get(itemIndex2) + 1.0);
        }
        context.write(itemIndex1, new VectorWritable(cooccurrenceRow));
    }
}
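The two classes above can be wired together with a small driver. What follows is a minimal sketch, not part of the original post: it assumes the user vectors are stored as a SequenceFile of VarLongWritable/VectorWritable pairs, and the job class name and the input/output paths taken from the command line are hypothetical.

// Minimal driver sketch wiring the mapper and reducer into one Hadoop job (illustrative assumptions noted above).
package unitSix;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.mahout.math.VectorWritable;

public class UserVectorToCooccurrenceJob {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "userVectorToCooccurrence");
        job.setJarByClass(UserVectorToCooccurrenceJob.class);

        job.setMapperClass(UserVectorToCooccurrenceMapper.class);
        job.setReducerClass(UserVectorToCooccurrenceReducer.class);

        // Mapper emits (itemIndex, itemIndex) pairs; reducer emits one sparse co-occurrence row per item
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(VectorWritable.class);

        // Assumed storage format: SequenceFiles for both the user vectors and the co-occurrence rows
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileInputFormat.addInputPath(job, new Path(args[0]));
        SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}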
Original post: http://blog.csdn.net/u012965373/article/details/50715192