标签:
代码分为两部分:Mapper 和 Reducer:
<span style="font-size:18px;">/**
 * Mahout/Hadoop Mapper that parses a Wikipedia links file (purpose as stated
 * by the original author).
 *
 * @author YangXin
 * @date 2016/2/21
 */
package unitSix;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.math.VarLongWritable;

/**
 * Turns each input line into (userID, itemID) pairs.
 *
 * The first run of digits on a line is taken as the user ID; every following
 * run of digits on the same line is taken as an item ID.
 */
public class WikipediaToItemPrefsMapper
    extends Mapper<LongWritable, Text, VarLongWritable, VarLongWritable> {

  /** Matches each run of decimal digits in an input line. */
  private static final Pattern NUMBERS = Pattern.compile("(\\d+)");

  /**
   * Emits one (userID, itemID) pair for every number that follows the first
   * number on the line.
   *
   * @param key     input key supplied by the framework; not used here
   * @param value   one line of input text
   * @param context sink that receives the (userID, itemID) pairs
   * @throws IOException          if writing to the context fails
   * @throws InterruptedException if writing to the context is interrupted
   */
  @Override
  public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    String line = value.toString();
    Matcher m = NUMBERS.matcher(line);

    // Locate the user ID (first number on the line). Matcher.group() throws
    // IllegalStateException when no match was found, so a line without any
    // digits (for example a blank line) is skipped instead of failing the task.
    if (!m.find()) {
      return;
    }
    VarLongWritable userID = new VarLongWritable(Long.parseLong(m.group()));

    // A single writable is reused for every item ID on this line.
    VarLongWritable itemID = new VarLongWritable();
    while (m.find()) {
      itemID.set(Long.parseLong(m.group()));
      // Emit a user-item pair for each item ID.
      context.write(userID, itemID);
    }
  }
}</span>
<strong><span style="font-size:18px;">/**
 * Mahout/Hadoop Reducer that builds a Vector from user-item preferences
 * (purpose as stated by the original author).
 *
 * @author YangXin
 */
package unitSix;

import java.io.IOException;

import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.VarLongWritable;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;

/**
 * Collapses all item IDs received for one user into a single sparse vector
 * whose entry at each item's index is set to 1.0.
 */
public class WikipediaToUserVectorReducer
    extends Reducer<VarLongWritable, VarLongWritable, VarLongWritable, VectorWritable> {

  /**
   * Builds the vector for one user and writes it out keyed by that user ID.
   *
   * @param userID    the user whose item IDs are being reduced
   * @param itemPrefs the item IDs grouped under this user
   * @param context   sink that receives the (userID, vector) pair
   * @throws IOException          if writing to the context fails
   * @throws InterruptedException if writing to the context is interrupted
   */
  @Override
  public void reduce(VarLongWritable userID, Iterable<VarLongWritable> itemPrefs, Context context)
      throws IOException, InterruptedException {
    // Sparse vector constructed with arguments (Integer.MAX_VALUE, 100).
    Vector preferences = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);

    for (VarLongWritable pref : itemPrefs) {
      // NOTE(review): narrowing cast from long to int; item IDs outside the
      // int range would not map to their own index. Confirm the input range.
      int itemIndex = (int) pref.get();
      preferences.set(itemIndex, 1.0f);
    }

    VectorWritable output = new VectorWritable(preferences);
    context.write(userID, output);
  }
}
</span></strong>
标签:
原文地址:http://blog.csdn.net/u012965373/article/details/50715129