标签:偏差 array hashmap 用户 entry odi 目的 amp 用户数
一.加权SlopeOne算法公式:
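(1).求得所有item之间的评分偏差
$$dev_{j,i}=\frac{\sum_{u\in S_{j,i}(X)}(u_j-u_i)}{card(S_{j,i}(X))}$$
其中 $S_{j,i}(X)$ 表示同时对项目j与项目i评分的用户集合,$u_j$、$u_i$ 分别表示用户u对项目j、项目i的评分。
上式中分子部分为项目j与项目i的偏差和,分母部分为所有同时对项目j与项目i评分的用户数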
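(2).加权预测评分
$$P^{wS1}(u)_j=\frac{\sum_{i\in S(u)-\{j\}}(dev_{j,i}+u_i)\,c_{j,i}}{\sum_{i\in S(u)-\{j\}}c_{j,i}}$$
上式中 $P^{wS1}(u)_j$ 表示用户u对项目j的评分预测,$S(u)$ 表示用户u已评分的项目集合。分子为对每个已评分项目i,将项目j对项目i的偏差 $dev_{j,i}$ 加上用户对项目i的评分 $u_i$,再乘以权重 $c_{j,i}$ 后求和;分母为这些权重之和。$c_{j,i}$ 表示同时对项目j与项目i评分的用户数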
二.python实现
#!/usr/bin/python
# -*- coding: utf-8 -*-

# Sample data: user name -> {item name -> rating}.
user_data = {"小明": {"张学友": 4, "周杰伦": 3, "刘德华": 4},
             "小海": {"张学友": 5, "周杰伦": 2},
             "李梅": {"周杰伦": 3.5, "刘德华": 4},
             "李磊": {"张学友": 5, "刘德华": 3}}


class recommender:
    """Weighted Slope One recommender.

    Usage: construct with the rating data, call computeDeviation() once,
    then call predictRating() for each target user.
    """

    def __init__(self, data):
        """Store the rating data.

        data -- dict mapping user -> {item -> rating}.
        """
        # frequency[j][i]: number of users who rated both item j and item i.
        self.frequency = {}
        # deviation[j][i]: average of (rating of j - rating of i) over those users.
        self.deviation = {}
        self.data = data

    def computeDeviation(self):
        """Compute the average rating deviation between every pair of items.

        The tables are rebuilt from scratch on every call, so calling this
        method repeatedly does not accumulate on already-averaged values.
        """
        self.frequency = {}
        self.deviation = {}
        for ratings in self.data.values():
            for item, rating in ratings.items():
                self.frequency.setdefault(item, {})
                self.deviation.setdefault(item, {})
                for item2, rating2 in ratings.items():
                    if item != item2:
                        self.frequency[item].setdefault(item2, 0)
                        self.deviation[item].setdefault(item2, 0.0)
                        # Number of users who rated both items.
                        self.frequency[item][item2] += 1
                        # Running sum of the rating differences.
                        self.deviation[item][item2] += (rating - rating2)
        # Turn the sums into averages.
        for item, ratings in self.deviation.items():
            for item2 in ratings:
                ratings[item2] /= self.frequency[item][item2]

    def predictRating(self, userRatings, k):
        """Predict ratings for the items the user has not rated yet.

        userRatings -- dict {item -> rating} of the target user.
        k           -- maximum number of recommendations to return.

        Returns a list of (item, predicted rating) tuples sorted by
        predicted rating in descending order, truncated to k entries.
        """
        recommendations = {}
        frequencies = {}
        for item, rating in userRatings.items():
            for diffItem, diffRating in self.deviation.items():
                if diffItem not in userRatings and item in self.deviation[diffItem]:
                    fre = self.frequency[diffItem][item]
                    recommendations.setdefault(diffItem, 0.0)
                    frequencies.setdefault(diffItem, 0)
                    # Numerator: (deviation + user's rating) weighted by co-rating count.
                    recommendations[diffItem] += (diffRating[item] + rating) * fre
                    # Denominator: sum of the weights.
                    frequencies[diffItem] += fre
        # The loop variable must not be called k: on Python 2 a list
        # comprehension leaks its variable and would overwrite the k parameter.
        ranked = [(name, total / frequencies[name])
                  for (name, total) in recommendations.items()]
        # Sort and return the top k.
        ranked.sort(key=lambda a_tuple: a_tuple[1], reverse=True)
        return ranked[:k]


if __name__ == '__main__':
    r = recommender(user_data)
    r.computeDeviation()
    u = user_data['李磊']
    print(r.predictRating(u, 5))
三.java实现
import java.util.HashMap;
import java.util.Map;
import java.util.List;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Collections;

/**
 * Created by on 2016/12/8.ShiYan
 *
 * Weighted Slope One recommender:
 * 1. compute the average rating deviation of every item pair;
 * 2. use those deviations to predict ratings for unrated items.
 */
public class SlopeOne {
    /** item j -> (item i -> number of users who rated both j and i). */
    Map<String, Map<String, Integer>> frequency = null;
    /** item j -> (item i -> average of (rating of j - rating of i)). */
    Map<String, Map<String, Double>> deviation = null;
    /** user -> (item -> rating). */
    Map<String, Map<String, Integer>> user_rating = null;

    /**
     * Creates a recommender over the given ratings.
     *
     * @param user_rating map of user to that user's item ratings
     */
    public SlopeOne(Map<String, Map<String, Integer>> user_rating) {
        frequency = new HashMap<String, Map<String, Integer>>();
        deviation = new HashMap<String, Map<String, Double>>();
        this.user_rating = user_rating;
    }

    /**
     * Computes the average rating deviation between all pairs of items.
     *
     * The tables are rebuilt from scratch on every call, so repeated calls
     * do not accumulate on top of already-averaged values.
     */
    public void computeDeviation() {
        frequency.clear();
        deviation.clear();

        for (Map<String, Integer> ratings : user_rating.values()) {
            for (Map.Entry<String, Integer> ratingEntry : ratings.entrySet()) {
                String item = ratingEntry.getKey();
                int rating = ratingEntry.getValue();

                Map<String, Integer> itemFrequency = frequency.get(item);
                if (itemFrequency == null) {
                    itemFrequency = new HashMap<String, Integer>();
                    frequency.put(item, itemFrequency);
                }

                Map<String, Double> itemDeviation = deviation.get(item);
                if (itemDeviation == null) {
                    itemDeviation = new HashMap<String, Double>();
                    deviation.put(item, itemDeviation);
                }

                for (Map.Entry<String, Integer> ratingEntry2 : ratings.entrySet()) {
                    String item2 = ratingEntry2.getKey();
                    if (item.equals(item2)) {
                        continue;
                    }
                    double diff = rating - ratingEntry2.getValue();

                    // The first co-rating must count as 1 (not 0) and must
                    // contribute its difference; otherwise every pair is
                    // under-counted and single-user pairs divide by zero.
                    Integer count = itemFrequency.get(item2);
                    itemFrequency.put(item2, count == null ? 1 : count + 1);

                    Double sum = itemDeviation.get(item2);
                    itemDeviation.put(item2, sum == null ? diff : sum + diff);
                }
            }
        }

        // Turn the accumulated sums into averages.
        for (Map.Entry<String, Map<String, Double>> itemEntry : deviation.entrySet()) {
            Map<String, Integer> itemFre = frequency.get(itemEntry.getKey());
            for (Map.Entry<String, Double> devEntry : itemEntry.getValue().entrySet()) {
                devEntry.setValue(devEntry.getValue() / itemFre.get(devEntry.getKey()));
            }
        }
    }

    /**
     * Predicts ratings for the items the target user has not rated.
     *
     * @param userRating ratings of the target user (item to rating)
     * @param k maximum number of predictions to return; values below zero
     *          are treated as zero
     * @return up to k (item, predicted rating) entries, sorted by predicted
     *         rating in descending order
     */
    public List<Map.Entry<String, Double>> predictRating(Map<String, Integer> userRating, int k) {
        Map<String, Double> recommendations = new HashMap<String, Double>();
        Map<String, Integer> frequencies = new HashMap<String, Integer>();

        for (Map.Entry<String, Integer> userEntry : userRating.entrySet()) {
            String userItem = userEntry.getKey();
            double rating = userEntry.getValue();
            for (Map.Entry<String, Map<String, Double>> deviationEntry : deviation.entrySet()) {
                String item = deviationEntry.getKey();
                if (userRating.containsKey(item)) {
                    continue; // only predict items the user has not rated
                }
                Double itemDev = deviationEntry.getValue().get(userItem);
                if (itemDev == null) {
                    continue; // nobody rated both items
                }
                int fre = frequency.get(item).get(userItem);

                // Numerator: (deviation + user's rating) weighted by co-rating count.
                Double weightedSum = recommendations.get(item);
                recommendations.put(item,
                        (weightedSum == null ? 0.0 : weightedSum) + (itemDev + rating) * fre);

                // Denominator: sum of the weights.
                Integer weight = frequencies.get(item);
                frequencies.put(item, (weight == null ? 0 : weight) + fre);
            }
        }

        // Divide each weighted sum by its total weight.
        for (Map.Entry<String, Double> recoEntry : recommendations.entrySet()) {
            recoEntry.setValue(recoEntry.getValue() / frequencies.get(recoEntry.getKey()));
        }

        // Sort descending; a priority queue could also be used for top-k.
        List<Map.Entry<String, Double>> list_map =
                new ArrayList<Map.Entry<String, Double>>(recommendations.entrySet());
        Collections.sort(list_map, new Comparator<Map.Entry<String, Double>>() {
            @Override
            public int compare(Map.Entry<String, Double> o1, Map.Entry<String, Double> o2) {
                return Double.compare(o2.getValue(), o1.getValue());
            }
        });

        int limit = Math.max(0, Math.min(k, list_map.size()));
        return new ArrayList<Map.Entry<String, Double>>(list_map.subList(0, limit));
    }

    public static void main(String[] args) {
        Map<String, Map<String, Integer>> userRatings = new HashMap<String, Map<String, Integer>>();
        Map<String, Integer> xiMingRating = new HashMap<String, Integer>();
        xiMingRating.put("张学友", 4);
        xiMingRating.put("周杰伦", 3);
        xiMingRating.put("刘德华", 4);
        Map<String, Integer> xiHaiRating = new HashMap<String, Integer>();
        xiHaiRating.put("张学友", 5);
        xiHaiRating.put("周杰伦", 2);
        Map<String, Integer> liMeiRating = new HashMap<String, Integer>();
        liMeiRating.put("周杰伦", 3);
        liMeiRating.put("刘德华", 4);
        Map<String, Integer> liLeiRating = new HashMap<String, Integer>();
        liLeiRating.put("张学友", 5);
        liLeiRating.put("刘德华", 3);
        userRatings.put("xiMing", xiMingRating);
        userRatings.put("xiHai", xiHaiRating);
        userRatings.put("liMei", liMeiRating);
        userRatings.put("liLei", liLeiRating);

        SlopeOne slopOne = new SlopeOne(userRatings);
        slopOne.computeDeviation();
        List<Map.Entry<String, Double>> top_k = slopOne.predictRating(userRatings.get("liLei"), 5);
        for (Map.Entry<String, Double> item : top_k) {
            System.out.println(item.getKey() + " " + item.getValue());
        }
    }
}
标签:偏差 array hashmap 用户 entry odi 目的 amp 用户数
原文地址:http://www.cnblogs.com/little-horse/p/7251502.html