标签:
基于用户的协同过滤算法:
# encoding: utf-8
"""User-based collaborative filtering over a ``user::item`` bookmark file.

For every user in the training file, score every other user with
:func:`Person`, keep the most similar users as neighbours, and recommend the
neighbours' items that the user has not interacted with yet.  One line per
user, ``user::item::item...``, is printed and written to the output file.
"""
from collections import Counter
from math import sqrt

# NOTE(review): the original script did `from Similarity import Person` and
# `from Sort import select_sort`.  `Person` is defined in this file, so that
# import is redundant.  `select_sort` is not visible here; it presumably
# returned (keys, values) sorted ascending by value, because the script took
# the LAST entries as the "top" ones.  It is replaced by the built-in
# `sorted` -- confirm against the project's Sort module.

TRAIN_PATH = 'user_bookmark'
OUTPUT_PATH = 'user_bookRecommend'
NEIGHBOUR_COUNT = 3   # how many most-similar users are consulted
RECOMMEND_COUNT = 6   # how many items are recommended per user
PREFERENCE = 1        # the data carries no ratings, so every item weighs 1


def Person(item_a, item_b):
    """Return the overlap similarity of two item lists.

    The score is ``matches / sqrt(len(item_a) * len(item_b))`` where
    ``matches`` is the number of index pairs ``(i, j)`` with
    ``item_a[i] == item_b[j]`` (so repeated items count once per pair).

    Items must be hashable.  Returns the integer ``0`` when either list is
    empty or when the lists share no item; otherwise returns a float.
    """
    if not item_a or not item_b:
        return 0
    occurrences_b = Counter(item_b)
    # Summing the count of each a-item in b equals counting matching pairs,
    # without the quadratic nested loop.
    matches = sum(occurrences_b[item] for item in item_a)
    if matches == 0:
        return 0
    return matches / sqrt(len(item_a) * len(item_b))


def load_train_set(lines):
    """Build ``{user_id: [item_id, ...]}`` from ``user::item`` lines.

    Blank lines are skipped.  A non-blank line that does not split into
    exactly two ``::``-separated fields raises ``ValueError``.
    """
    train_set = {}
    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            continue
        user_id, item_id = line.split("::")
        train_set.setdefault(user_id, []).append(item_id)
    return train_set


def top_n_ascending(scores, n):
    """Return the ``n`` highest-scoring ``(key, score)`` pairs, lowest first.

    Ties are broken by key so the result does not depend on dict order.
    Fewer than ``n`` pairs are returned when ``scores`` is smaller than ``n``.
    """
    if n <= 0:
        return []
    ranked = sorted(scores.items(), key=lambda pair: (pair[1], pair[0]))
    # Slicing (unlike indexing from len - n) cannot go negative and wrap.
    return ranked[-n:]


def recommend(user, train_set, neighbour_count=NEIGHBOUR_COUNT,
              recommend_count=RECOMMEND_COUNT):
    """Return up to ``recommend_count`` item ids recommended for ``user``.

    Each candidate item's score is the sum, over the selected neighbours
    that have the item, of ``PREFERENCE * similarity``.  Items the user
    already has are excluded.  The list is ordered from the lowest to the
    highest score among the selected items, matching the original output
    order.
    """
    own_items = train_set[user]
    similarities = {}
    for other, other_items in train_set.items():
        if other != user:
            similarities[other] = Person(own_items, other_items)

    seen = set(own_items)
    rank = {}
    for neighbour, similarity in top_n_ascending(similarities, neighbour_count):
        # De-duplicate so a neighbour contributes once per item.
        for item in set(train_set[neighbour]) - seen:
            # Accumulate across neighbours; keeping only the first score
            # would make the ranking depend on iteration order.
            rank[item] = rank.get(item, 0) + PREFERENCE * similarity

    return [item for item, _score in top_n_ascending(rank, recommend_count)]


def main(train_path=TRAIN_PATH, output_path=OUTPUT_PATH):
    """Read the training file and print/write one recommendation line per user."""
    # Load the training set.
    with open(train_path, 'r') as source:
        train_set = load_train_set(source)

    # `with` guarantees the output is flushed and closed.
    with open(output_path, 'w') as sink:
        for user in train_set:
            line = "::".join([user] + recommend(user, train_set))
            print(line)
            sink.write(line + '\n')


if __name__ == '__main__':
    main()
标签:
原文地址:http://www.cnblogs.com/zzblee/p/4189279.html