码迷,mamicode.com
首页 > 编程语言 > 详细

实现Apriori算法(python)

时间:2018-11-18 15:00:32      阅读:185      评论:0      收藏:0      [点我收藏+]

标签:can   span   算法   支持度   not   ase   code   int   python   

  1 # coding: utf-8
  2 
  3 # 利用python实现apriori算法
  4 
  5 # In[1]:
  6 
  7 
  8 #导入需要的库
  9 from numpy import *
 10 
 11 
 12 # In[2]:
 13 
 14 
 15 def loadDataSet():
 16     return [[1,3,4],[2,3,5],[1,2,3,5],[2,5]]
 17 
 18 
 19 # In[3]:
 20 
 21 
 22 def createC1(dataSet):
 23     C1=[]
 24     for transaction in dataSet:
 25         for item in transaction:
 26             if not [item] in C1:
 27                 C1.append([item])
 28     C1.sort()
 29     return map(frozenset,C1)
 30 
 31 
 32 # In[4]:
 33 
 34 
 35 #计算Ck在数据集D中的支持度,并返回支持度大于minSupport的数据集
 36 def scanD(D,Ck,minSupport):
 37     ssCnt={}
 38     for tid in D:
 39         for can in Ck:
 40             if can.issubset(tid):
 41                 if can not in ssCnt.keys():
 42                     ssCnt[can]=1
 43                 else :
 44                     ssCnt[can]+=1
 45     numItems=float(len(D))
 46     retList=[]
 47     supportData={}
 48     for key in ssCnt:
 49         support=ssCnt[key]/numItems
 50         if support>= minSupport:
 51             retList.insert(0,key)
 52         supportData[key]=support
 53     return retList,supportData
 54 
 55 
 56 # In[15]:
 57 
 58 
 59 def aprioriGen(Lk,k):
 60     retList=[]
 61     lenLk=len(Lk)
 62     for i in range(lenLk):
 63         for j in range(i+1,lenLk):
 64             L1=list(Lk[i])[:k-2]
 65             L2=list(Lk[j])[:k-2]
 66             L1.sort()
 67             L2.sort()
 68             if L1==L2:
 69                 retList.append(Lk[i] | Lk[j])
 70     return retList
 71         
 72 
 73 
 74 # In[14]:
 75 
 76 
 77 def apriori(dataSet, minSupport=0.5):
 78     C1=createC1(dataSet)
 79     D=list(map(set,dataSet))
 80     print(D:,D)
 81     L1,supportData= scanD(D,C1,minSupport)
 82     L=[L1]
 83     k=2
 84     while (len(L[k-2])>0):
 85         Ck=aprioriGen(L[k-2], k)
 86         Lk,supK= scanD(D,Ck,minSupport)
 87         supportData.update(supK)
 88         if len(Lk)==0:
 89             break
 90         L.append(Lk)
 91         k+=1
 92     return L,supportData
 93 
 94 
 95 # In[19]:
 96 
 97 
 98 def calConf(freqSet,H,supportData,brl,minConf=0.7):
 99     prunedH=[]
100     for conseq in H:
101         conf=supportData[freqSet]/supportData[freqSet-conseq]
102         if conf >= minConf:
103             print(freqSet-conseq, -->,conseq,conf,conf)
104             brl.append((freqSet-conseq,conseq,conf))
105             prunedH.append(conseq)
106     return prunedH
107 
108 
109 # In[21]:
110 
111 
def rulesFromConseq(freqSet, H, supportData, brl, minConf=0.7):
    """Recursively build rules with ever larger consequents from freqSet.

    The consequents in ``H`` are merged into consequents one item larger,
    the resulting rules are scored with ``calConf``, and the recursion
    continues on the survivors while more than one remains.

    Args:
        freqSet: the frequent itemset (frozenset) the rules are built from.
        H: list of equal-sized consequents (frozensets).
        supportData: mapping from itemset to support.
        brl: list that accepted rules are appended to (mutated in place).
        minConf: minimum confidence a rule needs to be kept.
    """
    # Nothing to merge; this also avoids an IndexError on H[0].
    if not H:
        return
    m = len(H[0])
    # A consequent of size m + 1 must still leave a non-empty antecedent.
    if len(freqSet) > (m + 1):
        Hmpl = aprioriGen(H, m + 1)
        Hmpl = calConf(freqSet, Hmpl, supportData, brl, minConf)
        print("Hmpl=", Hmpl)
        print("len(Hmpl)=", len(Hmpl), "len(freqSet)=", len(freqSet))
        # At least two surviving consequents are needed to merge further.
        if len(Hmpl) > 1:
            rulesFromConseq(freqSet, Hmpl, supportData, brl, minConf)
121 
122 
123 # In[9]:
124 
125 
def generateRules(L, supportData, minConf=0.7):
    """Derive association rules from the frequent itemsets found by apriori.

    Args:
        L: list of levels as returned by ``apriori``; ``L[i]`` holds the
            frequent itemsets (frozensets) of size ``i + 1``.
        supportData: mapping from itemset to support, as returned by
            ``apriori``.
        minConf: minimum confidence a rule needs to be reported.

    Returns:
        A list of ``(antecedent, consequent, confidence)`` tuples.
    """
    bigRuleList = []
    # Level 0 holds single items, which cannot be split into a rule.
    for i in range(1, len(L)):
        for freqSet in L[i]:
            H1 = [frozenset([item]) for item in freqSet]
            # Always score the single-item consequents first. The original
            # handed larger itemsets straight to rulesFromConseq, which
            # starts at two-item consequents, so rules such as
            # {2, 3} --> {5} were never reported.
            H1 = calConf(freqSet, H1, supportData, bigRuleList, minConf)
            # Only surviving consequents are merged further: enlarging a
            # consequent shrinks the antecedent, which can only lower the
            # confidence, so a failed consequent cannot succeed as part of a
            # larger one.
            if i > 1 and len(H1) > 1:
                rulesFromConseq(freqSet, H1, supportData, bigRuleList, minConf)
    return bigRuleList
136 
137 
138 # In[10]:
139 
140 
def testApriori():
    """Run apriori on the sample data at two support thresholds and print the results."""
    separator = "------------------------------------------"
    dataSet = loadDataSet()
    print("dataSet:", dataSet)
    L1, supportData1 = apriori(dataSet, minSupport=0.7)
    print("L(0.7):", L1)
    print("supportData(0.7):", supportData1)
    print(separator)
    L2, supportData2 = apriori(dataSet, minSupport=0.5)
    print("L(0.5):", L2)
    # The original line was garbled ("(0.5:).supportData2"); the label and the
    # argument separator are restored to match the 0.7 case above.
    print("supportData(0.5):", supportData2)
    print(separator)
152 
153 
154 # In[11]:
155 
156 
def testGenerateRules():
    """Mine the sample data at minSupport=0.2 and print the derived rules."""
    dataSet = loadDataSet()
    L1, supportData1 = apriori(dataSet, minSupport=0.2)
    print("L(0.2):", L1)
    # The value printed is the support table, so it is labelled as such.
    print("supportData(0.2):", supportData1)
    # Confidence is a ratio of supports and never exceeds 1.0, so the original
    # threshold of 1.1 could never be met and the demo always printed an empty
    # rule list. Use the library default instead.
    rules = generateRules(L1, supportData1, minConf=0.7)
    print("Rules:", rules)
164 
165 
166 # In[12]:
167 
168 
def main():
    """Run both demos: frequent-itemset mining, then rule generation."""
    testApriori()
    testGenerateRules()
172 
173 
174 # In[22]:
175 
176 
# Run the demos only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

参考:“机器学习实战-ApacheCN”

实现Apriori算法(python)

标签:can   span   算法   支持度   not   ase   code   int   python   

原文地址:https://www.cnblogs.com/share-sjb/p/9977803.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!