标签:can span 算法 支持度 not ase code int python
# coding: utf-8

# Apriori frequent-itemset mining and association-rule generation in Python.

# Import required libraries.
# NOTE(review): nothing in this script uses a numpy name; the wildcard import
# is kept only so that code outside this section that may rely on it keeps
# working. It rebinds some builtins (e.g. sum/any/all), so this script
# deliberately avoids them.
from numpy import *


def loadDataSet():
    """Return the small hard-coded list of transactions used by the demos."""
    return [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]


def createC1(dataSet):
    """Build the candidate 1-itemsets from a list of transactions.

    Args:
        dataSet: iterable of transactions, each an iterable of hashable,
            mutually comparable items.

    Returns:
        A list of single-item frozensets, ordered by item, one per distinct
        item found in ``dataSet``.
    """
    uniqueItems = {item for transaction in dataSet for item in transaction}
    # Return a real list (not a lazy ``map`` object): scanD walks the
    # candidates once per transaction, and a Python 3 iterator would be
    # exhausted after the first transaction.
    return [frozenset([item]) for item in sorted(uniqueItems)]


def scanD(D, Ck, minSupport):
    """Compute the support of each candidate in Ck over the transactions D.

    Args:
        D: sequence of transactions (sets).
        Ck: iterable of candidate itemsets (frozensets).
        minSupport: minimum fraction of transactions that must contain an
            itemset for it to be kept.

    Returns:
        ``(retList, supportData)`` where ``retList`` holds the candidates
        whose support is >= ``minSupport`` and ``supportData`` maps every
        candidate that occurs in at least one transaction to its support.
    """
    Ck = list(Ck)  # tolerate one-shot iterators; Ck is traversed per transaction
    ssCnt = {}
    for tid in D:
        for can in Ck:
            if can.issubset(tid):
                ssCnt[can] = ssCnt.get(can, 0) + 1
    numItems = float(len(D))
    retList = []
    supportData = {}
    for key, count in ssCnt.items():
        support = count / numItems
        if support >= minSupport:
            retList.append(key)
        supportData[key] = support
    # Same ordering as repeatedly inserting at the front, without the
    # quadratic cost of list.insert(0, ...).
    retList.reverse()
    return retList, supportData


def aprioriGen(Lk, k):
    """Join frequent (k-1)-itemsets into candidate k-itemsets.

    Two itemsets are merged when their first ``k - 2`` items, taken in
    sorted order, are equal.

    Args:
        Lk: list of frozensets, all of size ``k - 1``.
        k: size of the itemsets to generate.

    Returns:
        A list with the union of every pair of itemsets whose sorted
        prefixes match.
    """
    # Sort BEFORE slicing: set iteration order is unspecified, so slicing the
    # unsorted items would compare arbitrary elements instead of the prefix.
    prefixes = [sorted(itemset)[:k - 2] for itemset in Lk]
    retList = []
    lenLk = len(Lk)
    for i in range(lenLk):
        for j in range(i + 1, lenLk):
            if prefixes[i] == prefixes[j]:
                retList.append(Lk[i] | Lk[j])
    return retList


def apriori(dataSet, minSupport=0.5):
    """Find all frequent itemsets of ``dataSet``.

    Args:
        dataSet: list of transactions (iterables of items).
        minSupport: minimum support an itemset needs to count as frequent.

    Returns:
        ``(L, supportData)`` where ``L[i]`` is the list of frequent itemsets
        of size ``i + 1`` and ``supportData`` maps every scanned candidate
        to its support.
    """
    C1 = createC1(dataSet)
    D = list(map(set, dataSet))
    print('D:', D)
    L1, supportData = scanD(D, C1, minSupport)
    L = [L1]
    k = 2
    while len(L[k - 2]) > 0:
        Ck = aprioriGen(L[k - 2], k)
        Lk, supK = scanD(D, Ck, minSupport)
        supportData.update(supK)
        if len(Lk) == 0:
            break
        L.append(Lk)
        k += 1
    return L, supportData


def calConf(freqSet, H, supportData, brl, minConf=0.7):
    """Evaluate the rules ``(freqSet - conseq) -> conseq`` for each conseq in H.

    Args:
        freqSet: the frequent itemset (frozenset) the rules are derived from.
        H: list of candidate consequents (frozensets, subsets of freqSet).
        supportData: mapping from itemset to support.
        brl: list that accepted rules are appended to, as
            ``(antecedent, consequent, confidence)`` tuples.
        minConf: minimum confidence for a rule to be accepted.

    Returns:
        The consequents from ``H`` whose rule reached ``minConf``.
    """
    prunedH = []
    for conseq in H:
        antecedent = freqSet - conseq
        conf = supportData[freqSet] / supportData[antecedent]
        if conf >= minConf:
            print(antecedent, '-->', conseq, 'conf', conf)
            brl.append((antecedent, conseq, conf))
            prunedH.append(conseq)
    return prunedH


def rulesFromConseq(freqSet, H, supportData, brl, minConf=0.7):
    """Recursively grow the consequents in H and evaluate the larger rules.

    The consequents in ``H`` themselves are NOT evaluated here; only the
    consequents one item larger, built by merging them, are. The caller is
    expected to have tested ``H`` already.

    Args:
        freqSet: the frequent itemset (frozenset) the rules are derived from.
        H: non-empty list of equally sized consequents (frozensets).
        supportData: mapping from itemset to support.
        brl: list that accepted rules are appended to.
        minConf: minimum confidence for a rule to be accepted.
    """
    m = len(H[0])
    if len(freqSet) > (m + 1):
        Hmpl = aprioriGen(H, m + 1)
        Hmpl = calConf(freqSet, Hmpl, supportData, brl, minConf)
        print('Hmpl=', Hmpl)
        print('len(Hmpl)=', len(Hmpl), 'len(freqSet)=', len(freqSet))
        # At least two surviving consequents are needed to merge further.
        if len(Hmpl) > 1:
            rulesFromConseq(freqSet, Hmpl, supportData, brl, minConf)


def generateRules(L, supportData, minConf=0.7):
    """Generate association rules from the frequent itemsets in L.

    Args:
        L: list of lists of frequent itemsets as returned by ``apriori``
            (``L[i]`` holds the itemsets of size ``i + 1``).
        supportData: mapping from itemset to support.
        minConf: minimum confidence for a rule to be reported.

    Returns:
        A list of ``(antecedent, consequent, confidence)`` tuples.
    """
    bigRuleList = []
    # Start at index 1: single-item sets cannot be split into a rule.
    for i in range(1, len(L)):
        for freqSet in L[i]:
            H1 = [frozenset([item]) for item in freqSet]
            # Always test the single-item consequents first. Handing H1
            # straight to rulesFromConseq would skip them for itemsets of
            # three or more items, because that function only evaluates
            # the merged, larger consequents.
            H1 = calConf(freqSet, H1, supportData, bigRuleList, minConf)
            if i > 1 and len(H1) > 1:
                rulesFromConseq(freqSet, H1, supportData, bigRuleList, minConf)
    return bigRuleList


def testApriori():
    """Print the frequent itemsets of the demo data for two support levels."""
    dataSet = loadDataSet()
    print('dataSet:', dataSet)
    L1, supportData1 = apriori(dataSet, minSupport=0.7)
    print('L(0.7):', L1)
    print('supportData(0.7):', supportData1)
    print('------------------------------------------')
    L2, supportData2 = apriori(dataSet, minSupport=0.5)
    print('L(0.5):', L2)
    print('supportData(0.5):', supportData2)
    print('------------------------------------------')


def testGenerateRules():
    """Mine the demo data at minSupport=0.2 and print the generated rules."""
    dataSet = loadDataSet()
    L1, supportData1 = apriori(dataSet, minSupport=0.2)
    print('L(0.2):', L1)
    print('minSupport(0.2):', supportData1)
    # NOTE(review): an itemset's support never exceeds that of its subsets,
    # so confidence is at most 1.0 and minConf=1.1 yields no rules. Kept as
    # in the original; lower it (e.g. 0.7) to see rules in the output.
    rules = generateRules(L1, supportData1, minConf=1.1)
    print('Rules:', rules)


def main():
    """Run both demos."""
    testApriori()
    testGenerateRules()


if __name__ == "__main__":
    main()
参考:“机器学习实战-ApacheCN”
标签:can span 算法 支持度 not ase code int python
原文地址:https://www.cnblogs.com/share-sjb/p/9977803.html