标签:
Thanks to akashrajkn@gmail.com. This is a good demo for understanding the candidate elimination algorithm, based on his good work.
rika@rika-UX303UB$ ./a.out <Input> Number of Features:6 <Input> Number of Attributes[0]:2 <Input> (1):rainy <Input> (2):sunny <Input> Number of Attributes[1]:2 <Input> (1):cold <Input> (2):warm <Input> Number of Attributes[2]:2 <Input> (1):normal <Input> (2):high <Input> Number of Attributes[3]:2 <Input> (1):weak <Input> (2):strong <Input> Number of Attributes[4]:2 <Input> (1):cool <Input> (2):warm <Input> Number of Attributes[5]:2 <Input> (1):same <Input> (2):change Input training data: sunny,warm,normal,Strong,warm,same,yes Specific border < sunny warm normal Strong warm same > Generic border < ? ? ? ? ? ? > sunny,warm,high,weak,warm,same,yes Specific border < sunny warm ? weak warm same > Generic border < ? ? ? ? ? ? > rainy,cold,high,strong,warm,change,no Specific border < sunny warm ? weak warm same > Generic border < sunny ? ? ? ? ? > < ? warm ? ? ? ? > < ? ? ? weak ? ? > < ? ? ? ? ? same > sunny,warm,high,strong,cool,change,yes Specific border < sunny warm ? ? ? ? > Generic border < sunny ? ? ? ? ? > < ? warm ? ? ? ? >
This is the complete code:
/*
    Candidate Elimination

    Interactive demo of the candidate elimination algorithm. The user first
    describes the features and their possible values, then enters training
    examples one per line as "value1,value2,...,valueN,yes|no". After every
    accepted example the current specific and generic borders are printed.

    Encoding of one hypothesis: one int per feature.
        0        -> null value (no value accepted for this feature)
        INT_MAX  -> "?" (any value accepted for this feature)
        1..k     -> the k-th value entered for this feature
*/

#include <cstdio>
#include <cstdlib>
#include <cctype>
#include <vector>
#include <string>
#include <stack>
#include <queue>
#include <iterator>
#include <set>
#include <map>
#include <iostream>
#include <sstream>
#include <deque>
#include <cmath>
#include <memory.h>
#include <algorithm>
#include <utility>
#include <climits>

typedef long double ld;
typedef long long ll;
#define all(c) c.begin(),c.end()

using namespace std;

// Splits one input line on ',' and returns every field, including empty ones.
static vector<string> splitFields(const string& line)
{
    vector<string> fields;
    string::size_type start = 0;
    while(true)
    {
        const string::size_type comma = line.find(',', start);
        if(comma == string::npos)
        {
            fields.push_back(line.substr(start));
            break;
        }
        fields.push_back(line.substr(start, comma - start));
        start = comma + 1;
    }
    return fields;
}

// Translates the textual fields of one example into their integer codes.
// instance[i] maps the names of feature i to codes; the last map holds the
// "yes"/"no" label. Returns false and fills `error` when the number of fields
// is wrong or a field is not a known value. Lookups use find() on purpose:
// operator[] would silently insert an unknown name with code 0, which is the
// null marker and would corrupt the specific border.
static bool encodeExample(const vector<string>& fields,
                          const vector< map<string, int> >& instance,
                          vector<int>& encoded,
                          string& error)
{
    if(fields.size() != instance.size())
    {
        ostringstream msg;
        msg << "expected " << instance.size()
            << " comma-separated fields, got " << fields.size();
        error = msg.str();
        return false;
    }

    encoded.assign(fields.size(), 0);
    for(size_t i=0; i<fields.size(); i++)
    {
        const map<string, int>::const_iterator found = instance[i].find(fields[i]);
        if(found == instance[i].end())
        {
            error = "unknown value '" + fields[i] + "'";
            return false;
        }
        encoded[i] = found->second;
    }
    return true;
}

// Returns true when `general` accepts, feature by feature, whatever `other`
// holds: each position of `general` is either "?" or equal to `other`.
// A null (0) position in `other` is never treated as a conflict, so every
// hypothesis counts as at least as general as the initial all-null border.
// Only the first general.size() entries of `other` are inspected, which lets
// `other` be an encoded example carrying a trailing label.
static bool isAtLeastAsGeneral(const vector<int>& general, const vector<int>& other)
{
    for(size_t k=0; k<general.size(); k++)
    {
        if(general[k]!=INT_MAX && general[k]!=other[k] && other[k]!=0)
            return false;
    }
    return true;
}

// Prints one hypothesis as "\t< v1 v2 ... >". "?" stands for INT_MAX; a null
// value (0) has no name in the maps and therefore prints nothing.
static void printHypothesis(const vector<int>& hyp,
                            const vector< map<string, int> >& instance)
{
    cout<<"\t< ";
    for(size_t i=0; i<hyp.size(); i++)
    {
        if( hyp[i]==INT_MAX )
        {
            cout<<"?"<<" ";
            continue;
        }
        for(map<string, int>::const_iterator jt = instance[i].begin(); jt!=instance[i].end(); ++jt)
        {
            if(jt->second == hyp[i])
                cout<<jt->first<<" ";
        }
    }
    cout<<">\n";
}

int main()
{
    int numberFeatures;     //number of features in the dataset
    cout << "<Input> Number of Features:";
    if(!(cin >> numberFeatures) || numberFeatures <= 0)
    {
        cerr << "Invalid number of features." << endl;
        return 1;
    }

    vector <int> numberAttributes(numberFeatures);  //number of attributes for each feature.
    // One name->code map per feature plus one extra map for the label.
    // A vector replaces the original variable-length array of maps, which is
    // not standard C++.
    vector < map<string, int> > instance(numberFeatures+1);

    for(int i=0; i<numberFeatures; i++)
    {
        cout <<"<Input> Number of Attributes[" << i << "]:";
        if(!(cin >> numberAttributes[i]) || numberAttributes[i] < 1)
        {
            cerr << "Invalid number of attributes." << endl;
            return 1;
        }

        for(int j=1; j<=numberAttributes[i]; j++)
        {
            string name;
            cout <<"<Input> (" << j << "):";
            if(!(cin >> name))
            {
                cerr << "Missing attribute name." << endl;
                return 1;
            }
            instance[i][name] = j;  //map attribute name with number (1-based)
        }
    }

    instance[numberFeatures]["no"] = 0;
    instance[numberFeatures]["yes"] = 1;

    //sets used to represent the generic and specific boundaries.
    set < vector<int> > specific, generic;
    specific.insert(vector<int>(numberFeatures, 0));        //most specific: all null
    generic.insert(vector<int>(numberFeatures, INT_MAX));   //most general: all "?"

    cout << endl << "Input training data:" << endl << endl;

    //input the training data
    string data;
    while( getline(cin, data) )
    {
        // Tolerate CRLF line endings.
        if(!data.empty() && data[data.size()-1]=='\r')
            data.erase(data.size()-1);

        // Also skips the rest of the line left behind by the last `cin >>`.
        if(data.empty())
            continue;

        vector<int> currdata;
        string error;
        if(!encodeExample(splitFields(data), instance, currdata, error))
        {
            cerr << "Skipping example: " << error << endl;
            continue;
        }

        if( currdata[numberFeatures]==1 )   //if positive example
        {
            //remove inconsistent hypotheses from generic border
            for(set< vector<int> >::iterator it = generic.begin(); it!=generic.end(); )
            {
                if(!isAtLeastAsGeneral(*it, currdata))
                    generic.erase(it++);    //advance before the node is destroyed
                else
                    ++it;
            }

            //minimally generalize the (single) specific hypothesis
            vector<int> s = *(specific.begin());
            specific.clear();
            for(int i=0; i<numberFeatures; i++)
            {
                if(s[i]==0)
                    s[i] = currdata[i];
                else if(s[i]!=currdata[i])
                    s[i] = INT_MAX;
            }
            specific.insert(s);
        }
        else    //if negative example
        {
            const vector<int> s = *(specific.begin());
            set < vector<int> > candidates;

            for(set< vector<int> >::iterator it = generic.begin(); it!=generic.end(); ++it)
            {
                const vector<int>& g = *it;

                //hypothesis already rejects the example: keep it unchanged
                if(!isAtLeastAsGeneral(g, currdata))
                {
                    candidates.insert(g);
                    continue;
                }

                //otherwise replace it by its minimal specializations: turn one
                //"?" into any value other than the one in the example
                for(int i=0; i<numberFeatures; i++)
                {
                    if( g[i]!=INT_MAX )
                        continue;

                    for(int j=1; j<=numberAttributes[i]; j++)   //values are 1-based
                    {
                        if(j==currdata[i])
                            continue;

                        vector<int> specialized = g;
                        specialized[i] = j;

                        //keep it only if it is still more general than the specific hyp.
                        if(isAtLeastAsGeneral(specialized, s))
                            candidates.insert(specialized);
                    }
                }
            }

            //remove from generic any hyp that is more specific than another hyp in generic
            generic.clear();
            for(set< vector<int> >::iterator it = candidates.begin(); it!=candidates.end(); ++it)
            {
                bool subsumed = false;
                for(set< vector<int> >::iterator jt = candidates.begin(); jt!=candidates.end(); ++jt)
                {
                    if(it==jt)
                        continue;
                    if(isAtLeastAsGeneral(*jt, *it))
                    {
                        subsumed = true;
                        break;
                    }
                }
                if(!subsumed)
                    generic.insert(*it);
            }
        }

        /*
            Printing Specific and General borders
        */
        cout<<"\n\tSpecific border"<<endl;
        printHypothesis(*(specific.begin()), instance);

        cout<<"\n\tGeneric border"<<endl;
        for(set< vector<int> >::iterator it = generic.begin(); it!=generic.end(); ++it)
        {
            printHypothesis(*it, instance);
        }
        cout<<endl;
    }

    cout<<"\ncandidate elem done! :)\n";

    return 0;
}
The following walkthrough is from 血糯米Otomii and will help you understand the algorithm better.
样本集:
把S集合初始化为H中极大特殊假设:
把G集合初始化为H中极大一般假设:
首先加载第一条和第二条样本:
这个过程是特殊向一般的转变,这个过程非常地类似FIND-S算法
接着我们处理第三条样本:
让我们回到数据
我们会发现,Sky、AirTemp和Forecast和以前的数据不一致,我们可以怀疑是这三个数据导致最后结果的变化。
所以,我们就针对这3个数据进行一次特殊化:
接着,我们输入第四条样本:
在处理第四条样本的时候,我们先对于S集合进行一般化:
然后,为了让G集合覆盖S集合,我们需要剔除,过程为
在处理完了这四个样本后,我们就可以获取所有的假设:
当前为6个假设,当我们可以获取到更多的训练集的时候,我们可以划出更小的设计空间。
当我使用这6个假设对测试集进行测试的时候,我们可以使用这6个假设同时对测试样本进行检测,每个假设都有自己的权重,如果最后的结果超过80%,那么就测试通过。
本算法弊端:
标签:
原文地址:http://www.cnblogs.com/jesse123/p/5571374.html