标签:
字符串匹配,实现c++ strstr()函数
// Brute-force substring search.
//
// Returns the index of the first occurrence of `needle` in `haystack`,
// 0 when `needle` is empty, and -1 when `needle` does not occur.
int strStr(std::string haystack, std::string needle) {
    const int hSize = static_cast<int>(haystack.size());
    const int nSize = static_cast<int>(needle.size());
    if (hSize < nSize)
        return -1;
    if (nSize == 0)
        return 0;

    // Try every alignment of needle inside haystack. compare() checks the
    // window in place, so no temporary substring is allocated per offset.
    const int lastStart = hSize - nSize;
    for (int i = 0; i <= lastStart; ++i) {
        if (haystack.compare(i, nSize, needle) == 0)
            return i;
    }
    return -1;
}
Rabin–Karp 算法的具体说明参考维基百科:https://en.wikipedia.org/wiki/Rabin–Karp_algorithm
// XOR-folds every character of `str` into one char; returns 0 for an empty
// string. Different hash values imply different strings, but equal hash
// values do NOT imply equal strings (collisions are common with 8 bits).
char hash(const std::string& str)
{
    char all = 0;
    for (char c : str)
        all ^= c;
    return all;
}

// Rabin-Karp style substring search.
//
// Returns the index of the first occurrence of `needle` in `haystack`,
// 0 when `needle` is empty, and -1 when `needle` does not occur.
//
// A window whose hash differs from the needle's hash cannot match; a window
// with the same hash may still be a collision, so it is verified character
// by character. Any hash function works for the filter (std::hash<std::string>
// is an alternative), but the XOR hash used here can be rolled in O(1).
int strStr(std::string haystack, std::string needle) {
    const int hSize = static_cast<int>(haystack.size());
    const int nSize = static_cast<int>(needle.size());
    if (hSize < nSize)
        return -1;
    if (nSize == 0)
        return 0;

    // Explicit global qualification: a plain `hash(...)` call becomes
    // ambiguous with std::hash when `using namespace std;` is in effect.
    const char target = ::hash(needle);
    char window = ::hash(haystack.substr(0, nSize));

    const int lastStart = hSize - nSize;
    for (int i = 0; ; ++i) {
        if (window == target && haystack.compare(i, nSize, needle) == 0)
            return i;
        if (i == lastStart)
            break;
        // Roll the hash one position to the right: XOR is its own inverse,
        // so XOR-ing the outgoing character removes it and XOR-ing the
        // incoming character adds it.
        window ^= haystack[i];
        window ^= haystack[i + nSize];
    }
    return -1;
}
KMP(Knuth–Morris–Pratt)算法的具体说明参考维基百科:https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm
// Builds the KMP partial-match ("next") table for pattern `s`.
//
// next[0] is the sentinel -1. For i >= 1, next[i] is the length of the
// longest proper prefix of s[0..i-1] that is also a suffix of it.
// Returns an empty table for an empty pattern.
std::vector<int> buildNextArray(const std::string& s)
{
    std::vector<int> next(s.size());
    if (s.empty())
        return next;  // nothing to fill; writing next[0] would be out of bounds

    const int len = static_cast<int>(s.size());
    next[0] = -1;
    if (len > 1)
        next[1] = 0;

    int i = 2;  // table slot currently being computed
    int j = 0;  // length of the border being extended
    while (i < len)
    {
        if (s[i - 1] == s[j])
            next[i++] = ++j;   // border grows by one character
        else if (j > 0)
            j = next[j];       // fall back to the next shorter border
        else
            next[i++] = 0;     // no border ends at position i - 1
    }
    return next;
}

// Knuth-Morris-Pratt substring search.
//
// Returns the index of the first occurrence of `needle` in `haystack`,
// 0 when `needle` is empty, and -1 when `needle` does not occur.
int strStr(std::string haystack, std::string needle) {
    const int hSize = static_cast<int>(haystack.size());
    const int nSize = static_cast<int>(needle.size());
    if (hSize < nSize)
        return -1;
    if (nSize == 0)
        return 0;

    const std::vector<int> next = buildNextArray(needle);
    int start = 0;  // haystack index where the current candidate match begins
    int i = 0;      // number of needle characters matched so far
    while (start <= hSize - nSize)
    {
        if (haystack[start + i] == needle[i])
        {
            if (++i == nSize)
                return start;
        }
        else if (i == 0)
        {
            // Mismatch on the first character: just slide by one.
            ++start;
        }
        else
        {
            // Slide so that the already-matched border of length next[i]
            // lines up with the needle's prefix, then resume after it.
            start += i - next[i];
            i = next[i];
        }
    }
    return -1;
}
标签:
原文地址:http://www.cnblogs.com/zhangbaochong/p/5759294.html