标签:style blog color 使用 ar for 2014 art div
给定一个 query 和一个 text，均由小写字母组成。要求在 text 中找出以同样的顺序连续出现在 query 中的最长连续字母序列的长度。例如，query 为“acbac”，text 为“acaccbabb”，那么 text 中的“cba”为最长的连续出现在 query 中的字母序列，因此返回结果应该为其长度 3。
// Reports whether pattern P occurs as a contiguous substring of text T,
// using a Rabin-Karp rolling hash (radix 128, modulus 6999997).
//
// - An empty P matches every T (including an empty T).
// - A P longer than T never matches.
// - Every hash hit is confirmed by a direct character comparison, so a hash
//   collision can never produce a false positive.
bool RabinKarpMatch(const std::string& T, const std::string& P)
{
    const long long d = 128;      // radix of the polynomial hash
    const long long q = 6999997;  // modulus of the polynomial hash

    const std::size_t n = T.length();
    const std::size_t m = P.length();

    // Without this guard the preprocessing loop would index T past its end.
    if (m > n)
        return false;
    if (m == 0)
        return true;

    // Hash bytes as unsigned values so every intermediate residue stays in
    // [0, q); a negative char would otherwise yield a negative remainder and
    // make equal windows compare unequal.
    auto code = [](char c) {
        return static_cast<long long>(static_cast<unsigned char>(c));
    };

    // h = d^(m-1) mod q: the weight of the leading character of a window.
    long long h = 1;
    for (std::size_t i = 1; i < m; ++i)
        h = (h * d) % q;

    // Preprocessing: hash of the pattern and of the first window of the text.
    long long p = 0;
    long long t = 0;
    for (std::size_t i = 0; i < m; ++i)
    {
        p = (p * d + code(P[i])) % q;
        t = (t * d + code(T[i])) % q;
    }

    // Slide the window over every start position s in [0, n - m].
    for (std::size_t s = 0;; ++s)
    {
        if (t == p && T.compare(s, m, P) == 0)
            return true;

        if (s + m >= n)
            break;  // that was the last window; do not roll past the text

        // Drop T[s] from the front of the window and append T[s + m].
        const long long without_lead = (t + q - (code(T[s]) * h) % q) % q;
        t = (without_lead * d + code(T[s + m])) % q;
    }
    return false;
}
// Method 1 (brute force + Rabin-Karp matching).
//
// Returns the length of the longest contiguous substring of `query` that also
// occurs contiguously in `text`, or 0 when either string is empty. The two
// strings may have any relative length.
//
// For each start position in `query`, progressively longer substrings are
// searched for in `text` with RabinKarpMatch.
size_t GetLargestCommomSubLen(const std::string& text, const std::string& query)
{
    const size_t query_len = query.length();
    const size_t text_len = text.length();
    if (text_len == 0 || query_len == 0)
        return 0;

    size_t max_len = 0;
    for (size_t start = 0; start < query_len; ++start)
    {
        // A candidate can be no longer than what is left of the query, and no
        // longer than the text itself.
        const size_t remaining = query_len - start;
        const size_t limit = remaining < text_len ? remaining : text_len;

        // Only lengths above the current best can improve the answer. If the
        // substring of a given length is absent from the text, every longer
        // substring with the same start is absent too, so stop at the first miss.
        for (size_t len = max_len + 1; len <= limit; ++len)
        {
            if (!RabinKarpMatch(text, query.substr(start, len)))
                break;
            max_len = len;
        }
    }
    return max_len;
}
// Method 2 (dynamic programming, full table).
//
// Returns the length of the longest contiguous substring shared by `text` and
// `query`, or 0 when either string is empty.
int GetLongestCommSubstrLen(const std::string& text, const std::string& query)
{
    const int text_len = static_cast<int>(text.length());
    const int query_len = static_cast<int>(query.length());
    if (text_len == 0 || query_len == 0)
        return 0;

    // L[i][j] is the length of the longest common substring that ends exactly
    // at text[i] and query[j]; it is 0 when the two characters differ.
    std::vector<std::vector<int>> L(text_len, std::vector<int>(query_len, 0));

    int longest = 0;
    for (int i = 0; i < text_len; ++i)
    {
        for (int j = 0; j < query_len; ++j)
        {
            if (text[i] != query[j])
                continue;  // the cell keeps its initial value of 0

            // Cells in the first row or column have no diagonal predecessor.
            L[i][j] = (i > 0 && j > 0) ? L[i - 1][j - 1] + 1 : 1;
            if (longest < L[i][j])
                longest = L[i][j];
        }
    }
    return longest;
}
// This method trades space for time. Even so, the space can still be
// optimized; the memory cost is not as frightening as it may seem. When
// computing the Fibonacci sequence, for example, each term depends only on the
// two terms before it, so storing anything more is wasted space. The same
// holds here: from the recurrence L[i][j] = L[i-1][j-1] + 1 it is clear that a
// row of L depends only on the previous row, whose values have already been
// computed. Two rows of storage are therefore enough: each time a new row has
// been computed, promote it to row 0 with a single swap.
// Method 2 with space optimization (dynamic programming, two rows).
//
// Returns the length of the longest contiguous substring shared by `text` and
// `query`, or 0 when either string is empty. Only the previous and the current
// row of the DP table are kept, so the extra memory is proportional to the
// length of `query`.
int GetLongestCommSubstrLen(const std::string& text, const std::string& query)
{
    const std::size_t text_len = text.length();
    const std::size_t query_len = query.length();
    if (text_len == 0 || query_len == 0)
        return 0;

    // Both rows are shifted right by one: slot j + 1 belongs to query[j], and
    // slot 0 is a permanent zero standing in for "no diagonal predecessor".
    // That lets the first row and first column share the general recurrence.
    std::vector<int> prev(query_len + 1, 0);
    std::vector<int> curr(query_len + 1, 0);

    int longest = 0;
    for (std::size_t i = 0; i < text_len; ++i)
    {
        for (std::size_t j = 0; j < query_len; ++j)
        {
            // Every cell is written on every row. The mismatch case must be
            // reset to 0 explicitly, because after the swap below `curr`
            // still holds the values from two rows back.
            curr[j + 1] = (text[i] == query[j]) ? prev[j] + 1 : 0;
            if (longest < curr[j + 1])
                longest = curr[j + 1];
        }
        prev.swap(curr);  // the row just computed becomes the previous row
    }
    return longest;
}
最长公共子串问题(方法一:暴力+RK匹配,方法二:DP+空间优化)
标签:style blog color 使用 ar for 2014 art div
原文地址:http://blog.csdn.net/u012333003/article/details/39082981