所有DNA由一系列缩写为A,C,G和 T 的核苷酸组成,例如:“ACGAATTCCG”。在研究DNA时,识别DNA中的重复序列有时非常有用。
编写一个函数来查找DNA分子中所有出现超过一次的10个字母长的序列(子串)。
详见:https://leetcode.com/problems/repeated-dna-sequences/description/
// Finds every 10-letter substring that occurs more than once in a DNA string.
//
// Approach: slide a 10-character window across the input while maintaining a
// rolling 30-bit integer key for the window (3 bits per character), and count
// how often each key has been seen in a hash map.
//
// The key uses the low three bits of each character. For the ASCII
// nucleotide letters these are distinct ('A' -> 1, 'C' -> 3, 'T' -> 4,
// 'G' -> 7), so for input made only of A/C/G/T two windows share a key
// exactly when they are the same text. Other characters can collide under
// this encoding, so the input is assumed to contain only those four letters.
class Solution {
public:
    // Returns each repeated 10-letter sequence exactly once, ordered by the
    // position at which its second occurrence is encountered. Inputs of
    // length 10 or less yield an empty result, since they hold at most one
    // window and nothing can repeat.
    std::vector<std::string> findRepeatedDnaSequences(std::string s) {
        constexpr std::string::size_type kWindow = 10;  // sequence length
        constexpr int kBitsPerChar = 3;                 // key bits per letter
        constexpr int kCharMask = 7;                    // low three bits
        // 27 set bits: keeps the 9 most recent letters of the key, so the
        // oldest letter falls out before the next one is shifted in.
        constexpr int kPrefixMask = 0x7ffffff;

        std::vector<std::string> res;
        if (s.size() <= kWindow) {
            return res;
        }

        // Prime the key with the first nine letters; the main loop appends
        // the tenth and thereby completes the first full window.
        int key = 0;
        for (std::string::size_type i = 0; i + 1 < kWindow; ++i) {
            key = (key << kBitsPerChar) | (s[i] & kCharMask);
        }

        std::unordered_map<int, int> seen;
        for (std::string::size_type end = kWindow - 1; end < s.size(); ++end) {
            // Drop the oldest letter and append the newest one.
            key = ((key & kPrefixMask) << kBitsPerChar) | (s[end] & kCharMask);

            // One lookup per window: operator[] starts a missing count at 0,
            // so the count reaches 2 exactly on the second occurrence and
            // each repeated sequence is reported only once.
            if (++seen[key] == 2) {
                res.push_back(s.substr(end + 1 - kWindow, kWindow));
            }
        }
        return res;
    }
};
参考:https://www.cnblogs.com/grandyang/p/4284205.html