标签:
All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: "ACGAATTCCG". When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.
Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.
For example,
Given s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT", Return: ["AAAAACCCCC", "CCCCCAAAAA"].
#include <iostream>
#include <map>
#include <string>
#include <vector>

using namespace std;

// Attempt 1 (kept for reference): count every 10-letter window in a map keyed
// by the substring itself. The author reports this version hit
// "Memory Limit Exceeded" on the judge, which motivated the compact integer
// key used by findRepeatedDnaSequences() below.
//
// It carries a distinct name because two functions with the same signature
// cannot coexist in one translation unit.
//
// Returns each 10-letter substring that occurs more than once, reported once,
// in the order in which its second occurrence is encountered.
vector<string> findRepeatedDnaSequencesByString(string s)
{
    const string::size_type kWindow = 10;
    vector<string> repeated;

    // A string of at most 10 characters holds at most one window, so nothing
    // can repeat. This guard also keeps the loop bound below well-defined.
    if (s.size() <= kWindow)
        return repeated;

    map<string, int> seen;
    // `i + kWindow <= s.size()` includes the final window, which the original
    // `i != s.size() - 10` bound skipped.
    for (string::size_type i = 0; i + kWindow <= s.size(); ++i) {
        const string window = s.substr(i, kWindow);
        // operator[] value-initialises a missing count to 0, so the count
        // reaches exactly 2 on the second sighting; later sightings are ignored.
        if (++seen[window] == 2)
            repeated.push_back(window);
    }
    return repeated;
}

// Packs a nucleotide string into an integer, two bits per character:
// 'C' -> 1, 'G' -> 2, 'T' -> 3, and every other character (including 'A') -> 0.
// The first character ends up in the most significant position.
//
// The mapping is only collision-free for strings over {A, C, G, T}; any other
// character is indistinguishable from 'A'. A 10-letter window needs 20 bits and
// therefore always fits in an int. Accumulation is done in unsigned arithmetic
// so that longer inputs wrap instead of triggering signed-overflow undefined
// behaviour.
int myhashkey(string s)
{
    unsigned int key = 0;
    for (string::size_type i = 0; i != s.size(); ++i) {
        key <<= 2;
        if (s[i] == 'C')
            key += 1;
        else if (s[i] == 'G')
            key += 2;
        else if (s[i] == 'T')
            key += 3;
    }
    return static_cast<int>(key);
}

// Attempt 2: same algorithm as above, but the map is keyed by the 20-bit
// integer produced by myhashkey() instead of by the 10-character string.
//
// Parameters:
//   s - the DNA sequence; expected to consist of 'A', 'C', 'G' and 'T'.
// Returns:
//   every 10-letter substring that occurs more than once in `s`, each reported
//   once, in the order in which its second occurrence is encountered. Returns
//   an empty vector when `s` has 10 or fewer characters.
vector<string> findRepeatedDnaSequences(string s)
{
    const string::size_type kWindow = 10;
    vector<string> repeated;

    // Fewer than two windows means nothing can repeat; this guard also keeps
    // the loop bound below well-defined.
    if (s.size() <= kWindow)
        return repeated;

    map<int, int> seen;
    for (string::size_type i = 0; i + kWindow <= s.size(); ++i) {
        // Build the window and its key once per position rather than
        // recomputing them for every map operation.
        const string window = s.substr(i, kWindow);
        if (++seen[myhashkey(window)] == 2)
            repeated.push_back(window);
    }
    return repeated;
}
标签:
原文地址:http://blog.csdn.net/li_chihang/article/details/44024753