标签:
Time Limit: 5000MS | Memory Limit: 65536K | |
Total Submissions: 10800 | Accepted: 2967 |
Description
You may have wondered why most extraterrestrial life forms resemble humans, differing by superficial traits such as height, colour, wrinkles, ears, eyebrows and the like. A few bear no human resemblance; these typically have geometric or amorphous shapes like cubes, oil slicks or clouds of dust.
The answer is given in the 146th episode of Star Trek - The Next Generation, titled The Chase. It turns out that the vast majority of the quadrant's life forms ended up with a large fragment of common DNA.
Given the DNA sequences of several life forms represented as strings of letters, you are to find the longest substring that is shared by more than half of them.
Input
Standard input contains several test cases. Each test case begins with 1 ≤ n ≤ 100, the number of life forms. n lines follow; each contains a string of lower case letters representing the DNA sequence of a life form. Each DNA sequence contains at least one and not more than 1000 letters. A line containing 0 follows the last test case.
Output
For each test case, output the longest string or strings shared by more than half of the life forms. If there are many, output all of them in alphabetical order. If there is no solution with at least one letter, output "?". Leave an empty line between test cases.
Sample Input
3
abcdefg
bcdefgh
cdefghi
3
xxx
yyy
zzz
0
Sample Output
bcdefg
cdefgh

?
题意: n个字符串, 求大于n/2个字符串的最长子串。 如果有多个按字典序输出。
大致思路:首先把所有字符串用不相同的一个字符隔开(用同一个字符隔开wa了好久), 这里我是用数字来隔开的。
然后依次求 sa、lcp。我们可以二分答案的长度:对于长度 x,把后缀按 sa 的顺序进行分组(lcp[i] < x 时断开),然后对每一组统计其中的后缀来自多少个不同的字符串,如果大于 n/2 则说明长度 x 可行。字典序不需要另外排序,因为我们是按照 sa 数组的顺序遍历 lcp 的,所以直接得到的答案就是按字典序从小到大排列的。
// Suffix-array core for "longest substring shared by more than half of the
// strings": all inputs are concatenated into `s`, each followed by its own
// unique separator value, then the suffix array and LCP table are built and
// `solve` answers "is there a shared substring of length x?".
#include <set>
#include <map>
#include <cmath>
#include <ctime>
#include <queue>
#include <stack>
#include <cstdio>
#include <string>
#include <vector>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <algorithm>
using namespace std;
typedef unsigned long long ull;
typedef long long ll;
const int inf = 0x3f3f3f3f;
const double eps = 1e-8;
const int M = 2e6+10;

// Concatenated text; `len` is the number of used cells. Stored as int so that
// separator values larger than any character fit.
int s[M];
// sa[r]  : start index of the suffix with rank r (r = 0 is the empty suffix at `len`).
// rnk[i] : rank of the suffix starting at i. The array is deliberately not
//          called `rank`: with `using namespace std;` that name is ambiguous
//          with std::rank from <type_traits> in C++11 and later.
// lcp[r] : length of the common prefix of the suffixes sa[r-1] and sa[r].
// tmp    : scratch ranks for one doubling round; k is the current half-width.
int sa[M], tmp[M], rnk[M], lcp[M], k, len;

// Orders suffixes i and j by their first 2*k symbols, using the ranks of the
// first k symbols (rnk[.]) and of the following k symbols (rnk[. + k]).
// A suffix that has no second half sorts before any that has one.
bool cmp(int i, int j)
{
    if (rnk[i] != rnk[j])
        return rnk[i] < rnk[j];
    const int x = i + k <= len ? rnk[i + k] : -1;
    const int y = j + k <= len ? rnk[j + k] : -1;
    return x < y;
}

// Builds sa[0..len] for s[0..len) by prefix doubling; rnk[] ends up holding
// the rank of every suffix, including the empty one at index len.
void build_sa()
{
    for (int i = 0; i <= len; i++)
    {
        sa[i] = i;
        rnk[i] = i < len ? s[i] : -1;
    }
    for (k = 1; k <= len; k *= 2)
    {
        sort(sa, sa + len + 1, cmp);
        // Re-rank: a suffix gets a new rank only if it compares strictly
        // greater than its predecessor in the sorted order.
        tmp[sa[0]] = 0;
        for (int i = 1; i <= len; i++)
            tmp[sa[i]] = tmp[sa[i - 1]] + (cmp(sa[i - 1], sa[i]) ? 1 : 0);
        for (int i = 0; i <= len; i++)
            rnk[i] = tmp[i];
    }
}

// Fills lcp[1..len] from sa[] by walking the suffixes in text order and
// reusing the previous match length minus one as the starting point.
// lcp[0] is set to 0 (rank 0 has no predecessor).
void Get_Lcp()
{
    // Inverse of sa over the whole range, empty suffix included.
    for (int i = 0; i <= len; i++)
        rnk[sa[i]] = i;

    int h = 0;
    lcp[0] = 0;
    for (int i = 0; i < len; i++)
    {
        // rnk[i] >= 1 here: rank 0 belongs to the empty suffix at index len.
        const int j = sa[rnk[i] - 1];
        if (h > 0)
            h--;
        while (i + h < len && j + h < len && s[i + h] == s[j + h])
            h++;
        lcp[rnk[i]] = h;
    }
}

// pos[i] : 1-based id of the input string that owns text position i; callers
//          store a value < 1 for separator positions.
// vis[id]: scratch marks used by solve(); must be all zero when solve() is
//          entered, and solve() leaves it all zero.
int vis[110], pos[M];
// ans[0..tot) : start indices (into s) reported by the last successful solve().
int ans[M], tot;
// Unused by the code in this file; kept so existing references keep linking.
int Stack[M], top;

// Returns the owning string id of the suffix starting at `start`, or 0 when
// the suffix belongs to no string (separator, empty suffix, id out of range).
static int owner_of(int start)
{
    if (start < 0 || start >= len)
        return 0;
    const int id = pos[start];
    const int slots = static_cast<int>(sizeof(vis) / sizeof(vis[0]));
    return (id >= 1 && id < slots) ? id : 0;
}

// Decides whether some substring of length x occurs in more than n/2 of the
// n input strings.
//
// Suffixes are scanned in sa order and split into groups wherever the LCP
// with the next suffix drops below x; every group therefore shares one
// distinct prefix of length x. A group qualifies when its suffixes come from
// more than n/2 different strings.
//
// On success ans[0..tot) is overwritten with one start index per qualifying
// group, in sa (i.e. lexicographic) order, and true is returned. On failure
// ans and tot are left untouched, so the result of an earlier successful call
// stays available. x < 1 is never a solution and returns false.
bool solve(int x, int n)
{
    if (x < 1)
        return false;

    bool flag = false;   // at least one qualifying group found in this call
    bool multi = false;  // current group already contains two or more suffixes
    int cnt = 0;         // distinct owners among the group's earlier members

    // i names the link between sa[i-1] and sa[i]; i == len + 1 is a virtual
    // boundary after the last suffix so the final group is always closed,
    // independent of whatever lcp[len + 1] happens to contain.
    for (int i = 1; i <= len + 1; i++)
    {
        const int owner = owner_of(sa[i - 1]);
        const bool fresh = owner != 0 && !vis[owner];
        const int link = i <= len ? lcp[i] : 0;

        if (link < x)
        {
            // sa[i-1] is the last member of the current group.
            if (multi && cnt + (fresh ? 1 : 0) > n / 2)
            {
                if (!flag)
                    tot = 0;
                flag = true;
                ans[tot++] = sa[i - 1];
            }
            // Only groups that marked something need their marks cleared.
            if (cnt > 0)
                memset(vis, 0, sizeof(vis));
            cnt = 0;
            multi = false;
            continue;
        }

        if (fresh)
        {
            vis[owner] = 1;
            cnt++;
        }
        multi = true;
    }
    return flag;
}
int string_len[110], c1; 110 void init() 111 { 112 c1 = tot = 0; 113 memset(vis, 0, sizeof (vis)); 114 memset(string_len, 0, sizeof (string_len)); 115 } 116 char cacaca[1100]; 117 int main() 118 { 119 #ifndef ONLINE_JUDGE 120 freopen("in.txt","r",stdin); 121 // freopen("wa.txt","w",stdout); 122 #endif 123 int n, cas = 1; 124 while ( scanf ("%d", &n), n) 125 { 126 if (cas != 1) 127 printf("\n"); 128 cas++; 129 init(); 130 len = 0; 131 int del = 1; 132 for (int i = 0; i < n; i++) 133 { 134 scanf ("%s", cacaca); 135 int sub_len = strlen(cacaca); 136 for (int j = 0; j < sub_len; j++) 137 { 138 s[len++] = cacaca[j]; 139 } 140 s[len++] = M+del; 141 del++; 142 string_len[c1] = sub_len + string_len[c1-1]; 143 if (c1) 144 string_len[c1]++; 145 c1++; 146 } 147 if (n == 1) 148 { 149 for (int i = 0; i < len-1; i++) 150 { 151 printf("%c", s[i]); 152 } 153 continue; 154 } 155 for (int i = 0, j = 0; i < len; i++) 156 { 157 if (i >= string_len[j]) 158 { 159 pos[i] = -1; 160 j++; 161 continue; 162 } 163 pos[i] = j+1; 164 } 165 build_sa(); 166 Get_Lcp(); 167 168 int ua = 0, ub = M; 169 while (ua + 1 < ub) 170 { 171 int mid = (ua + ub) >> 1; 172 if (mid&&solve(mid, n) == true) 173 { 174 175 ua = mid; 176 } 177 else 178 ub = mid; 179 } 180 if (tot == 0) 181 printf("?\n"); 182 else 183 { 184 if (ua == 0) 185 { 186 printf("?\n"); 187 continue; 188 } 189 for (int i = 0; i < tot; i++) 190 { 191 for (int j = ans[i]; j < ans[i]+ua; j++) 192 { 193 printf("%c", s[j]); 194 } 195 printf("\n"); 196 } 197 } 198 } 199 return 0; 200 }
POJ3294--Life Forms 后缀数组+二分答案 大于k个字符串的最长公共子串
标签:
原文地址:http://www.cnblogs.com/oneshot/p/4448746.html