http://poj.org/problem?id=3294
Time Limit: 5000MS | Memory Limit: 65536K
Total Submissions: 9931 | Accepted: 2739
Description
You may have wondered why most extraterrestrial life forms resemble humans, differing by superficial traits such as height, colour, wrinkles, ears, eyebrows and the like. A few bear no human resemblance; these typically have geometric or amorphous shapes like cubes, oil slicks or clouds of dust.
The answer is given in the 146th episode of Star Trek - The Next Generation, titled The Chase. It turns out that the vast majority of the quadrant's life forms ended up with a large fragment of common DNA.
Given the DNA sequences of several life forms represented as strings of letters, you are to find the longest substring that is shared by more than half of them.
Input
Standard input contains several test cases. Each test case begins with 1 ≤ n ≤ 100, the number of life forms. n lines follow; each contains a string of lower case letters representing the DNA sequence of a life form. Each DNA sequence contains at least one and not more than 1000 letters. A line containing 0 follows the last test case.
Output
For each test case, output the longest string or strings shared by more than half of the life forms. If there are many, output all of them in alphabetical order. If there is no solution with at least one letter, output "?". Leave an empty line between test cases.
Sample Input
3
abcdefg
bcdefgh
cdefghi
3
xxx
yyy
zzz
0
Sample Output
bcdefg
cdefgh

?
Source
题意:求一个长度最大的字符串,使其在超过一半的给定字符串中出现,存在多个按字典序输出。
思路:用互不相同的分隔符把给定字符串拼接起来,求出后缀数组和height数组。然后二分答案长度p,扫描height数组,把height值不小于p的连续后缀划为一组,判断组内后缀是否来自超过一半的给定字符串。注意这里要求的不是组内有连续n/2个后缀,而是组内后缀所属的不同字符串超过n/2个,所以要先预处理每个后缀属于哪个字符串。输出时的扫描方式和判断时完全一样,由于是按后缀数组的顺序扫描height数组,字典序自然得到保证。
RE了好多发,发现是这个问题:在分隔字符串时写成 s[n]='z'+i, a[n]=s[n]-'a'+i+1; 会出问题。s是char数组,i较大时'z'+i超过127,存入char后溢出成负数,a[n]随之变成负数,计数排序时数组下标越界。直接给a[n]赋值'z'-'a'+1+i就好了。
poj用set判断会超时。。
/**
 * @author neko01
 *
 * POJ 3294 / UVA 11107 "Life Forms".
 *
 * All input strings are concatenated with a distinct separator after each
 * one, a suffix array and LCP (height) array are built over the result, and
 * the answer length is found by binary search: a length x is feasible when
 * some maximal run of adjacent suffixes with height >= x contains suffixes
 * from more than half of the input strings.
 */
//#pragma comment(linker, "/STACK:102400000,102400000")
#include <cstdio>
#include <cstring>
#include <string.h>
#include <iostream>
#include <algorithm>
#include <queue>
#include <vector>
#include <cmath>
#include <set>
#include <map>
using namespace std;
typedef long long LL;
#define min3(a,b,c) min(a,min(b,c))
#define max3(a,b,c) max(a,max(b,c))
#define pb push_back
#define mp(a,b) make_pair(a,b)
#define clr(a) memset(a,0,sizeof a)
#define clr1(a) memset(a,-1,sizeof a)
#define dbg(a) printf("%d\n",a)
typedef pair<int,int> pp;
const double eps=1e-9;
const double pi=acos(-1.0);
const int N=101005;

// sa[k] = start index of the suffix whose sorted position is k.
// sa[1..n] are the real suffixes; sa[0] is always n (the sentinel suffix).
int sa[N];
// rnk[i] = sorted position of the suffix starting at i (inverse of sa).
// rnk[0..n-1] are meaningful; rnk[n] is always 0.
// Deliberately not called `rank`: together with `using namespace std` that
// name is ambiguous with std::rank from <type_traits> on C++11 and later.
int rnk[N];
// height[k] = longest common prefix of suffixes sa[k] and sa[k-1];
// height[2..n] are meaningful.
int height[N];
int t1[N],t2[N],c[N];

/**
 * Builds the suffix array of s[0..n-1] into the global `sa` by prefix
 * doubling with counting sort.
 *
 * @param s  input sequence; every value must lie in [0, m). The callers in
 *           this file make s[n-1] a unique smallest sentinel, which is what
 *           keeps the `+j` look-ahead reads below inside [0, n).
 * @param n  number of elements of s, including the sentinel.
 * @param m  exclusive upper bound on the values in s.
 */
void build_sa(int s[],int n,int m)
{
    int *x=t1,*y=t2;

    // Round 0: counting sort on the single first character.
    for(int i=0;i<m;i++) c[i]=0;
    for(int i=0;i<n;i++) c[x[i]=s[i]]++;
    for(int i=1;i<m;i++) c[i]+=c[i-1];
    for(int i=n-1;i>=0;i--) sa[--c[x[i]]]=i;

    for(int j=1;j<=n;j<<=1)
    {
        int p=0;
        // Order by second key (rank of the suffix j positions later), reusing
        // the previous sa. Suffixes too short to have a second key go first.
        for(int i=n-j;i<n;i++) y[p++]=i;
        for(int i=0;i<n;i++)
            if(sa[i]>=j) y[p++]=sa[i]-j;

        // Stable counting sort by first key, walking y in second-key order.
        for(int i=0;i<m;i++) c[i]=0;
        for(int i=0;i<n;i++) c[x[y[i]]]++;
        for(int i=1;i<m;i++) c[i]+=c[i-1];
        for(int i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];

        // Recompute ranks: y now holds the old ranks, x receives the new ones.
        swap(x,y);
        p=1;
        x[sa[0]]=0;
        for(int i=1;i<n;i++)
        {
            const int prev=sa[i-1];
            const int cur=sa[i];
            // && short-circuits, so the +j reads only happen when the first
            // halves already tie.
            const bool same=(y[prev]==y[cur])&&(y[prev+j]==y[cur+j]);
            x[cur]=same?p-1:p++;
        }
        if(p>=n) break;   // all ranks distinct: fully sorted
        m=p;              // next round only needs p buckets
    }
}

/**
 * Fills the global `rnk` and `height` arrays from `sa`.
 *
 * @param s  the same sequence passed to build_sa; s[n] must be the unique
 *           sentinel so the character comparison loop terminates.
 * @param n  length of the text WITHOUT the sentinel (sa holds n+1 entries).
 */
void getheight(int s[],int n)
{
    for(int i=0;i<=n;i++) rnk[sa[i]]=i;

    int k=0;
    for(int i=0;i<n;i++)
    {
        // The LCP of the next text position can shrink by at most one.
        if(k) k--;
        // rnk[i] >= 1 here because the sentinel suffix owns position 0.
        const int j=sa[rnk[i]-1];
        while(s[i+k]==s[j+k]) k++;
        height[rnk[i]]=k;
    }
}

int id[N];          // id[p] = 1-based index of the input string owning text position p
char s[N],str[N];   // s: concatenated text (used for printing); str: current input line
int a[N];           // integer-coded text handed to build_sa / getheight
bool vis[105];      // vis[k] = string k already counted in the current run

/**
 * Scans maximal runs of adjacent suffixes (in sa order) whose pairwise
 * height is >= x and counts how many distinct input strings each run covers.
 *
 * @param n     length of the concatenated text without the sentinel.
 * @param x     candidate substring length.
 * @param m     number of input strings.
 * @param flag  0: return true as soon as a run covers more than m/2 strings.
 *              otherwise: print the length-x prefix of every such run, one
 *              per line, and return false.
 * @return true only in flag==0 mode when a qualifying run exists.
 */
bool gao(int n,int x,int m,int flag)
{
    int i=1;
    while(i<=n)
    {
        // Open a new run at sa[i].
        clr(vis);
        vis[id[sa[i]]]=true;
        int sz=1;
        int last=i;

        // Extend the run while the neighbouring suffixes share >= x characters.
        for(i++;i<=n&&height[i]>=x;i++)
        {
            const int owner=id[sa[i]];
            if(!vis[owner])
            {
                vis[owner]=true;
                sz++;
            }
            last=i;
        }

        if(sz>m/2)
        {
            if(flag==0) return true;
            for(int j=0;j<x;j++) printf("%c",s[sa[last]+j]);
            puts("");
        }
        // i now indexes the first suffix outside the run (or n+1), so the
        // next iteration never touches sa beyond sa[n].
    }
    return false;
}

int main()
{
    int t;
    bool first_case=true;
    // Stop on EOF, on unparsable input, or on the terminating 0.
    while(scanf("%d",&t)==1&&t)
    {
        int n=0;
        if(!first_case) puts("");   // exactly one blank line between cases
        first_case=false;

        for(int i=1;i<=t;i++)
        {
            if(scanf("%s",str)!=1) return 0;
            const int len=strlen(str);
            for(int j=0;j<len;j++)
            {
                s[n]=str[j];
                a[n]=str[j]-'a'+1;      // letters map to 1..26
                id[n++]=i;
            }
            // Per-string separator: a distinct code above every letter, so no
            // common prefix can cross a string boundary. The char stored in s
            // is never printed, so a fixed placeholder avoids char overflow.
            s[n]='#';
            a[n]='z'-'a'+1+i;           // 26+i
            id[n++]=i;
        }

        if(t==1)
        {
            // A single string is trivially shared by more than half.
            printf("%s\n",str);
            continue;
        }

        s[n]='\0',a[n]=0;               // unique smallest sentinel
        // Codes used: 0, 1..26, 27..26+t, all below 30+t.
        build_sa(a,n+1,30+t);
        getheight(a,n);

        // Binary search the largest feasible length (strings have <= 1000 letters).
        int ans=-1,l=1,r=1000;
        while(l<=r)
        {
            const int mid=(l+r)>>1;
            if(gao(n,mid,t,0)) ans=mid,l=mid+1;
            else r=mid-1;
        }

        if(ans==-1) puts("?");
        else gao(n,ans,t,1);
    }
    return 0;
}
poj3294 UVA 11107 Life Forms 后缀数组
原文地址:http://blog.csdn.net/neko01/article/details/41089199