标签:
对应POJ 题目:点击打开链接
思路:用后缀数组求多个字符串的最长公共子串:按height数组分组,再二分答案求长度的上界。细节上,求得一组公共前缀后,要判断其中是否含有分隔符。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Reads test cases until a word count of 0 (or end of input).  Each case is
 * a count n followed by n whitespace-delimited words.  For every case the
 * program prints, in lexicographic order and one per line, all longest
 * substrings that occur in more than n/2 of the words, or "?" when there is
 * none.  Consecutive cases are separated by one blank line.
 *
 * Method: the words are concatenated with 0-valued separators, a suffix
 * array and LCP (height) table are built over the result, and the answer
 * length is found by binary search.  For a candidate length the sorted
 * suffixes are cut into runs that share that many leading letters, and a
 * run qualifies when its suffixes come from more than n/2 different words.
 */

enum {
    MAX_WORD = 1004,         /* must match the field width in "%1004s" below */
    MAXN = 100000 + 2000,    /* capacity of the concatenated text */
    ALPHABET = 256           /* symbols are raw byte values, 0 = separator */
};

static int wa[MAXN], wb[MAXN], wv[MAXN], ws[MAXN];
static int rank[MAXN], r[MAXN], sa[MAXN], height[MAXN];
static int block[MAXN];      /* block[p]: index of the word that owns position p */
static int tail[MAXN];       /* tail[p]: letters from p to the end of its word, 0 on separators */
static int vis[MAXN];        /* vis[w]: stamp of the last run in which word w was counted */
static int ID[MAXN];         /* start positions of the substrings found for the last feasible length */
static char str[MAX_WORD + 1];

/*
 * Returns nonzero when positions a and b carry the same (key[p], key[p+l])
 * pair.  A second component that would lie at or beyond n is treated as -1,
 * i.e. smaller than every real key, so nothing past the text is ever read.
 */
static int cmp(const int *key, int a, int b, int l, int n)
{
    int ka, kb;

    if (key[a] != key[b])
        return 0;
    ka = (a + l < n) ? key[a + l] : -1;
    kb = (b + l < n) ? key[b + l] : -1;
    return ka == kb;
}

/*
 * Prefix-doubling suffix sort.
 *   text  - n symbols, each in [0, m)
 *   order - receives the start positions of the suffixes in sorted order;
 *           a suffix that is a proper prefix of another sorts first
 *   n     - number of symbols
 *   m     - size of the symbol alphabet
 * Uses the file-scope scratch arrays wa, wb, wv and ws.
 */
void da(int *text, int *order, int n, int m)
{
    int i, j, p;
    int *x = wa, *y = wb, *t;

    /* Counting sort on the first symbol. */
    for (i = 0; i < m; i++) ws[i] = 0;
    for (i = 0; i < n; i++) ws[x[i] = text[i]]++;
    for (i = 1; i < m; i++) ws[i] += ws[i - 1];
    for (i = n - 1; i >= 0; i--) order[--ws[x[i]]] = i;

    /* x holds ranks by the first j symbols; each pass doubles j. */
    for (j = 1, p = 1; p < n; j <<= 1, m = p) {
        /* Order by second key: suffixes without one come first. */
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++) if (order[i] >= j) y[p++] = order[i] - j;

        /* Stable counting sort by first key. */
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) ws[i] = 0;
        for (i = 0; i < n; i++) ws[wv[i]]++;
        for (i = 1; i < m; i++) ws[i] += ws[i - 1];
        for (i = n - 1; i >= 0; i--) order[--ws[wv[i]]] = y[i];

        /* Re-rank: equal key pairs share a rank. */
        t = x; x = y; y = t;
        p = 1;
        x[order[0]] = 0;
        for (i = 1; i < n; i++)
            x[order[i]] = cmp(y, order[i - 1], order[i], j, n) ? p - 1 : p++;
    }
}

/*
 * Fills the file-scope rank[] (inverse of order[]) and height[], where
 * height[i] is the length of the common prefix of the suffixes order[i-1]
 * and order[i]; height[0] is 0.  The common prefix is measured on the raw
 * text, so it may run across separators.
 */
void calheight(int *text, int *order, int n)
{
    int i, k = 0;

    for (i = 0; i < n; i++)
        rank[order[i]] = i;
    if (n > 0)
        height[0] = 0;

    for (i = 0; i < n; i++) {
        int prev;

        if (rank[i] == 0) {   /* smallest suffix has no predecessor */
            k = 0;
            continue;
        }
        prev = order[rank[i] - 1];
        if (k > 0)
            k--;              /* the LCP drops by at most one from suffix i-1 to i */
        while (i + k < n && prev + k < n && text[i + k] == text[prev + k])
            k++;
        height[rank[i]] = k;
    }
}

/*
 * Finds every distinct substring of exactly `width` letters that occurs in
 * more than words/2 different words and stores one start position of each
 * in ID[], in lexicographic order.  Returns how many were stored; ID[] is
 * left untouched when the result is 0.
 */
static int collect(int width, int words, int len)
{
    static int stamp = 0;
    int found = 0;
    int i = 0;

    while (i < len) {
        int first, owners;

        /* A suffix with fewer than `width` letters before its separator
         * cannot start a candidate. */
        if (tail[sa[i]] < width) {
            i++;
            continue;
        }

        /* A run is a maximal stretch of adjacent suffixes sharing `width`
         * leading symbols.  Its first suffix has that many letters, so all
         * the others do too.  A run may consist of a single suffix, which
         * is what makes the one-word case work. */
        first = sa[i];
        owners = 0;
        stamp++;
        do {
            int w = block[sa[i]];

            if (vis[w] != stamp) {
                vis[w] = stamp;
                owners++;
            }
            i++;
        } while (i < len && height[i] >= width);

        if (owners > words / 2)
            ID[found++] = first;
    }
    return found;
}

int main(void)
{
    int n;
    int first_case = 1;

    while (scanf("%d", &n) == 1 && n > 0) {
        int len = 1;          /* position 0 is a leading separator */
        int longest = 0;
        int best_len = 0, best_cnt = 0;
        int lo, hi, w, i, j;

        r[0] = 0;
        block[0] = 0;
        tail[0] = 0;

        /* Concatenate the words, each followed by a 0 separator. */
        for (w = 0; w < n; w++) {
            int k, word;

            if (scanf("%1004s", str) != 1)
                return 0;     /* truncated input */
            word = (int)strlen(str);
            if (len + word + 1 > MAXN)
                return 0;     /* input exceeds the text capacity */
            if (word > longest)
                longest = word;

            for (k = 0; k < word; k++, len++) {
                r[len] = (unsigned char)str[k];
                block[len] = w;
                tail[len] = word - k;
            }
            r[len] = 0;
            block[len] = w;
            tail[len] = 0;
            len++;
        }

        da(r, sa, len, ALPHABET);
        calheight(r, sa, len);

        /* Binary search on the answer length.  The upper bound is the
         * longest word, not the shortest: the answer only has to occur in
         * more than half of the words.  Feasibility is monotone because a
         * prefix of a qualifying substring qualifies as well. */
        lo = 1;
        hi = longest;
        while (lo <= hi) {
            int mid = lo + (hi - lo) / 2;
            int cnt = collect(mid, n, len);

            if (cnt > 0) {
                best_len = mid;
                best_cnt = cnt;
                lo = mid + 1;
            } else {
                hi = mid - 1;
            }
        }

        /* The search always ends with a failing probe, which leaves ID[]
         * untouched, so ID[] still holds the results for best_len. */
        if (!first_case)
            printf("\n");
        first_case = 0;

        if (best_cnt == 0) {
            printf("?\n");
        } else {
            for (i = 0; i < best_cnt; i++) {
                for (j = ID[i]; j < ID[i] + best_len; j++)
                    putchar(r[j]);
                printf("\n");
            }
        }
    }
    return 0;
}
标签:
原文地址:http://blog.csdn.net/u013351484/article/details/43148957