码迷,mamicode.com
首页 > 编程语言 > 详细

POJ 3294 Life Forms(后缀数组求k个串的最长子串)

时间:2015-01-27 16:23:07      阅读:292      评论:0      收藏:0      [点我收藏+]

标签:

题目大意:给出n个字符串,求在超过一半的字符串中都出现的最长子串;如果有多个,按照字典序顺序全部输出;如果不存在这样的子串,输出"?"。

解题思路:将n个字符串连起来,中间需要隔开,然后我们二分枚举字符串的长度,求最长的长度,如果多个需要按照字典序保存起来,最后输出答案就可以了。时间复杂度是:O(n*log(n))。

Life Forms
Time Limit: 5000MS   Memory Limit: 65536K
Total Submissions: 10275   Accepted: 2822

Description

You may have wondered why most extraterrestrial life forms resemble humans, differing by superficial traits such as height, colour, wrinkles, ears, eyebrows and the like. A few bear no human resemblance; these typically have geometric or amorphous shapes like cubes, oil slicks or clouds of dust.

The answer is given in the 146th episode of Star Trek - The Next Generation, titled The Chase. It turns out that the vast majority of the quadrant's life forms ended up with a large fragment of common DNA.

Given the DNA sequences of several life forms represented as strings of letters, you are to find the longest substring that is shared by more than half of them.

Input

Standard input contains several test cases. Each test case begins with 1 ≤ n ≤ 100, the number of life forms. n lines follow; each contains a string of lower case letters representing the DNA sequence of a life form. Each DNA sequence contains at least one and not more than 1000 letters. A line containing 0 follows the last test case.

Output

For each test case, output the longest string or strings shared by more than half of the life forms. If there are many, output all of them in alphabetical order. If there is no solution with at least one letter, output "?". Leave an empty line between test cases.

Sample Input

3
abcdefg
bcdefgh
cdefghi
3
xxx
yyy
zzz
0

Sample Output

bcdefg
cdefgh

?
#include <algorithm>
#include <iostream>
#include <stdlib.h>
#include <string.h>
#include <iomanip>
#include <stdio.h>
#include <string>
#include <queue>
#include <cmath>
#include <stack>
#include <ctime>
#include <map>
#include <set>
#define eps 1e-9
///#define M 1000100
///#define LL __int64
#define LL long long
///#define INF 0x7ffffff
#define INF 0x3f3f3f3f
#define PI 3.1415926535898
#define zero(x) ((fabs(x)<eps)?0:x)
#define mod 1000000007
#define Read() freopen("autocomplete.in","r",stdin)
#define Write() freopen("autocomplete.out","w",stdout)
#define Cin() ios::sync_with_stdio(false)

using namespace std;



// Reads the next decimal integer from standard input.
//
// Every character is skipped until a digit or '-' is seen; the run of digits
// that follows is then accumulated. A leading '-' negates the result. The
// character that ends the digit run is consumed as well.
//
// Returns the parsed value, or 0 when end of input is reached before any
// digit or '-' has been found.
inline int read()
{
    // Keep getchar()'s int result: squeezing it into a char makes EOF
    // indistinguishable from data, and the skip loop then never terminates
    // once the input is exhausted.
    int ch = getchar();
    while(ch != EOF && ch != '-' && !(ch >= '0' && ch <= '9'))
    {
        ch = getchar();
    }
    if(ch == EOF)
    {
        return 0;
    }

    const bool negative = (ch == '-');
    int a = negative ? 0 : ch - '0';
    while((ch = getchar()) >= '0' && ch <= '9')
    {
        a = a * 10 + (ch - '0');
    }
    return negative ? -a : a;
}
// Prints the decimal representation of `a` to standard output, with a
// leading '-' for negative values and no trailing newline.
void write(int a)
{
    // Widen before negating: `-a` on an int overflows for the most negative
    // value, which is undefined behaviour.
    long long value = a;
    if(value < 0)
    {
        putchar('-');
        value = -value;
    }

    // Digits come out least-significant first, so buffer them and emit in
    // reverse. 10 digits are enough for a 32-bit int; 20 covers 64-bit too.
    char digits[20];
    int count = 0;
    do
    {
        digits[count++] = static_cast<char>('0' + value % 10);
        value /= 10;
    } while(value > 0);

    while(count > 0)
    {
        putchar(digits[--count]);
    }
}

// Capacity shared by every per-position array in this file.
const int maxn = 200010;



// Scratch buffers for da(): wa/wb hold the two alternating rank arrays,
// wv the sort keys of the current pass, ws1 the counting-sort buckets.
int wa[maxn], wb[maxn], wv[maxn], ws1[maxn];
// Suffix array produced by da().
int sa[maxn];

// Tells whether positions a and b carry the same key pair: the value at the
// position itself and the value l entries further on.
// Returns 1 when both components match and 0 otherwise; the second component
// is only inspected when the first one already matches.
int cmp(int *r, int a, int b, int l)
{
    if(r[a] != r[b])
    {
        return 0;
    }
    const bool second_matches = (r[a+l] == r[b+l]);
    return second_matches ? 1 : 0;
}


// Builds the suffix array of r[0..n-1] into sa[0..n-1] by prefix doubling,
// using a counting sort in every pass.
//
//   r  - input sequence; every value is used as an index into ws1, so it
//        must lie in [0, m).
//   sa - output: sa[k] is the start of the k-th suffix in sorted order.
//   n  - number of entries of r to sort.
//   m  - exclusive upper bound of the values in r (bucket count of the
//        first pass).
//
// Uses the global scratch arrays wa, wb, wv and ws1.
// NOTE(review): cmp() below reads y[sa[i] + j]; this presumably relies on the
// caller ending r with a unique smallest value (main() appends a 0) so that
// the index stays inside the sequence - confirm.
void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb;
    // First pass: counting sort of the suffixes by their first value.
    for(i = 0; i < m; i++) ws1[i] = 0;
    for(i = 0; i < n; i++) ws1[x[i] = r[i]]++;
    for(i = 1; i < m; i++) ws1[i] += ws1[i-1];
    for(i = n-1; i >= 0; i--) sa[--ws1[x[i]]] = i;
    // Each round doubles the compared prefix length j. p counts the distinct
    // ranks assigned in the round; the loop stops once there are n of them.
    // m is replaced by p because ranks are now the sort keys.
    for(j = 1, p = 1; p < n; j <<= 1, m = p)
    {
        // y lists the positions ordered by their second key (the rank j
        // entries ahead). Positions in the last j slots come first.
        for(p = 0, i = n-j; i < n; i++) y[p++] = i;
        for(i = 0; i < n; i++)
            if(sa[i] >= j) y[p++] = sa[i]-j;
        // Counting sort of y by the first key x[], scanned backwards so the
        // order already established in y is kept among equal keys.
        for(i = 0; i < n; i++) wv[i] = x[y[i]];
        for(i = 0; i < m; i++) ws1[i] = 0;
        for(i = 0; i < n; i++) ws1[wv[i]]++;
        for(i = 1; i < m; i++) ws1[i] += ws1[i-1];
        for(i = n-1; i >= 0; i--) sa[--ws1[wv[i]]] = y[i];
        // Swap the rank buffers, then write the new ranks into x: neighbours
        // in sa share a rank exactly when both of their keys are equal.
        for(swap(x, y), p = 1, x[sa[0]] = 0, i = 1; i < n; i++)
            x[sa[i]] = cmp(y, sa[i-1], sa[i], j)?p-1:p++;
    }
    return ;
}

int rank[maxn], height[maxn];

void calheight(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for(i = 1; i <= n; i++) rank[sa[i]] = i;
    for(i = 0; i < n; height[rank[i++]] = k)
        for(k?k--:0, j = sa[rank[i]-1]; r[i+k] == r[j+k]; k++);
    return;
}

// Not referenced anywhere in the visible code.
char str1[maxn], str2[maxn];
// All input strings concatenated as integers; main() puts the value 200+i
// after string i and a final 0 at the end.
int seq[maxn];

// hash[p] is the index of the input string that position p of seq belongs to.
// main() resets the array to -1, so separator positions stay at -1.
int hash[maxn];

// The input strings of the current test case, one per row.
char str[110][1010];

// One candidate answer: a substring of an input string.
struct node
{
    int pos;   // offset of the substring inside str[s] (from Find())
    int s;     // index of the input string it is taken from
    int len;   // length of the substring
}f[maxn], xf[maxn];   // f: candidates of the last successful judge(); xf: scratch list of the running judge()

// Number of valid entries in f.
int xans;

// Per-string marks used by judge() to count distinct strings in one run.
int vis[110];

int Find(int x)
{
    int s = hash[x];
    for(int i = 0; i < s; i++)
    {
        int len = strlen(str[i]);
        x -= len;
    }
    x -= s;
    return x;
}

bool judge(int mid, int n, int m)
{
    int ans = 0;
    int sx;
    for(int i = 2; i <= n; i++)
    {
        memset(vis, 0, sizeof(vis));
        sx = 1;
        vis[hash[sa[i-1]]] = 1;
        while(height[i] >= mid)
        {
            if(!vis[hash[sa[i]]])
            {
                vis[hash[sa[i]]] = 1;
                ++sx;
            }
            i++;
        }
        if(sx*2 > m)
        {
            xf[ans].len = mid;
            xf[ans].pos = Find(sa[i-1]);
            xf[ans++].s = hash[sa[i-1]];
        }

    }
    if(ans)
    {
        xans = 0;
        for(int i = 0; i < ans; i++)
        f[xans++] = xf[i];
    }
    return ans;
}

void Del(int n, int len, int m)
{
    int l = 1;
    int r = len;
    xans = 0;
    while(l <= r)
    {
        int mid = (l+r)>>1;
        if(judge(mid, n, m)) l = mid+1;
        else r = mid-1;
    }

    if(!xans)
    {
        cout<<"?"<<endl;
        return;
    }
    for(int i = 0; i < xans; i++)
    {
        for(int j = f[i].pos, k = 0; k < f[i].len; k++, j++)
        cout<<str[f[i].s][j];
        puts("");
    }
}


int main()
{
    int n;
    int flag = 0;
    while(~scanf("%d", &n) && n)
    {
        memset(hash, -1, sizeof(hash));
        int ans = 0;
        int Min = maxn;
        for(int i = 0; i < n; i++)
        {
            scanf("%s",str[i]);
            int len = strlen(str[i]);
            Min = min(Min, len);
            for(int j = 0; j < len; j++)
            {
                seq[ans] = str[i][j];
                hash[ans++] = i;
            }
            seq[ans++] = 200+i;
        }
        seq[ans] = 0;
        da(seq, sa, ans+1, 310);
        calheight(seq, sa, ans);
        if(!flag) flag = 1;
        else puts("");
        Del(ans, Min, n);
    }
    return 0;
}



POJ 3294 Life Forms(后缀数组求k个串的最长子串)

标签:

原文地址:http://blog.csdn.net/xu12110501127/article/details/43195465

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!