码迷,mamicode.com
首页 > 其他好文 > 详细

Common Substrings POJ - 3415(长度不小于k的公共子串的个数)

时间:2018-08-18 11:31:35      阅读:131      评论:0      收藏:0      [点我收藏+]

标签:ffffff   include   tin   code   name   cst   sizeof   http   swap   

题意:

  给定两个字符串 A 和 B,求长度不小于 k 的公共子串的个数(按出现位置计数:内容相同但起始位置不同的子串分别计入)

 

技术分享图片

分两部分求和:第一遍把属于 B 的后缀(sa[i-1] > len1)压入单调栈,遇到属于 A 的后缀(sa[i] < len1)时累加答案;第二遍反过来,把属于 A 的后缀(sa[i-1] < len1)压入单调栈,遇到属于 B 的后缀(sa[i] > len1)时累加答案。

技术分享图片

 

 

#include <iostream>
#include <cstdio>
#include <sstream>
#include <cstring>
#include <map>
#include <cctype>
#include <set>
#include <vector>
#include <stack>
#include <queue>
#include <algorithm>
#include <cmath>
#define rap(i, a, n) for(int i=a; i<=n; i++)
#define rep(i, a, n) for(int i=a; i<n; i++)
#define lap(i, a, n) for(int i=n; i>=a; i--)
#define lep(i, a, n) for(int i=n; i>a; i--)
#define rd(a) scanf("%d", &a)
#define rlld(a) scanf("%lld", &a)
#define rc(a) scanf("%c", &a)
#define rs(a) scanf("%s", a)
#define MOD 2018
#define LL long long
#define ULL unsigned long long
#define Pair pair<int, int>
#define mem(a, b) memset(a, b, sizeof(a))
#define _  ios_base::sync_with_stdio(0),cin.tie(0)
//freopen("1.txt", "r", stdin);
using namespace std;
const int maxn = 200010, INF = 0x7fffffff;
int s[maxn];
int sa[maxn], t[maxn], t2[maxn], c[maxn], n;
int ran[maxn], height[maxn];

void get_sa(int m)
{
    int i, *x = t, *y = t2;
    for(i = 0; i < m; i++) c[i] = 0;
    for(i = 0; i < n; i++) c[x[i] = s[i]]++;
    for(i = 1; i < m; i++) c[i] += c[i-1];
    for(i = n-1; i >= 0; i--) sa[--c[x[i]]] = i;
    for(int k = 1; k <= n; k <<= 1)
    {
        int p = 0;
        for(i = n-k; i < n; i++) y[p++] = i;
        for(i = 0; i < n; i++) if(sa[i] >= k) y[p++] = sa[i] - k;
        for(i = 0; i < m; i++) c[i] = 0;
        for(i = 0; i < n; i++) c[x[y[i]]]++;
        for(i = 0; i< m; i++) c[i] += c[i-1];
        for(i = n-1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];
        swap(x, y);
        p = 1; x[sa[0]] = 0;
        for(i = 1; i < n; i++)
            x[sa[i]] = y[sa[i-1]] == y[sa[i]] && y[sa[i-1]+k] == y[sa[i]+k] ? p-1 : p++;
        if(p >= n) break;
        m = p;
    }
    int k = 0;
    for(i = 0; i < n; i++) ran[sa[i]] = i;
    for(i = 0; i < n; i++)
    {
        if(k) k--;
        int j = sa[ran[i]-1];
        while(s[i+k] == s[j+k]) k++;
        height[ran[i]] = k;
    }
}

int k, top, num;
LL sum, ans;
char s1[maxn], s2[maxn];
int stac[maxn], cnt[maxn];
int main()
{
    while(~rd(k) && k)
    {
        top = sum = num = ans = n = 0;
        rs(s1); rs(s2);
        int len1 = strlen(s1);
        int len2 = strlen(s2);
        rep(i, 0, len1)
            s[n++] = s1[i];
        s[n++] = #;
        rep(i, 0, len2)
            s[n++] = s2[i];
        s[n++] = 0;
        get_sa(200);
        rep(i, 1, n)
        {
            if(height[i] < k)
            {
                sum = top = 0;
                continue;
            }
            int num = 0;
            while(top && height[i] < stac[top])  //维持单调递增栈 可能当前sa[i-1] < len1 但height是连续的 所以短板效应替换栈中元素
            {                                      //而它自己如果sa[i-1] < len1 那么下面的 num是不加1的 即自己不算在内
                sum -= (LL)(stac[top] - k + 1) * cnt[top];
                sum += (LL)(height[i] - k + 1) * cnt[top];
                num += cnt[top];
                top--;
            }
            stac[++top] = height[i];
            if(sa[i-1] > len1)                  //扫描B串
            {
                sum += (LL)(height[i] - k + 1);
                cnt[top] = num + 1;
            }
            else
                cnt[top] = num;
            if(sa[i] < len1)
                ans += sum;
        }
        rep(i, 1, n)
        {
            if(height[i] < k)
            {
                sum = top = 0;
                continue;
            }
            int num = 0;
            while(top && height[i] < stac[top])
            {
                sum -= (LL)(stac[top] - k + 1) * cnt[top];
                sum += (LL)(height[i] - k + 1) * cnt[top];
                num += cnt[top];
                top--;
            }
            stac[++top] = height[i];
            if(sa[i-1] < len1)                  //扫描A串
            {
                sum += (LL)(height[i] - k + 1);
                cnt[top] = num + 1;
            }
            else
                cnt[top] = num;
            if(sa[i] > len1)
                ans += sum;
        }
        printf("%lld\n", ans);


    }


    return 0;
}

 

Common Substrings POJ - 3415(长度不小于k的公共子串的个数)

标签:ffffff   include   tin   code   name   cst   sizeof   http   swap   

原文地址:https://www.cnblogs.com/WTSRUVF/p/9496298.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!