码迷,mamicode.com
首页 > 其他好文 > 详细

poj 2778 DNA Sequence(AC自动机 + 矩阵快速幂)

时间:2016-04-30 19:35:49      阅读:209      评论:0      收藏:0      [点我收藏+]

标签:

DNA Sequence
Time Limit: 1000MS   Memory Limit: 65536K
Total Submissions: 14426   Accepted: 5572

Description

It's well known that a DNA sequence contains only the characters A, C, T and G, and it is very useful to analyze segments of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, it may mean that the animal has a genetic disease. So far scientists have found several such segments; the problem is to count how many kinds of DNA sequences of a species don't contain any of those segments. 

Suppose that the DNA sequences of a species consist of A, C, T and G, and that the length of each sequence is a given integer n. 

Input

The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences. 

Each of the next m lines contains one genetic disease segment; the length of each segment is not larger than 10. 

Output

An integer, the number of DNA sequences, mod 100000.
 

Sample Input

4 3
AT
AC
AG
AA

Sample Output

36

 

/*
poj 2778 DNA Sequence(AC自动机 + 矩阵快速幂)

已知一个长度为n的字符串Str由A,T,G,C组成,给你m个子串.  求不包含这些子串的Str有多少种

AC自动机建立可以构建一个关系图。然后将这个关系图转换到矩阵上面,然后利用矩阵快速幂便能
求出从a到b的方案数。

大致就是 子串中没有出现的字符都会被指向root节点。即不停地在图中查找子串,于是我们只要在
路径上不经过ed标记的危险节点即可(每个子串的最后一个节点)。

对于agcd、c而言,如果我走过 a-g-c-d 这个路径。
                          root
                         /    \
                        a      c
                       /
                      g
                     /
                    c
                   /
                  d
由上面这个图可知 左边的d 和 右边的c都是危险节点。  但漏掉了左边上的c
所以如果fail指针指向那个节点是危险节点的话,那么当前节点也是危险节点

AC自动机:http://blog.csdn.net/niushuai666/article/details/7002823
hhh-2016-04-23 15:59:53
*/
#include <iostream>
#include <vector>
#include <cstring>
#include <string>
#include <cstdio>
#include <queue>
#include <functional>
#include <map>
using namespace std;
#define lson  (i<<1)
#define rson  ((i<<1)|1)
typedef long long ll;
const int  maxn = 40010;
const int mod = 100000;
// Fixed-capacity square matrix of ints used for the automaton's transition
// counts. Only the top-left len x len corner is meaningful; the storage is
// always 105 x 105.
struct Matrix
{
    int len;            // active dimension (number of rows/columns in use)
    int ma[105][105];   // entries; ma[i][j] is row i, column j

    // Creates an empty (dimension 0) matrix with every entry set to zero, so
    // that no member is ever read while uninitialised.
    Matrix() : len(0), ma() {}

    // Creates a zero matrix whose active dimension is L.
    // The caller is responsible for keeping L within the 105 x 105 storage.
    Matrix(int L) : len(L), ma() {}
};

// Returns the product ta * tb with every entry reduced modulo `mod`.
//
// Both operands are treated as ta.len x ta.len matrices (tb.len is not
// consulted, exactly as before). The operands are taken by const reference so
// that the two large backing arrays are not copied on every call; passing the
// same matrix for both arguments is safe because the result is accumulated in
// a separate local.
Matrix mult(const Matrix& ta,const Matrix& tb)
{
    Matrix tc;
    tc.len = ta.len;
    // Start from an all-zero result so every cell is initialised, including
    // the ones outside the active len x len corner.
    memset(tc.ma,0,sizeof(tc.ma));
    for(int i = 0; i < ta.len; i++)
    {
        for(int k = 0; k < ta.len; k++)
        {
            const ll lhs = ta.ma[i][k];
            // A zero factor contributes nothing to row i; skip the inner loop.
            if(lhs == 0)
                continue;
            for(int j = 0; j < ta.len; j++)
            {
                // The product is formed in 64 bits and reduced before it is
                // added, so the running sum stays small.
                const ll term = lhs*tb.ma[k][j]%mod;
                tc.ma[i][j] = static_cast<int>((tc.ma[i][j]+term)%mod);
            }
        }
    }
    return tc;
}

// Raises the square matrix `a` to the n-th power by binary exponentiation,
// using mult() for every multiplication (so entries are reduced the same way
// mult() reduces them).
//
// n == 0 yields the identity matrix of dimension a.len. A negative n is
// treated like 0: the loop only runs while n is positive, which also prevents
// the endless loop that right-shifting a negative value would cause.
Matrix pow_mat(Matrix a,ll n)
{
    // cnt accumulates the result; it starts as the identity matrix.
    Matrix cnt;
    cnt.len = a.len;
    memset(cnt.ma,0,sizeof(cnt.ma));
    for(int i = 0 ; i < cnt.len; i++)
        cnt.ma[i][i] = 1;

    while(n > 0)
    {
        if(n&1) cnt = mult(cnt,a);
        n >>= 1;
        // Square the base only if another bit is still to be processed; the
        // square computed after the final bit would never be used.
        if(n > 0) a = mult(a,a);
    }
    return cnt;
}

// Aho-Corasick automaton over the four-letter alphabet {A, C, T, G}.
//
// Usage order: ini(), then inser() once per forbidden pattern, then build(),
// then to_mat(). The node arrays have room for 105 nodes and newnode() does
// not check that limit, so the caller must keep the total number of inserted
// characters small enough.
struct Tire
{
    // nex[s][c] : child / transition of state s on letter code c
    //             (-1 means "no child" until build() fills it in)
    // fail[s]   : failure link of state s (set by build())
    // ed[s]     : non-zero if state s is dangerous, i.e. reaching it means
    //             some inserted pattern has just been matched
    int nex[105][4],fail[105],ed[105];
    // root is the index of the start state, L the number of states in use.
    int root,L;

    // Appends a fresh state with no children and no end mark; returns its
    // index.
    int newnode()
    {
        for(int i = 0; i < 4; i++)
            nex[L][i] = -1;
        ed[L++] = 0;
        return L-1;
    }

    // Resets the automaton to a single root state.
    void ini()
    {
        L = 0,root = newnode();
    }

    // Maps a letter to its code: A->0, C->1, T->2, G->3.
    // Returns -1 for any other character instead of falling off the end of
    // the function.
    int cha(char x)
    {
        switch(x)
        {
        case 'A':
            return 0;
        case 'C':
            return 1;
        case 'T':
            return 2;
        case 'G':
            return 3;
        default:
            return -1;
        }
    }

    // Inserts the NUL-terminated pattern `buf` into the trie and marks its
    // final state as dangerous.
    //
    // A pattern containing a character outside {A, C, T, G} is ignored: it can
    // never occur in a sequence over that alphabet, and using its code as an
    // index would read outside nex[].
    void inser(const char buf[])
    {
        int len = static_cast<int>(strlen(buf));
        // Validate first so that a rejected pattern leaves no partial path.
        for(int i = 0; i < len; i++)
            if(cha(buf[i]) < 0)
                return;

        int now = root;
        for(int i = 0; i < len; i++)
        {
            int ta = cha(buf[i]);
            if(nex[now][ta] == -1)
                nex[now][ta] = newnode();
            now = nex[now][ta];
        }
        ed[now]++;
    }

    // Computes the failure links breadth-first and turns nex[][] into a full
    // transition table: every missing child is replaced by the transition of
    // the state's failure link. A state whose failure link is dangerous is
    // marked dangerous as well, because a pattern then ends at that state as a
    // proper suffix of its path.
    void build()
    {
        queue<int >q;
        fail[root] = root;
        for(int i = 0; i < 4; i++)
        {
            if(nex[root][i] == -1)
                nex[root][i] = root;
            else
            {
                fail[nex[root][i]] = root;
                q.push(nex[root][i]);
            }
        }
        while(!q.empty())
        {
            int now = q.front();
            q.pop();
            // fail[now] is shallower than now, so it was dequeued earlier and
            // its ed[] value is already final.
            if(ed[fail[now]])
                ed[now] = 1;
            for(int i = 0; i < 4; i++)
            {
                if(nex[now][i] == -1)
                    nex[now][i] = nex[fail[now]][i];
                else
                {
                    fail[nex[now][i]] = nex[fail[now]][i];
                    q.push(nex[now][i]);
                }
            }
        }
    }

    // Builds the L x L transition-count matrix of the automaton restricted to
    // safe states: entry [i][j] is the number of letters that lead from state
    // i to state j. Transitions into a dangerous state are left out, and a
    // dangerous state gets no outgoing transitions either, so a walk can
    // neither enter nor continue from one (this matters when the root itself
    // is marked, e.g. by an empty pattern). Must be called after build().
    Matrix to_mat()
    {
        Matrix ta(L);
        memset(ta.ma,0,sizeof(ta.ma));
        for(int i = 0; i < L; i++)
        {
            if(ed[i])
                continue;
            for(int j = 0; j < 4; j++)
            {
                int to = nex[i][j];
                if(!ed[to])
                    ta.ma[i][to]++;
            }
        }
        return ta;
    }
};
Tire ac;
char buf[20];
int main()
{
    int m;
    ll n;
    while(scanf("%d%I64d",&m,&n) != EOF)
    {
        ac.ini();
        for(int i = 0; i < m; i++)
        {
            scanf("%s",buf);
            ac.inser(buf);
        }
        ac.build();
        Matrix ta = ac.to_mat();
        int ans = 0;
        ta = pow_mat(ta,n);
        for(int i = 0;i < ta.len;i++)
        {
            ans = (ans+ta.ma[0][i])%mod;
        }
        printf("%d\n",ans);
    }
    return 0;
}

  

poj 2778 DNA Sequence(AC自动机 + 矩阵快速幂)

标签:

原文地址:http://www.cnblogs.com/Przz/p/5449109.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!