码迷,mamicode.com
首页 > 其他好文 > 详细

poj2778--DNA Sequence(AC自动机+矩阵优化)

时间:2015-02-03 19:38:03      阅读:165      评论:0      收藏:0      [点我收藏+]

标签:

DNA Sequence
Time Limit: 1000MS   Memory Limit: 65536K
Total Submissions: 12252   Accepted: 4661

Description

It's well known that a DNA sequence is a sequence that contains only A, C, T and G, and it's very useful to analyze a segment of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, then it may mean that the animal has a genetic disease. Until now scientists have found several such segments; the problem is how many kinds of DNA sequences of a species don't contain those segments.

Suppose that DNA sequences of a species is a sequence that consist of A, C, T and G,and the length of sequences is a given integer n.

Input

First line contains two integer m (0 <= m <= 10), n (1 <= n <=2000000000). Here, m is the number of genetic disease segment, and n is the length of sequences.

Next m lines each line contain a DNA genetic disease segment, and length of these segments is not larger than 10.

Output

An integer, the number of DNA sequences, mod 100000.

Sample Input

4 3
AT
AC
AG
AA

Sample Output

36

 

给出患病的DNA序列,问序列长度为n的,且不包含患病的DNA序列有多少种。

首先处理患病的DNA串,连接为字典树后,添加fail指针,完成AC自动机,给每个节点一个编号,然后用矩阵统计每个节点走一步可以走到的节点的种数,其中,不能包含会患病的序列,最后矩阵相乘。

注意1.矩阵相乘时中间结果会超出 int 的范围,要用 __int64 保存,并在每次累加后取模。

注意2.处理自动机时,注意,如果发现某个节点的fail会返回到一个代表序列结束的节点上,那么这个节点也是不可达的。不能被统计到矩阵中。

注意3.矩阵快速幂要写成非递归的形式。

给出测试案例:

2 1

ACG

C

其中矩阵应该为

2 1 0 0 0

2 1 0 0 0

0 0 0 0 0

0 0 0 0 0

0 0 0 0 0

#include <cstdio>
#include <cstring>
#include <queue>
#include <algorithm>
using namespace std ;
// Modulus applied to every matrix entry and to the final answer
// (the problem asks for the count mod 100000).
#define MOD 100000
// 64-bit signed integer type used for matrix entries. Spelled as the standard
// `long long`; the former `__int64` is a compiler extension that is not
// available on every toolchain.
#define LL long long
// One state of the pattern automaton over the alphabet "ACGT".
struct node{
    // Non-zero when this state must not be entered: settree() sets it on the
    // last node of a pattern and setfail() copies it along failure links.
    int flag ;
    // Sequential number handed out by newnode(); used as a matrix index.
    int id ;
    // Per-letter successor, indexed by the letter's position in "ACGT".
    node *next[4] ;
    // Failure link assigned by setfail(); NULL for the root.
    node *fail ;
};
// Square matrix with a fixed 110x110 backing store.
struct nnode{
    // Entries; only the top-left n-by-n corner is read by mul() and pow().
    LL Map[110][110] ;
    // Number of rows/columns actually in use.
    LL n ;
};
// Work queue shared by the breadth-first passes in setfail() and setmat().
queue<node*> que ;
// Alphabet; a letter's position here is its index into node::next[].
char c[5] = "ACGT" ;
// Buffer main() reads one pattern into.
char str[20] ;
// Count of nodes created so far; newnode() uses it to assign ids.
int num ;
// Visited marks indexed by node id, used by setmat().
int vis[110] ;
// Allocates a fresh automaton node: takes the next id from the global counter
// `num`, clears the flag, and leaves the failure link and all four successors
// NULL. The caller owns the returned pointer.
node *newnode()
{
    node *fresh = new node ;
    fresh->id = num++ ;
    fresh->flag = 0 ;
    fresh->fail = NULL ;
    int slot = 4 ;
    while( slot-- > 0 )
        fresh->next[slot] = NULL ;
    return fresh ;
}
// Returns the position of ch in the alphabet "ACGT" (0..3), or -1 when ch is
// not one of those four letters.
static int symbol_index(char ch)
{
    for(int k = 0 ; k < 4 ; k++)
        if( ch == c[k] )
            return k ;
    return -1 ;
}

// Inserts pattern `s` into the trie rooted at `rt`, creating missing nodes with
// newnode(), and marks the node reached by the last character as forbidden
// (flag = 1).
//
//   s    - NUL-terminated pattern; expected to consist of 'A', 'C', 'G', 'T'.
//   rt   - root of the trie.
//   temp - unused; kept so existing callers keep compiling.
//
// A pattern containing any other character is ignored as a whole. Previously
// such a character left the letter index at 4 and next[4] was accessed out of
// bounds.
void settree(char *s,node *rt,int temp)
{
    (void)temp ;
    int i , l = strlen(s) ;
    // Validate first so a bad pattern does not leave half-built branches.
    for(i = 0 ; i < l ; i++)
        if( symbol_index(s[i]) < 0 )
            return ;
    node *p = rt ;
    for(i = 0 ; i < l ; i++)
    {
        int k = symbol_index(s[i]) ;
        if( p->next[k] == NULL )
            p->next[k] = newnode() ;
        p = p->next[k] ;
    }
    p->flag = 1 ;
    return ;
}
// Turns the trie rooted at `rt` into a complete automaton with a breadth-first
// pass over the global queue `que`:
//   * every existing child gets its failure link, and inherits flag = 1 when
//     the node its failure link points to is flagged;
//   * every missing successor is filled in, pointing back to the root when
//     the current node is the root, otherwise to the same-letter successor of
//     the current node's failure link.
// After the call no next[] entry reachable from `rt` is NULL.
void setfail(node *rt)
{
    rt->fail = NULL ;
    while( !que.empty() ) que.pop() ;
    que.push(rt) ;
    while( !que.empty() )
    {
        node *cur = que.front() ;
        que.pop() ;
        for(int ch = 0 ; ch < 4 ; ch++)
        {
            node *child = cur->next[ch] ;
            if( child == NULL )
            {
                // No trie edge for this letter: borrow the transition.
                cur->next[ch] = ( cur == rt ) ? rt : cur->fail->next[ch] ;
                continue ;
            }
            // Walk failure links until one has a successor for this letter.
            node *link = cur->fail ;
            while( link != NULL && link->next[ch] == NULL )
                link = link->fail ;
            if( link == NULL )
                child->fail = rt ;
            else
            {
                child->fail = link->next[ch] ;
                // A state whose suffix is a forbidden state is forbidden too.
                if( link->next[ch]->flag )
                    child->flag = 1 ;
            }
            que.push(child) ;
        }
    }
}
// Builds the one-step transition-count matrix of the automaton rooted at `rt`.
// Entry [u][v] is the number of letters that lead from node id u to node id v,
// counted only when neither node is flagged. The matrix dimension is set to the
// global node count `num`. Nodes are visited breadth-first using the global
// `que` and `vis`, both of which are reset here. Expects setfail() to have run
// so that every next[] entry is non-NULL.
nnode setmat(node *rt)
{
    nnode mat ;
    memset(mat.Map,0,sizeof(mat.Map)) ;
    mat.n = num ;
    memset(vis,0,sizeof(vis)) ;
    while( !que.empty() ) que.pop() ;
    vis[ rt->id ] = 1 ;
    que.push(rt) ;
    while( !que.empty() )
    {
        node *from = que.front() ;
        que.pop() ;
        for(int ch = 0 ; ch < 4 ; ch++)
        {
            node *to = from->next[ch] ;
            if( from->flag == 0 && to->flag == 0 )
                mat.Map[ from->id ][ to->id ] += 1 ;
            if( vis[ to->id ] )
                continue ;
            vis[ to->id ] = 1 ;
            que.push(to) ;
        }
    }
    return mat ;
}
// Returns the matrix product a * b with every entry reduced modulo MOD.
// Only the top-left a.n-by-a.n corner of each operand is used; the result has
// dimension a.n. The running sum is reduced after each term.
nnode mul(nnode a,nnode b)
{
    nnode prod ;
    prod.n = a.n ;
    for(int row = 0 ; row < a.n ; row++)
        for(int col = 0 ; col < a.n ; col++)
        {
            LL acc = 0 ;
            for(int mid = 0 ; mid < a.n ; mid++)
            {
                acc += a.Map[row][mid] * b.Map[mid][col] ;
                acc %= MOD ;
            }
            prod.Map[row][col] = acc ;
        }
    return prod ;
}
// Raises matrix p to the k-th power (entries modulo MOD, via mul()) using
// iterative binary exponentiation. The accumulator starts as the identity
// matrix of dimension p.n, so k == 0 yields the identity.
nnode pow(nnode p,int k)
{
    nnode result ;
    result.n = p.n ;
    memset(result.Map,0,sizeof(result.Map)) ;
    for(int d = 0 ; d < p.n ; d++)
        result.Map[d][d] = 1 ;
    for( ; k ; k >>= 1 )
    {
        // Fold in the current power of p when the low bit of k is set.
        if( k & 1 )
            result = mul(result,p) ;
        p = mul(p,p) ;
    }
    return result ;
}
int main()
{
    int n , m , i , j ;
    node *rt ;
    nnode p ;
    while( scanf("%d %d", &m, &n) != EOF )
    {
        num = 0 ;
        rt = newnode() ;
        for(i = 1 ; i <= m ; i++)
        {
            scanf("%s", str) ;
            settree(str,rt,i) ;
        }
        setfail(rt) ;
        p = setmat(rt) ;
        /*for(i = 0 ; i < p.n ; i++)
        {
            for(j = 0 ; j < p.n ; j++)
                printf("%d ", p.Map[i][j]) ;
            printf("\n") ;
        }*/
        p = pow(p,n) ;
        LL ans = 0 ;
        for(i = 0 ; i < p.n ; i++)
            ans = ( ans + p.Map[0][i] ) % MOD ;
        printf("%d\n", ans) ;
    }
    return 0 ;
}

poj2778--DNA Sequence(AC自动机+矩阵优化)

标签:

原文地址:http://blog.csdn.net/winddreams/article/details/43452145

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!