您的位置:首页 > 产品设计 > UI/UE

poj2778--DNA Sequence(AC自动机+矩阵优化)

2015-02-03 17:19 495 查看
DNA Sequence

Time Limit: 1000MS Memory Limit: 65536K
Total Submissions: 12252 Accepted: 4661
Description
It's well known that a DNA sequence is a sequence that contains only A, C, T and G, and it's very useful to analyze a segment of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, then it may mean that the animal may have a genetic disease. Until now scientists have found several such segments; the problem is how many kinds of DNA sequences of a species don't contain those segments.

Suppose that a DNA sequence of a species is a sequence that consists of A, C, T and G, and that the length of the sequence is a given integer n.

Input
The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.

Each of the next m lines contains one DNA genetic disease segment; the length of each segment is not larger than 10.

Output
An integer, the number of DNA sequences, mod 100000.
Sample Input
4 3
AT
AC
AG
AA

Sample Output
36

 

给出患病的DNA序列,问序列长度为n的,且不包含患病的DNA序列有多少种。

首先处理患病的DNA串:将它们插入字典树,添加fail指针,建成AC自动机,并给每个节点一个编号。然后用矩阵统计每个节点走一步可以到达各节点的方案数(不允许走到会形成患病序列的节点)。最后用矩阵快速幂求该矩阵的n次幂,第0行(根节点所在行)各元素之和即为答案。

注意1.矩阵元素相乘会超出int范围(两个小于100000的数相乘可达约10^10),需要用64位整数(如__int64)保存,并在每次累加后取模。

注意2.处理自动机时,注意,如果发现某个节点的fail会返回到一个代表序列结束的节点上,那么这个节点也是不可达的。不能被统计到矩阵中。

注意3.矩阵快速幂要写成非递归的形式。

给出测试案例:

2 1

ACG

C

其中矩阵应该为

2 1 0 0 0

2 1 0 0 0

0 0 0 0 0

0 0 0 0 0

0 0 0 0 0

#include <cstdio>
#include <cstring>
#include <queue>
#include <algorithm>
using namespace std ;
// All answers are reported modulo this value (see the problem statement).
#define MOD 100000
// 64-bit signed integer used for matrix entries. `long long` is the standard
// spelling; `__int64` is a compiler extension that stock GCC/Clang reject.
#define LL long long
// One state of the trie / Aho-Corasick automaton.
struct node{
    int flag ;        // non-zero when reaching this state means a forbidden segment has been matched
    int id ;          // sequential index of this state; used as the row/column in the transition matrix
    node *next[4] ;   // transition per letter, indexed by position in "ACGT"
    node *fail ;      // failure link (NULL for the root)
};
// Square matrix over the automaton states, with its logical dimension.
struct nnode{
    // Entry storage. 110 leaves headroom over the problem's worst case of
    // 10 segments * 10 letters + 1 root = 101 states.
    LL Map[110][110] ;
    // Number of rows/columns actually in use (the top-left n x n corner).
    LL n ;
};
// Shared BFS work queue; setfail() and setmat() each clear it before use.
queue <node*> que ;
// Alphabet; a letter's position here is its index into node::next.
char c[5] = "ACGT" ;
// Input buffer for one forbidden segment.
char str[20] ;
// Number of automaton states created so far (also the next id to hand out).
int num ;
// Visited marks, indexed by node id, for the BFS in setmat().
int vis[110] ;
// Allocates a blank automaton state and assigns it the next sequential id
// (taken from, and advancing, the global counter `num`).
// Returns: pointer to the new heap-allocated node; nothing in this file
// frees it.
node *newnode()
{
    node *fresh = new node ;
    fresh->id = num++ ;
    fresh->flag = 0 ;
    fresh->fail = NULL ;
    fresh->next[0] = fresh->next[1] = fresh->next[2] = fresh->next[3] = NULL ;
    return fresh ;
}
// Inserts the forbidden segment `s` into the trie rooted at `rt` and marks
// the node where it ends.
//   s    - NUL-terminated segment; expected to consist of 'A','C','G','T'.
//   rt   - root of the trie.
//   temp - unused; kept so existing callers keep compiling.
// A segment containing any other character is ignored entirely: it can never
// occur inside a sequence over ACGT, and indexing next[] with it would run
// past the end of the 4-entry array.
void settree(char *s,node *rt,int temp)
{
    (void)temp ;
    int len = (int)strlen(s) ;

    // Validate first so a bad segment does not leave half-built nodes behind.
    for(int i = 0 ; i < len ; i++)
        if( strchr(c, s[i]) == NULL )
            return ;

    node *p = rt ;
    for(int i = 0 ; i < len ; i++)
    {
        // Position of the letter inside "ACGT" is its child index.
        int k = (int)( strchr(c, s[i]) - c ) ;
        if( p->next[k] == NULL )
            p->next[k] = newnode() ;
        p = p->next[k] ;
    }
    p->flag = 1 ;
}
// Turns the trie rooted at `rt` into a full automaton using a breadth-first
// pass over the shared queue `que`:
//   * every existing child gets its failure link, and inherits the
//     "forbidden" flag from the node its failure link points to;
//   * every missing child pointer is replaced by the state the automaton
//     would move to on that letter (the root itself for the root).
// After this call no next[] entry reachable from `rt` is NULL.
void setfail(node *rt)
{
    rt->fail = NULL ;
    while( !que.empty() ) que.pop() ;
    que.push(rt) ;
    while( !que.empty() )
    {
        node *cur = que.front() ;
        que.pop() ;
        for(int ch = 0 ; ch < 4 ; ch++)
        {
            node *child = cur->next[ch] ;
            if( child == NULL )
            {
                // No trie edge: borrow the transition of the failure state.
                cur->next[ch] = ( cur == rt ) ? rt : cur->fail->next[ch] ;
                continue ;
            }
            // Walk up the failure chain until some state has a `ch` edge.
            node *anc = cur->fail ;
            while( anc != NULL && anc->next[ch] == NULL )
                anc = anc->fail ;
            if( anc == NULL )
                child->fail = rt ;
            else
            {
                child->fail = anc->next[ch] ;
                // A suffix of this state's string is forbidden, so it is too.
                if( anc->next[ch]->flag )
                    child->flag = 1 ;
            }
            que.push(child) ;
        }
    }
}
// Builds the one-step transition matrix of the automaton rooted at `rt`.
// Map[u][v] counts the letters that move state u to state v, counting only
// moves where neither u nor v is flagged as forbidden. The matrix dimension
// is the global state count `num`. States are discovered by a BFS that uses
// the shared `que` and `vis`; setfail() must already have filled in every
// next[] pointer.
nnode setmat(node *rt)
{
    nnode q ;
    memset(q.Map,0,sizeof(q.Map)) ;
    q.n = num ;

    memset(vis,0,sizeof(vis)) ;
    while( !que.empty() ) que.pop() ;
    vis[ rt->id ] = 1 ;
    que.push(rt) ;

    while( !que.empty() )
    {
        node *cur = que.front() ;
        que.pop() ;
        for(int ch = 0 ; ch < 4 ; ch++)
        {
            node *nxt = cur->next[ch] ;
            if( !( cur->flag || nxt->flag ) )
                q.Map[ cur->id ][ nxt->id ]++ ;
            if( vis[ nxt->id ] )
                continue ;
            vis[ nxt->id ] = 1 ;
            que.push(nxt) ;
        }
    }
    return q ;
}
// Returns the matrix product a * b with every entry reduced modulo MOD.
// Only the top-left a.n x a.n corner is computed; b is assumed to have the
// same dimension. Entries outside that corner of the result are left
// unspecified.
// The operands are taken by const reference: each nnode is roughly 97 KB, so
// passing by value copied about 190 KB per call for no benefit. The result is
// built in a separate local, so calls such as `x = mul(x, x)` remain safe.
nnode mul(const nnode &a,const nnode &b)
{
    nnode c ;
    c.n = a.n ;
    for(int i = 0 ; i < a.n ; i++)
    {
        for(int j = 0 ; j < a.n ; j++)
        {
            // Reduce after every term; with both inputs below MOD each
            // product stays under 10^10, well within 64 bits.
            LL sum = 0 ;
            for(int k = 0 ; k < a.n ; k++)
                sum = ( sum + a.Map[i][k]*b.Map[k][j] ) % MOD ;
            c.Map[i][j] = sum ;
        }
    }
    return c ;
}
// Raises matrix `p` to the k-th power (entries modulo MOD, via mul()) using
// iterative binary exponentiation. k == 0 yields the identity matrix of
// dimension p.n.
nnode pow(nnode p,int k)
{
    nnode result ;
    result.n = p.n ;
    memset(result.Map,0,sizeof(result.Map)) ;
    for(int d = 0 ; d < p.n ; d++)
        result.Map[d][d] = 1 ;      // start from the identity

    // Consume the exponent one bit at a time, lowest bit first.
    for( ; k ; k >>= 1 )
    {
        if( k & 1 )
            result = mul(result,p) ;
        p = mul(p,p) ;
    }
    return result ;
}
// Reads test cases until input ends. Each case is "m n" followed by m
// forbidden segments; prints the number of length-n ACGT sequences that
// contain none of them, modulo MOD.
int main()
{
    int n , m , i ;
    node *rt ;
    nnode p ;
    // Require both numbers to parse: comparing against EOF alone would loop
    // forever on malformed input, where scanf keeps returning 0.
    while( scanf("%d %d", &m, &n) == 2 )
    {
        // Start a fresh automaton. NOTE: nodes of the previous case are not
        // freed (newnode() uses new and nothing deletes them).
        num = 0 ;
        rt = newnode() ;
        for(i = 1 ; i <= m ; i++)
        {
            // Width limit keeps an over-long token from overflowing str[20].
            if( scanf("%19s", str) != 1 )
                return 0 ;
            settree(str,rt,i) ;
        }
        setfail(rt) ;
        p = setmat(rt) ;
        /* Debug dump of the one-step matrix:
        for(i = 0 ; i < p.n ; i++)
        {
            for(int j = 0 ; j < p.n ; j++)
                printf("%d ", (int)p.Map[i][j]) ;
            printf("\n") ;
        }*/
        p = pow(p,n) ;
        // Row 0 belongs to the root: sum over every state reachable after n steps.
        LL ans = 0 ;
        for(i = 0 ; i < p.n ; i++)
            ans = ( ans + p.Map[0][i] ) % MOD ;
        // ans is 64-bit but always below MOD, so narrow it to match "%d".
        printf("%d\n", (int)ans) ;
    }
    return 0 ;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: