您的位置:首页 > 产品设计 > UI/UE

poj 2778 DNA Sequence AC自动机+DP+矩阵乘法 检测所有可能的n位DNA串有多少个DNA串中不含有指定的病毒片段

2011-07-21 11:36 295 查看
It's well known that a DNA sequence is a sequence that contains only A, C, T and G, and it's very useful to analyze a segment of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, then it may mean that the animal has a genetic disease. Until now scientists have found several such segments; the problem is how many kinds of DNA sequences of a species don't contain those segments.

Suppose that the DNA sequences of a species are sequences that consist of A, C, T and G, and the length of the sequences is a given integer n.
Input
The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.

The next m lines each contain a DNA genetic disease segment; the length of each segment is not larger than 10.
Output
An integer, the number of DNA sequences, mod 100000.
Sample Input
4 3
AT
AC
AG
AA
Sample Output
36


//#include<iostream>
#include<cstdio>
#include<cstring>
#include<string>
#include<algorithm>
#include<queue>
#include<cctype>
#include<map>
using namespace std;
// Modulus applied to every matrix entry and to the final answer.
const int MOD=100000;
// Capacity used for the node pool, the matrix dimensions and the input buffer.
const int maxn=500;
// One trie / automaton state.
struct Node
{
int flag;// non-zero when an inserted pattern ends at this node (set by insert)
int id;// index of this node inside the static pool `temp` (set by reset)
Node* next[4];// child per alphabet symbol, NULL when absent
Node* fail;// failure link; NULL for the root
};
// Static pool from which all trie nodes are handed out.
Node temp[maxn];
// Index of the next unused slot in `temp`.
int tp;
// Number of patterns to read (first value on the input line).
int n;
// Sequence length, i.e. the exponent the transition matrix is raised to.
__int64 len;
// Number of child slots iterated per node (alphabet size).
const int kind=4;
// Maps a character code to its child index; filled in by init() for A, T, C, G.
int hash[100];
// Root of the trie, assigned in init().
Node* root;
// Put a freshly allocated node into its initial state.
// Must be called right after the node was taken from the pool with
// `&temp[tp++]`, because the node's id is derived from `tp - 1`.
void reset(Node* p)
{
    p->flag = 0;
    p->id = tp - 1;
    int slot = 0;
    while (slot < kind)
    {
        p->next[slot] = NULL;
        ++slot;
    }
    // Every node initially fails to the root, except the root itself.
    p->fail = (p == root) ? NULL : root;
}
void init()
{
hash['A']=0,hash['T']=1,hash['C']=2,hash['G']=3;
tp=0;
root=&temp[tp++];
reset(root);
}
void insert(char* word)
{
Node* p=root;
for(int i=0;word[i];i++)
{
int x=hash[word[i]];
if(p->next[x]==NULL)
{
p->next[x]=&temp[tp++];
reset(p->next[x]);
}
p=p->next[x];
}
p->flag=1;
}
// Array-backed queue used by DFA() for its breadth-first traversal.
Node* que[maxn*4];
// Read and write positions of `que`; after DFA() `rear` is the number of nodes enqueued.
int front,rear;
void DFA()
{
Node* p=root;
front=rear=0;
que[rear++]=p;
while(front<rear)
{
Node* t=que[front++];
for(int i=0;i<kind;i++)
{
Node* cnt=t->next[i];
if(cnt!=NULL)
{
Node* fath=t->fail;
while(fath!=NULL&&fath->next[i]==NULL)
{
fath=fath->fail;
}
if(fath!=NULL)
{
cnt->fail=fath->next[i];
}
else
{
cnt->fail=p;
}
que[rear++]=cnt;
}
}
}
}
// Transition-count matrix built by toMatrix(); raised to a power in main().
__int64 a[maxn][maxn];
// Number of rows/columns of `a` that are actually in use.
int r;
// a is r*r; the goal is to compute a^len
void toMatrix()
{
r=rear;
memset(a,0,sizeof(a));
Node* fath;
for(int i=0;i<rear;i++)
{
Node* p=&temp[i];
if(p->flag) continue;
for(int j=0;j<kind;j++)
{
Node* cnt=p->next[j];
if(cnt!=NULL)
{
int mark=1;//important
for(fath=cnt;fath!=NULL;fath=fath->fail)
{
if(fath->flag) {
mark=0;
break;
}
}
if(mark)
{
int k=cnt->id;
a[i][k]++;
}
}
else
{
fath=p->fail;
while(fath!=NULL&&fath->next[j]==NULL)
{
fath=fath->fail;
}
if(fath!=NULL)
{
cnt=fath->next[j];
int mark=1;//important
for(fath=cnt;fath!=NULL;fath=fath->fail)
{
if(fath->flag) {
mark=0;
break;
}
}
if(mark)
{
int k=cnt->id;
a[i][k]++;
}
}
else
{
cnt=root;
a[i][0]++;
}
}
}
}
}
// Scratch matrices for matrixPow(): `t` and `tmp` receive intermediate
// products, `b` holds the copy of the base matrix that main() makes before
// calling matrixPow().
__int64 t[maxn][maxn],tmp[maxn][maxn],b[maxn][maxn];
// c = a * b for the leading r*r sub-matrices, every entry reduced mod MOD.
// `c` is written while `a` and `b` are still being read, so it must be a
// different matrix from both inputs.
void multiply(__int64 a[][maxn],__int64 b[][maxn],__int64 c[][maxn])
{
    for (int row = 0; row < r; ++row)
    {
        for (int col = 0; col < r; ++col)
        {
            __int64 acc = 0;
            for (int mid = 0; mid < r; ++mid)
            {
                // Reduce after every term so the running sum stays small.
                acc = (acc + a[row][mid] * b[mid][col]) % MOD;
            }
            c[row][col] = acc;
        }
    }
}
// Raise the leading r*r part of `a` to the power p in place (entries mod MOD,
// via multiply).
//
// a: matrix to exponentiate; overwritten with the result.
// p: exponent. p == 1 leaves `a` untouched. p <= 0 is treated as exponent 0
//    and yields the identity matrix; previously such a value recursed
//    without ever reaching the base case.
//
// Precondition: the global `b` must hold a copy of the original (base) matrix,
// because the odd-exponent step multiplies by `b`. The globals `t` and `tmp`
// are used as scratch space.
void matrixPow(__int64 a[][maxn],__int64 p)
{
    if (p <= 0)
    {
        for (int i = 0; i < r; i++)
            for (int j = 0; j < r; j++)
                a[i][j] = (i == j) ? 1 : 0;
        return;
    }
    if (p == 1) return;
    // a <- base^(p/2)
    matrixPow(a, p / 2);
    if (p & 1)
    {
        // base^p = base * (base^(p/2))^2
        multiply(a, a, tmp);
        multiply(b, tmp, t);
    }
    else
    {
        // base^p = (base^(p/2))^2
        multiply(a, a, t);
    }
    for (int i = 0; i < r; i++)
        for (int j = 0; j < r; j++)
            a[i][j] = t[i][j];
}
// Read the pattern count and sequence length, build the automaton, raise its
// transition matrix to the power `len`, and print the number of sequences that
// avoid every pattern, modulo MOD.
// Returns 1 without printing anything when the input is malformed.
int main()
{
    char str[maxn];
    // Reject missing or non-positive input: with `len` left at 0 the
    // exponentiation has no valid base case to reach.
    if (scanf("%d%I64d", &n, &len) != 2 || len < 1) return 1;
    init();
    for (int i = 0; i < n; i++)
    {
        // Width 499 keeps the read inside `str` (maxn == 500, one byte for NUL).
        if (scanf("%499s", str) != 1) return 1;
        insert(str);
    }
    DFA();
    toMatrix();
    // matrixPow needs an untouched copy of the base matrix in `b`.
    for (int i = 0; i < r; i++) for (int j = 0; j < r; j++) b[i][j] = a[i][j];
    matrixPow(a, len);//a^len
    // Row 0 is the root: sum the walks of length `len` that start there.
    __int64 cnt = 0;
    for (int i = 0; i < r; i++) cnt += a[0][i], cnt %= MOD;
    printf("%I64d\n", cnt);
    return 0;
}
/*
10 100
AGAGAGT
CGTATTG
AAAATTTCGC
GCGTA
TCGA
AATTGGA
TAGATAGC
AGCGTATT
TTCGA
TACGTATTG
*/
//35771
/*
10 6
AGAGAGT
CGTATTG
AAAATTTCGC
GCGTA
TCGA
AATTGGA
TAGATAGC
AGCGTATT
TTCGA
TACGTATTG
*/
//4040
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐