poj 2778 DNA Sequence AC自动机+DP+矩阵乘法 检测所有可能的n位DNA串有多少个DNA串中不含有指定的病毒片段
2011-07-21 11:36
295 查看
It's well known that a DNA sequence is a sequence that contains only A, C, T and G, and it's very useful to analyze a segment of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, then it may mean that the animal has a genetic disease. So far scientists have found several such segments; the problem is how many kinds of DNA sequences of a species don't contain those segments.
Suppose that the DNA sequences of a species are sequences that consist of A, C, T and G, and the length of the sequences is a given integer n.
Input: The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.
Each of the next m lines contains one genetic disease segment; the length of each segment is not larger than 10.
Output: An integer, the number of DNA sequences, mod 100000.
Sample Input
//#include<iostream>
#include<cstdio>
#include<cstring>
#include<string>
#include<algorithm>
#include<queue>
#include<cctype>
#include<map>
using namespace std;
const int MOD=100000;
const int maxn=500;
// One automaton state; instances live in the static array `temp`.
struct Node
{
int flag;// 1 when a pattern ends at this node (set by insert), 0 otherwise
int id;// position of this node in the static node array
Node* next[4];// child pointers, one per letter code; NULL when absent
Node* fail;// failure link; NULL for the root
};
Node temp[maxn];
int tp;
int n;
__int64 len;
const int kind=4;
int hash[100];
Node* root;
// Puts a node that was just taken from the pool into its initial state:
// not a pattern end, no children, failure link pointing at the root
// (NULL when the node is the root itself).
// The id is derived from tp, so this must be called right after the node
// was obtained with temp[tp++].
void reset(Node* p)
{
    p->flag = 0;
    p->id = tp - 1;
    int slot = 0;
    while (slot < kind)
    {
        p->next[slot] = NULL;
        ++slot;
    }
    p->fail = (p == root) ? NULL : root;
}
void init()
{
hash['A']=0,hash['T']=1,hash['C']=2,hash['G']=3;
tp=0;
root=&temp[tp++];
reset(root);
}
void insert(char* word)
{
Node* p=root;
for(int i=0;word[i];i++)
{
int x=hash[word[i]];
if(p->next[x]==NULL)
{
p->next[x]=&temp[tp++];
reset(p->next[x]);
}
p=p->next[x];
}
p->flag=1;
}
Node* que[maxn*4];
int front,rear;
void DFA()
{
Node* p=root;
front=rear=0;
que[rear++]=p;
while(front<rear)
{
Node* t=que[front++];
for(int i=0;i<kind;i++)
{
Node* cnt=t->next[i];
if(cnt!=NULL)
{
Node* fath=t->fail;
while(fath!=NULL&&fath->next[i]==NULL)
{
fath=fath->fail;
}
if(fath!=NULL)
{
cnt->fail=fath->next[i];
}
else
{
cnt->fail=p;
}
que[rear++]=cnt;
}
}
}
}
__int64 a[maxn][maxn];
int r;
//a r*r 求a^len
void toMatrix()
{
r=rear;
memset(a,0,sizeof(a));
Node* fath;
for(int i=0;i<rear;i++)
{
Node* p=&temp[i];
if(p->flag) continue;
for(int j=0;j<kind;j++)
{
Node* cnt=p->next[j];
if(cnt!=NULL)
{
int mark=1;//important
for(fath=cnt;fath!=NULL;fath=fath->fail)
{
if(fath->flag) {
mark=0;
break;
}
}
if(mark)
{
int k=cnt->id;
a[i][k]++;
}
}
else
{
fath=p->fail;
while(fath!=NULL&&fath->next[j]==NULL)
{
fath=fath->fail;
}
if(fath!=NULL)
{
cnt=fath->next[j];
int mark=1;//important
for(fath=cnt;fath!=NULL;fath=fath->fail)
{
if(fath->flag) {
mark=0;
break;
}
}
if(mark)
{
int k=cnt->id;
a[i][k]++;
}
}
else
{
cnt=root;
a[i][0]++;
}
}
}
}
}
__int64 t[maxn][maxn],tmp[maxn][maxn],b[maxn][maxn];
// c = a * b (mod MOD), restricted to the leading r x r part of each matrix.
// Each entry of c is written only after its whole dot product is computed.
void multiply(__int64 a[][maxn],__int64 b[][maxn],__int64 c[][maxn])
{
    for (int row = 0; row < r; ++row)
    {
        for (int col = 0; col < r; ++col)
        {
            __int64 sum = 0;
            int k = 0;
            while (k < r)
            {
                sum = (sum + a[row][k] * b[k][col]) % MOD;
                ++k;
            }
            c[row][col] = sum;
        }
    }
}
// Raises the base matrix to the p-th power in place (entries mod MOD).
//   a: on entry must hold the same r x r matrix as the global `b`;
//      on return holds b^p.
//   p: exponent. For p <= 0 the result is the identity matrix (b^0); the
//      previous code recursed without end for such values.
// Uses the globals `b` (base), `tmp` and `t` (scratch) and `r` (dimension).
void matrixPow(__int64 a[][maxn],__int64 p)
{
    if (p <= 0)
    {
        for (int i = 0; i < r; i++)
            for (int j = 0; j < r; j++)
                a[i][j] = (i == j) ? 1 : 0;
        return;
    }
    if (p == 1) return; // a already equals b^1

    matrixPow(a, p / 2); // a = b^(p/2)
    if (p & 1)
    {
        multiply(a, a, tmp);  // tmp = b^(p-1)
        multiply(b, tmp, t);  // t   = b^p
    }
    else
    {
        multiply(a, a, t);    // t   = b^p
    }
    for (int i = 0; i < r; i++)
        for (int j = 0; j < r; j++)
            a[i][j] = t[i][j];
}
// Reads the pattern count and the sequence length, builds the automaton and
// its transition matrix, raises the matrix to the len-th power and prints the
// sum of row 0 (paths starting at the root) modulo MOD.
// Returns 1 without printing when the header line cannot be parsed or the
// length is below 1.
int main()
{
    char str[maxn];
    // Stop on malformed input: a failed read would leave len at 0, which
    // matrixPow is not written to handle.
    if (scanf("%d%I64d", &n, &len) != 2 || len < 1) return 1;
    init();
    for (int i = 0; i < n; i++)
    {
        // Width 499 keeps the read inside str (maxn == 500 bytes).
        if (scanf("%499s", str) != 1) break;
        insert(str);
    }
    DFA();
    toMatrix();
    // matrixPow multiplies by the untouched base matrix kept in b.
    for (int i = 0; i < r; i++)
        for (int j = 0; j < r; j++)
            b[i][j] = a[i][j];
    matrixPow(a, len); // a = base^len
    __int64 cnt = 0;
    for (int i = 0; i < r; i++)
    {
        cnt += a[0][i];
        cnt %= MOD;
    }
    printf("%I64d\n", cnt);
    return 0;
}
/*
10 100
AGAGAGT
CGTATTG
AAAATTTCGC
GCGTA
TCGA
AATTGGA
TAGATAGC
AGCGTATT
TTCGA
TACGTATTG
*/
//35771
/*
10 6
AGAGAGT
CGTATTG
AAAATTTCGC
GCGTA
TCGA
AATTGGA
TAGATAGC
AGCGTATT
TTCGA
TACGTATTG
*/
//4040
Suppose that the DNA sequences of a species are sequences that consist of A, C, T and G, and the length of the sequences is a given integer n.
Input: The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.
Each of the next m lines contains one genetic disease segment; the length of each segment is not larger than 10.
Output: An integer, the number of DNA sequences, mod 100000.
Sample Input
4 3
AT
AC
AG
AA
Sample Output
36
//#include<iostream>
#include<cstdio>
#include<cstring>
#include<string>
#include<algorithm>
#include<queue>
#include<cctype>
#include<map>
using namespace std;
const int MOD=100000;
const int maxn=500;
// One automaton state; instances live in the static array `temp`.
struct Node
{
int flag;// 1 when a pattern ends at this node (set by insert), 0 otherwise
int id;// position of this node in the static node array
Node* next[4];// child pointers, one per letter code; NULL when absent
Node* fail;// failure link; NULL for the root
};
Node temp[maxn];
int tp;
int n;
__int64 len;
const int kind=4;
int hash[100];
Node* root;
// Puts a node that was just taken from the pool into its initial state:
// not a pattern end, no children, failure link pointing at the root
// (NULL when the node is the root itself).
// The id is derived from tp, so this must be called right after the node
// was obtained with temp[tp++].
void reset(Node* p)
{
    p->flag = 0;
    p->id = tp - 1;
    int slot = 0;
    while (slot < kind)
    {
        p->next[slot] = NULL;
        ++slot;
    }
    p->fail = (p == root) ? NULL : root;
}
void init()
{
hash['A']=0,hash['T']=1,hash['C']=2,hash['G']=3;
tp=0;
root=&temp[tp++];
reset(root);
}
void insert(char* word)
{
Node* p=root;
for(int i=0;word[i];i++)
{
int x=hash[word[i]];
if(p->next[x]==NULL)
{
p->next[x]=&temp[tp++];
reset(p->next[x]);
}
p=p->next[x];
}
p->flag=1;
}
Node* que[maxn*4];
int front,rear;
void DFA()
{
Node* p=root;
front=rear=0;
que[rear++]=p;
while(front<rear)
{
Node* t=que[front++];
for(int i=0;i<kind;i++)
{
Node* cnt=t->next[i];
if(cnt!=NULL)
{
Node* fath=t->fail;
while(fath!=NULL&&fath->next[i]==NULL)
{
fath=fath->fail;
}
if(fath!=NULL)
{
cnt->fail=fath->next[i];
}
else
{
cnt->fail=p;
}
que[rear++]=cnt;
}
}
}
}
__int64 a[maxn][maxn];
int r;
//a r*r 求a^len
void toMatrix()
{
r=rear;
memset(a,0,sizeof(a));
Node* fath;
for(int i=0;i<rear;i++)
{
Node* p=&temp[i];
if(p->flag) continue;
for(int j=0;j<kind;j++)
{
Node* cnt=p->next[j];
if(cnt!=NULL)
{
int mark=1;//important
for(fath=cnt;fath!=NULL;fath=fath->fail)
{
if(fath->flag) {
mark=0;
break;
}
}
if(mark)
{
int k=cnt->id;
a[i][k]++;
}
}
else
{
fath=p->fail;
while(fath!=NULL&&fath->next[j]==NULL)
{
fath=fath->fail;
}
if(fath!=NULL)
{
cnt=fath->next[j];
int mark=1;//important
for(fath=cnt;fath!=NULL;fath=fath->fail)
{
if(fath->flag) {
mark=0;
break;
}
}
if(mark)
{
int k=cnt->id;
a[i][k]++;
}
}
else
{
cnt=root;
a[i][0]++;
}
}
}
}
}
__int64 t[maxn][maxn],tmp[maxn][maxn],b[maxn][maxn];
// c = a * b (mod MOD), restricted to the leading r x r part of each matrix.
// Each entry of c is written only after its whole dot product is computed.
void multiply(__int64 a[][maxn],__int64 b[][maxn],__int64 c[][maxn])
{
    for (int row = 0; row < r; ++row)
    {
        for (int col = 0; col < r; ++col)
        {
            __int64 sum = 0;
            int k = 0;
            while (k < r)
            {
                sum = (sum + a[row][k] * b[k][col]) % MOD;
                ++k;
            }
            c[row][col] = sum;
        }
    }
}
// Raises the base matrix to the p-th power in place (entries mod MOD).
//   a: on entry must hold the same r x r matrix as the global `b`;
//      on return holds b^p.
//   p: exponent. For p <= 0 the result is the identity matrix (b^0); the
//      previous code recursed without end for such values.
// Uses the globals `b` (base), `tmp` and `t` (scratch) and `r` (dimension).
void matrixPow(__int64 a[][maxn],__int64 p)
{
    if (p <= 0)
    {
        for (int i = 0; i < r; i++)
            for (int j = 0; j < r; j++)
                a[i][j] = (i == j) ? 1 : 0;
        return;
    }
    if (p == 1) return; // a already equals b^1

    matrixPow(a, p / 2); // a = b^(p/2)
    if (p & 1)
    {
        multiply(a, a, tmp);  // tmp = b^(p-1)
        multiply(b, tmp, t);  // t   = b^p
    }
    else
    {
        multiply(a, a, t);    // t   = b^p
    }
    for (int i = 0; i < r; i++)
        for (int j = 0; j < r; j++)
            a[i][j] = t[i][j];
}
// Reads the pattern count and the sequence length, builds the automaton and
// its transition matrix, raises the matrix to the len-th power and prints the
// sum of row 0 (paths starting at the root) modulo MOD.
// Returns 1 without printing when the header line cannot be parsed or the
// length is below 1.
int main()
{
    char str[maxn];
    // Stop on malformed input: a failed read would leave len at 0, which
    // matrixPow is not written to handle.
    if (scanf("%d%I64d", &n, &len) != 2 || len < 1) return 1;
    init();
    for (int i = 0; i < n; i++)
    {
        // Width 499 keeps the read inside str (maxn == 500 bytes).
        if (scanf("%499s", str) != 1) break;
        insert(str);
    }
    DFA();
    toMatrix();
    // matrixPow multiplies by the untouched base matrix kept in b.
    for (int i = 0; i < r; i++)
        for (int j = 0; j < r; j++)
            b[i][j] = a[i][j];
    matrixPow(a, len); // a = base^len
    __int64 cnt = 0;
    for (int i = 0; i < r; i++)
    {
        cnt += a[0][i];
        cnt %= MOD;
    }
    printf("%I64d\n", cnt);
    return 0;
}
/*
10 100
AGAGAGT
CGTATTG
AAAATTTCGC
GCGTA
TCGA
AATTGGA
TAGATAGC
AGCGTATT
TTCGA
TACGTATTG
*/
//35771
/*
10 6
AGAGAGT
CGTATTG
AAAATTTCGC
GCGTA
TCGA
AATTGGA
TAGATAGC
AGCGTATT
TTCGA
TACGTATTG
*/
//4040
相关文章推荐
- hdu 3434 给你含有n个数的序列,每次你可以选一个子序列将上面所有的数字加1或者减1,目标是把所有数字变成相同的,问最少步数,和那个相同的数字有多少种可能
- POJ 2778 DNA Sequence(AC自动机+矩阵)
- POJ 2778 DNA Sequence [AC自动机 + 矩阵快速幂]
- POJ 题目2778 DNA Sequence(AC自动机,矩阵快速幂)
- POJ 2778 DNA Sequence && AC自动机 矩阵 矩阵加速
- poj 2778 AC自动机+快速幂(DNA Sequence)
- 二、给定一个 n 行 m 列的地牢,其中 '.' 表示可以通行的位置,'X' 表示不可通行的障碍,牛牛从 (x0 , y0 ) 位置出发,遍历这个地牢,和一般的游戏所不同的是,他每一步只能按照一些指定的步长遍历地牢,要求每一步都不可以超过地牢的边界,也不能到达障碍上。地牢的出口可能在任意某个可以通行的位置上。牛牛想知道最坏情况下,他需要多少步才可以离开这个地牢。
- POJ 2778 DNA Sequence [AC自动机 + 矩阵快速幂]
- 【POJ 2778】DNA Sequence 中文题意&题解&代码(C++)
- 下载文件时火狐总是提示“已屏蔽:可能含有病毒或间谍软件”
- 将1~6这6个数字按每行3个进行输出,输出时要求左边的数字比右边的大,上边的数字比下边的大,求出所有可能的填写方法并统计输出排列方法有多少种。
- (AC自动机/矩阵快速幂)poj 2778 DNA Sequence
- 查找数据库中含有指定数据的所有表名和字段
- poj 2778 DNA Sequence 【ac自动机 + dp + 矩阵快速幂】
- POJ 2778:DNA Sequence(AC自动机+矩阵快速幂)
- poj2778--DNA Sequence(AC自动机+矩阵优化)
- POJ 2778 DNA Sequence(AC自动机 + 矩阵乘法)
- POJ 2778 DNA Sequence 题解&代码
- poj 2778 DNA Sequence (ac自动机+矩阵快速幂优化dp)
- 下载文件时火狐总是提示“已屏蔽:可能含有病毒或间谍软件”