您的位置:首页 > 其它

【小结】AC自动机

2016-06-10 16:31 375 查看
参考资料:/article/2390636.html

搞了两天,突然明白,这玩意它原来就是个 $DFA$ 鸭!窝来分析分析

从 $DFA$ 到 $AC$ 自动机

考虑以下单词: $\{she,\ he,\ her\}$

我们先画出它Trie树的模样

留个板子

/* **********************************************

File Name: ac_automata.cpp

Auther: zhengdongjian@tju.edu.cn

Created Time: 2015年08月14日 星期五 08时41分23秒

*********************************************** */
#include <bits/stdc++.h>
using namespace std;

typedef std::pair<int, int> P;
const double EPS = 1e-8;
const double PI = std::acos(-1.0);

const int MAX = 500007; // capacity of the node pool (rows of nxt / fail / end)
const int MAXD = 26;    // alphabet size: lowercase 'a'..'z'

/*
 * Aho-Corasick automaton over the lowercase alphabet, stored in fixed arrays.
 *
 *   nxt[v][c]  trie edge out of node v on symbol c; -1 while the trie is being
 *              built, rewritten by build() into a total transition function
 *   fail[v]    failure link of v (only valid after build())
 *   end[v]     number of inserted patterns that end exactly at v
 *   root, L    root node id and number of allocated nodes (ids are [0, L))
 *
 * Usage order: clear(), insert() for every pattern, build(), then query().
 * Inserting after build() is not supported because build() overwrites the
 * -1 markers that insert() relies on.
 *
 * No capacity check is made: the caller must keep the total node count
 * (1 + sum of pattern lengths in the worst case) within MAX.
 */
struct Trie {
    int nxt[MAX][MAXD], fail[MAX], end[MAX];
    int root, L;

    /* Maps a character to its column in nxt, or -1 if it is outside 'a'..'z'. */
    static int code(char ch) {
        const int c = ch - 'a';
        return (0 <= c && c < MAXD) ? c : -1;
    }

    /* Allocates a fresh node with no outgoing edges and returns its id. */
    int newnode() {
        std::memset(nxt[L], -1, sizeof(int) * MAXD);
        end[L++] = 0;
        return L - 1;
    }

    /* Resets the automaton to a single root node. */
    void clear() {
        L = 0;
        root = newnode();
    }

    /*
     * Adds the NUL-terminated pattern buf to the trie.
     * A pattern containing any character outside 'a'..'z' is dropped without
     * allocating nodes: it could never be matched (query() resets on such
     * characters), and indexing nxt with it would be out of bounds.
     */
    void insert(char* buf) {
        const int len = static_cast<int>(std::strlen(buf));
        for (int i = 0; i < len; ++i) {
            if (code(buf[i]) < 0) {
                return;
            }
        }
        int now = root;
        for (int i = 0; i < len; ++i) {
            const int c = code(buf[i]);
            if (nxt[now][c] == -1) {
                nxt[now][c] = newnode();
            }
            now = nxt[now][c];
        }
        ++end[now];
    }

    /*
     * Computes failure links breadth-first and fills every missing edge with
     * the transition of the failure state, so that afterwards nxt[v][c] is
     * defined for all v and c.
     */
    void build() {
        std::queue<int> Q;
        fail[root] = root;
        for (int i = 0; i < MAXD; ++i) {
            if (nxt[root][i] == -1) {
                nxt[root][i] = root;
            } else {
                fail[nxt[root][i]] = root;
                Q.push(nxt[root][i]);
            }
        }

        while (!Q.empty()) {
            const int now = Q.front();
            Q.pop();

            for (int i = 0; i < MAXD; ++i) {
                if (nxt[now][i] == -1) {
                    nxt[now][i] = nxt[fail[now]][i];
                } else {
                    fail[nxt[now][i]] = nxt[fail[now]][i];
                    Q.push(nxt[now][i]);
                }
            }
        }
    }

    /*
     * Scans the first len characters of buf (the whole NUL-terminated string
     * when len == -1) and returns how many inserted patterns occur in it.
     *
     * The scan is destructive: each end[] counter is zeroed once it has been
     * added, so a pattern is counted at most once and a second query() on the
     * same automaton only reports patterns not seen before. To count every
     * occurrence instead, do not reset end[tmp].
     *
     * A character outside 'a'..'z' cannot be part of any match, so it sends
     * the automaton back to the root instead of indexing out of bounds.
     */
    int query(char* buf, int len = -1) {
        if (len == -1) {
            len = static_cast<int>(std::strlen(buf));
        }
        int now = root;
        int res = 0;
        for (int i = 0; i < len; ++i) {
            const int c = code(buf[i]);
            if (c < 0) {
                now = root;
                continue;
            }
            now = nxt[now][c];
            // Every node on the failure chain is a suffix of the current match.
            for (int tmp = now; tmp != root; tmp = fail[tmp]) {
                res += end[tmp];
                end[tmp] = 0;
            }
        }
        return res;
    }

    /* Dumps every node as "id, fail, end, [transitions...]" to stdout. */
    void debug() {
        for (int i = 0; i < L; ++i) {
            printf("%d, %d, %d, [%d", i, fail[i], end[i], nxt[i][0]);
            for (int j = 1; j < MAXD; ++j) {
                printf(" %d", nxt[i][j]);
            }
            puts("]");
        }
    }
} ac;

const int MAXL = 64;
char str[MAXL];      // one keyword
char buf[MAX << 1];  // the text to scan

/*
 * Driver: reads T test cases from stdin. Each case is a keyword count n,
 * n keywords, and one text; prints how many keywords occur in the text.
 *
 * Every scanf result is checked so that truncated input ends the program
 * instead of running on uninitialised values, and both %s conversions carry
 * a field width so an over-long token cannot overrun its buffer.
 */
int main() {
    // The widths in the format strings below are sizeof(buffer) - 1.
    static_assert(MAXL == 64, "update the %63s width when MAXL changes");
    static_assert(sizeof(buf) == 1000014, "update the %1000013s width when buf changes");

    int T;
    if (scanf(" %d", &T) != 1) {
        return 0;
    }
    while (T-- > 0) {
        int n;
        if (scanf(" %d", &n) != 1) {
            break;
        }
        ac.clear();
        for (int i = 0; i < n; ++i) {
            if (scanf(" %63s", str) != 1) {
                return 0;
            }
            ac.insert(str);
        }
        ac.build();

        if (scanf(" %1000013s", buf) != 1) {
            break;
        }
        printf("%d\n", ac.query(buf));
    }
    return 0;
}


几个简单的小题目

hdu2222,First attempt

/* **********************************************

File Name: ac_automata.cpp => hdu2222

Auther: zhengdongjian@tju.edu.cn

Created Time: 2015年08月14日 星期五 08时41分23秒

*********************************************** */
#include <bits/stdc++.h>
using namespace std;

typedef std::pair<int, int> P;
const double EPS = 1e-8;
const double PI = std::acos(-1.0);

const int MAX = 500007; // capacity of the node pool (rows of nxt / fail / end)
const int MAXD = 26;    // alphabet size: lowercase 'a'..'z'

/*
 * Aho-Corasick automaton over the lowercase alphabet, stored in fixed arrays.
 *
 *   nxt[v][c]  trie edge out of node v on symbol c; -1 while the trie is being
 *              built, rewritten by build() into a total transition function
 *   fail[v]    failure link of v (only valid after build())
 *   end[v]     number of inserted patterns that end exactly at v
 *   root, L    root node id and number of allocated nodes (ids are [0, L))
 *
 * Usage order: clear(), insert() for every pattern, build(), then query().
 * Inserting after build() is not supported because build() overwrites the
 * -1 markers that insert() relies on.
 *
 * No capacity check is made: the caller must keep the total node count
 * (1 + sum of pattern lengths in the worst case) within MAX.
 */
struct Trie {
    int nxt[MAX][MAXD], fail[MAX], end[MAX];
    int root, L;

    /* Maps a character to its column in nxt, or -1 if it is outside 'a'..'z'. */
    static int code(char ch) {
        const int c = ch - 'a';
        return (0 <= c && c < MAXD) ? c : -1;
    }

    /* Allocates a fresh node with no outgoing edges and returns its id. */
    int newnode() {
        std::memset(nxt[L], -1, sizeof(int) * MAXD);
        end[L++] = 0;
        return L - 1;
    }

    /* Resets the automaton to a single root node. */
    void clear() {
        L = 0;
        root = newnode();
    }

    /*
     * Adds the NUL-terminated pattern buf to the trie.
     * A pattern containing any character outside 'a'..'z' is dropped without
     * allocating nodes: it could never be matched (query() resets on such
     * characters), and indexing nxt with it would be out of bounds.
     */
    void insert(char* buf) {
        const int len = static_cast<int>(std::strlen(buf));
        for (int i = 0; i < len; ++i) {
            if (code(buf[i]) < 0) {
                return;
            }
        }
        int now = root;
        for (int i = 0; i < len; ++i) {
            const int c = code(buf[i]);
            if (nxt[now][c] == -1) {
                nxt[now][c] = newnode();
            }
            now = nxt[now][c];
        }
        ++end[now];
    }

    /*
     * Computes failure links breadth-first and fills every missing edge with
     * the transition of the failure state, so that afterwards nxt[v][c] is
     * defined for all v and c.
     */
    void build() {
        std::queue<int> Q;
        fail[root] = root;
        for (int i = 0; i < MAXD; ++i) {
            if (nxt[root][i] == -1) {
                nxt[root][i] = root;
            } else {
                fail[nxt[root][i]] = root;
                Q.push(nxt[root][i]);
            }
        }

        while (!Q.empty()) {
            const int now = Q.front();
            Q.pop();

            for (int i = 0; i < MAXD; ++i) {
                if (nxt[now][i] == -1) {
                    nxt[now][i] = nxt[fail[now]][i];
                } else {
                    fail[nxt[now][i]] = nxt[fail[now]][i];
                    Q.push(nxt[now][i]);
                }
            }
        }
    }

    /*
     * Scans the first len characters of buf (the whole NUL-terminated string
     * when len == -1) and returns how many inserted patterns occur in it.
     *
     * The scan is destructive: each end[] counter is zeroed once it has been
     * added, so a pattern is counted at most once and a second query() on the
     * same automaton only reports patterns not seen before. To count every
     * occurrence instead, do not reset end[tmp].
     *
     * A character outside 'a'..'z' cannot be part of any match, so it sends
     * the automaton back to the root instead of indexing out of bounds.
     */
    int query(char* buf, int len = -1) {
        if (len == -1) {
            len = static_cast<int>(std::strlen(buf));
        }
        int now = root;
        int res = 0;
        for (int i = 0; i < len; ++i) {
            const int c = code(buf[i]);
            if (c < 0) {
                now = root;
                continue;
            }
            now = nxt[now][c];
            // Every node on the failure chain is a suffix of the current match.
            for (int tmp = now; tmp != root; tmp = fail[tmp]) {
                res += end[tmp];
                end[tmp] = 0;
            }
        }
        return res;
    }

    /* Dumps every node as "id, fail, end, [transitions...]" to stdout. */
    void debug() {
        for (int i = 0; i < L; ++i) {
            printf("%d, %d, %d, [%d", i, fail[i], end[i], nxt[i][0]);
            for (int j = 1; j < MAXD; ++j) {
                printf(" %d", nxt[i][j]);
            }
            puts("]");
        }
    }
} ac;

const int MAXL = 64;
char str[MAXL];      // one keyword
char buf[MAX << 1];  // the text to scan

/*
 * hdu2222 driver: reads T test cases from stdin. Each case is a keyword
 * count n, n keywords, and one text; prints how many keywords occur in the
 * text.
 *
 * Every scanf result is checked so that truncated input ends the program
 * instead of running on uninitialised values, and both %s conversions carry
 * a field width so an over-long token cannot overrun its buffer.
 */
int main() {
    // The widths in the format strings below are sizeof(buffer) - 1.
    static_assert(MAXL == 64, "update the %63s width when MAXL changes");
    static_assert(sizeof(buf) == 1000014, "update the %1000013s width when buf changes");

    int T;
    if (scanf(" %d", &T) != 1) {
        return 0;
    }
    while (T-- > 0) {
        int n;
        if (scanf(" %d", &n) != 1) {
            break;
        }
        ac.clear();
        for (int i = 0; i < n; ++i) {
            if (scanf(" %63s", str) != 1) {
                return 0;
            }
            ac.insert(str);
        }
        ac.build();

        if (scanf(" %1000013s", buf) != 1) {
            break;
        }
        printf("%d\n", ac.query(buf));
    }
    return 0;
}


hdu2896

病毒保证不同,简单统计,随便搞

/* **********************************************

File Name: 2896.cpp

Auther: zhengdongjian@tju.edu.cn

Created Time: 2015年08月14日 星期五 11时25分51秒

*********************************************** */
#include <bits/stdc++.h>
using namespace std;

typedef std::pair<int, int> P;
const double EPS = 1e-8;
const double PI = std::acos(-1.0);
const int MAX = 100007; // capacity of the node pool (also sizes the I/O buffer)
const int MAXD = 128;   // alphabet size: 7-bit ASCII codes 0..127

/*
 * Aho-Corasick automaton over 7-bit ASCII where every pattern carries an id.
 *
 *   nxt[v][c]  trie edge / (after build()) full transition function
 *   fail[v]    failure link of v (valid after build())
 *   end[v]     id of the pattern ending at v, or -1 if none ends there
 *   root, L    root node id and number of allocated nodes
 *
 * Usage order: clear(), insert() for every pattern, build(), then query().
 * No capacity check is made; the caller keeps the node count within MAX.
 */
struct Trie {
    int nxt[MAX][MAXD], fail[MAX], end[MAX];
    int root, L;

    /*
     * Maps a character to its column in nxt, or -1 if it is not 7-bit ASCII.
     * Going through unsigned char keeps bytes >= 0x80 from becoming negative
     * indices on platforms where plain char is signed.
     */
    static int code(char ch) {
        const int c = static_cast<unsigned char>(ch);
        return c < MAXD ? c : -1;
    }

    /* Allocates a fresh node with no edges and no pattern id. */
    int newnode() {
        std::memset(nxt[L], -1, sizeof(int) * MAXD);
        end[L++] = -1;
        return L - 1;
    }

    /* Resets the automaton to a single root node. */
    void clear() {
        L = 0;
        root = newnode();
    }

    /*
     * Adds the NUL-terminated pattern buf and tags its final node with _end.
     * If the same pattern is inserted twice the later id replaces the earlier.
     * A pattern containing a byte outside the alphabet is dropped: it could
     * never be matched, since query() resets on such bytes.
     */
    void insert(char* buf, int _end) {
        const int len = static_cast<int>(std::strlen(buf));
        for (int i = 0; i < len; ++i) {
            if (code(buf[i]) < 0) {
                return;
            }
        }
        int now = root;
        for (int i = 0; i < len; ++i) {
            const int c = code(buf[i]);
            if (nxt[now][c] == -1) {
                nxt[now][c] = newnode();
            }
            now = nxt[now][c];
        }
        end[now] = _end;
    }

    /*
     * Computes failure links breadth-first and replaces every missing edge by
     * the transition of the failure state.
     */
    void build() {
        std::queue<int> Q;
        fail[root] = root;
        for (int i = 0; i < MAXD; ++i) {
            if (nxt[root][i] == -1) {
                nxt[root][i] = root;
            } else {
                fail[nxt[root][i]] = root;
                Q.push(nxt[root][i]);
            }
        }

        while (!Q.empty()) {
            const int now = Q.front();
            Q.pop();

            for (int i = 0; i < MAXD; ++i) {
                if (nxt[now][i] == -1) {
                    nxt[now][i] = nxt[fail[now]][i];
                } else {
                    fail[nxt[now][i]] = nxt[fail[now]][i];
                    Q.push(nxt[now][i]);
                }
            }
        }
    }

    /*
     * Returns the ids of all patterns occurring in the NUL-terminated text
     * buf, in ascending order. The automaton is not modified, so it can be
     * queried repeatedly. Bytes outside the alphabet reset the scan to the
     * root because no pattern can span them.
     */
    std::set<int> query(char* buf) {
        const int len = static_cast<int>(std::strlen(buf));
        int now = root;
        std::set<int> res;
        for (int i = 0; i < len; ++i) {
            const int c = code(buf[i]);
            if (c < 0) {
                now = root;
                continue;
            }
            now = nxt[now][c];
            // Every node on the failure chain is a suffix of the current match.
            for (int tmp = now; tmp != root; tmp = fail[tmp]) {
                if (end[tmp] != -1) {
                    res.insert(end[tmp]);
                }
            }
        }
        return res;
    }
} ac;
char buf[MAX]; // holds one pattern or one text at a time

/*
 * hdu2896 driver. For each data set on stdin: reads n patterns (ids 1..n),
 * then m texts; for every text containing at least one pattern prints
 * "web <index>:" followed by the matching ids in ascending order, and
 * finally "total: <number of such texts>".
 *
 * The outer loop requires scanf to return 1, so malformed input (return 0)
 * stops the program instead of spinning forever on a stale n, and the %s
 * conversions are width-limited to the buffer.
 */
int main() {
    // The width in the format strings below is sizeof(buf) - 1.
    static_assert(sizeof(buf) == 100007, "update the %100006s width when buf changes");

    int n, m;
    while (scanf(" %d", &n) == 1) {
        ac.clear();
        for (int i = 1; i <= n; ++i) {
            if (scanf(" %100006s", buf) != 1) {
                return 0;
            }
            ac.insert(buf, i);
        }
        ac.build();
        if (scanf(" %d", &m) != 1) {
            return 0;
        }
        int sum = 0;
        for (int i = 1; i <= m; ++i) {
            if (scanf(" %100006s", buf) != 1) {
                break;
            }
            const auto hits = ac.query(buf);
            if (hits.empty()) {
                continue;
            }
            ++sum;
            printf("web %d:", i);
            for (int id : hits) {
                printf(" %d", id);
            }
            puts("");
        }
        printf("total: %d\n", sum);
    }
    return 0;
}


hdu3065

要打印匹配串:打标记,Trie上节点打前驱和字符标记。P.S.空间上还可以优化

/* **********************************************

File Name: 3065.cpp

Auther: zhengdongjian@tju.edu.cn

Created Time: 2015年08月14日 星期五 11时46分10秒

*********************************************** */
#include <bits/stdc++.h>
using namespace std;

typedef std::pair<int, int> P;
const double EPS = 1e-8;
const double PI = std::acos(-1.0);

const int MAX = 50007; // capacity of the node pool
const int MAXD = 128;  // alphabet size: 7-bit ASCII codes 0..127

/*
 * Aho-Corasick automaton over 7-bit ASCII that can report which pattern
 * matched and how often.
 *
 *   nxt[v][c]  trie edge / (after build()) full transition function
 *   fail[v]    failure link of v (valid after build())
 *   end[v]     number of inserted patterns ending at v
 *   pre[v]     parent of v in the trie, -1 for the root
 *   dad[v]     character on the edge pre[v] -> v
 *   root, L    root node id and number of allocated nodes
 *
 * pre/dad let a caller rebuild a pattern's text by walking from its end node
 * up to the root. Usage order: clear(), insert()..., build(), query().
 * No capacity check is made; the caller keeps the node count within MAX.
 */
struct Trie {
    int nxt[MAX][MAXD], fail[MAX], end[MAX], pre[MAX];
    char dad[MAX];
    int root, L;

    /*
     * Maps a character to its column in nxt, or -1 if it is not 7-bit ASCII.
     * Going through unsigned char keeps bytes >= 0x80 from becoming negative
     * indices on platforms where plain char is signed.
     */
    static int code(char ch) {
        const int c = static_cast<unsigned char>(ch);
        return c < MAXD ? c : -1;
    }

    /* Allocates a fresh node with no edges, no parent and a zero counter. */
    int newnode() {
        std::memset(nxt[L], -1, sizeof(int) * MAXD);
        pre[L] = -1;
        end[L++] = 0;
        return L - 1;
    }

    /* Resets the automaton to a single root node. */
    void clear() {
        L = 0;
        root = newnode();
    }

    /*
     * Adds the NUL-terminated pattern buf, recording parent and edge
     * character for every node it creates. A pattern containing a byte
     * outside the alphabet is dropped: it could never be matched, since
     * query() resets on such bytes.
     */
    void insert(char* buf) {
        const int len = static_cast<int>(std::strlen(buf));
        for (int i = 0; i < len; ++i) {
            if (code(buf[i]) < 0) {
                return;
            }
        }
        int now = root;
        for (int i = 0; i < len; ++i) {
            const int c = code(buf[i]);
            if (nxt[now][c] == -1) {
                const int child = newnode();
                nxt[now][c] = child;
                pre[child] = now;
                dad[child] = buf[i];
            }
            now = nxt[now][c];
        }
        ++end[now];
    }

    /*
     * Computes failure links breadth-first and replaces every missing edge by
     * the transition of the failure state.
     */
    void build() {
        std::queue<int> Q;
        fail[root] = root;
        for (int i = 0; i < MAXD; ++i) {
            if (nxt[root][i] == -1) {
                nxt[root][i] = root;
            } else {
                fail[nxt[root][i]] = root;
                Q.push(nxt[root][i]);
            }
        }

        while (!Q.empty()) {
            const int now = Q.front();
            Q.pop();

            for (int i = 0; i < MAXD; ++i) {
                if (nxt[now][i] == -1) {
                    nxt[now][i] = nxt[fail[now]][i];
                } else {
                    fail[nxt[now][i]] = nxt[fail[now]][i];
                    Q.push(nxt[now][i]);
                }
            }
        }
    }

    /*
     * Scans the first len characters of buf (the whole NUL-terminated string
     * when len == -1) and returns a map from pattern end-node id to the
     * number of occurrences of that pattern, overlapping occurrences
     * included. The automaton is not modified. Bytes outside the alphabet
     * reset the scan to the root because no pattern can span them.
     */
    std::map<int, int> query(char* buf, int len = -1) {
        if (len == -1) {
            len = static_cast<int>(std::strlen(buf));
        }
        int now = root;
        std::map<int, int> res;
        for (int i = 0; i < len; ++i) {
            const int c = code(buf[i]);
            if (c < 0) {
                now = root;
                continue;
            }
            now = nxt[now][c];
            // Every node on the failure chain is a suffix of the current match.
            for (int tmp = now; tmp != root; tmp = fail[tmp]) {
                if (end[tmp] > 0) {
                    ++res[tmp];
                }
            }
        }
        return res;
    }

    /* Dumps every node as "id, fail, end, [transitions...]" to stdout. */
    void debug() {
        for (int i = 0; i < L; ++i) {
            printf("%d, %d, %d, [%d", i, fail[i], end[i], nxt[i][0]);
            for (int j = 1; j < MAXD; ++j) {
                printf(" %d", nxt[i][j]);
            }
            puts("]");
        }
    }
} ac;

const int MAXL = 64;
char str[MAXL];         // one pattern on input; reused to rebuild matched patterns
char buffer[MAX * 40];  // the text to scan

/*
 * hdu3065 driver. For each data set on stdin: reads n patterns and one text,
 * then prints "<pattern>: <count>" for every pattern that occurs in the
 * text, in ascending order of the pattern's end-node id.
 *
 * The pattern text is not stored; it is rebuilt right-to-left in str by
 * following ac.pre / ac.dad from the end node to the root. The %63s width
 * guarantees a pattern (at most 63 characters plus NUL) fits that rebuild.
 * scanf results are checked so malformed input cannot loop forever.
 */
int main() {
    // The widths in the format strings below are sizeof(buffer) - 1.
    static_assert(MAXL == 64, "update the %63s width when MAXL changes");
    static_assert(sizeof(buffer) == 2000280, "update the %2000279s width when buffer changes");

    int n;
    while (scanf(" %d", &n) == 1) {
        ac.clear();
        for (int i = 1; i <= n; ++i) {
            if (scanf(" %63s", str) != 1) {
                return 0;
            }
            ac.insert(str);
        }
        ac.build();
        if (scanf(" %2000279s", buffer) != 1) {
            return 0;
        }
        const auto mp = ac.query(buffer);
        for (const auto& hit : mp) {
            int idx = MAXL - 1;
            str[idx] = '\0';
            for (int now = hit.first; now != ac.root; now = ac.pre[now]) {
                str[--idx] = ac.dad[now];
            }
            printf("%s: %d\n", str + idx, hit.second);
        }
    }
    return 0;
}


zoj3430

解码一下即可。debug了好久,最后发现Base64直接解码出来的字符可能不是 $ASCII$ 码,就如 $1111\ 1111$ 对应到 $ASCII$ 后是 $EOF\dots$ 真让人难堪。。。所以字符集开到256就可以过了…一部分调试中间修改导致整个代码看起来丑陋了一些,不愿改了。。

/* **********************************************

File Name: 3430.cpp

Auther: zhengdongjian@tju.edu.cn

Created Time: 2015年08月14日 星期五 13时28分38秒

*********************************************** */
#include <bits/stdc++.h>
using namespace std;

using P = std::pair<int, int>;      // generic int pair alias
const double EPS = 1e-8;            // floating-point comparison tolerance
const double PI = std::acos(-1.0);  // pi
const int MAX = 50007;              // size of the node pool and of the I/O buffers
const int MAXD = 256;               // alphabet size: every byte value 0..255

/*
* Base64 Decode
*/
// Base64 alphabet in value order (index == 6-bit value). Reference only.
static const char cb64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// Former bit scratch buffer. decode64 no longer uses it; the declaration is
// kept so that any other reference to the name still links.
bool pool[50007];

/*
 * Returns the 6-bit value of Base64 digit c.
 * 'A'-'Z' -> 0..25, 'a'-'z' -> 26..51, '0'-'9' -> 52..61, '+' -> 62;
 * every other character (including '/') -> 63.
 * The classification goes through unsigned char because passing a negative
 * char to isupper/islower/isdigit is undefined behaviour. The parameter is
 * taken by value; lvalue callers are unaffected.
 */
inline int dic64(char c) {
    const unsigned char u = static_cast<unsigned char>(c);
    if (std::isupper(u)) {
        return c - 'A';
    }
    if (std::islower(u)) {
        return c - 'a' + 26;
    }
    if (std::isdigit(u)) {
        return c - '0' + 52;
    }
    return c == '+' ? 62 : 63;
}

/*
 * Decodes the NUL-terminated Base64 string source into dest, one decoded
 * byte (0..255) per int, followed by a -1 terminator.
 *
 * dest must have room for 3 * strlen(source) / 4 + 2 ints.
 *
 * Each trailing '=' removes one byte from the output, so a correctly padded
 * string of 4k characters with p pad characters yields 3k - p bytes. Input
 * that is not correctly padded is still decoded: a final partial byte is
 * completed with zero bits. An empty or all-'=' string yields just the
 * terminator.
 *
 * Bits are streamed through a small accumulator, so the input length is not
 * limited by any scratch buffer.
 */
void decode64(char source[], int dest[]) {
    int len = static_cast<int>(std::strlen(source));
    int pad = 0;
    while (len > 0 && source[len - 1] == '=') {
        --len;
        ++pad;
    }

    // Six bits per original character, minus eight for every pad character.
    const long long bits = 6LL * (len + pad) - 8LL * pad;
    const int bytes = bits > 0 ? static_cast<int>((bits + 7) / 8) : 0;

    unsigned acc = 0; // pending bits, most significant first
    int have = 0;     // number of valid low bits in acc
    int out = 0;
    for (int i = 0; i < len && out < bytes; ++i) {
        acc = (acc << 6) | static_cast<unsigned>(dic64(source[i]));
        have += 6;
        if (have >= 8) {
            have -= 8;
            dest[out++] = static_cast<int>((acc >> have) & 0xFFu);
            acc &= (1u << have) - 1u; // keep only the bits not yet emitted
        }
    }
    // Only reached for input that is not correctly padded: at most one
    // partial byte remains, completed with zero bits.
    if (out < bytes) {
        dest[out++] = static_cast<int>((acc << (8 - have)) & 0xFFu);
    }
    dest[out] = -1;
}
/**********************************************/

struct Trie {
/*
* nxt & end is used in trie
* fail is for ac automata
*/
int nxt[MAX][MAXD], fail[MAX], end[MAX];
int root, L; //root node, length(the nodes has been malloc)[0, L]
int newnode() {
memset(nxt[L], -1, sizeof(int) * MAXD);
end[L++] = 0;
return L - 1;
}
void clear() {
L = 0;
root = newnode();
}
void insert(int* buf, int _id) {
int* p = buf;
int now = root;
while (~(*p)) {
if (nxt[now][*p] == -1) {
nxt[now][*p] = newnode();
}
now = nxt[now][*p++];
}
end[now] = _id;
}
void build() {
queue<int> Q;
fail[root] = root;
for (int i = 0; i < MAXD; ++i) {
if (nxt[root][i] == -1) {
nxt[root][i] = root;
} else {
fail[nxt[root][i]] = root;
Q.push(nxt[root][i]);
}
}

while (!Q.empty()) {
int now = Q.front();
Q.pop();

for (int i = 0; i < MAXD; ++i) {
if (nxt[now][i] == -1) {
nxt[now][i] = nxt[fail[now]][i];
} else {
fail[nxt[now][i]] = nxt[fail[now]][i];
Q.push(nxt[now][i]);
}
}
}
}
set<int> query(int* buf) {
int* p = buf;
int now = root;
set<int> res;
while (~(*p)) {
now = nxt[now][*p++];
int tmp = now;
while (tmp != root) {
if (end[tmp]) {
res.insert(end[tmp]);
}
tmp = fail[tmp];
}
}
return res;
}
void debug() {
for (int i = 0; i < L; ++i) {
printf("%d, %d, %d, [%d", i, fail[i], end[i], nxt[i][0]);
for (int j = 1; j < MAXD; ++j) {
printf(" %d", nxt[i][j]);
}
puts("]");
}
}
} ac;

const int MAXL = 128;
char str[MAXL]; // one Base64-encoded pattern
char buf[MAX];  // one Base64-encoded text
int jj[MAX];    // decoded bytes of the current pattern or text, -1 terminated

/*
 * zoj3430 driver. For each data set on stdin: reads n Base64-encoded
 * patterns (ids 1..n), then m Base64-encoded texts; prints, per text, the
 * number of distinct pattern ids found in its decoded bytes, followed by an
 * empty line after each data set.
 *
 * scanf results are checked so malformed input cannot loop forever or leave
 * m uninitialised, and the %s conversions are width-limited to their buffers.
 */
int main() {
    // The widths in the format strings below are sizeof(buffer) - 1.
    static_assert(MAXL == 128, "update the %127s width when MAXL changes");
    static_assert(sizeof(buf) == 50007, "update the %50006s width when buf changes");

    int n;
    while (scanf(" %d", &n) == 1) {
        ac.clear();
        for (int i = 1; i <= n; ++i) {
            if (scanf(" %127s", str) != 1) {
                return 0;
            }
            decode64(str, jj);
            ac.insert(jj, i);
        }
        ac.build();

        int m;
        if (scanf(" %d", &m) != 1) {
            return 0;
        }
        while (m-- > 0) {
            if (scanf(" %50006s", buf) != 1) {
                return 0;
            }
            decode64(buf, jj);
            printf("%d\n", (int)ac.query(jj).size());
        }
        puts("");
    }
    return 0;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: