【小结】AC自动机
2016-06-10 16:31
375 查看
参考资料:/article/2390636.html
搞了两天,突然明白,这玩意它原来就是个DFA鸭!窝来分析分析
我们先画出它Trie树的模样
留个板子
hdu2896
病毒保证不同,简单统计,随便搞
hdu3065
要打印匹配串:打标记,Trie上节点打前驱和字符标记。P.S.空间上还可以优化
zoj3430
解码一下即可。debug了好久,最后发现Base64直接解码出来的字符可能不是ASCII码,就如 $1111\ 1111$ 对应到ASCII后是EOF…真让人难堪。。。所以字符集开到256就可以过了…一部分调试中间修改导致整个代码看起来丑陋了一些,不愿改了。。
搞了两天,突然明白,这玩意它原来就是个DFA鸭!窝来分析分析
从DFA到AC自动机
考虑以下单词:$\{she,\ he,\ her\}$。我们先画出它Trie树的模样
留个板子
/* **********************************************
    File Name: ac_automata.cpp
    Author: zhengdongjian@tju.edu.cn
    Created Time: 2015-08-14 (Fri) 08:41:23
*********************************************** */
#include <cmath>
#include <cstdio>
#include <cstring>
#include <queue>
#include <utility>
using namespace std;

typedef pair<int, int> P;
const double EPS = 1e-8;
const double PI = acos(-1.0);
const int MAX = 500007;  // capacity of the node pool
const int MAXD = 26;     // alphabet size: 'a'..'z'

/*
 * Aho-Corasick automaton stored in fixed-size arrays.
 *
 *   nxt  - trie edges; after build() every entry is a valid goto transition
 *   fail - failure link of each node (valid after build())
 *   end  - number of patterns ending at the node; query() overwrites it
 *          with -1 once the node has been counted
 *
 * Usage: clear(), insert() every pattern, build(), then query().
 * The node pool is not bounds-checked: the caller must keep the total
 * pattern length below MAX.
 */
struct Trie {
    int nxt[MAX][MAXD], fail[MAX], end[MAX];
    int root, L;  // root node index; L = number of nodes allocated so far

    /* Maps a character to its edge index, or -1 when it is outside 'a'..'z'. */
    static int slot(char c) {
        const int idx = c - 'a';
        return (idx >= 0 && idx < MAXD) ? idx : -1;
    }

    /* Allocates a fresh node with no children and returns its index. */
    int newnode() {
        memset(nxt[L], -1, sizeof(int) * MAXD);
        end[L++] = 0;
        return L - 1;
    }

    /* Resets the automaton to a single root node. */
    void clear() {
        L = 0;
        root = newnode();
    }

    /*
     * Adds one NUL-terminated pattern to the trie (call before build()).
     * Returns false and leaves the trie untouched when the pattern contains
     * a character outside the alphabet; such a character used to index
     * nxt[] out of bounds.
     */
    bool insert(const char* buf) {
        for (const char* p = buf; *p; ++p) {
            if (slot(*p) < 0) {
                return false;
            }
        }
        int now = root;
        for (const char* p = buf; *p; ++p) {
            const int c = slot(*p);
            if (nxt[now][c] == -1) {
                nxt[now][c] = newnode();
            }
            now = nxt[now][c];
        }
        ++end[now];
        return true;
    }

    /*
     * Computes the failure links breadth-first and fills every missing edge
     * with the transition of the failure node, so that query() can follow
     * nxt[] directly without ever falling back explicitly.
     */
    void build() {
        queue<int> Q;
        fail[root] = root;
        for (int i = 0; i < MAXD; ++i) {
            if (nxt[root][i] == -1) {
                nxt[root][i] = root;
            } else {
                fail[nxt[root][i]] = root;
                Q.push(nxt[root][i]);
            }
        }
        while (!Q.empty()) {
            const int now = Q.front();
            Q.pop();
            for (int i = 0; i < MAXD; ++i) {
                if (nxt[now][i] == -1) {
                    nxt[now][i] = nxt[fail[now]][i];
                } else {
                    fail[nxt[now][i]] = nxt[fail[now]][i];
                    Q.push(nxt[now][i]);
                }
            }
        }
    }

    /*
     * Returns how many inserted patterns occur in buf[0, len); each pattern
     * is counted at most once. len == -1 means "use strlen(buf)".
     *
     * A counted node is marked with end == -1. Whenever a node is marked,
     * its whole failure chain is marked too, so a walk can stop at the first
     * marked node; every node is therefore visited at most once over the
     * whole query. To count repeated occurrences instead, drop both the
     * marker assignment and the `end[tmp] != -1` test.
     *
     * A character outside the alphabet cannot be part of any pattern, so it
     * simply resets the automaton to the root.
     */
    int query(const char* buf, int len = -1) {
        if (len == -1) {
            len = static_cast<int>(strlen(buf));
        }
        int now = root;
        int res = 0;
        for (int i = 0; i < len; ++i) {
            const int c = slot(buf[i]);
            if (c < 0) {
                now = root;
                continue;
            }
            now = nxt[now][c];
            for (int tmp = now; tmp != root && end[tmp] != -1; tmp = fail[tmp]) {
                res += end[tmp];
                end[tmp] = -1;
            }
        }
        return res;
    }

    /* Dumps every node as "index, fail, end, [transitions]". */
    void debug() {
        for (int i = 0; i < L; ++i) {
            printf("%d, %d, %d, [%d", i, fail[i], end[i], nxt[i][0]);
            for (int j = 1; j < MAXD; ++j) {
                printf(" %d", nxt[i][j]);
            }
            puts("]");
        }
    }
} ac;

const int MAXL = 64;
char str[MAXL];
char buf[MAX << 1];

// The scanf field widths in main() are spelled out as literals.
static_assert(MAXL == 64, "update the %63s width in main()");
static_assert((MAX << 1) == 1000014, "update the %1000013s width in main()");

/*
 * Input: T test cases; each has n, then n patterns, then one text.
 * Output: for each test case, the number of patterns found in the text.
 * Reading stops quietly when the input ends early.
 */
int main() {
    int T;
    if (scanf(" %d", &T) != 1) {
        return 0;
    }
    while (T-- > 0) {
        int n;
        if (scanf(" %d", &n) != 1) {
            break;
        }
        ac.clear();
        for (int i = 0; i < n; ++i) {
            if (scanf(" %63s", str) != 1) {
                return 0;
            }
            ac.insert(str);
        }
        ac.build();
        if (scanf(" %1000013s", buf) != 1) {
            break;
        }
        printf("%d\n", ac.query(buf));
    }
    return 0;
}
几个简单的小题目
// hdu2222, First attempt
/* **********************************************
    File Name: ac_automata.cpp => hdu2222
    Author: zhengdongjian@tju.edu.cn
    Created Time: 2015-08-14 (Fri) 08:41:23
*********************************************** */
#include <cmath>
#include <cstdio>
#include <cstring>
#include <queue>
#include <utility>
using namespace std;

typedef pair<int, int> P;
const double EPS = 1e-8;
const double PI = acos(-1.0);
const int MAX = 500007;  // capacity of the node pool
const int MAXD = 26;     // alphabet size: 'a'..'z'

/*
 * Aho-Corasick automaton stored in fixed-size arrays.
 *
 *   nxt  - trie edges; after build() every entry is a valid goto transition
 *   fail - failure link of each node (valid after build())
 *   end  - number of patterns ending at the node; query() overwrites it
 *          with -1 once the node has been counted
 *
 * Usage: clear(), insert() every pattern, build(), then query().
 * The node pool is not bounds-checked: the caller must keep the total
 * pattern length below MAX.
 */
struct Trie {
    int nxt[MAX][MAXD], fail[MAX], end[MAX];
    int root, L;  // root node index; L = number of nodes allocated so far

    /* Maps a character to its edge index, or -1 when it is outside 'a'..'z'. */
    static int slot(char c) {
        const int idx = c - 'a';
        return (idx >= 0 && idx < MAXD) ? idx : -1;
    }

    /* Allocates a fresh node with no children and returns its index. */
    int newnode() {
        memset(nxt[L], -1, sizeof(int) * MAXD);
        end[L++] = 0;
        return L - 1;
    }

    /* Resets the automaton to a single root node. */
    void clear() {
        L = 0;
        root = newnode();
    }

    /*
     * Adds one NUL-terminated pattern to the trie (call before build()).
     * Returns false and leaves the trie untouched when the pattern contains
     * a character outside the alphabet; such a character used to index
     * nxt[] out of bounds.
     */
    bool insert(const char* buf) {
        for (const char* p = buf; *p; ++p) {
            if (slot(*p) < 0) {
                return false;
            }
        }
        int now = root;
        for (const char* p = buf; *p; ++p) {
            const int c = slot(*p);
            if (nxt[now][c] == -1) {
                nxt[now][c] = newnode();
            }
            now = nxt[now][c];
        }
        ++end[now];
        return true;
    }

    /*
     * Computes the failure links breadth-first and fills every missing edge
     * with the transition of the failure node, so that query() can follow
     * nxt[] directly without ever falling back explicitly.
     */
    void build() {
        queue<int> Q;
        fail[root] = root;
        for (int i = 0; i < MAXD; ++i) {
            if (nxt[root][i] == -1) {
                nxt[root][i] = root;
            } else {
                fail[nxt[root][i]] = root;
                Q.push(nxt[root][i]);
            }
        }
        while (!Q.empty()) {
            const int now = Q.front();
            Q.pop();
            for (int i = 0; i < MAXD; ++i) {
                if (nxt[now][i] == -1) {
                    nxt[now][i] = nxt[fail[now]][i];
                } else {
                    fail[nxt[now][i]] = nxt[fail[now]][i];
                    Q.push(nxt[now][i]);
                }
            }
        }
    }

    /*
     * Returns how many inserted patterns occur in buf[0, len); each pattern
     * is counted at most once. len == -1 means "use strlen(buf)".
     *
     * A counted node is marked with end == -1. Whenever a node is marked,
     * its whole failure chain is marked too, so a walk can stop at the first
     * marked node; every node is therefore visited at most once over the
     * whole query. To count repeated occurrences instead, drop both the
     * marker assignment and the `end[tmp] != -1` test.
     *
     * A character outside the alphabet cannot be part of any pattern, so it
     * simply resets the automaton to the root.
     */
    int query(const char* buf, int len = -1) {
        if (len == -1) {
            len = static_cast<int>(strlen(buf));
        }
        int now = root;
        int res = 0;
        for (int i = 0; i < len; ++i) {
            const int c = slot(buf[i]);
            if (c < 0) {
                now = root;
                continue;
            }
            now = nxt[now][c];
            for (int tmp = now; tmp != root && end[tmp] != -1; tmp = fail[tmp]) {
                res += end[tmp];
                end[tmp] = -1;
            }
        }
        return res;
    }

    /* Dumps every node as "index, fail, end, [transitions]". */
    void debug() {
        for (int i = 0; i < L; ++i) {
            printf("%d, %d, %d, [%d", i, fail[i], end[i], nxt[i][0]);
            for (int j = 1; j < MAXD; ++j) {
                printf(" %d", nxt[i][j]);
            }
            puts("]");
        }
    }
} ac;

const int MAXL = 64;
char str[MAXL];
char buf[MAX << 1];

// The scanf field widths in main() are spelled out as literals.
static_assert(MAXL == 64, "update the %63s width in main()");
static_assert((MAX << 1) == 1000014, "update the %1000013s width in main()");

/*
 * Input: T test cases; each has n, then n patterns, then one text.
 * Output: for each test case, the number of patterns found in the text.
 * Reading stops quietly when the input ends early.
 */
int main() {
    int T;
    if (scanf(" %d", &T) != 1) {
        return 0;
    }
    while (T-- > 0) {
        int n;
        if (scanf(" %d", &n) != 1) {
            break;
        }
        ac.clear();
        for (int i = 0; i < n; ++i) {
            if (scanf(" %63s", str) != 1) {
                return 0;
            }
            ac.insert(str);
        }
        ac.build();
        if (scanf(" %1000013s", buf) != 1) {
            break;
        }
        printf("%d\n", ac.query(buf));
    }
    return 0;
}
hdu2896
病毒保证不同,简单统计,随便搞
/* **********************************************
    File Name: 2896.cpp
    Author: zhengdongjian@tju.edu.cn
    Created Time: 2015-08-14 (Fri) 11:25:51
*********************************************** */
#include <cmath>
#include <cstdio>
#include <cstring>
#include <queue>
#include <set>
#include <utility>
using namespace std;

typedef pair<int, int> P;
const double EPS = 1e-8;
const double PI = acos(-1.0);
const int MAX = 100007;  // capacity of the node pool and of the line buffer
const int MAXD = 128;    // alphabet size: byte values 0..127

/*
 * Aho-Corasick automaton over byte values 0..127.
 *
 *   nxt  - trie edges; after build() every entry is a valid goto transition
 *   fail - failure link of each node (valid after build())
 *   end  - id of the pattern ending at the node, or -1 for none
 *
 * Usage: clear(), insert() every pattern, build(), then query().
 * The node pool is not bounds-checked: the caller must keep the total
 * pattern length below MAX.
 */
struct Trie {
    int nxt[MAX][MAXD], fail[MAX], end[MAX];
    int root, L;  // root node index; L = number of nodes allocated so far

    /*
     * Maps a character to its edge index, or -1 when it is outside the
     * alphabet. Going through unsigned char keeps bytes >= 0x80 from
     * turning into negative indices where plain char is signed.
     */
    static int slot(char c) {
        const unsigned char u = static_cast<unsigned char>(c);
        return u < MAXD ? static_cast<int>(u) : -1;
    }

    /* Allocates a fresh node with no children and no pattern id. */
    int newnode() {
        memset(nxt[L], -1, sizeof(int) * MAXD);
        end[L++] = -1;
        return L - 1;
    }

    /* Resets the automaton to a single root node. */
    void clear() {
        L = 0;
        root = newnode();
    }

    /*
     * Adds one NUL-terminated pattern and tags its final node with _end
     * (call before build()). A later pattern with the same text overwrites
     * the tag. Returns false and leaves the trie untouched when the pattern
     * contains a byte outside the alphabet.
     */
    bool insert(const char* buf, int _end) {
        for (const char* p = buf; *p; ++p) {
            if (slot(*p) < 0) {
                return false;
            }
        }
        int now = root;
        for (const char* p = buf; *p; ++p) {
            const int c = slot(*p);
            if (nxt[now][c] == -1) {
                nxt[now][c] = newnode();
            }
            now = nxt[now][c];
        }
        end[now] = _end;
        return true;
    }

    /*
     * Computes the failure links breadth-first and fills every missing edge
     * with the transition of the failure node.
     */
    void build() {
        queue<int> Q;
        fail[root] = root;
        for (int i = 0; i < MAXD; ++i) {
            if (nxt[root][i] == -1) {
                nxt[root][i] = root;
            } else {
                fail[nxt[root][i]] = root;
                Q.push(nxt[root][i]);
            }
        }
        while (!Q.empty()) {
            const int now = Q.front();
            Q.pop();
            for (int i = 0; i < MAXD; ++i) {
                if (nxt[now][i] == -1) {
                    nxt[now][i] = nxt[fail[now]][i];
                } else {
                    fail[nxt[now][i]] = nxt[fail[now]][i];
                    Q.push(nxt[now][i]);
                }
            }
        }
    }

    /*
     * Returns the ids of all patterns that occur in the NUL-terminated text,
     * in ascending order. A byte outside the alphabet cannot belong to any
     * pattern, so it resets the automaton to the root.
     */
    set<int> query(const char* buf) {
        int now = root;
        set<int> res;
        for (const char* p = buf; *p; ++p) {
            const int c = slot(*p);
            if (c < 0) {
                now = root;
                continue;
            }
            now = nxt[now][c];
            for (int tmp = now; tmp != root; tmp = fail[tmp]) {
                if (end[tmp] != -1) {
                    res.insert(end[tmp]);
                }
            }
        }
        return res;
    }
} ac;

char buf[MAX];

// The scanf field width in main() is spelled out as a literal.
static_assert(MAX == 100007, "update the %100006s widths in main()");

/*
 * Input (repeated until it ends): n, then n patterns numbered from 1,
 * then m, then m texts numbered from 1.
 * Output: one "web i: ids..." line per text that contains a pattern,
 * followed by "total: k" with the number of such texts.
 */
int main() {
    int n, m;
    while (scanf(" %d", &n) == 1) {
        ac.clear();
        for (int i = 1; i <= n; ++i) {
            if (scanf(" %100006s", buf) != 1) {
                return 0;
            }
            ac.insert(buf, i);
        }
        ac.build();
        if (scanf(" %d", &m) != 1) {
            return 0;
        }
        int sum = 0;
        for (int i = 1; i <= m; ++i) {
            if (scanf(" %100006s", buf) != 1) {
                break;
            }
            const set<int> hits = ac.query(buf);
            if (hits.empty()) {
                continue;
            }
            ++sum;
            printf("web %d:", i);
            for (int id : hits) {
                printf(" %d", id);
            }
            puts("");
        }
        printf("total: %d\n", sum);
    }
    return 0;
}
hdu3065
要打印匹配串:打标记,Trie上节点打前驱和字符标记。P.S.空间上还可以优化
/* **********************************************
    File Name: 3065.cpp
    Author: zhengdongjian@tju.edu.cn
    Created Time: 2015-08-14 (Fri) 11:46:10
*********************************************** */
#include <cmath>
#include <cstdio>
#include <cstring>
#include <map>
#include <queue>
#include <utility>
using namespace std;

typedef pair<int, int> P;
const double EPS = 1e-8;
const double PI = acos(-1.0);
const int MAX = 50007;  // capacity of the node pool
const int MAXD = 128;   // alphabet size: byte values 0..127

/*
 * Aho-Corasick automaton over byte values 0..127 that can also spell a
 * matched pattern back out.
 *
 *   nxt  - trie edges; after build() every entry is a valid goto transition
 *   fail - failure link of each node (valid after build())
 *   end  - number of patterns ending at the node
 *   pre  - trie parent of the node (-1 for the root)
 *   dad  - character on the edge from pre[node] to the node
 *
 * Usage: clear(), insert() every pattern, build(), then query().
 * The node pool is not bounds-checked: the caller must keep the total
 * pattern length below MAX.
 */
struct Trie {
    int nxt[MAX][MAXD], fail[MAX], end[MAX], pre[MAX];
    char dad[MAX];
    int root, L;  // root node index; L = number of nodes allocated so far

    /*
     * Maps a character to its edge index, or -1 when it is outside the
     * alphabet. Going through unsigned char keeps bytes >= 0x80 from
     * turning into negative indices where plain char is signed.
     */
    static int slot(char c) {
        const unsigned char u = static_cast<unsigned char>(c);
        return u < MAXD ? static_cast<int>(u) : -1;
    }

    /* Allocates a fresh node with no children, no parent and no pattern. */
    int newnode() {
        memset(nxt[L], -1, sizeof(int) * MAXD);
        pre[L] = -1;
        end[L++] = 0;
        return L - 1;
    }

    /* Resets the automaton to a single root node. */
    void clear() {
        L = 0;
        root = newnode();
    }

    /*
     * Adds one NUL-terminated pattern (call before build()), recording the
     * parent and edge character of every node it creates. Returns false and
     * leaves the trie untouched when the pattern contains a byte outside
     * the alphabet.
     */
    bool insert(const char* buf) {
        for (const char* p = buf; *p; ++p) {
            if (slot(*p) < 0) {
                return false;
            }
        }
        int now = root;
        for (const char* p = buf; *p; ++p) {
            const int c = slot(*p);
            if (nxt[now][c] == -1) {
                const int child = newnode();
                nxt[now][c] = child;
                pre[child] = now;
                dad[child] = *p;
            }
            now = nxt[now][c];
        }
        ++end[now];
        return true;
    }

    /*
     * Computes the failure links breadth-first and fills every missing edge
     * with the transition of the failure node.
     */
    void build() {
        queue<int> Q;
        fail[root] = root;
        for (int i = 0; i < MAXD; ++i) {
            if (nxt[root][i] == -1) {
                nxt[root][i] = root;
            } else {
                fail[nxt[root][i]] = root;
                Q.push(nxt[root][i]);
            }
        }
        while (!Q.empty()) {
            const int now = Q.front();
            Q.pop();
            for (int i = 0; i < MAXD; ++i) {
                if (nxt[now][i] == -1) {
                    nxt[now][i] = nxt[fail[now]][i];
                } else {
                    fail[nxt[now][i]] = nxt[fail[now]][i];
                    Q.push(nxt[now][i]);
                }
            }
        }
    }

    /*
     * Counts pattern occurrences in buf[0, len), overlapping ones included.
     * Returns a map from the pattern's final trie node to its occurrence
     * count; patterns that never occur are absent. len == -1 means
     * "use strlen(buf)". A byte outside the alphabet cannot belong to any
     * pattern, so it resets the automaton to the root.
     */
    map<int, int> query(const char* buf, int len = -1) {
        if (len == -1) {
            len = static_cast<int>(strlen(buf));
        }
        int now = root;
        map<int, int> res;
        for (int i = 0; i < len; ++i) {
            const int c = slot(buf[i]);
            if (c < 0) {
                now = root;
                continue;
            }
            now = nxt[now][c];
            for (int tmp = now; tmp != root; tmp = fail[tmp]) {
                if (end[tmp] > 0) {
                    ++res[tmp];
                }
            }
        }
        return res;
    }

    /* Dumps every node as "index, fail, end, [transitions]". */
    void debug() {
        for (int i = 0; i < L; ++i) {
            printf("%d, %d, %d, [%d", i, fail[i], end[i], nxt[i][0]);
            for (int j = 1; j < MAXD; ++j) {
                printf(" %d", nxt[i][j]);
            }
            puts("]");
        }
    }
} ac;

const int MAXL = 64;
char str[MAXL];
char buffer[MAX * 40];

// The scanf field widths in main() are spelled out as literals.
static_assert(MAXL == 64, "update the %63s width in main()");
static_assert(MAX * 40 == 2000280, "update the %2000279s width in main()");

/*
 * Input (repeated until it ends): n, then n patterns, then one text.
 * Output: one "pattern: count" line for every pattern found in the text.
 *
 * NOTE(review): lines are emitted in ascending trie-node order. Nodes are
 * allocated in insertion order, so this differs from input order when a
 * later pattern is a prefix of an earlier one - confirm which order the
 * judge expects.
 */
int main() {
    int n;
    while (scanf(" %d", &n) == 1) {
        ac.clear();
        for (int i = 1; i <= n; ++i) {
            if (scanf(" %63s", str) != 1) {
                return 0;
            }
            ac.insert(str);
        }
        ac.build();
        if (scanf(" %2000279s", buffer) != 1) {
            return 0;
        }
        const map<int, int> mp = ac.query(buffer);
        for (const auto& hit : mp) {
            // Spell the pattern backwards into the tail of str by walking
            // parent links. Patterns are read with %63s, so a path has at
            // most MAXL - 1 characters and idx never drops below -1.
            int idx = MAXL - 1;
            str[idx--] = '\0';
            for (int now = hit.first; now != ac.root; now = ac.pre[now]) {
                str[idx--] = ac.dad[now];
            }
            ++idx;
            printf("%s: %d\n", str + idx, hit.second);
        }
    }
    return 0;
}
zoj3430
解码一下即可。debug了好久,最后发现Base64直接解码出来的字符可能不是ASCII码,就如 $1111\ 1111$ 对应到ASCII后是EOF…真让人难堪。。。所以字符集开到256就可以过了…一部分调试中间修改导致整个代码看起来丑陋了一些,不愿改了。。
/* **********************************************
    File Name: 3430.cpp
    Author: zhengdongjian@tju.edu.cn
    Created Time: 2015-08-14 (Fri) 13:28:38
*********************************************** */
#include <cctype>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <queue>
#include <set>
#include <utility>
using namespace std;

typedef pair<int, int> P;
const double EPS = 1e-8;
const double PI = acos(-1.0);
const int MAX = 50007;  // capacity of the node pool and of the I/O buffers
const int MAXD = 256;   // alphabet size: every byte value

/*
 * Base64 Decode
 */
// Base64 alphabet, kept for reference; dic64() computes the same mapping.
static const char cb64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
 * Returns the 6-bit value of a Base64 digit. '+' maps to 62 and every
 * other non-alphanumeric character maps to 63. The character is passed to
 * the <cctype> classifiers as unsigned char, since they are undefined for
 * negative values.
 */
inline int dic64(char c) {
    const unsigned char u = static_cast<unsigned char>(c);
    if (isupper(u)) {
        return c - 'A';
    } else if (islower(u)) {
        return c - 'a' + 26;
    } else if (isdigit(u)) {
        return c - '0' + 52;
    } else {
        return c == '+' ? 62 : 63;
    }
}

/*
 * Decodes the NUL-terminated Base64 text in source into dest, one byte
 * value (0..255) per int, and terminates dest with -1. Bytes are stored as
 * ints because a decoded 0xFF must stay distinguishable from the
 * terminator.
 *
 * Every trailing '=' removes 8 bits from the payload. Only whole bytes are
 * produced, so a malformed length yields the leading complete bytes, and
 * an empty or all-'=' input yields an empty result instead of reading
 * before the start of source.
 *
 * dest must have room for 3 * strlen(source) / 4 + 1 ints.
 */
void decode64(char source[], int dest[]) {
    int len = static_cast<int>(strlen(source));
    int bits = len * 6;
    while (len > 0 && source[len - 1] == '=') {
        --len;
        bits -= 8;
    }
    const int total = bits > 0 ? bits / 8 : 0;  // number of bytes to emit

    int produced = 0;
    unsigned acc = 0;  // bit accumulator; only its low `held` bits matter
    int held = 0;      // number of not-yet-emitted bits in acc
    for (int i = 0; i < len && produced < total; ++i) {
        acc = (acc << 6) | static_cast<unsigned>(dic64(source[i]));
        held += 6;
        if (held >= 8) {
            held -= 8;
            dest[produced++] = static_cast<int>((acc >> held) & 0xFFu);
        }
    }
    dest[produced] = -1;
}
/**********************************************/

/*
 * Aho-Corasick automaton over byte values 0..255, fed with -1-terminated
 * int sequences.
 *
 *   nxt  - trie edges; after build() every entry is a valid goto transition
 *   fail - failure link of each node (valid after build())
 *   end  - id of the pattern ending at the node, or 0 for none
 *
 * Usage: clear(), insert() every pattern, build(), then query().
 * The node pool is not bounds-checked: the caller must keep the total
 * pattern length below MAX.
 */
struct Trie {
    int nxt[MAX][MAXD], fail[MAX], end[MAX];
    int root, L;  // root node index; L = number of nodes allocated so far

    /* True when s is usable as an edge index. */
    static bool valid(int s) {
        return s >= 0 && s < MAXD;
    }

    /* Allocates a fresh node with no children and no pattern id. */
    int newnode() {
        memset(nxt[L], -1, sizeof(int) * MAXD);
        end[L++] = 0;
        return L - 1;
    }

    /* Resets the automaton to a single root node. */
    void clear() {
        L = 0;
        root = newnode();
    }

    /*
     * Adds one -1-terminated pattern and tags its final node with _id
     * (call before build(); ids must be non-zero to be reported). A later
     * pattern with the same content overwrites the tag. Returns false and
     * leaves the trie untouched when a symbol is outside 0..MAXD-1.
     */
    bool insert(const int* buf, int _id) {
        for (const int* p = buf; *p != -1; ++p) {
            if (!valid(*p)) {
                return false;
            }
        }
        int now = root;
        for (const int* p = buf; *p != -1; ++p) {
            if (nxt[now][*p] == -1) {
                nxt[now][*p] = newnode();
            }
            now = nxt[now][*p];
        }
        end[now] = _id;
        return true;
    }

    /*
     * Computes the failure links breadth-first and fills every missing edge
     * with the transition of the failure node.
     */
    void build() {
        queue<int> Q;
        fail[root] = root;
        for (int i = 0; i < MAXD; ++i) {
            if (nxt[root][i] == -1) {
                nxt[root][i] = root;
            } else {
                fail[nxt[root][i]] = root;
                Q.push(nxt[root][i]);
            }
        }
        while (!Q.empty()) {
            const int now = Q.front();
            Q.pop();
            for (int i = 0; i < MAXD; ++i) {
                if (nxt[now][i] == -1) {
                    nxt[now][i] = nxt[fail[now]][i];
                } else {
                    fail[nxt[now][i]] = nxt[fail[now]][i];
                    Q.push(nxt[now][i]);
                }
            }
        }
    }

    /*
     * Returns the ids of all patterns that occur in the -1-terminated
     * sequence. A symbol outside 0..MAXD-1 cannot belong to any pattern,
     * so it resets the automaton to the root.
     */
    set<int> query(const int* buf) {
        int now = root;
        set<int> res;
        for (const int* p = buf; *p != -1; ++p) {
            if (!valid(*p)) {
                now = root;
                continue;
            }
            now = nxt[now][*p];
            for (int tmp = now; tmp != root; tmp = fail[tmp]) {
                if (end[tmp]) {
                    res.insert(end[tmp]);
                }
            }
        }
        return res;
    }

    /* Dumps every node as "index, fail, end, [transitions]". */
    void debug() {
        for (int i = 0; i < L; ++i) {
            printf("%d, %d, %d, [%d", i, fail[i], end[i], nxt[i][0]);
            for (int j = 1; j < MAXD; ++j) {
                printf(" %d", nxt[i][j]);
            }
            puts("]");
        }
    }
} ac;

const int MAXL = 128;
char str[MAXL];
char buf[MAX];
int jj[MAX];  // decoded bytes of the current pattern or text

// The scanf field widths in main() are spelled out as literals.
static_assert(MAXL == 128, "update the %127s width in main()");
static_assert(MAX == 50007, "update the %50006s width in main()");

/*
 * Input (repeated until it ends): n, then n Base64 patterns numbered from
 * 1, then m, then m Base64 texts.
 * Output: for each text, the number of distinct pattern ids found in its
 * decoded bytes; a blank line follows each test case.
 */
int main() {
    int n;
    while (scanf(" %d", &n) == 1) {
        ac.clear();
        for (int i = 1; i <= n; ++i) {
            if (scanf(" %127s", str) != 1) {
                return 0;
            }
            decode64(str, jj);
            ac.insert(jj, i);
        }
        ac.build();
        int m;
        if (scanf(" %d", &m) != 1) {
            return 0;
        }
        while (m-- > 0) {
            if (scanf(" %50006s", buf) != 1) {
                return 0;
            }
            decode64(buf, jj);
            printf("%d\n", static_cast<int>(ac.query(jj).size()));
        }
        puts("");
    }
    return 0;
}
相关文章推荐
- Oracle学习 第1、2天之高级查询
- 用Qt 调用GDB调试 Arm程序 详细步骤----可单步执行每一行
- js 输出html的表格数据到 excel
- 当下要真硬着性子把Android学下去, 定会是移动互联台上的角儿
- 新一代分布式任务调度框架:当当elastic-job开源项目的10项特性
- Android点击事件的三种写法
- Pascal's Triangle
- spring开发 MethodInvokingFactoryBean的学习
- coyote
- offsetof' was not declared in this scope
- 第七届山东省ACM省赛
- RecyclerView的Adapter的抽取
- reflection
- Qt下使用GDB远程调试ARM板
- 神经网络
- Java POI读取Excel 2003/2007/2010例子
- Spring 的Helloworld
- openwrt 编译node.js功能(解决Illegal instruction错误)
- win7升级到win10所遇到的坑
- Eclipse编写jsp文件连接Mysql数据库的安装配置指南