作者:陈芝麻烂谷子我勒个去 | 来源:互联网 | 2024-11-23 13:17
本文提供了一个关于AC自动机(Aho-CorasickAlgorithm)的详细解析与实现方法,特别针对P3796题目进行了深入探讨。文章不仅涵盖了AC自动机的基本概念,还重点讲解了如何通过构建失败指针(failpointer)来提高字符串匹配效率。
题目链接
P3796 AC自动机强化版题解 - Aho-Corasick Algorithm
解题思路
本题旨在通过AC自动机解决多模式串匹配问题。AC自动机是一种高效的字符串匹配算法,它能够在O(n + m)的时间复杂度内完成模式串的预处理和文本串的搜索,其中n是所有模式串的总长度,m是文本串的长度。
AC自动机的核心在于失败指针(fail pointer)的构建。失败指针用于在当前路径无法继续匹配时,快速跳转至一个可能的匹配起点,从而避免重复扫描文本串。具体来说,当在某个节点上匹配失败时,失败指针会指向一个能够继续匹配的节点,该节点对应的字符串是当前节点前缀的最大公共后缀。
AC代码
#include
#include
#define MAXN 100010
#define MAXM 1000010
#define MAXL 75
int ac[MAXN][26], cnt = 1;
int queue[MAXN], fail[MAXN], end[MAXN];
char patterns[MAXL][MAXL], text[MAXM];
struct Result {
int index, count;
} results[500], temp;
void insertPattern(const char *pattern, int length, int index) {
int node = 0;
for (int i = 0; i int c = pattern[i] - 'a';
if (!ac[node][c]) ac[node][c] = cnt++;
node = ac[node][c];
}
end[node] = index;
}
void buildFailPointers() {
int head = 0, tail = 0;
for (int i = 0; i <26; ++i) {
if (ac[0][i]) {
queue[tail++] = ac[0][i];
fail[ac[0][i]] = 0;
}
}
while (head int current = queue[head++];
for (int i = 0; i <26; ++i) {
if (ac[current][i]) {
fail[ac[current][i]] = ac[fail[current]][i];
queue[tail++] = ac[current][i];
} else {
ac[current][i] = ac[fail[current]][i];
}
}
}
}
void searchText(const char *text, int length) {
int node = 0;
for (int i = 0; i node = ac[node][text[i] - 'a'];
for (int j = node; j; j = fail[j]) {
if (end[j]) {
results[end[j]].count++;
}
}
}
}
int compare(const void *a, const void *b) {
return ((Result *)b)->count - ((Result *)a)->count;
}
int main() {
int n;
while (scanf("%d", &n) != EOF && n) {
cnt = 1;
for (int i = 1; i <= n; ++i) {
scanf("%s", patterns[i]);
insertPattern(patterns[i], strlen(patterns[i]), i);
results[i].index = i;
results[i].count = 0;
}
buildFailPointers();
scanf("%s", text);
searchText(text, strlen(text));
qsort(results + 1, n, sizeof(Result), compare);
printf("%d\n", results[1].count);
printf("%s\n", patterns[results[1].index]);
for (int i = 2; i <= n; ++i) {
if (results[i].count != results[i - 1].count) break;
printf("%s\n", patterns[results[i].index]);
}
memset(end, 0, sizeof(int) * cnt);
memset(fail, 0, sizeof(int) * cnt);
memset(ac, 0, sizeof(ac));
}
return 0;
}