KMP 算法笔记
next
数组定义(核心思想)
对于一个模式串 pattern
:
next[i]
表示:pattern[0..i-1]
(长度为 i 的前缀)中,最长的“相等的真前缀与真后缀”的长度。
模式串
pattern = "ababaca"
1-base:
i(从1开始) | 子串 pattern[0..i-1] | 最长相等前后缀 | 长度 | next[i] |
---|---|---|---|---|
1 | a | 无 | 0 | 0 |
2 | ab | 无 | 0 | 0 |
3 | aba | a | 1 | 1 |
4 | abab | ab | 2 | 2 |
5 | ababa | aba | 3 | 3 |
6 | ababac | 无 | 0 | 0 |
7 | ababaca | a | 1 | 1 |
所以:
next = [ 0, 0, 1, 2, 3, 0, 1]
0-base:
模式串 pattern[0..i]
(长度为 i+1 的前缀)中,最长相等的真前缀与真后缀的长度。
next = [0, 0, 1, 2, 3, 0, 1]
1-base 版代码实现(更常见)
#include <bits/stdc++.h>
using namespace std;
int main() {
string text, pattern;
cin >> text >> pattern;
vector<int> next(pattern.size() + 1, 0);
for (int i = 2, j = 0;i <= pattern.size(); i++) {
while (j > 0 && pattern[i - 1] != pattern[j]) j = next[j];
if (pattern[i - 1] == pattern[j]) j++;
next[i] = j;
}
vector<int> pos;
for (int i = 1, j = 0; i <= text.size(); i++) {
while (j > 0 && text[i - 1] != pattern[j]) j = next[j];
if (text[i - 1] == pattern[j]) j++;
if (j == pattern.size()) {
pos.push_back(i - j + 1);
j = next[j];
}
}
for (int p : pos) cout << p << '\n';
for (int i = 1; i < next.size(); i++) {
if (i > 1) cout << " ";
cout << next[i];
}
return 0;
}
// ----------- 优化为 inext 数组 -----------
vector<int> inext(n + 1, 0);
inext[1] = 0;
for (int i = 2; i <= n; i++) {
if (pattern[i - 1] == pattern[next[i]])
inext[i] = inext[next[i]];
else
inext[i] = next[i];
}
inext 优化后:
#include <bits/stdc++.h>
using namespace std;
int main() {
string text, pattern;
cin >> text >> pattern;
int n = pattern.size();
vector<int> inext(n + 1, 0); // inext[0] 无用,1-based
inext[1] = 0;
for (int i = 2, j = 0; i <= n; i++) {
while (j > 0 && pattern[i - 1] != pattern[j])
j = inext[j];
if (pattern[i - 1] == pattern[j]) j++;
// 核心优化:避免无效重复匹配
if (i < n && pattern[i] == pattern[j])
inext[i] = inext[j];
else
inext[i] = j;
}
// 输出 inext 数组查看
for (int i = 1; i <= n; i++) {
if (i > 1) cout << ' ';
cout << inext[i];
}
cout << '\n';
return 0;
}
0-base 版代码实现
#include <bits/stdc++.h>
using namespace std;
int main() {
string text, pattern; cin >> text >> pattern;
vector<int> next(pattern.size());
for (int i = 1, j = 0; i < pattern.size(); i++) {
while (j > 0 && pattern[i] != pattern[j]) j = next[j - 1];
if (pattern[i] == pattern[j]) j++;
next[i] = j;
}
vector<int> pos;
for (int i = 0, j = 0; i < text.size(); i++) {
while (j > 0 && text[i] != pattern[j]) j = next[j - 1];
if (text[i] == pattern[j]) j++;
if (j == pattern.size()) {
pos.push_back(i - j + 1);
j = next[j - 1];
}
}
for (int p : pos) cout << p + 1 << '\n';
for (int n : next) cout << n << ' ';
return 0;
}
// ----------- 构造优化后的 inext 数组 -----------
vector<int> inext(m);
inext[0] = 0;
for (int i = 1; i < m; i++) {
if (pattern[i] == pattern[next[i] - 1] && next[i] > 0)
inext[i] = inext[next[i] - 1];
else
inext[i] = next[i];
}
类型 | next[0] | 失配跳转逻辑 |
---|---|---|
0-based next | 0 | j = next[j-1];需要判断 j>0 |
-1版 next | -1 | j = next[j];无需判断 j>0,代码更干净 |
举个例子,模式串 "ABCDABD"
:
- 普通 next(0-based):
next = [0, 0, 0, 0, 1, 2, 0]
- -1 版 next:
next = [-1, 0, 0, 0, 0, 1, 2]
区别在于 next[0]=-1,失配直接回到开头。
区别在匹配时的体现:
- 普通 next:
while (j > 0 && text[i] != pattern[j]) j = next[j-1];
if (text[i] == pattern[j]) j++;
- -1版 next:
while (j != -1 && text[i] != pattern[j]) j = next[j];
j++;
版权申明
本文系作者 @xiin 原创发布在To Future$站点。未经许可,禁止转载。
暂无评论数据