next 数组定义(核心思想)

对于一个模式串 pattern

next[i] 表示:pattern[0..i-1](长度为 i 的前缀)中,最长的“相等的真前缀与真后缀”的长度

模式串

pattern = "ababaca"

1-base:

i(从1开始)子串 pattern[0..i-1]最长相等前后缀长度next[i]
1a00
2ab00
3abaa11
4ababab22
5ababaaba33
6ababac00
7ababacaa11

所以:

next = [    0, 0, 1, 2, 3, 0, 1]

0-base:

模式串 pattern[0..i](长度为 i+1 的前缀)中,最长相等的真前缀与真后缀的长度

next = [0, 0, 1, 2, 3, 0, 1]

1-base 版代码实现(更常见)

#include <bits/stdc++.h>
using namespace std;
int main() {
    string text, pattern;
    cin >> text >> pattern;
    vector<int> next(pattern.size() + 1, 0);
    for (int i = 2, j = 0;i <= pattern.size(); i++) {
        while (j > 0 && pattern[i - 1] != pattern[j]) j = next[j];
        if (pattern[i - 1] == pattern[j]) j++;
        next[i] = j;
    }
    vector<int> pos;
    for (int i = 1, j = 0; i <= text.size(); i++) {
        while (j > 0 && text[i - 1] != pattern[j]) j = next[j];
        if (text[i - 1] == pattern[j]) j++;
        if (j == pattern.size()) {
            pos.push_back(i - j + 1);
            j = next[j];
        }
    }
    for (int p : pos) cout << p << '\n';
    for (int i = 1; i < next.size(); i++) {
        if (i > 1) cout << " ";
        cout << next[i];
    }
    return 0;
}
    // ----------- 优化为 inext 数组 -----------
    vector<int> inext(n + 1, 0);
    inext[1] = 0;
    for (int i = 2; i <= n; i++) {
        if (pattern[i - 1] == pattern[next[i]]) 
            inext[i] = inext[next[i]];
        else 
            inext[i] = next[i];
    }

inext 优化后:

#include <bits/stdc++.h>
using namespace std;

int main() {
    string text, pattern;
    cin >> text >> pattern;

    int n = pattern.size();
    vector<int> inext(n + 1, 0); // inext[0] 无用,1-based
    inext[1] = 0;

    for (int i = 2, j = 0; i <= n; i++) {
        while (j > 0 && pattern[i - 1] != pattern[j])
            j = inext[j];
        if (pattern[i - 1] == pattern[j]) j++;

        // 核心优化:避免无效重复匹配
        if (i < n && pattern[i] == pattern[j])
            inext[i] = inext[j];
        else
            inext[i] = j;
    }

    // 输出 inext 数组查看
    for (int i = 1; i <= n; i++) {
        if (i > 1) cout << ' ';
        cout << inext[i];
    }
    cout << '\n';

    return 0;
}

0-base 版代码实现

#include <bits/stdc++.h>
using namespace std;
int main() {
    string text, pattern; cin >> text >> pattern;
    vector<int> next(pattern.size());
    for (int i = 1, j = 0; i < pattern.size(); i++) {
        while (j > 0 && pattern[i] != pattern[j]) j = next[j - 1];
        if (pattern[i] == pattern[j]) j++;
        next[i] = j;
    }
    vector<int> pos;
    for (int i = 0, j = 0; i < text.size(); i++) {
        while (j > 0 && text[i] != pattern[j]) j = next[j - 1];
        if (text[i] == pattern[j]) j++;
        if (j == pattern.size()) {
            pos.push_back(i - j + 1);
            j = next[j - 1];
        }
    }
    for (int p : pos) cout << p + 1 << '\n';
    for (int n : next) cout << n << ' ';
    return 0;
}
    // ----------- 构造优化后的 inext 数组 -----------
    vector<int> inext(m);
    inext[0] = 0;
    for (int i = 1; i < m; i++) {
        if (pattern[i] == pattern[next[i] - 1] && next[i] > 0)
            inext[i] = inext[next[i] - 1];
        else
            inext[i] = next[i];
    }
类型next[0]失配跳转逻辑
0-based next0j = next[j-1];需要判断 j>0
-1版 next-1j = next[j];无需判断 j>0,代码更干净

举个例子,模式串 "ABCDABD"

  • 普通 next(0-based)
next = [0, 0, 0, 0, 1, 2, 0]
  • -1 版 next
next = [-1, 0, 0, 0, 0, 1, 2]
区别在于 next[0]=-1,失配直接回到开头。

区别在匹配时的体现:

  • 普通 next
while (j > 0 && text[i] != pattern[j]) j = next[j-1];
if (text[i] == pattern[j]) j++;
  • -1版 next
while (j != -1 && text[i] != pattern[j]) j = next[j];
j++;
分类: DS-Algo 标签: C++

评论

暂无评论数据

暂无评论数据

目录