Boyer–Moore solves the String Matching problem: find all occurrences of a pattern P (length m) in a text T (length n) as efficiently as possible.
The Boyer–Moore algorithm uses two heuristics to skip ahead in the text:
Preprocessing builds two tables in O(m + σ) time (σ = alphabet size). Searching then runs in sublinear average time O(n/m), with worst-case O(mn) without further optimizations.
Build a last array (L) mapping each character to its rightmost index in P, or -1 if absent. On a mismatch at T[i+j], shift by j − L[T[i+j]].
For P = "EXAMPLE" (m = 7), we build L:
int[] L = new int[256];
Arrays.fill(L, -1);
All entries = −1.
| Char | Last Occurrence |
|---|---|
| All chars | −1 |
P[0] = 'E')
L['E'] = 0;
Set last(E) = 0.
| Char | Last Occurrence |
|---|---|
| E | 0 |
| Others | −1 |
P[1] = 'X')
L['X'] = 1;
Set last(X) = 1.
| Char | Last Occurrence |
|---|---|
| E | 0 |
| X | 1 |
| Others | −1 |
P[2] = 'A')
L['A'] = 2;
Set last(A) = 2.
| Char | Last Occurrence |
|---|---|
| E | 0 |
| X | 1 |
| A | 2 |
| Others | −1 |
P[3] = 'M')
L['M'] = 3;
Set last(M) = 3.
| Char | Last Occurrence |
|---|---|
| E | 0 |
| X | 1 |
| A | 2 |
| M | 3 |
| Others | −1 |
P[4] = 'P')
L['P'] = 4;
Set last(P) = 4.
| Char | Last Occurrence |
|---|---|
| E | 0 |
| X | 1 |
| A | 2 |
| M | 3 |
| P | 4 |
| Others | −1 |
P[5] = 'L')
L['L'] = 5;
Set last(L) = 5.
| Char | Last Occurrence |
|---|---|
| E | 0 |
| X | 1 |
| A | 2 |
| M | 3 |
| P | 4 |
| L | 5 |
| Others | −1 |
P[6] = 'E')
L['E'] = 6;
Overwrite last(E) = 6.
| Char | Last Occurrence |
|---|---|
| E | 6 |
| X | 1 |
| A | 2 |
| M | 3 |
| P | 4 |
| L | 5 |
| Others | −1 |
| Character | Last Occurrence |
|---|---|
| E | 6 |
| X | 1 |
| A | 2 |
| M | 3 |
| P | 4 |
| L | 5 |
| Others | −1 |
When a mismatch occurs at position j in P, let k = m−1−j. If the suffix of length k also appears elsewhere in the pattern or as a prefix, shift so that that occurrence aligns. We compute a suff array then build the gs table in two phases.
// Compute suffix lengths
tools.suffixes = function(pat) {
const m = pat.length, suff = Array(m).fill(0);
suff[m-1] = m;
let g = m - 1, f = 0;
for (let i = m - 2; i >= 0; i--) {
if (i > g && suff[i + m - 1 - f] < i - g + 1) {
suff[i] = suff[i + m - 1 - f];
} else {
if (i < g) g = i;
f = i;
while (g >= 0 && pat[g] === pat[g + m - 1 - f]) g--;
suff[i] = f - g;
}
}
return suff;
};
// Build the good-suffix shift table
tools.buildGoodSuffixTable = function(pat) {
const m = pat.length, gs = Array(m).fill(m), suff = tools.suffixes(pat);
let j = 0;
// Phase 1: prefix matches
for (let i = m - 1; i >= 0; i--) {
if (suff[i] === i + 1) {
for (; j < m - 1 - i; j++) {
if (gs[j] === m) gs[j] = m - 1 - i;
}
}
}
// Phase 2: other occurrences
for (let i = 0; i < m - 1; i++) {
gs[m - 1 - suff[i]] = m - 1 - i;
}
return gs;
};
Example for P = "ABCDABC" (m = 7):
suff and variables
Set suff = [0,0,0,0,0,0,0], then suff[6] = 7 (full match).
Initialize g = 6, f = 0.
| i | suff[i] |
|---|---|
| 0–5 | 0 |
| 6 | 7 |
suff[5]
i = 5 ≤ g (6), so set g = 5, f = 5.
Compare pat[5] (B) to pat[6] (C) → mismatch → suff[5] = f - g = 0.
| i | suff[i] |
|---|---|
| 5 | 0 |
suff[4]
i = 4 ≤ g (5), set g = 4, f = 4.
Compare pat[4] (A) to pat[6] (C) → mismatch → suff[4] = 0.
| i | suff[i] |
|---|---|
| 4 | 0 |
suff[3]
i = 3 ≤ g (4), set g = 3, f = 3.
Compare pat[3] (D) to pat[6] (C) → mismatch → suff[3] = 0.
| i | suff[i] |
|---|---|
| 3 | 0 |
suff[2]
i = 2 ≤ g (3), set g = 2, f = 2.
Compare backwards:
pat[2]=C vs pat[6]=C → match (g→1)
pat[1]=B vs pat[5]=B → match (g→0)
pat[0]=A vs pat[4]=A → match (g→-1)
→ suff[2] = f - g = 2 - (-1) = 3.
| i | suff[i] |
|---|---|
| 2 | 3 |
suff[1]
i = 1 > g (-1) and suff[1+m-1-f] = suff[5] = 0 < i-g+1 = 3 → reuse → suff[1] = 0.
| i | suff[i] |
|---|---|
| 1 | 0 |
suff[0]
i = 0 > g (-1) and suff[0+m-1-f] = suff[4] = 0 < i-g+1 = 2 → reuse → suff[0] = 0.
| i | suff[i] |
|---|---|
| 0 | 0 |
suff array
[0, 0, 3, 0, 0, 0, 7]
| i | suff[i] |
|---|---|
| 0 | 0 |
| 1 | 0 |
| 2 | 3 |
| 3 | 0 |
| 4 | 0 |
| 5 | 0 |
| 6 | 7 |
Indices where suff[i] === i + 1: i = 2 (since suff[2] = 3 = 2+1).
This sets gs[0..4) = m-1-2 = 4.
| j | gs[j] after Phase 1 |
|---|---|
| 0 | 4 |
| 1 | 4 |
| 2 | 4 |
| 3 | 4 |
| 4 | 4 |
| 5 | 7 |
| 6 | 7 |
For each i = 0..5, update gs[m-1-suff[i]] = m-1-i. Final gs:
| j | gs[j] final |
|---|---|
| 0 | 4 |
| 1 | 4 |
| 2 | 4 |
| 3 | 4 |
| 4 | 7 |
| 5 | 7 |
| 6 | 1 |
The implementations below combine both heuristics. Preprocessing builds last, suffix, and prefix tables.
During search, on a mismatch at j, we compute both the bad-character shift and good-suffix shift and advance by their maximum, ensuring we never skip a valid match and reduce unnecessary comparisons.
// Java implementation of Boyer–Moore
public List boyerMooreSearch(String T, String P) {
int n = T.length(), m = P.length();
List result = new ArrayList<>();
if (m == 0 || n < m) return result;
// 1. Build bad character table
int[] last = new int[256];
Arrays.fill(last, -1);
for (int i = 0; i < m; i++) {
last[P.charAt(i)] = i;
}
// 2. Build good suffix tables
int[] suffix = new int[m];
boolean[] prefix = new boolean[m];
buildGoodSuffix(P, suffix, prefix);
// 3. Search
int i = 0;
while (i <= n - m) {
int j = m - 1;
while (j >= 0 && P.charAt(j) == T.charAt(i + j)) {
j--;
}
if (j < 0) {
result.add(i);
i++; // full match → shift by 1 to continue
} else {
int bcShift = j - last[T.charAt(i + j)];
int gsShift = (j < m - 1) ? moveByGoodSuffix(j, m, suffix, prefix) : 1;
i += Math.max(bcShift, gsShift);
}
}
return result;
}
private void buildGoodSuffix(String P, int[] suffix, boolean[] prefix) {
int m = P.length();
Arrays.fill(suffix, -1);
Arrays.fill(prefix, false);
for (int i = 0; i < m - 1; i++) {
int j = i, k = 0;
while (j >= 0 && P.charAt(j) == P.charAt(m - 1 - k)) {
j--; k++;
suffix[k] = j + 1;
}
if (j == -1) prefix[k] = true;
}
}
private int moveByGoodSuffix(int j, int m, int[] suffix, boolean[] prefix) {
int k = m - 1 - j;
if (suffix[k] != -1) {
return j - suffix[k] + 1;
}
for (int r = j + 2; r < m; r++) {
if (prefix[m - r]) {
return r;
}
}
return m;
}
// C++ implementation of Boyer–Moore
vector boyerMooreSearch(const string &T, const string &P) {
int n = T.size(), m = P.size();
vector result;
if (m == 0 || n < m) return result;
// 1. Build bad character table
vector last(256, -1);
for (int i = 0; i < m; i++) last[(unsigned char)P[i]] = i;
// 2. Build good suffix tables
vector suffix(m, -1);
vector prefix(m, false);
for (int i = 0; i < m - 1; i++) {
int j = i, k = 0;
while (j >= 0 && P[j] == P[m - 1 - k]) {
j--; k++;
suffix[k] = j + 1;
}
if (j == -1) prefix[k] = true;
}
// 3. Search
int i = 0;
while (i <= n - m) {
int j = m - 1;
while (j >= 0 && P[j] == T[i + j]) j--;
if (j < 0) {
result.push_back(i);
i++;
} else {
int bcShift = j - last[(unsigned char)T[i + j]];
int gsShift = (j < m - 1) ? [&] {
int k = m - 1 - j;
if (suffix[k] != -1) return j - suffix[k] + 1;
for (int r = j + 2; r < m; r++)
if (prefix[m - r]) return r;
return m;
}() : 1;
i += max(bcShift, gsShift);
}
}
return result;
}
def boyer_moore_search(text: str, pattern: str) -> List[int]:
n, m = len(text), len(pattern)
result = []
if m == 0 or n < m:
return result
# 1. Build bad character table
last = {chr(c): -1 for c in range(256)}
for i, ch in enumerate(pattern):
last[ch] = i
# 2. Build good suffix tables
suffix = [-1] * m
prefix = [False] * m
for i in range(m - 1):
j, k = i, 0
while j >= 0 and pattern[j] == pattern[m - 1 - k]:
suffix[k + 1] = j
j -= 1
k += 1
if j == -1:
prefix[k] = True
# 3. Search
i = 0
while i <= n - m:
j = m - 1
while j >= 0 and pattern[j] == text[i + j]:
j -= 1
if j < 0:
result.append(i)
i += 1
else:
bc_shift = j - last.get(text[i + j], -1)
# good suffix shift
k = m - 1 - j
if suffix[k] != -1:
gs_shift = j - suffix[k] + 1
else:
gs_shift = next((r for r in range(j+2, m) if prefix[m-r]), m)
i += max(bc_shift, gs_shift)
return result
Preprocessing: O(m + σ). Search: average-case O(n/m), worst-case O(mn) (without optimizations). Space: O(σ + m).
suffix and prefix arrays constructed?