Loading page…
Rust walkthroughs
Loading page…
How does regex::Regex::replace_all handle overlapping matches differently than iterative replacement?
regex::Regex::replace_all replaces all non-overlapping matches in a single pass, meaning once a match is replaced, the replacement text is not scanned again for new matches. This differs from iterative replacement where you might loop and apply replacements repeatedly, which can create new matches from replacement text or cause infinite loops. replace_all processes the input left-to-right, replacing each match and immediately moving past it, preventing both overlapping matches and re-matching on replacement content. Understanding this behavior is crucial when replacements might create patterns that could match again, or when overlapping matches might occur.
use regex::Regex;
/// Demonstrates the basic, case-sensitive behaviour of `replace_all`.
fn main() {
    let text = "The quick brown fox jumps over the lazy dog";
    let re = Regex::new(r"the").unwrap();

    // replace_all replaces all non-overlapping matches
    let result = re.replace_all(text, "a");
    println!("{}", result);
    // "The quick brown fox jumps over a lazy dog"
    // Note: matching is case-sensitive, so the leading "The" is left untouched;
    // only the lowercase "the" before "lazy" is replaced.
}
replace_all finds all matches and replaces them in one operation.
use regex::Regex;
fn main() {
let text = "aaa";
let re = Regex::new(r"aa").unwrap();
// "aaa" contains "aa" starting at position 0 and 1
// But these overlap!
let result = re.replace_all(text, "X");
println!("Result: {}", result); // "Xa"
// What happened:
// 1. Found "aa" at position 0-2
// 2. Replaced with "X"
// 3. Continue from position 2
// 4. Only "a" remains, doesn't match "aa"
// Non-overlapping means once a match is consumed,
// it cannot be part of another match
}replace_all never considers overlapping matchesāeach character is matched at most once.
use regex::Regex;
/// Contrasts single-pass `replace_all` with a loop that feeds its own output back in.
fn main() {
    /// Applies `re.replace` (first match only) `iterations` times, using each
    /// result as the input of the next round.
    fn iterative_replace(text: &str, re: &Regex, replacement: &str, iterations: usize) -> String {
        let mut result = text.to_string();
        for _ in 0..iterations {
            result = re.replace(&result, replacement).into_owned();
        }
        result
    }

    let text = "abc";
    let re = Regex::new(r"a").unwrap();

    // replace_all: single pass
    let result = re.replace_all(text, "aa");
    println!("replace_all: {}", result); // "aabc"

    // Iterative replacement: every round replaces the first "a" it finds,
    // including an "a" that an earlier round inserted, so the text grows by
    // one "a" per iteration.
    println!("1 iteration: {}", iterative_replace(text, &re, "aa", 1)); // "aabc"
    println!("2 iterations: {}", iterative_replace(text, &re, "aa", 2)); // "aaabc"
    println!("3 iterations: {}", iterative_replace(text, &re, "aa", 3)); // "aaaabc"

    // Iterative replacement keeps matching the replacement text!
    // replace_all avoids this by not re-scanning replacements
}
replace_all doesn't re-scan replacement text, preventing infinite expansion.
use regex::Regex;
/// Replaces "hello" with text that itself contains "hello".
fn main() {
    // Risky-looking setup: the replacement text contains the pattern.
    let pattern = Regex::new(r"hello").unwrap();
    let input = "hello";

    let output = pattern.replace_all(input, "hello world");
    println!("{}", output); // "hello world"

    // Exactly one replacement happened: the "hello" inside the inserted
    // "hello world" is never examined.
    //
    // A loop that re-applied the replacement to its own output would behave
    // differently:
    //   round 1: "hello"       -> "hello world"
    //   round 2: "hello world" -> "hello world world"
    //   ...and so on, growing without bound.
}
replace_all processes replacements in a single pass without re-scanning.
use regex::Regex;
/// Walks through the left-to-right scan over three adjacent matches.
fn main() {
    let pair = Regex::new(r"ab").unwrap();
    let collapsed = pair.replace_all("ababab", "X");
    println!("{}", collapsed); // "XXX"

    // How the scan proceeds:
    //   offset 0: "ab" matches -> emit "X", resume at 2
    //   offset 2: "ab" matches -> emit "X", resume at 4
    //   offset 4: "ab" matches -> emit "X", resume at 6
    //   offset 6: end of input
    // Strictly left-to-right; no character belongs to more than one match.
}
Matches are found and replaced strictly left-to-right.
use regex::Regex;
/// Uses `$1` / `$2` capture-group references in the replacement string.
fn main() {
    // One match, two groups: swap the words.
    let word_pair = Regex::new(r"(\w+) (\w+)").unwrap();
    let swapped = word_pair.replace_all("hello world", "$2 $1");
    println!("{}", swapped); // "world hello"

    // Several matches: the group references are resolved separately for each one.
    let assignment = Regex::new(r"(\w)=(\d)").unwrap();
    let rewritten = assignment.replace_all("a=1, b=2, c=3", "$1:$2");
    println!("{}", rewritten); // "a:1, b:2, c:3"
}
Capturing groups work correctly with replace_all, applied per-match.
use regex::Regex;
/// Shows which of two overlapping "ana" occurrences in "banana" gets replaced.
fn main() {
    let word = "banana";
    // "ana" occurs at 1..4 and again at 3..6; the two occurrences overlap.
    let ana = Regex::new(r"ana").unwrap();

    let replaced = ana.replace_all(word, "X");
    println!("'{}' -> '{}'", word, replaced); // "bXna"

    // Step by step:
    //   b a n a n a
    //     ^^^^^        first "ana" (1..4) is matched and becomes "X"
    //   scanning resumes at 4, leaving only "na", which is not a match
    //
    // The occurrence at 3..6 is never reported because the 'a' at index 3
    // was already consumed by the first match.
    // Overlapping matches need a different approach.
}
Overlapping patterns only match where they don't conflict with earlier matches.
use regex::Regex;
/// Collects overlapping match positions by anchoring a search at every byte offset.
fn main() {
    let text = "banana";
    let re = Regex::new(r"ana").unwrap();

    // Probe each offset and keep a hit only when it begins exactly at that
    // offset; `find_at` may otherwise report a match that starts further right.
    let overlapping_positions: Vec<(usize, usize)> = (0..text.len())
        .filter_map(|offset| {
            re.find_at(text, offset)
                .filter(|found| found.start() == offset)
                .map(|found| (found.start(), found.end()))
        })
        .collect();

    println!("Overlapping matches:");
    for &(from, to) in &overlapping_positions {
        println!(" {} at {}..{}", &text[from..to], from, to);
    }
    // Reported:
    //   "ana" at 1..4
    //   "ana" at 3..6 (overlaps the first)
    //
    // replace_all would only replace the first of these. Replacing every
    // overlapping occurrence means:
    //   1. gathering all positions up front, then
    //   2. assembling the output by hand (awkward when ranges overlap).
}
Overlapping matches require manual handling; replace_all doesn't support them.
use regex::Regex;
/// Shows how `replace_all` treats patterns that can match the empty string.
fn main() {
    // Pattern that matches empty string
    let text = "abc";
    let re = Regex::new(r"").unwrap();

    // This could cause infinite loop in naive implementation
    // But replace_all handles it correctly
    let result = re.replace_all(text, "X");
    println!("{}", result); // "XaXbXcX"
    // Empty matches are allowed but don't consume characters;
    // after an empty match the engine advances by at least 1 character.

    // Another problematic pattern
    let re2 = Regex::new(r"a*").unwrap();
    let result2 = re2.replace_all("aaa", "X");
    println!("{}", result2); // "X"
    // "a*" is greedy, so it matches all of "aaa" at 0..3.
    // It could also match "" at position 3, but the regex crate does not
    // report an empty match that immediately follows a previous match,
    // so exactly one replacement is made.
}
The regex engine prevents infinite loops from zero-width matches.
use regex::Regex;
/// Compares `replace` (first match) with `replace_all` (every match).
fn main() {
    let greeting = Regex::new(r"hello").unwrap();
    let input = "hello hello hello";

    // `replace` stops after the first match.
    let first_only = greeting.replace(input, "hi");
    println!("replace: {}", first_only); // "hi hello hello"

    // `replace_all` handles every match.
    let every_match = greeting.replace_all(input, "hi");
    println!("replace_all: {}", every_match); // "hi hi hi"

    // Same matching rules in both cases (non-overlapping, no re-scanning);
    // the only difference is how many matches get replaced.
}
replace and replace_all share the same matching semantics.
use regex::Regex;
/// Doubles every number in the text by computing the replacement in a closure.
fn main() {
    let text = "1 2 3 4 5";
    let re = Regex::new(r"\d+").unwrap();

    // Use closure for dynamic replacement
    let result = re.replace_all(text, |caps: &regex::Captures| {
        // caps[0] is the whole match; `\d+` guarantees it is all digits, and
        // the sample values are small enough to fit in an i32.
        let num: i32 = caps[0].parse().unwrap();
        (num * 2).to_string()
    });
    println!("{}", result); // "2 4 6 8 10"

    // Each match processed independently
    // No re-scanning of replacements
    // The closure is called once per match
    // Replacement text is inserted and not examined again
}
Closures allow dynamic replacements without re-scanning.
use regex::Regex;
/// Handles two kinds of token in one pass by combining them with alternation.
fn main() {
    let text = "abc123def456";

    // If you need multiple patterns, use alternation
    let re = Regex::new(r"[a-z]+|[0-9]+").unwrap();
    let result = re.replace_all(text, |caps: &regex::Captures| {
        let matched = &caps[0];
        // The pattern only yields all-letter or all-digit runs, so checking
        // for "all lowercase letters" tells the two alternatives apart.
        if matched.chars().all(|c| c.is_ascii_lowercase()) {
            format!("[{}]", matched) // Wrap letters
        } else {
            format!("#{}#", matched) // Wrap numbers
        }
    });
    println!("{}", result); // "[abc]#123#[def]#456#"

    // Still non-overlapping, left-to-right
    // Each match replaced once
}
Alternation allows multiple patterns in one replace_all call.
use regex::Regex;
/// Escapes HTML special characters in a single `replace_all` pass.
fn main() {
    let text = "<div>Hello & welcome!</div>";

    // Escape HTML special characters
    // Using replace_all with a character class covering all three characters
    let re = Regex::new(r"[&<>]").unwrap();
    let escaped = re.replace_all(text, |caps: &regex::Captures| match &caps[0] {
        "&" => "&amp;",
        "<" => "&lt;",
        ">" => "&gt;",
        // The character class can only ever match one of the three arms above.
        _ => unreachable!(),
    });
    println!("{}", escaped); // "&lt;div&gt;Hello &amp; welcome!&lt;/div&gt;"

    // Important: every inserted entity starts with "&", which is itself one of
    // the characters being escaped. Escaping "&" in a separate, later pass
    // would corrupt "&lt;" into "&amp;lt;". A single replace_all pass never
    // looks at the text it has just inserted.

    // Example of a replacement that contains its own pattern:
    let text2 = "&&";
    let re2 = Regex::new(r"&").unwrap();
    let result2 = re2.replace_all(text2, "&amp;");
    println!("{}", result2); // "&amp;&amp;"
    // Safe: replace_all doesn't match the "&" inside "&amp;" again
}
replace_all is safe for escaping operations because it doesn't re-scan.
use regex::Regex;
/// Shows why swapping two names needs one pass rather than two chained passes.
fn main() {
    // Example where iterative replacement would fail
    let text = "x = x + 1";

    // Goal: replace "x" with "y" and "y" with "x" (swap)
    // Naive iterative approach:
    let re_x = Regex::new(r"x").unwrap();
    let re_y = Regex::new(r"y").unwrap();
    let step1 = re_x.replace_all(text, "y");
    println!("After x->y: {}", step1); // "y = y + 1"
    let step2 = re_y.replace_all(&step1, "x");
    println!("After y->x: {}", step2); // "x = x + 1"
    // Oops! The second pass also rewrote the "y"s produced by the first pass,
    // so we are back where we started.

    // Correct approach: match both names with one pattern and decide the
    // replacement per match, in a single replace_all call:
    let re_swap = Regex::new(r"[xy]").unwrap();
    let result = re_swap.replace_all(text, |caps: &regex::Captures| match &caps[0] {
        "x" => "y",
        "y" => "x",
        // The character class can only ever match "x" or "y".
        _ => unreachable!(),
    });
    println!("Swapped: {}", result); // "y = y + 1"
    // This is the correct swap: the input contains only "x", so every name
    // becomes "y". An input with both names, such as "x = y + 1", would
    // become "y = x + 1", because inserted text is never matched again.
}
Multiple replace_all calls can interfere; use captures for complex transformations.
use regex::Regex;
use std::time::Instant;
/// Times a large single-pass replacement and shows the borrowed no-match result.
fn main() {
    let text = "a".repeat(100_000);
    let re = Regex::new(r"a").unwrap();

    // replace_all: single pass
    let start = Instant::now();
    let result = re.replace_all(&text, "b");
    println!("replace_all: {:?}", start.elapsed());
    // Each one-byte "a" became a one-byte "b", so the length is unchanged.
    assert_eq!(result.len(), text.len());

    // replace_all is O(n) where n is text length
    // It scans once, replacing as it goes
    // Iterative replacement would be:
    // O(n * iterations) or worse if replacements create matches

    // The Cow<str> return type allows borrowing when nothing changes
    let text2 = "bbbb";
    let result2 = re.replace_all(text2, "a");
    // If no matches, returns borrowed string (zero allocation)
    println!(
        "no-match result is borrowed: {}",
        matches!(result2, std::borrow::Cow::Borrowed(_))
    );
}
replace_all is efficient: single pass, no re-scanning.
Core behavior:
replace_all replaces all non-overlapping matches in a single left-to-right pass
Non-overlapping semantics:
Key difference from iterative replacement:
replace_all: Single pass, replacements not re-matched
When this matters:
Handling overlapping matches:
replace_all doesn't support overlapping matches
Use find_at with manual iteration to find overlapping positions
Best practices:
Use replace_all for all replacements in one pass
Avoid chaining multiple replace_all calls for related transformations
Key insight: replace_all is designed to be safe and efficient—it won't create infinite loops from replacement text matching, and it won't double-count overlapping matches. This is exactly what you want for most text processing, but if you need overlapping match semantics, you must implement that manually. The single-pass, non-overlapping behavior makes replace_all predictable and prevents common bugs that plague iterative replacement approaches.