What are the trade-offs between regex::Regex::replace_all and captures_iter for complex substitution logic?
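replace_all is the simpler choice, and with a static replacement string also the fastest: given a replacement string it handles literal text and capture references such as $1, and given a closure it handles most per-match dynamic logic while still building the output for you. Iterating with captures_iter provides full access to capture groups and complete control over how the output is assembled, which suits complex conditional logic, transformations, error propagation, and context-dependent substitutions, at the cost of more verbose code and manual string building. The choice depends on whether you need simple pattern-based replacement or sophisticated transformation logic.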
The replace_all Method
use regex::Regex;
/// Demonstrates `Regex::replace_all` with a plain replacement string.
///
/// The replacement may be literal text or may reference capture groups
/// with `$1`, `$2`, and so on.
fn replace_all_basics() {
    let text = "The quick brown fox jumps over the lazy dog.";
    let re = Regex::new(r"\b\w{3}\b").unwrap(); // whole words of exactly 3 word characters

    // Simple static replacement: every match ("The", "fox", "the", "dog")
    // is replaced with the same string.
    let result = re.replace_all(text, "WORD");
    // "WORD quick brown WORD jumps over WORD lazy WORD."

    // The replacement can also reference capture groups with $1, $2, etc.
    let text2 = "hello world, hello universe";
    let re2 = Regex::new(r"(\w+) (\w+)").unwrap();

    // Reference captures in the replacement: swap the two words of each match.
    let result2 = re2.replace_all(text2, "$2 $1");
    // "world hello, universe hello"
}
replace_all handles simple replacements with capture group references using $1, $2.
Capture Group References in replace_all
use regex::Regex;
/// Shows the capture-reference syntax understood by `replace_all`
/// replacement strings: numbered groups, named groups, and a literal `$`.
fn capture_references() {
    let names = "John Smith, Jane Doe";

    // Numbered groups: `$1` is the first capture group, `$2` the second.
    let by_index = Regex::new(r"(\w+) (\w+)").unwrap();
    let result = by_index.replace_all(names, "$2, $1");
    // "Smith, John, Doe, Jane"

    // Named groups are referenced as `${name}`.
    let by_name = Regex::new(r"(?P<first>\w+) (?P<last>\w+)").unwrap();
    let result2 = by_name.replace_all(names, "${last}, ${first}");
    // "Smith, John, Doe, Jane"

    // A literal `$` in the replacement is written as `$$`.
    let priced = "Price: $100";
    let price_pattern = Regex::new(r"\$(\d+)").unwrap();
    let result3 = price_pattern.replace_all(priced, "$$1");
    // "Price: $1" (the literal text "$1", not the capture group)
}
replace_all supports $1, $2, ${name} for capture group substitution.
The captures_iter Method
use regex::Regex;
/// Walks over every match with `captures_iter` and prints the whole match
/// together with its two capture groups.
fn captures_iter_basics() {
    let input = "hello world, hello universe";
    let word_pair = Regex::new(r"(\w+) (\w+)").unwrap();

    // Each item produced by captures_iter is the Captures for one match.
    for pair in word_pair.captures_iter(input) {
        // Index 0 is the full match; indices 1 and 2 are the capture groups.
        let (whole, first, second) = (&pair[0], &pair[1], &pair[2]);
        println!("Full match: {}", whole);
        println!("First word: {}", first);
        println!("Second word: {}", second);
    }

    // Having the Captures in hand means the replacement for each match
    // can be computed programmatically.
}
captures_iter yields Captures objects for each match, giving full programmatic access.
Complex Substitution with captures_iter
use regex::Regex;
/// Rebuilds a string by hand with `captures_iter`, applying a markup that
/// depends on the value of each captured price.
fn complex_substitution() {
    let text = "prices: $10, $20, $30, $40";
    let re = Regex::new(r"\$(\d+)").unwrap();

    // Goal: apply different transformations based on value
    // - values under 25: add 10% markup
    // - values 25 or over: add 20% markup
    let mut result = String::new();
    let mut last_end = 0;
    for caps in re.captures_iter(text) {
        // Location of the whole match inside `text`.
        let full_match = caps.get(0).unwrap();

        // Copy the untouched text between the previous match and this one.
        result.push_str(&text[last_end..full_match.start()]);

        // Parse the captured number; the pattern only admits ASCII digits.
        let price: i32 = caps[1].parse().unwrap();

        // Pick the markup from the value, then truncate back to an integer.
        let factor = if price < 25 { 1.1 } else { 1.2 };
        let adjusted = (price as f64 * factor) as i32;

        result.push_str(&format!("${}", adjusted));
        last_end = full_match.end();
    }
    // Copy whatever follows the last match.
    result.push_str(&text[last_end..]);
    // "prices: $11, $22, $36, $48"
    // Notice: $10->11 (10% markup), $20->22 (10% markup)
    //         $30->36 (20% markup), $40->48 (20% markup)
}
captures_iter enables conditional transformations that a static replacement string cannot express.
Comparison: When Each Shines
use regex::Regex;
/// Contrasts a one-line static `replace_all` with a hand-written
/// `captures_iter` loop whose replacement depends on each matched word.
fn simple_vs_complex() {
    // SIMPLE REPLACEMENTS: use replace_all
    let text = "foo bar baz";
    let re = Regex::new(r"\b\w{3}\b").unwrap();
    // Simple: all matches become the same replacement
    let result = re.replace_all(text, "WORD");
    // "WORD WORD WORD"

    // COMPLEX REPLACEMENTS: use captures_iter
    let text2 = "small, medium, large, huge";
    let re2 = Regex::new(r"\w+").unwrap();
    // Goal: transform each word according to its length (in bytes)
    //   short words (<= 4): uppercase
    //   medium words (5-6): title case
    //   long words (> 6): reversed
    let mut result2 = String::new();
    let mut last_end = 0;
    for caps in re2.captures_iter(text2) {
        let m = caps.get(0).unwrap();
        result2.push_str(&text2[last_end..m.start()]);
        let word = m.as_str();
        let transformed: String = match word.len() {
            0..=4 => word.to_uppercase(),
            5..=6 => {
                // Uppercase the first character and keep the rest as-is.
                // Chaining the iterator keeps multi-character uppercase
                // mappings intact.
                let mut chars = word.chars();
                match chars.next() {
                    Some(first) => first.to_uppercase().chain(chars).collect(),
                    None => String::new(),
                }
            }
            _ => word.chars().rev().collect(),
        };
        result2.push_str(&transformed);
        last_end = m.end();
    }
    result2.push_str(&text2[last_end..]);
    // "Small, Medium, Large, HUGE"
    // ("small" and "large" have 5 letters, "medium" has 6, "huge" has 4)
}
replace_all for simple; captures_iter when logic depends on matched content.
Using replace_all with Callbacks
use regex::Regex;
fn replace_with_callback() {
// replace_all can accept a closure for dynamic replacement
let text = "prices: $10, $20, $30";
let re = Regex::new(r"\$(\d+)").unwrap();
// The closure receives &Captures for each match
let result = re.replace_all(text, |caps: ®ex::Captures| {
let price: i32 = caps[1].parse().unwrap();
let adjusted = (price as f64 * 1.1) as i32;
format!("${}", adjusted)
});
// "prices: $11, $22, $33"
// This gives much of captures_iter's flexibility
// while still handling string building for you
}
fn advanced_callback() {
let text = "user: alice, user: bob, user: charlie";
let re = Regex::new(r"user: (\w+)").unwrap();
// Closure can maintain state or use external data
let mut user_count = 0;
let result = re.replace_all(text, |caps: ®ex::Captures| {
user_count += 1;
let name = &caps[1];
format!("#{}: {}", user_count, name)
});
// "#1: alice, #2: bob, #3: charlie"
}replace_all with a closure provides a middle groundβdynamic replacement without manual string building.
When replace_all Callbacks Are Enough
use regex::Regex;
fn callback_sufficient() {
let text = "2024-01-15, 2024-02-20, 2024-03-25";
let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap();
// Format dates differently
let result = re.replace_all(text, |caps: ®ex::Captures| {
let year = &caps[1];
let month = &caps[2];
let day = &caps[3];
format!("{}/{}/{}", month, day, year)
});
// "01/15/2024, 02/20/2024, 03/25/2024"
// For this simple transformation, replace_all with callback works well
// No need for captures_iter's complexity
}
/// Demonstrates a `replace_all` closure that borrows external data (a
/// lookup table) to build each replacement.
fn callback_with_context() {
    let text = "error: 404, error: 500, error: 403";
    let re = Regex::new(r"error: (\d+)").unwrap();

    // Look up error messages from a map
    let error_messages = std::collections::HashMap::from([
        ("404", "Not Found"),
        ("500", "Internal Server Error"),
        ("403", "Forbidden"),
    ]);
    let result = re.replace_all(text, |caps: &regex::Captures| {
        let code = &caps[1];
        // Codes missing from the table fall back to a generic message.
        let message = error_messages.get(code).unwrap_or(&"Unknown Error");
        format!("{}: {}", code, message)
    });
    // "404: Not Found, 500: Internal Server Error, 403: Forbidden"
}
Callbacks handle most dynamic replacement needs; use captures_iter only when necessary.
When captures_iter Is Necessary
use regex::Regex;
/// Replaces each number with the running total of all numbers seen so far,
/// building the output by hand with `captures_iter`.
fn captures_iter_necessary() {
    // Case 1: Replacement depends on earlier matches
    let text = "1, 2, 3, 4, 5";
    let re = Regex::new(r"\d+").unwrap();

    // Goal: replace each number with cumulative sum
    let mut result = String::new();
    let mut last_end = 0;
    let mut cumulative = 0;
    for caps in re.captures_iter(text) {
        let m = caps.get(0).unwrap();
        // Copy the separator text that precedes this match.
        result.push_str(&text[last_end..m.start()]);
        let num: i32 = m.as_str().parse().unwrap();
        cumulative += num;
        result.push_str(&cumulative.to_string());
        last_end = m.end();
    }
    result.push_str(&text[last_end..]);
    // "1, 3, 6, 10, 15" (running totals)

    // The running total is state carried across matches. A replace_all
    // closure is FnMut and could carry it as well; the explicit loop
    // simply keeps the state handling in plain sight.
}
/// Repeats each run of letters as many times as the number that follows it.
fn captures_iter_overlap() {
    // Case 2: Replacement built from several capture groups of one match
    let text = "abc123def456ghi";
    let re = Regex::new(r"([a-z]+)(\d+)").unwrap();

    // Goal: transform letters based on following numbers
    let mut result = String::new();
    let mut last_end = 0;
    for caps in re.captures_iter(text) {
        let m = caps.get(0).unwrap();
        result.push_str(&text[last_end..m.start()]);
        let letters = &caps[1];
        let count: usize = caps[2].parse().unwrap();
        // Repeat letters by number count
        result.push_str(&letters.repeat(count));
        last_end = m.end();
    }
    // The trailing "ghi" has no digits after it, so it is copied unchanged.
    result.push_str(&text[last_end..]);
    // "abc" repeated 123 times, then "def" repeated 456 times, then "ghi"

    // Complex transformation with context from captures
}
/// Drops bracketed sections whose content contains "remove" and keeps all
/// other bracketed sections verbatim.
fn captures_iter_multi_pass() {
    // Case 3: Conditional removal based on content
    let text = "keep [remove secret] keep [keep this] keep [remove hidden]";
    let re = Regex::new(r"\[([^\]]+)\]").unwrap();

    // Remove brackets containing "remove" but keep others
    let mut result = String::new();
    let mut last_end = 0;
    for caps in re.captures_iter(text) {
        let m = caps.get(0).unwrap();
        result.push_str(&text[last_end..m.start()]);
        let content = &caps[1];
        if !content.contains("remove") {
            // Keep the whole match, brackets included.
            result.push_str(&caps[0]);
        }
        last_end = m.end();
    }
    result.push_str(&text[last_end..]);
    // "keep  keep [keep this] keep "
    // (the spaces on both sides of a removed bracket remain, hence the
    // double space after the first "keep")
}
captures_iter is the natural choice when replacements depend on state, context, or complex conditions.
Performance Characteristics
use regex::Regex;
/// Summarises, in comments, the relative cost of the replacement
/// strategies and shows the cheapest case: a static replacement.
fn performance_comparison() {
    // Why replace_all is generally the faster option:
    //   1. the regex engine can optimize the replacement
    //   2. string allocation is done efficiently
    //   3. there is no iterator overhead
    //
    // Overhead that comes with captures_iter:
    //   1. creating the iterator object
    //   2. allocating a Captures object for each match
    //   3. building the output string by hand
    //
    // BUT: replace_all with a callback is comparable to captures_iter,
    // because both create Captures objects for each match.

    // For simple static replacements, replace_all is fastest:
    let haystack = "foo bar baz foo bar baz";
    let foo = Regex::new(r"foo").unwrap();
    let replaced = foo.replace_all(haystack, "FOO");
    // This is optimized internally

    // For complex logic, captures_iter and a replace_all callback perform
    // similarly; choose whichever API reads more clearly for your use case.
}
replace_all with static replacement is fastest; with callback, similar to captures_iter.
Memory and Allocation
use regex::Regex;
/// Outlines where allocations happen for each approach and shows a
/// `captures_iter` loop that pre-sizes its output buffer.
fn memory_considerations() {
    // replace_all with static replacement:
    // - Allocates result string once
    // - No intermediate allocations per match
    // replace_all with callback:
    // - Allocates result string
    // - Allocates for each callback return value
    // - Creates a Captures object for each match
    // captures_iter:
    // - You control all allocations
    // - Can reuse buffers if needed
    // - Full control over string building
    let text = "a1 b2 c3 d4 e5";
    let re = Regex::new(r"(\w)(\d)").unwrap();

    // captures_iter with a pre-sized output buffer: the input length is
    // used as the initial capacity; the buffer still grows if the output
    // ends up longer than the input.
    let mut result = String::with_capacity(text.len());
    let mut last_end = 0;
    for caps in re.captures_iter(text) {
        let m = caps.get(0).unwrap();
        result.push_str(&text[last_end..m.start()]);
        // Keep the letter and double the digit.
        let letter = &caps[1];
        let number: i32 = caps[2].parse().unwrap();
        result.push_str(&format!("{}{}", letter, number * 2));
        last_end = m.end();
    }
    result.push_str(&text[last_end..]);
    // "a2 b4 c6 d8 e10"
}
With captures_iter, you control allocation; replace_all handles it automatically.
Error Handling
use regex::Regex;
fn error_handling() {
// replace_all callback can panic or return String
// It cannot easily propagate errors
let text = "prices: $10, $invalid, $20";
let re = Regex::new(r"\$(\w+)").unwrap();
// This will panic on "invalid"
// let result = re.replace_all(text, |caps: ®ex::Captures| {
// let num: i32 = caps[1].parse().unwrap();
// format!("${}", num)
// });
// With captures_iter, you can handle errors properly
let mut result = String::new();
let mut last_end = 0;
let mut errors = Vec::new();
for caps in re.captures_iter(text) {
let m = caps.get(0).unwrap();
result.push_str(&text[last_end..m.start()]);
match caps[1].parse::<i32>() {
Ok(num) => result.push_str(&format!("${}", num)),
Err(e) => {
errors.push(format!("Failed to parse '{}': {}", &caps[1], e));
result.push_str(&caps[0]); // Keep original
}
}
last_end = m.end();
}
result.push_str(&text[last_end..]);
// result: "prices: $10, $invalid, $20"
// errors: ["Failed to parse 'invalid': ..."]
}captures_iter allows proper error handling; replace_all callbacks must succeed or panic.
Real-World Example: Template Engine
use regex::Regex;
use std::collections::HashMap;
struct TemplateEngine {
variables: HashMap<String, String>,
}
impl TemplateEngine {
fn new() -> Self {
Self {
variables: HashMap::new(),
}
}
fn set(&mut self, key: &str, value: &str) {
self.variables.insert(key.to_string(), value.to_string());
}
// Using replace_all with callback
fn render_callback(&self, template: &str) -> String {
let re = Regex::new(r"\{\{(\w+)\}\}").unwrap();
re.replace_all(template, |caps: ®ex::Captures| {
let var_name = &caps[1];
self.variables.get(var_name)
.cloned()
.unwrap_or_else(|| format!("{{{{missing:{}}}}}", var_name))
}).into_owned()
}
// Using captures_iter (more control)
fn render_iter(&self, template: &str) -> Result<String, String> {
let re = Regex::new(r"\{\{(\w+)\}\}").unwrap();
let mut result = String::with_capacity(template.len());
let mut last_end = 0;
let mut missing = Vec::new();
for caps in re.captures_iter(template) {
let m = caps.get(0).unwrap();
result.push_str(&template[last_end..m.start()]);
let var_name = &caps[1];
match self.variables.get(var_name) {
Some(value) => result.push_str(value),
None => {
missing.push(var_name.to_string());
result.push_str(&format!("{{{{missing:{}}}}}", var_name));
}
}
last_end = m.end();
}
result.push_str(&template[last_end..]);
if missing.is_empty() {
Ok(result)
} else {
Err(format!("Missing variables: {}", missing.join(", ")))
}
}
}
/// Renders one template through both `TemplateEngine` rendering paths.
fn template_usage() {
    let template = "Hello {{name}}, welcome to {{city}}!";

    let mut engine = TemplateEngine::new();
    for (key, value) in [("name", "Alice"), ("city", "Wonderland")] {
        engine.set(key, value);
    }

    // Both rendering paths give the same text here, because every
    // placeholder in the template is defined.
    let result1 = engine.render_callback(template);
    // "Hello Alice, welcome to Wonderland!"
    let result2 = engine.render_iter(template);
    // Ok("Hello Alice, welcome to Wonderland!")
}
Both approaches work; captures_iter allows returning errors.
Real-World Example: Markdown Processing
use regex::Regex;
/// Converts a small subset of inline Markdown (bold, italic, code) to HTML
/// with a single alternation pattern and a `captures_iter` loop.
fn process_markdown() {
    // One pattern with three alternatives; exactly one of the capture
    // groups participates in any given match:
    //   group 1: **bold**   group 2: *italic*   group 3: `code`
    // The bold alternative is listed first so that `**` is not consumed
    // by the italic alternative.
    fn process_formatting(text: &str) -> String {
        let re = Regex::new(r"\*\*([^*]+)\*\*|\*([^*]+)\*|`([^`]+)`").unwrap();
        let mut result = String::new();
        let mut last_end = 0;
        for caps in re.captures_iter(text) {
            let m = caps.get(0).unwrap();
            result.push_str(&text[last_end..m.start()]);
            // Check which capture group matched
            if let Some(bold) = caps.get(1) {
                result.push_str(&format!("<b>{}</b>", bold.as_str()));
            } else if let Some(italic) = caps.get(2) {
                result.push_str(&format!("<i>{}</i>", italic.as_str()));
            } else if let Some(code) = caps.get(3) {
                result.push_str(&format!("<code>{}</code>", code.as_str()));
            }
            last_end = m.end();
        }
        result.push_str(&text[last_end..]);
        result
    }

    let markdown = "Here is **bold** and *italic* and `code` text.";
    // With captures_iter, we have full control
    let html = process_formatting(markdown);
    // "Here is <b>bold</b> and <i>italic</i> and <code>code</code> text."
}
Processing multiple patterns with different logic is clear with captures_iter.
Choosing Between Them
use regex::Regex;
/// A commented checklist for choosing between a static `replace_all`,
/// `replace_all` with a callback, and `captures_iter`.
fn decision_guide() {
    // Use replace_all when:
    // 1. Replacement is static or simple capture reference
    // 2. Replacement doesn't depend on context/state
    // 3. You want the simplest code
    // 4. Performance matters (it's optimized)
    let text = "foo bar baz";
    let re = Regex::new(r"(\w+)").unwrap();
    // Static replacement
    let _ = re.replace_all(text, "WORD");
    // Capture reference
    let _ = re.replace_all(text, "[$1]");

    // Use replace_all with callback when:
    // 1. Replacement depends on captured content
    // 2. Transformation is simple (no state needed)
    // 3. You don't need error propagation
    // 4. You want simpler code than captures_iter
    let _ = re.replace_all(text, |caps: &regex::Captures| {
        caps[1].to_uppercase()
    });

    // Use captures_iter when:
    // 1. Replacement depends on state across matches
    // 2. You need to propagate errors
    // 3. Replacement depends on surrounding context
    // 4. You need fine-grained control over allocation
    // 5. Logic is too complex for a callback
}
Summary Table
fn summary() {
// | Aspect | replace_all (static) | replace_all (callback) | captures_iter |
// |-------------------------|----------------------|------------------------|---------------|
// | Simplicity | β
β
β
β
β
| β
β
β
β
β | β
β
βββ |
// | Performance | β
β
β
β
β
| β
β
β
ββ | β
β
β
ββ |
// | Dynamic replacement | β | β | β |
// | State across matches | β | β (via closure) | β |
// | Error propagation | β | β (panics) | β |
// | Conditional logic | β | β | β |
// | Allocation control | β | β | β |
// | Multiple capture groups | $1, $2 | caps[1], caps[2] | caps[1], caps[2] |
// Quick recommendations:
// - Simple text replacement: replace_all (static)
// - Dynamic transformation: replace_all (callback)
// - Complex logic, errors, state: captures_iter
}Synthesis
Quick reference:
use regex::Regex;
let text = "hello world";
let re = Regex::new(r"(\w+)").unwrap();

// Simple static replacement
let result = re.replace_all(text, "WORD");
// "WORD WORD"

// Capture group reference
let result = re.replace_all(text, "[$1]");
// "[hello] [world]"

// Callback (dynamic): the closure receives the Captures of each match
let result = re.replace_all(text, |caps: &regex::Captures| {
    caps[1].to_uppercase()
});
// "HELLO WORLD"

// Full control (captures_iter): copy the text between matches yourself
let mut result = String::new();
let mut last_end = 0;
for caps in re.captures_iter(text) {
    let m = caps.get(0).unwrap();
    result.push_str(&text[last_end..m.start()]);
    result.push_str(&caps[1].to_uppercase());
    last_end = m.end();
}
result.push_str(&text[last_end..]);
// "HELLO WORLD"
Key insight: The replace_all vs captures_iter choice is about trading simplicity for control. replace_all with static replacement (re.replace_all(text, "replacement")) is the fastest and cleanest option when you just need to swap pattern matches for a constant string or capture group references like $1. When you need dynamic transformations based on matched content, replace_all with a closure (re.replace_all(text, |caps| ...)) gives you access to Captures for each match and handles all the string building for you—this is sufficient for most conditional logic and is clearer than captures_iter. However, captures_iter becomes the better choice when you need: (1) state across matches that is easier to manage in an explicit loop (simple counters and running totals also work in a closure, because replace_all accepts FnMut closures), (2) proper error handling without panicking, (3) transformations that depend on context outside the match (like surrounding text), or (4) precise control over memory allocation. The callback approach is a middle ground that handles the common case of "transform each match independently" without the verbosity of manual string building, while captures_iter is the escape hatch for genuinely complex scenarios where each replacement decision depends on more than just the current match's captures.
