Loading page…
Rust walkthroughs
Loading page…
How does regex::Regex::captures_iter enable iterating over all non-overlapping matches with capture groups?
regex::Regex::captures_iter returns an iterator that yields Captures values for each non-overlapping match in a string, providing access to both the full match and all named or numbered capture groups within each match. Each Captures value contains the match locations for the entire pattern and each capturing group, accessible by index (get(i)) or name (name("group")). The iterator processes matches lazily—finding each match only when the iterator advances—making it efficient for large texts. Non-overlapping means that after finding a match, the next search starts at the end of the current match, so a pattern like a.a on the text ababa finds only one match (aba at bytes 0–3), not two (the second aba at bytes 2–5 would overlap the first).
use regex::Regex;
/// Prints every ISO-style date found in a string together with its
/// year, month and day capture groups.
fn main() {
    let input = "Dates: 2024-01-15, 2023-12-25, 2022-07-04";
    let date_pattern = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap();

    // Each item yielded by `captures_iter` is one non-overlapping match
    // plus all of its capture groups.
    date_pattern.captures_iter(input).for_each(|date| {
        // Group 0 is the whole match; groups 1..=3 are the parenthesised
        // sub-patterns, numbered by their opening parenthesis.
        let part = |i: usize| date.get(i).unwrap().as_str();
        println!(
            "Full: {}, Year: {}, Month: {}, Day: {}",
            part(0),
            part(1),
            part(2),
            part(3)
        );
    });
}
captures_iter yields Captures values, each containing the full match and all captured groups.
use regex::{Regex, Captures};
/// Shows index syntax (`caps[i]`) as a shorthand for reading groups.
fn main() {
    let pairs = "a=1, b=2, c=3";
    let key_value = Regex::new(r"(\w+)=(\d+)").unwrap();

    for m in key_value.captures_iter(pairs) {
        // `m[0]` is the entire match, `m[1]` and `m[2]` the two groups.
        // Indexing yields the matched text directly and panics when the
        // group did not participate; `m.get(i)` is the `Option` form.
        let (full, key, value) = (&m[0], &m[1], &m[2]);
        println!("{}: key={}, value={}", full, key, value);
    }
}
Captures implements Index<usize> for direct string access, or use get() for Option<Match>.
use regex::Regex;
/// Contrasts the non-overlapping behaviour of `captures_iter` with a
/// hand-written overlapping search.
fn main() {
    // A pattern whose occurrences can overlap in the text below.
    let re = Regex::new(r"aba").unwrap();
    let text = "ababa";

    // Non-overlapping: finds only one match.
    let matches: Vec<&str> = re
        .captures_iter(text)
        .map(|caps| caps.get(0).unwrap().as_str())
        .collect();
    println!("{:?}", matches); // ["aba"]
    // After matching "aba" at bytes 0..3 the search resumes at byte 3;
    // only "ba" remains, so there is no second match.

    // Overlapping search (manual): try every character boundary as a
    // starting point and keep a match only when it begins exactly there.
    // Without the start check an unanchored search from byte 1 would
    // re-report the match at byte 2, producing duplicates. Using
    // `char_indices` avoids slicing in the middle of a multi-byte char.
    let overlapping: Vec<&str> = text
        .char_indices()
        .filter_map(|(start, _)| {
            let m = re.captures_at(text, start)?.get(0)?;
            (m.start() == start).then(|| m.as_str())
        })
        .collect();
    println!("{:?}", overlapping); // ["aba", "aba"]
}
Non-overlapping means the next search starts after the previous match ends.
use regex::Regex;
/// Reads named capture groups through `name()` and through index syntax.
fn main() {
    let re = Regex::new(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})").unwrap();
    let text = "2024-01-15, 2023-12-25";

    for caps in re.captures_iter(text) {
        // Access by name: `name()` returns `Option<Match>`.
        let year = caps.name("year").unwrap().as_str();
        let month = caps.name("month").unwrap().as_str();
        let day = caps.name("day").unwrap().as_str();

        // Named groups are still numbered, so index access keeps working.
        debug_assert_eq!(caps.get(1).unwrap().as_str(), year);

        println!("{}-{}-{}", year, month, day);
    }

    // Index syntax also accepts a group name directly: `&caps["year"]`.
    // Like numeric indexing, it panics if the group did not participate.
    for caps in re.captures_iter(text) {
        let year = &caps["year"];
        let month = &caps["month"];
        println!("{}-{}", year, month);
    }
}
Named groups are accessed with name() returning Option<Match> or index syntax &caps["name"].
use regex::Regex;
/// Demonstrates that a capture group inside an optional part of the
/// pattern is reported as `None` when that part did not match.
fn main() {
    // `(?:...)?` makes the whole non-capturing wrapper optional; the
    // `(\d+)` inside it is the group that may or may not participate.
    let pattern = Regex::new(r"(\w+)(?:\s+(\d+))?").unwrap();
    let input = "hello 123, world";

    pattern.captures_iter(input).for_each(|caps| {
        // Group 1 is mandatory, so indexing cannot panic here.
        let word = &caps[1];
        // Group 2 is optional: go through `get()` and keep the `Option`.
        let number = caps.get(2).map(|digits| digits.as_str());
        println!("word: {}, number: {:?}", word, number);
    });
    // Prints:
    //   word: hello, number: Some("123")
    //   word: world, number: None
}
Optional capture groups return None from get() when not matched.
use regex::Regex;
/// Prints the text and byte offsets of the full match and of each group.
fn main() {
    let text = "Email: user@domain, admin@site";
    let address = Regex::new(r"(\w+)@(\w+)").unwrap();

    for caps in address.captures_iter(text) {
        // Every `Match` carries its text plus `start()`/`end()` byte
        // offsets into the original haystack.
        let (whole, local, domain) = (
            caps.get(0).unwrap(),
            caps.get(1).unwrap(),
            caps.get(2).unwrap(),
        );

        println!("Full match: '{}' at {}-{}", whole.as_str(), whole.start(), whole.end());
        println!(" Local: '{}' at {}-{}", local.as_str(), local.start(), local.end());
        println!(" Domain: '{}' at {}-{}", domain.as_str(), domain.start(), domain.end());
    }
}
Each Match value contains the matched text and its byte positions in the original string.
use regex::Regex;
/// Steps a `captures_iter` iterator by hand to show that matches are
/// searched for only when `next()` is called.
fn main() {
    let text = "one two three four five";
    let word = Regex::new(r"\b\w+\b").unwrap();

    // Building the iterator performs no search.
    let mut words = word.captures_iter(text);

    // The first search runs here.
    if let Some(first) = words.next() {
        println!("First word: {}", &first[0]);
    }

    // The second search runs here, resuming after the first match.
    if let Some(second) = words.next() {
        println!("Second word: {}", &second[0]);
    }

    // Whatever is left can be drained with ordinary iterator adaptors.
    let remaining: Vec<&str> = words
        .map(|rest| rest.get(0).unwrap().as_str())
        .collect();
    println!("Remaining: {:?}", remaining);
}
The iterator is lazy—matches are found only as needed.
use regex::Regex;
/// Compares `find_iter` (full matches only) with `captures_iter`
/// (full match plus groups).
fn main() {
    let re = Regex::new(r"(\d+)-(\d+)").unwrap();
    let text = "10-20, 30-40, 50-60";

    // find_iter: yields Match values (full match only).
    println!("find_iter (full matches only):");
    for m in re.find_iter(text) {
        println!(" {}", m.as_str());
    }

    // captures_iter: yields Captures (full match + groups).
    println!("\ncaptures_iter (with groups):");
    for caps in re.captures_iter(text) {
        // `caps[i]` is already string data, so it is parsed directly;
        // there is no `Match` here to call `as_str()` on.
        let lhs: i32 = caps[1].parse().unwrap();
        let rhs: i32 = caps[2].parse().unwrap();
        println!(" {} + {} = {}", &caps[1], &caps[2], lhs + rhs);
    }
}
Use find_iter when you only need the full match; use captures_iter when you need groups.
use regex::Regex;
/// Combines `captures_iter` with `enumerate` and uses match offsets to
/// slice the surrounding text.
fn main() {
    let text = "x=1, y=2, z=3";
    let assignment = Regex::new(r"(\w+)=(\d+)").unwrap();

    // Standard adaptors such as `enumerate()` apply directly.
    assignment
        .captures_iter(text)
        .enumerate()
        .for_each(|(i, caps)| println!("Match {}: {} = {}", i, &caps[1], &caps[2]));

    // The offsets of group 0 locate the match inside the haystack, so
    // the text on either side can be sliced out.
    for caps in assignment.captures_iter(text) {
        let span = caps.get(0).unwrap();
        let (before, after) = (&text[..span.start()], &text[span.end()..]);
        println!(
            "Before: '{}', Match: '{}', After: '{}'",
            before,
            span.as_str(),
            after
        );
    }
}
The iterator works with standard iterator adaptors like enumerate, map, filter.
use regex::Regex;
/// Illustrates that a capture group under a repetition keeps only the
/// text from its final iteration.
fn main() {
    let text = "first second third fourth";
    // Group 2 sits inside `(?: ... )+`, so it is matched several times.
    let re = Regex::new(r"(\w+)(?:\s+(\w+))+").unwrap();

    re.captures_iter(text).for_each(|caps| {
        // Group 1 is matched once.
        println!("Group 1: {}", &caps[1]); // "first"
        // Group 2 would report only "fourth", the last repetition;
        // "second" and "third" are not retained. This is how regex
        // capture groups behave in general, not something Rust-specific.
        // To collect every word, match a simpler pattern repeatedly
        // with `captures_iter` instead of repeating a group.
    });
}
Repeated capture groups only capture the last match—use multiple matches instead.
use regex::Regex;
/// Uses `Captures::extract` to destructure a match with a fixed number
/// of groups.
fn main() {
    // `extract` is available from regex 1.9 onwards.
    let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap();
    let text = "2024-01-15, 2023-12-25";

    for caps in re.captures_iter(text) {
        // `extract` returns the full match and a fixed-size array of the
        // explicit groups: `(&str, [&str; N])`. `N` is inferred from the
        // array pattern. It panics if the regex does not have exactly N
        // groups or if any of them did not participate in the match.
        let (full, [year, month, day]) = caps.extract();
        println!("{:?}", (full, year, month, day));
    }
}
The extract method (regex 1.9+) provides ergonomic extraction into the full match plus a fixed-size array of groups.
use regex::Regex;
use std::fs::File;
use std::io::{BufRead, BufReader};
/// Collects every `key = value` pair found in the file at `path`.
///
/// The file is read one line at a time, so only the current line and the
/// accumulated results are held in memory.
///
/// # Panics
///
/// Panics if the file cannot be opened or a line cannot be read.
fn process_large_file(path: &str) -> Vec<(String, String)> {
    let re = Regex::new(r"(\w+)\s*=\s*(\w+)").unwrap();
    let file = File::open(path).unwrap();
    let reader = BufReader::new(file);
    let mut results = Vec::new();

    // Process line by line - never loads the entire file into memory.
    for line in reader.lines() {
        let line = line.unwrap();
        // The captures borrow `line`, which is dropped at the end of this
        // iteration, so the groups are copied into owned `String`s.
        results.extend(
            re.captures_iter(&line)
                .map(|caps| (caps[1].to_string(), caps[2].to_string())),
        );
    }
    results
}
// Or read the whole file once and search it lazily.
/// Returns an iterator over every `key = value` pair in the file at `path`.
///
/// The file content is read eagerly, but each match is searched for only
/// when the returned iterator is advanced.
///
/// # Panics
///
/// Panics if the file cannot be read as UTF-8 text.
fn process_file_lazy(path: &str) -> impl Iterator<Item = (String, String)> {
    let re = Regex::new(r"(\w+)\s*=\s*(\w+)").unwrap();
    let content = std::fs::read_to_string(path).unwrap();

    // `captures_iter(&content)` would borrow a local that is dropped on
    // return, so the closure instead owns the regex, the text and the
    // resume offset, and runs one search per `next()` call.
    let mut pos = 0;
    std::iter::from_fn(move || {
        let caps = re.captures_at(&content, pos)?;
        // This pattern cannot match the empty string, so resuming at the
        // end of the match always makes progress.
        pos = caps.get(0)?.end();
        Some((caps[1].to_string(), caps[2].to_string()))
    })
}
The lazy iterator works well with streaming or line-by-line processing.
use regex::Regex;
/// Walks every group index of each match to show what `caps.len()`
/// counts and how absent optional groups are reported.
fn main() {
    let re = Regex::new(r"(\w+)(?:-(\w+))?(?:@(\w+))?").unwrap();

    // `len()` is the number of capture groups plus one for the full
    // match: index 0 is the full match, 1..len are the groups.
    let show_groups = |haystack: &str| {
        for caps in re.captures_iter(haystack) {
            // Visit every slot, including optional ones that are `None`.
            for i in 0..caps.len() {
                if let Some(m) = caps.get(i) {
                    println!("Group {}: '{}'", i, m.as_str());
                } else {
                    println!("Group {}: <none>", i);
                }
            }
        }
    };

    // Prints:
    //   Group 0: 'hello-world@test'   (full match)
    //   Group 1: 'hello'
    //   Group 2: 'world'
    //   Group 3: 'test'
    show_groups("hello-world@test");

    // Prints:
    //   Group 0: 'simple'
    //   Group 1: 'simple'
    //   Group 2: <none>
    //   Group 3: <none>
    show_groups("simple");
}
caps.len() includes the full match at index 0.
use regex::Regex;
/// One log line split into its three textual fields.
#[derive(Debug)]
struct LogEntry {
/// Date and time portion of the line, kept as text.
timestamp: String,
/// Severity keyword of the line, kept as text.
level: String,
/// Free-form remainder of the line.
message: String,
}
/// Extracts a `LogEntry` from every line of `text` that looks like
/// `YYYY-MM-DD HH:MM:SS LEVEL message`, where LEVEL is INFO, WARN or ERROR.
///
/// Text that does not match the pattern is skipped.
fn parse_logs(text: &str) -> Vec<LogEntry> {
    let line_pattern = Regex::new(
        r"(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\s+(?P<level>INFO|WARN|ERROR)\s+(?P<message>.+)"
    ).unwrap();

    let mut entries = Vec::new();
    for caps in line_pattern.captures_iter(text) {
        // All three named groups are mandatory in the pattern, so
        // indexing by name cannot panic for a successful match.
        let field = |name: &str| caps[name].to_string();
        entries.push(LogEntry {
            timestamp: field("timestamp"),
            level: field("level"),
            message: field("message"),
        });
    }
    entries
}
/// Parses a small three-line log sample and prints each entry.
fn main() {
    // The raw string spans several lines; each line is one log record.
    let logs = r#"2024-01-15 10:30:00 INFO Starting service
2024-01-15 10:30:01 ERROR Connection failed
2024-01-15 10:30:02 WARN Retrying..."#;

    parse_logs(logs)
        .into_iter()
        .for_each(|entry| println!("{:?}", entry));
}
captures_iter naturally maps to structured data extraction.
use regex::Regex;
/// Shows the two ways to keep captured text: copy it into owned
/// `String`s, or return `&str` slices tied to the haystack's lifetime.
fn main() {
    // Borrowed variant: the returned slices live as long as `text`, not
    // as long as the regex.
    fn extract_words<'a>(re: &Regex, text: &'a str) -> Vec<&'a str> {
        let mut words = Vec::new();
        for caps in re.captures_iter(text) {
            words.push(caps.get(1).unwrap().as_str());
        }
        words
    }

    let re = Regex::new(r"(\w+)").unwrap();
    let text = String::from("hello world");

    // Owned variant: the iterator borrows `re` and `text`, but the
    // collected `String`s are independent copies.
    let _owned_words: Vec<String> = re
        .captures_iter(&text)
        .map(|caps| caps[1].to_string())
        .collect();
}
Captures values borrow the source text, allowing zero-copy extraction.
use regex::{Regex, RegexSet};
/// Uses a `RegexSet` to learn which patterns occur in the text, then
/// runs the matching individual `Regex` to read its capture groups.
fn main() {
    // The same pattern sources feed both the set and the single regexes.
    let date_src = r"(\d{4})-(\d{2})-(\d{2})";
    let email_src = r"(\w+)@(\w+\.\w+)";

    // A RegexSet only answers "which patterns match"; it has no captures.
    let patterns = RegexSet::new(&[date_src, email_src]).unwrap();
    let date_re = Regex::new(date_src).unwrap();
    let email_re = Regex::new(email_src).unwrap();

    let text = "Date: 2024-01-15, Email: user@example.com";

    // Indices come back in pattern order: 0 = date, 1 = email.
    for pattern_idx in patterns.matches(text) {
        if pattern_idx == 0 {
            for caps in date_re.captures_iter(text) {
                println!("Date: {}-{}-{}", &caps[1], &caps[2], &caps[3]);
            }
        } else if pattern_idx == 1 {
            for caps in email_re.captures_iter(text) {
                println!("Email: {}@{}", &caps[1], &caps[2]);
            }
        }
    }
}
For multiple patterns, use RegexSet to find which patterns match, then use the corresponding Regex for captures.
use regex::Regex;
/// Doubles every numeric value in `key=value` pairs, first with
/// `replace_all` and then by hand using match offsets.
fn main() {
    let re = Regex::new(r"(\w+)=(\d+)").unwrap();
    let text = "x=10, y=20, z=30";

    // `replace_all` with a closure hands each match's `Captures` to the
    // closure and substitutes whatever string it returns.
    let result = re.replace_all(text, |caps: &regex::Captures| {
        let key = &caps[1];
        let value: i32 = caps[2].parse().unwrap();
        format!("{}={}", key, value * 2)
    });
    println!("{}", result); // "x=20, y=40, z=60"

    // Alternative: record (start, end, replacement) for each match...
    let mut results: Vec<(usize, usize, String)> = Vec::new();
    for caps in re.captures_iter(text) {
        let m = caps.get(0).unwrap();
        let key = &caps[1];
        let value: i32 = caps[2].parse().unwrap();
        results.push((m.start(), m.end(), format!("{}={}", key, value * 2)));
    }

    // ...then splice the replacements between the untouched stretches
    // of the original text. Matches arrive in order and never overlap,
    // so a single forward cursor is enough.
    let mut rebuilt = String::with_capacity(text.len());
    let mut cursor = 0;
    for (start, end, replacement) in &results {
        rebuilt.push_str(&text[cursor..*start]);
        rebuilt.push_str(replacement);
        cursor = *end;
    }
    rebuilt.push_str(&text[cursor..]);
    assert_eq!(rebuilt.as_str(), &*result);
}
replace_all with a closure receives Captures for custom replacement logic.
use regex::Regex;
/// Finds parenthesised groups that contain no further parentheses.
fn main() {
    let text = "a (b (c) d) e (f) g";
    // `[^()]+` forbids parentheses inside the match, so only the
    // innermost pairs qualify; arbitrary nesting depth is out of reach.
    let innermost = Regex::new(r"\(([^()]+)\)").unwrap();

    // Single pass: prints "c" and then "f".
    innermost
        .captures_iter(text)
        .for_each(|caps| println!("Innermost: {}", &caps[1]));

    // Deeper structure needs repeated passes or a real parser.
}
Regular expressions cannot match arbitrarily nested structures; use a parser for those cases.
use regex::Regex;
use std::collections::HashMap;
/// Parses `Name: value` pairs from `text` into a map.
///
/// Names and values are trimmed of surrounding whitespace. When a name
/// occurs more than once, the later value replaces the earlier one.
fn parse_headers(text: &str) -> HashMap<String, String> {
    let header = Regex::new(r"(?P<name>[^:]+):\s*(?P<value>.+)").unwrap();

    let mut headers = HashMap::new();
    for caps in header.captures_iter(text) {
        // Skip the match if either named group is absent.
        if let (Some(name), Some(value)) = (caps.name("name"), caps.name("value")) {
            headers.insert(
                name.as_str().trim().to_string(),
                value.as_str().trim().to_string(),
            );
        }
    }
    headers
}
/// Parses three sample headers and prints the resulting map.
fn main() {
    let raw = "Content-Type: application/json\nContent-Length: 42\nAuthorization: Bearer token123";
    // The map holds Content-Type, Content-Length and Authorization;
    // `HashMap` iteration order is unspecified, so the printed order
    // may vary between runs.
    println!("{:?}", parse_headers(raw));
}
HTTP header parsing with named capture groups.
use regex::Regex;
/// The pieces of one URL, stored as separate fields.
#[derive(Debug)]
struct Url {
/// URL scheme, kept as text.
scheme: String,
/// Host name portion of the URL.
host: String,
/// Port number, if one is present.
port: Option<u16>,
/// Path portion of the URL.
path: String,
}
/// Finds every `http://` or `https://` URL in `text` and splits it into
/// scheme, host, optional port and path.
///
/// A missing path defaults to `"/"`. A port that does not parse as a
/// `u16` is reported as `None`.
fn extract_urls(text: &str) -> Vec<Url> {
    let url_pattern = Regex::new(
        r"(?P<scheme>https?)://(?P<host>[^/:]+)(?::(?P<port>\d+))?(?P<path>/[^ \s]*)?"
    ).unwrap();

    let mut urls = Vec::new();
    for caps in url_pattern.captures_iter(text) {
        // `port` and `path` sit in optional parts of the pattern, so they
        // are read through `name()` rather than by indexing.
        let port = caps
            .name("port")
            .and_then(|digits| digits.as_str().parse::<u16>().ok());
        let path = caps
            .name("path")
            .map_or_else(|| "/".to_string(), |p| p.as_str().to_string());

        urls.push(Url {
            scheme: caps["scheme"].to_string(),
            host: caps["host"].to_string(),
            port,
            path,
        });
    }
    urls
}
/// Extracts the URLs from a sample sentence and prints each one.
fn main() {
    let sentence = "Visit https://example.com:8080/path or http://localhost for more info.";
    extract_urls(sentence)
        .into_iter()
        .for_each(|url| println!("{:?}", url));
}
URL extraction with optional port and path groups.
captures_iter components:
| Component | Type | Purpose |
|-----------|------|---------|
| captures_iter method | CaptureMatches<'_, '_> | Iterator yielding Captures |
| Captures value | Captures<'_> | Contains all groups for one match |
| get(0) | Option<Match> | Full match |
| get(i) | Option<Match> | i-th capture group |
| name("n") | Option<Match> | Named capture group |
| &caps[i] | &str | Direct string access |
| caps.len() | usize | Number of groups + 1 |
| caps.extract() | (&str, [&str; N]) | Full match plus a fixed-size array of groups (regex 1.9+) |
Comparison with other methods:
| Method | Yields | Use Case |
|--------|--------|----------|
| is_match | bool | Check if pattern exists |
| find | Option<Match> | First match only |
| find_iter | Match values | All matches, full text only |
| captures | Option<Captures> | First match with groups |
| captures_iter | Captures values | All matches with groups |
Key insight: regex::Regex::captures_iter bridges pattern matching and structured data extraction by yielding Captures values that contain both the full match and each captured group's position and text. The iterator processes matches lazily and without overlap—each subsequent search starts where the previous match ended. Named capture groups provide semantic meaning to extracted values, while optional groups return None when not present. The Captures values borrow the source text, enabling zero-copy extraction of substrings. Combined with iterator adaptors, captures_iter naturally transforms text into structured data like Vec, HashMap, or custom structs. For patterns with a fixed number of groups that always participate, the extract method (regex 1.9+) provides ergonomic destructuring into the full match and an array of groups. The non-overlapping behavior ensures predictable iteration—each character is matched at most once, which simplifies reasoning about match boundaries but requires alternative approaches for overlapping matches.