Loading page…
Rust walkthroughs
Loading page…
regex::Captures::get and name for accessing matched groups by index vs name?

Captures::get retrieves matched groups by numeric index (index 0 is the entire match; capture groups start at 1), returning an Option<Match> that carries the matched text and its byte offsets. Captures::name retrieves groups by their named identifier, returning the same Option<Match> type but using the symbolic name assigned in the pattern with (?P<name>...) syntax. Both methods return None if the group exists but didn't participate in the match (optional groups) or if the index/name doesn't exist, but name provides semantic clarity and stays correct when pattern modifications add or reorder groups.
use regex::Regex;
// Positional access: index 0 is the whole match, indices 1.. are the
// parenthesised groups in the order they appear in the pattern.
fn main() {
    let date_re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap();
    let input = "Date: 2024-01-15";

    let groups = match date_re.captures(input) {
        Some(groups) => groups,
        None => return,
    };
    // Every group in this pattern is mandatory, so unwrapping is safe here.
    let group_text = |idx: usize| groups.get(idx).unwrap().as_str();

    // Index 0: the complete matched span.
    println!("Full match: {}", group_text(0)); // "2024-01-15"

    // Indices 1, 2, 3: the three capture groups, left to right.
    let (year, month, day) = (group_text(1), group_text(2), group_text(3));
    println!("Year: {}", year); // "2024"
    println!("Month: {}", month); // "01"
    println!("Day: {}", day); // "15"
}
get(0) returns the entire match; get(1), get(2), etc. return capture groups in declaration order.
use regex::Regex;
// Name-based access: each `(?P<label>...)` group is fetched by its label.
fn main() {
    let date_re =
        Regex::new(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})").unwrap();
    let input = "Date: 2024-01-15";

    let groups = match date_re.captures(input) {
        Some(groups) => groups,
        None => return,
    };

    // Look up each component by the label written in the pattern.
    let year = groups.name("year").unwrap().as_str();
    let month = groups.name("month").unwrap().as_str();
    let day = groups.name("day").unwrap().as_str();
    println!("Year: {}", year); // "2024"
    println!("Month: {}", month); // "01"
    println!("Day: {}", day); // "15"

    // A named group keeps its positional number too; numbering follows the
    // order of appearance, so "year" is also group 1.
    let first_by_position = groups.get(1).unwrap().as_str();
    assert_eq!(year, first_by_position);
}
Named groups use (?P<name>pattern) syntax and are accessed with name("identifier").
use regex::{Regex, Match};
// Shows that `get` and `name` hand back the same `Option<Match>` and what a
// `Match` exposes.
fn main() {
    let re = Regex::new(r"(?P<word>\w+)").unwrap();
    let text = "hello world";
    if let Some(caps) = re.captures(text) {
        // Both get() and name() return Option<Match>
        let match_result: Option<Match> = caps.get(1);
        let named_result: Option<Match> = caps.name("word");
        // "word" is group 1, so both lookups must describe the same span.
        assert_eq!(match_result, named_result);

        // Match provides:
        let m = named_result.unwrap();
        println!("Matched string: {}", m.as_str());
        // Byte offsets in the original text
        println!("Start byte: {}", m.start());
        println!("End byte: {}", m.end());
        // Convenience methods
        println!("Length: {}", m.len());
        println!("Range: {:?}", m.range());
    }
}
Both methods return Option<Match> providing matched text and byte positions.
use regex::Regex;
// An optional group that does not take part in a match is reported as None.
fn main() {
    // The trailing `(?:\s+(\w+))?` makes group 2 optional.
    let pair_re = Regex::new(r"(\w+)(?:\s+(\w+))?").unwrap();
    let single_word = "hello";
    let two_words = "hello world";

    if let Some(groups) = pair_re.captures(single_word) {
        // Group 1 is mandatory, so it is always present on a match.
        let first = groups.get(1).map(|m| m.as_str());
        println!("First: {:?}", first); // Some("hello")
        // Group 2 had nothing to match against here.
        let second = groups.get(2);
        println!("Second: {:?}", second); // None (group didn't match)
    }

    if let Some(groups) = pair_re.captures(two_words) {
        let first = groups.get(1).map(|m| m.as_str());
        let second = groups.get(2).map(|m| m.as_str());
        println!("First: {:?}", first); // Some("hello")
        println!("Second: {:?}", second); // Some("world")
    }
}
None means the group exists but didn't participate in this specific match.
use regex::Regex;
// Shows how inserting a group renumbers the ones after it, and how named
// groups avoid the problem.
fn main() {
    let text = "user@example.com/docs";

    // PROBLEM: a group inserted ahead of existing groups renumbers them.
    // Original pattern:
    let re_v1 = Regex::new(r"(\w+)@(\w+)\.(\w+)").unwrap();
    // Group 1: user, Group 2: domain, Group 3: tld
    // Modified pattern - an optional scheme group now comes first:
    let re_v2 = Regex::new(r"(?:(\w+):)?(\w+)@(\w+)\.(\w+)(?:/(\w+))?").unwrap();
    // Group 1: scheme, Group 2: user, Group 3: domain, Group 4: tld, Group 5: path
    // Every index that existed in v1 has shifted by one.
    if let (Some(old), Some(new)) = (re_v1.captures(text), re_v2.captures(text)) {
        println!("v1 get(1): {:?}", old.get(1).map(|m| m.as_str())); // Some("user")
        println!("v2 get(1): {:?}", new.get(1).map(|m| m.as_str())); // None (no scheme)
        println!("v2 get(2): {:?}", new.get(2).map(|m| m.as_str())); // Some("user")
    }

    // Using named groups solves this:
    let re_named = Regex::new(
        r"(?P<user>\w+)@(?P<domain>\w+)\.(?P<tld>\w+)(?:/(?P<path>\w+))?",
    )
    .unwrap();
    // Access is by name, not index, so adding or moving groups
    // doesn't affect existing lookups.
    if let Some(caps) = re_named.captures(text) {
        // These names remain stable even if the pattern changes
        println!("User: {:?}", caps.name("user").map(|m| m.as_str()));
        println!("Domain: {:?}", caps.name("domain").map(|m| m.as_str()));
        println!("TLD: {:?}", caps.name("tld").map(|m| m.as_str()));
        println!("Path: {:?}", caps.name("path").map(|m| m.as_str()));
    }
}
Named groups insulate code from pattern changes that would break index-based access.
use regex::Regex;
// Named and unnamed groups in one pattern: all are numbered, only some are named.
fn main() {
    // Numbering follows the order in which the parentheses open:
    //   0 -> entire match
    //   1 -> (\w+)             unnamed
    //   2 -> (?P<domain>\w+)   named "domain"
    //   3 -> (\w+)             unnamed
    let mixed_re = Regex::new(r"(\w+)@(?P<domain>\w+)\.(\w+)").unwrap();
    let address = "user@example.com";

    if let Some(groups) = mixed_re.captures(address) {
        let text_at = |idx: usize| groups.get(idx).map(|m| m.as_str());

        // Unnamed groups can only be reached through their number.
        println!("User: {:?}", text_at(1)); // Some("user")
        println!("TLD: {:?}", text_at(3)); // Some("com")

        // The named group answers to its label and to its number.
        let domain = groups.name("domain").map(|m| m.as_str());
        println!("Domain: {:?}", domain); // Some("example")
        println!("Domain (by index): {:?}", text_at(2)); // Some("example")
    }
}
Named groups are still numbered and accessible by index; unnamed groups are only accessible by index.
use regex::Regex;
// Compares the two lookup paths for the same group and checks they agree.
fn main() {
    let url_re =
        Regex::new(r"(?P<protocol>https?)://(?P<host>[^/]+)(?P<path>/.*)?").unwrap();
    let url = "https://example.com/path/to/page";

    let groups = match url_re.captures(url) {
        Some(groups) => groups,
        None => return,
    };

    // Lookup by index goes straight to the group's slot.
    let via_index = groups.get(1).map(|m| m.as_str());
    // Lookup by name first resolves the name to its index (a hash lookup),
    // then reads the same slot.
    let via_name = groups.name("protocol").map(|m| m.as_str());

    // Either way the cost is tiny. In a hot loop the index form saves the
    // name resolution; everywhere else, prefer names for maintainability.
    assert_eq!(via_index, via_name);
}
Both methods are O(1); the performance difference is negligible for most applications.
use regex::Regex;
// Unknown indices and names yield None; groups and names can also be
// enumerated without knowing the pattern's structure in advance.
fn main() {
    let re = Regex::new(r"(\w+)").unwrap();
    let text = "hello";
    if let Some(caps) = re.captures(text) {
        // Valid indices: 0 (full match) and 1 (capture group)
        assert!(caps.get(0).is_some()); // Always exists for a match
        assert!(caps.get(1).is_some()); // Exists and matched
        // Invalid index: returns None
        assert!(caps.get(2).is_none()); // Group doesn't exist
        // Invalid name: returns None
        assert!(caps.name("nonexistent").is_none());

        // Captures::iter walks every group (including group 0) in index
        // order, yielding None for groups that did not participate.
        for (i, group) in caps.iter().enumerate() {
            println!("Group {}: {:?}", i, group.map(|m| m.as_str()));
        }
    }
    // Regex::capture_names lists the name of each group in index order;
    // unnamed groups (and group 0) are reported as None.
    for (i, name) in re.capture_names().enumerate() {
        println!("Group {} name: {:?}", i, name);
    }
}
Out-of-bounds indices and unknown names return None without panicking.
use regex::Regex;
// Uses the byte offsets carried by Match to slice both the searched text and
// a larger string that embeds it.
fn main() {
    let re = Regex::new(r"(?P<key>\w+)=(?P<value>\w+)").unwrap();
    let text = "setting=value";
    if let Some(caps) = re.captures(text) {
        // Extract positions for later use
        let key_match = caps.name("key").unwrap();
        let value_match = caps.name("value").unwrap();
        // Byte positions in the searched string
        println!("Key range: {}..{}", key_match.start(), key_match.end());
        println!("Value range: {}..{}", value_match.start(), value_match.end());

        // Offsets are relative to `text`. To apply them to a larger string
        // that embeds `text`, shift them by where the embedded copy begins.
        let full_text = "The setting=value pair";
        let prefix_len = full_text
            .find(text)
            .expect("full_text is built around text");
        let key_pos = key_match.start() + prefix_len;
        let key_end = key_match.end() + prefix_len;
        assert_eq!(&full_text[key_pos..key_end], key_match.as_str());

        // Or work with the matched text directly
        let original = key_match.as_str();
        println!("Key: {}", original);
        // Slicing the searched text with the match's range gives the same text
        let matched_portion = &text[key_match.range()];
        assert_eq!(matched_portion, key_match.as_str());
    }
}
Match provides byte offsets useful for text extraction and manipulation.
use regex::Regex;
// Splits a URL into named components; optional components come back as None.
fn main() {
    // URL parsing with multiple named groups.
    // In verbose mode (?x) an unescaped `#` starts a comment that runs to the
    // end of the line, so the literal `#` must be written as `\#`.
    let url_pattern = Regex::new(
        r"(?x)
        (?P<scheme>https?)://
        (?P<host>[^/:]+)
        (?::(?P<port>\d+))?
        (?P<path>/[^?]*)?
        (?:\?(?P<query>[^\#]*))?
        (?:\#(?P<fragment>.*))?
        ",
    )
    .unwrap();

    let text = "https://example.com:8080/path/to/page?param=value#section";
    if let Some(caps) = url_pattern.captures(text) {
        // Access components by name - clearer than indices
        println!("Scheme: {:?}", caps.name("scheme").map(|m| m.as_str()));
        println!("Host: {:?}", caps.name("host").map(|m| m.as_str()));
        println!("Port: {:?}", caps.name("port").map(|m| m.as_str()));
        println!("Path: {:?}", caps.name("path").map(|m| m.as_str()));
        println!("Query: {:?}", caps.name("query").map(|m| m.as_str()));
        println!("Fragment: {:?}", caps.name("fragment").map(|m| m.as_str()));
    }

    // Some groups may be None (optional)
    let no_port = "https://example.com/path";
    if let Some(caps) = url_pattern.captures(no_port) {
        println!("Port (absent): {:?}", caps.name("port")); // None
    }
}
Named groups make complex patterns self-documenting and robust to modification.
use regex::Regex;
// Walks every group of a match by index.
fn main() {
    let re = Regex::new(r"(\w+)\s+(\w+)\s+(\w+)").unwrap();
    let text = "one two three";
    if let Some(caps) = re.captures(text) {
        // caps.len() already counts group 0, so the valid indices are
        // 0..caps.len() (exclusive upper bound).
        for i in 0..caps.len() {
            if let Some(m) = caps.get(i) {
                println!("Group {}: '{}'", i, m.as_str());
            }
        }
        // Group 0: 'one two three'
        // Group 1: 'one'
        // Group 2: 'two'
        // Group 3: 'three'
    }
    // caps.len() gives the number of groups + 1
    // (including group 0 for the full match)
}
caps.len() and indexed iteration work for both named and unnamed groups.
use regex::Regex;
// Side-by-side summary of index-based and name-based access.
fn main() {
    let re = Regex::new(r"(?P<word>\w+)").unwrap();
    let text = "hello";
    if let Some(caps) = re.captures(text) {
        // get(i): index-based access
        // - i=0: full match
        // - i=1, 2, 3...: capture groups in order
        // - Returns None for non-existent or non-matching groups
        let by_index = caps.get(1);
        // name("str"): name-based access
        // - Uses group name from (?P<name>...) syntax
        // - Returns None for unknown names or non-matching groups
        let by_name = caps.name("word");
        // Both return Option<Match>
        // Both have same capabilities for the Match value
        // Both handle non-participating groups as None
        // "word" is group 1, so the two lookups agree.
        assert_eq!(by_index.map(|m| m.as_str()), by_name.map(|m| m.as_str()));
        // Key difference: access method
        // - get: numeric index, breaks if pattern changes
        // - name: symbolic name, stable across pattern changes
    }
}
| Method | Access By | Returns | Use Case |
|--------|-----------|---------|----------|
| get(0) | Index | Full match | Entire matched text |
| get(n) | Index | nth group | Ordered, simple patterns |
| name("str") | Name | Named group | Complex, evolving patterns |
Primary differences:
| Aspect | get(i) | name("str") |
|--------|----------|---------------|
| Access method | Numeric index | Symbolic name |
| Group 0 | Full match | N/A (names for capture groups only) |
| Stability | Breaks if groups reorder | Stable across pattern changes |
| Readability | Requires knowing order | Self-documenting |
| Performance | Slightly faster (array access) | Negligible difference (hash lookup) |
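When to use get(i): for the full match (get(0)), for short patterns with a few groups whose order is fixed, and in hot loops where skipping the name lookup matters.
When to use name("str"): for complex or evolving patterns, where self-documenting names keep downstream code working when groups are added or reordered.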
Key insight: Both methods return the same Option<Match> type with identical capabilities—byte positions, substring extraction, and match information. The choice between them is about maintainability, not functionality. Indexed access with get requires tracking group positions and updating all downstream code when groups are added or reordered. Named access with name uses symbolic identifiers that remain stable regardless of pattern modifications, making it the better choice for production code. The only exception is accessing the full match, which requires get(0) since named groups only cover capture groups. For patterns with optional groups, both methods return None when the group doesn't participate in a particular match, requiring the same Option handling either way.