Loading page…
Rust walkthroughs
Loading page…
Regular expressions are powerful tools for pattern matching and text processing. The regex crate provides a mature, high-performance regex engine with a clean API. Unlike some languages where regex is built-in, Rust requires this external crate—but it offers excellent performance and safety guarantees.
Important concepts:
- Compile a pattern once with Regex::new() and reuse it for efficiency
- Use is_match() to check if a pattern exists, find() to locate matches, captures() to extract groups
- Raw strings r"..." avoid escaping backslashes

The regex engine uses finite automata internally, guaranteeing linear-time matching regardless of input complexity.
# Cargo.toml
[dependencies]
regex = "1.10"

use regex::Regex;
/// Walks through the core `Regex` operations on small sample strings:
/// testing for a match, iterating over matches, positional and named capture
/// groups, replacement, and splitting.
fn main() {
    // --- Matching -----------------------------------------------------------
    let phone_re = Regex::new(r"\d{3}-\d{3}-\d{4}").unwrap();
    let haystack = "Call me at 555-123-4567 or 800-999-0000";

    // Does the pattern occur anywhere in the text?
    if phone_re.is_match(haystack) {
        println!("Found a phone number!");
    }

    // Visit every non-overlapping match, in order of appearance.
    for number in phone_re.find_iter(haystack).map(|hit| hit.as_str()) {
        println!("Phone: {}", number);
    }

    // --- Positional capture groups ------------------------------------------
    let email_re = Regex::new(r"(\w+)@(\w+)\.(\w+)").unwrap();
    let address = "user@example.com";

    if let Some(groups) = email_re.captures(address) {
        // Group 0 is always the whole match; groups 1.. follow the parentheses.
        println!("Full match: {}", &groups[0]); // user@example.com
        println!("Username: {}", &groups[1]); // user
        println!("Domain: {}", &groups[2]); // example
        println!("TLD: {}", &groups[3]); // com
    }

    // --- Named capture groups -----------------------------------------------
    let date_re = Regex::new(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})").unwrap();
    let iso_date = "2024-03-15";

    if let Some(parts) = date_re.captures(iso_date) {
        let (year, month, day) = (&parts["year"], &parts["month"], &parts["day"]);
        println!("Year: {}", year);
        println!("Month: {}", month);
        println!("Day: {}", day);
    }

    // --- Replacement --------------------------------------------------------
    // Every phone number in the text is swapped for a fixed mask.
    println!(
        "Censored: {}",
        phone_re.replace_all(haystack, "XXX-XXX-XXXX")
    );

    // `$1` / `$2` in the replacement refer back to the captured groups.
    let word_pair_re = Regex::new(r"(\w+)\s+(\w+)").unwrap();
    println!(
        "Swapped: {}",
        word_pair_re.replace("hello world", "$2 $1")
    ); // world hello

    // --- Splitting ----------------------------------------------------------
    // Any run of whitespace acts as a single separator.
    let whitespace_re = Regex::new(r"\s+").unwrap();
    let parts = whitespace_re
        .split("one two\tthree\nfour")
        .collect::<Vec<&str>>();
    println!("Parts: {:?}", parts); // ["one", "two", "three", "four"]
}

use regex::Regex;
/// One parsed log line, split into its three textual parts.
///
/// Deriving `Debug`, `Clone`, `PartialEq` and `Eq` lets entries be printed
/// with `{:?}`, duplicated, and compared directly in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
struct LogEntry {
    /// Date-and-time portion of the line, kept as text
    /// (e.g. `2024-03-15 10:30:45`).
    timestamp: String,
    /// Severity tag taken from between the square brackets (e.g. `INFO`).
    level: String,
    /// Everything that follows the level tag.
    message: String,
}
fn parse_log(line: &str) -> Option<LogEntry> {
// Pattern: 2024-03-15 10:30:45 [INFO] Application started
let pattern = Regex::new(
r"(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\s+\[(?P<level>\w+)\]\s+(?P<message>.+)"
).ok()?;
pattern.captures(line).map(|caps| LogEntry {
timestamp: caps["timestamp"].to_string(),
level: caps["level"].to_string(),
message: caps["message"].to_string(),
})
}
fn main() {
let log_lines = vec![
"2024-03-15 10:30:45 [INFO] Application started",
"2024-03-15 10:30:46 [WARN] Configuration file not found",
"2024-03-15 10:30:47 [ERROR] Connection refused",
"Invalid log line",
];
for line in log_lines {
if let Some(entry) = parse_log(line) {
println!("[{}] {}: {}", entry.level, entry.timestamp, entry.message);
}
}
}Regex::new(); use raw strings r"..." to avoid double-escapingis_match() checks existence; find() returns match locations; captures() extracts groups(?P<name>...) allow accessing groups by name with caps["name"]replace_all() substitutes matches with replacement text; use $1, $2 for capture referencessplit() divides text by pattern matcheslazy_static or once_cell for global patterns)