How do I match patterns with regular expressions in Rust?
Walkthrough
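The regex crate provides regular expression support for Rust. It offers a mature, well-tested regex engine: a pattern is compiled once into a Regex value, which is then reused for matching. The syntax is similar to Perl-compatible regex (PCRE) but with some differences; most notably, look-around assertions and backreferences are not supported. In exchange, Rust's regex engine prioritizes safety and predictability: search time is linear in the length of the input, so there are no catastrophic-backtracking "bombs".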
Key concepts:
- `Regex::new()` — compile a pattern for reuse
- `is_match()` — check if pattern matches anywhere in text
- `find()` — locate match positions
- `captures()` — extract matched groups
- `replace()` / `replace_all()` — substitution
- `split()` / `splitn()` — tokenize by pattern
Regex patterns are compiled once and can be reused efficiently for multiple searches.
Code Example
# Cargo.toml
[dependencies]
regex = "1"

use regex::Regex;
/// Quick tour: compile a phone-number pattern once, then test and scan with it.
fn main() {
    let haystack = "Call me at 555-123-4567 or 555-987-6543";

    // The pattern is a fixed literal, so a compile failure would be a programming error.
    let phone = Regex::new(r"\d{3}-\d{3}-\d{4}").unwrap();

    // `is_match` only answers whether the pattern occurs somewhere in the text.
    if phone.is_match(haystack) {
        println!("Found a phone number!");
    }

    // `find_iter` walks over every non-overlapping match from left to right.
    phone
        .find_iter(haystack)
        .for_each(|hit| println!("Phone: {}", hit.as_str()));
}
Basic Matching
use regex::Regex;
/// Tour of the simplest pattern kinds: literals, flags, classes and shortcuts.
fn main() {
    // Literal text: matches wherever the substring occurs in the haystack.
    let literal = Regex::new(r"hello").unwrap();
    for haystack in ["hello world", "hi there"] {
        println!("Match 'hello': {}", literal.is_match(haystack));
    }

    // The inline `(?i)` flag switches on case-insensitive matching.
    let any_case = Regex::new(r"(?i)hello").unwrap();
    println!("Case-insensitive: {}", any_case.is_match("HELLO WORLD"));

    // Character class: any single character from the listed set.
    let vowel = Regex::new(r"[aeiou]").unwrap();
    for haystack in ["xyz", "cat"] {
        println!("Has vowel: {}", vowel.is_match(haystack));
    }

    // Negated class: any single character that is NOT in the set.
    let non_digit = Regex::new(r"[^0-9]").unwrap();
    for haystack in ["123", "12a3"] {
        println!("Has non-digit: {}", non_digit.is_match(haystack));
    }

    // `\d` is the digit shortcut.
    let digits = Regex::new(r"\d+").unwrap();
    println!("Has digits: {}", digits.is_match("abc123def"));

    // `\w` is the word-character shortcut; `find` returns the leftmost match.
    let word = Regex::new(r"\w+").unwrap();
    if let Some(first) = word.find("hello world") {
        println!("First word: {}", first.as_str());
    }
}
Finding Matches and Positions
use regex::Regex;
/// Shows how to locate matches: first match, all matches, and their offsets.
fn main() {
    let sentence = "The quick brown fox jumps over the lazy dog";

    // `\b` on both sides restricts the match to whole words of exactly four characters.
    let four_letter = Regex::new(r"\b\w{4}\b").unwrap(); // 4-letter words

    // `find` reports only the leftmost match, together with its offsets in the haystack.
    if let Some(first) = four_letter.find(sentence) {
        let (word, from, to) = (first.as_str(), first.start(), first.end());
        println!("First 4-letter word: '{}' at {}..{}", word, from, to);
    }

    // `find_iter` visits every match in order.
    println!("\nAll 4-letter words:");
    four_letter.find_iter(sentence).for_each(|hit| {
        println!(" '{}' at {}..{}", hit.as_str(), hit.start(), hit.end());
    });

    // Gather every word into a vector of slices borrowed from `sentence`.
    let any_word = Regex::new(r"\b\w+\b").unwrap();
    let mut words: Vec<&str> = Vec::new();
    for hit in any_word.find_iter(sentence) {
        words.push(hit.as_str());
    }
    println!("\nWords: {:?}", words);

    // A deliberately loose e-mail pattern, good enough for scanning free text.
    let contacts = "Contact: alice@example.com and bob@test.org";
    let email = Regex::new(r"\b[\w.+-]+@[\w.-]+\.[a-z]{2,}\b").unwrap();
    println!("\nEmails:");
    for hit in email.find_iter(contacts) {
        println!(" {}", hit.as_str());
    }
}
Capture Groups
use regex::Regex;
/// Demonstrates named, numbered and optional capture groups.
fn main() {
    // Named groups are read back by name; index 0 is always the whole match.
    let date = Regex::new(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})").unwrap();
    if let Some(parts) = date.captures("Date: 2024-03-15") {
        let whole = &parts[0];
        let (year, month, day) = (&parts["year"], &parts["month"], &parts["day"]);
        println!("Full match: {}", whole);
        println!("Year: {}", year);
        println!("Month: {}", month);
        println!("Day: {}", day);
    }

    // Unnamed groups are numbered from 1 in order of their opening parenthesis.
    let address = Regex::new(r"(\w+)@(\w+)\.(\w+)").unwrap();
    if let Some(parts) = address.captures("Email: user@example.com") {
        let (user, domain, tld) = (&parts[1], &parts[2], &parts[3]);
        println!("\nFull match: {}", &parts[0]);
        println!("User: {}", user);
        println!("Domain: {}", domain);
        println!("TLD: {}", tld);
    }

    // Group 2 sits inside an optional non-capturing group, so it may not take part in
    // the match. Indexing would panic in that case; `get` returns `None` instead.
    let number = Regex::new(r"(\d+)(?:\.(\d+))?").unwrap();
    for input in ["42", "3.14", "100.0"] {
        if let Some(parts) = number.captures(input) {
            let integer = &parts[1];
            let decimal = match parts.get(2) {
                Some(fraction) => fraction.as_str(),
                None => "0",
            };
            println!("Number: {} -> integer={}, decimal={}", input, integer, decimal);
        }
    }
}
Iterating Over Captures
use regex::Regex;
/// Shows `captures_iter`: visiting every match together with its capture groups.
fn main() {
    let text = "Name: Alice, Age: 30; Name: Bob, Age: 25";
    // Capture all name-age pairs
    let re = Regex::new(r"Name: (?P<name>\w+), Age: (?P<age>\d+)").unwrap();
    for caps in re.captures_iter(text) {
        println!("{} is {} years old", &caps["name"], &caps["age"]);
    }

    // Extract all key-value pairs
    let text = "key1=value1; key2=value2; key3=value3";
    let re = Regex::new(r"(\w+)=(\w+)").unwrap();
    // `caps[1]` borrows from `caps`, which is dropped at the end of the closure, so it
    // cannot be returned. `Match::as_str` borrows from the haystack instead, so the
    // slices can outlive `caps`. Both groups always participate in a match.
    let pairs: Vec<(&str, &str)> = re
        .captures_iter(text)
        .filter_map(|caps| Some((caps.get(1)?.as_str(), caps.get(2)?.as_str())))
        .collect();
    println!("\nKey-value pairs: {:?}", pairs);

    // Parse URLs
    let text = "https://example.com/path and http://test.org:8080/api";
    let re = Regex::new(
        r"(?P<scheme>https?)://(?P<host>[^/:]+)(?::(?P<port>\d+))?(?P<path>/[^\s]*)?",
    )
    .unwrap();
    println!("\nURLs:");
    for caps in re.captures_iter(text) {
        let scheme = &caps["scheme"];
        let host = &caps["host"];
        // When no port is given, fall back to the scheme's well-known port.
        let default_port = if scheme == "https" { "443" } else { "80" };
        // `port` and `path` are optional groups: `name` returns `None` when absent.
        let port = caps.name("port").map_or(default_port, |m| m.as_str());
        let path = caps.name("path").map_or("/", |m| m.as_str());
        println!(" {}://{}:{}{}", scheme, host, port, path);
    }
}
Replacement
use regex::Regex;
/// Demonstrates `replace` / `replace_all` with literal, group-based and closure replacements.
fn main() {
    // Simple replacement
    let re = Regex::new(r"foo").unwrap();
    let result = re.replace("foo bar foo baz", "qux");
    println!("Replace first: {}", result); // qux bar foo baz

    // Replace all
    let result = re.replace_all("foo bar foo baz", "qux");
    println!("Replace all: {}", result); // qux bar qux baz

    // Replace with captured groups. The braces in `${1}` make the end of the group
    // reference explicit, which matters when it is followed by letters, digits or `_`.
    let re = Regex::new(r"(\w+)@(\w+)\.com").unwrap();
    let text = "Contact alice@example.com and bob@test.org";
    let result = re.replace_all(text, "[EMAIL:${1}@${2}.com]");
    println!("\nReplace with captures: {}", result);

    // Replace with named groups. Both words must start with a capital letter;
    // a plain `(\w+) (\w+)` would also pair up "and Jane" and garble the result.
    let re = Regex::new(r"\b(?P<first>[A-Z]\w*) (?P<last>[A-Z]\w*)\b").unwrap();
    let text = "John Doe and Jane Smith";
    let result = re.replace_all(text, "${last}, ${first}");
    println!("Name swap: {}", result); // Doe, John and Smith, Jane

    // Replace with closure: the closure receives the captures of each match and
    // returns the replacement text.
    let re = Regex::new(r"\d+").unwrap();
    let text = "Values: 10, 20, 30";
    let result = re.replace_all(text, |caps: &regex::Captures| {
        // The match consists of decimal digits only; these sample values fit in i32.
        let num: i32 = caps[0].parse().unwrap();
        (num * 2).to_string()
    });
    println!("Double values: {}", result); // Values: 20, 40, 60

    // Case conversion: upper-case the first character of every word.
    let re = Regex::new(r"\b\w").unwrap();
    let text = "hello world";
    let result = re.replace_all(text, |caps: &regex::Captures| caps[0].to_uppercase());
    println!("Title case: {}", result); // Hello World
}
Splitting
use regex::Regex;
/// Demonstrates tokenizing text with `split` and `splitn`.
fn main() {
    // Split by pattern. The same compiled regex is reused for the limited split below.
    let whitespace = Regex::new(r"\s+").unwrap();
    let parts: Vec<&str> = whitespace.split("hello world\t\tfrom\nrust").collect();
    println!("Split by whitespace: {:?}", parts);

    // Split with limit: at most 3 pieces, the last one holds the unsplit remainder.
    let parts: Vec<&str> = whitespace.splitn("a b c d e", 3).collect();
    println!("Split with limit: {:?}", parts); // ["a", "b", "c d e"]

    // Split by multiple delimiters
    let re = Regex::new(r"[,;|]").unwrap();
    let parts: Vec<&str> = re.split("a,b;c|d,e;f").collect();
    println!("Split by delimiters: {:?}", parts);

    // Parse CSV-like data: the pattern swallows the whitespace after each comma.
    let re = Regex::new(r",\s*").unwrap();
    let line = "name, age, city, country";
    let fields: Vec<&str> = re.split(line).collect();
    println!("CSV fields: {:?}", fields);

    // Extract words. A delimiter at the very start or end of the text produces an
    // empty piece (here after the final "?"), so empty pieces are filtered out.
    let re = Regex::new(r"\W+").unwrap(); // Non-word characters
    let words: Vec<&str> = re
        .split("Hello, world! How are you?")
        .filter(|word| !word.is_empty())
        .collect();
    println!("Words: {:?}", words); // ["Hello", "world", "How", "are", "you"]
}
Common Patterns
use regex::Regex;
/// A collection of frequently needed validation patterns.
///
/// Every pattern is anchored with `^` and `$` so that the whole input has to match,
/// not just a substring of it.
fn main() {
    // Email validation
    let email_re = Regex::new(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$").unwrap();
    for email in &["user@example.com", "invalid", "test@org"] {
        println!("Email '{}' valid: {}", email, email_re.is_match(email));
    }

    // Phone number (US format)
    let phone_re = Regex::new(r"^\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}$").unwrap();
    for phone in &["555-123-4567", "(555) 123 4567", "5551234567", "invalid"] {
        println!("Phone '{}' valid: {}", phone, phone_re.is_match(phone));
    }

    // IP Address (IPv4). A plain `\d{1,3}` would accept out-of-range octets such as
    // 256, so each octet is limited to 0-255. `[0-9]` is used instead of `\d` because
    // `\d` also matches non-ASCII digits. Octets with leading zeros are rejected.
    let octet = r"(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])";
    let ip_re = Regex::new(&format!(r"^(?:{o}\.){{3}}{o}$", o = octet)).unwrap();
    for ip in &["192.168.1.1", "10.0.0.1", "invalid", "256.1.1.1"] {
        println!("IP '{}' valid: {}", ip, ip_re.is_match(ip));
    }

    // URL
    let url_re =
        Regex::new(r"^https?://[\w.-]+(?:/[\w./-]*)?(?:\?[\w=&%-]*)?(?:#[\w-]+)?$").unwrap();
    for url in &[
        "https://example.com",
        "http://test.org/path?query=1",
        "invalid",
    ] {
        println!("URL '{}' valid: {}", url, url_re.is_match(url));
    }

    // Username (alphanumeric + underscore, 3-20 chars)
    let username_re = Regex::new(r"^[a-zA-Z0-9_]{3,20}$").unwrap();
    for name in &["alice", "bob_123", "ab", "this_is_too_long_username"] {
        println!("Username '{}' valid: {}", name, username_re.is_match(name));
    }

    // Hex color
    let hex_re = Regex::new(r"^#[0-9a-fA-F]{6}$").unwrap();
    for color in &["#FF0000", "#00ff00", "#gggggg", "red"] {
        println!("Hex color '{}' valid: {}", color, hex_re.is_match(color));
    }
}
Anchors and Boundaries
use regex::Regex;
/// Demonstrates the zero-width assertions `^`, `$`, `\b` and the multiline flag.
fn main() {
    // `^` pins the match to the start of the haystack.
    let starts_with = Regex::new(r"^Hello").unwrap();
    for haystack in ["Hello World", "Say Hello"] {
        println!("Starts with Hello: {}", starts_with.is_match(haystack));
    }

    // `$` pins the match to the end of the haystack.
    let ends_with = Regex::new(r"World$").unwrap();
    for haystack in ["Hello World", "World Hello"] {
        println!("Ends with World: {}", ends_with.is_match(haystack));
    }

    // `\b` matches between a word character and a non-word character (or an edge of
    // the text), so "cat" inside "catalog" is rejected.
    let whole_cat = Regex::new(r"\bcat\b").unwrap();
    println!("\nWord 'cat': {}", whole_cat.is_match("cat"));
    println!("Word 'cat': {}", whole_cat.is_match("catalog"));
    println!("Word 'cat': {}", whole_cat.is_match("my cat is"));

    // Collect every whole word of a sentence.
    let sentence = "The cat sat on the category mat";
    let any_word = Regex::new(r"\b\w+\b").unwrap();
    let mut words: Vec<&str> = Vec::new();
    for hit in any_word.find_iter(sentence) {
        words.push(hit.as_str());
    }
    println!("\nWords: {:?}", words);

    // With `(?m)`, `^` also matches at the start of every line.
    let lines = "Hello\nWorld\nHello";
    let line_start = Regex::new(r"(?m)^Hello").unwrap();
    println!("\nLines starting with Hello:");
    line_start
        .find_iter(lines)
        .for_each(|hit| println!(" {}", hit.as_str()));
}
Quantifiers
use regex::Regex;
/// Demonstrates the repetition operators and greedy versus non-greedy matching.
fn main() {
    // `*` : the preceding item may occur zero or more times.
    let star = Regex::new(r"ab*c").unwrap();
    for candidate in ["ac", "abc", "abbbc", "ab"] {
        println!("'{}' matches 'ab*c': {}", candidate, star.is_match(candidate));
    }

    // `+` : the preceding item must occur at least once.
    let plus = Regex::new(r"ab+c").unwrap();
    println!("\n'ac' matches 'ab+c': {}", plus.is_match("ac"));
    println!("'abc' matches 'ab+c': {}", plus.is_match("abc"));

    // `?` : the preceding item is optional.
    let optional = Regex::new(r"colou?r").unwrap();
    println!("\n'color' matches: {}", optional.is_match("color"));
    println!("'colour' matches: {}", optional.is_match("colour"));

    // `{n}` : exactly n repetitions.
    let exactly_four = Regex::new(r"\d{4}").unwrap();
    println!("\n'1234' matches 4 digits: {}", exactly_four.is_match("1234"));
    println!("'123' matches 4 digits: {}", exactly_four.is_match("123"));

    // `{n,m}` : between n and m repetitions, taking as many as possible.
    let two_to_four = Regex::new(r"\d{2,4}").unwrap();
    for candidate in ["1", "12", "123", "1234", "12345"] {
        match two_to_four.find(candidate) {
            Some(hit) => println!("'{}' found 2-4 digits: '{}'", candidate, hit.as_str()),
            None => {}
        }
    }

    // A trailing `?` makes a quantifier non-greedy: it stops at the first possible end
    // instead of the last one.
    let markup = "<div>content</div><div>more</div>";
    let greedy = Regex::new(r"<div>.*</div>").unwrap();
    let lazy = Regex::new(r"<div>.*?</div>").unwrap();
    let greedy_hit = greedy.find(markup).unwrap();
    let lazy_hit = lazy.find(markup).unwrap();
    println!("\nGreedy match: {}", greedy_hit.as_str());
    println!("Non-greedy match: {}", lazy_hit.as_str());
}
Character Classes
use regex::Regex;
/// Demonstrates predefined, negated and custom character classes.
///
/// The predefined classes are Unicode-aware by default: `\d`, `\w` and `\s` match
/// more than their ASCII counterparts `[0-9]`, `[a-zA-Z0-9_]` and `[ \t\r\n\f\v]`.
/// Spell out the ASCII class when only ASCII should be accepted.
fn main() {
    // Predefined classes
    let digit_re = Regex::new(r"\d").unwrap(); // any decimal digit
    let word_re = Regex::new(r"\w").unwrap(); // any letter, digit or underscore
    let space_re = Regex::new(r"\s").unwrap(); // any whitespace
    println!("Digit: {}", digit_re.is_match("5"));
    println!("Word char: {}", word_re.is_match("_"));
    println!("Whitespace: {}", space_re.is_match("\t"));

    // The upper-case variants are the complements of the classes above.
    let not_digit_re = Regex::new(r"\D").unwrap(); // anything but a digit
    let not_word_re = Regex::new(r"\W").unwrap(); // anything but a word character
    let not_space_re = Regex::new(r"\S").unwrap(); // anything but whitespace
    println!("\nNon-digit: {}", not_digit_re.is_match("a"));
    println!("Non-word: {}", not_word_re.is_match("!"));
    println!("Non-space: {}", not_space_re.is_match("a"));

    // Custom classes list the accepted characters explicitly.
    let vowel_re = Regex::new(r"[aeiou]").unwrap();
    let hex_re = Regex::new(r"[0-9a-fA-F]").unwrap();
    println!("\nVowel: {}", vowel_re.is_match("e"));
    println!("Hex: {}", hex_re.is_match("F"));

    // A leading `^` inside the brackets negates a custom class.
    let consonant_re = Regex::new(r"[^aeiou]").unwrap();
    println!("Non-vowel: {}", consonant_re.is_match("x"));

    // Ranges; `find` returns the leftmost match as an `Option`, printed here with `{:?}`.
    let lower_run = Regex::new(r"[a-z]+").unwrap();
    let alnum_run = Regex::new(r"[a-zA-Z0-9]+").unwrap();
    let lower_hit = lower_run.find("Hello");
    let alnum_hit = alnum_run.find("Hello123");
    println!("\nLowercase match: {:?}", lower_hit);
    println!("Alphanum match: {:?}", alnum_hit);
}
Lookahead and Lookbehind
use regex::Regex;
/// Emulates look-around with the `regex` crate.
///
/// The `regex` crate does NOT support look-ahead `(?=...)` / `(?!...)` or look-behind
/// `(?<=...)` / `(?<!...)`: `Regex::new` returns an error for such patterns, so
/// `unwrap()` would panic. The usual workarounds are shown below:
///
/// * positive assertions: match the context as part of the pattern and put the text
///   you actually want into a capture group;
/// * negative assertions: match the core pattern, then inspect the text around the
///   match in ordinary Rust code.
///
/// A crate with a backtracking engine is needed only when real look-around is
/// unavoidable.
fn main() {
    // Instead of `\w+(?= world)`: a word followed by " world", keeping only the word.
    let re = Regex::new(r"(\w+) world").unwrap();
    if let Some(m) = re.captures("Hello world").and_then(|caps| caps.get(1)) {
        println!("Word before 'world': {}", m.as_str());
    }

    // Instead of `foo(?!bar)`: find "foo", then reject matches followed by "bar".
    let re = Regex::new(r"foo").unwrap();
    for text in ["foobar", "foobaz", "foo"] {
        let hit = re
            .find_iter(text)
            .find(|m| !text[m.end()..].starts_with("bar"));
        if let Some(m) = hit {
            println!("'{}' matched in '{}'", m.as_str(), text);
        }
    }

    // Instead of `(?<=\$)\d+`: match the "$" too, but capture only the digits.
    let re = Regex::new(r"\$(\d+)").unwrap();
    let text = "Price: $100, Tax: $20";
    println!("\nAmounts:");
    for caps in re.captures_iter(text) {
        println!(" {}", &caps[1]);
    }

    // Instead of `(?<!foo)bar`: find "bar", then reject matches preceded by "foo".
    let re = Regex::new(r"bar").unwrap();
    for text in ["foobar", "bazbar", "bar"] {
        let hit = re
            .find_iter(text)
            .find(|m| !text[..m.start()].ends_with("foo"));
        if let Some(m) = hit {
            println!("'{}' matched in '{}'", m.as_str(), text);
        }
    }
}
Performance Tips
use regex::Regex;
use std::time::Instant;
/// Performance-oriented usage: reuse compiled patterns, match on bytes, use `RegexSet`.
fn main() {
    // Compile once, use many times
    let re = Regex::new(r"\d{4}-\d{2}-\d{2}").unwrap();
    // Owned strings are enough here; there is no need to leak them to get `&'static str`.
    // The day is zero-padded so that every line really contains a `YYYY-MM-DD` date.
    let texts: Vec<String> = (0..1000)
        .map(|i| format!("Date {} is 2024-03-{:02}", i, (i % 28) + 1))
        .collect();

    // Efficient: reuse compiled regex
    let start = Instant::now();
    let count = texts.iter().filter(|t| re.is_match(t)).count();
    println!("Matches: {}, Time: {:?}", count, start.elapsed());

    // Use `regex::bytes::Regex` to search `&[u8]` data; `regex::Regex` accepts only `&str`.
    let re = regex::bytes::Regex::new(r"hello").unwrap();
    let bytes = b"hello world";
    if re.is_match(bytes) {
        println!("\nMatch in bytes!");
    }

    // Use RegexSet for multiple patterns: it reports WHICH patterns match,
    // as indices into the list passed to `RegexSet::new`.
    let set = regex::RegexSet::new([
        r"\bfoo\b",
        r"\bbar\b",
        r"\bbaz\b",
    ])
    .unwrap();
    let text = "foo and bar";
    let matches: Vec<_> = set.matches(text).into_iter().collect();
    println!("\nPatterns matched: {:?}", matches); // [0, 1]
}
Summary
- Use `Regex::new(pattern)` to compile a regex (returns `Result`)
- Use raw strings `r"..."` to avoid escaping backslashes
- `is_match(text)` returns true if pattern matches anywhere
- `find(text)` returns first match position
- `find_iter(text)` iterates over all matches
- `captures(text)` extracts capture groups
- `captures_iter(text)` iterates over all captures
- Named groups: `(?P<name>...)` accessed with `caps["name"]`
- Numbered groups: `(...)` accessed with `caps[1]`, `caps[2]`, etc.
- `replace(text, replacement)` replaces first match
- `replace_all(text, replacement)` replaces all matches
- Use `$1`, `$2` or `$name` in replacement strings for captured groups
- `split(text)` splits by pattern
- `splitn(text, n)` splits with limit
- Character classes: `\d`, `\w`, `\s`, `\D`, `\W`, `\S`
- Anchors: `^` start, `$` end, `\b` word boundary
- Quantifiers: `*`, `+`, `?`, `{n}`, `{n,}`, `{n,m}`
- Non-greedy: `*?`, `+?`, `??`
- Case-insensitive: `(?i)` flag
- Multiline: `(?m)` flag for `^` and `$` to match line boundaries
- Compile patterns once and reuse for performance
- Use `RegexSet` to match multiple patterns efficiently
