How do I match patterns with regular expressions in Rust?

Walkthrough

The regex crate provides regular expression support for Rust. It offers a mature, well-tested regex engine with both compiled and one-shot matching APIs. The syntax is similar to Perl-compatible regex (PCRE) but with some differences. Rust's regex engine prioritizes safety and predictability: there is no catastrophic backtracking, and search time is bounded (linear in the length of the text being searched).

Key concepts:

  1. Regex::new() — compile a pattern for reuse
  2. is_match() — check if pattern matches anywhere in text
  3. find() — locate match positions
  4. captures() — extract matched groups
  5. replace() / replace_all() — substitution
  6. split() / splitn() — tokenize by pattern

Regex patterns are compiled once and can be reused efficiently for multiple searches.

Code Example

# Cargo.toml
[dependencies]
regex = "1"

// src/main.rs
use regex::Regex;
 
fn main() {
    // Sample input containing two US-style phone numbers.
    let haystack = "Call me at 555-123-4567 or 555-987-6543";

    // Build the matcher once; the literal pattern is known to be valid,
    // so unwrapping the compile result cannot fail here.
    let phone = Regex::new(r"\d{3}-\d{3}-\d{4}").unwrap();

    // Yes/no answer: does the pattern occur anywhere in the input?
    if phone.is_match(haystack) {
        println!("Found a phone number!");
    }

    // Walk every non-overlapping occurrence, left to right.
    phone
        .find_iter(haystack)
        .map(|hit| hit.as_str())
        .for_each(|number| println!("Phone: {}", number));
}

Basic Matching

use regex::Regex;
 
fn main() {
    // A plain literal matches wherever that exact text occurs.
    let literal = Regex::new(r"hello").unwrap();
    for input in ["hello world", "hi there"] {
        println!("Match 'hello': {}", literal.is_match(input));
    }

    // The inline (?i) flag switches on case-insensitive matching.
    let any_case = Regex::new(r"(?i)hello").unwrap();
    let shouting = any_case.is_match("HELLO WORLD");
    println!("Case-insensitive: {}", shouting);

    // A bracketed class matches any single listed character.
    let vowel = Regex::new(r"[aeiou]").unwrap();
    for input in ["xyz", "cat"] {
        println!("Has vowel: {}", vowel.is_match(input));
    }

    // A leading ^ inside the brackets inverts the class.
    let non_digit = Regex::new(r"[^0-9]").unwrap();
    for input in ["123", "12a3"] {
        println!("Has non-digit: {}", non_digit.is_match(input));
    }

    // \d is the shorthand for a digit character.
    let digits = Regex::new(r"\d+").unwrap();
    println!("Has digits: {}", digits.is_match("abc123def"));

    // \w is the shorthand for a word character; find() yields the
    // leftmost match, if there is one.
    let word = Regex::new(r"\w+").unwrap();
    match word.find("hello world") {
        Some(first) => println!("First word: {}", first.as_str()),
        None => {}
    }
}

Finding Matches and Positions

use regex::Regex;
 
fn main() {
    let sentence = "The quick brown fox jumps over the lazy dog";

    // Exactly four word characters fenced by word boundaries.
    let four_letter = Regex::new(r"\b\w{4}\b").unwrap();

    // Leftmost hit only: report its text and byte offsets.
    if let Some((word, span)) = four_letter
        .find(sentence)
        .map(|hit| (hit.as_str(), hit.range()))
    {
        println!("First 4-letter word: '{}' at {}..{}", word, span.start, span.end);
    }

    // Every non-overlapping hit, in order of appearance.
    println!("\nAll 4-letter words:");
    four_letter.find_iter(sentence).for_each(|hit| {
        println!("  '{}' at {}..{}", hit.as_str(), hit.start(), hit.end());
    });

    // Gather every word of the sentence into a vector.
    let any_word = Regex::new(r"\b\w+\b").unwrap();
    let mut words: Vec<&str> = Vec::new();
    for hit in any_word.find_iter(sentence) {
        words.push(hit.as_str());
    }
    println!("\nWords: {:?}", words);

    // Pull e-mail-looking tokens out of free text.
    let email = Regex::new(r"\b[\w.+-]+@[\w.-]+\.[a-z]{2,}\b").unwrap();
    let contacts = "Contact: alice@example.com and bob@test.org";
    println!("\nEmails:");
    email
        .find_iter(contacts)
        .for_each(|hit| println!("  {}", hit.as_str()));
}

Capture Groups

use regex::Regex;
 
fn main() {
    // Groups declared with (?P<name>...) can be looked up by name.
    let date_re = Regex::new(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})").unwrap();
    if let Some(parts) = date_re.captures("Date: 2024-03-15") {
        // Index 0 is always the whole match.
        println!("Full match: {}", &parts[0]);
        for (label, group) in [("Year", "year"), ("Month", "month"), ("Day", "day")] {
            println!("{}: {}", label, &parts[group]);
        }
    }

    // Unnamed groups are numbered from 1 in order of their opening parenthesis.
    let email_re = Regex::new(r"(\w+)@(\w+)\.(\w+)").unwrap();
    if let Some(parts) = email_re.captures("Email: user@example.com") {
        println!("\nFull match: {}", &parts[0]);
        println!("User: {}", &parts[1]);
        println!("Domain: {}", &parts[2]);
        println!("TLD: {}", &parts[3]);
    }

    // A group inside an optional section may not participate in the match;
    // get() returns None in that case instead of panicking like indexing would.
    let number_re = Regex::new(r"(\d+)(?:\.(\d+))?").unwrap();
    for text in ["42", "3.14", "100.0"] {
        if let Some(parts) = number_re.captures(text) {
            let decimal = match parts.get(2) {
                Some(fraction) => fraction.as_str(),
                None => "0",
            };
            let integer = &parts[1];
            println!("Number: {} -> integer={}, decimal={}", text, integer, decimal);
        }
    }
}

Iterating Over Captures

use regex::Regex;
 
fn main() {
    let text = "Name: Alice, Age: 30; Name: Bob, Age: 25";

    // Capture all name-age pairs
    let re = Regex::new(r"Name: (?P<name>\w+), Age: (?P<age>\d+)").unwrap();

    for caps in re.captures_iter(text) {
        println!("{} is {} years old", &caps["name"], &caps["age"]);
    }

    // Extract all key-value pairs
    let text = "key1=value1; key2=value2; key3=value3";
    let re = Regex::new(r"(\w+)=(\w+)").unwrap();

    // Use get() rather than caps[i] here: indexing yields a &str that borrows
    // from the `caps` value itself, which is dropped at the end of the closure,
    // so the pair could not be returned. get() hands back a Match whose
    // as_str() borrows from the searched text instead.
    // Both groups are mandatory in the pattern, so the "" fallback is never
    // actually used; it only avoids an unwrap.
    let pairs: Vec<(&str, &str)> = re
        .captures_iter(text)
        .map(|caps| {
            let key = caps.get(1).map_or("", |m| m.as_str());
            let value = caps.get(2).map_or("", |m| m.as_str());
            (key, value)
        })
        .collect();

    println!("\nKey-value pairs: {:?}", pairs);

    // Parse URLs
    let text = "https://example.com/path and http://test.org:8080/api";
    let re = Regex::new(
        r"(?P<scheme>https?)://(?P<host>[^/:]+)(?::(?P<port>\d+))?(?P<path>/[^\s]*)?"
    ).unwrap();

    println!("\nURLs:");
    for caps in re.captures_iter(text) {
        let scheme = &caps["scheme"];
        let host = &caps["host"];
        // The fallback port depends on the scheme: 443 for https, 80 for http.
        let default_port = if scheme == "https" { "443" } else { "80" };
        // port and path are optional groups, so look them up with name(),
        // which returns None when the group did not participate.
        let port = caps.name("port").map_or(default_port, |m| m.as_str());
        let path = caps.name("path").map_or("/", |m| m.as_str());
        println!("  {}://{}:{}{}", scheme, host, port, path);
    }
}

Replacement

use regex::Regex;
 
fn main() {
    let foo = Regex::new(r"foo").unwrap();
    let sample = "foo bar foo baz";

    // replace() touches only the leftmost match.
    let first_only = foo.replace(sample, "qux");
    println!("Replace first: {}", first_only);  // qux bar foo baz

    // replace_all() rewrites every non-overlapping match.
    let everywhere = foo.replace_all(sample, "qux");
    println!("Replace all: {}", everywhere);  // qux bar qux baz

    // $1, $2, ... in the replacement refer to numbered groups.
    // Only addresses ending in ".com" match this pattern.
    let dot_com = Regex::new(r"(\w+)@(\w+)\.com").unwrap();
    let tagged = dot_com.replace_all(
        "Contact alice@example.com and bob@test.org",
        "[EMAIL:$1@$2.com]",
    );
    println!("\nReplace with captures: {}", tagged);

    // $name in the replacement refers to a named group.
    let full_name = Regex::new(r"(?P<first>\w+) (?P<last>\w+)").unwrap();
    let swapped = full_name.replace_all("John Doe and Jane Smith", "$last, $first");
    println!("Name swap: {}", swapped);  // Doe, John and Smith, Jane

    // A closure can compute the replacement text from each match.
    let number = Regex::new(r"\d+").unwrap();
    let doubled = number.replace_all("Values: 10, 20, 30", |caps: &regex::Captures| {
        let doubled_value = caps[0].parse::<i32>().unwrap() * 2;
        doubled_value.to_string()
    });
    println!("Double values: {}", doubled);  // Values: 20, 40, 60

    // Upper-case the first character of every word.
    let initial = Regex::new(r"\b\w").unwrap();
    let titled = initial.replace_all("hello world", |caps: &regex::Captures| {
        caps[0].to_uppercase()
    });
    println!("Title case: {}", titled);  // Hello World
}

Splitting

use regex::Regex;
 
fn main() {
    // Split by pattern: any run of whitespace acts as one separator.
    // The same compiled regex is reused for the limited split below.
    let whitespace = Regex::new(r"\s+").unwrap();
    let parts: Vec<&str> = whitespace.split("hello   world\t\tfrom\nrust").collect();
    println!("Split by whitespace: {:?}", parts);

    // Split with limit: at most 3 pieces, the last one holds the remainder.
    let parts: Vec<&str> = whitespace.splitn("a b c d e", 3).collect();
    println!("Split with limit: {:?}", parts);  // ["a", "b", "c d e"]

    // Split by multiple delimiters
    let re = Regex::new(r"[,;|]").unwrap();
    let parts: Vec<&str> = re.split("a,b;c|d,e;f").collect();
    println!("Split by delimiters: {:?}", parts);

    // Parse CSV-like data: a comma plus any following whitespace.
    let re = Regex::new(r",\s*").unwrap();
    let line = "name, age, city, country";
    let fields: Vec<&str> = re.split(line).collect();
    println!("CSV fields: {:?}", fields);

    // Extract words by splitting on runs of non-word characters.
    // split() yields an empty string when the text starts or ends with a
    // separator (here the trailing "?"), so empty pieces are filtered out.
    let re = Regex::new(r"\W+").unwrap();
    let words: Vec<&str> = re
        .split("Hello, world! How are you?")
        .filter(|word| !word.is_empty())
        .collect();
    println!("Words: {:?}", words);  // ["Hello", "world", "How", "are", "you"]
}

Common Patterns

use regex::Regex;
 
fn main() {
    // Email validation (a pragmatic shape check, not full RFC compliance)
    let email_re = Regex::new(
        r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
    ).unwrap();

    for email in &["user@example.com", "invalid", "test@org"] {
        println!("Email '{}' valid: {}", email, email_re.is_match(email));
    }

    // Phone number (US format)
    let phone_re = Regex::new(
        r"^\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}$"
    ).unwrap();

    for phone in &["555-123-4567", "(555) 123 4567", "5551234567", "invalid"] {
        println!("Phone '{}' valid: {}", phone, phone_re.is_match(phone));
    }

    // IPv4 address. Each octet must be in 0..=255, so a plain \d{1,3} is not
    // enough (it would accept "256.1.1.1"). The alternation spells out the
    // allowed ranges: 250-255, 200-249, 100-199, and 0-99.
    let ip_re = Regex::new(
        r"^(?:(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])$"
    ).unwrap();

    for ip in &["192.168.1.1", "10.0.0.1", "invalid", "256.1.1.1"] {
        println!("IP '{}' valid: {}", ip, ip_re.is_match(ip));
    }

    // URL: scheme, host, then optional path, query string and fragment
    let url_re = Regex::new(
        r"^https?://[\w.-]+(?:/[\w./-]*)?(?:\?[\w=&%-]*)?(?:#[\w-]+)?$"
    ).unwrap();

    for url in &[
        "https://example.com",
        "http://test.org/path?query=1",
        "invalid",
    ] {
        println!("URL '{}' valid: {}", url, url_re.is_match(url));
    }

    // Username (alphanumeric + underscore, 3-20 chars)
    let username_re = Regex::new(r"^[a-zA-Z0-9_]{3,20}$").unwrap();
    for name in &["alice", "bob_123", "ab", "this_is_too_long_username"] {
        println!("Username '{}' valid: {}", name, username_re.is_match(name));
    }

    // Hex color: '#' followed by exactly six hexadecimal digits
    let hex_re = Regex::new(r"^#[0-9a-fA-F]{6}$").unwrap();
    for color in &["#FF0000", "#00ff00", "#gggggg", "red"] {
        println!("Hex color '{}' valid: {}", color, hex_re.is_match(color));
    }
}

Anchors and Boundaries

use regex::Regex;
 
fn main() {
    // ^ pins the match to the beginning of the text.
    let starts = Regex::new(r"^Hello").unwrap();
    for input in ["Hello World", "Say Hello"] {
        println!("Starts with Hello: {}", starts.is_match(input));
    }

    // $ pins the match to the end of the text.
    let ends = Regex::new(r"World$").unwrap();
    for input in ["Hello World", "World Hello"] {
        println!("Ends with World: {}", ends.is_match(input));
    }

    // \b matches the zero-width position between a word character and a
    // non-word character, so "cat" inside "catalog" does not count.
    let whole_cat = Regex::new(r"\bcat\b").unwrap();
    let alone = whole_cat.is_match("cat");
    let embedded = whole_cat.is_match("catalog");
    let in_sentence = whole_cat.is_match("my cat is");
    println!("\nWord 'cat': {}", alone);
    println!("Word 'cat': {}", embedded);
    println!("Word 'cat': {}", in_sentence);

    // Collect each whole word of a sentence.
    let word = Regex::new(r"\b\w+\b").unwrap();
    let mut words: Vec<&str> = Vec::new();
    for hit in word.find_iter("The cat sat on the category mat") {
        words.push(hit.as_str());
    }
    println!("\nWords: {:?}", words);

    // With the (?m) flag, ^ also matches right after every newline.
    let line_start = Regex::new(r"(?m)^Hello").unwrap();
    println!("\nLines starting with Hello:");
    line_start
        .find_iter("Hello\nWorld\nHello")
        .for_each(|hit| println!("  {}", hit.as_str()));
}

Quantifiers

use regex::Regex;
 
fn main() {
    // `*` lets the preceding item repeat any number of times, including zero.
    let star = Regex::new(r"ab*c").unwrap();
    ["ac", "abc", "abbbc", "ab"]
        .iter()
        .for_each(|candidate| {
            println!("'{}' matches 'ab*c': {}", candidate, star.is_match(candidate));
        });

    // `+` requires at least one repetition.
    let plus = Regex::new(r"ab+c").unwrap();
    let without_b = plus.is_match("ac");
    let with_b = plus.is_match("abc");
    println!("\n'ac' matches 'ab+c': {}", without_b);
    println!("'abc' matches 'ab+c': {}", with_b);

    // `?` makes the preceding item optional.
    let optional_u = Regex::new(r"colou?r").unwrap();
    let american = optional_u.is_match("color");
    let british = optional_u.is_match("colour");
    println!("\n'color' matches: {}", american);
    println!("'colour' matches: {}", british);

    // `{n}` demands exactly n repetitions.
    let four_digits = Regex::new(r"\d{4}").unwrap();
    println!("\n'1234' matches 4 digits: {}", four_digits.is_match("1234"));
    println!("'123' matches 4 digits: {}", four_digits.is_match("123"));

    // `{n,m}` accepts anywhere from n to m repetitions (greedy: as many as possible).
    let two_to_four = Regex::new(r"\d{2,4}").unwrap();
    for candidate in ["1", "12", "123", "1234", "12345"] {
        match two_to_four.find(candidate) {
            Some(hit) => println!("'{}' found 2-4 digits: '{}'", candidate, hit.as_str()),
            None => {}
        }
    }

    // Appending `?` to a quantifier makes it lazy: it stops at the first
    // position where the rest of the pattern can match.
    let markup = "<div>content</div><div>more</div>";
    let lazy = Regex::new(r"<div>.*?</div>").unwrap();
    let eager = Regex::new(r"<div>.*</div>").unwrap();

    let eager_hit = eager.find(markup).unwrap();
    println!("\nGreedy match: {}", eager_hit.as_str());
    let lazy_hit = lazy.find(markup).unwrap();
    println!("Non-greedy match: {}", lazy_hit.as_str());
}

Character Classes

use regex::Regex;
 
fn main() {
    // Predefined classes. In the regex crate these are Unicode-aware by
    // default, so they are broader than their ASCII look-alikes.
    let digit = Regex::new(r"\d").unwrap();    // Any Unicode decimal digit (not just [0-9])
    let word = Regex::new(r"\w").unwrap();     // Any Unicode word character (not just [a-zA-Z0-9_])
    let space = Regex::new(r"\s").unwrap();    // Any Unicode whitespace

    println!("Digit: {}", digit.is_match("5"));
    println!("Word char: {}", word.is_match("_"));
    println!("Whitespace: {}", space.is_match("\t"));

    // Unicode awareness in practice: U+0663 (ARABIC-INDIC DIGIT THREE) is a
    // decimal digit, so \d matches it. Turning Unicode mode off for the class
    // with (?-u:...) restricts it to ASCII, equivalent to writing [0-9].
    let ascii_digit = Regex::new(r"(?-u:\d)").unwrap();
    println!("Unicode digit matches \\d: {}", digit.is_match("\u{0663}"));
    println!("Unicode digit matches (?-u:\\d): {}", ascii_digit.is_match("\u{0663}"));

    // Negated classes (complements of the Unicode-aware classes above)
    let non_digit = Regex::new(r"\D").unwrap();   // Anything that is not a decimal digit
    let non_word = Regex::new(r"\W").unwrap();    // Anything that is not a word character
    let non_space = Regex::new(r"\S").unwrap();   // Non-whitespace

    println!("\nNon-digit: {}", non_digit.is_match("a"));
    println!("Non-word: {}", non_word.is_match("!"));
    println!("Non-space: {}", non_space.is_match("a"));

    // Custom character class
    let vowel = Regex::new(r"[aeiou]").unwrap();
    let hex = Regex::new(r"[0-9a-fA-F]").unwrap();

    println!("\nVowel: {}", vowel.is_match("e"));
    println!("Hex: {}", hex.is_match("F"));

    // Negated custom class
    let non_vowel = Regex::new(r"[^aeiou]").unwrap();
    println!("Non-vowel: {}", non_vowel.is_match("x"));

    // Ranges
    let lowercase = Regex::new(r"[a-z]+").unwrap();
    let alphanum = Regex::new(r"[a-zA-Z0-9]+").unwrap();

    // find() returns Option<Match>; {:?} shows the matched span and text.
    println!("\nLowercase match: {:?}", lowercase.find("Hello"));
    println!("Alphanum match: {:?}", alphanum.find("Hello123"));
}

Lookahead and Lookbehind

use regex::Regex;
 
fn main() {
    // NOTE: the regex crate does NOT support look-around. Patterns containing
    // (?=...), (?!...), (?<=...) or (?<!...) are rejected by Regex::new(),
    // because they cannot be matched within the crate's linear-time guarantee.
    // The examples below show how to get the same results with capture groups
    // and a little surrounding code. If true look-around is required, a
    // backtracking engine such as the `fancy-regex` crate is an alternative.

    // Instead of positive lookahead  \w+(?= world):
    // match the context too, and capture only the part of interest.
    let re = Regex::new(r"(\w+) world").unwrap();
    if let Some(word) = re.captures("Hello world").and_then(|caps| caps.get(1)) {
        println!("Word before 'world': {}", word.as_str());
    }

    // Instead of negative lookahead  foo(?!bar):
    // find each "foo" and check in code that "bar" does not follow it.
    let re = Regex::new(r"foo").unwrap();
    for text in ["foobar", "foobaz", "foo"] {
        let hit = re
            .find_iter(text)
            .find(|m| !text[m.end()..].starts_with("bar"));
        if let Some(m) = hit {
            println!("'{}' matched in '{}'", m.as_str(), text);
        }
    }

    // Instead of positive lookbehind  (?<=\$)\d+:
    // match the "$" as well, and capture just the digits.
    let re = Regex::new(r"\$(\d+)").unwrap();
    let text = "Price: $100, Tax: $20";
    println!("\nAmounts:");
    for amount in re.captures_iter(text).filter_map(|caps| caps.get(1)) {
        println!("  {}", amount.as_str());
    }

    // Instead of negative lookbehind  (?<!foo)bar:
    // find each "bar" and check in code that "foo" does not precede it.
    let re = Regex::new(r"bar").unwrap();
    for text in ["foobar", "bazbar", "bar"] {
        let hit = re
            .find_iter(text)
            .find(|m| !text[..m.start()].ends_with("foo"));
        if let Some(m) = hit {
            println!("'{}' matched in '{}'", m.as_str(), text);
        }
    }
}

Performance Tips

use regex::Regex;
use std::time::Instant;
 
fn main() {
    // Compile once, use many times
    let re = Regex::new(r"\d{4}-\d{2}-\d{2}").unwrap();
    
    let texts: Vec<&str> = (0..1000)
        .map(|i| format!("Date {} is 2024-03-{}", i, (i % 28) + 1).leak())
        .collect();
    
    // Efficient: reuse compiled regex
    let start = Instant::now();
    let count = texts.iter().filter(|t| re.is_match(t)).count();
    println!("Matches: {}, Time: {:?}", count, start.elapsed());
    
    // Use bytes! for byte strings
    let re = Regex::new(r"hello").unwrap();
    let bytes = b"hello world";
    if re.is_match(bytes) {
        println!("\nMatch in bytes!");
    }
    
    // Use RegexSet for multiple patterns
    let set = regex::RegexSet::new([
        r"\bfoo\b",
        r"\bbar\b",
        r"\bbaz\b",
    ]).unwrap();
    
    let text = "foo and bar";
    let matches: Vec<_> = set.matches(text).into_iter().collect();
    println!("\nPatterns matched: {:?}", matches);  // [0, 1]
}

Summary

  • Use Regex::new(pattern) to compile a regex (returns Result)
  • Use raw strings r"..." to avoid escaping backslashes
  • is_match(text) returns true if pattern matches anywhere
  • find(text) returns first match position
  • find_iter(text) iterates over all matches
  • captures(text) extracts capture groups
  • captures_iter(text) iterates over all captures
  • Named groups: (?P<name>...) accessed with caps["name"]
  • Numbered groups: (...) accessed with caps[1], caps[2], etc.
  • replace(text, replacement) replaces first match
  • replace_all(text, replacement) replaces all matches
  • Use $1, $2 or $name in replacement strings for captured groups
  • split(text) splits by pattern
  • splitn(text, n) splits with limit
  • Character classes: \d, \w, \s, \D, \W, \S
  • Anchors: ^ start, $ end, \b word boundary
  • Quantifiers: *, +, ?, {n}, {n,}, {n,m}
  • Non-greedy: *?, +?, ??
  • Case-insensitive: (?i) flag
  • Multiline: (?m) flag for ^ and $ to match line boundaries
  • Compile patterns once and reuse for performance
  • Use RegexSet to match multiple patterns efficiently