How do I match and manipulate text patterns with regular expressions?

Walkthrough

Regular expressions are essential for text processing, validation, and extraction. The regex crate provides a robust regex engine with Unicode support, zero-copy parsing, and excellent performance through finite automata. It's the standard choice for regex in Rust.

Key capabilities:

  1. Pattern matching — test if text matches a pattern
  2. Capturing groups — extract specific portions of matches
  3. Search and replace — transform text based on patterns
  4. Iteration — find all matches in a string
  5. Splitting — divide text on pattern boundaries

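The regex crate prioritizes safety and predictable performance: look-around (look-ahead and look-behind) and backreferences are not supported, which is what allows it to guarantee matching time that is linear in the length of the input.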

Code Example

# Cargo.toml
[dependencies]
regex = "1"

// src/main.rs
use regex::Regex;
 
fn main() {
    // --- Basic matching: does the text contain a phone number at all? ---
    let phone_re = Regex::new(r"\d{3}-\d{3}-\d{4}").unwrap();
    let contact_line = "Call me at 555-123-4567 or 800-999-0000";

    if phone_re.is_match(contact_line) {
        println!("Found a phone number!");
    }

    // --- Find all matches: print each match in the text ---
    phone_re
        .find_iter(contact_line)
        .for_each(|hit| println!("Found: {}", hit.as_str()));

    // --- Positional capture groups ---
    let email_re = Regex::new(r"(\w+)@(\w+)\.(\w+)").unwrap();
    let contacts = "Contact: alice@example.com and bob@test.org";

    for groups in email_re.captures_iter(contacts) {
        // Index 0 is the whole match; 1..=3 are the parenthesised groups.
        let (whole, user, domain, tld) = (&groups[0], &groups[1], &groups[2], &groups[3]);
        println!("Full match: {}", whole);
        println!("  Username: {}", user);
        println!("  Domain: {}", domain);
        println!("  TLD: {}", tld);
    }

    // --- Named capture groups: look parts up by name instead of position ---
    let date_re = Regex::new(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})").unwrap();
    let dated_line = "Date: 2024-03-15";

    if let Some(parts) = date_re.captures(dated_line) {
        for (label, group) in [("Year", "year"), ("Month", "month"), ("Day", "day")] {
            println!("{}: {}", label, &parts[group]);
        }
    }

    // --- Search and replace ---
    let sentence = "The quick brown fox jumps over the lazy dog.";
    let vowel_re = Regex::new(r"[aeiou]").unwrap();

    // Literal replacement applied to every match.
    println!("Replaced vowels: {}", vowel_re.replace_all(sentence, "X"));

    // `$1` / `$2` in the replacement refer to the captured groups.
    let pair_re = Regex::new(r"(\w+) (\w+)").unwrap();
    println!("Swapped: {}", pair_re.replace("hello world", "$2 $1")); // "world hello"

    // --- Splitting on a pattern ---
    let comma_re = Regex::new(r",").unwrap();
    let row = "apple,banana,,cherry";

    let fields = comma_re.split(row).collect::<Vec<&str>>();
    println!("Parts: {:?}", fields);

    // `splitn` is given a limit of 2 here.
    let head_and_rest = comma_re.splitn(row, 2).collect::<Vec<&str>>();
    println!("Limited split: {:?}", head_and_rest);
}

Validation Patterns

use regex::Regex;
 
fn validate_email(email: &str) -> bool {
    // Simple email validation
    let pattern = Regex::new(
        r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
    ).unwrap();
    pattern.is_match(email)
}
 
fn validate_phone(phone: &str) -> bool {
    // Various phone formats
    let pattern = Regex::new(
        r"^(\+?1[-.]?)?(\(?\d{3}\)?[-.]?)?\d{3}[-.]?\d{4}$"
    ).unwrap();
    pattern.is_match(phone)
}
 
fn validate_username(name: &str) -> bool {
    // 3-16 chars, alphanumeric and underscores, starts with letter
    let pattern = Regex::new(r"^[a-zA-Z][a-zA-Z0-9_]{2,15}$").unwrap();
    pattern.is_match(name)
}
 
fn main() {
    // Run each validator against one sample input, then report the verdicts.
    let email_ok = validate_email("user@example.com");
    let phone_ok = validate_phone("555-123-4567");
    let username_ok = validate_username("alice_123");

    println!("Email valid: {}", email_ok);
    println!("Phone valid: {}", phone_ok);
    println!("Username valid: {}", username_ok);
}

Extracting Structured Data

use regex::Regex;
use std::collections::HashMap;
 
/// Parses `key = value` lines from `text` into a map.
///
/// Each line is trimmed and then matched against `^(\w+)\s*=\s*(.+)$`; lines
/// that do not match (blank lines, for example) are skipped. The key is the
/// first capture group and the value is the second, trimmed. If a key appears
/// more than once, the last occurrence wins.
fn parse_config(text: &str) -> HashMap<String, String> {
    let entry_re = Regex::new(r"^(\w+)\s*=\s*(.+)$").unwrap();

    text.lines()
        // Keep only the lines that have the `key = value` shape.
        .filter_map(|raw_line| entry_re.captures(raw_line.trim()))
        // Convert the two capture groups into an owned (key, value) pair.
        .map(|groups| (groups[1].to_string(), groups[2].trim().to_string()))
        .collect()
}
 
fn main() {
    // The raw string starts with a blank line and indents every entry.
    let raw_settings = r#"
        host = localhost
        port = 8080
        debug = true
    "#;

    // Parse the text and show the resulting key/value map.
    let parsed = parse_config(raw_settings);
    println!("Config: {:?}", parsed);
}

Case-Insensitive Matching

use regex::Regex;
 
fn main() {
    // Inline flag: `(?i)` at the start makes the rest of the pattern
    // case-insensitive.
    let pattern = Regex::new(r"(?i)hello").unwrap();
    println!("Matches HELLO: {}", pattern.is_match("HELLO WORLD"));
    println!("Matches hello: {}", pattern.is_match("hello world"));

    // Builder: the same switches set programmatically, handy when several
    // options are combined or chosen at runtime.
    // - `case_insensitive(true)` lets `warning` match `WARNING`.
    // - `multi_line(true)` makes `^`/`$` match at line boundaries, so a single
    //   line inside a larger text can be anchored.
    let complex = regex::RegexBuilder::new(r"^warning: \d{4}$")
        .case_insensitive(true)
        .multi_line(true)
        .build()
        .unwrap();

    let log = "INFO: start\nWARNING: 1234\ninfo: done";
    println!("Builder matches WARNING line: {}", complex.is_match(log));
}

Replacement with Closures

use regex::Regex;
 
fn main() {
    let price_list = "Prices: $100, $250, $75";
    // `\$` matches a literal dollar sign; group 1 is the digits after it.
    let dollar_re = Regex::new(r"\$(\d+)").unwrap();

    // The closure computes the replacement text for each match from that
    // match's captures.
    let repriced = dollar_re.replace_all(price_list, |groups: &regex::Captures| {
        let base = groups[1].parse::<i32>().unwrap();
        // Scale by 1.1 in floating point; the cast back to i32 drops the fraction.
        format!("${}", (base as f64 * 1.1) as i32)
    });

    println!("With tax: {}", repriced);
}

Summary

  • Create patterns with Regex::new(r"pattern") — use raw strings (r"") to avoid escaping backslashes
  • is_match() tests if text contains a match; find() returns match location
  • captures_iter() extracts groups; access with cap[0] (full match) or cap[1], cap[2] (groups)
  • Named groups (?P<name>...) accessed via cap["name"] for readable code
  • replace_all() substitutes matches with literal text or $1, $2 for captured groups
  • Use closures with replace_all() for dynamic replacements based on captured values
  • split() and splitn() divide text on pattern boundaries
  • The (?i) flag enables case-insensitive matching within the pattern
  • For complex options, use RegexBuilder instead of Regex::new