Loading page…
Rust walkthroughs
Loading page…
Regex is a crate for parsing, compiling, and executing regular expressions in Rust. It provides a mature, well-tested implementation with a familiar syntax similar to other regex engines. The crate offers both simple pattern matching and advanced features like capture groups, named groups, and replacement with callbacks. Note that it deliberately does not support look-around (lookahead/lookbehind) or backreferences, which is what lets it guarantee linear-time matching.
Key concepts:
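- Regex::new() — compile a pattern
- is_match() — check whether a pattern matches
- find() and find_iter() — find the first match, or iterate over all matches
- captures() — get capture groups
- replace() — replace matched text
- (?i) — case-insensitive flag
When to use Regex: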
When NOT to use Regex:
- Simple string checks (use str::contains, str::starts_with, etc.)
- Complex grammars (use a parser library such as nom)
use regex::Regex;
fn main() {
    // Compile the pattern once, then reuse it for both checks.
    let greeting = Regex::new(r"hello").unwrap();
    let found_in_sentence = greeting.is_match("hello world");
    let found_in_farewell = greeting.is_match("goodbye");
    assert!(found_in_sentence);
    assert!(!found_in_farewell);
    println!("Pattern matches!");
}
use regex::Regex;
fn main() {
    let number = Regex::new(r"\d+").unwrap();
    let sentence = "There are 42 cats and 7 dogs.";
    // Find all matches
    number
        .find_iter(sentence)
        .for_each(|hit| println!("Found: {}", hit.as_str()));
    // Check if pattern exists: `find` yields an `Option`, so a missing match
    // is handled without panicking.
    if let Some(first) = number.find(sentence) {
        println!("First match: {}", first.as_str());
    }
}
use regex::Regex;
fn main() {
    let date = Regex::new(r"(\d+)-(\d+)-(\d+)").unwrap();
    let input = "Date: 2024-01-15";
    // Nothing to print when the input holds no date.
    let Some(groups) = date.captures(input) else {
        return;
    };
    // Index 0 is the whole match; 1, 2 and 3 are the parenthesised groups in order.
    println!("Full match: {}", &groups[0]);
    println!("Year: {}", &groups[1]);
    println!("Month: {}", &groups[2]);
    println!("Day: {}", &groups[3]);
}
use regex::Regex;
fn main() {
    let iso_date = Regex::new(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})").unwrap();
    // Named groups are looked up by the name given in `(?P<name>...)`.
    if let Some(parts) = iso_date.captures("Date: 2024-01-15") {
        let (year, month, day) = (&parts["year"], &parts["month"], &parts["day"]);
        println!("Year: {}", year);
        println!("Month: {}", month);
        println!("Day: {}", day);
    }
}
use regex::Regex;
fn main() {
    let re = Regex::new(r"\d+").unwrap();
    let text = "There are 42 cats and 7 dogs.";
    // Replace with string: `replace_all` rewrites every non-overlapping match.
    let result = re.replace_all(text, "X");
    println!("{}", result); // "There are X cats and X dogs."
    // Replace first occurrence only: `replace` stops after the leftmost match.
    let result = re.replace(text, "X");
    println!("First only: {}", result); // "First only: There are X cats and 7 dogs."
}
use regex::Regex;
fn main() {
let re = Regex::new(r"\d+").unwrap();
let text = "Ages: 25, 30, 45";
// Replace with a function
let result = re.replace_all(text, |caps: ®ex::Captures| {
let num: i32 = caps[0].parse().unwrap();
(num + 1).to_string()
});
println!("{}", result); // "Ages: 26, 31, 46"
}use regex::Regex;
fn main() {
    // Swap first and last name
    let full_name = Regex::new(r"(\w+) (\w+)").unwrap();
    let people = "John Smith, Jane Doe";
    // `$1` and `$2` in the replacement refer to the first and second capture group.
    let swapped = full_name.replace_all(people, "$2 $1");
    println!("{}", swapped); // "Smith John, Doe Jane"
}
use regex::Regex;
fn main() {
// Inline flag for case insensitivity
let re = Regex::new(r"(?i)hello").unwrap();
assert!(re.is_match("Hello"));
assert!(re.is_match("HELLO"));
assert!(re.is_match("hello"));
// Or use the builder
let re = Regex::builder()
.case_insensitive(true)
.build(r"hello")
.unwrap();
assert!(re.is_match("HELLO"));
}use regex::Regex;
fn main() {
    let text = "the cat and the dog";
    // Compile `pattern` and report whether it matches `haystack`.
    let matches = |pattern: &str, haystack: &str| Regex::new(pattern).unwrap().is_match(haystack);
    // Start of string
    assert!(matches(r"^the", text));
    // End of string
    assert!(matches(r"dog$", text));
    // Word boundary
    assert!(matches(r"\bcat\b", text));
    // Not a word boundary
    assert!(!matches(r"\Bcat\B", "category"));
}
use regex::Regex;
fn main() {
    let text = "aaa b aaaa";
    // Each quantifier is checked against its leftmost match in `text`, so the
    // example demonstrates the behaviour instead of only compiling patterns.

    // Zero or more (greedy): takes the whole leading run of `a`s.
    let re = Regex::new(r"a*").unwrap();
    assert_eq!(re.find(text).map(|m| m.as_str()), Some("aaa"));
    // One or more
    let re = Regex::new(r"a+").unwrap();
    assert_eq!(re.find(text).map(|m| m.as_str()), Some("aaa"));
    // Zero or one
    let re = Regex::new(r"a?").unwrap();
    assert_eq!(re.find(text).map(|m| m.as_str()), Some("a"));
    // Exact count
    let re = Regex::new(r"a{3}").unwrap(); // Exactly 3
    assert!(re.is_match("aaa"));
    // Range
    let re = Regex::new(r"a{2,4}").unwrap(); // 2 to 4
    assert!(re.is_match("aaaa"));
    // Non-greedy (minimal match): stops after the first `a`.
    let re = Regex::new(r"a+?").unwrap();
    assert_eq!(re.find(text).map(|m| m.as_str()), Some("a"));
}
use regex::Regex;
fn main() {
    // Each row pairs a character-class pattern with a sample it must match.
    let samples = [
        (r"\d", "5"),          // Digit
        (r"\D", "a"),          // Non-digit
        (r"\w", "a"),          // Word character
        (r"\w", "_"),          // Underscore also counts as a word character
        (r"\s", " "),          // Whitespace
        (r"[aeiou]", "hello"), // Custom character class
        (r"[^aeiou]", "h"),    // Negated character class
    ];
    for (pattern, sample) in samples {
        assert!(Regex::new(pattern).unwrap().is_match(sample));
    }
    // Range in character class (compiled only; nothing is matched against it)
    let _lowercase = Regex::new(r"[a-z]").unwrap();
}
use regex::Regex;
fn main() {
    // `|` separates alternatives; any one of them is enough for a match.
    let pets = Regex::new(r"cat|dog|bird").unwrap();
    for sentence in ["I have a cat", "I have a dog", "I have a bird"] {
        assert!(pets.is_match(sentence));
    }
    assert!(!pets.is_match("I have a fish"));
}
use regex::Regex;
/// Returns `true` when `email` has the shape `local@domain.tld` accepted by the pattern below.
///
/// The pattern is compiled on every call.
fn is_valid_email(email: &str) -> bool {
    const EMAIL_PATTERN: &str = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$";
    Regex::new(EMAIL_PATTERN).unwrap().is_match(email)
}
fn main() {
    // Addresses the validator must accept.
    for accepted in ["user@example.com", "user.name+tag@example.co.uk"] {
        assert!(is_valid_email(accepted));
    }
    // Inputs with no `@`, or with nothing before it, must be rejected.
    for rejected in ["invalid-email", "@example.com"] {
        assert!(!is_valid_email(rejected));
    }
    println!("Email validation works!");
}
use regex::Regex;
/// Collects every 3-3-4 digit phone number found in `text`, in order of appearance.
///
/// Digit groups may be separated by `-`, `.`, or nothing at all.
fn extract_phone_numbers(text: &str) -> Vec<String> {
    let phone = Regex::new(r"\b\d{3}[-.]?\d{3}[-.]?\d{4}\b").unwrap();
    let mut numbers = Vec::new();
    for hit in phone.find_iter(text) {
        numbers.push(hit.as_str().to_string());
    }
    numbers
}
fn main() {
    // One number per separator style: dashes, dots, and none.
    let numbers = extract_phone_numbers("Call 555-123-4567 or 555.987.6543 or 5551112222");
    numbers.iter().for_each(|num| println!("Found: {}", num));
}
use regex::Regex;
/// The pieces of an `http`/`https` URL as extracted by `parse_url`.
#[derive(Debug)]
struct Url {
/// Text of the `scheme` capture group (`http` or `https`).
scheme: String,
/// Everything between `://` and the first `/`.
domain: String,
/// The part starting at the first `/` after the domain; `None` when absent.
path: Option<String>,
}
/// Splits an `http`/`https` URL into scheme, domain and optional path.
///
/// Returns `None` when `url` does not match the expected shape.
fn parse_url(url: &str) -> Option<Url> {
    let pattern = Regex::new(
        r"^(?P<scheme>https?)://(?P<domain>[^/]+)(?P<path>/.*)?$"
    ).unwrap();
    let caps = pattern.captures(url)?;
    // The `path` group is optional, so it is looked up with `name` rather than indexing.
    let path = caps.name("path").map(|m| m.as_str().to_string());
    Some(Url {
        scheme: caps["scheme"].to_string(),
        domain: caps["domain"].to_string(),
        path,
    })
}
fn main() {
let urls = vec![
"https://example.com",
"http://example.com/path/to/page",
];
for url_str in urls {
if let Some(url) = parse_url(url_str) {
println!("{:#?}", url);
}
}
}use regex::Regex;
/// One parsed log line as produced by `parse_log_line`.
#[derive(Debug)]
struct LogEntry {
/// Text found inside the leading `[...]` brackets.
timestamp: String,
/// The word immediately before the `:` separator.
level: String,
/// Everything after the level's `:` and the whitespace that follows it.
message: String,
}
/// Parses a line shaped like `[timestamp] LEVEL: message` into a `LogEntry`.
///
/// Returns `None` when the line does not contain that shape.
fn parse_log_line(line: &str) -> Option<LogEntry> {
    let pattern = Regex::new(
        r"\[(?P<timestamp>[^\]]+)\]\s+(?P<level>\w+):\s+(?P<message>.+)"
    ).unwrap();
    let caps = pattern.captures(line)?;
    // All three groups are mandatory in the pattern, so indexing by name cannot panic here.
    let field = |name: &str| caps[name].to_string();
    Some(LogEntry {
        timestamp: field("timestamp"),
        level: field("level"),
        message: field("message"),
    })
}
fn main() {
let log = "[2024-01-15 10:30:00] ERROR: Connection failed";
if let Some(entry) = parse_log_line(log) {
println!("{:#?}", entry);
}
}use regex::Regex;
fn main() {
    let separator = Regex::new(r"[,\s]+").unwrap();
    let fruits = "apple, banana, cherry, orange";
    // Split on pattern
    let every_part = separator.split(fruits).collect::<Vec<&str>>();
    println!("{:?}", every_part); // ["apple", "banana", "cherry", "orange"]
    // Limited split: at most two pieces, the last one keeps the unsplit remainder.
    let head_and_rest = separator.splitn(fruits, 2).collect::<Vec<&str>>();
    println!("{:?}", head_and_rest); // ["apple", "banana, cherry, orange"]
}
use regex::Regex;
fn main() {
    let text = "first line\nsecond line\nthird line";
    // Multi-line mode (^ and $ match line boundaries)
    let re = Regex::new(r"(?m)^second").unwrap();
    assert!(re.is_match(text));
    // Match newline explicitly. The pattern has to spell out the text that sits
    // directly before the newline: `first\nsecond` would not match, because in
    // `text` the word "first" is followed by " line", not by the line break.
    let re = Regex::new(r"first line\nsecond").unwrap();
    assert!(re.is_match(text));
}
use regex::Regex;
fn main() {
    let two_lines = "hello\nworld";
    // Compile `pattern` and test it against the two-line text.
    let matches = |pattern: &str| Regex::new(pattern).unwrap().is_match(two_lines);
    // By default, dot doesn't match newline
    assert!(!matches(r"hello.world"));
    // With s flag, dot matches everything including newline
    assert!(matches(r"(?s)hello.world"));
}
use regex::Regex;
fn main() {
    let literal = "(a+b)*c";
    // Escape special characters for literal matching
    let escaped = regex::escape(literal);
    // escaped is now "\\(a\\+b\\)\\*c"
    let exact = Regex::new(&escaped).unwrap();
    assert!(exact.is_match(literal));
}
use regex::Regex;
use std::sync::LazyLock;
/// Email pattern shared by the whole program.
///
/// Compile regex once, reuse everywhere: the closure runs on first access only.
static EMAIL_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$").unwrap());
/// Reports whether `email` matches the shared `EMAIL_REGEX` pattern.
///
/// The check goes through the lazily initialised static rather than calling
/// `Regex::new` inside this function.
fn is_valid_email(email: &str) -> bool {
EMAIL_REGEX.is_match(email)
}
fn main() {
    // The first call triggers the one-time initialisation of the static pattern.
    let verdict = is_valid_email("user@example.com");
    println!("Valid: {}", verdict);
}
use regex::RegexSet;
fn main() {
    // The position of a pattern in this list is the index reported for it.
    let patterns = [r"\w+", r"\d+", r"\pL+", r"foo", r"bar"];
    let set = RegexSet::new(&patterns).unwrap();
    let haystack = "bar";
    let matched_indices: Vec<_> = set.matches(haystack).into_iter().collect();
    println!("Matches: {:?}", matched_indices); // [0, 2, 4]
    // Check if any match
    assert!(set.is_match(haystack));
}
use regex::Regex;
fn main() {
    // Prefer non-capturing groups when you don't need captures
    let url = Regex::new(r"(?:https?|ftp)://\S+").unwrap();
    assert!(url.is_match("ftp://example.com/file"));
    // The regex crate has no atomic groups: a pattern such as `(?>a|b|c)` is
    // rejected by `Regex::new`, so that tuning trick from backtracking engines
    // does not apply here.
    // Compile regex outside loops
    let re = Regex::new(r"\d+").unwrap();
    let texts = ["a1", "b2", "c3"];
    for text in texts {
        // Reuse the compiled regex
        if re.is_match(text) {
            println!("Match in: {}", text);
        }
    }
}
use regex::Regex;
#[cfg(test)]
mod tests {
    use super::*;

    /// Accepts dash-, dot- and unseparated numbers; rejects a two-digit leading group.
    #[test]
    fn test_phone_pattern() {
        let phone = Regex::new(r"\b\d{3}[-.]?\d{3}[-.]?\d{4}\b").unwrap();
        for valid in ["555-123-4567", "555.123.4567", "5551234567"] {
            assert!(phone.is_match(valid));
        }
        assert!(!phone.is_match("55-123-4567"));
    }

    /// Numbered groups come back in pattern order: year, month, day.
    #[test]
    fn test_capture_groups() {
        let date = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap();
        let caps = date.captures("2024-01-15").unwrap();
        let parts = [&caps[1], &caps[2], &caps[3]];
        assert_eq!(parts, ["2024", "01", "15"]);
    }
}
// Entry point only prints a notice; the assertions live in the `tests` module.
fn main() {
println!("Tests defined");
}
Regex Key Imports:
use regex::Regex;
use regex::RegexSet;
Core Methods:
| Method | Description |
|--------|-------------|
| Regex::new(pattern) | Compile pattern |
| is_match(text) | Check if pattern matches |
| find(text) | Find first match |
| find_iter(text) | Iterate all matches |
| captures(text) | Get capture groups |
| replace(text, replacement) | Replace the first match (use replace_all for every match) |
| split(text) | Split on pattern |
Common Patterns:
| Pattern | Description |
|---------|-------------|
| \d | Digit |
| \w | Word character |
| \s | Whitespace |
| \b | Word boundary |
| ^ | Start of string |
| $ | End of string |
| . | Any character |
| * | Zero or more |
| + | One or more |
| ? | Zero or one |
Flags:
| Flag | Description |
|------|-------------|
| (?i) | Case insensitive |
| (?m) | Multi-line mode |
| (?s) | Dot matches newline |
| (?x) | Ignore whitespace |
Escape Metacharacters:
let escaped = regex::escape("(a+b)*c");
Key Points:
- Use LazyLock or once_cell for global patterns
- Use find_iter() for all matches
- Use captures() for extracting groups
- Named groups (?P<name>...) improve readability
- Use RegexSet when matching multiple patterns
- Escape literal text with regex::escape()