Loading page…
Rust walkthroughs
Loading page…
regex::Regex::is_match vs regex::Regex::find?
is_match and find serve different purposes in the regex crate: is_match is the faster choice when you only need to check whether a pattern occurs, while find returns location information at additional cost. The performance difference stems from what each method computes: is_match only determines existence, while find must track and return match positions. For hot paths where you only need boolean results, is_match provides measurable performance benefits.
use regex::Regex;
/// Shows the two return shapes side by side: `is_match` yields a plain
/// `bool`, while `find` yields an `Option<Match>` exposing offsets and text.
fn basic_comparison() {
    let haystack = "The date is 2024-01-15 for the event.";
    let date_re = Regex::new(r"\d{4}-\d{2}-\d{2}").unwrap();

    // Existence check: the whole answer is a bool.
    println!("Contains date: {}", date_re.is_match(haystack));

    // Location query: the answer carries the start/end offsets and the
    // matched slice of the haystack.
    let located = date_re.find(haystack);
    if let Some(hit) = located {
        println!("Found at {}..{}: {}", hit.start(), hit.end(), hit.as_str());
    }
}
is_match gives a boolean; find gives location and matched text.
use regex::Regex;
/// Runs one pattern through both `is_match` and `find` to contrast what each
/// call is asked to report back.
fn computational_difference() {
    let input = "abc123def456ghi";
    let letters = Regex::new(r"[a-z]+").unwrap();

    // is_match only answers "does a match exist anywhere?", so the search is
    // free to stop as soon as that is known.
    println!("Has letters: {}", letters.is_match(input));

    // find must also report where the leftmost match starts and ends; that
    // information comes back wrapped in a Match value.
    let leftmost = letters.find(input);
    if let Some(span) = leftmost {
        println!(
            "Match '{}' at {}..{}",
            span.as_str(),
            span.start(),
            span.end()
        );
    }
}
find does more work to compute match boundaries and create the Match object.
use regex::Regex;
/// Times `is_match` against `find` on three haystacks (match at the start,
/// no match, match at the end) and prints the elapsed time of each pairing.
///
/// This is a rough illustration, not a rigorous benchmark: it uses wall-clock
/// time over a fixed number of iterations, with no warm-up or statistics.
fn benchmark_comparison() {
    /// Number of times each search is repeated per measurement.
    const ITERATIONS: usize = 10_000;

    /// Runs `search` `ITERATIONS` times and returns the total elapsed time.
    fn time_searches<T>(mut search: impl FnMut() -> T) -> std::time::Duration {
        let start = std::time::Instant::now();
        for _ in 0..ITERATIONS {
            // black_box keeps the optimizer from discarding the otherwise
            // unused search result (and with it the work being measured).
            std::hint::black_box(search());
        }
        start.elapsed()
    }

    let pattern = Regex::new(r"[a-z]{10,}").unwrap();

    // Text with match near the beginning. Built with format! because
    // `&str + &String` is not a valid addition in Rust.
    // Note: the whole haystack is lowercase, so the greedy match that `find`
    // reports runs to the end of the text even though it starts at offset 0.
    let text_early = format!("abcdefghijklmnopqrstuvwxyz{}", "xyz".repeat(1000));
    // Text with no match (uppercase only).
    let text_none = "XYZ".repeat(1000);
    // Text with match at the end.
    let text_late = format!("{}abcdefghijklmnopqrstuvwxyz", "XYZ".repeat(500));

    // Benchmark is_match
    let is_match_early =
        time_searches(|| pattern.is_match(std::hint::black_box(text_early.as_str())));
    let is_match_none =
        time_searches(|| pattern.is_match(std::hint::black_box(text_none.as_str())));
    let is_match_late =
        time_searches(|| pattern.is_match(std::hint::black_box(text_late.as_str())));

    // Benchmark find
    let find_early = time_searches(|| pattern.find(std::hint::black_box(text_early.as_str())));
    let find_none = time_searches(|| pattern.find(std::hint::black_box(text_none.as_str())));
    let find_late = time_searches(|| pattern.find(std::hint::black_box(text_late.as_str())));

    println!("Early match - is_match: {:?}, find: {:?}", is_match_early, find_early);
    println!("No match - is_match: {:?}, find: {:?}", is_match_none, find_none);
    println!("Late match - is_match: {:?}, find: {:?}", is_match_late, find_late);
}
The benchmark shows relative performance across different match positions.
use regex::Regex;
/// Issues the same search through `is_match` and `find`; the two calls differ
/// only in how much information they are asked to produce.
fn why_faster() {
    let pattern = Regex::new(r"[A-Z][a-z]+\d+").unwrap();
    // The haystack must contain an uppercase letter followed by lowercase
    // letters and then digits ("Abc123"); otherwise neither call has a match
    // to stop at and the comparison below is moot.
    let text = "abcdefAbc123xyz";

    // is_match only has to establish that some match exists:
    // 1. It may stop as soon as a match is known to exist
    // 2. It does not have to report where the match starts or ends
    // 3. The regex crate documents that the engine may be able to do less
    //    work for this kind of query
    assert!(pattern.is_match(text));

    // find must:
    // 1. Locate the leftmost match
    // 2. Keep searching until the match's end offset is known
    // 3. Return the start/end offsets wrapped in a Match value
    //    (a small borrowed value; no heap allocation is needed for it)
    assert_eq!(pattern.find(text).map(|m| m.as_str()), Some("Abc123"));
}
is_match allows the regex engine to skip boundary computation.
use regex::Regex;
/// Contrasts three questions about the same haystack: where every match is,
/// whether any match exists, and how many matches there are.
fn find_all_comparison() {
    let input = "a1b22c333d4444";
    let digits = Regex::new(r"\d+").unwrap();

    // Every match together with its span: walk the find_iter iterator.
    let mut positions = Vec::new();
    for hit in digits.find_iter(input) {
        positions.push((hit.start(), hit.end(), hit.as_str()));
    }
    println!("All matches: {:?}", positions);

    // Only "is there at least one?": is_match is enough.
    println!("Has numbers: {}", digits.is_match(input));

    // Counting requires visiting every match, so find_iter is needed again.
    let count = digits.find_iter(input).fold(0usize, |seen, _| seen + 1);
    println!("Count: {}", count);
}
Use the appropriate method based on what information you need.
use regex::Regex;
/// Demonstrates that both calls are satisfied by the first match in the
/// haystack and never look at the later ones.
fn short_circuit() {
    let text = "abcXYZdefGHI";
    let pattern = Regex::new(r"[a-z]+").unwrap();

    // is_match is done once a match is known to exist near the start.
    let _ = pattern.is_match(text);

    // find is done once the leftmost match ("abc") has been delimited.
    let leftmost = pattern.find(text).map(|hit| hit.as_str());
    println!("First match: {:?}", leftmost);

    // Neither call looks for "def"; the difference is only that is_match has
    // no offsets to report.
}
Both methods stop at the first match, but is_match does less work.
use regex::Regex;
/// Validates one e-mail address; only a yes/no answer is needed, so
/// `is_match` is used.
fn validation_example() {
    let candidate = "user@example.com";
    let email_pattern =
        Regex::new(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$").unwrap();

    // The pattern is anchored on both ends, so the whole input must conform.
    let verdict = if email_pattern.is_match(candidate) {
        "Valid email format"
    } else {
        "Invalid email format"
    };
    println!("{}", verdict);
}
fn filtering_example() {
let pattern = Regex::new(r"error|warning|critical").unwrap();
let logs = vec![
"[INFO] Application started",
"[ERROR] Connection failed",
"[DEBUG] Processing request",
"[WARNING] Memory usage high",
];
// Filter: only need to know if pattern exists in line
let matching_lines: Vec<_> = logs.iter()
.filter(|line| pattern.is_match(line))
.collect();
println!("Lines with issues: {:?}", matching_lines);
}
/// Uses `is_match` as a cheap gate in front of more expensive processing.
fn conditional_processing() {
    let document = "The employee SSN is 123-45-6789.";
    // SSN-shaped pattern: 3 digits, 2 digits, 4 digits, dash separated.
    let sensitive_pattern = Regex::new(r"\b\d{3}-\d{2}-\d{4}\b").unwrap();

    // Nothing sensitive found: skip the redaction work entirely.
    if !sensitive_pattern.is_match(document) {
        return;
    }

    // A real implementation would run find/replace from here on.
    println!("Document contains sensitive data - applying redaction");
}
Use is_match when the boolean result is all you need.
use regex::Regex;
/// Prints every date found in the text together with its starting offset.
fn extraction_example() {
    let text = "Events on 2024-01-15 and 2024-02-20";
    let date_re = Regex::new(r"\b(\d{4})-(\d{2})-(\d{2})\b").unwrap();

    // Both the matched text and its position are needed, so iterate matches.
    date_re.find_iter(text).for_each(|hit| {
        println!("Found date '{}' at position {}", hit.as_str(), hit.start());
    });
}
/// Replaces every four-letter word in the sample sentence with asterisks.
fn replacement_example() {
    let four_letter_word = Regex::new(r"\b\w{4}\b").unwrap();

    // replace_all locates each match and substitutes the replacement text.
    let censored = four_letter_word.replace_all("This test has four word four", "****");
    println!("Censored: {}", censored);
}
/// Splits a small arithmetic expression into tokens, recording where each
/// token starts.
fn parsing_example() {
    let expression = "x = 42 + y * 10";
    // Identifiers, integer literals, or single-character operators/parens.
    let token_pattern = Regex::new(r"[a-zA-Z_][a-zA-Z0-9_]*|\d+|[+\-*/=()]").unwrap();

    // Each token's text and start offset are both required.
    let mut tokens = Vec::new();
    for tok in token_pattern.find_iter(expression) {
        tokens.push((tok.as_str(), tok.start()));
    }
    println!("Tokens: {:?}", tokens);
}
Use find when you need to know where matches occur.
use regex::Regex;
/// Runs `is_match` and `find` with an anchored and an unanchored pattern to
/// show the two situations side by side; all results are discarded.
fn anchored_patterns() {
    // Anchored: a match can only begin at offset 0 of the haystack.
    let anchored = Regex::new(r"^\d+").unwrap();
    let leading_digits = "123abc";
    let _anchored_hit = anchored.is_match(leading_digits);
    // find additionally has to report where the digit run ends.
    let _anchored_span = anchored.find(leading_digits);

    // Unanchored: the first digit run ("123") sits in the middle of the text.
    let unanchored = Regex::new(r"\d+").unwrap();
    let embedded_digits = "abc123def456";
    // is_match is satisfied by the first digit run
    let _ = unanchored.is_match(embedded_digits);
    // find is satisfied by the same run, but also returns its boundaries
    let _ = unanchored.find(embedded_digits);
}
Anchored patterns reduce the gap between is_match and find.
use regex::Regex;
/// Applies a URL-shaped pattern with optional scheme, optional `www.` prefix
/// and optional path, first as an existence check and then as an extraction.
fn complex_patterns() {
    let text = "Visit https://www.example.com/page for more info.";
    let complex = Regex::new(
        r"(?:https?://)?(?:www\.)?[a-zA-Z0-9-]+\.[a-zA-Z]{2,}(?:/[^\s]*)?",
    )
    .unwrap();

    // Existence only: is there anything URL-like in the text?
    let _url_present = complex.is_match(text);

    // Extraction: the matched text itself is needed.
    let extracted = complex.find(text).map(|hit| hit.as_str());
    if let Some(url) = extracted {
        println!("Found URL: {}", url);
    }
}
Complex patterns amplify the performance difference.
use regex::Regex;
/// Extracts the year, month and day groups from a date with `captures`.
///
/// `captures` reports the span of every group in addition to the overall
/// match, so reach for it only when group extraction is actually needed.
fn captures_comparison() {
    let text = "Date: 2024-01-15";
    let pattern = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap();

    let groups = pattern.captures(text);
    if let Some(caps) = groups {
        // Group 0 is the overall match; groups 1..=3 are the parenthesised parts.
        println!("Full: {}", &caps[0]);
        println!("Year: {}", &caps[1]);
        println!("Month: {}", &caps[2]);
        println!("Day: {}", &caps[3]);
    }

    // Rough cost ordering, cheapest first: is_match, find, captures.
}
captures has the most overhead; use it only when you need capture groups.
use regex::Regex;
fn memory_comparison() {
let pattern = Regex::new(r"[a-z]+").unwrap();
let text = "hello world test";
// is_match: minimal allocations
// Returns a simple bool
// find: may allocate Match structure
// Returns Option<Match> with position data
// For high-frequency matching, this matters:
fn high_frequency_check(texts: &[&str], pattern: &Regex) -> usize {
texts.iter()
.filter(|text| pattern.is_match(text))
.count()
}
// vs.
fn high_frequency_find(texts: &[&str], pattern: &Regex) -> usize {
texts.iter()
.filter(|text| pattern.find(text).is_some())
.count()
}
// is_match version is more efficient for counting/filtering
}is_match avoids allocations that find may require.
use regex::RegexSet;
/// Checks one haystack against several patterns at once with `RegexSet`,
/// then falls back to a single `Regex` to obtain a match position.
fn regex_set() {
    let text = "A warning was issued";
    let set = RegexSet::new([r"\berror\b", r"\bwarning\b", r"\bcritical\b"]).unwrap();

    // The set reports the indices of the patterns that matched; it does not
    // report where they matched.
    let mut matches = Vec::new();
    for pattern_index in set.matches(text) {
        matches.push(pattern_index);
    }
    println!("Matched patterns: {:?}", matches); // [1] - warning

    // Positions still require running the individual pattern with find().
    let warning_pattern = regex::Regex::new(r"\bwarning\b").unwrap();
    let located = warning_pattern.find(text);
    if let Some(hit) = located {
        println!("Warning at {}..{}", hit.start(), hit.end());
    }
}
RegexSet efficiently checks multiple patterns with boolean results.
use regex::Regex;
/// Collects four usage tips in one place: prefer `is_match` for existence
/// checks, compile patterns once, use the bytes API for byte data, and use
/// `RegexBuilder` when limits need tuning.
fn optimization_tips() {
    // Tip 2 (declared up front): compile each regex once and keep it around,
    // because compilation is the expensive step.
    struct Validator {
        email_regex: Regex,
        phone_regex: Regex,
    }

    impl Validator {
        /// Compiles both patterns a single time.
        fn new() -> Self {
            let email_regex = Regex::new(r"^[^@]+@[^@]+\.[^@]+$").unwrap();
            let phone_regex = Regex::new(r"^\d{3}-\d{3}-\d{4}$").unwrap();
            Validator {
                email_regex,
                phone_regex,
            }
        }

        /// Returns whether `email` has the `local@domain.tld` shape.
        fn is_valid_email(&self, email: &str) -> bool {
            self.email_regex.is_match(email)
        }

        /// Returns whether `phone` has the `NNN-NNN-NNNN` shape.
        fn is_valid_phone(&self, phone: &str) -> bool {
            self.phone_regex.is_match(phone)
        }
    }

    // Tip 1: an existence check needs nothing more than is_match.
    let pattern = Regex::new(r"[A-Z]{3}").unwrap();
    let text = "ABC123";
    let _ = pattern.is_match(text);

    // Tip 3: byte data goes through regex::bytes, with no &str conversion.
    let data = b"12345";
    let bytes_pattern = regex::bytes::Regex::new(r"\d+").unwrap();
    let _ = bytes_pattern.is_match(data);

    // Tip 4: RegexBuilder exposes tuning knobs such as the size limits below.
    let _tuned = regex::RegexBuilder::new(r"[a-z]+")
        .size_limit(10_000)
        .dfa_size_limit(10_000)
        .build()
        .unwrap();
}
Follow these patterns for optimal regex performance.
| Method | Returns | Use Case | Performance |
|--------|---------|----------|-------------|
| is_match | bool | Validation, filtering | Fastest |
| find | Option<Match> | Extraction, position | Moderate |
| find_iter | Matches iterator | All match positions | Variable |
| captures | Option<Captures> | Group extraction | Slowest |
| captures_iter | CaptureMatches | All groups | Slowest |
The performance difference between is_match and find comes from their different computational requirements:
is_match:
find:
Key insight: Choose based on your actual needs. If you only need to know whether a pattern matches, is_match is always the right choice—it's not just cleaner code, it's measurably faster. Reserve find for when you actually need match positions, and captures for when you need to extract specific groups from matches.