What are the trade-offs between regex::Regex::is_match and find for simple presence detection?

regex::Regex::is_match returns a boolean indicating whether the pattern matches anywhere in the input, optimized for the common case where you only need to know presence, while find returns Option<Match> with location and content details—making is_match faster for pure presence detection but find necessary when you need to know where or what matched. The key trade-off is between simplicity and information: is_match is simpler and faster for yes/no answers, while find provides match boundaries and enables further operations on matched content.

Basic is_match Usage

use regex::Regex;
 
/// Demonstrates that `is_match` reports nothing but presence: a plain `bool`.
fn basic_is_match() {
    let phone_re = Regex::new(r"\d{3}-\d{4}").unwrap();

    // A haystack containing a phone-like token anywhere reports `true`.
    let found_in_sentence = phone_re.is_match("Call 555-1234 for help");
    // A haystack without such a token reports `false`.
    let found_in_plain_text = phone_re.is_match("No phone number here");

    assert!(found_in_sentence);
    assert!(!found_in_plain_text);
}

is_match answers "does this pattern exist anywhere in the string?"

Basic find Usage

use regex::Regex;
 
/// Shows how `find` exposes the first match's text together with its
/// byte offsets into the haystack.
fn basic_find() {
    let phone_re = Regex::new(r"\d{3}-\d{4}").unwrap();
    let haystack = "Call 555-1234 for help";

    // `find` yields `None` when nothing matches; in that case nothing is printed.
    match phone_re.find(haystack) {
        Some(hit) => {
            let (from, to) = (hit.start(), hit.end());
            println!("Found: {}", hit.as_str()); // "555-1234"
            println!("Start: {}", from); // 5
            println!("End: {}", to); // 13
        }
        None => {}
    }
}

find answers "where is the first match, and what does it contain?"

Performance Comparison

use regex::Regex;
use std::time::Instant;
 
/// Rough wall-clock comparison of `is_match` and `find` on one haystack.
///
/// This is a quick illustration, not a rigorous benchmark: build in release
/// mode and prefer a harness such as `criterion` for numbers you rely on.
fn performance_comparison() {
    let pattern = Regex::new(r"\d{4}-\d{2}-\d{2}").unwrap(); // Date pattern
    let text = "Log entry from 2024-03-15 shows error";
    let iterations = 1_000_000;

    // Benchmark is_match.
    // `black_box` on the input and the result keeps the optimizer from
    // hoisting or deleting the call, which `let _ = ...` does not guarantee.
    let start = Instant::now();
    for _ in 0..iterations {
        std::hint::black_box(pattern.is_match(std::hint::black_box(text)));
    }
    let is_match_duration = start.elapsed();

    // Benchmark find (same precautions).
    let start = Instant::now();
    for _ in 0..iterations {
        std::hint::black_box(pattern.find(std::hint::black_box(text)));
    }
    let find_duration = start.elapsed();

    // is_match is typically the faster of the two when only presence matters;
    // how much faster depends on the pattern and the haystack, so measure.
    println!("is_match: {:?}", is_match_duration);
    println!("find: {:?}", find_duration);
}

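is_match may let the engine do less work because it does not have to locate the start and end of the match; not building the small Match value is only a minor part of the saving.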

Why is_match is Faster

use regex::Regex;
 
fn why_faster() {
    // is_match optimizations:
    // 1. No need to track match boundaries
    // 2. No need to extract matched text
    // 3. Can stop at first successful match attempt
    // 4. Simpler internal state machine operation
    
    let pattern = Regex::new(r"error|warning|info").unwrap();
    let text = "System running normally";
    
    // is_match: "Is there any match?" -> bool
    // Internal: Run regex, return true on first match
    
    // find: "Where is the match?" -> Option<Match>
    // Internal: Run regex, track start/end positions, construct Match
    
    // The overhead is small but measurable in tight loops
}

is_match skips work related to tracking and returning match details.

When to Use is_match

use regex::Regex;
 
/// Typical situations where a yes/no answer from `is_match` is all you need.
///
/// The helpers that use the locally compiled regexes are closures: a nested
/// `fn` item cannot capture variables from the enclosing function, so the
/// `fn`-based form does not compile (error E0434).
fn is_match_use_cases() {
    let email_pattern = Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap();
    let profanity_filter = Regex::new(r"(?i)\b(badword1|badword2|badword3)\b").unwrap();
    let has_digits = Regex::new(r"\d").unwrap();

    // Use case 1: Validation
    // NOTE: the pattern is unanchored, so this accepts any string that
    // *contains* an email-like token; anchor with `^...$` for strict checks.
    let validate_email = |email: &str| email_pattern.is_match(email);

    // Use case 2: Content filtering
    let contains_profanity = |text: &str| profanity_filter.is_match(text);

    // Use case 3: Feature detection
    let has_numbers = |text: &str| has_digits.is_match(text);

    // Use case 4: Conditional branching
    let process_text = |text: &str| {
        if email_pattern.is_match(text) {
            println!("Contains email");
        } else {
            println!("No email found");
        }
    };

    // Use case 5: Early exit in loops
    // This one receives its regex as a parameter, so a plain `fn` is fine.
    // `any` stops at the first item that matches.
    fn any_match(items: &[&str], pattern: &Regex) -> bool {
        items.iter().any(|item| pattern.is_match(item))
    }

    // Exercise each helper so the example is runnable end to end.
    assert!(validate_email("user@example.com"));
    assert!(!contains_profanity("perfectly polite text"));
    assert!(has_numbers("abc123"));
    process_text("write to user@example.com");
    assert!(any_match(&["abc", "a1c"], &has_digits));
}

Use is_match when you only need a yes/no answer about presence.

When to Use find

use regex::Regex;
 
/// Situations that need the location or the text of a match, and therefore
/// call for `find` instead of `is_match`.
fn find_use_cases() {
    let year_re = Regex::new(r"\d{4}").unwrap();
    let haystack = "Years: 2020, 2021, 2022";

    // Use case 1: pull out the matched text and parse it.
    if let Some(hit) = year_re.find(haystack) {
        let first_year: i32 = hit.as_str().parse().unwrap();
        println!("First year: {}", first_year);
    }

    // Use case 2: wrap the first match in brackets, leaving the rest intact.
    fn highlight(text: &str, pattern: &Regex) -> String {
        match pattern.find(text) {
            Some(hit) => {
                let before = &text[..hit.start()];
                let after = &text[hit.end()..];
                format!("{}[{}]{}", before, hit.as_str(), after)
            }
            None => text.to_string(),
        }
    }

    // Use case 3: splice a replacement over the first match.
    fn replace_first(text: &str, pattern: &Regex, replacement: &str) -> String {
        let hit = match pattern.find(text) {
            Some(hit) => hit,
            // Nothing to replace: hand back an owned copy of the input.
            None => return text.to_string(),
        };

        let mut out = String::with_capacity(text.len() + replacement.len());
        out.push_str(&text[..hit.start()]);
        out.push_str(replacement);
        out.push_str(&text[hit.end()..]);
        out
    }

    // Use case 4: report the byte range of the first match.
    fn process_match(text: &str, pattern: &Regex) {
        match pattern.find(text) {
            Some(hit) => {
                let (from, to) = (hit.start(), hit.end());
                println!("Match at {}..{}", from, to);
                // The offsets can drive further text manipulation from here.
            }
            None => {}
        }
    }
}

Use find when you need location or content of the match.

Combining with find_iter for All Matches

use regex::Regex;
 
/// Contrasts `find` (first match only) with `find_iter` (every
/// non-overlapping match, in order).
fn find_all_matches() {
    let pattern = Regex::new(r"\d+").unwrap();
    let text = "Found 42 items in 7 categories with 100 total";

    // find gives first match only
    let first = pattern.find(text);
    assert_eq!(first.map(|m| m.as_str()), Some("42"));

    // find_iter gives all matches
    let all_numbers: Vec<&str> = pattern.find_iter(text).map(|m| m.as_str()).collect();
    assert_eq!(all_numbers, vec!["42", "7", "100"]);

    // No `is_match` guard is needed before iterating: `find_iter` simply
    // yields nothing when there is no match, and a guard would make the
    // engine scan the haystack a second time.
    for m in pattern.find_iter(text) {
        println!("Number: {}", m.as_str());
    }
}

find returns first match; use find_iter for all matches.

is_match vs find vs captures

use regex::Regex;
 
/// Runs the same pattern through `is_match`, `find` and `captures` to show
/// how much information each one returns.
fn method_comparison() {
    let email_re = Regex::new(r"(\w+)@(\w+\.\w+)").unwrap();
    let haystack = "Contact: user@example.com for help";

    // is_match: a bare bool, the cheapest answer.
    println!("Has email: {}", email_re.is_match(haystack));

    // find: byte range plus matched text, at moderate cost.
    if let Some(span) = email_re.find(haystack) {
        let (from, to) = (span.start(), span.end());
        println!("Found at {}..{}: {}", from, to, span.as_str());
    }

    // captures: every group is resolved, which makes it the costliest call.
    if let Some(groups) = email_re.captures(haystack) {
        let whole = &groups[0];
        let user = &groups[1];
        let domain = &groups[2];
        println!("Full match: {}", whole);
        println!("Username: {}", user);
        println!("Domain: {}", domain);
    }
}

Choose based on information needed: is_match for bool, find for location, captures for groups.

Optimization Behavior

use regex::Regex;
 
fn optimization_behavior() {
    // Both methods use the same regex engine
    // But is_match can short-circuit more aggressively
    
    let pattern = Regex::new(r"error|warning").unwrap();
    let text = "No issues found";
    
    // is_match: Stop as soon as match is confirmed
    // find: Continue to find match boundaries
    
    // For anchored patterns, both are similar
    let anchored = Regex::new(r"^\d+").unwrap();
    // is_match and find have similar performance for anchored patterns
    // because the match location is known immediately
    
    // For unanchored patterns, is_match has advantage
    let unanchored = Regex::new(r"\d+").unwrap();
    // find must determine where in the string the match starts
    // is_match just confirms existence
}

is_match can short-circuit earlier in the matching process.

Short-Circuit Evaluation

use regex::Regex;
 
/// Shows that a presence check can succeed almost immediately when a match
/// sits near the start of the haystack.
fn short_circuit_patterns() {
    // A wide, case-sensitive alternation over the lowercase ASCII letters
    // (it matches the same single characters as the class `[a-z]`).
    let pattern = Regex::new(r"a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z").unwrap();

    let text = "The quick brown fox";

    // 'T' at offset 0 is uppercase, so it does not match; the first match is
    // 'h' at offset 1. `is_match` can report `true` as soon as that is seen.
    let has_match = pattern.is_match(text);
    assert!(has_match);

    // `find` reaches the same match but also has to report where it is.
    let first = pattern.find(text).expect("text contains lowercase letters");
    assert_eq!((first.start(), first.as_str()), (1, "h"));
}

Both methods benefit from regex optimization, but is_match has less post-match work.

Practical Filtering Example

use regex::Regex;
 
/// A set of compiled patterns used to flag suspicious content.
struct ContentFilter {
    /// Patterns are tried in this order; the index identifies a rule.
    patterns: Vec<Regex>,
}

impl ContentFilter {
    /// Builds the filter with its built-in rules.
    ///
    /// # Panics
    /// Panics if one of the hard-coded patterns fails to compile, which
    /// would be a bug in this function.
    fn new() -> Self {
        ContentFilter {
            patterns: vec![
                Regex::new(r"(?i)\b(spam|scam|phish)\b").unwrap(),
                Regex::new(r"(?i)\b(malware|virus|trojan)\b").unwrap(),
                Regex::new(r"(?i)https?://[^\s]+").unwrap(), // URLs
            ],
        }
    }

    /// is_match: returns `true` if `text` matches any pattern.
    /// `any` stops at the first pattern that matches.
    fn is_flagged(&self, text: &str) -> bool {
        self.patterns.iter().any(|p| p.is_match(text))
    }

    /// find: returns the index of the first pattern that matches, together
    /// with the matched text, or `None` if no pattern matches.
    ///
    /// The explicit lifetime `'t` is required: the returned slice borrows
    /// from `text`, whereas lifetime elision would tie it to `&self` and the
    /// method would fail to compile.
    fn find_violation<'t>(&self, text: &'t str) -> Option<(usize, &'t str)> {
        self.patterns
            .iter()
            .enumerate()
            .find_map(|(idx, pattern)| pattern.find(text).map(|m| (idx, m.as_str())))
    }
}
 
/// Runs a sample message through the filter twice: once for a yes/no
/// verdict, once to learn which rule fired and on what text.
fn main() {
    let content_filter = ContentFilter::new();
    let message = "This is spam content with a link";

    // Presence only.
    let flagged = content_filter.is_flagged(message);
    if flagged {
        println!("Content flagged!");
    }

    // Rule index and offending text.
    match content_filter.find_violation(message) {
        Some((rule, offending)) => println!("Pattern {} matched: '{}'", rule, offending),
        None => {}
    }
}

Use is_match for filtering, find when you need details.

Memory Considerations

use regex::Regex;
 
/// Compares the size of what `is_match` and `find` hand back to the caller.
fn memory_comparison() {
    let pattern = Regex::new(r"\d{4}-\d{2}-\d{2}").unwrap();
    let text = "Date: 2024-03-15";

    // is_match: returns a plain bool (1 byte).
    let has_date: bool = pattern.is_match(text);

    // find: returns Option<Match>.
    // In regex 1.x a Match borrows the haystack as a `&str` (pointer plus
    // length) and stores the start and end offsets: four machine words,
    // not three.
    let maybe_match: Option<regex::Match> = pattern.find(text);

    // Both results are small values returned directly; the result itself
    // needs no heap allocation. The size gap is real but tiny, and the main
    // cost difference between the two calls lies in the search.
    println!(
        "bool: {} byte(s), Option<Match>: {} byte(s)",
        std::mem::size_of::<bool>(),
        std::mem::size_of::<Option<regex::Match<'_>>>()
    );

    assert!(has_date);
    assert!(maybe_match.is_some());
}

is_match returns minimal data; find constructs a Match object.

Negation Patterns

use regex::Regex;
 
fn negation_patterns() {
    let pattern = Regex::new(r"error").unwrap();
    let logs = vec
!["System started", "Error in module", "System running"]
;
    
    // Check which logs DON'T contain "error"
    let clean_logs: Vec<_> = logs.iter()
        .filter(|log| !pattern.is_match(log))
        .collect();
    
    assert_eq!(clean_logs, vec
!["System started", "System running"]
);
    
    // is_match is natural for negation
    // "not contains" is a common pattern
}

is_match is natural for negation patterns (does NOT contain).

Benchmark Results Pattern

use regex::Regex;
 
/// Summarises how the gap between `is_match` and `find` tends to vary with
/// the pattern and the haystack. The remarks are qualitative; measure your
/// own workload before relying on any particular figure.
fn benchmark_pattern() {
    let simple = Regex::new(r"hello").unwrap();
    let complex = Regex::new(r"(?i)\b[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}\b").unwrap();
    let text_short = "hello world";
    let text_long = "This is a longer text without the pattern but some content.";

    // Simple pattern, short text: the difference is minimal.
    assert!(simple.is_match(text_short));

    // Pattern does NOT match: both methods have to rule out the whole
    // haystack, so they cost about the same. There is no match for
    // is_match to stop early on.
    assert!(!complex.is_match(text_long));
    assert!(complex.find(text_long).is_none());

    // Pattern DOES match: this is where is_match can win. It may return as
    // soon as a match is known to exist, while find must still settle where
    // the match starts and ends. The gap tends to grow with more complex
    // patterns and longer matches.
}

Performance difference varies based on pattern complexity and text length.

Complete Example: Log Analyzer

use regex::Regex;
 
/// Compiled patterns for classifying log lines and pulling dates out of them.
struct LogAnalyzer {
    error_pattern: Regex,
    warning_pattern: Regex,
    date_pattern: Regex,
}

impl LogAnalyzer {
    /// Compiles the three patterns once so they can be reused for every line.
    ///
    /// # Panics
    /// Panics if one of the hard-coded patterns fails to compile, which
    /// would be a bug in this function.
    fn new() -> Self {
        LogAnalyzer {
            error_pattern: Regex::new(r"ERROR").unwrap(),
            warning_pattern: Regex::new(r"WARNING").unwrap(),
            date_pattern: Regex::new(r"\d{4}-\d{2}-\d{2}").unwrap(),
        }
    }

    /// is_match for simple filtering: does the line contain `ERROR`?
    fn has_error(&self, log: &str) -> bool {
        self.error_pattern.is_match(log)
    }

    /// is_match for simple filtering: does the line contain `WARNING`?
    fn has_warning(&self, log: &str) -> bool {
        self.warning_pattern.is_match(log)
    }

    /// find for extraction: returns the first `YYYY-MM-DD`-shaped token in
    /// the line, if any.
    ///
    /// The explicit lifetime `'t` is required: the returned slice borrows
    /// from `log`, whereas lifetime elision would tie it to `&self` and the
    /// method would fail to compile.
    fn extract_date<'t>(&self, log: &'t str) -> Option<&'t str> {
        self.date_pattern.find(log).map(|m| m.as_str())
    }

    /// Combination: filter then extract.
    ///
    /// Returns `(error_count, warning_count, dates)`, where `dates` holds
    /// the first date of every line that has one, in input order. The date
    /// slices borrow from the log lines (`'t`), not from the analyzer.
    fn analyze<'t>(&self, logs: &[&'t str]) -> (usize, usize, Vec<&'t str>) {
        let error_count = logs.iter().filter(|log| self.has_error(log)).count();

        let warning_count = logs.iter().filter(|log| self.has_warning(log)).count();

        // `|&log|` copies the `&'t str` out of the slice so the extracted
        // date keeps the lines' lifetime rather than the slice borrow's.
        let dates: Vec<&'t str> = logs
            .iter()
            .filter_map(|&log| self.extract_date(log))
            .collect();

        (error_count, warning_count, dates)
    }
}
 
/// Feeds a handful of sample log lines to the analyzer and prints the
/// counts and the extracted dates.
fn main() {
    let sample_logs = [
        "2024-03-15 ERROR: Connection failed",
        "2024-03-15 WARNING: High memory",
        "2024-03-16 INFO: System ok",
        "ERROR: Critical failure",
    ];
    let log_analyzer = LogAnalyzer::new();

    // The report is the tuple (error count, warning count, dates).
    let report = log_analyzer.analyze(&sample_logs);
    println!("Errors: {}, Warnings: {}", report.0, report.1);
    println!("Dates: {:?}", report.2);
}

Use is_match for counting/filtering, find for extraction.

Synthesis

Quick reference:

| Method | Returns | Use Case | Performance |
|---|---|---|---|
| is_match | bool | Presence only | Fastest |
| find | Option<Match> | Location/content needed | Slightly slower |
| find_iter | Matches iterator | All matches | Variable |
| captures | Option<Captures> | Capture groups needed | Slowest |

Decision tree:

use regex::Regex;
 
/// Decision guide, written as commentary, followed by one call of each of
/// the three most common methods.
fn choose_method(pattern: &Regex, text: &str) {
    // Question                          Method
    // --------------------------------  ----------------------------
    // Does the pattern occur at all?    is_match
    // WHERE is the match?               find
    // WHAT text matched?                find, then `m.as_str()`
    // What did each group capture?      captures
    // What are ALL the matches?         find_iter

    // Example:
    // Presence only.
    let is_present = pattern.is_match(text);
    // First match, if any: Option<Match>.
    let first_hit = pattern.find(text);
    // Every match, gathered into a Vec<Match>.
    let every_hit: Vec<_> = pattern.find_iter(text).collect();
}

Key insight: is_match and find are semantically similar—both answer "does this pattern match?"—but differ in what they return and their internal optimizations. is_match returns only a boolean, allowing the regex engine to skip tracking match boundaries and constructing a Match object. This makes it the preferred choice for pure presence detection in validation, filtering, conditional branching, or any code that only needs to know whether a pattern matches. find is necessary when you need the match location (for highlighting, replacement, or further processing) or the matched content (for extraction, parsing, or transformation). The performance difference is usually modest and depends heavily on the pattern and the haystack, so measure it on your own workload, but it adds up in hot loops or high-throughput scenarios. A common pattern is using is_match for filtering (selecting which strings to process) and then find or find_iter on the filtered results for extraction. Both methods benefit from regex engine optimizations like quick rejection for non-matching strings, but is_match has slightly less work to do after finding a match.