What is the purpose of glob::Pattern::compile for reusing compiled glob patterns?

The glob crate does not actually provide a Pattern::compile method; its constructor is glob::Pattern::new, and that is the "compile" step discussed here. Pattern::new parses a glob pattern string into a reusable pattern, avoiding the overhead of repeated parsing when the same pattern is used multiple times. Parsing transforms a human-readable glob string into an internal token representation that can be matched against paths. Building the pattern once matters for performance when matching against many paths or when a pattern is used repeatedly in loops.

The Compile Step

use glob::Pattern;
 
/// Parses a glob once and reuses the resulting `Pattern` for several matches.
fn basic_compilation() {
    // `Pattern::new` is the glob crate's constructor: it parses the glob
    // string into the pattern's internal representation.
    let pattern = Pattern::new("src/**/*.rs").unwrap();

    // Now the pattern can be used to match paths
    assert!(pattern.matches("src/main.rs"));
    assert!(pattern.matches("src/lib/mod.rs"));
    assert!(!pattern.matches("test/main.rs"));

    // The parsed pattern is reusable
    for path in &["src/a.rs", "src/b.rs", "src/sub/c.rs"] {
        if pattern.matches(path) {
            println!("Matched: {}", path);
        }
    }
}

Pattern::new is the glob crate's constructor (the crate has no separate Pattern::compile); it creates a reusable pattern object from a glob string.

Compilation Cost vs. Reuse

use glob::Pattern;
use std::time::Instant;
 
fn performance_comparison() {
    let paths: Vec<&str> = (0..10_000)
        .map(|i| format!("src/module{}/file{}.rs", i % 100, i % 50))
        .collect()
        .iter()
        .map(|s| s.as_str())
        .collect();
    
    // BAD: Compiling inside the loop
    let start = Instant::now();
    for path in &paths {
        // This compiles the pattern EVERY iteration!
        let pattern = Pattern::compile("src/**/*.rs").unwrap();
        let _ = pattern.matches(path);
    }
    let compile_inside_loop = start.elapsed();
    println!("Compile inside loop: {:?}", compile_inside_loop);
    
    // GOOD: Compile once, reuse many times
    let start = Instant::now();
    let pattern = Pattern::compile("src/**/*.rs").unwrap();
    for path in &paths {
        let _ = pattern.matches(path);
    }
    let compile_once = start.elapsed();
    println!("Compile once: {:?}", compile_once);
    
    // The difference is dramatic:
    // compile_once is typically 10-100x faster for large numbers of matches
    // because compilation involves parsing, validation, and building
    // internal data structures
}
 
/// Illustrates the work `Pattern::new` performs each time it is called.
fn compilation_overhead() {
    // What does constructing a pattern do?
    // 1. Walk the glob string and turn it into tokens
    // 2. Validate syntax (unclosed `[`, misplaced `**`, `***`)
    // 3. Store the tokens in the returned `Pattern`

    // Longer patterns mean more characters to tokenize:
    let _simple = Pattern::new("*.rs").unwrap();
    let _complex = Pattern::new("src/**/test/**/*.rs").unwrap();

    // Patterns with character classes also parse the class contents:
    let _with_class = Pattern::new("file[0-9].txt").unwrap();

    // Each of these requires parsing work that can be avoided by reuse
}

Compiling once and reusing is dramatically faster than recompiling on every match.

Pattern Matching Operations

use glob::Pattern;
use std::path::Path;
 
fn matching_operations() {
    let pattern = Pattern::compile("src/**/*.rs").unwrap();
    
    // matches: Check if pattern matches the entire path
    assert!(pattern.matches("src/main.rs"));
    assert!(pattern.matches("src/lib/module.rs"));
    assert!(pattern.matches("src/a/b/c/d.rs"));
    assert!(!pattern.matches("test/main.rs"));
    assert!(!pattern.matches("src"));  // Doesn't match directories
    
    // matches_path: Same but takes a Path
    assert!(pattern.matches_path(Path::new("src/main.rs")));
    
    // matches_with: Returns a Matched structure with capture info
    if let Some(matched) = pattern.matches_with("src/main.rs") {
        println!("Matched with: {:?}", matched);
        // Can inspect what was matched
    }
    
    // Pattern can match multiple paths:
    let pattern = Pattern::compile("*.txt").unwrap();
    assert!(pattern.matches("a.txt"));
    assert!(pattern.matches("b.txt"));
    assert!(pattern.matches("any.txt"));
}

A compiled pattern supports multiple matching methods that can be called repeatedly.

Glob Pattern Syntax

use glob::Pattern;
 
fn pattern_syntax() {
    // * matches any sequence of characters except path separator
    let star = Pattern::compile("*.rs").unwrap();
    assert!(star.matches("main.rs"));
    assert!(star.matches("lib.rs"));
    assert!(!star.matches("src/main.rs"));  // * doesn't cross /
    
    // ** matches any sequence including path separators
    let double_star = Pattern::compile("src/**/*.rs").unwrap();
    assert!(double_star.matches("src/main.rs"));
    assert!(double_star.matches("src/lib/mod.rs"));
    assert!(double_star.matches("src/a/b/c.rs"));
    
    // ? matches single character
    let question = Pattern::compile("file?.txt").unwrap();
    assert!(question.matches("file1.txt"));
    assert!(question.matches("fileA.txt"));
    assert!(!question.matches("file10.txt"));
    
    // [...] character class
    let char_class = Pattern::compile("file[0-9].txt").unwrap();
    assert!(char_class.matches("file0.txt"));
    assert!(char_class.matches("file5.txt"));
    assert!(!char_class.matches("fileA.txt"));
    
    // [!] negated character class
    let negated = Pattern::compile("file[!0-9].txt").unwrap();
    assert!(negated.matches("fileA.txt"));
    assert!(!negated.matches("file5.txt"));
    
    // {a,b} alternatives
    let alt = Pattern::compile("*.{rs,toml}").unwrap();
    assert!(alt.matches("main.rs"));
    assert!(alt.matches("Cargo.toml"));
    assert!(!alt.matches("main.c"));
}

Understanding glob syntax helps write patterns that compile correctly.

Storing Compiled Patterns

use glob::Pattern;
use std::collections::HashMap;
 
// Pattern: Store patterns in structs for reuse
/// Include/exclude filter backed by glob patterns that are parsed once.
struct FileFilter {
    /// A path must match at least one of these to be included.
    include_patterns: Vec<Pattern>,
    /// A path matching any of these is rejected even if it is included.
    exclude_patterns: Vec<Pattern>,
}

impl FileFilter {
    /// Parses every include and exclude glob up front.
    ///
    /// # Errors
    ///
    /// Returns the first `PatternError` produced by an invalid glob.
    fn new(include: &[&str], exclude: &[&str]) -> Result<Self, glob::PatternError> {
        let include_patterns = include
            .iter()
            .map(|p| Pattern::new(p))
            .collect::<Result<Vec<_>, _>>()?;

        let exclude_patterns = exclude
            .iter()
            .map(|p| Pattern::new(p))
            .collect::<Result<Vec<_>, _>>()?;

        Ok(FileFilter {
            include_patterns,
            exclude_patterns,
        })
    }

    /// Returns `true` when `path` matches an include pattern and no exclude pattern.
    fn is_included(&self, path: &str) -> bool {
        // Patterns parsed once, used many times
        let included = self.include_patterns.iter().any(|p| p.matches(path));
        let excluded = self.exclude_patterns.iter().any(|p| p.matches(path));
        included && !excluded
    }
}
 
// Pattern: Lazy static compilation
use std::sync::OnceLock;
 
/// Returns a process-wide `*.rs` pattern, parsing it on the first call only.
fn lazy_pattern() -> &'static Pattern {
    static PATTERN: OnceLock<Pattern> = OnceLock::new();
    // The literal is known to be a valid glob, so `expect` documents an invariant.
    PATTERN.get_or_init(|| Pattern::new("*.rs").expect("`*.rs` is a valid glob"))
}
 
/// Exercises the lazily initialised pattern returned by `lazy_pattern`.
fn use_lazy_pattern() {
    // The first call initialises the shared pattern; later calls hand back
    // the same cached instance.
    let cached = lazy_pattern();
    let is_rust_file = cached.matches("main.rs");
    assert!(is_rust_file);
}

Storing compiled patterns in structs or using lazy initialization ensures compilation happens only once.

Pattern Compilation Errors

use glob::Pattern;
 
/// Shows which globs `Pattern::new` rejects and what the error exposes.
fn error_handling() {
    // Invalid patterns return PatternError
    let result = Pattern::new("file[.txt");
    match result {
        Ok(_) => {
            println!("Valid pattern");
        }
        Err(e) => {
            println!("Invalid pattern: {}", e);
            // Error includes position and description
        }
    }

    // Errors the crate reports:

    // Unclosed character class
    assert!(Pattern::new("file[0-9.txt").is_err());

    // More than two consecutive stars
    assert!(Pattern::new("***").is_err());

    // `**` that is not a whole path component
    assert!(Pattern::new("a**b").is_err());

    // NOT errors in this crate:

    // A reversed range is accepted at parse time
    assert!(Pattern::new("file[9-0].txt").is_ok());

    // Braces are literal characters, so an unclosed `{` is fine
    assert!(Pattern::new("*.{rs,toml").is_ok());

    // Valid patterns:
    assert!(Pattern::new("*.rs").is_ok());
    assert!(Pattern::new("src/**/*.rs").is_ok());
    assert!(Pattern::new("file[0-9].txt").is_ok());

    // Error provides useful information:
    match Pattern::new("invalid[") {
        Err(e) => {
            println!("Error at position {}", e.pos);
            println!("Message: {}", e.msg);
        }
        Ok(_) => unreachable!(),
    }
}

Compilation errors indicate invalid glob syntax—handle these at startup, not in hot paths.

Integration with glob::glob

use glob::{glob, Pattern};
 
/// Contrasts `glob::glob` (filesystem traversal) with matching strings against a `Pattern`.
fn glob_function_comparison() {
    // glob::glob: Combines pattern parsing and filesystem iteration.
    // The pattern string is parsed on EVERY call.
    for entry in glob("src/**/*.rs").unwrap() {
        if let Ok(path) = entry {
            println!("Found: {:?}", path);
        }
    }

    // For repeated use, build the pattern separately:
    let pattern = Pattern::new("src/**/*.rs").unwrap();

    // Then use pattern.matches() on paths you iterate.
    // read_dir is not recursive: only direct children of `src` are tested.
    for entry in std::fs::read_dir("src").unwrap() {
        let path = entry.unwrap().path();
        let path_str = path.to_string_lossy();
        if pattern.matches(&path_str) {
            println!("Matches pattern: {:?}", path);
        }
    }

    // Key difference:
    // - glob::glob: filesystem iteration + pattern matching
    // - Pattern::new + matches: just pattern matching on strings

    // Use glob::glob when you need filesystem traversal
    // Use Pattern::new when you have strings/paths already
}
 
/// Shows that `Pattern` matching is pure string matching with no filesystem access.
fn filesystem_vs_string_matching() {
    // Pattern::new builds a matcher for strings
    let pattern = Pattern::new("**/*.rs").unwrap();

    // This matches strings, doesn't access filesystem:
    assert!(pattern.matches("src/main.rs"));
    assert!(pattern.matches("lib/mod.rs"));

    // glob::glob actually traverses filesystem:
    // for entry in glob("src/**/*.rs")? { ... }

    // Use Pattern::new when:
    // - You have string paths (from configs, user input, etc.)
    // - You're filtering paths from other sources
    // - You want to check if paths match patterns

    // Use glob::glob when:
    // - You need to find files on filesystem
    // - You want filesystem traversal with pattern filtering
}

Pattern::new builds a pattern for matching strings; glob::glob combines pattern parsing with filesystem traversal.

Real-World Use Cases

use glob::Pattern;
use std::path::Path;
 
// Use case 1: Configuration file patterns
/// Matches paths against a set of glob patterns loaded from configuration.
struct PathMatcher {
    /// Patterns parsed once when the matcher is built.
    patterns: Vec<Pattern>,
}

impl PathMatcher {
    /// Parses all configured globs once at startup.
    ///
    /// # Errors
    ///
    /// Returns the first `PatternError` produced by an invalid glob.
    fn from_config(config_patterns: &[String]) -> Result<Self, glob::PatternError> {
        let patterns = config_patterns
            .iter()
            .map(|p| Pattern::new(p))
            .collect::<Result<Vec<_>, _>>()?;

        Ok(PathMatcher { patterns })
    }

    /// Returns `true` if any stored pattern matches `path`.
    fn matches_any(&self, path: &str) -> bool {
        // No parsing happens here; the patterns were built in `from_config`.
        self.patterns.iter().any(|p| p.matches(path))
    }
}
 
// Use case 2: Build system file filtering
/// Decides which files a build should compile, using fixed glob patterns.
struct BuildFilter {
    /// Globs that identify source files.
    source_patterns: Vec<Pattern>,
    /// Globs that identify files to skip (test files).
    exclude_patterns: Vec<Pattern>,
}

impl BuildFilter {
    /// Builds the filter from hard-coded globs.
    ///
    /// # Panics
    ///
    /// Panics if one of the literal globs is invalid, which would be a bug in
    /// this function rather than a runtime condition.
    fn new() -> Self {
        BuildFilter {
            source_patterns: vec![
                Pattern::new("src/**/*.rs").unwrap(),
                Pattern::new("lib/**/*.rs").unwrap(),
            ],
            exclude_patterns: vec![
                Pattern::new("**/test_*.rs").unwrap(),
                Pattern::new("**/*_test.rs").unwrap(),
            ],
        }
    }

    /// Returns `true` when `path` is a source file that is not excluded.
    fn should_compile(&self, path: &str) -> bool {
        let is_source = self.source_patterns.iter().any(|p| p.matches(path));
        let is_excluded = self.exclude_patterns.iter().any(|p| p.matches(path));
        is_source && !is_excluded
    }
}
 
// Use case 3: Multiple pattern types
/// Classifies paths by file type using one pre-built pattern per type.
struct FileClassifier {
    rust_pattern: Pattern,
    toml_pattern: Pattern,
    md_pattern: Pattern,
}

impl FileClassifier {
    /// Builds the classifier from hard-coded globs.
    ///
    /// # Panics
    ///
    /// Panics if one of the literal globs is invalid, which would be a bug in
    /// this function rather than a runtime condition.
    fn new() -> Self {
        FileClassifier {
            rust_pattern: Pattern::new("**/*.rs").unwrap(),
            toml_pattern: Pattern::new("**/*.toml").unwrap(),
            md_pattern: Pattern::new("**/*.md").unwrap(),
        }
    }

    /// Returns a label for the first pattern that matches `path`, or `None`.
    ///
    /// Patterns are tried in the order rust, toml, markdown.
    fn classify(&self, path: &str) -> Option<&'static str> {
        // All patterns built once, checked many times
        if self.rust_pattern.matches(path) {
            Some("rust")
        } else if self.toml_pattern.matches(path) {
            Some("toml")
        } else if self.md_pattern.matches(path) {
            Some("markdown")
        } else {
            None
        }
    }
}

Configuration, build systems, and file classification benefit from pre-compiled patterns.

Performance Best Practices

use glob::Pattern;
 
/// Collects the recommended ways to build a `Pattern` once and reuse it.
fn best_practices() {
    // Sample input for the loops below.
    let paths = ["main.rs", "lib.rs", "README.md"];

    // 1. Build patterns at startup or first use
    let pattern = Pattern::new("*.rs").unwrap();

    // 2. Store built patterns in structs
    struct Matcher {
        pattern: Pattern, // Parsed once
    }
    let matcher = Matcher { pattern };
    assert!(matcher.pattern.matches("main.rs"));

    // 3. Use lazy initialization for global patterns
    use std::sync::OnceLock;
    static GLOBAL_PATTERN: OnceLock<Pattern> = OnceLock::new();

    fn get_pattern() -> &'static Pattern {
        GLOBAL_PATTERN.get_or_init(|| Pattern::new("src/**/*.rs").unwrap())
    }
    assert!(get_pattern().matches("src/main.rs"));

    // 4. Validate patterns at config load time
    fn load_patterns(config: &[&str]) -> Result<Vec<Pattern>, glob::PatternError> {
        config.iter().map(|p| Pattern::new(p)).collect()
    }
    assert!(load_patterns(&["*.rs", "*.toml"]).is_ok());
    assert!(load_patterns(&["*.rs", "[bad"]).is_err());

    // 5. Avoid parsing in loops
    // BAD:
    for path in &paths {
        let p = Pattern::new("*.rs").unwrap(); // Don't do this!
        if p.matches(path) { /* ... */ }
    }

    // GOOD:
    let p = Pattern::new("*.rs").unwrap(); // Parse once
    for path in &paths {
        if p.matches(path) { /* ... */ }
    }
}

Compile patterns once, use many times—avoid compilation in hot paths.

Pattern vs. Regex

use glob::Pattern;
 
/// Compares glob patterns with regular expressions for path matching.
fn pattern_vs_regex() {
    // glob::Pattern: Glob-style matching (simpler, file-focused)
    let glob_pattern = Pattern::new("src/**/*.rs").unwrap();
    assert!(glob_pattern.matches("src/lib/mod.rs"));

    // regex: Regular expressions (more powerful, general purpose)
    // let regex = regex::Regex::new(r"src/.*\.rs").unwrap();
    // assert!(regex.is_match("src/lib/mod.rs"));

    // Use glob::Pattern when:
    // - Matching file paths
    // - User-provided glob patterns (from config, CLI)
    // - Standard glob syntax is sufficient
    // - Simpler syntax is preferred

    // Use regex when:
    // - Complex pattern matching needed
    // - Character-level matching
    // - Advanced features (groups, alternations, etc.)
    // - Not specifically for file paths

    // Performance: which is faster depends on the pattern and the input,
    // so benchmark with representative data before choosing on speed alone.
}
 
fn when_glob_fits() {
    // Glob patterns are ideal for file matching:
    let patterns = vec![
        "*.rs",           // Rust files
        "src/**/main.rs", // main.rs anywhere in src
        "target/**/*.rlib", // Build artifacts
        ".*",             // Hidden files
    ];
    
    // Each pattern is intuitive for file matching
    // Users understand glob syntax from shells
    // Direct translation from CLI arguments to Pattern::compile
}

Glob patterns are specialized for file paths; use regex for general pattern matching.

Synthesis

Quick reference:

use glob::Pattern;
 
// Construction (the crate's only constructor; there is no Pattern::compile):
let pattern = Pattern::new("src/**/*.rs")?;

// Usage:
pattern.matches("src/main.rs");  // bool
pattern.matches_path(Path::new("src/main.rs"));  // bool
pattern.matches_with("src/main.rs", glob::MatchOptions::new());  // bool

// Best practices:
// ✅ Build once, use many times
// ✅ Store in structs for reuse
// ✅ Validate at startup/config load
// ✅ Use lazy initialization for global patterns
// ❌ Build inside loops
// ❌ Build on every function call
// ❌ Ignore pattern errors

// Performance characteristics:
// Construction: a single pass over the pattern string
// Matching: cost grows with path length and the number of wildcards
// Memory: stores the parsed representation of the pattern

// Pattern syntax:
// *      - any characters (excludes / only with require_literal_separator)
// **     - current directory and arbitrary subdirectories
// ?      - single character
// [abc]  - character class
// [a-z]  - character range
// [!abc] - negated class
// {a,b}  - NOT supported; braces are literal characters

// Error handling:
match Pattern::new("invalid[") {
    Ok(pattern) => { /* use pattern */ },
    Err(e) => {
        println!("Error at position {}: {}", e.pos, e.msg);
    }
}

Key insight: Pattern::new (the glob crate has no separate Pattern::compile) transforms a glob string into a reusable matcher, amortizing the parsing cost across many matches. Construction parses the pattern syntax, validates it, and builds the internal representation used for matching. This matters for performance when matching many paths—the parsing cost is paid once, while matching is repeated. Store built patterns in structs, use lazy initialization for globals, and validate patterns at configuration load time rather than in hot paths. The distinction between Pattern::new (string matching) and glob::glob (filesystem traversal) matters: use a Pattern when you have strings to match, use glob::glob when you need to find files on disk.