How do I match file patterns with glob in Rust?

Walkthrough

The glob crate provides support for matching file paths against Unix shell-style patterns. It allows you to search for files and directories using wildcards like *, ?, and **. The crate handles the differences between Unix and Windows path separators automatically and provides an iterator-based API for traversing matched paths. Glob patterns are useful for build scripts, file processing tools, and any application that needs to discover files matching certain patterns.

Key concepts:

  1. Wildcards: * matches any sequence of characters, ? matches any single character
  2. Recursive matching: ** matches any number of directories
  3. Character classes: [abc] matches any character in the set
  4. Ranges: [a-z] matches characters in a range
  5. Negation: [!abc] matches characters NOT in the set

Code Example

# Cargo.toml
[dependencies]
glob = "0.3"
use glob::glob;
 
fn main() {
    // Every `.rs` file at any depth below `src`; a malformed pattern aborts
    // immediately with the message given to `expect`.
    let matches = glob("src/**/*.rs").expect("Failed to read glob pattern");

    // Each item is a `Result`: a path that matched, or an error hit while
    // reading the directory tree.
    matches.for_each(|outcome| match outcome {
        Err(problem) => println!("Error: {:?}", problem),
        Ok(found) => println!("Found: {:?}", found.display()),
    });
}

Basic Wildcard Matching

use glob::glob;
 
fn main() {
    println!("All files in current directory:");
    for entry in glob("*").unwrap() {
        if let Ok(path) = entry {
            println!("  {}", path.display());
        }
    }
    
    println!("\nAll .rs files:");
    for entry in glob("*.rs").unwrap() {
        if let Ok(path) = entry {
            println!("  {}", path.display());
        }
    }
    
    println!("\nFiles starting with 'main':");
    for entry in glob("main*").unwrap() {
        if let Ok(path) = entry {
            println!("  {}", path.display());
        }
    }
}

Single Character Wildcard

use glob::glob;
 
fn main() {
    // ? matches exactly one character
    println!("Files like fileX.txt (single char): ");
    for entry in glob("file?.txt").unwrap() {
        if let Ok(path) = entry {
            println!("  {}", path.display());
        }
    }
    
    // Multiple ? for multiple characters
    println!("\nFiles like testXX.txt:");
    for entry in glob("test??.txt").unwrap() {
        if let Ok(path) = entry {
            println!("  {}", path.display());
        }
    }
}

Recursive Directory Matching

use glob::glob;
 
fn main() {
    // A `**` component stands for any number of directory levels.
    println!("All Rust files recursively:");
    glob("**/*.rs")
        .unwrap()
        .filter_map(Result::ok)
        .for_each(|found| println!("  {}", found.display()));

    println!("\nAll Markdown files in docs:");
    glob("docs/**/*.md")
        .unwrap()
        .filter_map(Result::ok)
        .for_each(|found| println!("  {}", found.display()));

    println!("\nAll files in any src directory:");
    glob("**/src/**/*")
        .unwrap()
        .filter_map(Result::ok)
        .for_each(|found| println!("  {}", found.display()));
}

Character Classes

use glob::glob;
 
fn main() {
    // Match specific characters
    println!("Files starting with a, b, or c:");
    for entry in glob("[abc]*").unwrap() {
        if let Ok(path) = entry {
            println!("  {}", path.display());
        }
    }
    
    // Character ranges
    println!("\nFiles starting with lowercase letter:");
    for entry in glob("[a-z]*").unwrap() {
        if let Ok(path) = entry {
            println!("  {}", path.display());
        }
    }
    
    // Numeric range
    println!("\nFiles like file1.txt to file9.txt:");
    for entry in glob("file[1-9].txt").unwrap() {
        if let Ok(path) = entry {
            println!("  {}", path.display());
        }
    }
    
    // Multiple ranges
    println!("\nFiles starting with letter or digit:");
    for entry in glob("[a-zA-Z0-9]*").unwrap() {
        if let Ok(path) = entry {
            println!("  {}", path.display());
        }
    }
}

Negation in Character Classes

use glob::glob;
 
fn main() {
    // [!abc] matches any single character EXCEPT a, b, or c
    println!("Files NOT starting with a, b, or c:");
    for path in glob("[!abc]*").unwrap().flatten() {
        println!("  {}", path.display());
    }

    // A negated range works the same way: anything but a digit
    println!("\nFiles NOT starting with a digit:");
    for path in glob("[!0-9]*").unwrap().flatten() {
        println!("  {}", path.display());
    }

    // A negated class constrains one character position at a time, so a
    // pattern such as `*[!.][!t][!x][!t]` cannot express "does not end with
    // .txt": it also rejects names shorter than four characters and names
    // like `data.text`. Match everything and filter on the name instead.
    println!("\nFiles NOT ending with .txt:");
    for path in glob("*").unwrap().flatten() {
        let ends_with_txt = path
            .file_name()
            .map_or(false, |name| name.to_string_lossy().ends_with(".txt"));
        if !ends_with_txt {
            println!("  {}", path.display());
        }
    }
}

Combining Patterns

use glob::glob;
 
fn main() {
    // Complex pattern: src directory, Rust files, starting with lowercase
    println!("Rust files in src starting with lowercase:");
    for path in glob("src/[a-z]*.rs").unwrap().flatten() {
        println!("  {}", path.display());
    }

    // Test files anywhere
    println!("\nTest files:");
    for path in glob("**/*test*.rs").unwrap().flatten() {
        println!("  {}", path.display());
    }

    // Brace alternation such as `*.{json,yaml,yml}` is NOT part of the glob
    // crate's syntax (the braces would be matched literally), so run one glob
    // per extension instead.
    println!("\nConfig files (json or yaml):");
    for ext in ["json", "yaml", "yml"].iter() {
        let pattern = format!("config/*.{}", ext);
        for path in glob(&pattern).unwrap().flatten() {
            println!("  {}", path.display());
        }
    }

    // The same idea written out as separate, explicit globs
    println!("\nJSON files:");
    for path in glob("config/*.json").unwrap().flatten() {
        println!("  {}", path.display());
    }
    println!("YAML files:");
    for path in glob("config/*.yaml").unwrap().flatten() {
        println!("  {}", path.display());
    }
}

Pattern Options

use glob::{glob_with, MatchOptions};
 
fn main() {
    // Default options
    let options = MatchOptions::new();
    
    println!("With default options:");
    for entry in glob_with("*.RS", options).unwrap() {
        if let Ok(path) = entry {
            println!("  {}", path.display());
        }
    }
    
    // Case-insensitive matching
    let case_insensitive = MatchOptions {
        case_sensitive: false,
        require_literal_separator: false,
        require_literal_leading_dot: false,
    };
    
    println!("\nCase-insensitive (*.RS matches .rs):");
    for entry in glob_with("*.RS", case_insensitive).unwrap() {
        if let Ok(path) = entry {
            println!("  {}", path.display());
        }
    }
    
    // Require literal separator (don't treat / specially)
    let literal_sep = MatchOptions {
        case_sensitive: true,
        require_literal_separator: true,
        require_literal_leading_dot: false,
    };
    
    println!("\nLiteral separator (no / in *):" );
    for entry in glob_with("src/*.rs", literal_sep).unwrap() {
        if let Ok(path) = entry {
            println!("  {}", path.display());
        }
    }
    
    // Require literal leading dot (don't match hidden files with *)
    let literal_dot = MatchOptions {
        case_sensitive: true,
        require_literal_separator: false,
        require_literal_leading_dot: true,
    };
    
    println!("\nLiteral leading dot (* won't match .hidden):" );
    for entry in glob_with("*", literal_dot).unwrap() {
        if let Ok(path) = entry {
            println!("  {}", path.display());
        }
    }
}

Handling Errors

use glob::glob;
 
fn main() {
    // Pattern errors
    match glob("[invalid") {
        Ok(entries) => {
            for entry in entries {
                println!("{:?}", entry);
            }
        }
        Err(e) => println!("Invalid pattern: {:?}", e),
    }
    
    // Access errors during iteration
    println!("\nSearching for files:");
    for entry in glob("**/*.rs").unwrap() {
        match entry {
            Ok(path) => {
                println!("Found: {}", path.display());
            }
            Err(e) => {
                println!("Error accessing path: {}", e);
            }
        }
    }
}

Working with Results

use glob::glob;
use std::fs;
 
fn main() {
    // Materialise every readable match into a vector
    let rust_everywhere = glob("**/*.rs").unwrap().flatten().collect::<Vec<_>>();
    println!("Found {} Rust files", rust_everywhere.len());

    // When only the number matters, count without storing the paths
    let in_src = glob("src/**/*.rs").unwrap().flatten().count();
    println!("\n{} Rust files in src", in_src);

    // Pair each match with its size; entries whose metadata cannot be read
    // are left out
    println!("\nFile sizes:");
    glob("*.rs")
        .unwrap()
        .flatten()
        .filter_map(|path| fs::metadata(&path).ok().map(|meta| (path, meta.len())))
        .for_each(|(path, bytes)| println!("  {}: {} bytes", path.display(), bytes));
}

Real-World: Find Source Files

use glob::glob;
use std::path::PathBuf;
 
struct SourceFiles {
    rust_files: Vec<PathBuf>,
    header_files: Vec<PathBuf>,
    all_files: usize,
}
 
impl SourceFiles {
    fn scan(root: &str) -> Self {
        let rust_files: Vec<PathBuf> = glob(&format!("{}/**/*.rs", root))
            .unwrap()
            .filter_map(Result::ok)
            .collect();
        
        let header_files: Vec<PathBuf> = glob(&format!("{}/**/*.h", root))
            .unwrap()
            .filter_map(Result::ok)
            .collect();
        
        let all_files = glob(&format!("{}/**/*", root))
            .unwrap()
            .filter_map(Result::ok)
            .count();
        
        Self { rust_files, header_files, all_files }
    }
    
    fn print_summary(&self) {
        println!("Source files summary:");
        println!("  Rust files: {}", self.rust_files.len());
        println!("  Header files: {}", self.header_files.len());
        println!("  Total files: {}", self.all_files);
    }
}
 
fn main() {
    // Scan the current directory and show the aggregate numbers first
    let sources = SourceFiles::scan(".");
    sources.print_summary();

    // Then list each Rust file that was found
    println!("\nRust files:");
    sources
        .rust_files
        .iter()
        .for_each(|rust_file| println!("  {}", rust_file.display()));
}

Real-World: Clean Build Artifacts

use glob::glob;
use std::fs;
use std::path::Path;
 
/// Deletes common build leftovers below the current directory and returns a
/// description of everything that was actually removed.
///
/// Object files (`*.o`) are removed first, then Java `*.class` files, then
/// every directory named `target`. Anything that cannot be read or deleted
/// is silently skipped. Panics only if one of the fixed patterns fails to
/// parse.
fn clean_build_artifacts() -> Vec<String> {
    let mut removed = Vec::new();

    // Plain files: report the path of each one that was deleted
    for &file_pattern in ["**/*.o", "**/*.class"].iter() {
        removed.extend(
            glob(file_pattern)
                .unwrap()
                .flatten()
                .filter(|artifact| fs::remove_file(artifact).is_ok())
                .map(|artifact| artifact.display().to_string()),
        );
    }

    // `target` directories: only real directories are removed, recursively
    removed.extend(
        glob("**/target")
            .unwrap()
            .flatten()
            .filter(|candidate| candidate.is_dir() && fs::remove_dir_all(candidate).is_ok())
            .map(|dir| format!("{} (directory)", dir.display())),
    );

    removed
}
 
fn main() {
    println!("Cleaning build artifacts...");
    let removed = clean_build_artifacts();

    // Nothing deleted: say so and stop
    if removed.is_empty() {
        println!("No build artifacts found.");
        return;
    }

    println!("Removed:");
    removed.iter().for_each(|item| println!("  {}", item));
}

Real-World: Find Duplicate Files

use glob::glob;
use std::collections::HashMap;
use std::fs;
use std::path::PathBuf;
 
/// Groups the regular files matched by `pattern` by their size in bytes and
/// returns only the groups that contain more than one file.
///
/// Equal size is merely a hint that files might be duplicates; contents are
/// not compared. Unreadable entries are skipped. Panics if `pattern` is not
/// a valid glob.
fn find_duplicates(pattern: &str) -> HashMap<u64, Vec<PathBuf>> {
    let mut by_size: HashMap<u64, Vec<PathBuf>> = HashMap::new();

    // (size, path) for every match that is a regular file with readable metadata
    let sized_files = glob(pattern).unwrap().flatten().filter_map(|path| {
        let meta = fs::metadata(&path).ok()?;
        if meta.is_file() {
            Some((meta.len(), path))
        } else {
            None
        }
    });

    for (len, path) in sized_files {
        by_size.entry(len).or_insert_with(Vec::new).push(path);
    }

    // A size seen only once cannot be a duplicate
    by_size.retain(|_, group| group.len() >= 2);
    by_size
}
 
fn main() {
    // Consider every path whose name contains a dot, at any depth
    let duplicates = find_duplicates("**/*.*");

    if duplicates.is_empty() {
        println!("No duplicate files found.");
        return;
    }

    println!("Potential duplicates (same size):");
    for (size, group) in &duplicates {
        println!("\nSize: {} bytes", size);
        group
            .iter()
            .for_each(|member| println!("  {}", member.display()));
    }
}

Real-World: Batch File Rename

use glob::glob;
use std::fs;
use std::path::Path;
 
/// Renames every path matched by `pattern` whose file name contains `find`,
/// replacing each occurrence of `find` in the name with `replace`.
///
/// Returns `(old_path, new_path)` display strings for the renames that
/// succeeded. A match is skipped when:
/// - it has no final path component (for example `..`),
/// - its name does not contain `find`,
/// - the destination already exists (so nothing is overwritten), or
/// - the rename itself fails.
///
/// # Panics
///
/// Panics if `pattern` is not a valid glob.
fn batch_rename(pattern: &str, find: &str, replace: &str) -> Vec<(String, String)> {
    let mut renamed = Vec::new();
    
    for entry in glob(pattern).unwrap().filter_map(Result::ok) {
        // Not every path has a file name; skip those instead of panicking.
        let filename = match entry.file_name() {
            Some(name) => name.to_string_lossy(),
            None => continue,
        };

        if !filename.contains(find) {
            continue;
        }

        let new_filename = filename.replace(find, replace);
        let new_path = entry.with_file_name(&new_filename);

        // `fs::rename` replaces an existing destination; never clobber a
        // file that is already there.
        if new_path.exists() {
            continue;
        }

        if fs::rename(&entry, &new_path).is_ok() {
            renamed.push((
                entry.display().to_string(),
                new_path.display().to_string(),
            ));
        }
    }
    
    renamed
}
 
fn main() {
    // Normalise the long extension to the short one, at any depth
    println!("Renaming .jpeg to .jpg...");

    batch_rename("**/*.jpeg", ".jpeg", ".jpg")
        .iter()
        .for_each(|(before, after)| println!("  {} -> {}", before, after));
}

Real-World: File Statistics

use glob::glob;
use std::collections::HashMap;
use std::fs;
use std::path::Path;
 
/// Aggregate counts and sizes for the paths matched by a glob pattern.
struct FileStats {
    /// Number of matched regular files.
    total_files: usize,
    /// Number of matched directories.
    total_dirs: usize,
    /// Combined size in bytes of the matched regular files.
    total_size: u64,
    /// Number of regular files per extension; files without an extension are
    /// counted under `"(no ext)"`.
    by_extension: HashMap<String, usize>,
}
 
impl FileStats {
    /// Key used in `by_extension` for files that have no extension.
    const NO_EXTENSION: &'static str = "(no ext)";

    /// Walks every path matched by `pattern` and accumulates the statistics.
    ///
    /// Entries that cannot be read, or whose metadata is unavailable, are
    /// skipped.
    ///
    /// # Panics
    ///
    /// Panics if `pattern` is not a valid glob.
    fn from_pattern(pattern: &str) -> Self {
        let mut stats = Self {
            total_files: 0,
            total_dirs: 0,
            total_size: 0,
            by_extension: HashMap::new(),
        };
        
        for entry in glob(pattern).unwrap().filter_map(Result::ok) {
            let metadata = match fs::metadata(&entry) {
                Ok(metadata) => metadata,
                Err(_) => continue,
            };

            if metadata.is_file() {
                stats.total_files += 1;
                stats.total_size += metadata.len();

                // Track by extension, with a shared bucket for "none"
                let key = entry.extension().map_or_else(
                    || Self::NO_EXTENSION.to_string(),
                    |ext| ext.to_string_lossy().into_owned(),
                );
                *stats.by_extension.entry(key).or_insert(0) += 1;
            } else if metadata.is_dir() {
                stats.total_dirs += 1;
            }
        }
        
        stats
    }
    
    /// Prints the totals followed by the per-extension counts, most frequent
    /// first. Extensions with equal counts are listed alphabetically so the
    /// output is the same on every run.
    fn print(&self) {
        println!("File Statistics:");
        println!("  Total files: {}", self.total_files);
        println!("  Total directories: {}", self.total_dirs);
        println!("  Total size: {} bytes ({:.2} MB)", 
                 self.total_size, 
                 self.total_size as f64 / 1_048_576.0);
        
        println!("\n  By extension:");
        let mut exts: Vec<_> = self.by_extension.iter().collect();
        // Hash-map order is arbitrary, so break ties on the extension name.
        exts.sort_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0)));
        
        for (ext, count) in exts {
            if ext.as_str() == Self::NO_EXTENSION {
                // The placeholder is a label, not an extension: no leading dot.
                println!("    {}: {}", ext, count);
            } else {
                println!("    .{}: {}", ext, count);
            }
        }
    }
}
 
fn main() {
    // Gather statistics for everything below the current directory and
    // print them straight away.
    FileStats::from_pattern("**/*").print();
}

Real-World: Find Large Files

use glob::glob;
use std::fs;
 
/// A file paired with its size, as produced by `find_large_files`.
struct FileSize {
    /// Display form of the file's path.
    path: String,
    /// Length of the file in bytes.
    size: u64,
}
 
/// Returns the regular files matched by `pattern` whose size is at least
/// `min_size_mb` mebibytes, largest first.
///
/// Entries that cannot be read, or whose metadata is unavailable, are
/// skipped. A threshold too large to express in bytes saturates at
/// `u64::MAX` instead of overflowing.
///
/// # Panics
///
/// Panics if `pattern` is not a valid glob.
fn find_large_files(pattern: &str, min_size_mb: u64) -> Vec<FileSize> {
    const BYTES_PER_MB: u64 = 1_048_576;
    let min_size = min_size_mb.saturating_mul(BYTES_PER_MB);
    
    let mut large_files: Vec<FileSize> = glob(pattern)
        .unwrap()
        .filter_map(Result::ok)
        .filter_map(|path| {
            let meta = fs::metadata(&path).ok()?;
            if meta.is_file() && meta.len() >= min_size {
                Some(FileSize {
                    path: path.display().to_string(),
                    size: meta.len(),
                })
            } else {
                None
            }
        })
        .collect();
    
    // Sort by size descending
    large_files.sort_by(|a, b| b.size.cmp(&a.size));
    large_files
}
 
fn main() {
    println!("Files larger than 1 MB:");
    let large_files = find_large_files("**/*", 1);
    
    for file in large_files {
        println!("  {} ({:.2} MB)", file.path, file.size as f64 / 1_048_576.0);
    }
}

Real-World: Find Old Files

use glob::glob;
use std::fs;
use std::time::{SystemTime, Duration};
 
/// A path paired with its age, as produced by `find_old_files`.
struct AgedFile {
    /// Display form of the path.
    path: String,
    /// Whole days elapsed since the last modification time.
    age_days: u64,
}
 
/// Returns the regular files matched by `pattern` that were last modified at
/// least `min_age_days` days ago, oldest first.
///
/// A match is skipped when it cannot be read, is not a regular file, has no
/// available modification time, or carries a modification time in the
/// future. A threshold too large to express in seconds saturates instead of
/// overflowing.
///
/// # Panics
///
/// Panics if `pattern` is not a valid glob.
fn find_old_files(pattern: &str, min_age_days: u64) -> Vec<AgedFile> {
    const SECS_PER_DAY: u64 = 24 * 60 * 60;

    let now = SystemTime::now();
    let min_age = Duration::from_secs(min_age_days.saturating_mul(SECS_PER_DAY));
    
    let mut old_files: Vec<AgedFile> = glob(pattern)
        .unwrap()
        .filter_map(Result::ok)
        .filter_map(|path| {
            let meta = fs::metadata(&path).ok()?;
            // `**/*`-style patterns also match directories; report files only.
            if !meta.is_file() {
                return None;
            }

            let modified = meta.modified().ok()?;
            // Fails (and skips the entry) when `modified` lies in the future.
            let age = now.duration_since(modified).ok()?;
            if age < min_age {
                return None;
            }

            Some(AgedFile {
                path: path.display().to_string(),
                age_days: age.as_secs() / SECS_PER_DAY,
            })
        })
        .collect();
    
    // Oldest first
    old_files.sort_by(|a, b| b.age_days.cmp(&a.age_days));
    old_files
}
 
fn main() {
    println!("Files older than 30 days:");
    let old_files = find_old_files("**/*", 30);
    
    for file in old_files {
        println!("  {} ({} days old)", file.path, file.age_days);
    }
}

Real-World: Validate Project Structure

use glob::glob;
use std::path::Path;
 
/// Collects problems found while checking the layout of the project in the
/// current working directory.
struct ProjectValidator {
    /// Findings printed under the "Errors:" heading.
    errors: Vec<String>,
    /// Findings printed under the "Warnings:" heading.
    warnings: Vec<String>,
}
 
impl ProjectValidator {
    /// Creates a validator with no findings recorded yet.
    fn new() -> Self {
        Self {
            errors: vec![],
            warnings: vec![],
        }
    }

    /// Number of readable paths matching `pattern`.
    /// Panics if `pattern` is not a valid glob.
    fn count_matches(pattern: &str) -> usize {
        glob(pattern).unwrap().flatten().count()
    }
    
    /// Runs every check in a fixed order, appending to `errors` and
    /// `warnings` as problems are found.
    fn validate(&mut self) {
        self.check_cargo_toml();
        self.check_src_structure();
        self.check_readme();
        self.check_tests();
    }
    
    /// Error when there is no `Cargo.toml` in the current directory.
    fn check_cargo_toml(&mut self) {
        let manifest = Path::new("Cargo.toml");
        if manifest.exists() {
            return;
        }
        self.errors.push(String::from("Missing Cargo.toml"));
    }
    
    /// Error when neither `src/main.rs` nor `src/lib.rs` is present; warning
    /// when `src` contains no `.rs` file at any depth.
    fn check_src_structure(&mut self) {
        let binary_roots = Self::count_matches("src/main.rs");
        let library_roots = Self::count_matches("src/lib.rs");

        if binary_roots + library_roots == 0 {
            self.errors.push(String::from("Missing src/main.rs or src/lib.rs"));
        }

        // Any Rust source at all below src?
        if Self::count_matches("src/**/*.rs") == 0 {
            self.warnings.push(String::from("No Rust files found in src/"));
        }
    }
    
    /// Warning when nothing in the current directory starts with `README`.
    fn check_readme(&mut self) {
        if Self::count_matches("README*") == 0 {
            self.warnings.push(String::from("Missing README file"));
        }
    }
    
    /// Warning when there are no `.rs` files below `tests` and no readable
    /// source file below `src` contains the text `#[test]`.
    fn check_tests(&mut self) {
        let integration_tests = Self::count_matches("tests/**/*.rs");

        // Source files that can be read and mention the test attribute
        let files_with_inline_tests = glob("src/**/*.rs")
            .unwrap()
            .flatten()
            .filter(|path| {
                std::fs::read_to_string(path).map_or(false, |text| text.contains("#[test]"))
            })
            .count();

        let any_tests = integration_tests != 0 || files_with_inline_tests != 0;
        if !any_tests {
            self.warnings.push(String::from("No test files found"));
        }
    }
    
    /// Prints errors, then warnings, or a success line when there are none
    /// of either.
    fn print_results(&self) {
        if self.errors.is_empty() && self.warnings.is_empty() {
            println!("✅ Project structure looks good!");
            return;
        }

        if !self.errors.is_empty() {
            println!("Errors:");
            self.errors
                .iter()
                .for_each(|message| println!("  ❌ {}", message));
        }

        if !self.warnings.is_empty() {
            println!("Warnings:");
            self.warnings
                .iter()
                .for_each(|message| println!("  ⚠️  {}", message));
        }
    }
}
 
fn main() {
    // Run all checks up front; the finished validator only needs to be read
    let validator = {
        let mut in_progress = ProjectValidator::new();
        in_progress.validate();
        in_progress
    };

    validator.print_results();
}

Real-World: Find TODO Comments

use glob::glob;
use std::fs;
 
/// One marker comment located by `find_todos`.
struct TodoItem {
    /// Display form of the path of the file containing the marker.
    file: String,
    /// 1-based line number of the marker within the file.
    line: usize,
    /// The matching line with surrounding whitespace trimmed.
    content: String,
}
 
/// Scans every file matched by `pattern` for lines containing `TODO`,
/// `FIXME`, or `HACK` and returns one item per such line, in traversal and
/// line order.
///
/// Unreadable entries, and files that cannot be read as UTF-8 text, are
/// skipped. Panics if `pattern` is not a valid glob.
fn find_todos(pattern: &str) -> Vec<TodoItem> {
    const MARKERS: [&str; 3] = ["TODO", "FIXME", "HACK"];

    let mut found = Vec::new();

    for source in glob(pattern).unwrap().flatten() {
        let text = match fs::read_to_string(&source) {
            Ok(text) => text,
            Err(_) => continue,
        };

        let hits = text
            .lines()
            .enumerate()
            .filter(|(_, line)| MARKERS.iter().any(|&marker| line.contains(marker)))
            .map(|(index, line)| TodoItem {
                file: source.display().to_string(),
                // `enumerate` counts from zero; line numbers start at one
                line: index + 1,
                content: line.trim().to_string(),
            });

        found.extend(hits);
    }

    found
}
 
fn main() {
    println!("Finding TODOs in Rust files...");
    let todos = find_todos("**/*.rs");
    
    println!("\nFound {} items:\n", todos.len());
    
    for todo in todos {
        println!("{}:{}:", todo.file, todo.line);
        println!("  {}", todo.content);
        println!();
    }
}

Real-World: Backup Files

use glob::glob;
use std::fs;
use std::path::Path;
 
/// Copies every regular file matched by `pattern` into `backup_dir`, naming
/// each copy `<file name>_<timestamp>`, and returns one `"source -> copy"`
/// line per file that was copied.
///
/// The timestamp is taken once per call, so all copies made by one run share
/// it. If a copy with that name already exists (for example two matched
/// files with the same name in different directories), a numeric suffix is
/// appended so that no earlier backup is overwritten.
///
/// Returns an empty list when `backup_dir` cannot be created. Unreadable
/// entries and failed copies are skipped.
///
/// # Panics
///
/// Panics if `pattern` is not a valid glob.
fn backup_files(pattern: &str, backup_dir: &str) -> Vec<String> {
    let mut backed_up = Vec::new();
    let backup_root = Path::new(backup_dir);
    
    // Without the destination directory no copy can succeed.
    if fs::create_dir_all(backup_root).is_err() {
        return backed_up;
    }

    // One timestamp for the whole run keeps the copies of a run together.
    let timestamp = chrono::Local::now().format("%Y%m%d_%H%M%S").to_string();
    
    for entry in glob(pattern).unwrap().filter_map(Result::ok) {
        if !entry.is_file() {
            continue;
        }

        let filename = match entry.file_name() {
            Some(name) => name.to_string_lossy(),
            None => continue,
        };

        // `fs::copy` overwrites its destination, so pick a name that is
        // still free.
        let mut backup_path = backup_root.join(format!("{}_{}", filename, timestamp));
        let mut suffix = 1u32;
        while backup_path.exists() {
            backup_path = backup_root.join(format!("{}_{}_{}", filename, timestamp, suffix));
            suffix += 1;
        }

        if fs::copy(&entry, &backup_path).is_ok() {
            backed_up.push(format!(
                "{} -> {}",
                entry.display(),
                backup_path.display()
            ));
        }
    }
    
    backed_up
}
 
fn main() {
    println!("Backing up Rust files...");
    let report = backup_files("**/*.rs", "backups");

    // Summary first, then one line per copied file
    println!("Backed up {} files:", report.len());
    report.iter().for_each(|line| println!("  {}", line));
}

Escaping and Matching with Pattern

use glob::glob;
 
fn main() {
    // Turn a literal file name into a pattern by escaping its special
    // characters
    let escaped = glob::Pattern::escape("file[1].txt");
    println!("Escaped pattern: {}", escaped);

    // Search the filesystem with the escaped pattern
    glob(&escaped)
        .unwrap()
        .flatten()
        .for_each(|found| println!("Found: {}", found.display()));

    // A compiled pattern can also be tested against plain strings, without
    // touching the filesystem
    let compiled = glob::Pattern::new("src/**/*.rs").unwrap();

    let candidates = [
        "src/main.rs",
        "src/lib.rs",
        "src/utils/helper.rs",
        "tests/test.rs",
        "Cargo.toml",
    ];

    println!("\nMatching against pattern 'src/**/*.rs':");
    for &candidate in candidates.iter() {
        println!("  {}: {}", candidate, compiled.matches(candidate));
    }
}

Pattern Matching Directly

use glob::Pattern;
 
fn main() {
    // A pattern is compiled once and can then be tested against any number
    // of strings
    let rust_file = Pattern::new("*.rs").unwrap();

    println!("Files matching *.rs:");
    ["main.rs", "lib.rs", "test.txt", "README.md"]
        .iter()
        .filter(|&&name| rust_file.matches(name))
        .for_each(|name| println!("  {}", name));

    // The same works for patterns that span directories
    let rust_in_src = Pattern::new("src/**/*.rs").unwrap();

    println!("\nPaths matching src/**/*.rs:");
    [
        "src/main.rs",
        "src/lib.rs",
        "src/utils/helper.rs",
        "tests/test.rs",
    ]
    .iter()
    .filter(|&&path| rust_in_src.matches(path))
    .for_each(|path| println!("  {}", path));
}

Summary

  • glob(pattern) returns an iterator over matching paths
  • * matches any sequence of characters (except /)
  • ? matches exactly one character
  • ** matches any number of directories recursively
  • [abc] matches any character in the set
  • [a-z] matches characters in a range
  • [!abc] matches characters NOT in the set
  • glob_with(pattern, options) for case-insensitive and other options
  • Use filter_map(Result::ok) to ignore errors
  • Pattern::new() creates a reusable pattern
  • pattern.matches(path) tests a single path
  • Pattern::escape(string) escapes special characters
  • Handle both pattern errors and file access errors
  • Perfect for: build scripts, file processing, project validation