Loading page…
Rust walkthroughs
Loading page…
What is the difference between regex::Regex::new and regex::RegexBuilder::new for configuring regex patterns?
regex::Regex::new is a simple constructor that compiles a pattern string with default settings, while regex::RegexBuilder provides a builder pattern for configuring regex compilation options like case insensitivity, multi-line mode, size limits, and more. The builder pattern allows fine-grained control over regex behavior before compilation, whereas Regex::new always uses defaults. Key configurable options include case sensitivity, multi-line matching, dot-matches-newline, size limits (to bound the memory a compiled pattern may use), and Unicode handling. Use Regex::new for simple patterns with default behavior; use RegexBuilder when you need to customize compilation settings or enforce limits on pattern complexity.
use regex::Regex;
/// Compiles two patterns with `Regex::new` and checks their default behaviour.
fn main() -> Result<(), regex::Error> {
    // `Regex::new` takes nothing but the pattern, so every option is at its default.
    let iso_date = Regex::new(r"\d{4}-\d{2}-\d{2}")?;

    // A date written like 2024-01-15 is accepted.
    assert!(iso_date.is_match("2024-01-15"));

    // Matching is case sensitive unless the pattern or a builder says otherwise.
    let greeting = Regex::new(r"hello")?;
    assert!(greeting.is_match("hello"));
    assert!(!greeting.is_match("HELLO")); // upper case is a different string

    Ok(())
}
Regex::new compiles with all default settings.
use regex::RegexBuilder;
/// Builds a case-insensitive regex through `RegexBuilder`.
fn main() -> Result<(), regex::Error> {
    // Options are recorded on the builder; the pattern is compiled by `build()`.
    let mut builder = RegexBuilder::new(r"hello");
    builder.case_insensitive(true);
    let greeting = builder.build()?;

    // With case insensitivity on, every casing of the word matches.
    for candidate in ["hello", "HELLO", "Hello"] {
        assert!(greeting.is_match(candidate));
    }

    Ok(())
}
RegexBuilder provides methods to configure options before compiling.
use regex::RegexBuilder;
/// Compares three ways of compiling `hello` against an upper-case haystack.
fn main() -> Result<(), regex::Error> {
    let shouted = "HELLO";

    // A builder with nothing configured is case sensitive, like `Regex::new`.
    let strict = RegexBuilder::new(r"hello").build()?;
    assert!(!strict.is_match(shouted));

    // Option 1: turn case insensitivity on through the builder.
    let mut relaxed_builder = RegexBuilder::new(r"hello");
    relaxed_builder.case_insensitive(true);
    let relaxed = relaxed_builder.build()?;
    assert!(relaxed.is_match(shouted));

    // Option 2: put the `(?i)` flag in the pattern, which `Regex::new` accepts too.
    let inline = regex::Regex::new(r"(?i)hello")?;
    assert!(inline.is_match(shouted));

    Ok(())
}
Case insensitivity can be set via builder or inline flag.
use regex::{Regex, RegexBuilder};
/// Contrasts default anchors with multi-line anchors on a three-line haystack.
fn main() -> Result<(), regex::Error> {
    let haystack = "first line\nsecond line\nthird line";

    // By default `^` and `$` refer to the start and end of the whole haystack,
    // so a word that opens the second line is not found.
    let whole_text = Regex::new(r"^second")?;
    assert!(!whole_text.is_match(haystack));

    // In multi-line mode the anchors apply to every line instead.
    let mut per_line_builder = RegexBuilder::new(r"^second");
    per_line_builder.multi_line(true);
    let per_line = per_line_builder.build()?;
    assert!(per_line.is_match(haystack));

    // The inline `(?m)` flag is the equivalent spelling for `Regex::new`.
    let inline = Regex::new(r"(?m)^second")?;
    assert!(inline.is_match(haystack));

    Ok(())
}
Multi-line mode changes how ^ and $ anchors work.
use regex::{Regex, RegexBuilder};
/// Shows how the dot-matches-newline option changes what `.` accepts.
fn main() -> Result<(), regex::Error> {
    let two_lines = "hello\nworld";

    // Out of the box `.` refuses to match a newline.
    let plain_dot = Regex::new(r"hello.world")?;
    assert!(!plain_dot.is_match(two_lines));

    // With the option enabled on the builder, `.` accepts `\n` as well.
    let mut dotall_builder = RegexBuilder::new(r"hello.world");
    dotall_builder.dot_matches_new_line(true);
    let dotall = dotall_builder.build()?;
    assert!(dotall.is_match(two_lines));

    // `(?s)` is the inline flag with the same effect.
    let inline = Regex::new(r"(?s)hello.world")?;
    assert!(inline.is_match(two_lines));

    Ok(())
}
Configure whether . matches newline characters.
use regex::RegexBuilder;
/// Shows how `size_limit` bounds the memory a compiled regex may use.
///
/// # Errors
/// Returns the build error if the ASCII-only pattern unexpectedly fails to compile.
fn main() -> Result<(), regex::Error> {
    // `size_limit` is an approximate cap, in bytes, on the compiled regex.
    // When the cap is exceeded, `build()` returns an error instead of a regex.

    // Unicode-aware classes are far bigger than they look: `\w` covers well
    // over 100,000 codepoints and does not fit in 45,000 bytes.
    let too_big = RegexBuilder::new(r"\w").size_limit(45_000).build();
    assert!(too_big.is_err());

    // Restricted to ASCII, the same class compiles to a small program.
    let re = RegexBuilder::new(r"\w+")
        .unicode(false)
        .size_limit(45_000)
        .build()?;
    assert!(re.is_match("hello"));

    // With a very tight limit, inspect the result instead of assuming success:
    // whether a given pattern fits is an implementation detail of the crate.
    match RegexBuilder::new(r"(a+)+b").size_limit(100).build() {
        Ok(_) => println!("pattern fits within the limit"),
        Err(e) => println!("pattern rejected: {}", e),
    }

    Ok(())
}
Size limits prevent excessive memory usage from complex patterns.
use regex::RegexBuilder;
/// Shows what `nest_limit` restricts: how deeply a pattern may be nested.
///
/// # Errors
/// Returns the build error if `(a+)+$` fails to compile under the limit.
fn main() -> Result<(), regex::Error> {
    // `nest_limit` caps the nesting depth the pattern parser will accept, which
    // keeps deeply nested patterns from exhausting the stack while parsing.
    // It has nothing to do with how long a search takes.
    let re = RegexBuilder::new(r"(a+)+$")
        .nest_limit(100) // plenty for a pattern nested this shallowly
        .build()?;

    // The regex crate does not backtrack, so the classic "catastrophic" input
    // for this pattern is handled without an exponential blow-up.
    assert!(!re.is_match("aaaaaaaaaaaaaaa!"));
    assert!(re.is_match("aaaaaaaaaaaaaaa"));

    // A limit that is too small rejects the pattern at build time. Even `ab`
    // needs depth 1, because a concatenation counts as one level of nesting.
    assert!(RegexBuilder::new(r"a").nest_limit(0).build().is_ok());
    assert!(RegexBuilder::new(r"ab").nest_limit(0).build().is_err());

    Ok(())
}
Nest limits bound how deeply a pattern may nest; they are unrelated to backtracking, which the regex crate does not do.
use regex::{Regex, RegexBuilder};
/// Compares Unicode-aware and ASCII-only interpretations of `\w`.
fn main() -> Result<(), regex::Error> {
    let japanese = "日本語";

    // Unicode mode is the default, so `\w` accepts non-ASCII word characters.
    let unicode_words = Regex::new(r"\w+")?;
    assert!(unicode_words.is_match(japanese));

    // Turning Unicode off on the builder narrows `\w` to ASCII word characters.
    let mut ascii_builder = RegexBuilder::new(r"\w+");
    ascii_builder.unicode(false);
    let ascii_words = ascii_builder.build()?;
    assert!(!ascii_words.is_match(japanese));
    assert!(ascii_words.is_match("hello")); // plain ASCII is unaffected

    // `(?-u)` switches Unicode off from inside the pattern.
    let inline_ascii = Regex::new(r"(?-u)\w+")?;
    assert!(!inline_ascii.is_match(japanese));

    Ok(())
}
Configure whether patterns are Unicode-aware.
use regex::{Regex, RegexBuilder};
/// Shows what the `octal` option enables and what it does not.
///
/// # Errors
/// Returns the build error if the octal pattern fails to compile.
fn main() -> Result<(), regex::Error> {
    // Octal syntax is one more way to write a codepoint: `\141` is `a`
    // (0o141 == 0x61). It is disabled by default and must be switched on.
    let re = RegexBuilder::new(r"\141")
        .octal(true)
        .build()?;
    assert!(re.is_match("a"));

    // With the default settings the same pattern is rejected.
    assert!(Regex::new(r"\141").is_err());

    // The regex crate does not support backreferences at all, so `\1` after a
    // group is an error by default; it is never a reference to that group.
    assert!(Regex::new(r"(.)\1").is_err());

    Ok(())
}
Control whether octal escapes are parsed.
use regex::{Regex, RegexBuilder};
fn main() -> Result<(), regex::Error> {
let text = "aaa";
// Default: greedy matching
let re1 = Regex::new(r"a+)?;
let match1 = re1.find(text).unwrap();
assert_eq!(match1.as_str(), "a"); // Non-greedy
// Swap greed: non-greedy becomes greedy and vice versa
let re2 = RegexBuilder::new(r"a+")
.swap_greed(true)
.build()?;
let match2 = re2.find(text).unwrap();
assert_eq!(match2.as_str(), "a"); // Now non-greedy behavior
Ok(())
}Invert greedy/non-greedy quantifier behavior.
use regex::{Regex, RegexBuilder};
/// Compiles a commented, whitespace-insensitive date pattern.
fn main() -> Result<(), regex::Error> {
    // With `ignore_whitespace`, whitespace in the pattern is skipped and `#`
    // starts a comment, so the pattern can be laid out over several lines.
    let mut builder = RegexBuilder::new(r"
\d{4} # year
- # separator
\d{2} # month
- # separator
\d{2} # day
");
    builder.ignore_whitespace(true);
    let date = builder.build()?;
    assert!(date.is_match("2024-01-15"));

    // The inline `(?x)` flag enables the same mode for `Regex::new`.
    let _inline_date = Regex::new(r"(?x)\d{4} - \d{2} - \d{2}")?;

    Ok(())
}
Verbose mode allows comments and ignores whitespace.
use regex::{Regex, RegexBuilder};
/// Shows how CRLF mode changes where `$` matches in multi-line mode.
///
/// # Errors
/// Returns the build error if either pattern fails to compile.
fn main() -> Result<(), regex::Error> {
    let text = "line1\r\nline2";

    // Multi-line mode alone treats only `\n` as a line terminator. `$` can match
    // just before the `\n`, but `line1` is followed by `\r`, so there is no match.
    let re1 = Regex::new(r"(?m)line1$")?;
    assert!(!re1.is_match(text));

    // CRLF mode only affects `^` and `$` when multi-line mode is also enabled.
    // Together they treat `\r\n` as a line ending, so `$` matches before the `\r`.
    let re2 = RegexBuilder::new(r"line1$")
        .multi_line(true)
        .crlf(true)
        .build()?;
    assert!(re2.is_match(text));

    Ok(())
}
CRLF mode changes line ending handling.
use regex::RegexBuilder;
/// Shows how to choose the byte that multi-line anchors treat as a line end.
///
/// # Errors
/// Returns the build error if either pattern fails to compile.
fn main() -> Result<(), regex::Error> {
    // `line_terminator` takes a single byte (`u8`), not a `char`.
    // `b'\n'` is the default, spelled out here for illustration.
    let re = RegexBuilder::new(r"^line$")
        .multi_line(true)
        .line_terminator(b'\n')
        .build()?;
    assert!(re.is_match("first\nline\nlast"));

    // Any other byte can serve instead, e.g. NUL for NUL-separated records.
    let nul_separated = RegexBuilder::new(r"^line$")
        .multi_line(true)
        .line_terminator(b'\x00')
        .build()?;
    assert!(nul_separated.is_match("first\x00line\x00last"));
    assert!(!nul_separated.is_match("first\nline\nlast")); // `\n` no longer ends a line

    Ok(())
}
Customize the line terminator character.
use regex::{Regex, bytes::Regex as BytesRegex};
/// Runs the same digit pattern over a `&str` and over raw bytes.
fn main() -> Result<(), regex::Error> {
    // `regex::Regex` searches string slices.
    let text_digits = Regex::new(r"\d+")?;
    assert!(text_digits.is_match("12345"));

    // `regex::bytes::Regex` searches byte slices; note the `b"..."` literal.
    let byte_digits = BytesRegex::new(r"\d+")?;
    assert!(byte_digits.is_match(b"12345"));

    // Handy for binary data or content that is not UTF-8 text.
    let binary_data: &[u8] = &[0x01, 0x02, b'1', b'2', b'3', 0x03];
    assert!(byte_digits.is_match(binary_data));

    Ok(())
}
Use bytes regex for binary data.
use regex::RegexBuilder;
fn main() {
// Both Regex::new and RegexBuilder::build return Result
let result = RegexBuilder::new(r"(unclosed")
.build();
match result {
Ok(re) => println!("Compiled: {:?}", re),
Err(e) => println!("Error: {}", e),
}
// RegexBuilder validates during build, not during configuration
// The pattern is only compiled when .build() is called
// Regex::new returns the same error type
let result2 = regex::Regex::new(r"(unclosed");
assert!(result2.is_err());
}Both methods return Result<Regex, Error>.
use regex::RegexBuilder;
/// Walks through the builder options that affect memory use and speed.
///
/// # Errors
/// Returns the build error if one of the patterns expected to compile does not.
fn main() -> Result<(), regex::Error> {
    // A smaller `size_limit` means less memory, but compilation may fail.
    // Unicode-aware `\w+` is far too large for 1 KB, so handle the error
    // rather than propagating it with `?`.
    let re1 = RegexBuilder::new(r"\w+")
        .size_limit(1024)
        .build();
    assert!(re1.is_err());

    // `nest_limit` only restricts how deeply the pattern may nest. It guards
    // the parser and does not make searches any faster.
    let re2 = RegexBuilder::new(r"(a|b)+")
        .nest_limit(50)
        .build()?;
    assert!(re2.is_match("abba"));

    // Disabling Unicode shrinks classes such as `\w` to their ASCII ranges,
    // which can be faster for ASCII-only content.
    let re3 = RegexBuilder::new(r"\w+")
        .unicode(false)
        .build()?;
    assert!(re3.is_match("hello"));

    // Compile once and reuse: both `Regex::new` and `RegexBuilder::build()`
    // do the expensive work up front, and the resulting `Regex` can then be
    // used for many searches.
    Ok(())
}
Configure limits and features that affect performance.
use regex::{Regex, RegexBuilder};
/// Summarises when `Regex::new` is enough and when `RegexBuilder` is needed.
///
/// # Errors
/// Returns the build error if one of the trusted, hard-coded patterns fails
/// to compile. The untrusted pattern is handled separately and never aborts.
fn main() -> Result<(), regex::Error> {
    // Use Regex::new for simple patterns with defaults.
    let simple = Regex::new(r"\d{4}-\d{2}-\d{2}")?;
    assert!(simple.is_match("2024-01-15"));

    // Use RegexBuilder when you need:

    // 1. Case-insensitive matching
    let case_insensitive = RegexBuilder::new(r"hello")
        .case_insensitive(true)
        .build()?;
    assert!(case_insensitive.is_match("HELLO"));

    // 2. Multi-line mode
    let multiline = RegexBuilder::new(r"^line")
        .multi_line(true)
        .build()?;
    assert!(multiline.is_match("first\nline two"));

    // 3. Security limits (untrusted patterns)
    // Stand-in for a pattern received from an untrusted source.
    let user_provided_pattern = r"[a-z]+";
    // Rejection is an expected outcome for untrusted input, so match on the
    // result instead of propagating it with `?`. The size limit is kept below
    // the crate default but large enough for ordinary patterns.
    match RegexBuilder::new(user_provided_pattern)
        .size_limit(1 << 20)
        .nest_limit(100)
        .build()
    {
        Ok(safe) => assert!(safe.is_match("abc")),
        Err(e) => println!("rejected untrusted pattern: {}", e),
    }

    // 4. ASCII-only matching for performance
    let ascii = RegexBuilder::new(r"\w+")
        .unicode(false)
        .build()?;
    assert!(ascii.is_match("hello"));

    // 5. Verbose patterns with comments
    let verbose = RegexBuilder::new(r"
\d{4} # year
-\d{2} # month
-\d{2} # day
")
        .ignore_whitespace(true)
        .build()?;
    assert!(verbose.is_match("2024-01-15"));

    Ok(())
}
Choose based on whether you need configuration.
use regex::{Regex, RegexBuilder};
/// Shows how inline flags interact with `RegexBuilder` settings.
///
/// # Errors
/// Returns the build error if any of the patterns fails to compile.
fn main() -> Result<(), regex::Error> {
    // Several settings have an inline-flag spelling that works with Regex::new.
    let re1 = Regex::new(r"(?i)hello")?; // case insensitive
    assert!(re1.is_match("HELLO"));
    let re2 = Regex::new(r"(?m)^line")?; // multi-line
    assert!(re2.is_match("first\nline"));
    let re3 = Regex::new(r"(?s)hello.world")?; // dot matches newline
    assert!(re3.is_match("hello\nworld"));
    let re4 = Regex::new(r"(?x)\d+ # number")?; // verbose
    assert!(re4.is_match("42"));
    let re5 = Regex::new(r"(?-u)\w+")?; // ASCII-only
    assert!(!re5.is_match("日本語"));

    // Builder settings are only the starting defaults for the pattern. An
    // inline flag overrides them from the point where it appears.
    let re6 = RegexBuilder::new(r"(?i)hello")
        .case_insensitive(false) // overridden by the `(?i)` in the pattern
        .build()?;
    assert!(re6.is_match("HELLO"));

    // A scoped flag overrides the builder default only inside its group.
    let re7 = RegexBuilder::new(r"(?-i:a)b")
        .case_insensitive(true)
        .build()?;
    assert!(re7.is_match("aB")); // `b` still uses the builder default
    assert!(!re7.is_match("Ab")); // `a` is case sensitive inside the group

    Ok(())
}
Inline flags work with both approaches; builder settings provide defaults.
| Aspect | Regex::new | RegexBuilder |
|--------|--------------|----------------|
| Syntax | Simple | Builder pattern |
| Defaults | All defaults | Configurable |
| Case sensitivity | Case sensitive | Configurable |
| Multi-line | Disabled | Configurable |
| Dot-newline | Disabled | Configurable |
| Unicode | Enabled | Configurable |
| Size limit | Default (large) | Configurable |
| Nest limit | Default (large) | Configurable |
| Use case | Simple patterns | Custom settings |
Regex::new and RegexBuilder offer two ways to compile regex patterns, differing in flexibility:
Regex::new(pattern) is the simple, one-line approach. Use it when:
RegexBuilder::new(pattern).option(value).build() provides granular control. Use it when:
Key insight: Most settings available through RegexBuilder can also be set via inline flags like (?i), (?m), (?s), etc. The builder approach is preferable when:
Security consideration: When accepting regex patterns from untrusted sources, always use RegexBuilder with strict size_limit and nest_limit values. The regex crate's defaults are reasonable for trusted patterns but may allow resource exhaustion with adversarial input.