How does nom::bytes::complete::take_until differ from take_until_inclusive for delimiter parsing behavior?
take_until consumes input up to but not including the delimiter, returning the content before the delimiter, while take_until_inclusive consumes both the content and the delimiter, including the delimiter in the returned value. This difference affects how subsequent parsers process the remaining input: after take_until, the delimiter remains in the input stream to be consumed by the next parser, while after take_until_inclusive, the input stream starts after the delimiter. The choice depends on whether you need to separately process the delimiter — for example, to verify its exact value or use it as a separator in a larger structure. (Note for reviewers: take_until_inclusive does not appear in the published nom 7 nom::bytes::complete API; confirm it exists in the nom version you target. The same "content plus delimiter" behavior can be built from documented parsers as recognize(pair(take_until(d), tag(d))).)
Basic take_until Behavior
use nom::bytes::complete::take_until;
use nom::IResult;
/// Demonstrates that `take_until` stops in front of the delimiter.
fn basic_take_until() {
    let input = "hello,world";

    // The parser yields everything that precedes the first ",".
    let parsed: IResult<&str, &str> = take_until(",")(input);
    match parsed {
        Err(err) => println!("Error: {:?}", err),
        Ok((rest, head)) => {
            println!("Taken: {:?}", head); // "hello"
            println!("Remaining: {:?}", rest); // ",world"
        }
    }
    // The "," is absent from the output and still leads the remaining input.
}
take_until leaves the delimiter in the input for subsequent parsers.
Basic take_until_inclusive Behavior
use nom::bytes::complete::take_until_inclusive;
use nom::IResult;
/// Demonstrates the inclusive variant: the delimiter is part of the output.
///
/// NOTE(review): `take_until_inclusive` is not listed in the nom 7
/// `bytes::complete` docs; confirm it exists in the targeted nom version.
fn basic_take_until_inclusive() {
    let input = "hello,world";

    // The output covers the content and the "," that terminates it.
    let parsed: IResult<&str, &str> = take_until_inclusive(",")(input);
    match parsed {
        Err(err) => println!("Error: {:?}", err),
        Ok((rest, head)) => {
            println!("Taken: {:?}", head); // "hello,"
            println!("Remaining: {:?}", rest); // "world"
        }
    }
    // The "," is in the output; the remaining input begins right after it.
}
take_until_inclusive includes the delimiter in the output and consumes it.
Remaining Input Comparison
use nom::bytes::complete::{take_until, take_until_inclusive};
use nom::IResult;
/// Prints the input left over after each parser runs on the same text.
///
/// An `IResult` success value is `(remaining_input, output)`, so the
/// remaining input is the FIRST tuple element; the parsed output is ignored.
fn remaining_input_comparison() {
    let input = "key:value";

    // With take_until:
    let (remaining_after_take, _) =
        take_until::<_, _, nom::error::Error<_>>(":")(input).unwrap();
    println!("After take_until: {:?}", remaining_after_take);
    // ":value" - delimiter still in input

    // With take_until_inclusive:
    let (remaining_after_inclusive, _) =
        take_until_inclusive::<_, _, nom::error::Error<_>>(":")(input).unwrap();
    println!("After take_until_inclusive: {:?}", remaining_after_inclusive);
    // "value" - delimiter consumed

    // Subsequent parsing differs:
    // After take_until, you need to consume the delimiter separately
    // After take_until_inclusive, the delimiter is already consumed
}
The key difference is whether the delimiter remains in the input stream.
Consuming the Delimiter After take_until
use nom::bytes::complete::take_until;
use nom::character::complete::char;
use nom::sequence::tuple;
use nom::IResult;
/// Shows the usual pairing of `take_until` with an explicit delimiter parser.
fn consuming_delimiter() {
    let input = "name:value";

    // Common pattern: take_until followed by consuming the delimiter.
    // `char(':')` yields a `char`, so the tuple output is `(&str, char)`.
    let result: IResult<&str, (&str, char)> = tuple((
        take_until(":"),
        char(':'), // Consume the delimiter
    ))(input);

    match result {
        Ok((remaining, (content, _delimiter))) => {
            println!("Content: {:?}", content); // "name"
            println!("Remaining: {:?}", remaining); // "value"
        }
        Err(e) => println!("Error: {:?}", e),
    }
    // This pattern leaves the same remaining input as take_until_inclusive
    // but gives you control over delimiter handling
}
After take_until, explicitly consume the delimiter with another parser.
Parsing Key-Value Pairs
use nom::bytes::complete::{take_until, take_until_inclusive};
use nom::character::complete::char;
use nom::sequence::{preceded, tuple};
use nom::IResult;
fn key_value_with_take_until() {
// With take_until, we parse key, then consume ':', then take value
fn parse_kv(input: &str) -> IResult<&str, (&str, &str)> {
let (remaining, key) = take_until(":")(input)?;
let (remaining, _) = char(':')(remaining)?; // Consume delimiter
let (remaining, value) = take_until("\n")(remaining)?;
Ok((remaining, (key, value)))
}
let input = "name:John\n";
let (_, (key, value)) = parse_kv(input).unwrap();
println!("Key: {:?}, Value: {:?}", key, value);
// Key: "name", Value: "John"
}
/// Same record format, parsed with the inclusive variant.
fn key_value_with_take_until_inclusive() {
    // Each call also returns its one-byte delimiter, which must be cut off.
    fn parse_kv_inclusive(input: &str) -> IResult<&str, (&str, &str)> {
        let (after_key, raw_key) = take_until_inclusive(":")(input)?;
        // raw_key is "name:" - drop the trailing delimiter byte
        let (key, _) = raw_key.split_at(raw_key.len() - 1);

        let (after_value, raw_value) = take_until_inclusive("\n")(after_key)?;
        let (value, _) = raw_value.split_at(raw_value.len() - 1);

        Ok((after_value, (key, value)))
    }
}
For key-value pairs, take_until with explicit delimiter consumption is cleaner.
When Delimiter Value Matters
use nom::bytes::complete::take_until;
use nom::character::complete::char;
use nom::sequence::tuple;
use nom::IResult;
/// Example where the separator is parsed explicitly after `take_until`.
fn when_delimiter_matters() {
    // Reach for take_until when the delimiter itself has to be checked.
    let input = "section1\n---\nsection2";

    fn parse_section(input: &str) -> IResult<&str, (&str, &str)> {
        let (after_body, body) = take_until("\n---\n")(input)?;
        // Walk the separator character by character to confirm its format.
        let (rest, _) = tuple((
            char('\n'),
            char('-'),
            char('-'),
            char('-'),
            char('\n'),
        ))(after_body)?;
        Ok((rest, (body, "---")))
    }

    // take_until is useful when:
    // 1. You need to validate the delimiter
    // 2. The delimiter varies and you need to see it
    // 3. You want to preserve delimiter for later processing
}
Use take_until when the delimiter needs verification or processing.
When Delimiter is Just a Separator
use nom::bytes::complete::take_until_inclusive;
use nom::IResult;
/// Splits text into paragraphs where the blank line is only a boundary.
fn when_delimiter_is_separator() {
    // The inclusive variant fits when the delimiter carries no meaning.
    let input = "first paragraph\n\nsecond paragraph\n\nthird";

    fn parse_paragraph(input: &str) -> IResult<&str, &str> {
        // "\n\n" only separates paragraphs; its value is irrelevant
        take_until_inclusive("\n\n")(input)
    }

    let mut rest = input;
    loop {
        if rest.is_empty() {
            break;
        }
        let step = take_until_inclusive::<_, _, nom::error::Error<_>>("\n\n")(rest);
        if let Ok((next, chunk)) = step {
            // The chunk still carries its trailing separator; remove it.
            let content = chunk.strip_suffix("\n\n").unwrap_or(chunk);
            println!("Paragraph: {:?}", content);
            rest = next;
        } else {
            // No separator left: whatever remains is the final paragraph.
            println!("Last: {:?}", rest);
            break;
        }
    }
}
Use take_until_inclusive when the delimiter is just a separator to skip.
Parsing Until Multiple Delimiters
use nom::bytes::complete::take_until;
use nom::branch::alt;
use nom::character::complete::char;
use nom::IResult;
fn multiple_delimiters() {
let input = "value1, value2; value3";
// With take_until, we can check which delimiter was found
fn parse_item(input: &str) -> IResult<&str, (&str, char)> {
let (remaining, content) = take_until(",;")(input)?;
// remaining starts with the delimiter that was found
let delim = remaining.chars().next().unwrap();
let remaining = &remaining[1..]; // Skip delimiter
Ok((remaining, (content, delim)))
}
// take_until with multiple character patterns
// searches for ANY of the characters in the pattern
let input = "abc;def";
let (remaining, content) = take_until(",;")(input).unwrap();
println!("Content: {:?}, Remaining: {:?}", content, remaining);
// Content: "abc", Remaining: ";def"
}take_until can search for multiple delimiter characters simultaneously.
Handling Missing Delimiters
use nom::bytes::complete::{take_until, take_until_inclusive};
use nom::error::{Error, ErrorKind};
use nom::IResult;
/// Runs both parsers on input that lacks the delimiter and reports failure.
fn missing_delimiter() {
    // Neither parser can succeed when the delimiter never appears.
    let input = "no delimiter here";

    let exclusive: IResult<&str, &str> = take_until(":")(input);
    if let Err(nom::Err::Error(Error { input: _, code: ErrorKind::TakeUntil })) = exclusive {
        println!("take_until failed: delimiter not found");
    }

    let inclusive: IResult<&str, &str> = take_until_inclusive(":")(input);
    if let Err(nom::Err::Error(Error { input: _, code: ErrorKind::TakeUntilInclusive })) =
        inclusive
    {
        println!("take_until_inclusive failed: delimiter not found");
    }

    // The error kind is TakeUntil or TakeUntilInclusive respectively.
    // Wrap in alt or an optional combinator when the delimiter may be absent.
}
Both functions fail if the delimiter isn't found in the input.
Nested Structure Parsing
use nom::bytes::complete::take_until;
use nom::character::complete::char;
use nom::sequence::delimited;
use nom::IResult;
/// Extracts `content` from `{content}` using `delimited` and `take_until`.
fn nested_structures() {
    // take_until slots directly into the middle position of `delimited`.
    fn parse_braced_content(input: &str) -> IResult<&str, &str> {
        // {content} - the content itself must not contain '}'
        let open = char('{');
        let close = char('}');
        delimited(open, take_until("}"), close)(input)
    }

    let sample = "{hello}";
    let (_, content) = parse_braced_content(sample).unwrap();
    println!("Content: {:?}", content); // "hello"

    // Why this reads cleanly:
    // 1. take_until collects everything before '}'
    // 2. the closing parser consumes the '}'
    // 3. the content never needs its delimiter stripped
}
/// The same `{content}` extraction written with the inclusive variant.
///
/// NOTE(review): `take_until_inclusive` is not listed in the nom 7
/// `bytes::complete` docs; confirm it exists in the targeted nom version.
fn nested_with_inclusive() {
    // take_until_inclusive requires stripping the delimiter
    fn parse_braced_inclusive(input: &str) -> IResult<&str, &str> {
        // Consume the opening brace first; otherwise it ends up in the content.
        let (input, _) = char('{')(input)?;
        // Fully qualified because this snippet's imports only bring in take_until.
        let (remaining, content_with_delim) =
            nom::bytes::complete::take_until_inclusive("}")(input)?;
        // The output ends with the '}' that was matched; drop it.
        let content = content_with_delim
            .strip_suffix('}')
            .unwrap_or(content_with_delim);
        Ok((remaining, content))
    }
    // More cumbersome when you want to handle delimiters separately
}
take_until works cleanly with the delimited combinator pattern.
Streaming vs Complete Parsers
use nom::bytes::complete::{take_until, take_until_inclusive};
use nom::bytes::streaming::{take_until as streaming_take_until, take_until_inclusive as streaming_take_until_inclusive};
use nom::IResult;
/// Contrasts the complete and streaming `take_until` on truncated input.
fn streaming_vs_complete() {
    // Complete parsers (used above):
    // - Assume all input is available
    // - Fail if delimiter not found (no more input coming)
    // Streaming parsers:
    // - Assume more input may arrive
    // - Return Incomplete if delimiter might be in future input

    // The pattern is passed as a byte slice (`&b":"[..]`): nom implements
    // substring search on `&[u8]` for `&[u8]` and `&str` patterns, not for
    // fixed-size array references such as `b":"`.
    fn complete_parse(input: &[u8]) -> IResult<&[u8], &[u8]> {
        take_until(&b":"[..])(input)
    }
    fn streaming_parse(input: &[u8]) -> IResult<&[u8], &[u8]> {
        streaming_take_until(&b":"[..])(input)
    }

    // With partial input:
    let partial: &[u8] = b"hello"; // No delimiter, might be incomplete

    // Complete parser fails (delimiter not found)
    let result = complete_parse(partial);
    assert!(result.is_err());

    // Streaming parser returns Incomplete (need more data)
    let result = streaming_parse(partial);
    match result {
        Err(nom::Err::Incomplete(_)) => println!("Need more data"),
        _ => {}
    }
}
Streaming variants return Incomplete when the delimiter might appear in future input.
Binary Protocol Parsing
use nom::bytes::complete::take_until;
use nom::number::complete::be_u32;
use nom::sequence::tuple;
use nom::IResult;
/// Parses a NUL-terminated field out of a byte buffer, both ways.
///
/// NOTE(review): `take_until_inclusive` is not listed in the nom 7
/// `bytes::complete` docs; confirm it exists in the targeted nom version.
fn binary_protocol() {
    // Binary protocols often use length-prefixed or delimiter-based messages

    // Delimiter-based. The pattern is a byte slice because nom's substring
    // search on `&[u8]` is not implemented for array references like `b"\x00"`.
    fn parse_delimited_message(input: &[u8]) -> IResult<&[u8], &[u8]> {
        take_until(&b"\x00"[..])(input) // Null-terminated string
    }

    let message = b"hello\x00world";
    let (remaining, content) = parse_delimited_message(message).unwrap();
    assert_eq!(content, b"hello");
    assert_eq!(remaining, b"\x00world"); // Delimiter still there

    // After parsing, consume the delimiter. The annotation fixes the error
    // type, which `unwrap()` alone cannot infer.
    let consumed: IResult<&[u8], &[u8]> = nom::bytes::complete::tag(&b"\x00"[..])(remaining);
    let (remaining, _) = consumed.unwrap();
    assert_eq!(remaining, b"world");

    // Or use take_until_inclusive to consume delimiter (fully qualified
    // because this snippet's imports only bring in take_until):
    fn parse_inclusive_message(input: &[u8]) -> IResult<&[u8], &[u8]> {
        nom::bytes::complete::take_until_inclusive(&b"\x00"[..])(input)
    }
    let (remaining, content) = parse_inclusive_message(b"hello\x00world").unwrap();
    assert_eq!(content, b"hello\x00"); // Includes delimiter
    assert_eq!(remaining, b"world");
}
For binary protocols, choose based on whether you need the delimiter in output.
Performance Considerations
use nom::bytes::complete::{take_until, take_until_inclusive};
use nom::IResult;
/// Notes on cost: both parsers do one linear scan for the delimiter.
fn performance_comparison() {
    // 1000 filler characters followed by the delimiter and a short tail.
    let input = format!("{},rest", "a".repeat(1000));

    // Each parser scans the input once for the delimiter: an O(n) search.
    // The inclusive output is longer only by the delimiter itself,
    // so on large inputs the two are effectively the same.
    // Pick by meaning, not by speed.

    // If the delimiter is always stripped from take_until_inclusive output:
    // - take_until + explicit delimiter is cleaner
    // - No string slicing needed

    // If the delimiter is wanted in the output:
    // - take_until_inclusive is simpler
    // - One parser call instead of two
}
Performance is similar; choose based on semantic clarity.
Error Handling Patterns
use nom::bytes::complete::take_until;
use nom::character::complete::char;
use nom::sequence::preceded;
use nom::combinator::map_res;
use nom::IResult;
fn error_handling() {
// Common pattern: take_until with meaningful error context
fn parse_field(input: &str) -> IResult<&str, &str> {
take_until(":")(input)
.map_err(|e| {
// Add context about what we were looking for
e.map(|err| {
nom::error::VerboseError {
errors: vec![(err.input, nom::error::VerboseErrorKind::Context("expected field:name format"))]
}
})
})
}
// With take_until, errors are clear: delimiter not found
// With take_until_inclusive, errors are the same kind
// Both return ErrorKind::TakeUntil or ErrorKind::TakeUntilInclusive
}Both use the same error kind; error handling patterns are similar.
Common Patterns Summary
use nom::bytes::complete::{take_until, take_until_inclusive};
use nom::character::complete::char;
use nom::sequence::{tuple, preceded};
// Comment-only summary of which parser suits which input shape.
fn pattern_summary() {
    // Pattern 1: Key-value (take_until preferred)
    // key:value\n
    // Use take_until + char(':') to get a clean key,
    // then take_until again for the value.

    // Pattern 2: Line-based (take_until preferred)
    // line1\nline2\n
    // Use take_until("\n") + char('\n')
    // to get clean lines without the newline.

    // Pattern 3: Paragraph (take_until_inclusive OK)
    // paragraph\n\nparagraph
    // If the delimiter is going to be stripped anyway,
    // take_until_inclusive can be used for simplicity.

    // Pattern 4: Delimited content (take_until + delimited)
    // {content}
    // Use delimited(char('{'), take_until("}"), char('}'))
    // for clean content extraction.

    // Pattern 5: Consuming delimiter with content (take_until_inclusive)
    // When the delimiter is needed in the output,
    // or when it is just a boundary to consume.
}Choose based on whether you need delimiter processing or clean content.
Synthesis
Quick reference:
use nom::bytes::complete::{take_until, take_until_inclusive};
use nom::IResult;
fn quick_reference() {
let input = "hello,world";
// take_until: content before delimiter, delimiter in remaining
let (remaining, content) = take_until(",")(input).unwrap();
// content = "hello"
// remaining = ",world"
// take_until_inclusive: content including delimiter, delimiter consumed
let (remaining, content) = take_until_inclusive(",")(input).unwrap();
// content = "hello,"
// remaining = "world"
// When to use take_until:
// - Need to verify delimiter format
// - Using delimited() combinator
// - Want clean content without delimiter
// - Delimiter varies and matters
// When to use take_until_inclusive:
// - Delimiter is just a separator
// - Want one parser instead of two
// - Don't need to examine delimiter
// - Boundary to skip over
// Key difference: delimiter in remaining vs. consumed
}Key insight: The choice between take_until and take_until_inclusive is fundamentally about delimiter ownership. After take_until, the delimiter remains in the input streamâyou own it as a parser and decide what to do with it. This is ideal when the delimiter has semantic meaning (like distinguishing between : and = in key-value pairs) or when you want clean content extraction. After take_until_inclusive, the delimiter is consumedâthe caller doesn't need to know about it. This is ideal when the delimiter is purely structural (like --- section separators) and you just want to move past it. The take_until approach composes better with other combinators like delimited and preceded, making it the more common choice for structured parsing. Reserve take_until_inclusive for cases where you explicitly want the delimiter in the output or when it simplifies a larger parsing pattern.
