How does `nom::bytes::complete::take_until` differ from `take_until_inclusive` for delimiter parsing behavior?

`take_until` consumes input up to but not including the delimiter and returns the content before it, so the delimiter stays at the head of the remaining input for the next parser to consume. Note that nom's function-style modules (`nom::bytes::complete` and `nom::bytes::streaming`) do not export a parser named `take_until_inclusive`; in this article the name stands for the inclusive behavior you get by composing existing combinators, for example `recognize(pair(take_until(d), tag(d)))`. That composition consumes both the content and the delimiter and includes the delimiter in the returned value, so the remaining input starts after the delimiter. The choice depends on whether you need to separately process the delimiter—for example, to verify its exact value or use it as a separator in a larger structure.

Basic take_until Behavior

use nom::bytes::complete::take_until;
use nom::IResult;
 
/// Shows that `take_until` stops in front of the delimiter.
///
/// Parses `"hello,world"` up to the first `","` and prints both halves of
/// the result.
fn basic_take_until() {
    let source = "hello,world";

    // An `IResult` success is `(remaining_input, output)`, in that order.
    let outcome: IResult<&str, &str> = take_until(",")(source);

    match outcome {
        Err(e) => println!("Error: {:?}", e),
        Ok((remaining, taken)) => {
            println!("Taken: {:?}", taken); // "hello"
            println!("Remaining: {:?}", remaining); // ",world"
        }
    }

    // The "," is not part of the output; it is still the first character
    // of the remaining input.
}

take_until leaves the delimiter in the input for subsequent parsers.

Basic take_until_inclusive Behavior

use nom::bytes::complete::{tag, take_until};
use nom::combinator::recognize;
use nom::sequence::pair;
use nom::IResult;

/// Shows the *inclusive* variant: content and delimiter are returned together.
///
/// nom does not export a `take_until_inclusive` parser, so the behavior is
/// composed: `take_until` finds the content, `tag` consumes the delimiter, and
/// `recognize` returns the whole consumed span as one slice.
fn basic_take_until_inclusive() {
    let input = "hello,world";

    let result: IResult<&str, &str> = recognize(pair(take_until(","), tag(",")))(input);

    match result {
        Ok((remaining, taken)) => {
            println!("Taken: {:?}", taken); // "hello,"
            println!("Remaining: {:?}", remaining); // "world"
        }
        Err(e) => println!("Error: {:?}", e),
    }

    // The delimiter "," IS included in the output,
    // and the remaining input starts after the delimiter.
}

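The inclusive form (called take_until_inclusive in this article; in nom it is composed as `recognize(pair(take_until(d), tag(d)))`) includes the delimiter in the output and consumes it.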

Remaining Input Comparison

use nom::bytes::complete::{tag, take_until};
use nom::combinator::recognize;
use nom::sequence::pair;
use nom::IResult;

/// Compares what is left in the input after the exclusive and the inclusive
/// parse of `"key:value"`.
fn remaining_input_comparison() {
    let input = "key:value";

    // An `IResult` success is `(remaining_input, output)`: the remaining
    // input is the FIRST element of the tuple, the parsed output the second.

    // With take_until:
    let exclusive: IResult<&str, &str> = take_until(":")(input);
    let (remaining_after_take, _taken) = exclusive.expect("input contains ':'");
    println!("After take_until: {:?}", remaining_after_take);
    // ":value" - delimiter still in input

    // Inclusive (take_until + tag, returned as one span by recognize):
    let inclusive: IResult<&str, &str> = recognize(pair(take_until(":"), tag(":")))(input);
    let (remaining_after_inclusive, _taken) = inclusive.expect("input contains ':'");
    println!("After take_until_inclusive: {:?}", remaining_after_inclusive);
    // "value" - delimiter consumed

    // Subsequent parsing differs:
    // After take_until, you need to consume the delimiter separately.
    // After the inclusive form, the delimiter is already consumed.
}

The key difference is whether the delimiter remains in the input stream.

Consuming the Delimiter After take_until

use nom::bytes::complete::take_until;
use nom::character::complete::char;
use nom::sequence::tuple;
use nom::IResult;
 
/// Shows the common "take_until, then consume the delimiter" pattern.
fn consuming_delimiter() {
    let input = "name:value";

    // `char(':')` produces a `char`, so the tuple output is `(&str, char)`.
    let result: IResult<&str, (&str, char)> = tuple((
        take_until(":"),
        char(':'), // Consume the delimiter
    ))(input);

    match result {
        Ok((remaining, (content, _delimiter))) => {
            println!("Content: {:?}", content); // "name"
            println!("Remaining: {:?}", remaining); // "value"
        }
        Err(e) => println!("Error: {:?}", e),
    }

    // This consumes the same input as an inclusive parse would, but the
    // content and the delimiter arrive as separate values, so you keep
    // control over delimiter handling.
}

After take_until, explicitly consume the delimiter with another parser.

Parsing Key-Value Pairs

use nom::bytes::complete::{tag, take_until};
use nom::character::complete::char;
use nom::combinator::recognize;
use nom::sequence::{pair, preceded, tuple};
use nom::IResult;

/// Key-value parsing with `take_until` and explicit delimiter consumption.
fn key_value_with_take_until() {
    // Parse the key, consume ':', then take the value up to the newline.
    fn parse_kv(input: &str) -> IResult<&str, (&str, &str)> {
        let (remaining, key) = take_until(":")(input)?;
        let (remaining, _) = char(':')(remaining)?; // Consume delimiter
        let (remaining, value) = take_until("\n")(remaining)?;
        Ok((remaining, (key, value)))
    }

    let input = "name:John\n";
    let (_, (key, value)) = parse_kv(input).unwrap();
    println!("Key: {:?}, Value: {:?}", key, value);
    // Key: "name", Value: "John"
}

/// Key-value parsing with the inclusive form, which then has to strip the
/// delimiter from each piece again.
fn key_value_with_take_until_inclusive() {
    // nom has no `take_until_inclusive`; `recognize(pair(take_until, tag))`
    // returns the content together with the delimiter.
    fn parse_kv_inclusive(input: &str) -> IResult<&str, (&str, &str)> {
        let (remaining, key_and_delim) = recognize(pair(take_until(":"), tag(":")))(input)?;
        // key_and_delim is "name:" - strip the delimiter without byte-index math
        let key = key_and_delim.strip_suffix(':').unwrap_or(key_and_delim);

        let (remaining, value_and_delim) =
            recognize(pair(take_until("\n"), tag("\n")))(remaining)?;
        let value = value_and_delim.strip_suffix('\n').unwrap_or(value_and_delim);

        Ok((remaining, (key, value)))
    }

    let input = "name:John\n";
    let (_, (key, value)) = parse_kv_inclusive(input).unwrap();
    println!("Key: {:?}, Value: {:?}", key, value);
    // Key: "name", Value: "John"
}

For key-value pairs, take_until with explicit delimiter consumption is cleaner.

When Delimiter Value Matters

use nom::bytes::complete::take_until;
use nom::character::complete::char;
use nom::sequence::tuple;
use nom::IResult;
 
/// Illustrates keeping the delimiter in the input so it can be checked by a
/// dedicated parser.
fn when_delimiter_matters() {
    // Takes everything before "\n---\n", then matches the separator one
    // character at a time; the returned pair is the content and "---".
    fn parse_section(input: &str) -> IResult<&str, (&str, &str)> {
        let (after_body, body) = take_until("\n---\n")(input)?;
        let (rest, _) = tuple((
            char('\n'),
            char('-'),
            char('-'),
            char('-'),
            char('\n'),
        ))(after_body)?;
        // At this point the exact separator format has been verified.
        Ok((rest, (body, "---")))
    }

    // Sample document with one separator line between two sections.
    let input = "section1\n---\nsection2";

    // take_until is a good fit when:
    // 1. the delimiter has to be validated,
    // 2. the delimiter varies and you need to look at it,
    // 3. the delimiter should be preserved for later processing.
}

Use take_until when the delimiter needs verification or processing.

When Delimiter is Just a Separator

use nom::bytes::complete::{tag, take_until};
use nom::combinator::recognize;
use nom::sequence::pair;
use nom::IResult;

/// Splits text into paragraphs separated by blank lines, using the inclusive
/// form so each step also moves past the separator.
fn when_delimiter_is_separator() {
    let input = "first paragraph\n\nsecond paragraph\n\nthird";

    // The "\n\n" is just a separator. nom has no `take_until_inclusive`, so
    // the content and separator are matched as a pair and returned as one
    // span by `recognize`.
    fn parse_paragraph(input: &str) -> IResult<&str, &str> {
        recognize(pair(take_until("\n\n"), tag("\n\n")))(input)
    }

    let mut remaining = input;
    while !remaining.is_empty() {
        match parse_paragraph(remaining) {
            Ok((rem, paragraph)) => {
                // paragraph includes the delimiter, strip it
                let content = paragraph.strip_suffix("\n\n").unwrap_or(paragraph);
                println!("Paragraph: {:?}", content);
                // Every success consumes at least the separator, so the
                // loop always makes progress.
                remaining = rem;
            }
            Err(_) => {
                // Last paragraph, no delimiter
                println!("Last: {:?}", remaining);
                break;
            }
        }
    }
}

Use take_until_inclusive when the delimiter is just a separator to skip.

Parsing Until Multiple Delimiters

use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::character::complete::char;
use nom::IResult;

/// Parses items that may end in either `,` or `;`.
///
/// `take_until(",;")` would search for the literal two-character sequence
/// `,;`, not for either character. `is_not(",;")` is the parser that stops at
/// the first occurrence of ANY character in its argument.
fn multiple_delimiters() {
    // Returns the item text and the delimiter character that ended it.
    fn parse_item(input: &str) -> IResult<&str, (&str, char)> {
        // NOTE: `is_not` fails on an empty match, so an item must be non-empty.
        let (remaining, content) = is_not(",;")(input)?;
        // Consume the delimiter with a parser instead of slicing by index, so
        // a missing delimiter is a parse error rather than a panic.
        let (remaining, delim) = alt((char(','), char(';')))(remaining)?;

        Ok((remaining, (content, delim)))
    }

    let input = "value1, value2; value3";
    let (remaining, (content, delim)) = parse_item(input).expect("input contains a delimiter");
    println!(
        "Content: {:?}, Delimiter: {:?}, Remaining: {:?}",
        content, delim, remaining
    );
    // Content: "value1", Delimiter: ',', Remaining: " value2; value3"

    // is_not on its own leaves the delimiter in the remaining input:
    let input = "abc;def";
    let result: IResult<&str, &str> = is_not(",;")(input);
    let (remaining, content) = result.expect("input is non-empty");
    println!("Content: {:?}, Remaining: {:?}", content, remaining);
    // Content: "abc", Remaining: ";def"
}

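take_until matches its argument as a single literal pattern, so `take_until(",;")` looks for the two-character sequence `,;`; to stop at any one of several delimiter characters, use `is_not(",;")` or `take_till` with a predicate instead.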

Handling Missing Delimiters

use nom::bytes::complete::{take_until, take_until_inclusive};
use nom::error::{Error, ErrorKind};
use nom::IResult;
 
fn missing_delimiter() {
    // Both functions fail if delimiter is not found
    let input = "no delimiter here";
    
    let result: IResult<&str, &str> = take_until(":")(input);
    match result {
        Err(nom::Err::Error(Error { input: i, code: ErrorKind::TakeUntil })) => {
            println!("take_until failed: delimiter not found");
        }
        _ => {}
    }
    
    let result: IResult<&str, &str> = take_until_inclusive(":")(input);
    match result {
        Err(nom::Err::Error(Error { input: i, code: ErrorKind::TakeUntilInclusive })) => {
            println!("take_until_inclusive failed: delimiter not found");
        }
        _ => {}
    }
    
    // Both return an error of kind TakeUntil or TakeUntilInclusive
    // Handle with alt or optional for optional delimiters
}

Both functions fail if the delimiter isn't found in the input.

Nested Structure Parsing

use nom::bytes::complete::take_until;
use nom::character::complete::char;
use nom::sequence::delimited;
use nom::IResult;
 
/// Demonstrates `take_until` inside the `delimited` combinator.
fn nested_structures() {
    // Parses `{content}`; the content itself cannot contain '}'.
    fn parse_braced_content(input: &str) -> IResult<&str, &str> {
        delimited(
            char('{'),
            take_until("}"),
            char('}'),
        )(input)
    }

    let parsed = parse_braced_content("{hello}");
    let (_, content) = parsed.unwrap();
    println!("Content: {:?}", content); // "hello"

    // Why this reads cleanly:
    // 1. take_until yields the content up to '}'
    // 2. the closing parser consumes the '}'
    // 3. nothing has to be stripped from the content afterwards
}
 
fn nested_with_inclusive() {
    // take_until_inclusive requires stripping the delimiter
    fn parse_braced_inclusive(input: &str) -> IResult<&str, &str> {
        let (remaining, content_with_delim) = take_until_inclusive("}")(input)?;
        let content = &content_with_delim[..content_with_delim.len() - 1];
        Ok((remaining, content))
    }
    
    // More cumbersome when you want to handle delimiters separately
}

take_until works cleanly with the delimited combinator pattern.

Streaming vs Complete Parsers

use nom::bytes::complete::take_until;
use nom::bytes::streaming::take_until as streaming_take_until;
use nom::IResult;

/// Contrasts the `complete` and `streaming` flavors of `take_until` on input
/// that does not contain the delimiter.
fn streaming_vs_complete() {
    // Complete parsers (used above):
    // - Assume all input is available
    // - Fail if delimiter not found (no more input coming)

    // Streaming parsers:
    // - Assume more input may arrive
    // - Return Incomplete if delimiter might be in future input

    // The pattern is passed as a byte slice (`&[u8]`) rather than as a
    // fixed-size array reference.
    fn complete_parse(input: &[u8]) -> IResult<&[u8], &[u8]> {
        take_until(&b":"[..])(input)
    }

    fn streaming_parse(input: &[u8]) -> IResult<&[u8], &[u8]> {
        streaming_take_until(&b":"[..])(input)
    }

    // With partial input:
    let partial: &[u8] = b"hello"; // No delimiter, might be incomplete

    // Complete parser fails (delimiter not found)
    let result = complete_parse(partial);
    assert!(result.is_err());

    // Streaming parser returns Incomplete (need more data)
    let result = streaming_parse(partial);
    if let Err(nom::Err::Incomplete(_)) = result {
        println!("Need more data");
    }
}

Streaming variants return Incomplete when the delimiter might appear in future input.

Binary Protocol Parsing

use nom::bytes::complete::take_until;
use nom::number::complete::be_u32;
use nom::sequence::tuple;
use nom::IResult;
 
fn binary_protocol() {
    // Binary protocols often use length-prefixed or delimiter-based messages
    
    // Delimiter-based:
    fn parse_delimited_message(input: &[u8]) -> IResult<&[u8], &[u8]> {
        take_until(b"\x00")(input)  // Null-terminated string
    }
    
    let message = b"hello\x00world";
    let (remaining, content) = parse_delimited_message(message).unwrap();
    assert_eq!(content, b"hello");
    assert_eq!(remaining, b"\x00world");  // Delimiter still there
    
    // After parsing, consume the delimiter:
    let (remaining, _) = nom::bytes::complete::tag(b"\x00")(remaining).unwrap();
    assert_eq!(remaining, b"world");
    
    // Or use take_until_inclusive to consume delimiter:
    fn parse_inclusive_message(input: &[u8]) -> IResult<&[u8], &[u8]> {
        take_until_inclusive(b"\x00")(input)
    }
    
    let (remaining, content) = parse_inclusive_message(b"hello\x00world").unwrap();
    assert_eq!(content, b"hello\x00");  // Includes delimiter
    assert_eq!(remaining, b"world");
}

For binary protocols, choose based on whether you need the delimiter in output.

Performance Considerations

use nom::bytes::complete::{take_until, take_until_inclusive};
use nom::IResult;
 
fn performance_comparison() {
    let input = "a".repeat(1000) + ",rest";
    
    // Both scan the input for the delimiter
    // Performance is similar - O(n) scan
    
    // take_until_inclusive returns slightly more data
    // (includes the delimiter in output)
    
    // For large inputs, the difference is minimal
    // Choose based on semantics, not performance
    
    // If you always strip the delimiter from take_until_inclusive:
    // - take_until + explicit delimiter is cleaner
    // - No string slicing needed
    
    // If you want the delimiter in the output:
    // - take_until_inclusive is simpler
    // - One parser call instead of two
}

Performance is similar; choose based on semantic clarity.

Error Handling Patterns

use nom::bytes::complete::take_until;
use nom::character::complete::char;
use nom::sequence::preceded;
use nom::combinator::map_res;
use nom::IResult;
 
fn error_handling() {
    // Common pattern: take_until with meaningful error context
    
    fn parse_field(input: &str) -> IResult<&str, &str> {
        take_until(":")(input)
            .map_err(|e| {
                // Add context about what we were looking for
                e.map(|err| {
                    nom::error::VerboseError {
                        errors: vec![(err.input, nom::error::VerboseErrorKind::Context("expected field:name format"))]
                    }
                })
            })
    }
    
    // With take_until, errors are clear: delimiter not found
    // With take_until_inclusive, errors are the same kind
    
    // Both return ErrorKind::TakeUntil or ErrorKind::TakeUntilInclusive
}

Both use the same error kind; error handling patterns are similar.

Common Patterns Summary

use nom::bytes::complete::take_until;
use nom::character::complete::char;
use nom::sequence::{tuple, preceded};

/// Comment-only summary of which form fits which input shape.
///
/// "Inclusive" below means `recognize(pair(take_until(d), tag(d)))`; nom does
/// not export a `take_until_inclusive` parser.
fn pattern_summary() {
    // Pattern 1: Key-value (take_until preferred)
    // key:value\n
    // Use take_until + char(':') to get clean key
    // Then take_until for value

    // Pattern 2: Line-based (take_until preferred)
    // line1\nline2\n
    // Use take_until("\n") + char('\n')
    // Clean lines without newline

    // Pattern 3: Paragraph (inclusive form OK)
    // paragraph\n\nparagraph
    // If you're going to strip delimiter anyway
    // Can use the inclusive form for simplicity

    // Pattern 4: Delimited content (take_until + delimited)
    // {content}
    // Use delimited(char('{'), take_until("}"), char('}'))
    // Clean content extraction

    // Pattern 5: Consuming delimiter with content (inclusive form)
    // If you need delimiter included in output
    // Or if delimiter is just a boundary to consume
}

Choose based on whether you need delimiter processing or clean content.

Synthesis

Quick reference:

use nom::bytes::complete::{tag, take_until};
use nom::combinator::recognize;
use nom::sequence::pair;
use nom::IResult;

/// Side-by-side reference for the exclusive and inclusive forms, with the
/// expected values checked by assertions.
fn quick_reference() {
    let input = "hello,world";

    // take_until: content before delimiter, delimiter in remaining
    let exclusive: IResult<&str, &str> = take_until(",")(input);
    let (remaining, content) = exclusive.expect("input contains ','");
    assert_eq!(content, "hello");
    assert_eq!(remaining, ",world");

    // Inclusive (take_until + tag inside recognize; nom has no
    // `take_until_inclusive`): content including delimiter, delimiter consumed
    let inclusive: IResult<&str, &str> = recognize(pair(take_until(","), tag(",")))(input);
    let (remaining, content) = inclusive.expect("input contains ','");
    assert_eq!(content, "hello,");
    assert_eq!(remaining, "world");

    // When to use take_until:
    // - Need to verify delimiter format
    // - Using delimited() combinator
    // - Want clean content without delimiter
    // - Delimiter varies and matters

    // When to use the inclusive form:
    // - Delimiter is just a separator
    // - Want the delimiter in the output slice
    // - Don't need to examine delimiter
    // - Boundary to skip over

    // Key difference: delimiter in remaining vs. consumed
}

Key insight: The choice between take_until and take_until_inclusive is fundamentally about delimiter ownership. After take_until, the delimiter remains in the input stream—you own it as a parser and decide what to do with it. This is ideal when the delimiter has semantic meaning (like distinguishing between : and = in key-value pairs) or when you want clean content extraction. After take_until_inclusive, the delimiter is consumed—the caller doesn't need to know about it. This is ideal when the delimiter is purely structural (like --- section separators) and you just want to move past it. The take_until approach composes better with other combinators like delimited and preceded, making it the more common choice for structured parsing. Reserve take_until_inclusive for cases where you explicitly want the delimiter in the output or when it simplifies a larger parsing pattern.

How does nom::bytes::complete::take_until differ from take_until_inclusive for delimiter parsing behavior?