What is the difference between `nom::bytes::complete::take_until` and `take_until1` for substring parsing?

take_until and take_until1 both consume input until a specified substring is found, but take_until can match zero characters (empty match before the delimiter) while take_until1 requires at least one character before the delimiter, returning an error if the input starts with the delimiter. This distinction matters when parsing formats where empty content between delimiters is semantically different from missing content.

Basic Parser Behavior

use nom::bytes::complete::{take_until, take_until1};
use nom::IResult;
 
/// Shows that `take_until` and `take_until1` agree when content precedes the delimiter.
///
/// Neither parser consumes the delimiter: the remaining input starts *at* the
/// delimiter, and the matched content is everything in front of it.
fn basic_behavior() {
    let input = "hello, world!";

    // IResult is Ok((remaining_input, matched_content)).
    // The remainder begins with the delimiter "world" itself.
    let result: IResult<&str, &str> = take_until("world")(input);
    assert_eq!(result, Ok(("world!", "hello, ")));

    // Same result: at least one character precedes the delimiter.
    let result1: IResult<&str, &str> = take_until1("world")(input);
    assert_eq!(result1, Ok(("world!", "hello, ")));
}

Both parsers return the content before the delimiter; the remaining input starts at the delimiter itself, because neither parser consumes it.

The Key Difference: Empty Matches

use nom::bytes::complete::{take_until, take_until1};
use nom::IResult;
 
/// Contrasts the two parsers on input that begins with the delimiter.
///
/// `take_until` accepts a zero-length match, while `take_until1` insists on
/// at least one character in front of the delimiter.
fn empty_match_difference() {
    // Nothing precedes the delimiter in this text.
    let text = "delimiter comes first";

    // 0+ characters: the empty prefix is a valid match.
    let lenient: IResult<&str, &str> = take_until("delimiter")(text);
    assert_eq!(lenient, Ok(("delimiter comes first", "")));

    // 1+ characters: an empty prefix is rejected with an error.
    let strict: IResult<&str, &str> = take_until1("delimiter")(text);
    assert!(strict.is_err());
}

The 1 suffix in take_until1 indicates "one or more characters required before the delimiter."

Parse Results Breakdown

use nom::bytes::complete::{take_until, take_until1};
use nom::IResult;
use nom::error::{Error, ErrorKind};
 
fn result_analysis() {
    // Successful cases (same behavior)
    let input1 = "content<!--comment-->";
    
    let result = take_until("<!--")(input1);
    // Ok(("<!--comment-->", "content"))
    
    let result1 = take_until1("<!--")(input1);
    // Ok(("<!--comment-->", "content"))
    // Both succeed with same result when content exists
    
    // Empty content case (different behavior)
    let input2 = "<!--comment-->";
    
    let result = take_until("<!--")(input2);
    // Ok(("<!--comment-->", ""))
    // Succeeds with empty string
    
    let result1 = take_until1("<!--")(input2);
    // Err(Err::Error(Error { input: "<!--comment-->", code: TakeUntil }))
    // Fails because no characters before delimiter
    
    // Delimiter not found case (same behavior)
    let input3 = "no delimiter here";
    
    let result = take_until("<!--")(input3);
    // Err(Err::Error(Error { input: "no delimiter here", code: TakeUntil }))
    
    let result1 = take_until1("<!--")(input3);
    // Err(Err::Error(Error { input: "no delimiter here", code: TakeUntil }))
    // Both fail when delimiter not present
}

The difference only manifests when the delimiter appears at the start of the input.

Practical Example: HTML/XML Parsing

use nom::bytes::complete::{tag, take_until, take_until1};
use nom::sequence::delimited;
use nom::IResult;
 
/// Parses an XML/HTML comment and returns the text between `<!--` and `-->`.
///
/// The body may be empty, so `<!---->` yields `""`.
fn parse_comment_content(input: &str) -> IResult<&str, &str> {
    let (rest, _) = tag("<!--")(input)?;
    // Zero or more characters up to (not including) the closing marker.
    let (rest, body) = take_until("-->")(rest)?;
    let (rest, _) = tag("-->")(rest)?;
    Ok((rest, body))
}
 
/// Parses an XML/HTML comment whose body must contain at least one character.
///
/// An empty comment such as `<!---->` is rejected.
fn parse_nonempty_comment(input: &str) -> IResult<&str, &str> {
    let (rest, _) = tag("<!--")(input)?;
    // One or more characters are required before the closing marker.
    let (rest, body) = take_until1("-->")(rest)?;
    let (rest, _) = tag("-->")(rest)?;
    Ok((rest, body))
}
 
/// Runs both comment parsers over a non-empty and an empty comment.
fn comment_examples() {
    // A comment with a body: both parsers agree.
    let with_text = "<!-- hello -->";
    assert_eq!(parse_comment_content(with_text), Ok(("", " hello ")));
    assert_eq!(parse_nonempty_comment(with_text), Ok(("", " hello ")));

    // A comment without a body: only the lenient parser accepts it.
    let without_text = "<!---->";
    assert_eq!(parse_comment_content(without_text), Ok(("", "")));
    assert!(parse_nonempty_comment(without_text).is_err());
}

Choose take_until1 when empty content is invalid semantically.

Parsing Key-Value Pairs

use nom::bytes::complete::{take_until, take_until1};
use nom::IResult;
use nom::sequence::tuple;
use nom::character::complete::char;
 
fn parse_key_value(input: &str) -> IResult<&str, (&str, &str)> {
    // Key should not be empty
    let (remaining, key) = take_until1("=")(input)?;
    let (remaining, _) = char('=')(remaining)?;
    let (remaining, value) = take_until(";")(remaining)?;
    let (remaining, _) = char(';')(remaining)?;
    Ok((remaining, (key, value)))
}
 
fn parse_key_value_lenient(input: &str) -> IResult<&str, (&str, &str)> {
    // Key can be empty
    let (remaining, key) = take_until("=")(input)?;
    let (remaining, _) = char('=')(remaining)?;
    let (remaining, value) = take_until(";")(remaining)?;
    let (remaining, _) = char(';')(remaining)?;
    Ok((remaining, (key, value)))
}
 
/// Compares the strict and lenient key-value parsers.
fn key_value_examples() {
    // A well-formed pair is accepted by the strict parser.
    assert_eq!(parse_key_value("name=value;"), Ok(("", ("name", "value"))));

    // An empty key: the strict parser rejects it, the lenient one returns "".
    let missing_key = "=value;";
    assert!(parse_key_value(missing_key).is_err());
    assert_eq!(
        parse_key_value_lenient(missing_key),
        Ok(("", ("", "value")))
    );
}

Use take_until1 to enforce non-empty fields in structured data.

Combining with Other Parsers

use nom::bytes::complete::{take_until, take_until1, tag};
use nom::IResult;
use nom::sequence::{preceded, terminated};
use nom::combinator::opt;
 
/// Returns everything before `MARKER` and consumes the marker itself.
///
/// The returned content may be empty.
fn parse_until_marker(input: &str) -> IResult<&str, &str> {
    let (at_marker, content) = take_until("MARKER")(input)?;
    let (rest, _) = tag("MARKER")(at_marker)?;
    Ok((rest, content))
}
 
/// Returns the non-empty content before `MARKER` and consumes the marker itself.
///
/// Fails when the input begins with `MARKER`.
fn parse_nonempty_until_marker(input: &str) -> IResult<&str, &str> {
    let (at_marker, content) = take_until1("MARKER")(input)?;
    let (rest, _) = tag("MARKER")(at_marker)?;
    Ok((rest, content))
}
 
/// Runs both marker parsers on input with and without leading content.
///
/// `take_until` lets a field be empty; `take_until1` requires it to be present.
fn combined_examples() {
    // Content precedes the marker: identical results.
    let with_content = "contentMARKERrest";
    assert_eq!(parse_until_marker(with_content), Ok(("rest", "content")));
    assert_eq!(
        parse_nonempty_until_marker(with_content),
        Ok(("rest", "content"))
    );

    // The marker comes first: only the lenient parser succeeds.
    let marker_first = "MARKERrest";
    assert_eq!(parse_until_marker(marker_first), Ok(("rest", "")));
    assert!(parse_nonempty_until_marker(marker_first).is_err());
}
 
/// Parses one CSV field that ends at the next comma or at the end of input.
///
/// Returns `(remaining, field)`. If a comma is found it is consumed; otherwise
/// the whole input is the (last) field and the remainder is empty. The field
/// may be empty. This parser never fails, so it also succeeds on `""` without
/// consuming anything.
fn parse_csv_field(input: &str) -> IResult<&str, &str> {
    let up_to_comma: IResult<&str, &str> = take_until(",")(input);
    match up_to_comma {
        // On success the remainder always starts with the comma; skip that
        // single-byte delimiter.
        Ok((remaining, field)) => Ok((&remaining[1..], field)),
        // The complete `take_until` reports a missing delimiter as an error,
        // which here simply means this is the final field.
        Err(_) => Ok(("", input)),
    }
}

The choice affects error handling in larger parser compositions.

Error Handling Differences

use nom::bytes::complete::{take_until, take_until1};
use nom::IResult;
use nom::error::{Error, ErrorKind};
 
fn error_analysis() {
    // take_until errors
    let input = "no delimiter";
    
    match take_until("END")(input) {
        Err(nom::Err::Error(e)) => {
            println!("Error kind: {:?}", e.code);
            // ErrorKind::TakeUntil
            println!("Input: {:?}", e.input);
            // "no delimiter" (full input)
        }
        _ => unreachable!(),
    }
    
    // take_until1 errors
    let input = "DELIMITER here";
    
    match take_until1("DELIMITER")(input) {
        Err(nom::Err::Error(e)) => {
            println!("Error kind: {:?}", e.code);
            // ErrorKind::TakeUntil
            println!("Input: {:?}", e.input);
            // "DELIMITER here" (full input)
        }
        _ => unreachable!(),
    }
    
    // Both use the same ErrorKind::TakeUntil
    // The difference is when they succeed vs fail
}

Both use ErrorKind::TakeUntil for errors; the difference is in success conditions.

Streaming vs Complete Parsers

use nom::bytes::complete::{take_until, take_until1};
use nom::bytes::streaming::{take_until as streaming_take_until, take_until1 as streaming_take_until1};
use nom::IResult;
 
fn streaming_vs_complete() {
    // Complete parsers assume all input is available
    // They error if delimiter is not found
    
    let input = "incomplete";
    let result = take_until("END")(input);
    assert!(result.is_err());
    
    // Streaming parsers return Needed if delimiter might come later
    let result_streaming = streaming_take_until("END")(input);
    // Returns Err(Err::Incomplete(Needed::Size(n)))
    // Indicates more input needed to determine result
    
    // For streaming take_until1:
    let input = "DELIMITER";
    let result_streaming = streaming_take_until1("DELIMITER")(input);
    // Still returns error for empty match in streaming mode
}

Streaming parsers handle incomplete input differently, but the empty-match distinction remains.

Real-World Example: URL Parsing

use nom::bytes::complete::{take_until, take_until1};
use nom::IResult;
use nom::sequence::preceded;
use nom::character::complete::char;
 
/// Returns the path portion in front of the `?` that starts the query string.
///
/// The path may be empty; the `?` is left in the remaining input.
fn parse_url_path(input: &str) -> IResult<&str, &str> {
    let until_query = take_until("?");
    until_query(input)
}
 
/// Returns the non-empty path portion in front of the `?`.
///
/// Fails when the input begins with `?`; the `?` is left in the remaining input.
fn parse_url_path_nonempty(input: &str) -> IResult<&str, &str> {
    let until_query = take_until1("?");
    until_query(input)
}
 
/// Demonstrates the URL path parsers with and without a path before the query.
fn url_examples() {
    // Path followed by a query string.
    assert_eq!(parse_url_path("/users?id=123"), Ok(("?id=123", "/users")));

    // Query string only: the lenient parser yields an empty path...
    let query_only = "?query=value";
    assert_eq!(parse_url_path(query_only), Ok(("?query=value", "")));

    // ...while the strict parser rejects it.
    assert!(parse_url_path_nonempty(query_only).is_err());
}

URL paths can be empty; query parameter values might require non-empty constraints.

Real-World Example: Protocol Parsing

use nom::bytes::complete::{take_until, take_until1};
use nom::IResult;
use nom::sequence::tuple;
use nom::character::complete::char;
 
// Simple protocol: COMMAND\r\nBODY\r\nEND\r\n
fn parse_command(input: &str) -> IResult<&str, (&str, &str)> {
    // Command cannot be empty
    let (remaining, command) = take_until1("\r\n")(input)?;
    let (remaining, _) = tag("\r\n")(remaining)?;
    
    // Body can be empty
    let (remaining, body) = take_until("\r\nEND\r\n")(remaining)?;
    let (remaining, _) = tag("\r\nEND\r\n")(remaining)?;
    
    Ok((remaining, (command, body)))
}
 
/// Exercises `parse_command` with an empty body, a non-empty body, and an
/// empty command.
fn protocol_examples() {
    // Empty body is accepted.
    assert_eq!(
        parse_command("PING\r\n\r\nEND\r\n"),
        Ok(("", ("PING", "")))
    );

    // Non-empty body.
    assert_eq!(
        parse_command("PING\r\nhello world\r\nEND\r\n"),
        Ok(("", ("PING", "hello world")))
    );

    // Empty command is rejected.
    assert!(parse_command("\r\nbody\r\nEND\r\n").is_err());
}
 
use nom::bytes::complete::tag;

Protocol parsers often distinguish between required and optional fields.

Performance Considerations

use nom::bytes::complete::{take_until, take_until1};
 
fn performance_notes() {
    // NOTE(review): the figures below are rough guidance, not measurements;
    // the real cost depends on nom's substring search for the input type
    // (&str vs &[u8]) -- confirm against the nom source before relying on them.
    //
    // Naive upper bound for both parsers: O(n*m), where
    // n = input length
    // m = delimiter length
    // Optimized substring searches typically do much better.
    
    // The "1" variant has minimal overhead:
    // - Same search algorithm
    // - Just rejects a match found at position 0
    
    // Performance characteristics:
    // 1. Search for delimiter substring
    // 2. Return prefix (take_until) or error if empty (take_until1)
    
    // When delimiter is at start (cost depends on m only, not on n):
    // take_until:  returns the empty match as soon as the delimiter is matched
    // take_until1: returns the error as soon as the delimiter is matched
    
    // When delimiter is at end:
    // Both: at least O(n) - the entire input is searched
    
    // Recommendation: Use take_until when empty content is valid
    // Use take_until1 when you'd need to check for empty content anyway
}

Performance is nearly identical; choose based on semantics, not speed.

Combinator Patterns

use nom::bytes::complete::{take_until, take_until1, tag};
use nom::IResult;
use nom::combinator::{map, map_parser, verify};
use nom::sequence::{preceded, terminated};
 
// Pattern 1: Non-empty field in delimited format
fn non_empty_field(input: &str, delimiter: &str) -> IResult<&str, &str> {
    terminated(take_until1(delimiter), tag(delimiter))(input)
}
 
// Pattern 2: Optional field (empty allowed)
fn optional_field(input: &str, delimiter: &str) -> IResult<&str, &str> {
    terminated(take_until(delimiter), tag(delimiter))(input)
}
 
// Pattern 3: Multiple fields with different constraints
/// Parses a record of three `|`-terminated fields: `field1|field2|field3|`.
///
/// The first two fields are required (non-empty); the third may be empty.
/// Every field's trailing `|` is consumed, including the one after the third
/// field, so a complete record leaves no delimiter behind in the remainder.
fn parse_record(input: &str) -> IResult<&str, (&str, &str, &str)> {
    let (remaining, field1) = take_until1("|")(input)?;
    let (remaining, _) = tag("|")(remaining)?;
    let (remaining, field2) = take_until1("|")(remaining)?;
    let (remaining, _) = tag("|")(remaining)?;
    let (remaining, field3) = take_until("|")(remaining)?;
    // take_until stops in front of the delimiter; consume the terminator too.
    let (remaining, _) = tag("|")(remaining)?;

    Ok((remaining, (field1, field2, field3)))
}
 
/// Checks `parse_record` against a full record, one with an empty third field,
/// and one with an empty second field.
fn record_examples() {
    // All three fields present.
    assert_eq!(
        parse_record("name|value|extra|"),
        Ok(("", ("name", "value", "extra")))
    );

    // Third field empty is OK.
    assert_eq!(
        parse_record("name|value||"),
        Ok(("", ("name", "value", "")))
    );

    // Second field empty is NOT OK.
    assert!(parse_record("name||extra|").is_err());
}

Mix take_until and take_until1 to express field requirements.

Summary Table

fn summary() {
    // | Parser       | Match Length | Empty Match | Delimiter at Start |
    // |--------------|--------------|-------------|--------------------|
    // | take_until   | 0+ chars     | Allowed     | Succeeds (empty)   |
    // | take_until1  | 1+ chars     | Rejected    | Fails              |
    
    // | Parser       | No Delimiter | Delimiter Found | Error Type      |
    // |--------------|--------------|-----------------|-----------------|
    // | take_until   | Error        | Success         | TakeUntil       |
    // | take_until1  | Error        | Success*        | TakeUntil       |
    
    // * take_until1 fails if the match would be empty (delimiter at position 0)
    
    // | Use Case                   | Recommendation   |
    // |----------------------------|------------------|
    // | Optional content           | take_until       |
    // | Required non-empty content | take_until1      |
    // | May start with delimiter   | take_until       |
    // | Must not be empty          | take_until1      |
}

Synthesis

Quick reference:

use nom::bytes::complete::{take_until, take_until1};
 
// The result type is spelled out because the parsers are generic over their
// error type; `nom::IResult<&str, &str>` selects nom's default error.

// take_until: accepts empty match
let result: nom::IResult<&str, &str> = take_until("END")("END more");
// Ok(("END more", ""))  - empty match succeeds
 
// take_until1: requires non-empty match
let result: nom::IResult<&str, &str> = take_until1("END")("END more");
// Err - no characters before delimiter
 
// Both behave identically when delimiter follows content
let result: nom::IResult<&str, &str> = take_until("END")("contentEND");
// Ok(("END", "content"))
 
let result: nom::IResult<&str, &str> = take_until1("END")("contentEND");
// Ok(("END", "content"))

Key insight: The take_until and take_until1 parsers share identical search behavior—both scan input for a substring delimiter and return everything before it, leaving the delimiter itself unconsumed. The sole difference is how they handle the edge case where the delimiter appears at the very start of input: take_until succeeds with an empty match (zero characters before the delimiter), while take_until1 fails with an error because it requires at least one character before the delimiter. This distinction mirrors the naming convention in nom where functions suffixed with 1 require non-empty matches (compare take_while vs take_while1, many0 vs many1). Use take_until when empty content between delimiters is semantically valid (empty fields in CSV, empty body in protocol messages, empty URL path), and use take_until1 when empty content should be rejected at the parser level (required fields, non-empty identifiers, validation of required attributes). The performance difference is negligible—take_until1 simply adds an empty-check after finding the delimiter—so choose based on correctness and error messaging rather than efficiency.

What is the difference between nom::bytes::complete::take_until and take_until1 for substring parsing?