Loading page…
Rust walkthroughs
Loading page…
How do I use nom::bytes::complete::take_until for parsing delimited content without consuming the delimiter?
take_until consumes bytes from the input until it encounters a specified tag (delimiter), returning the consumed content as output while leaving the delimiter in the remaining input for subsequent parsers to handle. This separation enables compositional parsing: one parser extracts the delimited content and another consumes or processes the delimiter independently, which avoids redundant backtracking and gives cleaner grammar definitions for protocols, file formats, and text processing tasks.
use nom::bytes::complete::take_until;
use nom::IResult;
/// Shows that `take_until` returns the text before the delimiter and leaves
/// the delimiter itself at the front of the remaining input.
fn main() {
    let text = "hello,world;rest";
    // Scan forward to the first "," without consuming it
    let outcome: IResult<&str, &str> = take_until(",")(text);
    match outcome {
        Err(failure) => println!("Error: {:?}", failure),
        Ok((rest, before)) => {
            println!("Output: {}", before); // "hello"
            println!("Remaining: {}", rest); // ",world;rest"
        }
    }
    // The "," is still there: it is the first character of the remaining input
}
take_until returns the content before the delimiter and leaves the delimiter in the remaining input.
use nom::bytes::complete::{take_until, tag};
use nom::sequence::tuple;
use nom::IResult;
/// Pairs `take_until` with `tag` so the delimiter is consumed by an explicit,
/// separate parser.
fn main() {
    let input = "content<delimiter>rest";
    // Approach 1: take_until leaves delimiter for next parser.
    // The IResult annotation pins nom's generic error type; calling
    // `.unwrap()` directly on the parser result leaves that type ambiguous
    // ("type annotations needed").
    let parsed: IResult<&str, (&str, &str)> = tuple((
        take_until("<"),
        tag("<"),
    ))(input);
    let (remaining, (content, _delimiter)) = parsed.unwrap();
    println!("Content: {}", content); // "content"
    println!("Remaining: {}", remaining); // "delimiter>rest"
    // Compare with a hypothetical take_until_and_consume:
    // If delimiter was consumed automatically, we couldn't:
    // - Use the delimiter in decision logic
    // - Parse multiple delimiter variants
    // - Include delimiter in output
}
Leaving the delimiter unconsumed allows subsequent parsers to decide what to do with it.
use nom::bytes::complete::{take_until, tag};
use nom::sequence::preceded;
use nom::IResult;
fn parse_key_value(input: &str) -> IResult<&str, (&str, &str)> {
let (remaining, key) = take_until("=")(input)?;
let (remaining, _) = tag("=")(remaining)?;
let (remaining, value) = take_until(";")(remaining)?;
let (remaining, _) = tag(";")(remaining)?;
Ok((remaining, (key, value)))
}
/// Parses the first two `key=value;` pairs of a sample string and prints them.
fn main() {
    let text = "name=Alice;age=30;city=Boston";
    let (after_first, first) = parse_key_value(text).unwrap();
    println!("Key: {}", first.0); // "name"
    println!("Value: {}", first.1); // "Alice"
    println!("Remaining: {}", after_first); // "age=30;city=Boston"
    // Feed the leftover input back in to read the next pair
    let (_after_second, second) = parse_key_value(after_first).unwrap();
    println!("Key: {}", second.0); // "age"
    println!("Value: {}", second.1); // "30"
}
take_until enables parsing structured content by extracting between delimiters.
use nom::bytes::complete::{take_until, take_until1, tag};
use nom::branch::alt;
use nom::IResult;
/// Extracts content that may be terminated by either ";" or "\n", then lets
/// `alt` identify which delimiter was actually present.
fn main() {
    // Scenario: content ends with either ";" or "\n"
    let input1 = "hello;world";
    let input2 = "hello\nworld";

    /// Splits `input` in front of the first ';' or '\n', whichever comes first.
    ///
    /// take_until only accepts a single pattern, so the search is done by hand.
    /// On success the returned remainder still starts with the delimiter; if
    /// neither delimiter occurs, a `TakeUntil` error carrying `input` is returned.
    fn parse_until_delimiter(input: &str) -> IResult<&str, &str> {
        input
            .find(|c: char| c == ';' || c == '\n')
            .map(|at| (&input[at..], &input[..at]))
            .ok_or_else(|| {
                nom::Err::Error(nom::error::Error::new(
                    input,
                    nom::error::ErrorKind::TakeUntil,
                ))
            })
    }

    let (rem, content) = parse_until_delimiter(input1).unwrap();
    println!("Content: {}", content); // "hello"
    println!("Delimiter starts: {}", rem); // ";world"
    // Now we can parse the delimiter with alt.
    // The annotation fixes nom's generic error type, which a bare
    // `.unwrap()` on the parser result cannot infer.
    let delimiter: IResult<&str, &str> = alt((tag(";"), tag("\n")))(rem);
    let (_after_delimiter, delim) = delimiter.unwrap();
    println!("Delimiter: {}", delim); // ";"

    // The same two steps handle the newline-terminated variant
    let (rem2, content2) = parse_until_delimiter(input2).unwrap();
    println!("Content: {}", content2); // "hello"
    let delimiter2: IResult<&str, &str> = alt((tag(";"), tag("\n")))(rem2);
    let (_after_delimiter2, delim2) = delimiter2.unwrap();
    println!("Delimiter: {:?}", delim2); // "\n"
}
Leaving the delimiter allows using alt to match multiple delimiter variants.
use nom::bytes::complete::{take_until, tag};
use nom::sequence::delimited;
use nom::IResult;
/// Extracts the text between a leading `(` and the first `)` after it.
///
/// This is the hand-written form of `delimited(open, content, close)`: each of
/// the three steps is spelled out so the role of take_until is visible.
/// Returns `(rest, inner)` with both parentheses consumed.
fn parse_parenthesized(input: &str) -> IResult<&str, &str> {
    // Step 1: the input must start with "("
    let (after_open, _) = tag("(")(input)?;
    // Step 2: capture everything up to ")", which is left in place
    let (at_close, inner) = take_until(")")(after_open)?;
    // Step 3: consume ")" and hand back only the captured content
    tag(")")(at_close).map(|(rest, _)| (rest, inner))
}
/// Runs `parse_parenthesized` on a sample and prints the captured content and
/// the leftover input.
fn main() {
    let sample = "(content inside)outside";
    let parsed = parse_parenthesized(sample).unwrap();
    println!("Content: {}", parsed.1); // "content inside"
    println!("Remaining: {}", parsed.0); // "outside"
    // Why this works:
    // 1. tag("(") eats the opening paren
    // 2. take_until(")") captures the content and stops in front of ")"
    // 3. tag(")") eats the closing paren
}
Parsing parenthesized content requires extracting before the delimiter and then consuming it separately.
use nom::bytes::complete::{take_until, tag};
use nom::sequence::tuple;
use nom::IResult;
// Simple protocol: COMMAND:payload\n
fn parse_command(input: &str) -> IResult<&str, (&str, &str)> {
let (remaining, command) = take_until(":")(input)?;
let (remaining, _) = tag(":")(remaining)?;
let (remaining, payload) = take_until("\n")(remaining)?;
let (remaining, _) = tag("\n")(remaining)?;
Ok((remaining, (command, payload)))
}
/// Parses two consecutive protocol records and prints each command and payload.
fn main() {
    let mut rest = "GET:/users/123\nPOST:/data\n";
    // Two records: first "GET" with "/users/123", then "POST" with "/data"
    for _ in 0..2 {
        let (next, (cmd, payload)) = parse_command(rest).unwrap();
        println!("Command: {}", cmd);
        println!("Payload: {}", payload);
        rest = next;
    }
    // "\n" never shows up in a payload: take_until stops in front of it
    // and parse_command consumes it as a separate step
}
Protocol parsing benefits from clean separation between content and delimiters.
use nom::bytes::complete::{take_until, tag, take_while, take_till};
use nom::IResult;
/// Compares `take_until`, `tag`, `take_till` and `take_while` on the same input.
///
/// The expected results are checked with `assert_eq!` instead of being stated
/// only in comments, so a wrong expectation fails loudly.
fn main() {
    let input = "hello,world";
    // take_until: consumes until delimiter, leaves delimiter in the remaining input
    let result1: IResult<&str, &str> = take_until(",")(input);
    assert_eq!(result1, Ok((",world", "hello"))); // the comma is NOT consumed
    // tag: matches exact string, consumes it
    let result2: IResult<&str, &str> = tag("hello")(input);
    assert_eq!(result2, Ok((",world", "hello")));
    // take_till: consumes until predicate is true (predicate checks chars)
    let result3: IResult<&str, &str> = take_till(|c: char| c == ',')(input);
    assert_eq!(result3, Ok((",world", "hello")));
    // take_while: consumes while predicate is true
    let result4: IResult<&str, &str> = take_while(|c: char| c != ',')(input);
    assert_eq!(result4, Ok((",world", "hello")));
    // Key difference:
    // - take_until: takes a string/bytes pattern as delimiter
    // - take_till: takes a predicate function (char-by-char check)
    // - tag: matches and consumes a specific pattern
    // - take_while: consumes while condition holds
}
take_until uses string matching; take_till uses character predicates.
use nom::bytes::complete::{take_until, take_until1};
use nom::IResult;
/// Contrasts `take_until` (empty match allowed) with `take_until1` (at least
/// one character required before the delimiter).
fn main() {
    // Case A: the delimiter is the very first character
    let leading = ",content";
    let empty_allowed: IResult<&str, &str> = take_until(",")(leading);
    // Ok((",content", "")) -- take_until accepts an empty match
    let empty_rejected: IResult<&str, &str> = take_until1(",")(leading);
    // Err(Error) -- take_until1 refuses empty content
    // Pick take_until1 when empty content is invalid,
    // take_until when empty content is acceptable

    // Case B: non-empty content in front of the delimiter
    let trailing = "content,";
    let plain: IResult<&str, &str> = take_until(",")(trailing);
    // Ok((",", "content"))
    let strict: IResult<&str, &str> = take_until1(",")(trailing);
    // Ok((",", "content"))
    // With non-empty content the two parsers agree
}
take_until allows empty matches; take_until1 requires at least one character.
use nom::bytes::complete::{take_until, tag};
use nom::sequence::{preceded, tuple};
use nom::multi::many0;
use nom::IResult;
/// One `name=value` attribute, holding owned copies of both parts.
#[derive(Debug)]
struct Attribute {
// Text found in front of "=".
name: String,
// Text found between "=" and the terminating ";".
value: String,
}
fn parse_attribute(input: &str) -> IResult<&str, Attribute> {
let (remaining, name) = take_until("=")(input)?;
let (remaining, _) = tag("=")(remaining)?;
let (remaining, value) = take_until(";")(remaining)?;
let (remaining, _) = tag(";")(remaining)?;
Ok((remaining, Attribute {
name: name.to_string(),
value: value.to_string(),
}))
}
/// Applies `parse_attribute` repeatedly via `many0` and collects the results.
fn parse_attributes(input: &str) -> IResult<&str, Vec<Attribute>> {
    let mut all_attributes = many0(parse_attribute);
    all_attributes(input)
}
/// Parses a list of `name=value;` attributes and prints one per line.
fn main() {
    let text = "name=Alice;age=30;active=true;";
    let (_rest, parsed) = parse_attributes(text).unwrap();
    parsed
        .iter()
        .for_each(|attribute| println!("{} = {}", attribute.name, attribute.value));
    // Prints:
    // name = Alice
    // age = 30
    // active = true
    // `_rest` is empty when every attribute parsed successfully
}
take_until composes naturally with other parsers for complex grammars.
use nom::bytes::complete::take_until;
use nom::IResult;
fn main() {
let input = "hello world";
// If delimiter is not found, take_until returns an error
let result: IResult<&str, &str> = take_until(",")(input);
match result {
Ok((remaining, output)) => {
println!("Found: {} (remaining: {})", output, remaining);
}
Err(nom::Err::Error(e)) => {
// Error: delimiter not found
println!("Error: delimiter not found in input");
println!("Input was: {}", e.input);
}
Err(nom::Err::Failure(e)) => {
println!("Failure: {:?}", e);
}
Err(nom::Err::Incomplete(_)) => {
println!("Incomplete: need more data");
}
}
// For streaming parsers, Incomplete can be returned
// take_until may need more data to find the delimiter
}take_until returns an error if the delimiter is not found in the input.
use nom::bytes::complete::take_until;
use nom::IResult;
/// Reads two NUL-terminated segments from a byte slice with `take_until`.
fn main() {
    // Binary protocols often use specific byte sequences as delimiters
    let input: &[u8] = b"data\x00more_data\x00end";
    // Parse null-terminated strings.
    // Each result is bound with an explicit IResult type: annotating only the
    // Ok tuple leaves nom's generic error type ambiguous for `.unwrap()`.
    let first: nom::IResult<&[u8], &[u8]> = take_until(&b"\x00"[..])(input);
    let (remaining, content) = first.unwrap();
    println!("Content: {:?}", std::str::from_utf8(content)); // Ok("data")
    println!("Remaining: {:?}", remaining); // [0, 109, 111, 114, ...] -- starts with \x00
    // Consume the null terminator
    let terminator: nom::IResult<&[u8], &[u8]> =
        nom::bytes::complete::tag(&b"\x00"[..])(remaining);
    let (remaining, _) = terminator.unwrap();
    // Parse next segment
    let second: nom::IResult<&[u8], &[u8]> = take_until(&b"\x00"[..])(remaining);
    let (_remaining, content2) = second.unwrap();
    println!("Content2: {:?}", std::str::from_utf8(content2)); // Ok("more_data")
}
Binary formats often use specific byte sequences as delimiters; take_until works with byte slices.
use nom::bytes::{complete, streaming};
/// Runs the complete and the streaming `take_until` on input that lacks the
/// delimiter, to contrast how each reports it.
fn main() {
    // nom ships two flavours of take_until:
    //
    // complete::take_until
    // - assumes the whole input is already available
    // - reports Error when the delimiter is missing
    //
    // streaming::take_until
    // - built for incremental parsing (network protocols, file streaming)
    // - reports Incomplete when the delimiter could still arrive later
    let text = "hello";

    // Complete flavour: Error (delimiter not found)
    let whole_input: nom::IResult<&str, &str> = complete::take_until(",")(text);

    // Streaming flavour: may return Incomplete (need more data),
    // because the delimiter might be in the next chunk
    let partial_input: nom::IResult<&str, &str> = streaming::take_until(",")(text);

    // Use complete for in-memory parsing
    // Use streaming for incremental/async parsing
}
Use complete::take_until for full input; streaming::take_until for incremental parsing.
use nom::bytes::complete::{take_until, tag};
use nom::sequence::{preceded, delimited, pair};
use nom::combinator::map;
use nom::IResult;
/// Combines `take_until` with the `preceded`, `delimited` and `map` combinators.
fn main() {
    let framed = "START:content:END";

    // preceded: drop the prefix, keep what take_until captures
    let after_prefix: IResult<&str, &str> =
        preceded(tag("START:"), take_until(":END"))(framed);
    // Ok((":END", "content"))

    // delimited: drop prefix and suffix, keep the middle
    let between: IResult<&str, &str> =
        delimited(tag("START:"), take_until(":END"), tag(":END"))(framed);
    // Ok(("", "content"))

    // map: transform the captured slice
    let shouted: IResult<&str, String> =
        map(take_until(":"), |s: &str| s.to_uppercase())("hello:world");
    // Ok((":world", "HELLO"))

    // take_until slots into nom's combinators like any other parser
}
take_until works seamlessly with nom's combinator ecosystem.
use nom::bytes::complete::{take_until, tag};
use nom::branch::alt;
use nom::IResult;
/// Splits an HTTP-style message into headers and body, accepting either
/// "\r\n\r\n" or "\n\n" as the blank-line separator.
fn main() {
    // Scenario: Delimiter may need different handling
    // Example: HTTP headers end with "\r\n\r\n" or "\n\n"
    // But the body may start immediately after
    let input = "Content-Length: 42\r\n\r\nbody data";
    // take_until searches for one pattern only, so both separator forms are
    // tried in turn. Because the separator is left in place, we do not need to
    // know here which one matched.
    // NOTE: the CRLF form is tried first over the whole input, so an LF-only
    // message whose body contains "\r\n\r\n" would be split inside the body.
    // The IResult annotations pin nom's generic error type, which a bare
    // `.unwrap()` on the parser result cannot infer.
    let split: IResult<&str, &str> = alt((
        take_until("\r\n\r\n"),
        take_until("\n\n"),
    ))(input);
    let (remaining, headers_raw) = split.unwrap();
    println!("Headers: {}", headers_raw); // "Content-Length: 42"
    println!("Remaining: {:?}", remaining); // "\r\n\r\nbody data"
    // Now alt consumes whichever separator is actually present
    let separator: IResult<&str, &str> = alt((
        tag("\r\n\r\n"),
        tag("\n\n"),
    ))(remaining);
    let (remaining, _) = separator.unwrap();
    println!("Body: {:?}", remaining); // "body data"
    // This approach allows:
    // 1. Processing headers before consuming delimiter
    // 2. Handling multiple delimiter variants
    // 3. Including delimiter in output if needed
}
Leaving delimiters unconsumed enables flexible handling of multiple formats and downstream decisions.
use nom::bytes::complete::{take_until, tag};
use nom::multi::separated_list0;
use nom::sequence::preceded;
use nom::IResult;
/// Reads one field: everything in front of the next ",". The comma is left
/// in the remaining input.
fn parse_field(input: &str) -> IResult<&str, &str> {
    let up_to_comma = take_until(",");
    up_to_comma(input)
}
/// Splits a simple comma-separated line into fields using `parse_field`.
fn main() {
    // Simple CSV-like parsing
    let input = "a,b,c,d";
    let mut rest = input;
    let mut index = 1;
    // Every field that is followed by a comma: read it with take_until (via
    // parse_field), then consume the comma as a separate step.
    while let Ok((at_comma, field)) = parse_field(rest) {
        println!("Field {}: {}", index, field); // "a", "b", "c"
        // The annotation pins nom's generic error type, which a bare
        // `.unwrap()` on the parser result cannot infer.
        let comma: IResult<&str, &str> = tag(",")(at_comma);
        let (after_comma, _) = comma.unwrap();
        rest = after_comma;
        index += 1;
    }
    // For quoted fields, you'd need more sophisticated parsing
    // But take_until works for simple unquoted content
    // Note: The last field has no trailing comma, so take_until fails on it;
    // whatever input is left at that point is the last field.
    let last_field = rest;
    println!("Last field: {}", last_field); // "d"
}
take_until handles simple delimited formats; complex cases may need additional logic.
What take_until does:
Why delimiter is left unconsumed:
- A following parser such as alt can then choose among several delimiter variants
Common patterns:
// Pattern 1: Extract then consume delimiter
let (rem, content) = take_until(",")(input)?;
let (rem, _) = tag(",")(rem)?;
// Pattern 2: Multiple delimiter options
let (rem, content) = take_until(...)(input)?;
let (rem, delim) = alt((tag(","), tag(";")))(rem)?;
// Pattern 3: Delimited content
let (rem, content) = delimited(tag("("), take_until(")"), tag(")"))(input)?;
Key comparison:
| Parser | Consumes | Output | Delimiter in remaining |
|--------|----------|--------|------------------------|
| take_until | Until delimiter | Before delimiter | Yes (at start) |
| tag | Exact pattern | The pattern | No |
| take_till | Until predicate true | Before predicate | Yes (at start) |
| take_while | While predicate true | Consumed chars | Yes (first char failing the predicate, at start) |
When to use take_until: