Loading page…
Rust walkthroughs
Loading page…
What is the difference between nom::bytes::complete::take and take_until for input consumption?

nom::bytes::complete::take and take_until are both parser combinators for consuming input, but they differ fundamentally in how they determine what to consume: take consumes a fixed number of bytes or characters specified at call time, while take_until consumes everything until it encounters a specific pattern or delimiter. The trade-off is between precision and flexibility—take provides exact control over consumption but requires knowing the length in advance, while take_until handles variable-length content but requires scanning for a terminator. Both return the remaining input and the consumed input as a tuple, enabling composition in larger parser chains.
use nom::bytes::complete::take;
use nom::IResult;
fn main() {
let input = "Hello, World!";
// take consumes exactly N input elements: for &str input that means
// N chars (not bytes); for &[u8] input it means N bytes.
// Returns (remaining, consumed); fails if fewer than N are available.
fn take_5(input: &str) -> IResult<&str, &str> {
take(5usize)(input)
}
let (remaining, consumed) = take_5(input).unwrap();
println!("Consumed: {}", consumed); // "Hello"
println!("Remaining: {}", remaining); // ", World!"
// Dynamic length
fn take_n(input: &str, n: usize) -> IResult<&str, &str> {
take(n)(input)
}
let (remaining, consumed) = take_n(input, 7).unwrap();
println!("Consumed: {}", consumed); // "Hello, "
println!("Remaining: {}", remaining); // "World!"
}
take consumes exactly the specified number of elements (chars for &str input, bytes for &[u8] input), returning both consumed and remaining input.
use nom::bytes::complete::take_until;
use nom::IResult;
fn main() {
let input = "key=value;other=data";
// take_until consumes until a delimiter is found
fn until_equals(input: &str) -> IResult<&str, &str> {
take_until("=")(input)
}
let (remaining, consumed) = until_equals(input).unwrap();
println!("Consumed: {}", consumed); // "key"
println!("Remaining: {}", remaining); // "=value;other=data"
// The delimiter is NOT consumed
assert!(remaining.starts_with('='));
// take_until with different delimiters
fn until_semicolon(input: &str) -> IResult<&str, &str> {
take_until(";")(input)
}
let input2 = "hello;world";
let (remaining, consumed) = until_semicolon(input2).unwrap();
println!("Consumed: {}", consumed); // "hello"
println!("Remaining: {}", remaining); // ";world"
}
take_until scans for a delimiter and consumes everything before it, leaving the delimiter in the remaining input.
use nom::bytes::complete::{take, take_until};
use nom::IResult;
fn main() {
let input = "abcdefghij";
// take: precise byte count
fn take_3(input: &str) -> IResult<&str, &str> {
take(3usize)(input)
}
let (remaining, consumed) = take_3(input).unwrap();
println!("take(3): '{}' -> '{}'", consumed, remaining);
// take_until: pattern-based termination
fn until_f(input: &str) -> IResult<&str, &str> {
take_until("f")(input)
}
let (remaining, consumed) = until_f(input).unwrap();
println!("take_until(f): '{}' -> '{}'", consumed, remaining);
// Key difference:
// - take(3): always consumes exactly 3 bytes
// - take_until("f"): consumes until 'f' is found
// take doesn't care about content
let (remaining, consumed) = take(3usize)("xyz123").unwrap();
println!("take(3) on 'xyz123': '{}'", consumed); // "xyz"
// take_until scans content
let (remaining, consumed) = take_until("xyz")("abcxyz123").unwrap();
println!("take_until(xyz): '{}'", consumed); // "abc"
}
take uses an element count (chars for &str, bytes for &[u8]); take_until uses pattern matching to determine boundaries.
use nom::bytes::complete::{take, take_until};
use nom::error::{Error, ErrorKind};
use nom::IResult;
fn main() {
// take error: insufficient input
let short_input = "ab";
let result: IResult<&str, &str> = take(5usize)(short_input);
match result {
Err(nom::Err::Error(Error { input, code })) => {
println!("take error: input='{}', code={:?}", input, code);
// Error: not enough data
}
_ => println!("Unexpected result"),
}
// take_until error: delimiter not found
let input = "hello world";
let result: IResult<&str, &str> = take_until("xyz")(input);
match result {
Err(nom::Err::Error(Error { input, code })) => {
println!("take_until error: input='{}', code={:?}", input, code);
// Error: delimiter not found
}
_ => println!("Unexpected result"),
}
// Both fail, but for different reasons:
// - take: not enough bytes
// - take_until: delimiter not found
}
take fails when input is too short; take_until fails when the delimiter is not found.
use nom::bytes::complete::{take, take_until};
use nom::IResult;
fn main() {
// take: never scans the content for a delimiter, it only counts elements.
// On &[u8] this is a constant-time slice split. On &str nom has to count
// chars to find the byte offset, so the cost grows with the count requested
// (here 100), though still not with the total input length.
fn fixed_take(input: &str) -> IResult<&str, &str> {
take(100usize)(input) // Cost bounded by the count, independent of content
}
// take_until: O(n) - scans each byte
// Must examine each byte until delimiter found
fn scan_until(input: &str) -> IResult<&str, &str> {
take_until("\n")(input) // Linear scan
}
// For large inputs, take is faster
// For variable-length content, take_until is necessary
// Example: parsing fixed-width fields
fn parse_fixed_record(input: &str) -> IResult<&str, (&str, &str, &str)> {
let (input, field1) = take(10usize)(input)?;
let (input, field2) = take(20usize)(input)?;
let (input, field3) = take(15usize)(input)?;
Ok((input, (field1, field2, field3)))
}
let record = "AAAAAAAAAABBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCC";
let (remaining, (f1, f2, f3)) = parse_fixed_record(record).unwrap();
println!("Field 1: '{}'", f1.trim());
println!("Field 2: '{}'", f2.trim());
println!("Field 3: '{}'", f3.trim());
}
take is O(1) on byte slices (on &str it is proportional to the count, because chars must be counted); take_until is O(n) where n is the distance to the delimiter.
use nom::bytes::complete::{take_until, take};
use nom::character::complete::char;
use nom::sequence::tuple;
use nom::IResult;
fn main() {
// take_until is ideal for delimiter-separated content
fn parse_pair(input: &str) -> IResult<&str, (&str, &str)> {
let (input, key) = take_until("=")(input)?;
let (input, _) = char('=')(input)?; // Consume the delimiter
let (input, value) = take_until(";")(input)?;
let (input, _) = char(';')(input)?; // Consume the delimiter
Ok((input, (key, value)))
}
let input = "name=Alice;age=30;";
let (remaining, (key, value)) = parse_pair(input).unwrap();
println!("Key: '{}', Value: '{}'", key, value);
println!("Remaining: '{}'", remaining);
// Multiple pairs
fn parse_pairs(input: &str) -> IResult<&str, Vec<(&str, &str)>> {
let mut pairs = Vec::new();
let mut input = input;
while !input.is_empty() {
let (rem, key) = take_until("=")(input)?;
let (rem, _) = char('=')(rem)?;
let (rem, value) = take_until(";")(rem)?;
let (rem, _) = char(';')(rem)?;
pairs.push((key, value));
input = rem;
}
Ok((input, pairs))
}
let input = "name=Alice;age=30;city=Boston;";
let (_, pairs) = parse_pairs(input).unwrap();
for (k, v) in pairs {
println!("{} = {}", k, v);
}
}
take_until excels at parsing delimiter-separated content where field lengths vary.
use nom::bytes::complete::take;
use nom::number::complete::{be_u16, be_u32};
use nom::IResult;
fn main() {
// Binary protocols often use fixed-length fields
// take is perfect for these cases
// Example: TLV (Type-Length-Value) format
fn parse_tlv(input: &[u8]) -> IResult<&[u8], (u8, u16, &[u8])> {
let (input, tag) = take(1usize)(input)?; // Type: 1 byte
let (input, length) = take(2usize)(input)?; // Length: 2 bytes
// Parse length as u16 (big-endian)
let length = u16::from_be_bytes([length[0], length[1]]);
let (input, value) = take(length as usize)(input)?; // Value: variable
Ok((input, (tag[0], length, value)))
}
// Simulated TLV data
let data: &[u8] = &[
0x01, // Type: 1
0x00, 0x05, // Length: 5
b'H', b'e', b'l', b'l', b'o', // Value: "Hello"
];
let (remaining, (tag, length, value)) = parse_tlv(data).unwrap();
println!("Tag: {}, Length: {}, Value: {:?}", tag, length, std::str::from_utf8(value));
// Fixed-length header
fn parse_header(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8], &[u8])> {
let (input, magic) = take(4usize)(input)?; // Magic bytes
let (input, version) = take(2usize)(input)?; // Version
let (input, flags) = take(2usize)(input)?; // Flags
Ok((input, (magic, version, flags)))
}
}
Binary protocols with fixed-length fields benefit from take's precise byte counting.
use nom::bytes::complete::{take, take_until};
use nom::sequence::preceded;
use nom::character::complete::char;
use nom::IResult;
fn main() {
// Mixed parsing: fixed header, variable content
fn parse_message(input: &str) -> IResult<&str, (&str, &str)> {
// First 4 bytes are fixed-length header
let (input, header) = take(4usize)(input)?;
// Rest until delimiter is variable content
let (input, content) = take_until("\n")(input)?;
let (input, _) = char('\n')(input)?;
Ok((input, (header, content)))
}
let input = "HEADhello world\nmore data";
let (remaining, (header, content)) = parse_message(input).unwrap();
println!("Header: '{}'", header);
println!("Content: '{}'", content);
println!("Remaining: '{}'", remaining);
// HTTP-style parsing
fn parse_http_request(input: &str) -> IResult<&str, (&str, &str, &str)> {
// Method until space
let (input, method) = take_until(" ")(input)?;
let (input, _) = char(' ')(input)?;
// Path until space
let (input, path) = take_until(" ")(input)?;
let (input, _) = char(' ')(input)?;
// Version is fixed length
let (input, version) = take(8usize)(input)?;
Ok((input, (method, path, version)))
}
let request = "GET /index.html HTTP/1.1\r\n";
let (remaining, (method, path, version)) = parse_http_request(request).unwrap();
println!("Method: '{}', Path: '{}', Version: '{}'", method, path, version);
}
Complex protocols often combine take for fixed structures and take_until for variable content.
use nom::bytes::complete::take_until;
use nom::IResult;
fn main() {
// take_until supports multi-byte delimiters
let input = "content<![CDATA[more data]]>rest";
fn until_cdata(input: &str) -> IResult<&str, &str> {
take_until("<![CDATA[")(input)
}
let (remaining, consumed) = until_cdata(input).unwrap();
println!("Before CDATA: '{}'", consumed);
println!("Remaining: '{}'", remaining);
// XML comment delimiter
let xml = "<!-- comment -->content";
/// Consumes everything up to (but not including) the first `-->`.
fn until_comment_end(input: &str) -> IResult<&str, &str> {
    take_until("-->")(input)
}
// Step past the "<!--" opener before scanning; otherwise the opener itself
// would be reported as part of the comment text.
let comment_start = xml.strip_prefix("<!--").unwrap_or(xml);
let (remaining, consumed) = until_comment_end(comment_start).unwrap();
println!("Comment content: '{}'", consumed);
// C-style string literal (until quote)
let c_string = r#"hello\"world" rest"#;
fn until_quote(input: &str) -> IResult<&str, &str> {
take_until("\"")(input)
}
// Note: take_until doesn't handle escape sequences
// For escaped delimiters, use a custom parser
}
take_until handles multi-byte delimiters naturally, scanning for the complete pattern.
use nom::bytes::streaming::{take, take_until};
use nom::IResult;
fn main() {
// Streaming variant for incremental parsing
// Returns Err(Err::Incomplete) when more data is needed
// Complete: all data available
use nom::bytes::complete::{take as take_complete, take_until as take_until_complete};
// Streaming: data may be incomplete
use nom::bytes::streaming::{take as take_streaming, take_until as take_until_streaming};
// For take, streaming returns Incomplete if not enough bytes
let input = "abc";
let result: IResult<&str, &str> = take_streaming(5usize)(input);
match result {
Err(nom::Err::Incomplete(needed)) => {
println!("Need more data: {:?}", needed);
}
_ => {}
}
// For take_until, streaming returns Incomplete if delimiter not found
// and input may be incomplete
let input = "hello world";
let result: IResult<&str, &str> = take_until_streaming("\n")(input);
match result {
Err(nom::Err::Incomplete(needed)) => {
println!("Delimiter may be in next chunk: {:?}", needed);
}
Err(nom::Err::Error(_)) => {
println!("Delimiter definitely not found");
}
Ok((remaining, consumed)) => {
println!("Found: '{}'", consumed);
}
_ => {}
}
}
Streaming variants handle incremental parsing where input may be incomplete.
use nom::bytes::complete::take_until;
use nom::character::complete::char;
use nom::sequence::delimited;
use nom::IResult;
fn main() {
// Parsing delimited content
fn parse_braced(input: &str) -> IResult<&str, &str> {
delimited(char('{'), take_until("}"), char('}'))(input)
}
let input = "{content}rest";
let (remaining, content) = parse_braced(input).unwrap();
println!("Braced content: '{}'", content);
// Nested structures require careful handling
fn parse_outer(input: &str) -> IResult<&str, &str> {
take_until("}{")(input)
}
// Note: take_until doesn't understand nesting
// For nested structures, use a custom recursive parser
// Example: balanced braces (simplified)
fn parse_balanced(input: &str) -> IResult<&str, &str> {
let mut depth = 0;
let mut end = 0;
for (i, c) in input.char_indices() {
match c {
'{' => depth += 1,
'}' => {
depth -= 1;
if depth == 0 {
end = i + 1;
break;
}
}
_ => {}
}
}
if depth == 0 && end > 0 {
Ok((&input[end..], &input[..end]))
} else {
Err(nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::TakeUntil)))
}
}
let nested = "{{inner}}rest";
let (remaining, content) = parse_balanced(nested).unwrap();
println!("Balanced: '{}'", content);
}
take_until doesn't understand nesting; complex structures need custom parsers.
use nom::bytes::complete::{take, take_until};
use nom::error::{Error, ErrorKind};
use nom::IResult;
fn main() {
// Recovering from take errors
/// Takes `n` characters from the front of `input`, falling back to the
/// whole input when fewer than `n` are available.
///
/// Returns `Err` only if `take` fails even though enough characters exist.
fn safe_take(input: &str, n: usize) -> Result<&str, String> {
    match take::<usize, &str, Error<&str>>(n)(input) {
        Ok((_, consumed)) => Ok(consumed),
        // On `&str`, `take` counts chars, so the shortfall check must use the
        // char count too: the byte length overstates what is available for
        // non-ASCII input and would misreport a short input as an error.
        Err(_) if input.chars().count() < n => Ok(input), // Or return error
        Err(_) => Err(format!("Unexpected error taking {} bytes", n)),
    }
}
// Recovering from take_until errors
fn safe_take_until(input: &str, delim: &str) -> Result<&str, String> {
match take_until::<&str, &str, &str, Error<&str>>(delim)(input) {
Ok((_, consumed)) => Ok(consumed),
Err(_) => {
// Delimiter not found - return all input
Ok(input) // Or return error
}
}
}
let short = "abc";
let result = safe_take(short, 5);
println!("Safe take result: {:?}", result);
let no_delim = "hello world";
let result = safe_take_until(no_delim, "xyz");
println!("Safe take_until result: {:?}", result);
}
Handle parse errors gracefully when input doesn't meet expectations.
use nom::bytes::complete::take_until;
use nom::character::complete::{char, newline};
use nom::multi::separated_list0;
use nom::sequence::separated_pair;
use nom::IResult;
fn main() {
// CSV field parsing with take_until
fn parse_field(input: &str) -> IResult<&str, &str> {
take_until(",")(input)
}
fn parse_row(input: &str) -> IResult<&str, Vec<&str>> {
let mut fields = Vec::new();
let mut input = input;
loop {
// Parse field until comma or newline
let (rem, field) = take_until::<_, _, _, nom::error::Error<&str>>(",\n")(input)?;
fields.push(field.trim());
// Check what terminated the field
if rem.starts_with(',') {
input = &rem[1..]; // Skip comma
} else if rem.starts_with('\n') {
input = &rem[1..]; // Skip newline
break;
} else if rem.is_empty() {
break;
} else {
input = rem;
}
}
Ok((input, fields))
}
let csv = "name,Alice,30\nage,Bob,25";
let (remaining, fields) = parse_row(csv).unwrap();
println!("Fields: {:?}", fields);
// Alternative: use a more sophisticated approach
fn parse_csv_line(input: &str) -> IResult<&str, Vec<&str>> {
let mut fields = Vec::new();
let mut input = input;
while !input.is_empty() {
// Try to take until comma
if let Ok((rem, field)) = take_until::<_, _, _, nom::error::Error<&str>>(",")(input) {
if rem.is_empty() || rem.starts_with('\n') {
// End of line
fields.push(field.trim());
input = rem.strip_prefix('\n').unwrap_or(rem);
if rem.is_empty() || !rem.starts_with('\n') {
break;
}
} else {
// More fields
fields.push(field.trim());
input = &rem[1..]; // Skip comma
}
} else {
break;
}
}
Ok((input, fields))
}
}
CSV parsing demonstrates take_until for delimiter-separated fields.
Method comparison:
| Aspect | take | take_until |
|--------|--------|--------------|
| Input type | Fixed count | Pattern/delimiter |
| Length determination | Known in advance | Found by scanning |
| Time complexity | O(1) on `&[u8]`; O(count) on `&str` | O(n) |
| Error condition | Insufficient input | Delimiter not found |
| Use case | Fixed-width fields | Variable-length content |
When to use take:
| Scenario | Reason |
|----------|--------|
| Fixed-width fields | Exact byte count known |
| Binary protocols | Header structures |
| TLV formats | Fixed type/length portions |
| Performance-critical | O(1) complexity |
| Known-length slices | Precise extraction |
When to use take_until:
| Scenario | Reason |
|----------|--------|
| Delimiter-separated content | Variable length |
| Text protocols | Keyword-terminated |
| Key-value pairs | Separator-based |
| Line-oriented formats | Newline-terminated |
| String literals | Quote-delimited |
Key insight: take and take_until serve complementary roles in parser construction. take provides O(1) fixed-length consumption, ideal for binary protocols, fixed-width fields, and structures where the byte count is known ahead of time. take_until provides O(n) delimiter-based consumption, essential for text protocols, variable-length fields, and any content terminated by patterns rather than counted bytes. The choice impacts both the parser's error behavior (insufficient input vs. missing delimiter) and its performance characteristics. In practice, complex parsers combine both: take for fixed headers and length prefixes, take_until for variable content terminated by delimiters. Both return (remaining, consumed) tuples enabling clean composition with other nom combinators.