How do I parse binary and text data with nom in Rust?

Walkthrough

The nom crate is a parser combinator library for building safe parsers out of plain functions, without macros or complicated code. It uses function composition to combine small parsing functions into larger ones. Nom works with both text and binary data, reports failures through ordinary Result values, and is extremely fast. The library provides many built-in parsers for common patterns like numbers, strings, whitespace, and binary formats. You can combine parsers using combinators like map, alt, tuple, and many0 to build complex parsers from simple building blocks.

Key concepts:

  1. Parser functions — take input, return IResult with remaining input and result
  2. Combinators — functions that combine or transform parsers
  3. Bytes parsers — work with binary data (take, tag, be_u32, etc.)
  4. Character parsers — work with text (alpha1, digit1, space1, etc.)
  5. Sequence combinators — tuple, preceded, delimited, pair
  6. Choice combinators — alt, which tries each alternative in order and returns the first success
  7. Repetition combinators — many0, many1, count, fold_many0

Code Example

# Cargo.toml
[dependencies]
nom = "7"

// src/main.rs
use nom::{
    IResult,
    bytes::complete::tag,
    character::complete::{alpha1, digit1, space0},
    sequence::{preceded, tuple},
};
 
fn parse_greeting(input: &str) -> IResult<&str, (&str, &str)> {
    let (input, (greeting, _, name)) = tuple((
        tag("Hello"),
        space0,
        alpha1,
    ))(input)?;
    Ok((input, (greeting, name)))
}
 
/// Runs the greeting parser on a sample input and prints the raw result.
fn main() {
    // Prints: Ok(("", ("Hello", "World")))
    println!("{:?}", parse_greeting("Hello World"));
}

Basic Parsers

use nom::{
    IResult,
    bytes::complete::{tag, take},
    character::complete::{alpha1, digit1, alphanumeric1, space0, space1},
};
 
/// Demonstrates nom's basic building-block parsers on string input.
///
/// Every result is bound to an explicit `IResult<&str, &str>` so the error
/// type of the generic parser is fixed.
fn main() {
    // tag: matches an exact literal at the start of the input.
    let tagged: IResult<&str, &str> = tag("hello")("hello world");
    println!("tag: {:?}", tagged); // Ok((" world", "hello"))

    // alpha1: at least one alphabetic character.
    let letters: IResult<&str, &str> = alpha1("hello123");
    println!("alpha1: {:?}", letters); // Ok(("123", "hello"))

    // digit1: at least one decimal digit.
    let digits: IResult<&str, &str> = digit1("123abc");
    println!("digit1: {:?}", digits); // Ok(("abc", "123"))

    // alphanumeric1: at least one letter or digit.
    let word: IResult<&str, &str> = alphanumeric1("abc123!");
    println!("alphanumeric1: {:?}", word); // Ok(("!", "abc123"))

    // space0: any amount of leading spaces, including none.
    let maybe_blank: IResult<&str, &str> = space0("   hello");
    println!("space0: {:?}", maybe_blank); // Ok(("hello", "   "))

    // space1: like space0, but at least one space is required.
    let blank: IResult<&str, &str> = space1("   hello");
    println!("space1: {:?}", blank); // Ok(("hello", "   "))

    // take: consumes a fixed count of input elements.
    let prefix: IResult<&str, &str> = take(3usize)("hello");
    println!("take(3): {:?}", prefix); // Ok(("lo", "hel"))
}

Combining Parsers with tuple

use nom::{
    IResult,
    bytes::complete::tag,
    character::complete::{alpha1, digit1, space0, space1},
    sequence::tuple,
};
 
fn main() {
    // Parse: "name: value"
    let input = "age: 25";
    let (remaining, (name, _, _, value)) = tuple((
        alpha1,          // "age"
        tag(":"),        // ":"
        space0,          // " "
        digit1,          // "25"
    ))(input).unwrap();
    
    println!("name: {}, value: {}, remaining: '{}'", name, value, remaining);
    
    // Parse: "Hello World!"
    let input = "Hello World!";
    let (remaining, (greeting, _, name, punctuation)) = tuple((
        alpha1,
        space1,
        alpha1,
        tag("!"),
    ))(input).unwrap();
    
    println!("greeting: {}, name: {}, punctuation: {}", 
             greeting, name, punctuation);
}

Sequence Combinators

use nom::{
    IResult,
    bytes::complete::tag,
    character::complete::alpha1,
    sequence::{preceded, delimited, pair, separated_pair},
};
 
/// Demonstrates the two- and three-part sequence combinators.
fn main() {
    // preceded: run the first parser, discard its output, keep the second's.
    let after_prefix: IResult<&str, &str> = preceded(tag("Hello "), alpha1)("Hello World");
    println!("preceded: {:?}", after_prefix); // Ok(("", "World"))

    // delimited: match (open, content, close) and keep only the content.
    let inside: IResult<&str, &str> = delimited(tag("("), alpha1, tag(")"))("(content)");
    println!("delimited: {:?}", inside); // Ok(("", "content"))

    // pair: run two parsers back to back and keep both outputs.
    let both: IResult<&str, (&str, &str)> = pair(alpha1, tag("!"))("Hello!");
    println!("pair: {:?}", both); // Ok(("", ("Hello", "!")))

    // separated_pair: match (first, separator, second) and drop the separator.
    let key_value: IResult<&str, (&str, &str)> =
        separated_pair(alpha1, tag(":"), alpha1)("key:value");
    println!("separated_pair: {:?}", key_value); // Ok(("", ("key", "value")))
}

Map and Transform Results

use nom::{
    IResult,
    bytes::complete::tag,
    character::complete::digit1,
    combinator::map,
    sequence::tuple,
};
 
fn main() {
    // map - transform parser result
    let parse_number = map(digit1, |s: &str| s.parse::<i32>().unwrap());
    let result: IResult<&str, i32> = parse_number("123abc");
    println!("map digit to i32: {:?}", result); // Ok(("abc", 123))
    
    // map with tuple
    let parse_point = map(
        tuple((
            digit1,
            tag(","),
            digit1,
        )),
        |(x, _, y): (&str, &str, &str)| (x.parse::<i32>().unwrap(), y.parse::<i32>().unwrap())
    );
    let result: IResult<&str, (i32, i32)> = parse_point("10,20");
    println!("map tuple to point: {:?}", result); // Ok(("", (10, 20)))
    
    // map to struct
    #[derive(Debug)]
    struct Color {
        name: String,
        code: String,
    }
    
    let parse_color = map(
        tuple((
            alpha1,
            tag(": #"),
            digit1,
        )),
        |(name, _, code): (&str, &str, &str)| Color {
            name: name.to_string(),
            code: code.to_string(),
        }
    );
    let result: IResult<&str, Color> = parse_color("red: #FF0000");
    println!("map to struct: {:?}", result);
}

Choice with alt

use nom::{
    IResult,
    branch::alt,
    bytes::complete::tag,
    character::complete::{alpha1, digit1},
};
 
/// Demonstrates choosing between alternatives with `alt`.
///
/// `alt` returns an `FnMut` parser, so each binding is declared `mut`.
fn main() {
    // Brought into scope up front so it is visible before its first use.
    use nom::combinator::map;

    // alt - try parsers in order, return first success
    let mut parse_word_or_number = alt((alpha1, digit1));

    let result1: IResult<&str, &str> = parse_word_or_number("hello");
    println!("alpha: {:?}", result1); // Ok(("", "hello"))

    let result2: IResult<&str, &str> = parse_word_or_number("123");
    println!("digit: {:?}", result2); // Ok(("", "123"))

    // alt with tags
    let mut parse_direction = alt((
        tag("up"),
        tag("down"),
        tag("left"),
        tag("right"),
    ));

    let result: IResult<&str, &str> = parse_direction("left!");
    println!("direction: {:?}", result); // Ok(("!", "left"))

    // alt with mapped values
    #[derive(Debug, PartialEq)]
    enum Bool {
        True,
        False,
    }

    let mut parse_bool = alt((
        map(tag("true"), |_| Bool::True),
        map(tag("false"), |_| Bool::False),
    ));

    let result: IResult<&str, Bool> = parse_bool("true!");
    println!("bool: {:?}", result); // Ok(("!", True))
}

Repetition with many

use nom::{
    IResult,
    bytes::complete::tag,
    character::complete::{alpha1, space0, space1, digit1},
    multi::{many0, many1, separated_list0, separated_list1, count},
    sequence::preceded,
};
 
fn main() {
    // many0 - zero or more occurrences
    let parse_words = many0(alpha1);
    let result: IResult<&str, Vec<&str>> = parse_words("hello world");
    println!("many0 alpha1: {:?}", result); // Ok((" world", vec!["hello"]))
    
    // many1 - one or more (fails if none)
    let parse_numbers = many1(digit1);
    let result: IResult<&str, Vec<&str>> = parse_numbers("123 456");
    println!("many1 digit1: {:?}", result); // Ok((" 456", vec!["123"]))
    
    // separated_list0 - parse list with separator
    let parse_list = separated_list0(tag(", "), alpha1);
    let result: IResult<&str, Vec<&str>> = parse_list("apple, banana, cherry");
    println!("separated_list0: {:?}", result);
    // Ok(("", vec!["apple", "banana", "cherry"]))
    
    // separated_list1 - requires at least one element
    let parse_numbers = separated_list1(tag(","), digit1);
    let result: IResult<&str, Vec<&str>> = parse_numbers("1,2,3,4");
    println!("separated_list1: {:?}", result);
    // Ok(("", vec!["1", "2", "3", "4"]))
    
    // count - exact number of occurrences
    let parse_three = count(digit1, 3);
    let result: IResult<&str, Vec<&str>> = parse_three("123abc");
    println!("count(3): {:?}", result); // Ok(("abc", vec!["1", "2", "3"]))
}

Optional Parsers

use nom::{
    IResult,
    bytes::complete::tag,
    character::complete::alpha1,
    combinator::opt,
    sequence::tuple,
};
 
/// Demonstrates optional parsing with `opt`.
fn main() {
    // opt - optional parser, returns Option.
    // `tuple` yields an `FnMut` parser, so the binding must be `mut`.
    let mut parser = tuple((
        alpha1,
        opt(tag("!")),
        opt(tag("?")),
    ));

    let result1: IResult<&str, (&str, Option<&str>, Option<&str>)> = parser("hello!?");
    println!("With both: {:?}", result1);
    // Ok(("", ("hello", Some("!"), Some("?"))))

    let result2: IResult<&str, (&str, Option<&str>, Option<&str>)> = parser("hello");
    println!("With none: {:?}", result2);
    // Ok(("", ("hello", None, None)))

    /// Builds `Hello <name><punctuation>`, defaulting the punctuation to ".".
    ///
    /// Written as a nested function rather than a closure so the input,
    /// output and error types are spelled out for the `?` operator.
    fn parse_greeting(input: &str) -> IResult<&str, String> {
        let (input, (name, punctuation)) = tuple((
            alpha1,
            opt(tag("!")),
        ))(input)?;

        let greeting = format!("Hello {}{}", name, punctuation.unwrap_or("."));
        Ok((input, greeting))
    }

    // Using opt with default values
    let result: IResult<&str, String> = parse_greeting("World!");
    println!("Greeting: {:?}", result);
}

Parsing Numbers

use nom::{
    IResult,
    character::complete::{digit1, char},
    combinator::map,
    branch::alt,
};
 
fn parse_u32(input: &str) -> IResult<&str, u32> {
    map(digit1, |s: &str| s.parse().unwrap())(input)
}
 
fn parse_i32(input: &str) -> IResult<&str, i32> {
    let (input, negative) = opt(char('-'))(input)?;
    let (input, digits) = digit1(input)?;
    let num: i32 = digits.parse().unwrap();
    Ok((input, if negative.is_some() { -num } else { num }))
}
 
/// Exercises the hand-written number parsers and nom's built-in ones.
fn main() {
    // Built-in number parsers. These imports name functions, so they do not
    // clash with the primitive `u32` / `i32` types used in annotations.
    use nom::character::complete::{i32, u32};

    // Parse unsigned
    let result = parse_u32("12345abc");
    println!("u32: {:?}", result); // Ok(("abc", 12345))

    // Parse signed
    let result1 = parse_i32("42");
    println!("i32 positive: {:?}", result1); // Ok(("", 42))

    let result2 = parse_i32("-42");
    println!("i32 negative: {:?}", result2); // Ok(("", -42))

    // The built-in parsers take the text to parse directly.
    let result: IResult<&str, u32> = u32("12345abc");
    println!("built-in u32: {:?}", result); // Ok(("abc", 12345))

    let result: IResult<&str, i32> = i32("-42");
    println!("built-in i32: {:?}", result); // Ok(("", -42))

    /// Parses `<digits>.<digits>` into an `f64`.
    fn parse_float(input: &str) -> IResult<&str, f64> {
        let (input, int_part) = digit1(input)?;
        let (input, _) = char('.')(input)?;
        let (input, dec_part) = digit1(input)?;
        // Both parts are digits only, so the rebuilt literal is a valid f64.
        let num: f64 = format!("{}.{}", int_part, dec_part)
            .parse()
            .expect("digits '.' digits is a valid f64 literal");
        Ok((input, num))
    }

    // Parse float
    let result = parse_float("3.14159abc");
    println!("float: {:?}", result); // Ok(("abc", 3.14159))
}

Binary Parsing

use nom::{
    IResult,
    bytes::complete::{tag, take},
    number::complete::{be_u8, be_u16, be_u32, le_u8, le_u16, le_u32},
};
 
fn main() {
    // Parse big-endian numbers
    let input: &[u8] = &[0x12, 0x34, 0x56, 0x78];
    let (remaining, value) = be_u32(input).unwrap();
    println!("be_u32: {} (remaining: {:?})", value, remaining);
    // 0x12345678 = 305419896
    
    // Parse little-endian numbers
    let input: &[u8] = &[0x78, 0x56, 0x34, 0x12];
    let (remaining, value) = le_u32(input).unwrap();
    println!("le_u32: {} (remaining: {:?})", value, remaining);
    // 0x12345678 = 305419896
    
    // Parse bytes
    let input: &[u8] = &[0x01, 0x02, 0x03, 0x04, 0x05];
    let (remaining, bytes) = take(3usize)(input).unwrap();
    println!("take 3 bytes: {:?} (remaining: {:?})", bytes, remaining);
    
    // Parse tag (magic bytes)
    let input: &[u8] = b"PNG\x89\x00\x01";
    let (remaining, _) = tag(b"PNG")(input).unwrap();
    println!("After PNG tag: {:?}", remaining);
}

Parse a Simple Expression Language

use nom::{
    IResult,
    bytes::complete::tag,
    character::complete::{alpha1, digit1, space0, char},
    sequence::{preceded, delimited},
    branch::alt,
    multi::many0,
    combinator::map,
};
 
/// Abstract syntax tree for the small arithmetic expression language.
///
/// `Clone` is derived because building nested expressions may need to copy
/// sub-trees; every field type (`i32`, `String`, `Box<Expr>`) supports it.
#[derive(Debug, Clone, PartialEq)]
enum Expr {
    /// An integer literal.
    Number(i32),
    /// A named variable.
    Variable(String),
    /// `left + right`.
    Add(Box<Expr>, Box<Expr>),
    /// `left - right`.
    Subtract(Box<Expr>, Box<Expr>),
    /// `left * right`.
    Multiply(Box<Expr>, Box<Expr>),
}
 
fn parse_number(input: &str) -> IResult<&str, Expr> {
    map(digit1, |s: &str| Expr::Number(s.parse().unwrap()))(input)
}
 
fn parse_variable(input: &str) -> IResult<&str, Expr> {
    map(alpha1, |s: &str| Expr::Variable(s.to_string()))(input)
}
 
fn parse_parens(input: &str) -> IResult<&str, Expr> {
    delimited(
        char('('),
        parse_expr,
        char(')'),
    )(input)
}
 
/// Parses a single operand: a number, a variable, or a parenthesised
/// expression, tried in that order.
fn parse_term(input: &str) -> IResult<&str, Expr> {
    let mut operand = alt((parse_number, parse_variable, parse_parens));
    operand(input)
}
 
fn parse_expr(input: &str) -> IResult<&str, Expr> {
    let (input, first) = parse_term(input)?;
    
    let (input, expr) = many0(|input| {
        let (input, _) = space0(input)?;
        let (input, op) = alt((char('+'), char('-'), char('*')))(input)?;
        let (input, _) = space0(input)?;
        let (input, right) = parse_term(input)?;
        Ok((input, (op, right)))
    })(input)?.iter().fold((input, first), |(i, left), (op, right)| {
        let expr = match op {
            '+' => Expr::Add(Box::new(left.clone()), Box::new(right.clone())),
            '-' => Expr::Subtract(Box::new(left.clone()), Box::new(right.clone())),
            '*' => Expr::Multiply(Box::new(left.clone()), Box::new(right.clone())),
            _ => unreachable!(),
        };
        (i, expr)
    });
    
    Ok((input, expr))
}
 
/// Parses a handful of sample expressions and prints each resulting tree.
fn main() {
    let samples = [
        "42",
        "x",
        "1 + 2",
        "10 - 3",
        "2 * 3",
        "(1 + 2) * 3",
        "a + b * c",
    ];

    for source in samples {
        let outcome = parse_expr(source);
        // Unparsed trailing input is not reported by this demo.
        if let Ok((_remaining, tree)) = outcome {
            println!("{:?} => {:?}", source, tree);
        } else if let Err(problem) = outcome {
            println!("Error parsing '{}': {:?}", source, problem);
        }
    }
}

Parse CSV

use nom::{
    IResult,
    bytes::complete::{tag, is_not},
    character::complete::{char, space0},
    multi::separated_list1,
    combinator::opt,
    sequence::delimited,
};
 
fn parse_field(input: &str) -> IResult<&str, String> {
    // Quoted field
    if input.starts_with('"') {
        let (input, _) = char('"')(input)?;
        let (input, content) = is_not("\"")(input)?;
        let (input, _) = char('"')(input)?;
        Ok((input, content.to_string()))
    } else {
        // Unquoted field
        let (input, content) = is_not(",\n")(input)?;
        Ok((input, content.trim().to_string()))
    }
}
 
/// Parses one CSV record: one or more fields separated by commas.
fn parse_csv_line(input: &str) -> IResult<&str, Vec<String>> {
    let mut record = separated_list1(char(','), parse_field);
    record(input)
}
 
fn parse_csv(input: &str) -> IResult<&str, Vec<Vec<String>>> {
    let lines: Vec<&str> = input.lines().collect();
    let mut result = Vec::new();
    let mut remaining = input;
    
    for line in lines {
        let (rem, fields) = parse_csv_line(remaining)?;
        result.push(fields);
        remaining = rem;
        // Skip newline
        if !remaining.is_empty() {
            let (rem, _) = opt(tag("\n"))(remaining)?;
            remaining = rem;
        }
    }
    
    Ok((remaining, result))
}
 
/// Parses a small embedded CSV document and prints every row.
fn main() {
    let csv_data = r#"name,age,city
"John Doe",30,"New York"
"Jane Smith",25,"Los Angeles"
Bob,35,Chicago"#;

    let rows = match parse_csv(csv_data) {
        Ok((_, rows)) => rows,
        Err(e) => {
            println!("Error: {:?}", e);
            return;
        }
    };

    println!("CSV rows:");
    rows.iter().for_each(|row| println!("  {:?}", row));
}

Parse URL

use nom::{
    IResult,
    bytes::complete::{tag, take_until},
    character::complete::{alpha1, alphanumeric1, digit1},
    combinator::{map, opt},
    sequence::{preceded, tuple},
    multi::many0,
};
 
/// The components of a parsed URL.
#[derive(Debug, Default)]
struct Url {
    /// Scheme name without the `://` separator.
    scheme: String,
    /// Host name.
    host: String,
    /// Port number, when one was given.
    port: Option<u16>,
    /// Path component; may be empty.
    path: String,
    /// Query string, when present.
    query: Option<String>,
    /// Fragment, when present.
    fragment: Option<String>,
}
 
fn parse_scheme(input: &str) -> IResult<&str, &str> {
    let (input, scheme) = alpha1(input)?;
    let (input, _) = tag("://")(input)?;
    Ok((input, scheme))
}
 
fn parse_host(input: &str) -> IResult<&str, String> {
    let (input, parts) = many0(|i| {
        let (i, part) = alphanumeric1(i)?;
        let (i, _) = opt(tag("."))(i)?;
        Ok((i, part))
    })(input)?;
    Ok((input, parts.join(".")))
}
 
fn parse_url(input: &str) -> IResult<&str, Url> {
    let (input, scheme) = parse_scheme(input)?;
    let (input, host) = parse_host(input)?;
    let (input, port) = opt(preceded(
        tag(":"),
        map(digit1, |s: &str| s.parse().unwrap())
    ))(input)?;
    let (input, path) = opt(|i| {
        let (i, _) = tag("/")(i)?;
        let (i, path) = take_until("?")(i).or_else(|_| take_until("#")(i)).or_else(|_| Ok(("", i)))?;
        Ok((i, format!("/{}", path)))
    }).map(|opt| opt.unwrap_or_default())(input)?;
    
    Ok((input, Url {
        scheme: scheme.to_string(),
        host,
        port,
        path,
        query: None,
        fragment: None,
    }))
}
 
/// Parses a few sample URLs and prints the parsed structure for each.
fn main() {
    let samples = [
        "https://example.com",
        "http://localhost:8080",
        "https://api.example.com/v1/users",
    ];

    for address in samples {
        let outcome = parse_url(address);
        // Unparsed trailing input is not reported by this demo.
        if let Ok((_remaining, parsed)) = outcome {
            println!("{} => {:?}", address, parsed);
        } else if let Err(problem) = outcome {
            println!("Error parsing '{}': {:?}", address, problem);
        }
    }
}

Parse JSON

use nom::{
    IResult,
    bytes::complete::{tag, take_until},
    character::complete::{char, digit1, space0},
    branch::alt,
    combinator::{map, opt},
    multi::{many0, separated_list0},
    sequence::delimited,
};
 
/// A JSON value.
///
/// NOTE(review): the parsers in this example only produce `Null`, `Bool`,
/// `Number` and `String`; nothing here constructs `Array` or `Object`.
#[derive(Debug, PartialEq)]
enum JsonValue {
    /// The literal `null`.
    Null,
    /// The literals `true` and `false`.
    Bool(bool),
    /// A numeric value, stored as `f64`.
    Number(f64),
    /// A string value, without its surrounding quotes.
    String(String),
    /// An ordered list of values.
    Array(Vec<JsonValue>),
    /// A map from keys to values.
    Object(std::collections::HashMap<String, JsonValue>),
}
 
/// Parses the literal `null` into `JsonValue::Null`.
fn parse_null(input: &str) -> IResult<&str, JsonValue> {
    let (rest, _) = tag("null")(input)?;
    Ok((rest, JsonValue::Null))
}
 
/// Parses the literal `true` or `false` into `JsonValue::Bool`.
fn parse_bool(input: &str) -> IResult<&str, JsonValue> {
    // Match either keyword, then derive the boolean from which one it was.
    map(
        alt((tag("true"), tag("false"))),
        |word: &str| JsonValue::Bool(word == "true"),
    )(input)
}
 
/// Parses `[-]digits[.digits]` into `JsonValue::Number`.
///
/// The matched pieces are reassembled into a literal and converted with
/// `str::parse`.
fn parse_number(input: &str) -> IResult<&str, JsonValue> {
    let (input, minus) = opt(char('-'))(input)?;
    let (input, whole) = digit1(input)?;
    let (input, fraction) = opt(|i| {
        let (i, _) = char('.')(i)?;
        digit1(i)
    })(input)?;

    let sign = if minus.is_some() { "-" } else { "" };
    let literal = match fraction {
        Some(decimals) => format!("{}{}.{}", sign, whole, decimals),
        None => format!("{}{}", sign, whole),
    };

    Ok((input, JsonValue::Number(literal.parse().unwrap())))
}
 
fn parse_string(input: &str) -> IResult<&str, JsonValue> {
    let (input, _) = char('"')(input)?;
    let (input, content) = take_until("\"")(input)?;
    let (input, _) = char('"')(input)?;
    Ok((input, JsonValue::String(content.to_string())))
}
 
fn parse_json_value(input: &str) -> IResult<&str, JsonValue> {
    let (input, _) = space0(input)?;
    let (input, value) = alt((
        parse_null,
        parse_bool,
        parse_number,
        parse_string,
    ))(input)?;
    let (input, _) = space0(input)?;
    Ok((input, value))
}
 
/// Parses a set of sample scalar JSON values and prints each result.
fn main() {
    let samples = [
        "null",
        "true",
        "false",
        "42",
        "-3.14",
        "\"hello\"",
    ];

    for text in samples {
        let outcome = parse_json_value(text);
        // Unparsed trailing input is not reported by this demo.
        if let Ok((_remaining, parsed)) = outcome {
            println!("{} => {:?}", text, parsed);
        } else if let Err(problem) = outcome {
            println!("Error: {:?}", problem);
        }
    }
}

Error Handling

use nom::{
    IResult,
    bytes::complete::tag,
    character::complete::alpha1,
    error::{Error, ErrorKind},
    Err as NomErr,
};
 
/// Parses `Hello <name>` and returns the alphabetic name.
fn parse_greeting(input: &str) -> IResult<&str, &str> {
    // Match the fixed prefix, then hand the rest to the name parser.
    tag("Hello ")(input).and_then(|(rest, _prefix)| alpha1(rest))
}
 
fn main() {
    // Success case
    match parse_greeting("Hello World") {
        Ok((remaining, name)) => println!("Parsed: '{}', remaining: '{}'", name, remaining),
        Err(e) => println!("Error: {:?}", e),
    }
    
    // Error case - wrong tag
    match parse_greeting("Hi World") {
        Ok((remaining, name)) => println!("Parsed: '{}', remaining: '{}'", name, remaining),
        Err(NomErr::Error(e)) => {
            println!("Parse error at '{}' - expected '{:?}'", 
                     e.input, e.code);
        }
        Err(NomErr::Failure(e)) => {
            println!("Failure: {:?}", e);
        }
        Err(NomErr::Incomplete(needed)) => {
            println!("Need more data: {:?}", needed);
        }
    }
    
    // Error case - no alpha after tag
    match parse_greeting("Hello 123") {
        Ok((remaining, name)) => println!("Parsed: '{}', remaining: '{}'", name, remaining),
        Err(e) => println!("Error: {:?}", e),
    }
}

Complete vs Streaming

use nom::{
    IResult,
    bytes::{complete, streaming},
    character::complete as char_complete,
    character::streaming as char_streaming,
};
 
/// Contrasts the `complete` and `streaming` flavours of the same parser.
fn main() {
    use nom::InputTakeAtPosition;

    // Complete parsers assume the whole input is already available and fail
    // when the pattern is not found.
    let whole_input: IResult<&str, &str> = char_complete::alpha1("hello");
    println!("Complete alpha1: {:?}", whole_input);

    // Streaming parsers are meant for chunked input and may report
    // Incomplete when more data could still change the result.
    let chunk: IResult<&str, &str> = char_streaming::alpha1("hello");
    println!("Streaming alpha1: {:?}", chunk);

    // Streaming parser applied to an empty chunk.
    let empty_chunk: IResult<&str, &str> = char_streaming::alpha1("");
    println!("Streaming empty: {:?}", empty_chunk);
}

Real-World: HTTP Request Parser

use nom::{
    IResult,
    bytes::complete::{tag, take_until},
    character::complete::{alpha1, not_line_ending, space0, space1},
    sequence::tuple,
    multi::many0,
    combinator::map,
};
 
/// A parsed HTTP request.
#[derive(Debug)]
struct HttpRequest {
    /// Request method, e.g. `GET`.
    method: String,
    /// Request target, e.g. `/index.html`.
    path: String,
    /// Protocol version text, e.g. `HTTP/1.1`.
    version: String,
    /// Header name/value pairs in the order they appeared.
    headers: Vec<(String, String)>,
    /// Everything after the blank line that ends the headers.
    body: String,
}
 
fn parse_request_line(input: &str) -> IResult<&str, (String, String, String)> {
    let (input, (method, _, path, _, version, _)) = tuple((
        alpha1,
        space1,
        take_until(" "),
        space1,
        take_until("\r\n"),
        tag("\r\n"),
    ))(input)?;
    
    Ok((input, (method.to_string(), path.to_string(), version.to_string())))
}
 
fn parse_header(input: &str) -> IResult<&str, (String, String)> {
    let (input, (name, _, value, _)) = tuple((
        take_until(":"),
        tag(": "),
        take_until("\r\n"),
        tag("\r\n"),
    ))(input)?;
    
    Ok((input, (name.to_string(), value.to_string())))
}
 
/// Parses zero or more consecutive header lines.
fn parse_headers(input: &str) -> IResult<&str, Vec<(String, String)>> {
    let mut header_block = many0(parse_header);
    header_block(input)
}
 
/// Parses a full HTTP request: request line, headers, blank line, body.
///
/// The body is whatever follows the blank line; that same text is also
/// returned as the remaining input.
fn parse_http_request(input: &str) -> IResult<&str, HttpRequest> {
    let (rest, request_line) = parse_request_line(input)?;
    let (rest, headers) = parse_headers(rest)?;
    // The empty line separates the headers from the body.
    let (rest, _) = tag("\r\n")(rest)?;

    let (method, path, version) = request_line;
    let request = HttpRequest {
        method,
        path,
        version,
        headers,
        body: rest.to_string(),
    };

    Ok((rest, request))
}
 
/// Parses a sample HTTP request and prints its parts.
fn main() {
    let request = "GET /index.html HTTP/1.1\r\n\
Host: example.com\r\n\
User-Agent: Mozilla/5.0\r\n\
Accept: text/html\r\n\
\r\n\
<body>";

    let http_request = match parse_http_request(request) {
        Ok((_, parsed)) => parsed,
        Err(e) => {
            println!("Error: {:?}", e);
            return;
        }
    };

    println!("Parsed HTTP Request:");
    println!("  Method: {}", http_request.method);
    println!("  Path: {}", http_request.path);
    println!("  Version: {}", http_request.version);
    println!("  Headers:");
    http_request
        .headers
        .iter()
        .for_each(|(name, value)| println!("    {}: {}", name, value));
    println!("  Body: {}", http_request.body);
}

Real-World: Log File Parser

use nom::{
    IResult,
    bytes::complete::take_until,
    character::complete::{char, digit1, space0, space1},
    sequence::tuple,
    combinator::map,
};
 
/// One parsed line of the log file.
#[derive(Debug)]
struct LogEntry {
    /// Timestamp text, kept exactly as it appears in the line.
    timestamp: String,
    /// Severity label, e.g. `INFO`.
    level: String,
    /// The free-form remainder of the line.
    message: String,
}
 
/// Takes everything up to the first space as the timestamp.
fn parse_timestamp(input: &str) -> IResult<&str, String> {
    take_until(" ")(input).map(|(rest, stamp)| (rest, stamp.to_string()))
}
 
fn parse_level(input: &str) -> IResult<&str, String> {
    let (input, level) = take_until(" ")(input)?;
    Ok((input, level.to_string()))
}
 
fn parse_log_line(input: &str) -> IResult<&str, LogEntry> {
    let (input, (ts, _, level, _, message)) = tuple((
        parse_timestamp,
        space1,
        parse_level,
        space1,
        |i| map(take_until("\n"), |s: &str| s.to_string())(i),
    ))(input)?;
    
    Ok((input, LogEntry {
        timestamp: ts,
        level,
        message,
    }))
}
 
/// Parses every line of `input`, silently skipping lines that fail to parse.
fn parse_logs(input: &str) -> Vec<LogEntry> {
    let mut entries = Vec::new();
    for line in input.lines() {
        if let Ok((_, entry)) = parse_log_line(line) {
            entries.push(entry);
        }
    }
    entries
}
 
/// Parses an embedded sample log and prints each entry.
fn main() {
    let logs = r#"2024-01-15T10:30:00 INFO Application started
2024-01-15T10:30:01 DEBUG Loading configuration
2024-01-15T10:30:02 WARN Config file not found, using defaults
2024-01-15T10:30:03 ERROR Failed to connect to database
2024-01-15T10:30:04 INFO Retrying connection..."#;

    let parsed = parse_logs(logs);
    println!("Parsed {} log entries:\n", parsed.len());

    parsed.iter().for_each(|entry| {
        println!("[{}] {} - {}", entry.timestamp, entry.level, entry.message)
    });
}

Summary

  • Parser functions return IResult<Input, Output> with remaining input and result
  • Use tag("literal") to match exact strings
  • Use alpha1, digit1, alphanumeric1 for character classes
  • Use take(n) to consume a fixed number of bytes (or characters, for &str input)
  • Use tuple((parser1, parser2, ...)) to sequence parsers
  • Use alt((parser1, parser2, ...)) for alternatives
  • Use map(parser, fn) to transform results
  • Use many0, many1, count for repetition
  • Use opt for optional parsers
  • Use preceded, delimited, separated_pair for common patterns
  • Use be_u8, be_u16, le_u32 etc. for binary parsing
  • Use complete parsers for full input, streaming for chunks
  • Perfect for: text parsing, binary protocols, DSLs, config files