How do I parse structured data with nom in Rust?

Walkthrough

The nom crate is a parser combinator library that lets you build parsers by combining small, reusable parsing functions. It's especially powerful for parsing binary formats, text protocols, and custom data formats. Nom uses a functional style where parsers are composed together to create complex parsers from simple building blocks. Each parser takes input and returns IResult, which contains the remaining input and the parsed result on success, or an error on failure.

Key concepts:

  1. Parsers — functions that take input and return IResult<Input, Output>
  2. Combinators — functions that combine parsers (like map, pair, alt)
  3. Built-in parsers — for bytes, characters, numbers, and more
  4. Streaming vs complete — streaming parsers report Incomplete when more input may still arrive; complete parsers treat the input as the whole document
  5. Error handling — detailed error types with context

Code Example

# Cargo.toml
[dependencies]
nom = "7.1"

// src/main.rs
use nom::{
    IResult,
    bytes::complete::tag,
    character::complete::{digit1, space0},
    combinator::map_res,
};
 
/// Parses a leading run of ASCII digits and converts it to an `i32`.
///
/// Fails when the input does not start with a digit or when the digits do
/// not fit in an `i32`.
fn parse_number(input: &str) -> IResult<&str, i32> {
    let mut number = map_res(digit1, |digits: &str| digits.parse::<i32>());
    number(input)
}
 
/// Runs `parse_number` on a sample and prints either the parsed value with
/// the unconsumed remainder, or the error.
fn main() {
    match parse_number("123abc") {
        Err(e) => println!("Error: {:?}", e),
        Ok((remaining, number)) => {
            println!("Parsed {} with remaining: '{}'", number, remaining)
        }
    }
}

Basic Parsers

use nom::{
    IResult,
    bytes::complete::{tag, take, take_while, take_while1},
    character::complete::{alpha1, digit1, space0, space1, alphanumeric1},
};
 
/// Demonstrates the basic built-in parsers, printing each result.
fn main() {
    // tag: succeeds only when the input begins with the exact literal.
    let greeting: IResult<&str, &str> = tag("hello")("hello world");
    match greeting {
        Err(e) => println!("Error: {:?}", e),
        Ok((remaining, matched)) => {
            println!("tag: matched '{}', remaining '{}'", matched, remaining)
        }
    }

    // take: splits off a fixed number of characters.
    let first_three: IResult<&str, &str> = take(3usize)("abcdef");
    println!("take 3: {:?}", first_three);

    // alpha1: at least one alphabetic character.
    let letters: IResult<&str, &str> = alpha1("hello123");
    println!("alpha1: {:?}", letters);

    // digit1: at least one digit.
    let digits: IResult<&str, &str> = digit1("123abc");
    println!("digit1: {:?}", digits);

    // alphanumeric1: at least one letter or digit.
    let word: IResult<&str, &str> = alphanumeric1("hello123!@#");
    println!("alphanumeric1: {:?}", word);

    // space0: any amount of leading blanks, including none.
    let optional_blanks: IResult<&str, &str> = space0("   hello");
    println!("space0: {:?}", optional_blanks);

    // space1: at least one leading blank.
    let required_blanks: IResult<&str, &str> = space1("   hello");
    println!("space1: {:?}", required_blanks);

    // Predicate shared by the two take_while demos below.
    fn is_vowel(c: char) -> bool {
        "aeiou".contains(c)
    }

    // take_while: consumes for as long as the predicate holds (may match nothing).
    let vowels: IResult<&str, &str> = take_while(is_vowel)("aeiobcd");
    println!("take_while vowel: {:?}", vowels);

    // take_while1: same, but at least one character has to match.
    let vowels_nonempty: IResult<&str, &str> = take_while1(is_vowel)("aeiobcd");
    println!("take_while1 vowel: {:?}", vowels_nonempty);
}

Combinators

use nom::{
    IResult,
    bytes::complete::tag,
    character::complete::{alpha1, digit1, space0, space1, char},
    combinator::{map, map_res, opt, peek, not, recognize},
    sequence::{preceded, terminated, delimited, tuple, pair},
    multi::{many0, many1, separated_list0, separated_list1},
    branch::alt,
};
 
/// Demonstrates the common combinators, printing each result.
fn main() {
    // map: apply an infallible function to the parser's output.
    let mut upper = map(alpha1, |word: &str| word.to_uppercase());
    let shouted: IResult<&str, String> = upper("hello world");
    println!("map to uppercase: {:?}", shouted);

    // map_res: apply a function that may itself fail.
    let mut int = map_res(digit1, |digits: &str| digits.parse::<i32>());
    let number: IResult<&str, i32> = int("123abc");
    println!("map_res to int: {:?}", number);

    // opt: turn a failing parser into `None` instead of an error.
    let mut maybe_hello = opt(tag("hello"));
    let present: IResult<&str, Option<&str>> = maybe_hello("hello world");
    println!("opt present: {:?}", present);
    let absent: IResult<&str, Option<&str>> = maybe_hello("world");
    println!("opt absent: {:?}", absent);

    // peek: match without advancing the input.
    let mut lookahead = peek(tag("hello"));
    let peeked: IResult<&str, &str> = lookahead("hello world");
    println!("peek: {:?}", peeked);

    // preceded: drop the first parser's output, keep the second's.
    let mut after_prefix = preceded(tag("prefix:"), alpha1);
    let suffix_part: IResult<&str, &str> = after_prefix("prefix:hello");
    println!("preceded: {:?}", suffix_part);

    // terminated: keep the first parser's output, drop the second's.
    let mut before_bang = terminated(alpha1, tag("!"));
    let prefix_part: IResult<&str, &str> = before_bang("hello!");
    println!("terminated: {:?}", prefix_part);

    // delimited: keep only the middle of three parsers.
    let mut quoted = delimited(char('"'), alpha1, char('"'));
    let inner: IResult<&str, &str> = quoted("\"hello\"");
    println!("delimited: {:?}", inner);

    // pair: two parsers in sequence, both outputs kept.
    let mut letters_then_digits = pair(alpha1, digit1);
    let both: IResult<&str, (&str, &str)> = letters_then_digits("abc123");
    println!("pair: {:?}", both);

    // tuple: any number of parsers in sequence.
    let mut dashed = tuple((alpha1, char('-'), digit1));
    let triple: IResult<&str, (&str, char, &str)> = dashed("abc-123");
    println!("tuple: {:?}", triple);

    // alt: first alternative that succeeds wins.
    let mut keyword = alt((tag("foo"), tag("bar"), tag("baz")));
    let chosen: IResult<&str, &str> = keyword("bar hello");
    println!("alt: {:?}", chosen);

    // many0: repeat zero or more times.
    let mut any_ab = many0(tag("ab"));
    let repeated0: IResult<&str, Vec<&str>> = any_ab("abababxyz");
    println!("many0: {:?}", repeated0);

    // many1: repeat one or more times.
    let mut some_ab = many1(tag("ab"));
    let repeated1: IResult<&str, Vec<&str>> = some_ab("abababxyz");
    println!("many1: {:?}", repeated1);

    // separated_list0: possibly empty list with a separator between items.
    let mut csv0 = separated_list0(char(','), alpha1);
    let items0: IResult<&str, Vec<&str>> = csv0("a,b,c,d");
    println!("separated_list0: {:?}", items0);

    // separated_list1: same, but at least one item is required.
    let mut csv1 = separated_list1(char(','), alpha1);
    let items1: IResult<&str, Vec<&str>> = csv1("a,b,c");
    println!("separated_list1: {:?}", items1);
}

Parsing Numbers

use nom::{
    IResult,
    character::complete::{digit1, char},
    bytes::complete::tag,
    combinator::{map_res, opt, recognize},
    sequence::tuple,
};
 
/// Demonstrates number parsers (decimal, float, hex, binary), printing each result.
fn main() {
    /// Parses an optionally negative decimal integer into an `i32`.
    ///
    /// Fails if no digit follows the optional sign or if the value overflows `i32`.
    fn parse_int(input: &str) -> IResult<&str, i32> {
        map_res(
            // `recognize` hands back the whole matched slice (sign + digits),
            // which is exactly what `str::parse` expects.
            recognize(tuple((opt(char('-')), digit1))),
            |s: &str| s.parse::<i32>(),
        )(input)
    }

    println!("parse_int: {:?}", parse_int("123"));
    println!("parse_int negative: {:?}", parse_int("-456"));

    /// Parses `[-]digits.digits` into an `f64`; both digit groups are required.
    fn parse_float(input: &str) -> IResult<&str, f64> {
        map_res(
            recognize(
                tuple((
                    opt(char('-')),
                    digit1,
                    char('.'),
                    digit1,
                ))
            ),
            |s: &str| s.parse::<f64>()
        )(input)
    }

    println!("parse_float: {:?}", parse_float("3.14"));
    println!("parse_float negative: {:?}", parse_float("-2.5"));

    /// Parses a `0x`-prefixed hexadecimal literal into a `u32`.
    ///
    /// Fails if the prefix is missing or the value overflows `u32`.
    fn parse_hex(input: &str) -> IResult<&str, u32> {
        let (input, _) = tag("0x")(input)?;
        map_res(
            nom::character::complete::hex_digit1,
            |s: &str| u32::from_str_radix(s, 16)
        )(input)
    }

    println!("parse_hex: {:?}", parse_hex("0xFF"));
    println!("parse_hex: {:?}", parse_hex("0xDEADBEEF"));

    /// Parses a `0b`-prefixed binary literal into a `u32`.
    ///
    /// Fails if the prefix is missing or the value overflows `u32`.
    fn parse_binary(input: &str) -> IResult<&str, u32> {
        let (input, _) = tag("0b")(input)?;
        map_res(
            nom::character::complete::bin_digit1,
            |s: &str| u32::from_str_radix(s, 2)
        )(input)
    }

    println!("parse_binary: {:?}", parse_binary("0b1010"));
    println!("parse_binary: {:?}", parse_binary("0b11111111"));
}

Parsing Expressions

use nom::{
    IResult,
    bytes::complete::tag,
    character::complete::{char, digit1, space0, alpha1},
    combinator::{map_res, map},
    sequence::{tuple, preceded},
    branch::alt,
};
 
/// Abstract syntax tree for the arithmetic expressions parsed in this example.
#[derive(Debug, Clone)]
enum Expr {
    /// Integer literal.
    Number(i64),
    /// Named variable, looked up at evaluation time.
    Variable(String),
    /// `left + right`
    Add(Box<Expr>, Box<Expr>),
    /// `left - right`
    Sub(Box<Expr>, Box<Expr>),
    /// `left * right`
    Mul(Box<Expr>, Box<Expr>),
    /// `left / right`
    Div(Box<Expr>, Box<Expr>),
}
 
fn parse_number(input: &str) -> IResult<&str, Expr> {
    map_res(digit1, |s: &str| s.parse::<i64>().map(Expr::Number))(input)
}
 
fn parse_variable(input: &str) -> IResult<&str, Expr> {
    map(alpha1, |s: &str| Expr::Variable(s.to_string()))(input)
}
 
/// Parses an atom: a number literal, or failing that a variable name.
fn parse_atom(input: &str) -> IResult<&str, Expr> {
    let mut atom = alt((parse_number, parse_variable));
    atom(input)
}
 
fn parse_parens(input: &str) -> IResult<&str, Expr> {
    let (input, _) = char('(')(input)?;
    let (input, _) = space0(input)?;
    let (input, expr) = parse_expr(input)?;
    let (input, _) = space0(input)?;
    let (input, _) = char(')')(input)?;
    Ok((input, expr))
}
 
/// Parses a term: one operand followed by any number of `* operand` or
/// `/ operand` pairs, folded left-associatively.
///
/// An operand is a parenthesised expression or an atom. Blanks around
/// operators are skipped. Parsing stops (successfully) at the first position
/// that is not `*` or `/`; an operator with no valid operand after it is an
/// error.
fn parse_term(input: &str) -> IResult<&str, Expr> {
    let (input, mut left) = alt((parse_parens, parse_atom))(input)?;
    let (mut input, _) = space0(input)?;

    loop {
        // The error type must be spelled out: the error is only inspected via
        // `Err(_)`, so nothing else lets the compiler infer it.
        let op_result: IResult<&str, char> = alt((char('*'), char('/')))(input);
        let (remaining, op) = match op_result {
            Ok(found) => found,
            // No further operator: the term ends here.
            Err(_) => return Ok((input, left)),
        };

        let (remaining, _) = space0(remaining)?;
        let (remaining, right) = alt((parse_parens, parse_atom))(remaining)?;
        let (remaining, _) = space0(remaining)?;

        left = match op {
            '*' => Expr::Mul(Box::new(left), Box::new(right)),
            '/' => Expr::Div(Box::new(left), Box::new(right)),
            _ => unreachable!(),
        };
        input = remaining;
    }
}
 
/// Parses an expression: one term followed by any number of `+ term` or
/// `- term` pairs, folded left-associatively.
///
/// Because operands are parsed with `parse_term`, `*` and `/` bind tighter
/// than `+` and `-`. Parsing stops (successfully) at the first position that
/// is not `+` or `-`; an operator with no valid term after it is an error.
fn parse_expr(input: &str) -> IResult<&str, Expr> {
    let (input, mut left) = parse_term(input)?;
    let (mut input, _) = space0(input)?;

    loop {
        // The error type must be spelled out: the error is only inspected via
        // `Err(_)`, so nothing else lets the compiler infer it.
        let op_result: IResult<&str, char> = alt((char('+'), char('-')))(input);
        let (remaining, op) = match op_result {
            Ok(found) => found,
            // No further operator: the expression ends here.
            Err(_) => return Ok((input, left)),
        };

        let (remaining, _) = space0(remaining)?;
        let (remaining, right) = parse_term(remaining)?;
        let (remaining, _) = space0(remaining)?;

        left = match op {
            '+' => Expr::Add(Box::new(left), Box::new(right)),
            '-' => Expr::Sub(Box::new(left), Box::new(right)),
            _ => unreachable!(),
        };
        input = remaining;
    }
}
 
/// Evaluates an expression tree to an `i64`.
///
/// Variables missing from `vars` evaluate to `0`.
///
/// # Panics
///
/// Panics on division by zero. Arithmetic overflow panics in debug builds.
fn eval(expr: &Expr, vars: &std::collections::HashMap<String, i64>) -> i64 {
    match expr {
        Expr::Number(value) => *value,
        Expr::Variable(name) => vars.get(name).copied().unwrap_or(0),
        Expr::Add(lhs, rhs) => {
            let (l, r) = (eval(lhs, vars), eval(rhs, vars));
            l + r
        }
        Expr::Sub(lhs, rhs) => {
            let (l, r) = (eval(lhs, vars), eval(rhs, vars));
            l - r
        }
        Expr::Mul(lhs, rhs) => {
            let (l, r) = (eval(lhs, vars), eval(rhs, vars));
            l * r
        }
        Expr::Div(lhs, rhs) => {
            let (l, r) = (eval(lhs, vars), eval(rhs, vars));
            l / r
        }
    }
}
 
/// Parses a handful of sample expressions, printing each parse result and,
/// for the compound ones, the evaluated value.
fn main() {
    // None of the evaluated samples use variables, so one empty map serves all.
    let no_vars = std::collections::HashMap::new();

    // A lone literal.
    println!("Number: {:?}", parse_expr("42"));

    // A lone variable.
    println!("Variable: {:?}", parse_expr("x"));

    // One binary operator.
    println!("Addition: {:?}", parse_expr("1 + 2"));

    // `*` binds tighter than `+`.
    let precedence = parse_expr("2 + 3 * 4");
    println!("With precedence: {:?}", precedence);
    if let Ok((_, expr)) = precedence {
        println!("  Evaluated: {}", eval(&expr, &no_vars));
    }

    // Parentheses override precedence.
    let grouped = parse_expr("(2 + 3) * 4");
    println!("Parentheses: {:?}", grouped);
    if let Ok((_, expr)) = grouped {
        println!("  Evaluated: {}", eval(&expr, &no_vars));
    }

    // Several operators mixed.
    let mixed = parse_expr("10 - 2 * 3 + 4");
    println!("Complex: {:?}", mixed);
    if let Ok((_, expr)) = mixed {
        println!("  Evaluated: {}", eval(&expr, &no_vars));
    }
}

Parsing JSON

use nom::{
    IResult,
    bytes::complete::tag,
    character::complete::{char, digit1, space0, alpha1},
    combinator::{map, map_res, opt, recognize},
    sequence::{delimited, preceded, tuple},
    multi::{many0, separated_list0},
    branch::alt,
};
 
/// A parsed JSON value.
#[derive(Debug, Clone, PartialEq)]
enum JsonValue {
    /// The literal `null`.
    Null,
    /// The literal `true` or `false`.
    Bool(bool),
    /// Any JSON number, stored as a double.
    Number(f64),
    /// A string with its escape sequences already decoded.
    String(String),
    /// An array of values, in source order.
    Array(Vec<JsonValue>),
    /// An object as key/value pairs, in source order.
    Object(Vec<(String, JsonValue)>),
}
 
/// Parses the literal `null`.
fn parse_null(input: &str) -> IResult<&str, JsonValue> {
    let (rest, _) = tag("null")(input)?;
    Ok((rest, JsonValue::Null))
}
 
/// Parses the literal `true` or `false`.
fn parse_bool(input: &str) -> IResult<&str, JsonValue> {
    let (rest, literal) = alt((tag("true"), tag("false")))(input)?;
    Ok((rest, JsonValue::Bool(literal == "true")))
}
 
fn parse_number(input: &str) -> IResult<&str, JsonValue> {
    let (input, neg) = opt(char('-'))(input)?;
    let (input, int_part) = digit1(input)?;
    let (input, frac_part) = opt(preceded(char('.'), digit1))(input)?;
    let (input, exp_part) = opt(tuple((
        alt((char('e'), char('E'))),
        opt(alt((char('+'), char('-')))),
        digit1,
    )))(input)?;
    
    let mut num_str = String::new();
    if neg.is_some() { num_str.push('-'); }
    num_str.push_str(int_part);
    if let Some(frac) = frac_part { num_str.push_str(&format!(".{}", frac)); }
    if let Some((_, sign, exp)) = exp_part {
        num_str.push('e');
        if let Some(s) = sign { num_str.push(s); }
        num_str.push_str(exp);
    }
    
    let num: f64 = num_str.parse().unwrap();
    Ok((input, JsonValue::Number(num)))
}
 
fn parse_string(input: &str) -> IResult<&str, JsonValue> {
    let (input, _) = char('"')(input)?;
    let (input, chars) = many0(alt((
        map_res(
            preceded(char('\\'), alt((
                tag("\\"), tag("\""), tag("n"), tag("t"), tag("r"),
            ))),
            |s: &str| match s {
                "\\" => Ok('\\'),
                "\"" => Ok('"'),
                "n" => Ok('\n'),
                "t" => Ok('\t'),
                "r" => Ok('\r'),
                _ => Err(()),
            }
        ),
        map_res(
            nom::bytes::complete::take_while1(|c: char| c != '"' && c != '\\'),
            |s: &str| Ok::<_, ()>(s.chars().next().unwrap())
        ),
    )))(input)?;
    let (input, _) = char('"')(input)?;
    
    Ok((input, JsonValue::String(chars.into_iter().collect())))
}
 
fn parse_value(input: &str) -> IResult<&str, JsonValue> {
    let (input, _) = space0(input)?;
    let (input, value) = alt((
        parse_null,
        parse_bool,
        parse_number,
        parse_string,
        parse_array,
        parse_object,
    ))(input)?;
    let (input, _) = space0(input)?;
    Ok((input, value))
}
 
fn parse_array(input: &str) -> IResult<&str, JsonValue> {
    let (input, _) = char('[')(input)?;
    let (input, _) = space0(input)?;
    let (input, values) = separated_list0(
        delimited(space0, char(','), space0),
        parse_value,
    )(input)?;
    let (input, _) = space0(input)?;
    let (input, _) = char(']')(input)?;
    Ok((input, JsonValue::Array(values)))
}
 
fn parse_object(input: &str) -> IResult<&str, JsonValue> {
    let (input, _) = char('{')(input)?;
    let (input, _) = space0(input)?;
    let (input, pairs) = separated_list0(
        delimited(space0, char(','), space0),
        |input: &str| {
            let (input, _) = space0(input)?;
            let (input, key) = parse_string(input)?;
            let (input, _) = space0(input)?;
            let (input, _) = char(':')(input)?;
            let (input, value) = parse_value(input)?;
            let key_str = match key {
                JsonValue::String(s) => s,
                _ => unreachable!(),
            };
            Ok((input, (key_str, value)))
        },
    )(input)?;
    let (input, _) = space0(input)?;
    let (input, _) = char('}')(input)?;
    Ok((input, JsonValue::Object(pairs)))
}
 
fn parse_json(input: &str) -> IResult<&str, JsonValue> {
    let (input, _) = space0(input)?;
    let (input, value) = parse_value(input)?;
    let (input, _) = space0(input)?;
    Ok((input, value))
}
 
/// Runs `parse_json` over one sample of each JSON value kind and prints the results.
fn main() {
    // The literal null.
    println!("null: {:?}", parse_json("null"));

    // A boolean.
    println!("bool: {:?}", parse_json("true"));

    // A number with fraction and exponent.
    println!("number: {:?}", parse_json("42.5e10"));

    // A string containing an escape sequence.
    println!("string: {:?}", parse_json("\"hello\\nworld\""));

    // A flat array.
    println!("array: {:?}", parse_json("[1, 2, 3]"));

    // A flat object.
    println!("object: {:?}", parse_json("{\"name\": \"Alice\", \"age\": 30}"));

    // A nested, multi-line document.
    let document = r#"
        {
            "name": "Test",
            "values": [1, 2, 3],
            "nested": {
                "active": true,
                "count": null
            }
        }
    "#;
    println!("complex: {:?}", parse_json(document));
}

Parsing URL

use nom::{
    IResult,
    bytes::complete::{tag, take_until, take_while, take_while1},
    character::complete::char,
    combinator::{opt, map},
    sequence::tuple,
};
 
/// The components of a parsed URL.
#[derive(Debug, Clone)]
struct Url {
    /// Scheme before `://`, lower-cased.
    scheme: String,
    /// Host name as written.
    host: String,
    /// Explicit port, or `None` when the URL has no `:port` part.
    port: Option<u16>,
    /// Path up to `?` or `#`; `"/"` when the URL has no path.
    path: String,
    /// Text between `?` and `#` (or end of input), without the `?`.
    query: Option<String>,
    /// Text after `#`, without the `#`.
    fragment: Option<String>,
}
 
fn parse_scheme(input: &str) -> IResult<&str, String> {
    let (input, scheme) = take_while1(|c: char| c.is_alphanumeric() || c == '+' || c == '-' || c == '.')(input)?;
    let (input, _) = tag("://")(input)?;
    Ok((input, scheme.to_lowercase()))
}
 
fn parse_host(input: &str) -> IResult<&str, String> {
    let (input, host) = take_while1(|c: char| c.is_alphanumeric() || c == '.' || c == '-' || c == '_')(input)?;
    Ok((input, host.to_string()))
}
 
fn parse_port(input: &str) -> IResult<&str, u16> {
    let (input, _) = char(':')(input)?;
    let (input, port) = take_while1(|c: char| c.is_ascii_digit())(input)?;
    Ok((input, port.parse().unwrap()))
}
 
/// Parses everything up to the first `?` or `#` as the path.
///
/// An empty path is reported as `"/"`.
fn parse_path(input: &str) -> IResult<&str, String> {
    let (rest, raw) = take_while(|c: char| !matches!(c, '?' | '#'))(input)?;
    let path = match raw {
        "" => "/",
        other => other,
    };
    Ok((rest, path.to_string()))
}
 
/// Parses `?` followed by the query string, which runs up to `#` or end of input.
fn parse_query(input: &str) -> IResult<&str, String> {
    let (after_mark, _) = char('?')(input)?;
    let (rest, query) = take_while(|c: char| c != '#')(after_mark)?;
    Ok((rest, String::from(query)))
}
 
/// Parses `#` and takes all remaining input as the fragment, leaving
/// nothing unconsumed.
fn parse_fragment(input: &str) -> IResult<&str, String> {
    let (fragment, _) = char('#')(input)?;
    Ok(("", String::from(fragment)))
}
 
fn parse_url(input: &str) -> IResult<&str, Url> {
    let (input, scheme) = parse_scheme(input)?;
    let (input, host) = parse_host(input)?;
    let (input, port) = opt(parse_port)(input)?;
    let (input, path) = parse_path(input)?;
    let (input, query) = opt(parse_query)(input)?;
    let (input, fragment) = opt(parse_fragment)(input)?;
    
    Ok((input, Url {
        scheme,
        host,
        port,
        path,
        query,
        fragment,
    }))
}
 
/// Parses a few sample URLs and prints every component of each.
fn main() {
    let samples = [
        "https://example.com",
        "http://localhost:8080/api",
        "https://api.example.com:443/users?id=123#profile",
        "ftp://files.example.com/downloads/file.txt",
    ];

    for &sample in &samples {
        match parse_url(sample) {
            Err(e) => println!("Error parsing '{}': {:?}", sample, e),
            Ok((remaining, parsed)) => {
                println!("URL: {}", sample);
                println!("  scheme: {}", parsed.scheme);
                println!("  host: {}", parsed.host);
                println!("  port: {:?}", parsed.port);
                println!("  path: {}", parsed.path);
                println!("  query: {:?}", parsed.query);
                println!("  fragment: {:?}", parsed.fragment);
                println!("  remaining: '{}'", remaining);
                println!();
            }
        }
    }
}

Parsing HTTP Request

use nom::{
    IResult,
    bytes::complete::{tag, take_until, take_while},
    character::complete::{char, space0, space1, alphanumeric1},
    combinator::opt,
    multi::many_till,
};
 
/// A parsed HTTP request.
#[derive(Debug, Clone)]
struct HttpRequest {
    /// Request method from the request line.
    method: String,
    /// Request target from the request line, including any query string.
    path: String,
    /// Version digits after `HTTP/`, e.g. `"1.1"`.
    version: String,
    /// Header name/value pairs in the order received, both trimmed.
    headers: Vec<(String, String)>,
    /// Everything after the blank line, or `None` when nothing follows it.
    body: Option<String>,
}
 
fn parse_method(input: &str) -> IResult<&str, String> {
    let (input, method) = take_while(|c: char| c.is_ascii_uppercase())(input)?;
    Ok((input, method.to_string()))
}
 
/// Parses the request target: everything up to the next space.
fn parse_path(input: &str) -> IResult<&str, String> {
    let (rest, target) = take_while(|c: char| c != ' ')(input)?;
    Ok((rest, String::from(target)))
}
 
/// Parses `HTTP/` followed by a run of digits and dots, returning only the
/// part after the slash.
fn parse_version(input: &str) -> IResult<&str, String> {
    let (after_prefix, _) = tag("HTTP/")(input)?;
    let is_version_char = |c: char| c == '.' || c.is_ascii_digit();
    let (rest, digits) = take_while(is_version_char)(after_prefix)?;
    Ok((rest, String::from(digits)))
}
 
fn parse_header(input: &str) -> IResult<&str, (String, String)> {
    let (input, name) = take_until(":")(input)?;
    let (input, _) = char(':')(input)?;
    let (input, _) = space0(input)?;
    let (input, value) = take_while(|c: char| c != '\r' && c != '\n')(input)?;
    let (input, _) = tag("\r\n")(input)?;
    Ok((input, (name.trim().to_string(), value.trim().to_string())))
}
 
/// Parses header lines until the blank `\r\n` line that ends the header
/// section, consuming that blank line as well.
fn parse_headers(input: &str) -> IResult<&str, Vec<(String, String)>> {
    let (rest, (headers, _blank_line)) = many_till(parse_header, tag("\r\n"))(input)?;
    Ok((rest, headers))
}
 
/// Parses a complete HTTP request: request line, headers, and optional body.
///
/// Everything after the header section becomes the body, so the returned
/// remainder is always empty.
fn parse_http_request(input: &str) -> IResult<&str, HttpRequest> {
    // Request line: METHOD SP PATH SP HTTP/VERSION CRLF
    let (rest, method) = parse_method(input)?;
    let (rest, _) = space1(rest)?;
    let (rest, path) = parse_path(rest)?;
    let (rest, _) = space1(rest)?;
    let (rest, version) = parse_version(rest)?;
    let (rest, _) = tag("\r\n")(rest)?;

    // Header section, terminated by a blank line.
    let (payload, headers) = parse_headers(rest)?;

    // Whatever is left is the body; an empty remainder means no body.
    let body = (!payload.is_empty()).then(|| payload.to_string());

    Ok(("", HttpRequest { method, path, version, headers, body }))
}
 
/// Parses a sample HTTP request and prints its parts.
fn main() {
    let raw = "GET /api/users?id=123 HTTP/1.1\r\nHost: example.com\r\nContent-Type: application/json\r\nAccept: */*\r\n\r\n{\"name\": \"Alice\"}";

    match parse_http_request(raw) {
        Err(e) => println!("Error: {:?}", e),
        Ok((_, req)) => {
            println!("Method: {}", req.method);
            println!("Path: {}", req.path);
            println!("Version: HTTP/{}", req.version);
            println!("Headers:");
            for (name, value) in req.headers.iter() {
                println!("  {}: {}", name, value);
            }
            println!("Body: {:?}", req.body);
        }
    }
}

Parsing CSV

use nom::{
    IResult,
    bytes::complete::{tag, take_until, take_while, escaped},
    character::complete::{char, none_of, one_of},
    combinator::{map, opt},
    multi::{many0, separated_list0},
    sequence::delimited,
};
 
/// Parses one CSV field, quoted or unquoted.
///
/// * Quoted: `"` ... `"`. Inside the quotes, `\"` yields a quote, `\\` a
///   backslash and `\n` a newline; every other character except `"` and `\`
///   is taken literally.
/// * Unquoted: everything up to the next `,`, CR or LF (may be empty).
fn parse_field(input: &str) -> IResult<&str, String> {
    // Quoted field
    if input.starts_with('"') {
        let (input, _) = char('"')(input)?;
        let (input, content) = many0(alt((
            // Only the quote and the backslash are special inside quotes.
            none_of("\"\\"),
            preceded(
                char('\\'),
                map(one_of("\"\\n"), |c: char| if c == 'n' { '\n' } else { c }),
            ),
        )))(input)?;
        let (input, _) = char('"')(input)?;
        Ok((input, content.into_iter().collect()))
    } else {
        // Unquoted field
        let (input, content) = take_while(|c: char| c != ',' && c != '\n' && c != '\r')(input)?;
        Ok((input, content.to_string()))
    }
}
 
use nom::branch::alt;
use nom::sequence::preceded;
 
/// Parses one CSV row: fields separated by commas.
fn parse_csv_line(input: &str) -> IResult<&str, Vec<String>> {
    let mut fields = separated_list0(char(','), parse_field);
    fields(input)
}
 
/// Parses CSV text into rows of fields.
///
/// At most as many rows are parsed as the input has lines; parsing stops
/// early once the input is exhausted. After each row one line break
/// (`\n` or `\r\n`) is skipped if present.
fn parse_csv(input: &str) -> IResult<&str, Vec<Vec<String>>> {
    let max_rows = input.lines().count();
    let mut rows = Vec::new();
    let mut rest = input;

    for _ in 0..max_rows {
        if rest.is_empty() {
            break;
        }
        let (after_row, row) = parse_csv_line(rest)?;
        rows.push(row);
        // Step over the row's line break, if there is one.
        rest = after_row
            .strip_prefix('\n')
            .or_else(|| after_row.strip_prefix("\r\n"))
            .unwrap_or(after_row);
    }

    Ok((rest, rows))
}
 
/// Parses a small CSV sample and prints every row with its 1-based index.
fn main() {
    let sample = "name,age,city\nAlice,30,New York\nBob,25,\"San Francisco\"\nCharlie,35,Chicago";

    match parse_csv(sample) {
        Err(e) => println!("Error: {:?}", e),
        Ok((_, rows)) => {
            for (index, row) in rows.iter().enumerate() {
                println!("Row {}: {:?}", index + 1, row);
            }
        }
    }
}

Error Handling

use nom::{
    IResult,
    bytes::complete::tag,
    character::complete::digit1,
    error::{Error, ErrorKind, ParseError, VerboseError, verbose_error},
    combinator::map_res,
};
 
// Basic error type
/// Parses `number:` followed by digits into an `i32`, reporting failures
/// with nom's basic `Error` type (input position plus error kind).
fn parse_with_basic_error(input: &str) -> IResult<&str, i32, Error<&str>> {
    let (digits_start, _) = tag("number:")(input)?;
    let mut number = map_res(digit1, |digits: &str| digits.parse::<i32>());
    number(digits_start)
}
 
// Verbose error with context
fn parse_with_verbose_error(input: &str) -> IResult<&str, i32, VerboseError<&str>> {
    let (input, _) = tag("number:")(input).map_err(|e: nom::Err<VerboseError<&str>>| {
        e.map(|ve| VerboseError {
            errors: vec![(input, nom::error::VerboseErrorKind::Context("expected 'number:'")),
                         ...ve.errors]
        })
    })?;
    map_res(digit1, |s: &str| s.parse::<i32>())(input)
}
 
/// Shows the basic-error parser on a failing and on a succeeding input.
fn main() {
    // Misspelled prefix: expected to land in one of the error arms.
    let failing = parse_with_basic_error("numbr:123");
    match failing {
        Err(nom::Err::Incomplete(n)) => println!("Need more: {:?}", n),
        Err(nom::Err::Failure(e)) => println!("Failure: {:?}", e),
        Err(nom::Err::Error(e)) => println!("Basic error: {:?}", e),
        Ok((_, n)) => println!("Parsed: {}", n),
    }

    // Well-formed input.
    let succeeding = parse_with_basic_error("number:42");
    match succeeding {
        Err(e) => println!("Error: {:?}", e),
        Ok((remaining, n)) => println!("Parsed {} with remaining '{}'", n, remaining),
    }
}

Binary Parsing

use nom::{
    IResult,
    bytes::complete::{tag, take},
    number::complete::{be_u16, be_u32, le_u16, le_u32},
    combinator::map,
    multi::many0,
};
 
/// Fixed-size header at the start of the binary format.
#[derive(Debug)]
struct BinaryHeader {
    /// First four bytes of the input.
    magic: [u8; 4],
    /// Format version, stored big-endian.
    version: u16,
    /// Record count, stored big-endian.
    count: u32,
}
 
/// One record following the header.
#[derive(Debug)]
struct BinaryRecord {
    /// Record id, stored little-endian.
    id: u32,
    /// Record value, stored little-endian.
    value: u16,
}
 
/// Parses the 10-byte header: 4 magic bytes, a big-endian `u16` version and
/// a big-endian `u32` record count.
fn parse_header(input: &[u8]) -> IResult<&[u8], BinaryHeader> {
    let (rest, magic_bytes) = take(4usize)(input)?;
    let (rest, version) = be_u16(rest)?;
    let (rest, count) = be_u32(rest)?;

    // `take(4)` succeeded, so the slice holds exactly four bytes.
    let magic = [magic_bytes[0], magic_bytes[1], magic_bytes[2], magic_bytes[3]];

    Ok((rest, BinaryHeader { magic, version, count }))
}
 
/// Parses one 6-byte record: a little-endian `u32` id followed by a
/// little-endian `u16` value.
fn parse_record(input: &[u8]) -> IResult<&[u8], BinaryRecord> {
    let (after_id, id) = le_u32(input)?;
    let (rest, value) = le_u16(after_id)?;
    let record = BinaryRecord { id, value };
    Ok((rest, record))
}
 
fn parse_binary_file(input: &[u8]) -> IResult<&[u8], (BinaryHeader, Vec<BinaryRecord>)> {
    let (input, header) = parse_header(input)?;
    let (input, records) = many0(parse_record)(input)?;
    
    Ok((input, (header, records)))
}
 
/// Builds a small in-memory binary file, parses it and prints the contents.
fn main() {
    // Layout: magic "TEST", version 1, count 2, then two records
    // (id=100, value=2000) and (id=101, value=3000).
    let bytes: Vec<u8> = vec![
        // Header, big endian
        b'T', b'E', b'S', b'T',  // magic
        0x00, 0x01,              // version = 1
        0x00, 0x00, 0x00, 0x02,  // count = 2
        // First record, little endian
        0x64, 0x00, 0x00, 0x00,  // id = 100
        0xD0, 0x07,              // value = 2000
        // Second record, little endian
        0x65, 0x00, 0x00, 0x00,  // id = 101
        0xB8, 0x0B,              // value = 3000
    ];

    match parse_binary_file(&bytes) {
        Err(e) => println!("Error: {:?}", e),
        Ok((remaining, (header, records))) => {
            println!("Header:");
            println!("  Magic: {:?}", std::str::from_utf8(&header.magic).unwrap());
            println!("  Version: {}", header.version);
            println!("  Count: {}", header.count);
            println!("Records:");
            for record in records.iter() {
                println!("  id={}, value={}", record.id, record.value);
            }
            println!("Remaining bytes: {}", remaining.len());
        }
    }
}

Real-World Example: Config File Parser

use nom::{
    IResult,
    bytes::complete::{tag, take_until, take_while},
    character::complete::{char, space0, alphanumeric1},
    combinator::{map, opt},
    multi::{many0, separated_list0},
    branch::alt,
};
 
/// A value on the right-hand side of a config entry.
#[derive(Debug, Clone)]
enum ConfigValue {
    /// Double-quoted string, stored without the quotes.
    String(String),
    /// Optionally negative integer.
    Number(i64),
    /// `true` or `false`.
    Boolean(bool),
    /// Bracketed, comma-separated list of values.
    List(Vec<ConfigValue>),
}
 
/// One `key = value` line of the config file.
#[derive(Debug, Clone)]
struct ConfigEntry {
    /// Key on the left of `=`.
    key: String,
    /// Parsed value on the right of `=`.
    value: ConfigValue,
}
 
/// A parsed config file: its entries in file order, with comments and
/// blank lines dropped.
#[derive(Debug, Clone)]
struct Config {
    /// All `key = value` entries, in the order they appear.
    entries: Vec<ConfigEntry>,
}
 
fn parse_key(input: &str) -> IResult<&str, String> {
    let (input, key) = take_while(|c: char| c.is_alphanumeric() || c == '_' || c == '.')(input)?;
    Ok((input, key.to_string()))
}
 
fn parse_string_value(input: &str) -> IResult<&str, ConfigValue> {
    let (input, _) = char('"')(input)?;
    let (input, value) = take_until("\"")(input)?;
    let (input, _) = char('"')(input)?;
    Ok((input, ConfigValue::String(value.to_string())))
}
 
fn parse_number_value(input: &str) -> IResult<&str, ConfigValue> {
    let (input, neg) = opt(char('-'))(input)?;
    let (input, num) = take_while(|c: char| c.is_ascii_digit())(input)?;
    let mut num_str = String::new();
    if neg.is_some() { num_str.push('-'); }
    num_str.push_str(num);
    Ok((input, ConfigValue::Number(num_str.parse().unwrap())))
}
 
/// Parses the literal `true` or `false` as a boolean value.
fn parse_bool_value(input: &str) -> IResult<&str, ConfigValue> {
    let (rest, literal) = alt((tag("true"), tag("false")))(input)?;
    Ok((rest, ConfigValue::Boolean(literal == "true")))
}
 
/// Parses a bracketed, comma-separated list of string, number or boolean
/// values. Blanks are allowed inside the brackets and around the commas;
/// nested lists are not accepted.
fn parse_list_value(input: &str) -> IResult<&str, ConfigValue> {
    let comma = delimited(space0, char(','), space0);
    let item = alt((parse_string_value, parse_number_value, parse_bool_value));
    let items = delimited(space0, separated_list0(comma, item), space0);

    let (rest, values) = delimited(char('['), items, char(']'))(input)?;
    Ok((rest, ConfigValue::List(values)))
}
 
use nom::sequence::delimited;
 
/// Parses any config value, trying string, number, boolean and list in that order.
fn parse_value(input: &str) -> IResult<&str, ConfigValue> {
    let mut any_value = alt((
        parse_string_value,
        parse_number_value,
        parse_bool_value,
        parse_list_value,
    ));
    any_value(input)
}
 
fn parse_comment(input: &str) -> IResult<&str, ()> {
    let (input, _) = tag("#")(input)?;
    let (input, _) = take_while(|c: char| c != '\n')(input)?;
    Ok((input, ()))
}
 
fn parse_entry(input: &str) -> IResult<&str, Option<ConfigEntry>> {
    let (input, _) = space0(input)?;
    
    // Check for comment or empty line
    if input.is_empty() || input.starts_with('#') || input.starts_with('\n') {
        let (input, _) = opt(parse_comment)(input)?;
        let (input, _) = opt(char('\n'))(input)?;
        return Ok((input, None));
    }
    
    let (input, key) = parse_key(input)?;
    let (input, _) = space0(input)?;
    let (input, _) = char('=')(input)?;
    let (input, _) = space0(input)?;
    let (input, value) = parse_value(input)?;
    let (input, _) = space0(input)?;
    let (input, _) = opt(char('\n'))(input)?;
    
    Ok((input, Some(ConfigEntry { key, value })))
}
 
/// Parses a whole config file, keeping only the real entries (comments and
/// blank lines are dropped).
fn parse_config(input: &str) -> IResult<&str, Config> {
    let (rest, lines) = many0(parse_entry)(input)?;
    let entries = lines.into_iter().filter_map(|line| line).collect();
    Ok((rest, Config { entries }))
}
 
impl Config {
    /// Returns the value of the first entry whose key equals `key`, or
    /// `None` when there is no such entry.
    fn get(&self, key: &str) -> Option<&ConfigValue> {
        for entry in &self.entries {
            if entry.key == key {
                return Some(&entry.value);
            }
        }
        None
    }
}
 
/// Parses a sample config file, prints every entry and looks up two keys.
fn main() {
    let source = r#"
# Application configuration
app.name = "MyApp"
app.version = 1
app.debug = true
 
# Server settings
server.host = "localhost"
server.port = 8080
server.timeout = 30
 
# Features
features.enabled = ["auth", "logging", "metrics"]
features.max_connections = 100
"#;

    let config = match parse_config(source) {
        Ok((_, config)) => config,
        Err(e) => {
            println!("Error: {:?}", e);
            return;
        }
    };

    println!("Parsed config:");
    for entry in config.entries.iter() {
        println!("  {} = {:?}", entry.key, entry.value);
    }

    // Typed lookups by key.
    if let Some(ConfigValue::String(name)) = config.get("app.name") {
        println!("\nApp name: {}", name);
    }

    if let Some(ConfigValue::List(features)) = config.get("features.enabled") {
        println!("Enabled features: {:?}", features);
    }
}

Summary

  • Nom is a parser combinator library for building parsers from small composable pieces
  • IResult<Input, Output> is the return type: Ok((remaining_input, output)) or Err
  • Use bytes::complete for byte/character matching: tag, take, take_while
  • Use character::complete for character-specific parsers: alpha1, digit1, char
  • Use combinator for transforming and combining: map, map_res, opt, peek
  • Use sequence for sequential parsing: tuple, pair, preceded, terminated, delimited
  • Use multi for repetitions: many0, many1, separated_list0, separated_list1
  • Use branch for alternatives: alt
  • Use number::complete for binary parsing: be_u16, le_u32, etc.
  • Handle errors with Error, VerboseError, or custom error types
  • Parse binary data with &[u8] input instead of &str
  • Build complex parsers by composing simple ones
  • Use ? operator for easy error propagation
  • Ideal for: parsing protocols, file formats, DSLs, configuration files, expressions