How do I parse structured text in Rust?

Walkthrough

Nom is a parser combinator library that builds complex parsers from small, composable functions. Instead of writing grammar files and generating code, you write parsers directly in Rust using combinators—functions that take parsers and return new parsers. Nom is fast, produces zero allocations for many patterns, and handles streaming input gracefully.

Key concepts:

  1. Parsers — functions taking input and returning IResult<Input, Output, Error>
  2. Combinators — functions combining parsers (sequence, choice, repetition)
  3. Input types — &str for text, &[u8] for binary
  4. Error handling — rich error types with context
  5. Streaming/Complete — handle partial or complete input

Nom excels at parsing text protocols, binary formats, configuration files, and domain-specific languages.

Code Example

# Cargo.toml
[dependencies]
nom = "7"

// src/main.rs
use nom::{
    IResult,
    bytes::complete::{tag, take, take_while},
    character::complete::{alpha1, digit1, space0, space1},
    sequence::{pair, preceded, separated_pair, tuple},
    combinator::map,
};
 
// Basic parser: recognises the literal "hello" at the start of the input.
// On success returns the unconsumed remainder together with the matched text.
fn parse_hello(input: &str) -> IResult<&str, &str> {
    let hello = tag("hello");
    hello(input)
}
 
fn main() {
    // A matching prefix yields the matched text plus the untouched remainder.
    let (rest, greeting) = parse_hello("hello world").unwrap();
    println!("Matched: '{}'", greeting);   // "hello"
    println!("Remaining: '{}'", rest);     // " world"

    // A non-matching input is reported as an Err rather than a panic.
    let failure = parse_hello("goodbye");
    println!("Result: {:?}", failure);     // Err
}

Parsing Numbers and Identifiers

use nom::{
    IResult,
    bytes::complete::{tag, take_while},
    character::complete::{
        alpha1, alphanumeric1, digit1, 
        space0, space1, multispace0,
        char,
    },
    combinator::{map, map_res, opt, recognize},
    sequence::{pair, tuple},
};
 
// Parse a run of ASCII digits and convert it to an i32.
// A digit run that does not fit in i32 is reported as a parse error.
fn parse_number(input: &str) -> IResult<&str, i32> {
    map_res(digit1, str::parse::<i32>)(input)
}
 
// Parse an identifier: one or more letters followed by any number of
// alphanumeric characters. The whole matched span is returned as one slice.
fn parse_identifier(input: &str) -> IResult<&str, &str> {
    let head_then_tail = pair(alpha1, alphanumeric0);
    recognize(head_then_tail)(input)
}
 
// Parse an integer with an optional leading '-' sign.
// The sign and digits are captured as a single slice and then converted.
fn parse_signed_int(input: &str) -> IResult<&str, i32> {
    let signed_digits = recognize(pair(opt(char('-')), digit1));
    map_res(signed_digits, str::parse::<i32>)(input)
}
 
// Zero or more alphanumeric characters (Unicode-aware, via char::is_alphanumeric).
// Never fails: an input with no leading alphanumerics yields an empty match.
fn alphanumeric0(input: &str) -> IResult<&str, &str> {
    take_while(char::is_alphanumeric)(input)
}
 
fn main() {
    // Unsigned number: parsing stops at the first non-digit.
    let (_, number) = parse_number("42 abc").unwrap();
    println!("Number: {}", number);

    // Identifier: parsing stops at the space.
    let (_, identifier) = parse_identifier("hello123 world").unwrap();
    println!("Identifier: {}", identifier);

    // Signed integer with a leading minus.
    let (_, negative) = parse_signed_int("-42").unwrap();
    println!("Negative: {}", negative);
}

Sequence and Choice Combinators

use nom::{
    IResult,
    bytes::complete::tag,
    character::complete::{alpha1, space0, space1, char},
    sequence::{pair, tuple, preceded, delimited, terminated},
    combinator::opt,
    branch::alt,
};
 
// Sequence: parse two things in order
fn parse_key_value(input: &str) -> IResult<&str, (&str, &str)> {
    let (input, key) = alpha1(input)?;
    let (input, _) = tag("=")(input)?;
    let (input, value) = alpha1(input)?;
    Ok((input, (key, value)))
}
 
// Same grammar as parse_key_value, expressed with the tuple combinator:
// all three parts are parsed in one call and the separator is dropped afterwards.
fn parse_key_value_tuple(input: &str) -> IResult<&str, (&str, &str)> {
    let (rest, parts) = tuple((alpha1, tag("="), alpha1))(input)?;
    let (key, _equals, value) = parts;
    Ok((rest, (key, value)))
}
 
// Delimited: parse a run of letters enclosed in double quotes,
// returning only the content between the quotes.
fn parse_quoted(input: &str) -> IResult<&str, &str> {
    let quote = || char('"');
    delimited(quote(), alpha1, quote())(input)
}
 
// Preceded: skip prefix, return following content
fn parse_label(input: &str) -> IResult<&str, &str> {
    preceded(tag("label: "), alpha1)(input)
}
 
// Terminated: parse content, skip suffix
fn parse_statement(input: &str) -> IResult<&str, &str> {
    terminated(alpha1, char(';'))(input)
}
 
// Choice: accept one of the four direction keywords.
// Alternatives are tried in the order listed; the first match wins.
fn parse_direction(input: &str) -> IResult<&str, &str> {
    let directions = (tag("up"), tag("down"), tag("left"), tag("right"));
    alt(directions)(input)
}
 
fn main() {
    // key=value pair
    let (_, (key, value)) = parse_key_value("name=value").unwrap();
    println!("{} = {}", key, value);

    // content between double quotes
    let (_, inner) = parse_quoted("\"hello\"").unwrap();
    println!("Quoted: {}", inner);

    // content after a fixed prefix
    let (_, label_text) = parse_label("label: test").unwrap();
    println!("Label: {}", label_text);

    // content before a ';' terminator
    let (_, statement) = parse_statement("print;").unwrap();
    println!("Statement: {}", statement);

    // one keyword out of several alternatives
    let (_, direction) = parse_direction("up").unwrap();
    println!("Direction: {}", direction);
}

Repetition and Counting

use nom::{
    IResult,
    bytes::complete::tag,
    character::complete::{alpha1, char, digit1, space0},
    multi::{many0, many1, separated_list0, separated_list1, count},
    sequence::delimited,
};
 
// Zero or more occurrences: collect consecutive runs of letters.
// Stops (successfully) at the first position where alpha1 no longer matches.
fn parse_words(input: &str) -> IResult<&str, Vec<&str>> {
    let mut words = many0(alpha1);
    words(input)
}
 
// Zero or more items with a separator: comma-separated runs of letters.
// separated_list0 also accepts an empty list; use separated_list1 to
// require at least one item.
fn parse_csv(input: &str) -> IResult<&str, Vec<&str>> {
    let comma = tag(",");
    let mut fields = separated_list0(comma, alpha1);
    fields(input)
}
 
// Parse a bracketed list of integers such as "[1, 2, 3]".
// Items must be separated by exactly ", "; the brackets are discarded.
fn parse_list(input: &str) -> IResult<&str, Vec<i32>> {
    let items = separated_list0(tag(", "), parse_i32);
    delimited(char('['), items, char(']'))(input)
}
 
// Parse a run of ASCII digits and convert it to an i32.
// A value that overflows i32 is reported as a parse error.
fn parse_i32(input: &str) -> IResult<&str, i32> {
    nom::combinator::map_res(digit1, str::parse::<i32>)(input)
}
 
// Parse exactly N items
fn parse_three_numbers(input: &str) -> IResult<&str, Vec<i32>> {
    count(parse_i32, 3)(input)
}
 
fn main() {
    let (_, words) = parse_words("abc123").unwrap();
    println!("Words: {:?}", words);  // ["abc"]
    
    let (_, csv) = parse_csv("a,b,c").unwrap();
    println!("CSV: {:?}", csv);
    
    let (_, list) = parse_list("[1, 2, 3]").unwrap();
    println!("List: {:?}", list);
    
    let (_, three) = parse_three_numbers("123").unwrap();
    println!("Three numbers: {:?}", three);
}

Building a Calculator Expression Parser

use nom::{
    IResult,
    bytes::complete::tag,
    character::complete::{char, digit1, space0},
    sequence::{delimited, pair},
    combinator::{map, map_res},
    multi::many0,
};
 
// Abstract syntax tree for an arithmetic expression.
// Binary variants own their operands as (left, right); Box is required
// because the type is recursive.
// PartialEq/Eq are derived so parsed trees can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Expr {
    // Integer literal
    Number(i64),
    // left + right
    Add(Box<Expr>, Box<Expr>),
    // left - right
    Sub(Box<Expr>, Box<Expr>),
    // left * right
    Mul(Box<Expr>, Box<Expr>),
    // left / right
    Div(Box<Expr>, Box<Expr>),
}
 
fn parse_number(input: &str) -> IResult<&str, Expr> {
    map_res(digit1, |s: &str| {
        s.parse::<i64>().map(Expr::Number)
    })(input)
}
 
fn parse_parens(input: &str) -> IResult<&str, Expr> {
    delimited(char('('), parse_expr, char(')'))(input)
}
 
fn parse_atom(input: &str) -> IResult<&str, Expr> {
    let (input, _) = space0(input)?;
    let (input, expr) = nom::branch::alt((parse_parens, parse_number))(input)?;
    let (input, _) = space0(input)?;
    Ok((input, expr))
}
 
// Parse multiplication/division (higher precedence).
// Grammar: atom (('*' | '/') atom)*, folded left-to-right so that
// "8 / 4 / 2" groups as (8 / 4) / 2.
fn parse_term(input: &str) -> IResult<&str, Expr> {
    let (rest, first) = parse_atom(input)?;
    let (rest, tail) = many0(pair(
        nom::branch::alt((char('*'), char('/'))),
        parse_atom,
    ))(rest)?;

    let term = tail.into_iter().fold(first, |lhs, (op, rhs)| {
        let (lhs, rhs) = (Box::new(lhs), Box::new(rhs));
        match op {
            '*' => Expr::Mul(lhs, rhs),
            '/' => Expr::Div(lhs, rhs),
            // The operator parser above only accepts '*' and '/'.
            _ => unreachable!(),
        }
    });

    Ok((rest, term))
}
 
// Parse addition/subtraction (lower precedence).
// Grammar: term (('+' | '-') term)*, folded left-to-right. Operands are
// parsed by parse_term, so '*' and '/' bind tighter than '+' and '-'.
fn parse_expr(input: &str) -> IResult<&str, Expr> {
    let (rest, first) = parse_term(input)?;
    let (rest, tail) = many0(pair(
        nom::branch::alt((char('+'), char('-'))),
        parse_term,
    ))(rest)?;

    let expr = tail.into_iter().fold(first, |lhs, (op, rhs)| {
        let (lhs, rhs) = (Box::new(lhs), Box::new(rhs));
        match op {
            '+' => Expr::Add(lhs, rhs),
            '-' => Expr::Sub(lhs, rhs),
            // The operator parser above only accepts '+' and '-'.
            _ => unreachable!(),
        }
    });

    Ok((rest, expr))
}
 
// Recursively evaluate an expression tree to an i64.
// The left operand is evaluated before the right one. Division is integer
// division and panics on a zero divisor, like the `/` operator on i64.
fn evaluate(expr: &Expr) -> i64 {
    match expr {
        Expr::Number(value) => *value,
        Expr::Add(lhs, rhs) => {
            let (left, right) = (evaluate(lhs), evaluate(rhs));
            left + right
        }
        Expr::Sub(lhs, rhs) => {
            let (left, right) = (evaluate(lhs), evaluate(rhs));
            left - right
        }
        Expr::Mul(lhs, rhs) => {
            let (left, right) = (evaluate(lhs), evaluate(rhs));
            left * right
        }
        Expr::Div(lhs, rhs) => {
            let (left, right) = (evaluate(lhs), evaluate(rhs));
            left / right
        }
    }
}
 
fn main() {
    // Sample inputs covering literals, both precedence levels and parentheses.
    let expressions = [
        "42",
        "2 + 3",
        "10 - 4",
        "2 * 3 + 4",
        "2 + 3 * 4",
        "(2 + 3) * 4",
        "10 / 2 + 3",
    ];

    for source in expressions {
        // Any unparsed remainder is ignored; only the parsed tree is evaluated.
        let (_, tree) = parse_expr(source).unwrap();
        println!("{} = {}", source, evaluate(&tree));
    }
}

Parsing Key-Value Configuration

use nom::{
    IResult,
    bytes::complete::{tag, take_till, take_while},
    character::complete::{char, multispace0, not_line_ending},
    sequence::{preceded, terminated},
    combinator::{map, opt},
    multi::many0,
};
use std::collections::HashMap;
 
// Parse until end of line
fn till_end_of_line(input: &str) -> IResult<&str, &str> {
    terminated(
        take_till(|c| c == '\n' || c == '\r'),
        opt(tag("\r\n"))
    )(input)
}
 
// Parse a comment line: '#' followed by everything up to (not including)
// the line ending. The comment text is discarded.
fn parse_comment(input: &str) -> IResult<&str, ()> {
    let (rest, _text) = preceded(char('#'), not_line_ending)(input)?;
    Ok((rest, ()))
}
 
// Parse whitespace and comments
fn parse_ws_or_comment(input: &str) -> IResult<&str, ()> {
    map(
        many0(preceded(multispace0, opt(parse_comment))),
        |_| ()
    )(input)
}
 
// Parse a key-value pair
fn parse_kv(input: &str) -> IResult<&str, (String, String)> {
    let (input, _) = multispace0(input)?;
    let (input, key) = take_while(|c: char| c.is_alphanumeric() || c == '_')(input)?;
    let (input, _) = preceded(multispace0, char('='))(input)?;
    let (input, value) = preceded(multispace0, till_end_of_line)(input)?;
    
    Ok((input, (key.to_string(), value.trim().to_string())))
}
 
// Parse entire config
fn parse_config(input: &str) -> IResult<&str, HashMap<String, String>> {
    let (input, _) = parse_ws_or_comment(input)?;
    let (input, pairs) = many0(parse_kv)(input)?;
    
    let mut config = HashMap::new();
    for (key, value) in pairs {
        config.insert(key, value);
    }
    
    Ok((input, config))
}
 
fn main() {
    // Sample configuration with comments and a blank-ish line between entries.
    let config_text = r#"
# Server configuration
host = localhost
port = 8080
 
# Database
database_url = postgres://user:pass@localhost/db
max_connections = 10
"#;

    let (_, settings) = parse_config(config_text).unwrap();

    // HashMap iteration order is unspecified, so the output order may vary.
    settings
        .iter()
        .for_each(|(key, value)| println!("{} = {}", key, value));
}

Binary Format Parsing

use nom::{
    IResult,
    bytes::complete::{tag, take},
    number::complete::{be_u16, be_u32, le_u16, le_u32},
    sequence::tuple,
};
 
// Fixed-size file header, 12 bytes on the wire; all integers are
// little-endian (see parse_header).
// Clone/PartialEq/Eq are derived so parsed headers can be copied and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
struct BinaryHeader {
    // First four bytes of the input, stored as-is
    magic: [u8; 4],
    version: u16,
    flags: u16,
    length: u32,
}
 
// Variable-size record: id, value and a length-prefixed name
// (see parse_record for the wire layout).
// Clone/PartialEq/Eq are derived so parsed records can be copied and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
struct BinaryRecord {
    id: u32,
    value: u16,
    // Decoded lossily from the record's name bytes
    name: String,
}
 
fn parse_header(input: &[u8]) -> IResult<&[u8], BinaryHeader> {
    let (input, magic_bytes) = take(4usize)(input)?;
    let magic: [u8; 4] = magic_bytes.try_into().unwrap();
    
    let (input, version) = le_u16(input)?;
    let (input, flags) = le_u16(input)?;
    let (input, length) = le_u32(input)?;
    
    Ok((input, BinaryHeader { magic, version, flags, length }))
}
 
// Parse one record: id (u32), value (u16), name length (u16), then that many
// name bytes. All integers are little-endian. Fails if the input ends before
// the declared name length is available.
fn parse_record(input: &[u8]) -> IResult<&[u8], BinaryRecord> {
    let (input, id) = le_u32(input)?;
    let (input, value) = le_u16(input)?;
    let (input, name_len) = le_u16(input)?;
    // u16 -> usize is a lossless widening; usize::from makes that explicit.
    let (input, name_bytes) = take(usize::from(name_len))(input)?;
    // Invalid UTF-8 sequences are replaced with U+FFFD rather than rejected.
    let name = String::from_utf8_lossy(name_bytes).into_owned();

    Ok((input, BinaryRecord { id, value, name }))
}
 
fn main() {
    // Simulated binary data: a 12-byte header assembled field by field.
    let mut data: Vec<u8> = Vec::with_capacity(12);
    data.extend_from_slice(b"TEST");                    // magic
    data.extend_from_slice(&[0x01, 0x00]);              // version (little-endian u16)
    data.extend_from_slice(&[0x00, 0x10]);              // flags
    data.extend_from_slice(&[0x10, 0x00, 0x00, 0x00]);  // length

    let (_, header) = parse_header(&data).unwrap();
    println!("Header: {:?}", header);
}

Error Handling with Context

use nom::{
    IResult,
    bytes::complete::tag,
    character::complete::alpha1,
    error::{context, ErrorKind, VerboseError},
};
 
// Shorthand for parser results over &str input that report failures as
// VerboseError. Note: within this scope the alias shadows the prelude's
// Result name.
type Result<'a, T> = IResult<&'a str, T, VerboseError<&'a str>>;
 
// Match the literal "hello"; a failure is annotated with the "greeting" context.
fn parse_greeting(input: &str) -> Result<&str> {
    let hello = tag("hello");
    context("greeting", hello)(input)
}
 
// Match a run of letters; a failure is annotated with the "name" context.
fn parse_name(input: &str) -> Result<&str> {
    let mut name = context("name", alpha1);
    name(input)
}
 
// Parse "<greeting> <name>" and return both parts.
// Each step adds its own context label on top of the inner parser's label,
// so a failure records which stage of the sequence was being parsed.
fn parse_full(input: &str) -> Result<(&str, &str)> {
    let (after_greeting, greeting) = context("full_greeting", parse_greeting)(input)?;
    let (after_space, _) = context("space", tag(" "))(after_greeting)?;
    context("full_name", parse_name)(after_space)
        .map(|(rest, name)| (rest, (greeting, name)))
}
 
fn main() {
    // Successful parse
    match parse_full("hello world") {
        Ok((remaining, (greeting, name))) => {
            println!("Greeting: {}, Name: {}", greeting, name);
            println!("Remaining: '{}'", remaining);
        }
        Err(e) => println!("Error: {:?}", e),
    }
    
    // Failed parse with context
    match parse_full("goodbye world") {
        Ok(_) => println!("Unexpected success"),
        Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => {
            println!("Parse error: {:?}", e);
        }
        Err(nom::Err::Incomplete(_)) => println!("Need more input"),
    }
}

Summary

  • Parsers return IResult<Input, Output, Error> — Ok((remaining, output)) on success
  • Use tag("literal") to match exact strings, alpha1/digit1 for character classes
  • map(parser, f) transforms parser output with a function
  • map_res(parser, f) applies a fallible function (like parse::<i32>())
  • Sequence combinators: tuple, pair, preceded, terminated, delimited
  • Choice combinator: alt tries multiple parsers in order
  • Repetition: many0 (zero+), many1 (one+), separated_list0/separated_list1
  • For recursive grammars, use Box<Expr> or fn pointers
  • Binary parsing: use nom::number::complete::{le_u16, be_u32, ...} for endianness
  • Add error context with context("name", parser) and VerboseError
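  • Operator precedence: the lower-precedence parser (parse_expr) calls the higher-precedence parser (parse_term) for its operands, so tighter-binding operators are grouped first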
  • Input can be &str (text) or &[u8] (binary) — parsers adapt automatically
  • Use take(n) for fixed-length, take_while(predicate) for variable-length