How do I parse structured text and data in Rust?

Walkthrough

Nom is a parser combinator library that builds complex parsers from small, reusable functions. Each parser takes input and returns IResult with remaining input and output, or an error. Parsers are composed together to handle increasingly complex formats. Nom works with &str, &[u8], and custom input types.

Core concepts:

  1. Combinators — functions that combine or transform parsers
  2. IResult<I, O, E> — an alias for Result<(I, O), nom::Err<E>>, where I is the remaining input and O is the parsed output
  3. Streaming vs Complete — streaming for chunked input, complete for all-at-once parsing
  4. Error handling — rich error types with context and location information

Nom excels at parsing protocols, file formats, configuration files, and domain-specific languages.

Code Example

# Cargo.toml
[dependencies]
nom = "7"

// src/main.rs
use nom::{
    branch::alt,
    bytes::complete::{tag, take_until, take_while, take_while1},
    character::complete::{char, digit1, multispace0, space0, space1},
    combinator::{map, map_res, opt, recognize},
    multi::{many0, many1, separated_list0, separated_list1},
    sequence::{delimited, preceded, separated_pair, terminated, tuple},
    IResult, Parser,
};
 
// ===== Basic Parsers =====
 
/// Matches the literal prefix `hello`.
///
/// On success returns the rest of the input together with the matched
/// `"hello"` slice; any other prefix yields a nom error.
fn parse_hello(input: &str) -> IResult<&str, &str> {
    let literal = tag("hello");
    literal(input)
}
 
/// Reads one or more leading ASCII digits and converts them to an `i32`.
///
/// Fails when the input does not start with a digit or when the digit run
/// does not fit in an `i32`. A leading sign is not accepted.
fn parse_digits(input: &str) -> IResult<&str, i32> {
    let to_i32 = |digits: &str| digits.parse::<i32>();
    let mut number = map_res(digit1, to_i32);
    number(input)
}
 
/// Takes the longest non-empty prefix made of alphabetic characters.
///
/// Fails when the first character is not alphabetic (or the input is empty).
fn parse_alpha(input: &str) -> IResult<&str, &str> {
    let is_letter = |c: char| c.is_alphabetic();
    take_while1(is_letter)(input)
}
 
/// Runs the basic parsers on two fixed sample inputs and prints what each
/// one matched alongside the input it left unconsumed.
fn main() {
    // Literal match: "hello" is consumed, " world" is left over.
    let (rest, greeting) = parse_hello("hello world").unwrap();
    println!("Matched: '{}', Remaining: '{}'", greeting, rest);

    // Leading digit run converted to an i32.
    let (rest, value) = parse_digits("42 is the answer").unwrap();
    println!("Number: {}, Remaining: '{}'", value, rest);
}

Parsing Structured Data

use nom::{
    bytes::complete::tag,
    character::complete::{char, digit1, space0},
    combinator::map_res,
    sequence::separated_pair,
    IResult,
};
 
// Parse key=value pairs
fn parse_key_value(input: &str) -> IResult<&str, (&str, &str)> {
    let (input, key) = nom::bytes::complete::take_until("=")(input)?;
    let (input, _) = tag("=")(input)?;
    let (input, value) = nom::bytes::complete::take_until("\n")(input)?;
    Ok((input, (key, value)))
}
 
/// Parses a `name: age` record step by step.
///
/// The name is everything before the first `:`; it must be followed by the
/// exact separator `": "` and then a run of ASCII digits that fits in a `u32`.
fn parse_person(input: &str) -> IResult<&str, Person> {
    let (after_name, raw_name) = nom::bytes::complete::take_until(":")(input)?;
    let (after_sep, _) = tag(": ")(after_name)?;

    // The name is only copied into an owned String once the age has parsed.
    map_res(digit1, |digits: &str| digits.parse::<u32>())(after_sep).map(|(rest, age)| {
        (
            rest,
            Person {
                name: raw_name.to_string(),
                age,
            },
        )
    })
}
 
/// A person record with an owned name and an age.
#[derive(Debug, Clone)]
struct Person {
    /// The person's name.
    name: String,
    /// The person's age.
    age: u32,
}
 
/// Parses a `name: age` record with a single `separated_pair` combinator.
///
/// Accepts the same input as the step-by-step version: a name up to the first
/// `:`, the separator `": "`, and a digit run that fits in a `u32`.
fn parse_person_idiomatic(input: &str) -> IResult<&str, Person> {
    let name_field = nom::bytes::complete::take_until(":");
    let age_field = map_res(digit1, |digits: &str| digits.parse::<u32>());
    let mut record = separated_pair(name_field, tag(": "), age_field);

    let (rest, (name, age)) = record(input)?;
    let person = Person {
        name: name.to_owned(),
        age,
    };
    Ok((rest, person))
}

Parsing Expressions (Calculator)

use nom::{
    branch::alt,
    bytes::complete::tag,
    character::complete::{char, digit1, multispace0, space0},
    combinator::{map, map_res},
    sequence::delimited,
    IResult,
};
 
/// Arithmetic expression tree over 64-bit signed integers.
///
/// Binary variants hold the left operand first and the right operand second.
#[derive(Debug, Clone)]
enum Expr {
    /// An integer literal.
    Number(i64),
    /// Addition of two sub-expressions.
    Add(Box<Expr>, Box<Expr>),
    /// Subtraction of the second sub-expression from the first.
    Sub(Box<Expr>, Box<Expr>),
    /// Multiplication of two sub-expressions.
    Mul(Box<Expr>, Box<Expr>),
    /// Division of the first sub-expression by the second.
    Div(Box<Expr>, Box<Expr>),
}
 
/// Parses an unsigned decimal literal into `Expr::Number`.
///
/// Fails when the input does not start with a digit or when the digit run
/// does not fit in an `i64`.
fn parse_number(input: &str) -> IResult<&str, Expr> {
    let to_literal = |digits: &str| digits.parse::<i64>().map(Expr::Number);
    map_res(digit1, to_literal)(input)
}
 
fn parse_parens(input: &str) -> IResult<&str, Expr> {
    delimited(
        char('('),
        parse_expr,
        char(')'),
    )(input)
}
 
/// Parses the tightest-binding unit of an expression: a number literal or a
/// parenthesised sub-expression, with any leading whitespace skipped first.
fn parse_factor(input: &str) -> IResult<&str, Expr> {
    multispace0(input).and_then(|(trimmed, _)| alt((parse_number, parse_parens))(trimmed))
}
 
/// Parses a left-associative chain of factors joined by `*` or `/`.
///
/// Whitespace around the operators is skipped. When no further operator
/// follows, the whitespace probed for it is NOT consumed: the returned
/// remainder starts right after the last factor.
fn parse_term(input: &str) -> IResult<&str, Expr> {
    let (mut rest, mut acc) = parse_factor(input)?;

    loop {
        // Look past whitespace for an operator without committing to it yet.
        let (probe, _) = multispace0(rest)?;

        let (after_op, is_mul) = if let Some(tail) = probe.strip_prefix('*') {
            (tail, true)
        } else if let Some(tail) = probe.strip_prefix('/') {
            (tail, false)
        } else {
            break;
        };

        let (operand_start, _) = multispace0(after_op)?;
        let (after_operand, rhs) = parse_factor(operand_start)?;

        // Fold to the left so that `a / b / c` becomes `(a / b) / c`.
        acc = if is_mul {
            Expr::Mul(Box::new(acc), Box::new(rhs))
        } else {
            Expr::Div(Box::new(acc), Box::new(rhs))
        };
        rest = after_operand;
    }

    Ok((rest, acc))
}
 
/// Parses a left-associative chain of terms joined by `+` or `-`.
///
/// This is the loosest-binding layer of the grammar, so `2 + 3 * 4` groups as
/// `2 + (3 * 4)`. When no further operator follows, the whitespace probed for
/// it is NOT consumed: the returned remainder starts right after the last term.
fn parse_expr(input: &str) -> IResult<&str, Expr> {
    let (mut rest, mut acc) = parse_term(input)?;

    loop {
        // Look past whitespace for an operator without committing to it yet.
        let (probe, _) = multispace0(rest)?;

        let (after_op, is_add) = if let Some(tail) = probe.strip_prefix('+') {
            (tail, true)
        } else if let Some(tail) = probe.strip_prefix('-') {
            (tail, false)
        } else {
            break;
        };

        let (operand_start, _) = multispace0(after_op)?;
        let (after_operand, rhs) = parse_term(operand_start)?;

        // Fold to the left so that `a - b - c` becomes `(a - b) - c`.
        acc = if is_add {
            Expr::Add(Box::new(acc), Box::new(rhs))
        } else {
            Expr::Sub(Box::new(acc), Box::new(rhs))
        };
        rest = after_operand;
    }

    Ok((rest, acc))
}
 
fn eval(expr: &Expr) -> i64 {
    match expr {
        Expr::Number(n) => *n,
        Expr::Add(a, b) => eval(a) + eval(b),
        Expr::Sub(a, b) => eval(a) - eval(b),
        Expr::Mul(a, b) => eval(a) * eval(b),
        Expr::Div(a, b) => eval(a) / eval(b),
    }
}
 
/// Parses and evaluates a fixed list of sample expressions, printing either
/// the result or the reason the expression was rejected.
fn main() {
    let expressions = [
        "42",
        "2 + 3",
        "10 - 4",
        "3 * 4",
        "2 + 3 * 4",
        "(2 + 3) * 4",
        "10 / 2 + 3",
    ];

    for expr in expressions {
        match parse_expr(expr) {
            // `parse_expr` leaves trailing whitespace unconsumed, so only
            // non-whitespace leftovers count as a failure.
            Ok((remaining, parsed)) if remaining.trim().is_empty() => {
                println!("{} = {}", expr, eval(&parsed));
            }
            // A successful prefix parse is not a successful parse: report the
            // input that was left over instead of silently ignoring it.
            Ok((remaining, _)) => {
                println!(
                    "Error parsing '{}': unexpected trailing input '{}'",
                    expr, remaining
                );
            }
            Err(e) => println!("Error parsing '{}': {:?}", expr, e),
        }
    }
}

Parsing JSON-like Configuration

use nom::{
    branch::alt,
    bytes::complete::{tag, take_until, take_while},
    character::complete::{char, digit1, multispace0, none_of},
    combinator::{cut, map, map_res, opt, value},
    multi::{many0, separated_list0},
    sequence::{delimited, preceded, terminated},
    IResult,
};
 
/// A parsed JSON-like value.
#[derive(Debug, Clone, PartialEq)]
enum JsonValue {
    /// The literal `null`.
    Null,
    /// The literal `true` or `false`.
    Bool(bool),
    /// A numeric value, stored as a 64-bit float.
    Number(f64),
    /// A string value (owned).
    String(String),
    /// An ordered list of values.
    Array(Vec<JsonValue>),
    /// A map from string keys to values; key order is not preserved.
    Object(std::collections::HashMap<String, JsonValue>),
}
 
/// Matches the literal `null` and yields `JsonValue::Null`.
fn parse_null(input: &str) -> IResult<&str, JsonValue> {
    tag("null")(input).map(|(rest, _)| (rest, JsonValue::Null))
}
 
fn parse_bool(input: &str) -> IResult<&str, JsonValue> {
    alt((
        value(JsonValue::Bool(true), tag("true")),
        value(JsonValue::Bool(false), tag("false")),
    ))(input)
}
 
fn parse_number(input: &str) -> IResult<&str, JsonValue> {
    map_res(
        recognize(|input: &str| {
            let (input, _) = opt(char('-'))(input)?;
            let (input, _) = digit1(input)?;
            let (input, _) = opt(preceded(char('.'), digit1))(input)?;
            let (input, _) = opt(preceded(alt((char('e'), char('E'))), digit1))(input)?;
            Ok((input, ()))
        }),
        |s: &str| s.parse::<f64>().map(JsonValue::Number),
    )(input)
}
 
fn parse_string(input: &str) -> IResult<&str, JsonValue> {
    let (input, _) = char('"')(input)?;
    let (input, content) = many0(none_of("\""))(input)?;
    let (input, _) = char('"')(input)?;
    
    Ok((input, JsonValue::String(content.into_iter().collect())))
}
 
fn parse_array(input: &str) -> IResult<&str, JsonValue> {
    let (input, _) = char('[')(input)?;
    let (input, _) = multispace0(input)?;
    let (input, values) = separated_list0(
        delimited(multispace0, char(','), multispace0),
        parse_json_value,
    )(input)?;
    let (input, _) = multispace0(input)?;
    let (input, _) = char(']')(input)?;
    
    Ok((input, JsonValue::Array(values)))
}
 
fn parse_object(input: &str) -> IResult<&str, JsonValue> {
    let (input, _) = char('{')(input)?;
    let (input, _) = multispace0(input)?;
    
    let (input, pairs) = separated_list0(
        delimited(multispace0, char(','), multispace0),
        |input| {
            let (input, _) = multispace0(input)?;
            let (input, key) = parse_string(input)?;
            let (input, _) = multispace0(input)?;
            let (input, _) = char(':')(input)?;
            let (input, _) = multispace0(input)?;
            let (input, value) = parse_json_value(input)?;
            
            let JsonValue::String(key_str) = key else {
                panic!("Expected string key");
            };
            
            Ok((input, (key_str, value)))
        },
    )(input)?;
    
    let (input, _) = multispace0(input)?;
    let (input, _) = char('}')(input)?;
    
    Ok((input, JsonValue::Object(pairs.into_iter().collect())))
}
 
fn parse_json_value(input: &str) -> IResult<&str, JsonValue> {
    let (input, _) = multispace0(input)?;
    let (input, value) = alt((
        parse_null,
        parse_bool,
        parse_number,
        parse_string,
        parse_array,
        parse_object,
    ))(input)?;
    let (input, _) = multispace0(input)?;
    
    Ok((input, value))
}
 
/// Parses a fixed list of sample documents and prints each parsed value, or
/// the reason the document was rejected.
fn main() {
    let inputs = [
        r#"null"#,
        r#"true"#,
        r#"42"#,
        r#""hello""#,
        r#"[1, 2, 3]"#,
        r#"{"name": "Alice", "age": 30}"#,
    ];

    for input in inputs {
        match parse_json_value(input) {
            // `parse_json_value` already consumes trailing whitespace, so a
            // complete document leaves nothing behind.
            Ok(("", value)) => println!("Parsed: {:?}", value),
            // Anything left over means only a prefix of the input was valid.
            Ok((remaining, _)) => {
                println!("Error: unexpected trailing input {:?}", remaining)
            }
            Err(e) => println!("Error: {:?}", e),
        }
    }
}

Summary

  • Use tag("literal") to match exact strings and take_while(pred) for variable-length matches
  • Convert parsed strings with map_res(parser, |s| s.parse::<T>())
  • Combine parsers with tuple((parser1, parser2, ...)) for sequences
  • Use alt((parser1, parser2, ...)) for alternatives (OR)
  • separated_list0(sep, item) parses comma-separated lists (zero or more)
  • delimited(open, content, close) wraps content with delimiters like parentheses
  • preceded(before, parser) and terminated(parser, after) ignore parts of input
  • many0(parser) for zero or more, many1(parser) for one or more repetitions
  • Handle whitespace with multispace0 between tokens
  • For expression parsing, handle operator precedence with layered parsers (factor → term → expr, from tightest to loosest binding)
  • Nom's error types can be extended with custom context for better error messages
  • Use nom::error::VerboseError with nom::combinator::cut for detailed error reporting