How do I parse structured text in Rust?
Walkthrough
Nom is a parser combinator library that builds complex parsers from small, composable functions. Instead of writing grammar files and generating code, you write parsers directly in Rust using combinators—functions that take parsers and return new parsers. Nom is fast, produces zero allocations for many patterns, and handles streaming input gracefully.
Key concepts:
- Parsers — functions taking input and returning `IResult<Input, Output, Error>`
- Combinators — functions combining parsers (sequence, choice, repetition)
- Input types — `&str` for text, `&[u8]` for binary
- Error handling — rich error types with context
- Streaming/Complete — handle partial or complete input
Nom excels at parsing text protocols, binary formats, configuration files, and domain-specific languages.
Code Example
# Cargo.toml
[dependencies]
nom = "7"

use nom::{
IResult,
bytes::complete::{tag, take, take_while},
character::complete::{alpha1, digit1, space0, space1},
sequence::{pair, preceded, separated_pair, tuple},
combinator::map,
};
// Basic parser: matches the literal "hello" at the start of the input.
// On success the result carries the unconsumed remainder and the matched slice.
fn parse_hello(input: &str) -> IResult<&str, &str> {
    let (rest, matched) = tag("hello")(input)?;
    Ok((rest, matched))
}
fn main() {
    // Successful parse: the tag is split off and the rest is handed back.
    let outcome = parse_hello("hello world");
    let (rest, hit) = outcome.unwrap();
    println!("Matched: '{}'", hit); // "hello"
    println!("Remaining: '{}'", rest); // " world"

    // Failed parse: the input does not start with "hello".
    let failure = parse_hello("goodbye");
    println!("Result: {:?}", failure); // Err
}

Parsing Numbers and Identifiers
use nom::{
IResult,
bytes::complete::{tag, take_while},
character::complete::{
alpha1, alphanumeric1, digit1,
space0, space1, multispace0,
char,
},
combinator::{map, map_res, opt, recognize},
sequence::{pair, tuple},
};
// Parse a leading run of digits and convert it to an i32.
// Fails if there is no leading digit or the digits do not fit in an i32.
fn parse_number(input: &str) -> IResult<&str, i32> {
    let mut digits_as_i32 = map_res(digit1, str::parse::<i32>);
    digits_as_i32(input)
}
// Parse an identifier: one or more letters, then any number of alphanumerics.
// `recognize` returns the whole matched slice instead of the (head, tail) pair.
fn parse_identifier(input: &str) -> IResult<&str, &str> {
    let head_then_tail = pair(alpha1, alphanumeric0);
    let mut identifier = recognize(head_then_tail);
    identifier(input)
}
// Parse an integer with an optional leading '-'.
// The sign and digits are captured as one slice so `str::parse` sees "-42" whole.
fn parse_signed_int(input: &str) -> IResult<&str, i32> {
    let sign = opt(char('-'));
    let literal = recognize(pair(sign, digit1));
    let mut signed = map_res(literal, |text: &str| text.parse::<i32>());
    signed(input)
}
// Zero or more alphanumeric characters (as defined by `char::is_alphanumeric`).
// Never fails: an input with no leading alphanumerics yields an empty match.
fn alphanumeric0(input: &str) -> IResult<&str, &str> {
    let word_chars = take_while(char::is_alphanumeric);
    word_chars(input)
}
fn main() {
    // Parse number: only the parsed value is kept, the remainder is dropped.
    let num = parse_number("42 abc").unwrap().1;
    println!("Number: {}", num);

    // Parse identifier
    let id = parse_identifier("hello123 world").unwrap().1;
    println!("Identifier: {}", id);

    // Parse signed integer
    let neg = parse_signed_int("-42").unwrap().1;
    println!("Negative: {}", neg);
}

Sequence and Choice Combinators
use nom::{
IResult,
bytes::complete::tag,
character::complete::{alpha1, space0, space1, char},
sequence::{pair, tuple, preceded, delimited, terminated},
combinator::opt,
branch::alt,
};
// Sequence: parse two things in order
fn parse_key_value(input: &str) -> IResult<&str, (&str, &str)> {
let (input, key) = alpha1(input)?;
let (input, _) = tag("=")(input)?;
let (input, value) = alpha1(input)?;
Ok((input, (key, value)))
}
// Same grammar as `parse_key_value`, expressed with the `tuple` combinator.
// The middle element (the "=" match) is dropped from the output.
fn parse_key_value_tuple(input: &str) -> IResult<&str, (&str, &str)> {
    tuple((alpha1, tag("="), alpha1))(input)
        .map(|(rest, (key, _, value))| (rest, (key, value)))
}
// Delimited: parse content between delimiters
fn parse_quoted(input: &str) -> IResult<&str, &str> {
delimited(char('"'), alpha1, char('"'))(input)
}
// Preceded: skip prefix, return following content
fn parse_label(input: &str) -> IResult<&str, &str> {
preceded(tag("label: "), alpha1)(input)
}
// Terminated: parse content, skip suffix
fn parse_statement(input: &str) -> IResult<&str, &str> {
terminated(alpha1, char(';'))(input)
}
// Choice: accept one of the four direction keywords.
// Alternatives are tried in the order listed; the first that matches wins.
fn parse_direction(input: &str) -> IResult<&str, &str> {
    let keywords = (tag("up"), tag("down"), tag("left"), tag("right"));
    let mut any_direction = alt(keywords);
    any_direction(input)
}
fn main() {
    // Each demo keeps only the parsed value and ignores the remaining input.
    let (key, value) = parse_key_value("name=value").unwrap().1;
    println!("{} = {}", key, value);

    let quoted = parse_quoted("\"hello\"").unwrap().1;
    println!("Quoted: {}", quoted);

    let label = parse_label("label: test").unwrap().1;
    println!("Label: {}", label);

    let stmt = parse_statement("print;").unwrap().1;
    println!("Statement: {}", stmt);

    let dir = parse_direction("up").unwrap().1;
    println!("Direction: {}", dir);
}

Repetition and Counting
use nom::{
IResult,
bytes::complete::tag,
character::complete::{alpha1, char, digit1, space0},
multi::{many0, many1, separated_list0, separated_list1, count},
sequence::delimited,
};
// Zero or more occurrences of an alphabetic run.
// Stops at the first position where `alpha1` no longer matches.
fn parse_words(input: &str) -> IResult<&str, Vec<&str>> {
    let mut words = many0(alpha1);
    words(input)
}
// Zero or more alphabetic words separated by commas.
// `separated_list0` accepts an empty list; use `separated_list1` to require an item.
fn parse_csv(input: &str) -> IResult<&str, Vec<&str>> {
    let comma = tag(",");
    let mut fields = separated_list0(comma, alpha1);
    fields(input)
}
// Parse a bracketed list of integers such as "[1, 2, 3]".
// Items are separated by exactly ", "; the brackets are consumed and dropped.
fn parse_list(input: &str) -> IResult<&str, Vec<i32>> {
    let (rest, _) = char('[')(input)?;
    let (rest, items) = separated_list0(tag(", "), parse_i32)(rest)?;
    let (rest, _) = char(']')(rest)?;
    Ok((rest, items))
}
// Parse a leading run of digits as an i32; fails if the value does not fit.
fn parse_i32(input: &str) -> IResult<&str, i32> {
    let mut number = nom::combinator::map_res(digit1, str::parse::<i32>);
    number(input)
}
// Parse exactly three integers, each optionally preceded by spaces or tabs.
//
// `digit1` is greedy, so running `parse_i32` three times back to back can never
// succeed: the first call swallows every digit and the second finds none.
// Skipping blanks before each number gives `count` a boundary to split on.
fn parse_three_numbers(input: &str) -> IResult<&str, Vec<i32>> {
    count(nom::sequence::preceded(space0, parse_i32), 3)(input)
}

fn main() {
    let (_, words) = parse_words("abc123").unwrap();
    println!("Words: {:?}", words); // ["abc"]

    let (_, csv) = parse_csv("a,b,c").unwrap();
    println!("CSV: {:?}", csv);

    let (_, list) = parse_list("[1, 2, 3]").unwrap();
    println!("List: {:?}", list);

    // The numbers must be separated; "123" would be read as a single number.
    let (_, three) = parse_three_numbers("1 2 3").unwrap();
    println!("Three numbers: {:?}", three); // [1, 2, 3]
}

Building a Calculator Expression Parser
use nom::{
IResult,
bytes::complete::tag,
character::complete::{char, digit1, space0},
sequence::{delimited, pair},
combinator::{map, map_res},
multi::many0,
};
// Expression tree produced by the calculator parser.
// Binary variants box both operands (left, right) so the type can be recursive.
#[derive(Debug, Clone)]
enum Expr {
    // Integer literal.
    Number(i64),
    // left + right
    Add(Box<Expr>, Box<Expr>),
    // left - right
    Sub(Box<Expr>, Box<Expr>),
    // left * right
    Mul(Box<Expr>, Box<Expr>),
    // left / right
    Div(Box<Expr>, Box<Expr>),
}
// Parse a run of digits into an `Expr::Number`; fails if the value overflows i64.
fn parse_number(input: &str) -> IResult<&str, Expr> {
    let to_literal = |digits: &str| digits.parse::<i64>().map(Expr::Number);
    let mut literal = map_res(digit1, to_literal);
    literal(input)
}
fn parse_parens(input: &str) -> IResult<&str, Expr> {
delimited(char('('), parse_expr, char(')'))(input)
}
fn parse_atom(input: &str) -> IResult<&str, Expr> {
let (input, _) = space0(input)?;
let (input, expr) = nom::branch::alt((parse_parens, parse_number))(input)?;
let (input, _) = space0(input)?;
Ok((input, expr))
}
// Parse multiplication/division (higher precedence).
// Grammar: atom (('*' | '/') atom)*, folded left to right so that
// "8 / 4 / 2" becomes Div(Div(8, 4), 2).
fn parse_term(input: &str) -> IResult<&str, Expr> {
    let (rest, first) = parse_atom(input)?;

    let operator = nom::branch::alt((char('*'), char('/')));
    let (rest, tail) = many0(pair(operator, parse_atom))(rest)?;

    let folded = tail.into_iter().fold(first, |acc, (symbol, rhs)| {
        let (lhs, rhs) = (Box::new(acc), Box::new(rhs));
        match symbol {
            '*' => Expr::Mul(lhs, rhs),
            '/' => Expr::Div(lhs, rhs),
            _ => unreachable!(),
        }
    });
    Ok((rest, folded))
}
// Parse addition/subtraction (lower precedence).
// Grammar: term (('+' | '-') term)*. Operands are parsed by `parse_term`, so
// '*' and '/' bind tighter; the chain itself is folded left to right.
fn parse_expr(input: &str) -> IResult<&str, Expr> {
    let (rest, first) = parse_term(input)?;

    let operator = nom::branch::alt((char('+'), char('-')));
    let (rest, tail) = many0(pair(operator, parse_term))(rest)?;

    let mut tree = first;
    for (symbol, rhs) in tail {
        let (lhs, rhs) = (Box::new(tree), Box::new(rhs));
        tree = match symbol {
            '+' => Expr::Add(lhs, rhs),
            '-' => Expr::Sub(lhs, rhs),
            _ => unreachable!(),
        };
    }
    Ok((rest, tree))
}
// Recursively compute the integer value of an expression tree.
// Division is integer division; dividing by zero panics, as with plain `/`.
fn evaluate(expr: &Expr) -> i64 {
    // Leaves return immediately; every binary node evaluates left, then right.
    let (lhs, rhs) = match expr {
        Expr::Number(value) => return *value,
        Expr::Add(l, r) | Expr::Sub(l, r) | Expr::Mul(l, r) | Expr::Div(l, r) => {
            (evaluate(l), evaluate(r))
        }
    };
    match expr {
        Expr::Add(..) => lhs + rhs,
        Expr::Sub(..) => lhs - rhs,
        Expr::Mul(..) => lhs * rhs,
        Expr::Div(..) => lhs / rhs,
        Expr::Number(_) => unreachable!("leaf values return before operands are evaluated"),
    }
}
fn main() {
    // Sample inputs covering literals, each operator, precedence and parentheses.
    let expressions = [
        "42",
        "2 + 3",
        "10 - 4",
        "2 * 3 + 4",
        "2 + 3 * 4",
        "(2 + 3) * 4",
        "10 / 2 + 3",
    ];
    for expr in expressions.iter().copied() {
        let parsed = parse_expr(expr).unwrap().1;
        let result = evaluate(&parsed);
        println!("{} = {}", expr, result);
    }
}

Parsing Key-Value Configuration
use nom::{
IResult,
bytes::complete::{tag, take_till, take_while},
character::complete::{char, multispace0, not_line_ending},
sequence::{preceded, terminated},
combinator::{map, opt},
multi::many0,
};
use std::collections::HashMap;
// Parse until end of line
fn till_end_of_line(input: &str) -> IResult<&str, &str> {
terminated(
take_till(|c| c == '\n' || c == '\r'),
opt(tag("\r\n"))
)(input)
}
// Parse a comment line
fn parse_comment(input: &str) -> IResult<&str, ()> {
map(
preceded(char('#'), not_line_ending),
|_| ()
)(input)
}
// Parse whitespace and comments
fn parse_ws_or_comment(input: &str) -> IResult<&str, ()> {
map(
many0(preceded(multispace0, opt(parse_comment))),
|_| ()
)(input)
}
// Parse a key-value pair
fn parse_kv(input: &str) -> IResult<&str, (String, String)> {
let (input, _) = multispace0(input)?;
let (input, key) = take_while(|c: char| c.is_alphanumeric() || c == '_')(input)?;
let (input, _) = preceded(multispace0, char('='))(input)?;
let (input, value) = preceded(multispace0, till_end_of_line)(input)?;
Ok((input, (key.to_string(), value.trim().to_string())))
}
// Parse entire config
fn parse_config(input: &str) -> IResult<&str, HashMap<String, String>> {
let (input, _) = parse_ws_or_comment(input)?;
let (input, pairs) = many0(parse_kv)(input)?;
let mut config = HashMap::new();
for (key, value) in pairs {
config.insert(key, value);
}
Ok((input, config))
}
fn main() {
    // Sample configuration with comments mixed in between the settings.
    let config_text = r#"
# Server configuration
host = localhost
port = 8080
# Database
database_url = postgres://user:pass@localhost/db
max_connections = 10
"#;
    let parsed = parse_config(config_text);
    let (_, config) = parsed.unwrap();
    config
        .iter()
        .for_each(|(key, value)| println!("{} = {}", key, value));
}

Binary Format Parsing
use nom::{
IResult,
bytes::complete::{tag, take},
number::complete::{be_u16, be_u32, le_u16, le_u32},
sequence::tuple,
};
// Fixed-size file header, in the order `parse_header` reads the fields.
#[derive(Debug)]
struct BinaryHeader {
    // First four bytes of the input, copied verbatim.
    magic: [u8; 4],
    // Read as a little-endian u16.
    version: u16,
    // Read as a little-endian u16.
    flags: u16,
    // Read as a little-endian u32.
    length: u32,
}
// A single record as decoded by `parse_record`.
#[derive(Debug)]
struct BinaryRecord {
    // Read as a little-endian u32.
    id: u32,
    // Read as a little-endian u16.
    value: u16,
    // Decoded from a length-prefixed byte string (lossy UTF-8 conversion).
    name: String,
}
fn parse_header(input: &[u8]) -> IResult<&[u8], BinaryHeader> {
let (input, magic_bytes) = take(4usize)(input)?;
let magic: [u8; 4] = magic_bytes.try_into().unwrap();
let (input, version) = le_u16(input)?;
let (input, flags) = le_u16(input)?;
let (input, length) = le_u32(input)?;
Ok((input, BinaryHeader { magic, version, flags, length }))
}
// Decode one record: id (u32), value (u16), then a name stored as a u16 byte
// length followed by that many bytes. All integers are little-endian.
// Invalid UTF-8 in the name is replaced rather than rejected.
fn parse_record(input: &[u8]) -> IResult<&[u8], BinaryRecord> {
    let (input, id) = le_u32(input)?;
    let (input, value) = le_u16(input)?;
    let (input, name_len) = le_u16(input)?;
    let (input, name_bytes) = take(name_len as usize)(input)?;
    let name = String::from_utf8_lossy(name_bytes).to_string();
    Ok((input, BinaryRecord { id, value, name }))
}
fn main() {
// Simulated binary data
let data: Vec<u8> = vec![
b'T', b'E', b'S', b'T', // magic
0x01, 0x00, // version (little-endian u16)
0x00, 0x10, // flags
0x10, 0x00, 0x00, 0x00, // length
];
let (_, header) = parse_header(&data).unwrap();
println!("Header: {:?}", header);
}Error Handling with Context
use nom::{
IResult,
bytes::complete::tag,
character::complete::alpha1,
error::{context, ErrorKind, VerboseError},
};
// Shorthand for parser results over `&str` input whose errors are `VerboseError`.
// Note: within this file the alias takes the place of the prelude's `Result` name.
type Result<'a, T> = IResult<&'a str, T, VerboseError<&'a str>>;
// Match the literal "hello", labelling any failure with the "greeting" context.
fn parse_greeting(input: &str) -> Result<&str> {
    let mut greeting = context("greeting", tag("hello"));
    greeting(input)
}
// Match an alphabetic word, labelling any failure with the "name" context.
fn parse_name(input: &str) -> Result<&str> {
    let mut name = context("name", alpha1);
    name(input)
}
// Parse "<greeting> <name>" and return both parts.
// Each step adds its own context label on top of the inner parser's label.
fn parse_full(input: &str) -> Result<(&str, &str)> {
    let (after_greeting, greeting) = context("full_greeting", parse_greeting)(input)?;
    let (after_space, _) = context("space", tag(" "))(after_greeting)?;
    context("full_name", parse_name)(after_space).map(|(rest, name)| (rest, (greeting, name)))
}
fn main() {
    // Successful parse
    let accepted = parse_full("hello world");
    match accepted {
        Err(e) => println!("Error: {:?}", e),
        Ok((remaining, (greeting, name))) => {
            println!("Greeting: {}, Name: {}", greeting, name);
            println!("Remaining: '{}'", remaining);
        }
    }

    // Failed parse with context: the error lists the context labels it passed through.
    let rejected = parse_full("goodbye world");
    match rejected {
        Err(nom::Err::Incomplete(_)) => println!("Need more input"),
        Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => {
            println!("Parse error: {:?}", e);
        }
        Ok(_) => println!("Unexpected success"),
    }
}

Summary
- Parsers return `IResult<Input, Output, Error>` — `Ok((remaining, output))` on success
- Use `tag("literal")` to match exact strings, `alpha1`/`digit1` for character classes
- `map(parser, f)` transforms parser output with a function
- `map_res(parser, f)` applies a fallible function (like `parse::<i32>()`)
- Sequence combinators: `tuple`, `pair`, `preceded`, `terminated`, `delimited`
- Choice combinator: `alt` tries multiple parsers in order
- Repetition: `many0` (zero+), `many1` (one+), `separated_list0`/`separated_list1`
- For recursive grammars, use `Box<Expr>` or `fn` pointers
- Binary parsing: use `nom::number::complete::{le_u16, be_u32, ...}` for endianness
- Add error context with `context("name", parser)` and `VerboseError`
- Operator precedence: the lower-precedence parser calls the higher-precedence parser for its operands (e.g. `parse_expr` calls `parse_term`)
- Input can be `&str` (text) or `&[u8]` (binary) — parsers adapt automatically
- Use `take(n)` for fixed-length, `take_while(predicate)` for variable-length
