Loading page…
Rust walkthroughs
Loading page…
Nom is a parser combinator library that builds complex parsers from small, composable functions. Instead of writing grammar files and generating code, you write parsers directly in Rust using combinators—functions that take parsers and return new parsers. Nom is fast, produces zero allocations for many patterns, and handles streaming input gracefully.
Key concepts:
- Every parser returns IResult<Input, Output, Error>: Ok((remaining, output)) on success.
- The input type is &str for text and &[u8] for binary.

Nom excels at parsing text protocols, binary formats, configuration files, and domain-specific languages.
# Cargo.toml
[dependencies]
nom = "7"
use nom::{
IResult,
bytes::complete::{tag, take, take_while},
character::complete::{alpha1, digit1, space0, space1},
sequence::{pair, preceded, separated_pair, tuple},
combinator::map,
};
/// Matches the literal prefix `"hello"` at the start of `input`.
///
/// On success the result holds the unconsumed remainder and the matched
/// slice; any other prefix yields a nom error.
fn parse_hello(input: &str) -> IResult<&str, &str> {
    let hello = tag("hello");
    hello(input)
}
/// Demo driver: runs `parse_hello` on one matching and one non-matching input.
fn main() {
// Successful parse: the input starts with "hello", so `unwrap` does not panic here.
let (remaining, matched) = parse_hello("hello world").unwrap();
println!("Matched: '{}'", matched); // "hello"
println!("Remaining: '{}'", remaining); // " world"
// Failed parse: the result is kept as a value and printed rather than unwrapped.
let result = parse_hello("goodbye");
println!("Result: {:?}", result); // Err
}use nom::{
IResult,
bytes::complete::{tag, take_while},
character::complete::{
alpha1, alphanumeric1, digit1,
space0, space1, multispace0,
char,
},
combinator::{map, map_res, opt, recognize},
sequence::{pair, tuple},
};
/// Reads a leading run of digits and converts it to an `i32`.
///
/// A digit run that `str::parse` rejects (for example one that overflows
/// `i32`) is turned into a parse error by `map_res` instead of a panic.
fn parse_number(input: &str) -> IResult<&str, i32> {
    let mut number = map_res(digit1, |digits: &str| digits.parse::<i32>());
    number(input)
}
/// Recognizes an identifier: one or more letters (`alpha1`) followed by any
/// number of alphanumeric characters (`alphanumeric0`).
///
/// `recognize` returns the whole matched span as a single slice of `input`.
fn parse_identifier(input: &str) -> IResult<&str, &str> {
    let head_then_tail = pair(alpha1, alphanumeric0);
    let (rest, identifier) = recognize(head_then_tail)(input)?;
    Ok((rest, identifier))
}
/// Parses an optionally negated decimal integer (an optional `-` followed by
/// digits) into an `i32`.
///
/// The sign and digits are first captured as one slice with `recognize`, then
/// converted; a conversion failure becomes a parse error via `map_res`.
fn parse_signed_int(input: &str) -> IResult<&str, i32> {
    let sign = opt(char('-'));
    let literal = recognize(pair(sign, digit1));
    let (rest, value) = map_res(literal, |text: &str| text.parse::<i32>())(input)?;
    Ok((rest, value))
}
/// Takes the longest (possibly empty) prefix of `input` whose characters
/// satisfy `char::is_alphanumeric`.
fn alphanumeric0(input: &str) -> IResult<&str, &str> {
    let (rest, run) = take_while(char::is_alphanumeric)(input)?;
    Ok((rest, run))
}
/// Demo driver: exercises each primitive parser on a fixed sample input.
/// The unconsumed remainder of every parse is discarded (`_`).
fn main() {
// Parse number: only the leading digit run "42" is converted.
let (_, num) = parse_number("42 abc").unwrap();
println!("Number: {}", num);
// Parse identifier: matching stops at the first non-alphanumeric character.
let (_, id) = parse_identifier("hello123 world").unwrap();
println!("Identifier: {}", id);
// Parse signed integer: the leading '-' is part of the converted text.
let (_, neg) = parse_signed_int("-42").unwrap();
println!("Negative: {}", neg);
}use nom::{
IResult,
bytes::complete::tag,
character::complete::{alpha1, space0, space1, char},
sequence::{pair, tuple, preceded, delimited, terminated},
combinator::opt,
branch::alt,
};
// Sequence: parse two things in order
fn parse_key_value(input: &str) -> IResult<&str, (&str, &str)> {
let (input, key) = alpha1(input)?;
let (input, _) = tag("=")(input)?;
let (input, value) = alpha1(input)?;
Ok((input, (key, value)))
}
/// Same grammar as `parse_key_value`, expressed with the `tuple` combinator:
/// the three sub-parsers run in order and the `=` match is discarded.
fn parse_key_value_tuple(input: &str) -> IResult<&str, (&str, &str)> {
    tuple((alpha1, tag("="), alpha1))(input)
        .map(|(rest, (key, _equals, value))| (rest, (key, value)))
}
/// Parses a run of letters wrapped in double quotes and returns only the
/// letters; both quote characters are consumed and dropped by `delimited`.
fn parse_quoted(input: &str) -> IResult<&str, &str> {
    let mut quoted_word = delimited(char('"'), alpha1, char('"'));
    quoted_word(input)
}
/// Skips the fixed prefix `"label: "` and returns the run of letters that
/// follows it.
fn parse_label(input: &str) -> IResult<&str, &str> {
    let mut labelled = preceded(tag("label: "), alpha1);
    let (rest, label) = labelled(input)?;
    Ok((rest, label))
}
/// Parses a run of letters that must be followed by `;`. The semicolon is
/// consumed but not returned.
fn parse_statement(input: &str) -> IResult<&str, &str> {
    let mut statement = terminated(alpha1, char(';'));
    let (rest, word) = statement(input)?;
    Ok((rest, word))
}
/// Matches one of the keywords `up`, `down`, `left`, `right`.
///
/// `alt` tries the alternatives in the order listed and returns the first
/// one that matches.
fn parse_direction(input: &str) -> IResult<&str, &str> {
    let directions = (tag("up"), tag("down"), tag("left"), tag("right"));
    alt(directions)(input)
}
/// Demo driver: calls each sequencing/choice parser once on an input that
/// matches it and prints the extracted value. `parse_key_value_tuple` is not
/// exercised here.
fn main() {
// Key and value are both letter runs separated by '='.
let (_, (k, v)) = parse_key_value("name=value").unwrap();
println!("{} = {}", k, v);
// The escaped quotes are the delimiters; only `hello` is returned.
let (_, quoted) = parse_quoted("\"hello\"").unwrap();
println!("Quoted: {}", quoted);
// The "label: " prefix is skipped.
let (_, label) = parse_label("label: test").unwrap();
println!("Label: {}", label);
// The trailing ';' is consumed but not returned.
let (_, stmt) = parse_statement("print;").unwrap();
println!("Statement: {}", stmt);
let (_, dir) = parse_direction("up").unwrap();
println!("Direction: {}", dir);
}use nom::{
IResult,
bytes::complete::tag,
character::complete::{alpha1, char, digit1, space0},
multi::{many0, many1, separated_list0, separated_list1, count},
sequence::delimited,
};
/// Applies `alpha1` zero or more times and collects the matches.
///
/// `alpha1` already consumes a whole run of letters, so a second application
/// directly after it cannot match; the result therefore holds at most one
/// element, and is empty when `input` does not start with a letter.
fn parse_words(input: &str) -> IResult<&str, Vec<&str>> {
    let mut words = many0(alpha1);
    words(input)
}
/// Parses letter-only fields separated by `,`.
///
/// `separated_list0` accepts *zero* or more fields, so input that does not
/// start with a letter succeeds with an empty vector rather than an error.
fn parse_csv(input: &str) -> IResult<&str, Vec<&str>> {
    let (rest, fields) = separated_list0(tag(","), alpha1)(input)?;
    Ok((rest, fields))
}
/// Parses a bracketed list of unsigned integers such as `[1, 2, 3]`.
///
/// Items are separated by exactly `", "` (comma followed by one space); an
/// empty list `[]` is accepted because the items use `separated_list0`.
fn parse_list(input: &str) -> IResult<&str, Vec<i32>> {
    let items = separated_list0(tag(", "), parse_i32);
    let mut bracketed = delimited(char('['), items, char(']'));
    bracketed(input)
}
/// Parses a leading run of digits into an `i32`; a failed conversion is
/// reported as a parse error by `map_res`.
fn parse_i32(input: &str) -> IResult<&str, i32> {
    use nom::combinator::map_res;

    let mut number = map_res(digit1, |digits: &str| digits.parse::<i32>());
    number(input)
}
/// Parses exactly three unsigned integers, each optionally preceded by spaces
/// or tabs (for example `"1 2 3"`).
///
/// `parse_i32` is built on `digit1`, which greedily consumes an entire digit
/// run. Applying it three times back to back can therefore never succeed:
/// the first call takes every digit and the second has nothing left to
/// match. Allowing `space0` before each number gives the numbers a separator
/// so that `count` can actually collect three of them.
///
/// Returns an error when fewer than three numbers are present.
fn parse_three_numbers(input: &str) -> IResult<&str, Vec<i32>> {
    let spaced_number = nom::sequence::preceded(space0, parse_i32);
    count(spaced_number, 3)(input)
}

/// Demo driver: runs each repetition parser on a sample input and prints the
/// collected values.
fn main() {
    let (_, words) = parse_words("abc123").unwrap();
    println!("Words: {:?}", words); // ["abc"]

    let (_, csv) = parse_csv("a,b,c").unwrap();
    println!("CSV: {:?}", csv);

    let (_, list) = parse_list("[1, 2, 3]").unwrap();
    println!("List: {:?}", list);

    // "123" is a single number; three numbers need separators between them,
    // otherwise `count(.., 3)` fails and `unwrap` panics.
    let (_, three) = parse_three_numbers("1 2 3").unwrap();
    println!("Three numbers: {:?}", three);
}
use nom::{
IResult,
bytes::complete::tag,
character::complete::{char, digit1, space0},
sequence::{delimited, pair},
combinator::{map, map_res},
multi::many0,
};
/// Arithmetic expression tree built by `parse_expr` / `parse_term` and
/// reduced to a value by `evaluate`.
#[derive(Debug, Clone)]
enum Expr {
/// Integer literal.
Number(i64),
/// Left operand + right operand.
Add(Box<Expr>, Box<Expr>),
/// Left operand - right operand.
Sub(Box<Expr>, Box<Expr>),
/// Left operand * right operand.
Mul(Box<Expr>, Box<Expr>),
/// Left operand / right operand (integer division in `evaluate`).
Div(Box<Expr>, Box<Expr>),
}
/// Parses a leading run of digits into an `Expr::Number`.
///
/// A digit run that does not convert to `i64` becomes a parse error via
/// `map_res`.
fn parse_number(input: &str) -> IResult<&str, Expr> {
    let (rest, value) = map_res(digit1, |digits: &str| digits.parse::<i64>())(input)?;
    Ok((rest, Expr::Number(value)))
}
/// Parses a parenthesised sub-expression: `(` followed by a full expression
/// (`parse_expr`) followed by `)`. Only the inner expression is returned.
fn parse_parens(input: &str) -> IResult<&str, Expr> {
    let mut parenthesised = delimited(char('('), parse_expr, char(')'));
    let (rest, inner) = parenthesised(input)?;
    Ok((rest, inner))
}
fn parse_atom(input: &str) -> IResult<&str, Expr> {
let (input, _) = space0(input)?;
let (input, expr) = nom::branch::alt((parse_parens, parse_number))(input)?;
let (input, _) = space0(input)?;
Ok((input, expr))
}
/// Parses a chain of atoms joined by `*` or `/` (the higher-precedence
/// level) and folds it into a left-associative `Expr` tree.
fn parse_term(input: &str) -> IResult<&str, Expr> {
    let operator = nom::branch::alt((char('*'), char('/')));

    let (input, first) = parse_atom(input)?;
    let (input, tail) = many0(pair(operator, parse_atom))(input)?;

    // Fold left to right so that `a / b * c` becomes `(a / b) * c`.
    let term = tail.into_iter().fold(first, |lhs, (op, rhs)| {
        let (lhs, rhs) = (Box::new(lhs), Box::new(rhs));
        match op {
            '*' => Expr::Mul(lhs, rhs),
            '/' => Expr::Div(lhs, rhs),
            // `operator` only ever yields '*' or '/'.
            _ => unreachable!(),
        }
    });
    Ok((input, term))
}
/// Parses a chain of terms joined by `+` or `-` (the lower-precedence level)
/// and folds it into a left-associative `Expr` tree.
///
/// Each operand is parsed with `parse_term`, so `*` and `/` bind tighter
/// than `+` and `-`.
fn parse_expr(input: &str) -> IResult<&str, Expr> {
    let operator = nom::branch::alt((char('+'), char('-')));

    let (input, first) = parse_term(input)?;
    let (input, tail) = many0(pair(operator, parse_term))(input)?;

    // Fold left to right so that `a - b + c` becomes `(a - b) + c`.
    let expr = tail.into_iter().fold(first, |lhs, (op, rhs)| {
        let (lhs, rhs) = (Box::new(lhs), Box::new(rhs));
        match op {
            '+' => Expr::Add(lhs, rhs),
            '-' => Expr::Sub(lhs, rhs),
            // `operator` only ever yields '+' or '-'.
            _ => unreachable!(),
        }
    });
    Ok((input, expr))
}
fn evaluate(expr: &Expr) -> i64 {
match expr {
Expr::Number(n) => *n,
Expr::Add(a, b) => evaluate(a) + evaluate(b),
Expr::Sub(a, b) => evaluate(a) - evaluate(b),
Expr::Mul(a, b) => evaluate(a) * evaluate(b),
Expr::Div(a, b) => evaluate(a) / evaluate(b),
}
}
/// Demo driver: parses and evaluates a fixed list of sample expressions and
/// prints `source = value` for each.
fn main() {
// Samples cover a bare literal, each precedence combination, and parentheses.
let expressions = vec![
"42",
"2 + 3",
"10 - 4",
"2 * 3 + 4",
"2 + 3 * 4",
"(2 + 3) * 4",
"10 / 2 + 3",
];
for expr in expressions {
// `unwrap` panics if `parse_expr` returns an error; the unparsed remainder is ignored.
let (_, parsed) = parse_expr(expr).unwrap();
let result = evaluate(&parsed);
println!("{} = {}", expr, result);
}
}use nom::{
IResult,
bytes::complete::{tag, take_till, take_while},
character::complete::{char, multispace0, not_line_ending},
sequence::{preceded, terminated},
combinator::{map, opt},
multi::many0,
};
use std::collections::HashMap;
/// Returns everything up to (not including) the first `\n` or `\r`.
///
/// A `"\r\n"` terminator directly after the line is consumed as well; a bare
/// `"\n"` is left in the remaining input for the caller to skip.
fn till_end_of_line(input: &str) -> IResult<&str, &str> {
    let (rest, line) = take_till(|c: char| matches!(c, '\n' | '\r'))(input)?;
    let (rest, _crlf) = opt(tag("\r\n"))(rest)?;
    Ok((rest, line))
}
/// Consumes a `#` comment up to, but not including, the line ending and
/// discards its text.
fn parse_comment(input: &str) -> IResult<&str, ()> {
    let (rest, _text) = preceded(char('#'), not_line_ending)(input)?;
    Ok((rest, ()))
}
// Parse whitespace and comments
fn parse_ws_or_comment(input: &str) -> IResult<&str, ()> {
map(
many0(preceded(multispace0, opt(parse_comment))),
|_| ()
)(input)
}
// Parse a key-value pair
fn parse_kv(input: &str) -> IResult<&str, (String, String)> {
let (input, _) = multispace0(input)?;
let (input, key) = take_while(|c: char| c.is_alphanumeric() || c == '_')(input)?;
let (input, _) = preceded(multispace0, char('='))(input)?;
let (input, value) = preceded(multispace0, till_end_of_line)(input)?;
Ok((input, (key.to_string(), value.trim().to_string())))
}
// Parse entire config
fn parse_config(input: &str) -> IResult<&str, HashMap<String, String>> {
let (input, _) = parse_ws_or_comment(input)?;
let (input, pairs) = many0(parse_kv)(input)?;
let mut config = HashMap::new();
for (key, value) in pairs {
config.insert(key, value);
}
Ok((input, config))
}
/// Demo driver: parses an inline sample configuration with `parse_config`
/// and prints every entry of the resulting map.
fn main() {
// Sample input: two comment lines interleaved with four `key = value` entries.
let config_text = r#"
# Server configuration
host = localhost
port = 8080
# Database
database_url = postgres://user:pass@localhost/db
max_connections = 10
"#;
// `unwrap` panics if `parse_config` returns an error; leftover input is ignored.
let (_, config) = parse_config(config_text).unwrap();
// `HashMap` iteration order is unspecified, so entries may print in any order.
for (key, value) in &config {
println!("{} = {}", key, value);
}
}use nom::{
IResult,
bytes::complete::{tag, take},
number::complete::{be_u16, be_u32, le_u16, le_u32},
sequence::tuple,
};
/// Fixed-size header as decoded by `parse_header` (12 bytes of input).
#[derive(Debug)]
struct BinaryHeader {
/// First four bytes of the input, copied verbatim.
magic: [u8; 4],
/// Read as a little-endian `u16`.
version: u16,
/// Read as a little-endian `u16`.
flags: u16,
/// Read as a little-endian `u32`. NOTE(review): what this length measures is not shown here.
length: u32,
}
/// Variable-size record as decoded by `parse_record`.
#[derive(Debug)]
struct BinaryRecord {
/// Read as a little-endian `u32`.
id: u32,
/// Read as a little-endian `u16`.
value: u16,
/// Length-prefixed bytes converted with `String::from_utf8_lossy`.
name: String,
}
fn parse_header(input: &[u8]) -> IResult<&[u8], BinaryHeader> {
let (input, magic_bytes) = take(4usize)(input)?;
let magic: [u8; 4] = magic_bytes.try_into().unwrap();
let (input, version) = le_u16(input)?;
let (input, flags) = le_u16(input)?;
let (input, length) = le_u32(input)?;
Ok((input, BinaryHeader { magic, version, flags, length }))
}
/// Decodes a `BinaryRecord` from the start of `input`.
///
/// Layout, all integers little-endian: `id` (u32), `value` (u16), the name
/// length (u16), then that many bytes of name data. Invalid UTF-8 in the
/// name is replaced rather than rejected (`String::from_utf8_lossy`).
///
/// Returns a nom error when `input` ends before the record is complete.
fn parse_record(input: &[u8]) -> IResult<&[u8], BinaryRecord> {
    let (input, id) = le_u32(input)?;
    let (input, value) = le_u16(input)?;
    let (input, name_len) = le_u16(input)?;
    // u16 -> usize is a lossless widening conversion.
    let (input, name_bytes) = take(usize::from(name_len))(input)?;
    let name = String::from_utf8_lossy(name_bytes).into_owned();
    Ok((input, BinaryRecord { id, value, name }))
}
/// Demo driver: decodes a hand-written 12-byte header with `parse_header`
/// and prints it. `parse_record` is not exercised here.
fn main() {
// Simulated binary data
let data: Vec<u8> = vec![
b'T', b'E', b'S', b'T', // magic
0x01, 0x00, // version (little-endian u16)
0x00, 0x10, // flags
0x10, 0x00, 0x00, 0x00, // length
];
// `unwrap` panics if `parse_header` returns an error; leftover bytes are ignored.
let (_, header) = parse_header(&data).unwrap();
println!("Header: {:?}", header);
}use nom::{
IResult,
bytes::complete::tag,
character::complete::alpha1,
error::{context, ErrorKind, VerboseError},
};
/// Parser result over `&str` input that reports failures as `VerboseError`.
/// Within this scope the alias shadows the prelude's `Result`.
type Result<'a, T> = IResult<&'a str, T, VerboseError<&'a str>>;
/// Matches the literal `"hello"`; a failure is tagged with the context label
/// `"greeting"`.
fn parse_greeting(input: &str) -> Result<&str> {
    let mut greeting = context("greeting", tag("hello"));
    greeting(input)
}
/// Matches one or more letters; a failure is tagged with the context label
/// `"name"`.
fn parse_name(input: &str) -> Result<&str> {
    let mut name = context("name", alpha1);
    name(input)
}
/// Parses `<greeting> <name>` separated by exactly one space and returns the
/// `(greeting, name)` pair.
///
/// Each step adds its own context label (`"full_greeting"`, `"space"`,
/// `"full_name"`) to a failure.
fn parse_full(input: &str) -> Result<(&str, &str)> {
    let mut greeting_step = context("full_greeting", parse_greeting);
    let mut space_step = context("space", tag(" "));
    let mut name_step = context("full_name", parse_name);

    let (rest, greeting) = greeting_step(input)?;
    let (rest, _) = space_step(rest)?;
    let (rest, name) = name_step(rest)?;
    Ok((rest, (greeting, name)))
}
/// Demo driver: runs `parse_full` on one matching and one non-matching input
/// and prints either the parsed parts or the collected error.
fn main() {
// Successful parse
match parse_full("hello world") {
Ok((remaining, (greeting, name))) => {
println!("Greeting: {}, Name: {}", greeting, name);
println!("Remaining: '{}'", remaining);
}
Err(e) => println!("Error: {:?}", e),
}
// Failed parse with context
match parse_full("goodbye world") {
Ok(_) => println!("Unexpected success"),
// `Error` and `Failure` both carry the error value; print it either way.
Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => {
println!("Parse error: {:?}", e);
}
// Handled for exhaustiveness: `nom::Err` also has an `Incomplete` variant.
Err(nom::Err::Incomplete(_)) => println!("Need more input"),
}
}
- IResult<Input, Output, Error> — Ok((remaining, output)) on success.
- Use tag("literal") to match exact strings, alpha1/digit1 for character classes.
- map(parser, f) transforms parser output with a function.
- map_res(parser, f) applies a fallible function (like parse::<i32>()).
- Sequence parsers with tuple, pair, preceded, terminated, delimited.
- alt tries multiple parsers in order.
- Repeat parsers with many0 (zero+), many1 (one+), separated_list0/separated_list1.
- Build recursive parsers with Box<Expr> or fn pointers.
- Use nom::number::complete::{le_u16, be_u32, ...} for endianness.
- Add error context with context("name", parser) and VerboseError.
- Input can be &str (text) or &[u8] (binary) — parsers adapt automatically.
- Use take(n) for fixed-length, take_while(predicate) for variable-length.