How do I parse structured text and data in Rust?
Walkthrough
Nom is a parser combinator library that builds complex parsers from small, reusable functions. Each parser takes input and returns IResult with remaining input and output, or an error. Parsers are composed together to handle increasingly complex formats. Nom works with &str, &[u8], and custom input types.
Core concepts:
- Combinators — functions that combine or transform parsers
- IResult — `IResult<I, O, E>` is an alias for `Result<(I, O), nom::Err<E>>`, where `I` is the remaining input and `O` is the parsed output
- Streaming vs Complete — streaming for chunked input, complete for all-at-once parsing
- Error handling — rich error types with context and location information
Nom excels at parsing protocols, file formats, configuration files, and domain-specific languages.
Code Example
# Cargo.toml
[dependencies]
nom = "7"

use nom::{
branch::alt,
bytes::complete::{tag, take_until, take_while, take_while1},
character::complete::{char, digit1, multispace0, space0, space1},
combinator::{map, map_res, opt, recognize},
multi::{many0, many1, separated_list0, separated_list1},
sequence::{delimited, preceded, separated_pair, terminated, tuple},
IResult, Parser,
};
// ===== Basic Parsers =====
/// Matches the literal prefix `hello`.
///
/// On success returns the unconsumed remainder together with the matched
/// slice; on failure returns the error produced by `tag`.
fn parse_hello(input: &str) -> IResult<&str, &str> {
    let literal = tag("hello");
    literal(input)
}
/// Reads one or more leading ASCII digits and converts them to an `i32`.
///
/// A digit run that does not fit in `i32` is reported as a parse error by
/// `map_res` rather than being returned.
fn parse_digits(input: &str) -> IResult<&str, i32> {
    map_res(digit1, str::parse::<i32>)(input)
}
/// Consumes a non-empty run of alphabetic characters from the front of `input`.
///
/// Fails when the first character is not alphabetic (or the input is empty),
/// because `take_while1` requires at least one match.
fn parse_alpha(input: &str) -> IResult<&str, &str> {
    take_while1(char::is_alphabetic)(input)
}
/// Demonstrates the basic parsers on two fixed inputs and prints what each
/// one matched and what it left unconsumed.
fn main() {
    // Parse literal string
    let (remaining, matched) =
        parse_hello("hello world").expect("the demo input starts with the literal `hello`");
    println!("Matched: '{}', Remaining: '{}'", matched, remaining);

    // Parse digits to integer
    let (remaining, num) =
        parse_digits("42 is the answer").expect("the demo input starts with a small integer");
    println!("Number: {}, Remaining: '{}'", num, remaining);
}

Parsing Structured Data
use nom::{
bytes::complete::tag,
character::complete::{char, digit1, space0},
combinator::map_res,
sequence::separated_pair,
IResult,
};
// Parse key=value pairs
/// Parses a single `key=value` pair.
///
/// The key is everything up to the first `=`; the value is everything after
/// it up to (but not including) the next newline, or up to the end of the
/// input when the pair is the last line and has no trailing newline.
/// Returns the unconsumed remainder (starting at the newline, if any) and the
/// `(key, value)` slices. Fails when the input contains no `=`.
fn parse_key_value(input: &str) -> IResult<&str, (&str, &str)> {
    let (input, key) = nom::bytes::complete::take_until("=")(input)?;
    let (input, _) = tag("=")(input)?;
    // `take_till` stops at the newline *or* at end of input, so a final pair
    // without a terminating '\n' still parses (`take_until("\n")` rejected it).
    let (input, value) = nom::bytes::complete::take_till(|c: char| c == '\n')(input)?;
    Ok((input, (key, value)))
}
// Parse "name: age" format
/// Parses a `name: age` record step by step.
///
/// The name is everything before the first `:`; it must be followed by
/// exactly `": "` and then a run of digits that fits in a `u32`.
fn parse_person(input: &str) -> IResult<&str, Person> {
    let (after_name, raw_name) = nom::bytes::complete::take_until(":")(input)?;
    let (after_separator, _) = tag(": ")(after_name)?;
    let (rest, age) = map_res(digit1, |digits: &str| digits.parse::<u32>())(after_separator)?;

    let person = Person {
        name: raw_name.to_string(),
        age,
    };
    Ok((rest, person))
}
/// A person record produced by the `name: age` parsers.
#[derive(Debug, Clone)]
struct Person {
// Text found before the `:` separator, copied into an owned `String`.
name: String,
// Age parsed from the decimal digits after the separator.
age: u32,
}
// More idiomatic using combinators
/// Parses a `name: age` record with a single `separated_pair` combinator.
///
/// Accepts the same input as the step-by-step version: the name is everything
/// before the first `:`, the separator is exactly `": "`, and the age is a
/// run of digits that fits in a `u32`.
fn parse_person_idiomatic(input: &str) -> IResult<&str, Person> {
    let (input, (name, age)) = separated_pair(
        nom::bytes::complete::take_until(":"),
        tag(": "),
        map_res(digit1, |s: &str| s.parse::<u32>()),
    )(input)?;
    Ok((input, Person { name: name.to_string(), age }))
}

Parsing Expressions (Calculator)
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{char, digit1, multispace0, space0},
combinator::{map, map_res},
sequence::delimited,
IResult,
};
/// Syntax tree for the calculator's arithmetic expressions.
#[derive(Debug, Clone)]
enum Expr {
// Integer literal.
Number(i64),
// Left operand + right operand.
Add(Box<Expr>, Box<Expr>),
// Left operand - right operand.
Sub(Box<Expr>, Box<Expr>),
// Left operand * right operand.
Mul(Box<Expr>, Box<Expr>),
// Left operand / right operand.
Div(Box<Expr>, Box<Expr>),
}
fn parse_number(input: &str) -> IResult<&str, Expr> {
map_res(digit1, |s: &str| {
s.parse::<i64>().map(Expr::Number)
})(input)
}
fn parse_parens(input: &str) -> IResult<&str, Expr> {
delimited(
char('('),
parse_expr,
char(')'),
)(input)
}
/// Parses the tightest-binding unit of an expression: a number or a
/// parenthesised sub-expression, after skipping leading whitespace.
fn parse_factor(input: &str) -> IResult<&str, Expr> {
    let (trimmed, _) = multispace0(input)?;
    let mut atom = alt((parse_number, parse_parens));
    atom(trimmed)
}
fn parse_term(input: &str) -> IResult<&str, Expr> {
let (input, mut left) = parse_factor(input)?;
let mut input = input;
loop {
let (remaining, _) = multispace0(input)?;
if let Ok((rem, _)) = char::<_, nom::error::Error<_>>('*')(remaining) {
let (rem, _) = multispace0(rem)?;
let (rem, right) = parse_factor(rem)?;
left = Expr::Mul(Box::new(left), Box::new(right));
input = rem;
} else if let Ok((rem, _)) = char::<_, nom::error::Error<_>>('/')(remaining) {
let (rem, _) = multispace0(rem)?;
let (rem, right) = parse_factor(rem)?;
left = Expr::Div(Box::new(left), Box::new(right));
input = rem;
} else {
break;
}
}
Ok((input, left))
}
fn parse_expr(input: &str) -> IResult<&str, Expr> {
let (input, mut left) = parse_term(input)?;
let mut input = input;
loop {
let (remaining, _) = multispace0(input)?;
if let Ok((rem, _)) = char::<_, nom::error::Error<_>>('+')(remaining) {
let (rem, _) = multispace0(rem)?;
let (rem, right) = parse_term(rem)?;
left = Expr::Add(Box::new(left), Box::new(right));
input = rem;
} else if let Ok((rem, _)) = char::<_, nom::error::Error<_>>('-')(remaining) {
let (rem, _) = multispace0(rem)?;
let (rem, right) = parse_term(rem)?;
left = Expr::Sub(Box::new(left), Box::new(right));
input = rem;
} else {
break;
}
}
Ok((input, left))
}
/// Recursively evaluates an expression tree to an `i64`.
///
/// Operands are evaluated left to right. Uses the plain integer operators,
/// so division by zero panics.
fn eval(expr: &Expr) -> i64 {
    // Evaluates both operands (left first), then combines them.
    fn binary(lhs: &Expr, rhs: &Expr, combine: fn(i64, i64) -> i64) -> i64 {
        let left = eval(lhs);
        let right = eval(rhs);
        combine(left, right)
    }

    match expr {
        Expr::Number(value) => *value,
        Expr::Add(lhs, rhs) => binary(lhs, rhs, |l, r| l + r),
        Expr::Sub(lhs, rhs) => binary(lhs, rhs, |l, r| l - r),
        Expr::Mul(lhs, rhs) => binary(lhs, rhs, |l, r| l * r),
        Expr::Div(lhs, rhs) => binary(lhs, rhs, |l, r| l / r),
    }
}
/// Parses and evaluates a fixed list of sample expressions, printing each
/// result or the reason it could not be parsed.
fn main() {
    let expressions = [
        "42",
        "2 + 3",
        "10 - 4",
        "3 * 4",
        "2 + 3 * 4",
        "(2 + 3) * 4",
        "10 / 2 + 3",
    ];

    for expr in expressions {
        match parse_expr(expr) {
            // Only accept a parse that consumed the whole expression; a
            // successful prefix parse (e.g. "2 3") must not be reported as
            // the value of the full input.
            Ok((remaining, parsed)) if remaining.trim().is_empty() => {
                println!("{} = {}", expr, eval(&parsed));
            }
            Ok((remaining, _)) => {
                println!(
                    "Error parsing '{}': unexpected trailing input '{}'",
                    expr, remaining
                );
            }
            Err(e) => println!("Error parsing '{}': {:?}", expr, e),
        }
    }
}

Parsing JSON-like Configuration
use nom::{
    branch::alt,
    bytes::complete::{tag, take_until, take_while},
    character::complete::{char, digit1, multispace0, none_of},
    combinator::{cut, map, map_res, opt, recognize, value},
    multi::{many0, separated_list0},
    sequence::{delimited, preceded, terminated, tuple},
    IResult,
};
/// In-memory representation of a parsed JSON-like value.
#[derive(Debug, Clone, PartialEq)]
enum JsonValue {
// The literal `null`.
Null,
// The literals `true` / `false`.
Bool(bool),
// Any numeric literal, stored as `f64`.
Number(f64),
// Contents of a double-quoted string, without the quotes.
String(String),
// Ordered list of values from `[...]`.
Array(Vec<JsonValue>),
// Key/value pairs from `{...}`; stored in a `HashMap`, so key order is not kept.
Object(std::collections::HashMap<String, JsonValue>),
}
fn parse_null(input: &str) -> IResult<&str, JsonValue> {
value(JsonValue::Null, tag("null"))(input)
}
fn parse_bool(input: &str) -> IResult<&str, JsonValue> {
alt((
value(JsonValue::Bool(true), tag("true")),
value(JsonValue::Bool(false), tag("false")),
))(input)
}
fn parse_number(input: &str) -> IResult<&str, JsonValue> {
map_res(
recognize(|input: &str| {
let (input, _) = opt(char('-'))(input)?;
let (input, _) = digit1(input)?;
let (input, _) = opt(preceded(char('.'), digit1))(input)?;
let (input, _) = opt(preceded(alt((char('e'), char('E'))), digit1))(input)?;
Ok((input, ()))
}),
|s: &str| s.parse::<f64>().map(JsonValue::Number),
)(input)
}
fn parse_string(input: &str) -> IResult<&str, JsonValue> {
let (input, _) = char('"')(input)?;
let (input, content) = many0(none_of("\""))(input)?;
let (input, _) = char('"')(input)?;
Ok((input, JsonValue::String(content.into_iter().collect())))
}
fn parse_array(input: &str) -> IResult<&str, JsonValue> {
let (input, _) = char('[')(input)?;
let (input, _) = multispace0(input)?;
let (input, values) = separated_list0(
delimited(multispace0, char(','), multispace0),
parse_json_value,
)(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = char(']')(input)?;
Ok((input, JsonValue::Array(values)))
}
fn parse_object(input: &str) -> IResult<&str, JsonValue> {
let (input, _) = char('{')(input)?;
let (input, _) = multispace0(input)?;
let (input, pairs) = separated_list0(
delimited(multispace0, char(','), multispace0),
|input| {
let (input, _) = multispace0(input)?;
let (input, key) = parse_string(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = char(':')(input)?;
let (input, _) = multispace0(input)?;
let (input, value) = parse_json_value(input)?;
let JsonValue::String(key_str) = key else {
panic!("Expected string key");
};
Ok((input, (key_str, value)))
},
)(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = char('}')(input)?;
Ok((input, JsonValue::Object(pairs.into_iter().collect())))
}
fn parse_json_value(input: &str) -> IResult<&str, JsonValue> {
let (input, _) = multispace0(input)?;
let (input, value) = alt((
parse_null,
parse_bool,
parse_number,
parse_string,
parse_array,
parse_object,
))(input)?;
let (input, _) = multispace0(input)?;
Ok((input, value))
}
/// Parses a fixed list of sample documents and prints each parsed value or
/// the reason it could not be parsed.
fn main() {
    let inputs = [
        r#"null"#,
        r#"true"#,
        r#"42"#,
        r#""hello""#,
        r#"[1, 2, 3]"#,
        r#"{"name": "Alice", "age": 30}"#,
    ];

    for input in inputs {
        match parse_json_value(input) {
            // A value followed by unparsed text (e.g. `nullx`) is not a
            // valid document, so report it instead of printing the prefix.
            Ok((remaining, value)) if remaining.is_empty() => println!("Parsed: {:?}", value),
            Ok((remaining, _)) => println!("Error: unexpected trailing input {:?}", remaining),
            Err(e) => println!("Error: {:?}", e),
        }
    }
}

Summary
- Use `tag("literal")` to match exact strings and `take_while(pred)` for variable-length matches
- Convert parsed strings with `map_res(parser, |s| s.parse::<T>())`
- Combine parsers with `tuple((parser1, parser2, ...))` for sequences
- Use `alt((parser1, parser2, ...))` for alternatives (OR)
- `separated_list0(sep, item)` parses comma-separated lists (zero or more)
- `delimited(open, content, close)` wraps content with delimiters like parentheses
- `preceded(before, parser)` and `terminated(parser, after)` ignore parts of input
- `many0(parser)` for zero or more, `many1(parser)` for one or more repetitions
- Handle whitespace with `multispace0` between tokens
multispace0between tokens - For expression parsing, handle operator precedence with layered parsers (term < expr)
- Nom's error types can be extended with custom context for better error messages
- Use `nom::error::VerboseError` with `nom::combinator::cut` for detailed error reporting
