How do I parse text with nom in Rust?

Walkthrough

The nom crate is a parser combinator library that lets you build powerful parsers by combining small, reusable parsing functions. Instead of writing a monolithic parser, you compose parsers like tag(), alphanumeric1, separated_list0(), and map() to handle complex input formats. Nom works with both text and binary data, reports errors as ordinary Result values, and is zero-copy where possible: a parser that returns part of its input returns a slice of it rather than a new allocation. It's well suited to parsing configuration files, network protocols, programming languages, custom data formats, and structured text.

Key concepts:

  1. Parser combinators — small parsers combined to build complex ones
  2. IResult<Input, Output> — the result type: Ok((remaining, output)) or Err
  3. Built-in parsers — tag, alphanumeric1, digit1, space1, etc.
  4. Combinators — map, flat_map, and_then, alt, many0, many1
  5. Error handling — detailed error types with context

Code Example

# Cargo.toml
[dependencies]
nom = "7"
use nom::{
    bytes::complete::tag,
    sequence::preceded,
    character::complete::alphanumeric1,
    IResult,
};
 
fn parse_greeting(input: &str) -> IResult<&str, &str> {
    let (input, _) = tag("Hello, ")(input)?;
    let (input, name) = alphanumeric1(input)?;
    Ok((input, name))
}
 
fn main() {
    // "Hello, " is consumed, "World" is captured and "!" is left over.
    println!("{:?}", parse_greeting("Hello, World!")); // Ok(("!", "World"))
}

Understanding IResult

use nom::{
    bytes::complete::tag,
    character::complete::digit1,
    IResult, error::Error,
};
 
fn main() {
    // IResult<I, O, E = nom::error::Error<I>> is an alias for
    // Result<(I, O), nom::Err<E>>:
    //   Success: Ok((remaining_input, parsed_output))
    //   Failure: Err(nom::Err::Error(_)), Err(nom::Err::Failure(_))
    //            or Err(nom::Err::Incomplete(_))

    // Simple tag parser
    let result: IResult<&str, &str> = tag("hello")("hello world");
    match result {
        Ok((remaining, matched)) => {
            println!("Matched: '{}'", matched);
            println!("Remaining: '{}'", remaining);
        }
        Err(e) => {
            println!("Error: {:?}", e);
        }
    }

    // Failed parse. The annotation is required: `tag` is generic over its
    // error type and nothing else in this statement pins it down.
    let failed: IResult<&str, &str> = tag("hello")("goodbye");
    println!("\nFailed parse: {:?}", failed);
}

Basic Parsers

use nom::{
    bytes::complete::{tag, take, take_while, take_while1},
    character::complete::{alphanumeric1, digit1, space0, space1, newline},
    IResult,
};
 
fn main() {
    // tag - match exact string
    let (rem, matched) = tag("abc")("abcdef").unwrap();
    println!("tag: matched='{}', remaining='{}'", matched, rem);
    
    // take - take exactly n bytes/chars
    let (rem, taken) = take(3u8)("abcdef").unwrap();
    println!("take(3): '{}', remaining='{}'", taken, rem);
    
    // alphanumeric1 - one or more alphanumeric chars
    let (rem, alpha) = alphanumeric1("hello123!!!").unwrap();
    println!("alphanumeric1: '{}', remaining='{}'", alpha, rem);
    
    // digit1 - one or more digits
    let (rem, digits) = digit1("12345abc").unwrap();
    println!("digit1: '{}', remaining='{}'", digits, rem);
    
    // space0 - zero or more spaces
    let (rem, spaces) = space0("   text").unwrap();
    println!("space0: {} spaces, remaining='{}'", spaces.len(), rem);
    
    // take_while - take while predicate is true
    let (rem, vowels) = take_while(|c: char| "aeiou".contains(c))("aeioubcd").unwrap();
    println!("take_while vowels: '{}', remaining='{}'", vowels, rem);
}

Sequence Combinators

use nom::{
    bytes::complete::tag,
    sequence::{tuple, pair, preceded, terminated, delimited, separated_pair},
    character::complete::alphanumeric1,
    IResult,
};
 
fn main() {
    // tuple - parse multiple things in sequence
    let parser = tuple((tag("abc"), tag("123"), tag("xyz")));
    let (rem, (a, b, c)) = parser("abc123xyz").unwrap();
    println!("tuple: '{}, {}, {}', remaining='{}'", a, b, c, rem);
    
    // pair - parse two things
    let parser = pair(tag("hello"), tag("world"));
    let (rem, (a, b)) = parser("helloworld").unwrap();
    println!("pair: '{}, {}'", a, b);
    
    // preceded - skip first, return second
    let parser = preceded(tag("prefix"), alphanumeric1);
    let (rem, result) = parser("prefixdata").unwrap();
    println!("preceded: '{}', remaining='{}'", result, rem);
    
    // terminated - return first, skip second
    let parser = terminated(alphanumeric1, tag(";"));
    let (rem, result) = parser("data;more").unwrap();
    println!("terminated: '{}', remaining='{}'", result, rem);
    
    // delimited - skip first and third, return middle
    let parser = delimited(tag("(") , alphanumeric1, tag(")"));
    let (rem, result) = parser("(content)rest").unwrap();
    println!("delimited: '{}', remaining='{}'", result, rem);
    
    // separated_pair - return both, skip separator
    let parser = separated_pair(alphanumeric1, tag(":"), alphanumeric1);
    let (rem, (key, value)) = parser("key:value").unwrap();
    println!("separated_pair: '{}', '{}'", key, value);
}

Choice Combinators

use nom::{
    bytes::complete::tag,
    branch::alt,
    character::complete::{digit1, alphanumeric1},
    IResult,
};
 
fn main() {
    // alt - try multiple parsers, use first that succeeds
    let parser = alt((tag("abc"), tag("123"), tag("xyz")));
    
    let (rem, result) = parser("abc").unwrap();
    println!("alt(abc): '{}'", result);
    
    let (rem, result) = parser("123").unwrap();
    println!("alt(123): '{}'", result);
    
    let (rem, result) = parser("xyz").unwrap();
    println!("alt(xyz): '{}'", result);
    
    // Combining with different types requires map
    let parser = alt((
        map_res(digit1, |s: &str| s.parse::<i32>()),
        map_res(alphanumeric1, |_| Ok::<_, std::num::ParseIntError>(-1)),
    ));
}
 
fn map_res<I, O1, O2, E, F, G>(parser: F, f: G) -> impl FnMut(I) -> IResult<I, O2, E>
where
    F: nom::Parser<I, O1, E>,
    G: Fn(O1) -> Result<O2, E>,
{
    nom::combinator::map_res(parser, f)
}

Repetition Combinators

use nom::{
    bytes::complete::tag,
    multi::{many0, many1, many_m_n, separated_list0, separated_list1},
    character::complete::digit1,
    sequence::delimited,
    IResult,
};
 
fn main() {
    // many0 - zero or more occurrences
    let parser = many0(tag("ab"));
    let (rem, result) = parser("abababxyz").unwrap();
    println!("many0: {:?}, remaining='{}'", result, rem);
    
    // Empty is also valid
    let (rem, result) = parser("xyz").unwrap();
    println!("many0 (empty): {:?}", result);
    
    // many1 - one or more occurrences (fails if none)
    let parser = many1(tag("ab"));
    let (rem, result) = parser("abab").unwrap();
    println!("many1: {:?}", result);
    
    // many_m_n - between m and n occurrences
    let parser = many_m_n(2, 4, tag("x"));
    let (rem, result) = parser("xxxxx").unwrap();
    println!("many_m_n(2,4): {:?}, remaining='{}'", result, rem);
    
    // separated_list0 - list with separator (zero or more)
    let parser = separated_list0(tag(","), digit1);
    let (rem, result) = parser("1,2,3,4").unwrap();
    println!("separated_list0: {:?}", result);
    
    // separated_list1 - list with separator (one or more)
    let parser = separated_list1(tag(","), digit1);
    let (rem, result) = parser("a,b,c").unwrap(); // This won't work
    println!("separated_list1 with digits: {:?}", result);
}

Transforming Output

use nom::{
    bytes::complete::tag,
    character::complete::digit1,
    combinator::{map, map_res, map_parser, value, opt},
    sequence::tuple,
    IResult,
};
 
fn main() {
    // map - transform the output
    let parser = map(digit1, |s: &str| s.len());
    let (rem, result) = parser("12345abc").unwrap();
    println!("map (length): {}, remaining='{}'", result, rem);
    
    // map_res - transform with fallible function
    let parser = map_res(digit1, |s: &str| s.parse::<i32>());
    let (rem, result) = parser("42abc").unwrap();
    println!("map_res (parse): {}, remaining='{}'", result, rem);
    
    // value - return a constant value on success
    let parser = value(42, tag("answer"));
    let (rem, result) = parser("answer!").unwrap();
    println!("value: {}, remaining='{}'", result, rem);
    
    // opt - make parser optional
    let parser = tuple((digit1, opt(tag("abc"))));
    let (rem, (num, suffix)) = parser("123abc").unwrap();
    println!("opt (with): num='{}', suffix={:?}", num, suffix);
    
    let (rem, (num, suffix)) = parser("123xyz").unwrap();
    println!("opt (without): num='{}', suffix={:?}", num, suffix);
}

Parsing Numbers

use nom::{
    character::complete::{digit1, char, one_of},
    combinator::{map_res, recognize, opt},
    sequence::tuple,
    bytes::complete::take_while_m,
    IResult,
};
 
/// Parses a decimal integer with an optional leading `-` into an `i64`.
///
/// `recognize` returns the whole matched slice (sign and digits), which is
/// then converted; a value that does not fit in `i64` makes the parser fail.
fn parse_int(input: &str) -> IResult<&str, i64> {
    let signed_digits = recognize(tuple((opt(char('-')), digit1)));
    map_res(signed_digits, |text: &str| text.parse::<i64>())(input)
}
 
/// Parses a floating-point number into an `f64`.
///
/// Accepted shape: optional `-`, integer digits, optional `.digits`
/// fraction, optional exponent (`e` or `E`, optional sign, digits).
fn parse_float(input: &str) -> IResult<&str, f64> {
    let sign = opt(char('-'));
    let fraction = opt(tuple((char('.'), digit1)));
    let exponent = opt(tuple((one_of("eE"), opt(one_of("+-")), digit1)));

    // `recognize` yields the full matched slice so std can do the conversion.
    map_res(
        recognize(tuple((sign, digit1, fraction, exponent))),
        |text: &str| text.parse::<f64>(),
    )(input)
}
 
fn main() {
    // Integer with a sign; the trailing letters are left unparsed.
    let (rest, value) = parse_int("-42abc").unwrap();
    println!("Int: {}, remaining='{}'", value, rest);

    // Plain decimal fraction.
    let (rest, value) = parse_float("3.14end").unwrap();
    println!("Float: {}, remaining='{}'", value, rest);

    // Scientific notation with a negative exponent.
    let (rest, value) = parse_float("-2.5e-3xyz").unwrap();
    println!("Scientific: {}, remaining='{}'", value, rest);
}

Parsing Strings with Escapes

use nom::{
    bytes::complete::{tag, take_till, take_while},
    character::complete::{char, satisfy},
    sequence::delimited,
    combinator::{map, recognize, verify},
    branch::alt,
    IResult,
};
 
fn parse_escaped_char(input: &str) -> IResult<&str, char> {
    preceded(char('\\'), alt((
        map(char('n'), |_| '\n'),
        map(char('t'), |_| '\t'),
        map(char('r'), |_| '\r'),
        map(char('\\'), |_| '\\'),
        map(char('"'), _| '"'),
        // Unicode: \u{XXXX}
        preceded(
            tag("u{"),
            map_res(
                verify(
                    take_while(|c: char| c.is_ascii_hexdigit()),
                    |s: &str| s.len() <= 4
                ),
                |s: &str| {
                    u32::from_str_radix(s, 16)
                        .map(|n| char::from_u32(n).unwrap_or(''))
                }
            )
        ).terminated(char('}')),
    )))(input)
}
 
use nom::sequence::preceded;
use nom::combinator::value;
use nom::multi::fold_many0;
use nom::sequence::terminated;
 
/// Parses the inside of a double-quoted string, resolving escape sequences.
///
/// The content is consumed as a sequence of fragments: either a run of
/// ordinary characters (anything except `"` and `\`) or a single escape
/// sequence. Fragments are appended to one accumulated `String`. Parsing
/// stops, without consuming it, at the first character that starts neither
/// kind of fragment (normally the closing quote).
fn parse_string_content(input: &str) -> IResult<&str, String> {
    fold_many0(
        alt((
            // Infallible conversions, so plain `map` rather than `map_res`.
            map(take_till1(|c: char| c == '"' || c == '\\'), |s: &str| s.to_owned()),
            map(parse_escaped_char, |c: char| c.to_string()),
        )),
        String::new,
        |mut acc, fragment: String| {
            acc.push_str(&fragment);
            acc
        },
    )(input)
}
 
fn parse_string(input: &str) -> IResult<&str, String> {
    delimited(char('"'), parse_string_content, char('"'))(input)
}
 
fn take_till1<T, Input, Error: nom::error::ParseError<Input>>(
    cond: T,
) -> impl FnMut(Input) -> IResult<Input, Input, Error>
where
    T: Fn(Input::Item) -> bool,
    Input: nom::InputTakeAtPosition,
{
    nom::bytes::complete::take_till1(cond)
}
 
fn main() {
    // A plain string followed by unparsed input.
    let (rest, text) = parse_string("\"hello world\"rest").unwrap();
    println!("String: '{}', remaining='{}'", text, rest);

    // The two-character sequence `\n` in the source becomes a real newline.
    let (_, text) = parse_string("\"line1\\nline2\"").unwrap();
    println!("Escaped: '{}'", text);
}

Parsing Key-Value Pairs

use nom::{
    bytes::complete::tag,
    character::complete::{alphanumeric1, space0, not_line_ending, newline},
    sequence::{separated_pair, terminated, preceded},
    multi::many1,
    combinator::map,
    IResult,
};
 
/// One `key=value` entry from a configuration file.
///
/// `Clone`, `PartialEq` and `Eq` are derived so parsed entries can be copied
/// and compared directly, for example with `assert_eq!` in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
struct KeyValue {
    /// Name on the left of the `=`.
    key: String,
    /// Text on the right of the `=`.
    value: String,
}
 
fn parse_line(input: &str) -> IResult<&str, KeyValue> {
    map(
        terminated(
            separated_pair(
                alphanumeric1,
                preceded(space0, tag("=")),
                preceded(space0, not_line_ending)
            ),
            newline
        ),
        |(key, value)| KeyValue {
            key: key.to_string(),
            value: value.trim().to_string(),
        }
    )(input)
}
 
/// Parses one or more consecutive configuration lines; fails if the input
/// does not start with at least one valid line.
fn parse_config(input: &str) -> IResult<&str, Vec<KeyValue>> {
    let mut lines = many1(parse_line);
    lines(input)
}
 
fn main() {
    let config = r#"host=localhost
port=8080
debug=true
"#;
    
    let (remaining, kvs) = parse_config(config).unwrap();
    println!("Parsed config:");
    for kv in kvs {
        println!("  {} = {}", kv.key, kv.value);
    }
}

Parsing JSON-like Data

use nom::{
    bytes::complete::tag,
    character::complete::{char, digit1, space0},
    sequence::delimited,
    branch::alt,
    multi::separated_list0,
    combinator::{map, map_res, value},
    IResult,
};
 
/// A parsed JSON-like value.
///
/// `PartialEq` is derived so parse results can be compared directly. `Eq` is
/// not, because `Number` holds an `f64`.
#[derive(Debug, Clone, PartialEq)]
enum JsonValue {
    /// The literal `null`.
    Null,
    /// `true` or `false`.
    Bool(bool),
    /// Any numeric literal, stored as a double.
    Number(f64),
    /// A string literal, without its quotes.
    String(String),
    /// An ordered list of values.
    Array(Vec<JsonValue>),
    /// Key/value pairs in source order.
    Object(Vec<(String, JsonValue)>),
}
 
fn parse_null(input: &str) -> IResult<&str, JsonValue> {
    value(JsonValue::Null, tag("null"))(input)
}
 
fn parse_bool(input: &str) -> IResult<&str, JsonValue> {
    alt((
        value(JsonValue::Bool(true), tag("true")),
        value(JsonValue::Bool(false), tag("false")),
    ))(input)
}
 
fn parse_number(input: &str) -> IResult<&str, JsonValue> {
    map_res(
        recognize(tuple((
            opt(char('-')),
            digit1,
            opt(tuple((
                char('.'),
                digit1
            )))
        ))),
        |s: &str| s.parse::<f64>().map(JsonValue::Number)
    )(input)
}
 
use nom::sequence::tuple;
use nom::combinator::opt;
 
fn parse_json_string(input: &str) -> IResult<&str, String> {
    delimited(
        char('"'),
        map_res(
            nom::bytes::complete::take_while(|c| c != '"'),
            |s: &str| s.to_string().into_result()
        ),
        char('"')
    )(input)
}
 
/// Adapter that lifts an infallible value into a `Result` with a `String`
/// error type, for use where an API demands a fallible function.
trait IntoResult<T> {
    /// Wraps `self` in `Ok`; never produces an error.
    fn into_result(self) -> Result<T, String>;
}

/// Blanket implementation: every value converts to `Ok(value)`.
impl<T> IntoResult<T> for T {
    fn into_result(self) -> Result<T, String> {
        Result::Ok(self)
    }
}
 
fn parse_json_value(input: &str) -> IResult<&str, JsonValue> {
    delimited(
        space0,
        alt((
            parse_null,
            parse_bool,
            parse_number,
            // parse_json_string would need more work
        )),
        space0
    )(input)
}
 
/// Parses a bracketed, comma-separated list of values into
/// `JsonValue::Array`. An empty list `[]` is accepted.
fn parse_array(input: &str) -> IResult<&str, JsonValue> {
    let elements = separated_list0(tag(","), parse_json_value);
    let (rest, items) = delimited(char('['), elements, char(']'))(input)?;
    Ok((rest, JsonValue::Array(items)))
}
 
fn main() {
    // Each parser is run on input with trailing text it leaves untouched.
    let (_, parsed) = parse_null("null rest").unwrap();
    println!("Null: {:?}", parsed);

    let (_, parsed) = parse_bool("true rest").unwrap();
    println!("Bool: {:?}", parsed);

    let (_, parsed) = parse_number("3.14 rest").unwrap();
    println!("Number: {:?}", parsed);

    let (_, parsed) = parse_array("[1, 2, 3]").unwrap();
    println!("Array: {:?}", parsed);
}

Parsing a Calculator Expression

use nom::{
    bytes::complete::tag,
    character::complete::{char, digit1, space0},
    sequence::{delimited, preceded},
    branch::alt,
    combinator::{map_res, map},
    IResult,
};
 
/// Abstract syntax tree of an integer arithmetic expression.
///
/// `Clone`, `PartialEq` and `Eq` are derived so trees can be copied and
/// compared structurally, for example with `assert_eq!` in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Expr {
    /// An integer literal.
    Number(i64),
    /// `lhs + rhs`
    Add(Box<Expr>, Box<Expr>),
    /// `lhs - rhs`
    Sub(Box<Expr>, Box<Expr>),
    /// `lhs * rhs`
    Mul(Box<Expr>, Box<Expr>),
    /// `lhs / rhs`
    Div(Box<Expr>, Box<Expr>),
}
 
fn parse_number(input: &str) -> IResult<&str, Expr> {
    map_res(digit1, |s: &str| s.parse::<i64>().map(Expr::Number))(input)
}
 
fn parse_parens(input: &str) -> IResult<&str, Expr> {
    delimited(
        preceded(space0, char('(')),
        parse_expr,
        preceded(space0, char(')'))
    )(input)
}
 
fn parse_atom(input: &str) -> IResult<&str, Expr> {
    preceded(space0, alt((parse_number, parse_parens)))(input)
}
 
/// Parses a chain of atoms joined by `*` or `/`, building a left-associative
/// tree (`a / b * c` becomes `(a / b) * c`).
///
/// The first atom is parsed up front, then zero or more `(operator, atom)`
/// pairs are collected and folded onto it. Collecting with `many0` and
/// folding with the iterator lets the already-parsed first atom be moved
/// into the fold as the initial value.
fn parse_term(input: &str) -> IResult<&str, Expr> {
    let (input, first) = parse_atom(input)?;

    // Fully qualified: `many0` and `pair` are not among this snippet's imports.
    let (input, tail) = nom::multi::many0(nom::sequence::pair(
        delimited(space0, alt((char('*'), char('/'))), space0),
        parse_atom,
    ))(input)?;

    let expr = tail.into_iter().fold(first, |acc, (op, right)| match op {
        '*' => Expr::Mul(Box::new(acc), Box::new(right)),
        '/' => Expr::Div(Box::new(acc), Box::new(right)),
        _ => unreachable!("operator parser only accepts '*' and '/'"),
    });
    Ok((input, expr))
}
 
/// Parses a chain of terms joined by `+` or `-`, building a left-associative
/// tree (`a - b + c` becomes `(a - b) + c`).
///
/// Terms bind tighter than this level, which is what gives `*` and `/`
/// precedence over `+` and `-`. The first term is parsed up front, then zero
/// or more `(operator, term)` pairs are collected and folded onto it.
fn parse_expr(input: &str) -> IResult<&str, Expr> {
    let (input, first) = parse_term(input)?;

    // Fully qualified: `many0` and `pair` are not among this snippet's imports.
    let (input, tail) = nom::multi::many0(nom::sequence::pair(
        delimited(space0, alt((char('+'), char('-'))), space0),
        parse_term,
    ))(input)?;

    let expr = tail.into_iter().fold(first, |acc, (op, right)| match op {
        '+' => Expr::Add(Box::new(acc), Box::new(right)),
        '-' => Expr::Sub(Box::new(acc), Box::new(right)),
        _ => unreachable!("operator parser only accepts '+' and '-'"),
    });
    Ok((input, expr))
}
 
/// Recursively evaluates an expression tree to an integer.
///
/// The left operand is always evaluated before the right one.
///
/// # Panics
///
/// Panics on division by zero, and on arithmetic overflow when overflow
/// checks are enabled.
fn eval(expr: &Expr) -> i64 {
    match *expr {
        Expr::Number(value) => value,
        Expr::Add(ref lhs, ref rhs) => {
            let (left, right) = (eval(lhs), eval(rhs));
            left + right
        }
        Expr::Sub(ref lhs, ref rhs) => {
            let (left, right) = (eval(lhs), eval(rhs));
            left - right
        }
        Expr::Mul(ref lhs, ref rhs) => {
            let (left, right) = (eval(lhs), eval(rhs));
            left * right
        }
        Expr::Div(ref lhs, ref rhs) => {
            let (left, right) = (eval(lhs), eval(rhs));
            left / right
        }
    }
}
 
fn main() {
    // Sample inputs covering precedence and parentheses.
    let samples = ["1 + 2", "3 * 4 + 5", "(1 + 2) * 3", "10 - 2 * 3"];

    for source in samples.iter().copied() {
        let (_, tree) = parse_expr(source).unwrap();
        println!("{} = {}", source, eval(&tree));
    }
}

Parsing HTTP Request

use nom::{
    bytes::complete::{tag, take_until, take_while},
    character::complete::{alphanumeric1, not_line_ending, newline, space0, space1},
    sequence::{preceded, terminated, tuple},
    multi::many1,
    combinator::map,
    IResult,
};
 
/// A parsed HTTP request.
///
/// `Clone`, `PartialEq` and `Eq` are derived so requests can be copied and
/// compared directly, for example with `assert_eq!` in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
struct HttpRequest {
    /// Request method, e.g. `GET`.
    method: String,
    /// Request target, e.g. `/index.html`.
    path: String,
    /// Version text following `HTTP/`, e.g. `1.1`.
    version: String,
    /// Header name/value pairs in the order they appeared.
    headers: Vec<(String, String)>,
    /// Everything after the blank line that ends the headers.
    body: String,
}
 
fn parse_request_line(input: &str) -> IResult<&str, (String, String, String)> {
    map(
        terminated(
            tuple((
                map(alphanumeric1, String::from),
                preceded(space1, take_while(|c| c != ' ')),
                preceded(space1, preceded(tag("HTTP/"), not_line_ending))
            )),
            newline
        ),
        |(method, path, version)| (method, path.to_string(), version.to_string())
    )(input)
}
 
fn parse_header(input: &str) -> IResult<&str, (String, String)> {
    map(
        terminated(
            tuple((
                map(take_until(":"), String::from),
                preceded(tag(": "), map(not_line_ending, String::from))
            )),
            newline
        ),
        |(key, value)| (key, value)
    )(input)
}
 
fn parse_headers(input: &str) -> IResult<&str, Vec<(String, String)>> {
    terminated(many1(parse_header), newline)(input)
}
 
/// Parses a full HTTP request: request line, headers, then body.
///
/// Everything after the header section is taken verbatim as the body, so the
/// returned remaining input is always empty.
fn parse_http_request(input: &str) -> IResult<&str, HttpRequest> {
    let (after_request_line, (method, path, version)) = parse_request_line(input)?;
    let (body, headers) = parse_headers(after_request_line)?;

    let request = HttpRequest {
        method,
        path,
        version,
        headers,
        body: body.to_string(),
    };
    Ok(("", request))
}
 
fn main() {
    let request = "GET /index.html HTTP/1.1\nHost: example.com\nUser-Agent: Test\nAccept: */*\n\n";

    // The remaining input is always empty for this parser, so it is ignored.
    let (_, parsed) = parse_http_request(request).unwrap();
    println!("Method: {}", parsed.method);
    println!("Path: {}", parsed.path);
    println!("Version: {}", parsed.version);
    println!("Headers:");
    parsed
        .headers
        .into_iter()
        .for_each(|(name, content)| println!("  {}: {}", name, content));
}

Parsing URL

use nom::{
    bytes::complete::{tag, take_until, take_while, take_while1},
    character::complete::char,
    sequence::{preceded, tuple},
    combinator::{map, opt, rest},
    branch::alt,
    IResult,
};
 
/// The components of a parsed URL.
///
/// `Clone`, `PartialEq` and `Eq` are derived so URLs can be copied and
/// compared directly, for example with `assert_eq!` in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Url {
    /// Scheme before `://`, e.g. `https`.
    scheme: String,
    /// Host name.
    host: String,
    /// Port, if one was given after the host.
    port: Option<u16>,
    /// Path component.
    path: String,
    /// Query string after `?`, if present.
    query: Option<String>,
}
 
/// Parses a URL scheme: a non-empty run of alphanumeric characters, `+`,
/// `-` or `.`.
fn parse_scheme(input: &str) -> IResult<&str, String> {
    let is_scheme_char = |c: char| c.is_alphanumeric() || matches!(c, '+' | '-' | '.');
    let (rest, scheme) = take_while1(is_scheme_char)(input)?;
    Ok((rest, scheme.to_string()))
}
 
/// Parses a host name: a non-empty run of alphanumeric characters, `.` or
/// `-`.
fn parse_host(input: &str) -> IResult<&str, String> {
    let is_host_char = |c: char| c.is_alphanumeric() || matches!(c, '.' | '-');
    let (rest, host) = take_while1(is_host_char)(input)?;
    Ok((rest, host.to_string()))
}
 
fn parse_url(input: &str) -> IResult<&str, Url> {
    let (input, scheme) = parse_scheme(input)?;
    let (input, _) = tag("://")(input)?;
    let (input, host) = parse_host(input)?;
    let (input, port) = opt(preceded(char(':'),
        map(take_while(|c: char| c.is_ascii_digit()), |s: &str| s.parse().unwrap())
    ))(input)?;
    let (input, path) = alt((
        map(preceded(char('/'), take_until("?")), |s: &str| format!("/{}", s)),
        map(take_until("?"), |s: &str| s.to_string()),
        map(rest, |s: &str| if s.is_empty() { "/".to_string() } else { s.to_string() }),
    ))(input)?;
    let (input, query) = opt(preceded(char('?'), rest))(input)?;
    
    Ok((input, Url {
        scheme,
        host,
        port,
        path,
        query: query.map(String::from),
    }))
}
 
fn main() {
    // One URL with nothing optional, one with port and path, one with a query.
    let samples = [
        "https://example.com",
        "http://localhost:8080/api",
        "https://api.example.com:443/users?id=123",
    ];

    for url in samples.iter().copied() {
        let outcome = parse_url(url);
        if let Ok((_, parsed)) = outcome {
            println!("{:?}", parsed);
        } else if let Err(e) = outcome {
            println!("Error parsing '{}': {:?}", url, e);
        }
    }
}

Error Handling

use nom::{
    bytes::complete::tag,
    character::complete::digit1,
    error::{Error, ErrorKind, VerboseError, VerboseErrorKind},
    Err as NomErr,
    IResult,
};
 
fn parse_with_error(input: &str) -> IResult<&str, i32, VerboseError<&str>> {
    let (input, _) = tag("value: ")(input)?;
    let (input, num) = digit1(input)?;
    let (_, num) = num.parse::<i32>().map_err(|_| {
        NomErr::Error(VerboseError::from_error_kind(input, ErrorKind::Digit))
    })?;
    Ok((input, num))
}
 
fn main() {
    // Success
    match parse_with_error("value: 42") {
        Ok((rem, num)) => println!("Parsed: {}, remaining: '{}'", num, rem),
        Err(e) => println!("Error: {:?}", e),
    }
    
    // Failure with verbose error
    match parse_with_error("wrong: 42") {
        Ok((rem, num)) => println!("Parsed: {}", num),
        Err(NomErr::Error(e)) => {
            println!("Parse error at position {}:", e.errors.first().map(|(i, _)| i).unwrap());
        }
        Err(_) => println!("Other error"),
    }
}

Real-World: Parsing CSV

use nom::{
    bytes::complete::{tag, take_till},
    character::complete::{char, newline, not_line_ending},
    sequence::terminated,
    multi::separated_list1,
    combinator::opt,
    IResult,
};
 
/// Parses one CSV field: everything up to the next `,` or `\n`, trimmed.
/// An empty field is accepted.
fn parse_field(input: &str) -> IResult<&str, String> {
    let (rest, raw) = take_till(|c: char| c == ',' || c == '\n')(input)?;
    Ok((rest, raw.trim().to_string()))
}
 
/// Parses one CSV row: comma-separated fields, then an optional `\n` which
/// is consumed if present.
fn parse_csv_line(input: &str) -> IResult<&str, Vec<String>> {
    let (rest, fields) = separated_list1(tag(","), parse_field)(input)?;
    let (rest, _) = opt(newline)(rest)?;
    Ok((rest, fields))
}
 
fn parse_csv(input: &str) -> IResult<&str, Vec<Vec<String>>> {
    separated_list1(newline, parse_csv_line)(input)
}
 
fn main() {
    let csv = "name,age,city\nAlice,30,NYC\nBob,25,LA\nCharlie,35,Chicago";

    let (_, table) = parse_csv(csv).unwrap();

    println!("CSV Data:");
    table.iter().for_each(|row| println!("  {:?}", row));
}

Real-World: Parsing Log Lines

use nom::{
    bytes::complete::tag,
    character::complete::{char, digit1, not_line_ending, space1},
    sequence::{preceded, tuple},
    combinator::map,
    IResult,
};
 
/// One parsed log line.
///
/// `Clone`, `PartialEq` and `Eq` are derived so entries can be copied and
/// compared directly, for example with `assert_eq!` in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
struct LogEntry {
    /// Timestamp text exactly as it appeared in the line.
    timestamp: String,
    /// Text between the square brackets, e.g. `INFO`.
    level: String,
    /// Remainder of the line after the level.
    message: String,
}
 
fn parse_timestamp(input: &str) -> IResult<&str, String> {
    map(
        tuple((
            digit1, char('-'), digit1, char('-'), digit1,
            char('T'),
            digit1, char(':'), digit1, char(':'), digit1
        )),
        |t: (&str, char, &str, char, &str, char, &str, char, &str, char, &str)| {
            format!("{}{}{}{}{}{}{}{}{}{}{}",
                    t.0, t.1, t.2, t.3, t.4, t.5, t.6, t.7, t.8, t.9, t.10)
        }
    )(input)
}
 
/// Parses the opening `[` and the level text after it, stopping before the
/// closing `]`, which is left in the input for the caller to consume.
fn parse_log_level(input: &str) -> IResult<&str, String> {
    let (rest, _) = char('[')(input)?;
    let (rest, level) = nom::bytes::complete::take_while(|c: char| c != ']')(rest)?;
    Ok((rest, level.to_string()))
}
 
fn parse_log_entry(input: &str) -> IResult<&str, LogEntry> {
    let (input, timestamp) = parse_timestamp(input)?;
    let (input, _) = space1(input)?;
    let (input, level) = parse_log_level(input)?;
    let (input, _) = tag("] ")(input)?;
    let (input, message) = map(not_line_ending, String::from)(input)?;
    
    Ok((input, LogEntry {
        timestamp,
        level,
        message,
    }))
}
 
fn main() {
    let log = "2024-01-15T10:30:45 [INFO] Server started on port 8080\n2024-01-15T10:30:46 [WARN] Connection pool running low";

    // Each line is parsed on its own; a bad line is reported, not fatal.
    log.lines().for_each(|line| {
        if let Ok((_, entry)) = parse_log_entry(line) {
            println!("{:?}", entry);
        } else {
            println!("Failed to parse: {}", line);
        }
    });
}

Real-World: Parsing Command Arguments

use nom::{
    bytes::complete::{tag, take_while},
    character::complete::{alphanumeric1, char, space0, space1},
    sequence::preceded,
    branch::alt,
    multi::many0,
    combinator::{map, opt},
    IResult,
};
 
/// One parsed command-line argument.
///
/// `PartialEq` and `Eq` are derived so parsed arguments can be compared
/// directly, for example with `assert_eq!` in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Arg {
    /// A switch without a value: `--verbose` or `-f`.
    Flag(String),
    /// A `--name=value` pair, stored as `(name, value)`.
    Option(String, String),
    /// A bare argument such as a file name.
    Positional(String),
}
 
/// Parses a long flag `--name` (alphanumeric name) into `Arg::Flag`.
fn parse_flag(input: &str) -> IResult<&str, Arg> {
    let (rest, name) = preceded(tag("--"), alphanumeric1)(input)?;
    Ok((rest, Arg::Flag(name.to_string())))
}
 
fn parse_short_flag(input: &str) -> IResult<&str, Arg> {
    map(
        preceded(char('-'), char),
        |c: char| Arg::Flag(c.to_string())
    )(input)
}
 
fn parse_option(input: &str) -> IResult<&str, Arg> {
    let (input, name) = preceded(tag("--"), alphanumeric1)(input)?;
    let (input, _) = preceded(space0, char('='))(input)?;
    let (input, value) = take_while(|c| c != ' ')(input)?;
    
    Ok((input, Arg::Option(name.to_string(), value.to_string())))
}
 
fn parse_positional(input: &str) -> IResult<&str, Arg> {
    map(
        take_while(|c: char| !c.is_whitespace()),
        |s: &str| Arg::Positional(s.to_string())
    )(input)
}
 
fn parse_arg(input: &str) -> IResult<&str, Arg> {
    preceded(
        space0,
        alt((parse_option, parse_flag, parse_short_flag, parse_positional))
    )(input)
}
 
/// Parses zero or more arguments from a command line.
fn parse_args(input: &str) -> IResult<&str, Vec<Arg>> {
    let mut arguments = many0(parse_arg);
    arguments(input)
}
 
fn main() {
    let cmd = "cmd --verbose --output=file.txt -f input.txt output.txt";

    let (_, parsed) = parse_args(cmd).unwrap();

    println!("Parsed arguments:");
    parsed.iter().for_each(|arg| println!("  {:?}", arg));
}

Debugging Parsers

use nom::{
    bytes::complete::tag,
    character::complete::digit1,
    combinator::peek,
    IResult,
};
 
/// Wraps `parser` so that every call logs its input and its outcome to
/// stdout, each line prefixed with `[name]`.
///
/// The result of the wrapped parser is returned unchanged.
///
/// `name` is copied into the returned closure. Capturing the borrowed `&str`
/// would tie the closure to a lifetime that the `impl Fn` return type does
/// not mention, which editions before 2024 reject.
fn debug_parser<I: std::fmt::Debug, O: std::fmt::Debug, E: std::fmt::Debug>(
    name: &str,
    parser: impl Fn(I) -> IResult<I, O, E>,
) -> impl Fn(I) -> IResult<I, O, E> {
    let name = name.to_owned();
    move |input: I| {
        println!("[{}] Input: {:?}", name, input);
        let outcome = parser(input);
        match &outcome {
            Ok((remaining, output)) => {
                println!("[{}] Success: {:?}", name, output);
                println!("[{}] Remaining: {:?}", name, remaining);
            }
            Err(e) => {
                println!("[{}] Error: {:?}", name, e);
            }
        }
        outcome
    }
}
 
fn main() {
    let parser = debug_parser("number", digit1);
    let result = parser("123abc");
    println!("Result: {:?}\n", result);
    
    let parser = debug_parser("tag_hello", tag("hello"));
    let result = parser("hello world");
    println!("Result: {:?}", result);
}

Summary

  • IResult<Input, Output> returns Ok((remaining, parsed)) or Err
  • tag("literal") matches exact string
  • take(n) takes exactly n characters/bytes
  • alphanumeric1, digit1, space1 match character classes
  • map(parser, f) transforms parser output
  • map_res(parser, f) transforms with fallible function
  • alt((p1, p2, ...)) tries alternatives
  • tuple((p1, p2, ...)) sequences parsers
  • preceded(skip, keep), terminated(keep, skip), delimited(open, keep, close) for sequences
  • many0, many1 for repetition (zero/one or more)
  • separated_list0(sep, elem) parses delimited lists
  • opt(parser) makes parser optional
  • Use fold_many0 (or many0 plus a fold) to build left-associative operator chains without writing a left-recursive grammar
  • VerboseError provides detailed error messages
  • Build complex parsers by composing simple ones
  • Perfect for: parsers, protocols, config files, DSLs, text processing