How do I benchmark Rust code?

Walkthrough

Proper benchmarking is crucial for performance optimization. The criterion crate provides statistically rigorous benchmarking with automatic warmup, multiple runs, and detailed statistical analysis. It detects performance regressions and generates HTML reports with beautiful charts.

Key features:

  1. Automatic iteration count calibration—no manual tuning needed
  2. Statistical analysis including confidence intervals and outliers
  3. Comparison against previous runs to detect regressions
  4. HTML reports with visualizations
  5. Works with both synchronous and async code

Criterion is far more reliable than ad-hoc timing with Instant::now() because it handles warmup, cache effects, and statistical significance.

Code Example

# Cargo.toml
# criterion is only needed when running `cargo bench`, so it goes in
# [dev-dependencies]; putting it in [dependencies] would pull it into
# release builds of the crate itself.
[dev-dependencies]
criterion = "0.5"

# harness = false disables the default libtest bench harness so that
# Criterion can supply its own main() via criterion_main!.
[[bench]]
name = "my_benchmark"
harness = false

Create benches/my_benchmark.rs:

use criterion::{black_box, criterion_group, criterion_main, Criterion};
 
// The function to benchmark
/// Naive recursive Fibonacci — intentionally exponential-time, used as the
/// slow baseline implementation in the benchmarks below.
fn fibonacci(n: u64) -> u64 {
    if n < 2 {
        n
    } else {
        fibonacci(n - 1) + fibonacci(n - 2)
    }
}
 
// A more efficient version
/// Iterative Fibonacci: O(n) time, O(1) space. The fold threads the last
/// two values of the sequence through the iteration.
fn fibonacci_iterative(n: u64) -> u64 {
    match n {
        0 => 0,
        _ => (1..n)
            .fold((0u64, 1u64), |(prev, curr), _| (curr, prev + curr))
            .1,
    }
}
 
/// Registers three sets of Fibonacci benchmarks with Criterion:
/// two standalone measurements and a grouped comparison across inputs.
fn benchmark_fibonacci(c: &mut Criterion) {
    // Standalone measurement of the recursive implementation.
    c.bench_function("fibonacci 20", |b| b.iter(|| fibonacci(black_box(20))));

    // Standalone measurement of the iterative implementation.
    c.bench_function("fibonacci_iterative 20", |b| {
        b.iter(|| fibonacci_iterative(black_box(20)))
    });

    // Head-to-head comparison of both implementations over several inputs;
    // grouping makes the HTML report plot them on a common axis.
    let mut group = c.benchmark_group("fibonacci_comparison");
    for &n in &[10u64, 15, 20, 25] {
        group.bench_with_input(format!("recursive_{}", n), &n, |b, &n| {
            b.iter(|| fibonacci(black_box(n)))
        });
        group.bench_with_input(format!("iterative_{}", n), &n, |b, &n| {
            b.iter(|| fibonacci_iterative(black_box(n)))
        });
    }
    group.finish();
}
 
// Register the benchmark function
criterion_group!(benches, benchmark_fibonacci);
 
// Generate main function to run benchmarks
criterion_main!(benches);

Run benchmarks:

cargo bench

Benchmarking with Inputs

use criterion::{black_box, criterion_group, criterion_main, Criterion, BatchSize};
 
/// Sorts the data in ascending order (stable sort).
///
/// Takes `&mut [i32]` rather than `&mut Vec<i32>`: a mutable slice is the
/// idiomatic borrowed view, and existing callers passing `&mut vec` still
/// work unchanged via deref coercion.
fn sort_vector(data: &mut [i32]) {
    data.sort();
}
 
/// Builds a vector of `size` descending integers (size-1 down to 0);
/// reverse-sorted input is a worst case for many sorting routines.
fn generate_data(size: usize) -> Vec<i32> {
    // `0..size` yields `usize` items, which cannot `collect()` directly
    // into `Vec<i32>` (the original version did not compile) — convert
    // each element explicitly.
    (0..size).rev().map(|x| x as i32).collect()
}
 
/// Benchmarks sorting of reverse-ordered vectors at several sizes.
fn benchmark_sorting(c: &mut Criterion) {
    let mut group = c.benchmark_group("sorting");

    for &size in &[100, 1000, 10000] {
        group.bench_with_input(format!("size_{}", size), &size, |b, &size| {
            // iter_batched creates a fresh input for each batch via the
            // setup closure; only the routine closure (the sort) is timed,
            // so data generation never pollutes the measurement.
            b.iter_batched(
                || generate_data(size),
                |mut data| sort_vector(&mut data),
                BatchSize::SmallInput,
            )
        });
    }

    group.finish();
}
 
criterion_group!(benches, benchmark_sorting);
criterion_main!(benches);

Async Benchmarking

use criterion::{criterion_group, criterion_main, Criterion, BatchSize};
use tokio::runtime::Runtime;
 
/// Simulated async workload: a short sleep followed by a trivial computation.
async fn async_operation(n: u64) -> u64 {
    let delay = std::time::Duration::from_micros(10);
    tokio::time::sleep(delay).await;
    n * 2
}
 
/// Benchmarks `async_operation` by driving it on a shared Tokio runtime.
fn benchmark_async(c: &mut Criterion) {
    // Build the runtime once, outside the measured closure — constructing
    // it per iteration would dominate and distort the timings.
    let rt = Runtime::new().unwrap();

    c.bench_function("async_operation", |b| {
        // The original called the bare `black_box`, but this example's
        // `use criterion::{...}` line never imports it, so it would not
        // compile. `std::hint::black_box` (stable since Rust 1.66) is
        // used fully qualified to keep the snippet self-contained.
        b.to_async(&rt)
            .iter(|| async_operation(std::hint::black_box(100)))
    });
}
 
criterion_group!(benches, benchmark_async);
criterion_main!(benches);

Comparing Against Previous Runs

use criterion::{criterion_group, criterion_main, Criterion};
 
/// A benchmark group whose results can be saved as a named baseline and
/// compared against on later runs (see the CLI flags below).
fn benchmark_with_baseline(c: &mut Criterion) {
    let mut group = c.benchmark_group("comparison");

    group.bench_function("current_version", |b| {
        // Your optimized code
        b.iter(|| (0..1000).sum::<u64>())
    });

    group.finish();
}
 
criterion_group!(benches, benchmark_with_baseline);
criterion_main!(benches);

Run with baseline comparison:

cargo bench -- --save-baseline before   # 1. Save the current results as a baseline named "before"
# ...make your code changes...
cargo bench -- --baseline before        # 2. Re-run and compare the new results against the saved baseline

Summary

  • Add [[bench]] section to Cargo.toml with harness = false to use Criterion instead of libtest
  • Use black_box() to prevent the compiler from optimizing away computations
  • bench_function() for single benchmarks; benchmark_group() for related benchmarks with inputs
  • iter_batched() provides setup/teardown for each iteration when needed
  • Use .to_async() with a Tokio runtime for async code
  • Run cargo bench to execute; check target/criterion/ for HTML reports
  • Save and compare baselines with --save-baseline and --baseline flags to detect regressions
  • Criterion automatically handles warmup, calibration, and statistical analysis