How do I write benchmarks with criterion in Rust?
Walkthrough
The criterion crate is a statistics-driven benchmarking library for Rust that provides accurate and reliable performance measurements. Unlike basic timing approaches, criterion uses statistical analysis to detect small regressions, handle outliers, and provide confidence intervals. It automatically runs multiple iterations, warms up caches, and produces detailed reports including HTML visualizations. Criterion is essential for performance-critical code, detecting regressions in CI/CD, and optimizing hot paths.
Key concepts:
- Benchmark Groups — organize related benchmarks together
- Bencher — provides methods for benchmarking code
- Throughput — measure bytes/elements processed per second
- Comparison — compare current run against previous results
- HTML Reports — generate visual benchmark reports
Code Example
# Cargo.toml
[dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
[[bench]]
name = "my_benchmark"
harness = false

// benches/my_benchmark.rs
use criterion::{black_box, criterion_group, criterion_main, Criterion};
/// Naive doubly-recursive Fibonacci — deliberately slow, since it is
/// the workload being measured. Note this variant maps both 0 and 1
/// to 1, matching criterion's getting-started example.
fn fibonacci(n: u64) -> u64 {
    if n < 2 {
        1
    } else {
        fibonacci(n - 1) + fibonacci(n - 2)
    }
}
/// Registers the "fib 20" benchmark. `black_box` keeps the constant
/// argument opaque so the call cannot be constant-folded away.
fn criterion_benchmark(c: &mut Criterion) {
    c.bench_function("fib 20", |b| {
        b.iter(|| fibonacci(black_box(20)))
    });
}
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);

Basic Benchmark Setup
// benches/basic.rs
use criterion::{black_box, criterion_group, criterion_main, Criterion};
/// Trivially cheap operation — here mainly to demonstrate per-call
/// benchmark overhead and black_box usage.
fn add(a: i32, b: i32) -> i32 {
    a + b
}
/// Benchmarks `add`; both operands pass through black_box so the
/// compiler cannot precompute the result.
fn bench_add(c: &mut Criterion) {
    c.bench_function("add", |b| {
        b.iter(|| add(black_box(10), black_box(20)));
    });
}
criterion_group!(benches, bench_add);
criterion_main!(benches);

Run with:

cargo bench

Understanding black_box
use criterion::{black_box, criterion_group, criterion_main, Criterion};
// black_box prevents compiler optimizations
// Without it, the compiler might optimize away our computation
/// Sums 1..=n. Without black_box the optimizer may reduce this to
/// the closed-form expression or a compile-time constant.
fn expensive_computation(n: u64) -> u64 {
    let mut total = 0;
    for i in 1..=n {
        total += i;
    }
    total
}
/// Shows the two placements of black_box: on the input and on the
/// output of the timed expression.
fn bench_computation(c: &mut Criterion) {
    // Opaque input: the argument is decided "at runtime".
    c.bench_function("sum 1 to 1000", |b| {
        b.iter(|| expensive_computation(black_box(1000)));
    });
    // Opaque output: forces the result to be observed/used.
    c.bench_function("sum with result black_box", |b| {
        b.iter(|| black_box(expensive_computation(1000)));
    });
}
criterion_group!(benches, bench_computation);
criterion_main!(benches);

Benchmark Groups
use criterion::{criterion_group, criterion_main, Criterion};
/// Sorts the vector in place.
/// NOTE(review): `&mut Vec<i32>` could be `&mut [i32]` (callers coerce
/// automatically); kept as-is to contrast with `sort_slice` below.
fn sort_vec(data: &mut Vec<i32>) {
    data.sort();
}
/// Sorts a slice in place — the more general signature (accepts Vec,
/// array, or slice borrows). Defined for comparison; not actually
/// benchmarked below.
fn sort_slice(data: &mut [i32]) {
    data.sort();
}
/// Groups two sort benchmarks at different input sizes.
/// Fix: `data` was declared `mut` in both closures but never mutated
/// (only cloned inside the timed closure), triggering unused_mut
/// warnings — the `mut` is removed.
fn bench_sorting(c: &mut Criterion) {
    let mut group = c.benchmark_group("sorting");
    // Benchmark vec sorting at 100 elements (reverse-sorted input).
    group.bench_function("sort vec 100", |b| {
        let data: Vec<i32> = (0..100).rev().collect();
        b.iter(|| {
            // Clone inside the iteration so each run sorts fresh data;
            // returning `v` keeps the sorted Vec observable.
            let mut v = data.clone();
            sort_vec(&mut v);
            v
        })
    });
    // Same benchmark at a larger size.
    group.bench_function("sort vec 1000", |b| {
        let data: Vec<i32> = (0..1000).rev().collect();
        b.iter(|| {
            let mut v = data.clone();
            sort_vec(&mut v);
            v
        })
    });
    group.finish();
}
criterion_group!(benches, bench_sorting);
criterion_main!(benches);

Throughput Measurement
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
/// Folds every byte into a u64 sum — a simple stand-in for
/// byte-oriented processing work.
fn process_data(data: &[u8]) -> u64 {
    data.iter().fold(0u64, |acc, &b| acc + u64::from(b))
}
/// Demonstrates byte-based throughput reporting.
fn bench_throughput(c: &mut Criterion) {
    // 1 MiB of repeating byte values 0..=255.
    let data: Vec<u8> = (0..=255).cycle().take(1024 * 1024).collect();
    let mut group = c.benchmark_group("throughput");
    // Declaring the byte count per iteration lets criterion report
    // rates (e.g. GiB/s) instead of raw times.
    group.throughput(Throughput::Bytes(data.len() as u64));
    group.bench_function("process 1MB", |b| b.iter(|| process_data(&data)));
    group.finish();
}
criterion_group!(benches, bench_throughput);
criterion_main!(benches);

Elements Throughput
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
/// Linear scan; returns the index of the first matching element,
/// or None when the needle is absent.
fn find_item(haystack: &[i32], needle: i32) -> Option<usize> {
    for (idx, &value) in haystack.iter().enumerate() {
        if value == needle {
            return Some(idx);
        }
    }
    None
}
/// Measures linear search, reporting throughput in elements scanned.
/// Fix: this example used `black_box` without importing it (the `use`
/// line above only brings in Criterion/Throughput); the std version
/// is fully qualified so the snippet compiles.
fn bench_search(c: &mut Criterion) {
    let mut group = c.benchmark_group("search");
    let data: Vec<i32> = (0..10_000).collect();
    // Elements (not bytes): each iteration scans data.len() items.
    group.throughput(Throughput::Elements(data.len() as u64));
    group.bench_function("linear search", |b| {
        // Worst case: the needle is the last element.
        b.iter(|| find_item(&data, std::hint::black_box(9999)))
    });
    group.finish();
}
criterion_group!(benches, bench_search);
criterion_main!(benches);

Parameterized Benchmarks
use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId};
/// Counts primes in 2..=limit by trial division (see `is_prime`).
fn count_primes(limit: usize) -> usize {
    let mut count = 0;
    for n in 2..=limit {
        if is_prime(n) {
            count += 1;
        }
    }
    count
}
/// Trial-division primality test.
/// Fix: the original bound `(n as f64).sqrt() as usize` relies on
/// float rounding and can be off by one for large n; the integer
/// condition `i * i <= n` is exact.
fn is_prime(n: usize) -> bool {
    // 0 and 1 are not prime.
    if n < 2 {
        return false;
    }
    // Try every divisor i with i*i <= n.
    (2..).take_while(|&i| i * i <= n).all(|i| n % i != 0)
}
/// Parameterized benchmark: one entry per input size, labelled with
/// BenchmarkId so reports show e.g. "primes/count/1000".
fn bench_primes(c: &mut Criterion) {
    let mut group = c.benchmark_group("primes");
    for &size in &[100, 1000, 10000, 100000] {
        group.bench_with_input(BenchmarkId::new("count", size), &size, |b, &size| {
            b.iter(|| count_primes(size));
        });
    }
    group.finish();
}
criterion_group!(benches, bench_primes);
criterion_main!(benches);

Comparing Implementations
use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId};
// Different implementations
/// Iterator-based sum.
fn sum_iter(data: &[i32]) -> i32 {
    data.iter().copied().sum()
}
/// Explicit accumulator loop.
fn sum_loop(data: &[i32]) -> i32 {
    let mut total = 0;
    for &n in data.iter() {
        total += n;
    }
    total
}
/// fold-based sum.
fn sum_fold(data: &[i32]) -> i32 {
    data.iter().copied().fold(0, |acc, n| acc + n)
}
/// Benchmarks three equivalent sum implementations side by side so
/// their reports land in the same group for easy comparison.
fn bench_implementations(c: &mut Criterion) {
    let mut group = c.benchmark_group("sum implementations");
    let data: Vec<i32> = (0..10000).collect();
    // A function-pointer table lets one loop register all three.
    let impls: [(&str, fn(&[i32]) -> i32); 3] =
        [("iter", sum_iter), ("loop", sum_loop), ("fold", sum_fold)];
    for (name, func) in impls {
        group.bench_function(name, |b| b.iter(|| func(&data)));
    }
    group.finish();
}
criterion_group!(benches, bench_implementations);
criterion_main!(benches);

Comparing Multiple Input Sizes
use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId};
/// Simple polynomial (base-31, Java-style) rolling hash over the
/// string's bytes, using wrapping arithmetic to avoid overflow panics.
fn hash_string(s: &str) -> u64 {
    s.bytes()
        .fold(0u64, |h, byte| h.wrapping_mul(31).wrapping_add(u64::from(byte)))
}
/// Benchmarks the hash over several input lengths, with per-input
/// byte throughput so rates are comparable across sizes.
fn bench_hash_sizes(c: &mut Criterion) {
    let mut group = c.benchmark_group("hash_string");
    // (label, byte length) for each size class.
    let sizes = [("small", 10usize), ("medium", 100), ("large", 1000)];
    for (name, size) in sizes {
        let data = "x".repeat(size);
        // Throughput is set per benchmark because sizes differ.
        group.throughput(criterion::Throughput::Bytes(size as u64));
        group.bench_with_input(BenchmarkId::new("hash", name), &data, |b, data| {
            b.iter(|| hash_string(data));
        });
    }
    group.finish();
}
criterion_group!(benches, bench_hash_sizes);
criterion_main!(benches);

Custom Configuration
use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId};
use std::time::Duration;
/// Cheap CPU-bound placeholder: sum of 0..n.
fn work(n: usize) -> usize {
    let mut acc = 0;
    for i in 0..n {
        acc += i;
    }
    acc
}
/// Shows per-group configuration knobs.
/// Fixes: (1) `black_box` was used but never imported in this example
/// (the `use` line above omits it) — the std version is fully
/// qualified; (2) the comment on `nresamples` was wrong: it sets the
/// number of bootstrap resamples for the statistical analysis, not
/// warm-up iterations.
fn bench_custom_config(c: &mut Criterion) {
    let mut group = c.benchmark_group("custom_config");
    // Custom sample size (default: 100)
    group.sample_size(50);
    // Custom measurement time (default: 5 seconds)
    group.measurement_time(Duration::from_secs(10));
    // Custom warm-up time (default: 3 seconds)
    group.warm_up_time(Duration::from_secs(2));
    // Number of bootstrap resamples used by the statistical analysis
    // (default: 100_000) — NOT warm-up iterations.
    group.nresamples(10_000);
    group.bench_function("work 1000", |b| {
        b.iter(|| work(std::hint::black_box(1000)))
    });
    group.finish();
}
criterion_group!(benches, bench_custom_config);
criterion_main!(benches);

Benchmarking with Setup
use criterion::{criterion_group, criterion_main, Criterion, BatchSize};
/// Consumes its `String` argument — this by-value signature is why the
/// benchmark below needs `iter_batched` to supply a fresh input for
/// each iteration.
fn process_string(s: String) -> String {
    s.to_uppercase()
}
fn bench_with_setup(c: &mut Criterion) {
c.bench_function("process_string", |b| {
// iter_batched runs setup for each iteration
b.iter_batched(
|| "hello world".to_string(), // setup
|s| process_string(s), // routine
BatchSize::SmallInput, // batch size
)
});
}
criterion_group!(benches, bench_with_setup);
criterion_main!(benches);

Batch Sizes
use criterion::{criterion_group, criterion_main, Criterion, BatchSize};
/// Builds the Vec<i32> [0, 1, .., n-1] — untimed setup for the sort
/// benchmark.
/// Fix: the original `(0..n).collect()` yields usize items and does
/// not type-check against `Vec<i32>`; convert each element explicitly.
fn create_vec(n: usize) -> Vec<i32> {
    (0..n).map(|i| i as i32).collect()
}
/// In-place sort used as the timed routine for `iter_batched_ref`.
/// NOTE(review): `&mut [i32]` would be the more general parameter type.
fn sort_vec(v: &mut Vec<i32>) {
    v.sort();
}
fn bench_batch_sizes(c: &mut Criterion) {
let mut group = c.benchmark_group("batching");
// iter_batched_ref for mutable references
group.bench_function("sort 1000", |b| {
b.iter_batched_ref(
|| create_vec(1000), // setup
|v| sort_vec(v), // routine (gets &mut Vec)
BatchSize::SmallInput,
)
});
// Different batch sizes
// SmallInput: setup cost dominates, many iterations
// LargeInput: test cost dominates, fewer iterations
// PerIteration: run setup for every iteration
group.finish();
}
criterion_group!(benches, bench_batch_sizes);
criterion_main!(benches);

External Setup (Avoid Timing)
use criterion::{black_box, criterion_group, criterion_main, Criterion};
/// Deliberately costly setup — run it once, outside the timed closure,
/// so only the operation under test is measured.
fn expensive_setup() -> Vec<i32> {
    let mut v = Vec::with_capacity(10000);
    for i in 0..10000 {
        v.push(i);
    }
    v
}
/// The measured operation: a cheap sum over the prepared data.
fn fast_operation(data: &[i32]) -> i32 {
    data.iter().copied().sum()
}
/// Setup runs exactly once, before any benchmark timing starts.
fn bench_external_setup(c: &mut Criterion) {
    let data = expensive_setup();
    c.bench_function("fast_operation", |b| b.iter(|| fast_operation(&data)));
}
criterion_group!(benches, bench_external_setup);
criterion_main!(benches);

Async Benchmarking
use criterion::{criterion_group, criterion_main, Criterion};
// Async benchmarking with tokio
// Async benchmarking with tokio
/// Doubles `n` after a 100 µs async sleep — a stand-in for real async
/// work. NOTE(review): requires a `tokio` dependency, which is not
/// shown in the Cargo.toml at the top of this article.
async fn async_operation(n: usize) -> usize {
    // Simulate async work
    tokio::time::sleep(std::time::Duration::from_micros(100)).await;
    n * 2
}
/// Benchmarks a future on a tokio runtime: `to_async` drives the
/// returned future to completion inside each timed iteration.
/// NOTE(review): this presumably needs criterion's async feature
/// (e.g. "async_tokio"), which the Cargo.toml above does not enable —
/// confirm before copying.
fn bench_async(c: &mut Criterion) {
    let rt = tokio::runtime::Runtime::new().unwrap();
    c.bench_function("async_operation", |b| {
        b.to_async(&rt).iter(|| async_operation(100))
    });
}
criterion_group!(benches, bench_async);
criterion_main!(benches);

Benchmarking File I/O
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use std::io::{Read, Cursor};
/// Drains `reader` into `buf`. Clearing (rather than reallocating)
/// keeps the buffer's capacity for reuse across calls.
fn read_all<R: Read>(mut reader: R, buf: &mut Vec<u8>) -> std::io::Result<()> {
    buf.clear();
    reader.read_to_end(buf).map(|_| ())
}
/// Measures in-memory "file" reads at 1 MiB with byte throughput.
/// Fix: this example used `black_box` without importing it (the `use`
/// line above only brings in Criterion/Throughput); the std version is
/// fully qualified so the snippet compiles.
fn bench_io(c: &mut Criterion) {
    let data = vec![0u8; 1024 * 1024]; // 1 MiB source buffer
    let mut buf = Vec::with_capacity(data.len()); // reused across iterations
    let mut group = c.benchmark_group("file_io");
    group.throughput(Throughput::Bytes(data.len() as u64));
    group.bench_function("read_1mb", |b| {
        b.iter(|| {
            // Cursor provides an in-memory Read, avoiding disk noise.
            let cursor = Cursor::new(&data);
            read_all(cursor, &mut buf).unwrap();
            // Keep the filled buffer observed so the read isn't elided.
            std::hint::black_box(&buf);
        })
    });
    group.finish();
}
criterion_group!(benches, bench_io);
criterion_main!(benches);

Memory Usage Profiling
use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId};
/// Allocates a zero-initialized buffer of `size` bytes. `vec![0u8; n]`
/// may use a zeroed-allocation fast path, which is part of what this
/// benchmark observes — don't rewrite it as a push loop.
fn allocate_vec(size: usize) -> Vec<u8> {
    vec![0u8; size]
}
/// Same allocation benchmark at several sizes, labelled by size.
fn bench_memory(c: &mut Criterion) {
    let mut group = c.benchmark_group("allocation");
    for size in [100usize, 1000, 10000, 100000] {
        group.bench_with_input(BenchmarkId::new("allocate", size), &size, |b, &size| {
            b.iter(|| allocate_vec(size));
        });
    }
    group.finish();
}
criterion_group!(benches, bench_memory);
criterion_main!(benches);

Regression Detection
use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId};
/// Intentionally O(n²) workload for baseline comparisons: the sum of
/// i * j over the n×n grid.
fn slow_function(n: usize) -> usize {
    (0..n)
        .map(|i| (0..n).map(|j| i * j).sum::<usize>())
        .sum()
}
/// Demonstrates baseline save/compare for regression detection.
/// Fix: this example used `black_box` without importing it (the `use`
/// line above omits it); the std version is fully qualified so the
/// snippet compiles.
fn bench_regression(c: &mut Criterion) {
    let mut group = c.benchmark_group("regression");
    // Criterion compares each run against a saved baseline:
    //   save:    cargo bench -- --save-baseline main
    //   compare: cargo bench -- --baseline main
    group.bench_function("slow 100", |b| {
        b.iter(|| slow_function(std::hint::black_box(100)))
    });
    group.finish();
}
criterion_group!(benches, bench_regression);
criterion_main!(benches);

Multiple Benchmark Groups
use criterion::{criterion_group, criterion_main, Criterion};
// Group 1: String operations
/// Group 1: string case-conversion micro-benchmarks.
fn bench_strings(c: &mut Criterion) {
    let mut group = c.benchmark_group("strings");
    group.bench_function("to_uppercase", |b| b.iter(|| "hello".to_uppercase()));
    group.bench_function("to_lowercase", |b| b.iter(|| "HELLO".to_lowercase()));
    group.finish();
}
// Group 2: Number operations
/// Group 2: floating-point math micro-benchmarks.
fn bench_numbers(c: &mut Criterion) {
    let mut group = c.benchmark_group("numbers");
    // Sum of square roots of 1..1000.
    group.bench_function("sqrt", |b| {
        b.iter(|| (1..1000).map(|n| (n as f64).sqrt()).sum::<f64>());
    });
    // Sum of squares of 1..100.
    group.bench_function("pow", |b| {
        b.iter(|| (1..100).map(|n| (n as f64).powi(2)).sum::<f64>());
    });
    group.finish();
}
criterion_group!(string_benches, bench_strings);
criterion_group!(number_benches, bench_numbers);
criterion_main!(string_benches, number_benches);

Filtering Benchmarks
// Run specific benchmarks:
// cargo bench -- string_benches
// cargo bench -- to_uppercase
// cargo bench -- "strings/*"
use criterion::{criterion_group, criterion_main, Criterion};
/// Registered under group_a; selected by e.g. `cargo bench -- function_a`.
fn bench_a(c: &mut Criterion) {
    c.bench_function("function_a", |b| b.iter(|| 1 + 1));
}
/// Registered under group_b.
fn bench_b(c: &mut Criterion) {
    c.bench_function("function_b", |b| b.iter(|| 2 + 2));
}
criterion_group!(group_a, bench_a);
criterion_group!(group_b, bench_b);
criterion_main!(group_a, group_b);

Profiling Integration
use criterion::{criterion_group, criterion_main, Criterion, Profiler, ProfileData};
use std::process::Command;
// Custom profiler (e.g., for perf or valgrind)
struct PerfProfiler;
impl Profiler for PerfProfiler {
fn start(&self, _profile_data: &mut ProfileData) {
// Start profiling
}
fn stop(&self, _profile_data: &mut ProfileData) {
// Stop profiling
}
}
/// Benchmark that can run under the custom profiler.
/// Fix: the original commented hint (`c.profile BenchProfiler::new();`)
/// was not valid Rust; the profiler is attached via the group config.
fn bench_with_profiler(c: &mut Criterion) {
    // To enable profiling, configure Criterion in the group macro:
    //   criterion_group! {
    //       name = benches;
    //       config = Criterion::default().with_profiler(PerfProfiler);
    //       targets = bench_with_profiler
    //   }
    // then run: cargo bench -- --profile-time 10
    c.bench_function("profiled", |b| {
        b.iter(|| (0..1000).sum::<u64>())
    });
}
criterion_group!(benches, bench_with_profiler);
criterion_main!(benches);

Real-World Example: Parser Benchmark
use criterion::{black_box, criterion_group, criterion_main, Criterion, BenchmarkId, Throughput};
/// Minimal token set for the toy "+/-" expression language below.
#[derive(Debug)]
enum Token {
    /// Integer literal.
    Number(i32),
    /// The "+" operator.
    Plus,
    /// The "-" operator.
    Minus,
}
/// Tokenizes a whitespace-separated expression. Anything that is
/// neither "+"/"-" nor a parseable i32 is silently dropped.
fn parse_expression(input: &str) -> Vec<Token> {
    let mut tokens = Vec::new();
    for word in input.split_whitespace() {
        match word {
            "+" => tokens.push(Token::Plus),
            "-" => tokens.push(Token::Minus),
            other => {
                if let Ok(value) = other.parse::<i32>() {
                    tokens.push(Token::Number(value));
                }
            }
        }
    }
    tokens
}
/// Benchmarks the toy parser over three input-size classes, with
/// per-input byte throughput.
fn bench_parser(c: &mut Criterion) {
    let mut group = c.benchmark_group("parser");
    let inputs = [
        ("simple", "1 + 2"),
        ("medium", "1 + 2 - 3 + 4 - 5"),
        ("complex", "100 + 200 - 300 + 400 - 500 + 600 - 700 + 800"),
    ];
    for (name, input) in inputs {
        // Bytes of source parsed per second.
        group.throughput(Throughput::Bytes(input.len() as u64));
        group.bench_with_input(BenchmarkId::new("parse", name), input, |b, input| {
            b.iter(|| parse_expression(black_box(input)));
        });
    }
    group.finish();
}
criterion_group!(benches, bench_parser);
criterion_main!(benches);

Real-World Example: Collection Benchmarks
use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId};
use std::collections::{HashMap, BTreeMap};
/// Compares HashMap vs BTreeMap insertion cost across several sizes;
/// sharing one group puts both in the same report.
fn bench_collections(c: &mut Criterion) {
    let mut group = c.benchmark_group("map_insert");
    for size in [100, 1000, 10000] {
        // HashMap insertion at this size.
        group.bench_with_input(BenchmarkId::new("HashMap", size), &size, |b, &size| {
            b.iter(|| {
                let mut map = HashMap::new();
                for i in 0..size {
                    map.insert(i, i * 2);
                }
                map // return so the build isn't optimized away
            });
        });
        // BTreeMap insertion at the same size, for comparison.
        group.bench_with_input(BenchmarkId::new("BTreeMap", size), &size, |b, &size| {
            b.iter(|| {
                let mut map = BTreeMap::new();
                for i in 0..size {
                    map.insert(i, i * 2);
                }
                map
            });
        });
    }
    group.finish();
}
criterion_group!(benches, bench_collections);
criterion_main!(benches);

Summary
- `c.bench_function()` benchmarks a single function
- `c.benchmark_group()` organizes related benchmarks
- Use `black_box()` to prevent compiler optimizations
- `Throughput::Bytes()` and `Throughput::Elements()` measure throughput
- `BenchmarkId` for parameterized benchmarks
- `iter_batched()` for setup routines that shouldn't be timed
- `to_async()` for async benchmarks with a runtime
- Enable the `html_reports` feature for visual reports
- Run with `cargo bench`
- Filter with `cargo bench -- "pattern"`
- Compare baselines with `--save-baseline` and `--baseline`
- Criterion provides statistical confidence intervals, not just averages
- Perfect for: detecting performance regressions, optimizing hot paths, comparing implementations, CI/CD benchmarking
