What is the purpose of criterion::Throughput for reporting benchmark results in meaningful units?

criterion::Throughput configures benchmark output to display throughput in meaningful units like bytes per second or elements per second, rather than just iterations and time. This makes benchmark results more interpretable for real-world scenarios where the rate of processing data is more relevant than raw iteration counts.

The Throughput Purpose

use criterion::{Criterion, Throughput};
 
/// Documentation-only walkthrough of what `Throughput` adds to reports.
fn throughput_purpose() {
    // Criterion's default output reports:
    //   - time per iteration
    //   - iterations per second
    //   - sample statistics
    //
    // Attaching a `Throughput` additionally reports:
    //   - bytes processed per second (Throughput::Bytes)
    //   - elements processed per second (Throughput::Elements)
    //   - custom throughput metrics
    //
    // That turns abstract "iterations" into concrete rates such as
    // "500 MB/s" for a file-processing benchmark.
}

Throughput adds meaningful throughput units to benchmark output.

Basic Throughput Configuration

use criterion::{Criterion, Throughput};
 
/// Minimal example of attaching byte throughput to a benchmark.
///
/// FIX: the original called `b.throughput(...)` inside the bench closure,
/// but `Bencher` has no `throughput` method — throughput is configured on
/// a `BenchmarkGroup` before the benchmark is registered. The fabricated
/// `bytes:` output line was also removed; Criterion prints `thrpt:` only.
fn basic_throughput(c: &mut Criterion) {
    let data = vec![0u8; 1024 * 1024]; // 1 MiB of data

    let mut group = c.benchmark_group("basic");
    // Bytes processed per iteration; Criterion divides by measured time.
    group.throughput(Throughput::Bytes(1024 * 1024));
    group.bench_function("process_1mb", |b| {
        b.iter(|| process_data(&data))
    });
    group.finish();

    // Sample output (values illustrative):
    // process_1mb          time:   [2.1234 ms 2.1456 ms 2.1678 ms]
    //                      thrpt:  [463.24 MiB/s 468.02 MiB/s 472.88 MiB/s]
}
 
/// Sums every byte of `data`, widening each to `u64` so the total cannot
/// overflow for any realistic buffer size.
fn process_data(data: &[u8]) -> u64 {
    let mut total: u64 = 0;
    for &byte in data {
        total += u64::from(byte);
    }
    total
}

Configure throughput to see bytes per second in addition to time per iteration.

Throughput Units

use criterion::Throughput;
 
/// Shows the two `Throughput` unit variants and how each is displayed.
fn throughput_units() {
    // Binary data processing: auto-scaled as B/s, KiB/s, MiB/s, GiB/s.
    let _bytes = Throughput::Bytes(1024); // 1 KiB per iteration

    // Collection processing: auto-scaled as elem/s, Kelem/s, Melem/s,
    // Gelem/s.
    let _elements = Throughput::Elements(1000); // 1000 items per iteration
}

Throughput supports bytes and elements as unit types.

Bytes Throughput for I/O Operations

use criterion::{Criterion, Throughput};
 
/// Demonstrates byte-rate reporting for simulated I/O on a 10 MB buffer.
fn io_benchmark(c: &mut Criterion) {
    const BUF_LEN: usize = 10 * 1024 * 1024; // 10 MB
    let data = vec![0u8; BUF_LEN];

    let mut group = c.benchmark_group("io_throughput");
    // Every benchmark in this group reports bytes-per-second throughput.
    group.throughput(Throughput::Bytes(BUF_LEN as u64));

    // Simulated read: touch every byte of the buffer.
    group.bench_function("read_10mb", |b| {
        b.iter(|| data.iter().map(|&byte| u64::from(byte)).sum::<u64>())
    });

    // Simulated write: copy the whole buffer.
    group.bench_function("write_10mb", |b| b.iter(|| data.clone()));

    group.finish();

    // Sample output (values illustrative):
    // read_10mb            time:   [1.2345 ms 1.2456 ms 1.2567 ms]
    //                      thrpt:  [7.8123 GiB/s 7.9012 GiB/s 7.9876 GiB/s]
}

I/O benchmarks benefit from throughput reporting to compare against disk/network speeds.

Elements Throughput for Collection Processing

use criterion::{Criterion, Throughput};
 
/// Benchmarks two whole-collection operations over 100,000 items,
/// reporting elements-per-second via a group-level `Throughput`.
fn collection_benchmark(c: &mut Criterion) {
    let items: Vec<i32> = (0..100_000).collect();

    let mut group = c.benchmark_group("collection_throughput");

    // Process 100,000 elements per iteration
    group.throughput(Throughput::Elements(100_000));

    // Full scan: sums every element.
    group.bench_function("sum_items", |b| {
        b.iter(|| {
            items.iter().sum::<i32>()
        });
    });

    // Full scan with a predicate: counts elements above the midpoint.
    group.bench_function("filter_items", |b| {
        b.iter(|| {
            items.iter().filter(|&&x| x > 50000).count()
        });
    });

    group.finish();

    // Output shows elements per second:
    // sum_items            time:   [123.45 µs 124.56 µs 125.67 µs]
    //                      thrpt:  [795.23 Melem/s 801.45 Melem/s 807.89 Melem/s]
}

Collection benchmarks show elements per second to measure processing rate.

Group-Level Throughput

use criterion::{Criterion, Throughput};
 
/// Shows that one group-level throughput setting applies to every
/// benchmark registered in the group.
fn group_throughput(c: &mut Criterion) {
    let mut group = c.benchmark_group("hash_tables");
    group.throughput(Throughput::Elements(10_000));

    let data: Vec<i32> = (0..10_000).collect();

    // Both benchmarks below inherit the 10,000-element setting and
    // therefore report elem/s.
    group.bench_function("lookup", |b| b.iter(|| data.contains(&5000)));

    group.bench_function("insert", |b| {
        b.iter(|| {
            let mut set = std::collections::HashSet::new();
            for &item in &data {
                set.insert(item);
            }
            set
        })
    });

    group.finish();
}

Set throughput once at the group level for all benchmarks.

Per-Benchmark Throughput

use criterion::{Criterion, Throughput};
 
/// Gives each benchmark its own throughput value so differently-sized
/// inputs can be compared on a common bytes/s scale.
///
/// FIX: the original called `b.throughput(...)` inside each closure, but
/// `Bencher` has no `throughput` method. Per-benchmark throughput is
/// achieved by calling `group.throughput(...)` again before registering
/// each benchmark — the most recently set value applies.
fn per_benchmark_throughput(c: &mut Criterion) {
    let mut group = c.benchmark_group("variable_sizes");

    // Small dataset: 1 KiB per iteration
    let small_data = vec![0u8; 1024];
    group.throughput(Throughput::Bytes(1024));
    group.bench_function("process_1kb", |b| b.iter(|| process_data(&small_data)));

    // Medium dataset: 1 MiB per iteration
    let medium_data = vec![0u8; 1024 * 1024];
    group.throughput(Throughput::Bytes(1024 * 1024));
    group.bench_function("process_1mb", |b| b.iter(|| process_data(&medium_data)));

    // Large dataset: 10 MiB per iteration
    let large_data = vec![0u8; 10 * 1024 * 1024];
    group.throughput(Throughput::Bytes(10 * 1024 * 1024));
    group.bench_function("process_10mb", |b| b.iter(|| process_data(&large_data)));

    group.finish();

    // Each benchmark reports its own bytes/s, which is what makes
    // scaling analysis across data sizes possible.
}

Per-benchmark throughput allows comparing different data sizes.

Throughput vs Iterations

use criterion::{Criterion, Throughput};
 
/// Contrasts the output of a plain benchmark with one that has throughput
/// attached.
///
/// FIX: `Criterion::bench_function` offers no throughput hook and
/// `Bencher` has no `throughput` method, so the "with throughput" case
/// must go through a `BenchmarkGroup`.
fn throughput_vs_iterations(c: &mut Criterion) {
    // Without throughput: Criterion reports only time per iteration
    // (plus derived iterations/s and sample statistics) — no rate metric.
    c.bench_function("without_throughput", |b| {
        b.iter(|| {
            let data = vec![0u8; 1024 * 1024];
            process_data(&data);
        });
    });

    // With throughput: the group carries the setting for its benchmarks.
    let mut group = c.benchmark_group("with_throughput_group");
    group.throughput(Throughput::Bytes(1024 * 1024));
    group.bench_function("with_throughput", |b| {
        b.iter(|| {
            let data = vec![0u8; 1024 * 1024];
            process_data(&data);
        });
    });
    group.finish();

    // Output now includes bytes/s alongside time per iteration, which can
    // be compared with hardware limits (disk speed, network bandwidth).
}

Without throughput, only time and iterations are reported; with throughput, rate is shown.

Interpreting Throughput Results

use criterion::{Criterion, Throughput};
 
/// Shows how throughput results map onto hardware limits.
///
/// FIX: the original set throughput via `b.throughput(...)`, but
/// `Bencher` has no such method; a `BenchmarkGroup` is required.
fn interpreting_results(c: &mut Criterion) {
    // Reference rates for context:
    //   Disk:    HDD ~100-200 MB/s, SSD ~500-3500 MB/s, NVMe ~3000-7000 MB/s
    //   Network: 1 Gbps ~125 MB/s, 10 Gbps ~1.25 GB/s
    //   Memory:  DDR4 ~20-25 GB/s

    // Memory-bandwidth benchmark: 1M u64 values = 8 MB scanned per iteration.
    let data = vec![0u64; 1_000_000];

    let mut group = c.benchmark_group("memory");
    group.throughput(Throughput::Bytes(8_000_000));
    group.bench_function("memory_scan", |b| {
        b.iter(|| data.iter().sum::<u64>())
    });
    group.finish();

    // A result of ~10 GB/s means roughly 40% of DDR4 bandwidth is in use —
    // actionable insight that "1.6 ms per iteration" alone does not convey.
}

Throughput enables comparison against hardware specifications.

Comparing Algorithms with Throughput

use criterion::{Criterion, Throughput};
 
/// Compares stable vs. unstable sort using elem/s as a common yardstick.
fn algorithm_comparison(c: &mut Criterion) {
    let data: Vec<u32> = (0..1_000_000).collect();

    let mut group = c.benchmark_group("sorting");
    group.throughput(Throughput::Elements(1_000_000));

    // The setup closure hands each iteration a fresh clone, so every
    // sort starts from the same initial state.
    group.bench_function("sort_default", |b| {
        b.iter_batched(
            || data.clone(),
            |mut input| {
                input.sort();
                input
            },
            criterion::BatchSize::SmallInput,
        );
    });

    group.bench_function("sort_unstable", |b| {
        b.iter_batched(
            || data.clone(),
            |mut input| {
                input.sort_unstable();
                input
            },
            criterion::BatchSize::SmallInput,
        );
    });

    group.finish();

    // Sample output (values illustrative):
    // sort_default         thrpt:  [45.123 Melem/s 46.234 Melem/s 47.345 Melem/s]
    // sort_unstable        thrpt:  [55.456 Melem/s 56.567 Melem/s 57.678 Melem/s]
    //
    // Reading throughput directly ("unstable is ~20% faster") is more
    // intuitive than comparing raw times such as 19.8 ms vs 16.5 ms.
}

Throughput comparisons are often more intuitive than time comparisons.

Throughput Scaling Analysis

use criterion::{Criterion, Throughput};
 
/// Benchmarks the same operation at several input sizes; constant elem/s
/// across sizes indicates linear work with good cache behavior.
///
/// FIX: `Throughput::Elements` takes a `u64`, while `size` is inferred as
/// `u32` from the `Vec<u32>` range — the original did not compile without
/// an explicit widening conversion.
fn scaling_analysis(c: &mut Criterion) {
    let mut group = c.benchmark_group("scaling");

    let sizes = [1_000u32, 10_000, 100_000, 1_000_000];

    for size in sizes {
        let data: Vec<u32> = (0..size).collect();

        // Re-set the group throughput for this size before registering it.
        group.throughput(Throughput::Elements(u64::from(size)));

        group.bench_function(format!("process_{}", size), |b| {
            b.iter(|| data.iter().filter(|&&x| x % 2 == 0).count())
        });
    }

    group.finish();

    // Interpreting the results:
    // - Flat throughput (e.g. ~500 Melem/s at every size) suggests O(n)
    //   work with good cache behavior.
    // - Throughput that drops as size grows points to cache pressure or
    //   worse-than-linear complexity.
}

Throughput consistency across sizes indicates good scaling behavior.

Serialization Benchmark Example

use criterion::{Criterion, Throughput};
use serde::{Serialize, Deserialize};
 
/// Sample payload for the serialization benchmarks: mixes a fixed-width
/// integer, a heap-allocated string, and a numeric vector.
#[derive(Serialize, Deserialize)]
struct Record {
    /// Unique record identifier.
    id: u64,
    /// Human-readable label, e.g. "record_42".
    name: String,
    /// Arbitrary numeric payload (10 values in the benchmark fixture).
    values: Vec<f64>,
}
 
/// Benchmarks JSON vs. bincode serialization, reporting bytes/s so the
/// two formats share a common scale.
fn serialization_benchmark(c: &mut Criterion) {
    let records: Vec<Record> = (0..1000)
        .map(|i| Record {
            id: i,
            name: format!("record_{}", i),
            values: vec![i as f64; 10],
        })
        .collect();

    let mut group = c.benchmark_group("serialization");

    // Rough payload estimate per record: id (8) + name (~20) + 10 f64s (80),
    // times 1000 records.
    let approx_bytes = 1000 * (8 + 20 + 10 * 8);
    group.throughput(Throughput::Bytes(approx_bytes as u64));

    group.bench_function("json_serialize", |b| {
        b.iter(|| serde_json::to_string(&records).unwrap())
    });

    group.bench_function("bincode_serialize", |b| {
        b.iter(|| bincode::serialize(&records).unwrap())
    });

    group.finish();

    // Sample output (values illustrative):
    // json_serialize       thrpt:  [850.23 MiB/s 862.45 MiB/s 874.67 MiB/s]
    // bincode_serialize    thrpt:  [2.1 GiB/s 2.2 GiB/s 2.3 GiB/s]
    //
    // Throughput makes the ~2.5x bincode advantage immediately visible.
}

Serialization benchmarks show throughput in bytes per second.

Network Benchmark Example

use criterion::{Criterion, Throughput};
 
/// Benchmarks packet processing across common frame/MTU sizes, reporting
/// bytes/s so results compare directly against line rate.
///
/// FIX: throughput was set via `b.throughput(...)` inside the bench
/// closure, but `Bencher` has no such method — it must be configured on
/// the `BenchmarkGroup` before each benchmark is registered. The trailing
/// comment also wrongly claimed packets/s output; `Throughput::Bytes`
/// reports bytes/s only (use `Throughput::Elements` for packets/s).
fn network_benchmark(c: &mut Criterion) {
    let packet_sizes = [64, 512, 1500, 9000]; // common frame/MTU sizes

    let mut group = c.benchmark_group("packet_processing");

    for size in packet_sizes {
        let packet = vec![0u8; size];

        // Re-set the group throughput so each size reports its own bytes/s.
        group.throughput(Throughput::Bytes(size as u64));

        group.bench_function(format!("process_{}_byte_packet", size), |b| {
            b.iter(|| process_packet(&packet))
        });
    }

    group.finish();

    // Bytes/s output is directly comparable to line rate:
    // 1 Gbps = 125 MB/s, 10 Gbps = 1.25 GB/s, 100 Gbps = 12.5 GB/s
}
 
/// Stand-in packet handler for the benchmark: the "work" is simply
/// reporting the packet's length in bytes.
fn process_packet(packet: &[u8]) -> u64 {
    let length = packet.len();
    length as u64
}

Network benchmarks compare throughput against line rates.

Throughput with BatchSize

use criterion::{Criterion, Throughput, BatchSize};
 
/// Combines throughput reporting with `iter_batched` for benchmarks that
/// need per-iteration setup.
///
/// FIX: the original called `b.throughput(...)`, but `Bencher` has no
/// `throughput` method — the setting belongs on a `BenchmarkGroup`.
fn throughput_with_batch(c: &mut Criterion) {
    let data: Vec<i32> = (0..100_000).collect();

    let mut group = c.benchmark_group("batched");
    group.throughput(Throughput::Elements(100_000));

    group.bench_function("batch_processing", |b| {
        b.iter_batched(
            || data.clone(),        // setup: fresh input per batch
            |input| {               // routine: consume and filter
                input.into_iter().filter(|&x| x > 50000).count()
            },
            BatchSize::SmallInput,  // hint for batch sizing
        )
    });

    group.finish();

    // Throughput applies to each measured iteration: every iteration
    // processes 100,000 elements, so results are reported as elem/s.
}

Use iter_batched with throughput for benchmarks requiring setup.

HTML Report Throughput Display

use criterion::{Criterion, Throughput};
 
/// Shows that throughput also flows into Criterion's HTML reports.
///
/// FIX: throughput is attached via a `BenchmarkGroup`; `Bencher` has no
/// `throughput` method, so the original closure call did not compile.
fn html_report(c: &mut Criterion) {
    let data = vec![0u8; 1024 * 1024];

    let mut group = c.benchmark_group("reporting");
    group.throughput(Throughput::Bytes(1024 * 1024));
    group.bench_function("with_throughput_report", |b| {
        b.iter(|| process_data(&data))
    });
    group.finish();

    // In the generated HTML report:
    // - throughput is displayed alongside the timing statistics
    // - charts plot throughput across the measured samples
    // - comparisons between runs show throughput differences
    // - units are auto-scaled (KiB/s, MiB/s, GiB/s)
    //
    // This makes reports accessible to stakeholders who may not get much
    // from "nanoseconds per iteration".
}

Throughput metrics appear in Criterion's HTML reports with charts.

Complete Summary

use criterion::Throughput;
 
/// Comment-only cheat sheet summarizing `Throughput` unit types and what
/// enabling throughput adds to benchmark output.
/// (Tables rewritten in plain ASCII; the originals contained mis-encoded
/// box-drawing characters.)
fn complete_summary() {
    // Unit                      | Displayed as (auto-scaled)
    // --------------------------+---------------------------
    // Bytes(1024)               | KiB/s range
    // Bytes(1024*1024)          | MiB/s range
    // Bytes(1024*1024*1024)     | GiB/s range
    // Elements(1_000)           | Kelem/s range
    // Elements(1_000_000)       | Melem/s range
    // Elements(1_000_000_000)   | Gelem/s range
    //
    // (The displayed rate is the configured amount divided by the measured
    // iteration time, auto-scaled to a readable unit.)

    // Without Throughput        | With Throughput
    // --------------------------+------------------------------------------
    // Time per iteration        | Time per iteration
    // Iterations per second     | Iterations per second
    //                           | Bytes per second OR elements per second
    //                           | Comparison against hardware limits
    //                           | More intuitive for stakeholders

    // When to use Throughput:
    // - I/O operations (bytes/s meaningful)
    // - Collection processing (elements/s meaningful)
    // - Comparing against hardware specs
    // - Presenting to non-technical stakeholders
    // - Scaling analysis across data sizes
}
 
// Key insight:
// criterion::Throughput configures benchmark output to include throughput
// metrics (bytes/s or elements/s) alongside time per iteration. This is
// essential for:
//
// 1. I/O benchmarks - Compare against disk/network speeds
//    - Disk: HDD ~100-200 MB/s, SSD ~500-3500 MB/s
//    - Network: 1 Gbps ~125 MB/s, 10 Gbps ~1.25 GB/s
//    - Memory: DDR4 ~20-25 GB/s
//
// 2. Collection processing - Measure algorithm throughput
//    - Compare sorting algorithms by elements/s
//    - Understand cache behavior across sizes
//    - Identify algorithmic complexity
//
// 3. Communication - Make results accessible
//    - "500 MB/s" is more intuitive than "2 Β΅s/iteration"
//    - Stakeholders can compare to hardware limits
//    - Easier to reason about scalability
//
// Throughput::Bytes() for:
// - File I/O, network I/O
// - Serialization/deserialization
// - Compression/decompression
// - Encryption/decryption
//
// Throughput::Elements() for:
// - Collection operations
// - Parsing (lines, records, items)
// - Processing pipelines
// - Algorithm comparisons
//
// The throughput metric complements time metrics:
// - Time shows per-iteration cost
// - Throughput shows processing rate
// - Together, they provide complete picture

Key insight: criterion::Throughput adds throughput metrics (bytes/s or elements/s) to benchmark output, making results meaningful for real-world comparison. Without throughput, benchmarks report only time per iterationβ€”useful for optimization but hard to contextualize. With throughput, you can compare against hardware limits (disk speed, network bandwidth, memory bandwidth) and communicate results intuitively to stakeholders. Use Throughput::Bytes() for I/O, serialization, and binary processing; use Throughput::Elements() for collections, parsing, and algorithm comparisons.