What is the purpose of criterion::Throughput::Bytes for normalizing benchmark results by data size?

criterion::Throughput::Bytes configures a benchmark to report throughput in bytes per second rather than just iterations per second, allowing meaningful performance comparisons across benchmarks that process different amounts of data. Without throughput configuration, Criterion reports iterations per second—a metric that doesn't account for how much work each iteration performs. By specifying throughput, you tell Criterion how many bytes each iteration processes, enabling it to calculate and display throughput rates like "2.5 GiB/s" or "150 MB/s". This is essential for comparing implementations that handle different data sizes, demonstrating linear scaling characteristics, and communicating benchmark results in intuitive units.

The Problem: Iterations Don't Measure Work

use criterion::{black_box, criterion_group, criterion_main, Criterion};
 
/// Simple O(n) workload: sums every byte in the slice.
fn process_data(data: &[u8]) -> u8 {
    data.iter().sum()
}
 
fn without_throughput(c: &mut Criterion) {
    // 1 MB of input data.
    let input = vec![1u8; 1_000_000];
    
    c.bench_function("process_1mb", |b| {
        b.iter(|| process_data(black_box(&input)))
    });
    
    // Without throughput configuration, Criterion only reports timing
    // per iteration. Is that fast? Slow? How does it compare to a
    // 10 MB input? Without knowing the data size, we can't tell.
}

Without throughput, iterations per second doesn't communicate actual performance.

Adding Throughput Measurement

use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
 
fn with_throughput(c: &mut Criterion) {
    let data = vec![1u8; 1_000_000];  // 1 MB = 1,000,000 bytes
    
    // Throughput is configured on a BenchmarkGroup, not inside the
    // bench closure — `Bencher` has no `throughput` method.
    let mut group = c.benchmark_group("process");
    
    // Tell Criterion: each iteration processes 1,000,000 bytes
    group.throughput(Throughput::Bytes(1_000_000));
    
    group.bench_function("process_1mb", |bencher| {
        bencher.iter(|| process_data(black_box(&data)))
    });
    
    group.finish();
    
    // Output now includes a `thrpt:` line, e.g. ~5 GiB/s.
    // This is meaningful! We can compare it to other implementations
    // or to memory bandwidth limits
}
 
/// Simple O(n) workload: sums every byte in the slice.
fn process_data(data: &[u8]) -> u8 {
    data.iter().sum()
}
 
criterion_group!(benches, with_throughput);
criterion_main!(benches);

Throughput::Bytes enables throughput reporting in meaningful units.

Throughput with Different Data Sizes

use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
 
/// Simple O(n) workload: sums every byte in the slice.
fn process_data(data: &[u8]) -> u8 {
    data.iter().sum()
}
 
fn multiple_sizes(c: &mut Criterion) {
    let sizes = [100, 1_000, 10_000, 100_000, 1_000_000];
    
    // Throughput lives on the BenchmarkGroup; re-set it before each
    // registration so every size is normalized by its own byte count.
    let mut group = c.benchmark_group("process");
    
    for size in sizes {
        let data = vec![1u8; size];
        
        group.throughput(Throughput::Bytes(size as u64));
        group.bench_function(format!("{}b", size), |bencher| {
            bencher.iter(|| process_data(black_box(&data)))
        });
    }
    
    group.finish();
    
    // Output shows throughput for each size, e.g.:
    // process/100b:     10 GiB/s
    // process/1000b:    12 GiB/s
    // process/10000b:   12.5 GiB/s
    // process/100000b:  12.3 GiB/s
    // process/1000000b: 11.8 GiB/s
    
    // You can see throughput is fairly consistent
    // (with some overhead for smaller sizes)
    // This validates O(n) scaling
}
 
criterion_group!(benches, multiple_sizes);
criterion_main!(benches);

Throughput reveals scaling characteristics across data sizes.

Throughput Measurement Units

use criterion::{Criterion, Throughput};
 
fn throughput_units(c: &mut Criterion) {
    // Bytes: Basic unit
    let bytes = 1_000;
    Throughput::Bytes(bytes);
    
    // Criterion automatically converts to appropriate units:
    // - Bytes/s for small values
    // - KB/s for thousands
    // - MB/s for millions
    // - GB/s for billions
    
    // The conversion uses binary (KiB, MiB, GiB) for:
    // - Throughput::KiB(n)  // n * 1024 bytes
    // - Throughput::MiB(n)  // n * 1024 * 1024 bytes
    
    // Or decimal (KB, MB, GB) if you prefer:
    // Just use Throughput::Bytes(n * 1000) for decimal
    
    // Example with convenience constructors:
    let kb = Throughput::KiB(1);      // 1024 bytes
    let mb = Throughput::MiB(1);      // 1048576 bytes
    
    c.bench_function("kib_test", |bencher| {
        bencher.throughput(kb);
        bencher.iter(|| /* process 1 KiB */)
    });
}

Criterion's Throughput enum has exactly three variants — Bytes, BytesDecimal, and Elements. There are no KiB/MiB convenience constructors; for power-of-two sizes, pass the byte count directly (e.g. Throughput::Bytes(1024) for 1 KiB).

Throughput::Bytes vs Throughput::Elements

use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
 
/// Sums 1000 u32 values — the workload measured under both units.
fn process_items(items: &[u32]) -> u32 {
    items.iter().sum()
}
 
fn bytes_vs_elements(c: &mut Criterion) {
    let items: Vec<u32> = vec![1; 1000];
    
    // Throughput is a group-level setting; changing it between
    // registrations gives each benchmark its own unit.
    let mut group = c.benchmark_group("sum_u32");
    
    // Throughput::Bytes: report in bytes/second.
    // Each element is size_of::<u32>() = 4 bytes, so 4000 bytes total.
    group.throughput(Throughput::Bytes(
        (items.len() * std::mem::size_of::<u32>()) as u64,
    ));
    group.bench_function("by_bytes", |bencher| {
        bencher.iter(|| process_items(black_box(&items)))
    });
    
    // Throughput::Elements: report in items/second
    group.throughput(Throughput::Elements(items.len() as u64));
    group.bench_function("by_elements", |bencher| {
        bencher.iter(|| process_items(black_box(&items)))
    });
    
    group.finish();
    
    // Both are useful:
    // - Bytes: Compare to memory/disk/network bandwidth
    // - Elements: Compare processing rate for items
}
 
criterion_group!(benches, bytes_vs_elements);
criterion_main!(benches);

Use Throughput::Elements when counting logical items, not bytes.

Demonstrating Linear Scaling

use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
 
/// Returns the index of the first byte equal to `target`, if any.
fn linear_scan(data: &[u8], target: u8) -> Option<usize> {
    data.iter().position(|&b| b == target)
}
 
fn demonstrate_scaling(c: &mut Criterion) {
    let sizes = [1_000, 10_000, 100_000, 1_000_000];
    
    // Throughput must be set on the group — `Bencher` has no
    // `throughput` method.
    let mut group = c.benchmark_group("scan");
    
    for size in sizes {
        let data = vec![0u8; size];
        
        group.throughput(Throughput::Bytes(size as u64));
        // Searching for a value not in the array (worst case: full scan)
        group.bench_function(format!("{}", size), |bencher| {
            bencher.iter(|| linear_scan(black_box(&data), 255))
        });
    }
    
    group.finish();
    
    // Without throughput: iterations/second decreases as size increases
    // With throughput: throughput stays roughly constant
    // This demonstrates O(n) complexity
    //
    // Example output (illustrative numbers):
    // scan/1000:     15.0 GiB/s
    // scan/10000:    14.8 GiB/s
    // scan/100000:   14.5 GiB/s
    // scan/1000000:  14.2 GiB/s
    //
    // Roughly stable throughput across sizes is evidence of linear scaling
}
 
criterion_group!(benches, demonstrate_scaling);
criterion_main!(benches);

Stable throughput across sizes proves linear time complexity.

Comparing Implementations

use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
 
/// Baseline: widen each byte to u64 and accumulate sequentially.
fn naive_sum(data: &[u8]) -> u64 {
    let mut total = 0u64;
    for &b in data {
        total += b as u64;
    }
    total
}
 
/// Sum in 8-byte chunks to give the compiler an easier SIMD target.
fn optimized_sum(data: &[u8]) -> u64 {
    let mut total = 0u64;
    for chunk in data.chunks(8) {
        let mut partial = 0u64;
        for &b in chunk {
            partial += b as u64;
        }
        total += partial;
    }
    total
}
 
fn compare_implementations(c: &mut Criterion) {
    let data = vec![1u8; 10_000_000];  // 10 MB of input
    
    let mut group = c.benchmark_group("sum_comparison");
    
    // Both benchmarks consume the same buffer, so one throughput
    // setting covers them both.
    group.throughput(Throughput::Bytes(data.len() as u64));
    
    let implementations: [(&str, fn(&[u8]) -> u64); 2] =
        [("naive", naive_sum), ("optimized", optimized_sum)];
    
    for (label, sum_fn) in implementations {
        group.bench_function(label, |bencher| {
            bencher.iter(|| sum_fn(black_box(&data)))
        });
    }
    
    group.finish();
    
    // Reported throughput for each, e.g.:
    //   naive:     8.5 GB/s
    //   optimized: 12.3 GB/s
    //
    // Framing the result as throughput makes the optimization clear:
    // "optimized is 45% faster" is meaningful, whereas
    // "optimized does more iterations" is confusing.
}
 
criterion_group!(benches, compare_implementations);
criterion_main!(benches);

Throughput enables meaningful performance comparisons.

Benchmark Groups with Shared Throughput

use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
 
/// Serializes the records as a JSON string (panics on serializer error).
fn serialize_json(data: &[(String, i32)]) -> String {
    serde_json::to_string(data).unwrap()
}
 
/// Serializes the records with bincode (panics on serializer error).
fn serialize_bincode(data: &[(String, i32)]) -> Vec<u8> {
    bincode::serialize(data).unwrap()
}
 
fn group_throughput(c: &mut Criterion) {
    let data: Vec<(String, i32)> = (0..1000)
        .map(|i| (format!("key_{}", i), i))
        .collect();
    
    // Calculate serialized sizes for throughput
    let json_size = serialize_json(&data).len() as u64;
    let bincode_size = serialize_bincode(&data).len() as u64;
    
    // Different formats produce different sizes, but we want comparable
    // rates. Throughput is a BenchmarkGroup setting — re-set it before
    // each benchmark so each format is normalized by its own output size.
    let mut group = c.benchmark_group("serialize");
    
    group.throughput(Throughput::Bytes(json_size));
    group.bench_function("serde_json", |bencher| {
        bencher.iter(|| serialize_json(black_box(&data)))
    });
    
    group.throughput(Throughput::Bytes(bincode_size));
    group.bench_function("bincode", |bencher| {
        bencher.iter(|| serialize_bincode(black_box(&data)))
    });
    
    group.finish();
    
    // Throughput now shows serialization rate
    // Comparison: "JSON: 500 MB/s vs Bincode: 2 GB/s"
}
 
criterion_group!(benches, group_throughput);
criterion_main!(benches);

Each benchmark can have its own throughput based on actual output size.

Variable Data Size Benchmarks

use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
 
fn dynamic_throughput(c: &mut Criterion) {
    let mut group = c.benchmark_group("variable_size");
    
    let sizes = [100usize, 1000, 10_000];
    for &size in sizes.iter() {
        let buffer = vec![0u8; size];
        
        // Update the group's throughput before registering each
        // benchmark; it applies to the next registration.
        group.throughput(Throughput::Bytes(size as u64));
        
        group.bench_function(format!("size_{}", size), |b| {
            // Each iteration scans `size` bytes.
            b.iter(|| buffer.iter().sum::<u8>())
        });
    }
    
    group.finish();
}
 
criterion_group!(benches, dynamic_throughput);
criterion_main!(benches);

Groups can set throughput for all contained benchmarks.

Real-World Example: File Parsing

use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
 
/// Naive CSV split: one Vec<String> per line, split on commas.
fn parse_csv(data: &str) -> Vec<Vec<String>> {
    data.lines()
        .map(|line| {
            line.split(',')
                .map(|s| s.to_string())
                .collect()
        })
        .collect()
}
 
/// Parses a JSON array of strings; returns an empty Vec on parse failure.
fn parse_json_lines(data: &str) -> Vec<String> {
    serde_json::from_str::<Vec<String>>(data).unwrap_or_default()
}
 
fn file_parsing_bench(c: &mut Criterion) {
    // Simulated file content
    let csv_data = "a,b,c\n1,2,3\n4,5,6\n".repeat(1000);
    let json_data = serde_json::to_string(&vec!["item"; 3000]).unwrap();
    
    let csv_size = csv_data.len() as u64;
    let json_size = json_data.len() as u64;
    
    // Throughput is a BenchmarkGroup setting; re-set it before each
    // benchmark so each parser is normalized by its own input size.
    let mut group = c.benchmark_group("parsing");
    
    group.throughput(Throughput::Bytes(csv_size));
    group.bench_function("parse_csv", |bencher| {
        bencher.iter(|| parse_csv(black_box(&csv_data)))
    });
    
    group.throughput(Throughput::Bytes(json_size));
    group.bench_function("parse_json", |bencher| {
        bencher.iter(|| parse_json_lines(black_box(&json_data)))
    });
    
    group.finish();
    
    // Now we can compare:
    // - CSV parsing: 150 MB/s
    // - JSON parsing: 80 MB/s
    //
    // These are comparable metrics!
    // "JSON is half the throughput" is meaningful
}
 
criterion_group!(benches, file_parsing_bench);
criterion_main!(benches);

Throughput normalizes parsing benchmarks for comparison.

Understanding Throughput Calculation

use criterion::Throughput;
 
fn calculation_explanation() {
    // Criterion calculates throughput as:
    // throughput = bytes_per_iteration / iteration_time
    
    // Example:
    // - Each iteration processes 1,000,000 bytes
    // - Each iteration takes 100 microseconds
    // - Throughput = 1,000,000 bytes / 0.0001 seconds
    // - Throughput = 10,000,000,000 bytes/second = 10 GB/s
    
    // What Throughput::Bytes does:
    // 1. Associates a byte count with each iteration
    // 2. During analysis, divides byte count by iteration time
    // 3. Reports the result in human-readable units
    //    (Bytes displays binary KiB/s, MiB/s, GiB/s;
    //     BytesDecimal displays decimal KB/s, MB/s, GB/s)
    
    // The count is per-iteration. Note: there are no Throughput::KiB /
    // Throughput::MiB constructors — spell the byte count out:
    let _ = Throughput::Bytes(1000);          // Each iteration: 1000 bytes
    let _ = Throughput::Bytes(1024);          // Each iteration: 1 KiB
    let _ = Throughput::Bytes(1024 * 1024);   // Each iteration: 1 MiB
    let _ = Throughput::BytesDecimal(1000);   // 1 KB, displayed in decimal units
    
    // For Throughput::Elements:
    let _ = Throughput::Elements(100); // Each iteration: 100 elements
    // Reports in elements/second
}

Throughput is bytes divided by time per iteration.

Memory Bandwidth Context

use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
 
/// Copies `src` into `dst`; both slices must be the same length
/// (`copy_from_slice` panics otherwise).
fn memory_copy(src: &[u8], dst: &mut [u8]) {
    dst.copy_from_slice(src);
}
 
fn memory_bandwidth_context(c: &mut Criterion) {
    let size = 100_000_000;  // 100 MB
    let src = vec![0u8; size];
    let mut dst = vec![0u8; size];
    
    // Throughput is configured on the group, not on the Bencher.
    let mut group = c.benchmark_group("bandwidth");
    group.throughput(Throughput::Bytes(size as u64));
    group.bench_function("memory_copy", |bencher| {
        bencher.iter(|| memory_copy(black_box(&src), black_box(&mut dst)))
    });
    group.finish();
    
    // Modern DDR4 memory: roughly ~25 GB/s theoretical bandwidth
    // (platform-dependent — confirm for your hardware).
    // A result of 20 GB/s means you're achieving 80% of theoretical
    // This context helps identify if you're memory-bound
    //
    // If you get 5 GB/s, there's room for optimization
    // If you get 24 GB/s, you're near the hardware limit
}
 
criterion_group!(benches, memory_bandwidth_context);
criterion_main!(benches);

Throughput enables comparison to hardware limits.

Network Throughput Example

use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
 
/// Simulates framing: 6-byte "HEADER" + payload + 4-byte checksum placeholder.
fn serialize_network_packet(data: &[u8]) -> Vec<u8> {
    let mut packet = Vec::with_capacity(data.len() + 10);
    packet.extend_from_slice(b"HEADER");
    packet.extend_from_slice(data);
    packet.extend_from_slice(&[0u8; 4]);  // checksum placeholder
    packet
}
 
fn network_benchmark(c: &mut Criterion) {
    let payload_sizes = [64, 256, 1024, 1500];  // Common MTU sizes
    
    // Per-size throughput is set on the group before each registration.
    let mut group = c.benchmark_group("packet");
    
    for size in payload_sizes {
        let data = vec![0u8; size];
        
        // Normalize by payload bytes per iteration.
        group.throughput(Throughput::Bytes(size as u64));
        group.bench_function(format!("{}b", size), |bencher| {
            bencher.iter(|| serialize_network_packet(black_box(&data)))
        });
    }
    
    group.finish();
    
    // Compare to network speeds:
    // - 100 Mbps = 12.5 MB/s
    // - 1 Gbps = 125 MB/s
    // - 10 Gbps = 1250 MB/s
    //
    // If serialization is 2 GB/s, it's faster than 10 GbE
    // Network becomes the bottleneck, not serialization
}
 
criterion_group!(benches, network_benchmark);
criterion_main!(benches);

Network throughput benchmarks compare to real-world network speeds.

Throughput in Regression Detection

use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
 
/// Widens each byte to u64 and sums — the workload under regression watch.
fn data_processing(data: &[u8]) -> u64 {
    data.iter().map(|&b| b as u64).sum()
}
 
fn regression_detection(c: &mut Criterion) {
    let data = vec![42u8; 10_000_000];
    
    let mut group = c.benchmark_group("regression");
    // 10,000,000 bytes per iteration. Derive the count from the buffer
    // itself so it can never drift from the actual data size.
    // (There is no Throughput::MiB constructor — and note 10,000,000
    // bytes is ~9.54 MiB, not 10 MiB.)
    group.throughput(Throughput::Bytes(data.len() as u64));
    group.bench_function("process", |bencher| {
        bencher.iter(|| data_processing(black_box(&data)))
    });
    group.finish();
    
    // Criterion saves historical results
    // With throughput configured, it tracks:
    // - Baseline: 8.5 GiB/s ± 0.2 GiB/s
    // - Current:  6.1 GiB/s ± 0.1 GiB/s
    // - Change:   -28.2% (regression!)
    //
    // This is more useful than:
    // - Baseline: 8500 iter/s
    // - Current:  6100 iter/s
    // - Change:   -28.2%
    //
    // Because "iter/s" doesn't tell you if you're
    // processing more data or optimized the loop overhead
}
 
criterion_group!(benches, regression_detection);
criterion_main!(benches);

Throughput measurements make regression detection meaningful.

Elements vs Bytes Decision

use criterion::{Criterion, Throughput};
 
fn when_to_use_which(c: &mut Criterion) {
    // Reach for Throughput::Bytes when:
    // - processing raw data (files, buffers, streams)
    // - memory bandwidth is the relevant ceiling
    // - comparing against storage/network speeds
    // - the byte count is the meaningful measure of work
    
    // Reach for Throughput::Elements when:
    // - processing structured data (records, objects)
    // - the item count matters more than the byte count
    // - comparing logical throughput rates
    
    // Example: hash-set insertion — Elements fits best.
    fn insert_items(items: &[u32], table: &mut std::collections::HashSet<u32>) {
        for &item in items {
            table.insert(item);
        }
    }
    
    // "1 million insertions per second" communicates more than
    // "4 MB inserted per second".
    
    // Example: file compression — Bytes fits best.
    fn compress(data: &[u8]) -> Vec<u8> {
        // compression logic
        data.to_vec()
    }
    
    // "500 MB/s compression rate" communicates more than
    // "50000000 bytes per second".
}

Choose the unit that best communicates performance characteristics.

Synthesis

Quick reference:

use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
 
fn quick_reference(c: &mut Criterion) {
    let data = vec![0u8; 1_000_000];
    
    c.bench_function("example", |bencher| {
        // Set throughput: each iteration processes 1_000_000 bytes
        bencher.throughput(Throughput::Bytes(1_000_000));
        
        bencher.iter(|| {
            // Benchmark body
            data.iter().sum::<u8>()
        })
    });
    
    // Output includes throughput:
    // example              time:   [100.23 us 100.45 us 100.67 us]
    //                      thrpt:  [9.9335 GiB/s 9.9552 GiB/s 9.9768 GiB/s]
    
    // Key constructors:
    Throughput::Bytes(n);      // n bytes per iteration
    Throughput::KiB(n);        // n * 1024 bytes per iteration
    Throughput::MiB(n);        // n * 1048576 bytes per iteration
    Throughput::Elements(n);   // n elements per iteration
    
    // When to use:
    // - Processing data: Throughput::Bytes
    // - Counting items: Throughput::Elements
    // - Comparing implementations: Same throughput unit
    // - Regression detection: Throughput shows real change
    
    // Benefits:
    // 1. Intuitive units (MB/s vs iterations/s)
    // 2. Hardware context (memory/disk/network speed)
    // 3. Cross-benchmark comparison
    // 4. Complexity validation (stable throughput = linear)
}
 
criterion_group!(benches, quick_reference);
criterion_main!(benches);
 
// Key insight:
// Throughput::Bytes transforms benchmark output from
// "how many iterations per second" to "how much work per second"
// This makes performance results meaningful and comparable.

Key insight: criterion::Throughput::Bytes bridges the gap between abstract benchmark metrics and real-world performance. Without throughput, Criterion reports iterations per second—a metric that depends on iteration complexity and provides no context for comparison. By specifying Throughput::Bytes(n), you declare "each iteration processes n bytes," enabling Criterion to calculate and report bytes per second. This transforms "50,000 iterations/second" into "5 GB/s," making the result immediately comprehensible and comparable to memory bandwidth, disk speed, or network throughput. Use Throughput::Bytes for any benchmark processing raw data where throughput rate matters more than iteration count. Use Throughput::Elements for logical item counts. The resulting throughput measurements validate algorithmic complexity (stable throughput across sizes proves linear scaling), enable meaningful implementation comparisons, and provide context for regression detection—seeing "throughput dropped 30%" is actionable; seeing "iteration rate dropped 30%" requires understanding what each iteration does.