Proper performance measurement
Criterion is a statistics-driven benchmarking library for Rust that provides accurate, reliable performance measurements. Unlike simple timing measurements, Criterion uses statistical techniques to detect performance changes, identify outliers, and generate detailed reports with graphs and analysis.
Key features: statistical analysis over many samples, automatic warm-up, outlier detection, regression comparison against saved baselines, and optional HTML reports with plots.
Naive benchmarking has many pitfalls:
// ❌ Unreliable benchmarking
use std::time::Instant;
fn bad_benchmark() {
// ❌ One wall-clock sample: no warm-up, no repeated iterations, no outlier
// handling — and the compiler may even optimize the call away entirely,
// since the result is unused.
let start = Instant::now();
expensive_function();
let duration = start.elapsed();
println!("Took {:?}", duration); // Single measurement, no statistics
}
Problems with simple timing:
use criterion::{black_box, criterion_group, criterion_main, Criterion};
fn fibonacci_recursive(n: u32) -> u32 {
    // Intentionally naive O(2^n) recursion — the slow baseline for the benchmark.
    if n < 2 {
        n
    } else {
        fibonacci_recursive(n - 1) + fibonacci_recursive(n - 2)
    }
}
fn fibonacci_iterative(n: u32) -> u32 {
    // Linear-time fibonacci: carry the (current, next) pair through a fold.
    (0..n).fold((0u32, 1u32), |(curr, next), _| (next, curr + next)).0
}
fn criterion_benchmark(c: &mut Criterion) {
// Simple benchmark
c.bench_function("fib_recursive_20", |b| {
b.iter(|| fibonacci_recursive(black_box(20)))
});
c.bench_function("fib_iterative_20", |b| {
b.iter(|| fibonacci_iterative(black_box(20)))
});
}
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
/// Stable sort of the input in place.
///
/// Takes `&mut [i32]` instead of `&mut Vec<i32>` (clippy::ptr_arg): sorting
/// never resizes the container, and existing `&mut Vec<i32>` call sites still
/// work via deref coercion.
fn sort_vec(data: &mut [i32]) {
    data.sort();
}
/// Unstable (typically faster, non-allocating) sort of the input in place.
///
/// Takes `&mut [i32]` instead of `&mut Vec<i32>` (clippy::ptr_arg); existing
/// `&mut Vec<i32>` call sites still work via deref coercion.
fn sort_unstable_vec(data: &mut [i32]) {
    data.sort_unstable();
}
/// Compares stable vs unstable sort across several input sizes.
fn sorting_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("sorting");
    for size in [100, 1000, 10000, 100000].iter() {
        // iter_batched rebuilds the reverse-sorted input for every batch, so
        // only the sort itself is timed — never the setup allocation.
        group.bench_with_input(BenchmarkId::new("stable", size), size, |b, &size| {
            b.iter_batched(
                || (0..size).rev().collect::<Vec<_>>(),
                |mut input| sort_vec(black_box(&mut input)),
                criterion::BatchSize::SmallInput,
            )
        });
        group.bench_with_input(BenchmarkId::new("unstable", size), size, |b, &size| {
            b.iter_batched(
                || (0..size).rev().collect::<Vec<_>>(),
                |mut input| sort_unstable_vec(black_box(&mut input)),
                criterion::BatchSize::SmallInput,
            )
        });
    }
    group.finish();
}
criterion_group!(benches, sorting_benchmark);
criterion_main!(benches);
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
/// Sums all bytes, widened to u64.
fn process_bytes(data: &[u8]) -> u64 {
    data.iter().fold(0u64, |acc, &byte| acc + u64::from(byte))
}
/// Simple additive checksum; wrapping_add makes overflow well-defined.
fn checksum(data: &[u8]) -> u32 {
    let mut acc = 0u32;
    for &byte in data {
        acc = acc.wrapping_add(u32::from(byte));
    }
    acc
}
/// Simulated "compression": keeps every other byte (the first of each
/// 2-byte chunk), halving the input size (rounded up).
fn compress_simulate(data: &[u8]) -> Vec<u8> {
    data.iter().step_by(2).copied().collect()
}
fn throughput_benchmark(c: &mut Criterion) {
let sizes = vec![1024, 10 * 1024, 100 * 1024, 1024 * 1024];
for size in sizes {
let data = vec![0xFFu8; size];
let mut group = c.benchmark_group("throughput");
group.throughput(Throughput::Bytes(size as u64));
group.bench_function(BenchmarkId::new("process_bytes", size), |b| {
b.iter(|| process_bytes(black_box(&data)))
});
group.bench_function(BenchmarkId::new("checksum", size), |b| {
b.iter(|| checksum(black_box(&data)))
});
group.bench_function(BenchmarkId::new("compress", size), |b| {
b.iter(|| compress_simulate(black_box(&data)))
});
group.finish();
}
}
criterion_group!(benches, throughput_benchmark);
criterion_main!(benches);
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use std::collections::{HashMap, BTreeMap};
/// Inserts `count` entries into a fresh `HashMap` (benchmark workload).
fn hashmap_insert(count: usize) {
    let mut map = HashMap::new();
    for i in 0..count {
        map.insert(i, i * 2);
    }
    // The map is never read, and the benchmark only black_boxes `count` —
    // without this the optimizer may delete the work being measured.
    std::hint::black_box(map);
}
/// Inserts `count` entries into a fresh `BTreeMap` (benchmark workload).
fn btreemap_insert(count: usize) {
    let mut map = BTreeMap::new();
    for i in 0..count {
        map.insert(i, i * 2);
    }
    // Keep the unused map observable so the measured work is not optimized out.
    std::hint::black_box(map);
}
/// Sums the values found for `keys`; keys missing from the map are skipped.
fn hashmap_lookup(map: &HashMap<usize, usize>, keys: &[usize]) -> usize {
    let mut total = 0;
    for key in keys {
        if let Some(value) = map.get(key) {
            total += value;
        }
    }
    total
}
/// Sums the values found for `keys`; keys missing from the map are skipped.
fn btreemap_lookup(map: &BTreeMap<usize, usize>, keys: &[usize]) -> usize {
    let mut total = 0;
    for key in keys {
        if let Some(value) = map.get(key) {
            total += value;
        }
    }
    total
}
fn map_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("map_insert");
for size in [100, 1000, 10000].iter() {
group.bench_with_input(BenchmarkId::new("HashMap", size), size, |b, &size| {
b.iter(|| hashmap_insert(black_box(size)))
});
group.bench_with_input(BenchmarkId::new("BTreeMap", size), size, |b, &size| {
b.iter(|| btreemap_insert(black_box(size)))
});
}
group.finish();
// Lookup benchmarks
let mut group = c.benchmark_group("map_lookup");
let size = 10000;
let hashmap: HashMap<_, _> = (0..size).map(|i| (i, i * 2)).collect();
let btreemap: BTreeMap<_, _> = (0..size).map(|i| (i, i * 2)).collect();
let keys: Vec<_> = (0..size).step_by(10).collect();
group.bench_function("HashMap", |b| {
b.iter(|| hashmap_lookup(black_box(&hashmap), black_box(&keys)))
});
group.bench_function("BTreeMap", |b| {
b.iter(|| btreemap_lookup(black_box(&btreemap), black_box(&keys)))
});
group.finish();
}
criterion_group!(benches, map_benchmark);
criterion_main!(benches);
use criterion::{
criterion_group, criterion_main, measurement::WallTime, BenchmarkGroup, Criterion,
};
use std::time::Duration;
fn allocate_vec(size: usize) -> Vec<u8> {
// `vec![0u8; n]` may be served by a zeroed allocation (calloc-style) under
// the hood; that exact allocation path is what this benchmark measures, so
// the form of this expression is intentional.
vec![0u8; size]
}
fn allocate_boxed_slice(size: usize) -> Box<[u8]> {
// Same zero-filled allocation as `allocate_vec`, then converted to
// `Box<[u8]>`, which drops the separate capacity field.
vec![0u8; size].into_boxed_slice()
}
/// Compares `Vec<u8>` vs `Box<[u8]>` allocation at several sizes.
fn memory_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_allocation");
    // Longer window and more samples than the defaults, since individual
    // allocations are very fast.
    group.measurement_time(Duration::from_secs(10));
    group.sample_size(1000);
    for &size in &[1024, 10 * 1024, 100 * 1024] {
        group.bench_with_input(
            criterion::BenchmarkId::new("Vec", size),
            &size,
            |b, &size| {
                b.iter(|| {
                    // The buffer is dropped inside the timed closure.
                    let buf = allocate_vec(size);
                    criterion::black_box(buf);
                })
            },
        );
        group.bench_with_input(
            criterion::BenchmarkId::new("BoxedSlice", size),
            &size,
            |b, &size| {
                b.iter(|| {
                    let buf = allocate_boxed_slice(size);
                    criterion::black_box(buf);
                })
            },
        );
    }
    group.finish();
}
criterion_group!(benches, memory_benchmark);
criterion_main!(benches);
use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion};
use std::fs::File;
use std::io::{BufWriter, Write};
/// Writes `data` to a freshly created file without buffering.
///
/// `std::fs::write` is the stdlib one-shot equivalent of
/// `File::create` + `write_all`, so the measured behavior is unchanged.
fn write_data_unbuffered(path: &str, data: &[u8]) -> std::io::Result<()> {
    std::fs::write(path, data)
}
/// Writes `data` to a freshly created file through a `BufWriter`.
///
/// Fix: flush explicitly before returning — `BufWriter`'s `Drop` impl does
/// flush, but it silently discards any I/O error, so a failed write could
/// previously return `Ok(())`.
fn write_data_buffered(path: &str, data: &[u8]) -> std::io::Result<()> {
    let file = File::create(path)?;
    let mut writer = BufWriter::new(file);
    writer.write_all(data)?;
    writer.flush()?;
    Ok(())
}
/// Buffered vs unbuffered 1 MB file writes.
///
/// Fixes over the previous version:
/// - Temp paths come from `std::env::temp_dir()` instead of a hard-coded
///   `/tmp`, which does not exist on Windows.
/// - Unique filenames come from a process-id + atomic counter, removing the
///   third-party `rand` dependency.
fn io_benchmark(c: &mut Criterion) {
    // Generates a unique, portable temp-file path per call.
    fn temp_path(prefix: &str) -> String {
        use std::sync::atomic::{AtomicU64, Ordering};
        static COUNTER: AtomicU64 = AtomicU64::new(0);
        let n = COUNTER.fetch_add(1, Ordering::Relaxed);
        std::env::temp_dir()
            .join(format!("{}_{}_{}", prefix, std::process::id(), n))
            .to_string_lossy()
            .into_owned()
    }
    let data = vec![0u8; 1024 * 1024]; // 1 MB payload written each iteration
    let mut group = c.benchmark_group("file_io");
    group.bench_function("unbuffered", |b| {
        b.iter_batched(
            // Setup (untimed): fresh path + a clone of the payload.
            || (temp_path("bench_unbuf"), data.clone()),
            // Timed routine: the write itself.
            |(path, data)| {
                write_data_unbuffered(&path, &data).unwrap();
            },
            BatchSize::SmallInput,
        );
        // Teardown: files are not cleaned up in this simple example
    });
    group.bench_function("buffered", |b| {
        b.iter_batched(
            || (temp_path("bench_buf"), data.clone()),
            |(path, data)| {
                write_data_buffered(&path, &data).unwrap();
            },
            BatchSize::SmallInput,
        );
    });
    group.finish();
}
criterion_group!(benches, io_benchmark);
criterion_main!(benches);
use criterion::{black_box, criterion_group, criterion_main, Criterion};
/// Baseline: sums 0..n with an explicit accumulation loop.
fn old_implementation(n: usize) -> usize {
    let mut total = 0;
    for i in 0..n {
        total += i;
    }
    total
}
/// Optimized version: closed-form sum of `0..n` (n terms: 0 + 1 + … + n-1).
///
/// Fix: guard `n == 0` — `n - 1` on a `usize` underflows, panicking in debug
/// builds, whereas `old_implementation(0)` correctly returns 0.
fn new_implementation(n: usize) -> usize {
    if n == 0 {
        0
    } else {
        n * (n - 1) / 2
    }
}
/// Old vs new implementation, intended for baseline comparison.
///
/// Baseline workflow:
///   save:    cargo bench --bench my_bench -- --save-baseline old
///   compare: cargo bench --bench my_bench -- --baseline old
fn regression_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("sum_optimization");
    let input = 10000;
    group.bench_function("old", |b| b.iter(|| old_implementation(black_box(input))));
    group.bench_function("new", |b| b.iter(|| new_implementation(black_box(input))));
    group.finish();
}
criterion_group!(benches, regression_benchmark);
criterion_main!(benches);
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
/// Strategy under test: repeated `push_str` into an unsized `String`
/// (reallocates as it grows — no preallocation).
fn concat_push_str(strings: &[&str]) -> String {
    strings.iter().fold(String::new(), |mut acc, piece| {
        acc.push_str(piece);
        acc
    })
}
/// Strategy under test: compute the total length first, preallocate once,
/// then append — no reallocation during the appends.
fn concat_with_capacity(strings: &[&str]) -> String {
    let total_len: usize = strings.iter().map(|piece| piece.len()).sum();
    let mut out = String::with_capacity(total_len);
    for piece in strings {
        out.push_str(piece);
    }
    out
}
fn concat_collect(strings: &[&str]) -> String {
// Strategy under test: String's `FromIterator<&str>` impl via `collect` —
// the exact form is what this benchmark compares against the other methods.
strings.iter().copied().collect()
}
fn concat_join(strings: &[&str]) -> String {
// Strategy under test: `[&str]::join` with an empty separator — kept as
// `join("")` (not `concat()`) because the method itself is being measured.
strings.join("")
}
fn string_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("string_concat");
let test_strings = vec!["hello", "world", "foo", "bar", "baz"];
let sizes = vec![10, 100, 1000];
for size in sizes {
let strings: Vec<&str> = test_strings
.iter()
.cycle()
.take(size)
.copied()
.collect();
group.bench_with_input(BenchmarkId::new("push_str", size), &strings, |b, s| {
b.iter(|| concat_push_str(black_box(s)))
});
group.bench_with_input(
BenchmarkId::new("with_capacity", size),
&strings,
|b, s| b.iter(|| concat_with_capacity(black_box(s))),
);
group.bench_with_input(BenchmarkId::new("collect", size), &strings, |b, s| {
b.iter(|| concat_collect(black_box(s)))
});
group.bench_with_input(BenchmarkId::new("join", size), &strings, |b, s| {
b.iter(|| concat_join(black_box(s)))
});
}
group.finish();
}
criterion_group!(benches, string_benchmark);
criterion_main!(benches);
use criterion::{criterion_group, criterion_main, Criterion, profiler::Profiler};
use std::fs::File;
use std::path::Path;
// Custom profiler implementation
/// Stub external-profiler hook. Criterion invokes these callbacks around a
/// benchmark when profiling is enabled (e.g. with `--profile-time`).
struct MyProfiler;
impl Profiler for MyProfiler {
// Called before the profiled run; `benchmark_dir` is where profiler output
// for `benchmark_id` should be written (unused in this stub).
fn start_profiling(&mut self, benchmark_id: &str, benchmark_dir: &Path) {
println!("Starting profiling for: {}", benchmark_id);
// Integration with perf, valgrind, etc.
}
// Called after the profiled run finishes.
fn stop_profiling(&mut self, benchmark_id: &str, benchmark_dir: &Path) {
println!("Stopping profiling for: {}", benchmark_id);
// Stop profiler and save data
}
}
/// Sum of squares 0..n — a simple, single hot loop for profiling demos.
fn heavy_computation(n: usize) -> usize {
    let mut total = 0;
    for i in 0..n {
        total += i * i;
    }
    total
}
/// Single benchmark intended to be run under a profiler.
/// Attach `MyProfiler` via `Criterion::default().with_profiler(MyProfiler)`
/// in the `criterion_group!` config if desired.
fn profiling_benchmark(c: &mut Criterion) {
    c.bench_function("heavy_computation", |b| {
        b.iter(|| heavy_computation(criterion::black_box(10000)))
    });
}
criterion_group!(benches, profiling_benchmark);
criterion_main!(benches);
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use tokio::runtime::Runtime;
/// Simulates `n` microseconds of async work, then returns `n * 2`.
async fn async_computation(n: usize) -> usize {
    let delay = tokio::time::Duration::from_micros(n as u64);
    tokio::time::sleep(delay).await;
    n * 2
}
/// Spawns `n` tasks concurrently, then awaits them in spawn order so the
/// result vector is `[0, 2, 4, ..., 2*(n-1)]`.
async fn async_parallel(n: usize) -> Vec<usize> {
    let handles: Vec<_> = (0..n)
        .map(|i| tokio::spawn(async move { i * 2 }))
        .collect();
    let mut results = Vec::with_capacity(handles.len());
    for handle in handles {
        results.push(handle.await.unwrap());
    }
    results
}
/// Async benchmarks driven by one shared Tokio runtime.
fn async_benchmark(c: &mut Criterion) {
    let rt = Runtime::new().unwrap();
    // `async_computation(...)` already returns a future, so the closure can
    // hand it to `iter` directly — no wrapping `async` block needed.
    c.bench_function("async_computation", |b| {
        b.to_async(&rt).iter(|| async_computation(black_box(100)))
    });
    for task_count in [10usize, 100] {
        let name = format!("async_parallel_{}", task_count);
        c.bench_function(&name, |b| {
            b.to_async(&rt).iter(|| async_parallel(black_box(task_count)))
        });
    }
}
criterion_group!(benches, async_benchmark);
criterion_main!(benches);
Criterion uses statistical methods to:
Before measurements:
Running many iterations:
black_box() prevents compiler optimizations:
// Without black_box - may optimize away
b.iter(|| fibonacci(20));
// With black_box - forces computation
b.iter(|| fibonacci(black_box(20)));
// ❌ DON'T: Compiler may optimize away
c.bench_function("bad", |b| {
b.iter(|| expensive_function(42))
});
// ✅ DO: Use black_box to prevent optimization
c.bench_function("good", |b| {
b.iter(|| expensive_function(black_box(42)))
});
// ❌ DON'T: Include setup time
c.bench_function("bad", |b| {
b.iter(|| {
let data = vec![0; 10000]; // Setup!
process(&data)
})
});
// ✅ DO: Use iter_batched for setup
c.bench_function("good", |b| {
b.iter_batched(
|| vec![0; 10000], // Setup
|data| process(&data), // Benchmark
BatchSize::SmallInput,
)
});
// ❌ DON'T: Reduce sample size too much
group.sample_size(10); // Not enough for statistics
// ✅ DO: Use reasonable sample size
group.sample_size(100); // Default is usually good
// ❌ DON'T: Ignore high variance in results
// If variance is high, results are unreliable
// ✅ DO: Investigate why variance is high
// - System load
// - Cache effects
// - Non-deterministic algorithms
// ❌ DON'T: Benchmark in debug mode
// cargo bench runs in release mode by default
// ✅ DO: Always benchmark release builds
// cargo bench already compiles with optimizations (the `bench` profile inherits `release`)
use criterion::{
criterion_group, criterion_main, measurement::Measurement, BenchmarkGroup, Criterion,
};
use std::time::{Duration, Instant};
// Custom measurement that tracks allocations
/// Illustrative custom `Measurement`: reports an allocation count per
/// iteration instead of wall-clock time. The hooks here are stubs — a real
/// version would query the global allocator in `start`/`end`.
struct AllocationCounter {
// Count captured when measurement begins — unused in this stub.
start_count: usize,
}
impl Measurement for AllocationCounter {
// Value captured at measurement start.
type Intermediate = usize;
// Final per-measurement value handed to Criterion's statistics.
type Value = usize;
fn start(&self) -> Self::Intermediate {
// In reality, you'd query allocator
0
}
fn end(&self, i: Self::Intermediate) -> Self::Value {
// Return allocation count
i
}
// Criterion combines values from multiple iterations with `add`/`zero`.
fn add(&self, v1: &Self::Value, v2: &Self::Value) -> Self::Value {
v1 + v2
}
fn zero(&self) -> Self::Value {
0
}
fn to_f64(&self, val: &Self::Value) -> f64 {
// Criterion's statistical analysis operates on f64.
*val as f64
}
fn formatter(&self) -> &dyn criterion::measurement::ValueFormatter {
// Return custom formatter
// `&AllocationFormatter` is a promoted 'static borrow of the unit struct.
&AllocationFormatter
}
}
/// Formats `AllocationCounter` values for Criterion's console/report output.
struct AllocationFormatter;
impl criterion::measurement::ValueFormatter for AllocationFormatter {
fn format_value(&self, value: f64) -> String {
format!("{} allocations", value as usize)
}
fn format_throughput(&self, throughput: &criterion::Throughput, value: f64) -> String {
// NOTE(review): `throughput` is ignored — a fuller implementation would
// scale `value` by the configured Throughput.
format!("{} allocs/op", value)
}
// No unit scaling is performed; raw allocation counts are always reported.
fn scale_values(&self, _: f64, _: &mut [f64]) -> &'static str {
"allocations"
}
fn scale_throughputs(&self, _: f64, _: &mut [f64]) -> &'static str {
"allocs/op"
}
// Machine-readable output uses the same unscaled unit.
fn scale_for_machines(&self, values: &mut [f64]) -> &'static str {
"allocations"
}
}
use criterion::{criterion_group, criterion_main, Criterion};
use pprof::criterion::{Output, PProfProfiler};
/// Mixed-size recursive fibonacci calls — enough varied work to produce an
/// interesting flamegraph.
fn complex_computation() {
    let total: u32 = (0..10000u32).map(|i| fibonacci_recursive(i % 20)).sum();
    criterion::black_box(total);
}
/// Naive exponential-time fibonacci (the workload being profiled).
fn fibonacci_recursive(n: u32) -> u32 {
    if n < 2 {
        n
    } else {
        fibonacci_recursive(n - 1) + fibonacci_recursive(n - 2)
    }
}
fn flamegraph_benchmark(c: &mut Criterion) {
c.bench_function("complex", |b| b.iter(|| complex_computation()));
}
criterion_group! {
name = benches;
config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
targets = flamegraph_benchmark
}
criterion_main!(benches);
use criterion::{black_box, criterion_group, criterion_main, Criterion};
// Different vector initialization strategies
/// Strategy under test: preallocate exactly `n` slots, then push each
/// element (no reallocation expected during the pushes).
fn vec_with_capacity(n: usize) -> Vec<i32> {
    let mut buf = Vec::with_capacity(n);
    for idx in 0..n {
        buf.push(idx as i32);
    }
    buf
}
fn vec_from_iterator(n: usize) -> Vec<i32> {
// Strategy under test: `collect` sizes the Vec up front from the range's
// exact `size_hint`, so no reallocation occurs.
(0..n as i32).collect()
}
fn vec_with_resize(n: usize) -> Vec<i32> {
// Strategy under test: grow with `resize`, then overwrite by index.
// The bounds-checked `v[i]` writes are intentional — they are part of what
// this benchmark compares against the other initialization styles.
let mut v = Vec::new();
v.resize(n, 0);
for i in 0..n {
v[i] = i as i32;
}
v
}
fn vec_from_macro(n: usize) -> Vec<i32> {
// Strategy under test: `vec![0; n]`. Note this produces all zeros, unlike
// the other strategies which fill 0..n — it measures allocation only.
vec![0; n]
}
fn comparative_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("vec_initialization");
let sizes = vec![10, 100, 1000, 10000];
for size in sizes {
group.bench_with_input(
criterion::BenchmarkId::new("with_capacity", size),
&size,
|b, &size| b.iter(|| vec_with_capacity(black_box(size))),
);
group.bench_with_input(
criterion::BenchmarkId::new("from_iterator", size),
&size,
|b, &size| b.iter(|| vec_from_iterator(black_box(size))),
);
group.bench_with_input(
criterion::BenchmarkId::new("with_resize", size),
&size,
|b, &size| b.iter(|| vec_with_resize(black_box(size))),
);
group.bench_with_input(
criterion::BenchmarkId::new("from_macro", size),
&size,
|b, &size| b.iter(|| vec_from_macro(black_box(size))),
);
}
group.finish();
}
criterion_group!(benches, comparative_benchmark);
criterion_main!(benches);
// Benchmarks for std collections
cargo bench --manifest-path library/std/Cargo.toml
// Criterion benchmarks for async runtime
#[tokio::test]
async fn benchmark_task_spawning() {
// ...
}
// Benchmarking serialization performance
criterion_group!(benches, json_benchmark, bincode_benchmark);
// Benchmarking regex compilation and matching
c.bench_function("compile", |b| b.iter(|| Regex::new(pattern)));
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
[[bench]]
name = "my_benchmark"
harness = false
use criterion::{Criterion, PlotConfiguration, AxisScale};
/// Builds a `Criterion` instance with tightened measurement settings.
///
/// Fix: `Duration` is referenced by full path — this snippet's `use` line
/// (`criterion::{Criterion, PlotConfiguration, AxisScale}`) does not import
/// `std::time::Duration`, so the unqualified name failed to resolve.
fn custom_criterion() -> Criterion {
    Criterion::default()
        .sample_size(1000) // More samples (default is 100)
        .measurement_time(std::time::Duration::from_secs(10))
        .warm_up_time(std::time::Duration::from_secs(3))
        .noise_threshold(0.05) // 5% noise tolerance
        .significance_level(0.05) // 95% confidence
        .plot_config(
            PlotConfiguration::default().summary_scale(AxisScale::Logarithmic),
        )
}
# Run all benchmarks
cargo bench
# Run specific benchmark
cargo bench --bench my_benchmark
# Save baseline
cargo bench -- --save-baseline master
# Compare with baseline
cargo bench -- --baseline master
# Filter benchmarks
cargo bench fibonacci
# Generate profiles
cargo bench --bench my_benchmark -- --profile-time=5
# List benchmarks without running
cargo bench -- --list
# Verbose output
cargo bench -- --verbose
# Quick mode (fewer samples)
cargo bench -- --quick
fib_recursive_20 time: [25.123 us 25.234 us 25.389 us]
change: [-1.2345% +0.1234% +1.4567%] (p = 0.89 > 0.05)
No change in performance detected.
fib_iterative_20 time: [123.45 ns 124.23 ns 125.67 ns]
change: [-15.234% -13.456% -11.234%] (p = 0.00 < 0.05)
Performance has improved.
thrpt: [789.12 KiB/s 812.34 KiB/s 845.67 KiB/s]
Found 5 outliers among 100 measurements (5.00%)
2 (2.00%) low mild
3 (3.00%) high mild
fn main() {
// Placeholder entry point: Criterion benchmarks are registered through the
// `criterion_group!`/`criterion_main!` macros and are executed by
// `cargo bench`, not by running this binary directly.
println!("Run benchmarks with: cargo bench");
}
name: Benchmarks
on: [push, pull_request]
jobs:
benchmark:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
- name: Run benchmarks
run: cargo bench -- --save-baseline PR_${{ github.event.number }}
- name: Compare with master
run: cargo bench -- --baseline master
- name: Upload results
uses: actions/upload-artifact@v2
with:
name: benchmark-results
path: target/criterion/
// Ensure compiler doesn't optimize away your code
b.iter(|| {
let result = expensive_function(black_box(input));
black_box(result)
});
// LargeInput: Setup is expensive, amortize over many iterations
// SmallInput: Setup is cheap, run once per iteration
// PerIteration: Setup must run every time
b.iter_batched(setup, routine, BatchSize::SmallInput);
// Some benchmarks need extra warmup
group.warm_up_time(Duration::from_secs(5));
// Reduce system interference
// - Close other applications
// - Disable CPU frequency scaling
// - Use `nice` to set priority
// Save different baselines for comparison
// cargo bench -- --save-baseline v1.0
// cargo bench -- --save-baseline v2.0
// cargo bench -- --baseline v1.0
Note: these examples are meant to be run locally with `cargo bench` (they require a `[[bench]]` target with `harness = false` and Criterion as a dev-dependency); the official Rust Playground cannot run Criterion's bench harness.