How do I compress data with Zstandard in Rust?

Walkthrough

The zstd crate provides Rust bindings for Zstandard (zstd), a fast lossless compression algorithm developed by Facebook. It offers compression ratios comparable to gzip but with much faster decompression speeds—often 2-3x faster. Zstd supports multiple compression levels (1-22), dictionary compression for small data, and streaming for large files. It's ideal for applications requiring fast compression/decompression: databases, network protocols, file archives, log storage, and real-time data processing.

Key concepts:

  1. Compression levels — 1 (fastest) to 22 (best ratio), default is 3
  2. One-shot compression — compress() and decompress() (in zstd::bulk) for simple cases
  3. Streaming — Encoder and Decoder for large data
  4. Dictionary compression — pretrained dictionaries improve ratio on small data
  5. Parallel compression — enable the crate's zstdmt feature and call Encoder::multithread(n) for multi-threaded compression

Code Example

# Cargo.toml
[dependencies]
zstd = "0.13"
use zstd::{decode_all, encode_all};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // One-shot round trip: compress a byte slice at level 3, then expand it back.
    let payload: &[u8] = b"Hello, World! This is some data to compress.";

    let packed = encode_all(payload, 3)?;
    println!("Compressed: {} bytes", packed.len());

    let unpacked = decode_all(packed.as_slice())?;
    println!("Decompressed: {} bytes", unpacked.len());

    Ok(())
}

Basic Compression and Decompression

use zstd::{compress, decompress};
 
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let original = b"This is a string that will be compressed using zstd. \
                     Zstandard offers great compression ratios with fast speeds.";
    
    // Compress with default level (3)
    let compressed = compress(&original[..], 3)?;
    println!("Original: {} bytes", original.len());
    println!("Compressed: {} bytes", compressed.len());
    println!("Ratio: {:.2}%", 
             (compressed.len() as f64 / original.len() as f64) * 100.0);
    
    // Decompress
    let decompressed = decompress(&compressed[..], original.len())?;
    assert_eq!(&original[..], &decompressed[..]);
    println!("Decompression successful!");
    
    Ok(())
}

Compression Levels

use zstd::encode_all;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Compress the same input at several levels to show the size/speed trade-off.
    let sample = b"Hello, World! ".repeat(1000);

    println!("Original: {} bytes", sample.len());
    println!("\nCompression levels comparison:");
    println!("{:^8} {:^12} {:^10}", "Level", "Size", "Ratio");
    println!("{:-^8} {:-^12} {:-^10}", "", "", "");

    let levels = [1, 3, 5, 10, 15, 19, 22];
    for &level in levels.iter() {
        let packed = encode_all(sample.as_slice(), level)?;
        let pct = 100.0 * packed.len() as f64 / sample.len() as f64;
        println!("{:>8} {:>12} {:>9.1}%", level, packed.len(), pct);
    }

    Ok(())
}

Streaming Compression

use zstd::stream::{Encoder, Decoder};
use std::io::{self, Read, Write, Cursor};
 
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Compress data in chunks
    let data = b"This is a large piece of data that we'll compress in a streaming fashion. ".repeat(100);
    
    let mut buffer = Vec::new();
    {
        let mut encoder = Encoder::new(&mut buffer, 3)?;
        encoder.write_all(&data)?;
        encoder.finish()?; // Important: call finish() to flush
    }
    
    println!("Original: {} bytes", data.len());
    println!("Compressed: {} bytes", buffer.len());
    
    // Decompress streaming
    let mut decompressed = Vec::new();
    {
        let mut decoder = Decoder::new(&buffer[..])?;
        decoder.read_to_end(&mut decompressed)?;
    }
    
    assert_eq!(&data[..], &decompressed[..]);
    println!("Streaming decompression successful!");
    
    Ok(())
}

File Compression

use zstd::stream::{Encoder, Decoder};
use std::fs::File;
use std::io::{BufReader, BufWriter, CopyBufReader};
 
/// Stream-compress the file at `input_path` into `output_path` at the given
/// zstd level. Both sides are buffered so no syscall-per-byte I/O happens.
fn compress_file(input_path: &str, output_path: &str, level: i32) 
    -> Result<(), Box<dyn std::error::Error>> 
{
    let input = File::open(input_path)?;
    let output = File::create(output_path)?;
    
    let mut encoder = Encoder::new(BufWriter::new(output), level)?;
    let mut reader = BufReader::new(input);
    std::io::copy(&mut reader, &mut encoder)?;
    
    // finish() writes the zstd frame epilogue and returns the inner writer;
    // flush explicitly so BufWriter errors surface instead of being silently
    // swallowed by its Drop impl.
    let mut writer = encoder.finish()?;
    std::io::Write::flush(&mut writer)?;
    
    println!("Compressed {} to {}", input_path, output_path);
    Ok(())
}
 
/// Stream-decompress the zstd file at `input_path` into `output_path`.
fn decompress_file(input_path: &str, output_path: &str) 
    -> Result<(), Box<dyn std::error::Error>> 
{
    let input = File::open(input_path)?;
    let output = File::create(output_path)?;
    
    let mut decoder = Decoder::new(BufReader::new(input))?;
    let mut writer = BufWriter::new(output);
    
    std::io::copy(&mut decoder, &mut writer)?;
    // Flush explicitly: BufWriter's Drop also flushes, but ignores errors.
    std::io::Write::flush(&mut writer)?;
    
    println!("Decompressed {} to {}", input_path, output_path);
    Ok(())
}
 
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Exercise compress_file/decompress_file on a scratch file and verify
    // the round trip is lossless.
    let test_data = "This is test file content. ".repeat(1000);
    std::fs::write("test_input.txt", &test_data)?;

    compress_file("test_input.txt", "test_output.zst", 3)?;

    let before = std::fs::metadata("test_input.txt")?.len();
    let after = std::fs::metadata("test_output.zst")?.len();
    println!("Ratio: {:.1}%", 
             (after as f64 / before as f64) * 100.0);

    decompress_file("test_output.zst", "test_decompressed.txt")?;

    // Confirm the decompressed bytes match what we wrote.
    let round_tripped = std::fs::read_to_string("test_decompressed.txt")?;
    assert_eq!(test_data, round_tripped);
    println!("Verification passed!");

    // Remove the scratch files.
    for path in ["test_input.txt", "test_output.zst", "test_decompressed.txt"] {
        std::fs::remove_file(path)?;
    }

    Ok(())
}

Working with Vec&lt;u8&gt;

use zstd::{decode_all, encode_all};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // 10 000 bytes cycling through every u8 value.
    let data: Vec<u8> = (0..=255).cycle().take(10000).collect();

    // One-shot compression at level 10.
    let compressed = encode_all(data.as_slice(), 10)?;
    println!("Compressed {} bytes to {} bytes", data.len(), compressed.len());

    // One-shot decompression (decode_all sizes the output buffer itself).
    let decompressed: Vec<u8> = decode_all(compressed.as_slice())?;
    assert_eq!(data, decompressed);

    // The streaming Decoder yields exactly the same bytes, incrementally.
    use std::io::Read;
    use zstd::stream::Decoder;

    let mut streaming_decompressed = Vec::new();
    let mut decoder = Decoder::new(compressed.as_slice())?;
    decoder.read_to_end(&mut streaming_decompressed)?;

    assert_eq!(data, streaming_decompressed);
    println!("All methods produce same result!");

    Ok(())
}

Dictionary Compression

// `zstd::dict` has no `encode_all`/`decode_all`; the one-shot dictionary API
// is zstd::bulk's Compressor/Decompressor with `with_dictionary`.
use zstd::bulk::{Compressor, Decompressor};
use zstd::encode_all;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Sample training data (similar to what you'll compress)
    let training_data = b"
        User: alice@example.com logged in from 192.168.1.1
        User: bob@example.com logged in from 192.168.1.2
        User: charlie@example.com logged in from 192.168.1.3
        User: diana@example.com logged in from 192.168.1.4
    ".repeat(10);
    
    // Train a dictionary (usually done offline). Training wants many small
    // samples, so split the corpus into lines instead of one big blob.
    let samples: Vec<&[u8]> = training_data.split(|&b| b == b'\n').collect();
    let dict = zstd::dict::from_samples(
        &samples,
        1024, // Dictionary size
    )?;
    
    println!("Dictionary size: {} bytes", dict.len());
    
    // Data to compress (similar structure to training data)
    let new_data = b"User: eve@example.com logged in from 192.168.1.5";
    
    // Compress with and without the dictionary.
    let mut compressor = Compressor::with_dictionary(3, &dict)?;
    let compressed_with_dict = compressor.compress(&new_data[..])?;
    let compressed_without_dict = encode_all(&new_data[..], 3)?;
    
    println!("\nWithout dictionary: {} bytes", compressed_without_dict.len());
    println!("With dictionary: {} bytes", compressed_with_dict.len());
    
    // Decompress with the same dictionary; the second argument is an upper
    // bound on the decoded size, known exactly here.
    let mut decompressor = Decompressor::with_dictionary(&dict)?;
    let decompressed = decompressor.decompress(&compressed_with_dict, new_data.len())?;
    assert_eq!(&new_data[..], &decompressed[..]);
    
    println!("Dictionary compression working!");
    
    Ok(())
}

Customizing Encoder/Decoder

use zstd::stream::{Encoder, Decoder};
use zstd::stream::raw::CParameter;
use std::io::Cursor;
 
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let data = b"Some data to compress with custom settings.";
    
    let mut buffer = Vec::new();
    
    // Create encoder with custom parameters
    let mut encoder = Encoder::new(&mut buffer, 3)?;
    
    // Set parameters
    encoder.set_parameter(CParameter::EnableLongDistanceMatching(true))?;
    encoder.set_parameter(CParameter::WindowLog(21))?; // 2MB window
    encoder.set_parameter(CParameter::ChecksumFlag(true))?;
    
    encoder.write_all(data)?;
    encoder.finish()?;
    
    println!("Compressed with custom parameters: {} bytes", buffer.len());
    
    // Decompress
    let mut decompressed = Vec::new();
    let mut decoder = Decoder::new(&buffer[..])?;
    std::io::Read::read_to_end(&mut decoder, &mut decompressed)?;
    
    assert_eq!(&data[..], &decompressed[..]);
    println!("Decompression with custom settings successful!");
    
    Ok(()) 
}

Real-World: Log Compression

use zstd::stream::{Encoder, Decoder};
use std::io::{self, Write};
use std::fs::File;
 
/// Append-only log writer that compresses entries on the fly.
struct LogCompressor {
    // zstd's Encoder carries a lifetime parameter (for borrowed dictionaries);
    // `Encoder<File>` does not compile — 'static means no borrowed dictionary.
    encoder: Encoder<'static, File>,
    entries: usize, // number of log lines written so far
}
 
impl LogCompressor {
    /// Create `path` and wrap it in a zstd encoder at the given level.
    fn create(path: &str, level: i32) -> io::Result<Self> {
        let encoder = Encoder::new(File::create(path)?, level)?;
        Ok(LogCompressor { encoder, entries: 0 })
    }
    
    /// Append one "[LEVEL] message" line to the compressed stream.
    fn write_entry(&mut self, level: &str, message: &str) -> io::Result<()> {
        writeln!(self.encoder, "[{}] {}", level, message)?;
        self.entries += 1;
        Ok(())
    }
    
    /// Finalize the zstd frame and report how many entries were written.
    fn finish(self) -> io::Result<usize> {
        self.encoder.finish()?;
        Ok(self.entries)
    }
}
 
fn main() -> io::Result<()> {
    // Write 1000 compressed log lines, then read them back and count them.
    {
        let mut compressor = LogCompressor::create("app.log.zst", 3)?;
        for i in 0..1000 {
            compressor.write_entry("INFO", &format!("Processing item {}", i))?;
        }
        println!("Wrote {} log entries", compressor.finish()?);
    }
    
    // Read back through a streaming decoder.
    let mut decoder = Decoder::new(File::open("app.log.zst")?)?;
    let mut content = String::new();
    std::io::Read::read_to_string(&mut decoder, &mut content)?;
    println!("Read back {} lines", content.lines().count());
    
    std::fs::remove_file("app.log.zst")?;
    Ok(())
}

Real-World: Network Protocol Compression

use zstd::{encode_all, decode_all};
 
/// One unit of network traffic: a numeric id plus an opaque payload.
/// Wire format (before compression): 4-byte little-endian id, then data.
#[derive(Debug, Clone)]
struct Packet {
    // Message identifier, serialized little-endian by `compress`.
    id: u32,
    // Raw payload bytes.
    data: Vec<u8>,
}
 
impl Packet {
    /// Serialize the packet (little-endian id + payload) and zstd-compress
    /// the whole buffer at the given level.
    fn compress(&self, level: i32) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
        // Exact final size is known up front, so reserve it in one shot.
        let mut bytes = Vec::with_capacity(4 + self.data.len());
        bytes.extend_from_slice(&self.id.to_le_bytes());
        bytes.extend_from_slice(&self.data);
        
        let compressed = encode_all(&bytes[..], level)?;
        Ok(compressed)
    }
    
    /// Decompress and parse a packet produced by [`Packet::compress`].
    ///
    /// Errors if the input is not valid zstd data or the decompressed
    /// payload is shorter than the 4-byte id header.
    fn decompress(compressed: &[u8]) -> Result<Self, Box<dyn std::error::Error>> {
        let decompressed = decode_all(compressed)?;
        
        if decompressed.len() < 4 {
            return Err("Packet too short".into());
        }
        
        // try_into converts the 4-byte slice into [u8; 4] without manual indexing.
        let id = u32::from_le_bytes(decompressed[..4].try_into()?);
        let data = decompressed[4..].to_vec();
        
        Ok(Packet { id, data })
    }
}
 
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Round-trip one packet through compress/decompress.
    let packet = Packet {
        id: 42,
        data: b"This is important packet data".to_vec(),
    };
    
    let compressed = packet.compress(3)?;
    println!("Original packet: {} bytes", 4 + packet.data.len());
    println!("Compressed packet: {} bytes", compressed.len());
    
    let restored = Packet::decompress(&compressed)?;
    println!("Decompressed packet: {:?}", restored);
    
    assert_eq!(packet.id, restored.id);
    assert_eq!(packet.data, restored.data);
    
    Ok(())
}

Real-World: Backup Archive

use zstd::stream::Encoder;
use std::fs::File;
use std::io::{self, Write, BufReader};
use std::path::Path;
 
/// Writes a simple length-prefixed archive format through a zstd stream.
struct BackupArchive {
    // zstd's Encoder carries a lifetime parameter (for borrowed dictionaries);
    // `Encoder<File>` does not compile — 'static means no borrowed dictionary.
    encoder: Encoder<'static, File>,
    file_count: usize, // entries written so far
}
 
impl BackupArchive {
    /// Create the archive file and wrap it in a zstd encoder.
    fn create(path: &str, level: i32) -> io::Result<Self> {
        let encoder = Encoder::new(File::create(path)?, level)?;
        Ok(BackupArchive { encoder, file_count: 0 })
    }
    
    /// Append one entry.
    /// Layout: u32 LE name length, name bytes, u64 LE content length, content.
    fn add_file(&mut self, name: &str, content: &[u8]) -> io::Result<()> {
        let name_bytes = name.as_bytes();
        let name_len = (name_bytes.len() as u32).to_le_bytes();
        let content_len = (content.len() as u64).to_le_bytes();
        
        self.encoder.write_all(&name_len)?;
        self.encoder.write_all(name_bytes)?;
        self.encoder.write_all(&content_len)?;
        self.encoder.write_all(content)?;
        
        self.file_count += 1;
        Ok(())
    }
    
    /// Finalize the zstd frame; returns how many entries were written.
    fn finish(self) -> io::Result<usize> {
        self.encoder.finish()?;
        Ok(self.file_count)
    }
}
 
fn main() -> io::Result<()> {
    {
        let mut backup = BackupArchive::create("backup.zst", 10)?;
        
        // (name, content) pairs to pack into the archive.
        let files: [(&str, &[u8]); 3] = [
            ("config.txt", b"configuration data"),
            ("data/users.csv", b"id,name\n1,Alice\n2,Bob"),
            ("data/items.json", b"[1, 2, 3]"),
        ];
        for (name, content) in files {
            backup.add_file(name, content)?;
        }
        
        println!("Added {} files to backup", backup.finish()?);
    }
    
    println!("Backup size: {} bytes", std::fs::metadata("backup.zst")?.len());
    
    std::fs::remove_file("backup.zst")?;
    Ok(())
}

Real-World: Database Value Compression

use zstd::{encode_all, decode_all};
use std::collections::HashMap;
 
/// In-memory key/value store that keeps every value zstd-compressed.
struct CompressedStorage {
    // key -> compressed value bytes
    data: HashMap<String, Vec<u8>>,
    // zstd level used by `store`
    level: i32,
    compression_stats: (usize, usize), // (original, compressed) running byte totals
}
 
impl CompressedStorage {
    /// Empty store compressing at the given zstd level.
    fn new(level: i32) -> Self {
        CompressedStorage {
            data: HashMap::new(),
            level,
            compression_stats: (0, 0),
        }
    }
    
    /// Compress `value` and store it under `key`, updating running totals.
    fn store(&mut self, key: &str, value: &[u8]) -> Result<(), Box<dyn std::error::Error>> {
        let compressed = encode_all(value, self.level)?;
        
        self.compression_stats.0 += value.len();
        self.compression_stats.1 += compressed.len();
        
        self.data.insert(key.to_string(), compressed);
        Ok(())
    }
    
    /// Decompress and return the value stored under `key`, or None if absent.
    fn retrieve(&self, key: &str) -> Result<Option<Vec<u8>>, Box<dyn std::error::Error>> {
        match self.data.get(key) {
            Some(compressed) => {
                // decode_all needs a Read source; &Vec<u8> does not implement
                // Read, so slice it first (the original did not compile).
                let decompressed = decode_all(compressed.as_slice())?;
                Ok(Some(decompressed))
            }
            None => Ok(None),
        }
    }
    
    /// Overall compressed/original size as a percentage (0.0 when empty).
    fn compression_ratio(&self) -> f64 {
        let (original, compressed) = self.compression_stats;
        if original == 0 {
            0.0
        } else {
            (compressed as f64 / original as f64) * 100.0
        }
    }
}
 
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut storage = CompressedStorage::new(3);
    
    // Store a couple of typical text payloads.
    let users_json = br#"[
        {"id": 1, "name": "Alice", "email": "alice@example.com"},
        {"id": 2, "name": "Bob", "email": "bob@example.com"},
        {"id": 3, "name": "Charlie", "email": "charlie@example.com"}
    ]"#;
    storage.store("users", users_json)?;
    storage.store("config", b"debug=false\nport=8080\nhost=localhost")?;
    
    let users = storage.retrieve("users")?;
    println!("Retrieved users: {} bytes", users.unwrap().len());
    
    println!("Compression ratio: {:.1}%", storage.compression_ratio());
    
    Ok(())
}

Real-World: Streaming HTTP Response

use zstd::stream::Encoder;
use std::io::{Cursor, Read};
 
/// Response body compressed once up front; the `Read` impl then serves
/// the compressed bytes incrementally.
struct CompressedResponse {
    // Compressed body plus a cursor tracking the read position.
    buffer: Cursor<Vec<u8>>,
    // Uncompressed payload length in bytes.
    original_size: usize,
}
 
impl CompressedResponse {
    /// Eagerly compress `data` at the given zstd level; the result is then
    /// readable through the `Read` implementation.
    fn new(data: &[u8], level: i32) -> Result<Self, Box<dyn std::error::Error>> {
        let mut compressed = Vec::new();
        {
            let mut encoder = Encoder::new(&mut compressed, level)?;
            std::io::Write::write_all(&mut encoder, data)?;
            // Write the frame epilogue before handing the buffer over.
            encoder.finish()?;
        }
        
        Ok(CompressedResponse {
            buffer: Cursor::new(compressed),
            original_size: data.len(),
        })
    }
    
    /// Size of the zstd-encoded body, in bytes.
    fn compressed_size(&self) -> usize {
        self.buffer.get_ref().len()
    }
    
    /// Size of the uncompressed body, in bytes.
    fn original_size(&self) -> usize {
        self.original_size
    }
    
    /// Value to send in the Content-Encoding header.
    fn content_encoding(&self) -> &'static str {
        "zstd"
    }
}
 
// Serve the pre-compressed bytes; reading just advances the in-memory cursor.
impl Read for CompressedResponse {
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        self.buffer.read(buf)
    }
}
 
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Repetitive HTML compresses extremely well.
    let html = br#"
    <!DOCTYPE html>
    <html>
    <head><title>Hello World</title></head>
    <body>
        <h1>Hello, World!</h1>
        <p>This is a sample HTML page that will be compressed with zstd.</p>
    </body>
    </html>
    "#.repeat(100);
    
    let response = CompressedResponse::new(&html, 3)?;
    
    let original = response.original_size() as f64;
    let compressed = response.compressed_size() as f64;
    
    println!("Content-Encoding: {}", response.content_encoding());
    println!("Original size: {} bytes", response.original_size());
    println!("Compressed size: {} bytes", response.compressed_size());
    println!("Compression ratio: {:.1}%", 
             (compressed / original) * 100.0);
    
    Ok(())
}

Benchmarking Compression

use zstd::{encode_all, decode_all};
use std::time::Instant;
 
/// Compress then decompress `data` once at `level`, timing each phase.
///
/// Returns (original bytes, compressed bytes, compress ns, decompress ns).
fn benchmark(data: &[u8], level: i32) -> (usize, usize, u128, u128) {
    let start = Instant::now();
    let compressed = encode_all(data, level).unwrap();
    let compress_time = start.elapsed().as_nanos();
    
    let start = Instant::now();
    // decode_all needs a Read source; &Vec<u8> does not implement Read,
    // so slice it first (the original `&compressed` did not compile).
    let _decompressed = decode_all(&compressed[..]).unwrap();
    let decompress_time = start.elapsed().as_nanos();
    
    (data.len(), compressed.len(), compress_time, decompress_time)
}
 
fn main() {
    // 100 kB of cycling byte values — moderately compressible input.
    let data: Vec<u8> = (0..=255u8).cycle().take(100_000).collect();
    
    println!("Benchmark: {} bytes", data.len());
    println!("\n{:^8} {:^10} {:^10} {:^15} {:^15}", 
             "Level", "Size", "Ratio", "Compress (ns)", "Decompress (ns)");
    println!("{:-^8} {:-^10} {:-^10} {:-^15} {:-^15}", "", "", "", "", "");
    
    for level in [1, 3, 6, 10, 15, 19] {
        let (orig, comp, comp_time, decomp_time) = benchmark(&data, level);
        println!(
            "{:>8} {:>10} {:>9.1}% {:>15} {:>15}",
            level,
            comp,
            (comp as f64 / orig as f64) * 100.0,
            comp_time,
            decomp_time
        );
    }
}

Error Handling

use zstd::stream::{Encoder, Decoder};
use zstd::Error;
use std::io::Cursor;
 
/// Compress `data`, reporting any failure as a human-readable String.
fn safe_compress(data: &[u8], level: i32) -> Result<Vec<u8>, String> {
    let mut output = Vec::new();
    
    let mut encoder = match Encoder::new(&mut output, level) {
        Ok(enc) => enc,
        Err(e) => return Err(format!("Failed to create encoder: {}", e)),
    };
    
    if let Err(e) = std::io::Write::write_all(&mut encoder, data) {
        return Err(format!("Write error: {}", e));
    }
    
    if let Err(e) = encoder.finish() {
        return Err(format!("Finish error: {}", e));
    }
    
    Ok(output)
}
 
/// Decompress zstd bytes, reporting any failure as a human-readable String.
fn safe_decompress(compressed: &[u8]) -> Result<Vec<u8>, String> {
    let mut decoder = match Decoder::new(compressed) {
        Ok(dec) => dec,
        Err(e) => return Err(format!("Failed to create decoder: {}", e)),
    };
    
    let mut output = Vec::new();
    if let Err(e) = std::io::Read::read_to_end(&mut decoder, &mut output) {
        return Err(format!("Read error: {}", e));
    }
    
    Ok(output)
}
 
fn main() {
    let data = b"Test data for safe compression";
    
    // Happy path: compress, then round-trip back.
    match safe_compress(data, 3) {
        Err(e) => println!("Compression error: {}", e),
        Ok(compressed) => {
            println!("Compressed to {} bytes", compressed.len());
            
            match safe_decompress(&compressed) {
                Ok(round_tripped) => {
                    assert_eq!(&data[..], &round_tripped[..]);
                    println!("Decompression successful!");
                }
                Err(e) => println!("Decompression error: {}", e),
            }
        }
    }
    
    // Garbage input must fail cleanly rather than panic.
    match safe_decompress(b"not valid zstd data") {
        Ok(_) => println!("Unexpected success"),
        Err(e) => println!("Expected error: {}", e),
    }
}

Thread-Safe Compression Pool

use zstd::encode_all;
use std::sync::{Arc, Mutex};
use std::thread;
 
/// Per-thread outcome of one compression job.
struct CompressionResult {
    // Index of the worker thread that produced this result.
    id: usize,
    // Input size in bytes.
    original_size: usize,
    // Output size in bytes.
    compressed_size: usize,
}
 
fn main() {
    let results: Arc<Mutex<Vec<CompressionResult>>> = Arc::new(Mutex::new(Vec::new()));
    let mut handles = vec![];
    
    for i in 0..4 {
        let results = results.clone();
        
        handles.push(thread::spawn(move || {
            let data: Vec<u8> = (i as u8..).cycle().take(10000).collect();
            let compressed = encode_all(&data[..], 3).unwrap();
            
            let result = CompressionResult {
                id: i,
                original_size: data.len(),
                compressed_size: compressed.len(),
            };
            
            results.lock().unwrap().push(result);
        }));
    }
    
    for handle in handles {
        handle.join().unwrap();
    }
    
    let results = results.lock().unwrap();
    println!("Compression results:");
    for r in results.iter() {
        println!("  Thread {}: {} -> {} bytes ({:.1}%)", 
                 r.id, r.original_size, r.compressed_size,
                 (r.compressed_size as f64 / r.original_size as f64) * 100.0);
    }
}

Summary

  • encode_all(data, level) for simple one-shot compression
  • decode_all(compressed) for simple one-shot decompression
  • Encoder::new(writer, level) for streaming compression
  • Decoder::new(reader) for streaming decompression
  • Always call encoder.finish() to finalize compressed output
  • Compression levels: 1 (fastest) to 22 (best ratio), default is 3
  • Dictionary compression improves ratios for small, similar data
  • Use CParameter for advanced encoder configuration
  • zstd offers better compression than gzip with faster decompression
  • Thread-safe for parallel compression of independent chunks
  • Perfect for: log storage, network protocols, backups, database values, file archives, real-time data compression