How do I compress data with Zstandard (zstd) in Rust?

Walkthrough

The zstd crate provides Rust bindings to Facebook's Zstandard (zstd) compression library. Zstandard offers excellent compression ratios comparable to gzip while being significantly faster — often 2-5x faster at decompression. It's ideal for applications requiring fast compression with good ratios, such as log storage, network protocols, database compression, and file archiving.

Key concepts:

  1. Compression levels — range from 1 (fastest) to 22 (best ratio), with 3 as default
  2. Streaming API — compress/decompress large data without loading entirely into memory
  3. Dictionary compression — pre-trained dictionaries for small, similar data
  4. One-shot API — simple functions for compressing complete buffers
  5. Encoder/Decoder — streaming interfaces: Encoder wraps any Write for compression, Decoder wraps any Read for decompression

Code Example

# Cargo.toml
[dependencies]
zstd = "0.13"

# src/main.rs (separate file from Cargo.toml)
use zstd::{encode_all, decode_all};
 
fn main() -> std::io::Result<()> {
    // Sample payload to round-trip through zstd.
    let data = b"Hello, World! This is some data to compress.";

    // One-shot compression at level 3 (the crate's default level).
    let compressed = encode_all(data.as_slice(), 3)?;
    println!("Original: {} bytes", data.len());
    println!("Compressed: {} bytes", compressed.len());

    // One-shot decompression back into a Vec<u8>.
    let decompressed = decode_all(compressed.as_slice())?;
    println!("Decompressed: {} bytes", decompressed.len());

    // Confirm the round trip was lossless.
    assert_eq!(data.as_slice(), decompressed.as_slice());
    Ok(())
}

Basic Compression and Decompression

use zstd::{encode_all, decode_all};
 
fn main() -> std::io::Result<()> {
    // 100 repetitions of a short sentence: highly compressible input.
    let data = b"The quick brown fox jumps over the lazy dog. ".repeat(100);

    println!("Original size: {} bytes", data.len());

    // Show how the ratio improves as the level rises.
    for &level in [1, 3, 9, 19].iter() {
        let compressed = encode_all(data.as_slice(), level)?;
        let ratio = data.len() as f64 / compressed.len() as f64;
        println!("Level {}: {} bytes (ratio: {:.2}x)", 
            level, compressed.len(), ratio);
    }

    // Round-trip once more at the default level and verify losslessness.
    let compressed = encode_all(data.as_slice(), 3)?;
    let decompressed = decode_all(compressed.as_slice())?;

    assert_eq!(data, decompressed.as_slice());
    println!("\nDecompression successful!");

    Ok(())
}

Streaming Compression with Encoder

use std::fs::File;
use std::io::{self, prelude::*};
use zstd::Encoder;
 
fn main() -> io::Result<()> {
    // Stream-compress directly into a file on disk.
    let mut encoder = Encoder::new(File::create("compressed.zst")?, 3)?;

    // Feed the encoder incrementally; nothing is final until finish().
    for i in 0..10 {
        encoder.write_all(format!("Chunk {} with some data. ", i).as_bytes())?;
    }

    // finish() writes the zstd frame epilogue and returns the underlying
    // file, which we close immediately by dropping it.
    drop(encoder.finish()?);

    println!("Compressed data written to compressed.zst");

    Ok(())
}

Streaming Decompression with Decoder

use std::fs::File;
use std::io::{self, prelude::*};
use zstd::Decoder;
 
fn main() -> io::Result<()> {
    // Stream-decompress the file produced by the encoder example.
    let mut decoder = Decoder::new(File::open("compressed.zst")?)?;

    // Decoder implements Read, so any Read helper works on it.
    let mut decompressed = String::new();
    decoder.read_to_string(&mut decompressed)?;

    println!("Decompressed content:\n{}", decompressed);

    Ok(())
}

Compressing Files

use std::fs::File;
use std::io::{self, prelude::*, BufReader, BufWriter};
use zstd::{Encoder, Decoder};
 
/// Compress the file at `input_path` into a zstd frame at `output_path`.
///
/// Streams through buffered I/O so arbitrarily large files are handled
/// without loading them fully into memory.
fn compress_file(input_path: &str, output_path: &str, level: i32) -> io::Result<()> {
    let input_file = File::open(input_path)?;
    let output_file = File::create(output_path)?;

    let mut reader = BufReader::new(input_file);
    let mut encoder = Encoder::new(BufWriter::new(output_file), level)?;

    // io::copy already streams until EOF; the previous `.take(u64::MAX)`
    // wrapper was a no-op.
    let bytes = io::copy(&mut reader, &mut encoder)?;

    // finish() writes the zstd frame epilogue and returns the BufWriter.
    // Flush it explicitly: dropping a BufWriter silently discards any
    // flush error.
    encoder.finish()?.flush()?;

    println!("Compressed {} bytes", bytes);
    Ok(())
}
 
/// Decompress the zstd file at `input_path` into `output_path`.
fn decompress_file(input_path: &str, output_path: &str) -> io::Result<()> {
    // Decoder yields decompressed bytes; BufWriter batches the output.
    let mut decoder = Decoder::new(File::open(input_path)?)?;
    let mut writer = BufWriter::new(File::create(output_path)?);

    let bytes = io::copy(&mut decoder, &mut writer)?;
    writer.flush()?;

    println!("Decompressed {} bytes", bytes);
    Ok(())
}
 
fn main() -> io::Result<()> {
    // Build a repetitive test file so the compression win is visible.
    let test_content = "Test content for compression. ".repeat(1000);
    std::fs::write("test_input.txt", &test_content)?;

    // Full round trip through the two helpers above.
    compress_file("test_input.txt", "test_output.zst", 9)?;
    decompress_file("test_output.zst", "test_decompressed.txt")?;

    // The decompressed file must match the original byte-for-byte.
    let original = std::fs::read("test_input.txt")?;
    let decompressed = std::fs::read("test_decompressed.txt")?;
    assert_eq!(original, decompressed);

    println!("Verification successful!");

    // Report the on-disk size difference.
    let original_size = std::fs::metadata("test_input.txt")?.len();
    let compressed_size = std::fs::metadata("test_output.zst")?.len();
    println!("Original: {} bytes", original_size);
    println!("Compressed: {} bytes ({:.1}%)", 
        compressed_size, 
        (compressed_size as f64 / original_size as f64) * 100.0
    );

    Ok(())
}

In-Memory Compression

use zstd::{encode_all, decode_all, stream::encode_to_vec};
 
fn main() -> std::io::Result<()> {
    let data = b"This is some data that we want to compress in memory.";
    
    // One-shot compression
    let compressed = encode_all(&data[..], 3)?;
    let decompressed = decode_all(&compressed[..])?;
    
    println!("Original: {} bytes", data.len());
    println!("Compressed: {} bytes", compressed.len());
    println!("Decompressed: {} bytes", decompressed.len());
    
    assert_eq!(data.to_vec(), decompressed);
    
    // Stream to vec
    let mut cursor = std::io::Cursor::new(&data[..]);
    let compressed_stream = encode_to_vec(&mut cursor, 3)?;
    
    println!("\nStreamed compression: {} bytes", compressed_stream.len());
    
    Ok(())
}

Compression Levels Comparison

use zstd::encode_all;
use std::time::Instant;
 
fn main() -> std::io::Result<()> {
    // Compress `data` once at `level`; return (compressed size, elapsed).
    fn bench(data: &[u8], level: i32) -> std::io::Result<(usize, std::time::Duration)> {
        let start = Instant::now();
        let compressed = encode_all(data, level)?;
        Ok((compressed.len(), start.elapsed()))
    }

    // Deterministic pseudo-random test data, 1 MB.
    let data: Vec<u8> = (0..1_000_000)
        .map(|i| ((i * 7 + 13) % 256) as u8)
        .collect();

    println!("Original data: {} bytes", data.len());
    println!();
    println!("{:<8} {:<12} {:<12} {:<10} {:<12}", 
        "Level", "Compressed", "Ratio", "Time", "MB/s");
    println!("{}", "-".repeat(54));

    // Sweep a spread of levels from fastest to strongest.
    for level in [1, 2, 3, 5, 7, 9, 12, 15, 19, 22] {
        let (size, duration) = bench(&data, level)?;

        let ratio = data.len() as f64 / size as f64;
        let mb_per_sec = (data.len() as f64 / 1_000_000.0) / duration.as_secs_f64();

        println!("{:<8} {:<12} {:<12.2} {:<10} {:<12.1}", 
            level, 
            format!("{} bytes", size),
            ratio,
            format!("{:?}", duration),
            mb_per_sec
        );
    }

    Ok(())
}

Working with Custom Buffers

use zstd::{Encoder, Decoder};
use std::io::{self, Cursor, Read, Write};
 
/// Compress `data` into a freshly allocated Vec via a streaming Encoder.
fn compress_to_buffer(data: &[u8], level: i32) -> io::Result<Vec<u8>> {
    let mut output = Vec::new();
    // finish() consumes the encoder, ending the mutable borrow of
    // `output`, so no explicit scope block is needed.
    let mut encoder = Encoder::new(&mut output, level)?;
    encoder.write_all(data)?;
    encoder.finish()?;
    Ok(output)
}
 
/// Decompress a complete zstd frame from `compressed` into a Vec.
fn decompress_from_buffer(compressed: &[u8]) -> io::Result<Vec<u8>> {
    let mut output = Vec::new();
    Decoder::new(Cursor::new(compressed))?.read_to_end(&mut output)?;
    Ok(output)
}
 
fn main() -> io::Result<()> {
    let data = b"Some important data that needs compression.";

    // Round trip through the buffer helpers at a high level.
    let compressed = compress_to_buffer(data, 9)?;
    let decompressed = decompress_from_buffer(&compressed)?;

    println!("Original: {} bytes", data.len());
    println!("Compressed: {} bytes", compressed.len());

    assert_eq!(&data[..], decompressed.as_slice());
    println!("Round-trip successful!");

    Ok(())
}

Dictionary Compression

use zstd::{encode_all, decode_all, dict::EncoderDictionary, dict::DecoderDictionary, Encoder, Decoder};
use std::io::{Cursor, Read, Write}; // Write is needed for encoder.write_all
 
fn main() -> std::io::Result<()> {
    // Training data - small, similar samples (the case dictionaries help most).
    // NOTE(review): real dictionary training wants many samples (hundreds+);
    // zdict may refuse tiny training sets at runtime — confirm on target data.
    let training_data: Vec<&[u8]> = vec![
        b"User: alice, Action: login, Time: 10:00".as_slice(),
        b"User: bob, Action: logout, Time: 10:05".as_slice(),
        b"User: alice, Action: purchase, Time: 10:10".as_slice(),
        b"User: charlie, Action: login, Time: 10:15".as_slice(),
        b"User: bob, Action: purchase, Time: 10:20".as_slice(),
        b"User: alice, Action: logout, Time: 10:25".as_slice(),
        b"User: charlie, Action: purchase, Time: 10:30".as_slice(),
        b"User: bob, Action: login, Time: 10:35".as_slice(),
    ];
    
    // Train a dictionary directly from the sample list.
    // (`zstd::dict::from_samples(samples, max_size)` is the API; there is
    // no `from_buffer` function, and no manual flattening is needed.)
    let dictionary = zstd::dict::from_samples(&training_data, 1024)?;
    println!("Dictionary size: {} bytes", dictionary.len());
    
    // Prepare reusable encoder/decoder dictionaries.
    // `EncoderDictionary::copy` also fixes the compression level (3 here);
    // the decoder side needs only the dictionary bytes.
    let encoder_dict = EncoderDictionary::copy(&dictionary, 3);
    let decoder_dict = DecoderDictionary::copy(&dictionary);
    
    // Test data (similar to training data)
    let test_data = b"User: diana, Action: login, Time: 10:40";
    
    // Compress with the prepared dictionary. The level was baked into
    // `encoder_dict`, so `with_prepared_dictionary` takes no level argument.
    let compressed_with_dict = {
        let mut output = Vec::new();
        let mut encoder = Encoder::with_prepared_dictionary(&mut output, &encoder_dict)?;
        encoder.write_all(test_data)?;
        encoder.finish()?;
        output
    };
    
    // Compress without dictionary for comparison
    let compressed_without_dict = encode_all(&test_data[..], 3)?;
    
    println!("\nOriginal: {} bytes", test_data.len());
    println!("Without dict: {} bytes", compressed_without_dict.len());
    println!("With dict: {} bytes", compressed_with_dict.len());
    
    // Decompress with dictionary
    let mut decoder = Decoder::with_prepared_dictionary(Cursor::new(&compressed_with_dict), &decoder_dict)?;
    let mut decompressed = Vec::new();
    decoder.read_to_end(&mut decompressed)?;
    
    assert_eq!(test_data.to_vec(), decompressed);
    println!("Decompression successful!");
    
    Ok(())
}

Chunked Processing

use zstd::{Encoder, Decoder};
use std::io::{self, Read, Write};
 
/// Compresses byte buffers as a sequence of independently decodable chunks.
///
/// Each chunk becomes its own zstd frame, so chunks can be decompressed
/// (or skipped) individually — at the cost of per-frame overhead and a
/// worse ratio than a single continuous stream.
struct ChunkCompressor {
    chunk_size: usize, // size of each uncompressed chunk, in bytes
    level: i32,        // zstd compression level used for every chunk
}
 
impl ChunkCompressor {
    fn new(chunk_size: usize, level: i32) -> Self {
        Self { chunk_size, level }
    }
    
    /// Split `data` into `chunk_size` pieces and compress each as its own frame.
    fn compress_chunks(&self, data: &[u8]) -> io::Result<Vec<Vec<u8>>> {
        data.chunks(self.chunk_size)
            .map(|chunk| {
                let mut output = Vec::new();
                let mut encoder = Encoder::new(&mut output, self.level)?;
                encoder.write_all(chunk)?;
                encoder.finish()?;
                Ok(output)
            })
            .collect()
    }
    
    /// Decompress the frames produced by `compress_chunks`, concatenated
    /// in order.
    fn decompress_chunks(&self, compressed_chunks: &[Vec<u8>]) -> io::Result<Vec<u8>> {
        let mut decompressed = Vec::new();
        
        for chunk in compressed_chunks {
            // Fully qualified `io::Cursor`: `Cursor` is not in this
            // example's `use std::io::{...}` list, so the bare name
            // would not compile.
            let mut decoder = Decoder::new(io::Cursor::new(chunk))?;
            // read_to_end appends, so it can write straight into the
            // accumulated output without an intermediate buffer.
            decoder.read_to_end(&mut decompressed)?;
        }
        
        Ok(decompressed)
    }
}
 
fn main() -> io::Result<()> {
    // 100 KB of cyclic test data.
    let data: Vec<u8> = (0..100_000).map(|i| (i % 256) as u8).collect();

    let compressor = ChunkCompressor::new(10_000, 3);

    // Compress in 10 KB chunks.
    let compressed_chunks = compressor.compress_chunks(&data)?;
    println!("Created {} compressed chunks", compressed_chunks.len());

    // Sum up the compressed size across all chunks.
    let total_compressed: usize = compressed_chunks.iter().map(Vec::len).sum();
    println!("Total compressed: {} bytes (from {} bytes)", 
        total_compressed, data.len());

    // Rebuild and verify the original buffer.
    let decompressed = compressor.decompress_chunks(&compressed_chunks)?;
    assert_eq!(data, decompressed);

    println!("Round-trip successful!");

    Ok(())
}

Real-World Example: Log Compressor

use std::fs::{self, File};
use std::io::{self, BufWriter, Write};
use std::path::{Path, PathBuf};
use zstd::Encoder;
use chrono::Local;
 
// Note: Add chrono = "0.4" to Cargo.toml
 
/// Writes log entries into date-stamped zstd files (`logs-YYYY-MM-DD.zst`),
/// rotating to a new file whenever the calendar date changes.
struct LogCompressor {
    output_dir: PathBuf, // directory receiving the .zst files
    // `Encoder` carries a lifetime parameter (for borrowed dictionaries);
    // `'static` is required here — the bare `Encoder<BufWriter<File>>`
    // in the original is missing it and does not compile.
    current_file: Option<Encoder<'static, BufWriter<File>>>,
    current_date: String, // date ("%Y-%m-%d") of the currently open file
    level: i32,           // zstd compression level for every file
    bytes_written: u64,    // uncompressed bytes accepted so far
    bytes_compressed: u64, // compressed bytes in files closed so far
}
 
impl LogCompressor {
    /// Create a compressor writing into `output_dir` (created if missing).
    fn new(output_dir: impl AsRef<Path>, level: i32) -> io::Result<Self> {
        fs::create_dir_all(&output_dir)?;
        Ok(Self {
            output_dir: output_dir.as_ref().to_path_buf(),
            current_file: None,
            current_date: String::new(),
            level,
            bytes_written: 0,
            bytes_compressed: 0,
        })
    }
    
    /// Append one entry (a trailing '\n' is added) to today's file,
    /// rotating first if the date has changed since the last write.
    fn write_log(&mut self, entry: &str) -> io::Result<()> {
        let today = Local::now().format("%Y-%m-%d").to_string();
        
        // Rotate file if date changed
        if today != self.current_date {
            self.rotate_file(&today)?;
            self.current_date = today;
        }
        
        // Lazily open today's encoder on first write.
        if self.current_file.is_none() {
            let filename = format!("logs-{}.zst", self.current_date);
            let path = self.output_dir.join(&filename);
            let file = File::create(&path)?;
            let encoder = Encoder::new(BufWriter::new(file), self.level)?;
            self.current_file = Some(encoder);
        }
        
        // Write entry
        if let Some(ref mut encoder) = self.current_file {
            let bytes = entry.as_bytes();
            encoder.write_all(bytes)?;
            encoder.write_all(b"\n")?;
            self.bytes_written += bytes.len() as u64 + 1;
        }
        
        Ok(())
    }
    
    /// Finalize and close the current file, if any, adding its on-disk
    /// size to `bytes_compressed`. (Previously that counter was never
    /// updated, so `stats()` always reported 0 compressed bytes.)
    fn close_current(&mut self) -> io::Result<()> {
        if let Some(encoder) = self.current_file.take() {
            let mut writer = encoder.finish()?;
            writer.flush()?;
            // File is fully flushed, so its length is the compressed size.
            self.bytes_compressed += writer.get_ref().metadata()?.len();
        }
        Ok(())
    }
    
    fn rotate_file(&mut self, new_date: &str) -> io::Result<()> {
        self.close_current()?;
        println!("Rotated log file for {}", new_date);
        Ok(())
    }
    
    fn finish(&mut self) -> io::Result<()> {
        self.close_current()
    }
    
    /// (uncompressed bytes written, compressed bytes in closed files).
    fn stats(&self) -> (u64, u64) {
        (self.bytes_written, self.bytes_compressed)
    }
}
 
impl Drop for LogCompressor {
    /// Best-effort finalization; errors during drop are deliberately ignored.
    fn drop(&mut self) {
        let _ = self.finish();
    }
}
 
fn main() -> io::Result<()> {
    let mut compressor = LogCompressor::new("compressed_logs", 9)?;

    // A handful of representative log lines.
    let log_entries = [
        "2024-01-15 10:00:00 INFO Application started",
        "2024-01-15 10:00:01 DEBUG Loading configuration",
        "2024-01-15 10:00:02 INFO Server listening on port 8080",
        "2024-01-15 10:00:03 WARN High memory usage detected",
        "2024-01-15 10:00:04 ERROR Connection failed: timeout",
        "2024-01-15 10:00:05 INFO Retrying connection...",
        "2024-01-15 10:00:06 INFO Connection established",
    ];

    for entry in log_entries {
        compressor.write_log(entry)?;
    }

    compressor.finish()?;

    println!("Log compression complete!");
    Ok(())
}

Real-World Example: Network Protocol

use zstd::{Encoder, Decoder};
use std::io::{self, Cursor, Read, Write};
 
/// A framed message: one type byte followed by a zstd-compressed payload.
struct CompressedMessage {
    message_type: u8, // application-defined tag, sent uncompressed
    payload: Vec<u8>, // uncompressed payload bytes
}
 
impl CompressedMessage {
    /// Build a message, taking an owned copy of `payload`.
    fn new(message_type: u8, payload: &[u8]) -> Self {
        Self {
            message_type,
            payload: payload.to_vec(),
        }
    }
    
    /// Serialize as `[type: 1 byte][zstd frame of payload]`.
    fn encode(&self, level: i32) -> io::Result<Vec<u8>> {
        // Start the buffer with the type byte; the encoder appends the
        // compressed frame right after it.
        let mut output = vec![self.message_type];
        
        let mut encoder = Encoder::new(&mut output, level)?;
        encoder.write_all(&self.payload)?;
        encoder.finish()?;
        
        Ok(output)
    }
    
    /// Parse bytes produced by `encode`.
    fn decode(data: &[u8]) -> io::Result<Self> {
        // split_first separates the type byte from the compressed frame
        // and rejects an empty buffer in one step.
        let (&message_type, frame) = match data.split_first() {
            Some(parts) => parts,
            None => return Err(io::Error::new(io::ErrorKind::InvalidData, "Empty message")),
        };
        
        let mut payload = Vec::new();
        Decoder::new(Cursor::new(frame))?.read_to_end(&mut payload)?;
        
        Ok(Self { message_type, payload })
    }
}
 
fn main() -> io::Result<()> {
    let payload = b"Hello, this is a message payload that will be compressed!";
    let original = CompressedMessage::new(42, payload);

    println!("Original payload: {} bytes", original.payload.len());

    // Serialize at a high compression level.
    let encoded = original.encode(9)?;
    println!("Encoded message: {} bytes", encoded.len());

    // Parse it back and confirm both fields survive the round trip.
    let decoded = CompressedMessage::decode(&encoded)?;
    println!("Decoded payload: {} bytes", decoded.payload.len());

    assert_eq!(original.message_type, decoded.message_type);
    assert_eq!(original.payload, decoded.payload);

    println!("Round-trip successful!");

    Ok(())
}

Real-World Example: Database Value Compression

use zstd::{encode_all, decode_all};
use std::collections::HashMap;
 
/// A zstd-compressed value plus its original (uncompressed) size.
#[derive(Clone, PartialEq)]
struct CompressedValue {
    compressed: Vec<u8>,  // the zstd frame
    original_size: usize, // uncompressed length, kept for ratio bookkeeping
}
 
impl CompressedValue {
    /// Compress `data` at `level` and record its original size.
    fn new(data: &[u8], level: i32) -> std::io::Result<Self> {
        Ok(Self {
            compressed: encode_all(data, level)?,
            original_size: data.len(),
        })
    }
    
    /// Recover the original bytes.
    fn decompress(&self) -> std::io::Result<Vec<u8>> {
        decode_all(self.compressed.as_slice())
    }
    
    /// How many times smaller the compressed form is (original / compressed).
    fn compression_ratio(&self) -> f64 {
        self.original_size as f64 / self.compressed.len() as f64
    }
    
    /// Bytes saved by compression; zero if compression expanded the data.
    fn size_saved(&self) -> usize {
        self.original_size.saturating_sub(self.compressed.len())
    }
}
 
/// An in-memory key/value store that transparently compresses its values.
struct CompressedStore {
    data: HashMap<String, CompressedValue>,
    compression_level: i32,  // level applied to every insert
    total_original: usize,   // running sum of uncompressed value sizes
    total_compressed: usize, // running sum of compressed value sizes
}
 
impl CompressedStore {
    /// Empty store; all inserts will use `compression_level`.
    fn new(compression_level: i32) -> Self {
        Self {
            data: HashMap::new(),
            compression_level,
            total_original: 0,
            total_compressed: 0,
        }
    }
    
    /// Compress and store `value` under `key`, updating the running totals.
    fn insert(&mut self, key: &str, value: &[u8]) -> std::io::Result<()> {
        let entry = CompressedValue::new(value, self.compression_level)?;
        
        self.total_original += value.len();
        self.total_compressed += entry.compressed.len();
        
        self.data.insert(key.to_string(), entry);
        Ok(())
    }
    
    /// Decompress and return the value for `key`, if present.
    fn get(&self, key: &str) -> std::io::Result<Option<Vec<u8>>> {
        // map + transpose turns Option<Result<_>> into Result<Option<_>>.
        self.data.get(key).map(|v| v.decompress()).transpose()
    }
    
    /// (total original bytes, total compressed bytes, overall ratio).
    fn stats(&self) -> (usize, usize, f64) {
        let ratio = self.total_original as f64 / self.total_compressed as f64;
        (self.total_original, self.total_compressed, ratio)
    }
}
 
fn main() -> std::io::Result<()> {
    let mut store = CompressedStore::new(9);
    
    // Store some data
    let entries = vec![
        ("user:1:profile", br#"{"name":"Alice","email":"alice@example.com","age":30,"bio":"Software engineer with 10 years of experience in Rust and systems programming."}"#),
        ("user:2:profile", br#"{"name":"Bob","email":"bob@example.com","age":25,"bio":"Full-stack developer passionate about web technologies and cloud architecture."}"#),
        ("config:app", br#"{"debug":false,"max_connections":100,"timeout":30,"retry_count":3,"features":["auth","logging","metrics"]}"#),
    ];
    
    for (key, value) in &entries {
        store.insert(key, value)?;
    }
    
    // Retrieve
    let profile = store.get("user:1:profile")?;
    println!("Retrieved: {}", String::from_utf8_lossy(profile.unwrap().as_slice()));
    
    // Stats
    let (original, compressed, ratio) = store.stats();
    println!("\nStats:");
    println!("  Original: {} bytes", original);
    println!("  Compressed: {} bytes", compressed);
    println!("  Ratio: {:.2}x", ratio);
    println!("  Saved: {} bytes", original - compressed);
    
    Ok(())
}

Error Handling

use zstd::{encode_all, decode_all};
use std::io;
 
/// Compress `data`, validating the level first.
///
/// Returns `InvalidInput` for levels outside 1..=22; empty input yields
/// an empty Vec without invoking the compressor.
fn safe_compress(data: &[u8], level: i32) -> io::Result<Vec<u8>> {
    // Idiomatic range check (clippy: manual_range_contains).
    if !(1..=22).contains(&level) {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            format!("Invalid compression level: {}. Must be 1-22.", level)
        ));
    }
    
    // Nothing to compress — skip the encoder entirely.
    if data.is_empty() {
        return Ok(Vec::new());
    }
    
    encode_all(data, level)
}
 
fn safe_decompress(compressed: &[u8], max_size: usize) -> io::Result<Vec<u8>> {
    if compressed.is_empty() {
        return Ok(Vec::new());
    }
    
    let decompressed = decode_all(compressed)?;
    
    // Check for decompression bomb
    if decompressed.len() > max_size {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!("Decompressed data too large: {} bytes (max: {})", 
                decompressed.len(), max_size)
        ));
    }
    
    Ok(decompressed)
}
 
fn main() -> io::Result<()> {
    let data = b"Some test data for compression";

    // Happy path: compress then decompress within the size budget.
    let compressed = safe_compress(data, 3)?;
    let decompressed = safe_decompress(&compressed, 1_000_000)?;
    assert_eq!(data.to_vec(), decompressed);
    println!("Round-trip successful!");

    // Level 30 is out of range and must be rejected.
    if let Err(e) = safe_compress(data, 30) {
        println!("Expected error: {}", e);
    } else {
        println!("Unexpected success");
    }

    // A 5-byte cap is far too small for this payload.
    if let Err(e) = safe_decompress(&compressed, 5) {
        println!("Expected error: {}", e);
    } else {
        println!("Unexpected success");
    }

    Ok(())
}

Integration with Other Libraries

use zstd::{Encoder, Decoder};
use std::io::{self, Cursor, Read, Write};
use flate2::{Compress, Decompress, Compression};
use flate2::read::{ZlibEncoder, ZlibDecoder};
 
// Note: Add flate2 = "1.0" to Cargo.toml
 
/// Compress `data` with both zstd and zlib and print the resulting sizes.
fn compare_compression(data: &[u8]) -> io::Result<()> {
    // zstd: streaming Encoder writing into a Vec at level 3.
    let mut zstd_compressed = Vec::new();
    let mut encoder = Encoder::new(&mut zstd_compressed, 3)?;
    encoder.write_all(data)?;
    encoder.finish()?;

    // zlib: ZlibEncoder is a *reader* adapter — reading from it yields
    // the compressed bytes.
    let mut zlib_compressed = Vec::new();
    ZlibEncoder::new(Cursor::new(data), Compression::default())
        .read_to_end(&mut zlib_compressed)?;

    println!("Original: {} bytes", data.len());
    println!("zstd:     {} bytes", zstd_compressed.len());
    println!("zlib:     {} bytes", zlib_compressed.len());

    Ok(())
}
 
fn main() -> io::Result<()> {
    // 100 copies of a pangram — compressible by both codecs.
    let data = b"The quick brown fox jumps over the lazy dog. ".repeat(100);
    compare_compression(data.as_slice())
}

Summary

  • Use encode_all() and decode_all() for simple one-shot compression/decompression
  • Use Encoder and Decoder for streaming large data without loading into memory
  • Compression levels range from 1 (fastest) to 22 (best ratio), with 3 as default
  • Zstandard offers faster decompression than gzip with similar compression ratios
  • Dictionary compression improves ratios for small, similar data (like log entries)
  • Always call encoder.finish() to finalize compression
  • Use Encoder::new(writer, level) and Decoder::new(reader) for streaming
  • For chunked data, compress each chunk separately or use a single streaming encoder
  • Ideal for: log compression, network protocols, database storage, file archiving
  • Handles both Read and Write traits for easy integration
  • Validate decompressed size limits to prevent decompression bombs