How do I compress data with Zstandard in Rust?

Walkthrough

The zstd crate provides Rust bindings for the Zstandard (zstd) compression algorithm, a fast lossless compression algorithm with compression ratios comparable to gzip but with much faster decompression speeds. Zstandard offers a wide range of compression levels (1-22), supports streaming compression, dictionary compression for small data, and multi-threaded compression. It's ideal for file compression, network protocols, database storage, and anywhere you need efficient compression with fast decompression.

Key concepts:

  1. Compression levels — level 1 (fastest) to level 22 (best ratio), default is 3
  2. Streaming API — compress/decompress data in chunks without loading entirely into memory
  3. Dictionary compression — train dictionaries for better compression on small, similar data
  4. Multi-threading — parallel compression for large files (requires the crate's `zstdmt` Cargo feature)
  5. Frame format — zstd frames can be concatenated and decompressed together

Code Example

# Cargo.toml
[dependencies]
zstd = "0.13"
use zstd::{encode_all, decode_all};
use std::io::Cursor;
 
/// Round-trips a short message through one-shot zstd compression at level 3
/// and asserts that the decompressed bytes equal the input.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let message = b"Hello, Zstandard compression! This is a test message.";
    let message_len = message.len();

    // One-shot compression of the whole input.
    let packed = encode_all(Cursor::new(message), 3)?;
    let packed_len = packed.len();
    println!("Original: {} bytes", message_len);
    println!("Compressed: {} bytes", packed_len);

    // One-shot decompression; the cursor takes ownership of `packed`.
    let restored = decode_all(Cursor::new(packed))?;
    let restored_len = restored.len();
    println!("Decompressed: {} bytes", restored_len);
    assert_eq!(message.to_vec(), restored);

    Ok(())
}

Basic Compression and Decompression

use zstd::{encode_all, decode_all};
use std::io::Cursor;
 
/// Compresses a two-line text at level 3, prints the sizes and the
/// compression ratio, then decompresses and verifies the round trip.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // The literal intentionally spans two lines; the line break and the
    // indentation are part of the data.
    let input = b"This is some text that will be compressed. 
                      The more repetitive the content, the better the compression.";
    let input_len = input.len();

    let packed = encode_all(Cursor::new(input), 3)?;
    let packed_len = packed.len();
    let ratio = input_len as f64 / packed_len as f64;

    println!("Original size: {} bytes", input_len);
    println!("Compressed size: {} bytes", packed_len);
    println!("Compression ratio: {:.2}", ratio);

    let restored = decode_all(Cursor::new(packed))?;
    assert_eq!(input.to_vec(), restored);
    println!("Decompression successful!");

    Ok(())
}

Compression Levels

use zstd::{encode_all, decode_all};
use std::io::Cursor;
 
/// Compresses the same highly repetitive input at several levels and prints
/// the resulting size and ratio for each level.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Levels to compare, from fastest to strongest.
    const LEVELS: [i32; 7] = [1, 3, 5, 10, 15, 19, 22];

    let text = "Hello, World! ".repeat(1000);
    let input = text.as_bytes();
    let input_len = input.len();

    println!("Original size: {} bytes", input_len);
    println!("\nCompression level comparison:");

    for &level in LEVELS.iter() {
        let packed_len = encode_all(Cursor::new(input), level)?.len();
        let ratio = input_len as f64 / packed_len as f64;
        println!("  Level {:2}: {:6} bytes (ratio: {:.2}x)", level, packed_len, ratio);
    }

    Ok(())
}

Streaming Compression

use zstd::stream::{Encoder, Decoder};
use std::io::{self, Read, Write, Cursor};
 
/// Streams a byte string through a zstd `Encoder`, streams the result back
/// through a `Decoder`, and asserts that the round trip is lossless.
fn main() -> io::Result<()> {
    // The literal spans three lines; line breaks and indentation are data.
    let input = b"This is a longer piece of text that we will compress 
                        using streaming compression. Streaming is useful for 
                        large data that doesn't fit in memory.";

    // Compression: the encoder writes into `packed` as input is copied in.
    let mut packed = Vec::new();
    let mut encoder = Encoder::new(&mut packed, 3)?;
    io::copy(&mut Cursor::new(input), &mut encoder)?;
    // `finish` writes the frame epilogue and releases the borrow on `packed`.
    encoder.finish()?;

    println!("Original: {} bytes", input.len());
    println!("Compressed: {} bytes", packed.len());

    // Decompression: pull decoded bytes out of the decoder into `restored`.
    let mut restored = Vec::new();
    let mut decoder = Decoder::new(Cursor::new(&packed))?;
    io::copy(&mut decoder, &mut restored)?;

    assert_eq!(input.to_vec(), restored);
    println!("Streaming decompression successful!");

    Ok(())
}

Compressing Files

use zstd::stream::{Encoder, Decoder};
use std::fs::File;
use std::io::{self, BufReader, BufWriter, Copy};
 
fn compress_file(input_path: &str, output_path: &str, level: i32) -> io::Result<()> {
    let input = File::open(input_path)?;
    let output = File::create(output_path)?;
    
    let mut reader = BufReader::new(input);
    let mut writer = BufWriter::new(output);
    
    let mut encoder = Encoder::new(&mut writer, level)?;
    io::copy(&mut reader, &mut encoder)?;
    encoder.finish()?;
    
    println!("Compressed {} -> {}", input_path, output_path);
    Ok(())
}
 
/// Decompresses the zstd file at `input_path` into a new file at
/// `output_path`.
///
/// The output file is created (or truncated) before any data is decoded.
///
/// # Errors
///
/// Returns any I/O error from opening either file, from the decoder (for
/// example on corrupt input), or from flushing the buffered output.
fn decompress_file(input_path: &str, output_path: &str) -> io::Result<()> {
    let mut reader = BufReader::new(File::open(input_path)?);
    let mut writer = BufWriter::new(File::create(output_path)?);

    let mut decoder = Decoder::new(&mut reader)?;
    io::copy(&mut decoder, &mut writer)?;
    // Flush explicitly: `BufWriter`'s drop ignores write errors, which could
    // otherwise leave a truncated file while still reporting success.
    io::Write::flush(&mut writer)?;

    println!("Decompressed {} -> {}", input_path, output_path);
    Ok(())
}
 
/// Writes a test file, compresses and decompresses it via the helpers above,
/// verifies the round trip, and removes all three files again.
fn main() -> io::Result<()> {
    const INPUT: &str = "test_input.txt";
    const PACKED: &str = "test_output.zst";
    const RESTORED: &str = "test_decompressed.txt";

    // Fixture: a small, highly repetitive text file.
    let expected = "Hello, Zstandard! ".repeat(100);
    std::fs::write(INPUT, &expected)?;

    compress_file(INPUT, PACKED, 3)?;

    // Compare on-disk sizes of input and compressed output.
    let input_bytes = std::fs::metadata(INPUT)?.len();
    let packed_bytes = std::fs::metadata(PACKED)?.len();
    let ratio = input_bytes as f64 / packed_bytes as f64;
    println!("Compression ratio: {:.2}x", ratio);

    decompress_file(PACKED, RESTORED)?;

    let actual = std::fs::read_to_string(RESTORED)?;
    assert_eq!(expected, actual);
    println!("Files match!");

    // Cleanup, in the order the files were created.
    for path in [INPUT, PACKED, RESTORED] {
        std::fs::remove_file(path)?;
    }

    Ok(())
}

Multi-threaded Compression

use zstd::stream::Encoder;
use std::io::{self, Cursor};
 
/// Times two compression passes over the same 10 MB buffer at level 3.
///
/// As written, BOTH passes are single-threaded: the second pass only marks
/// the place where worker threads would be enabled.
fn main() -> io::Result<()> {
    // Create large data: 10,000,000 bytes cycling through 0..=255
    let data: Vec<u8> = (0..10_000_000)
        .map(|i| (i % 256) as u8)
        .collect();
    
    println!("Original size: {} MB", data.len() / 1_000_000);
    
    // Single-threaded compression
    let mut compressed_single = Vec::new();
    let start = std::time::Instant::now();
    {
        let mut encoder = Encoder::new(&mut compressed_single, 3)?;
        io::copy(&mut Cursor::new(&data), &mut encoder)?;
        encoder.finish()?;
    }
    let single_time = start.elapsed();
    println!("Single-threaded: {:?} ({} bytes)", single_time, compressed_single.len());
    
    // Placeholder for multi-threaded compression.
    // NOTE(review): the zstd crate gates multi-threading behind its `zstdmt`
    // Cargo feature (there is no `threads` feature), e.g.
    //     zstd = { version = "0.13", features = ["zstdmt"] }
    // Without that feature and the call mentioned below, this pass is
    // identical to the single-threaded one above.
    let mut compressed_multi = Vec::new();
    let start = std::time::Instant::now();
    {
        let mut encoder = Encoder::new(&mut compressed_multi, 3)?;
        // With the `zstdmt` feature enabled, call `encoder.multithread(n_workers)?`
        // here, before writing any data.
        io::copy(&mut Cursor::new(&data), &mut encoder)?;
        encoder.finish()?;
    }
    let multi_time = start.elapsed();
    println!("Compressed: {:?} ({} bytes)", multi_time, compressed_multi.len());
    
    Ok(())
}

Dictionary Compression

use zstd::{encode_all, decode_all, dict::EncoderDictionary, dict::DecoderDictionary};
use std::io::Cursor;
 
/// Trains a zstd dictionary from `samples`, passing `dict_size` as the size
/// limit to `zstd::dict::from_samples`.
///
/// # Panics
///
/// Panics with "Failed to train dictionary" if training returns an error.
fn train_dictionary(samples: &[&[u8]], dict_size: usize) -> Vec<u8> {
    match zstd::dict::from_samples(samples, dict_size) {
        Ok(dictionary) => dictionary,
        Err(err) => panic!("Failed to train dictionary: {:?}", err),
    }
}
 
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Small similar data samples (dictionaries help most with small, similar data)
    let samples: Vec<&[u8]> = vec![
        b"user_id=123&action=login&timestamp=2024-01-01T10:00:00",
        b"user_id=456&action=logout&timestamp=2024-01-01T11:00:00",
        b"user_id=789&action=click&timestamp=2024-01-01T12:00:00",
        b"user_id=101&action=purchase&timestamp=2024-01-01T13:00:00",
        b"user_id=202&action=view&timestamp=2024-01-01T14:00:00",
    ];
    
    // Train a dictionary
    let dict = train_dictionary(&samples, 1024);
    println!("Dictionary size: {} bytes", dict.len());
    
    // Create encoder/decoder with dictionary
    let encoder_dict = EncoderDictionary::copy(&dict, 3);
    let decoder_dict = DecoderDictionary::copy(&dict);
    
    // New data to compress (similar format)
    let new_data = b"user_id=999&action=comment&timestamp=2024-01-01T15:00:00";
    
    // Compress with dictionary
    let compressed_with_dict = zstd::stream::encode_all(
        Cursor::new(new_data),
        3
    )?;
    
    println!("\nOriginal: {} bytes", new_data.len());
    println!("Compressed: {} bytes", compressed_with_dict.len());
    
    // Decompress
    let decompressed = decode_all(Cursor::new(&compressed_with_dict))?;
    assert_eq!(new_data.to_vec(), decompressed);
    
    println!("Dictionary compression successful!");
    
    Ok(())
}

Real-World: Log Compression

use zstd::stream::Encoder;
use std::io::{self, Write};
use std::fs::File;
 
/// Appends formatted log lines to a zstd-compressed file and counts them.
struct LogCompressor {
    // `Encoder` carries a lifetime parameter that cannot be elided in a
    // struct field; `Encoder::new` yields the `'static` variant.
    encoder: Encoder<'static, File>,
    /// Number of entries written so far.
    entry_count: u64,
}

impl LogCompressor {
    /// Creates (or truncates) the file at `path` and wraps it in a zstd
    /// encoder using compression level `level`.
    ///
    /// # Errors
    ///
    /// Returns any I/O error from creating the file or the encoder.
    fn create(path: &str, level: i32) -> io::Result<Self> {
        let file = File::create(path)?;
        let encoder = Encoder::new(file, level)?;
        Ok(Self {
            encoder,
            entry_count: 0,
        })
    }

    /// Writes one line of the form `[timestamp] [level] message` and
    /// increments the entry counter.
    ///
    /// # Errors
    ///
    /// Returns any I/O error from the encoder; the counter is not
    /// incremented in that case.
    fn write_entry(&mut self, timestamp: &str, level: &str, message: &str) -> io::Result<()> {
        writeln!(self.encoder, "[{}] [{}] {}", timestamp, level, message)?;
        self.entry_count += 1;
        Ok(())
    }

    /// Finishes the zstd frame and returns the number of entries written.
    ///
    /// This must be called: without it the frame epilogue is never written.
    ///
    /// # Errors
    ///
    /// Returns any I/O error from finishing the encoder.
    fn finish(self) -> io::Result<u64> {
        self.encoder.finish()?;
        Ok(self.entry_count)
    }
}
 
/// Writes 1002 log entries through `LogCompressor`, reads the compressed
/// file back, prints a short summary, and deletes the file.
fn main() -> io::Result<()> {
    use std::io::Read;
    use zstd::stream::Decoder;

    const LOG_PATH: &str = "app.log.zst";

    let mut log = LogCompressor::create(LOG_PATH, 3)?;

    // Bulk of the log: a thousand near-identical INFO lines.
    for request in 0..1000 {
        let message = format!("Processing request {}", request);
        log.write_entry("2024-01-01T12:00:00", "INFO", &message)?;
    }

    // Two trailing entries with different levels.
    log.write_entry("2024-01-01T12:01:00", "ERROR", "Connection failed")?;
    log.write_entry("2024-01-01T12:02:00", "WARN", "Rate limit approaching")?;

    let written = log.finish()?;
    println!("Wrote {} log entries", written);

    // Read the whole compressed log back into memory as text.
    let mut text = String::new();
    Decoder::new(File::open(LOG_PATH)?)?.read_to_string(&mut text)?;

    let first_line = text.lines().next().unwrap_or("");
    println!("Decompressed {} characters", text.len());
    println!("First line: {}", first_line);

    std::fs::remove_file(LOG_PATH)?;
    Ok(())
}

Real-World: Backup Compression

use zstd::stream::{Encoder, Decoder};
use std::fs::File;
use std::io::{self, BufReader, BufWriter};
use std::path::Path;
 
/// Compresses the file at `source` into a new zstd file at `dest` using
/// compression level `level`.
///
/// # Errors
///
/// Returns any I/O error from opening either file, from the encoder, or from
/// flushing the buffered output.
fn compress_backup(source: &Path, dest: &Path, level: i32) -> io::Result<()> {
    let mut reader = BufReader::new(File::open(source)?);
    let mut writer = BufWriter::new(File::create(dest)?);

    let mut encoder = Encoder::new(&mut writer, level)?;
    io::copy(&mut reader, &mut encoder)?;
    // `finish` writes the frame epilogue into `writer` and ends the borrow.
    encoder.finish()?;
    // Flush explicitly: `BufWriter`'s drop ignores write errors, and a
    // silently truncated backup is worse than a reported failure.
    io::Write::flush(&mut writer)?;

    Ok(())
}
 
/// Decompresses the zstd file at `source` into a new file at `dest`.
///
/// # Errors
///
/// Returns any I/O error from opening either file, from the decoder (for
/// example on corrupt input), or from flushing the buffered output.
fn decompress_backup(source: &Path, dest: &Path) -> io::Result<()> {
    let mut reader = BufReader::new(File::open(source)?);
    let mut writer = BufWriter::new(File::create(dest)?);

    let mut decoder = Decoder::new(&mut reader)?;
    io::copy(&mut decoder, &mut writer)?;
    // Flush explicitly: `BufWriter`'s drop ignores write errors, and a
    // silently truncated restore is worse than a reported failure.
    io::Write::flush(&mut writer)?;

    Ok(())
}
 
/// Creates a test backup file, compresses it at level 10, restores it,
/// verifies the restored content, and removes all three files.
fn main() -> io::Result<()> {
    const SOURCE: &str = "backup.dat";
    const ARCHIVE: &str = "backup.dat.zst";
    const RESTORED: &str = "backup_restored.dat";

    // Fixture: a thousand identical lines.
    let expected = "Database backup v1.0\n".repeat(1000);
    std::fs::write(SOURCE, &expected)?;

    let source_bytes = std::fs::metadata(SOURCE)?.len();
    println!("Original: {} bytes", source_bytes);

    compress_backup(Path::new(SOURCE), Path::new(ARCHIVE), 10)?;
    let archive_bytes = std::fs::metadata(ARCHIVE)?.len();
    let reduction_pct = 100.0 * (1.0 - archive_bytes as f64 / source_bytes as f64);
    println!("Compressed: {} bytes ({:.1}% reduction)", archive_bytes, reduction_pct);

    // Restore and compare against what was originally written.
    decompress_backup(Path::new(ARCHIVE), Path::new(RESTORED))?;
    let actual = std::fs::read_to_string(RESTORED)?;
    assert_eq!(expected, actual);
    println!("Backup restored successfully!");

    // Cleanup, in the order the files were created.
    for path in [SOURCE, ARCHIVE, RESTORED] {
        std::fs::remove_file(path)?;
    }

    Ok(())
}

Real-World: Network Compression

use zstd::{encode_all, decode_all};
use std::io::Cursor;
 
/// A message with a numeric id and a text payload.
///
/// Wire format (before compression): 4-byte big-endian `id`, followed by the
/// payload's UTF-8 bytes.
struct Message {
    id: u32,
    payload: String,
}

impl Message {
    /// Creates a message, copying `payload` into an owned `String`.
    fn new(id: u32, payload: &str) -> Self {
        Self { id, payload: payload.to_string() }
    }

    /// Encodes the message as the 4-byte big-endian id followed by the
    /// payload bytes.
    fn serialize(&self) -> Vec<u8> {
        let id_bytes = self.id.to_be_bytes();
        let mut data = Vec::with_capacity(id_bytes.len() + self.payload.len());
        data.extend_from_slice(&id_bytes);
        data.extend_from_slice(self.payload.as_bytes());
        data
    }

    /// Decodes the wire format, returning `None` when `data` is too short to
    /// contain the 4-byte id. Invalid UTF-8 in the payload is replaced
    /// lossily rather than rejected.
    fn try_deserialize(data: &[u8]) -> Option<Self> {
        match data {
            [b0, b1, b2, b3, rest @ ..] => Some(Self {
                id: u32::from_be_bytes([*b0, *b1, *b2, *b3]),
                payload: String::from_utf8_lossy(rest).to_string(),
            }),
            _ => None,
        }
    }

    /// Decodes the wire format produced by [`Message::serialize`].
    ///
    /// # Panics
    ///
    /// Panics if `data` is shorter than the 4-byte id header. Use
    /// [`Message::decompress`] for data received from an untrusted peer.
    // Kept for callers that already trust their input.
    #[allow(dead_code)]
    fn deserialize(data: &[u8]) -> Self {
        Self::try_deserialize(data).expect("message data must contain a 4-byte id header")
    }

    /// Serializes the message and compresses it with zstd at `level`.
    ///
    /// # Errors
    ///
    /// Returns the underlying I/O error if compression fails.
    fn compress(&self, level: i32) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
        let data = self.serialize();
        Ok(encode_all(Cursor::new(&data), level)?)
    }

    /// Decompresses `data` and decodes the contained message.
    ///
    /// # Errors
    ///
    /// Returns an error if `data` is not valid zstd, or if the decompressed
    /// bytes are too short to hold the id header (instead of panicking on
    /// malformed network input).
    fn decompress(data: &[u8]) -> Result<Self, Box<dyn std::error::Error>> {
        let decompressed = decode_all(Cursor::new(data))?;
        Self::try_deserialize(&decompressed).ok_or_else(|| {
            Box::<dyn std::error::Error>::from(
                "decompressed message is shorter than the 4-byte id header",
            )
        })
    }
}
 
/// Builds a message with a repetitive payload, compresses it, "transmits"
/// the bytes, and checks that the receiver reconstructs the same message.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let payload = "Hello, Network! ".repeat(100);
    let sent = Message::new(1, &payload);

    println!("Original payload: {} bytes", sent.payload.len());

    // Sender side: serialize and compress.
    let wire_bytes = sent.compress(3)?;
    println!("Compressed: {} bytes", wire_bytes.len());

    // Receiver side: the wire bytes are handed over unchanged, standing in
    // for a network transfer.
    let received = Message::decompress(&wire_bytes)?;
    println!("Received message ID: {}", received.id);
    println!("Received payload length: {}", received.payload.len());

    assert_eq!(sent.id, received.id);
    assert_eq!(sent.payload, received.payload);

    Ok(())
}

Real-World: Chunked Compression

use zstd::stream::{Encoder, Decoder};
use std::io::{self, Read, Cursor};
 
/// Number of input bytes handed to the encoder per write call.
const CHUNK_SIZE: usize = 1024;

/// Compresses and decompresses byte slices through the streaming API.
struct ChunkedCompressor {
    // Not read or written by any method in this snippet.
    buffer: Vec<u8>,
}

impl ChunkedCompressor {
    /// Creates a compressor with an empty buffer.
    fn new() -> Self {
        Self { buffer: Vec::new() }
    }

    /// Feeds `data` to a level-3 encoder in `CHUNK_SIZE`-byte pieces and
    /// returns the finished zstd frame.
    fn compress_chunks(&mut self, data: &[u8]) -> io::Result<Vec<u8>> {
        let mut frame = Vec::new();
        let mut encoder = Encoder::new(&mut frame, 3)?;

        data.chunks(CHUNK_SIZE)
            .try_for_each(|piece| io::Write::write_all(&mut encoder, piece))?;

        // `finish` writes the frame epilogue and releases the borrow on `frame`.
        encoder.finish()?;
        Ok(frame)
    }

    /// Decodes `compressed` in full and returns the decompressed bytes.
    fn decompress_chunks(&mut self, compressed: &[u8]) -> io::Result<Vec<u8>> {
        let mut decoder = Decoder::new(Cursor::new(compressed))?;
        let mut restored = Vec::new();
        decoder.read_to_end(&mut restored)?;
        Ok(restored)
    }
}
 
/// Round-trips 100,000 patterned bytes through `ChunkedCompressor` and
/// verifies that the output equals the input.
fn main() -> io::Result<()> {
    // Input: byte values cycling through 0..=255.
    let input: Vec<u8> = (0..100_000).map(|i| (i % 256) as u8).collect();

    let mut codec = ChunkedCompressor::new();

    let packed = codec.compress_chunks(&input)?;
    println!("Original: {} bytes", input.len());
    println!("Compressed: {} bytes", packed.len());

    let restored = codec.decompress_chunks(&packed)?;
    assert_eq!(input, restored);
    println!("Chunked compression verified!");

    Ok(())
}

In-Memory Compression

use zstd::{encode_all, decode_all};
use std::io::Cursor;
 
/// Compresses `data` in memory at the given zstd `level`.
///
/// Any compression error is returned boxed.
fn compress_data(data: &[u8], level: i32) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    let packed = encode_all(Cursor::new(data), level)?;
    Ok(packed)
}
 
/// Decompresses zstd data held in memory.
///
/// Any decompression error is returned boxed.
fn decompress_data(compressed: &[u8]) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    let restored = decode_all(Cursor::new(compressed))?;
    Ok(restored)
}
 
/// Compresses a repeated sentence at level 5, prints sizes and ratio, and
/// verifies the in-memory round trip.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Fifty copies of the sentence, as an owned byte vector.
    let input = b"In-memory compression example with repeated text. ".repeat(50);
    let input_len = input.len();

    let packed = compress_data(&input, 5)?;
    let packed_len = packed.len();
    let ratio = input_len as f64 / packed_len as f64;
    println!("Original: {} bytes", input_len);
    println!("Compressed: {} bytes", packed_len);
    println!("Ratio: {:.2}x", ratio);

    let restored = decompress_data(&packed)?;
    assert_eq!(input.to_vec(), restored);

    Ok(())
}

Getting Compression Info

use zstd::decode_all;
use std::io::Cursor;
 
/// Compresses a short message, inspects the frame's magic number, and
/// decompresses the data again to report its size.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Zstandard frames start with the bytes 28 B5 2F FD, i.e. the value
    // 0xFD2FB528 when those four bytes are read as a little-endian u32.
    const ZSTD_MAGIC: u32 = 0xFD2F_B528;

    let data = b"Some data to compress for demonstration.";
    let compressed = zstd::encode_all(Cursor::new(data), 3)?;

    // Zstandard frame header information
    println!("Compressed data:");
    println!("  Length: {} bytes", compressed.len());

    // Magic number check (skipped if fewer than four bytes are available).
    if let [b0, b1, b2, b3, ..] = compressed.as_slice() {
        let magic = u32::from_le_bytes([*b0, *b1, *b2, *b3]);
        println!("  Magic number: 0x{:08X}", magic);
        // Compare against the little-endian value, matching how `magic` was
        // decoded above; the byte-swapped constant would never match.
        if magic == ZSTD_MAGIC {
            println!("  Valid Zstandard frame!");
        }
    }

    // Decompress to verify
    let decompressed = decode_all(Cursor::new(&compressed))?;
    println!("  Decompressed size: {} bytes", decompressed.len());

    Ok(())
}

Error Handling

use zstd::stream::Decoder;
use std::io::{Cursor, self};
 
fn decompress_safe(compressed: &[u8]) -> Result<Vec<u8>, String> {
    let cursor = Cursor::new(compressed);
    let mut decoder = Decoder::new(cursor)
        .map_err(|e| format!("Failed to create decoder: {}", e))?;
    
    let mut decompressed = Vec::new();
    decoder.read_to_end(&mut decompressed)
        .map_err(|e| format!("Failed to decompress: {}", e))?;
    
    Ok(decompressed)
}
 
fn main() {
    // Invalid compressed data
    let invalid = b"not valid zstd data";
    match decompress_safe(invalid) {
        Ok(data) => println!("Decompressed: {} bytes", data.len()),
        Err(e) => println!("Error: {}", e),
    }
    
    // Valid data
    let valid = zstd::encode_all(Cursor::new(b"Hello"), 3).unwrap();
    match decompress_safe(&valid) {
        Ok(data) => println!("Decompressed: {:?}", String::from_utf8_lossy(&data)),
        Err(e) => println!("Error: {}", e),
    }
}

Concatenating Frames

use zstd::stream::{Encoder, Decoder};
use std::io::{self, Cursor, Read};
 
/// Writes three independent zstd frames back to back into one buffer and
/// decodes the whole buffer with a single decoder.
fn main() -> io::Result<()> {
    // One payload per frame, in output order.
    let payloads: [&[u8]; 3] = [
        b"First frame data. ",
        b"Second frame data. ",
        b"Third frame data.",
    ];

    // Each iteration starts a fresh encoder that appends a complete frame.
    let mut combined = Vec::new();
    for &payload in payloads.iter() {
        let mut encoder = Encoder::new(&mut combined, 3)?;
        io::Write::write_all(&mut encoder, payload)?;
        encoder.finish()?;
    }

    println!("Combined frames: {} bytes", combined.len());

    // A single decoder reads through all concatenated frames.
    let mut restored = Vec::new();
    Decoder::new(Cursor::new(&combined))?.read_to_end(&mut restored)?;

    println!("Decompressed: {:?}", String::from_utf8_lossy(&restored));

    Ok(())
}

Real-World: Compressed Cache

use zstd::{encode_all, decode_all};
use std::collections::HashMap;
use std::io::Cursor;
 
/// A string-keyed cache that stores values zstd-compressed and tracks
/// hit/miss counts.
struct CompressedCache {
    /// Compressed values by key.
    cache: HashMap<String, Vec<u8>>,
    /// Compression level used for every insert.
    level: i32,
    /// Lookups that found the key.
    hits: u64,
    /// Lookups that did not find the key.
    misses: u64,
}

impl CompressedCache {
    /// Creates an empty cache that compresses at `level`.
    fn new(level: i32) -> Self {
        let cache = HashMap::new();
        Self { cache, level, hits: 0, misses: 0 }
    }

    /// Compresses `value`, stores it under `key` (replacing any previous
    /// entry), and returns the compressed size in bytes.
    fn insert(&mut self, key: &str, value: &[u8]) -> Result<usize, Box<dyn std::error::Error>> {
        let packed = encode_all(Cursor::new(value), self.level)?;
        let packed_len = packed.len();
        self.cache.insert(key.to_string(), packed);
        Ok(packed_len)
    }

    /// Looks up `key` and returns the decompressed value.
    ///
    /// A present key counts as a hit even if decompression fails, in which
    /// case `None` is returned; an absent key counts as a miss.
    fn get(&mut self, key: &str) -> Option<Vec<u8>> {
        match self.cache.get(key) {
            Some(packed) => {
                self.hits += 1;
                decode_all(Cursor::new(packed)).ok()
            }
            None => {
                self.misses += 1;
                None
            }
        }
    }

    /// Returns `(hits, misses, total compressed bytes stored)`.
    fn stats(&self) -> (u64, u64, usize) {
        let stored_bytes: usize = self.cache.values().map(Vec::len).sum();
        (self.hits, self.misses, stored_bytes)
    }
}
 
/// Fills a level-5 `CompressedCache` with three entries, performs two hits
/// and one miss, and prints the resulting statistics.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut cache = CompressedCache::new(5);

    // Three repetitive values of different lengths.
    let first = "This is some cached data that will be compressed.".repeat(10);
    let second = "Another piece of cached content.".repeat(20);
    let third = "Yet more data for the cache.".repeat(15);

    let size1 = cache.insert("key1", first.as_bytes())?;
    let size2 = cache.insert("key2", second.as_bytes())?;
    let size3 = cache.insert("key3", third.as_bytes())?;
    let total = size1 + size2 + size3;

    println!("Compressed sizes: {} + {} + {} = {}", size1, size2, size3, total);

    // Two lookups that should hit.
    if let Some(value) = cache.get("key1") {
        println!("Retrieved key1: {} bytes", value.len());
    }
    if let Some(value) = cache.get("key2") {
        println!("Retrieved key2: {} bytes", value.len());
    }

    // One lookup that should miss; the result is deliberately ignored.
    cache.get("nonexistent");

    let (hits, misses, stored_bytes) = cache.stats();
    println!("Stats: {} hits, {} misses, {} bytes compressed storage", hits, misses, stored_bytes);

    Ok(())
}

Summary

  • encode_all(source, level) compresses data in one step
  • decode_all(source) decompresses data in one step
  • Encoder::new(writer, level) creates a streaming compressor
  • Decoder::new(reader) creates a streaming decompressor
  • Compression levels: 1 (fastest) to 22 (best ratio), default 3
  • Higher levels = better compression but slower encoding
  • Decompression speed is fast regardless of compression level
  • Use streaming API for large files or memory-constrained environments
  • Dictionary compression improves ratios for small, similar data
  • Multi-threaded compression available with the 'zstdmt' feature (then call Encoder::multithread)
  • Zstandard frames can be concatenated and decompressed together
  • Perfect for: logs, backups, network protocols, caching, file storage