How do I use Zstd compression in Rust?
Walkthrough
The zstd crate provides Rust bindings for Facebook's Zstandard (zstd) compression algorithm. Zstd offers compression ratios comparable to gzip but with much faster decompression speeds. It's become a popular choice for real-time compression in databases, file systems, network protocols, and data archival. The crate supports both streaming compression (for large files or real-time data) and bulk compression (for in-memory data), with configurable compression levels from 1 (fastest) to 22 (best ratio).
Key concepts:
- Compression levels — 1-22, default is 3; higher = better ratio but slower
- Bulk API — compress() and decompress() (in the bulk module) for simple in-memory operations
- Streaming API — Encoder and Decoder for large data or streaming
- Dictionary compression — pre-trained dictionaries for small, similar data
- Frame vs block — zstd can work on complete frames or individual blocks
Code Example
# Cargo.toml
[dependencies]
zstd = "0.13"

use zstd::bulk::{compress, decompress};
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Round-trip a small payload through the bulk (one-shot) API.
    let original = b"Hello, World! This is some test data.";
    let packed = compress(original, 3)?;
    // Bulk decompression needs an upper bound on the decompressed size;
    // here the original length is known exactly.
    let restored = decompress(&packed, original.len())?;
    assert_eq!(original.to_vec(), restored);
    Ok(())
}Basic Compression and Decompression
use zstd::{compress, decompress};
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let original = b"This is some text that will be compressed.
The more repetitive the data, the better the compression ratio.";
    // Compress with the default level (3).
    let packed = compress(original, 3)?;
    println!("Original: {} bytes", original.len());
    println!("Compressed: {} bytes", packed.len());
    println!("Ratio: {:.1}%", packed.len() as f64 / original.len() as f64 * 100.0);
    // Bulk decompression takes a capacity bound; we know the original size.
    // For unknown sizes, use the streaming API instead.
    let restored = decompress(&packed, original.len())?;
    assert_eq!(original.to_vec(), restored);
    println!("Decompression successful!");
    Ok(())
}Compression Levels
use zstd::{compress, decompress};
// Compares output sizes across the supported compression-level range.
fn main() -> Result<(), Box<dyn std::error::Error>> {
// Generate test data
let data: Vec<u8> = (0..10000).map(|i| (i % 256) as u8).collect();
println!("Original: {} bytes", data.len());
println!("\nCompression level comparison:");
// Sweep representative levels; higher levels trade speed for ratio.
for level in [1, 3, 6, 9, 12, 15, 19, 22] {
let compressed = compress(&data, level)?;
let ratio = compressed.len() as f64 / data.len() as f64 * 100.0;
println!(" Level {:2}: {:6} bytes ({:.1}%)", level, compressed.len(), ratio);
}
// Verify decompression works
// (bulk decompress needs a capacity bound; the original length suffices)
let compressed = compress(&data, 10)?;
let decompressed = decompress(&compressed, data.len())?;
assert_eq!(data, decompressed);
Ok(())
}Streaming Compression to File
use std::fs::File;
use std::io::{self, BufWriter, Write};
use zstd::stream::Encoder;
/// Compresses `input_path` into `output_path` as a single zstd frame,
/// streaming so the whole file never needs to fit in memory.
fn compress_file(input_path: &str, output_path: &str, level: i32) -> io::Result<()> {
    let mut reader = io::BufReader::new(File::open(input_path)?);
    // Everything copied into the encoder lands compressed in `output_path`.
    let mut encoder = Encoder::new(File::create(output_path)?, level)?;
    io::copy(&mut reader, &mut encoder)?;
    // finish() flushes buffers and writes the frame epilogue; without it
    // the output file is truncated and unreadable.
    encoder.finish()?;
    Ok(())
}
// Demonstrates compress_file on a small generated text file, then cleans up.
fn main() -> io::Result<()> {
// Create test file
std::fs::write("input.txt", "Hello, this is a test file to compress!\n".repeat(100))
.expect("Failed to write test file");
compress_file("input.txt", "output.zst", 3)?;
// Compare on-disk sizes before/after.
let original_size = std::fs::metadata("input.txt")?.len();
let compressed_size = std::fs::metadata("output.zst")?.len();
println!("Original: {} bytes", original_size);
println!("Compressed: {} bytes", compressed_size);
// Cleanup
std::fs::remove_file("input.txt")?;
std::fs::remove_file("output.zst")?;
Ok(())
}Streaming Decompression from File
use std::fs::File;
use std::io::{self, BufReader};
use zstd::stream::Decoder;
/// Streams the zstd file at `input_path` back into plain bytes at
/// `output_path`.
fn decompress_file(input_path: &str, output_path: &str) -> io::Result<()> {
    // The decoder reads compressed bytes and yields plain bytes.
    let mut decoder = Decoder::new(File::open(input_path)?)?;
    let mut writer = io::BufWriter::new(File::create(output_path)?);
    io::copy(&mut decoder, &mut writer)?;
    Ok(())
}
// Round-trips a file through streaming compression + decompress_file.
fn main() -> io::Result<()> {
// Create and compress test file
std::fs::write("test.txt", "Test content for decompression!\n".repeat(50))
.expect("Failed to write test file");
// Compress
let input = File::open("test.txt")?;
let output = File::create("test.zst")?;
let mut encoder = zstd::stream::Encoder::new(output, 3)?;
io::copy(&mut io::BufReader::new(input), &mut encoder)?;
// finish() writes the frame epilogue; required before the file is readable.
encoder.finish()?;
// Decompress
decompress_file("test.zst", "test_recovered.txt")?;
// Verify
let original = std::fs::read_to_string("test.txt")?;
let recovered = std::fs::read_to_string("test_recovered.txt")?;
assert_eq!(original, recovered);
println!("Round-trip successful!");
// Cleanup
std::fs::remove_file("test.txt")?;
std::fs::remove_file("test.zst")?;
std::fs::remove_file("test_recovered.txt")?;
Ok(())
}In-Memory Streaming
use std::io::{self, Cursor, Read, Write};
use zstd::stream::{Encoder, Decoder};
/// Compresses `data` into an in-memory zstd frame via the streaming encoder.
fn compress_to_memory(data: &[u8], level: i32) -> io::Result<Vec<u8>> {
    let mut frame = Vec::new();
    let mut encoder = Encoder::new(&mut frame, level)?;
    encoder.write_all(data)?;
    // finish() consumes the encoder by value, so the mutable borrow of
    // `frame` ends here — no extra scope needed.
    encoder.finish()?;
    Ok(frame)
}
/// Decompresses an in-memory zstd frame; no size estimate required.
fn decompress_from_memory(compressed: &[u8]) -> io::Result<Vec<u8>> {
    let mut plain = Vec::new();
    Decoder::new(Cursor::new(compressed))?.read_to_end(&mut plain)?;
    Ok(plain)
}
// Round-trips a small payload through the in-memory streaming helpers.
fn main() -> io::Result<()> {
let data = b"This is some data to compress in memory.";
let compressed = compress_to_memory(data, 3)?;
// Note: tiny inputs may grow slightly — frame headers add fixed overhead.
println!("Compressed: {} bytes", compressed.len());
let decompressed = decompress_from_memory(&compressed)?;
println!("Decompressed: {} bytes", decompressed.len());
assert_eq!(data.to_vec(), decompressed);
println!("Round-trip successful!");
Ok(())
}Chunked Streaming
use std::io::{self, Read, Write};
use zstd::stream::{Encoder, Decoder};
/// Writes every chunk into one continuous zstd frame and returns it.
/// Chunk boundaries are not recorded in the output.
fn compress_chunks(
    chunks: &[Vec<u8>],
    level: i32,
) -> io::Result<Vec<u8>> {
    let mut frame = Vec::new();
    let mut encoder = Encoder::new(&mut frame, level)?;
    for piece in chunks {
        encoder.write_all(piece)?;
    }
    // Consumes the encoder and releases the borrow on `frame`.
    encoder.finish()?;
    Ok(frame)
}
/// Reads the decompressed stream back out at most `chunk_size` bytes at a
/// time; each read becomes one output chunk (boundaries are arbitrary).
fn decompress_to_chunks(
    compressed: &[u8],
    chunk_size: usize,
) -> io::Result<Vec<Vec<u8>>> {
    let mut decoder = Decoder::new(std::io::Cursor::new(compressed))?;
    let mut chunks = Vec::new();
    loop {
        let mut chunk = vec![0u8; chunk_size];
        let n = decoder.read(&mut chunk)?;
        if n == 0 {
            break; // EOF — the frame is fully drained
        }
        // Keep only the bytes actually produced by this read.
        chunk.truncate(n);
        chunks.push(chunk);
    }
    Ok(chunks)
}
// Shows that chunked writes round-trip, though chunk boundaries differ.
fn main() -> io::Result<()> {
let chunks = vec![
b"First chunk of data. ".to_vec(),
b"Second chunk here. ".to_vec(),
b"Third and final chunk.".to_vec(),
];
let compressed = compress_chunks(&chunks, 3)?;
println!("Compressed {} chunks into {} bytes", chunks.len(), compressed.len());
// The stream does not preserve chunk boundaries; we re-split at 10 bytes.
let decompressed = decompress_to_chunks(&compressed, 10)?;
println!("Decompressed into {} chunks", decompressed.len());
// Reassemble
let original: Vec<u8> = chunks.iter().flatten().copied().collect();
let recovered: Vec<u8> = decompressed.iter().flatten().copied().collect();
assert_eq!(original, recovered);
Ok(())
}Dictionary Compression
use zstd::{compress, decompress};
// Dictionary compression works best with similar, small data
/// Builds a flat byte buffer from samples representative of the records we
/// would later compress with a trained dictionary.
fn create_training_data() -> Vec<u8> {
    let samples: [&[u8]; 4] = [
        b"User ID: 1, Name: Alice, Email: alice@example.com",
        b"User ID: 2, Name: Bob, Email: bob@example.com",
        b"User ID: 3, Name: Charlie, Email: charlie@example.com",
        b"User ID: 4, Name: Diana, Email: diana@example.com",
    ];
    // Concatenate all samples into one contiguous Vec<u8>.
    samples.concat()
}
// Illustrates why dictionary compression matters for small payloads.
// Fix: `sample_data` was computed but never used (dead-code warning); it is
// now reported, which also shows the scale of the training corpus.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // In practice, you'd train a dictionary using zstd CLI:
    // zstd --train samples/* -o dict.bin
    let sample_data = create_training_data();
    println!("Training corpus: {} bytes", sample_data.len());
    // Without dictionary
    let small_data = b"User ID: 5, Name: Eve, Email: eve@example.com";
    let compressed_normal = compress(small_data, 3)?;
    println!("Small data: {} bytes", small_data.len());
    println!("Without dict: {} bytes", compressed_normal.len());
    // Note: Dictionary training is complex; see zstd::dict module
    // This shows the concept; real implementation requires
    // training a dictionary from sample data
    println!("\nNote: For dictionary compression, train a dictionary");
    println!("from sample data using zstd CLI or the dict module.");
    Ok(())
}Parallel Compression
use std::sync::mpsc;
use std::thread;
use zstd::compress;
/// A raw input slice tagged with its position in the original buffer.
struct Chunk {
index: usize,
data: Vec<u8>,
}
/// A compressed chunk; `index` lets results be restored to input order.
struct CompressedChunk {
index: usize,
data: Vec<u8>,
}
fn parallel_compress(
data: &[u8],
chunk_size: usize,
level: i32,
num_workers: usize,
) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
let chunks: Vec<_> = data
.chunks(chunk_size)
.enumerate()
.map(|(i, chunk)| Chunk {
index: i,
data: chunk.to_vec(),
})
.collect();
let (sender, receiver) = mpsc::channel();
let (done_tx, done_rx) = mpsc::channel();
// Spawn workers
for _ in 0..num_workers {
let sender = sender.clone();
let done_tx = done_tx.clone();
thread::spawn(move || {
while let Ok(chunk) = receiver.recv() {
let compressed = compress(&chunk.data, level).unwrap();
let result = CompressedChunk {
index: chunk.index,
data: compressed,
};
if sender.send(result).is_err() {
break;
}
}
drop(done_tx);
});
}
drop(done_tx); // Drop our reference
// Send chunks to workers
let sender_clone = sender.clone();
thread::spawn(move || {
for chunk in chunks {
if sender_clone.send(chunk).is_err() {
break;
}
}
});
// Collect results
drop(sender); // Drop so channel closes properly
let mut results: Vec<CompressedChunk> = Vec::new();
while let Ok(chunk) = sender.recv() {
results.push(chunk);
}
// Wait for workers to finish
while done_rx.recv().is_ok() {}
// Sort by index and concatenate
results.sort_by_key(|c| c.index);
// Simple concatenation (not a proper format)
let mut output = Vec::new();
for chunk in results {
output.extend_from_slice(&chunk.data);
}
Ok(output)
}
// Drives parallel_compress over ~100 KB of synthetic data.
fn main() -> Result<(), Box<dyn std::error::Error>> {
// Large data for parallel processing
let data: Vec<u8> = (0..100_000).map(|i| (i % 256) as u8).collect();
println!("Compressing {} bytes with 4 workers...", data.len());
// 10 KB chunks, level 3, 4 worker threads.
let _compressed = parallel_compress(&data, 10000, 3, 4)?;
println!("Parallel compression complete!");
Ok(())
}Custom Encoder Options
use std::io::{self, Cursor, Read, Write};
use zstd::stream::{Encoder, Decoder};

// Demonstrates the encoder's tuning knobs.
// Fix: the previous version used a fabricated API (`zstd::encode_stream::Options`
// and `Encoder::with_options` do not exist in the zstd crate); the real API
// sets the level at construction and exposes per-option setter methods. It was
// also missing `Read`/`Write` imports needed by `write_all`/`read_to_end`.
fn main() -> io::Result<()> {
    let data = b"Data to compress with custom options.";
    let mut output = Vec::new();
    {
        // The compression level (here 15, higher than the default 3) is
        // chosen at construction time.
        let mut encoder = Encoder::new(&mut output, 15)?;
        // 2^23 = 8 MiB match window; larger windows can improve the ratio
        // on large inputs at the cost of memory on both sides.
        encoder.window_log(23)?;
        // Multi-threaded compression is available behind the `zstdmt`
        // feature flag: encoder.multithread(4)?;
        encoder.write_all(data)?;
        encoder.finish()?;
    }
    println!("Compressed with custom options: {} bytes", output.len());
    // Decompress normally — decoding needs no knowledge of encoder options.
    let mut decoder = Decoder::new(Cursor::new(&output))?;
    let mut decompressed = Vec::new();
    decoder.read_to_end(&mut decompressed)?;
    println!("Decompressed: {} bytes", decompressed.len());
    Ok(())
}Compression with Checksums
use std::io::{self, Read, Write};
use zstd::stream::{Encoder, Decoder};
/// Compresses `data` into a frame carrying a content checksum, so corruption
/// is detected when the frame is later decoded.
fn compress_with_checksum(data: &[u8], level: i32) -> io::Result<Vec<u8>> {
    let mut frame = Vec::new();
    let mut encoder = Encoder::new(&mut frame, level)?;
    // Embed a checksum of the uncompressed content in the frame footer.
    encoder.include_checksum(true)?;
    encoder.write_all(data)?;
    // finish() consumes the encoder, releasing the borrow on `frame`.
    encoder.finish()?;
    Ok(frame)
}
/// Decodes a frame; if it carries a checksum, mismatches surface as an
/// io::Error from the read.
fn decompress_and_verify(compressed: &[u8]) -> io::Result<Vec<u8>> {
    let mut plain = Vec::new();
    Decoder::new(std::io::Cursor::new(compressed))?.read_to_end(&mut plain)?;
    Ok(plain)
}
// Round-trips a payload through a checksummed frame.
fn main() -> io::Result<()> {
let data = b"Important data that needs integrity checking.";
let compressed = compress_with_checksum(data, 3)?;
println!("Compressed with checksum: {} bytes", compressed.len());
// Corruption would be detected during decompression
let decompressed = decompress_and_verify(&compressed)?;
assert_eq!(data.to_vec(), decompressed);
println!("Checksum verified successfully!");
Ok(())
}Database Record Compression
use zstd::{compress, decompress};
use std::collections::HashMap;
/// A toy database record serialized with a hand-rolled binary layout
/// (see `to_bytes`/`from_bytes`).
#[derive(Debug, Clone)]
struct Record {
id: u64,
name: String,
email: String,
data: Vec<u8>,
}
impl Record {
/// Serializes as: id (8 bytes LE) | name_len (u16 LE) | name bytes |
/// email_len (u16 LE) | email bytes | raw data (to end of buffer).
/// NOTE(review): `len() as u16` silently truncates names/emails longer
/// than 65535 bytes — confirm inputs are bounded or use try_into.
fn to_bytes(&self) -> Vec<u8> {
let mut bytes = Vec::new();
bytes.extend_from_slice(&self.id.to_le_bytes());
bytes.extend_from_slice(&(self.name.len() as u16).to_le_bytes());
bytes.extend_from_slice(self.name.as_bytes());
bytes.extend_from_slice(&(self.email.len() as u16).to_le_bytes());
bytes.extend_from_slice(self.email.as_bytes());
// The trailing payload carries no length prefix; it runs to the end.
bytes.extend_from_slice(&self.data);
bytes
}
/// Inverse of `to_bytes`. Panics (slice-index out of range) on input that
/// is shorter than the declared lengths — only feed it buffers produced by
/// `to_bytes`.
fn from_bytes(bytes: &[u8]) -> Self {
let id = u64::from_le_bytes(bytes[0..8].try_into().unwrap());
let name_len = u16::from_le_bytes(bytes[8..10].try_into().unwrap()) as usize;
// from_utf8_lossy: invalid UTF-8 becomes U+FFFD instead of panicking.
let name = String::from_utf8_lossy(&bytes[10..10+name_len]).into_owned();
let email_start = 10 + name_len;
let email_len = u16::from_le_bytes(
bytes[email_start..email_start+2].try_into().unwrap()
) as usize;
let email = String::from_utf8_lossy(
&bytes[email_start+2..email_start+2+email_len]
).into_owned();
let data_start = email_start + 2 + email_len;
// Everything after the email is the raw payload.
let data = bytes[data_start..].to_vec();
Self { id, name, email, data }
}
}
/// In-memory key/value store that keeps each record zstd-compressed.
struct CompressedStore {
records: HashMap<u64, Vec<u8>>, // id -> compressed bytes
level: i32,
}
impl CompressedStore {
fn new(level: i32) -> Self {
Self {
records: HashMap::new(),
level,
}
}
fn put(&mut self, record: Record) -> Result<(), Box<dyn std::error::Error>> {
let bytes = record.to_bytes();
let compressed = compress(&bytes, self.level)?;
self.records.insert(record.id, compressed);
Ok(())
}
fn get(&self, id: u64) -> Result<Option<Record>, Box<dyn std::error::Error>> {
if let Some(compressed) = self.records.get(&id) {
let bytes = decompress(compressed, 0)?; // 0 = auto-detect size
Ok(Some(Record::from_bytes(&bytes)))
} else {
Ok(None)
}
}
fn stats(&self) -> (usize, usize) {
let compressed_size: usize = self.records.values().map(|v| v.len()).sum();
(self.records.len(), compressed_size)
}
}
// Inserts two records at level 10, reads one back, and prints store stats.
fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut store = CompressedStore::new(10);
store.put(Record {
id: 1,
name: "Alice".to_string(),
email: "alice@example.com".to_string(),
data: vec![1, 2, 3, 4, 5],
})?;
store.put(Record {
id: 2,
name: "Bob".to_string(),
email: "bob@example.com".to_string(),
data: vec![10, 20, 30, 40, 50],
})?;
// Round-trip a single record by id.
let record = store.get(1)?.unwrap();
println!("Retrieved: {:?}", record);
let (count, size) = store.stats();
println!("Store: {} records, {} compressed bytes", count, size);
Ok(())
}Network Protocol Compression
use std::io::{self, Cursor, Read, Write};
use zstd::stream::{Encoder, Decoder};
/// Per-message codec: each message becomes an independent zstd frame.
struct CompressedStream {
level: i32,
}
impl CompressedStream {
    /// Creates a codec that compresses messages at `level`.
    fn new(level: i32) -> Self {
        Self { level }
    }
    /// Compresses one message into a standalone zstd frame.
    fn compress_message(&self, message: &[u8]) -> io::Result<Vec<u8>> {
        let mut frame = Vec::new();
        let mut encoder = Encoder::new(&mut frame, self.level)?;
        encoder.write_all(message)?;
        // finish() consumes the encoder, ending the borrow of `frame`.
        encoder.finish()?;
        Ok(frame)
    }
    /// Decompresses a frame produced by `compress_message`.
    fn decompress_message(&self, compressed: &[u8]) -> io::Result<Vec<u8>> {
        let mut message = Vec::new();
        Decoder::new(Cursor::new(compressed))?.read_to_end(&mut message)?;
        Ok(message)
    }
}
// Compresses and restores a couple of HTTP-like messages independently.
fn main() -> io::Result<()> {
let stream = CompressedStream::new(3);
// Simulate network messages
let messages = [
b"GET /api/users HTTP/1.1\r\nHost: example.com\r\n\r\n".as_slice(),
b"POST /api/data HTTP/1.1\r\nHost: example.com\r\nContent-Length: 100\r\n\r\n".as_slice(),
];
for msg in messages {
let compressed = stream.compress_message(msg)?;
let decompressed = stream.decompress_message(&compressed)?;
println!("Original: {} bytes -> Compressed: {} bytes",
msg.len(), compressed.len());
assert_eq!(msg.to_vec(), decompressed);
}
println!("All messages compressed and decompressed successfully!");
Ok(())
}Benchmarking Compression
use std::time::Instant;
use zstd::{compress, decompress};
/// Times one compress + one decompress pass over `data` at `level`.
/// Returns (compress_seconds, decompress_seconds, compressed_size).
fn benchmark_compression(data: &[u8], level: i32) -> (f64, f64, usize) {
    let t0 = Instant::now();
    let compressed = compress(data, level).unwrap();
    let compress_time = t0.elapsed().as_secs_f64();

    let t1 = Instant::now();
    // Result discarded — only the elapsed time matters here.
    decompress(&compressed, data.len()).unwrap();
    let decompress_time = t1.elapsed().as_secs_f64();

    (compress_time, decompress_time, compressed.len())
}
// Prints a timing/size table for a sweep of compression levels.
fn main() {
// Test data
let mut data = Vec::new();
for i in 0..100_000 {
data.push((i % 256) as u8);
}
println!("Benchmark on {} bytes:", data.len());
// Table header.
println!("\n{:8} {:10} {:10} {:10} {:10}",
"Level", "Compress", "Decompress", "Size", "Ratio%");
println!("{:8} {:10} {:10} {:10} {:10}",
"-----", "--------", "----------", "----", "------");
for level in [1, 3, 6, 9, 12, 15, 19, 22] {
let (compress_time, decompress_time, size) =
benchmark_compression(&data, level);
let ratio = size as f64 / data.len() as f64 * 100.0;
println!("{:8} {:8.3}s {:9.3}s {:10} {:9.1}%",
level, compress_time, decompress_time, size, ratio);
}
}File Archiver
use std::fs::{self, File};
use std::io::{self, BufReader, BufWriter, Read, Write};
use std::path::Path;
use zstd::stream::Encoder;
/// Archive prelude: number of file entries that follow.
struct ArchiveHeader {
file_count: u32,
}
/// Per-file metadata written before each file's bytes.
/// NOTE(review): `compressed_size` is set to 0 and never serialized by
/// create_archive — the whole archive is one compressed stream, so a
/// per-file compressed size is unknowable at write time.
struct FileEntry {
name_len: u16,
original_size: u64,
compressed_size: u64,
}
/// Writes a simple archive: everything below is fed through one zstd encoder.
/// Decompressed layout: file_count (u32 LE), then per file:
/// name_len (u16 LE) | original_size (u64 LE) | name bytes | content bytes
/// (exactly `original_size` of them — that is how an extractor finds the
/// next entry).
/// NOTE(review): panics via unwrap() on paths with no file name or
/// non-UTF-8 names — acceptable for this example, not for production.
fn create_archive(
files: &[&Path],
output_path: &Path,
level: i32,
) -> io::Result<()> {
let output = File::create(output_path)?;
let mut encoder = Encoder::new(BufWriter::new(output), level)?;
// Write header
let header = ArchiveHeader {
file_count: files.len() as u32,
};
encoder.write_all(&header.file_count.to_le_bytes())?;
for file_path in files {
let metadata = fs::metadata(file_path)?;
let name = file_path.file_name().unwrap().to_str().unwrap();
// Write file entry metadata
// (compressed_size is a placeholder; it is never written to the stream)
let entry = FileEntry {
name_len: name.len() as u16,
original_size: metadata.len(),
compressed_size: 0, // Unknown until compressed
};
encoder.write_all(&entry.name_len.to_le_bytes())?;
encoder.write_all(&entry.original_size.to_le_bytes())?;
encoder.write_all(name.as_bytes())?;
// Write file content
let mut input = File::open(file_path)?;
io::copy(&mut input, &mut encoder)?;
}
// Finalize the single frame that wraps the whole archive.
encoder.finish()?;
Ok(())
}
// Builds an archive from two generated files, reports its size, cleans up.
fn main() -> io::Result<()> {
// Create test files
fs::write("file1.txt", "Content of file 1\n".repeat(100))?;
fs::write("file2.txt", "Content of file 2\n".repeat(100))?;
// Create archive
let files = vec![Path::new("file1.txt"), Path::new("file2.txt")];
create_archive(&files, Path::new("archive.zst"), 10)?;
let archive_size = fs::metadata("archive.zst")?.len();
println!("Created archive: {} bytes", archive_size);
// Cleanup
fs::remove_file("file1.txt")?;
fs::remove_file("file2.txt")?;
fs::remove_file("archive.zst")?;
Ok(())
}Comparing with Gzip
use std::io::{self, Cursor, Read, Write};
use zstd::stream::{Encoder as ZstdEncoder, Decoder as ZstdDecoder};
#[cfg(feature = "gzip_comparison")]
/// Placeholder: a real gzip comparison would pull in the flate2 crate; this
/// stub only compiles when the (undeclared) feature flag is enabled.
fn compare_with_gzip() {
// This would require flate2 crate
// Showing concept only
}
// Measures zstd's compressed size and decompression time on synthetic data.
fn main() -> io::Result<()> {
let data: Vec<u8> = (0..100_000)
.map(|i| (i % 256) as u8)
.collect();
println!("Comparing compression methods:");
println!("Original: {} bytes", data.len());
// Zstd
let mut zstd_output = Vec::new();
{
// Scope ends the mutable borrow of zstd_output after finish().
let mut encoder = ZstdEncoder::new(&mut zstd_output, 3)?;
encoder.write_all(&data)?;
encoder.finish()?;
}
println!("Zstd level 3: {} bytes ({:.1}%)",
zstd_output.len(),
zstd_output.len() as f64 / data.len() as f64 * 100.0);
// Decompression speed comparison
use std::time::Instant;
let start = Instant::now();
let mut decoder = ZstdDecoder::new(Cursor::new(&zstd_output))?;
let mut decompressed = Vec::new();
decoder.read_to_end(&mut decompressed)?;
let zstd_decompress_time = start.elapsed();
println!("Zstd decompression: {:?}", zstd_decompress_time);
Ok(())
}Error Handling
use zstd::{compress, decompress};
use std::io::{self, Read};
use zstd::stream::Decoder;
/// Bulk decompression with errors mapped to a human-readable String.
/// NOTE(review): the 1024-byte capacity is a hard upper bound — this fails
/// for any payload that decompresses to more than 1 KB. Use
/// `streaming_decompress` (below) when the size is unknown.
fn safe_decompress(compressed: &[u8]) -> Result<Vec<u8>, String> {
// Method 1: Known size
let decompressed = decompress(compressed, 1024)
.map_err(|e| format!("Decompression failed: {}", e))?;
Ok(decompressed)
}
/// Streaming decompression — needs no up-front size estimate because the
/// decoder reads until the frame ends.
fn streaming_decompress(compressed: &[u8]) -> io::Result<Vec<u8>> {
    let mut plain = Vec::new();
    Decoder::new(std::io::Cursor::new(compressed))?.read_to_end(&mut plain)?;
    Ok(plain)
}
/// Cheap sniff test: does `data` start with the zstd frame magic number?
/// The magic is 0xFD2FB528, stored little-endian (bytes 28 B5 2F FD).
fn is_valid_zstd(data: &[u8]) -> bool {
    const ZSTD_MAGIC: u32 = 0xFD2F_B528;
    match data {
        [a, b, c, d, ..] => u32::from_le_bytes([*a, *b, *c, *d]) == ZSTD_MAGIC,
        _ => false, // fewer than 4 bytes cannot hold the magic
    }
}
// Exercises format sniffing, safe decompression, and corruption detection.
fn main() -> Result<(), Box<dyn std::error::Error>> {
// Test data
let data = b"Test data";
let compressed = compress(data, 3)?;
// Verify format
println!("Valid Zstd: {}", is_valid_zstd(&compressed));
// Safe decompression
match safe_decompress(&compressed) {
Ok(decompressed) => println!("Decompressed: {} bytes", decompressed.len()),
Err(e) => eprintln!("Error: {}", e),
}
// Corrupted data
// (zeroing the first byte destroys the frame magic, so decoding must fail)
let mut corrupted = compressed.clone();
if !corrupted.is_empty() {
corrupted[0] = 0;
}
match safe_decompress(&corrupted) {
Ok(_) => println!("Unexpected success"),
Err(e) => println!("Correctly detected corruption: {}", e),
}
Ok(())
}Summary
- zstd::bulk::compress(data, level) for simple in-memory compression
- zstd::bulk::decompress(data, max_size) for simple decompression
- Encoder::new(writer, level) for streaming compression
- Decoder::new(reader) for streaming decompression
- Call encoder.finish() to finalize compressed output
- Compression levels: 1-22 (higher = better ratio, slower)
- Default level 3 provides good balance of speed and ratio
- Streaming API handles large files without loading into memory
- Enable checksums with encoder.include_checksum(true)
- Dictionary compression improves ratio for small, similar data
- Multi-threaded compression via Encoder::multithread() (requires the zstdmt feature)
- Perfect for: databases, file systems, network protocols, log compression, archival
