How do I compress and decompress data with zstd in Rust?
Walkthrough
The zstd crate provides Rust bindings for the Zstandard (zstd) compression algorithm. Zstandard is a fast lossless compression algorithm that reaches compression ratios comparable to or better than DEFLATE/zlib while compressing and decompressing much faster. It is widely used in production systems for log compression, data archival, and network transmission. The crate supports both streaming and bulk compression, with configurable compression levels.
Key concepts:
- Compression levels — from 1 (fastest) to 22 (best compression); the default is 3
- Bulk operations — compress/decompress entire buffers at once (see the sketch after this list)
- Streaming — Encoder/Decoder for processing large or streaming data
- Dictionaries — pre-trained compression dictionaries for small data
- Checksums — optional content verification
- Threads — multi-threaded compression for faster processing
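Alongside the encode_all/decode_all helpers used throughout this answer, the crate also exposes one-shot helpers in its zstd::bulk module. A minimal round-trip sketch, assuming the bulk::compress and bulk::decompress free functions (decompress takes an upper bound on the decompressed size):
use std::io;

fn main() -> io::Result<()> {
    let data = b"bulk round-trip example";
    // One-shot compression of an in-memory buffer
    let compressed = zstd::bulk::compress(data, 3)?;
    // The capacity argument caps the decompressed size (1 KiB here),
    // which doubles as protection against decompression bombs
    let decompressed = zstd::bulk::decompress(&compressed, 1024)?;
    assert_eq!(&data[..], &decompressed[..]);
    Ok(())
}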
Code Example
# Cargo.toml
[dependencies]
zstd = "0.13"

use zstd::{encode_all, decode_all};
fn main() {
let data = b"Hello, World! This is some sample data for compression.";
// Compress
let compressed = encode_all(&data[..], 3).unwrap();
println!("Original: {} bytes", data.len());
println!("Compressed: {} bytes", compressed.len());
// Decompress
let decompressed = decode_all(&compressed[..]).unwrap();
assert_eq!(data.to_vec(), decompressed);
println!("Decompression successful!");
}
Basic Compression
use zstd::{encode_all, decode_all};
fn main() {
let data = b"Lorem ipsum dolor sit amet, consectetur adipiscing elit. "
.repeat(10);
// Compress with different levels
for level in [1, 3, 10, 19] {
let compressed = encode_all(&data[..], level).unwrap();
let ratio = data.len() as f64 / compressed.len() as f64;
println!(
"Level {}: {} -> {} bytes ({:.2}x)",
level, data.len(), compressed.len(), ratio
);
}
// Decompress
let compressed = encode_all(&data[..], 3).unwrap();
let decompressed = decode_all(&compressed[..]).unwrap();
assert_eq!(data.to_vec(), decompressed);
println!("Round-trip successful!");
}
Compression Levels
use zstd::encode_all;
fn main() {
let data = vec![0u8; 1_000_000]; // Highly compressible data
println!("Testing compression levels:");
println!("Original size: {} bytes", data.len());
for level in 1..=21 {
let start = std::time::Instant::now();
let compressed = encode_all(&data[..], level).unwrap();
let elapsed = start.elapsed();
println!(
"Level {:2}: {:7} bytes ({:5.2}x) in {:?}",
level,
compressed.len(),
data.len() as f64 / compressed.len() as f64,
elapsed
);
}
}
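Rather than hardcoding the range 1..=21, the supported levels can be queried at runtime. A small sketch, assuming zstd::compression_level_range() and the DEFAULT_COMPRESSION_LEVEL constant (the exact bounds depend on the linked zstd version):
fn main() {
    // Ask the linked zstd library which levels it supports
    let range = zstd::compression_level_range();
    println!("Supported levels: {}..={}", range.start(), range.end());
    // Passing level 0 selects the library default
    println!("Default level: {}", zstd::DEFAULT_COMPRESSION_LEVEL);
}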
Bulk Operations
use zstd::{encode_all, decode_all};
fn main() {
// Small data
let small = b"Hello, World!";
let compressed = encode_all(&small[..], 3).unwrap();
let decompressed = decode_all(&compressed[..]).unwrap();
println!("Small: {} -> {} -> {} bytes",
small.len(), compressed.len(), decompressed.len());
// Medium data
let medium: Vec<u8> = (0..10000).map(|i| (i % 256) as u8).collect();
let compressed = encode_all(&medium[..], 3).unwrap();
let decompressed = decode_all(&compressed[..]).unwrap();
assert_eq!(medium, decompressed);
println!("Medium: {} -> {} -> {} bytes",
medium.len(), compressed.len(), decompressed.len());
// Large data
let large: Vec<u8> = (0..1_000_000).map(|i| (i % 256) as u8).collect();
let compressed = encode_all(&large[..], 3).unwrap();
let decompressed = decode_all(&compressed[..]).unwrap();
assert_eq!(large, decompressed);
println!("Large: {} -> {} bytes ({:.2}x)",
large.len(), compressed.len(),
large.len() as f64 / compressed.len() as f64);
}
Streaming Compression
use zstd::stream::{Encoder, Decoder};
use std::io::{self, Read, Write};
fn main() -> io::Result<()> {
// Create a large data stream
let data: Vec<u8> = (0..1_000_000)
.map(|i| (i % 256) as u8)
.collect();
// Compress to a Vec
let mut compressed = Vec::new();
{
let mut encoder = Encoder::new(&mut compressed, 3)?;
encoder.write_all(&data)?;
encoder.finish()?;
}
println!("Compressed {} -> {} bytes", data.len(), compressed.len());
// Decompress
let mut decompressed = Vec::new();
{
let mut decoder = Decoder::new(&compressed[..])?;
decoder.read_to_end(&mut decompressed)?;
}
assert_eq!(data, decompressed);
println!("Decompression successful!");
Ok(())
}
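For the common pattern of wiring a reader straight to a writer, the crate also offers copy_encode and copy_decode, which do the Encoder/Decoder plumbing in one call. A brief sketch assuming those helpers:
use std::io;

fn main() -> io::Result<()> {
    let data = b"helper-based streaming round-trip";
    // Compress straight from any Read into any Write
    let mut compressed = Vec::new();
    zstd::stream::copy_encode(&data[..], &mut compressed, 3)?;
    // Decompress the same way
    let mut decompressed = Vec::new();
    zstd::stream::copy_decode(&compressed[..], &mut decompressed)?;
    assert_eq!(&data[..], &decompressed[..]);
    Ok(())
}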
Streaming with Files
use zstd::stream::{Encoder, Decoder};
use std::fs::File;
use std::io::{self, BufReader, BufWriter};
fn compress_file(input_path: &str, output_path: &str, level: i32) -> io::Result<()> {
let input = File::open(input_path)?;
let output = File::create(output_path)?;
let mut reader = BufReader::new(input);
let writer = BufWriter::new(output);
let mut encoder = Encoder::new(writer, level)?;
// Copy data through the encoder
io::copy(&mut reader, &mut encoder)?;
encoder.finish()?;
Ok(())
}
fn decompress_file(input_path: &str, output_path: &str) -> io::Result<()> {
let input = File::open(input_path)?;
let output = File::create(output_path)?;
let reader = BufReader::new(input);
let mut writer = BufWriter::new(output);
let mut decoder = Decoder::new(reader)?;
// Copy data through decoder
io::copy(&mut decoder, &mut writer)?;
Ok(())
}
fn main() -> io::Result<()> {
// Create test file
std::fs::write("test_input.txt",
"Hello, World!\n".repeat(1000))
?;
// Compress
compress_file("test_input.txt", "test_output.zst", 3)?;
// Decompress
decompress_file("test_output.zst", "test_output.txt")?;
// Verify
let original = std::fs::read("test_input.txt")?;
let decompressed = std::fs::read("test_output.txt")?;
assert_eq!(original, decompressed);
let original_size = std::fs::metadata("test_input.txt")?.len();
let compressed_size = std::fs::metadata("test_output.zst")?.len();
println!("Original: {} bytes", original_size);
println!("Compressed: {} bytes", compressed_size);
println!("Ratio: {:.2}x", original_size as f64 / compressed_size as f64);
// Cleanup
std::fs::remove_file("test_input.txt")?;
std::fs::remove_file("test_output.zst")?;
std::fs::remove_file("test_output.txt")?;
Ok(())
}
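Forgetting the final finish() call is a classic bug with file encoders. The Encoder also provides auto_finish(), which returns a writer that terminates the frame when dropped (errors from that implicit finish are discarded, so prefer an explicit finish() when you need to observe them). A minimal sketch:
use std::fs::File;
use std::io::{self, BufWriter, Write};

fn main() -> io::Result<()> {
    let file = File::create("auto.zst")?;
    // auto_finish() wraps the encoder so the zstd frame is
    // terminated automatically when the writer is dropped
    let mut writer = zstd::stream::Encoder::new(BufWriter::new(file), 3)?.auto_finish();
    writer.write_all(b"data written without an explicit finish()\n")?;
    drop(writer); // frame is finished here
    std::fs::remove_file("auto.zst")?;
    Ok(())
}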
Chunked Compression
use zstd::stream::{Encoder, Decoder};
use std::io::{Read, Write};
fn main() {
// Simulate processing data in chunks
let chunks: Vec<&[u8]> = vec![
b"First chunk of data. ",
b"Second chunk of data. ",
b"Third chunk of data. ",
b"Fourth and final chunk. ",
];
// Compress chunk by chunk
let mut compressed = Vec::new();
{
let mut encoder = Encoder::new(&mut compressed, 3).unwrap();
for chunk in &chunks {
encoder.write_all(chunk).unwrap();
}
encoder.finish().unwrap();
}
println!("Compressed {} bytes", compressed.len());
// Decompress
let mut decoder = Decoder::new(&compressed[..]).unwrap();
let mut decompressed = Vec::new();
decoder.read_to_end(&mut decompressed).unwrap();
let expected: Vec<u8> = chunks.iter()
.flat_map(|c| c.iter().copied())
.collect();
assert_eq!(expected, decompressed);
println!("Decompressed {} bytes", decompressed.len());
}
Using Encoders and Decoders
use zstd::stream::{Encoder, Decoder};
use std::io::{self, Cursor, Read};
fn main() -> io::Result<()> {
// Encoder with custom options
let data = b"Some repetitive data: hello hello hello";
// Basic encoder
let mut compressed = Vec::new();
let mut encoder = Encoder::new(&mut compressed, 10)?;
io::copy(&mut &data[..], &mut encoder)?;
encoder.finish()?;
println!("Compressed with level 10: {} bytes", compressed.len());
// Decoder
let mut decompressed = Vec::new();
let mut decoder = Decoder::new(Cursor::new(&compressed))?;
decoder.read_to_end(&mut decompressed)?;
println!("Decompressed: {} bytes", decompressed.len());
// Verify
assert_eq!(data.to_vec(), decompressed);
Ok(())
}
One-Shot Decompression
use zstd::decode_all;
fn main() {
// decode_all reads the whole frame into a freshly allocated Vec
let data = b"Compress this data for testing purposes.".repeat(100);
let compressed = zstd::encode_all(&data[..], 3).unwrap();
println!("Original: {} bytes", data.len());
println!("Compressed: {} bytes", compressed.len());
// Decompress all at once
let decompressed = decode_all(&compressed[..]).unwrap();
assert_eq!(data.to_vec(), decompressed);
println!("Decompressed: {} bytes", decompressed.len());
Multi-threaded Compression
use zstd::stream::Encoder;
use std::io::{self, Cursor};
fn main() -> io::Result<()> {
// Large data set
let data: Vec<u8> = (0..10_000_000)
.map(|i| (i % 256) as u8)
.collect();
// Single-threaded compression (the default)
let start = std::time::Instant::now();
let mut compressed_single = Vec::new();
{
let mut encoder = Encoder::new(&mut compressed_single, 3)?;
io::copy(&mut Cursor::new(&data), &mut encoder)?;
encoder.finish()?;
}
println!("Single-threaded: {:?}, {} bytes", start.elapsed(), compressed_single.len());
// Multi-threaded compression requires the 'zstdmt' feature
// In Cargo.toml: zstd = { version = "0.13", features = ["zstdmt"] }
#[cfg(feature = "zstdmt")]
{
let start = std::time::Instant::now();
let mut compressed_parallel = Vec::new();
{
let mut encoder = Encoder::new(&mut compressed_parallel, 3)?;
// Spread compression across 4 worker threads
encoder.multithread(4)?;
io::copy(&mut Cursor::new(&data), &mut encoder)?;
encoder.finish()?;
}
let parallel_time = start.elapsed();
println!("Parallel compression: {:?}", parallel_time);
}
println!("Note: enable the 'zstdmt' feature for multi-threaded compression");
Ok(())
}
Checksums
use zstd::stream::{Encoder, Decoder};
use std::io::{self, Cursor, Read};
fn main() -> io::Result<()> {
let data = b"Important data that needs verification.";
// Compress with checksum enabled
let mut compressed = Vec::new();
{
let mut encoder = Encoder::new(&mut compressed, 3)?;
// Enable content checksum
encoder.include_checksum(true)?;
io::copy(&mut Cursor::new(data), &mut encoder)?;
encoder.finish()?;
}
println!("Compressed with checksum: {} bytes", compressed.len());
// Decompress - checksum is verified automatically
let mut decompressed = Vec::new();
{
let mut decoder = Decoder::new(Cursor::new(&compressed))?;
io::copy(&mut decoder, &mut decompressed)?;
}
println!("Decompressed: {} bytes", decompressed.len());
// If corrupted, decompression would fail
// Let's verify the checksum works
let mut corrupted = compressed.clone();
if corrupted.len() > 10 {
corrupted[10] ^= 0xFF; // Flip some bits
}
// This should fail with checksum error
match Decoder::new(Cursor::new(&corrupted)) {
Ok(mut decoder) => {
let mut result = Vec::new();
match decoder.read_to_end(&mut result) {
Ok(_) => println!("Decompression succeeded (no checksum or not corrupted)"),
Err(e) => println!("Decompression failed: {}", e),
}
}
Err(e) => println!("Decoder creation failed: {}", e),
}
Ok(())
}
Compression with Dictionary
use zstd::encode_all;
use std::io;
fn main() -> io::Result<()> {
// Create training data (many similar small messages)
let training_data: Vec<String> = (0..1000)
.map(|i| {
format!("User {} logged in from IP 192.168.1.{} at time {}",
i % 100, i % 256, i)
})
.collect();
// Without a dictionary, each small record compresses poorly on its own
let sample = b"User 42 logged in from IP 192.168.1.100 at time 42";
let compressed_no_dict = encode_all(&sample[..], 3)?;
println!("Without dict: {} bytes", compressed_no_dict.len());
// A dictionary trained on data like `training_data` captures the shared
// patterns, so small records compress much better with it.
// Training requires the 'zdict_builder' feature (or the `zstd train` CLI).
println!("Training corpus: {} messages", training_data.len());
println!("Dictionary compression is most effective for small, similar records");
Ok(())
}
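With the zdict_builder feature enabled (zstd = { version = "0.13", features = ["zdict_builder"] }), a dictionary can be trained in-process. A hedged sketch assuming zstd::dict::from_samples plus the dictionary-aware Encoder/Decoder constructors; dictionary training needs a reasonably large corpus, so the sample count and dictionary size here are illustrative:
use zstd::stream::{Encoder, Decoder};
use std::io::{self, Read, Write};

fn main() -> io::Result<()> {
    // Training samples: many small, similar records
    let samples: Vec<String> = (0..10_000)
        .map(|i| format!("User {} logged in from IP 192.168.1.{}", i % 100, i % 256))
        .collect();
    // Train a 4 KiB dictionary (requires the 'zdict_builder' feature)
    let dict = zstd::dict::from_samples(&samples, 4 * 1024)?;
    // Compress one small record with the dictionary
    let record = b"User 7 logged in from IP 192.168.1.7";
    let mut compressed = Vec::new();
    let mut encoder = Encoder::with_dictionary(&mut compressed, 3, &dict)?;
    encoder.write_all(record)?;
    encoder.finish()?;
    // The same dictionary must be supplied when decompressing
    let mut decoder = Decoder::with_dictionary(&compressed[..], &dict)?;
    let mut out = Vec::new();
    decoder.read_to_end(&mut out)?;
    assert_eq!(&record[..], &out[..]);
    println!("Record: {} -> {} bytes with dictionary", record.len(), compressed.len());
    Ok(())
}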
Partial Decompression
use zstd::stream::Decoder;
use std::io::{self, Cursor, Read};
fn main() -> io::Result<()> {
// Compress data
let original = b"Line 1\nLine 2\nLine 3\nLine 4\nLine 5";
let compressed = zstd::encode_all(&original[..], 3)?;
// Decompress partially
let mut decoder = Decoder::new(Cursor::new(&compressed))?;
// Read only first 6 bytes
let mut partial = [0u8; 6];
decoder.read_exact(&mut partial)?;
println!("First 6 bytes: {:?}", String::from_utf8_lossy(&partial));
// Continue reading
let mut rest = Vec::new();
decoder.read_to_end(&mut rest)?;
println!("Rest: {:?}", String::from_utf8_lossy(&rest));
Ok(())
}
Error Handling
use zstd::stream::Decoder;
use std::io::{Cursor, Read};
fn main() {
// Attempt to decompress invalid data
let invalid_data = b"not valid zstd data";
match zstd::decode_all(&invalid_data[..]) {
Ok(decompressed) => {
println!("Decompressed: {} bytes", decompressed.len());
}
Err(e) => {
println!("Decompression failed: {}", e);
// Error is a zstd::Error which can be inspected
}
}
// Handle errors in streaming without unwrap()
let compressed = zstd::encode_all(&b"valid data"[..], 3).unwrap();
let mut output = Vec::new();
let result = Decoder::new(Cursor::new(&compressed))
.and_then(|mut decoder| decoder.read_to_end(&mut output));
match result {
Ok(n) => println!("Successfully decompressed {} bytes", n),
Err(e) => println!("Streaming decompression failed: {}", e),
}
}
Real-World: Log Compression
use zstd::stream::Encoder;
use std::fs::File;
use std::io::{self, BufWriter, Write};
use std::time::{SystemTime, UNIX_EPOCH};
struct LogCompressor {
// Encoder carries a lifetime parameter (for borrowed dictionaries);
// 'static is fine here since no dictionary is used
encoder: Encoder<'static, BufWriter<File>>,
lines_written: u64,
}
impl LogCompressor {
fn new(path: &str, level: i32) -> io::Result<Self> {
let file = File::create(path)?;
let writer = BufWriter::new(file);
let encoder = Encoder::new(writer, level)?;
Ok(Self {
encoder,
lines_written: 0,
})
}
fn write_log(&mut self, level: &str, message: &str) -> io::Result<()> {
let timestamp = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_secs();
writeln!(self.encoder, "[{}] {} - {}", timestamp, level, message)?;
self.lines_written += 1;
Ok(())
}
fn finish(mut self) -> io::Result<u64> {
self.encoder.finish()?;
Ok(self.lines_written)
}
}
fn main() -> io::Result<()> {
let mut compressor = LogCompressor::new("app.log.zst", 3)?;
compressor.write_log("INFO", "Application started")?;
compressor.write_log("INFO", "Processing request from 192.168.1.1")?;
compressor.write_log("WARN", "High memory usage detected")?;
compressor.write_log("ERROR", "Connection timeout to database")?;
compressor.write_log("INFO", "Application shutting down")?;
let lines = compressor.finish()?;
println!("Wrote {} compressed log lines", lines);
// Verify
let compressed_size = std::fs::metadata("app.log.zst")?.len();
println!("Compressed file size: {} bytes", compressed_size);
// Decompress and read
let compressed = std::fs::read("app.log.zst")?;
let decompressed = zstd::decode_all(&compressed[..])?;
println!("Decompressed content:\n{}",
String::from_utf8_lossy(&decompressed));
std::fs::remove_file("app.log.zst")?;
Ok(())
}
Real-World: Data Archiver
use zstd::{encode_all, decode_all};
use std::collections::HashMap;
struct Archive {
entries: HashMap<String, Vec<u8>>,
compression_level: i32,
}
impl Archive {
fn new(compression_level: i32) -> Self {
Self {
entries: HashMap::new(),
compression_level,
}
}
fn add(&mut self, name: &str, data: &[u8]) {
let compressed = encode_all(data, self.compression_level).unwrap();
self.entries.insert(name.to_string(), compressed);
}
fn get(&self, name: &str) -> Option<Vec<u8>> {
self.entries.get(name).and_then(|compressed| {
decode_all(&compressed[..]).ok()
})
}
fn save(&self) -> Vec<u8> {
let mut result = Vec::new();
// Simple format: [name_len:2][name][data_len:4][data]...
for (name, compressed) in &self.entries {
let name_bytes = name.as_bytes();
result.extend_from_slice(&(name_bytes.len() as u16).to_le_bytes());
result.extend_from_slice(name_bytes);
result.extend_from_slice(&(compressed.len() as u32).to_le_bytes());
result.extend_from_slice(compressed);
}
// Compress the entire archive
encode_all(&result[..], self.compression_level).unwrap()
}
fn load(data: &[u8]) -> Self {
let decompressed = decode_all(data).unwrap();
let mut entries = HashMap::new();
let mut pos = 0;
while pos < decompressed.len() {
let name_len = u16::from_le_bytes([decompressed[pos], decompressed[pos + 1]]) as usize;
pos += 2;
let name = String::from_utf8(decompressed[pos..pos + name_len].to_vec()).unwrap();
pos += name_len;
let data_len = u32::from_le_bytes([
decompressed[pos], decompressed[pos + 1],
decompressed[pos + 2], decompressed[pos + 3],
]) as usize;
pos += 4;
let compressed = decompressed[pos..pos + data_len].to_vec();
pos += data_len;
entries.insert(name, compressed);
}
Self { entries, compression_level: 3 }
}
fn list(&self) -> Vec<&String> {
self.entries.keys().collect()
}
fn stats(&self) -> (usize, usize) {
let original_size: usize = self.entries
.values()
.map(|c| decode_all(&c[..]).unwrap().len())
.sum();
let compressed_size: usize = self.entries.values().map(|c| c.len()).sum();
(original_size, compressed_size)
}
}
fn main() {
let mut archive = Archive::new(10);
// Add files
archive.add("file1.txt", b"This is file 1 content".repeat(100).as_bytes());
archive.add("file2.txt", b"This is file 2 content".repeat(50).as_bytes());
archive.add("config.json", b"{\"name\": \"test\", \"value\": 42}".as_bytes());
println!("Files in archive: {:?}", archive.list());
// Get file
if let Some(content) = archive.get("file1.txt") {
println!("file1.txt: {} bytes", content.len());
}
// Stats
let (original, compressed) = archive.stats();
println!("Original: {} bytes", original);
println!("Compressed: {} bytes", compressed);
println!("Ratio: {:.2}x", original as f64 / compressed as f64);
// Save archive
let saved = archive.save();
println!("Saved archive: {} bytes", saved.len());
// Load archive
let loaded = Archive::load(&saved);
assert!(loaded.get("file1.txt").is_some());
println!("Loaded archive with {} files", loaded.entries.len());
}
Real-World: Network Protocol
use zstd::{encode_all, decode_all};
struct CompressedMessage {
message_type: u8,
payload: Vec<u8>,
}
impl CompressedMessage {
fn new(message_type: u8, payload: &[u8]) -> Self {
Self {
message_type,
payload: payload.to_vec(),
}
}
fn encode(&self, level: i32) -> Vec<u8> {
// Format: [type:1][compressed_len:4][compressed_data]
let compressed = encode_all(&self.payload[..], level).unwrap();
let mut result = Vec::with_capacity(5 + compressed.len());
result.push(self.message_type);
result.extend_from_slice(&(compressed.len() as u32).to_le_bytes());
result.extend_from_slice(&compressed);
result
}
fn decode(data: &[u8]) -> Option<Self> {
if data.len() < 5 {
return None;
}
let message_type = data[0];
let compressed_len = u32::from_le_bytes([
data[1], data[2], data[3], data[4],
]) as usize;
if data.len() < 5 + compressed_len {
return None;
}
let compressed = &data[5..5 + compressed_len];
let payload = decode_all(compressed).ok()?;
Some(Self { message_type, payload })
}
fn compression_ratio(&self, compressed_len: usize) -> f64 {
self.payload.len() as f64 / compressed_len as f64
}
}
fn main() {
// Large message payload
let payload = b"This is a large message payload that will benefit from compression. "
.repeat(100);
// Create and encode message
let message = CompressedMessage::new(1, &payload);
let encoded = message.encode(3);
println!("Original payload: {} bytes", payload.len());
println!("Encoded message: {} bytes", encoded.len());
println!("Compression ratio: {:.2}x",
message.compression_ratio(encoded.len() - 5));
// Decode message
let decoded = CompressedMessage::decode(&encoded).unwrap();
assert_eq!(decoded.message_type, 1);
assert_eq!(decoded.payload, payload.to_vec());
println!("Message type: {}", decoded.message_type);
println!("Payload: {} bytes", decoded.payload.len());
}
Real-World: Backup System
use zstd::stream::{Encoder, Decoder};
use std::fs::File;
use std::io::{self, BufReader, BufWriter};
use std::path::Path;
struct BackupConfig {
compression_level: i32,
include_checksum: bool,
}
impl Default for BackupConfig {
fn default() -> Self {
Self {
compression_level: 3,
include_checksum: true,
}
}
}
fn backup_file(input_path: &Path, output_path: &Path, config: &BackupConfig) -> io::Result<u64> {
let input = File::open(input_path)?;
let output = File::create(output_path)?;
let mut reader = BufReader::new(input);
let writer = BufWriter::new(output);
let mut encoder = Encoder::new(writer, config.compression_level)?;
if config.include_checksum {
encoder.include_checksum(true)?;
}
let bytes_written = io::copy(&mut reader, &mut encoder)?;
encoder.finish()?;
Ok(bytes_written)
}
fn restore_file(input_path: &Path, output_path: &Path) -> io::Result<u64> {
let input = File::open(input_path)?;
let output = File::create(output_path)?;
let reader = BufReader::new(input);
let mut writer = BufWriter::new(output);
let mut decoder = Decoder::new(reader)?;
let bytes_written = io::copy(&mut decoder, &mut writer)?;
Ok(bytes_written)
}
fn main() -> io::Result<()> {
// Create test file
std::fs::write("backup_test.txt",
"Important backup data\n".repeat(1000))
?;
let config = BackupConfig::default();
// Backup
let original_size = backup_file(
Path::new("backup_test.txt"),
Path::new("backup_test.txt.zst"),
&config,
)?;
let compressed_size = std::fs::metadata("backup_test.txt.zst")?.len();
println!("Original: {} bytes", original_size);
println!("Compressed: {} bytes", compressed_size);
println!("Ratio: {:.2}x", original_size as f64 / compressed_size as f64);
// Restore
let restored_size = restore_file(
Path::new("backup_test.txt.zst"),
Path::new("backup_test_restored.txt"),
)?;
println!("Restored: {} bytes", restored_size);
// Verify
let original = std::fs::read("backup_test.txt")?;
let restored = std::fs::read("backup_test_restored.txt")?;
assert_eq!(original, restored);
println!("Backup and restore successful!");
// Cleanup
std::fs::remove_file("backup_test.txt")?;
std::fs::remove_file("backup_test.txt.zst")?;
std::fs::remove_file("backup_test_restored.txt")?;
Ok(())
}
Summary
- Use encode_all(data, level) for simple bulk compression
- Use decode_all(data) for simple bulk decompression
- Compression levels: 1 (fastest) to 22 (best); the default is 3
- Use Encoder and Decoder for streaming operations
- Enable checksums with encoder.include_checksum(true) for data integrity
- Multi-threaded compression requires the zstdmt feature
- Dictionary compression improves ratios for small, similar data
- Stream processing is memory-efficient for large files
- Well suited to log compression, data archival, network protocols, and backups
- Zstandard offers better ratios than DEFLATE at much higher speeds