How do I compress data with Zstandard (zstd) in Rust?
Walkthrough
The zstd crate provides Rust bindings to Facebook's Zstandard (zstd) compression library. Zstandard offers excellent compression ratios comparable to gzip while being significantly faster — often 2-5x faster at decompression. It's ideal for applications requiring fast compression with good ratios, such as log storage, network protocols, database compression, and file archiving.
Key concepts:
- Compression levels — range from 1 (fastest) to 22 (best ratio), with 3 as default
- Streaming API — compress/decompress large data without loading entirely into memory
- Dictionary compression — pre-trained dictionaries for small, similar data
- One-shot API — simple functions for compressing complete buffers
- Encoder/Decoder — streaming interfaces wrapping `Read` and `Write`
Code Example
# Cargo.toml
[dependencies]
zstd = "0.13"

use zstd::{encode_all, decode_all};
fn main() -> std::io::Result<()> {
    let input = b"Hello, World! This is some data to compress.";

    // One-shot compression at the default level (3).
    let packed = encode_all(&input[..], 3)?;
    println!("Original: {} bytes", input.len());
    println!("Compressed: {} bytes", packed.len());

    // One-shot decompression back to the original bytes.
    let restored = decode_all(&packed[..])?;
    println!("Decompressed: {} bytes", restored.len());

    assert_eq!(input.to_vec(), restored);
    Ok(())
}

Basic Compression and Decompression
use zstd::{encode_all, decode_all};

fn main() -> std::io::Result<()> {
    let data = b"The quick brown fox jumps over the lazy dog. ".repeat(100);
    println!("Original size: {} bytes", data.len());

    // Show how the compressed size varies with the level.
    for level in [1, 3, 9, 19] {
        let packed = encode_all(&data[..], level)?;
        let ratio = data.len() as f64 / packed.len() as f64;
        println!("Level {}: {} bytes (ratio: {:.2}x)",
            level, packed.len(), ratio);
    }

    // Round-trip once to confirm decompression restores the input.
    let packed = encode_all(&data[..], 3)?;
    let restored = decode_all(&packed[..])?;
    assert_eq!(data, restored.as_slice());
    println!("\nDecompression successful!");
    Ok(())
}

Streaming Compression with Encoder
use std::fs::File;
use std::io::{self, prelude::*};
use zstd::Encoder;

fn main() -> io::Result<()> {
    // Stream-compress into a file without buffering everything in memory.
    let file = File::create("compressed.zst")?;
    let mut encoder = Encoder::new(file, 3)?;

    // Feed the encoder chunk by chunk, like any other `Write` sink.
    for i in 0..10 {
        let chunk = format!("Chunk {} with some data. ", i);
        encoder.write_all(chunk.as_bytes())?;
    }

    // finish() writes the final zstd frame; skipping it truncates the output.
    let file = encoder.finish()?;
    drop(file);
    println!("Compressed data written to compressed.zst");
    Ok(())
}

Streaming Decompression with Decoder
use std::fs::File;
use std::io::{self, prelude::*};
use zstd::Decoder;

fn main() -> io::Result<()> {
    // Decoder wraps any `Read` source and yields the decompressed bytes.
    let file = File::open("compressed.zst")?;
    let mut decoder = Decoder::new(file)?;

    let mut text = String::new();
    decoder.read_to_string(&mut text)?;
    println!("Decompressed content:\n{}", text);
    Ok(())
}

Compressing Files
use std::fs::File;
use std::io::{self, prelude::*, BufReader, BufWriter};
use zstd::{Encoder, Decoder};
/// Compress `input_path` into `output_path` at the given zstd `level`,
/// streaming so arbitrarily large files fit in constant memory.
fn compress_file(input_path: &str, output_path: &str, level: i32) -> io::Result<()> {
    let input_file = File::open(input_path)?;
    let output_file = File::create(output_path)?;
    let mut reader = BufReader::new(input_file);
    let mut encoder = Encoder::new(BufWriter::new(output_file), level)?;
    // Stream the data. `io::copy` already loops until the reader is
    // exhausted, so the previous `.take(u64::MAX)` workaround is unnecessary.
    let bytes = std::io::copy(&mut reader, &mut encoder)?;
    // finish() emits the final frame and returns the BufWriter; flush it
    // explicitly so write errors surface here instead of being swallowed on drop.
    encoder.finish()?.flush()?;
    println!("Compressed {} bytes", bytes);
    Ok(())
}
/// Decompress a .zst file back to its original contents.
fn decompress_file(input_path: &str, output_path: &str) -> io::Result<()> {
    let input_file = File::open(input_path)?;
    let output_file = File::create(output_path)?;
    let mut source = Decoder::new(input_file)?;
    let mut sink = BufWriter::new(output_file);
    // The decoder is a plain `Read`, so io::copy streams it straight through.
    let bytes = std::io::copy(&mut source, &mut sink)?;
    sink.flush()?;
    println!("Decompressed {} bytes", bytes);
    Ok(())
}
fn main() -> io::Result<()> {
    // Build a throwaway input file.
    let content = "Test content for compression. ".repeat(1000);
    std::fs::write("test_input.txt", &content)?;

    compress_file("test_input.txt", "test_output.zst", 9)?;
    decompress_file("test_output.zst", "test_decompressed.txt")?;

    // Round-trip check: decompressed bytes must equal the original.
    let original = std::fs::read("test_input.txt")?;
    let restored = std::fs::read("test_decompressed.txt")?;
    assert_eq!(original, restored);
    println!("Verification successful!");

    // Report the size reduction.
    let original_size = std::fs::metadata("test_input.txt")?.len();
    let compressed_size = std::fs::metadata("test_output.zst")?.len();
    println!("Original: {} bytes", original_size);
    println!("Compressed: {} bytes ({:.1}%)",
        compressed_size,
        (compressed_size as f64 / original_size as f64) * 100.0
    );
    Ok(())
}

In-Memory Compression
use zstd::{encode_all, decode_all};

fn main() -> std::io::Result<()> {
    let data = b"This is some data that we want to compress in memory.";

    // One-shot compression/decompression of a complete buffer.
    let compressed = encode_all(&data[..], 3)?;
    let decompressed = decode_all(&compressed[..])?;
    println!("Original: {} bytes", data.len());
    println!("Compressed: {} bytes", compressed.len());
    println!("Decompressed: {} bytes", decompressed.len());
    assert_eq!(data.to_vec(), decompressed);

    // Stream into an in-memory Vec. The zstd crate has no `encode_to_vec`;
    // `zstd::stream::copy_encode` pipes any `Read` into any `Write`.
    let mut compressed_stream = Vec::new();
    zstd::stream::copy_encode(&data[..], &mut compressed_stream, 3)?;
    println!("\nStreamed compression: {} bytes", compressed_stream.len());
    Ok(())
}

Compression Levels Comparison
use zstd::encode_all;
use std::time::Instant;

fn main() -> std::io::Result<()> {
    // Deterministic pseudo-random test data (1 MB).
    let data: Vec<u8> = (0..1_000_000)
        .map(|i| ((i * 7 + 13) % 256) as u8)
        .collect();
    println!("Original data: {} bytes", data.len());
    println!();
    println!("{:<8} {:<12} {:<12} {:<10} {:<12}",
        "Level", "Compressed", "Ratio", "Time", "MB/s");
    println!("{}", "-".repeat(54));

    // Benchmark a spread of levels from fastest (1) to strongest (22).
    for level in [1, 2, 3, 5, 7, 9, 12, 15, 19, 22] {
        let started = Instant::now();
        let packed = encode_all(&data[..], level)?;
        let elapsed = started.elapsed();

        let ratio = data.len() as f64 / packed.len() as f64;
        let throughput = (data.len() as f64 / 1_000_000.0) / elapsed.as_secs_f64();
        println!("{:<8} {:<12} {:<12.2} {:<10} {:<12.1}",
            level,
            format!("{} bytes", packed.len()),
            ratio,
            format!("{:?}", elapsed),
            throughput
        );
    }
    Ok(())
}

Working with Custom Buffers
use zstd::{Encoder, Decoder};
use std::io::{self, Cursor, Read, Write};
/// Compress `data` into a freshly-allocated Vec at the given level.
fn compress_to_buffer(data: &[u8], level: i32) -> io::Result<Vec<u8>> {
    let mut buf = Vec::new();
    // Scope the encoder so its mutable borrow of `buf` ends before the return.
    {
        let mut encoder = Encoder::new(&mut buf, level)?;
        encoder.write_all(data)?;
        encoder.finish()?;
    }
    Ok(buf)
}
/// Decompress a zstd frame held entirely in memory.
fn decompress_from_buffer(compressed: &[u8]) -> io::Result<Vec<u8>> {
    let mut out = Vec::new();
    Decoder::new(Cursor::new(compressed))?.read_to_end(&mut out)?;
    Ok(out)
}
fn main() -> io::Result<()> {
    let payload = b"Some important data that needs compression.";

    // Compress and then restore via the buffer helpers above.
    let packed = compress_to_buffer(payload, 9)?;
    let unpacked = decompress_from_buffer(&packed)?;

    println!("Original: {} bytes", payload.len());
    println!("Compressed: {} bytes", packed.len());
    assert_eq!(payload.to_vec(), unpacked);
    println!("Round-trip successful!");
    Ok(())
}

Dictionary Compression
use zstd::{encode_all, decode_all, dict::EncoderDictionary, dict::DecoderDictionary, Encoder, Decoder};
use std::io::{Cursor, Read, Write};

fn main() -> std::io::Result<()> {
    // Training data - similar small samples.
    // NOTE(review): real dictionary training wants on the order of 100+
    // samples; with only a handful, training can fail at runtime.
    let training_data: Vec<&[u8]> = vec![
        b"User: alice, Action: login, Time: 10:00".as_slice(),
        b"User: bob, Action: logout, Time: 10:05".as_slice(),
        b"User: alice, Action: purchase, Time: 10:10".as_slice(),
        b"User: charlie, Action: login, Time: 10:15".as_slice(),
        b"User: bob, Action: purchase, Time: 10:20".as_slice(),
        b"User: alice, Action: logout, Time: 10:25".as_slice(),
        b"User: charlie, Action: purchase, Time: 10:30".as_slice(),
        b"User: bob, Action: login, Time: 10:35".as_slice(),
    ];

    // Train a dictionary directly from the samples. (`zstd::dict` has no
    // `from_buffer`; `from_samples` takes the sample list and a max size,
    // so no manual flattening is needed.)
    let dictionary = zstd::dict::from_samples(&training_data, 1024)?;
    println!("Dictionary size: {} bytes", dictionary.len());

    // Prepare reusable encoder/decoder dictionaries. The compression level
    // is baked into the EncoderDictionary at construction time.
    let encoder_dict = EncoderDictionary::copy(&dictionary, 3);
    let decoder_dict = DecoderDictionary::copy(&dictionary);

    // Test data (similar to training data)
    let test_data = b"User: diana, Action: login, Time: 10:40";

    // Compress with the dictionary. `with_prepared_dictionary` takes no
    // level argument — the level lives in the EncoderDictionary.
    let compressed_with_dict = {
        let mut output = Vec::new();
        let mut encoder = Encoder::with_prepared_dictionary(&mut output, &encoder_dict)?;
        encoder.write_all(test_data)?;
        encoder.finish()?;
        output
    };

    // Compress without a dictionary for comparison.
    let compressed_without_dict = encode_all(&test_data[..], 3)?;
    println!("\nOriginal: {} bytes", test_data.len());
    println!("Without dict: {} bytes", compressed_without_dict.len());
    println!("With dict: {} bytes", compressed_with_dict.len());

    // Decompress with the matching dictionary.
    let mut decoder = Decoder::with_prepared_dictionary(Cursor::new(&compressed_with_dict), &decoder_dict)?;
    let mut decompressed = Vec::new();
    decoder.read_to_end(&mut decompressed)?;
    assert_eq!(test_data.to_vec(), decompressed);
    println!("Decompression successful!");
    Ok(())
}

Chunked Processing
use zstd::{Encoder, Decoder};
use std::io::{self, Read, Write};
struct ChunkCompressor {
chunk_size: usize,
level: i32,
}
impl ChunkCompressor {
fn new(chunk_size: usize, level: i32) -> Self {
Self { chunk_size, level }
}
fn compress_chunks(&self, data: &[u8]) -> io::Result<Vec<Vec<u8>>> {
let mut compressed_chunks = Vec::new();
for chunk in data.chunks(self.chunk_size) {
let mut output = Vec::new();
let mut encoder = Encoder::new(&mut output, self.level)?;
encoder.write_all(chunk)?;
encoder.finish()?;
compressed_chunks.push(output);
}
Ok(compressed_chunks)
}
fn decompress_chunks(&self, compressed_chunks: &[Vec<u8>]) -> io::Result<Vec<u8>> {
let mut decompressed = Vec::new();
for chunk in compressed_chunks {
let mut decoder = Decoder::new(Cursor::new(chunk))?;
let mut buffer = Vec::new();
decoder.read_to_end(&mut buffer)?;
decompressed.extend(buffer);
}
Ok(decompressed)
}
}
fn main() -> io::Result<()> {
    // 100 KB of repeating byte values as test input.
    let data: Vec<u8> = (0..100_000).map(|i| (i % 256) as u8).collect();

    let compressor = ChunkCompressor::new(10_000, 3);

    // Compress in 10 KB slices, each its own frame.
    let compressed_chunks = compressor.compress_chunks(&data)?;
    println!("Created {} compressed chunks", compressed_chunks.len());

    let total_compressed: usize = compressed_chunks.iter().map(|c| c.len()).sum();
    println!("Total compressed: {} bytes (from {} bytes)",
        total_compressed, data.len());

    // Reassemble and verify.
    let decompressed = compressor.decompress_chunks(&compressed_chunks)?;
    assert_eq!(data, decompressed);
    println!("Round-trip successful!");
    Ok(())
}

Real-World Example: Log Compressor
use std::fs::{self, File};
use std::io::{self, BufWriter, Write};
use std::path::{Path, PathBuf};
use zstd::Encoder;
use chrono::Local;
// Note: Add chrono = "0.4" to Cargo.toml
/// Appends log entries to date-stamped, zstd-compressed files, rotating
/// to a new file whenever the calendar date changes.
struct LogCompressor {
    output_dir: PathBuf,      // directory receiving logs-YYYY-MM-DD.zst files
    // zstd's Encoder carries a lifetime parameter (for borrowed dictionaries);
    // it must be written explicitly in a field type — 'static since no
    // borrowed dictionary is used here.
    current_file: Option<Encoder<'static, BufWriter<File>>>,
    current_date: String,     // date stamp of the currently open file
    level: i32,               // zstd compression level
    bytes_written: u64,       // uncompressed bytes fed to the encoder
    bytes_compressed: u64,    // NOTE(review): never incremented — see stats()
}
impl LogCompressor {
// Create a compressor writing date-stamped .zst files into `output_dir`
// (created if missing). No file is opened until the first write_log call.
fn new(output_dir: impl AsRef<Path>, level: i32) -> io::Result<Self> {
fs::create_dir_all(&output_dir)?;
Ok(Self {
output_dir: output_dir.as_ref().to_path_buf(),
current_file: None,
current_date: String::new(),
level,
bytes_written: 0,
bytes_compressed: 0,
})
}
// Append one newline-terminated entry to today's compressed log,
// rotating to a new file when the calendar date changes.
fn write_log(&mut self, entry: &str) -> io::Result<()> {
let today = Local::now().format("%Y-%m-%d").to_string();
// Rotate file if date changed. This condition is also true on the very
// first write (current_date starts empty), so rotate_file runs — and
// prints its "Rotated" message — before any file has ever been opened.
if today != self.current_date {
self.rotate_file(&today)?;
self.current_date = today;
}
// Lazily open today's encoder on first use after a rotation.
if self.current_file.is_none() {
let filename = format!("logs-{}.zst", self.current_date);
let path = self.output_dir.join(&filename);
let file = File::create(&path)?;
let encoder = Encoder::new(BufWriter::new(file), self.level)?;
self.current_file = Some(encoder);
}
// Write the entry plus a trailing newline; track uncompressed bytes.
if let Some(ref mut encoder) = self.current_file {
let bytes = entry.as_bytes();
encoder.write_all(bytes)?;
encoder.write_all(b"\n")?;
self.bytes_written += bytes.len() as u64 + 1;
}
Ok(())
}
// Finalize the current file's zstd frame (if any) before switching dates.
fn rotate_file(&mut self, new_date: &str) -> io::Result<()> {
if let Some(encoder) = self.current_file.take() {
let writer = encoder.finish()?;
writer.flush()?;
}
// NOTE(review): bytes_compressed is reset here but never incremented
// anywhere, so stats() always reports 0 compressed bytes — confirm intent.
self.bytes_compressed = 0;
println!("Rotated log file for {}", new_date);
Ok(())
}
// Finish and flush the open encoder, if any; safe to call repeatedly.
fn finish(&mut self) -> io::Result<()> {
if let Some(encoder) = self.current_file.take() {
let writer = encoder.finish()?;
writer.flush()?;
}
Ok(())
}
// Returns (uncompressed bytes written, compressed bytes — currently always 0).
fn stats(&self) -> (u64, u64) {
(self.bytes_written, self.bytes_compressed)
}
}
impl Drop for LogCompressor {
    fn drop(&mut self) {
        // Best-effort finalization; errors cannot propagate out of drop.
        let _ = self.finish();
    }
}
fn main() -> io::Result<()> {
    let mut compressor = LogCompressor::new("compressed_logs", 9)?;

    // A fixed batch of sample entries to feed through the compressor.
    let log_entries = [
        "2024-01-15 10:00:00 INFO Application started",
        "2024-01-15 10:00:01 DEBUG Loading configuration",
        "2024-01-15 10:00:02 INFO Server listening on port 8080",
        "2024-01-15 10:00:03 WARN High memory usage detected",
        "2024-01-15 10:00:04 ERROR Connection failed: timeout",
        "2024-01-15 10:00:05 INFO Retrying connection...",
        "2024-01-15 10:00:06 INFO Connection established",
    ];

    for entry in &log_entries {
        compressor.write_log(entry)?;
    }

    // Explicit finish surfaces any final I/O error (Drop would swallow it).
    compressor.finish()?;
    println!("Log compression complete!");
    Ok(())
}

Real-World Example: Network Protocol
use zstd::{Encoder, Decoder};
use std::io::{self, Cursor, Read, Write};
/// A typed message whose payload travels zstd-compressed on the wire.
struct CompressedMessage {
    message_type: u8,  // application-defined tag, sent uncompressed
    payload: Vec<u8>,  // uncompressed payload bytes
}

impl CompressedMessage {
    fn new(message_type: u8, payload: &[u8]) -> Self {
        Self {
            message_type,
            payload: payload.to_vec(),
        }
    }

    /// Wire format: one type byte followed by a zstd frame of the payload.
    fn encode(&self, level: i32) -> io::Result<Vec<u8>> {
        let mut wire = vec![self.message_type];
        let mut encoder = Encoder::new(&mut wire, level)?;
        encoder.write_all(&self.payload)?;
        encoder.finish()?;
        Ok(wire)
    }

    /// Parse a buffer produced by `encode`.
    fn decode(data: &[u8]) -> io::Result<Self> {
        let (&message_type, body) = match data.split_first() {
            Some(parts) => parts,
            None => return Err(io::Error::new(io::ErrorKind::InvalidData, "Empty message")),
        };
        let mut payload = Vec::new();
        Decoder::new(Cursor::new(body))?.read_to_end(&mut payload)?;
        Ok(Self {
            message_type,
            payload,
        })
    }
}
fn main() -> io::Result<()> {
    let original = CompressedMessage::new(42, b"Hello, this is a message payload that will be compressed!");
    println!("Original payload: {} bytes", original.payload.len());

    // Serialize to the compressed wire format.
    let encoded = original.encode(9)?;
    println!("Encoded message: {} bytes", encoded.len());

    // Parse it back and verify nothing was lost.
    let decoded = CompressedMessage::decode(&encoded)?;
    println!("Decoded payload: {} bytes", decoded.payload.len());
    assert_eq!(original.message_type, decoded.message_type);
    assert_eq!(original.payload, decoded.payload);
    println!("Round-trip successful!");
    Ok(())
}

Real-World Example: Database Value Compression
use zstd::{encode_all, decode_all};
use std::collections::HashMap;
/// A value stored compressed, alongside its original (uncompressed) size.
#[derive(Clone, PartialEq)]
struct CompressedValue {
    compressed: Vec<u8>,
    original_size: usize,
}

impl CompressedValue {
    /// Compress `data` at `level` and remember how large it originally was.
    fn new(data: &[u8], level: i32) -> std::io::Result<Self> {
        Ok(Self {
            compressed: encode_all(data, level)?,
            original_size: data.len(),
        })
    }

    /// Recover the original bytes.
    fn decompress(&self) -> std::io::Result<Vec<u8>> {
        decode_all(&self.compressed[..])
    }

    /// original / compressed; values above 1.0 mean the data shrank.
    fn compression_ratio(&self) -> f64 {
        self.original_size as f64 / self.compressed.len() as f64
    }

    /// Bytes saved by compression (0 if the value actually grew).
    fn size_saved(&self) -> usize {
        self.original_size.saturating_sub(self.compressed.len())
    }
}
/// An in-memory key/value store that transparently compresses its values.
struct CompressedStore {
    data: HashMap<String, CompressedValue>,
    compression_level: i32,
    total_original: usize,   // running sum of uncompressed value sizes
    total_compressed: usize, // running sum of compressed value sizes
}

impl CompressedStore {
    fn new(compression_level: i32) -> Self {
        Self {
            data: HashMap::new(),
            compression_level,
            total_original: 0,
            total_compressed: 0,
        }
    }

    /// Compress and store `value` under `key`, updating the running totals.
    /// NOTE(review): overwriting an existing key leaves the old value's
    /// sizes in the totals — confirm whether that is intended.
    fn insert(&mut self, key: &str, value: &[u8]) -> std::io::Result<()> {
        let entry = CompressedValue::new(value, self.compression_level)?;
        self.total_original += value.len();
        self.total_compressed += entry.compressed.len();
        self.data.insert(key.to_string(), entry);
        Ok(())
    }

    /// Fetch and decompress the value for `key`, if present.
    fn get(&self, key: &str) -> std::io::Result<Option<Vec<u8>>> {
        self.data
            .get(key)
            .map(|entry| entry.decompress())
            .transpose()
    }

    /// Returns (total original bytes, total compressed bytes, overall ratio).
    fn stats(&self) -> (usize, usize, f64) {
        let ratio = self.total_original as f64 / self.total_compressed as f64;
        (self.total_original, self.total_compressed, ratio)
    }
}
fn main() -> std::io::Result<()> {
let mut store = CompressedStore::new(9);
// Store some data
let entries = vec![
("user:1:profile", br#"{"name":"Alice","email":"alice@example.com","age":30,"bio":"Software engineer with 10 years of experience in Rust and systems programming."}"#),
("user:2:profile", br#"{"name":"Bob","email":"bob@example.com","age":25,"bio":"Full-stack developer passionate about web technologies and cloud architecture."}"#),
("config:app", br#"{"debug":false,"max_connections":100,"timeout":30,"retry_count":3,"features":["auth","logging","metrics"]}"#),
];
for (key, value) in &entries {
store.insert(key, value)?;
}
// Retrieve
let profile = store.get("user:1:profile")?;
println!("Retrieved: {}", String::from_utf8_lossy(profile.unwrap().as_slice()));
// Stats
let (original, compressed, ratio) = store.stats();
println!("\nStats:");
println!(" Original: {} bytes", original);
println!(" Compressed: {} bytes", compressed);
println!(" Ratio: {:.2}x", ratio);
println!(" Saved: {} bytes", original - compressed);
Ok(())
}Error Handling
use zstd::{encode_all, decode_all};
use std::io;
/// Compress `data`, rejecting out-of-range levels up front.
/// This wrapper restricts callers to the standard 1..=22 zstd range.
fn safe_compress(data: &[u8], level: i32) -> io::Result<Vec<u8>> {
    if !(1..=22).contains(&level) {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            format!("Invalid compression level: {}. Must be 1-22.", level)
        ));
    }
    // Empty input maps to an empty output rather than an empty zstd frame.
    if data.is_empty() {
        return Ok(Vec::new());
    }
    encode_all(data, level)
}
/// Decompress `compressed`, refusing outputs larger than `max_size`
/// to guard against decompression bombs.
fn safe_decompress(compressed: &[u8], max_size: usize) -> io::Result<Vec<u8>> {
    // Mirrors safe_compress: empty in, empty out.
    if compressed.is_empty() {
        return Ok(Vec::new());
    }
    let out = decode_all(compressed)?;
    if out.len() <= max_size {
        Ok(out)
    } else {
        Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!("Decompressed data too large: {} bytes (max: {})",
                out.len(), max_size)
        ))
    }
}
fn main() -> io::Result<()> {
    let data = b"Some test data for compression";

    // Happy path: compress, then decompress under a generous size cap.
    let compressed = safe_compress(data, 3)?;
    let decompressed = safe_decompress(&compressed, 1_000_000)?;
    assert_eq!(data.to_vec(), decompressed);
    println!("Round-trip successful!");

    // An out-of-range level is rejected before touching zstd.
    match safe_compress(data, 30) {
        Ok(_) => println!("Unexpected success"),
        Err(e) => println!("Expected error: {}", e),
    }

    // A tiny size cap triggers the decompression-bomb guard.
    match safe_decompress(&compressed, 5) {
        Ok(_) => println!("Unexpected success"),
        Err(e) => println!("Expected error: {}", e),
    }
    Ok(())
}

Integration with Other Libraries
use zstd::{Encoder, Decoder};
use std::io::{self, Cursor, Read, Write};
use flate2::{Compress, Decompress, Compression};
use flate2::read::{ZlibEncoder, ZlibDecoder};
// Note: Add flate2 = "1.0" to Cargo.toml
/// Compress `data` with both zstd and zlib and print the resulting sizes.
fn compare_compression(data: &[u8]) -> io::Result<()> {
    // Zstandard: write-side streaming encoder into a Vec.
    let zstd_compressed = {
        let mut sink = Vec::new();
        let mut encoder = Encoder::new(&mut sink, 3)?;
        encoder.write_all(data)?;
        encoder.finish()?;
        sink
    };

    // zlib: read-side encoder that compresses as it is read from.
    let zlib_compressed = {
        let mut source = ZlibEncoder::new(Cursor::new(data), Compression::default());
        let mut sink = Vec::new();
        source.read_to_end(&mut sink)?;
        sink
    };

    println!("Original: {} bytes", data.len());
    println!("zstd: {} bytes", zstd_compressed.len());
    println!("zlib: {} bytes", zlib_compressed.len());
    Ok(())
}
fn main() -> io::Result<()> {
    // Repeated text compresses well under both codecs.
    let data = b"The quick brown fox jumps over the lazy dog. ".repeat(100);
    compare_compression(&data)
}

Summary
- Use `encode_all()` and `decode_all()` for simple one-shot compression/decompression
- Use `Encoder` and `Decoder` for streaming large data without loading into memory
- Compression levels range from 1 (fastest) to 22 (best ratio), with 3 as default
- Zstandard offers faster decompression than gzip with similar compression ratios
- Dictionary compression improves ratios for small, similar data (like log entries)
- Always call `encoder.finish()` to finalize compression
- Use `Encoder::new(writer, level)` and `Decoder::new(reader)` for streaming
- For chunked data, compress each chunk separately or use a single streaming encoder
- Ideal for: log compression, network protocols, database storage, file archiving
- Handles both `Read` and `Write` traits for easy integration
- Validate decompressed size limits to prevent decompression bombs
