Rust walkthroughs
The zstd crate provides Rust bindings for Facebook's Zstandard (zstd) compression algorithm. Zstd offers compression ratios comparable to gzip but with much faster decompression speeds. It's become a popular choice for real-time compression in databases, file systems, network protocols, and data archival. The crate supports both streaming compression (for large files or real-time data) and bulk compression (for in-memory data), with configurable compression levels from 1 (fastest) to 22 (best ratio).
Key concepts:
- compress() and decompress() for simple in-memory operations
- Encoder and Decoder for large data or streaming
# Cargo.toml
[dependencies]
zstd = "0.13"use zstd::{compress, decompress};
/// Smallest possible demo: one in-memory compress/decompress round trip.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let original = b"Hello, World! This is some test data.";
    // Level 3 is zstd's default trade-off between speed and ratio.
    let packed = compress(original, 3)?;
    // Bulk decompression needs an upper bound for the output buffer;
    // the original length is exact, so it is a safe bound.
    let unpacked = decompress(&packed, original.len())?;
    assert_eq!(original.to_vec(), unpacked);
    Ok(())
}use zstd::{compress, decompress};
// Round-trips a short text and reports the compression ratio.
fn main() -> Result<(), Box<dyn std::error::Error>> {
// NOTE: the byte-string literal spans two lines; the embedded newline is part of the data.
let original = b"This is some text that will be compressed.
The more repetitive the data, the better the compression ratio.";
// Compress with default level (3)
let compressed = compress(original, 3)?;
println!("Original: {} bytes", original.len());
println!("Compressed: {} bytes", compressed.len());
println!("Ratio: {:.1}%", compressed.len() as f64 / original.len() as f64 * 100.0);
// Decompress - need to know original size or use streaming
// The second argument is an upper bound on the decompressed size.
let decompressed = decompress(&compressed, original.len())?;
assert_eq!(original.to_vec(), decompressed);
println!("Decompression successful!");
Ok(())
}use zstd::{compress, decompress};
// Compares output size across zstd compression levels (1 = fastest, 22 = best ratio).
fn main() -> Result<(), Box<dyn std::error::Error>> {
// Generate test data
// Repeating 0..=255 byte pattern: highly compressible, so ratios here
// are better than typical real-world data.
let data: Vec<u8> = (0..10000).map(|i| (i % 256) as u8).collect();
println!("Original: {} bytes", data.len());
println!("\nCompression level comparison:");
for level in [1, 3, 6, 9, 12, 15, 19, 22] {
let compressed = compress(&data, level)?;
let ratio = compressed.len() as f64 / data.len() as f64 * 100.0;
println!(" Level {:2}: {:6} bytes ({:.1}%)", level, compressed.len(), ratio);
}
// Verify decompression works
let compressed = compress(&data, 10)?;
// data.len() is the exact decompressed size, so it is a safe upper bound.
let decompressed = decompress(&compressed, data.len())?;
assert_eq!(data, decompressed);
Ok(())
}use std::fs::File;
use std::io::{self, BufWriter, Write};
use zstd::stream::Encoder;
// Streams the file at `input_path` through a zstd Encoder into `output_path`.
// `level` is the zstd compression level (1-22).
fn compress_file(input_path: &str, output_path: &str, level: i32) -> io::Result<()> {
let input = File::open(input_path)?;
let output = File::create(output_path)?;
// Create encoder wrapping the output file
let mut encoder = Encoder::new(output, level)?;
// Stream all data through encoder
io::copy(&mut io::BufReader::new(input), &mut encoder)?;
// Must call finish() to finalize the compressed data
// finish() flushes internal buffers and writes the end-of-frame marker;
// dropping the encoder without it would leave a truncated file.
encoder.finish()?;
Ok(())
}
// Demo driver: creates a test file, compresses it, reports sizes, cleans up.
fn main() -> io::Result<()> {
// Create test file
std::fs::write("input.txt", "Hello, this is a test file to compress!\n".repeat(100))
.expect("Failed to write test file");
compress_file("input.txt", "output.zst", 3)?;
let original_size = std::fs::metadata("input.txt")?.len();
let compressed_size = std::fs::metadata("output.zst")?.len();
println!("Original: {} bytes", original_size);
println!("Compressed: {} bytes", compressed_size);
// Cleanup
std::fs::remove_file("input.txt")?;
std::fs::remove_file("output.zst")?;
Ok(())
}use std::fs::File;
use std::io::{self, BufReader};
use zstd::stream::Decoder;
// Streams the zstd file at `input_path` into a plain file at `output_path`.
fn decompress_file(input_path: &str, output_path: &str) -> io::Result<()> {
let input = File::open(input_path)?;
let output = File::create(output_path)?;
// Create decoder wrapping the input file
let mut decoder = Decoder::new(input)?;
// Stream decompressed data to output
// No finish() is needed on the read side; end of frame ends the stream.
io::copy(&mut decoder, &mut io::BufWriter::new(output))?;
Ok(())
}
// Demo driver: compress a file, decompress it back, verify the round trip.
fn main() -> io::Result<()> {
// Create and compress test file
std::fs::write("test.txt", "Test content for decompression!\n".repeat(50))
.expect("Failed to write test file");
// Compress
let input = File::open("test.txt")?;
let output = File::create("test.zst")?;
let mut encoder = zstd::stream::Encoder::new(output, 3)?;
io::copy(&mut io::BufReader::new(input), &mut encoder)?;
// finish() writes the end-of-frame marker; required before reading back.
encoder.finish()?;
// Decompress
decompress_file("test.zst", "test_recovered.txt")?;
// Verify
let original = std::fs::read_to_string("test.txt")?;
let recovered = std::fs::read_to_string("test_recovered.txt")?;
assert_eq!(original, recovered);
println!("Round-trip successful!");
// Cleanup
std::fs::remove_file("test.txt")?;
std::fs::remove_file("test.zst")?;
std::fs::remove_file("test_recovered.txt")?;
Ok(())
}use std::io::{self, Cursor, Read, Write};
use zstd::stream::{Encoder, Decoder};
/// Compress `data` into an in-memory buffer at the given zstd level.
fn compress_to_memory(data: &[u8], level: i32) -> io::Result<Vec<u8>> {
    // Encoder::finish() hands back the inner writer once the frame is
    // complete, so the encoder can own the Vec directly — no scope dance.
    let mut encoder = Encoder::new(Vec::new(), level)?;
    encoder.write_all(data)?;
    encoder.finish()
}
/// Decompress a complete in-memory zstd frame back into raw bytes.
fn decompress_from_memory(compressed: &[u8]) -> io::Result<Vec<u8>> {
    let mut plain = Vec::new();
    Decoder::new(Cursor::new(compressed))?.read_to_end(&mut plain)?;
    Ok(plain)
}
// Demo driver: in-memory round trip through the streaming API.
fn main() -> io::Result<()> {
let data = b"This is some data to compress in memory.";
let compressed = compress_to_memory(data, 3)?;
println!("Compressed: {} bytes", compressed.len());
let decompressed = decompress_from_memory(&compressed)?;
println!("Decompressed: {} bytes", decompressed.len());
assert_eq!(data.to_vec(), decompressed);
println!("Round-trip successful!");
Ok(())
}use std::io::{self, Read, Write};
use zstd::stream::{Encoder, Decoder};
/// Compress a sequence of chunks into one contiguous zstd frame.
fn compress_chunks(
    chunks: &[Vec<u8>],
    level: i32,
) -> io::Result<Vec<u8>> {
    // Every chunk flows through a single encoder, so the result is ONE
    // frame — chunk boundaries are not recorded anywhere.
    let mut encoder = Encoder::new(Vec::new(), level)?;
    for piece in chunks {
        encoder.write_all(piece)?;
    }
    // finish() returns the inner Vec once the frame is closed.
    encoder.finish()
}
// Decompress `compressed` and return the output split into pieces of at
// most `chunk_size` bytes. Chunk boundaries are NOT the ones used by the
// writer; they depend only on how the decoder fills each buffer.
fn decompress_to_chunks(
compressed: &[u8],
chunk_size: usize,
) -> io::Result<Vec<Vec<u8>>> {
let mut decoder = Decoder::new(std::io::Cursor::new(compressed))?;
let mut chunks = Vec::new();
loop {
let mut chunk = vec![0u8; chunk_size];
match decoder.read(&mut chunk) {
Ok(0) => break, // EOF
Ok(n) => {
// Short reads are normal; keep only the bytes actually written.
chunk.truncate(n);
chunks.push(chunk);
}
Err(e) => return Err(e),
}
}
Ok(chunks)
}
// Demo driver: chunked write, chunked read, then verify that the
// reassembled bytes match (chunk boundaries themselves are not preserved).
fn main() -> io::Result<()> {
let chunks = vec![
b"First chunk of data. ".to_vec(),
b"Second chunk here. ".to_vec(),
b"Third and final chunk.".to_vec(),
];
let compressed = compress_chunks(&chunks, 3)?;
println!("Compressed {} chunks into {} bytes", chunks.len(), compressed.len());
let decompressed = decompress_to_chunks(&compressed, 10)?;
println!("Decompressed into {} chunks", decompressed.len());
// Reassemble
let original: Vec<u8> = chunks.iter().flatten().copied().collect();
let recovered: Vec<u8> = decompressed.iter().flatten().copied().collect();
assert_eq!(original, recovered);
Ok(())
}use zstd::{compress, decompress};
// Dictionary compression works best with similar, small data
/// Build a flat byte buffer of representative sample records.
/// Dictionary training wants many small, similar samples like these.
fn create_training_data() -> Vec<u8> {
    [
        &b"User ID: 1, Name: Alice, Email: alice@example.com"[..],
        &b"User ID: 2, Name: Bob, Email: bob@example.com"[..],
        &b"User ID: 3, Name: Charlie, Email: charlie@example.com"[..],
        &b"User ID: 4, Name: Diana, Email: diana@example.com"[..],
    ]
    .concat()
}
// Shows why dictionaries help small payloads; actual training is
// delegated to the zstd CLI / zstd::dict module.
fn main() -> Result<(), Box<dyn std::error::Error>> {
// In practice, you'd train a dictionary using zstd CLI:
// zstd --train samples/* -o dict.bin
// (unused below; this is what you would feed the trainer)
let sample_data = create_training_data();
// Without dictionary
let small_data = b"User ID: 5, Name: Eve, Email: eve@example.com";
let compressed_normal = compress(small_data, 3)?;
println!("Small data: {} bytes", small_data.len());
println!("Without dict: {} bytes", compressed_normal.len());
// Note: Dictionary training is complex; see zstd::dict module
// This shows the concept; real implementation requires
// training a dictionary from sample data
println!("\nNote: For dictionary compression, train a dictionary");
println!("from sample data using zstd CLI or the dict module.");
Ok(())
}use std::sync::mpsc;
use std::thread;
use zstd::compress;
// A raw input chunk, tagged with its position so results can be reordered.
struct Chunk {
index: usize,
data: Vec<u8>,
}
// A compressed chunk carrying the same position tag as its source Chunk.
struct CompressedChunk {
index: usize,
data: Vec<u8>,
}
fn parallel_compress(
data: &[u8],
chunk_size: usize,
level: i32,
num_workers: usize,
) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
let chunks: Vec<_> = data
.chunks(chunk_size)
.enumerate()
.map(|(i, chunk)| Chunk {
index: i,
data: chunk.to_vec(),
})
.collect();
let (sender, receiver) = mpsc::channel();
let (done_tx, done_rx) = mpsc::channel();
// Spawn workers
for _ in 0..num_workers {
let sender = sender.clone();
let done_tx = done_tx.clone();
thread::spawn(move || {
while let Ok(chunk) = receiver.recv() {
let compressed = compress(&chunk.data, level).unwrap();
let result = CompressedChunk {
index: chunk.index,
data: compressed,
};
if sender.send(result).is_err() {
break;
}
}
drop(done_tx);
});
}
drop(done_tx); // Drop our reference
// Send chunks to workers
let sender_clone = sender.clone();
thread::spawn(move || {
for chunk in chunks {
if sender_clone.send(chunk).is_err() {
break;
}
}
});
// Collect results
drop(sender); // Drop so channel closes properly
let mut results: Vec<CompressedChunk> = Vec::new();
while let Ok(chunk) = sender.recv() {
results.push(chunk);
}
// Wait for workers to finish
while done_rx.recv().is_ok() {}
// Sort by index and concatenate
results.sort_by_key(|c| c.index);
// Simple concatenation (not a proper format)
let mut output = Vec::new();
for chunk in results {
output.extend_from_slice(&chunk.data);
}
Ok(output)
}
fn main() -> Result<(), Box<dyn std::error::Error>> {
// Large data for parallel processing
let data: Vec<u8> = (0..100_000).map(|i| (i % 256) as u8).collect();
println!("Compressing {} bytes with 4 workers...", data.len());
let _compressed = parallel_compress(&data, 10000, 3, 4)?;
println!("Parallel compression complete!");
Ok(())
}use std::io::{self, Cursor, Read, Write};
use zstd::stream::{Encoder, Decoder};
fn main() -> io::Result<()> {
    let data = b"Data to compress with custom options.";
    // BUG FIX: the zstd crate has no `encode_stream::Options` type and no
    // `Encoder::with_options`; options are configured directly on the
    // Encoder. (Read/Write also had to be imported for read/write calls.)
    let mut output = Vec::new();
    {
        let mut encoder = Encoder::new(&mut output, 15)?; // higher compression level
        encoder.window_log(23)?; // 8MB window
        // encoder.multithread(4)?; // multi-threaded compression (requires the `zstdmt` cargo feature)
        encoder.write_all(data)?;
        encoder.finish()?;
    }
    println!("Compressed with custom options: {} bytes", output.len());
    // Decompress normally
    let mut decoder = Decoder::new(Cursor::new(&output))?;
    let mut decompressed = Vec::new();
    decoder.read_to_end(&mut decompressed)?;
    println!("Decompressed: {} bytes", decompressed.len());
    Ok(())
}use std::io::{self, Read, Write};
use zstd::stream::{Encoder, Decoder};
// Compress `data` with a content checksum embedded in the frame, so the
// decoder can detect corruption when reading it back.
fn compress_with_checksum(data: &[u8], level: i32) -> io::Result<Vec<u8>> {
let mut output = Vec::new();
{
let mut encoder = Encoder::new(&mut output, level)?;
// Enable content checksums
// Must be set before writing any data to the encoder.
encoder.include_checksum(true)?;
encoder.write_all(data)?;
encoder.finish()?;
}
Ok(output)
}
/// Decompress a frame; when the frame carries a checksum, the decoder
/// checks it while reading and surfaces corruption as an I/O error.
fn decompress_and_verify(compressed: &[u8]) -> io::Result<Vec<u8>> {
    let mut plain = Vec::new();
    Decoder::new(std::io::Cursor::new(compressed))?.read_to_end(&mut plain)?;
    Ok(plain)
}
// Demo driver: compress with a checksum, then decompress and verify.
fn main() -> io::Result<()> {
let data = b"Important data that needs integrity checking.";
let compressed = compress_with_checksum(data, 3)?;
println!("Compressed with checksum: {} bytes", compressed.len());
// Corruption would be detected during decompression
let decompressed = decompress_and_verify(&compressed)?;
assert_eq!(data.to_vec(), decompressed);
println!("Checksum verified successfully!");
Ok(())
}use zstd::{compress, decompress};
use std::collections::HashMap;
// A demo record serialized with a hand-rolled, length-prefixed layout:
//   id: u64 LE | name_len: u16 LE | name bytes |
//   email_len: u16 LE | email bytes | remaining bytes = data
#[derive(Debug, Clone, PartialEq)]
struct Record {
    id: u64,
    name: String,
    email: String,
    data: Vec<u8>,
}
impl Record {
    /// Serialize into the length-prefixed layout described above.
    /// NOTE: a name/email longer than u16::MAX bytes would wrap in the
    /// length prefix (`as u16`); acceptable for this demo only.
    fn to_bytes(&self) -> Vec<u8> {
        // 8 (id) + 2 + 2 (length prefixes) = 12 fixed bytes.
        let mut bytes =
            Vec::with_capacity(12 + self.name.len() + self.email.len() + self.data.len());
        bytes.extend_from_slice(&self.id.to_le_bytes());
        bytes.extend_from_slice(&(self.name.len() as u16).to_le_bytes());
        bytes.extend_from_slice(self.name.as_bytes());
        bytes.extend_from_slice(&(self.email.len() as u16).to_le_bytes());
        bytes.extend_from_slice(self.email.as_bytes());
        bytes.extend_from_slice(&self.data);
        bytes
    }
    /// Inverse of `to_bytes`. Panics on truncated or malformed input, so
    /// only feed it buffers produced by `to_bytes`.
    fn from_bytes(bytes: &[u8]) -> Self {
        let id = u64::from_le_bytes(bytes[0..8].try_into().unwrap());
        let name_len = u16::from_le_bytes(bytes[8..10].try_into().unwrap()) as usize;
        let name = String::from_utf8_lossy(&bytes[10..10 + name_len]).into_owned();
        let email_start = 10 + name_len;
        let email_len = u16::from_le_bytes(
            bytes[email_start..email_start + 2].try_into().unwrap()
        ) as usize;
        let email = String::from_utf8_lossy(
            &bytes[email_start + 2..email_start + 2 + email_len]
        ).into_owned();
        // Everything after the email is the opaque payload.
        let data_start = email_start + 2 + email_len;
        let data = bytes[data_start..].to_vec();
        Self { id, name, email, data }
    }
}
// An in-memory key-value store that keeps every record zstd-compressed.
struct CompressedStore {
    records: HashMap<u64, Vec<u8>>, // id -> compressed bytes
    level: i32,                     // compression level used by put()
}
impl CompressedStore {
    // Upper bound handed to bulk decompression. The capacity argument is
    // the maximum output size the decoder may allocate; there is no
    // "auto-detect" mode.
    const MAX_RECORD_SIZE: usize = 1 << 20; // 1 MiB

    fn new(level: i32) -> Self {
        Self {
            records: HashMap::new(),
            level,
        }
    }
    /// Serialize and compress `record`, storing it under its id.
    fn put(&mut self, record: Record) -> Result<(), Box<dyn std::error::Error>> {
        let bytes = record.to_bytes();
        let compressed = compress(&bytes, self.level)?;
        self.records.insert(record.id, compressed);
        Ok(())
    }
    /// Fetch and decode the record with the given id, if present.
    fn get(&self, id: u64) -> Result<Option<Record>, Box<dyn std::error::Error>> {
        match self.records.get(&id) {
            Some(compressed) => {
                // BUG FIX: the old code passed 0 as the capacity, with a
                // comment claiming "0 = auto-detect size". A zero-sized
                // output bound makes every lookup fail; use an explicit
                // upper bound instead.
                let bytes = decompress(compressed, Self::MAX_RECORD_SIZE)?;
                Ok(Some(Record::from_bytes(&bytes)))
            }
            None => Ok(None),
        }
    }
    /// Returns (number of records, total compressed bytes held).
    fn stats(&self) -> (usize, usize) {
        let compressed_size: usize = self.records.values().map(|v| v.len()).sum();
        (self.records.len(), compressed_size)
    }
}
// Demo driver: store two records, fetch one back, print store stats.
fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut store = CompressedStore::new(10);
store.put(Record {
id: 1,
name: "Alice".to_string(),
email: "alice@example.com".to_string(),
data: vec![1, 2, 3, 4, 5],
})?;
store.put(Record {
id: 2,
name: "Bob".to_string(),
email: "bob@example.com".to_string(),
data: vec![10, 20, 30, 40, 50],
})?;
let record = store.get(1)?.unwrap();
println!("Retrieved: {:?}", record);
let (count, size) = store.stats();
println!("Store: {} records, {} compressed bytes", count, size);
Ok(())
}use std::io::{self, Cursor, Read, Write};
use zstd::stream::{Encoder, Decoder};
// Per-message compression helper, as a network protocol layer might use.
struct CompressedStream {
    level: i32, // zstd level applied to every message
}
impl CompressedStream {
    fn new(level: i32) -> Self {
        CompressedStream { level }
    }
    /// Compress one message into a standalone zstd frame.
    fn compress_message(&self, message: &[u8]) -> io::Result<Vec<u8>> {
        // finish() returns the inner Vec once the frame is closed.
        let mut encoder = Encoder::new(Vec::new(), self.level)?;
        encoder.write_all(message)?;
        encoder.finish()
    }
    /// Decompress one frame produced by `compress_message`.
    fn decompress_message(&self, compressed: &[u8]) -> io::Result<Vec<u8>> {
        let mut plain = Vec::new();
        Decoder::new(Cursor::new(compressed))?.read_to_end(&mut plain)?;
        Ok(plain)
    }
}
// Demo driver: compress/decompress simulated HTTP messages one by one.
fn main() -> io::Result<()> {
let stream = CompressedStream::new(3);
// Simulate network messages
let messages = [
b"GET /api/users HTTP/1.1\r\nHost: example.com\r\n\r\n".as_slice(),
b"POST /api/data HTTP/1.1\r\nHost: example.com\r\nContent-Length: 100\r\n\r\n".as_slice(),
];
for msg in messages {
let compressed = stream.compress_message(msg)?;
let decompressed = stream.decompress_message(&compressed)?;
// NOTE: tiny messages can come out LARGER due to per-frame overhead.
println!("Original: {} bytes -> Compressed: {} bytes",
msg.len(), compressed.len());
assert_eq!(msg.to_vec(), decompressed);
}
println!("All messages compressed and decompressed successfully!");
Ok(())
}use std::time::Instant;
use zstd::{compress, decompress};
// Time one compress/decompress cycle at `level`.
// Returns (compress seconds, decompress seconds, compressed size).
fn benchmark_compression(data: &[u8], level: i32) -> (f64, f64, usize) {
let start = Instant::now();
// unwrap() is acceptable here: a benchmark over in-memory data should
// abort loudly if compression itself fails.
let compressed = compress(data, level).unwrap();
let compress_time = start.elapsed().as_secs_f64();
let start = Instant::now();
decompress(&compressed, data.len()).unwrap();
let decompress_time = start.elapsed().as_secs_f64();
(compress_time, decompress_time, compressed.len())
}
// Prints a compression/decompression timing table across zstd levels.
fn main() {
// Test data
// Repeating byte pattern; real-world data will show different numbers.
let mut data = Vec::new();
for i in 0..100_000 {
data.push((i % 256) as u8);
}
println!("Benchmark on {} bytes:", data.len());
println!("\n{:8} {:10} {:10} {:10} {:10}",
"Level", "Compress", "Decompress", "Size", "Ratio%");
println!("{:8} {:10} {:10} {:10} {:10}",
"-----", "--------", "----------", "----", "------");
for level in [1, 3, 6, 9, 12, 15, 19, 22] {
let (compress_time, decompress_time, size) =
benchmark_compression(&data, level);
let ratio = size as f64 / data.len() as f64 * 100.0;
println!("{:8} {:8.3}s {:9.3}s {:10} {:9.1}%",
level, compress_time, decompress_time, size, ratio);
}
}use std::fs::{self, File};
use std::io::{self, BufReader, BufWriter, Read, Write};
use std::path::Path;
use zstd::stream::Encoder;
// Archive layout metadata. This is a toy format, not .tar or .zip.
struct ArchiveHeader {
file_count: u32,
}
// Per-file metadata written before each file's contents.
struct FileEntry {
name_len: u16,
original_size: u64,
// NOTE(review): create_archive sets this to 0 and never writes it into
// the archive; per-file compressed sizes are unknowable because the
// whole archive flows through one encoder.
compressed_size: u64,
}
// Pack `files` into a single zstd-compressed toy archive:
//   [file_count u32 LE][per file: name_len u16 LE, original_size u64 LE,
//    name bytes, raw file contents]
// All metadata and contents go through ONE encoder, so the archive is a
// single zstd frame.
fn create_archive(
files: &[&Path],
output_path: &Path,
level: i32,
) -> io::Result<()> {
let output = File::create(output_path)?;
let mut encoder = Encoder::new(BufWriter::new(output), level)?;
// Write header
let header = ArchiveHeader {
file_count: files.len() as u32,
};
encoder.write_all(&header.file_count.to_le_bytes())?;
for file_path in files {
let metadata = fs::metadata(file_path)?;
// NOTE(review): these unwrap()s panic on paths without a file name or
// with non-UTF-8 names; acceptable for the demo, not for production.
let name = file_path.file_name().unwrap().to_str().unwrap();
// Write file entry metadata
let entry = FileEntry {
name_len: name.len() as u16,
original_size: metadata.len(),
compressed_size: 0, // Unknown until compressed
};
encoder.write_all(&entry.name_len.to_le_bytes())?;
encoder.write_all(&entry.original_size.to_le_bytes())?;
encoder.write_all(name.as_bytes())?;
// Write file content
let mut input = File::open(file_path)?;
io::copy(&mut input, &mut encoder)?;
}
// Finalize the single frame covering the whole archive.
encoder.finish()?;
Ok(())
}
// Demo driver: build an archive from two generated files, then clean up.
fn main() -> io::Result<()> {
// Create test files
fs::write("file1.txt", "Content of file 1\n".repeat(100))?;
fs::write("file2.txt", "Content of file 2\n".repeat(100))?;
// Create archive
let files = vec![Path::new("file1.txt"), Path::new("file2.txt")];
create_archive(&files, Path::new("archive.zst"), 10)?;
let archive_size = fs::metadata("archive.zst")?.len();
println!("Created archive: {} bytes", archive_size);
// Cleanup
fs::remove_file("file1.txt")?;
fs::remove_file("file2.txt")?;
fs::remove_file("archive.zst")?;
Ok(())
}use std::io::{self, Cursor, Read, Write};
use zstd::stream::{Encoder as ZstdEncoder, Decoder as ZstdDecoder};
// Placeholder: a real gzip comparison would require the flate2 crate.
#[cfg(feature = "gzip_comparison")]
fn compare_with_gzip() {
// This would require flate2 crate
// Showing concept only
}
// Measures zstd size and decompression time on synthetic data.
fn main() -> io::Result<()> {
let data: Vec<u8> = (0..100_000)
.map(|i| (i % 256) as u8)
.collect();
println!("Comparing compression methods:");
println!("Original: {} bytes", data.len());
// Zstd
let mut zstd_output = Vec::new();
{
let mut encoder = ZstdEncoder::new(&mut zstd_output, 3)?;
encoder.write_all(&data)?;
encoder.finish()?;
}
println!("Zstd level 3: {} bytes ({:.1}%)",
zstd_output.len(),
zstd_output.len() as f64 / data.len() as f64 * 100.0);
// Decompression speed comparison
use std::time::Instant;
let start = Instant::now();
let mut decoder = ZstdDecoder::new(Cursor::new(&zstd_output))?;
let mut decompressed = Vec::new();
decoder.read_to_end(&mut decompressed)?;
let zstd_decompress_time = start.elapsed();
println!("Zstd decompression: {:?}", zstd_decompress_time);
Ok(())
}use zstd::{compress, decompress};
use std::io::{self, Read};
use zstd::stream::Decoder;
// Decompress with a fixed upper bound on the output size.
// CAUTION: the 1024-byte cap means any input that expands past 1 KiB
// fails; use streaming decompression when the size is unknown.
fn safe_decompress(compressed: &[u8]) -> Result<Vec<u8>, String> {
// Method 1: Known size
let decompressed = decompress(compressed, 1024)
.map_err(|e| format!("Decompression failed: {}", e))?;
Ok(decompressed)
}
/// Decompress without knowing the output size in advance by streaming
/// through a Decoder.
fn streaming_decompress(compressed: &[u8]) -> io::Result<Vec<u8>> {
    let mut plain = Vec::new();
    Decoder::new(std::io::Cursor::new(compressed))?.read_to_end(&mut plain)?;
    Ok(plain)
}
fn is_valid_zstd(data: &[u8]) -> bool {
// Zstd magic number: 0xFD2FB528
if data.len() < 4 {
return false;
}
let magic = u32::from_le_bytes([data[0], data[1], data[2], data[3]]);
magic == 0xFD2FB528
}
// Exercises format validation and error handling on good and corrupted data.
fn main() -> Result<(), Box<dyn std::error::Error>> {
// Test data
let data = b"Test data";
let compressed = compress(data, 3)?;
// Verify format
println!("Valid Zstd: {}", is_valid_zstd(&compressed));
// Safe decompression
match safe_decompress(&compressed) {
Ok(decompressed) => println!("Decompressed: {} bytes", decompressed.len()),
Err(e) => eprintln!("Error: {}", e),
}
// Corrupted data
// Clobbering the first byte destroys the frame magic number.
let mut corrupted = compressed.clone();
if !corrupted.is_empty() {
corrupted[0] = 0;
}
match safe_decompress(&corrupted) {
Ok(_) => println!("Unexpected success"),
Err(e) => println!("Correctly detected corruption: {}", e),
}
Ok(())
}
Summary of the zstd API used above:
- zstd::compress(data, level) for simple in-memory compression
- zstd::decompress(data, max_size) for simple decompression
- Encoder::new(writer, level) for streaming compression
- Decoder::new(reader) for streaming decompression
- encoder.finish() to finalize compressed output
- encoder.include_checksum(true) to embed an integrity checksum
- encoder worker settings for multi-threaded compression