Loading page…
Rust walkthroughs
Loading page…
How does bytes::Bytes differ from Vec<u8> and when would you choose one over the other for network programming?
Vec<u8> is Rust's standard heap-allocated byte vector, while bytes::Bytes is a reference-counted byte buffer optimized for network programming. The key difference lies in how they handle ownership and cloning—Bytes enables cheap clones through reference counting, making it ideal for scenarios where the same data needs to be shared across multiple owners.
Vec<u8> owns its data uniquely:
/// Demonstrates that cloning a `Vec<u8>` yields an independent deep copy.
fn vec_basics() {
    let mut data: Vec<u8> = vec![1, 2, 3, 4, 5];
    data.push(6); // Can mutate

    // Clone creates a full copy. It is taken before the indexed write below
    // so that the two printed vectors visibly diverge.
    let copy = data.clone();
    data[0] = 100; // Direct indexing; only `data`'s allocation is modified

    // Both own separate allocations
    println!("Original: {:?}", data); // [100, 2, 3, 4, 5, 6]
    println!("Copy: {:?}", copy); // [1, 2, 3, 4, 5, 6]
}
Each Vec<u8> owns its heap allocation, and cloning copies all the data.
Bytes uses atomic reference counting to share data:
use bytes::Bytes;
fn bytes_basics() {
let data: Bytes = Bytes::from(vec![1, 2, 3, 4, 5]);
// Clone is O(1) - just increments reference count
let copy1 = data.clone();
let copy2 = data.clone();
// All three point to the same underlying data
println!("Original: {:?}", data); // [1, 2, 3, 4, 5]
println!("Copy1: {:?}", copy1); // [1, 2, 3, 4, 5]
println!("Copy2: {:?}", copy2); // [1, 2, 3, 4, 5]
// No heap allocation for clones
}
Cloning Bytes only increments an atomic counter—no data is copied.
use bytes::Bytes;
/// Times a `Vec<u8>` clone against a `Bytes` clone of the same 10 MB buffer
/// and prints both elapsed durations.
fn clone_performance() {
    let big_data: Vec<u8> = vec![0u8; 10_000_000]; // 10 MB
    // Vec clone: copies 10 MB
    let start = std::time::Instant::now();
    let vec_copy = big_data.clone();
    println!("Vec clone: {:?}", start.elapsed()); // Milliseconds
    // Bytes clone: copies 0 bytes
    // `big_data` is moved into the `Bytes`; the conversion happens before the
    // second timer starts, so only the clone itself is measured.
    let bytes_data: Bytes = Bytes::from(big_data);
    let start = std::time::Instant::now();
    let bytes_copy = bytes_data.clone();
    println!("Bytes clone: {:?}", start.elapsed()); // Nanoseconds
    // The difference grows with data size
}
For large buffers, Bytes::clone is dramatically faster than Vec::clone.
use bytes::Bytes;
/// Shows that `Bytes::slice` returns owned views into one shared buffer.
fn slicing() {
    let data: Bytes = Bytes::from("hello world");
    // slice creates a view into the same data
    let slice = data.slice(0..5);
    // `{:?}` on `Bytes` prints a byte-string literal
    println!("Slice: {:?}", slice); // b"hello"
    println!("Original: {:?}", data); // b"hello world"
    // Both share the same underlying allocation
    // slice just has different offset/length
    // The range is relative to `slice`, not to `data`
    let nested = slice.slice(2..4);
    println!("Nested: {:?}", nested); // b"ll"
    // Still same allocation, just different view
}
Bytes::slice creates a new view without copying data, unlike Vec slicing which requires ownership.
/// Contrasts borrowing a sub-range of a `Vec<u8>` with copying it out.
fn vec_slicing() {
    let bytes: Vec<u8> = Vec::from(&b"hello world"[..]);

    // A range index only borrows; the result is tied to `bytes`.
    let _borrowed: &[u8] = &bytes[..5];

    // Owning the same five bytes requires a fresh allocation and a copy,
    // which is what handing the data to another owner would cost.
    let _owned: Vec<u8> = bytes[..5].to_owned();
}
/// Shows why a `&[u8]` borrowed from a `Vec<u8>` cannot outlive that vector.
fn vec_slice_lifetime() {
    // Vec slices are borrowed
    let slice: &[u8];
    {
        let data = b"hello".to_vec();
        slice = &data[0..2]; // Borrows `data`, which is dropped at the closing brace below
    }
    // `slice` would dangle here. As written, nothing reads it after the block,
    // so the borrow checker accepts this function. Adding a use such as
    //     println!("{:?}", slice);
    // is what triggers error E0597: `data` does not live long enough.
}
Vec slices are borrowed references with lifetime constraints; Bytes slices are owned.
use bytes::Bytes;
/// Walks through the sharing model: a clone and a slice of one `Bytes` are
/// dropped one after another while the remaining handle stays usable.
fn bytes_internals() {
    // Bytes is conceptually:
    // (simplified sketch for explanation; the crate's real fields may differ — TODO confirm)
    // struct Bytes {
    // ptr: *const u8,
    // len: usize,
    // // Reference to shared ownership
    // data: Arc<SharedData>,
    // }
    let original = Bytes::from(vec![1, 2, 3, 4, 5]);
    let clone = original.clone();
    let slice = original.slice(1..4);
    // All three share:
    // - Same heap allocation [1, 2, 3, 4, 5]
    // - Same Arc reference count
    // They differ only in:
    // - ptr offset
    // - len
    drop(original);
    drop(clone);
    // Data still alive because slice holds reference
    drop(slice);
    // Now the allocation can be freed
}
Bytes combines pointer arithmetic with reference counting for efficient sharing.
use bytes::Bytes;
use std::sync::Arc;
// Simulating a broadcast scenario
/// Clones one `Bytes` message 1000 times, once per simulated client.
fn broadcast_example() {
    let message: Bytes = Bytes::from("important broadcast message");
    // Same message to 1000 clients
    for _ in 0..1000 {
        let client_message = message.clone(); // O(1)
        // The send is left as a placeholder; the clone is dropped each iteration.
        // send_to_client(client_message);
    }
    // With Vec<u8>, this would copy 1000 times
}
// Simulating Vec approach
/// Same loop as the `Bytes` broadcast, but each clone copies the `Vec<u8>`.
fn broadcast_vec() {
    let message: Vec<u8> = b"important broadcast message".to_vec();
    // Same message to 1000 clients
    for _ in 0..1000 {
        let client_message = message.clone(); // Copies all bytes
        // The send is left as a placeholder; the copy is dropped each iteration.
        // send_to_client(client_message);
    }
    // 1000x memory and CPU cost
}
Broadcasting data to multiple consumers is where Bytes excels.
use bytes::Bytes;
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
/// Cache of response bodies keyed by request path, guarded by a `Mutex`.
struct ResponseCache {
    cache: Mutex<HashMap<String, Bytes>>,
}
impl ResponseCache {
    /// Creates an empty cache.
    fn new() -> Self {
        Self {
            cache: Mutex::new(HashMap::new()),
        }
    }
    /// Returns the body stored under `path`, calling `generator` to create and
    /// store it first when the key is absent.
    ///
    /// # Panics
    /// Panics if the mutex is poisoned (`lock().unwrap()`).
    fn get_or_insert(&self, path: String, generator: impl FnOnce() -> Bytes) -> Bytes {
        // NOTE(review): the guard is still held while `generator` runs, so a
        // slow generator blocks every other caller of this cache.
        let mut cache = self.cache.lock().unwrap();
        cache.entry(path)
            .or_insert_with(generator)
            .clone() // O(1) clone
    }
}
/// Requests the same path twice to show that the second lookup reuses the
/// cached `Bytes` instead of regenerating it.
fn cached_responses() {
    let cache = Arc::new(ResponseCache::new());
    // First request generates the response
    let response1 = cache.get_or_insert(
        "/api/data".to_string(),
        || Bytes::from(generate_expensive_response())
    );
    // Subsequent requests just clone the Bytes reference
    let response2 = cache.get_or_insert(
        "/api/data".to_string(),
        || Bytes::from(generate_expensive_response()) // Not called
    );
    // Both responses share the same underlying data
}
/// Placeholder for costly response generation; returns a fixed JSON body.
fn generate_expensive_response() -> Vec<u8> {
    // Expensive computation...
    b"{\"data\": \"value\"}".to_vec()
}
The cache stores Bytes, allowing multiple requests to share the same data.
use bytes::{Bytes, Buf};
/// Reads integers off the front of a `Bytes` through the `Buf` trait.
fn reading_from_bytes() {
    let mut data = Bytes::from(&b"\x01\x02\x03\x04\x05\x06\x07\x08"[..]);
    // Buf trait provides reading methods
    let first_byte = data.get_u8(); // 0x01
    let second_byte = data.get_u8(); // 0x02
    let two_bytes = data.get_u16(); // 0x0304
    // Reading advances the internal cursor
    // But doesn't consume the Bytes
    println!("Remaining: {:?}", data); // b"\x05\x06\x07\x08"
    // Can also use slice-like access
    let data2 = Bytes::from(&b"hello"[..]);
    // `&data2` is passed where `&[u8]` is expected
    println!("As string: {:?}", std::str::from_utf8(&data2).unwrap());
}
The Buf trait provides convenient methods for parsing binary data.
use bytes::{BytesMut, BufMut};
/// Builds a buffer with `BufMut` writes, then freezes it into a shareable `Bytes`.
fn writing_to_bytes() {
    let mut buffer = BytesMut::with_capacity(64);
    // Write various types
    buffer.put_u8(0x01);
    buffer.put_u16(0x0203);
    buffer.put_u32(0x04050607);
    buffer.put_slice(b"hello");
    println!("Written: {:?}", buffer);
    // Freeze into immutable Bytes
    // `freeze` takes `buffer` by value, so it cannot be written to afterwards
    let frozen: bytes::Bytes = buffer.freeze();
    // frozen is immutable and shareable
    let shared = frozen.clone(); // O(1)
}
BytesMut is the mutable builder for Bytes, allowing efficient construction.
use bytes::{BytesMut, BufMut};
/// Appends to a `BytesMut` that starts empty and grows on demand.
fn growable_buffer() {
    let mut buffer = BytesMut::new();
    // Grows automatically
    buffer.put_slice(b"hello");
    buffer.put_slice(b" ");
    buffer.put_slice(b"world");
    println!("Content: {:?}", buffer); // b"hello world"
    println!("Capacity: {}", buffer.capacity());
    // Can reserve capacity for efficiency
    buffer.reserve(1024);
    buffer.put_slice(&[0u8; 1024]);
}
BytesMut grows like Vec but can freeze into shareable Bytes.
use bytes::{Bytes, Buf};
/// Parses one length-prefixed frame from the front of `data`.
///
/// Layout: a `u32` length read with `get_u32`, followed by `length` bytes
/// consisting of a one-byte message type and then the payload.
///
/// Returns `None` when the buffer is shorter than the 4-byte prefix, when the
/// declared length is zero (no room for the type byte), or when fewer than
/// `length` bytes follow the prefix. The payload is a `Bytes` view, not a copy.
fn parse_message(mut data: Bytes) -> Option<Message> {
    if data.len() < 4 {
        return None;
    }
    let length = data.get_u32() as usize;
    // A zero length would underflow `length - 1` and would read a type byte
    // that does not belong to this frame, so reject it up front.
    let payload_len = length.checked_sub(1)?;
    if data.len() < length {
        return None;
    }
    let message_type = data.get_u8();
    let payload = data.split_to(payload_len);
    Some(Message {
        message_type,
        payload, // Bytes, cheap to pass around
    })
}
/// A parsed frame: its type tag and the payload bytes that followed it.
struct Message {
    message_type: u8,
    payload: Bytes,
}
/// Parses one hard-coded frame and prints its type and payload.
fn handle_messages() {
    // Frame bytes: length prefix 5, type byte 0x01, payload "test"
    let raw = Bytes::from(&b"\x00\x00\x00\x05\x01test"[..]);
    if let Some(message) = parse_message(raw) {
        println!("Type: {}", message.message_type);
        println!("Payload: {:?}", message.payload);
    }
}
Bytes is well-suited for protocol parsing where different parts of a message need to be passed around.
use bytes::{Bytes, BytesMut, BufMut};
// With Vec<u8>
/// Protocol reader backed by a plain `Vec<u8>`.
struct VecProtocol {
    buffer: Vec<u8>,
}

impl VecProtocol {
    /// Splits the buffer into its leading type byte and a copy of the rest.
    ///
    /// Returns `None` when the buffer is empty instead of panicking on the
    /// index. The buffer itself is left untouched.
    fn read_message(&mut self) -> Option<(u8, Vec<u8>)> {
        let (&message_type, rest) = self.buffer.split_first()?;
        // Must copy payload into new Vec
        Some((message_type, rest.to_vec())) // Copy!
    }
}
// With Bytes
/// Protocol reader backed by a shared `Bytes` buffer.
struct BytesProtocol {
    buffer: Bytes,
}

impl BytesProtocol {
    /// Splits the buffer into its leading type byte and a view of the rest.
    ///
    /// Returns `None` when the buffer is empty. The payload shares the
    /// buffer's allocation; the buffer itself is left untouched.
    fn read_message(&mut self) -> Option<(u8, Bytes)> {
        let message_type = *self.buffer.first()?;
        // Split without copying. `slice` is an inherent method, so this does
        // not depend on the `Buf` trait being in scope.
        let payload = self.buffer.slice(1..); // Skip type byte
        Some((message_type, payload)) // No copy!
    }
}
The Bytes version avoids copying the payload.
use bytes::{Bytes, BytesMut};
/// Shows the two ways to change bytes that start out in an immutable `Bytes`.
fn mutation_scenario() {
    // Bytes is immutable - can't modify in place
    let data = Bytes::from("hello");
    // This won't compile:
    // data[0] = b'H';
    // Must use BytesMut for mutation
    let mut mutable = BytesMut::from("hello");
    mutable[0] = b'H';
    // Or convert to Vec, modify, convert back
    // `to_vec` copies the bytes out of `data`
    let mut vec = data.to_vec();
    vec[0] = b'H';
    // Shadows the original `data` with the modified contents
    let data = Bytes::from(vec);
}
If you need frequent mutations, Vec<u8> or BytesMut is better.
use bytes::Bytes;
/// Clones a three-byte buffer both ways to illustrate the small-data case.
fn small_data() {
    // For very small data, Bytes overhead might not be worth it
    let small: Vec<u8> = vec![1, 2, 3];
    // Bytes has Arc overhead
    // `small` is cloned here because it is used again below
    let bytes: Bytes = Bytes::from(small.clone());
    // For tiny amounts, Vec clone is fast anyway
    let vec_copy = small.clone();
    let bytes_copy = bytes.clone();
    // The overhead of Arc may exceed copy cost for tiny data
    // Rule of thumb: Bytes shines for data > ~1KB
}
For small amounts of data, Vec<u8> may be simpler and equally efficient.
use bytes::Bytes;
use tokio::net::TcpStream;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
/// Connects to 127.0.0.1:8080, performs one read into a `BytesMut`, freezes
/// it, and writes the same bytes back to the stream.
///
/// # Errors
/// Propagates any I/O error from the connect, read, or write.
async fn network_io() -> std::io::Result<()> {
    let mut stream = TcpStream::connect("127.0.0.1:8080").await?;
    // Read into BytesMut
    let mut buffer = bytes::BytesMut::with_capacity(1024);
    // The returned byte count is discarded; a single call is made, not a loop.
    stream.read_buf(&mut buffer).await?;
    // Freeze to share without copying
    let received = buffer.freeze();
    // Can share with multiple handlers
    let handler1_data = received.clone();
    let handler2_data = received.clone();
    // Write Bytes directly
    stream.write_all(&received).await?;
    Ok(())
}
Tokio and other async runtimes integrate well with Bytes.
use bytes::Bytes;
use hyper::{Body, Request, Response};
// hyper::Body wraps Bytes
/// Echo handler: collects the request body into `Bytes` and returns it as
/// the response body.
///
/// # Panics
/// Panics if collecting the body fails (`unwrap`).
async fn handle_request(req: Request<Body>) -> Response<Body> {
    // Collect body into Bytes
    let body_bytes = hyper::body::to_bytes(req.into_body())
        .await
        .unwrap();
    // Share the same bytes in response
    let response_bytes = body_bytes.clone();
    Response::new(Body::from(response_bytes))
}
Hyper uses Bytes internally for efficient request/response handling.
use bytes::{Bytes, Buf};
/// Parses a sequence of headers from `data`.
///
/// Each header is encoded as: one length byte, the name, one length byte,
/// the value, then a one-byte delimiter. The delimiter after the final
/// header may be omitted.
///
/// Parsing stops at the first truncated header; every header read completely
/// before that point is returned. Names and values are `Bytes` views into the
/// original buffer, so no header data is copied.
fn parse_headers(mut data: Bytes) -> Headers {
    let mut headers = Headers::new();
    while data.len() >= 2 {
        let name_len = data.get_u8() as usize;
        // Need the whole name plus the value-length byte that follows it.
        if data.len() < name_len + 1 {
            break;
        }
        let name = data.split_to(name_len);

        let value_len = data.get_u8() as usize;
        if data.len() < value_len {
            break;
        }
        let value = data.split_to(value_len);

        // name and value are Bytes views into original
        // No copying of actual data
        headers.insert(name, value);

        // Advancing past the end would panic, so only skip a delimiter that
        // is actually present.
        if data.is_empty() {
            break;
        }
        data.advance(1); // Skip delimiter
    }
    headers
}
/// Ordered list of header name/value pairs, each stored as `Bytes`.
struct Headers {
    data: Vec<(Bytes, Bytes)>,
}
impl Headers {
    /// Creates an empty header list.
    fn new() -> Self {
        Self { data: Vec::new() }
    }
    /// Appends a pair; existing entries with the same name are kept.
    fn insert(&mut self, name: Bytes, value: Bytes) {
        self.data.push((name, value));
    }
}
Header names and values are views into the original buffer without copying.
use bytes::Bytes;
use std::mem::size_of;
/// Prints the in-memory size of the `Vec<u8>` and `Bytes` handle types.
fn memory_layout() {
    // Vec<u8> is three pointers
    // (the byte counts in these comments assume a 64-bit target)
    println!("Vec<u8> size: {} bytes", size_of::<Vec<u8>>()); // 24 bytes
    // Bytes is similar but with Arc
    println!("Bytes size: {} bytes", size_of::<Bytes>()); // ~32 bytes
    // The size difference is negligible
    // The benefit is in clone behavior
}
The size overhead is minimal; the benefit is in avoiding data copies.
use bytes::{Bytes, BytesMut, BufMut};
// Use Vec<u8> when:
// - You need to mutate the data frequently
// - Data is small (< 1KB)
// - Only one owner ever needs the data
// - Simple, non-network code
/// Builds a small, single-use buffer in a `Vec<u8>` and hands it to `process`.
fn vec_appropriate() {
    // Building small, temporary data
    let buffer = status_line();
    // Only used once, no sharing needed
    process(&buffer);
}

/// Returns the ASCII line `status: 200\n`.
fn status_line() -> Vec<u8> {
    let mut buffer = Vec::new();
    buffer.extend_from_slice(b"status: ");
    // Write the status code as ASCII digits. Pushing `200_u8` would insert
    // the single raw byte 0xC8 into what is otherwise a text line.
    buffer.extend_from_slice(b"200");
    buffer.extend_from_slice(b"\n");
    buffer
}

/// Stub consumer; borrows the bytes and does nothing with them.
fn process(data: &[u8]) {}
// Use Bytes when:
// - Same data shared across multiple owners
// - Network protocols and I/O
// - Parsing without copying
// - Working with async runtimes
// - Data is moderate to large
/// Splits one received message into header and body views and passes each,
/// plus the whole message, to separate consumers.
fn bytes_appropriate() {
    // Parse a network message
    let message = Bytes::from(receive_from_network());
    // Extract header for logging
    let header = message.slice(0..4);
    // Extract body for processing
    let body = message.slice(4..);
    // Both header and body share the original allocation
    log_header(header);
    process_body(body);
    // Send same data to multiple recipients
    // `message` is moved here; the two slices above were taken first
    broadcast(message);
}
/// Stand-in for a network read: yields 1024 zero bytes.
fn receive_from_network() -> Vec<u8> {
    const MESSAGE_LEN: usize = 1024;
    std::iter::repeat(0u8).take(MESSAGE_LEN).collect()
}
/// Stub consumer for the header view; takes ownership and does nothing.
fn log_header(header: Bytes) {}
/// Stub consumer for the body view; takes ownership and does nothing.
fn process_body(body: Bytes) {}
/// Stub consumer for the full message; takes ownership and does nothing.
fn broadcast(data: Bytes) {}
// Use BytesMut when:
// - Building Bytes incrementally
// - Need growable buffer for network writes
/// Builds a message in a `BytesMut`, freezes it, and sends it twice.
fn bytesmut_appropriate() {
    let mut buffer = BytesMut::with_capacity(1024);
    // Build message incrementally
    buffer.put_u32(0x12345678);
    buffer.put_slice(b"hello");
    buffer.put_u8(0);
    // Freeze when done building
    let message = buffer.freeze();
    // Now shareable
    // The first send gets a clone; the second takes the original by value
    send_message(message.clone());
    send_message(message);
}
/// Stub sender; takes ownership of `msg` and does nothing.
fn send_message(msg: Bytes) {}
The key differences between Bytes and Vec<u8>:
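Ownership and Cloning:
- Vec<u8>: Unique ownership, clone copies all data
- Bytes: Shared ownership via Arc, clone is O(1)
Mutation:
- Vec<u8>: Mutable, can modify in place
- Bytes: Immutable, use BytesMut for building
Slicing:
- Vec<u8> slices are borrowed references with lifetimes
- Bytes slices are owned, can be passed anywhere
Use Vec<u8> when:
- You need to mutate the data frequently
- Data is small (< 1KB)
- Only one owner ever needs the data
- The code is simple and not network-related
Use Bytes when:
- The same data is shared across multiple owners
- Working with network protocols and I/O
- Parsing without copying
- Working with async runtimes
- Data is moderate to large
Use BytesMut when:
- Building Bytes incrementally
- You need a growable buffer for network writes
In network programming, Bytes is almost always preferred for received data that needs to be parsed, shared, or passed to multiple handlers. The O(1) clone operation enables patterns that would be prohibitively expensive with Vec<u8>.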