Rust walkthroughs
How does dashmap::DashSet compare to std::sync::Arc<std::collections::HashSet> for concurrent set operations?

DashSet provides a concurrent hash set implementation using sharded internal locking, allowing multiple threads to operate on different keys simultaneously without contention, while Arc<HashSet> requires external synchronization through a Mutex or RwLock that blocks all threads during any modification. The fundamental difference is concurrency granularity: DashSet partitions the set into multiple shards, each with its own lock, so threads accessing different shards can proceed in parallel. Arc<Mutex<HashSet>> uses a single lock for the entire set, forcing all threads to serialize access even when operating on unrelated elements. This makes DashSet significantly more scalable under concurrent write loads, while Arc<RwLock<HashSet>> can handle concurrent reads but still serializes all writes.
use dashmap::DashSet;
use std::sync::Arc;
use std::thread;
fn main() {
// DashSet: built for concurrency
let dash_set = Arc::new(DashSet::new());
let mut handles = Vec::new();
// Spawn threads that modify the set concurrently
for i in 0..4 {
let set = Arc::clone(&dash_set);
let handle = thread::spawn(move || {
for j in 0..100 {
set.insert(i * 100 + j);
}
});
handles.push(handle);
}
for handle in handles {
handle.join().unwrap();
}
println!("DashSet size: {}", dash_set.len()); // 400
}DashSet allows concurrent insertions without explicit locking.
use std::collections::HashSet;
use std::sync::{Arc, Mutex};
use std::thread;
fn main() {
    // Arc<Mutex<HashSet>>: requires explicit locking
    let hash_set = Arc::new(Mutex::new(HashSet::new()));

    let handles: Vec<_> = (0..4)
        .map(|i| {
            let set = Arc::clone(&hash_set);
            thread::spawn(move || {
                for j in 0..100 {
                    // Must acquire lock for each operation;
                    // the temporary guard drops (and releases the lock)
                    // at the end of this statement.
                    set.lock().unwrap().insert(i * 100 + j);
                }
            })
        })
        .collect();

    for handle in handles {
        handle.join().unwrap();
    }

    println!("HashSet size: {}", hash_set.lock().unwrap().len());
}
// Arc<Mutex<HashSet>> requires explicit lock acquisition for every operation.
use dashmap::DashSet;
use std::collections::HashSet;
use std::sync::{Arc, Mutex};
use std::time::Instant;
use std::thread;
fn main() {
let iterations = 100_000;
// DashSet: fine-grained locking
let dash_set = Arc::new(DashSet::new());
let start = Instant::now();
let mut handles = Vec::new();
for i in 0..8 {
let set = Arc::clone(&dash_set);
handles.push(thread::spawn(move || {
for j in 0..iterations {
set.insert((i * iterations + j) as u64);
}
}));
}
for h in handles { h.join().unwrap(); }
println!("DashSet: {:?}", start.elapsed());
// Arc<Mutex<HashSet>>: coarse-grained locking
let hash_set = Arc::new(Mutex::new(HashSet::new()));
let start = Instant::now();
let mut handles = Vec::new();
for i in 0..8 {
let set = Arc::clone(&hash_set);
handles.push(thread::spawn(move || {
for j in 0..iterations {
set.lock().unwrap().insert((i * iterations + j) as u64);
}
}));
}
for h in handles { h.join().unwrap(); }
println!("Mutex<HashSet>: {:?}", start.elapsed());
}DashSet typically shows better performance under concurrent write contention.
use dashmap::DashSet;
fn main() {
// DashSet internally uses multiple shards
// Default is number of CPUs * 4
// Each shard has its own lock
// Keys are distributed across shards by hash
let set = DashSet::<u64>::new();
// Operations only lock the relevant shard
// Thread A inserting key 1 -> locks shard for key 1
// Thread B inserting key 2 -> locks shard for key 2
// If different shards, both proceed in parallel
set.insert(1); // Locks shard N
set.insert(2); // Locks shard M (possibly different)
// Custom shard amount
let custom_shards = DashSet::<u64>::with_shard_amount(64);
// More shards = more parallelism, more memory overhead
}DashSet shards distribute contention across multiple locks.
use std::collections::HashSet;
use std::sync::{Arc, RwLock};
use dashmap::DashSet;
use std::thread;
fn main() {
    // Arc<RwLock<HashSet>> allows concurrent reads.
    let hash_set = Arc::new(RwLock::new(HashSet::<u64>::new()));

    // Populate under a single write lock.
    {
        let mut guard = hash_set.write().unwrap();
        for i in 0..100 {
            guard.insert(i);
        }
    }

    // Concurrent reads work well.
    let mut handles = Vec::new();
    for _ in 0..4 {
        let set = Arc::clone(&hash_set);
        handles.push(thread::spawn(move || {
            let guard = set.read().unwrap();
            // Multiple readers can hold the read lock simultaneously.
            guard.contains(&50)
        }));
    }

    // Join the readers so the example actually observes their results.
    // (The original dropped the handles without joining, so the spawned
    // reads were never waited on or checked.)
    for handle in handles {
        assert!(handle.join().unwrap());
    }

    // But writes still block everything:
    // any writer blocks all readers until it releases the lock.
}
// Arc<RwLock<HashSet>> supports concurrent reads but serializes writes.
use dashmap::DashSet;
use std::thread;
fn main() {
let set = DashSet::new();
// Populate
for i in 0..1000 {
set.insert(i);
}
// DashSet reads are also sharded
// Each shard lock only affects its portion
let mut handles = Vec::new();
for _ in 0..4 {
let set = &set;
handles.push(thread::spawn(move || {
for i in 0..1000 {
set.contains(&i); // Only locks one shard briefly
}
}));
}
// DashSet doesn't distinguish read vs write
// Both use same shard locks
// But short lock duration means high throughput
}DashSet uses the same sharding for reads and writes.
use dashmap::DashSet;
use std::collections::HashSet;
use std::sync::{Arc, Mutex};
fn main() {
// DashSet: direct operations
let dash = DashSet::new();
dash.insert(1);
dash.insert(2);
let has_one = dash.contains(&1);
dash.remove(&1);
let len = dash.len();
// Arc<Mutex<HashSet>>: requires lock guard
let mutex_set = Arc::new(Mutex::new(HashSet::new()));
{
let mut guard = mutex_set.lock().unwrap();
guard.insert(1);
guard.insert(2);
} // Lock held for entire block
{
let guard = mutex_set.lock().unwrap();
let has_one = guard.contains(&1);
}
{
let mut guard = mutex_set.lock().unwrap();
guard.remove(&1);
}
// DashSet is more ergonomic for scattered operations
// No need to manage guard lifetimes
}DashSet provides direct method calls without explicit lock management.
use dashmap::DashSet;
use std::collections::HashSet;
use std::sync::{Arc, Mutex};
fn main() {
// DashSet iteration
let dash = DashSet::new();
for i in 0..10 {
dash.insert(i);
}
// Iteration yields references, but the set remains accessible
for item in dash.iter() {
println!("Item: {}", item);
// Other threads can still access different shards
}
// Arc<Mutex<HashSet>> iteration
let mutex_set = Arc::new(Mutex::new(HashSet::new()));
for i in 0..10 {
mutex_set.lock().unwrap().insert(i);
}
{
let guard = mutex_set.lock().unwrap();
for item in guard.iter() {
println!("Item: {}", item);
// Lock held for entire iteration!
// No other thread can access the set
}
}
}DashSet iteration only locks one shard at a time; Mutex iteration holds the lock throughout.
use dashmap::DashSet;
use std::mem::size_of_val;
fn main() {
let dash = DashSet::<u64>::new();
let mutex_set = std::sync::Mutex::new(std::collections::HashSet::<u64>::new());
// DashSet has overhead for:
// - Multiple shard vectors
// - Multiple locks (one per shard)
// - Internal synchronization state
// Mutex<HashSet> has:
// - Single lock
// - Single hash table
// DashSet trades memory for concurrency
// More shards = more parallelism = more memory
let custom_shards = DashSet::<u64>::with_shard_amount(256);
// 256 shards = 256 locks + 256 hash tables
}DashSet has higher memory overhead due to sharding.
use dashmap::DashSet;
use std::thread;
use std::time::Instant;
fn main() {
// High contention write scenario
let set = DashSet::new();
let start = Instant::now();
// Many threads, many writes
let handles: Vec<_> = (0..16)
.map(|_| {
thread::spawn(|| {
let set = DashSet::new();
for i in 0..10_000 {
set.insert(i);
}
set
})
})
.collect();
// DashSet excels when:
// - Multiple writers
// - Random access patterns
// - Keys distributed across shards
// - Need atomic operations
println!("Completed in {:?}", start.elapsed());
}DashSet excels with concurrent writes and random access patterns.
use std::collections::HashSet;
use std::sync::{Arc, Mutex};
fn main() {
    // Low contention or single-threaded scenarios.

    // Scenario 1: read-heavy with infrequent writes —
    // RwLock<HashSet> would be better here.

    // Scenario 2: batch operations, where locking once is efficient.
    let set = Arc::new(Mutex::new(HashSet::new()));
    {
        // Batch insert - acquire lock once
        let mut guard = set.lock().unwrap();
        (0..1000).for_each(|i| {
            guard.insert(i);
        });
    } // Single lock/unlock

    // Scenario 3: need a consistent view of the entire set.
    {
        let guard = set.lock().unwrap();
        // Guaranteed no modifications during this block
        let _snapshot: Vec<_> = guard.iter().copied().collect();
        // Process snapshot consistently
    }

    // Scenario 4: minimal memory overhead needed —
    // Mutex<HashSet> has lower overhead.
}
// Mutex<HashSet> can be better for batch operations or when needing consistent snapshots.
use dashmap::DashSet;
fn main() {
let set = DashSet::new();
// DashSet provides atomic operations
set.insert(1); // Atomic insert
// Conditional operations
if !set.contains(&1) {
set.insert(1); // Race condition possible!
// But individual operations are atomic
}
// Better: use DashSet's atomic semantics
let inserted = set.insert(2); // Returns true if new
// For compound atomic operations, DashSet has limited support
// You'd need to use DashMap with unit values for more complex atomic ops
set.remove(&1); // Returns Option<T>
}Individual DashSet operations are atomic, but compound operations need care.
use std::collections::HashSet;
use std::sync::{Arc, Mutex};
fn main() {
// Arc<Mutex<HashSet>> provides easy consistent snapshots
let set = Arc::new(Mutex::new(HashSet::new()));
// Populate
for i in 0..100 {
set.lock().unwrap().insert(i);
}
// Consistent snapshot - no modifications during iteration
let snapshot: Vec<_> = {
let guard = set.lock().unwrap();
guard.iter().copied().collect()
};
// DashSet doesn't provide global consistent snapshots
// Each shard is independently locked
use dashmap::DashSet;
let dash = DashSet::new();
for i in 0..100 {
dash.insert(i);
}
// No way to lock all shards atomically
// Snapshot would be "eventually consistent"
let partial: Vec<_> = dash.iter().collect();
// Elements might be added/removed during iteration
}Mutex<HashSet> provides easy consistent snapshots; DashSet does not.
use dashmap::DashSet;
use std::hash::{Hash, Hasher};
// Example element type for a DashSet. DashSet needs the same bounds as
// HashSet (Hash + Eq); both fields participate in the derived Hash/Eq.
// Send + Sync come for free here since u64 and String are both Send + Sync.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct User {
id: u64,
name: String,
}
fn main() {
let users = DashSet::new();
users.insert(User { id: 1, name: "Alice".into() });
users.insert(User { id: 2, name: "Bob".into() });
// Check membership
let query = User { id: 1, name: "Alice".into() };
if users.contains(&query) {
println!("Alice is in the set");
}
// DashSet requires T: Send + Sync + Hash + Eq
// Same as HashSet, plus Send + Sync for thread safety
}DashSet works with any Send + Sync + Hash + Eq type.
// Use DashSet when:
// 1. Multiple threads need concurrent write access
// 2. Operations are scattered (not batched)
// 3. Keys are well-distributed across hash space
// 4. Can tolerate per-operation locking overhead
// 5. Don't need global consistent snapshots
// 6. Memory overhead is acceptable
// Use Arc<Mutex<HashSet>> when:
// 1. Write contention is low
// 2. Operations are batched (lock once, many ops)
// 3. Need consistent snapshots of entire set
// 4. Memory overhead matters
// 5. Single writer or infrequent modifications
// Use Arc<RwLock<HashSet>> when:
// 1. Read-heavy workload
// 2. Writers are infrequent
// 3. Need consistent read snapshots
// 4. Can tolerate write starvation
Choose based on access patterns and requirements.
Concurrency model:
- DashSet: sharded locking, fine-grained concurrency
- Arc<Mutex<HashSet>>: single lock, coarse-grained concurrency
- Arc<RwLock<HashSet>>: single lock, concurrent reads only

Performance characteristics:
- DashSet: high throughput under write contention
- Mutex<HashSet>: fast for single-threaded or low-contention use
- RwLock<HashSet>: fast for read-heavy workloads

API ergonomics:
- DashSet: direct method calls, no lock guards
- Mutex<HashSet>: explicit lock acquisition and guard management

Memory overhead:
- DashSet: higher (multiple shards, multiple locks)
- Mutex<HashSet>: lower (single allocation)

Consistency guarantees:
- DashSet: per-operation atomicity, eventual consistency for iteration
- Mutex<HashSet>: global consistency while the lock is held

Use DashSet for: scattered concurrent writes from many threads.
Use Mutex for: batched operations and consistent whole-set snapshots.
Key insight: DashSet trades memory overhead and per-operation cost for scalability under contention. The sharded design means that threads working on different parts of the set don't block each other, which is crucial for high-throughput concurrent workloads. However, this comes at a cost: each operation has slightly more overhead than a simple hash lookup, and there's no way to get a consistent snapshot of the entire set. Arc<Mutex<HashSet>> is simpler and has lower per-operation overhead when contention is low, but becomes a bottleneck when many threads need to write. Arc<RwLock<HashSet>> can be a middle ground for read-heavy workloads, but any writer still blocks all readers. The choice depends on your specific concurrency pattern: if you have scattered concurrent writes, DashSet wins; if you have batched writes or need consistent views, Mutex or RwLock may be better.