Loading page…
Rust walkthroughs
Loading page…
indexmap::IndexSet differ from std::collections::HashSet for ordered unique collections?indexmap::IndexSet preserves insertion order while maintaining unique elements, whereas std::collections::HashSet provides no ordering guarantees. The ordering property enables index-based access, deterministic iteration, and predictable serialization, at the cost of additional memory for maintaining a parallel index structure and slightly slower insertion and removal operations. IndexSet stores elements in a contiguous vector alongside a hash map, allowing O(1) lookup by value (like HashSet) and O(1) lookup by index (unlike HashSet). The deterministic iteration order makes IndexSet suitable for configuration management, reproducible builds, and testing scenarios where HashSet's nondeterministic order would cause problems. For pure set operations without ordering requirements, HashSet remains more memory-efficient and performs better on insertion-heavy workloads.
use std::collections::HashSet;
use indexmap::IndexSet;
fn main() {
// HashSet: no order guarantees
let mut hashset: HashSet<&str> = HashSet::new();
hashset.insert("first");
hashset.insert("second");
hashset.insert("third");
println!("HashSet iteration order:");
for item in &hashset {
println!(" {}", item);
}
// Order is arbitrary and may differ between runs
// IndexSet: preserves insertion order
let mut indexset: IndexSet<&str> = IndexSet::new();
indexset.insert("first");
indexset.insert("second");
indexset.insert("third");
println!("\nIndexSet iteration order:");
for item in &indexset {
println!(" {}", item);
}
// Always prints: first, second, third
}IndexSet iterates in insertion order; HashSet order is unspecified.
use indexmap::IndexSet;
fn main() {
let mut set: IndexSet<&str> = IndexSet::new();
set.insert("apple");
set.insert("banana");
set.insert("cherry");
// Access by index - O(1)
let first = set.get_index(0);
let second = set.get_index(1);
let third = set.get_index(2);
println!("Index 0: {:?}", first); // Some("apple")
println!("Index 1: {:?}", second); // Some("banana")
println!("Index 2: {:?}", third); // Some("cherry")
// Get index of a value - O(1)
let index = set.get_index_of("banana");
println!("Index of 'banana': {:?}", index); // Some(1)
// HashSet does not support index-based access
// You would need to iterate and count
}IndexSet provides get_index() for position-based access; HashSet has no such API.
use indexmap::IndexSet;
fn main() {
let mut set: IndexSet<String> = IndexSet::new();
set.insert("config.toml".to_string());
set.insert("data.json".to_string());
set.insert("output.csv".to_string());
// Check if element exists and get its position
if let Some(index) = set.get_index_of("data.json") {
println!("'data.json' is at index {}", index);
}
// Use get_index_of for conditional logic based on position
let important_file = "config.toml";
match set.get_index_of(important_file) {
Some(0) => println!("{} is the first file", important_file),
Some(n) => println!("{} is at position {}", important_file, n),
None => println!("{} not found", important_file),
}
// Iteration with index information
for (index, item) in set.iter().enumerate() {
println!("Index {}: {}", index, item);
}
}get_index_of() returns the position of an element; HashSet cannot provide this efficiently.
use indexmap::IndexSet;
use std::collections::HashSet;
fn main() {
// IndexSet::insert returns (index, bool)
let mut set: IndexSet<&str> = IndexSet::new();
let (idx1, inserted1) = set.insert("first");
println!("Insert 'first': index={}, inserted={}", idx1, inserted1);
// index=0, inserted=true
let (idx2, inserted2) = set.insert("second");
println!("Insert 'second': index={}, inserted={}", idx2, inserted2);
// index=1, inserted=true
let (idx3, inserted3) = set.insert("first"); // Duplicate
println!("Insert 'first' again: index={}, inserted={}", idx3, inserted3);
// index=0, inserted=false (not inserted, existing index returned)
// HashSet::insert returns only bool
let mut hashset: HashSet<&str> = HashSet::new();
let inserted = hashset.insert("first");
println!("\nHashSet insert: inserted={}", inserted);
// No index information available
}IndexSet::insert returns (index, bool) giving both position and success status.
use indexmap::IndexSet;
fn main() {
let mut set: IndexSet<&str> = IndexSet::new();
set.insert("a");
set.insert("b");
set.insert("c");
set.insert("d");
set.insert("e");
println!("Before removal: {:?}", set.iter().collect::<Vec<_>>());
// ["a", "b", "c", "d", "e"]
// Remove by value - shifts subsequent elements
set.remove("c");
println!("After removing 'c': {:?}", set.iter().collect::<Vec<_>>());
// ["a", "b", "d", "e"]
// Indices of "d" and "e" changed!
// Remove by index - use swap_remove for O(1)
let removed = set.swap_remove_index(1); // Remove "b"
println!("Removed index 1: {:?}", removed);
println!("After swap_remove_index: {:?}", set.iter().collect::<Vec<_>>());
// "d" and "e" remain, "b" removed, "d" may swap positions
// shift_remove preserves order but is O(n)
let mut set2: IndexSet<&str> = IndexSet::new();
set2.insert("a");
set2.insert("b");
set2.insert("c");
set2.insert("d");
set2.shift_remove("b");
println!("After shift_remove: {:?}", set2.iter().collect::<Vec<_>>());
// ["a", "c", "d"] - order preserved
}IndexSet offers remove, swap_remove, and shift_remove with different performance trade-offs.
use std::collections::HashSet;
use indexmap::IndexSet;
use std::time::Instant;
fn main() {
// Memory comparison
// HashSet: stores elements with hash table overhead
// IndexSet: stores elements + parallel index array
let n = 100_000;
// Insertion performance
let mut hashset: HashSet<u64> = HashSet::with_capacity(n);
let mut indexset: IndexSet<u64> = IndexSet::with_capacity(n);
let start = Instant::now();
for i in 0..n {
hashset.insert(i);
}
let hashset_insert = start.elapsed();
let start = Instant::now();
for i in 0..n {
indexset.insert(i);
}
let indexset_insert = start.elapsed();
println!("Insert {} elements:", n);
println!(" HashSet: {:?}", hashset_insert);
println!(" IndexSet: {:?}", indexset_insert);
// HashSet is typically faster for insertion
// Lookup performance
let start = Instant::now();
let mut found = 0;
for i in (0..n).step_by(100) {
if hashset.contains(&i) {
found += 1;
}
}
let hashset_lookup = start.elapsed();
let start = Instant::now();
let mut found = 0;
for i in (0..n).step_by(100) {
if indexset.contains(&i) {
found += 1;
}
}
let indexset_lookup = start.elapsed();
println!("\nLookup performance:");
println!(" HashSet: {:?}", hashset_lookup);
println!(" IndexSet: {:?}", indexset_lookup);
// Lookup is O(1) for both, similar performance
}HashSet is faster for insertion; lookup is similar; IndexSet uses more memory.
use indexmap::IndexSet;
use std::collections::HashSet;
use std::mem::size_of_val;
fn main() {
// Internal structure comparison:
// HashSet: hash table with entries
// Each entry contains: hash, key, metadata
// Memory depends on load factor (default 0.875)
// IndexSet: hash table + parallel array
// - Hash table maps hashes to indices
// - Dense array stores values in insertion order
// - Extra array of indices for hash -> position mapping
let elements = vec!["apple", "banana", "cherry"];
let hashset: HashSet<&str> = elements.iter().copied().collect();
let indexset: IndexSet<&str> = elements.iter().copied().collect();
// IndexSet uses more memory due to parallel structure
// but provides O(1) indexed access
println!("HashSet size: {} bytes", size_of_val(&hashset));
println!("IndexSet size: {} bytes", size_of_val(&indexset));
// Note: size_of_val only shows stack size, not heap allocation
// For large collections, IndexSet memory overhead is ~20-30% more
}IndexSet maintains a parallel array structure for ordering, increasing memory usage.
use indexmap::IndexSet;
fn main() {
// Configuration with priority order
let mut config_files: IndexSet<String> = IndexSet::new();
// Insert in priority order
config_files.insert("/etc/app/config.toml".to_string());
config_files.insert("~/.config/app/config.toml".to_string());
config_files.insert("./config.toml".to_string());
// Process in order - later configs override earlier ones
for (priority, path) in config_files.iter().enumerate() {
println!("Priority {}: {}", priority, path);
}
// Index 0 = highest priority (first inserted)
// This order is preserved and deterministic
// HashSet would shuffle these unpredictably
}IndexSet preserves configuration priority order for deterministic override behavior.
use indexmap::IndexSet;
use serde::{Serialize, Deserialize};
#[derive(Serialize, Deserialize)]
struct Document {
tags: IndexSet<String>,
}
fn main() {
let mut doc = Document {
tags: IndexSet::new(),
};
doc.tags.insert("rust".to_string());
doc.tags.insert("programming".to_string());
doc.tags.insert("tutorial".to_string());
// Serialization preserves order
let json = serde_json::to_string_pretty(&doc).unwrap();
println!("{}", json);
// Output maintains: rust, programming, tutorial order
// Deserialization also preserves order
let doc2: Document = serde_json::from_str(&json).unwrap();
assert_eq!(doc.tags, doc2.tags);
}IndexSet serializes deterministically; HashSet order varies between runs.
use indexmap::IndexSet;
fn deduplicate_preserve_order(items: Vec<String>) -> Vec<String> {
// IndexSet automatically handles duplicates
// while preserving first-occurrence order
let set: IndexSet<String> = items.into_iter().collect();
set.into_iter().collect()
}
fn main() {
let items = vec![
"first".to_string(),
"second".to_string(),
"first".to_string(), // Duplicate
"third".to_string(),
"second".to_string(), // Duplicate
];
let deduped = deduplicate_preserve_order(items);
println!("Deduplicated: {:?}", deduped);
// ["first", "second", "third"]
// HashSet would lose the original order
}IndexSet removes duplicates while preserving first-occurrence order.
use std::collections::HashSet;
use indexmap::IndexSet;
fn main() {
let items = vec!["a", "b", "c", "d", "e"];
let hashset: HashSet<&str> = items.iter().copied().collect();
let indexset: IndexSet<&str> = items.iter().copied().collect();
// HashSet iteration order is unspecified
println!("HashSet iteration:");
for item in &hashset {
println!(" {}", item);
}
// Order may vary between runs
// IndexSet iteration is deterministic
println!("\nIndexSet iteration:");
for item in &indexset {
println!(" {}", item);
}
// Always: a, b, c, d, e
// IndexSet supports indexed iteration
println!("\nIndexSet indexed:");
for i in 0..indexset.len() {
println!(" [{}] = {}", i, indexset[i]);
}
// HashSet does not support indexing
}IndexSet provides deterministic iteration and supports indexing; HashSet does not.
use indexmap::IndexSet;
fn main() {
let mut a: IndexSet<i32> = IndexSet::new();
a.insert(1);
a.insert(2);
a.insert(3);
let mut b: IndexSet<i32> = IndexSet::new();
b.insert(2);
b.insert(3);
b.insert(4);
// Set operations maintain order from the left operand
let intersection: IndexSet<i32> = a.intersection(&b).copied().collect();
println!("Intersection: {:?}", intersection);
// [2, 3] - order from 'a'
let union: IndexSet<i32> = a.union(&b).copied().collect();
println!("Union: {:?}", union);
// [1, 2, 3, 4] - 'a' elements first, then 'b' elements not in 'a'
let difference: IndexSet<i32> = a.difference(&b).copied().collect();
println!("Difference: {:?}", difference);
// [1] - elements in 'a' but not in 'b'
let symmetric_diff: IndexSet<i32> = a.symmetric_difference(&b).copied().collect();
println!("Symmetric difference: {:?}", symmetric_diff);
// [1, 4] - elements in one but not both
}Set operations on IndexSet maintain deterministic order based on operands.
use indexmap::IndexSet;
fn main() {
let mut set: IndexSet<&str> = IndexSet::new();
set.insert("first");
set.insert("second");
set.insert("third");
// Get both value and index
for (index, value) in set.iter().enumerate() {
println!("[{}] = {}", index, value);
}
// Check position relative to another element
let pos_a = set.get_index_of("first");
let pos_b = set.get_index_of("third");
match (pos_a, pos_b) {
(Some(a), Some(b)) if a < b => {
println!("'first' comes before 'third'");
}
(Some(a), Some(b)) if a > b => {
println!("'third' comes before 'first'");
}
_ => {}
}
}IndexSet enables position-based comparisons between elements.
use std::collections::HashSet;
use indexmap::IndexSet;
fn main() {
// HashSet to IndexSet - order is arbitrary
let hashset: HashSet<i32> = vec![3, 1, 4, 1, 5].into_iter().collect();
let indexset: IndexSet<i32> = hashset.into_iter().collect();
println!("HashSet -> IndexSet: {:?}", indexset);
// Order depends on HashSet iteration
// IndexSet to HashSet - loses order
let indexset: IndexSet<i32> = vec![1, 2, 3].into_iter().collect();
let hashset: HashSet<i32> = indexset.into_iter().collect();
println!("IndexSet -> HashSet: {:?}", hashset);
// Order lost
// Vec to IndexSet - preserves order
let vec = vec!["a", "b", "c"];
let indexset: IndexSet<&str> = vec.into_iter().collect();
println!("Vec -> IndexSet: {:?}", indexset);
// Order preserved
// IndexSet to Vec - preserves order
let vec: Vec<&str> = indexset.into_iter().collect();
println!("IndexSet -> Vec: {:?}", vec);
// Order preserved
}Convert from Vec to IndexSet to preserve order; HashSet conversions lose ordering.
use indexmap::IndexSet;
fn main() {
// Pre-allocate capacity
let mut set: IndexSet<i32> = IndexSet::with_capacity(100);
println!("Capacity: {}, Len: {}", set.capacity(), set.len());
// Insert elements
for i in 0..50 {
set.insert(i);
}
println!("After inserts - Capacity: {}, Len: {}", set.capacity(), set.len());
// Reserve additional capacity
set.reserve(50);
println!("After reserve - Capacity: {}, Len: {}", set.capacity(), set.len());
// Shrink to fit
set.shrink_to_fit();
println!("After shrink - Capacity: {}, Len: {}", set.capacity(), set.len());
// Clear but keep capacity
set.clear();
println!("After clear - Capacity: {}, Len: {}", set.capacity(), set.len());
}IndexSet provides the same capacity management as HashSet.
use std::collections::HashSet;
use indexmap::IndexSet;
fn main() {
// Common operations shared between HashSet and IndexSet
// Creation
let mut hs: HashSet<i32> = HashSet::new();
let mut is: IndexSet<i32> = IndexSet::new();
// Insertion
hs.insert(1);
is.insert(1);
// Contains
hs.contains(&1);
is.contains(&1);
// Removal
hs.remove(&1);
is.remove(&1);
// Length
hs.len();
is.len();
// Iteration
for _ in &hs {}
for _ in &is {}
// Set operations (return iterators)
// intersection, union, difference, symmetric_difference
// IndexSet-specific:
// - get_index(index) -> Option<&T>
// - get_index_of(value) -> Option<usize>
// - insert_full(value) -> (usize, bool)
// - swap_remove_index(index) -> Option<T>
// - shift_remove_index(index) -> Option<T>
// - indices() -> impl Iterator<Item = usize>
}Most HashSet operations work identically on IndexSet; ordering and index APIs are the additions.
use std::collections::HashSet;
use indexmap::IndexSet;
fn main() {
// Use HashSet when:
// 1. Order doesn't matter
// 2. Memory efficiency is critical
// 3. Insertion-heavy workloads
// 4. Only need membership testing
let mut unique_words: HashSet<String> = HashSet::new();
for word in ["the", "quick", "brown", "fox", "the"] {
unique_words.insert(word.to_string());
}
// Order doesn't matter, just need uniqueness
// Use IndexSet when:
// 1. Insertion order matters
// 2. Need index-based access
// 3. Deterministic iteration required
// 4. Serialization must be stable
// 5. Position information is useful
let mut ordered_commands: IndexSet<String> = IndexSet::new();
for cmd in ["init", "build", "test", "deploy"] {
ordered_commands.insert(cmd.to_string());
}
// Commands must execute in specific order
// Might need to check position or index
}Choose based on whether ordering matters for your use case.
use std::collections::HashSet;
use indexmap::IndexSet;
fn main() {
println!("HashSet vs IndexSet:");
println!("| Feature | HashSet | IndexSet |");
println!("|---------|---------|----------|");
println!("| Order preservation | No | Yes |");
println!("| Index access | No | Yes (O(1)) |");
println!("| Insert position | No | Yes |");
println!("| Deterministic iteration | No | Yes |");
println!("| Memory usage | Lower | Higher |");
println!("| Insertion speed | Faster | Slower |");
println!("| Lookup speed | O(1) | O(1) |");
println!("| Removal by index | No | Yes |");
}Key differences:
| Aspect | HashSet | IndexSet |
|--------|-----------|------------|
| Insertion order | Not preserved | Preserved |
| Index access | Not supported | get_index(i), O(1) |
| Position lookup | Not supported | get_index_of(v), O(1) |
| insert() returns | bool | (index, bool) |
| Memory overhead | Baseline | ~20-30% more |
| Iteration | Unspecified order | Deterministic |
Performance characteristics:
| Operation | HashSet | IndexSet |
|-----------|-----------|------------|
| insert | O(1) amortized | O(1) amortized, slightly slower |
| contains | O(1) | O(1) |
| remove | O(1) | O(1) for swap_remove, O(n) for shift_remove |
| get_index | N/A | O(1) |
| get_index_of | N/A | O(1) |
| Memory | Lower | Higher |
When to use HashSet:
When to use IndexSet:
Key insight: IndexSet is an ordered set that maintains insertion order through a parallel array structure, enabling deterministic iteration and O(1) index-based access at the cost of additional memory and slightly slower insertions. The trade-off is between the convenience of ordered access and memory/performance overhead. For most applications, the overhead is negligible, but in memory-constrained environments or insertion-heavy workloads where order truly doesn't matter, HashSet remains preferable. The API is largely compatible, making switching between them straightforward—most methods are shared, with IndexSet adding index-specific operations like get_index(), get_index_of(), and insert_full(). The ordering guarantee also makes IndexSet valuable for serialization, testing reproducibility, and configuration management where consistent ordering produces predictable behavior across runs and machines.