What is the purpose of indexmap::IndexSet and when would you use it over std::collections::HashSet?

indexmap::IndexSet is a hash set that preserves insertion order, combining the O(1) lookup performance of a hash set with the ordered iteration semantics of a Vec. Unlike std::collections::HashSet, which provides no ordering guarantees and iterates in arbitrary order based on internal hash table layout, IndexSet maintains elements in the order they were inserted. This makes it valuable when you need both unique elements and predictable iteration—such as maintaining ordered unique items, implementing stable deduplication, or requiring position-based access to set elements. The trade-off is slightly higher memory usage and insertion cost to maintain the ordering metadata.

Basic IndexSet Usage

use indexmap::IndexSet;
use std::collections::HashSet;
 
fn main() {
    // IndexSet maintains insertion order
    let mut index_set: IndexSet<&str> = IndexSet::new();
    index_set.insert("apple");
    index_set.insert("banana");
    index_set.insert("cherry");
    
    println!("IndexSet iteration:");
    for item in &index_set {
        println!("  {}", item);
    }
    // Output: apple, banana, cherry (insertion order)
    
    // HashSet has arbitrary order
    let mut hash_set: HashSet<&str> = HashSet::new();
    hash_set.insert("apple");
    hash_set.insert("banana");
    hash_set.insert("cherry");
    
    println!("\nHashSet iteration:");
    for item in &hash_set {
        println!("  {}", item);
    }
    // Output: unpredictable order (based on hash table layout)
}

IndexSet iterates in insertion order; HashSet order is arbitrary.

Position-Based Access

use indexmap::IndexSet;
 
fn main() {
    let mut set: IndexSet<&str> = IndexSet::new();
    set.insert("first");
    set.insert("second");
    set.insert("third");
    
    // IndexSet supports index-based access
    let first = set.get_index(0);
    println!("Index 0: {:?}", first);  // Some("first")
    
    let second = set.get_index(1);
    println!("Index 1: {:?}", second);  // Some("second")
    
    // Get index of a value
    let idx = set.get_index_of("second");
    println!("'second' at index: {:?}", idx);  // Some(1)
    
    // HashSet doesn't support position-based access
    // There's no way to get "first inserted element" from HashSet
}

IndexSet provides position-based access unavailable in HashSet.

Stable Deduplication

use indexmap::IndexSet;
 
fn main() {
    // Problem: deduplicate while preserving order
    let items = vec!["banana", "apple", "cherry", "apple", "banana", "date"];
    
    // Using IndexSet: preserves first occurrence order
    let unique: IndexSet<&str> = items.iter().copied().collect();
    println!("Unique (insertion order): {:?}", unique);
    // ["banana", "apple", "cherry", "date"]
    
    // Using HashSet: arbitrary order
    let unique_hash: std::collections::HashSet<&str> = items.iter().copied().collect();
    println!("Unique (arbitrary order): {:?}", unique_hash);
    // Unpredictable order
    
    // Converting back to Vec preserves order
    let unique_vec: Vec<&str> = unique.into_iter().collect();
    println!("As Vec: {:?}", unique_vec);
    // ["banana", "apple", "cherry", "date"]
}

IndexSet deduplicates while preserving insertion order.

Ordered Unique Collections

use indexmap::IndexSet;
 
fn main() {
    // User permissions: order matters for display
    let mut permissions: IndexSet<String> = IndexSet::new();
    
    // Add permissions in priority order
    permissions.insert("admin".to_string());
    permissions.insert("write".to_string());
    permissions.insert("read".to_string());
    
    // Later additions don't change order
    permissions.insert("admin".to_string());  // Already exists, no effect
    
    println!("Permissions in priority order:");
    for (idx, perm) in permissions.iter().enumerate() {
        println!("  {}. {}", idx + 1, perm);
    }
    // 1. admin
    // 2. write
    // 3. read
    
    // Can check if specific permission exists
    if permissions.contains("admin") {
        println!("Has admin permission");
    }
}

IndexSet maintains order for display or priority-based collections.

Performance Comparison

use indexmap::IndexSet;
use std::collections::HashSet;
use std::time::Instant;
 
fn main() {
    const N: usize = 100_000;
    
    // Insert performance
    let mut index_set: IndexSet<i32> = IndexSet::new();
    let mut hash_set: HashSet<i32> = HashSet::new();
    
    let start = Instant::now();
    for i in 0..N {
        index_set.insert(i);
    }
    let index_insert = start.elapsed();
    
    let start = Instant::now();
    for i in 0..N {
        hash_set.insert(i);
    }
    let hash_insert = start.elapsed();
    
    println!("Insert {} elements:", N);
    println!("  IndexSet: {:?}", index_insert);
    println!("  HashSet: {:?}", hash_insert);
    // IndexSet is slightly slower due to order tracking
    
    // Lookup performance (nearly identical)
    let start = Instant::now();
    for i in 0..N {
        let _ = index_set.contains(&(i % N));
    }
    let index_lookup = start.elapsed();
    
    let start = Instant::now();
    for i in 0..N {
        let _ = hash_set.contains(&(i % N));
    }
    let hash_lookup = start.elapsed();
    
    println!("\nLookup {} times:", N);
    println!("  IndexSet: {:?}", index_lookup);
    println!("  HashSet: {:?}", hash_lookup);
    // Very similar O(1) lookup performance
    
    // Memory overhead
    println!("\nMemory:");
    println!("  IndexSet: ~{} bytes overhead per element for order tracking", 
             std::mem::size_of::<usize>() * 2);
    println!("  HashSet: minimal overhead");
}

IndexSet has slightly higher insertion cost but similar lookup performance.

When to Use IndexSet vs HashSet

use indexmap::IndexSet;
use std::collections::HashSet;
 
// USE INDEXSET WHEN:
 
// 1. Need ordered iteration
fn display_ordered_items() {
    let mut items: IndexSet<&str> = IndexSet::new();
    items.insert("first");
    items.insert("second");
    items.insert("third");
    // Iteration order is guaranteed
}
 
// 2. Need position-based access
fn access_by_position() {
    let items: IndexSet<&str> = ["a", "b", "c"].iter().copied().collect();
    let first = items.get_index(0);  // Some("a")
}
 
// 3. Need to preserve insertion order after deduplication
fn stable_dedup(items: Vec<&str>) -> Vec<&str> {
    IndexSet::from_iter(items).into_iter().collect()
}
 
// 4. Need index-based operations
fn find_index() {
    let items: IndexSet<&str> = ["x", "y", "z"].iter().copied().collect();
    let idx = items.get_index_of("y");  // Some(1)
}
 
// USE HASHSET WHEN:
 
// 1. Only need uniqueness, order doesn't matter
fn just_unique() {
    let _: HashSet<&str> = ["a", "b", "a", "c"].iter().copied().collect();
}
 
// 2. Performance is critical, order isn't needed
fn performance_critical() {
    // HashSet has slightly better insert performance
}
 
// 3. Memory is constrained
fn memory_constrained() {
    // HashSet has lower memory overhead
}
 
fn main() {
    println!("See examples above");
}

Choose based on whether order matters for your use case.

IndexSet with IndexMap

use indexmap::{IndexSet, IndexMap};
 
fn main() {
    // IndexSet is the set analog of IndexMap
    // Both preserve insertion order
    
    let mut map: IndexMap<&str, i32> = IndexMap::new();
    map.insert("one", 1);
    map.insert("two", 2);
    
    let mut set: IndexSet<&str> = IndexSet::new();
    set.insert("one");
    set.insert("two");
    
    // Both iterate in insertion order
    println!("IndexMap:");
    for (k, v) in &map {
        println!("  {} => {}", k, v);
    }
    
    println!("\nIndexSet:");
    for v in &set {
        println!("  {}", v);
    }
    
    // Both support position-based access
    let first_key = map.get_index(0);
    let first_value = set.get_index(0);
}

IndexSet and IndexMap share similar ordering guarantees.

Removing Elements by Index

use indexmap::IndexSet;
 
fn main() {
    let mut set: IndexSet<&str> = IndexSet::new();
    set.insert("a");
    set.insert("b");
    set.insert("c");
    set.insert("d");
    
    println!("Before: {:?}", set);  // {"a", "b", "c", "d"}
    
    // Remove by index (shifts subsequent elements)
    let removed = set.shift_remove_index(1);
    println!("Removed index 1: {:?}", removed);  // Some("b")
    println!("After shift_remove: {:?}", set);  // {"a", "c", "d"}
    
    // Note: shift_remove maintains order but is O(n)
    // swap_remove would be O(1) but changes order
    
    set.insert("e");
    set.insert("f");
    println!("\nWith new elements: {:?}", set);  // {"a", "c", "d", "e", "f"}
    
    // swap_remove_index is faster but doesn't preserve order
    let swapped = set.swap_remove_index(1);
    println!("Swap removed index 1: {:?}", swapped);  // Some("c")
    println!("After swap_remove: {:?}", set);  // {"a", "f", "d", "e"} - order changed!
}

IndexSet offers shift_remove (preserves order, O(n)) and swap_remove (O(1), changes order).

Equality and Order

use indexmap::IndexSet;
use std::collections::HashSet;
 
fn main() {
    // HashSet: equality based only on elements
    let mut hs1: HashSet<&str> = HashSet::new();
    hs1.insert("a");
    hs1.insert("b");
    
    let mut hs2: HashSet<&str> = HashSet::new();
    hs2.insert("b");
    hs2.insert("a");
    
    println!("HashSet equality: {}", hs1 == hs2);  // true (same elements)
    
    // IndexSet: equality considers order
    let mut is1: IndexSet<&str> = IndexSet::new();
    is1.insert("a");
    is1.insert("b");
    
    let mut is2: IndexSet<&str> = IndexSet::new();
    is2.insert("b");
    is2.insert("a");
    
    println!("IndexSet equality: {}", is1 == is2);  // false (different order)
    
    // But same insertion order means equality
    let mut is3: IndexSet<&str> = IndexSet::new();
    is3.insert("a");
    is3.insert("b");
    
    println!("Same order equality: {}", is1 == is3);  // true
}

IndexSet equality considers order; HashSet equality considers only elements.

Real-World Use Case: Command History

use indexmap::IndexSet;
 
struct CommandHistory {
    commands: IndexSet<String>,
    max_size: usize,
}
 
impl CommandHistory {
    fn new(max_size: usize) -> Self {
        CommandHistory {
            commands: IndexSet::new(),
            max_size,
        }
    }
    
    fn add(&mut self, cmd: String) {
        // Remove if exists (will re-add at end)
        self.commands.shift_remove(&cmd);
        
        // Add to end
        self.commands.insert(cmd);
        
        // Enforce size limit
        while self.commands.len() > self.max_size {
            self.commands.shift_remove_index(0);
        }
    }
    
    fn get_history(&self) -> Vec<&String> {
        self.commands.iter().collect()
    }
    
    fn get_recent(&self, n: usize) -> Vec<&String> {
        self.commands.iter().rev().take(n).collect()
    }
}
 
fn main() {
    let mut history = CommandHistory::new(5);
    
    history.add("ls".to_string());
    history.add("cd ..".to_string());
    history.add("ls -la".to_string());
    history.add("grep foo".to_string());
    
    println!("History:");
    for cmd in history.get_history() {
        println!("  {}", cmd);
    }
    
    // Repeating command moves it to most recent
    history.add("ls".to_string());
    println!("\nAfter 'ls' again:");
    for cmd in history.get_history() {
        println!("  {}", cmd);
    }
}

IndexSet naturally maintains unique, ordered command history.

Real-World Use Case: Ordered Configuration Keys

use indexmap::IndexSet;
 
fn main() {
    // Configuration with ordered unique keys
    let mut config_keys: IndexSet<&str> = IndexSet::new();
    
    // Add in specific order for display
    config_keys.insert("host");
    config_keys.insert("port");
    config_keys.insert("database");
    config_keys.insert("username");
    config_keys.insert("password");
    
    // Generate config file with keys in order
    println!("# Configuration");
    for (idx, key) in config_keys.iter().enumerate() {
        let value = match *key {
            "host" => "localhost",
            "port" => "5432",
            "database" => "mydb",
            "username" => "user",
            "password" => "***",
            _ => "unknown",
        };
        println!("{}={}", key, value);
    }
    
    // Config file output is deterministic
}

IndexSet ensures configuration keys are unique and ordered.

Memory Layout

use indexmap::IndexSet;
use std::collections::HashSet;
 
fn main() {
    // IndexSet internal structure:
    // - Hash table for O(1) lookup
    // - Dense array for ordered iteration
    // - Indices connecting hash entries to array positions
    
    // HashSet internal structure:
    // - Hash table only
    // - Iteration follows table layout (arbitrary)
    
    // Memory comparison
    let set: IndexSet<i32> = (0..100).collect();
    let hset: HashSet<i32> = (0..100).collect();
    
    // IndexSet additional memory per element:
    // - Index into dense array
    // - Reverse index from dense to hash
    // Approximately 2 * size_of::<usize>() extra per element
    
    println!("IndexSet memory: uses additional indices for order tracking");
    println!("HashSet memory: minimal, just hash table");
    
    // Capacity differences
    let mut is: IndexSet<i32> = IndexSet::with_capacity(100);
    let mut hs: HashSet<i32> = HashSet::with_capacity(100);
    
    println!("\nBoth pre-allocated for 100 elements");
}

IndexSet has memory overhead for maintaining ordering indices.

Synthesis

Core distinction:

  • IndexSet: Hash set with insertion order preservation and position access
  • HashSet: Hash set with arbitrary iteration order, minimal overhead

Use IndexSet when:

  • Need deterministic iteration order
  • Position-based access (get_index, get_index_of)
  • Stable deduplication (preserves first occurrence order)
  • Ordered display or serialization
  • Implementing ordered unique collections
  • Need to know "what was inserted first"

Use HashSet when:

  • Only uniqueness matters, order is irrelevant
  • Performance is critical
  • Memory is constrained
  • Order would add unnecessary complexity

Performance characteristics:

  • Lookup: Both O(1), nearly identical
  • Insert: IndexSet slightly slower (maintains order indices)
  • Memory: IndexSet has ~2 pointers overhead per element
  • Removal: shift_remove is O(n), swap_remove is O(1) but changes order

Key operations unique to IndexSet:

  • get_index(i): Get element by position
  • get_index_of(&val): Find position of element
  • shift_remove_index(i): Remove and maintain order (O(n))
  • swap_remove_index(i): Remove fast, changes order (O(1))

Key insight: IndexSet bridges the gap between HashSet (unique elements, O(1) lookup) and Vec (ordered iteration, position access). The cost is memory overhead and slightly slower inserts. When order matters for uniqueness semantics, display, or position-based operations, IndexSet provides the right abstraction. When order truly doesn't matter, HashSet remains the simpler, more efficient choice.