Custom hash maps and hashing strategies
Hash tables provide O(1) average-case lookup by mapping keys to array indices via a hash function. Rust's HashMap is built on this principle, but understanding the internals helps you use it effectively.
Implementing hash tables in Rust requires:
Hash for custom typesuse std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::fmt::Debug;
/// A simple hash table using separate chaining
pub struct HashTable<K, V> {
buckets: Vec<Vec<(K, V)>>,
len: usize,
capacity: usize,
}
impl<K: Hash + Eq + Clone, V: Clone> HashTable<K, V> {
pub fn new() -> Self {
Self::with_capacity(16)
}
pub fn with_capacity(capacity: usize) -> Self {
let capacity = capacity.max(1);
HashTable {
buckets: vec![Vec::new(); capacity],
len: 0,
capacity,
}
}
fn hash(&self, key: &K) -> usize {
let mut hasher = DefaultHasher::new();
key.hash(&mut hasher);
(hasher.finish() as usize) % self.capacity
}
pub fn insert(&mut self, key: K, value: V) -> Option<V> {
// Resize if load factor > 0.75
if self.len > self.capacity * 3 / 4 {
self.resize();
}
let index = self.hash(&key);
// Check if key exists
for (k, v) in &mut self.buckets[index] {
if k == &key {
let old = std::mem::replace(v, value);
return Some(old);
}
}
// Insert new entry
self.buckets[index].push((key, value));
self.len += 1;
None
}
pub fn get(&self, key: &K) -> Option<&V> {
let index = self.hash(key);
self.buckets[index]
.iter()
.find(|(k, _)| k == key)
.map(|(_, v)| v)
}
pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {
let index = self.hash(key);
self.buckets[index]
.iter_mut()
.find(|(k, _)| k == key)
.map(|(_, v)| v)
}
pub fn remove(&mut self, key: &K) -> Option<V> {
let index = self.hash(key);
let bucket = &mut self.buckets[index];
if let Some(pos) = bucket.iter().position(|(k, _)| k == key) {
let (_, value) = bucket.swap_remove(pos);
self.len -= 1;
Some(value)
} else {
None
}
}
pub fn contains_key(&self, key: &K) -> bool {
self.get(key).is_some()
}
pub fn len(&self) -> usize {
self.len
}
pub fn is_empty(&self) -> bool {
self.len == 0
}
fn resize(&mut self) {
let new_capacity = self.capacity * 2;
let mut new_buckets = vec![Vec::new(); new_capacity];
for bucket in self.buckets.drain(..) {
for (key, value) in bucket {
let mut hasher = DefaultHasher::new();
key.hash(&mut hasher);
let index = (hasher.finish() as usize) % new_capacity;
new_buckets[index].push((key, value));
}
}
self.buckets = new_buckets;
self.capacity = new_capacity;
}
pub fn iter(&self) -> impl Iterator<Item = (&K, &V)> {
self.buckets.iter().flat_map(|bucket| {
bucket.iter().map(|(k, v)| (k, v))
})
}
}
impl<K: Hash + Eq + Clone, V: Clone> Default for HashTable<K, V> {
fn default() -> Self {
Self::new()
}
}
/// Custom type implementing Hash
#[derive(Debug, Clone, PartialEq, Eq)]
struct Person {
name: String,
age: u32,
}
impl Hash for Person {
fn hash<H: Hasher>(&self, state: &mut H) {
// Hash both fields
self.name.hash(state);
self.age.hash(state);
}
}
/// Open addressing with linear probing
pub struct OpenAddressTable<K, V> {
slots: Vec<Option<(K, V, bool)>>, // (key, value, is_deleted)
len: usize,
capacity: usize,
}
impl<K: Hash + Eq + Clone, V: Clone> OpenAddressTable<K, V> {
pub fn new() -> Self {
Self::with_capacity(16)
}
pub fn with_capacity(capacity: usize) -> Self {
OpenAddressTable {
slots: vec![None; capacity],
len: 0,
capacity,
}
}
fn hash(&self, key: &K) -> usize {
let mut hasher = DefaultHasher::new();
key.hash(&mut hasher);
(hasher.finish() as usize) % self.capacity
}
pub fn insert(&mut self, key: K, value: V) -> Option<V> {
if self.len >= self.capacity / 2 {
self.resize();
}
let mut index = self.hash(&key);
let start = index;
loop {
match &mut self.slots[index] {
None => {
self.slots[index] = Some((key, value, false));
self.len += 1;
return None;
}
Some((k, v, deleted)) if k == &key && !*deleted => {
let old = std::mem::replace(v, value);
return Some(old);
}
Some((_, _, true)) => {
// Reuse deleted slot
self.slots[index] = Some((key, value, false));
self.len += 1;
return None;
}
_ => {
index = (index + 1) % self.capacity;
if index == start {
panic!("Hash table full");
}
}
}
}
}
pub fn get(&self, key: &K) -> Option<&V> {
let mut index = self.hash(key);
let start = index;
loop {
match &self.slots[index] {
None => return None,
Some((k, v, false)) if k == key => return Some(v),
_ => {
index = (index + 1) % self.capacity;
if index == start {
return None;
}
}
}
}
}
fn resize(&mut self) {
let new_capacity = self.capacity * 2;
let old_slots = std::mem::replace(
&mut self.slots,
vec![None; new_capacity],
);
self.capacity = new_capacity;
self.len = 0;
for slot in old_slots {
if let Some((key, value, false)) = slot {
self.insert(key, value);
}
}
}
}
impl<K: Hash + Eq + Clone, V: Clone> Default for OpenAddressTable<K, V> {
fn default() -> Self {
Self::new()
}
}
fn main() {
// Custom hash table
let mut table = HashTable::new();
table.insert("apple", 1);
table.insert("banana", 2);
table.insert("cherry", 3);
println!("Get apple: {:?}", table.get(&"apple")); // Some(1)
println!("Contains banana: {}", table.contains_key(&"banana")); // true
// Update existing
table.insert("apple", 10);
println!("Updated apple: {:?}", table.get(&"apple")); // Some(10)
// Remove
table.remove(&"banana");
println!("After remove: {:?}", table.get(&"banana")); // None
// Custom type as key
let mut people_ages = HashTable::new();
let alice = Person { name: "Alice".to_string(), age: 30 };
people_ages.insert(alice.clone(), "Engineer");
println!("Alice's job: {:?}", people_ages.get(&alice));
// Open addressing table
let mut open_table = OpenAddressTable::new();
for i in 0..10 {
open_table.insert(i, i * 10);
}
println!("Open addressing get(5): {:?}", open_table.get(&5)); // Some(50)
}
| Strategy | Pros | Cons |
|----------|------|------|
| Separate Chaining | Simple, handles clustering | Extra memory for pointers |
| Linear Probing | Cache-friendly, simple | Primary clustering |
| Quadratic Probing | Reduces clustering | Secondary clustering |
| Double Hashing | Best distribution | Two hash functions needed |
use std::collections::HashMap;
fn main() {
let mut map = HashMap::new();
// Entry API for efficient insert-or-update
*map.entry("key").or_insert(0) += 1;
// or_insert_with for expensive defaults
map.entry("key2").or_insert_with(|| expensive_default());
// Pre-allocate for known size
let mut map: HashMap<String, i32> = HashMap::with_capacity(1000);
// Custom hasher for performance (if DoS not a concern)
use std::collections::hash_map::RandomState;
// Or use ahash, fxhash for speed
}
fn expensive_default() -> i32 { 42 }
// DON'T: Check then insert (double lookup)
if !map.contains_key(&key) {
map.insert(key, value);
}
// DO: Use entry API
map.entry(key).or_insert(value);
// DON'T: Implement Hash inconsistently with Eq
impl Hash for BadType {
fn hash<H: Hasher>(&self, state: &mut H) {
// If a == b, then hash(a) must equal hash(b)
}
}
Entry API for your hash tableRun this code in the official Rust Playground