1.1 概念
哈希表(Hash table,也叫散列表),是根据关键值(Key value)而直接进行访问的数据结构。也就是说,它通过把关键值映射到表中一个位置来访问记录,用来加快查找的速度。这个映射函数叫做散列函数,存放记录的数组叫做散列表。
给定表M,存在函数f(key),对任意给定的关键字值key,代入函数后若能得到包含该关键字的记录在表中的地址,则称表M为哈希(Hash)表,函数f(key)为哈希(Hash) 函数。
1.2 存储方法
直接寻址法(适用于连续的数值):取关键字或关键字的某个线性函数值为散列地址。即H(key)=key或H(key) = a·key + b,其中a和b为常数(这种散列函数叫做自身函数)。若其中H(key)中已经有值了,就往下一个找,直到H(key)中没有值了,就放进去。
y = x-100 (y=ax+b)
除留余数法(适用于不连续的数值):取关键字被某个不大于散列表表长 的数 h 除后所得的余数为散列地址。即 H(key) = key MOD h,h≤m。对h的选择很重要,一般取素数或m,若h选的不好,容易产生同义词。
下图散列表表长为7,所以素数(质数)选择为5
1.3 哈希表特性
哈希表查找数值的时间复杂度为 O(1)。
α (装填因子) = n(表中元素) / m (表长) 注:装填因子越大,冲突的可能性越大;装填因子越小,冲突的可能性越小。
如果发生冲突,可以采用线性探测法,即表长m / 装填因子α ,重新构造一个散列表,再次遇到冲突时,即放到下一个未放数值的空间(空白位置)中。
1.4 算法实现
class HashMap:"""哈希表实现,使用开放寻址法(线性探测)处理冲突"""# 定义墓碑标记(用于已删除的槽位)__TOMBSTONE = object()def __init__(self, initial_capacity=8, load_factor_threshold=0.7):"""初始化哈希表:param initial_capacity: 初始容量:param load_factor_threshold: 触发扩容的负载因子阈值"""self.capacity = initial_capacityself.load_factor_threshold = load_factor_thresholdself.size = 0 # 存储的键值对数量self.buckets = [None] * self.capacity # 存储桶数组def _hash(self, key):"""计算键的哈希值(使用Python内置hash函数)"""return hash(key) % self.capacitydef _resize(self, new_capacity):"""调整哈希表大小"""# 保存旧的桶数组old_buckets = self.buckets# 重置哈希表属性self.capacity = new_capacityself.buckets = [None] * new_capacityself.size = 0# 重新插入所有有效键值对for bucket in old_buckets:if bucket is not None and bucket is not self.__TOMBSTONE:key, value = bucketself.put(key, value)def _find_slot(self, key):"""查找键对应的槽位索引(用于插入、获取、删除)"""index = self._hash(key)first_tombstone = -1 # 记录遇到的第一个墓碑位置# 线性探测直到找到空槽或匹配的键while True:bucket = self.buckets[index]# 如果槽位为空,返回该位置if bucket is None:return (index, first_tombstone) if first_tombstone == -1 else (first_tombstone, -1)# 如果遇到墓碑,记录第一个墓碑位置if bucket == self.__TOMBSTONE:if first_tombstone == -1:first_tombstone = index# 如果找到匹配的键elif bucket[0] == key:return (index, -1)# 继续探测下一个槽位index = (index + 1) % self.capacitydef put(self, key, value):"""插入键值对(如果键已存在则更新值)"""# 检查是否需要扩容if self.size / self.capacity >= self.load_factor_threshold:self._resize(self.capacity * 2)# 查找合适的槽位index, first_tombstone = self._find_slot(key)# 如果键不存在且找到了墓碑位置,则使用墓碑位置if first_tombstone != -1:index = first_tombstone# 插入或更新键值对if self.buckets[index] is None or self.buckets[index] == self.__TOMBSTONE:self.size += 1self.buckets[index] = (key, value)def get(self, key, default=None):"""获取键对应的值,如果键不存在则返回默认值"""index, _ = self._find_slot(key)if self.buckets[index] is None or self.buckets[index] == self.__TOMBSTONE:return defaultreturn self.buckets[index][1]def remove(self, key):"""删除键值对"""index, _ = self._find_slot(key)# 如果键不存在if self.buckets[index] is None or self.buckets[index] == self.__TOMBSTONE:raise KeyError(f"Key not found: {key}")# 标记为墓碑self.buckets[index] = self.__TOMBSTONEself.size -= 1# 检查是否需要缩容if self.capacity > 8 and self.size / self.capacity <= 0.2:self._resize(max(8, self.capacity // 2))def contains_key(self, key):"""检查键是否存在"""index, _ = self._find_slot(key)return self.buckets[index] is not None and self.buckets[index] != self.__TOMBSTONEdef __len__(self):"""返回键值对数量"""return self.sizedef __contains__(self, key):"""支持 in 操作符"""return self.contains_key(key)def __getitem__(self, key):"""支持 [] 获取操作"""value = self.get(key)if value is None:raise KeyError(key)return valuedef __setitem__(self, key, value):"""支持 [] 设置操作"""self.put(key, value)def __delitem__(self, key):"""支持 del 操作"""self.remove(key)def keys(self):"""返回所有键的生成器"""for bucket in self.buckets:if bucket is not None and bucket != self.__TOMBSTONE:yield bucket[0]def values(self):"""返回所有值的生成器"""for bucket in self.buckets:if bucket is not None and bucket != self.__TOMBSTONE:yield bucket[1]def items(self):"""返回所有键值对的生成器"""for bucket in self.buckets:if bucket is not None and bucket != self.__TOMBSTONE:yield bucketdef __str__(self):"""返回哈希表的字符串表示"""items = []for key in self.keys():items.append(f"{key}: {self.get(key)}")return "{" + ", ".join(items) + "}"# 测试哈希表
if __name__ == "__main__":# 创建哈希表hash_map = HashMap()print("插入键值对:")hash_map["name"] = "Alice"hash_map["age"] = 30hash_map["city"] = "New York"hash_map["occupation"] = "Engineer"print(hash_map)print(f"当前大小: {len(hash_map)}")print("\n更新键值对:")hash_map["age"] = 31print(f"age: {hash_map['age']}")print("\n获取值:")print(f"name: {hash_map.get('name')}")print(f"country: {hash_map.get('country', 'Not found')}")print("\n检查键是否存在:")print(f"Contains 'city': {'city' in hash_map}")print(f"Contains 'country': {'country' in hash_map}")print("\n删除键值对:")del hash_map["occupation"]print(f"After deletion: {hash_map}")print(f"当前大小: {len(hash_map)}")print("\n测试扩容:")for i in range(10):hash_map[f"key{i}"] = f"value{i}"print(f"扩容后大小: {len(hash_map)}")print(f"当前容量: {hash_map.capacity}")print("\n测试缩容:")for i in range(8):del hash_map[f"key{i}"]print(f"删除多个键后大小: {len(hash_map)}")print(f"当前容量: {hash_map.capacity}")print("\n遍历所有键值对:")for key, value in hash_map.items():print(f"{key} => {value}")
运行结果: