标签:hashtable
HashTable,也叫哈希表/散列表,是一种通过key值直接访问在内存存储位置的数据结构
这种数据结构在插入、删除、查找的操作上具有“常数平均时间”的表现
散列函数:使用某种映射函数,将大数映射为小数(所给键值映射为较小的方便作为下标的数字)
(<stl_hash_fun>中定义size_t __stl_hash_string(const char* s))
使用hash function可能会带来“碰撞”问题,解决这个问题有多种方法,包括线性探测、二次探测、开链法
负载因子:元素个数/表格大小 除了开链法,其它情况负载因子都在0-1
插入:利用hash function计算出元素插入位置,如果该位置的空间不可用,则往下寻找,直到找到一
个可用空间
查找:利用hash function计算出元素位置,如果该位置的元素不是我们所要找的元素,则往下寻找,
直到找到
删除:必须使用惰性删除,也就是只标记删除记号,实际删除操作则待表格重新整理时进行,因
为hashtable中的每一个元素不仅表述它自己,也关系到其它元素的排列
线性探测:H+i (i=0,1,2,3,4...)
二测探测:H+i^2 (i=0,1,2,3,4...)
开链法:在每一个表格元素维护一个list 虽然是线性操作,但如果每个list够短,速度还是够快
二次探测实现:
//_Capacity()中new不出空间 #pragma once #include<string> #include<ostream> template<class K> struct HASHFUNC { size_t operator()(const K& key, size_t capacity) { return key%capacity; } }; template<> struct HASHFUNC<std::string> { size_t operator()(const std::string& s,int capacity) { const char* str = s.c_str(); return BKDRHash(str)%capacity; } static size_t BKDRHash(const char * str) { unsigned int seed = 131; // 31 131 1313 13131 131313 unsigned int hash = 0; while (*str) { hash = hash * seed + (*str++); } return (hash & 0x7FFFFFFF); } }; //hashtable的删除采用的是惰性删除,也就是只标记删除记号,实际删除操作则待表格重新整理时进行, //这是因为hashtable中每一个元素不仅表述它自己,也关系到其它元素的排列 enum Status { EMPTY, EXIST, DELETE }; template<class K, class V> struct KeyValue { KeyValue(const K key=K(), const V value=V()) :_key(key) ,_value(value) {} friend std::ostream& operator<<(std::ostream &_cout, const KeyValue<K, V>& kv); K _key; V _value; }; template<class K, class V, class COM = HASHFUNC<K>> class Hash { public: Hash(); bool Insert(K key, V value); KeyValue<K, V>* Find(K key); bool Remove(K key); void Print(); ~Hash(); //friend std::ostream& operator<<(std::ostream &_cout, const KeyValue<K, V>& kv); protected: void _Capacity() { const int _PrimeSize = 28; static const unsigned long _PrimeList[_PrimeSize] = { 53ul, 97ul, 193ul, 389ul, 769ul, 1543ul, 3079ul, 6151ul, 12289ul, 24593ul, 49157ul, 98317ul, 196613ul, 393241ul, 786433ul, 1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul }; //确定容量 size_t NewCapacity = _capacity; if (NewCapacity == 0){ NewCapacity = _PrimeList[0]; } else if (_size * 10 / NewCapacity >= 7){ for (int i = 0; i < _PrimeSize; ++i){ if (NewCapacity < _PrimeList[i]){ NewCapacity = _PrimeList[i]; break; } } } //把原表中的数据哈希到扩容后的表中 if ( NewCapacity != _capacity){ Status* NewStatus = new Status[NewCapacity]; KeyValue<K, V>* NewTables = new KeyValue<K, V>[NewCapacity];//************************new不出空间 memset(NewStatus, EMPTY, sizeof(NewStatus)*NewCapacity); if (_capacity != 0){ for (size_t index = 0; index < _capacity; ++index) { if (_status[index] == EXIST){ size_t i = HashFunc(_tables[index]._key, _capacity);//得到该关键字对应的位置 NewTables[i]._key = _tables[index]._key; NewTables[i]._value = _tables[index]._value; NewStatus[i] = EXIST; } } } std::swap(NewTables, _tables); std::swap(NewStatus, _status); _capacity = NewCapacity; } } size_t HashFunc(const K& key, size_t capacity) { return COM()(key, capacity); } size_t HashFunc2(const K& key,size_t capacity) { size_t i = 0; size_t res = HashFunc(key, capacity); while(_status[(res + (i*i)) % capacity] == EXIST){ i++; } return (res + (i*i)) % capacity; } int _Find(K key); protected: size_t _size; size_t _capacity; KeyValue<K,V>* _tables; Status* _status; //哈希表位置的状态 }; template<class K, class V, class COM = HASHFUNC<K>> Hash<K, V, COM >::Hash() : _size(0) , _capacity(0) , _tables(NULL) , _status(NULL) //, _tables(new KeyValue<K,V>[_capacity]) //, _status(new Status[_capacity]) { /*memset(_tables, NULL, sizeof(KeyValue<K,V>)*_capacity); memset(_status, EMPTY, sizeof(Status)*_capacity);*/ } template<class K, class V, class COM = HASHFUNC<K>> bool Hash<K, V, COM >::Insert(K key, V value) { _Capacity();//检查容量,扩容 size_t index = HashFunc2(key,_capacity);//**************** //while (_status[index]!=EMPTY){ // if (_tables[index]._key == key) // return false; // index++; //} //while (_status[index] == EXIST){ // index++; //} _tables[index]._key = key; _tables[index]._value = value; _status[index] = EXIST; ++_size; return true; } template<class K, class V, class COM = HASHFUNC<K>> int Hash<K, V, COM >::_Find(K key) { size_t k = HashFunc(key, _capacity); if (_tables[k]._key == key) return k; size_t i = 1; size_t index = (k + i*i) % _capacity; while (_tables[index]._key!=_tables[k]._key){ if (_status[index] == EXIST){ if (_tables[index]._key == key){ return index; } } i++; index = (k + i*i)%_capacity; } std::cout << "未找到key为" << key << "的数据" << std::endl; return -1; } template<class K, class V, class COM = HASHFUNC<K>> KeyValue<K, V >* Hash<K, V, COM >::Find(K key) { size_t index=_Find(key); if (index != -1){ std::cout << "已找到key为" << key << "的数据,下标为" << index << std::endl; return &_tables[index]; } return NULL; } template<class K, class V, class COM = HASHFUNC<K>> bool Hash<K, V, COM >::Remove(K key) { int index = _Find(key); if (index== -1) return false; _status[index] = DELETE; std::cout << "已删除key为"<<key<<"的数据" << std::endl; return true; } template<class K, class V, class COM = HASHFUNC<K>> void Hash<K, V, COM >::Print() { std::cout << "_tables:" << std::endl; for (size_t i = 0; i < _capacity; ++i){ if (_status[i] == EXIST){ std::cout << " _tables[" << i << "]-->" << _tables[i]._key<<","<<_tables[i]._value << std::endl; } } std::cout << std::endl; } template<class K, class V, class COM = HASHFUNC<K>> Hash<K, V, COM >::~Hash() { for (size_t i = 0; i < _capacity; ++i){ if (_status[i] == EXIST) _status[i] = DELETE; } delete[] _tables; delete[] _status; } template<class K, class V> std::ostream& operator<<(std::ostream &_cout, const KeyValue<K, V>& kv) { _cout <<kv._value << std::endl; return _cout; }
开链法实现:
#pragma once #include<vector> #include<string> template<class K> struct HashFuncDefault { size_t operator()(const K& key,const size_t& capacity) { return key%capacity; } }; template<> struct HashFuncDefault<std::string> { static size_t BKDRHash(const char * str) { unsigned int seed = 131; // 31 131 1313 13131 131313 unsigned int hash = 0; while (*str) { hash = hash * seed + (*str++); } return (hash & 0x7FFFFFFF); } size_t operator()(const std::string& key, const size_t& capacity) { const char* str = key.c_str(); return BKDRHash(str)%capacity; } }; template<class K,class V> struct KeyValue { KeyValue() :_key(0) , _value(0) , _next(NULL) {} KeyValue(const K& key,const V& value) :_key(key) , _value(value) , _next(NULL) {} K _key; V _value; KeyValue<K, V>* _next; }; template<class K, class V, class HashFunc = HashFuncDefault<K>> class HashTablesBucket { public: HashTablesBucket(); HashTablesBucket(const HashTablesBucket<K,V,HashFunc>& hst); HashTablesBucket<K,V,HashFunc>& operator=(const HashTablesBucket<K, V, HashFunc>& hst); bool Insert(const K& key, const V& value); KeyValue<K, V>* Find(const K& key); bool Remove(const K& key); void Print(); ~HashTablesBucket(); protected: void _Copy(const HashTablesBucket<K,V,HashFunc>& hsb) { _tables.resize(hsb._tables.size()); for (size_t i = 0; i < hsb._tables.size(); ++i){ KeyValue<K, V>*cur = hsb._tables[i]; while (cur){ KeyValue<K, V>* NewCur = new KeyValue<K, V>(cur->_key,cur->_value); NewCur->_next = _tables[i]; _tables[i]= NewCur; cur = cur->_next; } } } //检查容量 void _Capacity() { const int _PrimeSize = 28; static const unsigned long _PrimeList[_PrimeSize] = { 53ul, 97ul, 193ul, 389ul, 769ul, 1543ul, 3079ul, 6151ul, 12289ul, 24593ul, 49157ul, 98317ul, 196613ul, 393241ul, 786433ul, 1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul }; //扩容 size_t NewCapacity = _tables.size(); if (_size == _tables.size()){ for (size_t i = 0; i < _PrimeSize; ++i){ if (_tables.size()< _PrimeList[i]){ NewCapacity = _PrimeList[i]; break; } i++; } } //说明有扩容,需把原来的数据哈希到新的表中 HashTablesBucket<K, V, HashFunc> NewHTB; NewHTB._tables.resize(NewCapacity); if (NewCapacity != _tables.size()){ for(size_t i = 0; i < _tables.size(); ++i){ KeyValue<K, V>* cur = _tables[i]; while (cur){ size_t index=HashTablesBucketFunc(cur->_key); _tables[i] = cur->_next; NewHTB._tables[index]= cur; cur = _tables[i]; } } swap(NewHTB._tables, _tables); } } //返回下标 size_t HashTablesBucketFunc(const K& key) { return HashFunc()(key,_tables.size()); } //析构函数 void _Deatructor(KeyValue<K,V>* cur) { while (cur){ KeyValue<K, V>* tmp = cur; cur = cur->_next; delete tmp; tmp = NULL; } } protected: size_t _size; std::vector<KeyValue<K, V>*> _tables; }; template<class K, class V, class HashFunc> HashTablesBucket<K, V, HashFunc>::HashTablesBucket() :_size(0) {} //拷贝构造 template<class K, class V, class HashFunc> HashTablesBucket<K, V, HashFunc>::HashTablesBucket(const HashTablesBucket<K, V, HashFunc>& hst) : _size(hst._size) { _Copy(hst); } //赋值操作符的重载 template<class K, class V, class HashFunc> HashTablesBucket<K, V, HashFunc>& HashTablesBucket<K, V, HashFunc>::operator=(const HashTablesBucket<K, V, HashFunc>& hst) { if (this != &hst){ for (size_t i = 0; i < _tables.size(); ++i){ _Deatructor(_tables[i]); _tables[i] = NULL; } _size=hst._tables.size(); _Copy(hst); } return *this; } template<class K, class V, class HashFunc> bool HashTablesBucket<K, V, HashFunc>::Insert(const K& key, const V& value) { _Capacity(); size_t index = HashTablesBucketFunc(key);//二次探测 KeyValue<K, V>* cur = _tables[index]; while (cur) { if (cur->_key == key){ return false; } cur = cur->_next; } KeyValue<K, V>* kv = new KeyValue<K, V>(key, value); if (_tables[index]==NULL){ _tables[index] = kv; } else{ kv->_next = _tables[index]->_next; _tables[index]->_next = kv; } _size++; return true; } template<class K, class V, class HashFunc> KeyValue<K, V>* HashTablesBucket<K, V, HashFunc>::Find(const K& key) { size_t index = HashTablesBucketFunc(key); KeyValue<K, V>* cur = _tables[index]; while (cur){ if (cur->_key == key){ return cur; } cur = cur->_next; } return NULL; } template<class K, class V, class HashFunc> bool HashTablesBucket<K, V, HashFunc>::Remove(const K& key) { KeyValue<K, V>* tmp = Find(key); if (tmp == NULL){ std::cout << "未找到key为" << key << "的数据"<<std::endl; return false; } size_t index = HashTablesBucketFunc(key); KeyValue<K, V>* cur = _tables[index]; if (cur == NULL){ std::cout << "未找到key为" << key << "的数据" << std::endl; return false; } if (cur->_key == key){ _tables[index] = cur->_next; cur->_next = NULL; delete cur; cur = NULL; --_size; return true; } while (cur->_next){ if (cur->_next->_key == key){ KeyValue<K, V>* tmp = cur->_next; cur->_next = cur->_next->_next; tmp->_next = NULL; return true; } cur = cur->_next; } std::cout << "未找到key为" << key << "的数据" << std::endl; return false; } template<class K, class V, class HashFunc> void HashTablesBucket<K, V, HashFunc>::Print() { std::cout << "_tables:" << std::endl; for (size_t i = 0; i < _tables.size(); ++i){ KeyValue<K, V>* cur = _tables[i]; if (cur != NULL){ std::cout << " _tables[" << i << "]-->"; while (cur){ std::cout << cur->_key << "," << cur->_value << " "; cur = cur->_next; } std::cout << std::endl; } } } template<class K, class V, class HashFunc> HashTablesBucket<K, V, HashFunc>::~HashTablesBucket() { for (size_t i = 0; i < _tables.size(); ++i){ KeyValue<K, V>* cur = _tables[i]; _Deatructor(cur); _tables[i] = NULL; } }
本文出自 “零蛋蛋” 博客,谢绝转载!
标签:hashtable
原文地址:http://lingdandan.blog.51cto.com/10697032/1837451