标签:
C 语言 <除法散列法> 高效 HashTable Dictionary:不管集合有多大、key 有多长,根据 key 查询平均只需一次左右的寻址,因此平均时间复杂度为 O(1)!
先上代码,明天写原理注释!
HashDictionary.h
#ifndef HASH_DICTIONARY_H
#define HASH_DICTIONARY_H

// Multiplier of the polynomial string hash computed by GetHashCode.
#define CM_STR_HASHFUNC_CONSTANT 31
// Size in bytes of the fixed key buffer inside every Entry. A key must fit
// in this buffer together with its terminating NUL.
#define KEYSIZE 40

// One slot of the entry pool.
struct Entry
{
    int hashCode;       // non-negative hash of the key; -1 marks an unused slot
    int next;           // index of the next entry in the same chain, -1 ends the chain
    char key[KEYSIZE];  // NUL-terminated key text
    void* value;        // caller-supplied payload; the dictionary never frees it
};

// Chained hash table (division method: bucket = hash % bucketslength).
struct HashDictionary
{
    int* buckets;       // per bucket: index of the first entry of its chain, or -1
    Entry* entrys;      // entry pool
    int bucketslength;  // number of elements in buckets
    int entryslength;   // number of elements in entrys
    int count;          // number of pool slots handed out so far (removed ones included)
    int freeList;       // index of the first removed slot, or -1
    int freeCount;      // number of removed slots waiting for reuse
};

// Ascending table of primes used as table sizes.
static const int primes[] = {
    3, 7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919,
    1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591,
    17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437,
    187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263,
    1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369};

// Computes the hash code of a NUL-terminated string.
int GetHashCode(char* str);

// Tells whether candidate is a prime number.
bool IsPrime(int candidate);

// Returns a prime that is not smaller than min.
int GetPrime(int min);

// Initializes hd with the default capacity.
void Initialize(HashDictionary* hd);

// Initializes hd with room for at least capacity entries.
void Initialize(HashDictionary* hd,int capacity);

// Releases the tables owned by hd (hd itself is not freed).
void Disponse(HashDictionary* hd);

// Grows the tables of hd and redistributes the stored entries.
void Resize(HashDictionary* hd);

// Inserts key/value. Returns 1 when a new entry was stored and 2 when the
// value of an existing key was replaced. When add is true and the key already
// exists nothing is changed and -1 is returned.
int Insert(HashDictionary* hd,char* key,void* value,bool add);

// Returns the pool index of key, or -1 when the key is not stored.
int FindEntryIndex(HashDictionary* hd,char* key);

// Returns the entry stored for key, or a null pointer when there is none.
Entry* FindEntry(HashDictionary* hd,char* key);

// Removes key. Returns false when the key is not stored.
bool Remove(HashDictionary* hd,char* key);

// Tells whether key is stored.
bool ContainsKey(HashDictionary* hd,char* key);

// Returns the number of stored entries.
int GetCount(HashDictionary* hd);

// Copies every stored key into allkey, one key per KEYSIZE-byte record.
// allkey must provide at least GetCount(hd) * KEYSIZE bytes.
void GetAllKey(HashDictionary* hd,char* allkey);

#endif // HASH_DICTIONARY_H
HashDictionary.cpp
#include "stdafx.h"
#include "HashDictionary.h"
#include <malloc.h>
#include <math.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>

// Computes the polynomial hash (multiplier CM_STR_HASHFUNC_CONSTANT) of a
// NUL-terminated string. A null pointer hashes to 0.
// The accumulation is done in unsigned arithmetic so that wrap-around is well
// defined; each character is first widened through int so the result matches
// what wrapping signed arithmetic would have produced.
int GetHashCode(char* str)
{
    unsigned int hash = 0;
    if (str == NULL)
    {
        return 0;
    }
    for (const char* p = str; *p != '\0'; p++)
    {
        hash = hash * CM_STR_HASHFUNC_CONSTANT + (unsigned int)(int)*p;
    }
    return (int)hash;
}

// Returns true when candidate is prime. Values below 2 are never prime.
bool IsPrime(int candidate)
{
    if (candidate < 2)
    {
        return false;
    }
    if ((candidate & 1) == 0)
    {
        return candidate == 2;
    }
    const int limit = (int)sqrt((double)candidate);
    for (int divisor = 3; divisor <= limit; divisor += 2)
    {
        if ((candidate % divisor) == 0)
        {
            return false;
        }
    }
    return true;
}

// Returns the smallest prime >= min: first from the primes table, then by
// testing odd candidates. A negative min yields 3. If no prime is found below
// INT_MAX, min itself is returned.
int GetPrime(int min)
{
    if (min < 0)
    {
        return 3;
    }
    const int length = (int)(sizeof(primes) / sizeof(primes[0]));
    for (int i = 0; i < length; i++)
    {
        if (primes[i] >= min)
        {
            return primes[i];
        }
    }
    // min | 1 is the first odd number >= min, so no candidate is skipped.
    for (int candidate = (min | 1); candidate < INT_MAX; candidate += 2)
    {
        if (IsPrime(candidate))
        {
            return candidate;
        }
    }
    return min;
}

// Allocates a bucket array and an entry pool of size elements each and marks
// every bucket and every entry as empty. Returns false (allocating nothing)
// when size is not positive, the byte count would overflow, or malloc fails.
static bool AllocateTables(int size, int** outBuckets, Entry** outEntrys)
{
    if (size <= 0 || (size_t)size > ((size_t)-1) / sizeof(Entry))
    {
        return false;
    }
    int* buckets = (int*)malloc(sizeof(int) * (size_t)size);
    Entry* entrys = (Entry*)malloc(sizeof(Entry) * (size_t)size);
    if (buckets == NULL || entrys == NULL)
    {
        free(buckets);
        free(entrys);
        return false;
    }
    for (int i = 0; i < size; i++)
    {
        buckets[i] = -1;
        entrys[i].hashCode = -1;
        entrys[i].next = -1;
        entrys[i].key[0] = '\0';
        entrys[i].value = NULL;
    }
    *outBuckets = buckets;
    *outEntrys = entrys;
    return true;
}

// Initializes hd with the default capacity of 3.
void Initialize(HashDictionary* hd)
{
    Initialize(hd, 3);
}

// Initializes hd with GetPrime(capacity) buckets and entry slots.
// If the allocation fails, hd is left empty (null tables, zero lengths); every
// later operation on such a table reports failure instead of crashing.
void Initialize(HashDictionary* hd,int capacity)
{
    if (hd == NULL)
    {
        return;
    }
    hd->buckets = NULL;
    hd->entrys = NULL;
    hd->bucketslength = 0;
    hd->entryslength = 0;
    hd->count = 0;
    hd->freeCount = 0;
    hd->freeList = -1;

    const int size = GetPrime(capacity);
    int* buckets = NULL;
    Entry* entrys = NULL;
    if (!AllocateTables(size, &buckets, &entrys))
    {
        return;
    }
    hd->buckets = buckets;
    hd->entrys = entrys;
    hd->bucketslength = size;
    hd->entryslength = size;
}

// Frees the tables owned by hd and resets it to the empty state, so a second
// call is harmless. hd itself and the stored values are not freed.
void Disponse(HashDictionary* hd)
{
    if (hd == NULL)
    {
        return;
    }
    free(hd->buckets);
    free(hd->entrys);
    hd->buckets = NULL;
    hd->entrys = NULL;
    hd->bucketslength = 0;
    hd->entryslength = 0;
    hd->count = 0;
    hd->freeCount = 0;
    hd->freeList = -1;
}

// Re-allocates both tables with GetPrime(count * 2) elements and rebuilds the
// bucket chains. On overflow or allocation failure the table is left untouched,
// which callers can detect because entryslength does not change.
void Resize(HashDictionary* hd)
{
    if (hd == NULL || hd->count < 0 || hd->count > INT_MAX / 2)
    {
        return;
    }
    // A prime size keeps "hash % size" well distributed.
    const int newsize = GetPrime(hd->count * 2);
    int* newbuckets = NULL;
    Entry* newentrys = NULL;
    if (!AllocateTables(newsize, &newbuckets, &newentrys))
    {
        return;
    }
    if (hd->count > 0)
    {
        // Removed slots are copied as well: their next fields form the free list.
        memcpy(newentrys, hd->entrys, sizeof(Entry) * (size_t)hd->count);
    }
    free(hd->buckets);
    free(hd->entrys);
    hd->buckets = newbuckets;
    hd->entrys = newentrys;
    hd->bucketslength = hd->entryslength = newsize;

    // Re-link every live entry into the bucket selected by the new size.
    for (int i = 0; i < hd->count; i++)
    {
        if (hd->entrys[i].hashCode >= 0)
        {
            const int bucket = hd->entrys[i].hashCode % hd->bucketslength;
            hd->entrys[i].next = hd->buckets[bucket];
            hd->buckets[bucket] = i;
        }
    }
}

// Stores value under key.
// Returns  1 when a new entry was created,
//          2 when the key existed and its value was replaced (add == false),
//         -1 when the key existed and add == true (nothing changed),
//         -2 when hd or key is null, or key does not fit in KEYSIZE bytes
//            including its terminator,
//         -3 when the table has no storage or could not grow.
int Insert(HashDictionary* hd,char* key,void* value,bool add)
{
    if (hd == NULL || key == NULL)
    {
        return -2;
    }
    const size_t keylength = strlen(key);
    if (keylength >= KEYSIZE)
    {
        return -2;
    }
    if (hd->bucketslength <= 0)
    {
        return -3;
    }

    const int hashCode = GetHashCode(key) & 0x7FFFFFFF;  // clear the sign bit: -1 is reserved for unused slots
    int targetBucket = hashCode % hd->bucketslength;

    for (int i = hd->buckets[targetBucket]; i >= 0; i = hd->entrys[i].next)
    {
        if (hd->entrys[i].hashCode == hashCode && strcmp(hd->entrys[i].key,key)==0)
        {
            if (add)
            {
                return -1;
            }
            hd->entrys[i].value = value;
            return 2;
        }
    }

    int index;
    if (hd->freeCount > 0)
    {
        // Reuse the most recently removed slot.
        index = hd->freeList;
        hd->freeList = hd->entrys[index].next;
        hd->freeCount--;
    }
    else
    {
        if (hd->count == hd->entryslength)
        {
            Resize(hd);
            if (hd->count == hd->entryslength)
            {
                return -3;  // Resize could not grow the table
            }
            targetBucket = hashCode % hd->bucketslength;
        }
        index = hd->count;
        hd->count++;
    }

    // Push the new entry at the head of its bucket chain.
    hd->entrys[index].hashCode = hashCode;
    hd->entrys[index].next = hd->buckets[targetBucket];
    memcpy(hd->entrys[index].key, key, keylength + 1);
    hd->entrys[index].value = value;
    hd->buckets[targetBucket] = index;
    return 1;
}

// Returns the pool index of the entry stored for key, or -1 when the key is
// not stored (or hd/key is null, or the table has no storage).
int FindEntryIndex(HashDictionary* hd,char* key)
{
    if (hd == NULL || key == NULL || hd->bucketslength <= 0)
    {
        return -1;
    }
    const int hashCode = GetHashCode(key) & 0x7FFFFFFF;
    for (int i = hd->buckets[hashCode % hd->bucketslength]; i >= 0; i = hd->entrys[i].next)
    {
        if (hd->entrys[i].hashCode == hashCode && strcmp(hd->entrys[i].key,key)==0)
        {
            return i;
        }
    }
    return -1;
}

// Returns the entry stored for key, or a null pointer when there is none.
Entry* FindEntry(HashDictionary* hd,char* key)
{
    const int index = FindEntryIndex(hd,key);
    if (index < 0)
    {
        return NULL;
    }
    return &hd->entrys[index];
}

// Removes key and puts its slot on the free list for reuse by Insert.
// Returns false when the key is not stored.
bool Remove(HashDictionary* hd,char* key)
{
    if (hd == NULL || key == NULL || hd->bucketslength <= 0)
    {
        return false;
    }
    const int hashCode = GetHashCode(key) & 0x7FFFFFFF;
    const int bucket = hashCode % hd->bucketslength;
    int last = -1;
    for (int i = hd->buckets[bucket]; i >= 0; last = i, i = hd->entrys[i].next)
    {
        if (hd->entrys[i].hashCode == hashCode && strcmp(hd->entrys[i].key,key)==0)
        {
            if (last < 0)
            {
                hd->buckets[bucket] = hd->entrys[i].next;  // entry was the head of the chain
            }
            else
            {
                hd->entrys[last].next = hd->entrys[i].next;
            }
            hd->entrys[i].hashCode = -1;
            hd->entrys[i].next = hd->freeList;  // link into the list of removed slots
            memset(hd->entrys[i].key, 0, sizeof(hd->entrys[i].key));
            hd->entrys[i].value = NULL;
            hd->freeList = i;
            hd->freeCount++;
            return true;
        }
    }
    return false;
}

// Tells whether key is stored.
bool ContainsKey(HashDictionary* hd,char* key)
{
    return FindEntryIndex(hd,key) >= 0;
}

// Returns the number of stored entries (slots handed out minus removed slots).
int GetCount(HashDictionary* hd)
{
    if (hd == NULL)
    {
        return 0;
    }
    return hd->count - hd->freeCount;
}

// Writes every stored key into allkey as consecutive, zero-padded records of
// KEYSIZE bytes. allkey must provide at least GetCount(hd) * KEYSIZE bytes.
void GetAllKey(HashDictionary* hd,char* allkey)
{
    if (hd == NULL || allkey == NULL)
    {
        return;
    }
    const int length = hd->count - hd->freeCount;
    if (length <= 0)
    {
        return;
    }
    memset(allkey, 0, (size_t)KEYSIZE * (size_t)length);

    // The output position advances only for live entries, so removed slots
    // leave no gaps and nothing is written past the last record.
    int slot = 0;
    for (int i = 0; i < hd->count && slot < length; i++)
    {
        if (hd->entrys[i].hashCode != -1)
        {
            // Insert guarantees that stored keys are terminated inside KEYSIZE bytes.
            memcpy(allkey + (size_t)slot * KEYSIZE, hd->entrys[i].key, strlen(hd->entrys[i].key) + 1);
            slot++;
        }
    }
}
测试代码
// testDic.cpp : 定义控制台应用程序的入口点。 // #include "stdafx.h" #include "HashDictionary.h" #include <malloc.h> #include <Windows.h> #include<time.h> int _tmain(int argc, _TCHAR* argv[]) { //dark 89757 HashDictionary* hashdic=(HashDictionary*)malloc(sizeof(HashDictionary)); Initialize(hashdic,3);//3 这个参数是默认数组初始大小,一般设置为=估计有多少条数据,小了也无所谓,是动态增长的 #pragma region 基本操作 Insert(hashdic,"abc","this abc value",true); Insert(hashdic,"12341","this 12341 value~~~~",true); Insert(hashdic,"name","国人自强不息!",true); Insert(hashdic,"32.252","this abc value",true); printf(" abc: %s \n",FindEntry(hashdic,"abc")->value); Insert(hashdic,"abc","this NEW v~~~~",false);// <-false printf(" abc: %s \n",FindEntry(hashdic,"abc")->value); printf(" name: %s \n",FindEntry(hashdic,"name")->value); int count=GetCount(hashdic); printf("table条数:%d\n",count); char* allkey=(char*)malloc(count*KEYSIZE); GetAllKey(hashdic,allkey);//返回的结果是以KEYSIZE为单位长度的字符集合=》KEYSIZE+KEYSIZE+KEYSIZE+KEYSIZE for (int i = 0; i < count; i++) { printf("key item:%s\n",allkey+i*KEYSIZE);//零食输出 应该是 i*KEYSIZE~(i+1)*KEYSIZE 这段才是一个key } #pragma endregion Entry* find; printf( "---------------------------------------------\n"); #pragma region 删除 插入时间检测 clock_t start, finish; double duration; char* value="这是共有的value~~~~"; start = clock(); int i = 0; char tempkey[20]; for (; i < 1000000; i++) { _itoa_s(i,tempkey,10); if(i==50000){ Insert(hashdic,tempkey,"this debug 5000",true); } Insert(hashdic,tempkey,value,true); } finish = clock(); duration = (double)(finish - start) / CLOCKS_PER_SEC; printf( "插入hashtable 一百万条数:1000000 条! 
消费时间: %f seconds\n", duration ); printf("table条数:%d\n",GetCount(hashdic)); Remove(hashdic,"100"); Remove(hashdic,"235"); Remove(hashdic,"888888"); //检测所有插入是否存在 for ( i = 0;i < 1000000; i++) { _itoa_s(i,tempkey,10); find= FindEntry(hashdic,tempkey); if(find==NULL) printf("%s没有找到!!!\n",tempkey); else { //printf( "%s\n", find->value ); 打印100万条很耗时 } } printf( "检查100万完成~\n"); #pragma endregion printf( "---------------------------------------------\n"); start = clock(); char *testkey="50000"; find= FindEntry(hashdic,testkey); printf("指定key查找: key:%s value:%s\n",testkey,find->value); char *testkey2="50001"; find= FindEntry(hashdic,testkey2); printf("指定key查找: key:%s value:%s\n",testkey2,find->value); finish = clock(); duration = (double)(finish - start) / CLOCKS_PER_SEC; printf( "查找两条耗时: %f seconds\n", duration ); printf( "---------------------------------------------\n"); start = clock(); Insert(hashdic,"汉字","this 汉字 的值",true); find= FindEntry(hashdic,"汉字"); printf("指定key查找: key:%s value:%s\n","汉字",find->value); duration = (double)(finish - start) / CLOCKS_PER_SEC; printf( "插入+查找耗时: %f seconds\n", duration ); printf("table条数:%d\n",GetCount(hashdic)); system("pause"); return 0; }
demo源码下载 地址1:链接: http://pan.baidu.com/s/1vstEu 密码: y5lq
demo源码下载 地址2:http://bcs.duapp.com/darkweb/testDic.zip
c语言 <除法散列法> 高效 HashTable Dictionary
标签:
原文地址:http://www.cnblogs.com/dark89757/p/4231222.html