标签:
C 语言 <除法散列法> 高效 HashTable Dictionary:不管集合大小,根据任意 key 查询平均都只需一次左右的寻址,所以平均时间复杂度为 O(1)!
先上代码,明天写原理注释!
HashDictionary.h
// Multiplier applied to the running hash for every character in GetHashCode.
#define CM_STR_HASHFUNC_CONSTANT 31
// Size in bytes of the fixed key buffer embedded in each Entry.
#define KEYSIZE 40
// One slot of the dictionary's entry array.
struct Entry
{
// Hash of the key masked to a non-negative value while the slot is in use;
// -1 marks a slot that is unused or has been removed.
int hashCode;
// Index of the next slot in the same chain (a bucket chain, or the free
// list after removal); -1 terminates the chain.
int next;
// Key text copied in by Insert, NUL-terminated.
char key[KEYSIZE];
// Caller-supplied pointer stored as-is; the dictionary code never frees it.
void* value;
};
// Chained hash table whose chains are expressed as indices into one entry array.
struct HashDictionary
{
// buckets[b] is the index of the first entry of bucket b, or -1 if empty.
int* buckets;
// Slot storage; chains link slots through Entry::next.
Entry* entrys;
// Number of elements in buckets.
int bucketslength;
// Number of elements in entrys.
int entryslength;
// Number of slots handed out so far (includes slots currently on the free list).
int count;
// Index of the first removed slot available for reuse, or -1 if none.
int freeList;
// Number of slots on the free list.
int freeCount;
};
// Ascending table of candidate sizes; GetPrime returns the first element
// that is >= the requested minimum.
static int primes[] = {
3, 7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919,
1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591,
17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437,
187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263,
1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369};
// Compute the hash of a NUL-terminated string.
int GetHashCode(char* str);
// Test whether candidate is a prime number.
bool IsPrime(int candidate);
// Return a prime that is >= min (3 when min is negative).
int GetPrime(int min);
// Initialize a dictionary with the default capacity.
void Initialize(HashDictionary* hd);
// Initialize a dictionary whose arrays hold GetPrime(capacity) elements.
void Initialize(HashDictionary* hd,int capacity);
// Release the bucket and entry arrays owned by hd (hd itself is not freed).
void Disponse(HashDictionary* hd);
// Grow the bucket and entry arrays and rebuild the bucket chains.
void Resize(HashDictionary* hd);
// Insert a key/value pair. If the key already exists: returns -1 when add is
// true, otherwise replaces the stored value and returns 2. Returns 1 when a
// new entry was created.
int Insert(HashDictionary* hd,char* key,void* value,bool add);
// Return the slot index of key, or -1 when it is not present.
int FindEntryIndex(HashDictionary* hd,char* key);
// Return a pointer to the entry for key, or a null pointer when not present.
Entry* FindEntry(HashDictionary* hd,char* key);
// Remove key; returns false when the key is not present.
bool Remove(HashDictionary* hd,char* key);
// Return true when key is present.
bool ContainsKey(HashDictionary* hd,char* key);
// Return the number of live entries (count minus freeCount).
int GetCount(HashDictionary* hd);
// Write the stored keys into allkey as KEYSIZE-byte records; the caller
// supplies a buffer of at least GetCount(hd) * KEYSIZE bytes.
void GetAllKey(HashDictionary* hd,char* allkey);
HashDictionary.cpp
#include "stdafx.h"
#include "HashDictionary.h"
#include <malloc.h>
#include <math.h>
#include <limits.h>
#include <string.h>
// Compute the multiplicative string hash h = h * CM_STR_HASHFUNC_CONSTANT + c
// over every character of the NUL-terminated string str.
//
// The accumulation is done in unsigned arithmetic: long keys make the running
// value exceed INT_MAX, and signed overflow is undefined behaviour, whereas
// unsigned arithmetic wraps modulo 2^N by definition. Each character is first
// widened as a (possibly signed) char so the bit pattern of the result matches
// what a wrapping signed computation would have produced.
//
// Returns the hash reinterpreted as int; the value may be negative, so callers
// mask it (& 0x7FFFFFFF) before using it as a bucket selector.
int GetHashCode(char* str)
{
    unsigned int hash = 0u;
    for (const char* p = str; *p != '\0'; ++p) {
        hash = hash * (unsigned int)CM_STR_HASHFUNC_CONSTANT + (unsigned int)(int)*p;
    }
    return (int)hash;
}
// Test whether candidate is a prime number.
//
// Values below 2 (including 0, 1 and all negatives) are not prime. Even
// numbers are prime only when equal to 2. Odd numbers are checked by trial
// division with odd divisors; the loop bound is written as
// divisor <= candidate / divisor, which is the integer form of
// divisor * divisor <= candidate and can neither overflow nor depend on
// floating-point rounding.
bool IsPrime(int candidate)
{
    if (candidate < 2)
        return false;
    if ((candidate & 1) == 0)
        return candidate == 2;

    for (int divisor = 3; divisor <= candidate / divisor; divisor += 2) {
        if (candidate % divisor == 0)
            return false;
    }
    return true;
}
// Return the smallest suitable prime that is >= min.
//
// Negative requests yield 3. Otherwise the first element of the primes table
// that is >= min is returned. When min is larger than every table element,
// odd candidates are tested upward starting from min itself (if odd) or
// min + 1 (if even), so no prime between min and the starting point is
// skipped. If no prime below INT_MAX is found, min is returned unchanged.
int GetPrime(int min)
{
    if (min < 0)
        return 3;

    const int tableLength = (int)(sizeof(primes) / sizeof(primes[0]));
    for (int k = 0; k < tableLength; k++) {
        if (primes[k] >= min)
            return primes[k];
    }

    // (min | 1) is the first odd number >= min. The candidate stays odd and
    // INT_MAX is odd, so the += 2 step stops exactly at INT_MAX without
    // overflowing.
    for (int candidate = (min | 1); candidate < INT_MAX; candidate += 2) {
        if (IsPrime(candidate))
            return candidate;
    }
    return min;
}
// Initialize hd with the default capacity of 3.
//
// The parameter type matches the prototype declared in HashDictionary.h
// (HashDictionary*), so that the declared overload is the one defined here.
void Initialize(HashDictionary* hd)
{
    Initialize(hd, 3);
}
// Set up an empty dictionary in *hd.
//
// Both arrays are sized to GetPrime(capacity). Every bucket head is set to -1
// (empty), and every entry slot is marked unused (hashCode -1, next -1, value
// null). The key buffers are left untouched. The slot counter starts at 0 and
// the free list is empty.
void Initialize(HashDictionary* hd,int capacity)
{
    const int slots = GetPrime(capacity);

    int* heads = (int*)malloc(sizeof(int) * slots);
    for (int b = 0; b < slots; ++b) {
        heads[b] = -1;
    }
    hd->buckets = heads;
    hd->bucketslength = slots;

    Entry* pool = (Entry*)malloc(sizeof(Entry) * slots);
    for (int e = 0; e < slots; ++e) {
        Entry* slot = &pool[e];
        slot->hashCode = -1;
        slot->next = -1;
        slot->value = 0;
    }
    hd->entrys = pool;
    hd->entryslength = slots;

    hd->count = 0;
    hd->freeCount = 0;
    hd->freeList = -1;
}
// Release the bucket and entry arrays owned by hd.
//
// hd itself is not freed, and neither are the value pointers stored in the
// entries; both remain the caller's responsibility. After the arrays are
// freed the pointers are nulled and the bookkeeping fields reset, so calling
// Disponse a second time is harmless (free(NULL) is a no-op) instead of a
// double free. A null hd is ignored. The dictionary must be initialized again
// before any further use.
void Disponse(HashDictionary* hd)
{
    if (hd == NULL)
        return;

    free(hd->buckets);
    free(hd->entrys);
    hd->buckets = NULL;
    hd->entrys = NULL;

    hd->bucketslength = 0;
    hd->entryslength = 0;
    hd->count = 0;
    hd->freeCount = 0;
    hd->freeList = -1;
}
// Grow the dictionary and rebuild its bucket chains.
//
// The new size is a prime >= twice the number of slots handed out so far,
// the same sizing rule Initialize applies through GetPrime. A prime bucket
// count is what the division method (hash % bucketslength) relies on to
// spread keys evenly.
//
// The first hd->count entries are copied unchanged. Only the tail of the new
// entry array needs to be marked unused. Every live entry (hashCode >= 0) is
// then re-linked at the head of its new bucket.
//
// NOTE(review): the malloc results are not checked. Reporting failure needs a
// status return that callers inspect before writing a new slot, which this
// void signature cannot express.
void Resize(HashDictionary* hd)
{
    const int used = hd->count;
    const int newsize = GetPrime(used * 2);

    int* newbuckets = (int*)malloc(sizeof(int) * newsize);
    Entry* newentrys = (Entry*)malloc(sizeof(Entry) * newsize);

    for (int b = 0; b < newsize; b++) {
        newbuckets[b] = -1;
    }

    memcpy(newentrys, hd->entrys, sizeof(Entry) * used);
    for (int e = used; e < newsize; e++) {
        newentrys[e].hashCode = -1;
        newentrys[e].next = -1;
        newentrys[e].value = 0;
    }

    free(hd->buckets);
    free(hd->entrys);
    hd->buckets = newbuckets;
    hd->entrys = newentrys;
    hd->bucketslength = newsize;
    hd->entryslength = newsize;

    // Re-link the live entries into the new bucket array.
    for (int i = 0; i < used; i++) {
        if (hd->entrys[i].hashCode >= 0) {
            int bucket = hd->entrys[i].hashCode % hd->bucketslength;
            hd->entrys[i].next = hd->buckets[bucket];
            hd->buckets[bucket] = i;
        }
    }
}
// Insert a key/value pair, or update the value of an existing key.
//
// Parameters:
//   hd    - initialized dictionary.
//   key   - NUL-terminated key; its text is copied into the entry.
//   value - pointer stored as-is (not copied, never freed by the dictionary).
//   add   - when true, an existing key is left untouched and -1 is returned;
//           when false, the existing value is replaced.
//
// Returns:
//    1  a new entry was created,
//    2  the value of an existing key was replaced,
//   -1  the key already exists and add was true,
//    0  the key is null or does not fit in the KEYSIZE-byte key buffer
//       (KEYSIZE - 1 characters plus the terminator); nothing is stored.
int Insert(HashDictionary* hd,char* key,void* value,bool add)
{
    // An oversized key would overrun Entry::key when copied, so refuse it
    // before anything is modified.
    if (key == NULL)
        return 0;
    const size_t keyBytes = strlen(key) + 1;
    if (keyBytes > (size_t)KEYSIZE)
        return 0;

    int hashCode = GetHashCode(key) & 0x7FFFFFFF;
    int targetBucket = hashCode % hd->bucketslength;

    // Look for an existing entry with the same key in the target chain.
    for (int i = hd->buckets[targetBucket]; i >= 0; i = hd->entrys[i].next) {
        if (hd->entrys[i].hashCode == hashCode && strcmp(hd->entrys[i].key, key) == 0) {
            if (add) {
                return -1;
            }
            hd->entrys[i].value = value;
            return 2;
        }
    }

    int index;
    if (hd->freeCount > 0) {
        // Reuse the most recently removed slot.
        index = hd->freeList;
        hd->freeList = hd->entrys[index].next;
        hd->freeCount--;
    }
    else {
        if (hd->count == hd->entryslength) {
            Resize(hd);
            // The bucket count changed, so the target bucket must be recomputed.
            targetBucket = hashCode % hd->bucketslength;
        }
        index = hd->count;
        hd->count++;
    }

    // Fill the slot and link it at the head of its bucket chain.
    hd->entrys[index].hashCode = hashCode;
    hd->entrys[index].next = hd->buckets[targetBucket];
    memcpy(hd->entrys[index].key, key, keyBytes);
    hd->entrys[index].value = value;
    hd->buckets[targetBucket] = index;
    return 1;
}
// Locate the slot that holds key.
//
// The masked hash selects a bucket. The bucket's chain is then walked,
// comparing the stored hash first and the key text second. Returns the slot
// index, or -1 when the chain ends without a match.
int FindEntryIndex(HashDictionary* hd,char* key)
{
    const int wanted = GetHashCode(key) & 0x7FFFFFFF;
    int slot = hd->buckets[wanted % hd->bucketslength];

    while (slot >= 0) {
        const Entry* candidate = &hd->entrys[slot];
        if (candidate->hashCode == wanted && strcmp(candidate->key, key) == 0) {
            return slot;
        }
        slot = candidate->next;
    }
    return -1;
}
// Return a pointer to the entry stored under key, or a null pointer when
// FindEntryIndex reports that the key is absent.
Entry* FindEntry(HashDictionary* hd,char* key)
{
    const int slot = FindEntryIndex(hd, key);
    if (slot < 0) {
        return 0;
    }
    return hd->entrys + slot;
}
// Remove key from the dictionary.
//
// The matching entry is unlinked from its bucket chain, wiped, and pushed onto
// the free list so that a later Insert can reuse the slot. The whole key
// buffer is cleared (sizeof the array), so no stale key text is left behind.
//
// Returns true when the key was found and removed, false otherwise.
bool Remove(HashDictionary* hd,char* key)
{
    int hashCode = GetHashCode(key) & 0x7FFFFFFF;
    int bucket = hashCode % hd->bucketslength;
    int last = -1;   // previous slot in the chain, -1 while at the chain head

    for (int i = hd->buckets[bucket]; i >= 0; last = i, i = hd->entrys[i].next) {
        if (hd->entrys[i].hashCode == hashCode && strcmp(hd->entrys[i].key, key) == 0) {
            if (last < 0) {
                // The entry is the first one in its bucket.
                hd->buckets[bucket] = hd->entrys[i].next;
            }
            else {
                hd->entrys[last].next = hd->entrys[i].next;
            }
            hd->entrys[i].hashCode = -1;
            // Chain the slot into the list of logically deleted entries.
            hd->entrys[i].next = hd->freeList;
            memset(hd->entrys[i].key, 0, sizeof(hd->entrys[i].key));
            hd->entrys[i].value = 0;
            hd->freeList = i;
            hd->freeCount++;
            return true;
        }
    }
    return false;
}
// Report whether key is present, i.e. whether FindEntryIndex yields a
// non-negative slot index for it.
bool ContainsKey(HashDictionary* hd,char* key)
{
    const int slot = FindEntryIndex(hd, key);
    return !(slot < 0);
}
// Number of live entries: slots handed out so far minus the slots that are
// currently parked on the free list.
int GetCount(HashDictionary* hd)
{
    const int handedOut = hd->count;
    const int parked = hd->freeCount;
    return handedOut - parked;
}
// Copy every stored key into allkey as consecutive KEYSIZE-byte records.
//
// allkey must point to at least GetCount(hd) * KEYSIZE bytes. The buffer is
// zero-filled first, then record r receives the text of the r-th live entry,
// so each record is a NUL-terminated, zero-padded string.
//
// Records are packed by output position, not by slot index. Removed slots
// leave holes in the entry array, and indexing the output by slot would both
// leave gaps and write past the end of a buffer sized for the live entries.
void GetAllKey(HashDictionary* hd,char* allkey)
{
    const int length = hd->count - hd->freeCount;
    if (length <= 0)
        return;
    memset(allkey, 0, (size_t)KEYSIZE * (size_t)length);

    int written = 0;
    // Slots at or beyond hd->count have never been handed out, so scanning
    // the first hd->count slots visits every live entry.
    for (int i = 0; i < hd->count && written < length; i++) {
        if (hd->entrys[i].hashCode == -1)
            continue;

        const char* key = hd->entrys[i].key;
        // Copy only the key text, bounded by the buffer size; the bytes after
        // the terminator are not meaningful and the record is already zeroed.
        const void* terminator = memchr(key, '\0', KEYSIZE);
        size_t textLength = (terminator != NULL)
            ? (size_t)((const char*)terminator - key)
            : (size_t)(KEYSIZE - 1);

        memcpy(allkey + (size_t)written * KEYSIZE, key, textLength);
        written++;
    }
}
测试代码
// testDic.cpp : 定义控制台应用程序的入口点。
//
#include "stdafx.h"
#include "HashDictionary.h"
#include <malloc.h>
#include <Windows.h>
#include<time.h>
int _tmain(int argc, _TCHAR* argv[])
{
//dark 89757
HashDictionary* hashdic=(HashDictionary*)malloc(sizeof(HashDictionary));
Initialize(hashdic,3);//3 这个参数是默认数组初始大小,一般设置为=估计有多少条数据,小了也无所谓,是动态增长的
#pragma region 基本操作
Insert(hashdic,"abc","this abc value",true);
Insert(hashdic,"12341","this 12341 value~~~~",true);
Insert(hashdic,"name","国人自强不息!",true);
Insert(hashdic,"32.252","this abc value",true);
printf(" abc: %s \n",FindEntry(hashdic,"abc")->value);
Insert(hashdic,"abc","this NEW v~~~~",false);// <-false
printf(" abc: %s \n",FindEntry(hashdic,"abc")->value);
printf(" name: %s \n",FindEntry(hashdic,"name")->value);
int count=GetCount(hashdic);
printf("table条数:%d\n",count);
char* allkey=(char*)malloc(count*KEYSIZE);
GetAllKey(hashdic,allkey);//返回的结果是以KEYSIZE为单位长度的字符集合=》KEYSIZE+KEYSIZE+KEYSIZE+KEYSIZE
for (int i = 0; i < count; i++)
{
printf("key item:%s\n",allkey+i*KEYSIZE);//零食输出 应该是 i*KEYSIZE~(i+1)*KEYSIZE 这段才是一个key
}
#pragma endregion
Entry* find;
printf( "---------------------------------------------\n");
#pragma region 删除 插入时间检测
clock_t start, finish;
double duration;
char* value="这是共有的value~~~~";
start = clock();
int i = 0;
char tempkey[20];
for (; i < 1000000; i++)
{
_itoa_s(i,tempkey,10);
if(i==50000){
Insert(hashdic,tempkey,"this debug 5000",true);
}
Insert(hashdic,tempkey,value,true);
}
finish = clock();
duration = (double)(finish - start) / CLOCKS_PER_SEC;
printf( "插入hashtable 一百万条数:1000000 条! 消费时间: %f seconds\n", duration );
printf("table条数:%d\n",GetCount(hashdic));
Remove(hashdic,"100");
Remove(hashdic,"235");
Remove(hashdic,"888888");
//检测所有插入是否存在
for ( i = 0;i < 1000000; i++)
{
_itoa_s(i,tempkey,10);
find= FindEntry(hashdic,tempkey);
if(find==NULL)
printf("%s没有找到!!!\n",tempkey);
else
{
//printf( "%s\n", find->value ); 打印100万条很耗时
}
}
printf( "检查100万完成~\n");
#pragma endregion
printf( "---------------------------------------------\n");
start = clock();
char *testkey="50000";
find= FindEntry(hashdic,testkey);
printf("指定key查找: key:%s value:%s\n",testkey,find->value);
char *testkey2="50001";
find= FindEntry(hashdic,testkey2);
printf("指定key查找: key:%s value:%s\n",testkey2,find->value);
finish = clock();
duration = (double)(finish - start) / CLOCKS_PER_SEC;
printf( "查找两条耗时: %f seconds\n", duration );
printf( "---------------------------------------------\n");
start = clock();
Insert(hashdic,"汉字","this 汉字 的值",true);
find= FindEntry(hashdic,"汉字");
printf("指定key查找: key:%s value:%s\n","汉字",find->value);
duration = (double)(finish - start) / CLOCKS_PER_SEC;
printf( "插入+查找耗时: %f seconds\n", duration );
printf("table条数:%d\n",GetCount(hashdic));
system("pause");
return 0;
}
demo源码下载 地址1:链接: http://pan.baidu.com/s/1vstEu 密码: y5lq
demo源码下载 地址2:http://bcs.duapp.com/darkweb/testDic.zip
c语言 <除法散列法> 高效 HashTable Dictionary
标签:
原文地址:http://www.cnblogs.com/dark89757/p/4231222.html