c语言 <除法散列法> 高效 HashTable Dictionary

时间：2015-01-17 23:26:52 阅读：486 评论：0 收藏：0 [点我收藏+]

标签：

c语言 <除法散列法> 高效 HashTable Dictionary：不管集合有多大，根据任意长度的 key 查询，平均只需要一次左右的寻址，所以平均时间复杂度为 O(1)！

先上代码，明天写原理注释！

HashDictionary.h

/* Multiplier applied to the running hash for every character in GetHashCode(). */
#define CM_STR_HASHFUNC_CONSTANT 31
/* Size in bytes of the fixed key buffer inside each Entry (terminating NUL included). */
#define KEYSIZE 40

/* One slot of the dictionary's entry array. */
struct Entry
{
	/* Hash of the key with the sign bit cleared; -1 marks a slot that is unused or was removed. */
	int hashCode;
	/* Index of the next entry in the same bucket chain, or -1 at the end.
	   For removed slots this field links the free list instead. */
	int next;
	/* Copy of the key as a NUL-terminated string. */
	char key[KEYSIZE];
	/* Caller-supplied value pointer; the dictionary code never frees it. */
	void* value;
};

/* Chained hash table that stores its chains as indices into a flat entry array. */
struct HashDictionary
{
	/* buckets[h % bucketslength] is the index of the first entry of that chain, or -1 if empty. */
	int* buckets;
	/* Storage for all entries; chains and the free list are threaded through Entry::next. */
	Entry* entrys;
	/* Number of elements in buckets. */
	int bucketslength;
	/* Number of elements in entrys. */
	int entryslength;
	/* Number of entry slots handed out so far (removed slots are still counted here). */
	int count;
	/* Index of the most recently removed slot (head of the free list), or -1 if none. */
	int freeList;
	/* Number of slots currently on the free list. */
	int freeCount;
};

/*
 * Ascending table of primes used as candidate table sizes by GetPrime().
 * Declared const: it is a lookup table that is never written, and because it
 * lives in a header every translation unit receives its own copy.
 */
static const int primes[] = {
	3, 7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919,
	1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591,
	17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437,
	187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263,
	1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369};



// Computes the hash code of a NUL-terminated string.
int GetHashCode(char* str);

// Primality test.
bool IsPrime(int candidate);

// Returns a prime that is >= min (3 when min is negative).
int GetPrime(int min);

// Initializes hd with the default capacity.
// NOTE(review): the definition in HashDictionary.cpp must take the same
// parameter type as this prototype, otherwise this overload is left undefined.
void Initialize(HashDictionary* hd);

// Initializes hd; capacity is a size hint that is rounded via GetPrime().
void Initialize(HashDictionary* hd,int capacity);

// Releases the bucket and entry arrays owned by hd (hd itself is not freed).
void Disponse(HashDictionary* hd);

// Grows the bucket and entry arrays and re-links the stored entries.
void Resize(HashDictionary* hd);

// Inserts key/value. Returns 1 when a new entry is added. When the key already
// exists: returns -1 if add is true (nothing is changed), otherwise replaces
// the stored value and returns 2.
// The key is copied into a fixed buffer of KEYSIZE bytes.
int Insert(HashDictionary* hd,char* key,void* value,bool add);

// Returns the index of key in hd->entrys, or -1 when the key is not present.
int FindEntryIndex(HashDictionary* hd,char* key);

// Returns a pointer to the entry for key, or a null pointer when it is not present.
Entry* FindEntry(HashDictionary* hd,char* key);

// Removes key. Returns false when the key is not present.
bool Remove(HashDictionary* hd,char* key);

// Returns true when key is present.
bool ContainsKey(HashDictionary* hd,char* key);

// Returns the number of live entries (count minus freeCount).
int GetCount(HashDictionary* hd);

// Copies the stored keys into allkey as fixed KEYSIZE-byte records. The first
// GetCount(hd) * KEYSIZE bytes of allkey are zero-filled before copying.
void GetAllKey(HashDictionary* hd,char* allkey);

HashDictionary.cpp

#include "stdafx.h"
#include "HashDictionary.h"
#include <malloc.h>
#include <math.h>
#include <limits.h>
#include <string.h>

/*
 * Computes a polynomial hash of a NUL-terminated string:
 *     h = h * CM_STR_HASHFUNC_CONSTANT + c      for every character c.
 *
 * The accumulation is done in unsigned arithmetic so that wrap-around on long
 * keys is well defined; the original signed accumulation overflowed, which is
 * undefined behaviour. Each character is first widened to int exactly as
 * before, so the resulting bit pattern is unchanged on two's-complement
 * platforms.
 *
 * str     NUL-terminated string; must not be NULL.
 * returns the hash, which may be negative (callers clear the sign bit).
 */
int GetHashCode(char* str)
{
	unsigned int hash = 0u;
	for (const char* p = str; *p != '\0'; p++) {
		hash = hash * (unsigned int)CM_STR_HASHFUNC_CONSTANT + (unsigned int)(int)*p;
	}
	/* Values above INT_MAX convert in an implementation-defined (wrapping) way. */
	return (int)hash;
}

/*
 * Reports whether candidate is a prime number.
 *
 * Values below 2 (including 1, 0 and all negative numbers) are not prime.
 * The previous version reported 1 as prime and took sqrt() of negative odd
 * inputs. Trial division now uses integers only: the loop stops once
 * divisor * divisor would exceed candidate, expressed as a division so the
 * comparison itself cannot overflow.
 */
bool IsPrime(int candidate)
{
	if (candidate < 2)
		return false;
	if ((candidate & 1) == 0)
		return candidate == 2;	/* 2 is the only even prime */

	for (int divisor = 3; divisor <= candidate / divisor; divisor += 2)
	{
		if ((candidate % divisor) == 0)
			return false;
	}
	return true;
}

/*
 * Returns a prime that is >= min, for use as a table size.
 *
 * Negative requests yield 3. Requests covered by the primes[] table return the
 * first table entry that is >= min. Larger requests are answered by testing
 * odd numbers upward from min.
 *
 * The search starts at (min | 1), the first odd number >= min. The previous
 * start value (min | 3) forced the two lowest bits and could therefore skip
 * candidates such as min + 1.
 *
 * If no prime below INT_MAX is found, min itself is returned.
 */
int GetPrime(int min) 
{
	if (min < 0)
		return 3;

	int length = sizeof(primes) / sizeof(primes[0]);
	for (int i = 0; i < length; i++)
	{
		if (primes[i] >= min)
			return primes[i];
	}

	/* candidate stays odd and below INT_MAX, so candidate += 2 cannot overflow. */
	for (int candidate = (min | 1); candidate < INT_MAX; candidate += 2)
	{
		if (IsPrime(candidate))
			return candidate;
	}
	return min;
}

/*
 * Initializes hd with the default capacity of 3.
 *
 * The parameter type now matches the prototype in HashDictionary.h
 * (HashDictionary*). The previous definition took HashDictionary**, which
 * defined a different overload and left the declared one without a definition.
 */
void Initialize(HashDictionary* hd)
{
	Initialize(hd,3);
}

/*
 * Prepares hd for use. capacity is a size hint; the real size of both arrays
 * is GetPrime(capacity). Every bucket starts empty (-1) and every entry slot
 * is marked unused (hashCode == -1, next == -1, value == 0).
 *
 * NOTE: the results of malloc are used without being checked.
 */
void Initialize(HashDictionary* hd,int capacity)
{
	const int slots = GetPrime(capacity);

	/* Bucket heads: -1 means "no chain". */
	hd->buckets = (int*)malloc(sizeof(int) * slots);
	for (int b = 0; b < slots; ++b)
		hd->buckets[b] = -1;
	hd->bucketslength = slots;

	/* Entry pool: mark every slot as unused. */
	hd->entrys = (Entry*)malloc(sizeof(Entry) * slots);
	for (int e = 0; e < slots; ++e)
	{
		Entry* slot = &hd->entrys[e];
		slot->hashCode = -1;
		slot->next = -1;
		slot->value = 0;
	}
	hd->entryslength = slots;

	/* Nothing stored yet and the free list is empty. */
	hd->count = 0;
	hd->freeCount = 0;
	hd->freeList = -1;
}

/*
 * Releases the bucket and entry arrays owned by hd. The HashDictionary object
 * itself and the values stored in the entries are not freed.
 *
 * After the call the pointers are NULL and all counters are reset, so calling
 * Disponse() a second time is harmless (previously it freed the same memory
 * twice). A NULL hd is ignored.
 */
void Disponse(HashDictionary* hd)
{
	if (hd == NULL)
		return;

	free(hd->buckets);
	free(hd->entrys);

	hd->buckets = NULL;
	hd->entrys = NULL;
	hd->bucketslength = 0;
	hd->entryslength = 0;
	hd->count = 0;
	hd->freeCount = 0;
	hd->freeList = -1;
}

/*
 * Grows the bucket and entry arrays and rebuilds every bucket chain.
 *
 * The new size is GetPrime(2 * count). The previous version used 2 * count
 * directly, which is always even; a non-prime bucket count works against the
 * division hashing this table relies on (hash % bucketslength). Going through
 * GetPrime() also guarantees a size of at least 3 when count is 0.
 *
 * The first `count` entries are copied unchanged, so entry indices stay valid.
 * Slots whose hashCode is negative (removed entries) are not re-linked into a
 * bucket; their `next` field keeps threading the free list.
 *
 * NOTE(review): malloc results are not checked here because the function has
 * no way to report failure; behaviour on allocation failure is unchanged.
 */
void Resize(HashDictionary* hd)
{
	/* Clamp so that doubling cannot overflow int. */
	int wanted = (hd->count > INT_MAX / 2) ? INT_MAX : hd->count * 2;
	int newsize = GetPrime(wanted);

	int* newbuckets = (int*)malloc(sizeof(int) * newsize);
	Entry* newentrys = (Entry*)malloc(sizeof(Entry) * newsize);

	for (int i = 0; i < newsize; i++)
	{
		newbuckets[i] = -1;
	}
	for (int i = 0; i < newsize; i++)
	{
		newentrys[i].hashCode = -1;
		newentrys[i].next = -1;
		newentrys[i].value = 0;
	}

	hd->bucketslength = hd->entryslength = newsize;

	memcpy(newentrys, hd->entrys, sizeof(Entry) * hd->count);

	free(hd->buckets);
	free(hd->entrys);

	hd->buckets = newbuckets;
	hd->entrys = newentrys;

	/* Re-link every live entry into its bucket for the new table size. */
	for (int i = 0; i < hd->count; i++) {
		if (hd->entrys[i].hashCode >= 0) {
			int bucket = hd->entrys[i].hashCode % hd->bucketslength;
			hd->entrys[i].next = hd->buckets[bucket];
			hd->buckets[bucket] = i;
		}
	}
}

/*
 * Inserts or updates an entry.
 *
 * hd     dictionary prepared with Initialize().
 * key    NUL-terminated key; it is copied into the entry, so it must be
 *        shorter than KEYSIZE characters.
 * value  pointer stored as-is; the dictionary does not take ownership.
 * add    when true an existing key is left untouched; when false its value is
 *        replaced.
 *
 * Returns
 *    1  a new entry was stored,
 *    2  the key existed and its value was replaced (add == false),
 *   -1  the key existed and add was true (nothing changed),
 *   -2  key is NULL or does not fit in KEYSIZE bytes (nothing changed).
 *
 * The -2 case is new: the previous version copied the key without a length
 * check and wrote past Entry::key for long keys.
 */
int Insert(HashDictionary* hd,char* key,void* value,bool add)
{
	if (key == NULL)
		return -2;
	size_t keylen = strlen(key);
	if (keylen >= KEYSIZE)
		return -2;

	/* Clear the sign bit so the modulo below never yields a negative bucket. */
	int hashCode = GetHashCode(key) & 0x7FFFFFFF;
	int targetBucket = hashCode % hd->bucketslength;

	/* Look for an existing entry with the same key in this bucket's chain. */
	for (int i = hd->buckets[targetBucket]; i >= 0; i = hd->entrys[i].next) {
		if (hd->entrys[i].hashCode == hashCode && strcmp(hd->entrys[i].key,key)==0) {
			if (add) {
				return -1;
			}
			hd->entrys[i].value = value;
			return 2;
		}
	}

	int index;
	if (hd->freeCount > 0) {
		/* Reuse the most recently removed slot. */
		index = hd->freeList;
		hd->freeList = hd->entrys[index].next;
		hd->freeCount--;
	}
	else {
		if (hd->count == hd->entryslength)
		{
			Resize(hd);
			/* The bucket count changed, so the target bucket must be recomputed. */
			targetBucket = hashCode % hd->bucketslength;
		}
		index = hd->count;
		hd->count++;
	}

	/* Link the new entry in at the head of its bucket chain. */
	hd->entrys[index].hashCode = hashCode;
	hd->entrys[index].next = hd->buckets[targetBucket];
	memcpy(hd->entrys[index].key,key,keylen+1);
	hd->entrys[index].value = value;
	hd->buckets[targetBucket] = index;
	return 1;
}

/*
 * Looks key up and returns its index in hd->entrys, or -1 when it is absent.
 * The chain of the key's bucket is walked; an entry matches when both the
 * stored hash and the key string are equal.
 */
int FindEntryIndex(HashDictionary* hd,char* key)
{
	const int wanted = GetHashCode(key) & 0x7FFFFFFF;
	int cursor = hd->buckets[wanted % hd->bucketslength];

	while (cursor >= 0)
	{
		const Entry* candidate = &hd->entrys[cursor];
		if (candidate->hashCode == wanted && strcmp(candidate->key, key) == 0)
			return cursor;
		cursor = candidate->next;
	}
	return -1;
}

/*
 * Returns a pointer to the entry stored under key, or a null pointer when the
 * key is absent. The pointer refers into hd->entrys.
 */
Entry* FindEntry(HashDictionary* hd,char* key)
{
	const int slot = FindEntryIndex(hd, key);
	return (slot >= 0) ? &hd->entrys[slot] : 0;
}

/*
 * Removes the entry stored under key.
 *
 * The entry is unlinked from its bucket chain, marked unused (hashCode = -1),
 * cleared, and pushed onto the free list so that Insert() can reuse the slot.
 *
 * The whole key buffer is now cleared. The previous version cleared a
 * hard-coded 20 bytes although the buffer is KEYSIZE bytes long.
 *
 * Returns true when an entry was removed, false when the key was not found.
 */
bool Remove(HashDictionary* hd,char* key) 
{
	int hashCode = GetHashCode(key) & 0x7FFFFFFF;
	int bucket = hashCode % hd->bucketslength;
	int last = -1;	/* index of the previous entry in the chain, -1 at the head */
	for (int i = hd->buckets[bucket]; i >= 0; last = i, i = hd->entrys[i].next) {
		if (hd->entrys[i].hashCode == hashCode && strcmp(hd->entrys[i].key,key)==0) {
			if (last < 0) {
				/* Removing the first entry of the chain. */
				hd->buckets[bucket] = hd->entrys[i].next;
			}
			else {
				hd->entrys[last].next = hd->entrys[i].next;
			}
			hd->entrys[i].hashCode = -1;
			/* Chain the slot into the list of removed entries. */
			hd->entrys[i].next = hd->freeList;
			memset(hd->entrys[i].key,0,sizeof(hd->entrys[i].key));
			hd->entrys[i].value = 0;
			hd->freeList = i;
			hd->freeCount++;
			return true;
		}
	}
	return false;
}

/* Reports whether key is currently stored in hd. */
bool ContainsKey(HashDictionary* hd,char* key) 
{
	const int slot = FindEntryIndex(hd, key);
	return !(slot < 0);
}

/* Returns the number of live entries: slots handed out minus slots on the free list. */
int GetCount(HashDictionary* hd)
{
	int live = hd->count;
	live -= hd->freeCount;
	return live;
}

/*
 * Writes every stored key into allkey as consecutive records of KEYSIZE bytes.
 *
 * allkey must provide at least GetCount(hd) * KEYSIZE bytes. That region is
 * zero-filled first, so every record is a NUL-terminated string. Nothing is
 * written when allkey is NULL or the dictionary is empty.
 *
 * Fixes over the previous version:
 *  - records are packed using a separate output counter. The old code used the
 *    entry index as the record index, so after a Remove() (or whenever the
 *    index exceeded the live count) it wrote past the end of allkey;
 *  - the non-portable memcpy_s call, which was passed the size of the whole
 *    buffer rather than the space remaining, is replaced by a bounded copy of
 *    the key string only.
 */
void GetAllKey(HashDictionary* hd,char* allkey)
{
	int length = hd->count - hd->freeCount;
	if (allkey == NULL || length <= 0)
		return;

	memset(allkey, 0, (size_t)KEYSIZE * (size_t)length);

	int out = 0;	/* next record to fill in allkey */
	/* Slots at index >= count have never been handed out, so they hold no key. */
	for (int i = 0; i < hd->count && out < length; i++)
	{
		if (hd->entrys[i].hashCode != -1)
		{
			const char* src = hd->entrys[i].key;
			size_t n = 0;
			while (n < KEYSIZE - 1 && src[n] != '\0')
				n++;
			/* The record is already zeroed, so the copy stays NUL-terminated. */
			memcpy(allkey + (size_t)out * KEYSIZE, src, n);
			out++;
		}
	}
}

测试代码

// testDic.cpp : 定义控制台应用程序的入口点。
//

#include "stdafx.h"
#include "HashDictionary.h"
#include <malloc.h>
#include <Windows.h>
#include<time.h>
int _tmain(int argc, _TCHAR* argv[])
{
	//dark 89757


	HashDictionary* hashdic=(HashDictionary*)malloc(sizeof(HashDictionary));

	Initialize(hashdic,3);//3 这个参数是默认数组初始大小，一般设置为=估计有多少条数据，小了也无所谓，是动态增长的

#pragma region 基本操作

	Insert(hashdic,"abc","this abc value",true);

	Insert(hashdic,"12341","this 12341 value~~~~",true);

	Insert(hashdic,"name","国人自强不息！",true);

	Insert(hashdic,"32.252","this abc value",true);

	printf(" abc: %s \n",FindEntry(hashdic,"abc")->value);
	Insert(hashdic,"abc","this NEW v~~~~",false);// <-false
	printf(" abc: %s \n",FindEntry(hashdic,"abc")->value);

	printf(" name: %s \n",FindEntry(hashdic,"name")->value);  

	int count=GetCount(hashdic);
	printf("table条数:%d\n",count);
	char* allkey=(char*)malloc(count*KEYSIZE);
	GetAllKey(hashdic,allkey);//返回的结果是以KEYSIZE为单位长度的字符集合=》KEYSIZE+KEYSIZE+KEYSIZE+KEYSIZE

	for (int i = 0; i < count; i++)
	{
		printf("key item:%s\n",allkey+i*KEYSIZE);//零食输出 应该是   i*KEYSIZE~（i+1）*KEYSIZE 这段才是一个key
	}


#pragma endregion

	Entry* find;
	printf( "---------------------------------------------\n");
#pragma region 删除 插入时间检测
	clock_t start, finish;  
	double  duration; 

	char* value="这是共有的value~~~~";
	start = clock();  
	int i = 0;
	char tempkey[20];
	for (; i < 1000000; i++)
	{
		_itoa_s(i,tempkey,10);
		if(i==50000){
			Insert(hashdic,tempkey,"this debug 5000",true);
		}
		Insert(hashdic,tempkey,value,true);
	}

	finish = clock();  
	duration = (double)(finish - start) / CLOCKS_PER_SEC;  
	printf( "插入hashtable 一百万条数：1000000 条！ 消费时间: %f seconds\n", duration ); 
	printf("table条数:%d\n",GetCount(hashdic));

	Remove(hashdic,"100");
	Remove(hashdic,"235");
	Remove(hashdic,"888888");

	//检测所有插入是否存在
	for ( i = 0;i < 1000000; i++)
	{
		_itoa_s(i,tempkey,10);
		find= FindEntry(hashdic,tempkey);
		if(find==NULL)
			printf("%s没有找到!!!\n",tempkey);
		else
		{
			//printf( "%s\n", find->value ); 打印100万条很耗时
		}
	}
	printf( "检查100万完成~\n");

#pragma endregion

	printf( "---------------------------------------------\n");
	start = clock();
	char *testkey="50000";
	find= FindEntry(hashdic,testkey);
	printf("指定key查找: key:%s  value:%s\n",testkey,find->value);
	char *testkey2="50001";
	find= FindEntry(hashdic,testkey2);
	printf("指定key查找:  key:%s  value:%s\n",testkey2,find->value);
	finish = clock();  

	duration = (double)(finish - start) / CLOCKS_PER_SEC;  
	printf( "查找两条耗时: %f seconds\n", duration );  

	printf( "---------------------------------------------\n");

	start = clock();
	Insert(hashdic,"汉字","this 汉字 的值",true);
	find= FindEntry(hashdic,"汉字");
	printf("指定key查找: key:%s  value:%s\n","汉字",find->value);

	duration = (double)(finish - start) / CLOCKS_PER_SEC;  
	printf( "插入+查找耗时: %f seconds\n", duration );  

	printf("table条数:%d\n",GetCount(hashdic));

	system("pause");
	return 0;
}

demo源码下载地址1：链接: http://pan.baidu.com/s/1vstEu 密码: y5lq

demo源码下载地址2:http://bcs.duapp.com/darkweb/testDic.zip

c语言 <除法散列法> 高效 HashTable Dictionary

标签：

原文地址：http://www.cnblogs.com/dark89757/p/4231222.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行