码迷,mamicode.com
首页 > 其他好文 > 详细

海量字符串查找——bloom filter,c

时间:2016-06-13 19:02:21      阅读:322      评论:0      收藏:0      [点我收藏+]

标签:

对于海量字符串的查找,一般有两种方法:一种是建树,另一种是bf算法,即布隆过滤器(Bloom filter)。后者从原理上讲比较简单,也易于实现,主要依靠多个哈希函数来实现。
对于海量字符串的查找,一般有两种方法:一种是建树,另一种是bf算法,即布隆过滤器(Bloom filter)。后者从原理上讲比较简单,也易于实现,主要依靠多个哈希函数来实现。
/*
 * Return the number of characters in the NUL-terminated string `ch`,
 * not counting the terminator.
 *
 * `ch` must point to a valid NUL-terminated string; the result is an int,
 * so strings longer than INT_MAX characters are not supported.
 */
int len(char *ch)
{
        int m=0;
        /* Advance until the terminating NUL character is reached. */
        while(ch[m]!='\0') {
            m++;
        }
        return m;
}


/*
 * Test the string `ch` against the bit array `vertor`.
 *
 * Each of the eleven hash functions is applied to `ch`, and the bit that
 * GETBIT reports for that hash value is examined. Returns false as soon as
 * one of those bits is 0, and true only when none of them is 0.
 *
 * NOTE(review): with the usual Bloom-filter semantics a true result means
 * "possibly present" (false positives are possible) while false means
 * "definitely absent" -- confirm against how the array is populated.
 */
bool judge(char *vertor,char ch[]){
    /* Every hash takes the same length, so measure the string only once. */
    int n=len(ch);

    if (GETBIT(vertor,RSHash(ch,n))==0) return false;
    if (GETBIT(vertor,JSHash(ch,n))==0) return false;
    if (GETBIT(vertor,PJWHash(ch,n))==0) return false;
    if (GETBIT(vertor,ELFHash(ch,n))==0) return false;
    if (GETBIT(vertor,BKDRHash(ch,n))==0) return false;
    if (GETBIT(vertor,SDBMHash(ch,n))==0) return false;
    if (GETBIT(vertor,DJBHash(ch,n))==0) return false;
    if (GETBIT(vertor,DEKHash(ch,n))==0) return false;
    if (GETBIT(vertor,BPHash(ch,n))==0) return false;
    if (GETBIT(vertor,FNVHash(ch,n))==0) return false;
    if (GETBIT(vertor,APHash(ch,n))==0) return false;

    return true;
}

/*
 * Populate the bit array from the string-pool file, then test every
 * whitespace-separated token of the check file against it, writing one
 * "yes" or "no" line per token to the result file.
 *
 * The three paths default to the built-in locations below. When at least
 * three command-line arguments are supplied they are used instead, in the
 * order: string pool, check list, result file.
 *
 * For each matching token the running match count is printed; afterwards
 * the number of tokens read from each input file and the processor time
 * reported by clock() are printed.
 *
 * Returns EXIT_SUCCESS on success, or EXIT_FAILURE when a file cannot be
 * opened, the bit array cannot be allocated, or the result file cannot be
 * closed cleanly.
 */
int main(int argc,char *argv[]){
    const char *pool_path="/Users/wangzunwen/emaillist.dat";
    const char *check_path="/Users/wangzunwen/checklist.dat";
    const char *result_path="/Users/wangzunwen/result2222.dat";
    clock_t  a=clock();
    int pos=1,k=0,j=0;
    int rc=EXIT_FAILURE;
    FILE *fp_strpool=NULL,*fp_checkedstr=NULL,*fp_result=NULL;
    char ch[ARRAY_SIZE];
    char fmt[32];
    char *vertor=NULL;

    /* Never write into argv: its slots beyond argc do not exist. */
    if (argc>=4) {
        pool_path=argv[1];
        check_path=argv[2];
        result_path=argv[3];
    }

    /*
     * Build "%<width>s" so fscanf can never store more than the buffer
     * holds; a token longer than the buffer is read in several pieces.
     */
    snprintf(fmt,sizeof fmt,"%%%ds",(int)(sizeof ch)-1);

    /* Open the three files. */
    fp_strpool=fopen(pool_path, "r");
    if (fp_strpool==NULL) {
        perror(pool_path);
        goto cleanup;
    }
    fp_checkedstr=fopen(check_path, "r");
    if (fp_checkedstr==NULL) {
        perror(check_path);
        goto cleanup;
    }
    fp_result=fopen(result_path, "w");
    if (fp_result==NULL) {
        perror(result_path);
        goto cleanup;
    }

    /* Allocate the bit array; calloc already returns zeroed memory. */
    vertor=(char *)calloc(SIZE , sizeof(char) );
    if (vertor==NULL) {
        perror("calloc");
        goto cleanup;
    }

    /* Set the bit selected by each hash for every token in the pool. */
    while (fscanf(fp_strpool, fmt,ch)==1) {
        int n=len(ch);

        SETBIT(vertor, RSHash(ch,n));
        SETBIT(vertor, JSHash(ch,n));
        SETBIT(vertor, PJWHash(ch,n));
        SETBIT(vertor, ELFHash(ch,n));
        SETBIT(vertor, BKDRHash(ch,n));
        SETBIT(vertor, SDBMHash(ch,n));
        SETBIT(vertor, DJBHash(ch,n));
        SETBIT(vertor, DEKHash(ch,n));
        SETBIT(vertor, BPHash(ch,n));
        SETBIT(vertor, FNVHash(ch,n));
        SETBIT(vertor, APHash(ch,n));
        j++;
    }

    /* Look up every token of the check file and record the verdict. */
    while (fscanf(fp_checkedstr, fmt,ch)==1) {
        k++;
        if (judge(vertor,ch)) {
            printf("%d\n",pos);
            pos++;
            fputs("yes\n", fp_result);
        } else {
            fputs("no\n", fp_result);
        }
    }
    printf("%d %d\n",j,k);
    rc=EXIT_SUCCESS;

cleanup:
    free(vertor);
    /* fclose flushes buffered output, so a failure here means lost results. */
    if (fp_result!=NULL && fclose(fp_result)!=0) {
        perror(result_path);
        rc=EXIT_FAILURE;
    }
    if (fp_checkedstr!=NULL) {
        fclose(fp_checkedstr);
    }
    if (fp_strpool!=NULL) {
        fclose(fp_strpool);
    }

    if (rc==EXIT_SUCCESS) {
        clock_t  b=clock();
        double duration = (double)(b - a) / CLOCKS_PER_SEC;
        printf( "%f seconds\n", duration );
    }
    return rc;
}

 

 

 
 
 
 

海量字符串查找——bloom filter,c

标签:

原文地址:http://www.cnblogs.com/xiaoba1203/p/5581467.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!