标签:
最近项目的计算节点用的最多的是一个笛卡尔乘积算法,由于字典集合比较多,总高度有时会达到很高的一个量级,例如:1G。这样笛卡尔算法所 耗费的时间将很大,如何提交笛卡尔算法效率已经是当务之急。
笛卡尔乘积的算法很简单,复杂度就摆在这里O(high*n) :n指字典集合个数,high是所有集合的高度乘积。复杂度没法降低,所以我们从循环里面的运算下手,尽量降低循环内的计算,性能有所提升,下面是老版笛卡尔和新版笛卡尔的代码:
新版: 对比旧版,减少了high*(maxHigh-1)*2*n次除法,效率有所提高
int DICJob::descarte(DICComputationTask* dicTask,map<string,charset_mem*>& dicMap,uint8 segSIndex,uint8 segEIndex,uint8** resData,uint64* resHigh,int* reWidth){ #define MAX_WORD_LEN 64 cout<<"begin descarte!"<<endl; uint64 i,j,k;// 循环变量 // maxDicHigh为所有字典库高度最大值, maxDicInx为最高字典库的下标 (均要除去star字典库) // maxDicOffset为最高字典库在表达式口令中的偏移量, expressionLen为表达式口令长度 uint64 maxDicHigh = 0, maxDicInx = 0, maxDicOffset = 0, expressionLen = 0; // highProduct为所有字典库高度乘积 (除去star字典库) // outerIter为外层迭代次数, innerIter为内层迭代次数 uint64 highProduct = 1, outerIter, innerIter; charset_mem* dicCharsetArray[MAX_WORD_LEN]={NULL}; for(int i=segSIndex; i<=segEIndex; i++){ map<string,charset_mem*>::iterator iterDic=dicMap.find(dicTask->cn_re.dicName[i]); if(iterDic==dicMap.end()){ cout<<dicTask->cn_re.dicName[i]<<" do not exist"<<endl; LOG(ERROR)<<dicTask->cn_re.dicName[i]<<" do not exist"; return -1; } int temHigh=dicTask->cn_re.segInfo[i].end-dicTask->cn_re.segInfo[i].begin+1; if(temHigh > maxDicHigh) { maxDicHigh = temHigh; maxDicInx = i; } dicCharsetArray[i-segSIndex]=iterDic->second; highProduct *= temHigh; expressionLen += iterDic->second->width; } uint8 *result = new uint8[highProduct*expressionLen]; memset(result,0,highProduct*expressionLen); // 每条口令后添加一个换行符,所以表达式宽度需要加1 //expressionLen += 1; // 求最高字典库在表达式口令中的偏移量 for(i = 0; i < maxDicInx; i++) { map<string,charset_mem*>::iterator iterDic=dicMap.find(dicTask->cn_re.dicName[i]); if(iterDic==dicMap.end()){ cout<<dicTask->cn_re.dicName[i]<<" do not exist"<<endl; LOG(ERROR)<<dicTask->cn_re.dicName[i]<<" do not exist"; return -1; } maxDicOffset += iterDic->second->width; } cout<<"descarte 11!"<<endl; // 内层迭代次数为最高字典库高度 // 外层迭代次数为除了star位和最高字典库的高度乘积 innerIter = maxDicHigh; outerIter = highProduct / maxDicHigh; // 外层循环,程序热点(hotspot) int thread_num = omp_get_max_threads(); #pragma omp parallel for num_threads(thread_num) shared(result) private(i, j, k) for(i = 0; i < outerIter; i++) { // star字典库字段设为0 // 构造外层模板 uint32 memOffset = 0; uint8 outerPwdTemplate[expressionLen]; uint8 outerPwdTemplate_32[32 * expressionLen]; memset(outerPwdTemplate, 0, expressionLen * sizeof(uint8)); //outerPwdTemplate[expressionLen - 1] = '\n'; // 口令后面添加一个换行 memset(outerPwdTemplate_32, 0, 32 * expressionLen * sizeof(uint8)); uint32 remainder , quotient = i; // 除数和余数 for(int k=segSIndex;k<=segEIndex;++k){ // 构造外层模板时,排除star字典库和最高字典库(用作内层模板) if( k != maxDicInx) { remainder = quotient % dicCharsetArray[k-segSIndex]->high; quotient = quotient / dicCharsetArray[k-segSIndex]->high; memcpy(outerPwdTemplate + memOffset, dicCharsetArray[k-segSIndex]->data + remainder * dicCharsetArray[k-segSIndex]->width,dicCharsetArray[k-segSIndex]->width); } memOffset += dicCharsetArray[k-segSIndex]->width; } for(k = 0; k < 32; k++) memcpy(outerPwdTemplate_32 + k * expressionLen, outerPwdTemplate, expressionLen); // 内层循环 uint8 innerPwdTemplate_32[32 * expressionLen]; memset(innerPwdTemplate_32, 0, 32 * expressionLen * sizeof(uint8)); for(j = 0; j < innerIter; j+=32) { uint32 kNum = (j + 32) < innerIter ? 32 : innerIter - j; // 构造内层模板 for(k = 0; k < kNum; k++) memcpy(innerPwdTemplate_32 + k * expressionLen + maxDicOffset, dicCharsetArray[maxDicInx]->data + (j + k) * dicCharsetArray[maxDicInx]->width, dicCharsetArray[maxDicInx]->width); // 将内层模板和外层模板做或操作,得到结果数组 for(k = 0; k < kNum * expressionLen; k++) result[(i * innerIter + j) * expressionLen + k] = innerPwdTemplate_32[k] | outerPwdTemplate_32[k]; } } cout<<"descarte 22!"<<endl; *resData=result; *resHigh=highProduct; *reWidth=expressionLen; return 0; //return highProduct * expressionLen; }
老版:
int Old_descarte(DICComputationTask* dicTask,map<string,charset_mem*>& dicMap,uint8 segSIndex,uint8 segEIndex,uint8** resData,uint64* resHigh,int* reWidth){ #define MAX_WORD_LEN 64 uint64 s[MAX_DIC_NUM]={0}; //字典段进制位 uint64 high=1; int width=0; charset_mem* dicCharsetArray[MAX_WORD_LEN]={NULL}; for(int i=segEIndex; i>=segSIndex; --i){ s[i]=high; map<string,charset_mem*>::iterator iterDic=dicMap.find(dicTask->cn_re.dicName[i]); if(iterDic==dicMap.end()){ cout<<dicTask->cn_re.dicName[i]<<" do not exist"<<endl; LOG(ERROR)<<dicTask->cn_re.dicName[i]<<" do not exist"; return -1; } dicCharsetArray[i-segSIndex]=iterDic->second; high*=dicTask->cn_re.segInfo[i].end-dicTask->cn_re.segInfo[i].begin+1; width+=iterDic->second->width; } uint8* data=new uint8[high*width]; memset(data,0,high*width); //笛卡尔乘积 int thread_num=omp_get_max_threads(); #pragma omp parallel for num_threads(thread_num) for(uint64 i=0;i<high;++i){ uint8 wordTmp[MAX_WORD_LEN]={0}; int offset=0; int charpos=i; for(int j=segSIndex;j<=segEIndex;++j){ int indexDic=charpos/s[j]; int offsetDic=(indexDic+dicTask->cn_re.segInfo[j].begin)*dicCharsetArray[j-segSIndex]->width; memcpy(wordTmp+offset,dicCharsetArray[j-segSIndex]->data+offsetDic,dicCharsetArray[j-segSIndex]->width); charpos=charpos%s[j]; offset+=dicCharsetArray[j-segSIndex]->width; } memcpy(data+i*width,wordTmp,width); } *resData=data; *resHigh=high; *reWidth=width; return 0; }
标签:
原文地址:http://blog.csdn.net/kingbaiyulong/article/details/51333975