标签:人脸检测 harr
Viola-Jones人脸检测算法是第一个实时的人脸检测算法。其影响力就不用多说了,即便是现在,该算法的应用仍然非常广泛。众所周知,Viola-Jones算法分为三个部分,Harr特征和积分图,特征选择的AdaptBoost以及用于训练的Cascade模型。对于Cascade模型,它更多的表示的是一种Strategy,这可以当作一个另外的类别了,这个类别可以看作算法的一种“细节”处理,不同的人对其有不同的看法。Cascade模型主要的目的是降低训练时间,更重要的是使得分类器具有一般性,将重点放在Object和Object邻域的那些样本点。类似于这样的Strategy还有像Kalal在他的博士论文TLD中提出的Sampling Strategy和另外的一种Cascade模型,都能够有效的降低训练时间及使分类器具有一般性,这样的一些Strategy都属于Bootstrapping,也就是训练时用一些策略做出适当的引导,而不是盲目的训练所有的样本点。下面一起看看Harr怎么回事吧。
Harr特征和积分图
1 理论
就我而言Harr特征是属于Regional的特征表达方法(Focus on regional information),对于一个给定的图像窗口,Harr对该窗口的表达方式是用该窗口中的不同局域部分中的灰度差来表示的。如下图所示(a)表示一个窗口I,(b)表示Harr的一个模式,(c)表示在窗口I中计算(b)模式的Harr特征值时,只有白色和黑色部分被考虑了(也就是Focus on regional information)。
Harr特征常用的模式有:
Harr特征值的计算公式为(窗口大小N*N):
对于给定的一个窗口和一个Harr特征模式,如果每一个特征值的计算过程中都对一个模式中的黑色部分和白色部分进行和的计算的话,那么效率就太低了,这里积分图技术就大显身手了。对于给定的一个N*N的窗口I,其积分图计算公式如下:
于是对一个窗口图像中的一个方形内的像素求和计算方式如下:
计算积分图的算法描述如下:
2 实现
2.1基本数据结构
为了方便算法的实现,这里设计了一个ImgData数据结构,该结构将注意力集中在了图像的数据部分,也许是不太完善的,但目前能够满足我的需求,你也是很容易对它做一改动去满足你的需求,比如基于ImgData设计一个三通道的图像数据结构。
#ifndef _IMGDATA_H_ #define_IMGDATA_H_ #include<iostream> #include<cassert> /** *@ ImgDatais a template class that is used for operating only on image data * and simply focus on one channel right now,but this calss could be used for * constructing the 3 or more channels operationtemplate class * *@parammdata pointer pointing to image data *@parammrows number of rows for image data *@parammcols number of cols for image data */ template<typenameT> classImgData { private: T** mdata; unsignedintmrows; unsignedintmcols; public: /** *@brief constructor function with defaultinitialization 0 * *@param rows number of rows for image data *@param cols number of cols for image data *@val initial value for image data, default valueis 0 */ ImgData(unsignedintrows,unsignedintcols,Tval=0) { assert(rows > 0 && cols > 0); mrows= rows; mcols= cols; mdata= newT*[mrows]; for(unsignedinti=0; i<mrows; i++) { mdata[i]= newT[mcols]; } for(unsignedinti=0; i<mrows; i++) { for(unsignedintj=0; j<mcols; j++) { mdata[i][j]= val; } } } /** *@brief copy constructor */ ImgData(constImgData<T>&data){ mrows= data.rows(); mcols= data.cols(); mdata= newT*[mrows]; for(unsignedinti=0; i<mrows; i++) { mdata[i]= newT[mcols]; } for(unsignedinti=0; i<mrows; i++) { for(unsignedintj=0; j<mcols; j++) { mdata[i][j]= data[i][j]; } } } /** *@breif destructor */ ~ImgData(){ for(unsignedinti=0; i<mrows; i++) { delete[]mdata[i]; } delete[]mdata; } /** *@brief overload assignment operator */ ImgData<T>&operator=(constImgData<T>& data) { if(mrows!= data.rows() || mcols != data.rows()){ for(unsignedinti=0; i<mrows; i++) { delete[]mdata[i]; } delete[]mdata; } mrows= data.rows(); mcols= data.cols(); mdata= newT*[mrows]; for(unsignedinti=0; i<mrows; i++) { mdata[i]= newT[mcols]; } for(unsignedinti=0; i<mrows; i++) { for(unsignedintj=0; j<mcols; j++) { mdata[i][j]= data[i][j]; } } return*this; } /** *@brief set image data‘s value * *@param data a double layer pointer pointingto data going to be set to image data */ voidsetData(constT**data=NULL) { assert(data != NULL); for(unsignedinti=0; i<mrows; i++) { for(unsignedintj=0; j<mcols; j++) { mdata[i][j]= data[i][j]; } } } /** *@brief overload [] operator for likedata[][]operator use */ T* operator[](unsignedintn)const{ assert(n < mrows); returnmdata[n]; } /** *@brief overload -= operator for all imagedata minus val individually */ voidoperator-=(Tval){ for(unsignedinti=0; i<mrows; i++) { for(unsignedintj=0; j<mcols; j++) { mdata[i][j]-= val; } } } /** *@brief overload += operator for all imagedata plus val individually */ voidoperator+=(Tval){ for(unsignedinti=0; i<mrows; i++) { for(unsignedintj=0; j<mcols; j++) { mdata[i][j]+= val; } } } /** *@brief overload *= operator for all imagedata multiple val individually */ voidoperator*=(Tval){ for(unsignedinti=0; i<mrows; i++) { for(unsignedintj=0; j<mcols; j++) { mdata[i][j]*= val; } } } /** *@brief overload /= operator for all image datadevide val individually */ voidoperator/=(Tval){ for(unsignedinti=0; i<mrows; i++) { for(unsignedintj=0; j<mcols; j++) { mdata[i][j]/= val; } } } /** *@brief ImageData .* ImageData * *@return a const ImageData */ constImgData<T>cwiseProduct(constImgData<T>& imgData) const{ assert(mrows == imgData.rows() && mcols ==imgData.cols()); ImgData<T>temp(imgData.rows(), imgData.cols()); T** tempData = temp.data(); T** data = imgData.data(); for(unsignedinti=0; i<mrows; i++) { for(unsignedintj=0; j<mcols; j++) { tempData[i][j]= mdata[i][j] * data[i][j]; } } returntemp; } /** *@brief compute sum of image data * *@return a long double type sum of image data */ longdoublesum() const { longdoubleimageSum = 0; for(unsignedinti=0; i<mrows; i++) { for(unsignedintj=0; j<mcols; j++) { imageSum+= mdata[i][j]; } } returnimageSum; } /** *@brief get pointer of image data for changingdata‘s value use */ T** data() const { returnmdata; } /** *@brief get number rows for image data */ unsignedintrows() const { returnmrows; } /** *@brief get number cols for image data */ unsignedintcols() const { returnmcols; } /** *@brief print image data for validation use */ voidprint() const { for(unsignedinti=0; i<mrows; i++) { for(unsignedintj=0; j<mcols; j++) { std::cout<< mdata[i][j] << " "; } std::cout<< std::endl; } } }; #endif
2.2 积分图相关实现
这里提供了一个计算积分图的实现,以及提供了一个计算积分图内的一个矩形区域的和的方法。
#ifndef _INTEGRALIMG_H_ #define_INTEGRALIMG_H_ #include<cassert> #include"cvImgData.h" /** * @briefintegral image in linear time * * @paramimage original image * @returnintegralImage integral image */ //compute an integral image in linear time template<typenameT> void buildIntegralImage( constImgData<T>&image , ImgData<T>&integralImage ){ assert(image.rows() == integralImage.rows()); assert(image.cols() == image.cols()); intnRows = image.rows(); intnCols = image.cols(); //O(n)complexity: however no parallel should be used T** imgData = image.data(); T** integralImgData = integralImage.data(); for(inti = 0; i < nRows; i++) for(intj = 0; j < nCols; j++){ integralImgData[i][j]= imgData[i][j]; integralImgData[i][j]+= i > 0 ? integralImgData[i-1][j] : 0; integralImgData[i][j]+= j > 0 ? integralImgData[i][j-1] : 0; integralImgData[i][j]-= std::min(i,j) > 0 ? integralImgData[i-1][j-1] : 0; } } /** * @brief sumup the pixels in a sub-rectangle of the image * * @paramintegralImage its integral image * @param uirow index of this sub-rectangle‘s up left corner * @param ujcolumn index of this sub-rectangle‘s up left corner * @param irnumber of rows of this sub-rectangle * @param jrnumber of columns of this sub-rectangle * @returnthe sum */ template<typenameT> longdoublesumImagePart( constImgData<T>&integralImage , intui , intuj , intir , intjr ){ //rectangle‘sother pair of coordinates intdi = ui + ir- 1; intdj = uj + jr- 1; //standardoperation T** integralData = integralImage.data(); longdoublesum = integralData[di][dj]; sum-= ui > 0 ? integralData[ui-1][dj]: 0; sum-= uj > 0 ? integralData[di][uj-1]: 0; sum+= std::min(ui, uj)> 0 ? integralData[ui-1][uj-1]: 0; returnsum; } #endif
2.3 Harr特征实现
对于Harr特征的计算,这里实现了Viola-Jones算法中的5种基本模式。
#ifndef _HARR_H_ #define_HARR_H_ #include<iostream> #include<fstream> #include<vector> #include<stack> #include<string> #include<cassert> #include"cvImgData.h" #include"cvIntegralImg.h" usingnamespacestd; #defineFOLLOW_VJtrue #defineSTD_NORM_CONST1e4 #defineFLAT_THRESHOLD1 /** * @briefcompute Haar-like features with integral image, * mostly for training samples use. * * @paramintegralImage its integral image * @paramfeatures all the features from this image * @paramenforceShape the original Viola-Jones‘s proposal or more extensive definition * of rectangle features (requires significantlymore memory if disabled) */ inlineintsumOfElement(intarray[],intk){ intret = 0; for(inti=0; i<k; i++) { ret+= array[i]; } returnret; } voidcomputeHaarLikeFeaturesOnlyUsingIntegralImage( ImgData<double>&image , vector<double>&features , boolenforceShape= true ){ if(features.size()> 0) { features.resize(0); } //integralimage intnRows = image.rows(); intnCols = image.cols(); //firstnormalize the example, this might prove key to learning success //meanvalue floatmean = static_cast<float>(image.sum()/(float)(nRows*nCols)); image-= mean; floatstd = static_cast<float>(sqrt(image.cwiseProduct(image).sum()/(float)(nRows*nCols))); if(std< FLAT_THRESHOLD || _isnan(std)){ std= FLAT_THRESHOLD; cout<< "Find one completely flat example. Don‘t worry.It‘ll be treated as an outlier presumably."<< endl; } //fullyuse the float range image*= STD_NORM_CONST; image/= std; ImgData<double>integralImage(nRows, nCols); buildIntegralImage(image,integralImage); //twohorizontal patterns for(intportions = 2; portions < 4; portions++){ //upleft coordinate for(inti = 0; i < nRows; i++) for(intj = 0; j < nCols; j++){ //howlong can l1 be intl1MIN = portions == 2 ? 0 : 1; intl1MAX = portions == 2 ? 0 : nCols - j; //height for(intir = 1; ir <= nRows - i; ir++) for(intl1 = l1MIN; l1 <= l1MAX; l1++) for(intl2 = 1; l2 <= nCols-j-l1; l2++) for(intl3 = 1; l3 <= nCols-j-l1-l2; l3++){ //makethese lengths more intelligible intlengths[3]; lengths[0]= l1; lengths[1]= l2; lengths[2]= l3; //nowgo for the feature doublefeature = 0; if(portions== 2){ if(enforceShape&& l2 != l3) continue; feature+= (float)sumImagePart(integralImage, i, j,ir, l2); feature-= (float)sumImagePart(integralImage, i, j+l2,ir, l3); }else{ if(enforceShape&& FOLLOW_VJ && !(l1 == l3 && l1== l2)) continue; if(enforceShape&& !FOLLOW_VJ && !(l1 == l3 && l1>= l2)) continue; for(intk = 0; k < 3; k++){ intfactor = static_cast<int>(pow(-1.,k)); intadvance = k == 0 ? 0 : sumOfElement(lengths, k); feature+= factor*(float)sumImagePart(integralImage, i,j+advance, ir, lengths[k]); } } //store features.push_back(feature); } } } //twovertical patterns: if you go my way, only one vertical pattern for(intportions = 2; portions < 4; portions++){ //upleft coordinate for(inti = 0; i < nRows; i++) for(intj = 0; j < nCols; j++){ intl1MIN = portions == 2 ? 0 : 1; intl1MAX = portions == 2 ? 0 : nRows - i; //width for(intjr = 1; jr <= nCols-j; jr++) for(intl1 = l1MIN; l1 <= l1MAX; l1++) for(intl2 = 1; l2 <= nRows-i-l1; l2++) for(intl3 = 1; l3 <= nRows-i-l1-l2; l3++){ //makethese lengths more intelligible intlengths[3]; lengths[0]= l1; lengths[1]= l2; lengths[2]= l3; //nowgo for the feature doublefeature = 0; if(portions== 2){ if(enforceShape&& !(l2 == l3)) continue; feature+= (float)sumImagePart(integralImage, i, j,l2, jr); feature-= (float)sumImagePart(integralImage, i+l2, j,l3, jr); }else{ if(enforceShape&& FOLLOW_VJ && !(l1 == l2 && l1== l3)) continue; //if(enforceShape&& FOLLOW_VJ) if(enforceShape&& !FOLLOW_VJ) continue; for(intk = 0; k < 3; k++){ intfactor = static_cast<int>(pow(-1.,k)); intadvance = k == 0 ? 0 : sumOfElement(lengths, k); feature+= factor*(float)sumImagePart(integralImage,i+advance, j, lengths[k], jr); } } //store features.push_back(feature); } } } //amixed pattern for(inti = 0; i < nRows; i++) for(intj = 0; j < nCols; j++) for(inti_l1 = 1; i_l1 < nRows - i; i_l1++) for(inti_l2 = 1; i_l2 <= nRows - i - i_l1; i_l2++) for(intj_l1 = 1; j_l1 < nCols - j; j_l1++) for(intj_l2 = 1; j_l2 <= nCols - j - j_l1; j_l2++){ if(enforceShape&& !(i_l1 == i_l2 && j_l1 == j_l2)) continue; doublefeature = 0; feature+= (float)sumImagePart(integralImage, i, j,i_l1, j_l1); feature-= (float)sumImagePart(integralImage, i, j +j_l1, i_l1, j_l2); feature-= (float)sumImagePart(integralImage, i +i_l1, j, i_l2, j_l1); feature+= (float)sumImagePart(integralImage, i + i_l1,j + j_l1, i_l2, j_l2); //record features.push_back(feature); } }
reference:
Yi-Qing Wang, An Analysis of the Viola-Jones Face Detection Algorithm, IPOL.
本文出自 “Remys” 博客,谢绝转载!
标签:人脸检测 harr
原文地址:http://remyspot.blog.51cto.com/8218746/1627232