//设置决策树分类误差计算方法 stumperror = (int) ((CvMTStumpTrainParams*) trainParams)->error; //设置class step和ydata ydata = trainClasses->data.ptr; if( trainClasses->rows == 1 ) { m = trainClasses->cols; ystep = CV_ELEM_SIZE( trainClasses->type ); } else { m = trainClasses->rows; ystep = trainClasses->step; } //设置weight step和wdata wdata = weights->data.ptr; if( weights->rows == 1 ) { assert( weights->cols == m ); wstep = CV_ELEM_SIZE( weights->type ); } else { assert( weights->rows == m ); wstep = weights->step; } //设置步长,地址等參数,用于获取idxCache内容 if( ((CvMTStumpTrainParams*) trainParams)->sortedIdx != NULL ) { sortedtype = CV_MAT_TYPE( ((CvMTStumpTrainParams*) trainParams)->sortedIdx->type ); assert( sortedtype == CV_16SC1 || sortedtype == CV_32SC1 || sortedtype == CV_32FC1 ); sorteddata = ((CvMTStumpTrainParams*) trainParams)->sortedIdx->data.ptr; sortedsstep = CV_ELEM_SIZE( sortedtype ); sortedcstep = ((CvMTStumpTrainParams*) trainParams)->sortedIdx->step; sortedn = ((CvMTStumpTrainParams*) trainParams)->sortedIdx->rows; sortedm = ((CvMTStumpTrainParams*) trainParams)->sortedIdx->cols; } if( trainData == NULL ) { assert( ((CvMTStumpTrainParams*) trainParams)->getTrainData != NULL ); n = ((CvMTStumpTrainParams*) trainParams)->numcomp; assert( n > 0 ); } //设置步长,地址等參数,用于获取dataCache内容 else { assert( CV_MAT_TYPE( trainData->type ) == CV_32FC1 ); data = trainData->data.ptr; if( CV_IS_ROW_SAMPLE( flags ) ) { cstep = CV_ELEM_SIZE( trainData->type ); sstep = trainData->step; assert( m == trainData->rows ); datan = n = trainData->cols; } else { sstep = CV_ELEM_SIZE( trainData->type ); cstep = trainData->step; assert( m == trainData->cols ); datan = n = trainData->rows; } if( ((CvMTStumpTrainParams*) trainParams)->getTrainData != NULL ) { n = ((CvMTStumpTrainParams*) trainParams)->numcomp; } }可能研究代码到这里的朋友仍然不清楚idxCache和valCache的作用。
//1MB == 1048576B 计算一个样本中有多少个特征能被pre计算放在内存中 numprecalculated = (int) ( ((size_t) mem) * ((size_t) 1048576) / ( ((size_t) (npos + nneg)) * (sizeof( float ) + sizeof( short )) ) );
比如,第一行代表feature1从小到大的index顺序。从图中可以看出,sample1的特征值feature1 < sample0的特征值feature1 < ... < sample n的特征值feature1 < sample n-1的特征值feature1。
while( t_compidx < n ) { //选择计算前100种特征 t_n = portion; if( t_compidx < datan ) { t_n = ( t_n < (datan - t_compidx) ) ? t_n : (datan - t_compidx); t_data = data; t_cstep = cstep; t_sstep = sstep; } else { } if( sorteddata != NULL ) { } else { /* have sorted indices */ switch( sortedtype ) { case CV_16SC1: //选择某个样本的某个特征值作为结点 for( ti = t_compidx; ti < MIN( sortedn, t_compidx + t_n ); ti++ ) { if( findStumpThreshold_16s[stumperror]( t_data + ti * t_cstep, t_sstep, wdata, wstep, ydata, ystep, sorteddata + ti * sortedcstep, sortedsstep, sortedm, &lerror, &rerror, &threshold, &left, &right, &sumw, &sumwy, &sumwyy ) ) { optcompidx = ti; } } break; } } }}
这里datan代表的是一个检测窗口包含的特征数目。portion代表每次以多少行为单位进行计算。每一次循环选取valCache中的portion行进行计算,应该是为了发挥并行计算的优势(假如设置了并行计算的宏的话)。
/*
 * ICV_DEF_FIND_STUMP_THRESHOLD( suffix, type, error )
 *
 * Defines icvFindStumpThreshold_<suffix>(), which visits `num` samples through
 * an index array and looks for the split value whose combined error
 * (curlerror + currerror) is smaller than the incoming (*lerror) + (*rerror).
 *
 * Macro parameters:
 *   suffix - appended to the generated function name.
 *   type   - element type of the index array `idxdata`.
 *   error  - statement(s) pasted verbatim into the scan loop. They are expected
 *            to assign curlerror and currerror using the locals declared in
 *            the function (wl, wr, wyl, wyr, wyyl, curleft, curright, wposl,
 *            wposr, ...), so those local names are part of the macro's
 *            contract and must not be renamed.
 *
 * Generated function parameters:
 *   data, datastep     - feature values; the float for sample idx is read
 *                        from data + idx * datastep.
 *   wdata, wstep       - per-sample float weights, addressed the same way.
 *   ydata, ystep       - per-sample float responses, addressed the same way.
 *   idxdata, idxstep   - the i-th sample index is read (as `type`) from
 *                        idxdata + i * idxstep. Visiting samples in this order
 *                        must yield non-decreasing feature values (checked by
 *                        a debug assert).
 *   num                - number of entries in the index array.
 *   lerror, rerror     - in: best left/right errors so far;
 *                        out: overwritten when a better split is found.
 *   threshold, left, right
 *                      - out: split value and left/right node values, written
 *                        only when a better split is found.
 *   sumw, sumwy, sumwyy
 *                      - totals of w, w*y and w*y*y. When *sumw == FLT_MAX
 *                        they are (re)computed here and stored back;
 *                        otherwise the passed-in values are used as they are.
 *
 * Returns 1 if a better split was found, 0 otherwise.
 *
 * Every line of the definition must end with a line-continuation backslash;
 * without it the macro ends at the first newline and the rest of the function
 * body is left at file scope. Only block comments are used inside the body
 * because a line comment would swallow the continued lines.
 */
#define ICV_DEF_FIND_STUMP_THRESHOLD( suffix, type, error )                              \
CV_BOOST_IMPL int icvFindStumpThreshold_##suffix(                                        \
        uchar* data, size_t datastep,                                                    \
        uchar* wdata, size_t wstep,                                                      \
        uchar* ydata, size_t ystep,                                                      \
        uchar* idxdata, size_t idxstep, int num,                                         \
        float* lerror, float* rerror,                                                    \
        float* threshold, float* left, float* right,                                     \
        float* sumw, float* sumwy, float* sumwyy )                                       \
{                                                                                        \
    int found = 0;                                                                       \
    float wyl  = 0.0F;                                                                   \
    float wl   = 0.0F;                                                                   \
    float wyyl = 0.0F;                                                                   \
    float wyr  = 0.0F;                                                                   \
    float wr   = 0.0F;                                                                   \
                                                                                         \
    float curleft  = 0.0F;                                                               \
    float curright = 0.0F;                                                               \
    float* prevval = NULL;                                                               \
    float* curval  = NULL;                                                               \
    float curlerror = 0.0F;                                                              \
    float currerror = 0.0F;                                                              \
    /* not used by this body itself; available to the pasted `error` code */             \
    float wposl;                                                                         \
    float wposr;                                                                         \
                                                                                         \
    int i = 0;                                                                           \
    int idx = 0;                                                                         \
                                                                                         \
    wposl = wposr = 0.0F;                                                                \
    if( *sumw == FLT_MAX )                                                               \
    {                                                                                    \
        /* calculate sums: FLT_MAX in *sumw marks the totals as not yet computed */      \
        float *y = NULL;                                                                 \
        float *w = NULL;                                                                 \
        float wy = 0.0F;                                                                 \
                                                                                         \
        *sumw   = 0.0F;                                                                  \
        *sumwy  = 0.0F;                                                                  \
        *sumwyy = 0.0F;                                                                  \
        for( i = 0; i < num; i++ )                                                       \
        {                                                                                \
            idx = (int) ( *((type*) (idxdata + i*idxstep)) );                            \
            w = (float*) (wdata + idx * wstep);                                          \
            *sumw += *w;                                                                 \
            y = (float*) (ydata + idx * ystep);                                          \
            wy = (*w) * (*y);                                                            \
            *sumwy += wy;                                                                \
            *sumwyy += wy * (*y);                                                        \
        }                                                                                \
    }                                                                                    \
                                                                                         \
    for( i = 0; i < num; i++ )                                                           \
    {                                                                                    \
        idx = (int) ( *((type*) (idxdata + i*idxstep)) );                                \
        curval = (float*) (data + idx * datastep);                                       \
        /* for debug purpose */                                                          \
        if( i > 0 ) assert( (*prevval) <= (*curval) );                                   \
                                                                                         \
        /* right-side sums are the totals minus what is already on the left */           \
        wyr  = *sumwy - wyl;                                                             \
        wr   = *sumw  - wl;                                                              \
                                                                                         \
        /* weighted mean response on each side; 0 when the side has no weight */         \
        if( wl > 0.0 ) curleft  = wyl / wl;                                              \
        else curleft = 0.0F;                                                             \
                                                                                         \
        if( wr > 0.0 ) curright = wyr / wr;                                              \
        else curright = 0.0F;                                                            \
                                                                                         \
        /* caller-supplied code that sets curlerror and currerror */                     \
        error                                                                            \
                                                                                         \
        if( curlerror + currerror < (*lerror) + (*rerror) )                              \
        {                                                                                \
            (*lerror) = curlerror;                                                       \
            (*rerror) = currerror;                                                       \
            *threshold = *curval;                                                        \
            /* split halfway between the previous and the current value */               \
            if( i > 0 ) {                                                                \
                *threshold = 0.5F * (*threshold + *prevval);                             \
            }                                                                            \
            *left  = curleft;                                                            \
            *right = curright;                                                           \
            found = 1;                                                                   \
        }                                                                                \
                                                                                         \
        /* move every sample whose value equals *curval to the left side;    */          \
        /* the loop condition advances i and reloads idx for the next sample */          \
        do                                                                               \
        {                                                                                \
            wl  += *((float*) (wdata + idx * wstep));                                    \
            wyl += (*((float*) (wdata + idx * wstep)))                                   \
                * (*((float*) (ydata + idx * ystep)));                                   \
            wyyl += *((float*) (wdata + idx * wstep))                                    \
                * (*((float*) (ydata + idx * ystep)))                                    \
                * (*((float*) (ydata + idx * ystep)));                                   \
        }                                                                                \
        while( (++i) < num &&                                                            \
            ( *((float*) (data + (idx =                                                  \
                (int) ( *((type*) (idxdata + i*idxstep))) ) * datastep))                 \
                == *curval ) );                                                          \
        /* step back one: the enclosing for loop increments i again */                   \
        --i;                                                                             \
        prevval = curval;                                                                \
    } /* for each value */                                                               \
                                                                                         \
    return found;                                                                        \
}
/*
 * ICV_DEF_FIND_STUMP_THRESHOLD_SQ( suffix, type )
 *
 * Instantiates ICV_DEF_FIND_STUMP_THRESHOLD with the function-name suffix
 * sq_<suffix>, the given index element type, and a sum-of-squares error
 * expression as the `error` argument.
 *
 * The pasted statements assign curlerror and currerror from the weighted sums
 * kept by the generated function (wl, wyl, wyyl on the left; wr, wyr and
 * *sumwyy - wyyl on the right), using
 *     sum( w * (y - c)^2 ) = sum(w*y*y) + c*c*sum(w) - 2*c*sum(w*y)
 * with c = curleft for the left side and c = curright for the right side.
 */
#define ICV_DEF_FIND_STUMP_THRESHOLD_SQ( suffix, type )                                  \
    ICV_DEF_FIND_STUMP_THRESHOLD( sq_##suffix, type,                                     \
        /* calculate error (sum of squares)          */                                  \
        /* err = sum( w * (y - left(right)Val)^2 )   */                                  \
        curlerror = wyyl + curleft * curleft * wl - 2.0F * curleft * wyl;                \
        currerror = (*sumwyy) - wyyl + curright * curright * wr - 2.0F * curright * wyr; \
    )
OpenCV HaarTraining代码解析(二)cvCreateMTStumpClassifier(建立决策树)