标签:
(一)答题表格设计与识别
实际设计好的表格如下图
为了保证图像精确,表格和四角的标记都是由程序生成的,文字和数据是后期用排版软件添加上去的.
图中四角的四个黑方块主要用来定位表格,然后就可以切割出每个单元格,最后去做字符识别.
具体步骤为:
1,灰度化并二值化;
2,查找轮廓,找出四个定位标记;
3,透视变换,校正变形;
4,切割表格,分别识别每个单元格;
实际操作中发现最关键的是表格一定要平整,变形对识别影响较大;
部分代码:
int table_recognition(IplImage* img,unsigned char * result) { //大图二值化 IplImage* bin_img = cvCloneImage(img); image_threshold(bin_img); //去噪 IplImage* tmp_img = cvCloneImage(bin_img); cvErode(tmp_img, tmp_img, NULL, 1); //腐蚀 cvDilate(tmp_img, tmp_img, NULL, 1); //膨胀 //查找轮廓 CvSeq* contours; CvMemStorage * storage = cvCreateMemStorage(0); cvSetImageROI(tmp_img, cvRect(0, 0, bin_img->width, bin_img->height)); cvFindContours(tmp_img, storage, &contours, sizeof(CvContour), CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE, cvPoint(0, 0)); int ids[5]; int rotates[5]; int vals[5]; CvPoint points[5][4]; int n = 0; // 检测每个轮廓 for (; contours; contours = contours->h_next) { //用指定精度逼近多边形曲线 CvSeq* result; result = cvApproxPoly(contours, sizeof(CvContour), storage, CV_POLY_APPROX_DP, cvContourPerimeter(contours)*0.01, 0); //不是四边形的不要 if (result->total != 4) continue; //不是凸多边形不要 if (!cvCheckContourConvexity(result)) continue; //面积大小或小于指定值的排除 double s = fabs(cvContourArea(result, CV_WHOLE_SEQ, 0)); if (s<60) continue; //解码每个轮廓标志,正确的保存下来********************************************** CvPoint2D32f srcQuad[4]; for (int i = 0; i < 4; i++){ CvPoint* pt = (CvPoint*)cvGetSeqElem(result, i);//取标记四边形的四个顶点 points[n][i] = *pt; srcQuad[i].x = (float)pt->x; srcQuad[i].y = (float)pt->y; } //透视变换取出marker IplImage * mark_img = cvCreateImage(cvSize(40,40), 8, 1); perspective(bin_img, mark_img, srcQuad); // int rt = marker_decode(mark_img, &ids[n], &rotates[n], &vals[n]); if (rt !=0) continue; // n++; if (n>4) break; } if (n != 4)//发现四个标记 return -1; //if (rotates[0] != rotates[1] || rotates[1] != rotates[2] || rotates[2] != rotates[3])//四个标记旋转一致 // return -1; //marker 0123 if (ids[0] != 0 && ids[1] != 0 && ids[2] != 0 && ids[3] != 0) return -1; if (ids[0] != 1 && ids[1] != 1 && ids[2] != 1 && ids[3] != 1) return -1; if (ids[0] != 2 && ids[1] != 2 && ids[2] != 2 && ids[3] != 2) return -1; if (ids[0] != 3 && ids[1] != 3 && ids[2] != 3 && ids[3] != 3) return -1; //确定表格四个点 CvPoint2D32f pts[4]; for (int i = 0; i < 4; 
i++) { int id = ids[i]; int rotate = rotates[i]; CvPoint pt; if (id == 0){ pt = points[i][(1 + rotate)%4]; } else if (id == 1){ pt = points[i][(0 + rotate) % 4]; } else if (id == 2){ pt = points[i][(3 + rotate) % 4]; } else if (id == 3){ pt = points[i][(2 + rotate) % 4]; } pts[id].x = pt.x; pts[id].y = pt.y; } //CvPoint2D32f tmp_ptf = pts[1]; //pts[1] = pts[3]; //pts[3] = tmp_ptf; IplImage * table_img = cvCreateImage(cvSize(64*15, 64*4+32), 8, 1); perspective(img, table_img, pts); //表格分割 int nt = 0; IplImage* gird_img = cvCreateImage(cvSize(64, 64), 8, 1); for (int j = 1; j < 4; j+=2) { for (int i = 0; i < 15; i++) { cvSetImageROI(table_img, cvRect(0+64*i, 15+64*j, 64, 64)); cvCopy(table_img, gird_img); #ifdef _WIN32 save_gird(gird_img, nt); #endif int rt = svm_recognition(gird_img); result[nt] = rt; nt++; } } //cvNamedWindow("Image", CV_WINDOW_NORMAL); //cvShowImage("Image", gird_img); //cvWaitKey(0); cvReleaseImage(&bin_img); cvClearMemStorage(storage); return 0; }
标签:
原文地址:http://www.cnblogs.com/veryjuly/p/5701818.html