码迷,mamicode.com
首页 > 其他好文 > 详细

初级验证码识别

时间:2014-12-28 19:32:53      阅读:293      评论:0      收藏:0      [点我收藏+]

标签:

  本来想写个自动识别验证码的程序,验证码是一些辨识度比较高的字母和数字。写着写着才发现想简单了:同样的数字和字母居然被旋转了不同的角度,觉得这是用 js 不能完成的任务。于是 coding 中止,在此记录下初步的成果。

  技术分享

  例如拿到如上的一张验证码图。首先是去噪,去掉和验证码数字色差很大的像素;然后是分割,把数字切成一块块的矩形;接着是匹配,和一些预处理好的二进制数组进行匹配;最后是输出结果。而因为及时发现在该网站的验证码上做的是无用功,我只做到了去噪这一步。用户脚本如下:

// ==UserScript==
// @name        checkcode
// @namespace   http://www.cnblogs.com/bigbigsunrise/
// @description 自动输入验证码
// @include     https://www.xinhehui.com/Account/User/register
// @version     1.0
// @grant       none
// ==/UserScript==
 9 
// Off-screen canvas that receives a copy of the captcha image; `c` is its 2D
// drawing context, used by timer() and clearNoise().
var canvas = document.createElement('canvas');
var c = canvas.getContext('2d');
// Captcha dimensions, written by timer() and read by clearNoise(). Declared
// here so that those assignments do not create implicit globals.
var imgWidth, imgHeight;
12 
/**
 * Polls once per second for the captcha element with id "VerifyCodeImg".
 * Once the element exists and its image data has loaded, copies it onto the
 * canvas, runs clearNoise() and appends the canvas to the page.
 */
var timer = function() {
    setTimeout(function() {
        // Look the element up inside the callback so the check reflects the
        // page as it is when the timer fires.
        var img = document.getElementById('VerifyCodeImg');
        // An image that has not finished loading would draw nothing, so
        // keep polling until it is ready.
        if (!img || !img.complete || !img.naturalWidth) {
            timer();
            return;
        }
        // imgWidth/imgHeight are read later by clearNoise().
        canvas.width = imgWidth = img.width;
        canvas.height = imgHeight = img.height;
        c.drawImage(img, 0, 0, imgWidth, imgHeight);
        clearNoise();
        document.body.appendChild(canvas);
    }, 1000);
};
27 
/**
 * Noise removal. Every pixel whose RGB distance from pure white (as measured
 * by chromatic()) is below the threshold is painted pure white; all other
 * pixels are left untouched. Alpha is never modified.
 *
 * Reads the drawing context `c` and the dimensions imgWidth/imgHeight.
 * The whole frame is read and written back once instead of doing a canvas
 * round-trip for every single pixel.
 */
var clearNoise = function() {
    var WHITE = [255, 255, 255]; // reference colour to compare against
    var THRESHOLD = 180;         // the captcha is simple: a fixed cut-off is enough

    // Nothing to do for an empty image (getImageData rejects a zero size).
    if (!imgWidth || !imgHeight) {
        return;
    }

    var frame = c.getImageData(0, 0, imgWidth, imgHeight);
    var px = frame.data;
    for (var i = 0; i < px.length; i += 4) {
        if (chromatic(px.subarray(i, i + 3), WHITE) < THRESHOLD) {
            px[i] = px[i + 1] = px[i + 2] = 255;
        }
    }
    c.putImageData(frame, 0, 0);
};
44 
/**
 * Colour difference between two pixels: the Euclidean distance between their
 * red, green and blue components.
 *
 * @param {ArrayLike<number>} data1 - first pixel; only indexes 0..2 are read
 * @param {ArrayLike<number>} data2 - second pixel; only indexes 0..2 are read
 * @returns {number} 0 for identical colours, larger for more different ones
 */
var chromatic = function(data1, data2) {
    var sumOfSquares = 0;
    for (var channel = 0; channel < 3; channel += 1) {
        var diff = data1[channel] - data2[channel];
        sumOfSquares += diff * diff;
    }
    return Math.sqrt(sumOfSquares);
};
53 
// Start polling for the captcha image.
timer();

  验证码去噪前后如下:

 技术分享 技术分享

  

  代码参照了以前看过的一篇博文,该文对如下验证码的识别代码做了分析。

  技术分享

技术分享
// ==UserScript==
// @name checkcode
// @author gb_2312
// @match http://jxgl.hdu.edu.cn/*
// ==/UserScript==
  6 
  7 var sample = [];
  8 sample[0] = "0000000000000000000000000000000000000111000111110110011111000111100011110001111000111100011110001111000110111110001110000000000000000000000000000000000000";
  9 sample[1] = "00000000001111111111111111111111110000000000";
 10 sample[2] = "0000000000000000000000000000000000000111100111111110001111000110000011000011100011100011100001100001100001111111111111100000000000000000000000000000000000";
 11 sample[3] = "0000000000000000000000000000000000001111001111110110011100001100001110000111000001110000011110001111000111111110001110000000000000000000000000000000000000";
 12 sample[4] = "000000000000000000000000000000000011000011000111001111001111011011110011110011111111111111000011000011000000000000000000000000000000";
 13 sample[5] = "0000000000000000000000000000000000001111110111111011000001100001111110011111111000110000011110001111100110111111001110000000000000000000000000000000000000";
 14 sample[6] = "0000000000000000000000000000000000000111100111110110011111000001111100111111011001111100011110001111001110111110001111000000000000000000000000000000000000";
 15 sample[7] = "000000000000000000000000000000111111111111000111000110001100001100011000011000011000110000110000110000000000000000000000000000000000";
 16 sample[8] = "0000000000000000000000000000000000001111100111110110001111000111100111011111001111101100011110001111000111111111001111000000000000000000000000000000000000";
 17 
 18 var canvas = document.createElement(‘canvas‘);
 19 var ctx = canvas.getContext(‘2d‘);
 20 var input = document.getElementById(‘txtYz‘);
 21 canvas.id = "canvas";
 22 var guessCode = ‘‘;
 23 var imgWidth, imgHeight;
 24 var timer = function(){
 25     var img = document.querySelector(‘.footbutton img‘) || document.images[0];
 26     setTimeout(function(){
 27         if (!img) {
 28             timer();
 29             return;
 30         } else {
 31             img.height = 22;
 32             guessCode = ‘‘;
 33             canvas.width = imgWidth = img.width;
 34             canvas.height = imgHeight = img.height;
 35             ctx.drawImage(img, 0, 0,imgWidth,imgHeight);
 36             //document.body.appendChild(canvas);
 37             clearNoise();
 38             numSlice();
 39         }
 40     }, 1000);            
 41 };
 42 
 43 
 44 
 45 
 46 var clearNoise = function(){
 47     var blankImageData = ctx.createImageData(1,1);// 白色对比像素点
 48         blankImageData.data[0]=blankImageData.data[1]=blankImageData.data[2]=255;
 49 
 50     for (var y=0, h=imgHeight; y<h; y+=1){                
 51         for (var x=0, w=imgWidth; x<w; x+=1){
 52 
 53             var imageData = ctx.getImageData(x,y,1,1);
 54 
 55             if ( chromatic(imageData.data, blankImageData.data) < 180 ){
 56                 // 验证码较简单,直接二值设置阀值过滤
 57                 imageData.data[0]=imageData.data[1]=imageData.data[2]=255;
 58                 ctx.putImageData(imageData, x, y);                                
 59             }
 60 
 61         }
 62     }
 63 }
 64 
 65 var chromatic = function(data1, data2){
 66     // 色差对比
 67     var offR = data1[0] - data2[0]
 68         offG = data1[1] - data2[1],
 69         offB = data1[2] - data2[2];
 70         // console.log(offR,offG,offB)
 71     return Math.sqrt(offR*offR+offG*offG+offB*offB);
 72 }
 73 
 74 var numSlice = function(){
 75     // 取验证码坐标
 76     var codePos = [];
 77     for (var x=0, w=imgWidth; x<w; x++){
 78         var count = 0;
 79         var checkImageData = ctx.getImageData(x, 0, 1, imgHeight);
 80         for (var i=0, l=checkImageData.data.length; i<l; i+=4){
 81             // 得到的为存在验证码的坐标
 82             if ( checkImageData.data[i] < 255 || checkImageData.data[i+1] < 255 || checkImageData.data[i+2] < 255 ){
 83                 count++;
 84             }
 85             if ( count >= 4 ){
 86                 codePos.push(x);
 87                 break;
 88             }
 89         }
 90     }
 91 
 92     // 切分验证码坐标
 93     var slicePos = [];
 94     for (var i=1, l=codePos.length-1; i<l; i++){
 95         // 处理出边界的坐标
 96         if ( codePos[i+1] - codePos[i] > 1 ){
 97             slicePos.push(codePos[i], codePos[i+1]);
 98         }
 99     }
100     slicePos.unshift(codePos[0]);
101     slicePos.push(codePos[codePos.length-1]);
102 
103     // 通过坐标切割出单个验证码图片
104     var sliceImageData = [];
105     for (var i=0, l=slicePos.length; i<l ;i+=2){
106         var tempImageData = ctx.getImageData(slicePos[i], 0, slicePos[i+1]-slicePos[i]+1, imgHeight);
107 
108         var temp = ‘‘;
109         for (var j=0; j<tempImageData.data.length; j+=4){
110             if ( tempImageData.data[j] < 255 || tempImageData.data[j+1] < 255 || tempImageData.data[j+2] < 255 ){
111                 temp += ‘1‘;
112             } else {
113                 temp += ‘0‘;
114             }
115         }
116         LDS(temp);
117     }
118     input.value = guessCode;
119 }
120 
/**
 * Recognises one slice: compares it with every template in `sample` using
 * LD() and appends the index of the most similar template to guessCode.
 * When several templates tie, the one with the lowest index wins.
 *
 * @param {string} str - '0'/'1' encoding of the slice, as built by numSlice()
 */
var LDS = function(str) {
    var bestIndex;
    // Start below any possible score so that the first template is always
    // accepted, even if its similarity is 0.
    var bestScore = -Infinity;
    // Iterate over however many templates exist instead of a fixed count.
    for (var i = 0; i < sample.length; i++) {
        var score = LD(sample[i], str);
        if (score > bestScore) {
            bestScore = score;
            bestIndex = i;
        }
    }
    guessCode += bestIndex;
};
/**
 * Similarity of two strings based on their edit (Levenshtein) distance.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number} 1 - distance / (length of the longer string + 1);
 *     1 for identical strings, smaller for more different ones
 */
var LD = function(str1, str2) {
    var a = String(str1);
    var b = String(str2);

    // Only two rows of the distance table are alive at any time:
    // prev[j] = distance between the first i-1 chars of a and first j chars of b.
    var prev = [];
    for (var j = 0; j <= b.length; j++) {
        prev[j] = j;
    }

    for (var i = 1; i <= a.length; i++) {
        var curr = [i];
        for (var k = 1; k <= b.length; k++) {
            var substitution = prev[k - 1] + (a.charAt(i - 1) === b.charAt(k - 1) ? 0 : 1);
            var insertOrDelete = Math.min(prev[k], curr[k - 1]) + 1;
            curr[k] = Math.min(substitution, insertOrDelete);
        }
        prev = curr;
    }

    var distance = prev[b.length];
    // The "+ 1" is kept on purpose: the score has always been normalised by
    // the longer length plus one, and changing it would shift every score.
    var longest = Math.max(a.length, b.length) + 1;
    return 1 - distance / longest;
};
176    
// Entry point: announce that the script is active, then start polling for
// the captcha image.
console.log('running');
timer();
View Code

 

初级验证码识别

标签:

原文地址:http://www.cnblogs.com/bigbigsunrise/p/4190299.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!