标签:
本来想写个自动识别验证码的程序,验证码是一些辨识度比较高的字母和数字。写着写着才发现自己犯傻了:同样的数字和字母居然被旋转了不同的角度,觉得这是用 js 不能完成的任务。coding 中止,记录下初步的成果。
拿到如上所示的一张验证码图。首先是去噪,去掉和验证码数字色差很大的像素;然后是分割,把数字分成一块块的矩形;然后是匹配,和一些预处理好的二进制数组进行匹配;最后是输出结果。因为及时发现在该网站的验证码上是在做无用功,我只做到了去噪这一步。用户脚本如下:
// ==UserScript==
// @name        checkcode
// @namespace   http://www.cnblogs.com/bigbigsunrise/
// @description 自动输入验证码
// @include     https://www.xinhehui.com/Account/User/register
// @version     1.0
// @grant       none
// ==/UserScript==

// The DOM is only touched when a document exists, so the pure helpers below
// can also be loaded (and unit-tested) outside a browser.
var hasDom = typeof document !== 'undefined';

// Off-screen canvas the captcha image is copied onto, and its 2D context.
var canvas = hasDom ? document.createElement('canvas') : null;
var c = canvas ? canvas.getContext('2d') : null;

// Size of the captcha image; set by timer() before clearNoise() runs.
var imgWidth = 0;
var imgHeight = 0;

// A pixel whose RGB distance to pure white is below this value is whitened.
var NOISE_THRESHOLD = 180;
var WHITE_RGB = [255, 255, 255];

/**
 * Colour difference between two pixels: the Euclidean distance between
 * their red, green and blue components (indices 0, 1 and 2).
 *
 * @param {ArrayLike<number>} data1 - first pixel, at least [r, g, b]
 * @param {ArrayLike<number>} data2 - second pixel, at least [r, g, b]
 * @returns {number} the RGB distance (0 when the colours are equal)
 */
var chromatic = function (data1, data2) {
  var offR = data1[0] - data2[0];
  var offG = data1[1] - data2[1];
  var offB = data1[2] - data2[2];
  return Math.sqrt(offR * offR + offG * offG + offB * offB);
};

/**
 * Simple threshold filter over an RGBA pixel buffer, applied in place:
 * every pixel closer to white than `threshold` gets its R, G and B set
 * to 255. The alpha component is left untouched.
 *
 * @param {ArrayLike<number>} data - RGBA bytes, 4 per pixel
 * @param {number} [threshold] - distance limit, defaults to NOISE_THRESHOLD
 * @returns {ArrayLike<number>} the same `data` object
 */
var whitenNearWhite = function (data, threshold) {
  var limit = threshold === undefined ? NOISE_THRESHOLD : threshold;
  for (var i = 0; i + 2 < data.length; i += 4) {
    var rgb = [data[i], data[i + 1], data[i + 2]];
    if (chromatic(rgb, WHITE_RGB) < limit) {
      data[i] = data[i + 1] = data[i + 2] = 255;
    }
  }
  return data;
};

/**
 * Denoising step: runs the threshold filter over the whole canvas.
 * The pixels are read and written back in a single getImageData /
 * putImageData pair instead of one pair per pixel.
 */
var clearNoise = function () {
  if (imgWidth <= 0 || imgHeight <= 0) {
    // getImageData rejects a zero-sized rectangle; nothing to filter anyway.
    return;
  }
  var imageData = c.getImageData(0, 0, imgWidth, imgHeight);
  whitenNearWhite(imageData.data, NOISE_THRESHOLD);
  c.putImageData(imageData, 0, 0);
};

/**
 * Polls once per second until the captcha image exists and has finished
 * loading, then copies it onto the canvas, denoises it and appends the
 * canvas to the page.
 */
var timer = function () {
  setTimeout(function () {
    var img = document.getElementById('VerifyCodeImg');
    if (!img || !img.complete) {
      timer();
      return;
    }
    imgWidth = img.width;
    imgHeight = img.height;
    canvas.width = imgWidth;
    canvas.height = imgHeight;
    c.drawImage(img, 0, 0, imgWidth, imgHeight);
    clearNoise();
    document.body.appendChild(canvas);
  }, 1000);
};

if (hasDom) {
  timer();
}
验证码去噪前后如下:
代码参照了以前看到过的一篇博文,该博文对如下验证码的识别代码做了分析:
// ==UserScript==
// @name   checkcode
// @author gb_2312
// @match  http://jxgl.hdu.edu.cn/*
// ==/UserScript==

// Reference bitmaps, one per recognisable character: '1' marks a non-white
// pixel, '0' a white one, in the same order toBitString() produces.
// Only indices 0-8 are defined here.
var sample = [];
sample[0] = "0000000000000000000000000000000000000111000111110110011111000111100011110001111000111100011110001111000110111110001110000000000000000000000000000000000000";
sample[1] = "00000000001111111111111111111111110000000000";
sample[2] = "0000000000000000000000000000000000000111100111111110001111000110000011000011100011100011100001100001100001111111111111100000000000000000000000000000000000";
sample[3] = "0000000000000000000000000000000000001111001111110110011100001100001110000111000001110000011110001111000111111110001110000000000000000000000000000000000000";
sample[4] = "000000000000000000000000000000000011000011000111001111001111011011110011110011111111111111000011000011000000000000000000000000000000";
sample[5] = "0000000000000000000000000000000000001111110111111011000001100001111110011111111000110000011110001111100110111111001110000000000000000000000000000000000000";
sample[6] = "0000000000000000000000000000000000000111100111110110011111000001111100111111011001111100011110001111001110111110001111000000000000000000000000000000000000";
sample[7] = "000000000000000000000000000000111111111111000111000110001100001100011000011000011000110000110000110000000000000000000000000000000000";
sample[8] = "0000000000000000000000000000000000001111100111110110001111000111100111011111001111101100011110001111000111111111001111000000000000000000000000000000000000";

// The DOM is only touched when a document exists, so the pure helpers below
// can also be loaded (and unit-tested) outside a browser.
var hasDom = typeof document !== 'undefined';

var canvas = hasDom ? document.createElement('canvas') : null;
var ctx = canvas ? canvas.getContext('2d') : null;
// Text field that receives the recognised code; may not exist yet at load.
var input = hasDom ? document.getElementById('txtYz') : null;
if (canvas) {
  canvas.id = "canvas";
}
var guessCode = '';
var imgWidth = 0;
var imgHeight = 0;

// A pixel whose RGB distance to pure white is below this value is whitened.
var NOISE_THRESHOLD = 180;
var WHITE_RGB = [255, 255, 255];
// A column belongs to a character once it holds this many non-white pixels.
var MIN_PIXELS_PER_COLUMN = 4;

/**
 * Colour difference between two pixels: the Euclidean distance between
 * their red, green and blue components (indices 0, 1 and 2).
 *
 * @param {ArrayLike<number>} data1 - first pixel, at least [r, g, b]
 * @param {ArrayLike<number>} data2 - second pixel, at least [r, g, b]
 * @returns {number} the RGB distance (0 when the colours are equal)
 */
var chromatic = function (data1, data2) {
  // All three offsets are declared locally; none may leak as a global.
  var offR = data1[0] - data2[0];
  var offG = data1[1] - data2[1];
  var offB = data1[2] - data2[2];
  return Math.sqrt(offR * offR + offG * offG + offB * offB);
};

/**
 * Simple threshold filter over an RGBA pixel buffer, applied in place:
 * every pixel closer to white than `threshold` gets its R, G and B set
 * to 255. The alpha component is left untouched.
 *
 * @param {ArrayLike<number>} data - RGBA bytes, 4 per pixel
 * @param {number} [threshold] - distance limit, defaults to NOISE_THRESHOLD
 * @returns {ArrayLike<number>} the same `data` object
 */
var whitenNearWhite = function (data, threshold) {
  var limit = threshold === undefined ? NOISE_THRESHOLD : threshold;
  for (var i = 0; i + 2 < data.length; i += 4) {
    var rgb = [data[i], data[i + 1], data[i + 2]];
    if (chromatic(rgb, WHITE_RGB) < limit) {
      data[i] = data[i + 1] = data[i + 2] = 255;
    }
  }
  return data;
};

/** True when the pixel starting at `offset` is not pure white in R, G, B. */
var isInk = function (data, offset) {
  return data[offset] < 255 || data[offset + 1] < 255 || data[offset + 2] < 255;
};

/**
 * Denoising step: runs the threshold filter over the whole canvas using a
 * single getImageData / putImageData pair.
 */
var clearNoise = function () {
  if (imgWidth <= 0 || imgHeight <= 0) {
    // getImageData rejects a zero-sized rectangle; nothing to filter anyway.
    return;
  }
  var imageData = ctx.getImageData(0, 0, imgWidth, imgHeight);
  whitenNearWhite(imageData.data, NOISE_THRESHOLD);
  ctx.putImageData(imageData, 0, 0);
};

/**
 * Lists the x coordinates of the columns that contain at least
 * MIN_PIXELS_PER_COLUMN non-white pixels.
 *
 * @param {ArrayLike<number>} data - RGBA bytes of the whole image, row-major
 * @param {number} width - image width in pixels
 * @param {number} height - image height in pixels
 * @returns {number[]} ascending column indices
 */
var findCodeColumns = function (data, width, height) {
  var columns = [];
  for (var x = 0; x < width; x++) {
    var count = 0;
    for (var y = 0; y < height; y++) {
      if (isInk(data, (y * width + x) * 4)) {
        count++;
      }
      if (count >= MIN_PIXELS_PER_COLUMN) {
        columns.push(x);
        break;
      }
    }
  }
  return columns;
};

/**
 * Turns the ascending column list into [start, end, start, end, ...] pairs,
 * splitting wherever two neighbouring entries are more than one column
 * apart. Every neighbouring pair is checked, including the first one.
 *
 * @param {number[]} codePos - ascending column indices
 * @returns {number[]} flat list of inclusive start/end columns; empty when
 *   `codePos` is empty
 */
var pairSliceBounds = function (codePos) {
  if (codePos.length === 0) {
    return [];
  }
  var bounds = [codePos[0]];
  for (var i = 0; i < codePos.length - 1; i++) {
    if (codePos[i + 1] - codePos[i] > 1) {
      bounds.push(codePos[i], codePos[i + 1]);
    }
  }
  bounds.push(codePos[codePos.length - 1]);
  return bounds;
};

/**
 * Encodes an RGBA buffer as a string with one character per pixel:
 * '1' for a non-white pixel, '0' for a white one.
 *
 * @param {ArrayLike<number>} data - RGBA bytes, 4 per pixel
 * @returns {string}
 */
var toBitString = function (data) {
  var bits = '';
  for (var j = 0; j < data.length; j += 4) {
    bits += isInk(data, j) ? '1' : '0';
  }
  return bits;
};

/**
 * Cuts the denoised canvas into one vertical strip per character, matches
 * each strip against the samples (appending to guessCode through LDS) and
 * writes the result into the text field, if that field exists.
 */
var numSlice = function () {
  if (imgWidth <= 0 || imgHeight <= 0) {
    return;
  }
  var whole = ctx.getImageData(0, 0, imgWidth, imgHeight);
  var codePos = findCodeColumns(whole.data, imgWidth, imgHeight);
  var slicePos = pairSliceBounds(codePos);

  for (var i = 0; i < slicePos.length; i += 2) {
    var left = slicePos[i];
    var stripWidth = slicePos[i + 1] - left + 1;
    var strip = ctx.getImageData(left, 0, stripWidth, imgHeight);
    LDS(toBitString(strip.data));
  }

  // The field may have been missing when the script loaded; look again.
  var field = input || document.getElementById('txtYz');
  if (field) {
    field.value = guessCode;
  }
};

/**
 * Appends to guessCode the index of the sample most similar to `str`
 * (the first one wins on a tie).
 *
 * @param {string} str - bit string of one character strip
 */
var LDS = function (str) {
  var best = 0;
  var index;
  for (var i = 0; i < sample.length; i++) {
    var score = LD(sample[i], str);
    if (score > best) {
      best = score;
      index = i;
    }
  }
  guessCode += index;
};

/**
 * Similarity of two strings derived from their edit (Levenshtein) distance:
 * 1 - distance / (max(length1, length2) + 1). Identical strings give 1.
 * The "+ 1" in the divisor keeps the result strictly above 0 and avoids a
 * division by zero for two empty strings.
 *
 * Only two rows of the distance table are kept at any time.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number} similarity in the range (0, 1]
 */
var LD = function (str1, str2) {
  var len1 = str1.length;
  var len2 = str2.length;
  var prev = [];
  var curr = [];
  var i;
  var j;

  for (j = 0; j <= len2; j++) {
    prev[j] = j;
  }
  for (i = 1; i <= len1; i++) {
    curr[0] = i;
    for (j = 1; j <= len2; j++) {
      var cost = str1.charAt(i - 1) === str2.charAt(j - 1) ? 0 : 1;
      curr[j] = Math.min(prev[j] + 1, curr[j - 1] + 1, prev[j - 1] + cost);
    }
    var swap = prev;
    prev = curr;
    curr = swap;
  }
  return 1 - prev[len2] / (Math.max(len1, len2) + 1);
};

/**
 * Polls once per second until the captcha image exists and has finished
 * loading, then forces its height to 22 pixels, copies it onto the canvas
 * and runs denoising and recognition.
 */
var timer = function () {
  setTimeout(function () {
    var img = document.querySelector('.footbutton img') || document.images[0];
    if (!img || !img.complete) {
      timer();
      return;
    }
    img.height = 22;
    guessCode = '';
    imgWidth = img.width;
    imgHeight = img.height;
    canvas.width = imgWidth;
    canvas.height = imgHeight;
    ctx.drawImage(img, 0, 0, imgWidth, imgHeight);
    clearNoise();
    numSlice();
  }, 1000);
};

if (hasDom) {
  console.log('running');
  timer();
}
标签:
原文地址:http://www.cnblogs.com/bigbigsunrise/p/4190299.html