标签:
#!/usr/bin/env python
# coding=utf-8
"""Fetch a verification-code image, OCR it with Tesseract and submit the result.

The script targets Python 2 (``urllib2`` / ``cookielib``). Statements are
written so that they also byte-compile on Python 3, but the imports below
only resolve on Python 2.
"""

# Path of the Tesseract executable launched by run_tesseract().
tesseract_cmd = 'D:\\Tesseract-OCR\\tesseract'

try:
    import cookielib
    import json
    import os
    import re
    import shlex
    import subprocess
    import sys
    import tempfile
    import urllib
    import urllib2

    from PIL import Image
    import pytesseract
except ImportError:
    print('模块导入错误,请使用pip安装')
    raise SystemExit


def mkdir(path):
    """Create directory *path* if it does not exist and return the cleaned path.

    Surrounding whitespace and trailing backslashes are stripped before the
    path is used.
    """
    path = path.strip().rstrip("\\")
    if not os.path.exists(path):
        os.makedirs(path)
    return path


def get_yundapic():
    """Download the verification-code image.

    A new cookie-aware opener is built and installed as the global
    ``urllib2`` opener, so later ``urllib2.urlopen`` calls (as made by
    ``save_data``) go through the same cookie jar.

    Returns:
        tuple: ``(cookie_jar, image_bytes)``.
    """
    cj = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    # addheaders is documented as a list of (name, value) tuples, not a set.
    opener.addheaders = [
        ('User-agent',
         'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:47.0) '
         'Gecko/20100101 Firefox/47.0'),
    ]
    urllib2.install_opener(opener)

    pic_url = 'http://ykjcx.yundasys.com/zb1qBpg2.php'
    response = opener.open(urllib2.Request(pic_url))
    try:
        data = response.read()
    finally:
        response.close()
    return (cj, data)


def save_pic(input_file_name, data):
    """Write *data* to *input_file_name* in binary mode.

    Does nothing when *data* is ``None``.
    """
    if data is None:
        return
    with open(input_file_name, "wb") as out:
        out.write(data)


def tempnam():
    """Return a temporary file path whose name starts with ``tess_``.

    The placeholder file is deleted when the context manager exits, so only
    the (momentarily unused) name is returned.
    NOTE(review): another process could claim the name before it is used.
    """
    with tempfile.NamedTemporaryFile(prefix="tess_") as tmpfile:
        return tmpfile.name


def cleanup(filename):
    """Delete *filename*, ignoring the error if it cannot be removed."""
    try:
        os.remove(filename)
    except OSError:
        pass


def run_tesseract(input_filename, output_filename_base):
    """Run Tesseract on *input_filename*.

    Args:
        input_filename: path of the image to recognise.
        output_filename_base: output path without extension; the caller
            reads the result from ``output_filename_base + '.txt'``.

    Returns:
        tuple: ``(exit_status, stderr_output)``.
    """
    command = [tesseract_cmd, input_filename, output_filename_base]
    # An argument list must not be combined with shell=True: on POSIX only
    # the first item would be executed and the file names would be dropped.
    proc = subprocess.Popen(command, stderr=subprocess.PIPE)
    # communicate() drains stderr while waiting, so a chatty child cannot
    # block forever on a full pipe (which wait()-then-read() allows).
    _, stderr_output = proc.communicate()
    return (proc.returncode, stderr_output)


def image_to_string(data):
    """OCR the image bytes in *data* and return the recognised text, stripped.

    The temporary image and text files are removed before returning,
    whether or not recognition succeeded.

    Raises:
        RuntimeError: if Tesseract exits with a non-zero status.
    """
    output_file_name_base = tempnam()
    input_file_name = '%s.png' % tempnam()
    output_file_name = '%s.txt' % output_file_name_base
    try:
        save_pic(input_file_name, data)
        status, error_string = run_tesseract(input_file_name,
                                             output_file_name_base)
        if status:
            # The original bare ``raise`` had no active exception to re-raise.
            raise RuntimeError('tesseract failed with status %s: %s'
                               % (status, error_string))
        with open(output_file_name) as result:
            return result.read().strip()
    finally:
        cleanup(input_file_name)
        cleanup(output_file_name)


def save_data(cookies, vcode):
    """POST the session cookie and the recognised code as JSON.

    Args:
        cookies: iterable of cookie objects exposing ``name`` and ``value``.
        vcode: the value to send under the ``code`` key.

    Returns:
        The response body returned by the server.

    Raises:
        LookupError: if *cookies* holds no ``PHPSESSID`` cookie.
    """
    cookie = None
    for ck in cookies:
        if ck.name == 'PHPSESSID':
            cookie = ck.value
    if cookie is None:
        raise LookupError('no PHPSESSID cookie found')

    # NOTE(review): this looks like a placeholder ("address for saving the
    # cookie and verification code") rather than a real URL - replace it
    # with the actual endpoint before use.
    saveUrl = '保存cookie和验证码地址'
    values = {'cookie': cookie, 'websiteid': 1, 'code': vcode}
    post_data = json.dumps(values)
    req = urllib2.Request(saveUrl, post_data)
    response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        response.close()


def main():
    """Loop forever: fetch an image, OCR it, evaluate and submit the result."""
    letters_only = re.compile(r'[a-zA-Z]+$')
    while True:
        cj, input_file_stream = get_yundapic()
        vcode = image_to_string(input_file_stream)
        if len(vcode) != 3:
            continue
        if letters_only.match(vcode):
            print('識別錯誤')
            continue
        try:
            # SECURITY: vcode is OCR output of a remotely supplied image and
            # eval() executes it as Python. Replace with a strict arithmetic
            # parser once the set of possible expressions is confirmed.
            num = eval(vcode)
            print(num)
            print(save_data(cj, num))
        except Exception as exc:
            # Keep looping on a bad recognition or failed upload, but report
            # it and let KeyboardInterrupt/SystemExit stop the loop.
            sys.stderr.write('skipped %r: %s\n' % (vcode, exc))


if __name__ == '__main__':
    main()
比较喜欢直接粗暴地贴代码,不多说,自己看代码,COPY 即运行。
标签:
原文地址:http://www.cnblogs.com/zwdo/p/5697256.html