码迷,mamicode.com
首页 > 编程语言 > 详细

Python3.4 12306 2015年3月验证码识别

时间:2015-03-17 12:27:48      阅读:172      评论:0      收藏:0      [点我收藏+]

标签:python   12306   验证码识别   图片处理   pil   

import ssl
import json
from PIL import Image
import urllib
import re
import urllib.request as urllib2
# Rebind ssl.create_default_context to the module's private unverified-context
# factory when that attribute exists; when it is absent the lookup raises
# AttributeError and ssl is left untouched.
# NOTE(review): this affects every later caller of ssl.create_default_context
# in the process, and presumably disables certificate checks -- confirm that
# this is intended and that it reaches the code path urlopen() actually uses.
try:
    ssl.create_default_context = ssl._create_unverified_context
except AttributeError:
    pass

# Browser-like User-Agent header value used for the image-search requests.
UA = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/41.0.2272.89 Safari/537.36"
)

# URL the captcha image is downloaded from.
pic_url = (
    "https://kyfw.12306.cn/otn/passcodeNew/getPassCodeNew"
    "?module=login&rand=sjrand&0.21191171556711197"
)
def get_img():
    """Download the captcha image and open it with PIL.

    The bytes fetched from ``pic_url`` are written to ``./tmp.jpg`` in the
    current working directory, and that file is then opened with
    ``Image.open``.

    Returns:
        The object returned by ``Image.open('./tmp.jpg')``.
    """
    # Release the HTTP response as soon as the body has been read rather
    # than leaving it open until garbage collection.
    with urllib2.urlopen(pic_url) as resp:
        raw = resp.read()
    with open('./tmp.jpg', 'wb') as fp:
        fp.write(raw)
    return Image.open('./tmp.jpg')
def get_sub_img(im, x, y):
    """Crop one tile out of the captcha grid.

    Args:
        im: Object exposing ``crop((left, top, right, bottom))``.
        x: Column index, accepted range 0..3.
        y: Row index, accepted range 0..2.

    Returns:
        Whatever ``im.crop`` returns for the 67x67 pixel box of tile (x, y).

    Raises:
        AssertionError: If ``x`` or ``y`` is outside the accepted range
            (only when assertions are enabled, i.e. not under ``-O``).
    """
    # NOTE(review): y == 2 is accepted although the script's own driver loop
    # only visits rows 0 and 1 -- confirm whether a third row really exists.
    assert 0 <= x <= 3
    assert 0 <= y <= 2
    tile = 67         # edge length of one tile, in pixels
    gap = 5           # added to the tile size to get the stride; also the x offset
    top_offset = 41   # y coordinate where the first row starts
    stride = tile + gap
    left = gap + stride * x
    top = top_offset + stride * y
    return im.crop((left, top, left + tile, top + tile))
def baidu_stu_lookup(im):
    """Look an image up on Baidu's image search and return its keywords.

    The image is saved to ``./query_temp_img.png``, its bytes are POSTed to
    the upload endpoint, and the body of that response is passed (URL-quoted)
    as ``queryImageUrl`` to the search endpoint. The resulting HTML is handed
    to ``baidu_stu_html_extract``.

    Args:
        im: Object exposing ``save(path)``; the saved file is sent with
            ``Content-Type: image/png``.

    Returns:
        The value returned by ``baidu_stu_html_extract`` for the result page.
    """
    # Imported here so the function does not depend on urllib.request
    # happening to re-export ``quote``.
    from urllib.parse import quote

    upload_url = "http://stu.baidu.com/n/image?fr=html5&needRawImageUrl=true&id=WU_FILE_0&name=233.png&type=image%2Fpng&lastModifiedDate=Mon+Mar+16+2015+20%3A49%3A11+GMT%2B0800+(CST)&size="
    im.save("./query_temp_img.png")
    # Context managers close the file and both HTTP responses promptly.
    with open("./query_temp_img.png", 'rb') as fp:
        raw = fp.read()
    # The endpoint takes the payload size as the trailing ``size`` parameter.
    upload_url = upload_url + str(len(raw))
    req = urllib2.Request(upload_url, raw, {'Content-Type': 'image/png', 'User-Agent': UA})
    with urllib2.urlopen(req) as resp:
        # Raw response bytes; presumably the URL of the uploaded image.
        resp_url = resp.read()

    search_url = "http://stu.baidu.com/n/searchpc?queryImageUrl=" + quote(resp_url)
    req = urllib2.Request(search_url, headers={'User-Agent': UA})
    with urllib2.urlopen(req) as resp:
        html = resp.read().decode()
    return baidu_stu_html_extract(html)
def baidu_stu_html_extract(html):
    r"""Extract the keyword list embedded in a search-result page.

    The first ``keywords:'...'`` fragment in ``html`` is taken, ``\x22``
    escapes are turned into double quotes and doubled backslashes are
    collapsed, and the result is parsed as a JSON list of objects that each
    carry a ``keyword`` entry.

    Args:
        html: Page source as text.

    Returns:
        The keywords joined with ``|``, or ``'[UNKOWN]'`` when the fragment
        is missing, the list is empty, or the payload cannot be parsed.
    """
    found = re.search(r"keywords:'(.*?)'", html)
    if found is None:
        return '[UNKOWN]'
    json_str = found.group(1).replace('\\x22', '"').replace('\\\\', '\\')
    try:
        result = [item['keyword'] for item in json.loads(json_str)]
    except (ValueError, KeyError, TypeError):
        # Malformed JSON, an entry without 'keyword', or an unexpected
        # payload shape: report it like any other lookup with no answer.
        return '[UNKOWN]'
    return '|'.join(result) if result else '[UNKOWN]'
if __name__ == '__main__':
    # Fetch the captcha once, then look up each tile of the first two rows
    # (four columns each) in row-major order and print its keywords.
    captcha = get_img()
    for row, col in ((r, c) for r in range(2) for c in range(4)):
        tile = get_sub_img(captcha, col, row)
        print((row, col), baidu_stu_lookup(tile))

改自:https://github.com/andelf/fuck12306/blob/master/fuck12306.py

Python 3.4 可用


Python3.4 12306 2015年3月验证码识别

标签:python   12306   验证码识别   图片处理   pil   

原文地址:http://blog.csdn.net/u013511642/article/details/44339333

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!