标签:
1. 模拟登录的第一步是获取登录前(prelogin)信息:用户名和密码在提交之前会由 js 预先加密,因此必须先取得加密所需的 servertime、nonce 和 pubkey。下面用 re 和 json 模块从 prelogin 接口的响应中提取这些参数:
# -*- coding: utf-8 -*-
"""Fetch the pre-login parameters (servertime, nonce) from Sina's SSO endpoint.

Written for Python 3 (``urllib.request``); the Python 2 ``urllib2`` module
this listing originally used no longer exists.
"""
import json
import re
import urllib.request

PRELOGIN_URL = (
    "http://login.sina.com.cn/sso/prelogin.php?entry=weibo"
    "&callback=sinaSSOController.preloginCallBack&su=dW5kZWZpbmVk"
    "&client=ssologin.js(v1.3.18)&_=1329806375939"
)

# The endpoint answers with JSONP: callbackName({...}).  Capture the JSON
# object between the outermost parentheses.
JSONP_BODY = re.compile(r'\((.*)\)')


def get_servertime():
    """Request the pre-login data and print ``servertime`` and ``nonce``.

    Prints the raw JSON payload first, then the two extracted values.
    If the response is not the expected JSONP document, a diagnostic is
    printed instead of raising.
    """
    with urllib.request.urlopen(PRELOGIN_URL) as response:
        raw = response.read().decode('utf-8')

    try:
        json_data = JSONP_BODY.search(raw).group(1)
        print(json_data)
        # json.loads turns the payload into a dict we can index by key.
        data = json.loads(json_data)
        servertime = str(data['servertime'])
        nonce = data['nonce']
    except (AttributeError, ValueError, KeyError) as err:
        # AttributeError: no "(...)" wrapper found; ValueError: invalid JSON;
        # KeyError: an expected field is missing.
        print("Failed to get servertime data:", err)
        return

    print(servertime, "\n", nonce)


if __name__ == '__main__':
    get_servertime()
结果上显示:
1 {"retcode":0,"servertime":1472783606,"pcid":"gz-32dce7bbd55e33948992c2978d847ff601de","nonce":"26ISKM","pubkey":"-----BEGIN PUBLIC KEY-----\nMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDrKjhWhmGIf6GAvdtcq9XyHHv9\nWcCQyy0kWoesJTBiiCcpKT5VBjUFCOf5qju3f0MzIxSQ+RX21jxV\/i8IpJs1P0RK\n05k8rMAtt4Sru45CqbG7\/\/s4vhjXjoeg5Bubj3OpKO4MzuH2c5iEuXd+T+noihu+\nSVknrEp5mzGB1kQkQwIDAQAB\n-----END PUBLIC KEY-----","rsakv":"1330428213","is_openlock":0,"exectime":10} 2 1472783606 3 26ISKM 4 -----BEGIN PUBLIC KEY----- 5 MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDrKjhWhmGIf6GAvdtcq9XyHHv9 6 WcCQyy0kWoesJTBiiCcpKT5VBjUFCOf5qju3f0MzIxSQ+RX21jxV/i8IpJs1P0RK 7 05k8rMAtt4Sru45CqbG7//s4vhjXjoeg5Bubj3OpKO4MzuH2c5iEuXd+T+noihu+ 8 SVknrEp5mzGB1kQkQwIDAQAB 9 -----END PUBLIC KEY----- 10 1330428213
# -*- coding: utf-8 -*-
"""Pre-login helpers for Sina Weibo: fetch server parameters, hash the
password and encode the user name.

Written for Python 3 (``urllib.request`` / ``urllib.parse``).
"""
import base64
import hashlib
import json
import re
import urllib.parse
import urllib.request

PRELOGIN_URL = (
    "http://login.sina.com.cn/sso/prelogin.php?entry=weibo"
    "&callback=sinaSSOController.preloginCallBack&su=dW5kZWZpbmVk"
    "&client=ssologin.js(v1.3.18)&_=1329806375939"
)

# The endpoint answers with JSONP: callbackName({...}).  Capture the JSON
# object between the outermost parentheses.
JSONP_BODY = re.compile(r'\((.*)\)')


def get_servertime():
    """Fetch the pre-login data and print the extracted fields.

    Returns:
        ``(servertime, nonce, rsakv)`` on success, where ``servertime`` is
        converted to ``str``; ``None`` if the response could not be parsed.
    """
    with urllib.request.urlopen(PRELOGIN_URL) as response:
        raw = response.read().decode('utf-8')

    try:
        # json.loads turns the payload into a dict we can index by key.
        data = json.loads(JSONP_BODY.search(raw).group(1))
        servertime = str(data['servertime'])
        nonce = data['nonce']
        pubkey = data['pubkey']
        rsakv = data['rsakv']
    except (AttributeError, ValueError, KeyError) as err:
        # AttributeError: no "(...)" wrapper found; ValueError: invalid JSON;
        # KeyError: an expected field is missing.
        print("Failed to get servertime data:", err)
        return None

    print(servertime, "\n", nonce, "\n", pubkey, "\n", rsakv)
    return servertime, nonce, rsakv


def _sha1_hex(text):
    """Return the hexadecimal SHA-1 digest of *text* (encoded as UTF-8)."""
    return hashlib.sha1(text.encode('utf-8')).hexdigest()


def get_pwd(pwd, servertime, nonce):
    """Hash the password with three rounds of SHA-1.

    The password is hashed twice, then the hex digest is concatenated with
    ``servertime`` and ``nonce`` and hashed a third time.  (This is plain
    SHA-1 hashing, not RSA.)

    Args:
        pwd: plain-text password.
        servertime: server time from the pre-login response (str or int).
        nonce: nonce from the pre-login response.

    Returns:
        The final digest as a 40-character hex string.
    """
    twice_hashed = _sha1_hex(_sha1_hex(pwd))
    return _sha1_hex(twice_hashed + str(servertime) + str(nonce))


def get_user(username):
    """Return *username* URL-quoted and then base64-encoded, as ``str``."""
    quoted = urllib.parse.quote(username)
    # b64encode emits no trailing newline, so nothing needs stripping
    # (the removed base64.encodestring appended one).
    return base64.b64encode(quoted.encode('utf-8')).decode('ascii')


if __name__ == '__main__':
    get_servertime()
下面是完整的代码。模拟登录中最重要的是弄清其加密机制;之后就是模拟登录的一般步骤:构造 post_data,并对请求头(headers)进行包装:
这里使用的是 urllib 和 http.cookiejar(cookie)库:
"""Log in to Sina Weibo with urllib and a cookie jar persisted on disk.

The first run performs the RSA-based SSO login and saves the cookies to a
file named after ``username``; later runs reuse that cookie file.
"""
import base64
import binascii
import datetime
import http.cookiejar
import json
import os
import random
import re
import time
import urllib.parse
import urllib.request

import rsa


username = ''
password = ''


# One opener shared by every request so that cookies set during login are
# sent on the follow-up requests.  The cookie file is named after the user.
cookiejar = http.cookiejar.LWPCookieJar(username)
cookie = urllib.request.HTTPCookieProcessor(cookiejar)
httphandle = urllib.request.HTTPHandler()
opener = urllib.request.build_opener(cookie, httphandle)
urllib.request.install_opener(opener)


# RSA modulus (hex) used to encrypt the password; paired with the public
# exponent 65537 in get_sp().
publickey = (
    'EB2A38568661887FA180BDDB5CABD5F21C7BFD59C090CB2D245A87AC253062882729293E5506350508E7F9AA3BB77F4333231490F915F6D63C55FE2F08A49B353F444AD3993CACC02DB'
    '784ABBB8E42A9B1BBFFFB38BE18D78E87A0E41B9B8F73A928EE0CCEE1F6739884B9777E4FE9E88A1BBE495927AC4A799B3181D6442443'
)
pubkey = int(publickey, 16)

# Form template for the login POST; login() fills in su, sp, servertime
# and nonce on a copy.
postdata = {
    'entry': 'weibo',
    'gateway': '1',
    'from': '',
    'savestate': '7',
    'useticket': '1',
    'pagerefer': 'http://login.sina.com.cn/sso/logout.php?entry=miniblog&r=http%3A%2F%2Fweibo.com%2Flogout.php%3Fbackurl%3D%252F',
    'vsnf': '1',
    'su': '',
    'service': 'miniblog',
    'servertime': '',
    'nonce': '',
    'pwencode': 'rsa2',
    'rsakv': '1330428213',
    'sp': '',
    'sr': '1920*1080',
    'encoding': 'UTF-8',
    'prelt': '269',
    'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
    'returntype': 'META',
    'showpin': '0',
}


def gettime():
    """Return the current local time as a Unix timestamp (whole seconds, float)."""
    return time.mktime(datetime.datetime.now().timetuple())


def openurl(url, chart='utf-8', data=None):
    """Fetch *url* through the shared opener.

    Args:
        url: address to request.
        chart: charset used to decode the body; the string ``'null'``
            means "do not decode, return raw bytes".
        data: optional request body (bytes); when given the request is a POST.

    Returns:
        The response body as ``str``, or as ``bytes`` when ``chart == 'null'``.
    """
    with opener.open(url, data) as response:
        body = response.read()
    if chart != 'null':
        return body.decode(chart)
    return body


# ---- login parameter helpers ------------------------------------------------

def b64(sth):
    """Return the base64 encoding of the string *sth*, as ``str``."""
    return base64.b64encode(sth.encode()).decode('utf-8')


def get_su():
    """Return the ``su`` form field: the URL-quoted user name, base64-encoded."""
    return b64(urllib.parse.quote(username))


def get_sp(st, nc):
    """Return the ``sp`` form field: the RSA-encrypted password as hex.

    The plaintext is ``"<servertime>\\t<nonce>\\n<password>"``.

    Args:
        st: servertime from the pre-login response.
        nc: nonce from the pre-login response.
    """
    key = rsa.PublicKey(pubkey, 65537)
    message = str(st) + '\t' + str(nc) + '\n' + password
    encrypted = rsa.encrypt(message.encode(), key)
    return binascii.b2a_hex(encrypted).decode('utf-8')


def get_servertime():
    """Query the pre-login endpoint.

    Returns:
        A list ``[servertime, nonce, pcid]``, each converted to ``str``.
    """
    url = 'http://login.sina.com.cn/sso/prelogin.php?entry=weibo&su=%s&checkpin=1&rsakt=mod' % (get_su())
    with opener.open(url) as page:
        data = json.loads(page.read().decode('utf-8'))
    return [str(data['servertime']), str(data['nonce']), str(data['pcid'])]


# ---- regex helpers ----------------------------------------------------------

def match(pattern, string):
    """Return all non-overlapping matches of *pattern* in *string* (``re.findall``)."""
    return re.findall(pattern, string)


def match_fanscount(string):
    """Return the first fan count found in *string* (as ``str``).

    Falls back to the integer 20000 when the page contains no match.
    """
    found = match(r'fans" >([0-9]+)', string)
    if found:
        return found[0]
    return 20000


def match_login_url(string):
    """Return the first URL in *string* that ends with ``=0``.

    Raises:
        IndexError: if no such URL is present.
    """
    # Character class fixed from [a-zA-z], which also matched [ \ ] ^ _ `.
    found = match(r'[a-zA-Z]+://[^\s]*=0', string)
    return found[0]


def match_uid(string):
    """Return the list of numeric ids found in ``usercard="id=..."`` attributes."""
    return match(r'usercard="id=([0-9]+)" href="', string)


def match_name(string):
    """Return the value assigned to ``CONFIG['onick']`` in the page source.

    Raises:
        IndexError: if the assignment is not present.
    """
    found = match(r"CONFIG\['onick'\]='(.+)'", string)
    return found[0]


def replace_(st):
    """Return *st* with every backslash removed."""
    return st.replace('\\', '')


# ---- login flow -------------------------------------------------------------

def login(postdata):
    """Perform the SSO login and save the resulting cookies to disk.

    Args:
        postdata: form template (see the module-level ``postdata``).  It is
            copied, so the caller's dict is left unmodified.
    """
    servertime, nonce = get_servertime()[:2]

    form = dict(postdata)
    form['su'] = get_su()
    form['sp'] = get_sp(servertime, nonce)
    form['servertime'] = servertime
    form['nonce'] = nonce

    url = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.94 Safari/537.36 OPR/27.0.1689.66 (Edition Baidu)',
    }
    req = urllib.request.Request(url, urllib.parse.urlencode(form).encode(), headers)
    # urlopen() goes through the opener installed above, so cookies are kept.
    with urllib.request.urlopen(req) as response:
        text = response.read().decode('gbk')

    # The login response embeds a redirect URL; requesting it lets the
    # cookie jar pick up the cookies set on that hop.
    with opener.open(match_login_url(text)):
        pass
    cookiejar.save()


def auto_login():
    """Log in again by loading the saved cookies and following the redirect URL."""
    cookiejar.load()
    html = openurl('http://weibo.com/', 'gbk')
    with opener.open(match_login_url(html)):
        pass


if __name__ == '__main__':
    # A cookie file named after the user means a previous login succeeded.
    if os.path.exists(username):
        print('检测到cookie,自动登录')
        auto_login()
    else:
        login(postdata)
接下来采用 requests 的 Session 对象:在同一个 Session 会话中,不需要在每次请求时手动保存 cookies,调用 session.post() 时会自动带上已有的 cookies,从而通过 Session 一直与服务器保持会话:
但是自己编写的程序在最后对网页上的内容进行重定向的时候出现了一些问题:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Log in to Sina Weibo with ``requests``, using ONE Session for every step.

Cookies set by the login response are only sent on the redirect request
when both go through the same ``requests.Session``; creating a new session
per request drops them and makes the redirect step fail.
"""
import base64
import binascii
import json
import pathlib
import re
import urllib.parse
import webbrowser

import requests
import rsa

username = "*********"
pwd = '***************'

PRELOGIN_URL = 'http://login.sina.com.cn/sso/prelogin.php'
LOGIN_URL = ('http://login.sina.com.cn/sso/login.php?client='
             'ssologin.js(v1.4.18)')
PIN_URL = "http://login.sina.com.cn/cgi/pin.php"
PIN_FILE = 'cha.jpg'

HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/47.0.2526.80 Safari/537.36'
}


def get_su(username):
    """Return the ``su`` form field: the URL-quoted user name, base64-encoded."""
    quoted = urllib.parse.quote(username).encode('utf-8')
    return base64.b64encode(quoted).decode('utf-8')


def get_sth(su, session=None):
    """Fetch the pre-login parameters for the encoded user name *su*.

    Args:
        su: value produced by :func:`get_su`.
        session: optional ``requests.Session`` to send the request with;
            a one-off request is made when omitted.

    Returns:
        The decoded response as a dict (the caller reads ``nonce``,
        ``rsakv``, ``servertime``, ``pcid`` and ``pubkey`` from it).
    """
    # Trimmed-down parameter set: these are the fields that must be present
    # to get the data needed below.
    payload = {'entry': 'weibo', 'rsakt': 'mod', 'su': su, 'checkpin': '1'}
    http = session if session is not None else requests
    text = http.get(PRELOGIN_URL, params=payload).text
    # NOTE(security): this used to be eval(text), which executes whatever the
    # server sends back.  The body is parsed as JSON instead.
    return json.loads(text)


def get_sp(pwd, nonce, servertime, pubkey):
    """Return the ``sp`` form field: the RSA-encrypted password as a hex ``str``.

    Args:
        pwd: plain-text password.
        nonce: nonce from the pre-login response.
        servertime: servertime from the pre-login response.
        pubkey: RSA modulus as a hex string, from the pre-login response.
    """
    # Build the public key from the hex modulus and the exponent 65537.
    key = rsa.PublicKey(int(pubkey, 16), 65537)
    message = str(servertime) + '\t' + str(nonce) + '\n' + pwd
    encrypted = rsa.encrypt(message.encode('utf-8'), key)
    # Hex-encode the ciphertext; decode so the form carries text, not bytes.
    return binascii.b2a_hex(encrypted).decode('ascii')


def get_pin(pcid, session=None):
    """Download the captcha image to ``cha.jpg`` and open it for the user.

    Args:
        pcid: captcha id from the pre-login response.
        session: ``requests.Session`` to download with.  Pass the session
            that will also submit the login form; a fresh session is used
            when omitted.
    """
    payload = {'s': '0', 'p': pcid}
    http = session if session is not None else requests.Session()
    response = http.get(PIN_URL, params=payload)

    # The image is binary data, so the file must be opened in 'wb' mode.
    with open(PIN_FILE, 'wb') as image_file:
        image_file.write(response.content)

    # Show the image with the system's default viewer (standard library only).
    if not webbrowser.open(pathlib.Path(PIN_FILE).resolve().as_uri()):
        print('在当前目录下没有找到图片')


def main():
    """Run the whole login flow and print the intermediate results."""
    session = requests.Session()
    session.headers.update(HEADERS)

    # Encode the user name, then ask the server for the encryption parameters.
    su = get_su(username)
    res = get_sth(su, session)
    print(res)
    nonce = res['nonce']
    rsakv = res['rsakv']
    servertime = res['servertime']
    pcid = res['pcid']
    pubkey = res['pubkey']
    # Whether a captcha is required.  Presumably the pre-login response
    # reports this as 'showpin' -- TODO confirm; a missing key means "no".
    showin = bool(res.get('showpin'))
    # Encrypt the password with RSA.
    sp = get_sp(pwd, nonce, servertime, pubkey)
    print('加密用户名:', su, '加密的密码:', sp)

    payload = {
        'entry': 'weibo',
        'gateway': '1',
        'from': '',
        'savestate': '7',
        'useticket': '1',
        'pagerefer': 'http://login.sina.com.cn/sso/logout.php?entry=miniblog'
                     '&r=http%3A%2F%2Fweibo.com%2Flogout.php%3Fbackurl%3D%252F',
        'vsnf': '1',
        'su': su,
        'service': 'miniblog',
        'servertime': servertime,
        'nonce': nonce,
        'pwencode': 'rsa2',
        'rsakv': rsakv,
        'sp': sp,
        'encoding': 'UTF-8',
        'prelt': '106',
        'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback='
               'parent.sinaSSOController.feedBackUrlCallBack',
        'returntype': 'META',
    }

    if showin:
        get_pin(pcid, session)
        payload['door'] = input('请输入验证码:')

    login_page = session.post(LOGIN_URL, data=payload).content.decode('GBK')

    # The response redirects via JavaScript: location.replace("<url>").
    pattern = r'location\.replace\([\'"](.*?)[\'"]\)'
    login_urls = re.findall(pattern, login_page)
    if not login_urls:
        raise RuntimeError(
            'login response contains no location.replace() redirect URL')
    login_url = login_urls[0]
    print(login_url)

    # Follow the redirect with the SAME session so the login cookies are sent.
    page = session.get(login_url)
    page_text = page.content.decode('gbk', errors='replace')
    print(page_text)

    uuid_res = re.findall(r'"uniqueid":"(.*?)"', page_text)
    print(uuid_res)
    if not uuid_res:
        raise RuntimeError('redirect response contains no "uniqueid" field')

    weibo = session.get(
        'http://weibo.com/%s/profile?topnav=1&wvr=6&is_all=1' % uuid_res[0])
    id_pa = r'<title>(.*?)</title>'
    titles = re.findall(id_pa, weibo.content.decode('utf-8', errors='replace'),
                        re.S)
    if titles:
        print(titles[0])


if __name__ == '__main__':
    main()
标签:
原文地址:http://www.cnblogs.com/woainifanfan/p/5832634.html