# Tags: mpi findall get time python crawler window user header send
import requests,re,time
# Baidu Tieba auto sign-in script.
#
# Flow: fetch the logged-in user's Tieba home page, extract every followed
# forum from the JSON embedded in it, then for each forum fetch its page to
# obtain the "tbs" token and POST a sign-in request carrying that token.

# Request headers sent with every call. The Cookie value must be replaced with
# the cookie of a logged-in Baidu session, otherwise no forums are listed.
header = {
    "Cookie": "登陆过账号后的cookie 必须填写",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
}

# Tieba home page of the account: Baidu home -> "Tieba" (top right) ->
# user name (top right, "My Tieba"); paste the resulting URL here.
url = "百度首页--右上角贴吧--右上角用户名(我的贴吧) 然后把url填到这里"

FORUM_URL = "https://tieba.baidu.com/f"
SIGN_URL = "https://tieba.baidu.com/sign/add"

# Pause between forums; requesting too quickly tends to trigger a captcha,
# which makes the sign-in fail.
REQUEST_DELAY = 3
# Seconds before a stalled HTTP request is abandoned instead of hanging forever.
REQUEST_TIMEOUT = 10

# Compiled once instead of on every loop iteration.
FORUM_RE = re.compile(r'"forum_id":(.*?),"forum_name":"(.*?)"', re.S)
# The page mixes quote styles: the key is single-quoted, the value double-quoted.
TBS_RE = re.compile(r"tbs': \"(.*?)\"", re.S)


def extract_forums(page_text: str) -> list:
    """Return ``(forum_id, raw_forum_name)`` string tuples found in *page_text*.

    The names are returned exactly as they appear in the page, i.e. still
    carrying ``\\uXXXX`` escapes; pass them through ``decode_forum_name``.
    An empty list is returned when nothing matches.
    """
    return FORUM_RE.findall(page_text)


def decode_forum_name(raw_name: str) -> str:
    """Turn a forum name containing ``\\uXXXX`` escapes into readable text.

    Characters that are already non-ASCII are first rewritten as escapes
    (``backslashreplace``), so the ``unicode_escape`` decode round-trips them
    instead of raising ``UnicodeEncodeError`` as a latin-1 encode would.
    """
    return raw_name.encode("ascii", "backslashreplace").decode("unicode_escape")


def extract_tbs(page_text: str):
    """Return the ``tbs`` token embedded in a forum page, or ``None`` if absent."""
    found = TBS_RE.search(page_text)
    return found.group(1) if found else None


def sign_forum(forum_name: str) -> bool:
    """Sign in to one forum.

    Returns ``True`` when the sign-in request was sent and answered with a
    non-error HTTP status, ``False`` when the forum page contained no ``tbs``
    token. Raises ``requests.RequestException`` on network or HTTP errors.
    """
    # Let requests build the query string so that names containing '&', '#',
    # spaces or non-ASCII characters are percent-encoded correctly.
    forum_page = requests.get(
        FORUM_URL,
        params={"kw": forum_name},
        headers=header,
        timeout=REQUEST_TIMEOUT,
    )
    forum_page.raise_for_status()

    # The sign-in endpoint requires the tbs value taken from the forum page.
    tbs = extract_tbs(forum_page.text)
    if tbs is None:
        return False

    payload = {"ie": "utf-8", "kw": forum_name, "tbs": tbs}
    reply = requests.post(
        SIGN_URL,
        data=payload,
        headers=header,
        timeout=REQUEST_TIMEOUT,
    )
    reply.raise_for_status()
    return True


def main() -> None:
    """Sign in to every forum listed on the account's Tieba home page."""
    home_page = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
    home_page.raise_for_status()

    for _forum_id, raw_name in extract_forums(home_page.text):
        time.sleep(REQUEST_DELAY)
        forum_name = decode_forum_name(raw_name)
        try:
            signed = sign_forum(forum_name)
        except requests.RequestException:
            # One unreachable forum must not abort the remaining ones.
            signed = False
        print(forum_name, "签到" if signed else "异常")


if __name__ == "__main__":
    main()
# Tags: mpi findall get time python crawler window user header send
# Original article: https://www.cnblogs.com/cwkcwk/p/9576518.html