用 Python3 + Requests 爬取英雄联盟皮肤（单线程爬取）

时间：2019-06-06 11:55:40 阅读：86 评论：0 收藏：0 [点我收藏+]

标签：threading 大小写自己 __name__ def regex values 标识 import

和王者荣耀相类似

链接：https://www.cnblogs.com/wxzbk/p/10981098.html

"""
Hero_LOL
"""
import ast
import os
import re
import threading

import requests


def hero(hero_name, hero_num):
    """Download the skin images of every hero into the ``LOL/`` directory.

    Names and ids are paired positionally. For each hero, skin indexes
    0..19 are requested in order; the first response whose status is not
    200 ends the probing for that hero. Each image is saved as
    ``LOL/<name><skin index>.jpg``.

    Args:
        hero_name: Hero names, in the same order as ``hero_num``.
        hero_num: Hero id strings used to build the image URLs.

    Raises:
        requests.RequestException: If a request fails or times out.
        OSError: If the output directory or a file cannot be written.
    """
    # Common prefix shared by every skin image URL.
    url_prefix = "https://ossweb-img.qq.com/images/lol/web201310/skin/big"
    # Upper bound on the skin indexes probed per hero.
    max_skins = 20

    print(len(hero_num))

    # open() does not create missing directories, so make sure it exists.
    os.makedirs("LOL", exist_ok=True)

    for name, hero_id in zip(hero_name, hero_num):
        for skin in range(max_skins):
            # The skin index is always zero-padded to three digits. The
            # previous string concatenation produced two, three or four
            # digits depending on the id length and on whether the index
            # had reached 10.
            # NOTE(review): assumes the CDN names files
            # "big<hero id><3-digit skin index>.jpg" - confirm against a
            # known-good URL.
            skin_url = f"{url_prefix}{hero_id}{skin:03d}.jpg"
            # A timeout keeps one stalled connection from hanging the run.
            reply = requests.get(skin_url, timeout=10)
            if reply.status_code != 200:
                # Treat the first miss as "no more skins for this hero".
                break
            filename = f"LOL/{name}{skin}.jpg"
            print("此时正在下载：" + filename)
            with open(filename, "wb") as f:
                f.write(reply.content)


def main():
    """Fetch the hero id/name table and download every hero's skins.

    Downloads ``champion.js``, extracts the object that follows ``keys":``
    with a regular expression, parses it into a dict and hands the names
    and ids to ``hero``.

    Raises:
        IndexError: If the response contains no ``keys`` mapping.
        requests.RequestException: If the request fails or times out.
    """
    # JavaScript file that carries the hero table.
    hero_url = "https://lol.qq.com/biz/hero/champion.js"
    # Send a browser User-Agent with the request.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/72.0.3626.109 Safari/537.36"
        )
    }
    # A timeout keeps a stalled connection from hanging the script.
    response = requests.get(hero_url, headers=headers, timeout=10)
    # Decode the raw bytes as GBK so the regex below works on text.
    page = response.content.decode("GBK")

    # Capture everything between `keys":` and the following `,"data`.
    # Raw string so that `\s` / `\S` reach the regex engine untouched.
    pattern = re.compile(r'keys":([\s\S]*?),"data', re.IGNORECASE)
    match = pattern.search(page)
    if match is None:
        raise IndexError("no 'keys' mapping found in the champion.js response")

    # The captured text came from the network, so it is parsed with
    # ast.literal_eval (literals only) instead of eval, which would execute
    # arbitrary expressions.
    table = ast.literal_eval(match.group(1))
    hero_num = list(table)            # dict keys: hero ids
    hero_name = list(table.values())  # dict values: hero names

    # Run the download exactly once. The earlier version called hero() and
    # then started a thread running it again, fetching every image twice.
    hero(hero_name, hero_num)
   
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

用 Python3 + Requests 爬取英雄联盟皮肤（单线程爬取）

标签：threading 大小写自己 __name__ def regex values 标识 import

原文地址：https://www.cnblogs.com/wxzbk/p/10983976.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行