标签:get imp F12 定义 curl 资料 代码 表达 sleep
1. 没啥好说的,步骤我都在代码注释里写清楚了:下载图片用的是 json、re、requests 模块,打包用的是 tarfile 模块。
批量下载
import json
import os
import re
import time

import requests


# How the endpoint was found: open the League of Legends official hero
# library, press F12, click a hero image, filter the network panel by "js"
# and locate champion.js.
def getLOLImages(save_dir=r'E:\images', skins_per_hero=20):
    """Download League of Legends skin images referenced by champion.js.

    The function fetches champion.js, extracts the JSON object stored under
    its "keys" field (iterated below as hero id -> hero name), builds one
    image URL per hero and skin index, and saves every image that the server
    answers with HTTP 200.

    Args:
        save_dir: Directory the images are written to. It is created if it
            does not exist. Defaults to the original hard-coded location.
        skins_per_hero: Number of skin indices (0 .. skins_per_hero - 1)
            tried for every hero. Defaults to the original value of 20.

    Raises:
        ValueError: If the "keys" object cannot be found in champion.js.
        requests.RequestException: On network failures or timeouts.
    """
    # Request headers copied from the champion.js request in the browser's
    # Headers tab; mainly used to look like a regular browser.
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"}
    url_js = 'https://lol.qq.com/biz/hero/champion.js'

    # Fetch the JS source as bytes, then decode it to a string.
    # The header is now actually sent (it was built but unused before).
    res_js = requests.get(url_js, headers=header, timeout=10).content
    html_js = res_js.decode()

    # Regular expression that captures the JSON object following "keys".
    req = '"keys":(.*?),"data"'
    list_js = re.findall(req, html_js)
    if not list_js:
        raise ValueError('could not find the "keys" object in champion.js')

    # Convert the captured text to a dict.
    dict_js = json.loads(list_js[0])
    print(type(dict_js))  # print the parsed type

    # Build (url, file_path) pairs together so a URL can never be matched
    # with the wrong file name. The previous version kept two parallel lists
    # and bumped the index before using it, which shifted every file name by
    # one and overran the list on the last successful download.
    downloads = []
    for key, name in dict_js.items():
        print(key)  # hero id
        for i in range(skins_per_hero):
            # Skin index zero-padded to three digits: 0 -> "000", 12 -> "012".
            hero_num = str(i).zfill(3)
            numstr = key + hero_num
            url = 'https://game.gtimg.cn/images/lol/act/img/skin/big' + numstr + '.jpg'
            file_path = save_dir + '\\' + name + str(i) + ".jpg"
            downloads.append((url, file_path))

    # Make sure the target directory exists before writing into it.
    os.makedirs(save_dir, exist_ok=True)

    # Download the images.
    for picurl, file_path in downloads:
        res = requests.get(picurl, headers=header, timeout=10)
        # Only save when the server reports success; not every hero has an
        # image for every skin index that is tried.
        if res.status_code == 200:
            print("正在下载%s" % file_path)
            time.sleep(1)  # throttle between successful downloads
            with open(file_path, 'wb') as f:
                f.write(res.content)


getLOLImages()
2.打包压缩包
import os
import tarfile

# Packs the downloaded images directory into a gzip-compressed tar archive
# named lol.tar.gz in the current working directory.
# Author's note: .tar.gz is used here, but other formats such as zip work
# too; in the author's experience .tar.gz ends up much smaller than zip and
# .tar.bz2 is the smallest.

# Get the current working directory.
path = os.getcwd()
print(path)  # e.g. E:\pythonproject\python_all\ceshi

# Walk up three directory levels to reach the top-level path, then append
# the "images" folder to it.
newpath = os.path.dirname(os.path.dirname(os.path.dirname(path)))
print(newpath)
img_path = os.path.join(newpath, 'images')
print(img_path)

# Fail before creating the archive so a missing source path does not leave
# an empty lol.tar.gz behind.
if not os.path.exists(img_path):
    raise FileNotFoundError(img_path)

# Create the archive.
with tarfile.open('lol.tar.gz', 'w:gz', encoding='utf-8') as tf:
    tf.add(img_path)
标签:get imp F12 定义 curl 资料 代码 表达 sleep
原文地址:https://www.cnblogs.com/weiweivip666/p/12940774.html