# Tags: int windows head movie parse ref top100 select boa
import requests
import parsel
import time
import threading
# Request headers sent with every page fetch so the request looks like it
# comes from a regular desktop browser.
headers = {
    "Referer": "https://maoyan.com/board/4?offset=0",
    # Adjacent string literals are concatenated by the parser, so the value
    # is one single User-Agent string.
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/80.0.3987.116 Safari/537.36"
    ),
}
def get_info(page: int) -> None:
    """Fetch one page of the Maoyan board and print every movie entry on it.

    Args:
        page: Value substituted into the ``offset`` query parameter of the
            board URL (the caller in this file passes 0, 10, ..., 90).

    Raises:
        requests.RequestException: If the request times out, fails at the
            network level, or the server answers with an HTTP error status.
    """
    url = "https://maoyan.com/board/4?offset={}".format(page)
    # A timeout keeps a stalled connection from blocking this worker forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()

    sel = parsel.Selector(response.text)
    # Each <dd> element holds one movie entry.
    for dd in sel.css("dd"):
        # .get(default="") returns the first match like getall()[0] did, but
        # yields an empty string instead of raising IndexError when the
        # element is missing from an entry.
        print({
            "电影名称": dd.css("p.name a::text").get(default=""),
            "主演": dd.css("p.star::text").get(default="").strip(),
            "上映时间": dd.css("p.releasetime::text").get(default=""),
            # The score is split across several <i> tags; glue them together.
            "评分": "".join(dd.css("p.score i::text").getall()),
        })
# Record the wall-clock start so the total scrape time can be reported.
star_time = time.time()

# Request the 10 board pages (offsets 0, 10, ..., 90), one thread per page.
threads = []
for page in range(0, 100, 10):
    worker = threading.Thread(target=get_info, args=(page,))
    worker.start()
    threads.append(worker)

# Show the threads that are alive right now (the main thread is included).
print("总线程数:", threading.enumerate())

# Wait for every worker to finish. join() blocks without spinning, unlike a
# `while len(threading.enumerate()) > 1: pass` loop, which burns a CPU core
# and never exits if any unrelated background thread is alive.
for worker in threads:
    worker.join()

print("总用时:", time.time() - star_time, "s")
# Tags: int windows head movie parse ref top100 select boa
# Original article: https://www.cnblogs.com/panda009079/p/12348284.html