BeautifulSoup爬取电影天堂全站电影资源

时间：2019-12-29 00:53:19 阅读：241 评论：0 收藏：0 [点我收藏+]

标签：begin 没有 requests 输出 mod close ref html pen

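# Crawl movie links from the dytt8.net (电影天堂) "dyzz" listing pages.
# What the script does:
# 1. Walks the listing pages starting at /html/gndy/dyzz/index.html and
#    prints each movie's name and detail-page link.
# 2. Saves every (name, link) pair to a single CSV file, movies.csv.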

import time
import requests
from bs4 import BeautifulSoup
import csv


def spider(url):
    """Crawl the listing pages starting at ``url`` and record every movie link.

    On each page, every ``<table>`` inside ``div.co_content8 > ul`` yields one
    row ``[name, detail_url]`` that is written to the module-level ``fobj``
    file as CSV and echoed to stdout.  The crawl then follows the pager link
    whose text is "下一页" (next page) and stops when no such link exists or
    when a page cannot be fetched or does not have the expected layout.

    Uses the module-level globals ``page`` (pages visited so far), ``No``
    (rows written so far) and ``fobj`` (an open, writable text file).

    Args:
        url: Address of the first listing page to crawl.
    """
    global page, No, fobj
    # One writer for the whole crawl rather than a new one per row.
    writer = csv.writer(fobj)

    # Iterate instead of recursing: the previous version recursed through the
    # undefined name ``spiderA`` (the NameError was swallowed, so only the
    # first page was crawled), and recursion would add a frame per page.
    while url:
        page += 1
        print("第{}页".format(page))

        try:
            # A timeout keeps one stalled connection from hanging the crawl.
            response = requests.get(url, timeout=30)
            response.raise_for_status()
        except requests.RequestException as err:
            print("request failed:", url, err)
            break

        # The page is decoded as GBK, overriding whatever requests guessed.
        response.encoding = "gbk"
        root = BeautifulSoup(response.text, "lxml")

        content = root.find("div", attrs={"class": "co_content8"})
        listing = content.find("ul") if content is not None else None
        if listing is None:
            print("unexpected page layout:", url)
            break

        for table in listing.find_all("table"):
            link = table.find("a")
            if link is None or not link.get("href"):
                # Table without a usable anchor: nothing to record.
                continue
            name = link.text
            movie_url = "http://www.dytt8.net" + link["href"]
            try:
                writer.writerow([name, movie_url])
            except UnicodeEncodeError:
                # The output file's encoding cannot represent this name;
                # skip the row instead of aborting the whole crawl.
                print("skipped (name not encodable):", movie_url)
                continue
            No += 1
            print("No:", No, name, movie_url)

        # Look for the "next page" anchor in the pager; a missing pager is
        # treated the same as having no next page.
        pager = content.find("div", attrs={"class": "x"})
        pager_links = pager.find_all("a") if pager is not None else []
        url = None
        for anchor in pager_links:
            if anchor.text == "下一页" and anchor.get("href"):
                url = "https://www.dytt8.net/html/gndy/dyzz/" + anchor["href"]
                print(url)
                # Follow only the first match so a page is never crawled twice.
                break

    print("finished")



# Driver: crawl every listing page from the index and report the elapsed time.
begin_time = time.time()
url = "https://www.dytt8.net/html/gndy/dyzz/index.html"
# Counters read and updated by spider() through ``global``.
page = 0
No = 0
# newline="" leaves line endings to the csv module (the typographic quotes in
# the previous version were a syntax error).  The ``with`` block closes the
# file even if the crawl raises.
with open("movies.csv", "wt", encoding="gbk", newline="") as fobj:
    spider(url)
end_time = time.time()
# Named ``elapsed`` so the ``time`` module is not rebound to a float.
elapsed = end_time - begin_time
m, s = divmod(round(elapsed), 60)
print("用时：{}min{}s".format(m, s))

标签：begin 没有 requests 输出 mod close ref html pen

原文地址：https://www.cnblogs.com/billie52707/p/12113520.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行