Python日常—爬取豆瓣250条电影记录

时间：2018-10-20 23:49:23 阅读：502 评论：0 收藏：0 [点我收藏+]

#  感兴趣的同仁可以相互交流哦

import requests  
import lxml.html,csv  
# URL template for one page of the Douban Top 250 listing; the "{}"
# placeholder receives the offset of the first movie on the page.
doubanUrl = 'https://movie.douban.com/top250?start={}&filter='

def getSource(doubanUrl):
    """Download one page and return its raw HTML.

    Args:
        doubanUrl: Fully formatted URL of the page to fetch.

    Returns:
        The response body as undecoded bytes. Setting
        ``response.encoding`` only affects ``response.text``, so it is
        not touched here; decoding is left to the HTML parser.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx status code.
    """
    # NOTE(review): Douban is reported to reject the default
    # python-requests User-Agent, so a browser-like one is sent — confirm
    # against the live site.
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/120.0 Safari/537.36'
        ),
    }
    # A timeout keeps a stalled connection from hanging the whole crawl.
    response = requests.get(doubanUrl, headers=headers, timeout=10)
    # Fail loudly instead of handing an error page to the parser.
    response.raise_for_status()
    return response.content

def getEveryItem(source):
    """Extract the movie entries from one listing page.

    Args:
        source: HTML of a listing page, in any form accepted by
            ``lxml.html.document_fromstring``.

    Returns:
        A list with one dict per ``<div class="info">`` element, in
        document order. Each dict has the string-valued keys ``'title'``,
        ``'url'``, ``'star'`` and ``'quote'``; a field that is absent
        from the page is stored as ``''``.
    """
    # Build the HTML tree once, then query each entry relative to its node.
    selector = lxml.html.document_fromstring(source)
    movieList = []
    for eachMovie in selector.xpath('//div[@class="info"]'):
        # Title parts and link live under the "hd" header of the entry.
        title = eachMovie.xpath(
            'div[@class="hd"]/a/span[@class="title"]/text()')
        otherTitle = eachMovie.xpath(
            'div[@class="hd"]/a/span[@class="other"]/text()')
        link = eachMovie.xpath('div[@class="hd"]/a/@href')
        # Rating and quote are searched anywhere below the entry rather
        # than under "hd" only.
        # NOTE(review): on Douban's markup they appear to sit in the
        # sibling "bd" container — verify against the live page.
        star = eachMovie.xpath(
            './/div[@class="star"]/span[@class="rating_num"]/text()')
        quote = eachMovie.xpath('.//p[@class="quote"]/span/text()')

        # xpath() always returns a list; collapse each to a plain string
        # so the CSV holds values rather than list reprs, and so a
        # missing field does not raise IndexError.
        movieList.append({
            'title': ''.join(title + otherTitle),
            'url': link[0] if link else '',
            'star': star[0] if star else '',
            'quote': quote[0] if quote else '',
        })
    return movieList

def writeData(movieList, path='./Douban.csv'):
    """Write the scraped movies to a UTF-8 CSV file with a header row.

    Args:
        movieList: Iterable of dicts keyed by ``'title'``, ``'star'``,
            ``'quote'`` and ``'url'``.
        path: Destination file; an existing file is overwritten.

    Raises:
        ValueError: If a dict contains a key that is not one of the
            column names (``csv.DictWriter`` default behaviour).
    """
    fieldnames = ['title', 'star', 'quote', 'url']
    # newline='' lets the csv module control line endings itself.
    with open(path, 'w', encoding='UTF-8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        # Header row first, then one row per movie.
        writer.writeheader()
        writer.writerows(movieList)

if __name__ == '__main__':
    # The listing holds 250 movies at 25 per page, i.e. 10 pages.
    movieList = []
    for i in range(10):
        # Page i starts at offset i * 25.
        pageLink = doubanUrl.format(i * 25)
        print(pageLink)
        # Fetch the page, then parse its entries.
        source = getSource(pageLink)
        # Accumulate across pages; plain assignment would keep only the
        # entries of the last page.
        movieList.extend(getEveryItem(source))

    print(movieList[:10])
    writeData(movieList)

标签：csv 提取电影 info star data eve 列表 lin

原文地址：https://www.cnblogs.com/zxycb/p/9823311.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行