Python爬虫包图网case

时间：2019-08-30 18:48:24 阅读：81 评论：0 收藏：0 [点我收藏+]

标签：pytho load inf title 请求 response python爬虫 main odi

# coding=utf-8
import requests
from lxml import etree
# Fetch the video listing page. A timeout keeps the script from hanging on a
# stalled connection, and raise_for_status() stops us from parsing an HTTP
# error page as if it were the listing.
responce = requests.get("https://ibaotu.com/shipin/", timeout=30)
responce.raise_for_status()
# Parse the page text into an element tree.
html = etree.HTML(responce.text)
# Locate the video titles and the matching video source URLs.
title_list = html.xpath('//span[@class="video-title"]/text()')
print(title_list)
src_list = html.xpath('//div[@class="video-play"]/video/@src')
print(src_list)
# Walk titles and sources in lockstep; zip() stops at the shorter list.
for tit, src in zip(title_list, src_list):
    # The original code prepends "http:" to every src (i.e. it expects
    # protocol-relative "//host/path" values); only do so when the value
    # is not already an absolute URL.
    if src.startswith(("http://", "https://")):
        video_url = src
    else:
        video_url = "http:" + src
    # Use a separate name so the listing response is not overwritten.
    video = requests.get(video_url, timeout=60)
    # Never write an HTTP error body to disk as an .mp4 file.
    video.raise_for_status()
    # Build the output file name; characters that are invalid in file names
    # (path separators, wildcards, ...) are replaced so open() cannot fail
    # or write outside the current directory.
    safe_title = "".join(
        "_" if ch in '\\/:*?"<>|' else ch for ch in tit.strip()
    )
    file_name = safe_title + ".mp4"
    print('保存视频文件: {}'.format(file_name))
    # Write the downloaded bytes in binary mode.
    with open(file_name, 'wb') as f:
        f.write(video.content)

from urllib import *
import requests
from lxml import etree

class Spider:
    """Scrape a video listing page and download every video it references.

    The workflow is: ``geturl`` fetches the page text, ``getinfo`` extracts
    the titles and video source URLs with XPath, and ``saveinfo`` downloads
    each video into the current working directory as ``<title>.mp4``.
    ``run`` chains the three steps.
    """

    # XPath expressions for the title text and the <video> source attribute.
    TITLE_XPATH = '//span[@class="video-title"]/text()'
    SRC_XPATH = '//div[@class="video-play"]/video/@src'

    # Characters replaced with "_" when a title is turned into a file name.
    _UNSAFE_FILENAME_CHARS = '\\/:*?"<>|'

    @staticmethod
    def _absolute_url(src):
        """Return *src* as an absolute URL.

        Values that already start with ``http://`` or ``https://`` are
        returned unchanged; anything else gets the ``http:`` prefix the
        original code applied unconditionally.
        """
        if src.startswith(("http://", "https://")):
            return src
        return "http:" + src

    @classmethod
    def _safe_filename(cls, title):
        """Return *title* stripped of surrounding whitespace, with characters
        that are invalid in file names replaced by ``_``."""
        return "".join(
            "_" if ch in cls._UNSAFE_FILENAME_CHARS else ch
            for ch in title.strip()
        )

    def geturl(self, url, timeout=30):
        """Fetch *url* and return the decoded response body.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before giving up.

        Returns:
            The response text, decoded with the encoding detected from the
            body (``apparent_encoding``).

        Raises:
            requests.RequestException: On connection failure, timeout, or a
                4xx/5xx status code.
        """
        response = requests.get(url, timeout=timeout)
        # Fail loudly rather than handing an error page to the parser.
        response.raise_for_status()
        response.encoding = response.apparent_encoding
        return response.text

    def getinfo(self, url):
        """Extract video titles and source URLs from page markup.

        Args:
            url: Despite the name, this is the HTML *text* of the page (the
                value returned by ``geturl``), not an address.

        Returns:
            A ``(titles, sources)`` tuple of two lists produced by the
            class-level XPath expressions.
        """
        html = etree.HTML(url)
        tit = html.xpath(self.TITLE_XPATH)
        src = html.xpath(self.SRC_XPATH)
        return tit, src

    def saveinfo(self, name, data, timeout=60):
        """Download each video and write it to ``<title>.mp4``.

        Args:
            name: Iterable of video titles.
            data: Iterable of video source URLs, paired positionally with
                *name*; iteration stops at the shorter of the two.
            timeout: Seconds to wait for each download request.

        Raises:
            requests.RequestException: On connection failure, timeout, or a
                4xx/5xx status code for any video.
        """
        for title, src in zip(name, data):
            response = requests.get(self._absolute_url(src), timeout=timeout)
            # Never write an HTTP error body to disk as an .mp4 file.
            response.raise_for_status()
            file_name = self._safe_filename(title) + ".mp4"
            print("正在下载:{}".format(file_name))
            with open(file_name, 'wb') as f:
                f.write(response.content)

    def run(self, firsturl):
        """Fetch *firsturl*, extract its videos and download them once.

        The earlier implementation looped over ``zip(info)``, which yields
        one item per element of the ``(titles, sources)`` tuple and therefore
        downloaded the whole list twice.
        """
        html = self.geturl(firsturl)
        names, sources = self.getinfo(html)
        self.saveinfo(names, sources)


if __name__ == '__main__':
    # Script entry point: scrape the ibaotu.com video listing and download
    # every video it references into the current directory.
    spider = Spider()
    spider.run("https://ibaotu.com/shipin/")

Python爬虫包图网case

标签：pytho load inf title 请求 response python爬虫 main odi

原文地址：https://www.cnblogs.com/smilyroy/p/11436261.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行

Python爬虫 包图网case

Python爬虫包图网case