
A crawler running into HTTP Error 403



# coding=utf-8


from bs4 import BeautifulSoup
import requests
import urllib.request   # needed for urllib.request.urlretrieve in Python 3

x = 1   # running index for downloaded images
y = 1   # running index for saved page dumps

def crawl(url):
    res = requests.get(url)
    soup = BeautifulSoup(res.text, 'html.parser')
    global y
    # save the raw HTML of the page for inspection
    with open(f'C:/Users/Administrator/Desktop/alien/pachong/xnt/{y}.txt', 'w', encoding="utf-8") as f:
        f.write(str(soup))
        y += 1
    yinhuns = soup.select('img')
    for yh in yinhuns:
        print(yh)
        link = yh.get('src')
        print(link)
        global x
        # urlretrieve sends the default "Python-urllib" User-Agent,
        # which is what the image host rejects with HTTP Error 403
        urllib.request.urlretrieve(link, f'C:/Users/Administrator/Desktop/alien/pachong/xnt/{x}.jpg')
        print(f'Downloading image {x}')
        x += 1

for i in range(1, 5):
    url = "https://acg.fi/hentai/23643.htm/" + str(i)
    try:
        crawl(url)
    except ValueError:
        continue
    except Exception as e:
        print(e)
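The 403 here most likely comes from urllib.request.urlretrieve, whose default Python-urllib User-Agent many sites reject. Below is a minimal sketch of a workaround, assuming the block is User-Agent based; the helper name download_image and the header value are illustrative, and the sketch reuses the requests and urllib.request imports from the script above.

# Hedged sketch: send a browser-like User-Agent so the image host
# does not reject the request with 403. The header value is illustrative.
headers = {'User-Agent': 'Mozilla/5.0'}

def download_image(link, path):
    resp = requests.get(link, headers=headers, timeout=10)
    resp.raise_for_status()          # surfaces a 403 as an HTTPError
    with open(path, 'wb') as img:
        img.write(resp.content)

# Alternatively, keep urlretrieve but install a global opener that
# sends the custom User-Agent on every urllib request:
# opener = urllib.request.build_opener()
# opener.addheaders = [('User-Agent', 'Mozilla/5.0')]
# urllib.request.install_opener(opener)

With a helper like this in place, the urlretrieve call inside crawl() can be replaced by download_image(link, f'C:/Users/Administrator/Desktop/alien/pachong/xnt/{x}.jpg').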


Original post: https://www.cnblogs.com/rener0424/p/10970096.html
