标签:glob class dir def with int exit chrome com
代码:
#-*- coding: UTF-8 -*-
"""Download gallery images from the list pages under ``start_url``.

Each list page is fetched and parsed, and every ``<img>`` whose ``src``
starts with ``/uploads`` is saved into ``saved_path`` as a sequentially
numbered ``.jpg`` file.
"""
import os

import requests
from bs4 import BeautifulSoup

try:  # Python 3
    from urllib.parse import urljoin
except ImportError:  # Python 2
    from urlparse import urljoin

start_url = 'http://www.521609.com/meinvxiaohua/'
# Browser-like user agent, sent with every request made by this script.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/68.0.3440.106 Safari/537.36'
}
saved_path = r'C:\Users\zhoutiax\Desktop\xiaohua'
# Seconds to wait on a request; without a timeout requests can block forever.
REQUEST_TIMEOUT = 20
# Number used for the next saved file. It is module-level so that successive
# crawl() calls keep counting instead of overwriting earlier downloads.
x = 1


def _download(img_url, target):
    """Fetch *img_url* and write the response body to the file *target*.

    Raises:
        requests.RequestException: on a network failure or an HTTP error
            status, so that an error page is never stored as an image.
    """
    response = requests.get(img_url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    with open(target, 'wb') as out:
        out.write(response.content)


def crawl(url):
    """Save every ``/uploads`` image referenced by the page at *url*.

    Files are written to ``saved_path`` (created if missing) and named
    ``<x>.jpg``, where ``x`` is the module-level counter; the counter is
    advanced once per image that was stored successfully.

    Args:
        url: Address of the list page to scan for ``<img>`` tags.

    Raises:
        requests.RequestException: if the list page itself cannot be fetched.
    """
    global x
    content = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT).text
    soup = BeautifulSoup(content, "html.parser")

    if not os.path.exists(saved_path):
        os.makedirs(saved_path)

    for tag in soup.find_all("img"):
        # Some <img> tags carry no src attribute; treat them as non-matching
        # instead of raising KeyError.
        src = tag.get('src', '')
        if not src.startswith('/uploads'):
            continue
        # src is root-relative, so resolve it against the page address.
        img = urljoin(url, src)
        target = os.path.join(saved_path, '%d.jpg' % x)
        try:
            _download(img, target)
        except requests.RequestException as err:
            # One broken image should not abort the rest of the page.
            print('skipped %s: %s' % (img, err))
            continue
        x += 1


if __name__ == '__main__':
    for page in range(1, 5):  # list pages 1 through 4
        page_url = start_url + "list12%d.html" % page
        print(page_url)
        crawl(page_url)
标签:glob class dir def with int exit chrome com
原文地址:https://www.cnblogs.com/nevermore29/p/9606035.html