标签:图片 nbsp splay pytho data- spl url cli beautiful
一个简单爬虫案例
# Simple crawler example: walk the paginated index pages of 90xiaohua.com and
# save every listed image into SAVE_DIR.
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

HOME_URL = "http://www.90xiaohua.com/"
BASE_URL = "http://90xiaohua.com"
PAGE_URL_TEMPLATE = "http://90xiaohua.com/index_%s.html"
SAVE_DIR = r"D:\python_code\eggweb\file"
# requests has no default timeout; without one a stalled server blocks forever.
REQUEST_TIMEOUT = 10


def fetch_page(url):
    """Fetch *url* and return the response, with its text decoded as UTF-8."""
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.encoding = "utf-8"
    return response


def image_file_name(src):
    """Return the local file name for image URL *src*.

    The name is the last path segment of *src* with its extension (the text
    after the final dot) replaced by ``.jpg``.
    """
    base_name = src.rsplit("/", maxsplit=1)[-1]
    stem = base_name.rsplit(".", maxsplit=1)[0]
    return stem + ".jpg"


def download_page_images(page_html, save_dir):
    """Download every image listed in *page_html* into *save_dir*.

    Images are taken from the ``data-original`` attribute of the ``<img>``
    inside each ``<li>`` of the ``m-list-main`` block. Entries without an
    image or without that attribute, and downloads that fail, are skipped.
    """
    soup = BeautifulSoup(page_html, "html.parser")
    listing = soup.find(name="div", attrs={"class": "m-list-main"})
    if listing is None:
        return

    for li in listing.find_all(name="li"):
        img = li.find(name="img")
        src = img.get("data-original") if img is not None else None
        if not src:
            continue

        # urljoin also copes with values that are already absolute or that
        # lack a leading slash, which plain concatenation would corrupt.
        src = urljoin(BASE_URL, src)
        print(src)

        try:
            ret = requests.get(src, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            print("skipped %s: %s" % (src, exc))
            continue
        if not ret.ok:
            # Do not store an HTTP error page under a .jpg name.
            print("skipped %s: HTTP %s" % (src, ret.status_code))
            continue

        path = os.path.join(save_dir, image_file_name(src))
        with open(path, "wb") as f:
            f.write(ret.content)


def main():
    """Crawl the index pages linked from the home page pager."""
    response = fetch_page(HOME_URL)
    soup = BeautifulSoup(response.text, "html.parser")
    pager = soup.find(
        name="div", attrs={"class": "m-page m-page-sr m-page-sm"}
    )
    if pager is None:
        print("pagination block not found; nothing to download")
        return

    os.makedirs(SAVE_DIR, exist_ok=True)

    # Index pages are numbered from 2 (index_2.html, index_3.html, ...).
    count = 2
    for a in pager.find_all(name="a"):
        # Only pager links that wrap a <span> trigger a page fetch.
        if a.find(name="span") is None:
            continue

        obj = fetch_page(PAGE_URL_TEMPLATE % count)
        print(obj.text)
        download_page_images(obj.text, SAVE_DIR)
        # NOTE(review): the original indentation of this increment was lost;
        # advancing once per fetched page keeps page numbers consecutive.
        count += 1


if __name__ == "__main__":
    main()
标签:图片 nbsp splay pytho data- spl url cli beautiful
原文地址:https://www.cnblogs.com/hbfengjun/p/12564326.html