#coding=utf-8
"""Crawl gallery pages under http://www.7160.com/meinv/ and save one image per page.

For every gallery id and page index the script fetches the page, looks for
the element with class ``picmainer``, and, when present, downloads the first
``<img>`` of the document into the current working directory. The file is
named after the text of the ``<h1>`` inside ``picmainer``.

Run as a script to start the crawl; importing the module only defines the
helpers and performs no network access.
"""
import re
import urllib.request
from urllib import error
from urllib.parse import urljoin

# Root under which every gallery lives; the gallery id is appended to it.
BASE_URL = "http://www.7160.com/meinv/"
# Gallery ids are probed over range(FIRST_GALLERY_ID, LAST_GALLERY_ID).
FIRST_GALLERY_ID = 1
LAST_GALLERY_ID = 151637
# Pages 1 .. MAX_PAGES - 1 are probed for every gallery.
MAX_PAGES = 30


def validateTitle(title):
    """Return *title* with characters that are illegal in file names replaced.

    Each of the characters / \\ : * ? " < > | is replaced by an underscore so
    that the result can be used as a file name.
    """
    rstr = r"[\/\\\:\*\?\"\<\>\|]"  # / \ : * ? " < > |
    new_title = re.sub(rstr, "_", title)  # replace with underscore
    return new_title


def _page_url(gallery_id, page):
    """Build the URL of page number *page* (1-based) of gallery *gallery_id*."""
    url_origin = BASE_URL + str(gallery_id)
    # The first page has no numeric suffix; later pages are index_<n>.html.
    if page == 1:
        return url_origin + "/index.html"
    return url_origin + "/index_" + str(page) + ".html"


def _save_page_image(url):
    """Fetch *url* and save its image to ``<sanitized title>.jpg``.

    Pages without a ``picmainer`` element are ignored. Pages that have one
    but lack an ``<h1>`` text or an ``<img src=...>`` are skipped instead of
    aborting the crawl.

    Raises:
        urllib.error.URLError: if the page or the image cannot be retrieved.
    """
    # Imported here rather than at module level so that importing this module
    # (e.g. to reuse validateTitle) does not require bs4/lxml to be installed.
    from bs4 import BeautifulSoup

    request = urllib.request.Request(url)
    # The context manager closes the connection once the page has been parsed.
    with urllib.request.urlopen(request) as res:
        soup = BeautifulSoup(res, "lxml")

    title_obj = soup.find(attrs={"class": "picmainer"})
    if title_obj is None:
        return
    print(url)

    heading = title_obj.h1
    # .string is None when the heading is absent or contains nested markup.
    title = heading.string if heading is not None else None

    # NOTE(review): this takes the first <img> of the whole document, not the
    # first one inside picmainer -- presumably that is the main picture; confirm.
    content = soup.find("img")
    src = content.get("src") if content is not None else None

    if title is None or src is None:
        # Nothing usable on this page; move on to the next one.
        return

    file_name = validateTitle(title) + ".jpg"
    # urljoin leaves absolute URLs untouched and resolves relative ones
    # against the page URL, which urlretrieve could not fetch otherwise.
    urllib.request.urlretrieve(urljoin(url, src), file_name)
    print(file_name + "保存成功")


def main():
    """Walk every gallery id and page index, saving the image of each page."""
    for j in range(FIRST_GALLERY_ID, LAST_GALLERY_ID):
        for i in range(1, MAX_PAGES):
            url = _page_url(j, i)
            try:
                _save_page_image(url)
            except error.URLError as e:
                # Covers HTTP errors (e.g. 404 for a missing page) as well as
                # connection failures; report and continue with the next page.
                print(e.reason)


if __name__ == "__main__":
    main()