标签:python
# Download every article linked from an index page and save each one as
# "<title>.html" in the current working directory.
#
# The code runs on both Python 2 and Python 3: the original script targeted
# Python 2 (urllib2, print statement), so the Python 2 path is preserved.
import re

try:
    import urllib2  # Python 2
except ImportError:
    import urllib.request as urllib2  # Python 3 exposes urlopen here

# Index page whose links are downloaded.
INDEX_URL = 'http://www.yinwang.org/'

# The original script pairs titleContent[i+3] with titleUrl[i+8], i.e. it
# skips the first 3 anchor texts and the first 8 href values before the
# article list starts.
# NOTE(review): these offsets presumably match the index page layout at the
# time of writing -- confirm against the live page.
TITLE_OFFSET = 3
URL_OFFSET = 8

# Compiled once at import time instead of on every call.
_HREF_PATTERN = re.compile(r'href="(.+?)"')
_ANCHOR_TEXT_PATTERN = re.compile(r'">(.+?)</a>')


def _toText(htmlString):
    """Return htmlString as text so the str regexes above can search it.

    On Python 3 ``urlopen(...).read()`` yields ``bytes``; those are decoded.
    On Python 2 ``bytes`` is ``str``, so the input is returned unchanged.
    """
    if isinstance(htmlString, bytes) and not isinstance(htmlString, str):
        # NOTE(review): assumes the page is UTF-8 encoded -- confirm;
        # undecodable bytes are replaced rather than raising.
        return htmlString.decode('utf-8', 'replace')
    return htmlString


def getHtmlCode(url):
    """Fetch url and return the raw response body.

    The response object is always closed, even if reading fails.
    """
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def findTitleUrl(htmlString):
    """Return every href="..." attribute value found in htmlString, in order."""
    return _HREF_PATTERN.findall(_toText(htmlString))


def findTitleContent(htmlString):
    """Return the text of every match of '">...</a>' in htmlString, in order."""
    return _ANCHOR_TEXT_PATTERN.findall(_toText(htmlString))


def main():
    """Download each article linked from INDEX_URL into "<title>.html"."""
    htmlCode = getHtmlCode(INDEX_URL)
    titleContent = findTitleContent(htmlCode)
    titleUrl = findTitleUrl(htmlCode)

    # zip() stops at the shorter slice, so the loop can no longer run past
    # the end of either list (the original indexed i+3 / i+8 over the full
    # length of titleUrl and always ended with an IndexError).
    articles = zip(titleContent[TITLE_OFFSET:], titleUrl[URL_OFFSET:])
    for title, url in articles:
        print(title)
        print(url)
        htmlPage = getHtmlCode(url)
        # "with" guarantees the file is closed; the original wrote
        # "f.close" without parentheses, which never called close().
        with open("%s.html" % (title), 'wb') as f:
            f.write(htmlPage)


if __name__ == '__main__':
    main()
标签:python
原文地址:http://blog.csdn.net/rainlesvio/article/details/40431325