# Tags: python
import re
import urllib2
def getHtmlCode(url):
    """Download *url* and return the complete response body.

    Args:
        url: Address handed unchanged to ``urllib2.urlopen``.

    Returns:
        The result of ``read()`` on the response, i.e. the whole body as
        delivered by the server, with no decoding or parsing applied.

    Raises:
        Any exception raised by ``urllib2.urlopen`` or ``read()`` is
        propagated to the caller unchanged.
    """
    # Imported here so this function is self-contained regardless of the
    # file's top-level import list.
    from contextlib import closing

    # urllib2 responses are not context managers on Python 2; closing()
    # guarantees the connection is released even when read() raises.
    with closing(urllib2.urlopen(url)) as response:
        return response.read()
def findTitleUrl(htmlString):
    """Return the contents of every double-quoted ``href`` in *htmlString*.

    Args:
        htmlString: Text to scan.

    Returns:
        A list of the captured attribute values, in the order they occur.
        The list is empty when nothing matches.

    The capture is lazy but must contain at least one character, and ``.``
    does not match a newline. Single-quoted attributes are not matched.
    """
    hrefPattern = r'href="(.+?)"'
    return re.findall(hrefPattern, htmlString)
def findTitleContent(htmlString):
    """Return the text found between each ``">`` and the next ``</a>``.

    Args:
        htmlString: Text to scan.

    Returns:
        A list of the captured strings, in the order they occur. The list
        is empty when nothing matches.

    The match starts at any ``">`` (not only the one ending an ``<a>`` tag)
    and runs lazily to the following ``</a>`` on the same line, because
    ``.`` does not match a newline.
    """
    anchorTextPattern = r'">(.+?)</a>'
    return re.findall(anchorTextPattern, htmlString)
# Script body: fetch the index page, then download every linked page and
# save it as "<title>.html" in the current working directory.

# The leading regex matches on the index page are skipped.
# NOTE(review): presumably these are header/navigation links rather than
# articles -- the offsets are tied to the page markup; confirm against the
# live site before relying on them.
TITLE_SKIP = 3  # leading findTitleContent() matches to ignore
URL_SKIP = 8    # leading findTitleUrl() matches to ignore

htmlCode = getHtmlCode('http://www.yinwang.org/')
titleContent = findTitleContent(htmlCode)
titleUrl = findTitleUrl(htmlCode)

# Pair each remaining title with its URL. zip() stops at the shorter list,
# so the loop cannot run past the end of either one (looping over
# range(len(titleUrl)) while indexing [i + 8] ended in IndexError).
for title, url in zip(titleContent[TITLE_SKIP:], titleUrl[URL_SKIP:]):
    # Single-argument print(...) behaves the same as the print statement.
    print(title)
    print(url)
    htmlPage = getHtmlCode(url)
    # The title comes from the remote page: replace path separators so it
    # cannot point outside the working directory or name a missing folder.
    fileName = "%s.html" % re.sub(r"[\\/]", "_", title)
    # 'with' closes the file; the bare "f.close" never called the method.
    with open(fileName, 'wb') as f:
        f.write(htmlPage)
# Tags: python
# Original article: http://blog.csdn.net/rainlesvio/article/details/40431325