标签:
早上还有实验验收,先上代码,早上再写。
# Python 2 script: fetch a cnblogs home page and save every post's title
# text and link to res.txt.
import re
import urllib2

from bs4 import BeautifulSoup

BLOG_URL = "http://www.cnblogs.com/ly941122/"
OUTPUT_PATH = 'res.txt'

# Matches any opening or closing HTML tag; used to reduce markup to text.
TAG_PATTERN = re.compile(r'</?\w+[^>]*>')


def main():
    """Download BLOG_URL and write one "title + link" record per post.

    Each <div class="postTitle"> element found on the page yields one
    record in OUTPUT_PATH: the element's markup with every tag replaced
    by a space (minus the first character), followed by the href of the
    anchor inside it and a newline.  Errors raised while writing records
    are printed rather than propagated, so the script always reaches the
    final raw_input() pause.
    """
    response = urllib2.urlopen(BLOG_URL)
    try:
        content = response.read()
    finally:
        # urllib2 responses are not context managers on Python 2.
        response.close()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(content, 'html.parser')
    site_urls = soup.find_all('div', {'class': 'postTitle'})
    print(site_urls)

    with open(OUTPUT_PATH, 'wb') as out:
        try:
            for entry in site_urls:
                # Replace each tag with a space, then drop the first
                # character (the space left by the opening <div>).
                title = TAG_PATTERN.sub(' ', str(entry))[1:]

                # Read the link from the parsed tree instead of slicing the
                # serialized markup, which only worked when href happened
                # to be the first attribute emitted for the <a> tag.
                anchor = entry.find('a', href=True)
                if anchor is None:
                    continue

                # str(entry) is a UTF-8 byte string on Python 2; encode the
                # href too so the concatenation never mixes str/unicode.
                link = anchor['href'].encode('utf-8')
                out.write(title + link + '\n')
        except Exception as e:
            # Best-effort: report the failure and keep what was written.
            print(e)

    # Keep the console window open until the user presses Enter.
    raw_input()


if __name__ == '__main__':
    main()
标签:
原文地址:http://www.cnblogs.com/ly941122/p/4572861.html