标签:art gre http _id etc and one SM time
import re

# Sample paragraph used to demonstrate splitting text into words.
# Named `text` (not `str`) so the builtin `str` type is not shadowed.
text = '''King Athamus of northern Greece had two children, Phrixus and Helle.After he left his first wife and mar ried Ino, a wicked woman,the two children received all the cruel treatment that a stepmother coulddevise , At one timethe kingdom was ruined by a famine.'''

# Split on any run of whitespace or sentence punctuation. The pattern is a raw
# string so that `\s` reaches the regex engine instead of being treated as an
# (invalid) string escape. Because the text ends with a delimiter, the result
# ends with one empty string.
words = re.split(r"[\s,.?!]+", text)
print(words)
将获取新闻详情的代码定义成一个函数:def getNewDetail(newsUrl)
import re

import requests
from bs4 import BeautifulSoup

# Seconds to wait for an HTTP response before giving up; without a timeout a
# stalled server would hang the script indefinitely.
REQUEST_TIMEOUT = 10


def getClickCount(newUrl):
    """Return the click count for the news article at *newUrl*.

    The article id is the last path component of the URL (the part between
    the ``<category>_<date>/`` directory and ``.html``). That id is sent to
    the counter API, whose response is searched for ``hits').html('<N>');``.

    Raises:
        ValueError: if the id cannot be extracted from *newUrl*, or if the
            counter response does not contain a hit count.
        requests.RequestException: on network or HTTP errors.
    """
    id_match = re.search(r'_[^/]*/([^/]+)\.html', newUrl)
    if id_match is None:
        raise ValueError('cannot extract news id from URL: {!r}'.format(newUrl))
    new_id = id_match.group(1)

    url = 'http://oa.gzcc.cn/api.php?op=count&id={}&modelid=80'.format(new_id)
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    hits_match = re.search(r"hits'\)\.html\('(\d+)'\);", response.text)
    if hits_match is None:
        raise ValueError('click count not found in response from {}'.format(url))
    return int(hits_match.group(1))


def getNewDetail(newsUrl):
    """Fetch a news page and return its details as a list of strings.

    The list holds the entries of the page's ``show-info`` block that are
    longer than three characters, followed by the article body text as the
    last element. Any info entry containing '次' is replaced by
    '点击:<count>次', with the count obtained from getClickCount.

    Raises:
        ValueError: if the page lacks the ``show-content`` or ``show-info``
            block.
        requests.RequestException: on network or HTTP errors.
    """
    web = requests.get(newsUrl, timeout=REQUEST_TIMEOUT)
    web.raise_for_status()
    web.encoding = 'utf-8'
    soup = BeautifulSoup(web.text, 'html.parser')

    content_div = soup.find('div', {'class': 'show-content'})
    info_div = soup.find('div', {'class': 'show-info'})
    if content_div is None or info_div is None:
        raise ValueError('unexpected page layout at {}'.format(newsUrl))
    content = content_div.text  # article body text

    # Detail entries are separated by non-breaking spaces or newlines. Splitting
    # on those characters (rather than on the letter "n") keeps entries that
    # contain an "n" intact. Short fragments are separator noise and dropped.
    entries = [part for part in re.split(r'[\xa0\n]', info_div.text)
               if len(part) > 3]

    details = []
    click_count = None  # fetched lazily, and only once per page
    for entry in entries:
        if '次' in entry:
            if click_count is None:
                click_count = getClickCount(newsUrl)
            entry = '点击:{}次'.format(click_count)
        details.append(entry)

    details.append(content)
    return details


if __name__ == '__main__':
    url = 'http://news.gzcc.cn/html/2018/xiaoyuanxinwen_0329/9129.html'
    new = getNewDetail(url)
    print(new)
标签:art gre http _id etc and one SM time
原文地址:https://www.cnblogs.com/127li/p/8783436.html