码迷,mamicode.com
首页 > 其他好文 > 详细

获取全部校园新闻(GZCC大新闻流量刷新器)

时间:2018-04-10 13:41:32      阅读:165      评论:0      收藏:0      [点我收藏+]

标签:odi   描述   次数   tail   mmm   color   点击   内容   coding   

import requests
import re

# Campus-news listing URLs used by the functions below.
url = "http://news.gzcc.cn/html/xiaoyuanxinwen/"
listnewurl = "http://news.gzcc.cn/html/xiaoyuanxinwen/index.html"

res = requests.get(url)
reslist = requests.get(listnewurl)

# Force UTF-8 decoding on both responses so .text is decoded consistently.
res.encoding = 'utf-8'
reslist.encoding = 'utf-8'

# Build the parse trees with BeautifulSoup's HTML parser.
from bs4 import BeautifulSoup

soup = BeautifulSoup(res.text, 'html.parser')
soup_list = BeautifulSoup(reslist.text, 'html.parser')


def _print_news_list(soup_page):
    """Print the title and link of every news entry found in *soup_page*."""
    for news in soup_page.select('li'):
        titles = news.select('.news-list-title')
        if not titles:
            # Skip <li> elements that carry no .news-list-title child.
            continue
        title = titles[0].text
        href = news.select('a')[0]['href']

        print('------------------------------------------------------------------------------')
        print("文章标题:" + title)
        print("\n文章链接:" + href)


def getlistnew(listnewurl, last_page=232):
    """Print the title and link of every news item on all listing pages.

    The first listing page is fetched from *listnewurl*; pages 2 through
    *last_page* are fetched from ``<base>/<page number>.html``.

    Args:
        listnewurl: URL of the first listing page.
        last_page: Number of the last numbered listing page to fetch
            (inclusive). Defaults to 232, the bound the script was
            originally written with.
    """
    tou = 'http://news.gzcc.cn/html/xiaoyuanxinwen/'
    wei = '.html'

    # First page.
    reslist = requests.get(listnewurl)
    reslist.encoding = 'utf-8'
    _print_news_list(BeautifulSoup(reslist.text, 'html.parser'))

    # Every page after the first one.
    for page in range(2, last_page + 1):
        allnewurl = '%s%s%s' % (tou, page, wei)
        resalllist = requests.get(allnewurl)
        resalllist.encoding = 'utf-8'
        _print_news_list(BeautifulSoup(resalllist.text, 'html.parser'))


def getClickCount(url):
    """Print the click count and the news number of the article at *url*.

    The news number is the part of *url* between the first underscore and
    the final ``.html``; its last ``/``-separated component is used as the
    ``id`` in the click-count API request, so the count belongs to the
    article that was passed in rather than to one fixed article.

    Args:
        url: Article URL, e.g. ``.../xiaoyuanxinwen_0404/9183.html``.

    Raises:
        ValueError: If no news number can be extracted from *url*.
    """
    matched = re.search(r'_(.*)\.html', url)
    if matched is None:
        raise ValueError("cannot extract a news number from %r" % (url,))
    news_number = matched.group(1)
    news_id = news_number.split('/')[-1]

    HitUrl = 'http://oa.gzcc.cn/api.php?op=count&id={}&modelid=80'.format(news_id)
    # Assumes the response body ends with ``.html('<count>');`` -- the text
    # after the last ".html" is stripped of the surrounding "('" and "');".
    hitNumber = requests.get(HitUrl).text.split('.html')[-1].lstrip("('").rstrip("');")
    print("点击次数:", hitNumber)

    print('新闻编号:', news_number)


def getNewDetail(url):  # fetch the detailed news of one listing page
    """Print the details of every news item listed on the page at *url*.

    For each ``<li>`` that contains a ``.news-list-title`` element, the
    linked article page is fetched and its title, description, info line,
    click count, link and content are printed.

    Args:
        url: URL of a news listing page.
    """
    def first_text(node, selector):
        # Text of the first element matching *selector*, or '' if none.
        found = node.select(selector)
        return found[0].text if found else ''

    separator = '------------------------------------------------------------------------------'

    res = requests.get(url)
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'html.parser')

    for news in soup.select('li'):
        titles = news.select('.news-list-title')
        if not titles:
            continue

        # Listing-page title, description and link.
        title = titles[0].text
        description = first_text(news, '.news-list-description')
        href = news.select('a')[0]['href']

        # Fetch the article page itself under separate names so the listing
        # page's url/res/soup are not overwritten inside the loop.
        article_res = requests.get(href)
        article_res.encoding = 'utf-8'
        article = BeautifulSoup(article_res.text, 'html.parser')

        # Per-article info line and body text.
        newinfo = first_text(article, '.show-info')
        content = first_text(article, '#content')

        # Whitespace-separated fields, in order: date, time of day, author,
        # reviewer, source, photography. Pad with '' so an info line with
        # fewer than six fields does not raise IndexError.
        fields = (newinfo.split() + [''] * 6)[:6]
        date, time, author, checker, source, photography = fields

        print(separator)
        print("文章标题:" + title)
        print("\n文章描述:" + description)
        print("\n文章信息:\n" + date + ' ' + time + '\n' + author + '\n' + checker + '\n' + source + '\n' + photography)
        getClickCount(href)  # click count and news number
        print("\n文章链接:" + href)
        print(content)
        print(separator)


# getNewDetail(url)   # fetch the detailed news for one page
getlistnew(listnewurl)  # fetch the title and link of every news item

# Wrapping this in an infinite loop would turn it into a traffic-inflating tool (just kidding)

技术分享图片

最后面的:

技术分享图片

获取全部校园新闻(GZCC大新闻流量刷新器)

标签:odi   描述   次数   tail   mmm   color   点击   内容   coding   

原文地址:https://www.cnblogs.com/FZW1874402927/p/8777906.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!