Python 爬虫实例

时间：2020-06-23 15:31:36 阅读：63 评论：0 收藏：0 [点我收藏+]

标签：https sts man beautiful wait previous loop not sync

1. 九寨沟景点（从一篇游客人数新闻页出发，沿分页的“上一页 / 下一页”链接依次抓取页面）

import asyncio
import requests
from bs4 import BeautifulSoup

# URL template for the site being crawled; ``{0}`` is filled with the href
# taken from a pager link.
base_url = 'https://www.jiuzhai.com/{0}'


async def parse_next_html(response):
    """Follow the pager's "next" links starting from ``response``.

    Each page is parsed for the ``.pager .next a`` element; its ``href`` is
    inserted into ``base_url``, the resulting URL is printed and fetched, and
    the process repeats on the fetched page.

    The walk stops when a page has no "next" link, when a fetch returns
    ``None``, or when a URL comes up a second time.

    Args:
        response: Object exposing the page HTML as ``.text`` (as returned by
            ``get_response``).
    """
    # Iterating instead of recursing keeps the call stack flat no matter how
    # many pages the pager chains together.
    visited = set()
    while response is not None:
        soup = BeautifulSoup(response.text, features="html.parser")
        next_link = soup.select_one('.pager .next a')
        if next_link is None:
            break
        next_url = base_url.format(next_link.attrs['href'])
        if next_url in visited:
            # A pager that links back onto itself would otherwise loop forever.
            break
        visited.add(next_url)
        print(next_url, "+++++++")
        response = await get_response(next_url)


async def parse_previous_html(response):
    """Follow the pager's "previous" links starting from ``response``.

    Each page is parsed for the ``.pager .previous a`` element; its ``href``
    is inserted into ``base_url``, the resulting URL is printed and fetched,
    and the process repeats on the fetched page.

    The walk stops when a page has no "previous" link, when a fetch returns
    ``None``, or when a URL comes up a second time.

    Args:
        response: Object exposing the page HTML as ``.text`` (as returned by
            ``get_response``).
    """
    # Iterating instead of recursing keeps the call stack flat no matter how
    # many pages the pager chains together.
    visited = set()
    while response is not None:
        soup = BeautifulSoup(response.text, features="html.parser")
        previous_link = soup.select_one('.pager .previous a')
        if previous_link is None:
            break
        previous_url = base_url.format(previous_link.attrs['href'])
        if previous_url in visited:
            # A pager that links back onto itself would otherwise loop forever.
            break
        visited.add(previous_url)
        print(previous_url, "*********")
        response = await get_response(previous_url)


async def get_response(url, timeout=10):
    """Fetch ``url`` with an HTTP GET without blocking the event loop.

    ``requests.get`` is a blocking call, so it is handed to the loop's default
    executor and awaited; other coroutines can run while the request is in
    flight.

    Args:
        url: Address to request.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot hang the crawl indefinitely.

    Returns:
        The object returned by ``requests.get``, or ``None`` if the request
        raised (the exception is printed).
    """
    loop = asyncio.get_running_loop()
    try:
        return await loop.run_in_executor(
            None, lambda: requests.get(url, timeout=timeout)
        )
    except Exception as e:
        # Deliberately best-effort: callers treat None as "stop following
        # this chain" rather than aborting the whole crawl.
        print(e)
        return None


async def run_manager(url):
    """Fetch ``url`` and, if the fetch succeeds, crawl its pager both ways.

    The "previous" chain is awaited to completion before the "next" chain is
    started; both start from the same initial response. Nothing happens when
    the initial fetch yields ``None``.

    Args:
        url: Address of the page to start from.
    """
    first_page = await get_response(url)
    if first_page is None:
        return

    for crawl in (parse_previous_html, parse_next_html):
        await crawl(first_page)


async def main():
    """Run the crawl from the hard-coded starting article.

    The single ``run_manager`` coroutine is awaited through
    ``asyncio.gather``, so further start URLs can be added as extra arguments.
    """
    start_url = 'https://www.jiuzhai.com/news/number-of-tourists/7110-5000-180'
    await asyncio.gather(
        run_manager(start_url),
    )

if __name__ == '__main__':
    # asyncio.run creates a fresh event loop, runs main() to completion and
    # closes the loop afterwards. Calling asyncio.get_event_loop() with no
    # running loop is deprecated in recent Python versions.
    asyncio.run(main())

python爬虫实例

标签：https sts man beautiful wait previous loop not sync

原文地址：https://www.cnblogs.com/snow-wolf-1/p/13181973.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行