xiaohuar.spider

时间：2018-10-25 14:09:42 阅读：383 评论：0 收藏：0 [点我收藏+]

标签：one beauty int count head %s safari turn .com

import requests, re
from requests.exceptions import RequestException


def get_one_page(url, agent):
    """Fetch ``url`` and return the response body as text.

    Args:
        url: Address of the page to download.
        agent: Mapping of HTTP request headers (passed as ``headers=``).

    Returns:
        The decoded response body when the server answers with status 200.
        Otherwise ``None``, after printing a short error message.
    """
    try:
        # A timeout keeps a stalled server from hanging the script forever;
        # requests reports an expired timeout as a RequestException subclass.
        response = requests.get(url, headers=agent, timeout=10)
    except RequestException:
        print('网站出错')
        return None
    if response.status_code == 200:
        return response.text
    print('网站出错1')
    return None


def reg(x):
    """Turn raw ``src="..."`` attribute matches into site-relative paths.

    Each item is expected to look like ``src="/d/file/a.jpg"`` or
    ``src="http://www.xiaohuar.com/d/file/a.jpg"``. The result for both is
    ``d/file/a.jpg`` (no leading slash), ready to be appended to the site
    root URL.

    Args:
        x: Iterable of matched attribute strings.

    Returns:
        A list with one cleaned path per input item, in the same order.
    """
    attr_prefix = 'src="'
    host_prefix = 'http://www.xiaohuar.com'
    paths = []
    for match in x:
        value = match
        # str.lstrip() removes a *set of characters*, not a prefix, so it can
        # eat the start of the real path; remove the exact prefixes instead.
        if value.startswith(attr_prefix):
            value = value[len(attr_prefix):]
        # Keep only the attribute value: drop the closing quote and anything
        # that may follow it.
        value = value.partition('"')[0]
        if value.startswith(host_prefix):
            value = value[len(host_prefix):]
        paths.append(value.lstrip('/'))
    return paths


def main():
    """Download the listing page and collect image ``src`` attributes.

    Returns:
        A list of matched attribute strings, each of the form
        ``src="/d..."`` or ``src="http://www.xiaohuar.com/d..."``.
        The list is empty when the page could not be fetched.
    """
    url = 'http://www.xiaohuar.com/2014.html'
    agent = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                           '(KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36'}
    html = get_one_page(url, agent)
    if not html:
        # get_one_page returns None on failure; re.findall would raise
        # TypeError on it.
        return []
    # [^"]+ stops at the attribute's own closing quote; the earlier greedy
    # `.+"` ran on to the last quote of the line. Host dots are escaped so
    # they only match literal dots.
    return re.findall(r'src="(?:http://www\.xiaohuar\.com)?/d[^"]+"', html)


def read_beauty(lis):
    """Download every image in ``lis`` and save it as ``<n>.jpg``.

    Files are written to the current working directory and numbered from 1
    in the order the paths appear. The first failed download (non-200
    status or a request error) prints a message and stops processing the
    remaining paths.

    Args:
        lis: Iterable of site-relative image paths without a leading slash.
    """
    # The headers are the same for every request, so build them once.
    agent = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36'
                           ' (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36'}
    for count, path in enumerate(lis, start=1):
        name = '%s.jpg' % count
        url = 'http://www.xiaohuar.com/' + path
        try:
            # The timeout prevents one unresponsive download from blocking
            # the whole run indefinitely.
            reson = requests.get(url, headers=agent, timeout=10)
        except RequestException:
            print('网站出错')
            return
        if reson.status_code != 200:
            print('网站出错1')
            return
        with open(name, 'wb') as f:
            f.write(reson.content)
        print('完成1次')


if __name__ == '__main__':
    # Pipeline: scrape the raw src attributes, reduce them to relative
    # paths, then download each image.
    matches = main()
    paths = reg(matches)
    read_beauty(paths)
    print('全部完成')

xiaohuar.spider

标签：one beauty int count head %s safari turn .com

原文地址：https://www.cnblogs.com/sw-z/p/9849103.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行