Python 存入三大文件

时间：2020-02-29 13:16:59 阅读：66 评论：0 收藏：0 [点我收藏+]

标签：bpa user utf-8 link false use agent ext print

爬虫数据存入三大文件


import requests
import json,csv
from lxml import etree
# Scrape list pages 1-9 of the 004001 section and store every entry's link,
# title and date in ztb.csv.
#
# Alternative outputs shown in the original post (kept here as reference):
#   txt  - open 'ztb.txt' in append mode and, for each entry:
#              f.write(''.join([link, title, date, '\n']))
#   json - open 'ztb.json' in append mode and, for each entry:
#              dic = {'ret': link, 'ret1': title, 'ret2': date}
#              f.write(json.dumps(dic, indent=4, ensure_ascii=False) + ',')
#          NOTE(review): this emits comma-separated objects, which is not a
#          single valid JSON document; collect the dicts in a list and dump
#          once if the file must be loadable with json.load().

# The request headers never change, so build them once outside the loop.
headers = {
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
}

# The original code read exactly 15 entries per page; keep that upper bound
# but tolerate pages that contain fewer.
max_items_per_page = 15

# newline='' is what the csv module asks for; without it extra blank rows
# appear on platforms that translate '\n' to '\r\n'.
with open('ztb.csv', 'a', encoding='utf-8', newline='') as f:
    # delimiter must be a single character (e.g. a space or a comma).
    wr = csv.writer(f, delimiter=',')

    # Write the header row only when the file is still empty, instead of
    # once per scraped page.  In append mode the stream starts at the end
    # of the file, so position 0 means there is nothing in it yet.
    if f.tell() == 0:
        wr.writerow(['link','title','times'])

    for page in range(1, 10):
        # The first page has a fixed name; later pages are "<n>.html".
        if page == 1:
            url = 'http://www.lnzxzb.cn/gcjyxx/004001/subpage.html'
        else:
            url = 'http://www.lnzxzb.cn/gcjyxx/004001/' + str(page) + '.html'

        # A timeout prevents the script from hanging forever on a stalled
        # connection; raise_for_status() stops on HTTP errors rather than
        # parsing an error page.
        res = requests.get(url=url, headers=headers, timeout=10)
        res.raise_for_status()
        tree = etree.HTML(res.text)

        items = tree.xpath('//ul[@id="showList"]/li')[:max_items_per_page]
        for item in items:
            link = item.xpath('./p/a/@href')
            title = item.xpath('./p/a/@title')
            date = item.xpath('./span[1]/text()')
            # Skip entries missing any field instead of raising IndexError.
            if not (link and title and date):
                continue
            wr.writerow([link[0], title[0], date[0]])

Python 存入三大文件

标签：bpa user utf-8 link false use agent ext print

原文地址：https://www.cnblogs.com/xinzaiyuan/p/12382209.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行