标签:class link 遍历 utf-8 auth one 初始 个数 event
# 爬取知乎上的热门话题,获取话题的问题、作者、答案,然后保存在TXT文本中
import requests
from pyquery import PyQuery

# Scrape the Zhihu "explore" page and append each trending topic's
# question, author and answer to explore.txt.
url = 'https://www.zhihu.com/explore'
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/58.0.3029.110 Safari/537.36'
}
# A timeout keeps the script from hanging forever if the server never answers.
html = requests.get(url, headers=headers, timeout=10).text
doc = PyQuery(html)

# Select every class="feed-item" node inside class="explore-tab";
# each selected element is treated as one trending topic.
items = doc('.explore-tab .feed-item').items()

# Open the output file once (append mode) and let the context manager close
# it, even if a write fails part-way through the loop.
with open('explore.txt', 'a', encoding='utf-8') as file:
    for item in items:
        # The 'h2' node holds the question text.
        question = item.find('h2').text()

        # The class="author-link-line" node holds the author text.
        author = item.find('.author-link-line').text()

        # The class="content" node carries the answer as inner HTML, so it is
        # re-parsed with PyQuery to extract plain text.
        # NOTE(review): .html() appears to return None when nothing matches;
        # fall back to an empty answer instead of handing None to PyQuery.
        content_html = item.find('.content').html()
        answer = PyQuery(content_html).text() if content_html else ''

        file.write('\n'.join([question, author, answer]))
        file.write('\n' + '=' * 50 + '\n')
运行结果生成一个TXT文件
其中涉及open()方法,具体查看open()官方文档
# 调用JSON库的loads()方法将JSON文本字符串转为JSON对象
import json

# A JSON document held as plain text. JSON itself requires double quotes
# around keys and string values.
string = '''
[
    {
        "name": "Lee Hua",
        "gender": "male",
        "birthday": "1997-10-25"
    },
    {
        "name": "Li Hua",
        "gender": "female",
        "birthday": "1998-10-25"
    }
]
'''

print(type(string), string, sep='\n')

# json.loads() parses the text into Python objects (here: a list of dicts).
data = json.loads(string)
print(type(data), data, sep='\n')


# Output:
# <class 'str'>
#
# [
#     {
#         "name": "Lee Hua",
#         "gender": "male",
#         "birthday": "1997-10-25"
#     },
#     {
#         "name": "Li Hua",
#         "gender": "female",
#         "birthday": "1998-10-25"
#     }
# ]
#
# <class 'list'>
# [{'name': 'Lee Hua', 'gender': 'male', 'birthday': '1997-10-25'}, {'name': 'Li Hua', 'gender': 'female', 'birthday': '1998-10-25'}]
# JSON数据需要用双引号来包围,不能用单引号。
# 通过dumps()方法将JSON对象转为文本字符串。
import json

data = [
    {
        "name": "Lee Hua",
        "gender": "male",
        "birthday": "1997-10-25"
    },
    {
        "name": "李 华",
        "gender": "男",
        "birthday": "1998-10-25"
    }
]


# Serialize the Python objects to JSON text and write it out. The encoding is
# given explicitly so the result does not depend on the platform default.
with open('data.json', 'w', encoding='utf-8') as file:
    file.write(json.dumps(data, indent=2))


# If data.json does not exist yet, running this creates it.

# Content written to data.json:
# [
#   {
#     "name": "Lee Hua",
#     "gender": "male",
#     "birthday": "1997-10-25"
#   },
#   {
#     "name": "\u674e \u534e",
#     "gender": "\u7537",
#     "birthday": "1998-10-25"
#   }
# ]


# In json.dumps(data, indent=2), indent is the number of spaces used per
# nesting level.
# By default non-ASCII characters are escaped as \uXXXX. To store the Chinese
# text as-is, use json.dumps(data, indent=2, ensure_ascii=False); the file
# must then be opened with a Unicode encoding such as encoding='utf-8'.
import csv

# newline='' is required by the csv module: the writer emits its own line
# terminators, and without it some platforms end up with blank rows.
with open('data.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['id', 'name', 'age'])
    writer.writerow(['10001', 'Li Hua', '18'])
    writer.writerow(['10002', 'Lee Hua', '19'])
    writer.writerow(['10003', 'liyihua', '20'])
    # The same content can be written with writerows(), which takes a list
    # of rows (writerow() would treat the nested lists as a single row):
    # writer = csv.writer(csvfile)
    # writer.writerow(['id', 'name', 'age'])
    # writer.writerows(
    #     [
    #         ['10001', 'Li Hua', '18'],
    #         ['10002', 'Lee Hua', '19'],
    #         ['10003', 'liyihua', '20']
    #     ]
    # )

    # It can also be written from dicts; writeheader() emits the header row:
    # field_names = ['id', 'name', 'age']
    # writer = csv.DictWriter(csvfile, fieldnames=field_names)
    # writer.writeheader()
    # writer.writerow({'id': '10001', 'name': 'Li Hua', 'age': '18'})
    # writer.writerow({'id': '10002', 'name': 'Lee Hua', 'age': '19'})
    # writer.writerow({'id': '10003', 'name': 'liyihua', 'age': '20'})


# Result: a data.csv file is created with the following content
# id,name,age
# 10001,Li Hua,18
# 10002,Lee Hua,19
# 10003,liyihua,20

# To change the separator between columns, pass the delimiter argument:
# writer = csv.writer(csvfile, delimiter='<separator>')
# 如果想要追加写入的话,可以修改文件的打开模式,即将open()函数的第二个参数改成'a'。如:open('file', 'a')
# 写入时有时候会遇到编码错误问题,此时只要给open()函数指定编码形式就可以了。如:open('file', 'w', encoding='utf-8')
标签:class link 遍历 utf-8 auth one 初始 个数 event
原文地址:https://www.cnblogs.com/liyihua/p/11173584.html