"""Download listing pages of a Baidu Tieba forum and save each one as HTML.

Run as a script with two arguments: the forum name and the number of
pages to fetch.
"""
import sys

import requests


class Tieba(object):
    """Fetch the first ``pn`` listing pages of one Tieba forum.

    The page URLs are built once in ``__init__``; ``run`` then downloads
    every URL and writes the raw response body to a file named
    ``<tieba_name>_<index>.html`` in the current working directory.
    """

    # The ``pn`` query value advances by this amount from one page to the next.
    PAGE_STEP = 50

    # Seconds to wait for the server before giving up; without a timeout
    # requests.get() can block forever on a stalled connection.
    REQUEST_TIMEOUT = 10

    def __init__(self, tieba_name, pn):
        """Prepare the URL list and request headers.

        Args:
            tieba_name: Forum name, used as the ``kw`` query value and as
                the prefix of the saved file names.
            pn: Number of pages to fetch (an int); values <= 0 yield an
                empty URL list.
        """
        self.tieba_name = tieba_name
        # '&' must separate the kw and pn parameters, otherwise the page
        # offset is glued onto the forum name and every request hits the
        # wrong query.
        self.base_url = 'https://tieba.baidu.com/f?kw=%s&pn=' % (tieba_name)
        self.url_list = [
            self.base_url + str(page * self.PAGE_STEP) for page in range(pn)
        ]
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36'
        }

    def get_page(self, url):
        """Return the raw response body (bytes) for ``url``.

        Raises:
            requests.exceptions.RequestException: on connection errors or
                when the server does not answer within REQUEST_TIMEOUT.
        """
        response = requests.get(
            url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        return response.content

    def save_content(self, content, index):
        """Write ``content`` (bytes) to ``<tieba_name>_<index>.html``."""
        filename = self.tieba_name + '_' + str(index) + '.html'
        with open(filename, 'wb') as f:
            f.write(content)

    def run(self):
        """Download every URL in ``url_list`` and save each response."""
        # enumerate() supplies the page index directly; looking it up with
        # list.index() rescans the list for every page.
        for index, url in enumerate(self.url_list):
            content = self.get_page(url)
            self.save_content(content, index)


def main(argv=None):
    """Parse ``<tieba_name> <page_count>`` from ``argv`` and run the download.

    Args:
        argv: Argument list including the program name; defaults to
            ``sys.argv``. Arguments after the first two are ignored.
    """
    argv = sys.argv if argv is None else argv
    usage = 'usage: python %s <tieba_name> <page_count>' % argv[0]
    if len(argv) < 3:
        sys.exit(usage)
    name = argv[1]
    try:
        pn = int(argv[2])
    except ValueError:
        sys.exit(usage)
    tieba = Tieba(name, pn)
    tieba.run()


if __name__ == '__main__':
    main()
# 使用方法 (usage):
#   python <代码所在文件名> <贴吧名> <页数>
# 例如 (e.g.) 抓取“美女”吧的前 3 页:
#   python <代码所在文件名> 美女 3