The full script: it queries Lagou's positionAjax.json endpoint page by page for "python" jobs in Chengdu and saves the results either as JSON lines in a text file or as an Excel sheet built with tablib.

import json
import time

import requests
import tablib

# Fields collected for every job posting; also used as the Excel header row.
FIELDS = ["companyId", "companyFullName", "positionName", "workYear", "education",
          "salary", "jobNature", "companySize", "city", "district", "createTime"]


def send_ajax_request(data: dict):
    """POST one page request to Lagou's AJAX endpoint and return the parsed JSON."""
    try:
        ajax_response = session.post(url=ajax_url,
                                     params={"needAddtionalResult": "false", "city": city},
                                     data=data,
                                     headers=ajax_headers,
                                     timeout=timeout)
        if ajax_response.status_code == 200:
            return ajax_response.json()
        return {}
    except Exception:
        return {}


def get_job_info(info_dic: dict):
    """Yield one record per job: a JSON string for the txt file, or a row for the Excel sheet."""
    job_list = info_dic.get("content", {}).get("positionResult", {}).get("result", [])
    for job in job_list:
        dic = {key: job.get(key) for key in FIELDS}
        if is_save_txtfile:
            yield json.dumps(dic, ensure_ascii=False)
        else:
            yield list(dic.values())


def save_to_file(json_data):
    for line in json_data:
        f.write(line + "\n")


def save_to_excel(list_data):
    for row in list_data:
        dataset.append(row)


def run():
    # Pages 1-30; "pn" is the page number and "kd" the search keyword.
    for i in range(1, 31):
        form_data = {
            "first": "false",
            "pn": i,
            "kd": job_name
        }
        info_dic = send_ajax_request(form_data)
        records = get_job_info(info_dic)
        if is_save_txtfile:
            save_to_file(records)
        else:
            save_to_excel(records)
        print("Saving data")
        time.sleep(sleeptime)  # pause between pages


if __name__ == '__main__':
    session = requests.Session()
    job_name = "python"
    city = "成都"      # Chengdu; the site expects the Chinese city name
    timeout = 5
    sleeptime = 10

    # Visit the listing page first so the session picks up the cookies the AJAX
    # endpoint checks, then reuse its URL as the Referer for the AJAX requests.
    doc_url = "https://www.lagou.com/jobs/list_{job_name}".format(job_name=job_name)
    session.headers["User-Agent"] = ("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
                                     "(KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36")
    session.headers["Host"] = "www.lagou.com"
    doc_response = session.get(url=doc_url, params={"city": city})

    ajax_headers = {
        "Origin": "https://www.lagou.com",
        "Referer": doc_response.url
    }
    # Query-string parameters are supplied via `params` in send_ajax_request().
    ajax_url = "https://www.lagou.com/jobs/positionAjax.json"

    is_save_txtfile = False  # True -> JSON lines in jobinfo.txt, False -> Excel via tablib

    if is_save_txtfile:
        f = open("jobinfo.txt", "a", encoding="utf-8")
    else:
        dataset = tablib.Dataset()
        dataset.headers = FIELDS

    try:
        run()
    except Exception:
        print("Something went wrong")
    finally:
        if is_save_txtfile:
            f.close()
        else:
            with open("jobInfo.xls", "wb") as f:
                f.write(dataset.xls)
                f.flush()
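
As a quick sanity check of the saved data, here is a minimal sketch for reading the JSON-lines output back into Python (assuming is_save_txtfile was set to True and jobinfo.txt was written by the script above; the field names match FIELDS):

import json
from collections import Counter

# Load the JSON-lines output produced by the scraper (one job record per line).
with open("jobinfo.txt", encoding="utf-8") as fp:
    jobs = [json.loads(line) for line in fp if line.strip()]

# Example: how many postings ask for each education level.
print(Counter(job.get("education") for job in jobs))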
Original article: https://www.cnblogs.com/zhuchunyu/p/10765945.html