标签:get style 内容 迭代 openpyxl parse create == 编码
# -*- coding: utf-8 -*-
"""Scrape one licence-detail page and pass the collected columns to Excel.

Created on Mon Sep 9 10:07:50 2019

@author: Administrator

The script downloads ``url``, walks every ``<tr class="a0">`` row, treats the
first right-aligned ``<td>`` of a row as the label cell and the cells that
follow it as value cells, and stores the result as a list of single-entry
dicts in ``info_list``.  It then writes/reads three columns through
``OprationExcel`` (project-local helper from ``write_excel``).
"""
import os
import re
import traceback

import openpyxl
import requests
from bs4 import BeautifulSoup, Tag

from write_excel import OprationExcel

url = "http://xkz.cbirc.gov.cn/ilicence/showLicenceInfo.do?state=3&id=00000001"
root = "d:/pictures//"
path = root + "xkz.html"

# A timeout keeps the script from hanging forever on an unresponsive server.
r = requests.get(url, timeout=30)
html = r.text
soup = BeautifulSoup(html, "html.parser")

# Label (key) of every piece of information found on the page.
info_key_list = []
# Value belonging to each key.
info_value_list = []
# One {key: value} dict per piece of information (JSON-like layout).
info_list = []
# NOTE(review): the three lists below are never filled anywhere in this
# script, so the Excel columns are appended empty -- confirm which keys of
# info_list were meant to feed them.
lsh_values_list = []
jgname_values_list = []
jgcode_values_list = []


def _first_token(node):
    """Return the first whitespace-delimited token of *node*'s text, or None.

    *node* may be a tag or a bare string.  A tag whose ``.string`` is None
    (no children, or several children) falls back to ``get_text()`` instead
    of raising AttributeError on ``None.split()``.
    """
    if isinstance(node, Tag):
        text = node.string
        if text is None:
            text = node.get_text()
    else:
        text = node
    tokens = str(text).split()
    return tokens[0] if tokens else None


# Tip: to filter a tag on several attributes at once, pass them all in attrs:
#   soup.find_all("td", attrs={"align": "right", "height": "25"})
try:
    title = soup.find("title")
    title_text = title.string if title is not None else None
    if title_text != "出错啦!":
        for tr in soup.find_all("tr", "a0"):
            key_cell = tr.find("td", {"align": "right"})
            if key_cell is None:
                continue

            # The label text sits among the children of the label cell;
            # whitespace-only children carry no label and are skipped.
            for child in key_cell:
                label = _first_token(child)
                if label is not None:
                    info_key_list.append(label.split(":")[0])

            # The HTML contains blank/newline text nodes between cells, so
            # walk the siblings and keep only meaningful ones.  A tag with
            # no text is an empty value and is recorded as "--".
            for sibling in key_cell.next_siblings:
                value = _first_token(sibling)
                if value is not None:
                    info_value_list.append(value)
                elif isinstance(sibling, Tag):
                    info_value_list.append("--")

        # Pair keys and values positionally; report a mismatch instead of
        # failing with IndexError half-way through.
        if len(info_key_list) != len(info_value_list):
            print(
                "warning: %d keys but %d values were parsed"
                % (len(info_key_list), len(info_value_list))
            )
        for key, value in zip(info_key_list, info_value_list):
            info_list.append({key: value})

        excel1 = OprationExcel("xkz1.xls")
        excel1.excel_write()
        excel1.excel_append(lsh_values_list, 1)
        excel1.excel_append(jgname_values_list, 2)
        excel1.excel_append(jgcode_values_list, 3)
        excel1.excel_read(1)
        excel1.excel_read(2)
        excel1.excel_read(3)
    else:
        print("查无结果")
except Exception:
    # Top-level boundary of the script: report the failure without letting
    # KeyboardInterrupt/SystemExit be swallowed as a bare ``except`` would.
    traceback.print_exc()
标签:get style 内容 迭代 openpyxl parse create == 编码
原文地址:https://www.cnblogs.com/zwy-blog/p/11557762.html