标签:use ODB rom code show 本地 bsp nec tin
连接 MySQL 做了个小练习:爬取 http://wufazhuce.com 上的问题、描述和答案,存到本地的数据库里。
数据表结构:
-- Storage for the scraped question pages; every column is a nullable string.
CREATE TABLE `questions` (
  `title`       VARCHAR(2000) DEFAULT NULL,  -- question title
  `description` VARCHAR(200)  DEFAULT NULL,  -- question description (note: only 200 characters wide)
  `answers`     VARCHAR(2000) DEFAULT NULL,  -- answer text
  `url`         VARCHAR(2000) DEFAULT NULL,  -- address of the scraped page
  `daynum`      VARCHAR(20)   DEFAULT NULL   -- page number used in the URL
) ENGINE=InnoDB DEFAULT CHARSET=utf8
代码:
# author:
"""Scrape the question pages of http://wufazhuce.com into the MySQL table ``questions``.

For every page number the script downloads ``/question/<n>``, extracts the
title, the description and the answer text, and inserts them as rows.
A page without a ``one-cuestion`` block is recorded with the placeholder
value ``404`` in the title, description and answers columns.
"""
import pymysql.cursors
import requests
from bs4 import BeautifulSoup

# Page numbers to visit, both ends inclusive (the original loop was range(1, 1872)).
FIRST_PAGE = 1
LAST_PAGE = 1871

# Seconds to wait for the web server before requests raises a timeout error.
REQUEST_TIMEOUT = 10

# Column widths from the CREATE TABLE statement; longer text is cut so the
# insert cannot fail (or be cut silently by the server) on over-long values.
TITLE_MAX = 2000
DESCRIPTION_MAX = 200
# The original script cut answers at 1800 characters (the column is varchar(2000)).
ANSWER_MAX = 1800

# Placeholders are bound by the driver, so no value is spliced into the SQL text.
INSERT_SQL = (
    'insert into questions (title,description,answers,url,daynum) '
    'values(%s,%s,%s,%s,%s)'
)


def _insert_row(con, cur, row):
    """Insert one ``(title, description, answers, url, daynum)`` row and commit it.

    The rendered statement and the affected-row count returned by
    ``execute`` are printed, as the original script did.
    """
    print(cur.mogrify(INSERT_SQL, row))
    result = cur.execute(INSERT_SQL, row)
    con.commit()
    print('执行结果:' + str(result))


def _fetch_soup(url):
    """Download ``url`` and return it parsed with the built-in HTML parser.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    response = requests.get(url=url, timeout=REQUEST_TIMEOUT)
    # Decode with the encoding detected from the body rather than the header.
    response.encoding = response.apparent_encoding
    return BeautifulSoup(response.text, features="html.parser")


def _scrape_page(con, cur, p_num):
    """Scrape question page ``p_num`` and store what was found."""
    url = 'http://wufazhuce.com/question/%s' % p_num
    soup = _fetch_soup(url)

    tar = soup.find('div', class_='one-cuestion')
    if not tar:
        print('not tar')
        _insert_row(con, cur, ('404', '404', '404', url, str(p_num)))
        return

    heading = tar.find('h4')
    title = heading.text.strip()[:TITLE_MAX] if heading else ''

    # The first "cuestion-contenido" block is used as the description; the
    # remaining ones are treated as answers.
    blocks = soup.find_all('div', class_='cuestion-contenido')
    desc = blocks[0].text.strip()[:DESCRIPTION_MAX] if blocks else ''

    # NOTE(review): the published script lost its indentation; this assumes
    # one row is inserted per answer block -- confirm against the original.
    for block in blocks[1:]:
        answer = block.text.strip()[:ANSWER_MAX]
        _insert_row(con, cur, (title, desc, answer, url, str(p_num)))


def main():
    """Open the database connection, crawl every page, and always close the connection."""
    con = pymysql.connect(host='192.168.86.130', user='root', password='letmein',
                          db='0603simon', port=3306, charset='utf8')
    try:
        with con.cursor() as cur:
            for p_num in range(FIRST_PAGE, LAST_PAGE + 1):
                _scrape_page(con, cur, p_num)
    finally:
        # Runs even when a page raises, so the connection is never leaked.
        con.close()


if __name__ == '__main__':
    main()
标签:use ODB rom code show 本地 bsp nec tin
原文地址:https://www.cnblogs.com/Simonsun002/p/9152944.html