标签:rom 报错 rip final except __name__ str http agent
# -*- coding: utf-8 -*-
"""Scrape city house-price summaries from creprice.cn and store them in MySQL.

Created on Sat Jun 2 15:56:29 2018

@author: Administrator
"""
import json
import time

from bs4 import BeautifulSoup
import pymysql  # MySQL client
import requests

# Request headers: send a browser-like User-Agent with every page request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like 07 Gecko) Chrome/53.0.2785.143 Safari/537.36'
}

# Parameterized INSERT: the driver escapes the values, so scraped text that
# contains quotes can neither break the statement nor inject SQL.
INSERT_SQL = "insert into house(cityName,totalProduction) values(%s,%s)"


def get_info(url):
    """Fetch one page, extract (city name, average price) pairs and store them.

    The page's ``h1`` elements are paired positionally with its
    ``div.columnbox`` elements; each pair is printed and inserted into the
    ``house`` table. All rows from one page are committed together and
    rolled back together if any insert fails.

    Args:
        url: Address of the page to scrape.
    """
    # A timeout keeps one unresponsive server from hanging the whole run.
    wb_data = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    city_tags = soup.select('h1')  # city name
    price_tags = soup.select('div.columnbox')  # average house price

    records = []
    for city_tag, price_tag in zip(city_tags, price_tags):
        data = {
            'cityname': city_tag.get_text().strip(),
            'averPrice': price_tag.get_text().strip(),
        }
        print("Python 原始数据:", repr(data))
        print("JSON 对象:", json.dumps(data))
        records.append((data['cityname'], data['averPrice']))

    if not records:
        # Nothing matched on this page; do not open a connection for nothing.
        return

    db = conn()
    try:
        with db.cursor() as cursor:
            for params in records:
                print(INSERT_SQL, params)
                cursor.execute(INSERT_SQL, params)
        db.commit()
    except pymysql.MySQLError as exc:
        # Undo the partial batch and report the failure instead of hiding it.
        db.rollback()
        print("insert failed:", exc)
    finally:
        db.close()


def conn():
    """Open and return a new connection to the local ``houseprice`` database."""
    return pymysql.connect(
        host="localhost",
        user="root",
        password="root",
        database="houseprice",
        port=3306,
        charset='utf8',
    )


def mysqlExcuit(db, sql, args=None):
    """Execute one statement on ``db``, commit it, then close the connection.

    Args:
        db: An open pymysql connection; it is always closed before returning.
        sql: The SQL statement, optionally containing ``%s`` placeholders.
        args: Optional parameters bound to the placeholders by the driver.
    """
    try:
        with db.cursor() as cur:
            cur.execute(sql, args)
        db.commit()
    except Exception as e:
        # Roll back on any error and report it rather than failing silently.
        db.rollback()
        print("statement failed:", e)
    finally:
        db.close()


if __name__ == '__main__':  # script entry point
    # City codes: Nanping, Guiyang, Haikou, Yinchuan, Xining, Zaozhuang,
    # Xuchang, Tongliao.
    cities = ['np', 'gy', 'hk', 'yc', 'xn', 'zz', 'xc', 'tl']
    # Build one URL per city.
    urls = ['http://www.creprice.cn/market/{}/forsale/ALL/11.html'.format(city) for city in cities]
    for single_url in urls:
        get_info(single_url)  # scrape and store one city page
        time.sleep(2)  # pause 2 seconds between requests
插入中文时要注意字符集的问题:连接数据库时需要指定字符集(例如 charset='utf8'),并且数据库和表的字符集也要支持中文,否则插入会失败。
MySQL 服务的端口必须已开启。网上很多示例使用的并不是 3306 端口,要改成自己在 MySQL 配置文件 my.ini 里设置的端口,否则连接时会报错“由于目标计算机积极拒绝,无法连接”。
# Open the database connection
def conn():
    """Open and return a new connection to the local ``houseprice`` database."""
    return pymysql.connect(
        host="localhost",
        user="root",
        password="root",
        database="houseprice",
        port=3306,  # must match the port the MySQL server listens on
        charset='utf8',  # required so Chinese text is stored correctly
    )
标签:rom 报错 rip final except __name__ str http agent
原文地址:https://www.cnblogs.com/Erma/p/9211182.html