码迷,mamicode.com
首页 > 其他好文 > 详细

获取某小区二手房信息并发送到邮箱

时间:2019-04-23 20:44:02      阅读:112      评论:0      收藏:0      [点我收藏+]

标签:als   关注   pandas   []   onclick   except   +=   strip   unit   

1.数据库:mysql

2.库:requests、BeautifulSoup、pandas、smtplib、mysql.connector

db.py:

# MySQL connection settings; the main program unpacks this dict into
# mysql.connector.connect(**config).  Credentials are masked placeholders.
config = {
    'host': '127.0.0.1',
    'user': '****',
    'passwd': '****',
    'database': 'houseinfo',
}


url.json:
{
  "url": "https://bj.*****.com/ershoufang",
  "urls": [
    "/bp0ep100000ba0ea20000l3c1111027378232/?sug=%E5%88%A9%E6%B3%BD%E8%A5%BF%E5%9B%AD&noStatic=1",
    "/bp0ep100000ba0ea20000l3c1111027378505/?sug=%E5%8D%97%E6%B9%96%E4%B8%9C%E5%9B%AD%E4%B8%80%E5%8C%BA"
  ]
}
主程序:
技术图片
  1 #!/usr/bin/python
  2 # coding=utf-8
  3 
  4 import json
  5 import time
  6 import requests
  7 from bs4 import BeautifulSoup
  8 import mysql.connector
  9 import smtplib
 10 from email.mime.text import MIMEText
 11 from email.header import Header
 12 import mysql
 13 import pandas as pd
 14 import pachong.db
 15 import unittest
 16 
 17 # 从网络上获取数据,存入数据库
 18 def savehouseinfo():
 19     # 从文件中获取url
 20     with open(url.json, r) as f:
 21         data = json.load(f)
 22         # 获取url
 23         url1 = data[url]
 24         # 获取小区对应URl
 25         url_2 = data[urls]
 26 
 27     # 连接数据库
 28     config = pachong.db.config
 29     mydb = mysql.connector.connect(**config)
 30     mycursor = mydb.cursor()
 31 
 32     # 清空表中数据
 33     sql = "delete from houseInfo;"
 34     mycursor.execute(sql)
 35 
 36     # 访问url,获取想要数据
 37     for u in url_2:
 38         url = url1 + u
 39         print(url)
 40         headers = {User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36
 41                                  (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36}
 42 
 43         source = requests.get(url, headers=headers).text.replace(<span class="divide">/</span>, ;) 44             .replace(</a>, ;</a>)
 45         source = source.replace(<span>, <span>;).replace(</span>, ;</span>)
 46         soup = BeautifulSoup(source, html.parser)
 47         # print(soup)
 48         # print(soup)
 49         # 获取房型对应的信息
 50         divs = soup.find_all(attrs={class: info clear})
 51 
 52         # 将数据存入数据库
 53         # print(divs)
 54         for i in divs:
 55             # print(i)
 56 
 57             # soup = BeautifulSoup(i.tostring(), ‘html.parser‘)
 58             # texts = i.get_text().replace(‘随时看房;‘, ‘‘).replace(‘新上;‘, ‘‘).replace(‘VR房源;‘, ‘‘).replace(‘房主自荐;‘, ‘‘)
 59             # print(texts)
 60             # houseInfos = texts.split(‘;‘)
 61             # 房型名称
 62             title = i.find(class_=title).a.get_text().strip(;)
 63             # title = houseInfos[0]
 64             # # 小区名称
 65             houseInfo = i.find(class_=houseInfo).get_text().split(;)
 66             partName = houseInfo[0].strip(;)
 67             # partName = houseInfos[1]
 68             # # 房型
 69             ttype = houseInfo[2].strip(;)
 70             # ttype = houseInfos[3]
 71             # # 面积
 72             area = houseInfo[3].strip(;)
 73             # area = houseInfos[4]
 74             # # 朝向
 75             orientations = houseInfo[4].strip(;)
 76             # orientations = houseInfos[5]
 77             # # 装修
 78             decorate = houseInfo[5].strip(;)
 79             # decorate = houseInfos[6]
 80             # # 电梯楼层
 81             elevator = houseInfo[6].strip(;)
 82             positionInfo = i.find(class_=positionInfo).get_text().split(;)
 83             floor = elevator + positionInfo[0].strip(;)
 84             # floor = houseInfos[7]
 85             # # 建房年份
 86             buildYear = positionInfo[1].strip(;)
 87             # buildYear = houseInfos[8]
 88             # # 地区
 89             address = i.find(class_=positionInfo).a.get_text().strip(;)
 90             # address = houseInfos[9]
 91             # # 关注人数
 92             followInfos = i.find(class_=followInfo).get_text(/).split(/)
 93             care = followInfos[0]
 94             # care = houseInfos[10]
 95             # # 地铁
 96             subway = i.find(class_=subway).get_text().strip(;)
 97             # subway = houseInfos[11]
 98             # # 房本
 99             if(i.find(class_=five)):
100                 book = i.find(class_=five).get_text().strip(;)
101             elif(i.find(class_=taxfree)):
102                 book = i.find(class_=taxfree).get_text().strip(;)
103             else:
104                 book = ‘‘
105             # book = houseInfos[12]
106             # # 总价
107             total = i.find(class_=totalPrice).span.get_text().strip(;)+
108             # total = houseInfos[14]
109             # # 单价
110             price = i.find(class_=unitPrice).span.get_text().strip(;)
111             # price = houseInfos[16]
112             # print(price)
113             mycursor = mydb.cursor()
114             # 插入数据
115             sql = "INSERT INTO houseInfo (title, partName, ttype, area, orientations, " 116                   "decorate, floor, buildYear,address, care, subway, book, total, price) " 117                   "VALUES (%s, %s, %s, %s," 118                 " %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);"
119             val = (title, partName, ttype, area, orientations, decorate, floor, buildYear,
120                    address, care, subway, book, total, price)
121             mycursor.execute(sql, val)
122         mydb.commit()  # 数据表内容有更新,必须使用到该语句
123         mycursor.close()
124         mydb.close
125 
126 # 从数据库中获取数据
def gethouseinfo():
    """Read every row of ``houseInfo`` and return the data column by column.

    Returns:
        A 15-tuple of lists, one list per column, in the order
        (id, partName, title, ttype, area, total, book, price, orientations,
        decorate, floor, buildYear, subway, address, care).  Returns ``None``
        after printing an error message when the query fails.
    """
    sql = '''select id,partName,title,ttype, area, total, price, orientations, decorate, floor, buildYear,
                   address, care, subway, book from houseInfo;'''
    # For each returned column, its position within a SELECT row.  The output
    # order differs from the SELECT order: book (14) follows total (5), and
    # subway (13) precedes address (11) and care (12).
    output_order = (0, 1, 2, 3, 4, 5, 14, 6, 7, 8, 9, 10, 13, 11, 12)

    config = pachong.db.config
    mydb = mysql.connector.connect(**config)
    try:
        mycursor = mydb.cursor()
        try:
            mycursor.execute(sql)
            rows = mycursor.fetchall()
        finally:
            mycursor.close()
    except mysql.connector.Error:
        print("Error:unable to facth data")
        return None
    finally:
        # Runs on both the success and the failure path, so the connection
        # is always released.
        mydb.close()

    # Transpose rows into per-column lists; an empty table yields 15 empty
    # lists.
    return tuple([row[i] for row in rows] for i in output_order)
173 
def highlight(data=None):
    """Return the red-background CSS declaration used to highlight cells.

    Args:
        data: Optional sized object, such as the Series that
            ``Styler.apply`` passes to its callback.  When given, one
            declaration is returned per element.

    Returns:
        A list of CSS strings: a single entry when called without ``data``,
        otherwise ``len(data)`` entries.
    """
    style = 'background-color: #FF0000'
    if data is None:
        return [style]
    return [style] * len(data)
176 
177 # 将从数据库中拿到的数据转成html
def convert_to_html():
    """Render the columns returned by ``gethouseinfo()`` as an HTML table.

    Returns:
        The string produced by ``DataFrame.to_html()``, with one column per
        title in ``titles``.

    Raises:
        RuntimeError: If ``gethouseinfo()`` returned ``None``.
    """
    result = gethouseinfo()
    if result is None:
        # Fail with a clear message instead of a TypeError from iterating None.
        raise RuntimeError('gethouseinfo() returned no data')

    # Column headers, in the same order as the lists in ``result``.
    titles = ['编号', '小区名称', '出售房名称', '房型', '面积', '总价', '房本',
              '单价', '朝向', '装修', '楼层', '建造年份', '地铁', '地址', '关注度']
    df = pd.DataFrame(dict(zip(titles, result)), columns=titles)
    # Raise the column-width display limit to 500 characters.
    pd.set_option('display.max_colwidth', 500)
    # NOTE: the plain DataFrame is rendered; no Styler is applied to the
    # returned HTML.
    return df.to_html()
196 
197 # 发送邮件
def sendmail():
    """E-mail the HTML table built by ``convert_to_html()`` via smtp.163.com.

    The subject is ``二手房_`` followed by the current local time.  SMTP
    errors are caught and printed rather than raised.
    """
    # Third-party SMTP service settings (masked placeholders).
    mail_host = "smtp.163.com"  # SMTP server
    mail_user = "******"  # user name
    mail_pass = "*****"  # password

    sender = '****@163.com'
    receivers = ['****@163.com']  # recipients; any mailbox can be listed here

    h = convert_to_html()
    message = MIMEText(h, _subtype='html', _charset='utf-8')
    message['From'] = Header("***", 'utf-8')
    message['To'] = Header('***', 'utf-8')

    t = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    subject = '二手房_' + t
    message['Subject'] = Header(subject, 'utf-8')

    try:
        # The context manager closes the connection when the block exits,
        # including when login or sending fails.
        # NOTE(review): login() is issued on port 25 without starttls(), so
        # credentials are not encrypted -- consider STARTTLS or SMTP_SSL if
        # the server supports it.
        with smtplib.SMTP(mail_host, 25) as smtpObj:  # 25 is the SMTP port
            smtpObj.login(mail_user, mail_pass)
            smtpObj.sendmail(sender, receivers, message.as_string())
            print("邮件发送成功")
    except smtplib.SMTPException as e:
        print(e)
View Code

 



获取某小区二手房信息并发送到邮箱

标签:als   关注   pandas   []   onclick   except   +=   strip   unit   

原文地址:https://www.cnblogs.com/qixiafeng/p/10758582.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!