爬取百合网的数据链接:http://www.cnblogs.com/YuWeiXiF/p/8439552.html
总共爬了22779条数据。第一次接触matplotlib库,以下代码参考了matplotlib官方文档:https://matplotlib.org/users/index.html。
数据查询用到了两个 SQL Server 自定义函数:getSexNumber(@sex varchar(2),@income varchar(30))、gethousingNumber(@sex varchar(2),@housing varchar(6)),用来缩短查询语句,代码如下:
GO
-- Return the number of rows in users whose sex and income both equal the arguments.
CREATE FUNCTION getSexNumber(@sex VARCHAR(2), @income VARCHAR(30))
RETURNS INT
AS
BEGIN
    DECLARE @total INT
    SELECT @total = COUNT(id)
    FROM users
    WHERE sex = @sex
      AND income = @income
    RETURN @total
END
GO
GO
-- Return the number of rows in users whose sex and housing both equal the arguments.
CREATE FUNCTION gethousingNumber(@sex VARCHAR(2), @housing VARCHAR(6))
RETURNS INT
AS
BEGIN
    DECLARE @total INT
    SELECT @total = COUNT(id)
    FROM users
    WHERE sex = @sex
      AND housing = @housing
    RETURN @total
END
GO
以下代码为SQL Server 数据库操作:
1 #__author: "YuWei"
2 #__date: 2018/2/11
3 import numpy as np
4 import matplotlib.pyplot as plt
5 import pymssql
6
def _redecode_gbk(value):
    """Re-interpret a latin-1-decoded string as GBK; return anything else unchanged."""
    try:
        return value.encode('latin-1').decode('gbk')
    except (AttributeError, UnicodeError):
        # AttributeError: the value is not text (number, None, ...).
        # UnicodeError: the text is not latin-1 mojibake of GBK bytes, so it is
        # left as-is instead of aborting the whole query.
        return value


def db(sql):
    """
    Execute a query against the local ``Baihe`` SQL Server database.

    Every text column is passed through a latin-1 -> GBK re-decoding step; values
    that are not text, or that cannot be re-decoded, are returned unchanged.
    The full result is also printed to stdout.

    :param sql: SQL statement to execute
    :return: all result rows, each row as a list of column values
    """
    # NOTE(review): connection settings and credentials are hard-coded here.
    conn = pymssql.connect(host='localhost', user='sa', password='123456c', database='Baihe', charset="utf8")
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql)
            data = [[_redecode_gbk(value) for value in row] for row in cur.fetchall()]
        finally:
            cur.close()
    finally:
        # Close the connection even when the query or the fetch fails.
        conn.close()
    print(data)
    return data
生成各工资段人数占总人数比图:
def builder_income_ratio():
    """
    Draw a pie chart of the share of users in each income band.

    The row at index 6 of the query result is excluded from the chart.

    :return: None
    """
    data_list = db("select income,count(id) from users group by income")
    income_labels_list = [row[0] for row in data_list]  # wedge labels
    income_data_list = [row[1] for row in data_list]  # user count per income band
    # Delete by position. list.remove(lst[6]) would delete the first element
    # *equal* to lst[6], which is the wrong row whenever an earlier band has
    # the same count, and would leave counts and labels out of step.
    del income_data_list[6]
    del income_labels_list[6]
    # Pull out the wedges at positions 2, 6 and 8; sized to the data because
    # pie() requires explode to have exactly one entry per wedge.
    pulled_out = (2, 6, 8)
    explode = [0.1 if i in pulled_out else 0 for i in range(len(income_data_list))]
    plt.pie(income_data_list, labels=income_labels_list, colors=['c', 'm', 'r', 'g'], startangle=30,
            shadow=True, explode=explode, autopct='%.1f%%')
    plt.title('各工资段人数占总人数比')
    plt.show()
执行效果如下:
生成各工资段男,女人数图:
def builder_sex_ratio():
    """
    Draw a grouped bar chart (with overlaid lines) of male and female user
    counts per income band.

    The row at index 6 of the query result is excluded from the chart.

    :return: None
    """
    data_list = db("select income,dbo.getSexNumber('男',income) as 男 ,dbo.getSexNumber('女',income) as 女 "
                   "from users group by income")
    labels = [row[0] for row in data_list]  # income band names (x tick labels)
    men = [row[1] for row in data_list]  # male count per band
    women = [row[2] for row in data_list]  # female count per band
    # Delete by position. list.remove(lst[6]) would delete the first element
    # *equal* to lst[6]; counts repeat easily (e.g. several bands with the same
    # number of users), which would drop the wrong row and misalign the lists.
    del men[6]
    del women[6]
    del labels[6]
    fig, ax = plt.subplots()
    line = np.arange(len(labels))  # one x position per remaining income band
    bar_width = 0.4  # width of each bar
    # Bars: men at the group position, women shifted right by one bar width.
    ax.bar(line, men, bar_width, alpha=0.3, color='b', label='男')
    ax.bar(line + bar_width, women, bar_width, alpha=0.3, color='r', label='女')
    ax.set_xlabel('工资段')
    ax.set_ylabel('人数')
    ax.set_title('各工资段男,女人数图')
    ax.set_xticks(line + bar_width / 2)  # centre each tick between the two bars
    ax.set_xticklabels(labels)
    # Trend lines over the bars, using the same x offsets as before (0.04 and 0.4).
    plt.plot(line + 0.04, men, label='男')
    plt.plot(line + bar_width, women, label='女')
    ax.legend()
    fig.tight_layout()
    plt.show()
执行效果如下:
生成男,女平均身高图:
def builder_age_ratio():
    """
    Draw a bar chart of the average height per sex.

    Despite its name, this function plots heights, not ages.

    :return: None
    """
    data_list = db("select sex,avg(height) as 平均升高 from users group by sex")
    # Colour and legend label follow the sex value of each row. GROUP BY gives no
    # ordering guarantee, so assuming "row 0 is male, row 1 is female" can
    # mislabel the bars.
    colors = {'男': 'g', '女': 'r'}
    for sex, avg_height in data_list:
        plt.bar(sex, avg_height, label=sex, color=colors.get(sex), width=0.03)
    plt.legend()
    plt.xlabel('性别')
    plt.ylabel('身高')
    plt.title('男女平均身高图')
    plt.show()
执行效果如下:
生成有房与无房的人数比例图:
def builder_housing_sum_ratio():
    """
    Draw a pie chart of user counts grouped by housing status.

    :return: None
    """
    rows = db("select housing,count(id) from users group by housing")
    counts = [row[1] for row in rows]  # wedge sizes
    names = [row[0] for row in rows]  # wedge labels
    plt.pie(
        counts,
        labels=names,
        colors=['g', 'r'],
        startangle=30,
        shadow=True,
        explode=(0, 0),
        autopct='%.0f%%',
    )
    plt.title('有房与无房的人数比例图')
    plt.show()
执行效果如下:
生成有无房男女人数图:
def builder_housing_ratio():
    """
    Draw a grouped bar chart of female and male user counts per housing status.

    :return: None
    """
    # The SQL function is named gethousingNumber; the previous query called
    # dbo.gethousing, which does not match that definition.
    data_list = db("select dbo.gethousingNumber('女',housing),dbo.gethousingNumber('男',housing) "
                   "from users group by housing")
    women = [row[0] for row in data_list]  # female count per housing group
    men = [row[1] for row in data_list]  # male count per housing group
    max_line = 2  # two housing groups are expected
    fig, ax = plt.subplots()
    line = np.arange(max_line)  # [0, 1]
    bar_width = 0.1  # width of each bar
    ax.bar(line, women, bar_width, alpha=0.3, color='b', label='女')
    ax.bar(line + bar_width, men, bar_width, alpha=0.3, color='r', label='男')
    ax.set_xlabel('有无房')
    ax.set_ylabel('人数')
    ax.set_title('有无房男女人数图')
    ax.set_xticks(line + bar_width / 2)  # centre each tick between the two bars
    # NOTE(review): the tick labels assume the first group is "has housing" and the
    # second "no housing"; the query has no ORDER BY, so confirm the row order.
    ax.set_xticklabels(['有房', '无房'])
    ax.legend()
    fig.tight_layout()
    plt.show()
执行效果如下: