#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author's aside: "I'm getting old and can't keep up, haha."
"""Word-frequency analysis of a text file.

Reads ``emma.txt``, strips punctuation, counts how often each word occurs
(case-insensitively), prints the counts from most to least frequent, and
draws a bar chart of the ten most frequent words with matplotlib.
"""
import string
from collections import Counter

# Translation table used to clean the text in a single pass.
# The script treated '-' in its own step before the general punctuation
# pass; mapping it to a space (instead of deleting it) keeps dash-joined
# words such as "cat--the" from fusing into one token. Every other
# punctuation character is simply removed.
_PUNCTUATION_TABLE = str.maketrans(
    {mark: " " if mark == "-" else None for mark in string.punctuation}
)


def read_text(path="emma.txt"):
    """Return the full contents of the text file at *path*.

    Raises:
        OSError: if the file cannot be opened.
    """
    with open(path, "r", encoding="utf-8") as source:
        return source.read()


def clean_text(text):
    """Return *text* with hyphens turned into spaces and punctuation removed."""
    return text.translate(_PUNCTUATION_TABLE)


def count_words(text):
    """Return a ``Counter`` mapping each lower-cased word in *text* to its count.

    Words are the whitespace-separated tokens left after ``clean_text``.
    """
    return Counter(word.lower() for word in clean_text(text).split())


def build_histogram(counts):
    """Return ``[count, word]`` pairs sorted from most to least frequent.

    The count comes first in each pair so that a plain descending sort
    orders by frequency; ties fall back to reverse alphabetical order.
    """
    return sorted(([count, word] for word, count in counts.items()), reverse=True)


def plot_top_words(hist, limit=10):
    """Draw a bar chart of the first *limit* entries of *hist*.

    *hist* is a list of ``[count, word]`` pairs as produced by
    ``build_histogram``.
    """
    # Imported here so the counting helpers stay usable when matplotlib
    # is not installed.
    import matplotlib.pyplot as plt

    for count, word in hist[:limit]:
        plt.bar(word, count)
    # No plt.legend(): none of the bars carries a label, so there is
    # nothing for a legend to show.
    plt.xlabel("word")
    plt.ylabel("rate")
    plt.title("")
    plt.show()


def main():
    """Count the words in ``emma.txt``, print the ranking, and plot the top ten."""
    hist = build_histogram(count_words(read_text()))
    print(hist)  # descending order of frequency
    plot_top_words(hist)


if __name__ == "__main__":
    main()