# Tags (标签): generate plot 空格 ams txt nump code class inf
"""Word-frequency statistics and word-cloud rendering for a text file.

Pipeline: ``f`` tokenises ``李尔王.txt`` with jieba and drops stop words,
``g`` prints and returns the 15 most frequent words, ``h`` optionally draws
them as a bar chart, and ``k`` renders a word cloud shaped by an image mask.
"""
from collections import Counter

# NOTE: jieba, matplotlib, numpy, Pillow and wordcloud are imported inside the
# functions that use them, so the pure counting helper ``g`` can be imported
# and used without the plotting / segmentation stack being installed.


def f():
    """Tokenise the source text and return the words that are kept.

    Reads stop words from ``停用词.txt`` (one per line) and the text from
    ``李尔王.txt``, both as UTF-8 and relative to the working directory.

    Returns:
        A list of tokens, in reading order, whose length is not exactly one
        character and which are not stop words.

    Raises:
        OSError: if either input file cannot be opened.
    """
    import jieba  # text segmentation

    with open('停用词.txt', 'r', encoding='utf-8') as stop_file:
        # The second strip removes spaces trailing a stop word.
        # A set gives O(1) membership tests in the token loop below.
        stop_words = {line.strip('\n').strip(' ') for line in stop_file}

    words = []
    with open('李尔王.txt', 'r', encoding='utf-8') as text_file:
        # Iterate the file directly. The previous version also called
        # readline() inside the loop, which silently skipped every other line.
        for line in text_file:
            for token in jieba.lcut(line.strip('\n')):
                if len(token) != 1 and token not in stop_words:
                    words.append(token)
    return words


def g(n):
    """Print and return the most frequent words in ``n``.

    Args:
        n: iterable of hashable words.

    Returns:
        ``[words, counts]``: two parallel lists holding at most 15 entries,
        ordered by descending count; ties keep first-seen order. Both lists
        are empty when ``n`` is empty.
    """
    top_words, top_counts = [], []
    # most_common() copes with fewer than 15 distinct words, where indexing
    # the first 15 sorted items unconditionally raised IndexError.
    for word, count in Counter(n).most_common(15):
        top_words.append(word)
        top_counts.append(count)
        print('{: <10}{:>10}'.format(word, count))
    return [top_words, top_counts]


def h(n):
    """Show a bar chart of word frequencies.

    Args:
        n: ``[words, counts]`` as returned by ``g``.
    """
    import matplotlib.pyplot as plt

    # Use the SimHei font for tick labels; presumably chosen so that the
    # Chinese words render -- the font must be installed on the system.
    plt.rcParams['font.sans-serif'] = ['simHei']
    plt.rcParams['axes.unicode_minus'] = False
    name_list = n[0]  # words
    num_list = n[1]  # counts
    plt.bar(range(len(num_list)), num_list, tick_label=name_list, fc='r')
    plt.show()


def k(n):
    """Render a word cloud from the words in ``n`` and save it to disk.

    The image ``皮卡丘.jpg`` is used as the mask, ``SIMYOU.TTF`` as the font,
    and the result is written to ``词云2.png``.

    Args:
        n: iterable of word strings; they are joined with single spaces
            before being handed to the word-cloud generator.
    """
    import numpy as np
    import wordcloud
    from PIL import Image

    text = ' '.join(n)
    # Close the image file as soon as it has been copied into an array.
    with Image.open('皮卡丘.jpg') as template:
        mask = np.array(template)  # image template
    cloud = wordcloud.WordCloud(
        font_path='SIMYOU.TTF',
        scale=13,
        max_words=2500,
        mask=mask,
        height=800,
        width=800,
        background_color='white',
        repeat=False,
        mode='RGBA',
    )
    cloud = cloud.generate(text)  # fill the mask with words
    cloud.to_file('词云2.png')


def main():
    """Run the full pipeline: tokenise, count, render the word cloud."""
    words = f()
    freq = g(words)  # prints the table; returns [words, counts]
    # h(freq)  # enable to display the frequency bar chart
    k(words)


if __name__ == '__main__':
    main()
# Tags (标签): generate plot 空格 ams txt nump code class inf
# Source (原文地址): https://www.cnblogs.com/35312020003o/p/14711374.html