标签:mat 词云 readline generate code rip rgba numpy 保存
from collections import Counter

from PIL import Image
import wordcloud
import numpy as np
import matplotlib.pyplot as plt
import jieba


# Text processing
def f(stopwords_path='停用词.txt', text_path='余罪.txt'):
    """Segment a text file into words and drop stop words.

    Args:
        stopwords_path: UTF-8 file with one stop word per line.
        text_path: UTF-8 text file to segment with ``jieba.lcut``.

    Returns:
        A list (with repeats, in reading order) of every token whose
        length is not 1 and which is not listed in the stop-word file.
    """
    with open(stopwords_path, 'r', encoding='utf-8') as fx:
        # The second strip removes spaces around each stop word.
        # A set gives O(1) membership tests in the loop below.
        stopwords = {line.strip('\n').strip(' ') for line in fx}

    words = []
    with open(text_path, 'r', encoding='utf-8') as fo:
        # Iterate the file directly. Calling fo.readline() inside this loop
        # (as an earlier version did) consumes a second line per iteration
        # and silently drops every other line of the text.
        for line in fo:
            for word in jieba.lcut(line.strip('\n')):
                if len(word) != 1 and word not in stopwords:
                    words.append(word)
    return words


def g(n, top=15):  # n is a list of words
    """Print and return the most frequent words in *n*.

    Args:
        n: Iterable of words (repeats are counted).
        top: Maximum number of entries to report. Fewer are reported when
            *n* contains fewer distinct words.

    Returns:
        ``[words, counts]``: two parallel lists ordered by descending
        count. Words with equal counts keep their first-seen order.
    """
    counts = Counter(n)
    # sorted() is stable, so ties stay in insertion (first-seen) order.
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)

    words, freqs = [], []
    # Slicing (rather than indexing 0..top-1) avoids an IndexError when
    # there are fewer than `top` distinct words.
    for word, freq in ranked[:top]:
        words.append(word)  # collect the word
        freqs.append(freq)  # collect its frequency
        print('{: <10}{:>10}'.format(word, freq))
    return [words, freqs]


def k(n, mask_path='图.jpg', font_path='SIMYOU.TTF', out_path='词云.png'):
    """Render a word cloud from the word list *n* and save it as an image.

    Args:
        n: List of words; they are joined with spaces before generation.
        mask_path: Image file used as the shape template (mask).
        font_path: Font file passed to ``wordcloud.WordCloud``.
        out_path: Destination file for the rendered word cloud.
    """
    text = ' '.join(n)

    # Load the image template; the context manager closes the file handle
    # once the pixel data has been copied into the array.
    with Image.open(mask_path) as template:
        mask = np.array(template)

    # NOTE(review): the wordcloud docs state that width/height are ignored
    # when a mask is supplied; they are kept here to match the original call.
    cloud = wordcloud.WordCloud(
        font_path=font_path,
        scale=20,
        max_words=6000,
        mask=mask,
        height=800,
        width=800,
        background_color='white',
        repeat=False,
        mode='RGBA',
    )
    cloud = cloud.generate(text)  # lay out the words to build the cloud
    cloud.to_file(out_path)  # save the image


a = f()  # build the word list
b = g(a)  # frequency statistics; returns two lists
k(a)
标签:mat 词云 readline generate code rip rgba numpy 保存
原文地址:https://www.cnblogs.com/PLla/p/14711369.html