第一次python词云尝试

时间：2018-08-09 19:22:30 阅读：196 评论：0 收藏：0 [点我收藏+]

标签：name cannot 背景词云 lov pat can dcl bubuko

1 分析英文文本

 1 from wordcloud import WordCloud
 2 import os
 3 
 4 cur_path = os.path.dirname(__file__)
 5 
 6 with open(os.path.join(cur_path, ‘love_en.txt‘)) as fp:
 7     txt = fp.read()
 8     # print(txt)
 9     wordcloud = WordCloud().generate(txt)
10     image = wordcloud.to_image()
11     image.show()

发生错误，错误类型:OSError: cannot open resource

解决方案：添加字体

改正后代码：

 1 from wordcloud import WordCloud
 2 import os
 3 
 4 cur_path = os.path.dirname(__file__)
 5 
 6 with open(os.path.join(cur_path, ‘love_en.txt‘)) as fp:
 7     txt = fp.read()
 8     # print(txt)
 9     wordcloud = WordCloud(font_path = ‘FZLTXIHK.TTF‘).generate(txt)
10     image = wordcloud.to_image()
11     image.show()

进一步优化代码：

 1 from wordcloud import WordCloud
 2 import os
 3 
 4 cur_path = os.path.dirname(__file__)
 5 
 6 with open(os.path.join(cur_path, ‘love_en.txt‘)) as fp:
 7     txt = fp.read()
 8     # print(txt)
 9     wordcloud = WordCloud(font_path = ‘FZLTXIHK.TTF‘, # 字体
10                           background_color = ‘black‘, # 背景色
11                           max_words = 30, # 最大显示单词数
12                           max_font_size = 60 # 频率最大单词字体大小
13                         ).generate(txt)
14     image = wordcloud.to_image()
15     image.show()

效果图：技术分享图片

2 分析中文文本

 1 import jieba
 2 from wordcloud import WordCloud
 3 import os
 4 
 5 cur_path = os.path.dirname(__file__)
 6 
 7 def chinese_jieba(txt):
 8     wordlist_jieba = jieba.cut(txt) # 将文本分割，返回列表
 9     txt_jieba = " ".join(wordlist_jieba) # 将列表拼接为以空格为间断的字符串
10     return txt_jieba
11 
12 stopwords = {‘这些‘:0, ‘那些‘:0, ‘因为‘:0, ‘所以‘:0} # 噪声词
13 
14 with open(os.path.join(cur_path, ‘择天记.txt‘)) as fp:
15     txt = fp.read()
16     txt = chinese_jieba(txt)
17     # print(txt)
18     wordcloud = WordCloud(font_path = ‘FZLTXIHK.TTF‘, # 字体
19                           background_color = ‘black‘, # 背景色
20                           max_words = 30, # 最大显示单词数
21                           max_font_size = 60, # 频率最大单词字体大小
22                           stopwords = stopwords # 过滤噪声词
23                         ).generate(txt)
24     image = wordcloud.to_image()
25     image.show()

效果图：技术分享图片

3 进一步优化显示效果

 1 import jieba
 2 from wordcloud import WordCloud
 3 import os
 4 import numpy
 5 import PIL.Image as Image
 6 
 7 cur_path = os.path.dirname(__file__)
 8 
 9 def chinese_jieba(txt):
10     wordlist_jieba = jieba.cut(txt) # 将文本分割，返回列表
11     txt_jieba = " ".join(wordlist_jieba) # 将列表拼接为以空格为间断的字符串
12     return txt_jieba
13 
14 stopwords = {‘这些‘:0, ‘那些‘:0, ‘因为‘:0, ‘所以‘:0} # 噪声词
15 mask_pic = numpy.array(Image.open(os.path.join(cur_path, ‘love.jpg‘)))
16 
17 with open(os.path.join(cur_path, ‘择天记.txt‘)) as fp:
18     txt = fp.read()
19     txt = chinese_jieba(txt)
20     # print(txt)
21     wordcloud = WordCloud(font_path = ‘FZLTXIHK.TTF‘, # 字体
22                           background_color = ‘white‘, # 背景色
23                           max_words = 100, # 最大显示单词数
24                           max_font_size = 60, # 频率最大单词字体大小
25                           stopwords = stopwords, # 过滤噪声词
26                           mask = mask_pic # 自定义显示的效果图
27                         ).generate(txt)
28     image = wordcloud.to_image()
29     image.show()

效果图：技术分享图片

第一次python词云尝试

标签：name cannot 背景词云 lov pat can dcl bubuko

原文地址：https://www.cnblogs.com/jiangchengzi93812/p/9450455.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行