标签:mat ext lam des eve list span top continue
import jieba path = ‘C:\\Users\\Administrator\\Desktop\\3国演义.txt‘ text = open(path,‘r‘,encoding=‘utf-8‘).read() words = jieba.lcut(text) excludes = [‘将军‘,‘却说‘,‘二人‘,‘不可‘,‘商议‘,‘天下‘,‘军士‘,‘次日‘,‘徐州‘,‘天子‘,‘如何‘,‘引兵‘,‘引军‘,‘军马‘,‘太守‘,‘朝廷‘,‘不能‘,‘诸侯‘,‘出马‘,‘太师‘] counts = {} for word in words: if len(word) == 1: continue elif word == ‘诸葛亮‘or word == ‘孔明曰‘: rword = ‘孔明‘ elif word == ‘玄德‘or word == ‘玄德曰‘: rword = ‘刘备‘ elif word == ‘孟德‘or word == ‘丞相‘: rword = ‘曹操‘ elif word == ‘关公‘or word == ‘云长‘: rword = ‘关羽‘ else: rword = word counts[rword] = counts.get(rword,0) + 1 for word in excludes: del counts[word] items = list(counts.items()) items.sort(key = lambda x:x[1],reverse=True) for i in range(15): word,count = items[i] print("{0:<10}{1:>5}".format(word,count))
标签:mat ext lam des eve list span top continue
原文地址:https://www.cnblogs.com/Glzt/p/12651325.html