"""Print the 20 most frequent words of ``article.txt``.

The text is read as UTF-8, listed punctuation is blanked out, the result is
segmented with jieba, stop words are dropped, and the remaining words are
printed as ``(word, count)`` tuples, most frequent first.
"""
from collections import Counter

# Characters replaced by a single space before segmentation.
# NOTE(review): the first character is the CJK numeral '一', not a dash.
# Because it is blanked out here, the stop words '一个' and '一点' below can
# never match a token. It may have been meant to be '—' -- confirm before
# changing, since that would alter the reported counts.
PUNCTUATION = '''一!“”,。?;’"‘,.、:\n'''

# Tokens that are never reported (function words, pronouns, a lone space...).
STOP_WORDS = frozenset({
    '你', '你们', '的', '他', '了', '她', '是', '在', '—', '他们', '着', '把',
    '不', '也', '我', '人', '而', '与', '有', '可是', '自己', '就', '又',
    '什么', '和', '一个', ' ', '呢', '很', '象', '一点', '都', '去', '没有',
    '个', '上', '给', '来', '还', '到', '这', '要', '不是', '得', '但是',
    '已经', '那么', '只', '因为',
})

# One translation table lets str.translate do every replacement in one pass.
_PUNCTUATION_TABLE = str.maketrans(dict.fromkeys(PUNCTUATION, ' '))


def strip_punctuation(text: str) -> str:
    """Return *text* with every character of ``PUNCTUATION`` turned into a space."""
    return text.translate(_PUNCTUATION_TABLE)


def tokenize(text: str) -> list:
    """Segment *text* into a list of tokens using ``jieba.cut``."""
    # Imported here so the counting helpers stay importable (and testable)
    # on machines where the third-party ``jieba`` package is not installed.
    import jieba

    return list(jieba.cut(text))


def top_words(words, limit: int = 20, exclude=STOP_WORDS) -> list:
    """Return up to *limit* ``(word, count)`` pairs, most frequent first.

    Args:
        words: iterable of tokens to count.
        limit: maximum number of pairs to return.
        exclude: tokens that must not be counted.

    Returns:
        A list of ``(word, count)`` tuples sorted by descending count. It is
        shorter than *limit* when fewer distinct words are available, so
        callers never index past the end.
    """
    # Counter tallies everything in a single pass; the original called
    # list.count() once per distinct word.
    counts = Counter(word for word in words if word not in exclude)
    return counts.most_common(limit)


def main(path: str = 'article.txt') -> None:
    """Read the UTF-8 file at *path* and print its most frequent words."""
    with open(path, 'r', encoding='utf-8') as source:
        text = source.read()
    for entry in top_words(tokenize(strip_punctuation(text))):
        print(entry)


if __name__ == '__main__':
    main()
# 结果截图: (result screenshot)