from collections import Counter

# Punctuation that is replaced by a space before the lyrics are split into
# words. The apostrophe is deliberately absent so contractions such as "i'm"
# stay in one piece.
SONG_SEPARATORS = ''',.?—!"'''

# Lower-cased words that are left out of the frequency table.
SONG_STOP_WORDS = frozenset(
    {'the', 'and', 'i', 'in', "i'm", 'a', 'of', 'an', 'on', 'to', 'with'}
)


def count_song_words(text, separators=SONG_SEPARATORS, exclude=SONG_STOP_WORDS):
    """Return ``(word, count)`` pairs for *text*, most frequent first.

    Every character in *separators* is replaced by a space, the text is
    lower-cased and split on whitespace, and words found in *exclude* are
    dropped. Words with equal counts keep the order in which they first
    appear in the text, so the result is deterministic.
    """
    for ch in separators:
        text = text.replace(ch, ' ')
    words = text.lower().split()
    # Counter tallies in one pass; calling list.count() once per unique word
    # rescans the whole word list every time.
    tally = Counter(word for word in words if word not in exclude)
    return tally.most_common()


def format_song_lines(ranking, limit=20):
    """Format the first *limit* entries of *ranking* as ``"word count\\n"``.

    Slicing (rather than indexing 0..limit-1) means a ranking with fewer than
    *limit* entries yields fewer lines instead of raising IndexError.
    """
    return [word + " " + str(count) + "\n" for word, count in ranking[:limit]]


def song_word_frequency(source="peng.txt", target="result.txt"):
    """Read lyrics from *source*, print every word count, save the top 20.

    Each ``(word, count)`` tuple is printed to stdout in ranking order, and
    the 20 most frequent words are written to *target*, which is overwritten.
    """
    with open(source, "r", encoding="utf-8") as src:
        lyrics = src.read()

    ranking = count_song_words(lyrics)
    for entry in ranking:
        print(entry)

    # Write with the same encoding the input is read with, so words containing
    # non-ASCII characters do not depend on the platform's default encoding.
    with open(target, "w", encoding="utf-8") as out:
        out.writelines(format_song_lines(ranking))


if __name__ == "__main__":
    song_word_frequency()
from collections import Counter

# Characters deleted from the text before segmentation.
# NOTE(review): the set mixes ASCII punctuation with a few CJK marks; if the
# input uses full-width forms of , : ; ( ) ! ? they are not stripped here —
# confirm against the actual text.
WEICHENG_PUNCTUATION = ",。‘’“”:;()!?、 "

# Tokens that are left out of the frequency table.
WEICHENG_STOP_TOKENS = frozenset({
    '的',
    '\n',
    '\u3000',
    '曰',
    '之',
    '不',
    '人',
    '一',
    '大',
    '马',
    '来',
    '有',
    '于',
    '下',
    '此',
})


def strip_weicheng_punctuation(text, punctuation=WEICHENG_PUNCTUATION):
    """Return *text* with every character in *punctuation* removed."""
    # One translate() pass replaces a chain of per-character replace() calls.
    return text.translate(str.maketrans('', '', punctuation))


def count_weicheng_tokens(tokens, exclude=WEICHENG_STOP_TOKENS):
    """Return ``(token, count)`` pairs for *tokens*, most frequent first.

    Counts are occurrences of each token in the segmented token sequence.
    Counting with ``text.count(token)`` on the raw text instead would count
    substrings, so a short token would also be counted wherever it appears
    inside a longer word. Tokens in *exclude* are dropped; ties keep
    first-appearance order.
    """
    tally = Counter(tok for tok in tokens if tok not in exclude)
    return tally.most_common()


def format_weicheng_lines(ranking, limit=20):
    """Format the first *limit* entries of *ranking* as ``"token:count\\n"``.

    Slicing means a ranking with fewer than *limit* entries yields fewer
    lines instead of raising IndexError.
    """
    return [token + ':' + str(count) + '\n' for token, count in ranking[:limit]]


def weicheng_word_frequency(source='weicheng.txt', target='wcCount.txt'):
    """Segment *source* with jieba, print the counts, append the top 20.

    Prints the token list (twice), the distinct counted tokens and the full
    ranking to stdout, then appends the 20 most frequent tokens to *target*.
    """
    # Imported here so the pure helpers above can be imported without jieba
    # being installed; the segmenter is only needed when the script runs.
    import jieba

    with open(source, 'r', encoding='utf-8') as src:
        text = src.read()

    text = strip_weicheng_punctuation(text)

    # Segment once. The script previously segmented the text twice (cut and
    # lcut) and printed each result; both prints are kept so stdout still
    # shows the token list twice.
    tokens = jieba.lcut(text)
    print(tokens)
    print(tokens)

    ranking = count_weicheng_tokens(tokens)
    # Distinct counted tokens, now listed in ranking order.
    print([token for token, _ in ranking])
    print(ranking)

    # Append mode is preserved: repeated runs accumulate results in the file.
    # UTF-8 matches the input encoding instead of relying on the locale.
    with open(target, 'a', encoding='utf-8') as out:
        out.writelines(format_weicheng_lines(ranking))


if __name__ == "__main__":
    weicheng_word_frequency()