标签:[1] highlight 第四次 for http 处理 .com rev 计数
def split_english_words(text, separators=',?'):
    """Lower-case *text* and split it into a list of words.

    Each character in *separators* is treated as a word boundary: it is
    replaced by a space before the text is split on whitespace, so
    ``"a,b"`` yields ``["a", "b"]`` rather than the fused ``["ab"]``.
    """
    boundary_table = str.maketrans({ch: ' ' for ch in separators})
    return text.lower().translate(boundary_table).split()


def rank_english_words(words, exclude=frozenset({'i', 'in', 'the', 'anymore'})):
    """Return ``(word, count)`` pairs for *words*, most frequent first.

    Words contained in *exclude* are left out of the result.  Ties are
    broken alphabetically so the ordering is deterministic.
    """
    # Local import keeps this block self-contained in a file that has no
    # shared import section.
    from collections import Counter

    counts = Counter(word for word in words if word not in exclude)
    return sorted(counts.items(), key=lambda item: (-item[1], item[0]))


def report_english_word_counts(path=r'C:\Users\Administrator\Desktop\hello.txt'):
    """Echo the text file at *path*, then print word statistics.

    Prints, in order: the file contents, the word list with its length,
    the set of counted (non-excluded) words with its size, the word/count
    pairs, and the 20 most frequent words.
    """
    # The context manager closes the file even if reading raises.
    with open(path, 'r', encoding='utf-8') as fo:
        file_text = fo.read()
    print(file_text)

    # NOTE(review): as in the original script, the statistics are computed
    # on this embedded text, not on the file contents echoed above.
    str_hello = '''...'''
    word_list = split_english_words(str_hello)
    print(len(word_list), word_list)

    ranked = rank_english_words(word_list)
    counted_words = {word for word, _ in ranked}
    print(len(counted_words), counted_words)

    print(dict(ranked).items())
    print(ranked[:20])


if __name__ == '__main__':
    report_english_word_counts()
# Punctuation stripped by the original script, extended with the full-width
# forms found in Chinese text and with whitespace, so that line breaks and
# full-width marks are not counted as tokens.
SEPARATORS_ZH = (
    ',。?!;:‘’“” "" '
    '\uff0c\uff1f\uff01\uff1b\uff1a'  # full-width , ? ! ; :
    '\n\r\t\u3000'
)


def rank_chinese_tokens(text, separators=SEPARATORS_ZH, tokenizer=None, limit=None):
    """Return ``(token, count)`` pairs for *text*, most frequent first.

    *tokenizer* is a callable mapping a string to an iterable of tokens.
    When it is ``None`` the text is segmented into words with
    ``jieba.lcut``; pass ``list`` to count single characters instead.
    Tokens made up entirely of characters from *separators* (including the
    empty token) are ignored.  Ties are broken by token so the ordering is
    deterministic.  *limit* caps the number of pairs returned; ``None``
    returns all of them.
    """
    from collections import Counter

    if tokenizer is None:
        # Imported lazily so the counting logic stays usable (and testable)
        # where the third-party jieba package is not installed.
        import jieba
        tokenizer = jieba.lcut

    ignored = set(separators)
    counts = Counter(
        token for token in tokenizer(text) if not set(token) <= ignored
    )
    ranked = sorted(counts.items(), key=lambda item: (-item[1], item[0]))
    # Slicing never raises, unlike indexing a fixed range of positions.
    return ranked[:limit]


def report_chinese_token_counts(path=r'C:\Users\Administrator\Desktop\hi.txt', top=20):
    """Echo the text file at *path*, then print its token frequencies.

    Prints, in order: the file contents, the number of distinct tokens
    with the token/count mapping, the full ranking, and the first *top*
    entries of the ranking one per line.
    """
    # Read the string to analyse from the file; the context manager closes
    # the file even if reading raises.
    with open(path, 'r', encoding='utf-8') as fo:
        strho = fo.read()
    print(strho)

    # Token -> count, sorted by frequency (descending).
    ranked = rank_chinese_tokens(strho)
    print(len(ranked), dict(ranked))
    print(ranked)

    # Output the TOP entries; fewer are printed if the text has fewer tokens.
    for entry in ranked[:top]:
        print(entry)


if __name__ == '__main__':
    report_chinese_token_counts()
标签:[1] highlight 第四次 for http 处理 .com rev 计数
原文地址:https://www.cnblogs.com/liuyonghe/p/9789986.html