标签:reverse png str read else The 中英文 com for
# Word-frequency report for the English text stored in zz.txt.
from collections import Counter

# Punctuation that is blanked out before the text is split into words.
EN_SEPARATORS = ".,;:?!-_"
# Very common words that are left out of the ranking.
EN_STOP_WORDS = frozenset({"a", "the", "and", "i", "you"})


def normalize_english_text(text, separators=EN_SEPARATORS):
    """Return *text* lowercased, with each separator character replaced by a space."""
    # One translate() pass instead of one str.replace() call per separator.
    blank_out = str.maketrans(dict.fromkeys(separators, " "))
    return text.translate(blank_out).lower()


def count_english_words(words, exclude=EN_STOP_WORDS):
    """Return a Counter of *words*, skipping every word found in *exclude*.

    A single counting pass replaces one ``list.count`` scan per distinct word.
    """
    return Counter(word for word in words if word not in exclude)


def report_english_word_counts(text, top_n=20):
    """Print the word-frequency report for *text* and return the top entries.

    Printed, in order: the normalized text, the word list, the set of counted
    words, the count table, the unsorted (word, count) pairs, the pairs sorted
    by descending count, and finally the top entries one per line.

    Args:
        text: Raw English text.
        top_n: Maximum number of entries in the final listing.

    Returns:
        Up to *top_n* ``(word, count)`` tuples, most frequent first. Words
        with equal counts keep the order in which they first appear in *text*.
    """
    normalized = normalize_english_text(text)
    print(normalized)

    words = normalized.split()
    print(len(words), words)

    counts = count_english_words(words)
    unique_words = set(counts)
    print(len(unique_words), unique_words)
    print(len(counts), dict(counts))

    pairs = list(counts.items())
    print(pairs)
    ranked = counts.most_common()
    print(ranked)

    # Slicing (rather than indexing 0..top_n-1) keeps short texts from
    # raising IndexError when they contain fewer distinct words than top_n.
    top = ranked[:top_n]
    for entry in top:
        print(entry)
    return top


if __name__ == "__main__":
    # The context manager closes the file even if reading fails.
    with open("zz.txt", "r", encoding="utf-8") as source:
        report_english_word_counts(source.read())
# Word-frequency report for the Chinese text stored in a.txt (uses jieba).
from collections import Counter

# Punctuation that is turned into spaces before counting. The first part is
# the set the script originally listed; the full-width forms at the end were
# added because Chinese prose normally uses them.
# NOTE(review): the full-width additions are an assumption about intent - confirm.
ZH_PUNCTUATION = ", 。 ; : - ! ? 、 “ ” , ; : ! ?"


def blank_chinese_punctuation(text, punctuation=ZH_PUNCTUATION):
    """Return *text* with every character of *punctuation* replaced by a space."""
    blank_out = str.maketrans(dict.fromkeys(punctuation, " "))
    return text.translate(blank_out)


def rank_tokens(tokens):
    """Return (token, count) pairs for multi-character tokens, most frequent first.

    Single-character tokens are dropped, as are whitespace-only tokens (runs
    of the spaces that replaced punctuation would otherwise be counted as
    words). Tokens with equal counts keep their first-seen order.
    """
    counts = Counter(
        token for token in tokens
        if len(token) != 1 and not token.isspace()
    )
    return counts.most_common()


def report_chinese_word_counts(text, top_n=15):
    """Print the segmentation demo and word-frequency report for *text*.

    Printed, in order: the raw text, jieba's three segmentation modes
    (default cut, ``cut_all=True``, and ``cut_for_search``), the text with
    punctuation blanked, the whitespace-split pieces, the jieba tokens used
    for counting, the ranked (word, count) pairs, and the top entries.

    Args:
        text: Raw Chinese text.
        top_n: Maximum number of entries in the final listing.

    Returns:
        Up to *top_n* ``(word, count)`` tuples, most frequent first.
    """
    # Imported here so the pure helpers above stay usable without jieba.
    import jieba

    print(text)

    # Show jieba's three segmentation modes on the raw text.
    print(list(jieba.cut(text)))
    print(list(jieba.cut(text, cut_all=True)))
    print(list(jieba.cut_for_search(text)))

    # Turn punctuation into spaces.
    cleaned = blank_chinese_punctuation(text)
    print(cleaned)
    pieces = cleaned.split()
    print(len(pieces), pieces)

    # Segment into words and count the multi-character ones.
    tokens = jieba.lcut(cleaned)
    print(tokens)

    # Sort by frequency.
    ranked = rank_tokens(tokens)
    print(ranked)

    # Slicing keeps short texts from raising IndexError when fewer than
    # top_n distinct words are found.
    top = ranked[:top_n]
    for entry in top:
        print(entry)
    return top


if __name__ == "__main__":
    # The context manager closes the file even if reading fails.
    with open("a.txt", "r", encoding="utf-8") as source:
        report_chinese_word_counts(source.read())
标签:reverse png str read else The 中英文 com for
原文地址:https://www.cnblogs.com/xsxsx/p/9809753.html