标签:
#!/usr/bin/python
# Filename: readlinepy.py
"""Print the most frequent words of a text file.

Every (word, count) pair whose count is one of the ten highest distinct
counts is printed, most frequent first.  Words that share a count are all
printed, so more than ten lines may appear.
"""
import re
import sys
from collections import Counter

# Input file to analyse.
urldir = r"C:\python27\a.txt"

# A word is a maximal run of "\w" characters; everything else separates words.
_WORD_RE = re.compile(r"\w+")


def count_words(lines):
    """Return a Counter mapping each word found in *lines* to its frequency.

    *lines* is any iterable of strings (an open text file works).  Extracting
    runs of ``\\w`` is equivalent to replacing every ``\\W`` character with a
    space and splitting on whitespace.
    """
    counts = Counter()
    for line in lines:
        counts.update(_WORD_RE.findall(line))
    return counts


def top_words(counts, limit=10):
    """Return the (word, count) pairs holding the *limit* highest counts.

    The cut-off applies to distinct count values, not to words: every word
    tied on one of the retained counts is included.  Pairs are ordered by
    count, highest first; the sort is stable, so tied words keep the
    iteration order of *counts*.
    """
    # Slicing a descending list from the front also copes with limit == 0
    # (a "[-limit:]" slice would return everything in that case).
    best = set(sorted(set(counts.values()), reverse=True)[:limit])
    ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    return [pair for pair in ranked if pair[1] in best]


if __name__ == "__main__":
    # The context manager closes the file even if reading fails.
    with open(urldir, "r") as f:
        distone = count_words(f)
    # print(i) with a single tuple argument prints the same text on
    # Python 2 and Python 3.
    for i in top_words(distone):
        print(i)
标签:
原文地址:http://www.cnblogs.com/drgcaosheng/p/5073950.html