标签:
#!/usr/bin/env python
"""Random word generator and word-count reducer for a streaming pipeline.

Two small scripts are kept here as functions:

* ``generate_words`` writes random lowercase words (1-5 letters each), one
  per line, to a text file (``word.txt`` by default).
* ``reduce_counts`` reads ``word<TAB>count`` records and sums the counts per
  word; this is the logic of ``wordcount_reducer.py``.

Intended shell usage (``wordcount_mapper.py`` is not part of this listing):

    cat word.txt | ./wordcount_mapper.py | ./wordcount_reducer.py

Run with the single argument ``generate`` to create the input file; run with
no arguments to act as the reducer on standard input.
"""
import random
import string
import sys
from operator import itemgetter

# 'abc..z'
ALPHABET = string.ascii_lowercase


def random_word(min_len=1, max_len=5):
    """Return a random lowercase word with min_len..max_len letters."""
    length = random.randint(min_len, max_len)
    return "".join(random.choice(ALPHABET) for _ in range(length))


def generate_words(path="word.txt", max_iter=100000):
    """Write ``max_iter`` random words to ``path``, one word per line.

    The file is opened with a context manager so that it is closed even if
    writing fails part-way through.
    """
    with open(path, "w") as fp:
        fp.writelines(random_word() + "\n" for _ in range(max_iter))


def reduce_counts(lines):
    """Sum the counts per word from ``word<TAB>count`` records.

    ``lines`` is any iterable of strings (for example ``sys.stdin``).
    Records that have no tab separator or whose count is not an integer are
    skipped rather than aborting the whole run.

    Returns a list of ``(word, total)`` tuples sorted by word.
    """
    totals = {}
    for line in lines:
        try:
            # Both the unpacking and int() raise ValueError on a bad record,
            # so a single handler covers "no tab" and "non-numeric count".
            word, count = line.strip().split("\t", 1)
            count = int(count)
        except ValueError:
            continue
        totals[word] = totals.get(word, 0) + count
    return sorted(totals.items(), key=itemgetter(0))


def main(argv=None):
    """Generate the word file when given ``generate``; otherwise reduce stdin."""
    args = sys.argv[1:] if argv is None else list(argv)
    if args[:1] == ["generate"]:
        generate_words()
        return
    for word, count in reduce_counts(sys.stdin):
        print("%s\t%s" % (word, count))


if __name__ == "__main__":
    main()
标签:
原文地址:http://my.oschina.net/innovation/blog/359748