Python 的 MapReduce 單詞統計(轉載)

<!-- lang: python -->
	
#!/usr/bin/env python
import random

# Lowercase ASCII alphabet 'abc..z', built from code points 97..122.
alphaStr = "".join(map(chr, range(97, 123)))
# Default number of words written to the output file.
maxIter = 100000


def random_word(min_len=1, max_len=5):
    """Return a random word made of lowercase ASCII letters.

    Args:
        min_len: Smallest allowed word length (inclusive).
        max_len: Largest allowed word length (inclusive).

    Returns:
        A string whose length is drawn uniformly from
        ``min_len..max_len`` and whose letters are drawn from ``alphaStr``.
    """
    # Named `length` rather than `len` so the builtin is not shadowed.
    length = random.randint(min_len, max_len)
    return "".join(random.choice(alphaStr) for _ in range(length))


def write_random_words(path="word.txt", count=maxIter):
    """Write ``count`` random words to ``path``, one word per line.

    The file is opened in write mode, so an existing file is overwritten.
    Each word is written once, after it has been fully built, instead of
    once per appended letter.

    Args:
        path: Destination file name.
        count: Number of lines (words) to write.
    """
    # `with` guarantees the file is closed even if a write fails.
    with open(path, "w") as fp:
        fp.writelines(random_word() + "\n" for _ in range(count))


if __name__ == "__main__":
    write_random_words()


cat word.txt | ./wordcount_mapper.py | ./wordcount_reducer.py

Word count reducer (Python)

#filename:  wordcount_reducer.py
from  operator import itemgetter
import sys

def count_words(lines):
    """Sum the counts for each word from tab-separated ``word<TAB>count`` lines.

    Args:
        lines: Iterable of text lines, e.g. ``sys.stdin``.

    Returns:
        A dict mapping each word to the total of its integer counts.
        Lines that have no tab separator or whose count is not an integer
        are skipped.
    """
    wordcount = {}
    for line in lines:
        try:
            # Both the unpack (no tab in the line) and int() raise
            # ValueError; either way the line is malformed and is ignored.
            word, count = line.strip().split('\t', 1)
            count = int(count)
        except ValueError:
            continue
        wordcount[word] = wordcount.get(word, 0) + count
    return wordcount


def format_counts(wordcount):
    """Return ``word<TAB>count`` output lines sorted by word.

    Args:
        wordcount: Dict mapping word to its total count.

    Returns:
        A list of strings, one per word, ordered by the word itself.
    """
    sorted_wordcount = sorted(wordcount.items(), key=itemgetter(0))
    return ["%s\t%s" % (word, count) for word, count in sorted_wordcount]


if __name__ == "__main__":
    for output_line in format_counts(count_words(sys.stdin)):
        print(output_line)
相關文章
相關標籤/搜索