python mincemeat.py -p pwd localhost
python <server_script>.py
# Per-author word counts computed with a mincemeat map/reduce job.
#
# Every file matching hw3data/* is read into memory and handed to the
# mincemeat server as one map input.  The mapper emits ((author, word), 1)
# pairs, the reducer sums them, and the totals are printed and then written
# to output.txt as "author *** word *** count" lines.
import glob
import operator


def file_contents(filename):
    """Return the entire contents of *filename* as a string.

    The file is closed before returning, even if reading raises.
    """
    with open(filename) as f:
        return f.read()


def my_mapper(key, value):
    """Yield ((author, word), 1) for every counted word of every record.

    key: the datasource key for this input (the file path); unused.
    value: the file's text, one record per line.  Each record is split on
        ':::'; field 1 is an author list separated by '::' and field 2 is
        the text whose words are counted.

    Words are lower-cased; words of length <= 1 and words found in
    ``allStopWords`` are dropped.  Records with fewer than three ':::'
    fields (for example blank lines) are skipped instead of raising
    IndexError.
    """
    # NOTE(review): these imports (and the compiled pattern) are kept local
    # to the function, presumably because mincemeat ships only this
    # function's code to the workers, where module-level names would be
    # missing -- confirm before hoisting them.
    import re
    from stopwords import allStopWords

    # Removes every character that is not whitespace, a digit, an ASCII
    # letter or a hyphen.
    unwanted = re.compile(r'([^\s\t0-9a-zA-Z-])+')

    for line in value.splitlines():
        fields = line.split(':::')
        if len(fields) < 3:
            continue

        # The word list depends only on the text field, so build it once per
        # record rather than once per author.
        words = []
        for token in unwanted.sub('', fields[2]).split():
            word = token.lower()
            if len(word) > 1 and word not in allStopWords:
                words.append(word)

        for author in fields[1].split('::'):
            for word in words:
                yield (author, word), 1


def my_reducer(key, value):
    """Return the sum of the counts gathered for one key.

    key: the (author, word) tuple emitted by my_mapper; unused.
    value: iterable of the integer counts emitted for that key.
    """
    return sum(value)


def main():
    """Run the map/reduce job, print the raw result, write output.txt."""
    # Imported here rather than at module level so the helper functions
    # above can be imported without mincemeat being available.
    import mincemeat

    datasource = dict(
        (path, file_contents(path)) for path in glob.glob('hw3data/*')
    )

    s = mincemeat.Server()
    s.datasource = datasource
    s.mapfn = my_mapper
    s.reducefn = my_reducer
    # NOTE(review): workers presumably have to be started with this same
    # password -- confirm against the mincemeat documentation.
    results = s.run_server(password="pwd")

    # Single-argument parenthesised form is valid on both Python 2 and 3.
    print(results)

    rows = [(author, word, count)
            for (author, word), count in results.items()]
    # Order by author first, then by ascending count within each author.
    rows.sort(key=operator.itemgetter(0, 2))

    with open('output.txt', 'w') as out:
        for author, word, count in rows:
            out.write(author + ' *** ' + word + ' *** ' + str(count) + '\n')


if __name__ == '__main__':
    main()
Stephen L. Bloom *** scalar *** 1 Stephen L. Bloom *** concatenation *** 1 Stephen L. Bloom *** point *** 1 Stephen L. Bloom *** varieties *** 1 Stephen L. Bloom *** observation *** 1 Stephen L. Bloom *** equivalence *** 1 Stephen L. Bloom *** axioms *** 1 Stephen L. Bloom *** languages *** 1 Stephen L. Bloom *** logical *** 1 Stephen L. Bloom *** algebras *** 1 Stephen L. Bloom *** equations *** 1 Stephen L. Bloom *** number *** 1 Stephen L. Bloom *** vector *** 1 Stephen L. Bloom *** polynomial *** 1 Stephen L. Bloom *** solving *** 1 Stephen L. Bloom *** equational *** 1 Stephen L. Bloom *** axiomatizing *** 1 Stephen L. Bloom *** characterization *** 1 Stephen L. Bloom *** regular *** 2 Stephen L. Bloom *** sets *** 2 Stephen L. Bloom *** iteration *** 3 Stephen L. Lieman *** unacceptable *** 1 Stephen L. Lieman *** correcting *** 1 Stephen L. Lieman *** never *** 1 Stephen L. Lieman *** powerful *** 1 Stephen L. Lieman *** accept *** 1