pwordfreq.py
46 lines
| 1.3 KiB
| text/x-python
|
PythonLexer
Brian E Granger
|
r1337 | #!/usr/bin/env python | ||
"""Parallel word frequency counter.""" | ||||
from itertools import repeat | ||||
from wordfreq import print_wordfreq, wordfreq | ||||
def pwordfreq(rc, text):
    """Parallel word frequency counter.

    Runs ``freqs = wordfreq(<text>)`` through ``rc.execute``, pulls the
    resulting ``freqs`` objects back with ``rc.pull`` and sums them into a
    single dictionary.

    rc - An IPython RemoteController (any object providing ``execute(str)``
         and ``pull(name)``; ``pull`` must return an iterable of dict-like
         word -> count mappings -- presumably one per engine).
    text - The name of a string on the engines to do the freq count on.
           It is interpolated verbatim into the executed statement, so it
           must be a variable name/expression, not the text itself.

    Returns a dict mapping each word seen in any partial result to the sum
    of its counts; an empty dict when no partial results come back.
    """
    rc.execute('freqs = wordfreq(%s)' % text)
    freqs_list = rc.pull('freqs')

    # Merge the partial counts in one pass.  ``items()`` is used instead of
    # ``iteritems()`` so the function runs on both Python 2 and Python 3.
    freqs = {}
    for partial in freqs_list:
        for word, count in partial.items():
            freqs[word] = freqs.get(word, 0) + count
    return freqs
if __name__ == '__main__':
    # Imported only when run as a script, so importing this module just for
    # pwordfreq() does not require IPython.kernel.
    from IPython.kernel import client

    # Create a MultiEngineClient
    ipc = client.MultiEngineClient()

    # Run the wordfreq script on the engines.
    ipc.run('wordfreq.py')

    # Run the serial version
    print("Serial word frequency count:")
    # Use a context manager so the file handle is closed deterministically.
    with open('davinci.txt') as text_file:
        text = text_file.read()
    freqs = wordfreq(text)
    print_wordfreq(freqs, 10)

    # The parallel version
    print("\nParallel word frequency count:")
    files = ['davinci%i.txt' % i for i in range(4)]
    # NOTE(review): reading only textfile[0] on each engine presumably assumes
    # one file name per engine (i.e. four engines) -- confirm against the
    # cluster setup.
    ipc.scatter('textfile', files)
    ipc.execute('text = open(textfile[0]).read()')
    pfreqs = pwordfreq(ipc, 'text')
    # Print the parallel result; printing `freqs` here would just repeat the
    # serial output and never show what the engines computed.
    print_wordfreq(pfreqs)