##// END OF EJS Templates
test WinHPC launchers on all platforms...
Test WinHPC launchers on all platforms. The tests don't actually launch anything; they just write template files, so they can run on any platform.

File last commit:

r9190:20a102a5
r12903:1e3f6739
Show More
pwordfreq.py
80 lines | 2.2 KiB | text/x-python | PythonLexer
MinRK
updates to docs and examples
r3670 #!/usr/bin/env python
"""Parallel word frequency counter.
This only works for a local cluster, because the filenames are local paths.
"""
import os
MinRK
update a few parallel examples...
r4184 import time
MinRK
updates to docs and examples
r3670 import urllib
from itertools import repeat
from wordfreq import print_wordfreq, wordfreq
from IPython.parallel import Client, Reference
davinci_url = "http://www.gutenberg.org/cache/epub/5000/pg5000.txt"
def pwordfreq(view, fnames):
    """Parallel word frequency counter.

    view - An IPython DirectView
    fnames - The filenames containing the split data.

    Returns a dict mapping each word to its count summed over the
    per-engine results.
    """
    # One filename is expected per target of the view.
    assert len(fnames) == len(view.targets)
    # Distribute the filenames under the remote name 'fname', then run
    # wordfreq remotely against that name.
    view.scatter('fname', fnames, flatten=True)
    ar = view.apply(wordfreq, Reference('fname'))
    freqs_list = ar.get()

    # Merge the per-engine mappings in a single pass. `.items()` is used
    # instead of `.iteritems()` because it exists on both Python 2 and 3.
    freqs = {}
    for f in freqs_list:
        for word, count in f.items():
            freqs[word] = freqs.get(word, 0) + count
    return freqs
if __name__ == '__main__':
    # Create a Client and View
    rc = Client()
    view = rc[:]

    if not os.path.exists('davinci.txt'):
        # download from project gutenberg
        print("Downloading Da Vinci's notebooks from Project Gutenberg")
        urllib.urlretrieve(davinci_url, 'davinci.txt')

    # Run the serial version
    print("Serial word frequency count:")
    # Read through a context manager so the file handle is closed promptly.
    with open('davinci.txt') as f:
        text = f.read()
    tic = time.time()
    freqs = wordfreq(text)
    toc = time.time()
    print_wordfreq(freqs, 10)
    print("Took %.3f s to calcluate"%(toc-tic))

    # The parallel version
    print("\nParallel word frequency count:")
    # split the davinci.txt into one file per engine:
    lines = text.splitlines()
    nlines = len(lines)
    n = len(rc)
    # Floor division keeps `block` an int on Python 3 as well as Python 2.
    block = nlines // n
    for i in range(n):
        start = i * block
        # The last file also takes the nlines % n leftover lines, so the
        # split files together cover every line of the text.
        stop = nlines if i == n - 1 else start + block
        chunk = lines[start:stop]
        with open('davinci%i.txt'%i, 'w') as f:
            f.write('\n'.join(chunk))

    # Absolute paths are passed to pwordfreq (see the module docstring:
    # the filenames are local paths).
    cwd = os.path.abspath(os.getcwdu())
    fnames = [os.path.join(cwd, 'davinci%i.txt'%i) for i in range(n)]
    tic = time.time()
    pfreqs = pwordfreq(view, fnames)
    toc = time.time()
    # Report the parallel result, not the serial one computed above.
    print_wordfreq(pfreqs)
    print("Took %.3f s to calcluate on %i engines"%(toc-tic, len(view.targets)))

    # cleanup split files
    # An explicit loop is used because map() is lazy on Python 3 and would
    # never call os.remove.
    for fname in fnames:
        os.remove(fname)