Show More
@@ -15,6 +15,8 b' from wordfreq import print_wordfreq, wordfreq' | |||||
15 |
|
15 | |||
16 | from IPython.parallel import Client, Reference |
|
16 | from IPython.parallel import Client, Reference | |
17 |
|
17 | |||
|
18 | from __future__ import division | |||
|
19 | ||||
18 | davinci_url = "http://www.gutenberg.org/cache/epub/5000/pg5000.txt" |
|
20 | davinci_url = "http://www.gutenberg.org/cache/epub/5000/pg5000.txt" | |
19 |
|
21 | |||
20 | def pwordfreq(view, fnames): |
|
22 | def pwordfreq(view, fnames): | |
@@ -32,7 +34,7 b' def pwordfreq(view, fnames):' | |||||
32 | word_set.update(f.keys()) |
|
34 | word_set.update(f.keys()) | |
33 | freqs = dict(zip(word_set, repeat(0))) |
|
35 | freqs = dict(zip(word_set, repeat(0))) | |
34 | for f in freqs_list: |
|
36 | for f in freqs_list: | |
35 | for word, count in f.iteritems(): |
|
37 | for word, count in f.items(): | |
36 | freqs[word] += count |
|
38 | freqs[word] += count | |
37 | return freqs |
|
39 | return freqs | |
38 |
|
40 | |||
@@ -45,7 +47,11 b" if __name__ == '__main__':" | |||||
45 | if not os.path.exists('davinci.txt'): |
|
47 | if not os.path.exists('davinci.txt'): | |
46 | # download from project gutenberg |
|
48 | # download from project gutenberg | |
47 | print("Downloading Da Vinci's notebooks from Project Gutenberg") |
|
49 | print("Downloading Da Vinci's notebooks from Project Gutenberg") | |
48 | urllib.urlretrieve(davinci_url, 'davinci.txt') |
|
50 | try : #python2 | |
|
51 | urllib.urlretrieve(davinci_url, 'davinci.txt') | |||
|
52 | except : #python3 | |||
|
53 | import urllib.request | |||
|
54 | urllib.request.urlretrieve(davinci_url, 'davinci.txt') | |||
49 |
|
55 | |||
50 | # Run the serial version |
|
56 | # Run the serial version | |
51 | print("Serial word frequency count:") |
|
57 | print("Serial word frequency count:") | |
@@ -63,13 +69,16 b" if __name__ == '__main__':" | |||||
63 | lines = text.splitlines() |
|
69 | lines = text.splitlines() | |
64 | nlines = len(lines) |
|
70 | nlines = len(lines) | |
65 | n = len(rc) |
|
71 | n = len(rc) | |
66 | block = nlines/n |
|
72 | block = nlines//n | |
67 | for i in range(n): |
|
73 | for i in range(n): | |
68 | chunk = lines[i*block:i*(block+1)] |
|
74 | chunk = lines[i*block:i*(block+1)] | |
69 | with open('davinci%i.txt'%i, 'w') as f: |
|
75 | with open('davinci%i.txt'%i, 'w') as f: | |
70 | f.write('\n'.join(chunk)) |
|
76 | f.write('\n'.join(chunk)) | |
71 |
|
77 | |||
72 | cwd = os.path.abspath(os.getcwdu()) |
|
78 | try : #python2 | |
|
79 | cwd = os.path.abspath(os.getcwdu()) | |||
|
80 | except : #python3 | |||
|
81 | cwd = os.path.abspath(os.getcwd()) | |||
73 | fnames = [ os.path.join(cwd, 'davinci%i.txt'%i) for i in range(n)] |
|
82 | fnames = [ os.path.join(cwd, 'davinci%i.txt'%i) for i in range(n)] | |
74 | tic = time.time() |
|
83 | tic = time.time() | |
75 | pfreqs = pwordfreq(view,fnames) |
|
84 | pfreqs = pwordfreq(view,fnames) |
General Comments 0
You need to be logged in to leave comments.
Login now