Show More
@@ -0,0 +1,67 b'' | |||
|
1 | """Calculate statistics on the digits of pi in parallel. | |
|
2 | ||
|
3 | This program uses the functions in :file:`pidigits.py` to calculate | |
|
4 | the frequencies of 2 digit sequences in the digits of pi. The | |
|
5 | results are plotted using matplotlib. | |
|
6 | ||
|
7 | To run, text files from http://www.super-computing.org/ | |
|
8 | must be installed in the working directory of the IPython engines. | |
|
9 | The actual filenames to be used can be set with the ``filestring`` | |
|
10 | variable below. | |
|
11 | ||
|
12 | The dataset we have been using for this is the 200 million digit one here: | |
|
13 | ftp://pi.super-computing.org/.2/pi200m/ | |
|
14 | """ | |
|
15 | ||
|
16 | from IPython.kernel import client | |
|
17 | from matplotlib import pyplot as plt | |
|
18 | import numpy as np | |
|
19 | from pidigits import * | |
|
20 | from timeit import default_timer as clock | |
|
21 | ||
|
22 | ||
|
# Names of the text files holding the digits of pi (10m digits per file).
filestring = 'pi200m-ascii-%(i)02dof20.txt'
files = []
for i in range(1, 16):
    files.append(filestring % dict(i=i))
|
26 | ||
|
27 | ||
|
28 | # A function for reducing the frequencies calculated | |
|
29 | # by different engines. | |
|
def reduce_freqs(freqlist):
    """Add up a sequence of frequency arrays element-wise.

    The accumulator takes its shape and dtype from the first entry of
    ``freqlist``; the input arrays themselves are left untouched.
    """
    total = np.zeros_like(freqlist[0])
    for partial in freqlist:
        # In-place accumulation, equivalent to ``total += partial``.
        np.add(total, partial, out=total)
    return total
|
35 | ||
|
36 | ||
|
# Connect to the IPython cluster and run pidigits.py through the client
# (presumably on every engine, so that compute_two_digit_freqs is defined
# there -- confirm against the IPython.kernel documentation).
mec = client.MultiEngineClient(profile='mycluster')
mec.run('pidigits.py')


# Every input file holds 10 million digits; all throughput figures below
# are derived from the number of files actually processed.
DIGITS_PER_FILE = 10.0e6


# Baseline: run one file (10m digits) on a single engine.
mapper = mec.mapper(targets=0)
t1 = clock()

freqs10m = mapper.map(compute_two_digit_freqs, files[:1])[0]

t2 = clock()
digits_per_second1 = DIGITS_PER_FILE/(t2-t1)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("Digits per second (1 core, 10m digits):  %s" % (digits_per_second1,))


# Parallel run: one file per engine (150m digits when 15 engines are up).
# The slice silently shrinks when the cluster has fewer engines than files,
# so the digit count must follow the files really dispatched instead of
# being hard-coded to 150m.
parallel_files = files[:len(mec)]
t1 = clock()

freqs_all = mec.map(compute_two_digit_freqs, parallel_files)
# NOTE: the names ``freqs150m`` / ``digits_per_second8`` are kept from the
# original script; they describe the intended 15-engine, 8-core setup.
freqs150m = reduce_freqs(freqs_all)

t2 = clock()
parallel_digits = DIGITS_PER_FILE*len(parallel_files)
digits_per_second8 = parallel_digits/(t2-t1)
print("Digits per second (%d engines, %dm digits):  %s"
      % (len(parallel_files), parallel_digits/1.0e6, digits_per_second8))

print("Speedup:  %s" % (digits_per_second8/digits_per_second1,))

plot_two_digit_freqs(freqs150m)
plt.title("2 digit sequences in %dm digits of pi" % (parallel_digits/1.0e6,))
|
67 |
@@ -0,0 +1,126 b'' | |||
|
1 | """Compute statistics on the digits of pi. | |
|
2 | ||
|
3 | This uses precomputed digits of pi from the website | |
|
4 | of Professor Yasumasa Kanada at the University of | |
|
5 | Tokyo: http://www.super-computing.org/ | |
|
6 | ||
|
7 | Currently, there are only functions to read the | |
|
8 | .txt (non-compressed, non-binary) files, but adding | |
|
9 | support for compression and binary files would be | |
|
10 | straightforward. | |
|
11 | ||
|
12 | This focuses on computing the number of times that | |
|
13 | all 1-, 2-, and n-digit sequences occur in the digits of pi. | |
|
14 | If the digits of pi are truly random, these frequencies | |
|
15 | should be equal. | |
|
16 | """ | |
|
17 | ||
|
18 | # Import statements | |
|
19 | ||
|
20 | from __future__ import division, with_statement | |
|
21 | import numpy as np | |
|
22 | from matplotlib import pyplot as plt | |
|
23 | ||
|
24 | # Top-level functions | |
|
25 | ||
|
def compute_one_digit_freqs(filename):
    """Return the 1-digit frequency counts for the digits read from ``filename``."""
    return one_digit_freqs(txt_file_to_digits(filename))
|
30 | ||
|
def compute_two_digit_freqs(filename):
    """Return the 2-digit frequency counts for the digits read from ``filename``."""
    return two_digit_freqs(txt_file_to_digits(filename))
|
35 | ||
|
def compute_n_digit_freqs(filename, n):
    """Return the ``n``-digit frequency counts for the digits read from ``filename``."""
    return n_digit_freqs(txt_file_to_digits(filename), n)
|
40 | ||
|
41 | # Read digits from a txt file | |
|
42 | ||
|
def txt_file_to_digits(filename, the_type=str):
    """
    Yield the digits of pi read from a .txt file.

    The file is streamed line by line, so only one line is held in memory
    at a time.  Every whitespace character (spaces, newlines, tabs,
    carriage returns) is skipped; every other character is passed through
    ``the_type`` and yielded.

    Parameters
    ----------
    filename : str
        Path of the text file to read.
    the_type : callable, optional
        Conversion applied to each single-character string (default
        ``str``; pass ``int`` to get integers).
    """
    with open(filename, 'r') as f:
        # Iterating the file object reads lazily, unlike f.readlines().
        for line in f:
            for c in line:
                if not c.isspace():
                    yield the_type(c)
|
52 | ||
|
53 | # Actual counting functions | |
|
54 | ||
|
def one_digit_freqs(digits, normalize=False):
    """
    Count how often each of the digits 0-9 occurs in ``digits``.

    Returns a length-10 array of counts, or the counts divided by their
    sum when ``normalize`` is true.
    """
    counts = np.zeros(10, dtype='i4')
    for digit in digits:
        counts[int(digit)] += 1
    if not normalize:
        return counts
    return counts/counts.sum()
|
65 | ||
|
def two_digit_freqs(digits, normalize=False):
    """
    Consume digits of pi and compute 2 digits freq. counts.

    Every pair of adjacent digits is counted, including the final pair, so
    N digits contribute N-1 counts.  The pair ``(a, b)`` is stored at index
    ``10*a + b``.

    Parameters
    ----------
    digits : iterable
        Digits as single-character strings or integers.
    normalize : bool, optional
        If true, return the counts divided by their total.  When no pair
        was counted the result is an all-zero float array rather than NaN.

    Returns
    -------
    numpy.ndarray
        Length-100 array: 'i4' counts, or floats when ``normalize`` is true.
        Input with fewer than two digits yields all zeros.
    """
    freqs = np.zeros(100, dtype='i4')
    stream = iter(digits)
    # next() with a default works for any iterator on Python 2.6+ and 3.
    first = next(stream, None)
    if first is not None:
        prev = int(first)
        for d in stream:
            cur = int(d)
            # Arithmetic index works for both str and int digits.
            freqs[10*prev + cur] += 1
            prev = cur
    if normalize:
        total = freqs.sum()
        if total:
            freqs = freqs/float(total)
        else:
            freqs = freqs.astype(float)
    return freqs
|
81 | ||
|
def n_digit_freqs(digits, n, normalize=False):
    """
    Consume digits of pi and compute n digits freq. counts.

    This should only be used for 1-6 digits, because the result array has
    ``10**n`` entries.

    Every window of ``n`` consecutive digits is counted, including the
    final one, so N digits contribute N-n+1 counts.  A window is stored at
    the index given by its decimal value (e.g. "012" -> 12).

    Parameters
    ----------
    digits : iterable
        Digits as single-character strings or integers.
    n : int
        Window length; must be at least 1.
    normalize : bool, optional
        If true, return the counts divided by their total.  When no window
        was counted the result is an all-zero float array rather than NaN.

    Returns
    -------
    numpy.ndarray
        Array of length ``10**n``: 'i4' counts, or floats when
        ``normalize`` is true.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")
    size = pow(10, n)
    freqs = np.zeros(size, dtype='i4')
    index = 0
    seen = 0
    for d in digits:
        # Rolling window: shift in the new digit and drop the oldest one.
        index = (index*10 + int(d)) % size
        seen += 1
        # Start counting only once a full window of n digits is available.
        if seen >= n:
            freqs[index] += 1
    if normalize:
        total = freqs.sum()
        if total:
            freqs = freqs/float(total)
        else:
            freqs = freqs.astype(float)
    return freqs
|
100 | ||
|
101 | # Plotting functions | |
|
102 | ||
|
def plot_two_digit_freqs(f2):
    """
    Show the two-digit counts in ``f2`` as a labelled 10x10 matrix plot.

    A copy of ``f2`` is reshaped to (10, 10), so the caller's array is not
    modified.  Each cell is annotated with its two-digit sequence; the
    object returned by ``plt.matshow`` is handed back.
    """
    grid = f2.copy()
    grid.shape = (10, 10)
    image = plt.matshow(grid)
    plt.colorbar()
    for col in range(10):
        for row in range(10):
            # x is the column (second digit), y is the row (first digit).
            plt.text(col-0.2, row+0.2, "%d%d" % (row, col))
    plt.ylabel('First digit')
    plt.xlabel('Second digit')
    return image
|
117 | ||
|
def plot_one_digit_freqs(f1):
    """
    Draw the single-digit counts in ``f1`` as a blue line with circle markers.

    Returns whatever ``plt.plot`` returns for the drawn data.
    """
    lines = plt.plot(f1, 'bo-')
    plt.title('Single digit counts in pi')
    plt.xlabel('Digit')
    plt.ylabel('Count')
    return lines
General Comments 0
You need to be logged in to leave comments.
Login now