Show More
@@ -0,0 +1,67 b'' | |||||
|
1 | """Calculate statistics on the digits of pi in parallel. | |||
|
2 | ||||
|
3 | This program uses the functions in :file:`pidigits.py` to calculate | |||
|
4 | the frequencies of 2 digit sequences in the digits of pi. The | |||
|
5 | results are plotted using matplotlib. | |||
|
6 | ||||
|
7 | To run, text files from http://www.super-computing.org/ | |||
|
8 | must be installed in the working directory of the IPython engines. | |||
|
9 | The actual filenames to be used can be set with the ``filestring`` | |||
|
10 | variable below. | |||
|
11 | ||||
|
12 | The dataset we have been using for this is the 200 million digit one here: | |||
|
13 | ftp://pi.super-computing.org/.2/pi200m/ | |||
|
14 | """ | |||
|
15 | ||||
|
16 | from IPython.kernel import client | |||
|
17 | from matplotlib import pyplot as plt | |||
|
18 | import numpy as np | |||
|
19 | from pidigits import * | |||
|
20 | from timeit import default_timer as clock | |||
|
21 | ||||
|
22 | ||||
|
23 | # Files with digits of pi (10m digits each) | |||
|
# Template for the per-chunk file names; ``i`` is the 1-based chunk index,
# zero-padded to two digits.
filestring = 'pi200m-ascii-%(i)02dof20.txt'
# Chunks 01 through 15 (each file holds 10m digits).
files = [filestring % dict(i=i) for i in range(1, 16)]
|
26 | ||||
|
27 | ||||
|
28 | # A function for reducing the frequencies calculated | |||
|
29 | # by different engines. | |||
|
def reduce_freqs(freqlist):
    """
    Sum a sequence of frequency arrays element-wise.

    The accumulator is created with ``np.zeros_like(freqlist[0])``, so the
    result has the shape and dtype of the first entry.  The inputs are
    not modified.
    """
    total = np.zeros_like(freqlist[0])
    for partial in freqlist:
        # In-place accumulation into the zero-initialised buffer.
        np.add(total, partial, out=total)
    return total
|
35 | ||||
|
36 | ||||
|
# Connect to the IPython cluster and load pidigits.py on every engine so
# that the counting functions are defined there.
mec = client.MultiEngineClient(profile='mycluster')
mec.run('pidigits.py')

# Each input file holds 10m digits (see ``files`` above).
digits_per_file = 10.0e6


# Run 10m digits on 1 engine
mapper = mec.mapper(targets=0)
t1 = clock()

freqs10m = mapper.map(compute_two_digit_freqs, files[:1])[0]

t2 = clock()
digits_per_second1 = digits_per_file/(t2-t1)
print("Digits per second (1 core, 10m digits):  %s" % digits_per_second1)


# Run one file per engine, up to the 15 files listed in ``files``
# (150m digits when 15 or more engines are available).
files_all = files[:len(mec)]
# Derive the digit count from the files actually mapped, so the reported
# throughput stays correct when the cluster has fewer than 15 engines.
digits_all = digits_per_file*len(files_all)
t1 = clock()

freqs_all = mec.map(compute_two_digit_freqs, files_all)
# NOTE: the ``150m`` / ``8`` suffixes are kept so existing interactive
# use of these names keeps working; the values cover ``digits_all`` digits.
freqs150m = reduce_freqs(freqs_all)

t2 = clock()
digits_per_second8 = digits_all/(t2-t1)
print("Digits per second (parallel, %dm digits):  %s"
      % (digits_all/1.0e6, digits_per_second8))

print("Speedup:  %s" % (digits_per_second8/digits_per_second1))

plot_two_digit_freqs(freqs150m)
plt.title("2 digit sequences in %dm digits of pi" % (digits_all/1.0e6))
|
67 |
@@ -0,0 +1,126 b'' | |||||
|
1 | """Compute statistics on the digits of pi. | |||
|
2 | ||||
|
3 | This uses precomputed digits of pi from the website | |||
|
4 | of Professor Yasumasa Kanada at the University of | |||
|
5 | Tokyo: http://www.super-computing.org/ | |||
|
6 | ||||
|
7 | Currently, there are only functions to read the | |||
|
8 | .txt (non-compressed, non-binary) files, but adding | |||
|
9 | support for compression and binary files would be | |||
|
10 | straightforward. | |||
|
11 | ||||
|
12 | This focuses on computing the number of times that | |||
|
13 | all 1-, 2-, and n-digit sequences occur in the digits of pi. | |||
|
14 | If the digits of pi are truly random, these frequencies | |||
|
15 | should be equal. | |||
|
16 | """ | |||
|
17 | ||||
|
18 | # Import statements | |||
|
19 | ||||
|
20 | from __future__ import division, with_statement | |||
|
21 | import numpy as np | |||
|
22 | from matplotlib import pyplot as plt | |||
|
23 | ||||
|
24 | # Top-level functions | |||
|
25 | ||||
|
def compute_one_digit_freqs(filename):
    """
    Read the digits in ``filename`` and return their 1 digit freq. counts.
    """
    return one_digit_freqs(txt_file_to_digits(filename))
|
30 | ||||
|
def compute_two_digit_freqs(filename):
    """
    Read the digits in ``filename`` and return their 2 digit freq. counts.
    """
    return two_digit_freqs(txt_file_to_digits(filename))
|
35 | ||||
|
def compute_n_digit_freqs(filename, n):
    """
    Read the digits in ``filename`` and return their n digit freq. counts.
    """
    return n_digit_freqs(txt_file_to_digits(filename), n)
|
40 | ||||
|
41 | # Read digits from a txt file | |||
|
42 | ||||
|
def txt_file_to_digits(filename, the_type=str):
    """
    Yield the digits of pi read from a .txt file.

    Each decimal digit character found in ``filename`` is passed through
    ``the_type`` (``str`` by default, ``int`` is the other obvious choice)
    and yielded in file order.  Every other character -- newlines, spaces,
    tabs, carriage returns, a decimal point -- is skipped, so the
    counting functions never receive something ``int()`` cannot parse.
    """
    with open(filename, 'r') as f:
        # Iterate the file object directly: only one line is held in
        # memory at a time instead of the whole multi-megabyte file.
        for line in f:
            for c in line:
                if c in '0123456789':
                    yield the_type(c)
|
52 | ||||
|
53 | # Actual counting functions | |||
|
54 | ||||
|
def one_digit_freqs(digits, normalize=False):
    """
    Consume digits of pi and compute 1 digit freq. counts.

    Returns a length-10 array whose entry ``k`` is the number of times
    digit ``k`` was seen.  With ``normalize=True`` the counts are divided
    by their sum.
    """
    counts = np.zeros(10, dtype='i4')
    for digit in digits:
        counts[int(digit)] += 1
    if not normalize:
        return counts
    return counts/counts.sum()
|
65 | ||||
|
def two_digit_freqs(digits, normalize=False):
    """
    Consume digits of pi and compute 2 digits freq. counts.

    ``digits`` may be any iterable of digits, given either as one-character
    strings or as integers.  Every adjacent (overlapping) pair ``ab``
    increments entry ``10*a + b`` of the returned length-100 array, so
    ``N`` digits contribute ``N - 1`` pairs, including the final pair.
    Fewer than two digits yields all-zero counts.

    With ``normalize=True`` the counts are divided by their sum.
    """
    freqs = np.zeros(100, dtype='i4')
    previous = None
    for d in digits:
        current = int(d)
        if previous is not None:
            # Arithmetic index rather than string concatenation, so that
            # integer digits are handled as well as characters.
            freqs[10*previous + current] += 1
        previous = current
    if normalize:
        total = freqs.sum()
        # Avoid 0/0 (an array of NaNs) when no pair was seen.
        freqs = freqs/total if total else freqs.astype(float)
    return freqs
|
81 | ||||
|
def n_digit_freqs(digits, n, normalize=False):
    """
    Consume digits of pi and compute n digits freq. counts.

    ``digits`` may be any iterable of digits (one-character strings or
    integers).  Every window of ``n`` consecutive digits, read as a base-10
    number, increments the matching entry of the returned array of length
    ``10**n``; ``N`` digits contribute ``N - n + 1`` windows, including the
    final one.  Fewer than ``n`` digits yields all-zero counts.

    This should only be used for 1-6 digits.

    Raises ``ValueError`` if ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be at least 1, got %r" % (n,))
    size = pow(10, n)
    freqs = np.zeros(size, dtype='i4')
    index = 0
    seen = 0
    for d in digits:
        # Rolling window value: shift in the new digit and drop the oldest
        # one via the modulus, instead of rebuilding a string every step.
        index = (index*10 + int(d)) % size
        seen += 1
        if seen >= n:
            freqs[index] += 1
    if normalize:
        total = freqs.sum()
        # Avoid 0/0 (an array of NaNs) when no full window was seen.
        freqs = freqs/total if total else freqs.astype(float)
    return freqs
|
100 | ||||
|
101 | # Plotting functions | |||
|
102 | ||||
|
def plot_two_digit_freqs(f2):
    """
    Plot two digits frequency counts using matplotlib.

    A copy of ``f2`` is viewed as a 10x10 grid (rows: first digit,
    columns: second digit), drawn with ``matshow`` plus a colorbar, and
    each cell is labelled with its two-digit sequence.  Returns the
    object produced by ``plt.matshow``.
    """
    grid = f2.copy().reshape((10, 10))
    image = plt.matshow(grid)
    plt.colorbar()
    for col in range(10):
        for row in range(10):
            # x is the column (second digit), y is the row (first digit).
            plt.text(col - 0.2, row + 0.2, '%d%d' % (row, col))
    plt.ylabel('First digit')
    plt.xlabel('Second digit')
    return image
|
117 | ||||
|
def plot_one_digit_freqs(f1):
    """
    Plot one digit frequency counts using matplotlib.

    Draws ``f1`` as blue circles joined by a line, labels the figure, and
    returns whatever ``plt.plot`` returned.
    """
    plotted = plt.plot(f1, 'bo-')
    plt.title('Single digit counts in pi')
    plt.xlabel('Digit')
    plt.ylabel('Count')
    return plotted
|
126 | return ax |
General Comments 0
You need to be logged in to leave comments.
Login now