Show More
@@ -1,158 +1,157 b'' | |||||
1 | """Compute statistics on the digits of pi. |
|
1 | """Compute statistics on the digits of pi. | |
2 |
|
2 | |||
3 | This uses precomputed digits of pi from the website |
|
3 | This uses precomputed digits of pi from the website | |
4 | of Professor Yasumasa Kanada at the University of |
|
4 | of Professor Yasumasa Kanada at the University of | |
5 | Tokyo: http://www.super-computing.org/ |

5 | Tokyo: http://www.super-computing.org/ | |
6 |
|
6 | |||
7 | Currently, there are only functions to read the |
|
7 | Currently, there are only functions to read the | |
8 | .txt (non-compressed, non-binary) files, but adding |
|
8 | .txt (non-compressed, non-binary) files, but adding | |
9 | support for compression and binary files would be |
|
9 | support for compression and binary files would be | |
10 | straightforward. |
|
10 | straightforward. | |
11 |
|
11 | |||
12 | This focuses on computing the number of times that |
|
12 | This focuses on computing the number of times that | |
13 | all 1, 2, n digits sequences occur in the digits of pi. |
|
13 | all 1, 2, n digits sequences occur in the digits of pi. | |
14 | If the digits of pi are truly random, these frequencies |
|
14 | If the digits of pi are truly random, these frequencies | |
15 | should be equal. |
|
15 | should be equal. | |
16 | """ |
|
16 | """ | |
17 |
|
17 | |||
18 | # Import statements |
|
18 | # Import statements | |
19 | from __future__ import division, with_statement |
|
19 | from __future__ import division, with_statement | |
20 |
|
20 | |||
21 | import numpy as np |
|
21 | import numpy as np | |
22 | from matplotlib import pyplot as plt |
|
22 | from matplotlib import pyplot as plt | |
23 | from six import advance_iterator |
|
|||
24 |
|
23 | |||
25 | # Top-level functions |
|
24 | # Top-level functions | |
26 |
|
25 | |||
27 | def fetch_pi_file(filename): |
|
26 | def fetch_pi_file(filename): | |
28 | """This will download a segment of pi from super-computing.org |
|
27 | """This will download a segment of pi from super-computing.org | |
29 | if the file is not already present. |
|
28 | if the file is not already present. | |
30 | """ |
|
29 | """ | |
31 | import os, urllib |
|
30 | import os, urllib | |
32 | ftpdir="ftp://pi.super-computing.org/.2/pi200m/" |
|
31 | ftpdir="ftp://pi.super-computing.org/.2/pi200m/" | |
33 | if os.path.exists(filename): |
|
32 | if os.path.exists(filename): | |
34 | # we already have it |
|
33 | # we already have it | |
35 | return |
|
34 | return | |
36 | else: |
|
35 | else: | |
37 | # download it |
|
36 | # download it | |
38 | urllib.urlretrieve(ftpdir+filename,filename) |
|
37 | urllib.urlretrieve(ftpdir+filename,filename) | |
39 |
|
38 | |||
40 | def compute_one_digit_freqs(filename): |
|
39 | def compute_one_digit_freqs(filename): | |
41 | """ |
|
40 | """ | |
42 | Read digits of pi from a file and compute the 1 digit frequencies. |
|
41 | Read digits of pi from a file and compute the 1 digit frequencies. | |
43 | """ |
|
42 | """ | |
44 | d = txt_file_to_digits(filename) |
|
43 | d = txt_file_to_digits(filename) | |
45 | freqs = one_digit_freqs(d) |
|
44 | freqs = one_digit_freqs(d) | |
46 | return freqs |
|
45 | return freqs | |
47 |
|
46 | |||
48 | def compute_two_digit_freqs(filename): |
|
47 | def compute_two_digit_freqs(filename): | |
49 | """ |
|
48 | """ | |
50 | Read digits of pi from a file and compute the 2 digit frequencies. |
|
49 | Read digits of pi from a file and compute the 2 digit frequencies. | |
51 | """ |
|
50 | """ | |
52 | d = txt_file_to_digits(filename) |
|
51 | d = txt_file_to_digits(filename) | |
53 | freqs = two_digit_freqs(d) |
|
52 | freqs = two_digit_freqs(d) | |
54 | return freqs |
|
53 | return freqs | |
55 |
|
54 | |||
56 | def reduce_freqs(freqlist): |
|
55 | def reduce_freqs(freqlist): | |
57 | """ |
|
56 | """ | |
58 | Add up a list of freq counts to get the total counts. |
|
57 | Add up a list of freq counts to get the total counts. | |
59 | """ |
|
58 | """ | |
60 | allfreqs = np.zeros_like(freqlist[0]) |
|
59 | allfreqs = np.zeros_like(freqlist[0]) | |
61 | for f in freqlist: |
|
60 | for f in freqlist: | |
62 | allfreqs += f |
|
61 | allfreqs += f | |
63 | return allfreqs |
|
62 | return allfreqs | |
64 |
|
63 | |||
65 | def compute_n_digit_freqs(filename, n): |
|
64 | def compute_n_digit_freqs(filename, n): | |
66 | """ |
|
65 | """ | |
67 | Read digits of pi from a file and compute the n digit frequencies. |
|
66 | Read digits of pi from a file and compute the n digit frequencies. | |
68 | """ |
|
67 | """ | |
69 | d = txt_file_to_digits(filename) |
|
68 | d = txt_file_to_digits(filename) | |
70 | freqs = n_digit_freqs(d, n) |
|
69 | freqs = n_digit_freqs(d, n) | |
71 | return freqs |
|
70 | return freqs | |
72 |
|
71 | |||
73 | # Read digits from a txt file |
|
72 | # Read digits from a txt file | |
74 |
|
73 | |||
75 | def txt_file_to_digits(filename, the_type=str): |
|
74 | def txt_file_to_digits(filename, the_type=str): | |
76 | """ |
|
75 | """ | |
77 | Yield the digits of pi read from a .txt file. |
|
76 | Yield the digits of pi read from a .txt file. | |
78 | """ |
|
77 | """ | |
79 | with open(filename, 'r') as f: |
|
78 | with open(filename, 'r') as f: | |
80 | for line in f.readlines(): |
|
79 | for line in f.readlines(): | |
81 | for c in line: |
|
80 | for c in line: | |
82 | if c != '\n' and c!= ' ': |
|
81 | if c != '\n' and c!= ' ': | |
83 | yield the_type(c) |
|
82 | yield the_type(c) | |
84 |
|
83 | |||
85 | # Actual counting functions |
|
84 | # Actual counting functions | |
86 |
|
85 | |||
87 | def one_digit_freqs(digits, normalize=False): |
|
86 | def one_digit_freqs(digits, normalize=False): | |
88 | """ |
|
87 | """ | |
89 | Consume digits of pi and compute 1 digit freq. counts. |
|
88 | Consume digits of pi and compute 1 digit freq. counts. | |
90 | """ |
|
89 | """ | |
91 | freqs = np.zeros(10, dtype='i4') |
|
90 | freqs = np.zeros(10, dtype='i4') | |
92 | for d in digits: |
|
91 | for d in digits: | |
93 | freqs[int(d)] += 1 |
|
92 | freqs[int(d)] += 1 | |
94 | if normalize: |
|
93 | if normalize: | |
95 | freqs = freqs/freqs.sum() |
|
94 | freqs = freqs/freqs.sum() | |
96 | return freqs |
|
95 | return freqs | |
97 |
|
96 | |||
98 | def two_digit_freqs(digits, normalize=False): |
|
97 | def two_digit_freqs(digits, normalize=False): | |
99 | """ |
|
98 | """ | |
100 | Consume digits of pi and compute 2 digits freq. counts. |
|
99 | Consume digits of pi and compute 2 digits freq. counts. | |
101 | """ |
|
100 | """ | |
102 | freqs = np.zeros(100, dtype='i4') |
|
101 | freqs = np.zeros(100, dtype='i4') | |
103 | last = advance_iterator(digits) |
|
102 | last = next(digits) | |
104 | this = advance_iterator(digits) |
|
103 | this = next(digits) | |
105 | for d in digits: |
|
104 | for d in digits: | |
106 | index = int(last + this) |
|
105 | index = int(last + this) | |
107 | freqs[index] += 1 |
|
106 | freqs[index] += 1 | |
108 | last = this |
|
107 | last = this | |
109 | this = d |
|
108 | this = d | |
110 | if normalize: |
|
109 | if normalize: | |
111 | freqs = freqs/freqs.sum() |
|
110 | freqs = freqs/freqs.sum() | |
112 | return freqs |
|
111 | return freqs | |
113 |
|
112 | |||
114 | def n_digit_freqs(digits, n, normalize=False): |
|
113 | def n_digit_freqs(digits, n, normalize=False): | |
115 | """ |
|
114 | """ | |
116 | Consume digits of pi and compute n digits freq. counts. |
|
115 | Consume digits of pi and compute n digits freq. counts. | |
117 |
|
116 | |||
118 | This should only be used for 1-6 digits. |
|
117 | This should only be used for 1-6 digits. | |
119 | """ |
|
118 | """ | |
120 | freqs = np.zeros(pow(10,n), dtype='i4') |
|
119 | freqs = np.zeros(pow(10,n), dtype='i4') | |
121 | current = np.zeros(n, dtype=int) |
|
120 | current = np.zeros(n, dtype=int) | |
122 | for i in range(n): |
|
121 | for i in range(n): | |
123 | current[i] = advance_iterator(digits) |
|
122 | current[i] = next(digits) | |
124 | for d in digits: |
|
123 | for d in digits: | |
125 | index = int(''.join(map(str, current))) |
|
124 | index = int(''.join(map(str, current))) | |
126 | freqs[index] += 1 |
|
125 | freqs[index] += 1 | |
127 | current[0:-1] = current[1:] |
|
126 | current[0:-1] = current[1:] | |
128 | current[-1] = d |
|
127 | current[-1] = d | |
129 | if normalize: |
|
128 | if normalize: | |
130 | freqs = freqs/freqs.sum() |
|
129 | freqs = freqs/freqs.sum() | |
131 | return freqs |
|
130 | return freqs | |
132 |
|
131 | |||
133 | # Plotting functions |
|
132 | # Plotting functions | |
134 |
|
133 | |||
135 | def plot_two_digit_freqs(f2): |
|
134 | def plot_two_digit_freqs(f2): | |
136 | """ |
|
135 | """ | |
137 | Plot two digits frequency counts using matplotlib. |
|
136 | Plot two digits frequency counts using matplotlib. | |
138 | """ |
|
137 | """ | |
139 | f2_copy = f2.copy() |
|
138 | f2_copy = f2.copy() | |
140 | f2_copy.shape = (10,10) |
|
139 | f2_copy.shape = (10,10) | |
141 | ax = plt.matshow(f2_copy) |
|
140 | ax = plt.matshow(f2_copy) | |
142 | plt.colorbar() |
|
141 | plt.colorbar() | |
143 | for i in range(10): |
|
142 | for i in range(10): | |
144 | for j in range(10): |
|
143 | for j in range(10): | |
145 | plt.text(i-0.2, j+0.2, str(j)+str(i)) |
|
144 | plt.text(i-0.2, j+0.2, str(j)+str(i)) | |
146 | plt.ylabel('First digit') |
|
145 | plt.ylabel('First digit') | |
147 | plt.xlabel('Second digit') |
|
146 | plt.xlabel('Second digit') | |
148 | return ax |
|
147 | return ax | |
149 |
|
148 | |||
150 | def plot_one_digit_freqs(f1): |
|
149 | def plot_one_digit_freqs(f1): | |
151 | """ |
|
150 | """ | |
152 | Plot one digit frequency counts using matplotlib. |
|
151 | Plot one digit frequency counts using matplotlib. | |
153 | """ |
|
152 | """ | |
154 | ax = plt.plot(f1,'bo-') |
|
153 | ax = plt.plot(f1,'bo-') | |
155 | plt.title('Single digit counts in pi') |
|
154 | plt.title('Single digit counts in pi') | |
156 | plt.xlabel('Digit') |
|
155 | plt.xlabel('Digit') | |
157 | plt.ylabel('Count') |
|
156 | plt.ylabel('Count') | |
158 | return ax |
|
157 | return ax |
General Comments 0
You need to be logged in to leave comments.
Login now