Show More
@@ -1,161 +1,162 b'' | |||||
1 | """Compute statistics on the digits of pi. |
|
1 | """Compute statistics on the digits of pi. | |
2 |
|
2 | |||
3 | This uses precomputed digits of pi from the website |
|
3 | This uses precomputed digits of pi from the website | |
4 | of Professor Yasumasa Kanada at the University of |
|
4 | of Professor Yasumasa Kanada at the University of | |
5 | Tokoyo: http://www.super-computing.org/ |
|
5 | Tokoyo: http://www.super-computing.org/ | |
6 |
|
6 | |||
7 | Currently, there are only functions to read the |
|
7 | Currently, there are only functions to read the | |
8 | .txt (non-compressed, non-binary) files, but adding |
|
8 | .txt (non-compressed, non-binary) files, but adding | |
9 | support for compression and binary files would be |
|
9 | support for compression and binary files would be | |
10 | straightforward. |
|
10 | straightforward. | |
11 |
|
11 | |||
12 | This focuses on computing the number of times that |
|
12 | This focuses on computing the number of times that | |
13 | all 1, 2, n digits sequences occur in the digits of pi. |
|
13 | all 1, 2, n digits sequences occur in the digits of pi. | |
14 | If the digits of pi are truly random, these frequencies |
|
14 | If the digits of pi are truly random, these frequencies | |
15 | should be equal. |
|
15 | should be equal. | |
16 | """ |
|
16 | """ | |
17 |
|
17 | |||
18 | # Import statements |
|
18 | # Import statements | |
19 | from __future__ import division, with_statement |
|
19 | from __future__ import division, with_statement | |
20 |
|
20 | |||
21 | import numpy as np |
|
21 | import numpy as np | |
22 | from matplotlib import pyplot as plt |
|
22 | from matplotlib import pyplot as plt | |
23 |
|
23 | |||
24 | # Top-level functions |
|
24 | try : #python2 | |
|
25 | from urllib import urlretrieve | |||
|
26 | except : #python3 | |||
|
27 | from urllib.request import urlretrieve | |||
|
28 | ||||
|
29 | # Top-level functions | |||
25 |
|
30 | |||
26 | def fetch_pi_file(filename): |
|
31 | def fetch_pi_file(filename): | |
27 | """This will download a segment of pi from super-computing.org |
|
32 | """This will download a segment of pi from super-computing.org | |
28 | if the file is not already present. |
|
33 | if the file is not already present. | |
29 | """ |
|
34 | """ | |
30 | import os, urllib |
|
35 | import os, urllib | |
31 | ftpdir="ftp://pi.super-computing.org/.2/pi200m/" |
|
36 | ftpdir="ftp://pi.super-computing.org/.2/pi200m/" | |
32 | if os.path.exists(filename): |
|
37 | if os.path.exists(filename): | |
33 | # we already have it |
|
38 | # we already have it | |
34 | return |
|
39 | return | |
35 | else: |
|
40 | else: | |
36 | # download it |
|
41 | # download it | |
37 | try : #python2 |
|
42 | urlretrieve(ftpdir+filename,filename) | |
38 | urllib.urlretrieve(ftpdir+filename,filename) |
|
|||
39 | except : #python3 |
|
|||
40 | import urllib.request |
|
|||
41 | urllib.request.urlretrieve(ftpdir+filename,filename) |
|
|||
42 |
|
43 | |||
43 | def compute_one_digit_freqs(filename): |
|
44 | def compute_one_digit_freqs(filename): | |
44 | """ |
|
45 | """ | |
45 | Read digits of pi from a file and compute the 1 digit frequencies. |
|
46 | Read digits of pi from a file and compute the 1 digit frequencies. | |
46 | """ |
|
47 | """ | |
47 | d = txt_file_to_digits(filename) |
|
48 | d = txt_file_to_digits(filename) | |
48 | freqs = one_digit_freqs(d) |
|
49 | freqs = one_digit_freqs(d) | |
49 | return freqs |
|
50 | return freqs | |
50 |
|
51 | |||
51 | def compute_two_digit_freqs(filename): |
|
52 | def compute_two_digit_freqs(filename): | |
52 | """ |
|
53 | """ | |
53 | Read digits of pi from a file and compute the 2 digit frequencies. |
|
54 | Read digits of pi from a file and compute the 2 digit frequencies. | |
54 | """ |
|
55 | """ | |
55 | d = txt_file_to_digits(filename) |
|
56 | d = txt_file_to_digits(filename) | |
56 | freqs = two_digit_freqs(d) |
|
57 | freqs = two_digit_freqs(d) | |
57 | return freqs |
|
58 | return freqs | |
58 |
|
59 | |||
59 | def reduce_freqs(freqlist): |
|
60 | def reduce_freqs(freqlist): | |
60 | """ |
|
61 | """ | |
61 | Add up a list of freq counts to get the total counts. |
|
62 | Add up a list of freq counts to get the total counts. | |
62 | """ |
|
63 | """ | |
63 | allfreqs = np.zeros_like(freqlist[0]) |
|
64 | allfreqs = np.zeros_like(freqlist[0]) | |
64 | for f in freqlist: |
|
65 | for f in freqlist: | |
65 | allfreqs += f |
|
66 | allfreqs += f | |
66 | return allfreqs |
|
67 | return allfreqs | |
67 |
|
68 | |||
68 | def compute_n_digit_freqs(filename, n): |
|
69 | def compute_n_digit_freqs(filename, n): | |
69 | """ |
|
70 | """ | |
70 | Read digits of pi from a file and compute the n digit frequencies. |
|
71 | Read digits of pi from a file and compute the n digit frequencies. | |
71 | """ |
|
72 | """ | |
72 | d = txt_file_to_digits(filename) |
|
73 | d = txt_file_to_digits(filename) | |
73 | freqs = n_digit_freqs(d, n) |
|
74 | freqs = n_digit_freqs(d, n) | |
74 | return freqs |
|
75 | return freqs | |
75 |
|
76 | |||
76 | # Read digits from a txt file |
|
77 | # Read digits from a txt file | |
77 |
|
78 | |||
78 | def txt_file_to_digits(filename, the_type=str): |
|
79 | def txt_file_to_digits(filename, the_type=str): | |
79 | """ |
|
80 | """ | |
80 | Yield the digits of pi read from a .txt file. |
|
81 | Yield the digits of pi read from a .txt file. | |
81 | """ |
|
82 | """ | |
82 | with open(filename, 'r') as f: |
|
83 | with open(filename, 'r') as f: | |
83 | for line in f.readlines(): |
|
84 | for line in f.readlines(): | |
84 | for c in line: |
|
85 | for c in line: | |
85 | if c != '\n' and c!= ' ': |
|
86 | if c != '\n' and c!= ' ': | |
86 | yield the_type(c) |
|
87 | yield the_type(c) | |
87 |
|
88 | |||
88 | # Actual counting functions |
|
89 | # Actual counting functions | |
89 |
|
90 | |||
90 | def one_digit_freqs(digits, normalize=False): |
|
91 | def one_digit_freqs(digits, normalize=False): | |
91 | """ |
|
92 | """ | |
92 | Consume digits of pi and compute 1 digit freq. counts. |
|
93 | Consume digits of pi and compute 1 digit freq. counts. | |
93 | """ |
|
94 | """ | |
94 | freqs = np.zeros(10, dtype='i4') |
|
95 | freqs = np.zeros(10, dtype='i4') | |
95 | for d in digits: |
|
96 | for d in digits: | |
96 | freqs[int(d)] += 1 |
|
97 | freqs[int(d)] += 1 | |
97 | if normalize: |
|
98 | if normalize: | |
98 | freqs = freqs/freqs.sum() |
|
99 | freqs = freqs/freqs.sum() | |
99 | return freqs |
|
100 | return freqs | |
100 |
|
101 | |||
101 | def two_digit_freqs(digits, normalize=False): |
|
102 | def two_digit_freqs(digits, normalize=False): | |
102 | """ |
|
103 | """ | |
103 | Consume digits of pi and compute 2 digits freq. counts. |
|
104 | Consume digits of pi and compute 2 digits freq. counts. | |
104 | """ |
|
105 | """ | |
105 | freqs = np.zeros(100, dtype='i4') |
|
106 | freqs = np.zeros(100, dtype='i4') | |
106 | last = next(digits) |
|
107 | last = next(digits) | |
107 | this = next(digits) |
|
108 | this = next(digits) | |
108 | for d in digits: |
|
109 | for d in digits: | |
109 | index = int(last + this) |
|
110 | index = int(last + this) | |
110 | freqs[index] += 1 |
|
111 | freqs[index] += 1 | |
111 | last = this |
|
112 | last = this | |
112 | this = d |
|
113 | this = d | |
113 | if normalize: |
|
114 | if normalize: | |
114 | freqs = freqs/freqs.sum() |
|
115 | freqs = freqs/freqs.sum() | |
115 | return freqs |
|
116 | return freqs | |
116 |
|
117 | |||
117 | def n_digit_freqs(digits, n, normalize=False): |
|
118 | def n_digit_freqs(digits, n, normalize=False): | |
118 | """ |
|
119 | """ | |
119 | Consume digits of pi and compute n digits freq. counts. |
|
120 | Consume digits of pi and compute n digits freq. counts. | |
120 |
|
121 | |||
121 | This should only be used for 1-6 digits. |
|
122 | This should only be used for 1-6 digits. | |
122 | """ |
|
123 | """ | |
123 | freqs = np.zeros(pow(10,n), dtype='i4') |
|
124 | freqs = np.zeros(pow(10,n), dtype='i4') | |
124 | current = np.zeros(n, dtype=int) |
|
125 | current = np.zeros(n, dtype=int) | |
125 | for i in range(n): |
|
126 | for i in range(n): | |
126 | current[i] = next(digits) |
|
127 | current[i] = next(digits) | |
127 | for d in digits: |
|
128 | for d in digits: | |
128 | index = int(''.join(map(str, current))) |
|
129 | index = int(''.join(map(str, current))) | |
129 | freqs[index] += 1 |
|
130 | freqs[index] += 1 | |
130 | current[0:-1] = current[1:] |
|
131 | current[0:-1] = current[1:] | |
131 | current[-1] = d |
|
132 | current[-1] = d | |
132 | if normalize: |
|
133 | if normalize: | |
133 | freqs = freqs/freqs.sum() |
|
134 | freqs = freqs/freqs.sum() | |
134 | return freqs |
|
135 | return freqs | |
135 |
|
136 | |||
136 | # Plotting functions |
|
137 | # Plotting functions | |
137 |
|
138 | |||
138 | def plot_two_digit_freqs(f2): |
|
139 | def plot_two_digit_freqs(f2): | |
139 | """ |
|
140 | """ | |
140 | Plot two digits frequency counts using matplotlib. |
|
141 | Plot two digits frequency counts using matplotlib. | |
141 | """ |
|
142 | """ | |
142 | f2_copy = f2.copy() |
|
143 | f2_copy = f2.copy() | |
143 | f2_copy.shape = (10,10) |
|
144 | f2_copy.shape = (10,10) | |
144 | ax = plt.matshow(f2_copy) |
|
145 | ax = plt.matshow(f2_copy) | |
145 | plt.colorbar() |
|
146 | plt.colorbar() | |
146 | for i in range(10): |
|
147 | for i in range(10): | |
147 | for j in range(10): |
|
148 | for j in range(10): | |
148 | plt.text(i-0.2, j+0.2, str(j)+str(i)) |
|
149 | plt.text(i-0.2, j+0.2, str(j)+str(i)) | |
149 | plt.ylabel('First digit') |
|
150 | plt.ylabel('First digit') | |
150 | plt.xlabel('Second digit') |
|
151 | plt.xlabel('Second digit') | |
151 | return ax |
|
152 | return ax | |
152 |
|
153 | |||
153 | def plot_one_digit_freqs(f1): |
|
154 | def plot_one_digit_freqs(f1): | |
154 | """ |
|
155 | """ | |
155 | Plot one digit frequency counts using matplotlib. |
|
156 | Plot one digit frequency counts using matplotlib. | |
156 | """ |
|
157 | """ | |
157 | ax = plt.plot(f1,'bo-') |
|
158 | ax = plt.plot(f1,'bo-') | |
158 | plt.title('Single digit counts in pi') |
|
159 | plt.title('Single digit counts in pi') | |
159 | plt.xlabel('Digit') |
|
160 | plt.xlabel('Digit') | |
160 | plt.ylabel('Count') |
|
161 | plt.ylabel('Count') | |
161 | return ax |
|
162 | return ax |
General Comments 0
You need to be logged in to leave comments.
Login now