##// END OF EJS Templates
Adding pidigits.py and parallelpi.py to examples.
bgranger -
Show More
@@ -0,0 +1,67 b''
1 """Calculate statistics on the digits of pi in parallel.
2
3 This program uses the functions in :file:`pidigits.py` to calculate
4 the frequencies of 2 digit sequences in the digits of pi. The
5 results are plotted using matplotlib.
6
7 To run, text files from http://www.super-computing.org/
8 must be installed in the working directory of the IPython engines.
9 The actual filenames to be used can be set with the ``filestring``
10 variable below.
11
12 The dataset we have been using for this is the 200 million digit one here:
13 ftp://pi.super-computing.org/.2/pi200m/
14 """
15
16 from IPython.kernel import client
17 from matplotlib import pyplot as plt
18 import numpy as np
19 from pidigits import *
20 from timeit import default_timer as clock
21
22
23 # Files with digits of pi (10m digits each)
# Name pattern of the digit files; %(i)02d is the zero-padded file number
# (the 'of20' suffix suggests the full dataset is split into 20 files).
filestring = 'pi200m-ascii-%(i)02dof20.txt'
# Build the names of files number 01 through 15.
files = []
for i in range(1, 16):
    files.append(filestring % {'i': i})
26
27
28 # A function for reducing the frequencies calculated
29 # by different engines.
def reduce_freqs(freqlist):
    """
    Add up a sequence of frequency arrays element-wise.

    The accumulator is created with the shape and dtype of the first
    entry of ``freqlist`` and every entry is added into it in place, so
    none of the input arrays is modified.  Returns the accumulated array.
    """
    total = np.zeros_like(freqlist[0])
    for partial in freqlist:
        # In-place ufunc call; same effect as ``total += partial``.
        np.add(total, partial, out=total)
    return total
35
36
# Connect to the IPython cluster and run pidigits.py through the client
# (presumably this defines the pidigits functions on the engines -- confirm
# against the IPython.kernel MultiEngineClient documentation).
mec = client.MultiEngineClient(profile='mycluster')
mec.run('pidigits.py')

# Each input file holds 10 million digits (see the comment on ``files``).
# All throughput figures below are derived from this value and from the
# number of files actually processed, instead of being hard-coded.
digits_per_file = 10.0e6


# Baseline: run one file (10m digits) on a single engine (target 0).
mapper = mec.mapper(targets=0)
t1 = clock()

freqs10m = mapper.map(compute_two_digit_freqs, files[:1])[0]

t2 = clock()
digits_per_second1 = digits_per_file/(t2-t1)
# Single-argument print(...) is valid on both Python 2 and 3; the double
# space reproduces the output of the original ``print "label: ", value``.
print("Digits per second (1 core, 10m digits):  %s" % digits_per_second1)


# Parallel run: at most one file per engine.  With 15 or more engines this
# processes all 15 files (150m digits); with fewer engines the digit count
# shrinks accordingly, so it must not be assumed to be 150m.
parallel_files = files[:len(mec)]
t1 = clock()

freqs_all = mec.map(compute_two_digit_freqs, parallel_files)
# NOTE: the names ``freqs150m`` and ``digits_per_second8`` are kept for
# backward compatibility even though the sizes are now computed.
freqs150m = reduce_freqs(freqs_all)

t2 = clock()
total_digits = digits_per_file*len(parallel_files)
total_mdigits = int(total_digits/1.0e6)
digits_per_second8 = total_digits/(t2-t1)
print("Digits per second (%d files in parallel, %dm digits):  %s"
      % (len(parallel_files), total_mdigits, digits_per_second8))

print("Speedup:  %s" % (digits_per_second8/digits_per_second1))

plot_two_digit_freqs(freqs150m)
plt.title("2 digit sequences in %dm digits of pi" % total_mdigits)
67
@@ -0,0 +1,126 b''
1 """Compute statistics on the digits of pi.
2
3 This uses precomputed digits of pi from the website
4 of Professor Yasumasa Kanada at the University of
5 Tokyo: http://www.super-computing.org/
6
7 Currently, there are only functions to read the
8 .txt (non-compressed, non-binary) files, but adding
9 support for compression and binary files would be
10 straightforward.
11
12 This focuses on computing the number of times that
13 all 1-, 2-, ..., n-digit sequences occur in the digits of pi.
14 If the digits of pi are truly random, these frequencies
15 should be equal.
16 """
17
18 # Import statements
19
20 from __future__ import division, with_statement
21 import numpy as np
22 from matplotlib import pyplot as plt
23
24 # Top-level functions
25
def compute_one_digit_freqs(filename):
    """
    Read the digits stored in ``filename`` and return their
    single-digit frequency counts.
    """
    return one_digit_freqs(txt_file_to_digits(filename))
30
def compute_two_digit_freqs(filename):
    """
    Read the digits stored in ``filename`` and return the frequency
    counts of their two-digit sequences.
    """
    return two_digit_freqs(txt_file_to_digits(filename))
35
def compute_n_digit_freqs(filename, n):
    """
    Read the digits stored in ``filename`` and return the frequency
    counts of their ``n``-digit sequences.
    """
    return n_digit_freqs(txt_file_to_digits(filename), n)
40
41 # Read digits from a txt file
42
def txt_file_to_digits(filename, the_type=str):
    """
    Yield the digits of pi read from a .txt file.

    Parameters
    ----------
    filename : str
        Path of a plain-text file of digits.
    the_type : callable, optional
        Applied to each one-character string before it is yielded.
        Defaults to ``str``; pass ``int`` to receive integers.

    Yields
    ------
    One converted value per non-whitespace character, in file order.
    """
    with open(filename, 'r') as f:
        # Iterate the file lazily instead of calling readlines(), so the
        # whole file is never held in memory as a list of lines.
        for line in f:
            for c in line:
                # Skip every whitespace character, not only newline and
                # space: a stray carriage return or tab would otherwise be
                # yielded as a "digit" and corrupt the counts downstream.
                if not c.isspace():
                    yield the_type(c)
52
53 # Actual counting functions
54
def one_digit_freqs(digits, normalize=False):
    """
    Count how often each of the digits 0-9 occurs in ``digits``.

    ``digits`` is any iterable whose items can be passed to ``int()``.
    Returns a length-10 'i4' array of counts, or, when ``normalize`` is
    true, the counts divided by their sum.
    """
    counts = np.zeros(10, dtype='i4')
    for digit in digits:
        counts[int(digit)] += 1
    if not normalize:
        return counts
    return counts/counts.sum()
65
def two_digit_freqs(digits, normalize=False):
    """
    Consume digits of pi and compute 2 digits freq. counts.

    Parameters
    ----------
    digits : iterable
        Digits in order; each item must be accepted by ``int()`` (one
        character strings or integers).  Any iterable works, not only
        iterators.
    normalize : bool, optional
        If true, return relative frequencies instead of raw counts.

    Returns
    -------
    numpy array of length 100.  Entry ``10*a + b`` is the number of times
    digit ``a`` is immediately followed by digit ``b``.  For ``k`` input
    digits all ``k - 1`` overlapping pairs are counted, including the
    final one; fewer than two digits give all zeros.
    """
    freqs = np.zeros(100, dtype='i4')
    prev = None
    for d in digits:
        cur = int(d)
        # Count the pair as soon as its second digit arrives, so the last
        # pair of the stream is never dropped.
        if prev is not None:
            freqs[10*prev + cur] += 1
        prev = cur
    if normalize:
        total = freqs.sum()
        if total:
            freqs = freqs/float(total)
        else:
            # No pairs at all: return zeros rather than dividing 0 by 0.
            freqs = freqs.astype(float)
    return freqs
81
def n_digit_freqs(digits, n, normalize=False):
    """
    Consume digits of pi and compute n digits freq. counts.

    This should only be used for 1-6 digits.

    Parameters
    ----------
    digits : iterable
        Digits in order; each item must be accepted by ``int()``.
    n : int
        Length of the digit sequences to count; must be at least 1.
    normalize : bool, optional
        If true, return relative frequencies instead of raw counts.

    Returns
    -------
    numpy array of length ``10**n``.  Entry ``k`` is the number of times
    the n-digit sequence whose decimal value is ``k`` occurs (leading
    zeros allowed).  For ``k`` input digits all ``k - n + 1`` overlapping
    windows are counted, including the final one; fewer than ``n`` digits
    give all zeros.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")
    size = pow(10, n)
    freqs = np.zeros(size, dtype='i4')
    index = 0
    seen = 0
    for d in digits:
        # Rolling window value: shift left one decimal place, append the
        # new digit, and drop the digit that fell out of the window.
        index = (index*10 + int(d)) % size
        seen += 1
        # The window is only complete once n digits have been consumed.
        if seen >= n:
            freqs[index] += 1
    if normalize:
        total = freqs.sum()
        if total:
            freqs = freqs/float(total)
        else:
            # No complete window: return zeros rather than dividing 0 by 0.
            freqs = freqs.astype(float)
    return freqs
100
101 # Plotting functions
102
def plot_two_digit_freqs(f2):
    """
    Show two-digit frequency counts as a labelled 10x10 matrix plot.

    A copy of ``f2`` is reshaped to (10, 10) and drawn with
    ``plt.matshow`` plus a colorbar.  Every cell is annotated with its
    two-digit sequence; the y axis is the first digit and the x axis the
    second.  ``f2`` itself is left untouched.  Returns the object
    returned by ``plt.matshow``.
    """
    grid = f2.copy().reshape(10, 10)
    image = plt.matshow(grid)
    plt.colorbar()
    # Walk the cells column by column: x is the second digit, y the first.
    for cell in range(100):
        second, first = divmod(cell, 10)
        plt.text(second - 0.2, first + 0.2, str(first) + str(second))
    plt.ylabel('First digit')
    plt.xlabel('Second digit')
    return image
117
def plot_one_digit_freqs(f1):
    """
    Draw single-digit frequency counts as a line plot.

    ``f1`` is plotted with blue circle markers joined by lines ('bo-'),
    and the title and axis labels are set.  Returns the value returned
    by ``plt.plot``.
    """
    lines = plt.plot(f1, 'bo-')
    plt.title('Single digit counts in pi')
    plt.xlabel('Digit')
    plt.ylabel('Count')
    return lines
General Comments 0
You need to be logged in to leave comments. Login now