diff --git a/docs/examples/kernel/parallelpi.py b/docs/examples/kernel/parallelpi.py
new file mode 100644
index 0000000..8ed6952
--- /dev/null
+++ b/docs/examples/kernel/parallelpi.py
@@ -0,0 +1,76 @@
+"""Calculate statistics on the digits of pi in parallel.
+
+This program uses the functions in :file:`pidigits.py` to calculate
+the frequencies of 2 digit sequences in the digits of pi. The
+results are plotted using matplotlib.
+
+To run, text files from http://www.super-computing.org/
+must be installed in the working directory of the IPython engines.
+The actual filenames to be used can be set with the ``filestring``
+variable below.
+
+The dataset we have been using for this is the 200 million digit one here:
+ftp://pi.super-computing.org/.2/pi200m/
+"""
+
+from IPython.kernel import client
+from matplotlib import pyplot as plt
+import numpy as np
+from pidigits import compute_two_digit_freqs, plot_two_digit_freqs
+from timeit import default_timer as clock
+
+
+# Files with digits of pi (10m digits each)
+filestring = 'pi200m-ascii-%(i)02dof20.txt'
+files = [filestring % {'i':i} for i in range(1,16)]
+digits_per_file = 10.0e6
+
+
+def reduce_freqs(freqlist):
+    """Add up the frequency arrays calculated by different engines.
+
+    ``freqlist`` must be a non-empty sequence of equally shaped arrays;
+    the sum is returned as a new array shaped like the first one.
+    """
+    allfreqs = np.zeros_like(freqlist[0])
+    for f in freqlist:
+        allfreqs += f
+    return allfreqs
+
+
+# Connect to the IPython cluster
+mec = client.MultiEngineClient(profile='mycluster')
+mec.run('pidigits.py')
+
+
+# Run 10m digits on 1 engine
+mapper = mec.mapper(targets=0)
+t1 = clock()
+
+freqs10m = mapper.map(compute_two_digit_freqs, files[:1])[0]
+
+t2 = clock()
+digits_per_second1 = digits_per_file/(t2-t1)
+print "Digits per second (1 core, 10m digits): ", digits_per_second1
+
+
+# Run the first len(mec) files (at most all 15) across the cluster
+parallel_files = files[:len(mec)]
+t1 = clock()
+
+freqs_all = mec.map(compute_two_digit_freqs, parallel_files)
+freqs_parallel = reduce_freqs(freqs_all)
+
+t2 = clock()
+# Rate the run by the digits actually read: when len(mec) is below 15
+# fewer files are mapped, so a fixed 150m would overstate the speed.
+nfiles = len(parallel_files)
+total_digits = nfiles*digits_per_file
+digits_per_secondN = total_digits/(t2-t1)
+label = "Digits per second (%i files, %im digits): "
+print label % (nfiles, total_digits/1e6), digits_per_secondN
+
+print "Speedup: ", digits_per_secondN/digits_per_second1
+
+plot_two_digit_freqs(freqs_parallel)
+plt.title("2 digit sequences in %im digits of pi" % (total_digits/1e6))
diff --git a/docs/examples/kernel/pidigits.py b/docs/examples/kernel/pidigits.py
new file mode 100644
index 0000000..9a85f58
--- /dev/null
+++ b/docs/examples/kernel/pidigits.py
@@ -0,0 +1,170 @@
+"""Compute statistics on the digits of pi.
+
+This uses precomputed digits of pi from the website
+of Professor Yasumasa Kanada at the University of
+Tokyo: http://www.super-computing.org/
+
+Currently, there are only functions to read the
+.txt (non-compressed, non-binary) files, but adding
+support for compression and binary files would be
+straightforward.
+
+This focuses on computing the number of times that
+all 1, 2, ..., n digit sequences occur in the digits of pi.
+If the digits of pi are truly random, these frequencies
+should be equal.
+"""
+
+# Import statements
+
+from __future__ import division, with_statement
+import numpy as np
+from matplotlib import pyplot as plt
+
+# Top-level functions
+
+def compute_one_digit_freqs(filename):
+    """Return the 1 digit freq. counts of the digits in ``filename``."""
+    d = txt_file_to_digits(filename)
+    freqs = one_digit_freqs(d)
+    return freqs
+
+def compute_two_digit_freqs(filename):
+    """Return the 2 digit freq. counts of the digits in ``filename``."""
+    d = txt_file_to_digits(filename)
+    freqs = two_digit_freqs(d)
+    return freqs
+
+def compute_n_digit_freqs(filename, n):
+    """Return the n digit freq. counts of the digits in ``filename``."""
+    d = txt_file_to_digits(filename)
+    freqs = n_digit_freqs(d, n)
+    return freqs
+
+# Read digits from a txt file
+
+def txt_file_to_digits(filename, the_type=str):
+    """
+    Yield the digits of pi read from a .txt file.
+
+    All whitespace, including any kind of line ending, is skipped.  Each
+    remaining character is converted with ``the_type`` before it is yielded.
+    """
+    with open(filename, 'r') as f:
+        # Iterating over the file reads it line by line; readlines()
+        # would first load the whole file into memory.
+        for line in f:
+            for c in line:
+                if not c.isspace():
+                    yield the_type(c)
+
+# Actual counting functions
+
+def _normalized(freqs):
+    """
+    Return the counts in ``freqs`` as fractions of their total.
+
+    All-zero counts are returned as float zeros instead of dividing by zero.
+    """
+    total = freqs.sum()
+    if total == 0:
+        return freqs.astype(float)
+    return freqs/total
+
+def one_digit_freqs(digits, normalize=False):
+    """
+    Consume digits of pi and compute 1 digit freq. counts.
+
+    ``digits`` is an iterable of single digits given as characters or ints.
+    Entry ``d`` of the returned length 10 array counts the digit ``d``.  With
+    ``normalize`` the counts are returned as fractions of their total.
+    """
+    freqs = np.zeros(10, dtype='i4')
+    for d in digits:
+        freqs[int(d)] += 1
+    if normalize:
+        freqs = _normalized(freqs)
+    return freqs
+
+def two_digit_freqs(digits, normalize=False):
+    """
+    Consume digits of pi and compute 2 digits freq. counts.
+
+    ``digits`` is an iterable of single digits given as characters or ints.
+    Every pair of neighbouring digits, the last one included, is counted at
+    index ``10*first + second`` of the returned length 100 array.  Fewer
+    than two digits give all-zero counts.  With ``normalize`` the counts
+    are returned as fractions of their total.
+    """
+    freqs = np.zeros(100, dtype='i4')
+    last = None
+    for d in digits:
+        this = int(d)
+        # The first digit has no predecessor, so it only primes ``last``.
+        if last is not None:
+            freqs[10*last + this] += 1
+        last = this
+    if normalize:
+        freqs = _normalized(freqs)
+    return freqs
+
+def n_digit_freqs(digits, n, normalize=False):
+    """
+    Consume digits of pi and compute n digits freq. counts.
+
+    This should only be used for 1-6 digits.
+
+    ``digits`` is an iterable of single digits given as characters or ints.
+    Every window of ``n`` neighbouring digits, the last one included, is
+    counted at the index given by reading the window as a decimal number.
+    Fewer than ``n`` digits give all-zero counts.  With ``normalize`` the
+    counts are returned as fractions of their total.
+    """
+    size = pow(10,n)
+    freqs = np.zeros(size, dtype='i4')
+    index = 0
+    seen = 0
+    for d in digits:
+        # Slide the window: append the new digit and drop the oldest one.
+        index = (10*index + int(d)) % size
+        seen += 1
+        # Only count once the window holds n digits.
+        if seen >= n:
+            freqs[index] += 1
+    if normalize:
+        freqs = _normalized(freqs)
+    return freqs
+
+# Plotting functions
+
+def plot_two_digit_freqs(f2):
+    """
+    Plot two digits frequency counts using matplotlib.
+
+    ``f2`` is a length 100 array as returned by :func:`two_digit_freqs`; it
+    is drawn as a 10 by 10 matrix with each cell labelled by its two digits.
+    The object returned by ``plt.matshow`` is returned.
+    """
+    f2_copy = f2.copy()
+    f2_copy.shape = (10,10)
+    ax = plt.matshow(f2_copy)
+    plt.colorbar()
+    for i in range(10):
+        for j in range(10):
+            plt.text(i-0.2, j+0.2, str(j)+str(i))
+    plt.ylabel('First digit')
+    plt.xlabel('Second digit')
+    return ax
+
+def plot_one_digit_freqs(f1):
+    """
+    Plot one digit frequency counts using matplotlib.
+
+    ``f1`` holds the counts indexed by digit.  The value returned by
+    ``plt.plot`` is returned.
+    """
+    ax = plt.plot(f1,'bo-')
+    plt.title('Single digit counts in pi')
+    plt.xlabel('Digit')
+    plt.ylabel('Count')
+    return ax