Show More
@@ -0,0 +1,144 | |||
|
1 | #!/usr/bin/env python | |
|
2 | """Run a Monte-Carlo options pricer in parallel.""" | |
|
3 | ||
|
4 | #----------------------------------------------------------------------------- | |
|
5 | # Imports | |
|
6 | #----------------------------------------------------------------------------- | |
|
7 | ||
|
8 | import sys | |
|
9 | import time | |
|
10 | from IPython.zmq.parallel import client | |
|
11 | import numpy as np | |
|
12 | from mcpricer import price_options | |
|
13 | from matplotlib import pyplot as plt | |
|
14 | ||
|
15 | #----------------------------------------------------------------------------- | |
|
16 | # Setup parameters for the run | |
|
17 | #----------------------------------------------------------------------------- | |
|
18 | ||
|
def ask_question(text, the_type, default):
    """Prompt on stdin and return the reply coerced to *the_type*.

    The prompt shows the (coerced) default in brackets; an empty reply
    falls back to that default, also coerced.
    """
    prompt = '%s [%r]: ' % (text, the_type(default))
    answer = raw_input(prompt)
    return the_type(answer) if answer else the_type(default)
|
26 | ||
|
# Interactively gather all run parameters, falling back to the shown
# defaults when the user just presses return.
cluster_profile = ask_question("Cluster profile", str, "default")
price = ask_question("Initial price", float, 100.0)
rate = ask_question("Interest rate", float, 0.05)
days = ask_question("Days to expiration", int, 260)
paths = ask_question("Number of MC paths", int, 10000)
n_strikes = ask_question("Number of strike values", int, 5)
min_strike = ask_question("Min strike price", float, 90.0)
max_strike = ask_question("Max strike price", float, 110.0)
n_sigmas = ask_question("Number of volatility values", int, 5)
min_sigma = ask_question("Min volatility", float, 0.1)
max_sigma = ask_question("Max volatility", float, 0.4)

# The (strike, volatility) grid over which the options will be priced.
strike_vals = np.linspace(min_strike, max_strike, n_strikes)
sigma_vals = np.linspace(min_sigma, max_sigma, n_sigmas)

#-----------------------------------------------------------------------------
# Setup for parallel calculation
#-----------------------------------------------------------------------------

# The Client is used to setup the calculation and works with all
# engines.
c = client.Client(profile=cluster_profile)

# A LoadBalancedView is an interface to the engines that provides dynamic load
# balancing at the expense of not knowing which engine will execute the code.
# NOTE(review): indexing the client with None presumably yields the
# load-balanced view in this API version -- confirm against IPython docs.
view = c[None]

# Initialize the common code on the engines. This Python module has the
# price_options function that prices the options.

#-----------------------------------------------------------------------------
# Perform parallel calculation
#-----------------------------------------------------------------------------

print "Running parallel calculation over strike prices and volatilities..."
print "Strike prices: ", strike_vals
print "Volatilities: ", sigma_vals
sys.stdout.flush()

# Submit one asynchronous task per (strike, sigma) pair; each returns a
# 4-tuple of option prices from price_options.
t1 = time.time()
async_results = []
for strike in strike_vals:
    for sigma in sigma_vals:
        ar = view.apply_async(price_options, price, strike, sigma, rate, days, paths)
        async_results.append(ar)

print "Submitted tasks: ", len(async_results)
sys.stdout.flush()

# Block until all tasks are completed.
c.barrier(async_results)
t2 = time.time()
t = t2-t1

print "Parallel calculation completed, time = %s s" % t
print "Collecting results..."

# Get the results using TaskClient.get_task_result.
results = [ar.get() for ar in async_results]

# Assemble the result into a structured NumPy array with one field per
# option flavor: European/Asian call/put.
prices = np.empty(n_strikes*n_sigmas,
    dtype=[('ecall',float),('eput',float),('acall',float),('aput',float)]
)

# NOTE(review): the loop variable `price` shadows the initial stock price
# read above; harmless here because `price` is not used again afterwards,
# but worth renaming if this script grows.
for i, price in enumerate(results):
    prices[i] = tuple(price)

# Reshape the flat results onto the grid and build the meshes that
# plot_options uses for contour plots.
prices.shape = (n_strikes, n_sigmas)
strike_mesh, sigma_mesh = np.meshgrid(strike_vals, sigma_vals)

print "Results are available: strike_mesh, sigma_mesh, prices"
print "To plot results type 'plot_options(sigma_mesh, strike_mesh, prices)'"
|
101 | ||
|
102 | #----------------------------------------------------------------------------- | |
|
103 | # Utilities | |
|
104 | #----------------------------------------------------------------------------- | |
|
105 | ||
|
def plot_options(sigma_mesh, strike_mesh, prices):
    """
    Make a contour plot of the option price in (sigma, strike) space.

    Draws a 2x2 grid of filled contour plots, one panel per option
    flavor stored in the structured array *prices*.
    """
    plt.figure(1)

    # (subplot position, field name, panel title, x label?, y label?)
    panels = [
        (221, 'ecall', 'European Call', False, True),
        (222, 'acall', 'Asian Call', False, False),
        (223, 'eput', 'European Put', True, True),
        (224, 'aput', 'Asian Put', True, False),
    ]
    for position, field, panel_title, label_x, label_y in panels:
        plt.subplot(position)
        plt.contourf(sigma_mesh, strike_mesh, prices[field])
        plt.axis('tight')
        plt.colorbar()
        plt.title(panel_title)
        # Only the outer panels carry axis labels, as in a shared-axes grid.
        if label_x:
            plt.xlabel("Volatility")
        if label_y:
            plt.ylabel("Strike Price")
|
139 | ||
|
140 | ||
|
141 | ||
|
142 | ||
|
143 | ||
|
144 |
@@ -0,0 +1,45 | |||
|
1 | ||
|
def price_options(S=100.0, K=100.0, sigma=0.25, r=0.05, days=260, paths=10000):
    """
    Price European and Asian options using a Monte Carlo method.

    Simulates geometric-Brownian-motion stock paths one day at a time
    and discounts the average payoff at expiry.

    Parameters
    ----------
    S : float
        The initial price of the stock.
    K : float
        The strike price of the option.
    sigma : float
        The volatility of the stock.
    r : float
        The risk free interest rate.
    days : int
        The number of days until the option expires.
    paths : int
        The number of Monte Carlo paths used to price the option.

    Returns
    -------
    A tuple of (E. call, E. put, A. call, A. put) option prices.
    """
    import numpy as np
    from math import exp, sqrt

    step = 1.0/days
    # Deterministic drift factor per step and volatility scale per step.
    drift = exp((r-0.5*sigma**2)*step)
    vol_step = sigma*sqrt(step)

    spot = S*np.ones(paths, dtype='float64')
    running_total = np.zeros(paths, dtype='float64')
    for _ in range(days):
        # One lognormal growth factor per path for this day.
        growth = drift*np.exp(vol_step*np.random.standard_normal(paths))
        spot = spot*growth
        running_total += spot
    # Path average used for the (path-dependent) Asian payoffs.
    average_spot = running_total/days

    floor = np.zeros(paths, dtype='float64')
    discount = exp(-r*step*days)
    euro_put = discount*np.mean(np.maximum(floor, K-spot))
    asian_put = discount*np.mean(np.maximum(floor, K-average_spot))
    euro_call = discount*np.mean(np.maximum(floor, spot-K))
    asian_call = discount*np.mean(np.maximum(floor, average_spot-K))
    return (euro_call, euro_put, asian_call, asian_put)
|
45 |
@@ -0,0 +1,63 | |||
|
1 | """Calculate statistics on the digits of pi in parallel. | |
|
2 | ||
|
3 | This program uses the functions in :file:`pidigits.py` to calculate | |
|
4 | the frequencies of 2 digit sequences in the digits of pi. The | |
|
5 | results are plotted using matplotlib. | |
|
6 | ||
|
7 | To run, text files from http://www.super-computing.org/ | |
|
8 | must be installed in the working directory of the IPython engines. | |
|
9 | The actual filenames to be used can be set with the ``filestring`` | |
|
10 | variable below. | |
|
11 | ||
|
12 | The dataset we have been using for this is the 200 million digit one here: | |
|
13 | ftp://pi.super-computing.org/.2/pi200m/ | |
|
14 | ||
|
15 | and the files used will be downloaded if they are not in the working directory | |
|
16 | of the IPython engines. | |
|
17 | """ | |
|
18 | ||
|
19 | from IPython.zmq.parallel import client | |
|
20 | from matplotlib import pyplot as plt | |
|
21 | import numpy as np | |
|
22 | from pidigits import * | |
|
23 | from timeit import default_timer as clock | |
|
24 | ||
|
# Files with digits of pi (10m digits each); the %(i)02d slot selects
# which of the 20 segments a name refers to.
filestring = 'pi200m.ascii.%(i)02dof20'
files = [filestring % {'i':i} for i in range(1,16)]

# Connect to the IPython cluster
c = client.Client()
# Run pidigits.py on the engines so they all have the helper functions.
c.run('pidigits.py')

# the number of engines
n = len(c.ids)
id0 = list(c.ids)[0]
# fetch the pi-files (no-op for files already present on the engines)
print "downloading %i files of pi"%n
c.map(fetch_pi_file, files[:n])
print "done"

# Run 10m digits on 1 engine to get the serial baseline timing.
t1 = clock()
freqs10m = c[id0].apply_sync_bound(compute_two_digit_freqs, files[0])
t2 = clock()
digits_per_second1 = 10.0e6/(t2-t1)
print "Digits per second (1 core, 10m digits): ", digits_per_second1


# Run n*10m digits on all engines
t1 = clock()
# Block so map returns results directly instead of async handles.
c.block=True
freqs_all = c.map(compute_two_digit_freqs, files[:n])
# Sum the per-engine count arrays into the grand total.
freqs150m = reduce_freqs(freqs_all)
t2 = clock()
digits_per_second8 = n*10.0e6/(t2-t1)
print "Digits per second (%i engines, %i0m digits): "%(n,n), digits_per_second8

print "Speedup: ", digits_per_second8/digits_per_second1

plot_two_digit_freqs(freqs150m)
plt.title("2 digit sequences in %i0m digits of pi"%n)
plt.show()
|
63 |
@@ -0,0 +1,159 | |||
|
1 | """Compute statistics on the digits of pi. | |
|
2 | ||
|
3 | This uses precomputed digits of pi from the website | |
|
4 | of Professor Yasumasa Kanada at the University of | |
|
5 | Tokyo: http://www.super-computing.org/ | |
|
6 | ||
|
7 | Currently, there are only functions to read the | |
|
8 | .txt (non-compressed, non-binary) files, but adding | |
|
9 | support for compression and binary files would be | |
|
10 | straightforward. | |
|
11 | ||
|
12 | This focuses on computing the number of times that | |
|
13 | all 1, 2, n digits sequences occur in the digits of pi. | |
|
14 | If the digits of pi are truly random, these frequencies | |
|
15 | should be equal. | |
|
16 | """ | |
|
17 | ||
|
18 | # Import statements | |
|
19 | from __future__ import division, with_statement | |
|
20 | ||
|
21 | import os | |
|
22 | import urllib | |
|
23 | ||
|
24 | import numpy as np | |
|
25 | from matplotlib import pyplot as plt | |
|
26 | ||
|
27 | # Top-level functions | |
|
28 | ||
|
def fetch_pi_file(filename):
    """Download a segment of pi from super-computing.org unless
    *filename* is already present in the working directory.
    """
    ftp_root = "ftp://pi.super-computing.org/.2/pi200m/"
    if not os.path.exists(filename):
        # Not cached locally -- fetch it over ftp.
        urllib.urlretrieve(ftp_root + filename, filename)
|
40 | ||
|
def compute_one_digit_freqs(filename):
    """
    Read digits of pi from a file and compute the 1 digit frequencies.
    """
    digit_stream = txt_file_to_digits(filename)
    return one_digit_freqs(digit_stream)
|
48 | ||
|
def compute_two_digit_freqs(filename):
    """
    Read digits of pi from a file and compute the 2 digit frequencies.
    """
    digit_stream = txt_file_to_digits(filename)
    return two_digit_freqs(digit_stream)
|
56 | ||
|
def reduce_freqs(freqlist):
    """
    Add up a list of freq counts to get the total counts.

    The first array in *freqlist* fixes the shape and dtype of the
    result; the input arrays are not modified.
    """
    total = np.zeros_like(freqlist[0])
    for counts in freqlist:
        total = total + counts
    return total
|
65 | ||
|
def compute_n_digit_freqs(filename, n):
    """
    Read digits of pi from a file and compute the n digit frequencies.
    """
    digit_stream = txt_file_to_digits(filename)
    return n_digit_freqs(digit_stream, n)
|
73 | ||
|
74 | # Read digits from a txt file | |
|
75 | ||
|
def txt_file_to_digits(filename, the_type=str):
    """
    Yield the digits of pi read from a .txt file.

    Spaces and newlines are skipped; every other character is converted
    with *the_type* and yielded one at a time.

    Parameters
    ----------
    filename : str
        Path of the text file to read.
    the_type : callable
        Conversion applied to each digit character (default: str).
    """
    with open(filename, 'r') as f:
        # Iterate the file object directly instead of f.readlines() so a
        # 10-million-digit file is streamed line by line rather than
        # loaded into memory all at once.
        for line in f:
            for c in line:
                if c != '\n' and c != ' ':
                    yield the_type(c)
|
85 | ||
|
86 | # Actual counting functions | |
|
87 | ||
|
def one_digit_freqs(digits, normalize=False):
    """
    Consume digits of pi and compute 1 digit freq. counts.

    Returns a length-10 array where index d counts occurrences of
    digit d; with normalize=True, relative frequencies instead.
    """
    counts = np.zeros(10, dtype='i4')
    for digit in digits:
        counts[int(digit)] += 1
    if normalize:
        return counts/counts.sum()
    return counts
|
98 | ||
|
def two_digit_freqs(digits, normalize=False):
    """
    Consume digits of pi and compute 2 digits freq. counts.

    Every adjacent pair of digits is counted, so a stream of m digits
    yields m-1 pair observations.

    Parameters
    ----------
    digits : iterator
        Iterator of single-digit characters (strings).
    normalize : bool
        If True, return relative frequencies instead of raw counts.

    Returns
    -------
    A length-100 array where index ij counts the pair "ij".
    """
    freqs = np.zeros(100, dtype='i4')
    # Built-in next() instead of the Python-2-only .next() method.
    last = next(digits)
    this = next(digits)
    for d in digits:
        index = int(last + this)
        freqs[index] += 1
        last = this
        this = d
    # Count the final pair as well; the previous loop dropped it.
    freqs[int(last + this)] += 1
    if normalize:
        freqs = freqs/freqs.sum()
    return freqs
|
114 | ||
|
def n_digit_freqs(digits, n, normalize=False):
    """
    Consume digits of pi and compute n digits freq. counts.

    This should only be used for 1-6 digits.

    Every window of n consecutive digits is counted, so a stream of m
    digits yields m-n+1 window observations.

    Parameters
    ----------
    digits : iterator
        Iterator of single-digit characters (strings).
    n : int
        Window length.
    normalize : bool
        If True, return relative frequencies instead of raw counts.
    """
    freqs = np.zeros(pow(10,n), dtype='i4')
    # Seed the sliding window with the first n digits; keep them in a
    # plain list (the old int ndarray relied on implicit str->int casts
    # and next() replaces the Python-2-only .next() method).
    current = [next(digits) for _ in range(n)]
    for d in digits:
        index = int(''.join(map(str, current)))
        freqs[index] += 1
        current[0:-1] = current[1:]
        current[-1] = d
    # Count the final window as well; the previous loop dropped it.
    freqs[int(''.join(map(str, current)))] += 1
    if normalize:
        freqs = freqs/freqs.sum()
    return freqs
|
133 | ||
|
134 | # Plotting functions | |
|
135 | ||
|
def plot_two_digit_freqs(f2):
    """
    Plot two digits frequency counts using matplotlib.

    Shows the 100 counts as a 10x10 matrix image, annotating each
    cell with the two-digit sequence it represents, and returns the
    image object.
    """
    grid = f2.copy()
    grid.shape = (10, 10)
    image = plt.matshow(grid)
    plt.colorbar()
    # Label every cell; rows are the first digit, columns the second.
    for second in range(10):
        for first in range(10):
            plt.text(second - 0.2, first + 0.2, str(first) + str(second))
    plt.ylabel('First digit')
    plt.xlabel('Second digit')
    return image
|
150 | ||
|
def plot_one_digit_freqs(f1):
    """
    Plot one digit frequency counts using matplotlib.

    Draws the counts as blue dots connected by lines, labels the axes,
    and returns the plotted line objects.
    """
    line_handles = plt.plot(f1, 'bo-')
    plt.title('Single digit counts in pi')
    plt.xlabel('Digit')
    plt.ylabel('Count')
    return line_handles
@@ -1,286 +1,290 | |||
|
1 | 1 | ================= |
|
2 | 2 | Parallel examples |
|
3 | 3 | ================= |
|
4 | 4 | |
|
5 | 5 | .. note:: |
|
6 | 6 | |
|
7 | Not adapted to zmq yet | |
|
7 | Performance numbers from ``IPython.kernel``, not newparallel | |
|
8 | 8 | |
|
9 | 9 | In this section we describe two more involved examples of using an IPython |
|
10 | 10 | cluster to perform a parallel computation. In these examples, we will be using |
|
11 | 11 | IPython's "pylab" mode, which enables interactive plotting using the |
|
12 | 12 | Matplotlib package. IPython can be started in this mode by typing:: |
|
13 | 13 | |
|
14 |
ipython - |
|
|
14 | ipython --pylab | |
|
15 | 15 | |
|
16 | 16 | at the system command line. If this prints an error message, you will |
|
17 | 17 | need to install the default profiles from within IPython by doing, |
|
18 | 18 | |
|
19 | 19 | .. sourcecode:: ipython |
|
20 | 20 | |
|
21 | 21 | In [1]: %install_profiles |
|
22 | 22 | |
|
23 | 23 | and then restarting IPython. |
|
24 | 24 | |
|
25 | 25 | 150 million digits of pi |
|
26 | 26 | ======================== |
|
27 | 27 | |
|
28 | 28 | In this example we would like to study the distribution of digits in the |
|
29 | 29 | number pi (in base 10). While it is not known if pi is a normal number (a |
|
30 | 30 | number is normal in base 10 if 0-9 occur with equal likelihood) numerical |
|
31 | 31 | investigations suggest that it is. We will begin with a serial calculation on |
|
32 | 32 | 10,000 digits of pi and then perform a parallel calculation involving 150 |
|
33 | 33 | million digits. |
|
34 | 34 | |
|
35 | 35 | In both the serial and parallel calculation we will be using functions defined |
|
36 | 36 | in the :file:`pidigits.py` file, which is available in the |
|
37 | 37 | :file:`docs/examples/kernel` directory of the IPython source distribution. |
|
38 | 38 | These functions provide basic facilities for working with the digits of pi and |
|
39 | 39 | can be loaded into IPython by putting :file:`pidigits.py` in your current |
|
40 | 40 | working directory and then doing: |
|
41 | 41 | |
|
42 | 42 | .. sourcecode:: ipython |
|
43 | 43 | |
|
44 | 44 | In [1]: run pidigits.py |
|
45 | 45 | |
|
46 | 46 | Serial calculation |
|
47 | 47 | ------------------ |
|
48 | 48 | |
|
49 | 49 | For the serial calculation, we will use SymPy (http://www.sympy.org) to |
|
50 | 50 | calculate 10,000 digits of pi and then look at the frequencies of the digits |
|
51 | 51 | 0-9. Out of 10,000 digits, we expect each digit to occur 1,000 times. While |
|
52 | 52 | SymPy is capable of calculating many more digits of pi, our purpose here is to |
|
53 | 53 | set the stage for the much larger parallel calculation. |
|
54 | 54 | |
|
55 | 55 | In this example, we use two functions from :file:`pidigits.py`: |
|
56 | 56 | :func:`one_digit_freqs` (which calculates how many times each digit occurs) |
|
57 | 57 | and :func:`plot_one_digit_freqs` (which uses Matplotlib to plot the result). |
|
58 | 58 | Here is an interactive IPython session that uses these functions with |
|
59 | 59 | SymPy: |
|
60 | 60 | |
|
61 | 61 | .. sourcecode:: ipython |
|
62 | 62 | |
|
63 | 63 | In [7]: import sympy |
|
64 | 64 | |
|
65 | 65 | In [8]: pi = sympy.pi.evalf(40) |
|
66 | 66 | |
|
67 | 67 | In [9]: pi |
|
68 | 68 | Out[9]: 3.141592653589793238462643383279502884197 |
|
69 | 69 | |
|
70 | 70 | In [10]: pi = sympy.pi.evalf(10000) |
|
71 | 71 | |
|
72 | 72 | In [11]: digits = (d for d in str(pi)[2:]) # create a sequence of digits |
|
73 | 73 | |
|
74 | 74 | In [12]: run pidigits.py # load one_digit_freqs/plot_one_digit_freqs |
|
75 | 75 | |
|
76 | 76 | In [13]: freqs = one_digit_freqs(digits) |
|
77 | 77 | |
|
78 | 78 | In [14]: plot_one_digit_freqs(freqs) |
|
79 | 79 | Out[14]: [<matplotlib.lines.Line2D object at 0x18a55290>] |
|
80 | 80 | |
|
81 | 81 | The resulting plot of the single digit counts shows that each digit occurs |
|
82 | 82 | approximately 1,000 times, but that with only 10,000 digits the |
|
83 | 83 | statistical fluctuations are still rather large: |
|
84 | 84 | |
|
85 | .. image:: single_digits.* | |
|
85 | .. image:: ../parallel/single_digits.* | |
|
86 | 86 | |
|
87 | 87 | It is clear that to reduce the relative fluctuations in the counts, we need |
|
88 | 88 | to look at many more digits of pi. That brings us to the parallel calculation. |
|
89 | 89 | |
|
90 | 90 | Parallel calculation |
|
91 | 91 | -------------------- |
|
92 | 92 | |
|
93 | 93 | Calculating many digits of pi is a challenging computational problem in itself. |
|
94 | 94 | Because we want to focus on the distribution of digits in this example, we |
|
95 | 95 | will use pre-computed digit of pi from the website of Professor Yasumasa |
|
96 |
Kanada at the University of Tok |
|
|
96 | Kanada at the University of Tokyo (http://www.super-computing.org). These | |
|
97 | 97 | digits come in a set of text files (ftp://pi.super-computing.org/.2/pi200m/) |
|
98 | 98 | that each have 10 million digits of pi. |
|
99 | 99 | |
|
100 | 100 | For the parallel calculation, we have copied these files to the local hard |
|
101 | 101 | drives of the compute nodes. A total of 15 of these files will be used, for a |
|
102 | 102 | total of 150 million digits of pi. To make things a little more interesting we |
|
103 | 103 | will calculate the frequencies of all 2 digits sequences (00-99) and then plot |
|
104 | 104 | the result using a 2D matrix in Matplotlib. |
|
105 | 105 | |
|
106 | 106 | The overall idea of the calculation is simple: each IPython engine will |
|
107 | 107 | compute the two digit counts for the digits in a single file. Then in a final |
|
108 | 108 | step the counts from each engine will be added up. To perform this |
|
109 | 109 | calculation, we will need two top-level functions from :file:`pidigits.py`: |
|
110 | 110 | |
|
111 |
.. literalinclude:: ../../examples/ |
|
|
111 | .. literalinclude:: ../../examples/newparallel/pidigits.py | |
|
112 | 112 | :language: python |
|
113 | 113 | :lines: 34-49 |
|
114 | 114 | |
|
115 | 115 | We will also use the :func:`plot_two_digit_freqs` function to plot the |
|
116 | 116 | results. The code to run this calculation in parallel is contained in |
|
117 |
:file:`docs/examples/ |
|
|
117 | :file:`docs/examples/newparallel/parallelpi.py`. This code can be run in parallel | |
|
118 | 118 | using IPython by following these steps: |
|
119 | 119 | |
|
120 | 1. Copy the text files with the digits of pi | |
|
121 | (ftp://pi.super-computing.org/.2/pi200m/) to the working directory of the | |
|
122 | engines on the compute nodes. | |
|
123 | 2. Use :command:`ipclusterz` to start 15 engines. We used an 8 core (2 quad | |
|
120 | 1. Use :command:`ipclusterz` to start 15 engines. We used an 8 core (2 quad | |
|
124 | 121 | core CPUs) cluster with hyperthreading enabled which makes the 8 cores |
|
125 | 122 | looks like 16 (1 controller + 15 engines) in the OS. However, the maximum |
|
126 | 123 | speedup we can observe is still only 8x. |
|
127 |
|
|
|
128 | up IPython in pylab mode and type ``run parallelpi.py``. | |
|
124 | 2. With the file :file:`parallelpi.py` in your current working directory, open | |
|
125 | up IPython in pylab mode and type ``run parallelpi.py``. This will download | |
|
126 | the pi files via ftp the first time you run it, if they are not | |
|
127 | present in the Engines' working directory. | |
|
129 | 128 | |
|
130 | 129 | When run on our 8 core cluster, we observe a speedup of 7.7x. This is slightly |
|
131 | 130 | less than linear scaling (8x) because the controller is also running on one of |
|
132 | 131 | the cores. |
|
133 | 132 | |
|
134 | 133 | To emphasize the interactive nature of IPython, we now show how the |
|
135 | 134 | calculation can also be run by simply typing the commands from |
|
136 | 135 | :file:`parallelpi.py` interactively into IPython: |
|
137 | 136 | |
|
138 | 137 | .. sourcecode:: ipython |
|
139 | 138 | |
|
140 | 139 | In [1]: from IPython.zmq.parallel import client |
|
141 | 2009-11-19 11:32:38-0800 [-] Log opened. | |
|
142 | 140 | |
|
143 |
# The |
|
|
144 |
# We simply pass |
|
|
141 | # The Client allows us to use the engines interactively. | |
|
142 | # We simply pass Client the name of the cluster profile we | |
|
145 | 143 | # are using. |
|
146 | 144 | In [2]: c = client.Client(profile='mycluster') |
|
147 | 2009-11-19 11:32:44-0800 [-] Connecting [0] | |
|
148 | 2009-11-19 11:32:44-0800 [Negotiation,client] Connected: ./ipcontroller-mec.furl | |
|
149 | 145 | |
|
150 |
In [3]: |
|
|
146 | In [3]: c.ids | |
|
151 | 147 | Out[3]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] |
|
152 | 148 | |
|
153 | 149 | In [4]: run pidigits.py |
|
154 | 150 | |
|
155 |
In [5]: filestring = 'pi200m |
|
|
151 | In [5]: filestring = 'pi200m.ascii.%(i)02dof20' | |
|
156 | 152 | |
|
157 | 153 | # Create the list of files to process. |
|
158 | 154 | In [6]: files = [filestring % {'i':i} for i in range(1,16)] |
|
159 | 155 | |
|
160 | 156 | In [7]: files |
|
161 | 157 | Out[7]: |
|
162 |
['pi200m |
|
|
163 |
'pi200m |
|
|
164 |
'pi200m |
|
|
165 |
'pi200m |
|
|
166 |
'pi200m |
|
|
167 |
'pi200m |
|
|
168 |
'pi200m |
|
|
169 |
'pi200m |
|
|
170 |
'pi200m |
|
|
171 |
'pi200m |
|
|
172 |
'pi200m |
|
|
173 |
'pi200m |
|
|
174 |
'pi200m |
|
|
175 |
'pi200m |
|
|
176 |
'pi200m |
|
|
177 | ||
|
178 | # This is the parallel calculation using the MultiEngineClient.map method | |
|
158 | ['pi200m.ascii.01of20', | |
|
159 | 'pi200m.ascii.02of20', | |
|
160 | 'pi200m.ascii.03of20', | |
|
161 | 'pi200m.ascii.04of20', | |
|
162 | 'pi200m.ascii.05of20', | |
|
163 | 'pi200m.ascii.06of20', | |
|
164 | 'pi200m.ascii.07of20', | |
|
165 | 'pi200m.ascii.08of20', | |
|
166 | 'pi200m.ascii.09of20', | |
|
167 | 'pi200m.ascii.10of20', | |
|
168 | 'pi200m.ascii.11of20', | |
|
169 | 'pi200m.ascii.12of20', | |
|
170 | 'pi200m.ascii.13of20', | |
|
171 | 'pi200m.ascii.14of20', | |
|
172 | 'pi200m.ascii.15of20'] | |
|
173 | ||
|
174 | # download the data files if they don't already exist: | |
|
175 | In [8]: c.map(fetch_pi_file, files) | |
|
176 | ||
|
177 | # This is the parallel calculation using the Client.map method | |
|
179 | 178 | # which applies compute_two_digit_freqs to each file in files in parallel. |
|
180 |
In [ |
|
|
179 | In [9]: freqs_all = c.map(compute_two_digit_freqs, files) | |
|
181 | 180 | |
|
182 | 181 | # Add up the frequencies from each engine. |
|
183 |
In [ |
|
|
182 | In [10]: freqs = reduce_freqs(freqs_all) | |
|
184 | 183 | |
|
185 |
In [ |
|
|
186 |
Out[ |
|
|
184 | In [11]: plot_two_digit_freqs(freqs) | |
|
185 | Out[11]: <matplotlib.image.AxesImage object at 0x18beb110> | |
|
187 | 186 | |
|
188 |
In [1 |
|
|
189 |
Out[1 |
|
|
187 | In [12]: plt.title('2 digit counts of 150m digits of pi') | |
|
188 | Out[12]: <matplotlib.text.Text object at 0x18d1f9b0> | |
|
190 | 189 | |
|
191 | 190 | The resulting plot generated by Matplotlib is shown below. The colors indicate |
|
192 | 191 | which two digit sequences are more (red) or less (blue) likely to occur in the |
|
193 | 192 | first 150 million digits of pi. We clearly see that the sequence "41" is |
|
194 | 193 | most likely and that "06" and "07" are least likely. Further analysis would |
|
195 | 194 | show that the relative size of the statistical fluctuations have decreased |
|
196 | 195 | compared to the 10,000 digit calculation. |
|
197 | 196 | |
|
198 | .. image:: two_digit_counts.* | |
|
197 | .. image:: ../parallel/two_digit_counts.* | |
|
199 | 198 | |
|
200 | 199 | |
|
201 | 200 | Parallel options pricing |
|
202 | 201 | ======================== |
|
203 | 202 | |
|
204 | 203 | An option is a financial contract that gives the buyer of the contract the |
|
205 | 204 | right to buy (a "call") or sell (a "put") a secondary asset (a stock for |
|
206 | 205 | example) at a particular date in the future (the expiration date) for a |
|
207 | 206 | pre-agreed upon price (the strike price). For this right, the buyer pays the |
|
208 | 207 | seller a premium (the option price). There are a wide variety of flavors of |
|
209 | 208 | options (American, European, Asian, etc.) that are useful for different |
|
210 | 209 | purposes: hedging against risk, speculation, etc. |
|
211 | 210 | |
|
212 | 211 | Much of modern finance is driven by the need to price these contracts |
|
213 | 212 | accurately based on what is known about the properties (such as volatility) of |
|
214 | 213 | the underlying asset. One method of pricing options is to use a Monte Carlo |
|
215 | 214 | simulation of the underlying asset price. In this example we use this approach |
|
216 | 215 | to price both European and Asian (path dependent) options for various strike |
|
217 | 216 | prices and volatilities. |
|
218 | 217 | |
|
219 | 218 | The code for this example can be found in the :file:`docs/examples/kernel` |
|
220 | 219 | directory of the IPython source. The function :func:`price_options` in |
|
221 | 220 | :file:`mcpricer.py` implements the basic Monte Carlo pricing algorithm using |
|
222 | 221 | the NumPy package and is shown here: |
|
223 | 222 | |
|
224 | 223 | .. literalinclude:: ../../examples/kernel/mcpricer.py |
|
225 | 224 | :language: python |
|
226 | 225 | |
|
227 |
To run this code in parallel, we will use IPython's :class:` |
|
|
226 | To run this code in parallel, we will use IPython's :class:`LoadBalancedView` class, | |
|
228 | 227 | which distributes work to the engines using dynamic load balancing. This |
|
229 |
|
|
|
230 |
the previous example. The parallel calculation using :class:` |
|
|
228 | view is a wrapper of the :class:`Client` class shown in | |
|
229 | the previous example. The parallel calculation using :class:`LoadBalancedView` can | |
|
231 | 230 | be found in the file :file:`mcpricer.py`. The code in this file creates a |
|
232 | 231 | :class:`TaskClient` instance and then submits a set of tasks using |
|
233 | 232 | :meth:`TaskClient.run` that calculate the option prices for different |
|
234 | 233 | volatilities and strike prices. The results are then plotted as a 2D contour |
|
235 | 234 | plot using Matplotlib. |
|
236 | 235 | |
|
237 | 236 | .. literalinclude:: ../../examples/kernel/mcdriver.py |
|
238 | 237 | :language: python |
|
239 | 238 | |
|
240 | 239 | To use this code, start an IPython cluster using :command:`ipclusterz`, open |
|
241 | 240 | IPython in the pylab mode with the file :file:`mcdriver.py` in your current |
|
242 | 241 | working directory and then type: |
|
243 | 242 | |
|
244 | 243 | .. sourcecode:: ipython |
|
245 | 244 | |
|
246 | 245 | In [7]: run mcdriver.py |
|
247 | 246 | Submitted tasks: [0, 1, 2, ...] |
|
248 | 247 | |
|
249 | 248 | Once all the tasks have finished, the results can be plotted using the |
|
250 | 249 | :func:`plot_options` function. Here we make contour plots of the Asian |
|
251 | 250 | call and Asian put options as function of the volatility and strike price: |
|
252 | 251 | |
|
253 | 252 | .. sourcecode:: ipython |
|
254 | 253 | |
|
255 | 254 | In [8]: plot_options(sigma_vals, K_vals, prices['acall']) |
|
256 | 255 | |
|
257 | 256 | In [9]: plt.figure() |
|
258 | 257 | Out[9]: <matplotlib.figure.Figure object at 0x18c178d0> |
|
259 | 258 | |
|
260 | 259 | In [10]: plot_options(sigma_vals, K_vals, prices['aput']) |
|
261 | 260 | |
|
262 | 261 | These results are shown in the two figures below. On a 8 core cluster the |
|
263 | 262 | entire calculation (10 strike prices, 10 volatilities, 100,000 paths for each) |
|
264 | 263 | took 30 seconds in parallel, giving a speedup of 7.7x, which is comparable |
|
265 | 264 | to the speedup observed in our previous example. |
|
266 | 265 | |
|
267 | .. image:: asian_call.* | |
|
266 | .. image:: ../parallel/asian_call.* | |
|
268 | 267 | |
|
269 | .. image:: asian_put.* | |
|
268 | .. image:: ../parallel/asian_put.* | |
|
270 | 269 | |
|
271 | 270 | Conclusion |
|
272 | 271 | ========== |
|
273 | 272 | |
|
274 | 273 | To conclude these examples, we summarize the key features of IPython's |
|
275 | 274 | parallel architecture that have been demonstrated: |
|
276 | 275 | |
|
277 | 276 | * Serial code can be parallelized often with only a few extra lines of code. |
|
278 |
We have used the :class:` |
|
|
277 | We have used the :class:`DirectView` and :class:`LoadBalancedView` classes | |
|
279 | 278 | for this purpose. |
|
280 | 279 | * The resulting parallel code can be run without ever leaving the IPython's |
|
281 | 280 | interactive shell. |
|
282 | 281 | * Any data computed in parallel can be explored interactively through |
|
283 | 282 | visualization or further numerical calculations. |
|
284 | 283 | * We have run these examples on a cluster running Windows HPC Server 2008. |
|
285 | 284 | IPython's built in support for the Windows HPC job scheduler makes it |
|
286 | 285 | easy to get started with IPython's parallel capabilities. |
|
286 | ||
|
287 | .. note:: | |
|
288 | ||
|
289 | The newparallel code has never been run on Windows HPC Server, so the last | |
|
290 | conclusion is untested. |
@@ -1,186 +1,187 | |||
|
1 | 1 | .. _parallelmpi: |
|
2 | 2 | |
|
3 | 3 | ======================= |
|
4 | 4 | Using MPI with IPython |
|
5 | 5 | ======================= |
|
6 | 6 | |
|
7 | 7 | .. note:: |
|
8 | 8 | |
|
9 | 9 | Not adapted to zmq yet |
|
10 | This is out of date wrt ipcluster in general as well | |
|
10 | 11 | |
|
11 | 12 | Often, a parallel algorithm will require moving data between the engines. One |
|
12 | 13 | way of accomplishing this is by doing a pull and then a push using the |
|
13 | 14 | multiengine client. However, this will be slow as all the data has to go |
|
14 | 15 | through the controller to the client and then back through the controller, to |
|
15 | 16 | its final destination. |
|
16 | 17 | |
|
17 | 18 | A much better way of moving data between engines is to use a message passing |
|
18 | 19 | library, such as the Message Passing Interface (MPI) [MPI]_. IPython's |
|
19 | 20 | parallel computing architecture has been designed from the ground up to |
|
20 | 21 | integrate with MPI. This document describes how to use MPI with IPython. |
|
21 | 22 | |
|
22 | 23 | Additional installation requirements |
|
23 | 24 | ==================================== |
|
24 | 25 | |
|
25 | 26 | If you want to use MPI with IPython, you will need to install: |
|
26 | 27 | |
|
27 | 28 | * A standard MPI implementation such as OpenMPI [OpenMPI]_ or MPICH. |
|
28 | 29 | * The mpi4py [mpi4py]_ package. |
|
29 | 30 | |
|
30 | 31 | .. note:: |
|
31 | 32 | |
|
32 | 33 | The mpi4py package is not a strict requirement. However, you need to |
|
33 | 34 | have *some* way of calling MPI from Python. You also need some way of |
|
34 | 35 | making sure that :func:`MPI_Init` is called when the IPython engines start |
|
35 | 36 | up. There are a number of ways of doing this and a good number of |
|
36 | 37 | associated subtleties. We highly recommend just using mpi4py as it |
|
37 | 38 | takes care of most of these problems. If you want to do something |
|
38 | 39 | different, let us know and we can help you get started. |
|
39 | 40 | |
|
40 | 41 | Starting the engines with MPI enabled |
|
41 | 42 | ===================================== |
|
42 | 43 | |
|
43 | 44 | To use code that calls MPI, there are typically two things that MPI requires. |
|
44 | 45 | |
|
45 | 46 | 1. The process that wants to call MPI must be started using |
|
46 | 47 | :command:`mpiexec` or a batch system (like PBS) that has MPI support. |
|
47 | 48 | 2. Once the process starts, it must call :func:`MPI_Init`. |
|
48 | 49 | |
|
49 | 50 | There are a couple of ways that you can start the IPython engines and get |
|
50 | 51 | these things to happen. |
|
51 | 52 | |
|
52 | 53 | Automatic starting using :command:`mpiexec` and :command:`ipclusterz` |
|
53 | 54 | ----------------------------------------------------------------------
|
54 | 55 | |
|
55 | 56 | The easiest approach is to use the `mpiexec` mode of :command:`ipclusterz`, |
|
56 | 57 | which will first start a controller and then a set of engines using |
|
57 | 58 | :command:`mpiexec`:: |
|
58 | 59 | |
|
59 | 60 | $ ipclusterz mpiexec -n 4 |
|
60 | 61 | |
|
61 | 62 | This approach is best as interrupting :command:`ipclusterz` will automatically |
|
62 | 63 | stop and clean up the controller and engines. |
|
63 | 64 | |
|
64 | 65 | Manual starting using :command:`mpiexec` |
|
65 | 66 | ---------------------------------------- |
|
66 | 67 | |
|
67 | 68 | If you want to start the IPython engines using the :command:`mpiexec`, just |
|
68 | 69 | do:: |
|
69 | 70 | |
|
70 | 71 | $ mpiexec -n 4 ipengine --mpi=mpi4py |
|
71 | 72 | |
|
72 | 73 | This requires that you already have a controller running and that the FURL |
|
73 | 74 | files for the engines are in place. We also have built in support for |
|
74 | 75 | PyTrilinos [PyTrilinos]_, which can be used (assuming it is installed) by
|
75 | 76 | starting the engines with:: |
|
76 | 77 | |
|
77 | 78 | mpiexec -n 4 ipengine --mpi=pytrilinos |
|
78 | 79 | |
|
79 | 80 | Automatic starting using PBS and :command:`ipclusterz` |
|
80 | 81 | -------------------------------------------------------
|
81 | 82 | |
|
82 | 83 | The :command:`ipclusterz` command also has built-in integration with PBS. For |
|
83 | 84 | more information on this approach, see our documentation on :ref:`ipclusterz |
|
84 | 85 | <parallel_process>`. |
|
85 | 86 | |
|
86 | 87 | Actually using MPI |
|
87 | 88 | ================== |
|
88 | 89 | |
|
89 | 90 | Once the engines are running with MPI enabled, you are ready to go. You can |
|
90 | 91 | now call any code that uses MPI in the IPython engines. And, all of this can |
|
91 | 92 | be done interactively. Here we show a simple example that uses mpi4py |
|
92 | 93 | [mpi4py]_ version 1.1.0 or later. |
|
93 | 94 | |
|
94 | 95 | First, let's define a simple function that uses MPI to calculate the sum of a
|
95 | 96 | distributed array. Save the following text in a file called :file:`psum.py`: |
|
96 | 97 | |
|
97 | 98 | .. sourcecode:: python |
|
98 | 99 | |
|
99 | 100 | from mpi4py import MPI |
|
100 | 101 | import numpy as np |
|
101 | 102 | |
|
102 | 103 | def psum(a): |
|
103 | 104 | s = np.sum(a) |
|
104 | 105 | rcvBuf = np.array(0.0,'d') |
|
105 | 106 | MPI.COMM_WORLD.Allreduce([s, MPI.DOUBLE], |
|
106 | 107 | [rcvBuf, MPI.DOUBLE], |
|
107 | 108 | op=MPI.SUM) |
|
108 | 109 | return rcvBuf |
|
109 | 110 | |
|
110 | 111 | Now, start an IPython cluster in the same directory as :file:`psum.py`:: |
|
111 | 112 | |
|
112 | 113 | $ ipclusterz mpiexec -n 4 |
|
113 | 114 | |
|
114 | 115 | Finally, connect to the cluster and use this function interactively. In this |
|
115 | 116 | case, we create a random array on each engine and sum up all the random arrays |
|
116 | 117 | using our :func:`psum` function: |
|
117 | 118 | |
|
118 | 119 | .. sourcecode:: ipython |
|
119 | 120 | |
|
120 | 121 | In [1]: from IPython.zmq.parallel import client |
|
121 | 122 | |
|
122 | 123 | In [2]: c = client.Client() |
|
123 | 124 | |
|
124 | 125 | In [3]: c.activate()
|
125 | 126 | |
|
126 | 127 | In [4]: px import numpy as np |
|
127 | 128 | Parallel execution on engines: all |
|
128 | 129 | Out[4]: |
|
129 | 130 | <Results List> |
|
130 | 131 | [0] In [13]: import numpy as np |
|
131 | 132 | [1] In [13]: import numpy as np |
|
132 | 133 | [2] In [13]: import numpy as np |
|
133 | 134 | [3] In [13]: import numpy as np |
|
134 | 135 | |
|
135 | 136 | In [6]: px a = np.random.rand(100) |
|
136 | 137 | Parallel execution on engines: all |
|
137 | 138 | Out[6]: |
|
138 | 139 | <Results List> |
|
139 | 140 | [0] In [15]: a = np.random.rand(100) |
|
140 | 141 | [1] In [15]: a = np.random.rand(100) |
|
141 | 142 | [2] In [15]: a = np.random.rand(100) |
|
142 | 143 | [3] In [15]: a = np.random.rand(100) |
|
143 | 144 | |
|
144 | 145 | In [7]: px from psum import psum |
|
145 | 146 | Parallel execution on engines: all |
|
146 | 147 | Out[7]: |
|
147 | 148 | <Results List> |
|
148 | 149 | [0] In [16]: from psum import psum |
|
149 | 150 | [1] In [16]: from psum import psum |
|
150 | 151 | [2] In [16]: from psum import psum |
|
151 | 152 | [3] In [16]: from psum import psum |
|
152 | 153 | |
|
153 | 154 | In [8]: px s = psum(a) |
|
154 | 155 | Parallel execution on engines: all |
|
155 | 156 | Out[8]: |
|
156 | 157 | <Results List> |
|
157 | 158 | [0] In [17]: s = psum(a) |
|
158 | 159 | [1] In [17]: s = psum(a) |
|
159 | 160 | [2] In [17]: s = psum(a) |
|
160 | 161 | [3] In [17]: s = psum(a) |
|
161 | 162 | |
|
162 | 163 | In [9]: px print s |
|
163 | 164 | Parallel execution on engines: all |
|
164 | 165 | Out[9]: |
|
165 | 166 | <Results List> |
|
166 | 167 | [0] In [18]: print s |
|
167 | 168 | [0] Out[18]: 187.451545803 |
|
168 | 169 | |
|
169 | 170 | [1] In [18]: print s |
|
170 | 171 | [1] Out[18]: 187.451545803 |
|
171 | 172 | |
|
172 | 173 | [2] In [18]: print s |
|
173 | 174 | [2] Out[18]: 187.451545803 |
|
174 | 175 | |
|
175 | 176 | [3] In [18]: print s |
|
176 | 177 | [3] Out[18]: 187.451545803 |
|
177 | 178 | |
|
178 | 179 | Any Python code that makes calls to MPI can be used in this manner, including |
|
179 | 180 | compiled C, C++ and Fortran libraries that have been exposed to Python. |
|
180 | 181 | |
|
181 | 182 | .. [MPI] Message Passing Interface. http://www-unix.mcs.anl.gov/mpi/ |
|
182 | 183 | .. [mpi4py] MPI for Python. mpi4py: http://mpi4py.scipy.org/ |
|
183 | 184 | .. [OpenMPI] Open MPI. http://www.open-mpi.org/ |
|
184 | 185 | .. [PyTrilinos] PyTrilinos. http://trilinos.sandia.gov/packages/pytrilinos/ |
|
185 | 186 | |
|
186 | 187 |
General Comments 0
You need to be logged in to leave comments.
Login now