update parallel demos for newparallel
MinRK
@@ -0,0 +1,144
1 #!/usr/bin/env python
2 """Run a Monte-Carlo options pricer in parallel."""
3
4 #-----------------------------------------------------------------------------
5 # Imports
6 #-----------------------------------------------------------------------------
7
8 import sys
9 import time
10 from IPython.zmq.parallel import client
11 import numpy as np
12 from mcpricer import price_options
13 from matplotlib import pyplot as plt
14
15 #-----------------------------------------------------------------------------
16 # Setup parameters for the run
17 #-----------------------------------------------------------------------------
18
19 def ask_question(text, the_type, default):
20 s = '%s [%r]: ' % (text, the_type(default))
21 result = raw_input(s)
22 if result:
23 return the_type(result)
24 else:
25 return the_type(default)
26
27 cluster_profile = ask_question("Cluster profile", str, "default")
28 price = ask_question("Initial price", float, 100.0)
29 rate = ask_question("Interest rate", float, 0.05)
30 days = ask_question("Days to expiration", int, 260)
31 paths = ask_question("Number of MC paths", int, 10000)
32 n_strikes = ask_question("Number of strike values", int, 5)
33 min_strike = ask_question("Min strike price", float, 90.0)
34 max_strike = ask_question("Max strike price", float, 110.0)
35 n_sigmas = ask_question("Number of volatility values", int, 5)
36 min_sigma = ask_question("Min volatility", float, 0.1)
37 max_sigma = ask_question("Max volatility", float, 0.4)
38
39 strike_vals = np.linspace(min_strike, max_strike, n_strikes)
40 sigma_vals = np.linspace(min_sigma, max_sigma, n_sigmas)
41
42 #-----------------------------------------------------------------------------
43 # Setup for parallel calculation
44 #-----------------------------------------------------------------------------
45
46 # The Client is used to set up the calculation and works with all
47 # engines.
48 c = client.Client(profile=cluster_profile)
49
50 # A LoadBalancedView is an interface to the engines that provides dynamic load
51 # balancing at the expense of not knowing which engine will execute the code.
52 view = c[None]
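# (A hedged aside: later releases of this API expose the same view explicitly
# as c.load_balanced_view(); c[None] is the shorthand used at this stage.)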
53
54 # The mcpricer module defines the price_options function; the function is
55 # shipped to the engines with each task, so no explicit engine setup is needed.
56 
57 #-----------------------------------------------------------------------------
58 # Perform parallel calculation
59 #-----------------------------------------------------------------------------
60
61 print "Running parallel calculation over strike prices and volatilities..."
62 print "Strike prices: ", strike_vals
63 print "Volatilities: ", sigma_vals
64 sys.stdout.flush()
65
66 # Submit a task to the LoadBalancedView for each (strike, sigma) pair.
67 t1 = time.time()
68 async_results = []
69 for strike in strike_vals:
70 for sigma in sigma_vals:
71 ar = view.apply_async(price_options, price, strike, sigma, rate, days, paths)
72 async_results.append(ar)
73
74 print "Submitted tasks: ", len(async_results)
75 sys.stdout.flush()
76
77 # Block until all tasks are completed.
78 c.barrier(async_results)
79 t2 = time.time()
80 t = t2-t1
81
82 print "Parallel calculation completed, time = %s s" % t
83 print "Collecting results..."
84
85 # Get the results using AsyncResult.get().
86 results = [ar.get() for ar in async_results]
87
88 # Assemble the result into a structured NumPy array.
89 prices = np.empty(n_strikes*n_sigmas,
90 dtype=[('ecall',float),('eput',float),('acall',float),('aput',float)]
91 )
92
93 for i, res in enumerate(results):
94     prices[i] = tuple(res)
95
96 prices.shape = (n_strikes, n_sigmas)
97 sigma_mesh, strike_mesh = np.meshgrid(sigma_vals, strike_vals)  # shape (n_strikes, n_sigmas), matching prices
98
99 print "Results are available: strike_mesh, sigma_mesh, prices"
100 print "To plot results type 'plot_options(sigma_mesh, strike_mesh, prices)'"
101
102 #-----------------------------------------------------------------------------
103 # Utilities
104 #-----------------------------------------------------------------------------
105
106 def plot_options(sigma_mesh, strike_mesh, prices):
107 """
108 Make a contour plot of the option price in (sigma, strike) space.
109 """
110 plt.figure(1)
111
112 plt.subplot(221)
113 plt.contourf(sigma_mesh, strike_mesh, prices['ecall'])
114 plt.axis('tight')
115 plt.colorbar()
116 plt.title('European Call')
117 plt.ylabel("Strike Price")
118
119 plt.subplot(222)
120 plt.contourf(sigma_mesh, strike_mesh, prices['acall'])
121 plt.axis('tight')
122 plt.colorbar()
123 plt.title("Asian Call")
124
125 plt.subplot(223)
126 plt.contourf(sigma_mesh, strike_mesh, prices['eput'])
127 plt.axis('tight')
128 plt.colorbar()
129 plt.title("European Put")
130 plt.xlabel("Volatility")
131 plt.ylabel("Strike Price")
132
133 plt.subplot(224)
134 plt.contourf(sigma_mesh, strike_mesh, prices['aput'])
135 plt.axis('tight')
136 plt.colorbar()
137 plt.title("Asian Put")
138 plt.xlabel("Volatility")
139
140
141
142
143
144
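A minimal way to exercise this driver, assuming a cluster is already running
under the chosen profile (a sketch; accepting the prompt defaults gives
5x5 = 25 tasks):

    $ ipython --pylab
    In [1]: run mcdriver.py
    In [2]: plot_options(sigma_mesh, strike_mesh, prices)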
@@ -0,0 +1,45
1
2 def price_options(S=100.0, K=100.0, sigma=0.25, r=0.05, days=260, paths=10000):
3 """
4 Price European and Asian options using a Monte Carlo method.
5
6 Parameters
7 ----------
8 S : float
9 The initial price of the stock.
10 K : float
11 The strike price of the option.
12 sigma : float
13 The volatility of the stock.
14 r : float
15 The risk free interest rate.
16 days : int
17 The number of days until the option expires.
18 paths : int
19 The number of Monte Carlo paths used to price the option.
20
21 Returns
22 -------
23 A tuple of (E. call, E. put, A. call, A. put) option prices.
24 """
25 import numpy as np
26     from math import exp, sqrt
27
28     h = 1.0/days  # time step; the total simulated horizon is days*h = 1 year
29 const1 = exp((r-0.5*sigma**2)*h)
30 const2 = sigma*sqrt(h)
31 stock_price = S*np.ones(paths, dtype='float64')
32 stock_price_sum = np.zeros(paths, dtype='float64')
33 for j in range(days):
34 growth_factor = const1*np.exp(const2*np.random.standard_normal(paths))
35 stock_price = stock_price*growth_factor
36 stock_price_sum = stock_price_sum + stock_price
37 stock_price_avg = stock_price_sum/days
38 zeros = np.zeros(paths, dtype='float64')
39 r_factor = exp(-r*h*days)
40 euro_put = r_factor*np.mean(np.maximum(zeros, K-stock_price))
41 asian_put = r_factor*np.mean(np.maximum(zeros, K-stock_price_avg))
42 euro_call = r_factor*np.mean(np.maximum(zeros, stock_price-K))
43 asian_call = r_factor*np.mean(np.maximum(zeros, stock_price_avg-K))
44 return (euro_call, euro_put, asian_call, asian_put)
45
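A quick serial sanity check of price_options before running it in parallel
(a sketch using the defaults above; the printed values vary from run to run
with the random paths):

    from mcpricer import price_options
    ecall, eput, acall, aput = price_options(S=100.0, K=100.0, sigma=0.25,
                                             r=0.05, days=260, paths=10000)
    print "European call/put:", ecall, eput
    print "Asian call/put:   ", acall, aput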
@@ -0,0 +1,63
1 """Calculate statistics on the digits of pi in parallel.
2
3 This program uses the functions in :file:`pidigits.py` to calculate
4 the frequencies of 2 digit sequences in the digits of pi. The
5 results are plotted using matplotlib.
6
7 To run, the text files of pi digits from http://www.super-computing.org/
8 must be present in the working directory of the IPython engines; they
9 will be downloaded automatically if missing. The actual filenames to
10 be used can be set with the ``filestring`` variable below.
11 
12 The dataset we have been using for this is the 200 million digit one here:
13 ftp://pi.super-computing.org/.2/pi200m/
14 
15 Each file holds 10 million digits; the 15 files listed below are drawn
16 from the dataset's 20 pieces.
17 """
18
19 from IPython.zmq.parallel import client
20 from matplotlib import pyplot as plt
21 import numpy as np
22 from pidigits import *
23 from timeit import default_timer as clock
24
25 # Files with digits of pi (10m digits each)
26 filestring = 'pi200m.ascii.%(i)02dof20'
27 files = [filestring % {'i':i} for i in range(1,16)]
28
29 # Connect to the IPython cluster
30 c = client.Client()
31 c.run('pidigits.py')
32
33 # the number of engines
34 n = len(c.ids)
35 id0 = list(c.ids)[0]
36 # fetch the pi-files
37 print "downloading %i files of pi"%n
38 c.map(fetch_pi_file, files[:n])
39 print "done"
40
41 # Run 10m digits on 1 engine
42 t1 = clock()
43 freqs10m = c[id0].apply_sync_bound(compute_two_digit_freqs, files[0])
44 t2 = clock()
45 digits_per_second1 = 10.0e6/(t2-t1)
46 print "Digits per second (1 core, 10m digits): ", digits_per_second1
47
48
49 # Run n*10m digits on all engines
50 t1 = clock()
51 c.block=True
52 freqs_all = c.map(compute_two_digit_freqs, files[:n])
53 freqs150m = reduce_freqs(freqs_all)
54 t2 = clock()
55 digits_per_second_n = n*10.0e6/(t2-t1)
56 print "Digits per second (%i engines, %im digits): " % (n, 10*n), digits_per_second_n
57
58 print "Speedup: ", digits_per_second8/digits_per_second1
59
60 plot_two_digit_freqs(freqs150m)
61 plt.title("2 digit sequences in %i0m digits of pi"%n)
62 plt.show()
63
@@ -0,0 +1,159
1 """Compute statistics on the digits of pi.
2
3 This uses precomputed digits of pi from the website
4 of Professor Yasumasa Kanada at the University of
5 Tokyo: http://www.super-computing.org/
6
7 Currently, there are only functions to read the
8 .txt (non-compressed, non-binary) files, but adding
9 support for compression and binary files would be
10 straightforward.
11
12 This module focuses on computing how often each 1, 2, or
13 n digit sequence occurs in the digits of pi. If the digits
14 of pi are truly random, these frequencies should all be
15 equal.
16 """
17
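# If the digits were uniformly random, each 1 digit value would occur with
# frequency 1/10 and each 2 digit sequence with frequency 1/100; e.g. in a
# 10 million digit file, each 2 digit sequence should occur ~100,000 times.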
18 # Import statements
19 from __future__ import division, with_statement
20
21 import os
22 import urllib
23
24 import numpy as np
25 from matplotlib import pyplot as plt
26
27 # Top-level functions
28
29 def fetch_pi_file(filename):
30 """This will download a segment of pi from super-computing.org
31 if the file is not already present.
32 """
33 ftpdir="ftp://pi.super-computing.org/.2/pi200m/"
34 if os.path.exists(filename):
35 # we already have it
36 return
37 else:
38 # download it
39 urllib.urlretrieve(ftpdir+filename,filename)
40
41 def compute_one_digit_freqs(filename):
42 """
43 Read digits of pi from a file and compute the 1 digit frequencies.
44 """
45 d = txt_file_to_digits(filename)
46 freqs = one_digit_freqs(d)
47 return freqs
48
49 def compute_two_digit_freqs(filename):
50 """
51 Read digits of pi from a file and compute the 2 digit frequencies.
52 """
53 d = txt_file_to_digits(filename)
54 freqs = two_digit_freqs(d)
55 return freqs
56
57 def reduce_freqs(freqlist):
58 """
59 Add up a list of freq counts to get the total counts.
60 """
61 allfreqs = np.zeros_like(freqlist[0])
62 for f in freqlist:
63 allfreqs += f
64 return allfreqs
65
66 def compute_n_digit_freqs(filename, n):
67 """
68 Read digits of pi from a file and compute the n digit frequencies.
69 """
70 d = txt_file_to_digits(filename)
71 freqs = n_digit_freqs(d, n)
72 return freqs
73
74 # Read digits from a txt file
75
76 def txt_file_to_digits(filename, the_type=str):
77 """
78 Yield the digits of pi read from a .txt file.
79 """
80 with open(filename, 'r') as f:
81         for line in f:  # iterate lazily; these files hold 10m digits each
82 for c in line:
83                 if c != '\n' and c != ' ':
84 yield the_type(c)
85
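# Usage sketch (filename illustrative): the digits are consumed lazily, so a
# 10 million digit file is never materialized as a list of digits:
#
#     digits = txt_file_to_digits('pi200m.ascii.01of20')
#     freqs = two_digit_freqs(digits)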
86 # Actual counting functions
87
88 def one_digit_freqs(digits, normalize=False):
89 """
90 Consume digits of pi and compute 1 digit freq. counts.
91 """
92 freqs = np.zeros(10, dtype='i4')
93 for d in digits:
94 freqs[int(d)] += 1
95 if normalize:
96 freqs = freqs/freqs.sum()
97 return freqs
98
99 def two_digit_freqs(digits, normalize=False):
100 """
101 Consume digits of pi and compute 2 digits freq. counts.
102 """
103 freqs = np.zeros(100, dtype='i4')
104 last = digits.next()
105 this = digits.next()
106     for d in digits:
107         freqs[int(last + this)] += 1
108         last = this
109         this = d
110     freqs[int(last + this)] += 1  # count the final pair as well
111     if normalize:
112 freqs = freqs/freqs.sum()
113 return freqs
114
115 def n_digit_freqs(digits, n, normalize=False):
116 """
117 Consume digits of pi and compute n digits freq. counts.
118
119 This should only be used for 1-6 digits.
120 """
121 freqs = np.zeros(pow(10,n), dtype='i4')
122 current = np.zeros(n, dtype=int)
123 for i in range(n):
124         current[i] = int(digits.next())
125     for d in digits:
126         freqs[int(''.join(map(str, current)))] += 1
127         current[0:-1] = current[1:]
128         current[-1] = int(d)
129     freqs[int(''.join(map(str, current)))] += 1  # count the final window
130     if normalize:
131 freqs = freqs/freqs.sum()
132 return freqs
133
134 # Plotting functions
135
136 def plot_two_digit_freqs(f2):
137 """
138 Plot two digits frequency counts using matplotlib.
139 """
140 f2_copy = f2.copy()
141 f2_copy.shape = (10,10)
142 ax = plt.matshow(f2_copy)
143 plt.colorbar()
144 for i in range(10):
145 for j in range(10):
146 plt.text(i-0.2, j+0.2, str(j)+str(i))
147 plt.ylabel('First digit')
148 plt.xlabel('Second digit')
149 return ax
150
151 def plot_one_digit_freqs(f1):
152 """
153 Plot one digit frequency counts using matplotlib.
154 """
155 ax = plt.plot(f1,'bo-')
156 plt.title('Single digit counts in pi')
157 plt.xlabel('Digit')
158 plt.ylabel('Count')
159 return ax
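The pieces above compose into a simple serial run on a single file (a sketch;
the filename follows the pattern used by parallelpi.py):

    fname = 'pi200m.ascii.01of20'
    fetch_pi_file(fname)                    # ftp download, skipped if present
    freqs = compute_two_digit_freqs(fname)
    plot_two_digit_freqs(freqs)
    plt.show()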
@@ -1,286 +1,290
1 1 =================
2 2 Parallel examples
3 3 =================
4 4
5 5 .. note::
6 6
7 Not adapted to zmq yet
7 Performance numbers from ``IPython.kernel``, not newparallel
8 8
9 9 In this section we describe two more involved examples of using an IPython
10 10 cluster to perform a parallel computation. In these examples, we will be using
11 11 IPython's "pylab" mode, which enables interactive plotting using the
12 12 Matplotlib package. IPython can be started in this mode by typing::
13 13
14 ipython -p pylab
14 ipython --pylab
15 15
16 16 at the system command line. If this prints an error message, you will
17 17 need to install the default profiles from within IPython by doing,
18 18
19 19 .. sourcecode:: ipython
20 20
21 21 In [1]: %install_profiles
22 22
23 23 and then restarting IPython.
24 24
25 25 150 million digits of pi
26 26 ========================
27 27
28 28 In this example we would like to study the distribution of digits in the
29 29 number pi (in base 10). While it is not known if pi is a normal number (a
30 30 number is normal in base 10 if 0-9 occur with equal likelihood), numerical
31 31 investigations suggest that it is. We will begin with a serial calculation on
32 32 10,000 digits of pi and then perform a parallel calculation involving 150
33 33 million digits.
34 34
35 35 In both the serial and parallel calculation we will be using functions defined
36 36 in the :file:`pidigits.py` file, which is available in the
37 37 :file:`docs/examples/newparallel` directory of the IPython source distribution.
38 38 These functions provide basic facilities for working with the digits of pi and
39 39 can be loaded into IPython by putting :file:`pidigits.py` in your current
40 40 working directory and then doing:
41 41
42 42 .. sourcecode:: ipython
43 43
44 44 In [1]: run pidigits.py
45 45
46 46 Serial calculation
47 47 ------------------
48 48
49 49 For the serial calculation, we will use SymPy (http://www.sympy.org) to
50 50 calculate 10,000 digits of pi and then look at the frequencies of the digits
51 51 0-9. Out of 10,000 digits, we expect each digit to occur 1,000 times. While
52 52 SymPy is capable of calculating many more digits of pi, our purpose here is to
53 53 set the stage for the much larger parallel calculation.
54 54
55 55 In this example, we use two functions from :file:`pidigits.py`:
56 56 :func:`one_digit_freqs` (which calculates how many times each digit occurs)
57 57 and :func:`plot_one_digit_freqs` (which uses Matplotlib to plot the result).
58 58 Here is an interactive IPython session that uses these functions with
59 59 SymPy:
60 60
61 61 .. sourcecode:: ipython
62 62
63 63 In [7]: import sympy
64 64
65 65 In [8]: pi = sympy.pi.evalf(40)
66 66
67 67 In [9]: pi
68 68 Out[9]: 3.141592653589793238462643383279502884197
69 69
70 70 In [10]: pi = sympy.pi.evalf(10000)
71 71
72 72 In [11]: digits = (d for d in str(pi)[2:]) # create a sequence of digits
73 73
74 74 In [12]: run pidigits.py # load one_digit_freqs/plot_one_digit_freqs
75 75
76 76 In [13]: freqs = one_digit_freqs(digits)
77 77
78 78 In [14]: plot_one_digit_freqs(freqs)
79 79 Out[14]: [<matplotlib.lines.Line2D object at 0x18a55290>]
80 80
81 81 The resulting plot of the single digit counts shows that each digit occurs
82 82 approximately 1,000 times, but that with only 10,000 digits the
83 83 statistical fluctuations are still rather large:
84 84
85 .. image:: single_digits.*
85 .. image:: ../parallel/single_digits.*
86 86
87 87 It is clear that to reduce the relative fluctuations in the counts, we need
88 88 to look at many more digits of pi. That brings us to the parallel calculation.
89 89
90 90 Parallel calculation
91 91 --------------------
92 92
93 93 Calculating many digits of pi is a challenging computational problem in itself.
94 94 Because we want to focus on the distribution of digits in this example, we
95 95 will use pre-computed digits of pi from the website of Professor Yasumasa
96 Kanada at the University of Tokoyo (http://www.super-computing.org). These
96 Kanada at the University of Tokyo (http://www.super-computing.org). These
97 97 digits come in a set of text files (ftp://pi.super-computing.org/.2/pi200m/)
98 98 that each have 10 million digits of pi.
99 99
100 100 For the parallel calculation, we have copied these files to the local hard
101 101 drives of the compute nodes. A total of 15 of these files will be used, for a
102 102 total of 150 million digits of pi. To make things a little more interesting we
103 103 will calculate the frequencies of all 2 digit sequences (00-99) and then plot
104 104 the result using a 2D matrix in Matplotlib.
105 105
106 106 The overall idea of the calculation is simple: each IPython engine will
107 107 compute the two digit counts for the digits in a single file. Then in a final
108 108 step the counts from each engine will be added up. To perform this
109 109 calculation, we will need two top-level functions from :file:`pidigits.py`:
110 110
111 .. literalinclude:: ../../examples/kernel/pidigits.py
111 .. literalinclude:: ../../examples/newparallel/pidigits.py
112 112 :language: python
113 113 :lines: 34-49
114 114
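Conceptually, the parallel step is a map of :func:`compute_two_digit_freqs`
over the files followed by a reduction with :func:`reduce_freqs`. The serial
equivalent of that composition is simply:

.. sourcecode:: python

    # serial equivalent of the parallel map/reduce over the pi files
    freqs_all = [compute_two_digit_freqs(f) for f in files]
    freqs = reduce_freqs(freqs_all)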
115 115 We will also use the :func:`plot_two_digit_freqs` function to plot the
116 116 results. The code to run this calculation in parallel is contained in
117 :file:`docs/examples/kernel/parallelpi.py`. This code can be run in parallel
117 :file:`docs/examples/newparallel/parallelpi.py`. This code can be run in parallel
118 118 using IPython by following these steps:
119 119
120 1. Copy the text files with the digits of pi
121 (ftp://pi.super-computing.org/.2/pi200m/) to the working directory of the
122 engines on the compute nodes.
123 2. Use :command:`ipclusterz` to start 15 engines. We used an 8 core (2 quad
120 1. Use :command:`ipclusterz` to start 15 engines. We used an 8 core (2 quad
124 121 core CPUs) cluster with hyperthreading enabled which makes the 8 cores
125 122    look like 16 (1 controller + 15 engines) in the OS. However, the maximum
126 123 speedup we can observe is still only 8x.
127 3. With the file :file:`parallelpi.py` in your current working directory, open
128 up IPython in pylab mode and type ``run parallelpi.py``.
124 2. With the file :file:`parallelpi.py` in your current working directory, open
125 up IPython in pylab mode and type ``run parallelpi.py``. This will download
126    the pi files via FTP the first time you run it, if they are not
127    present in the engines' working directory.
129 128
130 129 When run on our 8 core cluster, we observe a speedup of 7.7x. This is slightly
131 130 less than linear scaling (8x) because the controller is also running on one of
132 131 the cores.
133 132
134 133 To emphasize the interactive nature of IPython, we now show how the
135 134 calculation can also be run by simply typing the commands from
136 135 :file:`parallelpi.py` interactively into IPython:
137 136
138 137 .. sourcecode:: ipython
139 138
140 139 In [1]: from IPython.zmq.parallel import client
141 2009-11-19 11:32:38-0800 [-] Log opened.
142 140
143 # The MultiEngineClient allows us to use the engines interactively.
144 # We simply pass MultiEngineClient the name of the cluster profile we
141 # The Client allows us to use the engines interactively.
142 # We simply pass Client the name of the cluster profile we
145 143 # are using.
146 144 In [2]: c = client.Client(profile='mycluster')
147 2009-11-19 11:32:44-0800 [-] Connecting [0]
148 2009-11-19 11:32:44-0800 [Negotiation,client] Connected: ./ipcontroller-mec.furl
149 145
150 In [3]: mec.get_ids()
146 In [3]: c.ids
151 147 Out[3]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
152 148
153 149 In [4]: run pidigits.py
154 150
155 In [5]: filestring = 'pi200m-ascii-%(i)02dof20.txt'
151 In [5]: filestring = 'pi200m.ascii.%(i)02dof20'
156 152
157 153 # Create the list of files to process.
158 154 In [6]: files = [filestring % {'i':i} for i in range(1,16)]
159 155
160 156 In [7]: files
161 157 Out[7]:
162 ['pi200m-ascii-01of20.txt',
163 'pi200m-ascii-02of20.txt',
164 'pi200m-ascii-03of20.txt',
165 'pi200m-ascii-04of20.txt',
166 'pi200m-ascii-05of20.txt',
167 'pi200m-ascii-06of20.txt',
168 'pi200m-ascii-07of20.txt',
169 'pi200m-ascii-08of20.txt',
170 'pi200m-ascii-09of20.txt',
171 'pi200m-ascii-10of20.txt',
172 'pi200m-ascii-11of20.txt',
173 'pi200m-ascii-12of20.txt',
174 'pi200m-ascii-13of20.txt',
175 'pi200m-ascii-14of20.txt',
176 'pi200m-ascii-15of20.txt']
177
178 # This is the parallel calculation using the MultiEngineClient.map method
158 ['pi200m.ascii.01of20',
159 'pi200m.ascii.02of20',
160 'pi200m.ascii.03of20',
161 'pi200m.ascii.04of20',
162 'pi200m.ascii.05of20',
163 'pi200m.ascii.06of20',
164 'pi200m.ascii.07of20',
165 'pi200m.ascii.08of20',
166 'pi200m.ascii.09of20',
167 'pi200m.ascii.10of20',
168 'pi200m.ascii.11of20',
169 'pi200m.ascii.12of20',
170 'pi200m.ascii.13of20',
171 'pi200m.ascii.14of20',
172 'pi200m.ascii.15of20']
173
174 # download the data files if they don't already exist:
175 In [8]: c.map(fetch_pi_file, files)
176
177 # This is the parallel calculation using the Client.map method
179 178 # which applies compute_two_digit_freqs to each file in files in parallel.
180 In [8]: freqs_all = mec.map(compute_two_digit_freqs, files)
179 In [9]: freqs_all = c.map(compute_two_digit_freqs, files)
181 180
182 181 # Add up the frequencies from each engine.
183 In [8]: freqs = reduce_freqs(freqs_all)
182 In [10]: freqs = reduce_freqs(freqs_all)
184 183
185 In [9]: plot_two_digit_freqs(freqs)
186 Out[9]: <matplotlib.image.AxesImage object at 0x18beb110>
184 In [11]: plot_two_digit_freqs(freqs)
185 Out[11]: <matplotlib.image.AxesImage object at 0x18beb110>
187 186
188 In [10]: plt.title('2 digit counts of 150m digits of pi')
189 Out[10]: <matplotlib.text.Text object at 0x18d1f9b0>
187 In [12]: plt.title('2 digit counts of 150m digits of pi')
188 Out[12]: <matplotlib.text.Text object at 0x18d1f9b0>
190 189
191 190 The resulting plot generated by Matplotlib is shown below. The colors indicate
192 191 which two digit sequences are more (red) or less (blue) likely to occur in the
193 192 first 150 million digits of pi. We clearly see that the sequence "41" is
194 193 most likely and that "06" and "07" are least likely. Further analysis would
195 194 show that the relative size of the statistical fluctuations has decreased
196 195 compared to the 10,000 digit calculation.
197 196
198 .. image:: two_digit_counts.*
197 .. image:: ../parallel/two_digit_counts.*
199 198
200 199
201 200 Parallel options pricing
202 201 ========================
203 202
204 203 An option is a financial contract that gives the buyer of the contract the
205 204 right to buy (a "call") or sell (a "put") a secondary asset (a stock for
206 205 example) at a particular date in the future (the expiration date) for a
207 206 pre-agreed upon price (the strike price). For this right, the buyer pays the
208 207 seller a premium (the option price). There are a wide variety of flavors of
209 208 options (American, European, Asian, etc.) that are useful for different
210 209 purposes: hedging against risk, speculation, etc.
211 210
212 211 Much of modern finance is driven by the need to price these contracts
213 212 accurately based on what is known about the properties (such as volatility) of
214 213 the underlying asset. One method of pricing options is to use a Monte Carlo
215 214 simulation of the underlying asset price. In this example we use this approach
216 215 to price both European and Asian (path dependent) options for various strike
217 216 prices and volatilities.
218 217
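The pricer evolves the underlying price as geometric Brownian motion: each of
the ``days`` time steps of size :math:`h = 1/\mathrm{days}` multiplies the
price along every path by a lognormal growth factor,

.. math::

   S_{t+h} = S_t \exp\left[(r - \tfrac{1}{2}\sigma^2)\,h
             + \sigma\sqrt{h}\,Z\right], \qquad Z \sim N(0, 1),

and the discounted average payoff over the paths gives each option price.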
219 218 The code for this example can be found in the :file:`docs/examples/newparallel`
220 219 directory of the IPython source. The function :func:`price_options` in
221 220 :file:`mcpricer.py` implements the basic Monte Carlo pricing algorithm using
222 221 the NumPy package and is shown here:
223 222
224 223 .. literalinclude:: ../../examples/newparallel/mcpricer.py
225 224 :language: python
226 225
227 To run this code in parallel, we will use IPython's :class:`TaskClient` class,
226 To run this code in parallel, we will use IPython's :class:`LoadBalancedView` class,
228 227 which distributes work to the engines using dynamic load balancing. This
229 client can be used along side the :class:`MultiEngineClient` class shown in
230 the previous example. The parallel calculation using :class:`TaskClient` can
228 view is created from the :class:`Client` class shown in
229 the previous example. The parallel calculation using :class:`LoadBalancedView` can
231 230 be found in the file :file:`mcdriver.py`. The code in this file creates a
232 231 :class:`LoadBalancedView` instance and then submits a set of tasks using
233 232 :meth:`apply_async` that calculate the option prices for different
234 233 volatilities and strike prices. The results are then plotted as a 2D contour
235 234 plot using Matplotlib.
236 235
237 236 .. literalinclude:: ../../examples/newparallel/mcdriver.py
238 237 :language: python
239 238
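The essential load-balanced pattern in the driver is small enough to sketch on
its own (assuming a running cluster; ``c[None]`` is this API's shorthand for a
:class:`LoadBalancedView`):

.. sourcecode:: python

    from IPython.zmq.parallel import client

    c = client.Client()
    view = c[None]                        # load-balanced view of all engines
    ars = [view.apply_async(pow, 2, i) for i in range(10)]
    c.barrier(ars)                        # block until every task is done
    results = [ar.get() for ar in ars]    # [1, 2, 4, ..., 512]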
240 239 To use this code, start an IPython cluster using :command:`ipclusterz`, open
241 240 IPython in pylab mode with the file :file:`mcdriver.py` in your current
242 241 working directory and then type:
243 242
244 243 .. sourcecode:: ipython
245 244
246 245 In [7]: run mcdriver.py
247 246     Submitted tasks:  25
248 247
249 248 Once all the tasks have finished, the results can be plotted using the
250 249 :func:`plot_options` function, which draws a 2x2 grid of contour plots of
251 250 the European and Asian call and put prices as a function of volatility
252 251 and strike price:
253 252 
254 253 .. sourcecode:: ipython
255 254 
256 255     In [8]: plot_options(sigma_mesh, strike_mesh, prices)
261 260
262 261 The Asian call and put panels are shown in the two figures below. On an 8 core cluster the
263 262 entire calculation (10 strike prices, 10 volatilities, 100,000 paths for each)
264 263 took 30 seconds in parallel, giving a speedup of 7.7x, which is comparable
265 264 to the speedup observed in our previous example.
266 265
267 .. image:: asian_call.*
266 .. image:: ../parallel/asian_call.*
268 267
269 .. image:: asian_put.*
268 .. image:: ../parallel/asian_put.*
270 269
271 270 Conclusion
272 271 ==========
273 272
274 273 To conclude these examples, we summarize the key features of IPython's
275 274 parallel architecture that have been demonstrated:
276 275
277 276 * Serial code can often be parallelized with only a few extra lines of code.
278 We have used the :class:`MultiEngineClient` and :class:`TaskClient` classes
277 We have used the :class:`DirectView` and :class:`LoadBalancedView` classes
279 278 for this purpose.
280 279 * The resulting parallel code can be run without ever leaving IPython's
281 280 interactive shell.
282 281 * Any data computed in parallel can be explored interactively through
283 282 visualization or further numerical calculations.
284 283 * We have run these examples on a cluster running Windows HPC Server 2008.
285 284    IPython's built-in support for the Windows HPC job scheduler makes it
286 285 easy to get started with IPython's parallel capabilities.
286
287 .. note::
288
289 The newparallel code has never been run on Windows HPC Server, so the last
290 conclusion is untested.
@@ -1,186 +1,187
1 1 .. _parallelmpi:
2 2
3 3 =======================
4 4 Using MPI with IPython
5 5 =======================
6 6
7 7 .. note::
8 8
9 9 Not adapted to zmq yet
10    This is also out of date with respect to :command:`ipcluster` in general
10 11
11 12 Often, a parallel algorithm will require moving data between the engines. One
12 13 way of accomplishing this is by doing a pull and then a push using the
13 14 multiengine client. However, this will be slow as all the data has to go
14 15 through the controller to the client and then back through the controller to
15 16 its final destination.
16 17
17 18 A much better way of moving data between engines is to use a message passing
18 19 library, such as the Message Passing Interface (MPI) [MPI]_. IPython's
19 20 parallel computing architecture has been designed from the ground up to
20 21 integrate with MPI. This document describes how to use MPI with IPython.
21 22
22 23 Additional installation requirements
23 24 ====================================
24 25
25 26 If you want to use MPI with IPython, you will need to install:
26 27
27 28 * A standard MPI implementation such as OpenMPI [OpenMPI]_ or MPICH.
28 29 * The mpi4py [mpi4py]_ package.
29 30
30 31 .. note::
31 32
32 33 The mpi4py package is not a strict requirement. However, you need to
33 34 have *some* way of calling MPI from Python. You also need some way of
34 35 making sure that :func:`MPI_Init` is called when the IPython engines start
35 36 up. There are a number of ways of doing this and a good number of
36 37 associated subtleties. We highly recommend just using mpi4py as it
37 38 takes care of most of these problems. If you want to do something
38 39 different, let us know and we can help you get started.
39 40
40 41 Starting the engines with MPI enabled
41 42 =====================================
42 43
43 44 To use code that calls MPI, there are typically two things that MPI requires.
44 45
45 46 1. The process that wants to call MPI must be started using
46 47 :command:`mpiexec` or a batch system (like PBS) that has MPI support.
47 48 2. Once the process starts, it must call :func:`MPI_Init`.
48 49
49 50 There are a couple of ways that you can start the IPython engines and get
50 51 these things to happen.
51 52
52 53 Automatic starting using :command:`mpiexec` and :command:`ipclusterz`
53 54 --------------------------------------------------------------------
54 55
55 56 The easiest approach is to use the `mpiexec` mode of :command:`ipclusterz`,
56 57 which will first start a controller and then a set of engines using
57 58 :command:`mpiexec`::
58 59
59 60 $ ipclusterz mpiexec -n 4
60 61
61 62 This approach is best as interrupting :command:`ipclusterz` will automatically
62 63 stop and clean up the controller and engines.
63 64
64 65 Manual starting using :command:`mpiexec`
65 66 ----------------------------------------
66 67
67 68 If you want to start the IPython engines using :command:`mpiexec`, just
68 69 do::
69 70
70 71 $ mpiexec -n 4 ipengine --mpi=mpi4py
71 72
72 73 This requires that you already have a controller running and that the FURL
73 74 files for the engines are in place. We also have built in support for
74 75 PyTrilinos [PyTrilinos]_, which can be used (assuming it is installed) by
75 76 starting the engines with::
76 77
77 78 mpiexec -n 4 ipengine --mpi=pytrilinos
78 79
79 80 Automatic starting using PBS and :command:`ipclusterz`
80 81 -----------------------------------------------------
81 82
82 83 The :command:`ipclusterz` command also has built-in integration with PBS. For
83 84 more information on this approach, see our documentation on :ref:`ipclusterz
84 85 <parallel_process>`.
85 86
86 87 Actually using MPI
87 88 ==================
88 89
89 90 Once the engines are running with MPI enabled, you are ready to go. You can
90 91 now call any code that uses MPI in the IPython engines, and all of this can
91 92 be done interactively. Here we show a simple example that uses mpi4py
92 93 [mpi4py]_ version 1.1.0 or later.
93 94
94 95 First, let's define a simple function that uses MPI to calculate the sum of a
95 96 distributed array. Save the following text in a file called :file:`psum.py`:
96 97
97 98 .. sourcecode:: python
98 99
99 100 from mpi4py import MPI
100 101 import numpy as np
101 102
102 103 def psum(a):
103 104 s = np.sum(a)
104 105 rcvBuf = np.array(0.0,'d')
105 106 MPI.COMM_WORLD.Allreduce([s, MPI.DOUBLE],
106 107 [rcvBuf, MPI.DOUBLE],
107 108 op=MPI.SUM)
108 109 return rcvBuf
109 110
110 111 Now, start an IPython cluster in the same directory as :file:`psum.py`::
111 112
112 113 $ ipclusterz mpiexec -n 4
113 114
114 115 Finally, connect to the cluster and use this function interactively. In this
115 116 case, we create a random array on each engine and sum up all the random arrays
116 117 using our :func:`psum` function:
117 118
118 119 .. sourcecode:: ipython
119 120
120 121 In [1]: from IPython.zmq.parallel import client
121 122
122 123 In [2]: c = client.Client()
123 124
124 125 In [3]: c.activate()
125 126
126 127 In [4]: px import numpy as np
127 128 Parallel execution on engines: all
128 129 Out[4]:
129 130 <Results List>
130 131 [0] In [13]: import numpy as np
131 132 [1] In [13]: import numpy as np
132 133 [2] In [13]: import numpy as np
133 134 [3] In [13]: import numpy as np
134 135
135 136 In [6]: px a = np.random.rand(100)
136 137 Parallel execution on engines: all
137 138 Out[6]:
138 139 <Results List>
139 140 [0] In [15]: a = np.random.rand(100)
140 141 [1] In [15]: a = np.random.rand(100)
141 142 [2] In [15]: a = np.random.rand(100)
142 143 [3] In [15]: a = np.random.rand(100)
143 144
144 145 In [7]: px from psum import psum
145 146 Parallel execution on engines: all
146 147 Out[7]:
147 148 <Results List>
148 149 [0] In [16]: from psum import psum
149 150 [1] In [16]: from psum import psum
150 151 [2] In [16]: from psum import psum
151 152 [3] In [16]: from psum import psum
152 153
153 154 In [8]: px s = psum(a)
154 155 Parallel execution on engines: all
155 156 Out[8]:
156 157 <Results List>
157 158 [0] In [17]: s = psum(a)
158 159 [1] In [17]: s = psum(a)
159 160 [2] In [17]: s = psum(a)
160 161 [3] In [17]: s = psum(a)
161 162
162 163 In [9]: px print s
163 164 Parallel execution on engines: all
164 165 Out[9]:
165 166 <Results List>
166 167 [0] In [18]: print s
167 168 [0] Out[18]: 187.451545803
168 169
169 170 [1] In [18]: print s
170 171 [1] Out[18]: 187.451545803
171 172
172 173 [2] In [18]: print s
173 174 [2] Out[18]: 187.451545803
174 175
175 176 [3] In [18]: print s
176 177 [3] Out[18]: 187.451545803
177 178
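The same function can be checked outside IPython with plain
:command:`mpiexec` (a sketch; :file:`psum_test.py` is an illustrative name):

.. sourcecode:: python

    # psum_test.py -- run with: mpiexec -n 4 python psum_test.py
    from mpi4py import MPI
    import numpy as np
    from psum import psum

    a = np.random.rand(100)
    s = psum(a)
    if MPI.COMM_WORLD.Get_rank() == 0:
        print "distributed sum:", s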
178 179 Any Python code that makes calls to MPI can be used in this manner, including
179 180 compiled C, C++ and Fortran libraries that have been exposed to Python.
180 181
181 182 .. [MPI] Message Passing Interface. http://www-unix.mcs.anl.gov/mpi/
182 183 .. [mpi4py] MPI for Python. mpi4py: http://mpi4py.scipy.org/
183 184 .. [OpenMPI] Open MPI. http://www.open-mpi.org/
184 185 .. [PyTrilinos] PyTrilinos. http://trilinos.sandia.gov/packages/pytrilinos/
185 186
186 187