NO CONTENT: 4 new binary files (mode 100644); binary diffs hidden
@@ -51,19 +51,22 @@ def inputhook_wx1():
     This approach seems to work, but its performance is not great as it
     relies on having PyOS_InputHook called regularly.
     """
-    app = wx.GetApp()
-    if app is not None:
-        assert wx.Thread_IsMain()
-
-        # Make a temporary event loop and process system events until
-        # there are no more waiting, then allow idle events (which
-        # will also deal with pending or posted wx events.)
-        evtloop = wx.EventLoop()
-        ea = wx.EventLoopActivator(evtloop)
-        while evtloop.Pending():
-            evtloop.Dispatch()
-        app.ProcessIdle()
-        del ea
+    try:
+        app = wx.GetApp()
+        if app is not None:
+            assert wx.Thread_IsMain()
+
+            # Make a temporary event loop and process system events until
+            # there are no more waiting, then allow idle events (which
+            # will also deal with pending or posted wx events.)
+            evtloop = wx.EventLoop()
+            ea = wx.EventLoopActivator(evtloop)
+            while evtloop.Pending():
+                evtloop.Dispatch()
+            app.ProcessIdle()
+            del ea
+    except KeyboardInterrupt:
+        pass
     return 0
 
 class EventLoopTimer(wx.Timer):
@@ -102,13 +105,16 @@ def inputhook_wx2():
     but eventually performance would suffer from calling select/kbhit too
     often.
     """
-    app = wx.GetApp()
-    if app is not None:
-        assert wx.Thread_IsMain()
-        elr = EventLoopRunner()
-        # As this time is made shorter, keyboard response improves, but idle
-        # CPU load goes up.  10 ms seems like a good compromise.
-        elr.Run(time=10)  # CHANGE time here to control polling interval
+    try:
+        app = wx.GetApp()
+        if app is not None:
+            assert wx.Thread_IsMain()
+            elr = EventLoopRunner()
+            # As this time is made shorter, keyboard response improves, but idle
+            # CPU load goes up.  10 ms seems like a good compromise.
+            elr.Run(time=10)  # CHANGE time here to control polling interval
+    except KeyboardInterrupt:
+        pass
     return 0
 
 def inputhook_wx3():
@@ -119,49 +125,54 @@ def inputhook_wx3():
     time.sleep is inserted.  This is needed, otherwise, CPU usage is at 100%.
     This sleep time should be tuned though for best performance.
     """
-    app = wx.GetApp()
-    if app is not None:
-        assert wx.Thread_IsMain()
-
-        # The import of wx on Linux sets the handler for signal.SIGINT
-        # to 0.  This is a bug in wx or gtk.  We fix by just setting it
-        # back to the Python default.
-        if not callable(signal.getsignal(signal.SIGINT)):
-            signal.signal(signal.SIGINT, signal.default_int_handler)
-
-        evtloop = wx.EventLoop()
-        ea = wx.EventLoopActivator(evtloop)
-        t = clock()
-        while not stdin_ready():
-            while evtloop.Pending():
-                t = clock()
-                evtloop.Dispatch()
-            app.ProcessIdle()
-            # We need to sleep at this point to keep the idle CPU load
-            # low.  However, if sleep to long, GUI response is poor.  As
-            # a compromise, we watch how often GUI events are being processed
-            # and switch between a short and long sleep time.  Here are some
-            # stats useful in helping to tune this.
-            # time    CPU load
-            # 0.001   13%
-            # 0.005   3%
-            # 0.01    1.5%
-            # 0.05    0.5%
-            used_time = clock() - t
-            if used_time > 5*60.0:
-                # print 'Sleep for 5 s'  # dbg
-                time.sleep(5.0)
-            elif used_time > 10.0:
-                # print 'Sleep for 1 s'  # dbg
-                time.sleep(1.0)
-            elif used_time > 0.1:
-                # Few GUI events coming in, so we can sleep longer
-                # print 'Sleep for 0.05 s'  # dbg
-                time.sleep(0.05)
-            else:
-                # Many GUI events coming in, so sleep only very little
-                time.sleep(0.001)
-        del ea
+    # We need to protect against a user pressing Control-C when IPython is
+    # idle and this is running.  We trap KeyboardInterrupt and pass.
+    try:
+        app = wx.GetApp()
+        if app is not None:
+            assert wx.Thread_IsMain()
+
+            # The import of wx on Linux sets the handler for signal.SIGINT
+            # to 0.  This is a bug in wx or gtk.  We fix by just setting it
+            # back to the Python default.
+            if not callable(signal.getsignal(signal.SIGINT)):
+                signal.signal(signal.SIGINT, signal.default_int_handler)
+
+            evtloop = wx.EventLoop()
+            ea = wx.EventLoopActivator(evtloop)
+            t = clock()
+            while not stdin_ready():
+                while evtloop.Pending():
+                    t = clock()
+                    evtloop.Dispatch()
+                app.ProcessIdle()
+                # We need to sleep at this point to keep the idle CPU load
+                # low.  However, if sleep to long, GUI response is poor.  As
+                # a compromise, we watch how often GUI events are being processed
+                # and switch between a short and long sleep time.  Here are some
+                # stats useful in helping to tune this.
+                # time    CPU load
+                # 0.001   13%
+                # 0.005   3%
+                # 0.01    1.5%
+                # 0.05    0.5%
+                used_time = clock() - t
+                if used_time > 5*60.0:
+                    # print 'Sleep for 5 s'  # dbg
+                    time.sleep(5.0)
+                elif used_time > 10.0:
+                    # print 'Sleep for 1 s'  # dbg
+                    time.sleep(1.0)
+                elif used_time > 0.1:
+                    # Few GUI events coming in, so we can sleep longer
+                    # print 'Sleep for 0.05 s'  # dbg
+                    time.sleep(0.05)
+                else:
+                    # Many GUI events coming in, so sleep only very little
+                    time.sleep(0.001)
+            del ea
+    except KeyboardInterrupt:
+        pass
     return 0
 
 # This is our default implementation
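All three hunks above make the same change: each input hook's body is wrapped in a try/except KeyboardInterrupt so that a Control-C arriving while IPython idles inside the hook is swallowed instead of propagating out of the C-level callback. For experimenting with the polling strategy outside of wx, here is a minimal, self-contained sketch of inputhook_wx3's adaptive-sleep loop; the GUI dispatch is stubbed out, and the stdin_ready helper built on select.select is our assumption (Unix only), standing in for IPython's real readiness check::

    import select
    import sys
    import time
    from timeit import default_timer as clock

    def stdin_ready():
        # True as soon as input is waiting on stdin (select works on Unix).
        ready, _, _ = select.select([sys.stdin], [], [], 0)
        return bool(ready)

    def idle_loop(process_gui_events=lambda: 0):
        # Poll (stubbed) GUI events until stdin is ready, sleeping adaptively.
        try:
            t = clock()
            while not stdin_ready():
                if process_gui_events():   # a real hook would dispatch wx events
                    t = clock()            # events seen: reset the idle timer
                used_time = clock() - t
                if used_time > 10.0:       # long idle: sleep a full second
                    time.sleep(1.0)
                elif used_time > 0.1:      # few events: medium sleep
                    time.sleep(0.05)
                else:                      # many events: sleep very little
                    time.sleep(0.001)
        except KeyboardInterrupt:
            # Swallow Ctrl-C while idling, exactly as the patch above does.
            pass
        return 0

    # idle_loop() returns once you press Enter (stdin becomes readable).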
@@ -1,25 +1,30 @@
 #!/usr/bin/env python
-# encoding: utf-8
 """Run a Monte-Carlo options pricer in parallel."""
 
 from IPython.kernel import client
 import numpy as np
 from mcpricer import price_options
 
-
-tc = client.TaskClient(profile='default')
+# The MultiEngineClient is used to setup the calculation and works with all
+# engine.
 mec = client.MultiEngineClient(profile='default')
 
-
-# Initialize the common code on the engines
+# The TaskClient is an interface to the engines that provides dynamic load
+# balancing at the expense of not knowing which engine will execute the code.
+tc = client.TaskClient(profile='default')
+
+# Initialize the common code on the engines. This Python module has the
+# price_options function that prices the options.
 mec.run('mcpricer.py')
 
-# Define the function that will make up our tasks.
+# Define the function that will make up our tasks. We basically want to
+# call the price_options function with all but two arguments (K, sigma)
+# fixed.
 def my_prices(K, sigma):
     S = 100.0
     r = 0.05
     days = 260
-    paths = 10000
+    paths = 100000
     return price_options(S, K, sigma, r, days, paths)
 
 # Create arrays of strike prices and volatilities
@@ -28,7 +33,9 @@ nsigma = 5
 K_vals = np.linspace(90.0, 100.0, nK)
 sigma_vals = np.linspace(0.0, 0.2, nsigma)
 
-# Submit tasks
+# Submit tasks to the TaskClient for each (K, sigma) pair as a MapTask.
+# The MapTask simply applies a function (my_prices) to the arguments:
+# my_prices(K, sigma) and returns the result.
 taskids = []
 for K in K_vals:
     for sigma in sigma_vals:
@@ -37,24 +44,24 @@ for K in K_vals:
 
 print "Submitted tasks: ", taskids
 
-# Block until tasks are completed
+# Block until all tasks are completed.
 tc.barrier(taskids)
 
-# Get the results
+# Get the results using TaskClient.get_task_result.
 results = [tc.get_task_result(tid) for tid in taskids]
 
-# Assemble the result
+# Assemble the result into a structured NumPy array.
 prices = np.empty(nK*nsigma,
-    dtype=[('vcall',float),('vput',float),('acall',float),('aput',float)]
+    dtype=[('ecall',float),('eput',float),('acall',float),('aput',float)]
 )
 for i, price_tuple in enumerate(results):
     prices[i] = price_tuple
 prices.shape = (nK, nsigma)
+K_vals, sigma_vals = np.meshgrid(K_vals, sigma_vals)
 
-
-def plot_options(K_vals, sigma_vals, prices):
+def plot_options(sigma_vals, K_vals, prices):
     """
-    Make a contour plot of the option prices.
+    Make a contour plot of the option price in (sigma, K) space.
     """
     from matplotlib import pyplot as plt
     plt.contourf(sigma_vals, K_vals, prices)
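A detail worth calling out in the hunk above is the assembly step: the per-task result tuples are packed into a NumPy structured array so that each price surface can later be selected by field name. Here is that pattern in isolation, using invented placeholder results instead of real task output::

    import numpy as np

    nK, nsigma = 5, 5
    # Hypothetical results: one (ecall, eput, acall, aput) tuple per task.
    results = [(1.0*i, 2.0*i, 3.0*i, 4.0*i) for i in range(nK*nsigma)]

    prices = np.empty(nK*nsigma,
        dtype=[('ecall', float), ('eput', float), ('acall', float), ('aput', float)])
    for i, price_tuple in enumerate(results):
        prices[i] = price_tuple          # one tuple fills all four fields
    prices.shape = (nK, nsigma)          # reshape in place to a 2D grid

    print(prices['acall'])               # a (nK, nsigma) surface for contourf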
@@ -4,7 +4,26 @@ from math import *
 
 def price_options(S=100.0, K=100.0, sigma=0.25, r=0.05, days=260, paths=10000):
     """
-    Price Vanilla and Asian options using a Monte Carlo method.
+    Price European and Asian options using a Monte Carlo method.
+
+    Parameters
+    ----------
+    S : float
+        The initial price of the stock.
+    K : float
+        The strike price of the option.
+    sigma : float
+        The volatility of the stock.
+    r : float
+        The risk free interest rate.
+    days : int
+        The number of days until the option expires.
+    paths : int
+        The number of Monte Carlo paths used to price the option.
+
+    Returns
+    -------
+    A tuple of (E. call, E. put, A. call, A. put) option prices.
     """
     h = 1.0/days
     const1 = exp((r-0.5*sigma**2)*h)
@@ -18,16 +37,9 @@ def price_options(S=100.0, K=100.0, sigma=0.25, r=0.05, days=260, paths=10000):
     stock_price_avg = stock_price_sum/days
     zeros = np.zeros(paths, dtype='float64')
     r_factor = exp(-r*h*days)
-    vanilla_put = r_factor*np.mean(np.maximum(zeros, K-stock_price))
+    euro_put = r_factor*np.mean(np.maximum(zeros, K-stock_price))
     asian_put = r_factor*np.mean(np.maximum(zeros, K-stock_price_avg))
-    vanilla_call = r_factor*np.mean(np.maximum(zeros, stock_price-K))
+    euro_call = r_factor*np.mean(np.maximum(zeros, stock_price-K))
     asian_call = r_factor*np.mean(np.maximum(zeros, stock_price_avg-K))
-    return (vanilla_call, vanilla_put, asian_call, asian_put)
+    return (euro_call, euro_put, asian_call, asian_put)
 
-
-if __name__ == '__main__':
-    (vc, vp, ac, ap) = price_options()
-    print "Vanilla Put Price = ", vp
-    print "Asian Put Price = ", ap
-    print "Vanilla Call Price = ", vc
-    print "Asian Call Price = ", ac
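The two hunks above show only the docstring and the payoff/discounting lines of price_options. For a runnable picture of the whole function, here is a sketch consistent with the visible pieces; the geometric Brownian motion update in the middle (const2 and the standard-normal step) is our reconstruction of the part the diff does not show, not the file's verbatim contents::

    from math import exp, sqrt
    import numpy as np

    def price_options(S=100.0, K=100.0, sigma=0.25, r=0.05, days=260,
                      paths=10000):
        # Monte Carlo European/Asian pricer under geometric Brownian motion.
        h = 1.0/days
        const1 = exp((r - 0.5*sigma**2)*h)      # drift factor per step
        const2 = sigma*sqrt(h)                  # diffusion scale per step
        stock_price = S*np.ones(paths, dtype='float64')
        stock_price_sum = np.zeros(paths, dtype='float64')
        for _ in range(days):
            growth = const1*np.exp(const2*np.random.standard_normal(paths))
            stock_price = stock_price*growth
            stock_price_sum += stock_price
        stock_price_avg = stock_price_sum/days
        zeros = np.zeros(paths, dtype='float64')
        r_factor = exp(-r*h*days)               # discount back to today
        euro_put = r_factor*np.mean(np.maximum(zeros, K - stock_price))
        asian_put = r_factor*np.mean(np.maximum(zeros, K - stock_price_avg))
        euro_call = r_factor*np.mean(np.maximum(zeros, stock_price - K))
        asian_call = r_factor*np.mean(np.maximum(zeros, stock_price_avg - K))
        return (euro_call, euro_put, asian_call, asian_put)

    print(price_options())  # four prices; the European call lands near 12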
@@ -25,15 +25,6 b" filestring = 'pi200m-ascii-%(i)02dof20.txt'" | |||||
25 | files = [filestring % {'i':i} for i in range(1,16)] |
|
25 | files = [filestring % {'i':i} for i in range(1,16)] | |
26 |
|
26 | |||
27 |
|
27 | |||
28 | # A function for reducing the frequencies calculated |
|
|||
29 | # by different engines. |
|
|||
30 | def reduce_freqs(freqlist): |
|
|||
31 | allfreqs = np.zeros_like(freqlist[0]) |
|
|||
32 | for f in freqlist: |
|
|||
33 | allfreqs += f |
|
|||
34 | return allfreqs |
|
|||
35 |
|
||||
36 |
|
||||
37 | # Connect to the IPython cluster |
|
28 | # Connect to the IPython cluster | |
38 | mec = client.MultiEngineClient(profile='mycluster') |
|
29 | mec = client.MultiEngineClient(profile='mycluster') | |
39 | mec.run('pidigits.py') |
|
30 | mec.run('pidigits.py') | |
@@ -42,9 +33,7 b" mec.run('pidigits.py')" | |||||
42 | # Run 10m digits on 1 engine |
|
33 | # Run 10m digits on 1 engine | |
43 | mapper = mec.mapper(targets=0) |
|
34 | mapper = mec.mapper(targets=0) | |
44 | t1 = clock() |
|
35 | t1 = clock() | |
45 |
|
||||
46 | freqs10m = mapper.map(compute_two_digit_freqs, files[:1])[0] |
|
36 | freqs10m = mapper.map(compute_two_digit_freqs, files[:1])[0] | |
47 |
|
||||
48 | t2 = clock() |
|
37 | t2 = clock() | |
49 | digits_per_second1 = 10.0e6/(t2-t1) |
|
38 | digits_per_second1 = 10.0e6/(t2-t1) | |
50 | print "Digits per second (1 core, 10m digits): ", digits_per_second1 |
|
39 | print "Digits per second (1 core, 10m digits): ", digits_per_second1 | |
@@ -52,10 +41,8 b' print "Digits per second (1 core, 10m digits): ", digits_per_second1' | |||||
52 |
|
41 | |||
53 | # Run 150m digits on 15 engines (8 cores) |
|
42 | # Run 150m digits on 15 engines (8 cores) | |
54 | t1 = clock() |
|
43 | t1 = clock() | |
55 |
|
||||
56 | freqs_all = mec.map(compute_two_digit_freqs, files[:len(mec)]) |
|
44 | freqs_all = mec.map(compute_two_digit_freqs, files[:len(mec)]) | |
57 | freqs150m = reduce_freqs(freqs_all) |
|
45 | freqs150m = reduce_freqs(freqs_all) | |
58 |
|
||||
59 | t2 = clock() |
|
46 | t2 = clock() | |
60 | digits_per_second8 = 150.0e6/(t2-t1) |
|
47 | digits_per_second8 = 150.0e6/(t2-t1) | |
61 | print "Digits per second (8 cores, 150m digits): ", digits_per_second8 |
|
48 | print "Digits per second (8 cores, 150m digits): ", digits_per_second8 |
@@ -24,16 +24,34 @@ from matplotlib import pyplot as plt
 # Top-level functions
 
 def compute_one_digit_freqs(filename):
+    """
+    Read digits of pi from a file and compute the 1 digit frequencies.
+    """
     d = txt_file_to_digits(filename)
     freqs = one_digit_freqs(d)
     return freqs
 
 def compute_two_digit_freqs(filename):
+    """
+    Read digits of pi from a file and compute the 2 digit frequencies.
+    """
     d = txt_file_to_digits(filename)
     freqs = two_digit_freqs(d)
     return freqs
 
+def reduce_freqs(freqlist):
+    """
+    Add up a list of freq counts to get the total counts.
+    """
+    allfreqs = np.zeros_like(freqlist[0])
+    for f in freqlist:
+        allfreqs += f
+    return allfreqs
+
 def compute_n_digit_freqs(filename, n):
+    """
+    Read digits of pi from a file and compute the n digit frequencies.
+    """
     d = txt_file_to_digits(filename)
     freqs = n_digit_freqs(d, n)
     return freqs
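This hunk adds docstrings and moves reduce_freqs into pidigits.py without showing the counting bodies. For reference, here is a stand-alone sketch of the two pieces the examples lean on: an overlapping two-digit counter (reconstructed from the old documentation listing elsewhere in this changeset, not copied from pidigits.py itself) and the accumulator that merges per-engine counts::

    import numpy as np

    def two_digit_freqs(digits):
        # Count each overlapping two-digit sequence 00-99 in an iterable
        # of digit characters.
        freqs = np.zeros(100, dtype='i4')
        digits = iter(digits)
        last = next(digits)
        for this in digits:
            freqs[int(last + this)] += 1
            last = this
        return freqs

    def reduce_freqs(freqlist):
        # Sum per-engine count arrays into one total, as in the hunk above.
        allfreqs = np.zeros_like(freqlist[0])
        for f in freqlist:
            allfreqs += f
        return allfreqs

    counts = two_digit_freqs("14159265358979")
    print(counts[14], counts[41])  # occurrences of '14' and '41'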
NO CONTENT: 8 modified binary files; binary diffs hidden
@@ -2,125 +2,269 @@
 Parallel examples
 =================
 
-In this section we describe a few examples of using an IPython
-cluster to perform a parallel computation.
+In this section we describe two more involved examples of using an IPython
+cluster to perform a parallel computation. In these examples, we will be using
+IPython's "pylab" mode, which enables interactive plotting using the
+Matplotlib package. IPython can be started in this mode by typing::
+
+    ipython -p pylab
+
+at the system command line. If this prints an error message, you will
+need to install the default profiles from within IPython by doing,
+
+.. sourcecode:: ipython
+
+    In [1]: %install_profiles
+
+and then restarting IPython.
 
 150 million digits of pi
 ========================
 
 In this example we would like to study the distribution of digits in the
-number pi. More specifically, we are going to study how often each 2
-digits sequence occurs in the first 150 million digits of pi. If the digits
-0-9 occur with equal probability, we expect that each two digits sequence
-(00, 01, ..., 99) will occur 1% of the time.
-
-This examples uses precomputed digits of pi from the website of Professor
-Yasumasa Kanada at the University of Tokoyo (http://www.super-computing.org).
-These digits come in a set of ``.txt`` files
-(ftp://pi.super-computing.org/.2/pi200m/) that each have 10 million digits of
-pi. In the parallel computation, we will use the :meth:`MultiEngineClient.map`
-method to have each engine compute the desired statistics on a subset of these
-files. Before I started the parallel computation, I copied the data files
-to the compute nodes so the engine have fast access to them.
-
-Here are the Python functions for counting the frequencies of each two digit
-sequence in serial::
-
-    def compute_two_digit_freqs(filename):
-        """
-        Compute the two digit frequencies from a single file.
-        """
-        d = txt_file_to_digits(filename)
-        freqs = two_digit_freqs(d)
-        return freqs
-
-    def txt_file_to_digits(filename, the_type=str):
-        """
-        Yield the digits of pi read from a .txt file.
-        """
-        with open(filename, 'r') as f:
-            for line in f.readlines():
-                for c in line:
-                    if c != '\n' and c!= ' ':
-                        yield the_type(c)
-
-    def two_digit_freqs(digits, normalize=False):
-        """
-        Consume digits of pi and compute 2 digits freq. counts.
-        """
-        freqs = np.zeros(100, dtype='i4')
-        last = digits.next()
-        this = digits.next()
-        for d in digits:
-            index = int(last + this)
-            freqs[index] += 1
-            last = this
-            this = d
-        if normalize:
-            freqs = freqs/freqs.sum()
-        return freqs
-
-These functions are defined in the file :file:`pidigits.py`. To perform the
-calculation in parallel, we use an additional file: :file:`parallelpi.py`::
-
-    from IPython.kernel import client
-    from matplotlib import pyplot as plt
-    import numpy as np
-    from pidigits import *
-    from timeit import default_timer as clock
-
-    # Files with digits of pi (10m digits each)
-    filestring = 'pi200m-ascii-%(i)02dof20.txt'
-    files = [filestring % {'i':i} for i in range(1,16)]
-
-    # A function for reducing the frequencies calculated
-    # by different engines.
-    def reduce_freqs(freqlist):
-        allfreqs = np.zeros_like(freqlist[0])
-        for f in freqlist:
-            allfreqs += f
-        return allfreqs
-
-    # Connect to the IPython cluster
-    mec = client.MultiEngineClient(profile='mycluster')
-    mec.run('pidigits.py')
-
-    # Run 10m digits on 1 engine
-    mapper = mec.mapper(targets=0)
-    t1 = clock()
-    freqs10m = mapper.map(compute_two_digit_freqs, files[:1])[0]
-    t2 = clock()
-    digits_per_second1 = 10.0e6/(t2-t1)
-    print "Digits per second (1 core, 10m digits): ", digits_per_second1
-
-    # Run 150m digits on 15 engines (8 cores)
-    t1 = clock()
-    freqs_all = mec.map(compute_two_digit_freqs, files[:len(mec)])
-    freqs150m = reduce_freqs(freqs_all)
-    t2 = clock()
-    digits_per_second8 = 150.0e6/(t2-t1)
-    print "Digits per second (8 cores, 150m digits): ", digits_per_second8
-
-    print "Speedup: ", digits_per_second8/digits_per_second1
-
-    plot_two_digit_freqs(freqs150m)
-    plt.title("2 digit sequences in 150m digits of pi")
-
-To run this code on an IPython cluster:
-
-1. Start an IPython cluster with 15 engines: ``ipcluster start -p mycluster -n 15``
-2. Open IPython's interactive shell using the pylab profile
-   ``ipython -p pylab`` and type ``run parallelpi.py``.
-
-At this point, the parallel calculation will begin. On a small an 8 core
-cluster, we observe a speedup of 7.7x. The resulting plot of the two digit
-sequences is shown in the following screenshot.
-
-.. image:: parallel_pi.*
-
-
-Parallel option pricing
-=======================
-
-The example will be added at a later point.
+number pi (in base 10). While it is not known if pi is a normal number (a
+number is normal in base 10 if 0-9 occur with equal likelihood) numerical
+investigations suggest that it is. We will begin with a serial calculation on
+10,000 digits of pi and then perform a parallel calculation involving 150
+million digits.
+
+In both the serial and parallel calculation we will be using functions defined
+in the :file:`pidigits.py` file, which is available in the
+:file:`docs/examples/kernel` directory of the IPython source distribution.
+These functions provide basic facilities for working with the digits of pi and
+can be loaded into IPython by putting :file:`pidigits.py` in your current
+working directory and then doing:
+
+.. sourcecode:: ipython
+
+    In [1]: run pidigits.py
+
+Serial calculation
+------------------
+
+For the serial calculation, we will use SymPy (http://www.sympy.org) to
+calculate 10,000 digits of pi and then look at the frequencies of the digits
+0-9. Out of 10,000 digits, we expect each digit to occur 1,000 times. While
+SymPy is capable of calculating many more digits of pi, our purpose here is to
+set the stage for the much larger parallel calculation.
+
+In this example, we use two functions from :file:`pidigits.py`:
+:func:`one_digit_freqs` (which calculates how many times each digit occurs)
+and :func:`plot_one_digit_freqs` (which uses Matplotlib to plot the result).
+Here is an interactive IPython session that uses these functions with
+SymPy:
+
+.. sourcecode:: ipython
+
+    In [7]: import sympy
+
+    In [8]: pi = sympy.pi.evalf(40)
+
+    In [9]: pi
+    Out[9]: 3.141592653589793238462643383279502884197
+
+    In [10]: pi = sympy.pi.evalf(10000)
+
+    In [11]: digits = (d for d in str(pi)[2:])  # create a sequence of digits
+
+    In [12]: run pidigits.py  # load one_digit_freqs/plot_one_digit_freqs
+
+    In [13]: freqs = one_digit_freqs(digits)
+
+    In [14]: plot_one_digit_freqs(freqs)
+    Out[14]: [<matplotlib.lines.Line2D object at 0x18a55290>]
+
+The resulting plot of the single digit counts shows that each digit occurs
+approximately 1,000 times, but that with only 10,000 digits the
+statistical fluctuations are still rather large:
+
+.. image:: single_digits.*
+
+It is clear that to reduce the relative fluctuations in the counts, we need
+to look at many more digits of pi. That brings us to the parallel calculation.
+
+Parallel calculation
+--------------------
+
+Calculating many digits of pi is a challenging computational problem in itself.
+Because we want to focus on the distribution of digits in this example, we
+will use pre-computed digit of pi from the website of Professor Yasumasa
+Kanada at the University of Tokoyo (http://www.super-computing.org). These
+digits come in a set of text files (ftp://pi.super-computing.org/.2/pi200m/)
+that each have 10 million digits of pi.
+
+For the parallel calculation, we have copied these files to the local hard
+drives of the compute nodes. A total of 15 of these files will be used, for a
+total of 150 million digits of pi. To make things a little more interesting we
+will calculate the frequencies of all 2 digits sequences (00-99) and then plot
+the result using a 2D matrix in Matplotlib.
+
+The overall idea of the calculation is simple: each IPython engine will
+compute the two digit counts for the digits in a single file. Then in a final
+step the counts from each engine will be added up. To perform this
+calculation, we will need two top-level functions from :file:`pidigits.py`:
+
+.. literalinclude:: ../../examples/kernel/pidigits.py
+   :language: python
+   :lines: 34-49
+
+We will also use the :func:`plot_two_digit_freqs` function to plot the
+results. The code to run this calculation in parallel is contained in
+:file:`docs/examples/kernel/parallelpi.py`. This code can be run in parallel
+using IPython by following these steps:
+
+1. Copy the text files with the digits of pi
+   (ftp://pi.super-computing.org/.2/pi200m/) to the working directory of the
+   engines on the compute nodes.
+2. Use :command:`ipcluster` to start 15 engines. We used an 8 core cluster
+   with hyperthreading enabled which makes the 8 cores looks like 16 (1
+   controller + 15 engines) in the OS. However, the maximum speedup we can
+   observe is still only 8x.
+3. With the file :file:`parallelpi.py` in your current working directory, open
+   up IPython in pylab mode and type ``run parallelpi.py``.
+
+When run on our 8 core cluster, we observe a speedup of 7.7x. This is slightly
+less than linear scaling (8x) because the controller is also running on one of
+the cores.
+
+To emphasize the interactive nature of IPython, we now show how the
+calculation can also be run by simply typing the commands from
+:file:`parallelpi.py` interactively into IPython:
+
+.. sourcecode:: ipython
+
+    In [1]: from IPython.kernel import client
+    2009-11-19 11:32:38-0800 [-] Log opened.
+
+    # The MultiEngineClient allows us to use the engines interactively
+    In [2]: mec = client.MultiEngineClient(profile='mycluster')
+    2009-11-19 11:32:44-0800 [-] Connecting [0]
+    2009-11-19 11:32:44-0800 [Negotiation,client] Connected: ./ipcontroller-mec.furl
+
+    In [3]: mec.get_ids()
+    Out[3]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
+
+    In [4]: run pidigits.py
+
+    In [5]: filestring = 'pi200m-ascii-%(i)02dof20.txt'
+
+    In [6]: files = [filestring % {'i':i} for i in range(1,16)]
+
+    In [7]: files
+    Out[7]:
+    ['pi200m-ascii-01of20.txt',
+     'pi200m-ascii-02of20.txt',
+     'pi200m-ascii-03of20.txt',
+     'pi200m-ascii-04of20.txt',
+     'pi200m-ascii-05of20.txt',
+     'pi200m-ascii-06of20.txt',
+     'pi200m-ascii-07of20.txt',
+     'pi200m-ascii-08of20.txt',
+     'pi200m-ascii-09of20.txt',
+     'pi200m-ascii-10of20.txt',
+     'pi200m-ascii-11of20.txt',
+     'pi200m-ascii-12of20.txt',
+     'pi200m-ascii-13of20.txt',
+     'pi200m-ascii-14of20.txt',
+     'pi200m-ascii-15of20.txt']
+
+    # This is the parallel calculation using the MultiEngineClient.map method
+    # which applies compute_two_digit_freqs to each file in files in parallel.
+    In [8]: freqs_all = mec.map(compute_two_digit_freqs, files)
+
+    # Add up the frequencies from each engine.
+    In [8]: freqs = reduce_freqs(freqs_all)
+
+    In [9]: plot_two_digit_freqs(freqs)
+    Out[9]: <matplotlib.image.AxesImage object at 0x18beb110>
+
+    In [10]: plt.title('2 digit counts of 150m digits of pi')
+    Out[10]: <matplotlib.text.Text object at 0x18d1f9b0>
+
+The resulting plot generated by Matplotlib is shown below. The colors indicate
+which two digit sequences are more (red) or less (blue) likely to occur in the
+first 150 million digits of pi. We clearly see that the sequence "41" is
+most likely and that "06" and "07" are least likely. Further analysis would
+show that the relative size of the statistical fluctuations have decreased
+compared to the 10,000 digit calculation.
+
+.. image:: two_digit_counts.*
+
+To conclude this example, we summarize the key features of IPython's parallel
+architecture that this example demonstrates:
+
+* Serial code can be parallelized often with only a few extra lines of code.
+  In this case we have used :meth:`MultiEngineClient.map`; the
+  :class:`MultiEngineClient` class has a number of other methods that provide
+  more fine grained control of the IPython cluster.
+* The resulting parallel code can be run without ever leaving the IPython's
+  interactive shell.
+* Any data computed in parallel can be explored interactively through
+  visualization or further numerical calculations.
+
+
+Parallel options pricing
+========================
+
+An option is a financial contract that gives the buyer of the contract the
+right to buy (a "call") or sell (a "put") a secondary asset (a stock for
+example) at a particular date in the future (the expiration date) for a
+pre-agreed upon price (the strike price). For this right, the buyer pays the
+seller a premium (the option price). There are a wide variety of flavors of
+options (American, European, Asian, etc.) that are useful for different
+purposes: hedging against risk, speculation, etc.
+
+Much of modern finance is driven by the need to price these contracts
+accurately based on what is known about the properties (such as volatility) of
+the underlying asset. One method of pricing options is to use a Monte Carlo
+simulation of the underlying assets. In this example we use this approach to
+price both European and Asian (path dependent) options for various strike
+prices and volatilities.
+
+The code for this example can be found in the :file:`docs/examples/kernel`
+directory of the IPython source.
+
+The function :func:`price_options`, calculates the option prices for a single
+option (:file:`mcpricer.py`):
+
+.. literalinclude:: ../../examples/kernel/mcpricer.py
+   :language: python
+
+To run this code in parallel, we will use IPython's :class:`TaskClient`, which
+distributes work to the engines using dynamic load balancing. This client
+can be used along side the :class:`MultiEngineClient` shown in the previous
+example.
+
+Here is the code that calls :func:`price_options` for a number of different
+volatilities and strike prices in parallel:
+
+.. literalinclude:: ../../examples/kernel/mcdriver.py
+   :language: python
+
+To run this code in parallel, start an IPython cluster using
+:command:`ipcluster`, open IPython in the pylab mode with the file
+:file:`mcdriver.py` in your current working directory and then type:
+
+.. sourcecode:: ipython
+
+    In [7]: run mcdriver.py
+    Submitted tasks: [0, 1, 2, ...]
+
+Once all the tasks have finished, the results can be plotted using the
+:func:`plot_options` function. Here we make contour plots of the Asian
+call and Asian put as function of the volatility and strike price:
+
+.. sourcecode:: ipython
+
+    In [8]: plot_options(sigma_vals, K_vals, prices['acall'])
+
+    In [9]: plt.figure()
+    Out[9]: <matplotlib.figure.Figure object at 0x18c178d0>
+
+    In [10]: plot_options(sigma_vals, K_vals, prices['aput'])
+
+The plots generated by Matplotlib will look like this:
+
+.. image:: asian_call.*
 
+.. image:: asian_put.*
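The serial session in the new text condenses naturally into a short script. The sketch below inlines a one_digit_freqs equivalent so that it stands alone; it assumes SymPy and NumPy are installed::

    import numpy as np
    import sympy

    def one_digit_freqs(digits):
        # Count how many times each digit 0-9 occurs.
        freqs = np.zeros(10, dtype='i4')
        for d in digits:
            freqs[int(d)] += 1
        return freqs

    pi = sympy.pi.evalf(10000)            # 10,000 decimal digits of pi
    digits = (d for d in str(pi)[2:])     # skip the leading "3."
    print(one_digit_freqs(digits))        # each count should be near 1,000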
@@ -5,18 +5,38 @@ Getting started
 Introduction
 ============
 
-IPython is an open source project focused on interactive and exploratory
-computing in the Python programming language. It consists of two
-main componenents:
+The Python programming language is increasingly popular language for numerical
+computing. This is due to a unique combination of factors. First, Python is a
+high-level and *interactive* language that is well matched for interactive
+numerical work. Second, it is easy (often times trivial) to integrate legacy
+C/C++/Fortran code into Python. Third, a large number of high-quality open
+source projects provide all the needed building blocks for numerical
+computing: numerical arrays (NumPy), algorithms (SciPy), 2D/3D Visualization
+(Matplotlib, Mayavi, Chaco), Symbolic Mathematics (Sage, Sympy) and others.
+
+The IPython project is a core part of this open-source toolchain and is
+focused on creating a comprehensive environment for interactive and
+exploratory computing in the Python programming language. It enables all of
+the above tools to be used interactively and consists of two main components:
 
 * An enhanced interactive Python shell with support for interactive plotting
   and visualization.
 * An architecture for interactive parallel computing.
 
 With these components, it is possible to perform all aspects of a parallel
-computation interactively. This document describes how to get started with
-IPython on Window HPC Server 2008. A more complete desription of IPython's
-parallel computing capabilities can be found in IPython's online documentation
+computation interactively. This type of workflow is particularly relevant in
+scientific and numerical computing where algorithms, code and data are
+continually evolving as the user/developer explores a problem. The broad
+treads in computing (commodity clusters, multicore, cloud computing, etc.)
+make these capabilities of IPython particularly relevant.
+
+While IPython is a cross platform tool, it has particularly strong support for
+Windows based compute clusters running Windows HPC Server 2008. This document
+describes how to get started with IPython on Windows HPC Server 2008. The
+content and emphasis here is practical: installing IPython, configuring
+IPython to use the Windows job scheduler and running example parallel programs
+interactively. A more complete description of IPython's parallel computing
+capabilities can be found in IPython's online documentation
 (http://ipython.scipy.org/moin/Documentation).
 
 Setting up your Windows cluster
@@ -38,7 +58,7 @@ such a cluster:
 
 More details about installing and configuring Windows HPC Server 2008 can be
 found on the Windows HPC Home Page (http://www.microsoft.com/hpc). Regardless
-of what steps you take to set up your cluster, the remainder of this
+of what steps you follow to set up your cluster, the remainder of this
 document will assume that:
 
 * There are domain users that can log on to the AD domain and submit jobs
@@ -63,8 +83,8 @@ IPython on Windows:
 * pyOpenSSL (https://launchpad.net/pyopenssl)
 * IPython (http://ipython.scipy.org)
 
-In addition, the following dependencies are needed to run the demos
-and examples in this document.
+In addition, the following dependencies are needed to run the demos described
+in this document.
 
 * NumPy and SciPy (http://www.scipy.org)
 * wxPython (http://www.wxpython.org)
@@ -94,7 +114,7 @@ need to follow:
 Further details about installing IPython or its dependencies can be found in
 the online IPython documentation (http://ipython.scipy.org/moin/Documentation)
 Once you are finished with the installation, you can try IPython out by
-opening a Windows Command Prompt and typing :command:`ipython`. This will
+opening a Windows Command Prompt and typing ``ipython``. This will
 start IPython's interactive shell and you should see something like the
 following screenshot:
 
@@ -114,14 +134,15 @@ IPython controller
    command.
 
 IPython engine
-   IPython engines run the user's Python code in parallel on the compute nodes.
+   IPython engines run a user's Python code in parallel on the compute nodes.
    Engines are starting using the :command:`ipengine` command.
 
 Once these processes are started, a user can run Python code interactively and
-in parallel on the engines from within the IPython shell. This includes the
-ability to interact with, plot and visualize data from the engines.
+in parallel on the engines from within the IPython shell using an appropriate
+client. This includes the ability to interact with, plot and visualize data
+from the engines.
 
 IPython has a command line program called :command:`ipcluster` that automates
 all aspects of starting the controller and engines on the compute nodes.
 :command:`ipcluster` has full support for the Windows HPC job scheduler,
 meaning that :command:`ipcluster` can use this job scheduler to start the
@@ -142,38 +163,44 @@ Command Prompt and type the following command::
     ipcluster start -n 2
 
 You should see a number of messages printed to the screen, ending with
-"IPython cluster: started".
-
+"IPython cluster: started". The result should look something like the following
+screenshot:
 
 .. image:: ipcluster_start.*
 
 At this point, the controller and two engines are running on your local host.
-This configuration is useful for testing and for situations where you
-want to take advantage of multiple cores on your local computer.
+This configuration is useful for testing and for situations where you want to
+take advantage of multiple cores on your local computer.
 
 Now that we have confirmed that :command:`ipcluster` is working properly, we
-describe how to configure and run an IPython cluster on an actual cluster
-running Windows HPC Server 2008. Here is an outline of the needed steps:
+describe how to configure and run an IPython cluster on an actual compute
+cluster running Windows HPC Server 2008. Here is an outline of the needed
+steps:
 
-1. Create a cluster profile: ``ipcluster create -p mycluster``
+1. Create a cluster profile using: ``ipcluster create -p mycluster``
 
-2. Edit confguration files in :file:`.ipython\\cluster_mycluster`
+2. Edit configuration files in the directory :file:`.ipython\\cluster_mycluster`
 
-3. Start the cluster: ``ipcluser start -p mycluster -n 32``
+3. Start the cluster using: ``ipcluser start -p mycluster -n 32``
 
 Creating a cluster profile
 --------------------------
 
-In most cases, you will have to create a cluster profile to use
-IPython on a cluster. A cluster profile is a specially named directory
-(typically located in the :file:`.ipython` subdirectory of your home
-directory) that contains the configuration files for a particular IPython
-cluster, as well as log files and security keys. The naming convention
-for cluster directories is: "cluster_<profile name>". Thus, the cluster
-directory for a profile named "foo" would be :file:`.ipython\\cluster_foo`.
+In most cases, you will have to create a cluster profile to use IPython on a
+cluster. A cluster profile is a name (like "mycluster") that is associated
+with a particular cluster configuration. The profile name is used by
+:command:`ipcluster` when working with the cluster.
+
+Associated with each cluster profile is a cluster directory. This cluster
+directory is a specially named directory (typically located in the
+:file:`.ipython` subdirectory of your home directory) that contains the
+configuration files for a particular cluster profile, as well as log files and
+security keys. The naming convention for cluster directories is:
+:file:`cluster_<profile name>`. Thus, the cluster directory for a profile named
+"foo" would be :file:`.ipython\\cluster_foo`.
 
-To create a new cluster profile (named "mycluster") type the following
-command at the Windows Command Prompt::
+To create a new cluster profile (named "mycluster") and the associated cluster
+directory, type the following command at the Windows Command Prompt::
 
     ipcluster create -p mycluster
 
181 | :command:`ipcluster` prints out the location of the newly created cluster |
|
208 | :command:`ipcluster` prints out the location of the newly created cluster | |
182 | directory. |
|
209 | directory. | |
183 |
|
210 | |||
184 |
|
||||
185 | .. image:: ipcluster_create.* |
|
211 | .. image:: ipcluster_create.* | |
186 |
|
212 | |||
187 |
|
||||
188 | Configuring a cluster profile |
|
213 | Configuring a cluster profile | |
189 | ----------------------------- |
|
214 | ----------------------------- | |
190 |
|
215 | |||
@@ -221,18 +246,19 @@ in most cases these will be sufficient to get you started.
 If any of your configuration attributes involve specifying the location
 of shared directories or files, you must make sure that you use UNC paths
 like :file:`\\\\host\\share`. It is also important that you specify
-these paths using raw Python strings: ``r'\\host\share'``
+these paths using raw Python strings: ``r'\\host\share'`` to make sure
+that the backslashes are properly escaped.
 
 Starting the cluster profile
 ----------------------------
 
 Once a cluster profile has been configured, starting an IPython cluster using
-the profile is simple:
+the profile is simple::
 
     ipcluster start -p mycluster -n 32
 
-The ``-n`` option tells :command:`ipcluster` how many engines to start.
-Stopping the cluster is as simple as typing Control-C.
+The ``-n`` option tells :command:`ipcluster` how many engines to start (in
+this case 32). Stopping the cluster is as simple as typing Control-C.
 
 Using the HPC Job Manager
 -------------------------
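The raw-string advice in the hunk above can be verified directly; this check is ours, not part of the changeset::

    # Without the r-prefix each backslash must be doubled; the two literals
    # denote the same 12-character UNC path \\host\share.
    assert r'\\host\share' == '\\\\host\\share'
    print(len(r'\\host\share'))  # 12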
@@ -247,32 +273,56 @@ Once these files have been created, they can be imported into the HPC Job
 Manager application. Then, the controller and engines for that profile can be
 started using the HPC Job Manager directly, without using :command:`ipcluster`.
 However, anytime the cluster profile is re-configured, ``ipcluster start``
-needs to be run again to regenerate the XML job description files. The
+must be run again to regenerate the XML job description files. The
 following screenshot shows what the HPC Job Manager interface looks like
 with a running IPython cluster.
 
-
 .. image:: hpc_job_manager.*
 
 Performing a simple interactive parallel computation
 ====================================================
 
 Once you have started your IPython cluster, you can start to use it. To do
-this, start up IPython's interactive shell by typing::
+this, open up a new Windows Command Prompt and start up IPython's interactive
+shell by typing::
 
     ipython
 
-Then you can create a :class:`MultiEngineClient`
-instance for your profile and use the resulting instance to
-have the cluster do a simple interactive parallel computation. In the
-screenshot that follows, we take a simple Python function::
-
-    def f(x): return x**10
-
-and apply it to each element of an array of integers in
-parallel using the :meth:`MultiEngineClient.map` method::
-
-    mec.map(f, range(15))
+Then you can create a :class:`MultiEngineClient` instance for your profile and
+use the resulting instance to do a simple interactive parallel computation. In
+the code and screenshot that follows, we take a simple Python function and
+apply it to each element of an array of integers in parallel using the
+:meth:`MultiEngineClient.map` method:
+
+.. sourcecode:: ipython
+
+    In [1]: from IPython.kernel.client import *
+
+    In [2]: mec = MultiEngineClient(profile='mycluster')
+
+    In [3]: mec.get_ids()
+    Out[3]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
+
+    In [4]: def f(x):
+       ...:     return x**10
+
+    In [5]: mec.map(f, range(15))  # f is applied in parallel
+    Out[5]:
+    [0,
+     1,
+     1024,
+     59049,
+     1048576,
+     9765625,
+     60466176,
+     282475249,
+     1073741824,
+     3486784401L,
+     10000000000L,
+     25937424601L,
+     61917364224L,
+     137858491849L,
+     289254654976L]
 
 The :meth:`map` method has the same signature as Python's builtin :func:`map`
 function, but runs the calculation in parallel. More involved examples of using
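As a quick check of that closing claim, Python's builtin :func:`map` produces exactly the values listed in the parallel ``Out[5]`` above; this comparison snippet is ours::

    def f(x):
        return x**10

    print(list(map(f, range(15))))  # same 15 values, computed serially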