##// END OF EJS Templates
Work in the documentation.
Brian Granger -
Show More
1 NO CONTENT: new file 100644, binary diff hidden
NO CONTENT: new file 100644, binary diff hidden
1 NO CONTENT: new file 100644, binary diff hidden
NO CONTENT: new file 100644, binary diff hidden
1 NO CONTENT: new file 100644, binary diff hidden
NO CONTENT: new file 100644, binary diff hidden
1 NO CONTENT: new file 100644, binary diff hidden
NO CONTENT: new file 100644, binary diff hidden
@@ -51,19 +51,22 b' def inputhook_wx1():'
51 This approach seems to work, but its performance is not great as it
51 This approach seems to work, but its performance is not great as it
52 relies on having PyOS_InputHook called regularly.
52 relies on having PyOS_InputHook called regularly.
53 """
53 """
54 app = wx.GetApp()
54 try:
55 if app is not None:
55 app = wx.GetApp()
56 assert wx.Thread_IsMain()
56 if app is not None:
57
57 assert wx.Thread_IsMain()
58 # Make a temporary event loop and process system events until
58
59 # there are no more waiting, then allow idle events (which
59 # Make a temporary event loop and process system events until
60 # will also deal with pending or posted wx events.)
60 # there are no more waiting, then allow idle events (which
61 evtloop = wx.EventLoop()
61 # will also deal with pending or posted wx events.)
62 ea = wx.EventLoopActivator(evtloop)
62 evtloop = wx.EventLoop()
63 while evtloop.Pending():
63 ea = wx.EventLoopActivator(evtloop)
64 evtloop.Dispatch()
64 while evtloop.Pending():
65 app.ProcessIdle()
65 evtloop.Dispatch()
66 del ea
66 app.ProcessIdle()
67 del ea
68 except KeyboardInterrupt:
69 pass
67 return 0
70 return 0
68
71
69 class EventLoopTimer(wx.Timer):
72 class EventLoopTimer(wx.Timer):
@@ -102,13 +105,16 b' def inputhook_wx2():'
102 but eventually performance would suffer from calling select/kbhit too
105 but eventually performance would suffer from calling select/kbhit too
103 often.
106 often.
104 """
107 """
105 app = wx.GetApp()
108 try:
106 if app is not None:
109 app = wx.GetApp()
107 assert wx.Thread_IsMain()
110 if app is not None:
108 elr = EventLoopRunner()
111 assert wx.Thread_IsMain()
109 # As this time is made shorter, keyboard response improves, but idle
112 elr = EventLoopRunner()
110 # CPU load goes up. 10 ms seems like a good compromise.
113 # As this time is made shorter, keyboard response improves, but idle
111 elr.Run(time=10) # CHANGE time here to control polling interval
114 # CPU load goes up. 10 ms seems like a good compromise.
115 elr.Run(time=10) # CHANGE time here to control polling interval
116 except KeyboardInterrupt:
117 pass
112 return 0
118 return 0
113
119
114 def inputhook_wx3():
120 def inputhook_wx3():
@@ -119,49 +125,54 b' def inputhook_wx3():'
119 time.sleep is inserted. This is needed, otherwise, CPU usage is at 100%.
125 time.sleep is inserted. This is needed, otherwise, CPU usage is at 100%.
120 This sleep time should be tuned though for best performance.
126 This sleep time should be tuned though for best performance.
121 """
127 """
122 app = wx.GetApp()
128 # We need to protect against a user pressing Control-C when IPython is
123 if app is not None:
129 # idle and this is running. We trap KeyboardInterrupt and pass.
124 assert wx.Thread_IsMain()
130 try:
125
131 app = wx.GetApp()
126 # The import of wx on Linux sets the handler for signal.SIGINT
132 if app is not None:
127 # to 0. This is a bug in wx or gtk. We fix by just setting it
133 assert wx.Thread_IsMain()
128 # back to the Python default.
134
129 if not callable(signal.getsignal(signal.SIGINT)):
135 # The import of wx on Linux sets the handler for signal.SIGINT
130 signal.signal(signal.SIGINT, signal.default_int_handler)
136 # to 0. This is a bug in wx or gtk. We fix by just setting it
131
137 # back to the Python default.
132 evtloop = wx.EventLoop()
138 if not callable(signal.getsignal(signal.SIGINT)):
133 ea = wx.EventLoopActivator(evtloop)
139 signal.signal(signal.SIGINT, signal.default_int_handler)
134 t = clock()
140
135 while not stdin_ready():
141 evtloop = wx.EventLoop()
136 while evtloop.Pending():
142 ea = wx.EventLoopActivator(evtloop)
137 t = clock()
143 t = clock()
138 evtloop.Dispatch()
144 while not stdin_ready():
139 app.ProcessIdle()
145 while evtloop.Pending():
140 # We need to sleep at this point to keep the idle CPU load
146 t = clock()
141 # low. However, if sleep to long, GUI response is poor. As
147 evtloop.Dispatch()
142 # a compromise, we watch how often GUI events are being processed
148 app.ProcessIdle()
143 # and switch between a short and long sleep time. Here are some
149 # We need to sleep at this point to keep the idle CPU load
144 # stats useful in helping to tune this.
150 # low. However, if sleep to long, GUI response is poor. As
145 # time CPU load
151 # a compromise, we watch how often GUI events are being processed
146 # 0.001 13%
152 # and switch between a short and long sleep time. Here are some
147 # 0.005 3%
153 # stats useful in helping to tune this.
148 # 0.01 1.5%
154 # time CPU load
149 # 0.05 0.5%
155 # 0.001 13%
150 used_time = clock() - t
156 # 0.005 3%
151 if used_time > 5*60.0:
157 # 0.01 1.5%
152 # print 'Sleep for 5 s' # dbg
158 # 0.05 0.5%
153 time.sleep(5.0)
159 used_time = clock() - t
154 elif used_time > 10.0:
160 if used_time > 5*60.0:
155 # print 'Sleep for 1 s' # dbg
161 # print 'Sleep for 5 s' # dbg
156 time.sleep(1.0)
162 time.sleep(5.0)
157 elif used_time > 0.1:
163 elif used_time > 10.0:
158 # Few GUI events coming in, so we can sleep longer
164 # print 'Sleep for 1 s' # dbg
159 # print 'Sleep for 0.05 s' # dbg
165 time.sleep(1.0)
160 time.sleep(0.05)
166 elif used_time > 0.1:
161 else:
167 # Few GUI events coming in, so we can sleep longer
162 # Many GUI events coming in, so sleep only very little
168 # print 'Sleep for 0.05 s' # dbg
163 time.sleep(0.001)
169 time.sleep(0.05)
164 del ea
170 else:
171 # Many GUI events coming in, so sleep only very little
172 time.sleep(0.001)
173 del ea
174 except KeyboardInterrupt:
175 pass
165 return 0
176 return 0
166
177
167 # This is our default implementation
178 # This is our default implementation
@@ -1,25 +1,30 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # encoding: utf-8
3 """Run a Monte-Carlo options pricer in parallel."""
2 """Run a Monte-Carlo options pricer in parallel."""
4
3
5 from IPython.kernel import client
4 from IPython.kernel import client
6 import numpy as np
5 import numpy as np
7 from mcpricer import price_options
6 from mcpricer import price_options
8
7
9
8 # The MultiEngineClient is used to setup the calculation and works with all
10 tc = client.TaskClient(profile='default')
9 # engine.
11 mec = client.MultiEngineClient(profile='default')
10 mec = client.MultiEngineClient(profile='default')
12
11
12 # The TaskClient is an interface to the engines that provides dynamic load
13 # balancing at the expense of not knowing which engine will execute the code.
14 tc = client.TaskClient(profile='default')
13
15
14 # Initialize the common code on the engines
16 # Initialize the common code on the engines. This Python module has the
17 # price_options function that prices the options.
15 mec.run('mcpricer.py')
18 mec.run('mcpricer.py')
16
19
17 # Define the function that will do the calculation
20 # Define the function that will make up our tasks. We basically want to
21 # call the price_options function with all but two arguments (K, sigma)
22 # fixed.
18 def my_prices(K, sigma):
23 def my_prices(K, sigma):
19 S = 100.0
24 S = 100.0
20 r = 0.05
25 r = 0.05
21 days = 260
26 days = 260
22 paths = 10000
27 paths = 100000
23 return price_options(S, K, sigma, r, days, paths)
28 return price_options(S, K, sigma, r, days, paths)
24
29
25 # Create arrays of strike prices and volatilities
30 # Create arrays of strike prices and volatilities
@@ -28,7 +33,9 b' nsigma = 5'
28 K_vals = np.linspace(90.0, 100.0, nK)
33 K_vals = np.linspace(90.0, 100.0, nK)
29 sigma_vals = np.linspace(0.0, 0.2, nsigma)
34 sigma_vals = np.linspace(0.0, 0.2, nsigma)
30
35
31 # Submit tasks
36 # Submit tasks to the TaskClient for each (K, sigma) pair as a MapTask.
37 # The MapTask simply applies a function (my_prices) to the arguments:
38 # my_prices(K, sigma) and returns the result.
32 taskids = []
39 taskids = []
33 for K in K_vals:
40 for K in K_vals:
34 for sigma in sigma_vals:
41 for sigma in sigma_vals:
@@ -37,24 +44,24 b' for K in K_vals:'
37
44
38 print "Submitted tasks: ", taskids
45 print "Submitted tasks: ", taskids
39
46
40 # Block until tasks are completed
47 # Block until all tasks are completed.
41 tc.barrier(taskids)
48 tc.barrier(taskids)
42
49
43 # Get the results
50 # Get the results using TaskClient.get_task_result.
44 results = [tc.get_task_result(tid) for tid in taskids]
51 results = [tc.get_task_result(tid) for tid in taskids]
45
52
46 # Assemble the result
53 # Assemble the result into a structured NumPy array.
47 prices = np.empty(nK*nsigma,
54 prices = np.empty(nK*nsigma,
48 dtype=[('vcall',float),('vput',float),('acall',float),('aput',float)]
55 dtype=[('ecall',float),('eput',float),('acall',float),('aput',float)]
49 )
56 )
50 for i, price_tuple in enumerate(results):
57 for i, price_tuple in enumerate(results):
51 prices[i] = price_tuple
58 prices[i] = price_tuple
52 prices.shape = (nK, nsigma)
59 prices.shape = (nK, nsigma)
60 K_vals, sigma_vals = np.meshgrid(K_vals, sigma_vals)
53
61
54
62 def plot_options(sigma_vals, K_vals, prices):
55 def plot_options(K_vals, sigma_vals, prices):
56 """
63 """
57 Make a contour plot of the option prices.
64 Make a contour plot of the option price in (sigma, K) space.
58 """
65 """
59 from matplotlib import pyplot as plt
66 from matplotlib import pyplot as plt
60 plt.contourf(sigma_vals, K_vals, prices)
67 plt.contourf(sigma_vals, K_vals, prices)
@@ -4,7 +4,26 b' from math import *'
4
4
5 def price_options(S=100.0, K=100.0, sigma=0.25, r=0.05, days=260, paths=10000):
5 def price_options(S=100.0, K=100.0, sigma=0.25, r=0.05, days=260, paths=10000):
6 """
6 """
7 Price vanilla and asian options using a Monte Carlo method.
7 Price European and Asian options using a Monte Carlo method.
8
9 Parameters
10 ----------
11 S : float
12 The initial price of the stock.
13 K : float
14 The strike price of the option.
15 sigma : float
16 The volatility of the stock.
17 r : float
18 The risk free interest rate.
19 days : int
20 The number of days until the option expires.
21 paths : int
22 The number of Monte Carlo paths used to price the option.
23
24 Returns
25 -------
26 A tuple of (E. call, E. put, A. call, A. put) option prices.
8 """
27 """
9 h = 1.0/days
28 h = 1.0/days
10 const1 = exp((r-0.5*sigma**2)*h)
29 const1 = exp((r-0.5*sigma**2)*h)
@@ -18,16 +37,9 b' def price_options(S=100.0, K=100.0, sigma=0.25, r=0.05, days=260, paths=10000):'
18 stock_price_avg = stock_price_sum/days
37 stock_price_avg = stock_price_sum/days
19 zeros = np.zeros(paths, dtype='float64')
38 zeros = np.zeros(paths, dtype='float64')
20 r_factor = exp(-r*h*days)
39 r_factor = exp(-r*h*days)
21 vanilla_put = r_factor*np.mean(np.maximum(zeros, K-stock_price))
40 euro_put = r_factor*np.mean(np.maximum(zeros, K-stock_price))
22 asian_put = r_factor*np.mean(np.maximum(zeros, K-stock_price_avg))
41 asian_put = r_factor*np.mean(np.maximum(zeros, K-stock_price_avg))
23 vanilla_call = r_factor*np.mean(np.maximum(zeros, stock_price-K))
42 euro_call = r_factor*np.mean(np.maximum(zeros, stock_price-K))
24 asian_call = r_factor*np.mean(np.maximum(zeros, stock_price_avg-K))
43 asian_call = r_factor*np.mean(np.maximum(zeros, stock_price_avg-K))
25 return (vanilla_call, vanilla_put, asian_call, asian_put)
44 return (euro_call, euro_put, asian_call, asian_put)
26
27
45
28 if __name__ == '__main__':
29 (vc, vp, ac, ap) = price_options()
30 print "Vanilla Put Price = ", vp
31 print "Asian Put Price = ", ap
32 print "Vanilla Call Price = ", vc
33 print "Asian Call Price = ", ac
@@ -25,15 +25,6 b" filestring = 'pi200m-ascii-%(i)02dof20.txt'"
25 files = [filestring % {'i':i} for i in range(1,16)]
25 files = [filestring % {'i':i} for i in range(1,16)]
26
26
27
27
28 # A function for reducing the frequencies calculated
29 # by different engines.
30 def reduce_freqs(freqlist):
31 allfreqs = np.zeros_like(freqlist[0])
32 for f in freqlist:
33 allfreqs += f
34 return allfreqs
35
36
37 # Connect to the IPython cluster
28 # Connect to the IPython cluster
38 mec = client.MultiEngineClient(profile='mycluster')
29 mec = client.MultiEngineClient(profile='mycluster')
39 mec.run('pidigits.py')
30 mec.run('pidigits.py')
@@ -42,9 +33,7 b" mec.run('pidigits.py')"
42 # Run 10m digits on 1 engine
33 # Run 10m digits on 1 engine
43 mapper = mec.mapper(targets=0)
34 mapper = mec.mapper(targets=0)
44 t1 = clock()
35 t1 = clock()
45
46 freqs10m = mapper.map(compute_two_digit_freqs, files[:1])[0]
36 freqs10m = mapper.map(compute_two_digit_freqs, files[:1])[0]
47
48 t2 = clock()
37 t2 = clock()
49 digits_per_second1 = 10.0e6/(t2-t1)
38 digits_per_second1 = 10.0e6/(t2-t1)
50 print "Digits per second (1 core, 10m digits): ", digits_per_second1
39 print "Digits per second (1 core, 10m digits): ", digits_per_second1
@@ -52,10 +41,8 b' print "Digits per second (1 core, 10m digits): ", digits_per_second1'
52
41
53 # Run 150m digits on 15 engines (8 cores)
42 # Run 150m digits on 15 engines (8 cores)
54 t1 = clock()
43 t1 = clock()
55
56 freqs_all = mec.map(compute_two_digit_freqs, files[:len(mec)])
44 freqs_all = mec.map(compute_two_digit_freqs, files[:len(mec)])
57 freqs150m = reduce_freqs(freqs_all)
45 freqs150m = reduce_freqs(freqs_all)
58
59 t2 = clock()
46 t2 = clock()
60 digits_per_second8 = 150.0e6/(t2-t1)
47 digits_per_second8 = 150.0e6/(t2-t1)
61 print "Digits per second (8 cores, 150m digits): ", digits_per_second8
48 print "Digits per second (8 cores, 150m digits): ", digits_per_second8
@@ -24,16 +24,34 b' from matplotlib import pyplot as plt'
24 # Top-level functions
24 # Top-level functions
25
25
26 def compute_one_digit_freqs(filename):
26 def compute_one_digit_freqs(filename):
27 """
28 Read digits of pi from a file and compute the 1 digit frequencies.
29 """
27 d = txt_file_to_digits(filename)
30 d = txt_file_to_digits(filename)
28 freqs = one_digit_freqs(d)
31 freqs = one_digit_freqs(d)
29 return freqs
32 return freqs
30
33
31 def compute_two_digit_freqs(filename):
34 def compute_two_digit_freqs(filename):
35 """
36 Read digits of pi from a file and compute the 2 digit frequencies.
37 """
32 d = txt_file_to_digits(filename)
38 d = txt_file_to_digits(filename)
33 freqs = two_digit_freqs(d)
39 freqs = two_digit_freqs(d)
34 return freqs
40 return freqs
35
41
42 def reduce_freqs(freqlist):
43 """
44 Add up a list of freq counts to get the total counts.
45 """
46 allfreqs = np.zeros_like(freqlist[0])
47 for f in freqlist:
48 allfreqs += f
49 return allfreqs
50
36 def compute_n_digit_freqs(filename, n):
51 def compute_n_digit_freqs(filename, n):
52 """
53 Read digits of pi from a file and compute the n digit frequencies.
54 """
37 d = txt_file_to_digits(filename)
55 d = txt_file_to_digits(filename)
38 freqs = n_digit_freqs(d, n)
56 freqs = n_digit_freqs(d, n)
39 return freqs
57 return freqs
1 NO CONTENT: modified file, binary diff hidden
NO CONTENT: modified file, binary diff hidden
1 NO CONTENT: modified file, binary diff hidden
NO CONTENT: modified file, binary diff hidden
1 NO CONTENT: modified file, binary diff hidden
NO CONTENT: modified file, binary diff hidden
1 NO CONTENT: modified file, binary diff hidden
NO CONTENT: modified file, binary diff hidden
1 NO CONTENT: modified file, binary diff hidden
NO CONTENT: modified file, binary diff hidden
1 NO CONTENT: modified file, binary diff hidden
NO CONTENT: modified file, binary diff hidden
1 NO CONTENT: modified file, binary diff hidden
NO CONTENT: modified file, binary diff hidden
1 NO CONTENT: modified file, binary diff hidden
NO CONTENT: modified file, binary diff hidden
@@ -2,125 +2,269 b''
2 Parallel examples
2 Parallel examples
3 =================
3 =================
4
4
5 In this section we describe a few more involved examples of using an IPython
5 In this section we describe two more involved examples of using an IPython
6 cluster to perform a parallel computation.
6 cluster to perform a parallel computation. In these examples, we will be using
7 IPython's "pylab" mode, which enables interactive plotting using the
8 Matplotlib package. IPython can be started in this mode by typing::
9
10 ipython -p pylab
11
12 at the system command line. If this prints an error message, you will
13 need to install the default profiles from within IPython by doing,
14
15 .. sourcecode:: ipython
16
17 In [1]: %install_profiles
18
19 and then restarting IPython.
7
20
8 150 million digits of pi
21 150 million digits of pi
9 ========================
22 ========================
10
23
11 In this example we would like to study the distribution of digits in the
24 In this example we would like to study the distribution of digits in the
12 number pi. More specifically, we are going to study how often each 2
25 number pi (in base 10). While it is not known if pi is a normal number (a
13 digits sequence occurs in the first 150 million digits of pi. If the digits
26 number is normal in base 10 if 0-9 occur with equal likelihood) numerical
14 0-9 occur with equal probability, we expect that each two digits sequence
27 investigations suggest that it is. We will begin with a serial calculation on
15 (00, 01, ..., 99) will occur 1% of the time.
28 10,000 digits of pi and then perform a parallel calculation involving 150
16
29 million digits.
17 This examples uses precomputed digits of pi from the website of Professor
30
18 Yasumasa Kanada at the University of Tokoyo (http://www.super-computing.org).
31 In both the serial and parallel calculation we will be using functions defined
19 These digits come in a set of ``.txt`` files
32 in the :file:`pidigits.py` file, which is available in the
20 (ftp://pi.super-computing.org/.2/pi200m/) that each have 10 million digits of
33 :file:`docs/examples/kernel` directory of the IPython source distribution.
21 pi. In the parallel computation, we will use the :meth:`MultiEngineClient.map`
34 These functions provide basic facilities for working with the digits of pi and
22 method to have each engine compute the desired statistics on a subset of these
35 can be loaded into IPython by putting :file:`pidigits.py` in your current
23 files. Before I started the parallel computation, I copied the data files
36 working directory and then doing:
24 to the compute nodes so the engine have fast access to them.
37
25
38 .. sourcecode:: ipython
26 Here are the Python functions for counting the frequencies of each two digit
39
27 sequence in serial::
40 In [1]: run pidigits.py
28
41
29 def compute_two_digit_freqs(filename):
42 Serial calculation
30 """
43 ------------------
31 Compute the two digit frequencies from a single file.
44
32 """
45 For the serial calculation, we will use SymPy (http://www.sympy.org) to
33 d = txt_file_to_digits(filename)
46 calculate 10,000 digits of pi and then look at the frequencies of the digits
34 freqs = two_digit_freqs(d)
47 0-9. Out of 10,000 digits, we expect each digit to occur 1,000 times. While
35 return freqs
48 SymPy is capable of calculating many more digits of pi, our purpose here is to
36
49 set the stage for the much larger parallel calculation.
37 def txt_file_to_digits(filename, the_type=str):
50
38 """
51 In this example, we use two functions from :file:`pidigits.py`:
39 Yield the digits of pi read from a .txt file.
52 :func:`one_digit_freqs` (which calculates how many times each digit occurs)
40 """
53 and :func:`plot_one_digit_freqs` (which uses Matplotlib to plot the result).
41 with open(filename, 'r') as f:
54 Here is an interactive IPython session that uses these functions with
42 for line in f.readlines():
55 SymPy:
43 for c in line:
56
44 if c != '\n' and c!= ' ':
57 .. sourcecode:: ipython
45 yield the_type(c)
58
46
59 In [7]: import sympy
47 def two_digit_freqs(digits, normalize=False):
60
48 """
61 In [8]: pi = sympy.pi.evalf(40)
49 Consume digits of pi and compute 2 digits freq. counts.
62
50 """
63 In [9]: pi
51 freqs = np.zeros(100, dtype='i4')
64 Out[9]: 3.141592653589793238462643383279502884197
52 last = digits.next()
65
53 this = digits.next()
66 In [10]: pi = sympy.pi.evalf(10000)
54 for d in digits:
67
55 index = int(last + this)
68 In [11]: digits = (d for d in str(pi)[2:]) # create a sequence of digits
56 freqs[index] += 1
69
57 last = this
70 In [12]: run pidigits.py # load one_digit_freqs/plot_one_digit_freqs
58 this = d
71
59 if normalize:
72 In [13]: freqs = one_digit_freqs(digits)
60 freqs = freqs/freqs.sum()
73
61 return freqs
74 In [14]: plot_one_digit_freqs(freqs)
62
75 Out[14]: [<matplotlib.lines.Line2D object at 0x18a55290>]
63 These functions are defined in the file :file:`pidigits.py`. To perform the
76
64 calculation in parallel, we use an additional file: :file:`parallelpi.py`::
77 The resulting plot of the single digit counts shows that each digit occurs
65
78 approximately 1,000 times, but that with only 10,000 digits the
66 from IPython.kernel import client
79 statistical fluctuations are still rather large:
67 from matplotlib import pyplot as plt
80
68 import numpy as np
81 .. image:: single_digits.*
69 from pidigits import *
82
70 from timeit import default_timer as clock
83 It is clear that to reduce the relative fluctuations in the counts, we need
71
84 to look at many more digits of pi. That brings us to the parallel calculation.
72 # Files with digits of pi (10m digits each)
85
73 filestring = 'pi200m-ascii-%(i)02dof20.txt'
86 Parallel calculation
74 files = [filestring % {'i':i} for i in range(1,16)]
87 --------------------
75
88
76 # A function for reducing the frequencies calculated
89 Calculating many digits of pi is a challenging computational problem in itself.
77 # by different engines.
90 Because we want to focus on the distribution of digits in this example, we
78 def reduce_freqs(freqlist):
91 will use pre-computed digit of pi from the website of Professor Yasumasa
79 allfreqs = np.zeros_like(freqlist[0])
92 Kanada at the University of Tokoyo (http://www.super-computing.org). These
80 for f in freqlist:
93 digits come in a set of text files (ftp://pi.super-computing.org/.2/pi200m/)
81 allfreqs += f
94 that each have 10 million digits of pi.
82 return allfreqs
95
83
96 For the parallel calculation, we have copied these files to the local hard
84 # Connect to the IPython cluster
97 drives of the compute nodes. A total of 15 of these files will be used, for a
85 mec = client.MultiEngineClient(profile='mycluster')
98 total of 150 million digits of pi. To make things a little more interesting we
86 mec.run('pidigits.py')
99 will calculate the frequencies of all 2 digits sequences (00-99) and then plot
87
100 the result using a 2D matrix in Matplotlib.
88 # Run 10m digits on 1 engine
101
89 mapper = mec.mapper(targets=0)
102 The overall idea of the calculation is simple: each IPython engine will
90 t1 = clock()
103 compute the two digit counts for the digits in a single file. Then in a final
91 freqs10m = mapper.map(compute_two_digit_freqs, files[:1])[0]
104 step the counts from each engine will be added up. To perform this
92 t2 = clock()
105 calculation, we will need two top-level functions from :file:`pidigits.py`:
93 digits_per_second1 = 10.0e6/(t2-t1)
106
94 print "Digits per second (1 core, 10m digits): ", digits_per_second1
107 .. literalinclude:: ../../examples/kernel/pidigits.py
95
108 :language: python
96 # Run 150m digits on 15 engines (8 cores)
109 :lines: 34-49
97 t1 = clock()
110
98 freqs_all = mec.map(compute_two_digit_freqs, files[:len(mec)])
111 We will also use the :func:`plot_two_digit_freqs` function to plot the
99 freqs150m = reduce_freqs(freqs_all)
112 results. The code to run this calculation in parallel is contained in
100 t2 = clock()
113 :file:`docs/examples/kernel/parallelpi.py`. This code can be run in parallel
101 digits_per_second8 = 150.0e6/(t2-t1)
114 using IPython by following these steps:
102 print "Digits per second (8 cores, 150m digits): ", digits_per_second8
115
103
116 1. Copy the text files with the digits of pi
104 print "Speedup: ", digits_per_second8/digits_per_second1
117 (ftp://pi.super-computing.org/.2/pi200m/) to the working directory of the
105
118 engines on the compute nodes.
106 plot_two_digit_freqs(freqs150m)
119 2. Use :command:`ipcluster` to start 15 engines. We used an 8 core cluster
107 plt.title("2 digit sequences in 150m digits of pi")
120 with hyperthreading enabled which makes the 8 cores looks like 16 (1
108
121 controller + 15 engines) in the OS. However, the maximum speedup we can
109 To run this code on an IPython cluster:
122 observe is still only 8x.
110
123 3. With the file :file:`parallelpi.py` in your current working directory, open
111 1. Start an IPython cluster with 15 engines: ``ipcluster start -p mycluster -n 15``
124 up IPython in pylab mode and type ``run parallelpi.py``.
112 2. Open IPython's interactive shell using the pylab profile
125
113 ``ipython -p pylab`` and type ``run parallelpi.py``.
126 When run on our 8 core cluster, we observe a speedup of 7.7x. This is slightly
114
127 less than linear scaling (8x) because the controller is also running on one of
115 At this point, the parallel calculation will begin. On a small an 8 core
128 the cores.
116 cluster, we observe a speedup of 7.7x. The resulting plot of the two digit
129
117 sequences is shown in the following screenshot.
130 To emphasize the interactive nature of IPython, we now show how the
118
131 calculation can also be run by simply typing the commands from
119 .. image:: parallel_pi.*
132 :file:`parallelpi.py` interactively into IPython:
120
133
121
134 .. sourcecode:: ipython
122 Parallel option pricing
135
123 =======================
136 In [1]: from IPython.kernel import client
124
137 2009-11-19 11:32:38-0800 [-] Log opened.
125 The example will be added at a later point.
138
139 # The MultiEngineClient allows us to use the engines interactively
140 In [2]: mec = client.MultiEngineClient(profile='mycluster')
141 2009-11-19 11:32:44-0800 [-] Connecting [0]
142 2009-11-19 11:32:44-0800 [Negotiation,client] Connected: ./ipcontroller-mec.furl
143
144 In [3]: mec.get_ids()
145 Out[3]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
146
147 In [4]: run pidigits.py
148
149 In [5]: filestring = 'pi200m-ascii-%(i)02dof20.txt'
150
151 In [6]: files = [filestring % {'i':i} for i in range(1,16)]
152
153 In [7]: files
154 Out[7]:
155 ['pi200m-ascii-01of20.txt',
156 'pi200m-ascii-02of20.txt',
157 'pi200m-ascii-03of20.txt',
158 'pi200m-ascii-04of20.txt',
159 'pi200m-ascii-05of20.txt',
160 'pi200m-ascii-06of20.txt',
161 'pi200m-ascii-07of20.txt',
162 'pi200m-ascii-08of20.txt',
163 'pi200m-ascii-09of20.txt',
164 'pi200m-ascii-10of20.txt',
165 'pi200m-ascii-11of20.txt',
166 'pi200m-ascii-12of20.txt',
167 'pi200m-ascii-13of20.txt',
168 'pi200m-ascii-14of20.txt',
169 'pi200m-ascii-15of20.txt']
170
171 # This is the parallel calculation using the MultiEngineClient.map method
172 # which applies compute_two_digit_freqs to each file in files in parallel.
173 In [8]: freqs_all = mec.map(compute_two_digit_freqs, files)
174
175 # Add up the frequencies from each engine.
176 In [8]: freqs = reduce_freqs(freqs_all)
177
178 In [9]: plot_two_digit_freqs(freqs)
179 Out[9]: <matplotlib.image.AxesImage object at 0x18beb110>
180
181 In [10]: plt.title('2 digit counts of 150m digits of pi')
182 Out[10]: <matplotlib.text.Text object at 0x18d1f9b0>
183
184 The resulting plot generated by Matplotlib is shown below. The colors indicate
185 which two digit sequences are more (red) or less (blue) likely to occur in the
186 first 150 million digits of pi. We clearly see that the sequence "41" is
187 most likely and that "06" and "07" are least likely. Further analysis would
188 show that the relative size of the statistical fluctuations have decreased
189 compared to the 10,000 digit calculation.
190
191 .. image:: two_digit_counts.*
192
193 To conclude this example, we summarize the key features of IPython's parallel
194 architecture that this example demonstrates:
195
196 * Serial code can be parallelized often with only a few extra lines of code.
197 In this case we have used :meth:`MultiEngineClient.map`; the
198 :class:`MultiEngineClient` class has a number of other methods that provide
199 more fine grained control of the IPython cluster.
200 * The resulting parallel code can be run without ever leaving the IPython's
201 interactive shell.
202 * Any data computed in parallel can be explored interactively through
203 visualization or further numerical calculations.
204
205
206 Parallel options pricing
207 ========================
208
209 An option is a financial contract that gives the buyer of the contract the
210 right to buy (a "call") or sell (a "put") a secondary asset (a stock for
211 example) at a particular date in the future (the expiration date) for a
212 pre-agreed upon price (the strike price). For this right, the buyer pays the
213 seller a premium (the option price). There are a wide variety of flavors of
214 options (American, European, Asian, etc.) that are useful for different
215 purposes: hedging against risk, speculation, etc.
216
217 Much of modern finance is driven by the need to price these contracts
218 accurately based on what is known about the properties (such as volatility) of
219 the underlying asset. One method of pricing options is to use a Monte Carlo
220 simulation of the underlying assets. In this example we use this approach to
221 price both European and Asian (path dependent) options for various strike
222 prices and volatilities.
223
224 The code for this example can be found in the :file:`docs/examples/kernel`
225 directory of the IPython source.
226
227 The function :func:`price_options`, calculates the option prices for a single
228 option (:file:`mcpricer.py`):
229
230 .. literalinclude:: ../../examples/kernel/mcpricer.py
231 :language: python
232
233 To run this code in parallel, we will use IPython's :class:`TaskClient`, which
234 distributes work to the engines using dynamic load balancing. This client
235 can be used along side the :class:`MultiEngineClient` shown in the previous
236 example.
237
238 Here is the code that calls :func:`price_options` for a number of different
239 volatilities and strike prices in parallel:
240
241 .. literalinclude:: ../../examples/kernel/mcdriver.py
242 :language: python
243
244 To run this code in parallel, start an IPython cluster using
245 :command:`ipcluster`, open IPython in the pylab mode with the file
246 :file:`mcdriver.py` in your current working directory and then type:
247
248 .. sourcecode:: ipython
249
250 In [7]: run mcdriver.py
251 Submitted tasks: [0, 1, 2, ...]
252
253 Once all the tasks have finished, the results can be plotted using the
254 :func:`plot_options` function. Here we make contour plots of the Asian
255 call and Asian put as function of the volatility and strike price:
256
257 .. sourcecode:: ipython
258
259 In [8]: plot_options(sigma_vals, K_vals, prices['acall'])
260
261 In [9]: plt.figure()
262 Out[9]: <matplotlib.figure.Figure object at 0x18c178d0>
263
264 In [10]: plot_options(sigma_vals, K_vals, prices['aput'])
265
266 The plots generated by Matplotlib will look like this:
267
268 .. image:: asian_call.*
126
269
270 .. image:: asian_put.*
@@ -5,18 +5,38 b' Getting started'
5 Introduction
5 Introduction
6 ============
6 ============
7
7
8 IPython is an open source project focused on interactive and exploratory
8 The Python programming language is an increasingly popular language for numerical
9 computing in the Python programming language. It consists of two
9 computing. This is due to a unique combination of factors. First, Python is a
10 main components:
10 high-level and *interactive* language that is well matched for interactive
11 numerical work. Second, it is easy (often times trivial) to integrate legacy
12 C/C++/Fortran code into Python. Third, a large number of high-quality open
13 source projects provide all the needed building blocks for numerical
14 computing: numerical arrays (NumPy), algorithms (SciPy), 2D/3D Visualization
15 (Matplotlib, Mayavi, Chaco), Symbolic Mathematics (Sage, Sympy) and others.
16
17 The IPython project is a core part of this open-source toolchain and is
18 focused on creating a comprehensive environment for interactive and
19 exploratory computing in the Python programming language. It enables all of
20 the above tools to be used interactively and consists of two main components:
11
21
12 * An enhanced interactive Python shell with support for interactive plotting
22 * An enhanced interactive Python shell with support for interactive plotting
13 and visualization.
23 and visualization.
14 * An architecture for interactive parallel computing.
24 * An architecture for interactive parallel computing.
15
25
16 With these components, it is possible to perform all aspects of a parallel
26 With these components, it is possible to perform all aspects of a parallel
17 computation interactively. This document describes how to get started with
27 computation interactively. This type of workflow is particularly relevant in
18 IPython on Windows HPC Server 2008. A more complete description of IPython's
28 scientific and numerical computing where algorithms, code and data are
19 parallel computing capabilities can be found in IPython's online documentation
29 continually evolving as the user/developer explores a problem. The broad
30 trends in computing (commodity clusters, multicore, cloud computing, etc.)
31 make these capabilities of IPython particularly relevant.
32
33 While IPython is a cross platform tool, it has particularly strong support for
34 Windows based compute clusters running Windows HPC Server 2008. This document
35 describes how to get started with IPython on Windows HPC Server 2008. The
36 content and emphasis here is practical: installing IPython, configuring
37 IPython to use the Windows job scheduler and running example parallel programs
38 interactively. A more complete description of IPython's parallel computing
39 capabilities can be found in IPython's online documentation
20 (http://ipython.scipy.org/moin/Documentation).
40 (http://ipython.scipy.org/moin/Documentation).
21
41
22 Setting up your Windows cluster
42 Setting up your Windows cluster
@@ -38,7 +58,7 b' such a cluster:'
38
58
39 More details about installing and configuring Windows HPC Server 2008 can be
59 More details about installing and configuring Windows HPC Server 2008 can be
40 found on the Windows HPC Home Page (http://www.microsoft.com/hpc). Regardless
60 found on the Windows HPC Home Page (http://www.microsoft.com/hpc). Regardless
41 of what steps you go through to set up your cluster, the remainder of this
61 of what steps you follow to set up your cluster, the remainder of this
42 document will assume that:
62 document will assume that:
43
63
44 * There are domain users that can log on to the AD domain and submit jobs
64 * There are domain users that can log on to the AD domain and submit jobs
@@ -63,8 +83,8 b' IPython on Windows:'
63 * pyOpenSSL (https://launchpad.net/pyopenssl)
83 * pyOpenSSL (https://launchpad.net/pyopenssl)
64 * IPython (http://ipython.scipy.org)
84 * IPython (http://ipython.scipy.org)
65
85
66 In addition, the following dependencies are needed to run the demos
86 In addition, the following dependencies are needed to run the demos described
67 described in this document.
87 in this document.
68
88
69 * NumPy and SciPy (http://www.scipy.org)
89 * NumPy and SciPy (http://www.scipy.org)
70 * wxPython (http://www.wxpython.org)
90 * wxPython (http://www.wxpython.org)
@@ -94,7 +114,7 b' need to follow:'
94 Further details about installing IPython or its dependencies can be found in
114 Further details about installing IPython or its dependencies can be found in
95 the online IPython documentation (http://ipython.scipy.org/moin/Documentation)
115 the online IPython documentation (http://ipython.scipy.org/moin/Documentation)
96 Once you are finished with the installation, you can try IPython out by
116 Once you are finished with the installation, you can try IPython out by
97 opening a Windows Command Prompt and typing :command:`ipython`. This will
117 opening a Windows Command Prompt and typing ``ipython``. This will
98 start IPython's interactive shell and you should see something like the
118 start IPython's interactive shell and you should see something like the
99 following screenshot:
119 following screenshot:
100
120
@@ -114,14 +134,15 b' IPython controller'
114 command.
134 command.
115
135
116 IPython engine
136 IPython engine
117 IPython engines run your Python code in parallel on the compute nodes.
137 IPython engines run a user's Python code in parallel on the compute nodes.
118 Engines are started using the :command:`ipengine` command.
138 Engines are started using the :command:`ipengine` command.
119
139
120 Once these processes are started, a user can run Python code interactively and
140 Once these processes are started, a user can run Python code interactively and
121 in parallel on the engines from within the IPython shell. This includes the
141 in parallel on the engines from within the IPython shell using an appropriate
122 ability to interact with, plot and visualize data from the engines.
142 client. This includes the ability to interact with, plot and visualize data
143 from the engines.
123
144
124 IPython has a command line program called :command:`ipcluster` that handles
145 IPython has a command line program called :command:`ipcluster` that automates
125 all aspects of starting the controller and engines on the compute nodes.
146 all aspects of starting the controller and engines on the compute nodes.
126 :command:`ipcluster` has full support for the Windows HPC job scheduler,
147 :command:`ipcluster` has full support for the Windows HPC job scheduler,
127 meaning that :command:`ipcluster` can use this job scheduler to start the
148 meaning that :command:`ipcluster` can use this job scheduler to start the
@@ -142,38 +163,44 b' Command Prompt and type the following command::'
142 ipcluster start -n 2
163 ipcluster start -n 2
143
164
144 You should see a number of messages printed to the screen, ending with
165 You should see a number of messages printed to the screen, ending with
145 "IPython cluster: started". A screenshot of this follows.
166 "IPython cluster: started". The result should look something like the following
146
167 screenshot:
147
168
148 .. image:: ipcluster_start.*
169 .. image:: ipcluster_start.*
149
170
150 At this point, the controller and two engines are running on your local host.
171 At this point, the controller and two engines are running on your local host.
151 This configuration is useful for testing and for situations where you
172 This configuration is useful for testing and for situations where you want to
152 have multiple cores on your local computer.
173 take advantage of multiple cores on your local computer.
153
174
154 Now that we have confirmed that :command:`ipcluster` is working properly, we
175 Now that we have confirmed that :command:`ipcluster` is working properly, we
155 describe how to configure and run an IPython cluster on an actual cluster
176 describe how to configure and run an IPython cluster on an actual compute
156 running Windows HPC Server 2008. Here is an outline of the needed steps:
177 cluster running Windows HPC Server 2008. Here is an outline of the needed
178 steps:
157
179
158 1. Create a cluster profile: ``ipcluster create -p mycluster``
180 1. Create a cluster profile using: ``ipcluster create -p mycluster``
159
181
160 2. Edit configuration files in :file:`.ipython\\cluster_mycluster`.
182 2. Edit configuration files in the directory :file:`.ipython\\cluster_mycluster`
161
183
162 3. Start the cluster: ``ipcluster start -p mycluster -n 32``
184 3. Start the cluster using: ``ipcluster start -p mycluster -n 32``
163
185
164 Creating a cluster profile
186 Creating a cluster profile
165 --------------------------
187 --------------------------
166
188
167 In most cases, you will have to create and configure a cluster profile to use
189 In most cases, you will have to create a cluster profile to use IPython on a
168 IPython on a cluster. A cluster profile is a specially named directory
190 cluster. A cluster profile is a name (like "mycluster") that is associated
169 (typically located in the :file:`.ipython` subdirectory of your home
191 with a particular cluster configuration. The profile name is used by
170 directory) that contains the configuration files for a particular IPython
192 :command:`ipcluster` when working with the cluster.
171 cluster, as well as log files and security keys. The naming convention
193
172 for cluster directories is: "cluster_<profile name>". Thus, the cluster
194 Associated with each cluster profile is a cluster directory. This cluster
173 directory for a profile named "foo" would be :file:`.ipython\\cluster_foo`.
195 directory is a specially named directory (typically located in the
196 :file:`.ipython` subdirectory of your home directory) that contains the
197 configuration files for a particular cluster profile, as well as log files and
198 security keys. The naming convention for cluster directories is:
199 :file:`cluster_<profile name>`. Thus, the cluster directory for a profile named
200 "foo" would be :file:`.ipython\\cluster_foo`.
174
201
175 To create a new cluster profile (named "mycluster"), type the following
202 To create a new cluster profile (named "mycluster") and the associated cluster
176 command at the Windows Command Prompt::
203 directory, type the following command at the Windows Command Prompt::
177
204
178 ipcluster create -p mycluster
205 ipcluster create -p mycluster
179
206
@@ -181,10 +208,8 b' The output of this command is shown in the screenshot below. Notice how'
181 :command:`ipcluster` prints out the location of the newly created cluster
208 :command:`ipcluster` prints out the location of the newly created cluster
182 directory.
209 directory.
183
210
184
185 .. image:: ipcluster_create.*
211 .. image:: ipcluster_create.*
186
212
187
188 Configuring a cluster profile
213 Configuring a cluster profile
189 -----------------------------
214 -----------------------------
190
215
@@ -221,18 +246,19 b' in most cases these will be sufficient to get you started.'
221 If any of your configuration attributes involve specifying the location
246 If any of your configuration attributes involve specifying the location
222 of shared directories or files, you must make sure that you use UNC paths
247 of shared directories or files, you must make sure that you use UNC paths
223 like :file:`\\\\host\\share`. It is also important that you specify
248 like :file:`\\\\host\\share`. It is also important that you specify
224 these paths using raw Python strings: ``r'\\host\share'``.
249 these paths using raw Python strings: ``r'\\host\share'`` to make sure
250 that the backslashes are properly escaped.
225
251
226 Starting the cluster profile
252 Starting the cluster profile
227 ----------------------------
253 ----------------------------
228
254
229 Once a cluster profile has been configured, starting an IPython cluster using
255 Once a cluster profile has been configured, starting an IPython cluster using
230 the profile is simple:
256 the profile is simple::
231
257
232 ipcluster start -p mycluster -n 32
258 ipcluster start -p mycluster -n 32
233
259
234 The ``-n 32`` option tells :command:`ipcluster` how many engines to start.
260 The ``-n`` option tells :command:`ipcluster` how many engines to start (in
235 Stopping the cluster is as simple as typing Control-C.
261 this case 32). Stopping the cluster is as simple as typing Control-C.
236
262
237 Using the HPC Job Manager
263 Using the HPC Job Manager
238 -------------------------
264 -------------------------
@@ -247,32 +273,56 b' Once these files have been created, they can be imported into the HPC Job'
247 Manager application. Then, the controller and engines for that profile can be
273 Manager application. Then, the controller and engines for that profile can be
248 started using the HPC Job Manager directly, without using :command:`ipcluster`.
274 started using the HPC Job Manager directly, without using :command:`ipcluster`.
249 However, anytime the cluster profile is re-configured, ``ipcluster start``
275 However, anytime the cluster profile is re-configured, ``ipcluster start``
250 has to be run again to regenerate the XML job description files. The
276 must be run again to regenerate the XML job description files. The
251 following screenshot shows what the HPC Job Manager interface looks like
277 following screenshot shows what the HPC Job Manager interface looks like
252 with a running IPython cluster.
278 with a running IPython cluster.
253
279
254
255 .. image:: hpc_job_manager.*
280 .. image:: hpc_job_manager.*
256
281
257 Performing a simple interactive parallel computation
282 Performing a simple interactive parallel computation
258 ====================================================
283 ====================================================
259
284
260 Once you have started your IPython cluster, you can start to use it. To do
285 Once you have started your IPython cluster, you can start to use it. To do
261 this, start up IPython's interactive shell by typing::
286 this, open up a new Windows Command Prompt and start up IPython's interactive
287 shell by typing::
262
288
263 ipython
289 ipython
264
290
265 at the Windows Command Prompt. Then you can create a :class:`MultiEngineClient`
291 Then you can create a :class:`MultiEngineClient` instance for your profile and
266 instance for your profile and use the resulting instance to
292 use the resulting instance to do a simple interactive parallel computation. In
267 have the cluster do a simple interactive parallel computation. In the
293 the code and screenshot that follows, we take a simple Python function and
268 screenshot that follows, we take a simple Python function::
294 apply it to each element of an array of integers in parallel using the
269
295 :meth:`MultiEngineClient.map` method:
270 def f(x): return x**10
296
271
297 .. sourcecode:: ipython
272 and apply it to each element of an array of integers in
298
273 parallel using the :meth:`MultiEngineClient.map` method::
299 In [1]: from IPython.kernel.client import *
274
300
275 mec.map(f, range(15))
301 In [2]: mec = MultiEngineClient(profile='mycluster')
302
303 In [3]: mec.get_ids()
304 Out[3]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
305
306 In [4]: def f(x):
307 ...: return x**10
308
309 In [5]: mec.map(f, range(15)) # f is applied in parallel
310 Out[5]:
311 [0,
312 1,
313 1024,
314 59049,
315 1048576,
316 9765625,
317 60466176,
318 282475249,
319 1073741824,
320 3486784401L,
321 10000000000L,
322 25937424601L,
323 61917364224L,
324 137858491849L,
325 289254654976L]
276
326
277 The :meth:`map` method has the same signature as Python's builtin :func:`map`
327 The :meth:`map` method has the same signature as Python's builtin :func:`map`
278 function, but runs the calculation in parallel. More involved examples of using
328 function, but runs the calculation in parallel. More involved examples of using
General Comments 0
You need to be logged in to leave comments. Login now