Work in the documentation.
Brian Granger
NO CONTENT: 4 new files (mode 100644), binary diffs hidden
@@ -51,19 +51,22 b' def inputhook_wx1():'
51 51 This approach seems to work, but its performance is not great as it
52 52 relies on having PyOS_InputHook called regularly.
53 53 """
54 app = wx.GetApp()
55 if app is not None:
56 assert wx.Thread_IsMain()
57
58 # Make a temporary event loop and process system events until
59 # there are no more waiting, then allow idle events (which
60 # will also deal with pending or posted wx events.)
61 evtloop = wx.EventLoop()
62 ea = wx.EventLoopActivator(evtloop)
63 while evtloop.Pending():
64 evtloop.Dispatch()
65 app.ProcessIdle()
66 del ea
54 try:
55 app = wx.GetApp()
56 if app is not None:
57 assert wx.Thread_IsMain()
58
59 # Make a temporary event loop and process system events until
60 # there are no more waiting, then allow idle events (which
61 # will also deal with pending or posted wx events.)
62 evtloop = wx.EventLoop()
63 ea = wx.EventLoopActivator(evtloop)
64 while evtloop.Pending():
65 evtloop.Dispatch()
66 app.ProcessIdle()
67 del ea
68 except KeyboardInterrupt:
69 pass
67 70 return 0
68 71
69 72 class EventLoopTimer(wx.Timer):
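The change above wraps the whole event pump in a ``try``/``except`` so that a
Ctrl-C pressed while IPython sits idle cannot abort the hook mid-dispatch. A
minimal sketch of the general pattern, independent of wx (``pump_events`` is a
placeholder callable, not a name from the original code)::

    def guarded_inputhook(pump_events):
        # Run one iteration of a GUI event pump from PyOS_InputHook.
        # KeyboardInterrupt is swallowed so that a stray Ctrl-C at an
        # idle prompt is handled by the shell, never by the hook.
        try:
            pump_events()
        except KeyboardInterrupt:
            pass
        return 0  # input hooks report status as an int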
@@ -102,13 +105,16 b' def inputhook_wx2():'
102 105 but eventually performance would suffer from calling select/kbhit too
103 106 often.
104 107 """
105 app = wx.GetApp()
106 if app is not None:
107 assert wx.Thread_IsMain()
108 elr = EventLoopRunner()
109 # As this time is made shorter, keyboard response improves, but idle
110 # CPU load goes up. 10 ms seems like a good compromise.
111 elr.Run(time=10) # CHANGE time here to control polling interval
108 try:
109 app = wx.GetApp()
110 if app is not None:
111 assert wx.Thread_IsMain()
112 elr = EventLoopRunner()
113 # As this time is made shorter, keyboard response improves, but idle
114 # CPU load goes up. 10 ms seems like a good compromise.
115 elr.Run(time=10) # CHANGE time here to control polling interval
116 except KeyboardInterrupt:
117 pass
112 118 return 0
113 119
114 120 def inputhook_wx3():
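``inputhook_wx2`` delegates the polling to an ``EventLoopRunner`` defined
elsewhere in this file, so only the 10 ms trade-off is visible in this hunk. A
rough sketch of the underlying idea, with assumed names since the real class
body is not shown here::

    import wx

    class InputPoller(wx.Timer):
        # Poll for pending terminal input every poll_ms milliseconds and
        # exit the wx event loop as soon as the user starts typing.
        def __init__(self, evtloop, stdin_ready, poll_ms=10):
            wx.Timer.__init__(self)
            self.evtloop = evtloop
            self.stdin_ready = stdin_ready
            self.Start(poll_ms)  # shorter poll: snappier keys, more CPU

        def Notify(self):
            if self.stdin_ready():
                self.evtloop.Exit()  # hand control back to the prompt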
@@ -119,49 +125,54 b' def inputhook_wx3():'
119 125 time.sleep is inserted. This is needed; otherwise, CPU usage is at 100%.
120 126 This sleep time should be tuned, though, for best performance.
121 127 """
122 app = wx.GetApp()
123 if app is not None:
124 assert wx.Thread_IsMain()
125
126 # The import of wx on Linux sets the handler for signal.SIGINT
127 # to 0. This is a bug in wx or gtk. We fix by just setting it
128 # back to the Python default.
129 if not callable(signal.getsignal(signal.SIGINT)):
130 signal.signal(signal.SIGINT, signal.default_int_handler)
131
132 evtloop = wx.EventLoop()
133 ea = wx.EventLoopActivator(evtloop)
134 t = clock()
135 while not stdin_ready():
136 while evtloop.Pending():
137 t = clock()
138 evtloop.Dispatch()
139 app.ProcessIdle()
140 # We need to sleep at this point to keep the idle CPU load
141 # low. However, if we sleep too long, GUI response is poor. As
142 # a compromise, we watch how often GUI events are being processed
143 # and switch between a short and long sleep time. Here are some
144 # stats useful in helping to tune this.
145 # time CPU load
146 # 0.001 13%
147 # 0.005 3%
148 # 0.01 1.5%
149 # 0.05 0.5%
150 used_time = clock() - t
151 if used_time > 5*60.0:
152 # print 'Sleep for 5 s' # dbg
153 time.sleep(5.0)
154 elif used_time > 10.0:
155 # print 'Sleep for 1 s' # dbg
156 time.sleep(1.0)
157 elif used_time > 0.1:
158 # Few GUI events coming in, so we can sleep longer
159 # print 'Sleep for 0.05 s' # dbg
160 time.sleep(0.05)
161 else:
162 # Many GUI events coming in, so sleep only very little
163 time.sleep(0.001)
164 del ea
128 # We need to protect against a user pressing Control-C when IPython is
129 # idle and this is running. We trap KeyboardInterrupt and pass.
130 try:
131 app = wx.GetApp()
132 if app is not None:
133 assert wx.Thread_IsMain()
134
135 # The import of wx on Linux sets the handler for signal.SIGINT
136 # to 0. This is a bug in wx or gtk. We fix by just setting it
137 # back to the Python default.
138 if not callable(signal.getsignal(signal.SIGINT)):
139 signal.signal(signal.SIGINT, signal.default_int_handler)
140
141 evtloop = wx.EventLoop()
142 ea = wx.EventLoopActivator(evtloop)
143 t = clock()
144 while not stdin_ready():
145 while evtloop.Pending():
146 t = clock()
147 evtloop.Dispatch()
148 app.ProcessIdle()
149 # We need to sleep at this point to keep the idle CPU load
150 # low. However, if we sleep too long, GUI response is poor. As
151 # a compromise, we watch how often GUI events are being processed
152 # and switch between a short and long sleep time. Here are some
153 # stats useful in helping to tune this.
154 # time CPU load
155 # 0.001 13%
156 # 0.005 3%
157 # 0.01 1.5%
158 # 0.05 0.5%
159 used_time = clock() - t
160 if used_time > 5*60.0:
161 # print 'Sleep for 5 s' # dbg
162 time.sleep(5.0)
163 elif used_time > 10.0:
164 # print 'Sleep for 1 s' # dbg
165 time.sleep(1.0)
166 elif used_time > 0.1:
167 # Few GUI events coming in, so we can sleep longer
168 # print 'Sleep for 0.05 s' # dbg
169 time.sleep(0.05)
170 else:
171 # Many GUI events coming in, so sleep only very little
172 time.sleep(0.001)
173 del ea
174 except KeyboardInterrupt:
175 pass
165 176 return 0
166 177
167 178 # This is our default implementation
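The tiered ``time.sleep`` calls above implement an adaptive backoff: the
longer the GUI has been idle, the longer the hook sleeps between polls,
trading first-keystroke latency for the CPU loads listed in the comment. The
schedule in isolation (thresholds and durations copied from the code above)::

    import time

    def adaptive_sleep(idle_seconds):
        # idle_seconds: time since the last GUI event was dispatched.
        if idle_seconds > 5 * 60.0:
            time.sleep(5.0)        # deeply idle: near-zero CPU
        elif idle_seconds > 10.0:
            time.sleep(1.0)
        elif idle_seconds > 0.1:
            time.sleep(0.05)       # few GUI events: ~0.5% CPU
        else:
            time.sleep(0.001)      # busy GUI: ~13% CPU, best response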
@@ -1,25 +1,30 b''
1 1 #!/usr/bin/env python
2 # encoding: utf-8
3 2 """Run a Monte-Carlo options pricer in parallel."""
4 3
5 4 from IPython.kernel import client
6 5 import numpy as np
7 6 from mcpricer import price_options
8 7
9
10 tc = client.TaskClient(profile='default')
8 # The MultiEngineClient is used to set up the calculation and works with all
9 # engines.
11 10 mec = client.MultiEngineClient(profile='default')
12 11
12 # The TaskClient is an interface to the engines that provides dynamic load
13 # balancing at the expense of not knowing which engine will execute the code.
14 tc = client.TaskClient(profile='default')
13 15
14 # Initialize the common code on the engines
16 # Initialize the common code on the engines. This Python module has the
17 # price_options function that prices the options.
15 18 mec.run('mcpricer.py')
16 19
17 # Define the function that will do the calculation
20 # Define the function that will make up our tasks. We basically want to
21 # call the price_options function with all but two arguments (K, sigma)
22 # fixed.
18 23 def my_prices(K, sigma):
19 24 S = 100.0
20 25 r = 0.05
21 26 days = 260
22 paths = 10000
27 paths = 100000
23 28 return price_options(S, K, sigma, r, days, paths)
24 29
25 30 # Create arrays of strike prices and volatilities
@@ -28,7 +33,9 b' nsigma = 5'
28 33 K_vals = np.linspace(90.0, 100.0, nK)
29 34 sigma_vals = np.linspace(0.0, 0.2, nsigma)
30 35
31 # Submit tasks
36 # Submit tasks to the TaskClient for each (K, sigma) pair as a MapTask.
37 # The MapTask simply applies a function (my_prices) to the arguments:
38 # my_prices(K, sigma) and returns the result.
32 39 taskids = []
33 40 for K in K_vals:
34 41 for sigma in sigma_vals:
@@ -37,24 +44,24 b' for K in K_vals:'
37 44
38 45 print "Submitted tasks: ", taskids
39 46
40 # Block until tasks are completed
47 # Block until all tasks are completed.
41 48 tc.barrier(taskids)
42 49
43 # Get the results
50 # Get the results using TaskClient.get_task_result.
44 51 results = [tc.get_task_result(tid) for tid in taskids]
45 52
46 # Assemble the result
53 # Assemble the result into a structured NumPy array.
47 54 prices = np.empty(nK*nsigma,
48 dtype=[('vcall',float),('vput',float),('acall',float),('aput',float)]
55 dtype=[('ecall',float),('eput',float),('acall',float),('aput',float)]
49 56 )
50 57 for i, price_tuple in enumerate(results):
51 58 prices[i] = price_tuple
52 59 prices.shape = (nK, nsigma)
60 K_vals, sigma_vals = np.meshgrid(K_vals, sigma_vals)
53 61
54
55 def plot_options(K_vals, sigma_vals, prices):
62 def plot_options(sigma_vals, K_vals, prices):
56 63 """
57 Make a contour plot of the option prices.
64 Make a contour plot of the option price in (sigma, K) space.
58 65 """
59 66 from matplotlib import pyplot as plt
60 67 plt.contourf(sigma_vals, K_vals, prices)
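The structured dtype introduced above lets each task's 4-tuple of prices be
stored as one record and then pulled out by field name for plotting. A
self-contained sketch of that assembly step (the two price tuples are made-up
illustrative values)::

    import numpy as np

    results = [(10.2, 4.1, 9.8, 3.9),    # fake (ecall, eput, acall, aput)
               (11.0, 4.5, 10.4, 4.2)]   # tuples standing in for task output
    prices = np.empty(len(results),
        dtype=[('ecall', float), ('eput', float),
               ('acall', float), ('aput', float)])
    for i, price_tuple in enumerate(results):
        prices[i] = price_tuple           # one tuple fills one record
    print(prices['acall'])                # -> [  9.8  10.4]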
@@ -4,7 +4,26 b' from math import *'
4 4
5 5 def price_options(S=100.0, K=100.0, sigma=0.25, r=0.05, days=260, paths=10000):
6 6 """
7 Price vanilla and asian options using a Monte Carlo method.
7 Price European and Asian options using a Monte Carlo method.
8
9 Parameters
10 ----------
11 S : float
12 The initial price of the stock.
13 K : float
14 The strike price of the option.
15 sigma : float
16 The volatility of the stock.
17 r : float
18 The risk free interest rate.
19 days : int
20 The number of days until the option expires.
21 paths : int
22 The number of Monte Carlo paths used to price the option.
23
24 Returns
25 -------
26 A tuple of (European call, European put, Asian call, Asian put) option prices.
8 27 """
9 28 h = 1.0/days
10 29 const1 = exp((r-0.5*sigma**2)*h)
@@ -18,16 +37,9 b' def price_options(S=100.0, K=100.0, sigma=0.25, r=0.05, days=260, paths=10000):'
18 37 stock_price_avg = stock_price_sum/days
19 38 zeros = np.zeros(paths, dtype='float64')
20 39 r_factor = exp(-r*h*days)
21 vanilla_put = r_factor*np.mean(np.maximum(zeros, K-stock_price))
40 euro_put = r_factor*np.mean(np.maximum(zeros, K-stock_price))
22 41 asian_put = r_factor*np.mean(np.maximum(zeros, K-stock_price_avg))
23 vanilla_call = r_factor*np.mean(np.maximum(zeros, stock_price-K))
42 euro_call = r_factor*np.mean(np.maximum(zeros, stock_price-K))
24 43 asian_call = r_factor*np.mean(np.maximum(zeros, stock_price_avg-K))
25 return (vanilla_call, vanilla_put, asian_call, asian_put)
26
44 return (euro_call, euro_put, asian_call, asian_put)
27 45
28 if __name__ == '__main__':
29 (vc, vp, ac, ap) = price_options()
30 print "Vanilla Put Price = ", vp
31 print "Asian Put Price = ", ap
32 print "Vanilla Call Price = ", vc
33 print "Asian Call Price = ", ac
@@ -25,15 +25,6 b" filestring = 'pi200m-ascii-%(i)02dof20.txt'"
25 25 files = [filestring % {'i':i} for i in range(1,16)]
26 26
27 27
28 # A function for reducing the frequencies calculated
29 # by different engines.
30 def reduce_freqs(freqlist):
31 allfreqs = np.zeros_like(freqlist[0])
32 for f in freqlist:
33 allfreqs += f
34 return allfreqs
35
36
37 28 # Connect to the IPython cluster
38 29 mec = client.MultiEngineClient(profile='mycluster')
39 30 mec.run('pidigits.py')
@@ -42,9 +33,7 b" mec.run('pidigits.py')"
42 33 # Run 10m digits on 1 engine
43 34 mapper = mec.mapper(targets=0)
44 35 t1 = clock()
45
46 36 freqs10m = mapper.map(compute_two_digit_freqs, files[:1])[0]
47
48 37 t2 = clock()
49 38 digits_per_second1 = 10.0e6/(t2-t1)
50 39 print "Digits per second (1 core, 10m digits): ", digits_per_second1
@@ -52,10 +41,8 b' print "Digits per second (1 core, 10m digits): ", digits_per_second1'
52 41
53 42 # Run 150m digits on 15 engines (8 cores)
54 43 t1 = clock()
55
56 44 freqs_all = mec.map(compute_two_digit_freqs, files[:len(mec)])
57 45 freqs150m = reduce_freqs(freqs_all)
58
59 46 t2 = clock()
60 47 digits_per_second8 = 150.0e6/(t2-t1)
61 48 print "Digits per second (8 cores, 150m digits): ", digits_per_second8
@@ -24,16 +24,34 b' from matplotlib import pyplot as plt'
24 24 # Top-level functions
25 25
26 26 def compute_one_digit_freqs(filename):
27 """
28 Read digits of pi from a file and compute the 1 digit frequencies.
29 """
27 30 d = txt_file_to_digits(filename)
28 31 freqs = one_digit_freqs(d)
29 32 return freqs
30 33
31 34 def compute_two_digit_freqs(filename):
35 """
36 Read digits of pi from a file and compute the 2 digit frequencies.
37 """
32 38 d = txt_file_to_digits(filename)
33 39 freqs = two_digit_freqs(d)
34 40 return freqs
35 41
42 def reduce_freqs(freqlist):
43 """
44 Add up a list of freq counts to get the total counts.
45 """
46 allfreqs = np.zeros_like(freqlist[0])
47 for f in freqlist:
48 allfreqs += f
49 return allfreqs
50
36 51 def compute_n_digit_freqs(filename, n):
52 """
53 Read digits of pi from a file and compute the n digit frequencies.
54 """
37 55 d = txt_file_to_digits(filename)
38 56 freqs = n_digit_freqs(d, n)
39 57 return freqs
NO CONTENT: 8 modified files, binary diffs hidden
@@ -2,125 +2,269 b''
2 2 Parallel examples
3 3 =================
4 4
5 In this section we describe a few more involved examples of using an IPython
6 cluster to perform a parallel computation.
5 In this section we describe two more involved examples of using an IPython
6 cluster to perform a parallel computation. In these examples, we will be using
7 IPython's "pylab" mode, which enables interactive plotting using the
8 Matplotlib package. IPython can be started in this mode by typing::
9
10 ipython -p pylab
11
12 at the system command line. If this prints an error message, you will
13 need to install the default profiles from within IPython by doing,
14
15 .. sourcecode:: ipython
16
17 In [1]: %install_profiles
18
19 and then restarting IPython.
7 20
8 21 150 million digits of pi
9 22 ========================
10 23
11 24 In this example we would like to study the distribution of digits in the
12 number pi. More specifically, we are going to study how often each 2
13 digits sequence occurs in the first 150 million digits of pi. If the digits
14 0-9 occur with equal probability, we expect that each two digits sequence
15 (00, 01, ..., 99) will occur 1% of the time.
16
17 This example uses precomputed digits of pi from the website of Professor
18 Yasumasa Kanada at the University of Tokyo (http://www.super-computing.org).
19 These digits come in a set of ``.txt`` files
20 (ftp://pi.super-computing.org/.2/pi200m/) that each have 10 million digits of
21 pi. In the parallel computation, we will use the :meth:`MultiEngineClient.map`
22 method to have each engine compute the desired statistics on a subset of these
23 files. Before I started the parallel computation, I copied the data files
24 to the compute nodes so the engines have fast access to them.
25
26 Here are the Python functions for counting the frequencies of each two digit
27 sequence in serial::
28
29 def compute_two_digit_freqs(filename):
30 """
31 Compute the two digit frequencies from a single file.
32 """
33 d = txt_file_to_digits(filename)
34 freqs = two_digit_freqs(d)
35 return freqs
36
37 def txt_file_to_digits(filename, the_type=str):
38 """
39 Yield the digits of pi read from a .txt file.
40 """
41 with open(filename, 'r') as f:
42 for line in f.readlines():
43 for c in line:
44 if c != '\n' and c != ' ':
45 yield the_type(c)
46
47 def two_digit_freqs(digits, normalize=False):
48 """
49 Consume digits of pi and compute two-digit frequency counts.
50 """
51 freqs = np.zeros(100, dtype='i4')
52 last = digits.next()
53 this = digits.next()
54 for d in digits:
55 index = int(last + this)
56 freqs[index] += 1
57 last = this
58 this = d
59 if normalize:
60 freqs = freqs/float(freqs.sum())  # float() avoids integer division
61 return freqs
62
63 These functions are defined in the file :file:`pidigits.py`. To perform the
64 calculation in parallel, we use an additional file: :file:`parallelpi.py`::
65
66 from IPython.kernel import client
67 from matplotlib import pyplot as plt
68 import numpy as np
69 from pidigits import *
70 from timeit import default_timer as clock
71
72 # Files with digits of pi (10m digits each)
73 filestring = 'pi200m-ascii-%(i)02dof20.txt'
74 files = [filestring % {'i':i} for i in range(1,16)]
75
76 # A function for reducing the frequencies calculated
77 # by different engines.
78 def reduce_freqs(freqlist):
79 allfreqs = np.zeros_like(freqlist[0])
80 for f in freqlist:
81 allfreqs += f
82 return allfreqs
83
84 # Connect to the IPython cluster
85 mec = client.MultiEngineClient(profile='mycluster')
86 mec.run('pidigits.py')
87
88 # Run 10m digits on 1 engine
89 mapper = mec.mapper(targets=0)
90 t1 = clock()
91 freqs10m = mapper.map(compute_two_digit_freqs, files[:1])[0]
92 t2 = clock()
93 digits_per_second1 = 10.0e6/(t2-t1)
94 print "Digits per second (1 core, 10m digits): ", digits_per_second1
95
96 # Run 150m digits on 15 engines (8 cores)
97 t1 = clock()
98 freqs_all = mec.map(compute_two_digit_freqs, files[:len(mec)])
99 freqs150m = reduce_freqs(freqs_all)
100 t2 = clock()
101 digits_per_second8 = 150.0e6/(t2-t1)
102 print "Digits per second (8 cores, 150m digits): ", digits_per_second8
103
104 print "Speedup: ", digits_per_second8/digits_per_second1
105
106 plot_two_digit_freqs(freqs150m)
107 plt.title("2 digit sequences in 150m digits of pi")
108
109 To run this code on an IPython cluster:
110
111 1. Start an IPython cluster with 15 engines: ``ipcluster start -p mycluster -n 15``
112 2. Open IPython's interactive shell using the pylab profile
113 ``ipython -p pylab`` and type ``run parallelpi.py``.
114
115 At this point, the parallel calculation will begin. On a small 8 core
116 cluster, we observe a speedup of 7.7x. The resulting plot of the two digit
117 sequences is shown in the following screenshot.
118
119 .. image:: parallel_pi.*
120
121
122 Parallel option pricing
123 =======================
124
125 The example will be added at a later point.
25 number pi (in base 10). While it is not known if pi is a normal number (a
26 number is normal in base 10 if 0-9 occur with equal likelihood), numerical
27 investigations suggest that it is. We will begin with a serial calculation on
28 10,000 digits of pi and then perform a parallel calculation involving 150
29 million digits.
30
31 In both the serial and parallel calculation we will be using functions defined
32 in the :file:`pidigits.py` file, which is available in the
33 :file:`docs/examples/kernel` directory of the IPython source distribution.
34 These functions provide basic facilities for working with the digits of pi and
35 can be loaded into IPython by putting :file:`pidigits.py` in your current
36 working directory and then doing:
37
38 .. sourcecode:: ipython
39
40 In [1]: run pidigits.py
41
42 Serial calculation
43 ------------------
44
45 For the serial calculation, we will use SymPy (http://www.sympy.org) to
46 calculate 10,000 digits of pi and then look at the frequencies of the digits
47 0-9. Out of 10,000 digits, we expect each digit to occur 1,000 times. While
48 SymPy is capable of calculating many more digits of pi, our purpose here is to
49 set the stage for the much larger parallel calculation.
50
51 In this example, we use two functions from :file:`pidigits.py`:
52 :func:`one_digit_freqs` (which calculates how many times each digit occurs)
53 and :func:`plot_one_digit_freqs` (which uses Matplotlib to plot the result).
54 Here is an interactive IPython session that uses these functions with
55 SymPy:
56
57 .. sourcecode:: ipython
58
59 In [7]: import sympy
60
61 In [8]: pi = sympy.pi.evalf(40)
62
63 In [9]: pi
64 Out[9]: 3.141592653589793238462643383279502884197
65
66 In [10]: pi = sympy.pi.evalf(10000)
67
68 In [11]: digits = (d for d in str(pi)[2:]) # create a sequence of digits
69
70 In [12]: run pidigits.py # load one_digit_freqs/plot_one_digit_freqs
71
72 In [13]: freqs = one_digit_freqs(digits)
73
74 In [14]: plot_one_digit_freqs(freqs)
75 Out[14]: [<matplotlib.lines.Line2D object at 0x18a55290>]
76
77 The resulting plot of the single digit counts shows that each digit occurs
78 approximately 1,000 times, but that with only 10,000 digits the
79 statistical fluctuations are still rather large:
80
81 .. image:: single_digits.*
82
83 It is clear that to reduce the relative fluctuations in the counts, we need
84 to look at many more digits of pi. That brings us to the parallel calculation.
85
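A quick back-of-envelope check of why more digits help (standard binomial
statistics, not from the original text): with N digits and per-digit
probability p = 1/10, a count of N*p fluctuates by about sqrt(N*p*(1-p)), so
the relative fluctuation shrinks like 1/sqrt(N)::

    from math import sqrt

    for N in (10000, 150000000):
        expected = N * 0.1
        spread = sqrt(N * 0.1 * 0.9)
        print("N=%d: %d +/- %.0f (%.3f%%)"
              % (N, expected, spread, 100 * spread / expected))
    # N=10000:      1000 +/- 30        (3.000%)
    # N=150000000:  15000000 +/- 3674  (0.024%)
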
86 Parallel calculation
87 --------------------
88
89 Calculating many digits of pi is a challenging computational problem in itself.
90 Because we want to focus on the distribution of digits in this example, we
91 will use pre-computed digits of pi from the website of Professor Yasumasa
92 Kanada at the University of Tokyo (http://www.super-computing.org). These
93 digits come in a set of text files (ftp://pi.super-computing.org/.2/pi200m/)
94 that each have 10 million digits of pi.
95
96 For the parallel calculation, we have copied these files to the local hard
97 drives of the compute nodes. A total of 15 of these files will be used, for a
98 total of 150 million digits of pi. To make things a little more interesting we
99 will calculate the frequencies of all two-digit sequences (00-99) and then plot
100 the result using a 2D matrix in Matplotlib.
101
102 The overall idea of the calculation is simple: each IPython engine will
103 compute the two digit counts for the digits in a single file. Then in a final
104 step the counts from each engine will be added up. To perform this
105 calculation, we will need two top-level functions from :file:`pidigits.py`:
106
107 .. literalinclude:: ../../examples/kernel/pidigits.py
108 :language: python
109 :lines: 34-49
110
111 We will also use the :func:`plot_two_digit_freqs` function to plot the
112 results. The code to run this calculation in parallel is contained in
113 :file:`docs/examples/kernel/parallelpi.py`. This code can be run in parallel
114 using IPython by following these steps:
115
116 1. Copy the text files with the digits of pi
117 (ftp://pi.super-computing.org/.2/pi200m/) to the working directory of the
118 engines on the compute nodes.
119 2. Use :command:`ipcluster` to start 15 engines. We used an 8 core cluster
120 with hyperthreading enabled, which makes the 8 cores look like 16 (1
121 controller + 15 engines) in the OS. However, the maximum speedup we can
122 observe is still only 8x.
123 3. With the file :file:`parallelpi.py` in your current working directory, open
124 up IPython in pylab mode and type ``run parallelpi.py``.
125
126 When run on our 8 core cluster, we observe a speedup of 7.7x. This is slightly
127 less than linear scaling (8x) because the controller is also running on one of
128 the cores.
129
130 To emphasize the interactive nature of IPython, we now show how the
131 calculation can also be run by simply typing the commands from
132 :file:`parallelpi.py` interactively into IPython:
133
134 .. sourcecode:: ipython
135
136 In [1]: from IPython.kernel import client
137 2009-11-19 11:32:38-0800 [-] Log opened.
138
139 # The MultiEngineClient allows us to use the engines interactively
140 In [2]: mec = client.MultiEngineClient(profile='mycluster')
141 2009-11-19 11:32:44-0800 [-] Connecting [0]
142 2009-11-19 11:32:44-0800 [Negotiation,client] Connected: ./ipcontroller-mec.furl
143
144 In [3]: mec.get_ids()
145 Out[3]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
146
147 In [4]: run pidigits.py
148
149 In [5]: filestring = 'pi200m-ascii-%(i)02dof20.txt'
150
151 In [6]: files = [filestring % {'i':i} for i in range(1,16)]
152
153 In [7]: files
154 Out[7]:
155 ['pi200m-ascii-01of20.txt',
156 'pi200m-ascii-02of20.txt',
157 'pi200m-ascii-03of20.txt',
158 'pi200m-ascii-04of20.txt',
159 'pi200m-ascii-05of20.txt',
160 'pi200m-ascii-06of20.txt',
161 'pi200m-ascii-07of20.txt',
162 'pi200m-ascii-08of20.txt',
163 'pi200m-ascii-09of20.txt',
164 'pi200m-ascii-10of20.txt',
165 'pi200m-ascii-11of20.txt',
166 'pi200m-ascii-12of20.txt',
167 'pi200m-ascii-13of20.txt',
168 'pi200m-ascii-14of20.txt',
169 'pi200m-ascii-15of20.txt']
170
171 # This is the parallel calculation using the MultiEngineClient.map method
172 # which applies compute_two_digit_freqs to each file in files in parallel.
173 In [8]: freqs_all = mec.map(compute_two_digit_freqs, files)
174
175 # Add up the frequencies from each engine.
176 In [9]: freqs = reduce_freqs(freqs_all)
177
178 In [10]: plot_two_digit_freqs(freqs)
179 Out[10]: <matplotlib.image.AxesImage object at 0x18beb110>
180
181 In [11]: plt.title('2 digit counts of 150m digits of pi')
182 Out[11]: <matplotlib.text.Text object at 0x18d1f9b0>
183
184 The resulting plot generated by Matplotlib is shown below. The colors indicate
185 which two digit sequences are more (red) or less (blue) likely to occur in the
186 first 150 million digits of pi. We clearly see that the sequence "41" is
187 most likely and that "06" and "07" are least likely. Further analysis would
188 show that the relative size of the statistical fluctuations have decreased
189 compared to the 10,000 digit calculation.
190
191 .. image:: two_digit_counts.*
192
193 To conclude this example, we summarize the key features of IPython's parallel
194 architecture that this example demonstrates:
195
196 * Serial code can often be parallelized with only a few extra lines of code.
197 In this case we have used :meth:`MultiEngineClient.map`; the
198 :class:`MultiEngineClient` class has a number of other methods that provide
199 more fine grained control of the IPython cluster.
200 * The resulting parallel code can be run without ever leaving IPython's
201 interactive shell.
202 * Any data computed in parallel can be explored interactively through
203 visualization or further numerical calculations.
204
205
206 Parallel options pricing
207 ========================
208
209 An option is a financial contract that gives the buyer of the contract the
210 right to buy (a "call") or sell (a "put") a secondary asset (a stock for
211 example) at a particular date in the future (the expiration date) for a
212 pre-agreed upon price (the strike price). For this right, the buyer pays the
213 seller a premium (the option price). There are a wide variety of flavors of
214 options (American, European, Asian, etc.) that are useful for different
215 purposes: hedging against risk, speculation, etc.
216
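Concretely, the payoffs at expiration are the standard ones, matching the
``np.maximum`` calls in :file:`mcpricer.py`: a call pays max(S_T - K, 0), a
put pays max(K - S_T, 0), and the Asian variants replace the final price S_T
with the average price along the path::

    import numpy as np

    def call_payoff(S_T, K):
        return np.maximum(S_T - K, 0.0)   # right to buy at K

    def put_payoff(S_T, K):
        return np.maximum(K - S_T, 0.0)   # right to sell at K
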
217 Much of modern finance is driven by the need to price these contracts
218 accurately based on what is known about the properties (such as volatility) of
219 the underlying asset. One method of pricing options is to use a Monte Carlo
220 simulation of the underlying assets. In this example we use this approach to
221 price both European and Asian (path dependent) options for various strike
222 prices and volatilities.
223
224 The code for this example can be found in the :file:`docs/examples/kernel`
225 directory of the IPython source.
226
227 The function :func:`price_options` calculates the option prices for a single
228 option (:file:`mcpricer.py`):
229
230 .. literalinclude:: ../../examples/kernel/mcpricer.py
231 :language: python
232
233 To run this code in parallel, we will use IPython's :class:`TaskClient`, which
234 distributes work to the engines using dynamic load balancing. This client
235 can be used alongside the :class:`MultiEngineClient` shown in the previous
236 example.
237
238 Here is the code that calls :func:`price_options` for a number of different
239 volatilities and strike prices in parallel:
240
241 .. literalinclude:: ../../examples/kernel/mcdriver.py
242 :language: python
243
244 To run this code in parallel, start an IPython cluster using
245 :command:`ipcluster`, open IPython in pylab mode with the file
246 :file:`mcdriver.py` in your current working directory and then type:
247
248 .. sourcecode:: ipython
249
250 In [7]: run mcdriver.py
251 Submitted tasks: [0, 1, 2, ...]
252
253 Once all the tasks have finished, the results can be plotted using the
254 :func:`plot_options` function. Here we make contour plots of the Asian
255 call and Asian put as a function of the volatility and strike price:
256
257 .. sourcecode:: ipython
258
259 In [8]: plot_options(sigma_vals, K_vals, prices['acall'])
260
261 In [9]: plt.figure()
262 Out[9]: <matplotlib.figure.Figure object at 0x18c178d0>
263
264 In [10]: plot_options(sigma_vals, K_vals, prices['aput'])
265
266 The plots generated by Matplotlib will look like this:
267
268 .. image:: asian_call.*
126 269
270 .. image:: asian_put.*
@@ -5,18 +5,38 b' Getting started'
5 5 Introduction
6 6 ============
7 7
8 IPython is an open source project focused on interactive and exploratory
9 computing in the Python programming language. It consists of two
10 main components:
8 The Python programming language is an increasingly popular language for numerical
9 computing. This is due to a unique combination of factors. First, Python is a
10 high-level and *interactive* language that is well matched for interactive
11 numerical work. Second, it is easy (often trivial) to integrate legacy
12 C/C++/Fortran code into Python. Third, a large number of high-quality open
13 source projects provide all the needed building blocks for numerical
14 computing: numerical arrays (NumPy), algorithms (SciPy), 2D/3D visualization
15 (Matplotlib, Mayavi, Chaco), symbolic mathematics (Sage, SymPy), and others.
16
17 The IPython project is a core part of this open-source toolchain and is
18 focused on creating a comprehensive environment for interactive and
19 exploratory computing in the Python programming language. It enables all of
20 the above tools to be used interactively and consists of two main components:
11 21
12 22 * An enhanced interactive Python shell with support for interactive plotting
13 23 and visualization.
14 24 * An architecture for interactive parallel computing.
15 25
16 26 With these components, it is possible to perform all aspects of a parallel
17 computation interactively. This document describes how to get started with
18 IPython on Windows HPC Server 2008. A more complete description of IPython's
19 parallel computing capabilities can be found in IPython's online documentation
27 computation interactively. This type of workflow is particularly relevant in
28 scientific and numerical computing where algorithms, code and data are
29 continually evolving as the user/developer explores a problem. The broad
30 treads in computing (commodity clusters, multicore, cloud computing, etc.)
31 make these capabilities of IPython particularly relevant.
32
33 While IPython is a cross-platform tool, it has particularly strong support for
34 Windows-based compute clusters running Windows HPC Server 2008. This document
35 describes how to get started with IPython on Windows HPC Server 2008. The
36 content and emphasis here are practical: installing IPython, configuring
37 IPython to use the Windows job scheduler and running example parallel programs
38 interactively. A more complete description of IPython's parallel computing
39 capabilities can be found in IPython's online documentation
20 40 (http://ipython.scipy.org/moin/Documentation).
21 41
22 42 Setting up your Windows cluster
@@ -38,7 +58,7 b' such a cluster:'
38 58
39 59 More details about installing and configuring Windows HPC Server 2008 can be
40 60 found on the Windows HPC Home Page (http://www.microsoft.com/hpc). Regardless
41 of what steps you go through to set up your cluster, the remainder of this
61 of what steps you follow to set up your cluster, the remainder of this
42 62 document will assume that:
43 63
44 64 * There are domain users that can log on to the AD domain and submit jobs
@@ -63,8 +83,8 b' IPython on Windows:'
63 83 * pyOpenSSL (https://launchpad.net/pyopenssl)
64 84 * IPython (http://ipython.scipy.org)
65 85
66 In addition, the following dependencies are needed to run the demos
67 described in this document.
86 In addition, the following dependencies are needed to run the demos described
87 in this document.
68 88
69 89 * NumPy and SciPy (http://www.scipy.org)
70 90 * wxPython (http://www.wxpython.org)
@@ -94,7 +114,7 b' need to follow:'
94 114 Further details about installing IPython or its dependencies can be found in
95 115 the online IPython documentation (http://ipython.scipy.org/moin/Documentation)
96 116 Once you are finished with the installation, you can try IPython out by
97 opening a Windows Command Prompt and typing :command:`ipython`. This will
117 opening a Windows Command Prompt and typing ``ipython``. This will
98 118 start IPython's interactive shell and you should see something like the
99 119 following screenshot:
100 120
@@ -114,14 +134,15 b' IPython controller'
114 134 command.
115 135
116 136 IPython engine
117 IPython engines run your Python code in parallel on the compute nodes.
137 IPython engines run a user's Python code in parallel on the compute nodes.
118 138 Engines are started using the :command:`ipengine` command.
119 139
120 140 Once these processes are started, a user can run Python code interactively and
121 in parallel on the engines from within the IPython shell. This includes the
122 ability to interact with, plot and visualize data from the engines.
141 in parallel on the engines from within the IPython shell using an appropriate
142 client. This includes the ability to interact with, plot and visualize data
143 from the engines.
123 144
124 IPython has a command line program called :command:`ipcluster` that handles
145 IPython has a command line program called :command:`ipcluster` that automates
125 146 all aspects of starting the controller and engines on the compute nodes.
126 147 :command:`ipcluster` has full support for the Windows HPC job scheduler,
127 148 meaning that :command:`ipcluster` can use this job scheduler to start the
@@ -142,38 +163,44 b' Command Prompt and type the following command::'
142 163 ipcluster start -n 2
143 164
144 165 You should see a number of messages printed to the screen, ending with
145 "IPython cluster: started". A screenshot of this follows.
146
166 "IPython cluster: started". The result should look something like the following
167 screenshot:
147 168
148 169 .. image:: ipcluster_start.*
149 170
150 171 At this point, the controller and two engines are running on your local host.
151 This configuration is useful for testing and for situations where you
152 have multiple cores on your local computer.
172 This configuration is useful for testing and for situations where you want to
173 take advantage of multiple cores on your local computer.
153 174
154 175 Now that we have confirmed that :command:`ipcluster` is working properly, we
155 describe how to configure and run an IPython cluster on an actual cluster
156 running Windows HPC Server 2008. Here is an outline of the needed steps:
176 describe how to configure and run an IPython cluster on an actual compute
177 cluster running Windows HPC Server 2008. Here is an outline of the needed
178 steps:
157 179
158 1. Create a cluster profile: ``ipcluster create -p mycluster``
180 1. Create a cluster profile using: ``ipcluster create -p mycluster``
159 181
160 2. Edit confguration files in :file:`.ipython\\cluster_mycluster`.
182 2. Edit configuration files in the directory :file:`.ipython\\cluster_mycluster`
161 183
162 3. Start the cluster: ``ipcluster start -p mycluster -n 32``
184 3. Start the cluster using: ``ipcluster start -p mycluster -n 32``
163 185
164 186 Creating a cluster profile
165 187 --------------------------
166 188
167 In most cases, you will have to create and configure a cluster profile to use
168 IPython on a cluster. A cluster profile is a specially named directory
169 (typically located in the :file:`.ipython` subdirectory of your home
170 directory) that contains the configuration files for a particular IPython
171 cluster, as well as log files and security keys. The naming convention
172 for cluster directories is: "cluster_<profile name>". Thus, the cluster
173 directory for a profile named "foo" would be :file:`.ipython\\cluster_foo`.
189 In most cases, you will have to create a cluster profile to use IPython on a
190 cluster. A cluster profile is a name (like "mycluster") that is associated
191 with a particular cluster configuration. The profile name is used by
192 :command:`ipcluster` when working with the cluster.
193
194 Associated with each cluster profile is a cluster directory. This cluster
195 directory is a specially named directory (typically located in the
196 :file:`.ipython` subdirectory of your home directory) that contains the
197 configuration files for a particular cluster profile, as well as log files and
198 security keys. The naming convention for cluster directories is:
199 :file:`cluster_<profile name>`. Thus, the cluster directory for a profile named
200 "foo" would be :file:`.ipython\\cluster_foo`.
174 201
175 To create a new cluster profile (named "mycluster"), type the following
176 command at the Windows Command Prompt::
202 To create a new cluster profile (named "mycluster") and the associated cluster
203 directory, type the following command at the Windows Command Prompt::
177 204
178 205 ipcluster create -p mycluster
179 206
@@ -181,10 +208,8 b' The output of this command is shown in the screenshot below. Notice how'
181 208 :command:`ipcluster` prints out the location of the newly created cluster
182 209 directory.
183 210
184
185 211 .. image:: ipcluster_create.*
186 212
187
188 213 Configuring a cluster profile
189 214 -----------------------------
190 215
@@ -221,18 +246,19 b' in most cases these will be sufficient to get you started.'
221 246 If any of your configuration attributes involve specifying the location
222 247 of shared directories or files, you must make sure that you use UNC paths
223 248 like :file:`\\\\host\\share`. It is also important that you specify
224 these paths using raw Python strings: ``r'\\host\share'``.
249 these paths using raw Python strings: ``r'\\host\share'`` to make sure
250 that the backslashes are properly escaped.
225 251
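The difference is easy to check in plain Python: in a normal string literal
the leading pair of backslashes collapses to a single backslash, which
silently breaks the UNC path::

    plain = '\\host\share'    # leading pair collapses to one backslash
    raw = r'\\host\share'     # raw string keeps both backslashes
    print(plain)              # \host\share  -- not a valid UNC path
    print(raw)                # \\host\share -- what the scheduler needs
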
226 252 Starting the cluster profile
227 253 ----------------------------
228 254
229 255 Once a cluster profile has been configured, starting an IPython cluster using
230 the profile is simple:
256 the profile is simple::
231 257
232 258 ipcluster start -p mycluster -n 32
233 259
234 The ``-n 32`` option tells :command:`ipcluster` how many engines to start.
235 Stopping the cluster is as simple as typing Control-C.
260 The ``-n`` option tells :command:`ipcluster` how many engines to start (in
261 this case 32). Stopping the cluster is as simple as typing Control-C.
236 262
237 263 Using the HPC Job Manager
238 264 -------------------------
@@ -247,32 +273,56 b' Once these files have been created, they can be imported into the HPC Job'
247 273 Manager application. Then, the controller and engines for that profile can be
248 274 started using the HPC Job Manager directly, without using :command:`ipcluster`.
249 275 However, anytime the cluster profile is re-configured, ``ipcluster start``
250 has to be run again to regenerate the XML job description files. The
276 must be run again to regenerate the XML job description files. The
251 277 following screenshot shows what the HPC Job Manager interface looks like
252 278 with a running IPython cluster.
253 279
254
255 280 .. image:: hpc_job_manager.*
256 281
257 282 Performing a simple interactive parallel computation
258 283 ====================================================
259 284
260 285 Once you have started your IPython cluster, you can start to use it. To do
261 this, start up IPython's interactive shell by typing::
286 this, open up a new Windows Command Prompt and start up IPython's interactive
287 shell by typing::
262 288
263 289 ipython
264 290
265 at the Windows Command Prompt. Then you can create a :class:`MultiEngineClient`
266 instance for your profile and use the resulting instance to
267 have the cluster do a simple interactive parallel computation. In the
268 screenshot that follows, we take a simple Python function::
269
270 def f(x): return x**10
271
272 and apply it to each element of an array of integers in
273 parallel using the :meth:`MultiEngineClient.map` method::
274
275 mec.map(f, range(15))
291 Then you can create a :class:`MultiEngineClient` instance for your profile and
292 use the resulting instance to do a simple interactive parallel computation. In
293 the code and screenshot that follows, we take a simple Python function and
294 apply it to each element of an array of integers in parallel using the
295 :meth:`MultiEngineClient.map` method:
296
297 .. sourcecode:: ipython
298
299 In [1]: from IPython.kernel.client import *
300
301 In [2]: mec = MultiEngineClient(profile='mycluster')
302
303 In [3]: mec.get_ids()
304 Out[3]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
305
306 In [4]: def f(x):
307 ...: return x**10
308
309 In [5]: mec.map(f, range(15)) # f is applied in parallel
310 Out[5]:
311 [0,
312 1,
313 1024,
314 59049,
315 1048576,
316 9765625,
317 60466176,
318 282475249,
319 1073741824,
320 3486784401L,
321 10000000000L,
322 25937424601L,
323 61917364224L,
324 137858491849L,
325 289254654976L]
276 326
277 327 The :meth:`map` method has the same signature as Python's builtin :func:`map`
278 328 function, but runs the calculation in parallel. More involved examples of using