Work in the documentation.
Brian Granger
1 NO CONTENT: new file 100644, binary diff hidden
1 NO CONTENT: new file 100644, binary diff hidden
1 NO CONTENT: new file 100644, binary diff hidden
1 NO CONTENT: new file 100644, binary diff hidden
@@ -1,168 +1,179 b''
1 1 #!/usr/bin/env python
2 2 # encoding: utf-8
3 3
4 4 """
5 5 Enable wxPython to be used interactively by setting PyOS_InputHook.
6 6
7 7 Authors: Robin Dunn, Brian Granger, Ondrej Certik
8 8 """
9 9
10 10 #-----------------------------------------------------------------------------
11 11 # Copyright (C) 2008-2009 The IPython Development Team
12 12 #
13 13 # Distributed under the terms of the BSD License. The full license is in
14 14 # the file COPYING, distributed as part of this software.
15 15 #-----------------------------------------------------------------------------
16 16
17 17 #-----------------------------------------------------------------------------
18 18 # Imports
19 19 #-----------------------------------------------------------------------------
20 20
21 21 import os
22 22 import signal
23 23 import sys
24 24 import time
25 25 from timeit import default_timer as clock
26 26 import wx
27 27
28 28 if os.name == 'posix':
29 29 import select
30 30 elif sys.platform == 'win32':
31 31 import msvcrt
32 32
33 33 #-----------------------------------------------------------------------------
34 34 # Code
35 35 #-----------------------------------------------------------------------------
36 36
37 37 def stdin_ready():
38 38 if os.name == 'posix':
39 39 infds, outfds, erfds = select.select([sys.stdin],[],[],0)
40 40 if infds:
41 41 return True
42 42 else:
43 43 return False
44 44 elif sys.platform == 'win32':
45 45 return msvcrt.kbhit()
46 46
47 47
48 48 def inputhook_wx1():
49 49 """Run the wx event loop by processing pending events only.
50 50
51 51 This approach seems to work, but its performance is not great as it
52 52 relies on having PyOS_InputHook called regularly.
53 53 """
54 app = wx.GetApp()
55 if app is not None:
56 assert wx.Thread_IsMain()
57
58 # Make a temporary event loop and process system events until
59 # there are no more waiting, then allow idle events (which
60 # will also deal with pending or posted wx events.)
61 evtloop = wx.EventLoop()
62 ea = wx.EventLoopActivator(evtloop)
63 while evtloop.Pending():
64 evtloop.Dispatch()
65 app.ProcessIdle()
66 del ea
54 try:
55 app = wx.GetApp()
56 if app is not None:
57 assert wx.Thread_IsMain()
58
59 # Make a temporary event loop and process system events until
60 # there are no more waiting, then allow idle events (which
61 # will also deal with pending or posted wx events.)
62 evtloop = wx.EventLoop()
63 ea = wx.EventLoopActivator(evtloop)
64 while evtloop.Pending():
65 evtloop.Dispatch()
66 app.ProcessIdle()
67 del ea
68 except KeyboardInterrupt:
69 pass
67 70 return 0
68 71
69 72 class EventLoopTimer(wx.Timer):
70 73
71 74 def __init__(self, func):
72 75 self.func = func
73 76 wx.Timer.__init__(self)
74 77
75 78 def Notify(self):
76 79 self.func()
77 80
78 81 class EventLoopRunner(object):
79 82
80 83 def Run(self, time):
81 84 self.evtloop = wx.EventLoop()
82 85 self.timer = EventLoopTimer(self.check_stdin)
83 86 self.timer.Start(time)
84 87 self.evtloop.Run()
85 88
86 89 def check_stdin(self):
87 90 if stdin_ready():
88 91 self.timer.Stop()
89 92 self.evtloop.Exit()
90 93
91 94 def inputhook_wx2():
92 95 """Run the wx event loop, polling for stdin.
93 96
94 97 This version runs the wx event loop for an undetermined amount of time,
95 98 during which it periodically checks to see if anything is ready on
96 99 stdin. If anything is ready on stdin, the event loop exits.
97 100
98 101 The argument to elr.Run controls how often the event loop looks at stdin.
99 102 This determines the responsiveness at the keyboard. A setting of 1000
100 103 enables a user to type at most 1 char per second. I have found that a
101 104 setting of 10 gives good keyboard response. We can shorten it further,
102 105 but eventually performance would suffer from calling select/kbhit too
103 106 often.
104 107 """
105 app = wx.GetApp()
106 if app is not None:
107 assert wx.Thread_IsMain()
108 elr = EventLoopRunner()
109 # As this time is made shorter, keyboard response improves, but idle
110 # CPU load goes up. 10 ms seems like a good compromise.
111 elr.Run(time=10) # CHANGE time here to control polling interval
108 try:
109 app = wx.GetApp()
110 if app is not None:
111 assert wx.Thread_IsMain()
112 elr = EventLoopRunner()
113 # As this time is made shorter, keyboard response improves, but idle
114 # CPU load goes up. 10 ms seems like a good compromise.
115 elr.Run(time=10) # CHANGE time here to control polling interval
116 except KeyboardInterrupt:
117 pass
112 118 return 0
113 119
114 120 def inputhook_wx3():
115 121 """Run the wx event loop by processing pending events only.
116 122
117 123 This is like inputhook_wx1, but it keeps processing pending events
118 124 until stdin is ready. After processing all pending events, a call to
119 125 time.sleep is inserted. This is needed; otherwise, CPU usage stays at 100%.
120 126 This sleep time should be tuned, though, for best performance.
121 127 """
122 app = wx.GetApp()
123 if app is not None:
124 assert wx.Thread_IsMain()
125
126 # The import of wx on Linux sets the handler for signal.SIGINT
127 # to 0. This is a bug in wx or gtk. We fix by just setting it
128 # back to the Python default.
129 if not callable(signal.getsignal(signal.SIGINT)):
130 signal.signal(signal.SIGINT, signal.default_int_handler)
131
132 evtloop = wx.EventLoop()
133 ea = wx.EventLoopActivator(evtloop)
134 t = clock()
135 while not stdin_ready():
136 while evtloop.Pending():
137 t = clock()
138 evtloop.Dispatch()
139 app.ProcessIdle()
140 # We need to sleep at this point to keep the idle CPU load
141 # low. However, if sleep to long, GUI response is poor. As
142 # a compromise, we watch how often GUI events are being processed
143 # and switch between a short and long sleep time. Here are some
144 # stats useful in helping to tune this.
145 # time CPU load
146 # 0.001 13%
147 # 0.005 3%
148 # 0.01 1.5%
149 # 0.05 0.5%
150 used_time = clock() - t
151 if used_time > 5*60.0:
152 # print 'Sleep for 5 s' # dbg
153 time.sleep(5.0)
154 elif used_time > 10.0:
155 # print 'Sleep for 1 s' # dbg
156 time.sleep(1.0)
157 elif used_time > 0.1:
158 # Few GUI events coming in, so we can sleep longer
159 # print 'Sleep for 0.05 s' # dbg
160 time.sleep(0.05)
161 else:
162 # Many GUI events coming in, so sleep only very little
163 time.sleep(0.001)
164 del ea
128 # We need to protect against a user pressing Control-C when IPython is
129 # idle and this is running. We trap KeyboardInterrupt and pass.
130 try:
131 app = wx.GetApp()
132 if app is not None:
133 assert wx.Thread_IsMain()
134
135 # The import of wx on Linux sets the handler for signal.SIGINT
136 # to 0. This is a bug in wx or gtk. We fix this by setting it
137 # back to the Python default.
138 if not callable(signal.getsignal(signal.SIGINT)):
139 signal.signal(signal.SIGINT, signal.default_int_handler)
140
141 evtloop = wx.EventLoop()
142 ea = wx.EventLoopActivator(evtloop)
143 t = clock()
144 while not stdin_ready():
145 while evtloop.Pending():
146 t = clock()
147 evtloop.Dispatch()
148 app.ProcessIdle()
149 # We need to sleep at this point to keep the idle CPU load
150 # low. However, if we sleep too long, GUI response is poor. As
151 # a compromise, we watch how often GUI events are being processed
152 # and switch between a short and long sleep time. Here are some
153 # stats useful in helping to tune this.
154 # time CPU load
155 # 0.001 13%
156 # 0.005 3%
157 # 0.01 1.5%
158 # 0.05 0.5%
159 used_time = clock() - t
160 if used_time > 5*60.0:
161 # print 'Sleep for 5 s' # dbg
162 time.sleep(5.0)
163 elif used_time > 10.0:
164 # print 'Sleep for 1 s' # dbg
165 time.sleep(1.0)
166 elif used_time > 0.1:
167 # Few GUI events coming in, so we can sleep longer
168 # print 'Sleep for 0.05 s' # dbg
169 time.sleep(0.05)
170 else:
171 # Many GUI events coming in, so sleep only very little
172 time.sleep(0.001)
173 del ea
174 except KeyboardInterrupt:
175 pass
165 176 return 0
166 177
167 178 # This is our default implementation
168 179 inputhook_wx = inputhook_wx3
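
For context, the hook defined above only takes effect once it is installed as
the interpreter's ``PyOS_InputHook``. A minimal sketch of how that installation
might look using ctypes (only ``inputhook_wx`` comes from the module above; the
surrounding manager code is an assumption, not part of this commit)::

    import ctypes

    # Wrap the Python-level hook in a C-compatible callback and write it into
    # the interpreter's PyOS_InputHook slot. The callback object must be kept
    # alive (e.g. as a module-level global); if it were garbage collected, the
    # interpreter would end up calling freed memory.
    HOOKFUNC = ctypes.PYFUNCTYPE(ctypes.c_int)
    _hook = HOOKFUNC(inputhook_wx)

    def install_inputhook():
        ptr = ctypes.c_void_p.in_dll(ctypes.pythonapi, "PyOS_InputHook")
        ptr.value = ctypes.cast(_hook, ctypes.c_void_p).value
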
@@ -1,64 +1,71 b''
1 1 #!/usr/bin/env python
2 # encoding: utf-8
3 2 """Run a Monte-Carlo options pricer in parallel."""
4 3
5 4 from IPython.kernel import client
6 5 import numpy as np
7 6 from mcpricer import price_options
8 7
9
10 tc = client.TaskClient(profile='default')
8 # The MultiEngineClient is used to set up the calculation and works with all
9 # engines.
11 10 mec = client.MultiEngineClient(profile='default')
12 11
12 # The TaskClient is an interface to the engines that provides dynamic load
13 # balancing at the expense of not knowing which engine will execute the code.
14 tc = client.TaskClient(profile='default')
13 15
14 # Initialize the common code on the engines
16 # Initialize the common code on the engines. This Python module has the
17 # price_options function that prices the options.
15 18 mec.run('mcpricer.py')
16 19
17 # Define the function that will do the calculation
20 # Define the function that will make up our tasks. We basically want to
21 # call the price_options function with all but two arguments (K, sigma)
22 # fixed.
18 23 def my_prices(K, sigma):
19 24 S = 100.0
20 25 r = 0.05
21 26 days = 260
22 paths = 10000
27 paths = 100000
23 28 return price_options(S, K, sigma, r, days, paths)
24 29
25 30 # Create arrays of strike prices and volatilities
26 31 nK = 5
27 32 nsigma = 5
28 33 K_vals = np.linspace(90.0, 100.0, nK)
29 34 sigma_vals = np.linspace(0.0, 0.2, nsigma)
30 35
31 # Submit tasks
36 # Submit tasks to the TaskClient for each (K, sigma) pair as a MapTask.
37 # The MapTask simply applies a function (my_prices) to the arguments:
38 # my_prices(K, sigma) and returns the result.
32 39 taskids = []
33 40 for K in K_vals:
34 41 for sigma in sigma_vals:
35 42 t = client.MapTask(my_prices, args=(K, sigma))
36 43 taskids.append(tc.run(t))
37 44
38 45 print "Submitted tasks: ", taskids
39 46
40 # Block until tasks are completed
47 # Block until all tasks are completed.
41 48 tc.barrier(taskids)
42 49
43 # Get the results
50 # Get the results using TaskClient.get_task_result.
44 51 results = [tc.get_task_result(tid) for tid in taskids]
45 52
46 # Assemble the result
53 # Assemble the result into a structured NumPy array.
47 54 prices = np.empty(nK*nsigma,
48 dtype=[('vcall',float),('vput',float),('acall',float),('aput',float)]
55 dtype=[('ecall',float),('eput',float),('acall',float),('aput',float)]
49 56 )
50 57 for i, price_tuple in enumerate(results):
51 58 prices[i] = price_tuple
52 59 prices.shape = (nK, nsigma)
60 K_vals, sigma_vals = np.meshgrid(K_vals, sigma_vals)
53 61
54
55 def plot_options(K_vals, sigma_vals, prices):
62 def plot_options(sigma_vals, K_vals, prices):
56 63 """
57 Make a contour plot of the option prices.
64 Make a contour plot of the option price in (sigma, K) space.
58 65 """
59 66 from matplotlib import pyplot as plt
60 67 plt.contourf(sigma_vals, K_vals, prices)
61 68 plt.colorbar()
62 69 plt.title("Option Price")
63 70 plt.xlabel("Volatility")
64 71 plt.ylabel("Strike Price")
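
Once :file:`mcdriver.py` has run, the results can be explored interactively.
For example, a contour plot of the Asian call prices (a sketch mirroring the
interactive session in the accompanying docs)::

    plot_options(sigma_vals, K_vals, prices['acall'])
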
@@ -1,33 +1,45 b''
1 1 import numpy as np
2 2 from math import *
3 3
4 4
5 5 def price_options(S=100.0, K=100.0, sigma=0.25, r=0.05, days=260, paths=10000):
6 6 """
7 Price vanilla and asian options using a Monte Carlo method.
7 Price European and Asian options using a Monte Carlo method.
8
9 Parameters
10 ----------
11 S : float
12 The initial price of the stock.
13 K : float
14 The strike price of the option.
15 sigma : float
16 The volatility of the stock.
17 r : float
18 The risk free interest rate.
19 days : int
20 The number of days until the option expires.
21 paths : int
22 The number of Monte Carlo paths used to price the option.
23
24 Returns
25 -------
26 A tuple of (European call, European put, Asian call, Asian put) option prices.
8 27 """
9 28 h = 1.0/days
10 29 const1 = exp((r-0.5*sigma**2)*h)
11 30 const2 = sigma*sqrt(h)
12 31 stock_price = S*np.ones(paths, dtype='float64')
13 32 stock_price_sum = np.zeros(paths, dtype='float64')
14 33 for j in range(days):
15 34 growth_factor = const1*np.exp(const2*np.random.standard_normal(paths))
16 35 stock_price = stock_price*growth_factor
17 36 stock_price_sum = stock_price_sum + stock_price
18 37 stock_price_avg = stock_price_sum/days
19 38 zeros = np.zeros(paths, dtype='float64')
20 39 r_factor = exp(-r*h*days)
21 vanilla_put = r_factor*np.mean(np.maximum(zeros, K-stock_price))
40 euro_put = r_factor*np.mean(np.maximum(zeros, K-stock_price))
22 41 asian_put = r_factor*np.mean(np.maximum(zeros, K-stock_price_avg))
23 vanilla_call = r_factor*np.mean(np.maximum(zeros, stock_price-K))
42 euro_call = r_factor*np.mean(np.maximum(zeros, stock_price-K))
24 43 asian_call = r_factor*np.mean(np.maximum(zeros, stock_price_avg-K))
25 return (vanilla_call, vanilla_put, asian_call, asian_put)
26
44 return (euro_call, euro_put, asian_call, asian_put)
27 45
28 if __name__ == '__main__':
29 (vc, vp, ac, ap) = price_options()
30 print "Vanilla Put Price = ", vp
31 print "Asian Put Price = ", ap
32 print "Vanilla Call Price = ", vc
33 print "Asian Call Price = ", ac
@@ -1,67 +1,54 b''
1 1 """Calculate statistics on the digits of pi in parallel.
2 2
3 3 This program uses the functions in :file:`pidigits.py` to calculate
4 4 the frequencies of 2 digit sequences in the digits of pi. The
5 5 results are plotted using matplotlib.
6 6
7 7 To run, text files from http://www.super-computing.org/
8 8 must be installed in the working directory of the IPython engines.
9 9 The actual filenames to be used can be set with the ``filestring``
10 10 variable below.
11 11
12 12 The dataset we have been using for this is the 200 million digit one here:
13 13 ftp://pi.super-computing.org/.2/pi200m/
14 14 """
15 15
16 16 from IPython.kernel import client
17 17 from matplotlib import pyplot as plt
18 18 import numpy as np
19 19 from pidigits import *
20 20 from timeit import default_timer as clock
21 21
22 22
23 23 # Files with digits of pi (10m digits each)
24 24 filestring = 'pi200m-ascii-%(i)02dof20.txt'
25 25 files = [filestring % {'i':i} for i in range(1,16)]
26 26
27 27
28 # A function for reducing the frequencies calculated
29 # by different engines.
30 def reduce_freqs(freqlist):
31 allfreqs = np.zeros_like(freqlist[0])
32 for f in freqlist:
33 allfreqs += f
34 return allfreqs
35
36
37 28 # Connect to the IPython cluster
38 29 mec = client.MultiEngineClient(profile='mycluster')
39 30 mec.run('pidigits.py')
40 31
41 32
42 33 # Run 10m digits on 1 engine
43 34 mapper = mec.mapper(targets=0)
44 35 t1 = clock()
45
46 36 freqs10m = mapper.map(compute_two_digit_freqs, files[:1])[0]
47
48 37 t2 = clock()
49 38 digits_per_second1 = 10.0e6/(t2-t1)
50 39 print "Digits per second (1 core, 10m digits): ", digits_per_second1
51 40
52 41
53 42 # Run 150m digits on 15 engines (8 cores)
54 43 t1 = clock()
55
56 44 freqs_all = mec.map(compute_two_digit_freqs, files[:len(mec)])
57 45 freqs150m = reduce_freqs(freqs_all)
58
59 46 t2 = clock()
60 47 digits_per_second8 = 150.0e6/(t2-t1)
61 48 print "Digits per second (8 cores, 150m digits): ", digits_per_second8
62 49
63 50 print "Speedup: ", digits_per_second8/digits_per_second1
64 51
65 52 plot_two_digit_freqs(freqs150m)
66 53 plt.title("2 digit sequences in 150m digits of pi")
67 54
@@ -1,126 +1,144 b''
1 1 """Compute statistics on the digits of pi.
2 2
3 3 This uses precomputed digits of pi from the website
4 4 of Professor Yasumasa Kanada at the University of
5 5 Tokoyo: http://www.super-computing.org/
6 6
7 7 Currently, there are only functions to read the
8 8 .txt (non-compressed, non-binary) files, but adding
9 9 support for compression and binary files would be
10 10 straightforward.
11 11
12 12 This focuses on computing the number of times that
13 13 all 1, 2, ..., n digit sequences occur in the digits of pi.
14 14 If the digits of pi are truly random, these frequencies
15 15 should be equal.
16 16 """
17 17
18 18 # Import statements
19 19
20 20 from __future__ import division, with_statement
21 21 import numpy as np
22 22 from matplotlib import pyplot as plt
23 23
24 24 # Top-level functions
25 25
26 26 def compute_one_digit_freqs(filename):
27 """
28 Read digits of pi from a file and compute the 1 digit frequencies.
29 """
27 30 d = txt_file_to_digits(filename)
28 31 freqs = one_digit_freqs(d)
29 32 return freqs
30 33
31 34 def compute_two_digit_freqs(filename):
35 """
36 Read digits of pi from a file and compute the 2 digit frequencies.
37 """
32 38 d = txt_file_to_digits(filename)
33 39 freqs = two_digit_freqs(d)
34 40 return freqs
35 41
42 def reduce_freqs(freqlist):
43 """
44 Add up a list of freq counts to get the total counts.
45 """
46 allfreqs = np.zeros_like(freqlist[0])
47 for f in freqlist:
48 allfreqs += f
49 return allfreqs
50
36 51 def compute_n_digit_freqs(filename, n):
52 """
53 Read digits of pi from a file and compute the n digit frequencies.
54 """
37 55 d = txt_file_to_digits(filename)
38 56 freqs = n_digit_freqs(d, n)
39 57 return freqs
40 58
41 59 # Read digits from a txt file
42 60
43 61 def txt_file_to_digits(filename, the_type=str):
44 62 """
45 63 Yield the digits of pi read from a .txt file.
46 64 """
47 65 with open(filename, 'r') as f:
48 66 for line in f.readlines():
49 67 for c in line:
50 68 if c != '\n' and c != ' ':
51 69 yield the_type(c)
52 70
53 71 # Actual counting functions
54 72
55 73 def one_digit_freqs(digits, normalize=False):
56 74 """
57 75 Consume digits of pi and compute 1 digit freq. counts.
58 76 """
59 77 freqs = np.zeros(10, dtype='i4')
60 78 for d in digits:
61 79 freqs[int(d)] += 1
62 80 if normalize:
63 81 freqs = freqs/freqs.sum()
64 82 return freqs
65 83
66 84 def two_digit_freqs(digits, normalize=False):
67 85 """
68 86 Consume digits of pi and compute 2 digit freq. counts.
69 87 """
70 88 freqs = np.zeros(100, dtype='i4')
71 89 last = digits.next()
72 90 this = digits.next()
73 91 for d in digits:
74 92 index = int(last + this)
75 93 freqs[index] += 1
76 94 last = this
77 95 this = d
78 96 if normalize:
79 97 freqs = freqs/freqs.sum()
80 98 return freqs
81 99
82 100 def n_digit_freqs(digits, n, normalize=False):
83 101 """
84 102 Consume digits of pi and compute n digit freq. counts.
85 103
86 104 This should only be used for 1-6 digits.
87 105 """
88 106 freqs = np.zeros(pow(10,n), dtype='i4')
89 107 current = np.zeros(n, dtype=int)
90 108 for i in range(n):
91 109 current[i] = digits.next()
92 110 for d in digits:
93 111 index = int(''.join(map(str, current)))
94 112 freqs[index] += 1
95 113 current[0:-1] = current[1:]
96 114 current[-1] = d
97 115 if normalize:
98 116 freqs = freqs/freqs.sum()
99 117 return freqs
100 118
101 119 # Plotting functions
102 120
103 121 def plot_two_digit_freqs(f2):
104 122 """
105 123 Plot two digits frequency counts using matplotlib.
106 124 """
107 125 f2_copy = f2.copy()
108 126 f2_copy.shape = (10,10)
109 127 ax = plt.matshow(f2_copy)
110 128 plt.colorbar()
111 129 for i in range(10):
112 130 for j in range(10):
113 131 plt.text(i-0.2, j+0.2, str(j)+str(i))
114 132 plt.ylabel('First digit')
115 133 plt.xlabel('Second digit')
116 134 return ax
117 135
118 136 def plot_one_digit_freqs(f1):
119 137 """
120 138 Plot one digit frequency counts using matplotlib.
121 139 """
122 140 ax = plt.plot(f1,'bo-')
123 141 plt.title('Single digit counts in pi')
124 142 plt.xlabel('Digit')
125 143 plt.ylabel('Count')
126 144 return ax
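
As a quick sanity check, the counting functions above accept any iterator of
digit characters, not just digits read from the pi files (a sketch; the sample
digits are arbitrary)::

    digits = iter('3141592653589793')
    f2 = two_digit_freqs(digits)
    # The first two digits prime the (last, this) window; each remaining
    # digit counts one pair, so this prints 14 for 16 input digits.
    print f2.sum()
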
1 NO CONTENT: modified file, binary diff hidden
1 NO CONTENT: modified file, binary diff hidden
1 NO CONTENT: modified file, binary diff hidden
1 NO CONTENT: modified file, binary diff hidden
1 NO CONTENT: modified file, binary diff hidden
1 NO CONTENT: modified file, binary diff hidden
1 NO CONTENT: modified file, binary diff hidden
1 NO CONTENT: modified file, binary diff hidden
@@ -1,126 +1,270 b''
1 1 =================
2 2 Parallel examples
3 3 =================
4 4
5 In this section we describe a few more involved examples of using an IPython
6 cluster to perform a parallel computation.
5 In this section we describe two more involved examples of using an IPython
6 cluster to perform a parallel computation. In these examples, we will be using
7 IPython's "pylab" mode, which enables interactive plotting using the
8 Matplotlib package. IPython can be started in this mode by typing::
9
10 ipython -p pylab
11
12 at the system command line. If this prints an error message, you will
13 need to install the default profiles from within IPython by doing,
14
15 .. sourcecode:: ipython
16
17 In [1]: %install_profiles
18
19 and then restarting IPython.
7 20
8 21 150 million digits of pi
9 22 ========================
10 23
11 24 In this example we would like to study the distribution of digits in the
12 number pi. More specifically, we are going to study how often each 2
13 digits sequence occurs in the first 150 million digits of pi. If the digits
14 0-9 occur with equal probability, we expect that each two digits sequence
15 (00, 01, ..., 99) will occur 1% of the time.
16
17 This examples uses precomputed digits of pi from the website of Professor
18 Yasumasa Kanada at the University of Tokoyo (http://www.super-computing.org).
19 These digits come in a set of ``.txt`` files
20 (ftp://pi.super-computing.org/.2/pi200m/) that each have 10 million digits of
21 pi. In the parallel computation, we will use the :meth:`MultiEngineClient.map`
22 method to have each engine compute the desired statistics on a subset of these
23 files. Before I started the parallel computation, I copied the data files
24 to the compute nodes so the engine have fast access to them.
25
26 Here are the Python functions for counting the frequencies of each two digit
27 sequence in serial::
28
29 def compute_two_digit_freqs(filename):
30 """
31 Compute the two digit frequencies from a single file.
32 """
33 d = txt_file_to_digits(filename)
34 freqs = two_digit_freqs(d)
35 return freqs
36
37 def txt_file_to_digits(filename, the_type=str):
38 """
39 Yield the digits of pi read from a .txt file.
40 """
41 with open(filename, 'r') as f:
42 for line in f.readlines():
43 for c in line:
44 if c != '\n' and c!= ' ':
45 yield the_type(c)
46
47 def two_digit_freqs(digits, normalize=False):
48 """
49 Consume digits of pi and compute 2 digits freq. counts.
50 """
51 freqs = np.zeros(100, dtype='i4')
52 last = digits.next()
53 this = digits.next()
54 for d in digits:
55 index = int(last + this)
56 freqs[index] += 1
57 last = this
58 this = d
59 if normalize:
60 freqs = freqs/freqs.sum()
61 return freqs
62
63 These functions are defined in the file :file:`pidigits.py`. To perform the
64 calculation in parallel, we use an additional file: :file:`parallelpi.py`::
65
66 from IPython.kernel import client
67 from matplotlib import pyplot as plt
68 import numpy as np
69 from pidigits import *
70 from timeit import default_timer as clock
71
72 # Files with digits of pi (10m digits each)
73 filestring = 'pi200m-ascii-%(i)02dof20.txt'
74 files = [filestring % {'i':i} for i in range(1,16)]
75
76 # A function for reducing the frequencies calculated
77 # by different engines.
78 def reduce_freqs(freqlist):
79 allfreqs = np.zeros_like(freqlist[0])
80 for f in freqlist:
81 allfreqs += f
82 return allfreqs
83
84 # Connect to the IPython cluster
85 mec = client.MultiEngineClient(profile='mycluster')
86 mec.run('pidigits.py')
87
88 # Run 10m digits on 1 engine
89 mapper = mec.mapper(targets=0)
90 t1 = clock()
91 freqs10m = mapper.map(compute_two_digit_freqs, files[:1])[0]
92 t2 = clock()
93 digits_per_second1 = 10.0e6/(t2-t1)
94 print "Digits per second (1 core, 10m digits): ", digits_per_second1
95
96 # Run 150m digits on 15 engines (8 cores)
97 t1 = clock()
98 freqs_all = mec.map(compute_two_digit_freqs, files[:len(mec)])
99 freqs150m = reduce_freqs(freqs_all)
100 t2 = clock()
101 digits_per_second8 = 150.0e6/(t2-t1)
102 print "Digits per second (8 cores, 150m digits): ", digits_per_second8
103
104 print "Speedup: ", digits_per_second8/digits_per_second1
105
106 plot_two_digit_freqs(freqs150m)
107 plt.title("2 digit sequences in 150m digits of pi")
108
109 To run this code on an IPython cluster:
110
111 1. Start an IPython cluster with 15 engines: ``ipcluster start -p mycluster -n 15``
112 2. Open IPython's interactive shell using the pylab profile
113 ``ipython -p pylab`` and type ``run parallelpi.py``.
114
115 At this point, the parallel calculation will begin. On a small an 8 core
116 cluster, we observe a speedup of 7.7x. The resulting plot of the two digit
117 sequences is shown in the following screenshot.
118
119 .. image:: parallel_pi.*
120
121
122 Parallel option pricing
123 =======================
124
125 The example will be added at a later point.
25 number pi (in base 10). While it is not known if pi is a normal number (a
26 number is normal in base 10 if 0-9 occur with equal likelihood), numerical
27 investigations suggest that it is. We will begin with a serial calculation on
28 10,000 digits of pi and then perform a parallel calculation involving 150
29 million digits.
30
31 In both the serial and parallel calculation we will be using functions defined
32 in the :file:`pidigits.py` file, which is available in the
33 :file:`docs/examples/kernel` directory of the IPython source distribution.
34 These functions provide basic facilities for working with the digits of pi and
35 can be loaded into IPython by putting :file:`pidigits.py` in your current
36 working directory and then doing:
37
38 .. sourcecode:: ipython
39
40 In [1]: run pidigits.py
41
42 Serial calculation
43 ------------------
44
45 For the serial calculation, we will use SymPy (http://www.sympy.org) to
46 calculate 10,000 digits of pi and then look at the frequencies of the digits
47 0-9. Out of 10,000 digits, we expect each digit to occur 1,000 times. While
48 SymPy is capable of calculating many more digits of pi, our purpose here is to
49 set the stage for the much larger parallel calculation.
50
51 In this example, we use two functions from :file:`pidigits.py`:
52 :func:`one_digit_freqs` (which calculates how many times each digit occurs)
53 and :func:`plot_one_digit_freqs` (which uses Matplotlib to plot the result).
54 Here is an interactive IPython session that uses these functions with
55 SymPy:
56
57 .. sourcecode:: ipython
58
59 In [7]: import sympy
60
61 In [8]: pi = sympy.pi.evalf(40)
62
63 In [9]: pi
64 Out[9]: 3.141592653589793238462643383279502884197
65
66 In [10]: pi = sympy.pi.evalf(10000)
67
68 In [11]: digits = (d for d in str(pi)[2:]) # create a sequence of digits
69
70 In [12]: run pidigits.py # load one_digit_freqs/plot_one_digit_freqs
71
72 In [13]: freqs = one_digit_freqs(digits)
73
74 In [14]: plot_one_digit_freqs(freqs)
75 Out[14]: [<matplotlib.lines.Line2D object at 0x18a55290>]
76
77 The resulting plot of the single digit counts shows that each digit occurs
78 approximately 1,000 times, but that with only 10,000 digits the
79 statistical fluctuations are still rather large:
80
81 .. image:: single_digits.*
82
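A rough estimate shows why: treating each of the N = 10,000 digits as an
independent draw with probability p = 1/10, the expected count per digit is
Np = 1,000 with a standard deviation of about sqrt(Np(1-p)) = 30, so typical
relative fluctuations are around 3% and shrink only like 1/sqrt(N).
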
83 It is clear that to reduce the relative fluctuations in the counts, we need
84 to look at many more digits of pi. That brings us to the parallel calculation.
85
86 Parallel calculation
87 --------------------
88
89 Calculating many digits of pi is a challenging computational problem in itself.
90 Because we want to focus on the distribution of digits in this example, we
91 will use pre-computed digits of pi from the website of Professor Yasumasa
92 Kanada at the University of Tokyo (http://www.super-computing.org). These
93 digits come in a set of text files (ftp://pi.super-computing.org/.2/pi200m/)
94 that each have 10 million digits of pi.
95
96 For the parallel calculation, we have copied these files to the local hard
97 drives of the compute nodes. Fifteen of these files will be used, for a
98 total of 150 million digits of pi. To make things a little more interesting we
99 will calculate the frequencies of all 2 digit sequences (00-99) and then plot
100 the result using a 2D matrix in Matplotlib.
101
102 The overall idea of the calculation is simple: each IPython engine will
103 compute the two digit counts for the digits in a single file. Then in a final
104 step the counts from each engine will be added up. To perform this
105 calculation, we will need two top-level functions from :file:`pidigits.py`:
106
107 .. literalinclude:: ../../examples/kernel/pidigits.py
108 :language: python
109 :lines: 34-49
110
111 We will also use the :func:`plot_two_digit_freqs` function to plot the
112 results. The code to run this calculation in parallel is contained in
113 :file:`docs/examples/kernel/parallelpi.py`. This code can be run in parallel
114 using IPython by following these steps:
115
116 1. Copy the text files with the digits of pi
117 (ftp://pi.super-computing.org/.2/pi200m/) to the working directory of the
118 engines on the compute nodes.
119 2. Use :command:`ipcluster` to start 15 engines. We used an 8 core cluster
120 with hyperthreading enabled, which makes the 8 cores look like 16 (1
121 controller + 15 engines) in the OS. However, the maximum speedup we can
122 observe is still only 8x.
123 3. With the file :file:`parallelpi.py` in your current working directory, open
124 up IPython in pylab mode and type ``run parallelpi.py``.
125
126 When run on our 8 core cluster, we observe a speedup of 7.7x. This is slightly
127 less than linear scaling (8x) because the controller is also running on one of
128 the cores.
129
130 To emphasize the interactive nature of IPython, we now show how the
131 calculation can also be run by simply typing the commands from
132 :file:`parallelpi.py` interactively into IPython:
133
134 .. sourcecode:: ipython
135
136 In [1]: from IPython.kernel import client
137 2009-11-19 11:32:38-0800 [-] Log opened.
138
139 # The MultiEngineClient allows us to use the engines interactively
140 In [2]: mec = client.MultiEngineClient(profile='mycluster')
141 2009-11-19 11:32:44-0800 [-] Connecting [0]
142 2009-11-19 11:32:44-0800 [Negotiation,client] Connected: ./ipcontroller-mec.furl
143
144 In [3]: mec.get_ids()
145 Out[3]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
146
147 In [4]: run pidigits.py
148
149 In [5]: filestring = 'pi200m-ascii-%(i)02dof20.txt'
150
151 In [6]: files = [filestring % {'i':i} for i in range(1,16)]
152
153 In [7]: files
154 Out[7]:
155 ['pi200m-ascii-01of20.txt',
156 'pi200m-ascii-02of20.txt',
157 'pi200m-ascii-03of20.txt',
158 'pi200m-ascii-04of20.txt',
159 'pi200m-ascii-05of20.txt',
160 'pi200m-ascii-06of20.txt',
161 'pi200m-ascii-07of20.txt',
162 'pi200m-ascii-08of20.txt',
163 'pi200m-ascii-09of20.txt',
164 'pi200m-ascii-10of20.txt',
165 'pi200m-ascii-11of20.txt',
166 'pi200m-ascii-12of20.txt',
167 'pi200m-ascii-13of20.txt',
168 'pi200m-ascii-14of20.txt',
169 'pi200m-ascii-15of20.txt']
170
171 # This is the parallel calculation using the MultiEngineClient.map method
172 # which applies compute_two_digit_freqs to each file in files in parallel.
173 In [8]: freqs_all = mec.map(compute_two_digit_freqs, files)
174
175 # Add up the frequencies from each engine.
176 In [9]: freqs = reduce_freqs(freqs_all)
177
178 In [10]: plot_two_digit_freqs(freqs)
179 Out[10]: <matplotlib.image.AxesImage object at 0x18beb110>
180
181 In [11]: plt.title('2 digit counts of 150m digits of pi')
182 Out[11]: <matplotlib.text.Text object at 0x18d1f9b0>
183
184 The resulting plot generated by Matplotlib is shown below. The colors indicate
185 which two digit sequences are more (red) or less (blue) likely to occur in the
186 first 150 million digits of pi. We clearly see that the sequence "41" is
187 most likely and that "06" and "07" are least likely. Further analysis would
188 show that the relative size of the statistical fluctuations has decreased
189 compared to the 10,000 digit calculation.
190
191 .. image:: two_digit_counts.*
192
193 To conclude this example, we summarize the key features of IPython's parallel
194 architecture that this example demonstrates:
195
196 * Serial code can often be parallelized with only a few extra lines of code.
197 In this case we have used :meth:`MultiEngineClient.map`; the
198 :class:`MultiEngineClient` class has a number of other methods that provide
199 more fine grained control of the IPython cluster.
200 * The resulting parallel code can be run without ever leaving IPython's
201 interactive shell.
202 * Any data computed in parallel can be explored interactively through
203 visualization or further numerical calculations.
204
205
206 Parallel options pricing
207 ========================
208
209 An option is a financial contract that gives the buyer of the contract the
210 right to buy (a "call") or sell (a "put") a secondary asset (a stock for
211 example) at a particular date in the future (the expiration date) for a
212 pre-agreed upon price (the strike price). For this right, the buyer pays the
213 seller a premium (the option price). There are a wide variety of flavors of
214 options (American, European, Asian, etc.) that are useful for different
215 purposes: hedging against risk, speculation, etc.
216
217 Much of modern finance is driven by the need to price these contracts
218 accurately based on what is known about the properties (such as volatility) of
219 the underlying asset. One method of pricing options is to use a Monte Carlo
220 simulation of the underlying assets. In this example we use this approach to
221 price both European and Asian (path dependent) options for various strike
222 prices and volatilities.
223
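To make this concrete, here is a minimal serial sketch of pricing a single
European call with a Monte Carlo simulation (a simplification of the
:func:`price_options` function shown below; the parameter values are
illustrative)::

    import numpy as np

    def euro_call_mc(S=100.0, K=100.0, sigma=0.25, r=0.05, days=260, paths=10000):
        """Monte Carlo price of a European call under geometric Brownian motion."""
        h = 1.0/days
        # Daily log-returns for every path, summed to get terminal prices.
        z = np.random.standard_normal((days, paths))
        log_growth = (r - 0.5*sigma**2)*h + sigma*np.sqrt(h)*z
        ST = S*np.exp(log_growth.sum(axis=0))
        # The price is the discounted average payoff under the risk-neutral measure.
        return np.exp(-r*h*days)*np.mean(np.maximum(ST - K, 0.0))
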
224 The code for this example can be found in the :file:`docs/examples/kernel`
225 directory of the IPython source.
226
227 The function :func:`price_options` calculates the option prices for a single
228 option (:file:`mcpricer.py`):
229
230 .. literalinclude:: ../../examples/kernel/mcpricer.py
231 :language: python
232
233 To run this code in parallel, we will use IPython's :class:`TaskClient`, which
234 distributes work to the engines using dynamic load balancing. This client
235 can be used alongside the :class:`MultiEngineClient` shown in the previous
236 example.
237
238 Here is the code that calls :func:`price_options` for a number of different
239 volatilities and strike prices in parallel:
240
241 .. literalinclude:: ../../examples/kernel/mcdriver.py
242 :language: python
243
244 To run this code in parallel, start an IPython cluster using
245 :command:`ipcluster`, open IPython in the pylab mode with the file
246 :file:`mcdriver.py` in your current working directory and then type:
247
248 .. sourcecode:: ipython
249
250 In [7]: run mcdriver.py
251 Submitted tasks: [0, 1, 2, ...]
252
253 Once all the tasks have finished, the results can be plotted using the
254 :func:`plot_options` function. Here we make contour plots of the Asian
255 call and Asian put as a function of the volatility and strike price:
256
257 .. sourcecode:: ipython
258
259 In [8]: plot_options(sigma_vals, K_vals, prices['acall'])
260
261 In [9]: plt.figure()
262 Out[9]: <matplotlib.figure.Figure object at 0x18c178d0>
263
264 In [10]: plot_options(sigma_vals, K_vals, prices['aput'])
265
266 The plots generated by Matplotlib will look like this:
267
268 .. image:: asian_call.*
126 269
270 .. image:: asian_put.*
@@ -1,282 +1,332 b''
1 1 ========================================
2 2 Getting started
3 3 ========================================
4 4
5 5 Introduction
6 6 ============
7 7
8 IPython is an open source project focused on interactive and exploratory
9 computing in the Python programming language. It consists of two
10 main componenents:
8 The Python programming language is an increasingly popular language for numerical
9 computing. This is due to a unique combination of factors. First, Python is a
10 high-level and *interactive* language that is well matched for interactive
11 numerical work. Second, it is easy (often trivial) to integrate legacy
12 C/C++/Fortran code into Python. Third, a large number of high-quality open
13 source projects provide all the needed building blocks for numerical
14 computing: numerical arrays (NumPy), algorithms (SciPy), 2D/3D visualization
15 (Matplotlib, Mayavi, Chaco), symbolic mathematics (Sage, SymPy) and others.
16
17 The IPython project is a core part of this open-source toolchain and is
18 focused on creating a comprehensive environment for interactive and
19 exploratory computing in the Python programming language. It enables all of
20 the above tools to be used interactively and consists of two main components:
11 21
12 22 * An enhanced interactive Python shell with support for interactive plotting
13 23 and visualization.
14 24 * An architecture for interactive parallel computing.
15 25
16 26 With these components, it is possible to perform all aspects of a parallel
17 computation interactively. This document describes how to get started with
18 IPython on Window HPC Server 2008. A more complete desription of IPython's
19 parallel computing capabilities can be found in IPython's online documentation
27 computation interactively. This type of workflow is particularly relevant in
28 scientific and numerical computing where algorithms, code and data are
29 continually evolving as the user/developer explores a problem. The broad
30 trends in computing (commodity clusters, multicore, cloud computing, etc.)
31 make these capabilities of IPython all the more important.
32
33 While IPython is a cross-platform tool, it has particularly strong support for
34 Windows-based compute clusters running Windows HPC Server 2008. This document
35 describes how to get started with IPython on Windows HPC Server 2008. The
36 content and emphasis here is practical: installing IPython, configuring
37 IPython to use the Windows job scheduler and running example parallel programs
38 interactively. A more complete description of IPython's parallel computing
39 capabilities can be found in IPython's online documentation
20 40 (http://ipython.scipy.org/moin/Documentation).
21 41
22 42 Setting up your Windows cluster
23 43 ===============================
24 44
25 45 This document assumes that you already have a cluster running Windows
26 46 HPC Server 2008. Here is a broad overview of what is involved with setting up
27 47 such a cluster:
28 48
29 49 1. Install Windows Server 2008 on the head and compute nodes in the cluster.
30 50 2. Setup the network configuration on each host. Each host should have a
31 51 static IP address.
32 52 3. On the head node, activate the "Active Directory Domain Services" role
33 53 and make the head node the domain controller.
34 54 4. Join the compute nodes to the newly created Active Directory (AD) domain.
35 55 5. Setup user accounts in the domain with shared home directories.
36 56 6. Install the HPC Pack 2008 on the head node to create a cluster.
37 57 7. Install the HPC Pack 2008 on the compute nodes.
38 58
39 59 More details about installing and configuring Windows HPC Server 2008 can be
40 60 found on the Windows HPC Home Page (http://www.microsoft.com/hpc). Regardless
41 of what steps you go through to set up your cluster, the remainder of this
61 of what steps you follow to set up your cluster, the remainder of this
42 62 document will assume that:
43 63
44 64 * There are domain users that can log on to the AD domain and submit jobs
45 65 to the cluster scheduler.
46 66 * These domain users have shared home directories. While shared home
47 67 directories are not required to use IPython, they make it much easier to
48 68 use IPython.
49 69
50 70 Installation of IPython and its dependencies
51 71 ============================================
52 72
53 73 IPython and all of its dependencies are freely available and open source.
54 74 These packages provide a powerful and cost-effective approach to numerical and
55 75 scientific computing on Windows. The following dependencies are needed to run
56 76 IPython on Windows:
57 77
58 78 * Python 2.5 or 2.6 (http://www.python.org)
59 79 * pywin32 (http://sourceforge.net/projects/pywin32/)
60 80 * PyReadline (https://launchpad.net/pyreadline)
61 81 * zope.interface and Twisted (http://twistedmatrix.com)
62 82 * Foolscap (http://foolscap.lothar.com/trac)
63 83 * pyOpenSSL (https://launchpad.net/pyopenssl)
64 84 * IPython (http://ipython.scipy.org)
65 85
66 In addition, the following dependencies are needed to run the demos
67 described in this document.
86 In addition, the following dependencies are needed to run the demos described
87 in this document.
68 88
69 89 * NumPy and SciPy (http://www.scipy.org)
70 90 * wxPython (http://www.wxpython.org)
71 91 * Matplotlib (http://matplotlib.sourceforge.net/)
72 92
73 93 The easiest way of obtaining these dependencies is through the Enthought
74 94 Python Distribution (EPD) (http://www.enthought.com/products/epd.php). EPD is
75 95 produced by Enthought, Inc. and contains all of these packages and others in a
76 96 single installer, and is available free for academic users. While it is also
77 97 possible to download and install each package individually, this is a tedious
78 98 process. Thus, we highly recommend using EPD to install these packages on
79 99 Windows.
80 100
81 101 Regardless of how you install the dependencies, here are the steps you will
82 102 need to follow:
83 103
84 104 1. Install all of the packages listed above, either individually or using EPD
85 105 on the head node, compute nodes and user workstations.
86 106
87 107 2. Make sure that :file:`C:\\Python25` and :file:`C:\\Python25\\Scripts` are
88 108 in the system :envvar:`%PATH%` variable on each node.
89 109
90 110 3. Install the latest development version of IPython. This can be done by
91 111 downloading the development version from the IPython website
92 112 (http://ipython.scipy.org) and following the installation instructions.
93 113
94 114 Further details about installing IPython or its dependencies can be found in
95 115 the online IPython documentation (http://ipython.scipy.org/moin/Documentation).
96 116 Once you are finished with the installation, you can try IPython out by
97 opening a Windows Command Prompt and typing :command:`ipython`. This will
117 opening a Windows Command Prompt and typing ``ipython``. This will
98 118 start IPython's interactive shell and you should see something like the
99 119 following screenshot:
100 120
101 121 .. image:: ipython_shell.*
102 122
103 123 Starting an IPython cluster
104 124 ===========================
105 125
106 126 To use IPython's parallel computing capabilities, you will need to start an
107 127 IPython cluster. An IPython cluster consists of one controller and multiple
108 128 engines:
109 129
110 130 IPython controller
111 131 The IPython controller manages the engines and acts as a gateway between
112 132 the engines and the client, which runs in the user's interactive IPython
113 133 session. The controller is started using the :command:`ipcontroller`
114 134 command.
115 135
116 136 IPython engine
117 IPython engines run your Python code in parallel on the compute nodes.
137 IPython engines run a user's Python code in parallel on the compute nodes.
118 138 Engines are started using the :command:`ipengine` command.
119 139
120 140 Once these processes are started, a user can run Python code interactively and
121 in parallel on the engines from within the IPython shell. This includes the
122 ability to interact with, plot and visualize data from the engines.
141 in parallel on the engines from within the IPython shell using an appropriate
142 client. This includes the ability to interact with, plot and visualize data
143 from the engines.
123 144
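For reference, these two processes can also be started by hand, without
:command:`ipcluster` (a sketch; run :command:`ipengine` once per core on each
compute node)::

    ipcontroller
    ipengine
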
124 IPython has a command line program called :command:`ipcluster` that handles
145 IPython has a command line program called :command:`ipcluster` that automates
125 146 all aspects of starting the controller and engines on the compute nodes.
126 147 :command:`ipcluster` has full support for the Windows HPC job scheduler,
127 148 meaning that :command:`ipcluster` can use this job scheduler to start the
128 149 controller and engines. In our experience, the Windows HPC job scheduler is
129 150 particularly well suited for interactive applications, such as IPython. Once
130 151 :command:`ipcluster` is configured properly, a user can start an IPython
131 152 cluster from their local workstation almost instantly, without having to log
132 153 on to the head node (as is typically required by Unix based job schedulers).
133 154 This enables a user to move seamlessly between serial and parallel
134 155 computations.
135 156
136 157 In this section we show how to use :command:`ipcluster` to start an IPython
137 158 cluster using the Windows HPC Server 2008 job scheduler. To make sure that
138 159 :command:`ipcluster` is installed and working properly, you should first try
139 160 to start an IPython cluster on your local host. To do this, open a Windows
140 161 Command Prompt and type the following command::
141 162
142 163 ipcluster start -n 2
143 164
144 165 You should see a number of messages printed to the screen, ending with
145 "IPython cluster: started". A screenshot of this follows.
146
166 "IPython cluster: started". The result should look something like the following
167 screenshot:
147 168
148 169 .. image:: ipcluster_start.*
149 170
150 171 At this point, the controller and two engines are running on your local host.
151 This configuration is useful for testing and for situations where you
152 have multiple cores on your local computer.
172 This configuration is useful for testing and for situations where you want to
173 take advantage of multiple cores on your local computer.
153 174
154 175 Now that we have confirmed that :command:`ipcluster` is working properly, we
155 describe how to configure and run an IPython cluster on an actual cluster
156 running Windows HPC Server 2008. Here is an outline of the needed steps:
176 describe how to configure and run an IPython cluster on an actual compute
177 cluster running Windows HPC Server 2008. Here is an outline of the needed
178 steps:
157 179
158 1. Create a cluster profile: ``ipcluster create -p mycluster``
180 1. Create a cluster profile using: ``ipcluster create -p mycluster``
159 181
160 2. Edit confguration files in :file:`.ipython\\cluster_mycluster`.
182 2. Edit configuration files in the directory :file:`.ipython\\cluster_mycluster`
161 183
162 3. Start the cluster: ``ipcluser start -p mycluster -n 32``
184 3. Start the cluster using: ``ipcluster start -p mycluster -n 32``
163 185
164 186 Creating a cluster profile
165 187 --------------------------
166 188
167 In most cases, you will have to create and configure a cluster profile to use
168 IPython on a cluster. A cluster profile is a specially named directory
169 (typically located in the :file:`.ipython` subdirectory of your home
170 directory) that contains the configuration files for a particular IPython
171 cluster, as well as log files and security keys. The naming convention
172 for cluster directories is: "cluster_<profile name>". Thus, the cluster
173 directory for a profile named "foo" would be :file:`.ipython\\cluster_foo`.
189 In most cases, you will have to create a cluster profile to use IPython on a
190 cluster. A cluster profile is a name (like "mycluster") that is associated
191 with a particular cluster configuration. The profile name is used by
192 :command:`ipcluster` when working with the cluster.
193
194 Associated with each cluster profile is a cluster directory. This cluster
195 directory is a specially named directory (typically located in the
196 :file:`.ipython` subdirectory of your home directory) that contains the
197 configuration files for a particular cluster profile, as well as log files and
198 security keys. The naming convention for cluster directories is:
199 :file:`cluster_<profile name>`. Thus, the cluster directory for a profile named
200 "foo" would be :file:`.ipython\\cluster_foo`.
174 201
175 To create a new cluster profile (named "mycluster"), type the following
176 command at the Windows Command Prompt::
202 To create a new cluster profile (named "mycluster") and the associated cluster
203 directory, type the following command at the Windows Command Prompt::
177 204
178 205 ipcluster create -p mycluster
179 206
180 207 The output of this command is shown in the screenshot below. Notice how
181 208 :command:`ipcluster` prints out the location of the newly created cluster
182 209 directory.
183 210
184
185 211 .. image:: ipcluster_create.*
186 212
187
188 213 Configuring a cluster profile
189 214 -----------------------------
190 215
191 216 Next, you will need to configure the newly created cluster profile by editing
192 217 the following configuration files in the cluster directory:
193 218
194 219 * :file:`ipcluster_config.py`
195 220 * :file:`ipcontroller_config.py`
196 221 * :file:`ipengine_config.py`
197 222
198 223 When :command:`ipcluster` is run, these configuration files are used to
199 224 determine how the engines and controller will be started. In most cases,
200 225 you will only have to set a few of the attributes in these files.
201 226
202 227 To configure :command:`ipcluster` to use the Windows HPC job scheduler, you
203 228 will need to edit the following attributes in the file
204 229 :file:`ipcluster_config.py`::
205 230
206 231 # Set these at the top of the file to tell ipcluster to use the
207 232 # Windows HPC job scheduler.
208 233 c.Global.controller_launcher = \
209 234 'IPython.kernel.launcher.WindowsHPCControllerLauncher'
210 235 c.Global.engine_launcher = \
211 236 'IPython.kernel.launcher.WindowsHPCEngineSetLauncher'
212 237
213 238 # Set these to the host name of the scheduler (head node) of your cluster.
214 239 c.WindowsHPCControllerLauncher.scheduler = 'HEADNODE'
215 240 c.WindowsHPCEngineSetLauncher.scheduler = 'HEADNODE'
216 241
217 242 There are a number of other configuration attributes that can be set, but
218 243 in most cases these will be sufficient to get you started.
219 244
220 245 .. warning::
221 246 If any of your configuration attributes involve specifying the location
222 247 of shared directories or files, you must make sure that you use UNC paths
223 248 like :file:`\\\\host\\share`. It is also important that you specify
224 these paths using raw Python strings: ``r'\\host\share'``.
249 these paths using raw Python strings: ``r'\\host\share'`` to make sure
250 that the backslashes are properly escaped.
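
   For example (the attribute name here is purely illustrative, not an
   actual IPython option)::

       # Hypothetical attribute; note the raw string and the UNC path.
       c.Global.working_dir = r'\\HEADNODE\share\ipython'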
225 251
226 252 Starting the cluster profile
227 253 ----------------------------
228 254
229 255 Once a cluster profile has been configured, starting an IPython cluster using
230 the profile is simple:
256 the profile is simple::
231 257
232 258 ipcluster start -p mycluster -n 32
233 259
234 The ``-n 32`` option tells :command:`ipcluster` how many engines to start.
235 Stopping the cluster is as simple as typing Control-C.
260 The ``-n`` option tells :command:`ipcluster` how many engines to start (in
261 this case 32). Stopping the cluster is as simple as typing Control-C.
236 262
237 263 Using the HPC Job Manager
238 264 -------------------------
239 265
240 266 When ``ipcluster start`` is run the first time, :command:`ipcluster` creates
241 267 two XML job description files in the cluster directory:
242 268
243 269 * :file:`ipcontroller_job.xml`
244 270 * :file:`ipengineset_job.xml`
245 271
246 272 Once these files have been created, they can be imported into the HPC Job
247 273 Manager application. Then, the controller and engines for that profile can be
248 274 started using the HPC Job Manager directly, without using :command:`ipcluster`.
249 275 However, anytime the cluster profile is re-configured, ``ipcluster start``
250 has to be run again to regenerate the XML job description files. The
276 must be run again to regenerate the XML job description files. The
251 277 following screenshot shows what the HPC Job Manager interface looks like
252 278 with a running IPython cluster.
253 279
254
255 280 .. image:: hpc_job_manager.*
256 281
257 282 Performing a simple interactive parallel computation
258 283 ====================================================
259 284
260 285 Once you have started your IPython cluster, you can start to use it. To do
261 this, start up IPython's interactive shell by typing::
286 this, open up a new Windows Command Prompt and start up IPython's interactive
287 shell by typing::
262 288
263 289 ipython
264 290
265 at the Windows Command Prompt. Then you can create a :class:`MultiEngineClient`
266 instance for your profile and use the resulting instance to
267 have the cluster do a simple interactive parallel computation. In the
268 screenshot that follows, we take a simple Python function::
269
270 def f(x): return x**10
271
272 and apply it to each element of an array of integers in
273 parallel using the :meth:`MultiEngineClient.map` method::
274
275 mec.map(f, range(15))
291 Then you can create a :class:`MultiEngineClient` instance for your profile and
292 use the resulting instance to do a simple interactive parallel computation. In
293 the code and screenshot that follows, we take a simple Python function and
294 apply it to each element of an array of integers in parallel using the
295 :meth:`MultiEngineClient.map` method:
296
297 .. sourcecode:: ipython
298
299 In [1]: from IPython.kernel.client import *
300
301 In [2]: mec = MultiEngineClient(profile='mycluster')
302
303 In [3]: mec.get_ids()
304 Out[3]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
305
306 In [4]: def f(x):
307 ...: return x**10
308
309 In [5]: mec.map(f, range(15)) # f is applied in parallel
310 Out[5]:
311 [0,
312 1,
313 1024,
314 59049,
315 1048576,
316 9765625,
317 60466176,
318 282475249,
319 1073741824,
320 3486784401L,
321 10000000000L,
322 25937424601L,
323 61917364224L,
324 137858491849L,
325 289254654976L]
276 326
277 327 The :meth:`map` method has the same signature as Python's builtin :func:`map`
278 328 function, but runs the calculation in parallel. More involved examples of using
279 329 :class:`MultiEngineClient` are provided in the examples that follow.
280 330
281 331 .. image:: mec_simple.*
282 332