Show More
@@ -0,0 +1,144 b'' | |||||
|
1 | #!/usr/bin/env python | |||
|
2 | """Run a Monte-Carlo options pricer in parallel.""" | |||
|
3 | ||||
|
4 | #----------------------------------------------------------------------------- | |||
|
5 | # Imports | |||
|
6 | #----------------------------------------------------------------------------- | |||
|
7 | ||||
|
8 | import sys | |||
|
9 | import time | |||
|
10 | from IPython.zmq.parallel import client | |||
|
11 | import numpy as np | |||
|
12 | from mcpricer import price_options | |||
|
13 | from matplotlib import pyplot as plt | |||
|
14 | ||||
|
15 | #----------------------------------------------------------------------------- | |||
|
16 | # Setup parameters for the run | |||
|
17 | #----------------------------------------------------------------------------- | |||
|
18 | ||||
|
def ask_question(text, the_type, default):
    """Prompt the user for a value, coerced to *the_type*.

    The default (shown in brackets in the prompt) is used when the user
    just hits return, so the caller always gets a value of the requested
    type back.
    """
    prompt = '%s [%r]: ' % (text, the_type(default))
    answer = raw_input(prompt)
    return the_type(answer) if answer else the_type(default)
|
26 | ||||
|
# Interactively gather run parameters; an empty reply takes the default
# shown in brackets (see ask_question above).
cluster_profile = ask_question("Cluster profile", str, "default")
price = ask_question("Initial price", float, 100.0)
rate = ask_question("Interest rate", float, 0.05)
days = ask_question("Days to expiration", int, 260)
paths = ask_question("Number of MC paths", int, 10000)
n_strikes = ask_question("Number of strike values", int, 5)
min_strike = ask_question("Min strike price", float, 90.0)
max_strike = ask_question("Max strike price", float, 110.0)
n_sigmas = ask_question("Number of volatility values", int, 5)
min_sigma = ask_question("Min volatility", float, 0.1)
max_sigma = ask_question("Max volatility", float, 0.4)

# The (strike, volatility) grid at which the options will be priced.
strike_vals = np.linspace(min_strike, max_strike, n_strikes)
sigma_vals = np.linspace(min_sigma, max_sigma, n_sigmas)

#-----------------------------------------------------------------------------
# Setup for parallel calculation
#-----------------------------------------------------------------------------

# The Client is used to setup the calculation and works with all
# engines.
c = client.Client(profile=cluster_profile)

# A LoadBalancedView is an interface to the engines that provides dynamic load
# balancing at the expense of not knowing which engine will execute the code.
view = c[None]

# Initialize the common code on the engines. This Python module has the
# price_options function that prices the options.
# NOTE(review): no initialization call actually follows this comment --
# presumably something like running mcpricer.py on the engines is missing
# here. Verify the engines can resolve price_options before submitting.

#-----------------------------------------------------------------------------
# Perform parallel calculation
#-----------------------------------------------------------------------------

print "Running parallel calculation over strike prices and volatilities..."
print "Strike prices: ", strike_vals
print "Volatilities: ", sigma_vals
sys.stdout.flush()

# Submit one task per (strike, sigma) grid point. apply_async returns an
# AsyncResult immediately, so all tasks are queued before any completes.
t1 = time.time()
async_results = []
for strike in strike_vals:
    for sigma in sigma_vals:
        ar = view.apply_async(price_options, price, strike, sigma, rate, days, paths)
        async_results.append(ar)

print "Submitted tasks: ", len(async_results)
sys.stdout.flush()

# Block until all tasks are completed.
c.barrier(async_results)
t2 = time.time()
t = t2-t1  # wall-clock time for the whole parallel run, in seconds

print "Parallel calculation completed, time = %s s" % t
print "Collecting results..."

# Each AsyncResult yields the (ecall, eput, acall, aput) tuple returned by
# price_options for one grid point, in submission order.
results = [ar.get() for ar in async_results]

# Assemble the result into a structured NumPy array.
prices = np.empty(n_strikes*n_sigmas,
    dtype=[('ecall',float),('eput',float),('acall',float),('aput',float)]
)

# NOTE(review): the loop variable rebinds the module-level name `price`
# (the initial stock price asked for above). Harmless because `price` is
# not used again afterwards, but a distinct name would be clearer.
for i, price in enumerate(results):
    prices[i] = tuple(price)

# Reshape to the grid: tasks were submitted with sigma varying fastest, so
# rows correspond to strikes and columns to volatilities.
prices.shape = (n_strikes, n_sigmas)
strike_mesh, sigma_mesh = np.meshgrid(strike_vals, sigma_vals)

print "Results are available: strike_mesh, sigma_mesh, prices"
print "To plot results type 'plot_options(sigma_mesh, strike_mesh, prices)'"
|
101 | ||||
|
102 | #----------------------------------------------------------------------------- | |||
|
103 | # Utilities | |||
|
104 | #----------------------------------------------------------------------------- | |||
|
105 | ||||
|
def plot_options(sigma_mesh, strike_mesh, prices):
    """
    Make a contour plot of the option price in (sigma, strike) space.

    One subplot is drawn per field of the structured ``prices`` array:
    European/Asian call on the top row, European/Asian put on the bottom.
    Axis labels are only drawn on the outer edges of the 2x2 grid.
    """
    # (subplot position, record field, title, draw xlabel, draw ylabel)
    panels = [
        (221, 'ecall', 'European Call', False, True),
        (222, 'acall', 'Asian Call', False, False),
        (223, 'eput', 'European Put', True, True),
        (224, 'aput', 'Asian Put', True, False),
    ]
    plt.figure(1)
    for position, field, title, label_x, label_y in panels:
        plt.subplot(position)
        plt.contourf(sigma_mesh, strike_mesh, prices[field])
        plt.axis('tight')
        plt.colorbar()
        plt.title(title)
        if label_x:
            plt.xlabel("Volatility")
        if label_y:
            plt.ylabel("Strike Price")
|
139 | ||||
|
140 | ||||
|
141 | ||||
|
142 | ||||
|
143 | ||||
|
144 |
@@ -0,0 +1,45 b'' | |||||
|
1 | ||||
|
def price_options(S=100.0, K=100.0, sigma=0.25, r=0.05, days=260, paths=10000):
    """
    Price European and Asian options using a Monte Carlo method.

    Parameters
    ----------
    S : float
        The initial price of the stock.
    K : float
        The strike price of the option.
    sigma : float
        The volatility of the stock.
    r : float
        The risk free interest rate.
    days : int
        The number of days until the option expires.
    paths : int
        The number of Monte Carlo paths used to price the option.

    Returns
    -------
    A tuple of (E. call, E. put, A. call, A. put) option prices.
    """
    # Imports are local so the whole function can be shipped to remote
    # IPython engines that have not imported anything themselves.
    import numpy as np
    from math import exp, sqrt

    step = 1.0/days
    drift = exp((r-0.5*sigma**2)*step)          # deterministic per-step growth
    vol_scale = sigma*sqrt(step)                # scales the daily Gaussian shock

    # Evolve all paths one trading day at a time under geometric Brownian
    # motion, accumulating the running sum needed for the Asian payoffs.
    spot = S*np.ones(paths, dtype='float64')
    spot_sum = np.zeros(paths, dtype='float64')
    for _ in range(days):
        spot = spot*(drift*np.exp(vol_scale*np.random.standard_normal(paths)))
        spot_sum = spot_sum + spot
    spot_avg = spot_sum/days

    # Discounted mean payoffs; the zero floor implements max(payoff, 0).
    floor = np.zeros(paths, dtype='float64')
    discount = exp(-r*step*days)
    euro_put = discount*np.mean(np.maximum(floor, K-spot))
    asian_put = discount*np.mean(np.maximum(floor, K-spot_avg))
    euro_call = discount*np.mean(np.maximum(floor, spot-K))
    asian_call = discount*np.mean(np.maximum(floor, spot_avg-K))
    return (euro_call, euro_put, asian_call, asian_put)
|
45 |
@@ -0,0 +1,63 b'' | |||||
|
1 | """Calculate statistics on the digits of pi in parallel. | |||
|
2 | ||||
|
3 | This program uses the functions in :file:`pidigits.py` to calculate | |||
|
4 | the frequencies of 2 digit sequences in the digits of pi. The | |||
|
5 | results are plotted using matplotlib. | |||
|
6 | ||||
|
7 | To run, text files from http://www.super-computing.org/ | |||
|
8 | must be installed in the working directory of the IPython engines. | |||
|
9 | The actual filenames to be used can be set with the ``filestring`` | |||
|
10 | variable below. | |||
|
11 | ||||
|
12 | The dataset we have been using for this is the 200 million digit one here: | |||
|
13 | ftp://pi.super-computing.org/.2/pi200m/ | |||
|
14 | ||||
|
15 | and the files used will be downloaded if they are not in the working directory | |||
|
16 | of the IPython engines. | |||
|
17 | """ | |||
|
18 | ||||
|
from IPython.zmq.parallel import client
from matplotlib import pyplot as plt
import numpy as np
from pidigits import *
from timeit import default_timer as clock

# Files with digits of pi (10m digits each)
filestring = 'pi200m.ascii.%(i)02dof20'
files = [filestring % {'i':i} for i in range(1,16)]

# Connect to the IPython cluster
c = client.Client()
# Run pidigits.py on the engines so they have the worker functions
# (fetch_pi_file, compute_two_digit_freqs, reduce_freqs, ...) defined.
c.run('pidigits.py')

# the number of engines
n = len(c.ids)
id0 = list(c.ids)[0]  # any single engine, used for the serial baseline below
# Fetch the pi files: each engine downloads into its own working directory
# if the file is not already present (see fetch_pi_file).
print "downloading %i files of pi"%n
c.map(fetch_pi_file, files[:n])
print "done"

# Run 10m digits on 1 engine to establish the serial baseline rate.
t1 = clock()
freqs10m = c[id0].apply_sync_bound(compute_two_digit_freqs, files[0])
t2 = clock()
digits_per_second1 = 10.0e6/(t2-t1)
print "Digits per second (1 core, 10m digits): ", digits_per_second1


# Run n*10m digits on all engines
t1 = clock()
c.block=True  # make c.map wait for, and return, the actual results
freqs_all = c.map(compute_two_digit_freqs, files[:n])
freqs150m = reduce_freqs(freqs_all)  # combine the per-engine count arrays
t2 = clock()
# NOTE(review): named after the original 8-engine benchmark, but this is
# really the n-engine throughput.
digits_per_second8 = n*10.0e6/(t2-t1)
# The "%i0m" format deliberately appends a literal "0m" to n, so n=15
# prints as "150m" (each file holds 10m digits).
print "Digits per second (%i engines, %i0m digits): "%(n,n), digits_per_second8

print "Speedup: ", digits_per_second8/digits_per_second1

plot_two_digit_freqs(freqs150m)
plt.title("2 digit sequences in %i0m digits of pi"%n)
plt.show()
|
63 |
@@ -0,0 +1,159 b'' | |||||
|
1 | """Compute statistics on the digits of pi. | |||
|
2 | ||||
|
3 | This uses precomputed digits of pi from the website | |||
|
4 | of Professor Yasumasa Kanada at the University of | |||
|
5 | Tokyo: http://www.super-computing.org/ | |||
|
6 | ||||
|
7 | Currently, there are only functions to read the | |||
|
8 | .txt (non-compressed, non-binary) files, but adding | |||
|
9 | support for compression and binary files would be | |||
|
10 | straightforward. | |||
|
11 | ||||
|
12 | This focuses on computing the number of times that | |||
|
13 | all 1, 2, n digits sequences occur in the digits of pi. | |||
|
14 | If the digits of pi are truly random, these frequencies | |||
|
15 | should be equal. | |||
|
16 | """ | |||
|
17 | ||||
|
18 | # Import statements | |||
|
19 | from __future__ import division, with_statement | |||
|
20 | ||||
|
21 | import os | |||
|
22 | import urllib | |||
|
23 | ||||
|
24 | import numpy as np | |||
|
25 | from matplotlib import pyplot as plt | |||
|
26 | ||||
|
27 | # Top-level functions | |||
|
28 | ||||
|
def fetch_pi_file(filename):
    """Download a segment of pi from super-computing.org
    if the file is not already present.
    """
    ftpdir = "ftp://pi.super-computing.org/.2/pi200m/"
    if os.path.exists(filename):
        # Already cached locally; nothing to do.
        return
    urllib.urlretrieve(ftpdir + filename, filename)
|
40 | ||||
|
def compute_one_digit_freqs(filename):
    """
    Read digits of pi from a file and compute the 1 digit frequencies.
    """
    return one_digit_freqs(txt_file_to_digits(filename))
|
48 | ||||
|
def compute_two_digit_freqs(filename):
    """
    Read digits of pi from a file and compute the 2 digit frequencies.
    """
    return two_digit_freqs(txt_file_to_digits(filename))
|
56 | ||||
|
def reduce_freqs(freqlist):
    """
    Add up a list of freq counts to get the total counts.

    Input arrays are left untouched; a fresh accumulator array (of the
    same shape/dtype as the first element) is returned.
    """
    return sum(freqlist, np.zeros_like(freqlist[0]))
|
65 | ||||
|
def compute_n_digit_freqs(filename, n):
    """
    Read digits of pi from a file and compute the n digit frequencies.
    """
    return n_digit_freqs(txt_file_to_digits(filename), n)
|
73 | ||||
|
74 | # Read digits from a txt file | |||
|
75 | ||||
|
def txt_file_to_digits(filename, the_type=str):
    """
    Yield the digits of pi read from a .txt file.

    Whitespace (spaces and newlines) used to format the source files is
    skipped; every remaining character is coerced with *the_type*.
    """
    with open(filename, 'r') as f:
        for line in f:
            for ch in line:
                if ch not in ('\n', ' '):
                    yield the_type(ch)
|
85 | ||||
|
86 | # Actual counting functions | |||
|
87 | ||||
|
def one_digit_freqs(digits, normalize=False):
    """
    Consume digits of pi and compute 1 digit freq. counts.

    Streams through *digits* (anything int() accepts) and returns a
    length-10 array of counts, or relative frequencies if *normalize*.
    """
    counts = np.zeros(10, dtype='i4')
    for digit in digits:
        counts[int(digit)] += 1
    return counts/counts.sum() if normalize else counts
|
98 | ||||
|
def two_digit_freqs(digits, normalize=False):
    """
    Consume digits of pi and compute 2 digits freq. counts.

    Parameters
    ----------
    digits : iterable of str
        Yields the digits of pi as one-character strings.
    normalize : bool
        If True, return relative frequencies instead of raw counts.

    Returns
    -------
    A length-100 NumPy array; the pair 'ab' is counted at index int('ab')
    (string concatenation keeps leading zeros: '0','7' -> index 7).
    """
    freqs = np.zeros(100, dtype='i4')
    it = iter(digits)
    try:
        last = next(it)  # next() works on Py2.6+ and Py3 (was digits.next())
    except StopIteration:
        return freqs  # fewer than two digits: nothing to count
    for this in it:
        # Count the pair ending at the current digit, then slide. Counting
        # before the shift (the old code counted one step behind) means the
        # final pair of the stream is no longer dropped.
        freqs[int(last + this)] += 1
        last = this
    if normalize:
        freqs = freqs/freqs.sum()
    return freqs
|
114 | ||||
|
def n_digit_freqs(digits, n, normalize=False):
    """
    Consume digits of pi and compute n digits freq. counts.

    This should only be used for 1-6 digits.

    Parameters
    ----------
    digits : iterable
        Yields the digits of pi one at a time (str or int).
    n : int
        Length of the digit sequences to count.
    normalize : bool
        If True, return relative frequencies instead of raw counts.

    Returns
    -------
    A length-10**n NumPy array; the sequence 'd1...dn' is counted at index
    int('d1...dn').
    """
    freqs = np.zeros(pow(10, n), dtype='i4')
    it = iter(digits)
    # Prime the sliding window with the first n digits, kept as strings so
    # leading zeros survive the join below (the old code stored them in an
    # int ndarray, which breaks under current NumPy and loses nothing here).
    window = []
    for _ in range(n):
        try:
            window.append(str(next(it)))  # next() works on Py2.6+/Py3 (was .next())
        except StopIteration:
            return freqs  # fewer than n digits available: nothing to count
    freqs[int(''.join(window))] += 1
    for d in it:
        # Slide the window one digit and count the new sequence. Counting
        # after the shift (the old code counted before it) means the final
        # window of the stream is no longer dropped.
        window.pop(0)
        window.append(str(d))
        freqs[int(''.join(window))] += 1
    if normalize:
        freqs = freqs/freqs.sum()
    return freqs
|
133 | ||||
|
134 | # Plotting functions | |||
|
135 | ||||
|
def plot_two_digit_freqs(f2):
    """
    Plot two digits frequency counts using matplotlib.

    The length-100 count array is shown as a 10x10 color matrix, with
    each cell annotated by the two-digit sequence it represents.
    """
    grid = f2.copy()
    grid.shape = (10, 10)
    ax = plt.matshow(grid)
    plt.colorbar()
    # The matrix is indexed [first digit][second digit], so the cell at
    # plot position (col, row) corresponds to the sequence str(row)+str(col).
    for col in range(10):
        for row in range(10):
            plt.text(col - 0.2, row + 0.2, str(row) + str(col))
    plt.ylabel('First digit')
    plt.xlabel('Second digit')
    return ax
|
150 | ||||
|
def plot_one_digit_freqs(f1):
    """
    Plot one digit frequency counts using matplotlib.

    Draws the ten counts as a connected line of blue dots and returns the
    line artists from the plot call.
    """
    axes = plt.plot(f1, 'bo-')
    plt.title('Single digit counts in pi')
    plt.xlabel('Digit')
    plt.ylabel('Count')
    return axes
@@ -4,14 +4,14 b' Parallel examples' | |||||
4 |
|
4 | |||
5 | .. note:: |
|
5 | .. note:: | |
6 |
|
6 | |||
7 | Not adapted to zmq yet |
|
7 | Performance numbers from ``IPython.kernel``, not newparallel | |
8 |
|
8 | |||
9 | In this section we describe two more involved examples of using an IPython |
|
9 | In this section we describe two more involved examples of using an IPython | |
10 | cluster to perform a parallel computation. In these examples, we will be using |
|
10 | cluster to perform a parallel computation. In these examples, we will be using | |
11 | IPython's "pylab" mode, which enables interactive plotting using the |
|
11 | IPython's "pylab" mode, which enables interactive plotting using the | |
12 | Matplotlib package. IPython can be started in this mode by typing:: |
|
12 | Matplotlib package. IPython can be started in this mode by typing:: | |
13 |
|
13 | |||
14 |
ipython - |
|
14 | ipython --pylab | |
15 |
|
15 | |||
16 | at the system command line. If this prints an error message, you will |
|
16 | at the system command line. If this prints an error message, you will | |
17 | need to install the default profiles from within IPython by doing, |
|
17 | need to install the default profiles from within IPython by doing, | |
@@ -82,7 +82,7 b' The resulting plot of the single digit counts shows that each digit occurs' | |||||
82 | approximately 1,000 times, but that with only 10,000 digits the |
|
82 | approximately 1,000 times, but that with only 10,000 digits the | |
83 | statistical fluctuations are still rather large: |
|
83 | statistical fluctuations are still rather large: | |
84 |
|
84 | |||
85 | .. image:: single_digits.* |
|
85 | .. image:: ../parallel/single_digits.* | |
86 |
|
86 | |||
87 | It is clear that to reduce the relative fluctuations in the counts, we need |
|
87 | It is clear that to reduce the relative fluctuations in the counts, we need | |
88 | to look at many more digits of pi. That brings us to the parallel calculation. |
|
88 | to look at many more digits of pi. That brings us to the parallel calculation. | |
@@ -93,7 +93,7 b' Parallel calculation' | |||||
93 | Calculating many digits of pi is a challenging computational problem in itself. |
|
93 | Calculating many digits of pi is a challenging computational problem in itself. | |
94 | Because we want to focus on the distribution of digits in this example, we |
|
94 | Because we want to focus on the distribution of digits in this example, we | |
95 | will use pre-computed digit of pi from the website of Professor Yasumasa |
|
95 | will use pre-computed digit of pi from the website of Professor Yasumasa | |
96 |
Kanada at the University of Tok |
|
96 | Kanada at the University of Tokyo (http://www.super-computing.org). These | |
97 | digits come in a set of text files (ftp://pi.super-computing.org/.2/pi200m/) |
|
97 | digits come in a set of text files (ftp://pi.super-computing.org/.2/pi200m/) | |
98 | that each have 10 million digits of pi. |
|
98 | that each have 10 million digits of pi. | |
99 |
|
99 | |||
@@ -108,24 +108,23 b' compute the two digit counts for the digits in a single file. Then in a final' | |||||
108 | step the counts from each engine will be added up. To perform this |
|
108 | step the counts from each engine will be added up. To perform this | |
109 | calculation, we will need two top-level functions from :file:`pidigits.py`: |
|
109 | calculation, we will need two top-level functions from :file:`pidigits.py`: | |
110 |
|
110 | |||
111 |
.. literalinclude:: ../../examples/ |
|
111 | .. literalinclude:: ../../examples/newparallel/pidigits.py | |
112 | :language: python |
|
112 | :language: python | |
113 | :lines: 34-49 |
|
113 | :lines: 34-49 | |
114 |
|
114 | |||
115 | We will also use the :func:`plot_two_digit_freqs` function to plot the |
|
115 | We will also use the :func:`plot_two_digit_freqs` function to plot the | |
116 | results. The code to run this calculation in parallel is contained in |
|
116 | results. The code to run this calculation in parallel is contained in | |
117 |
:file:`docs/examples/ |
|
117 | :file:`docs/examples/newparallel/parallelpi.py`. This code can be run in parallel | |
118 | using IPython by following these steps: |
|
118 | using IPython by following these steps: | |
119 |
|
119 | |||
120 | 1. Copy the text files with the digits of pi |
|
120 | 1. Use :command:`ipclusterz` to start 15 engines. We used an 8 core (2 quad | |
121 | (ftp://pi.super-computing.org/.2/pi200m/) to the working directory of the |
|
|||
122 | engines on the compute nodes. |
|
|||
123 | 2. Use :command:`ipclusterz` to start 15 engines. We used an 8 core (2 quad |
|
|||
124 | core CPUs) cluster with hyperthreading enabled which makes the 8 cores |
|
121 | core CPUs) cluster with hyperthreading enabled which makes the 8 cores | |
125 | looks like 16 (1 controller + 15 engines) in the OS. However, the maximum |
|
122 | looks like 16 (1 controller + 15 engines) in the OS. However, the maximum | |
126 | speedup we can observe is still only 8x. |
|
123 | speedup we can observe is still only 8x. | |
127 |
|
|
124 | 2. With the file :file:`parallelpi.py` in your current working directory, open | |
128 | up IPython in pylab mode and type ``run parallelpi.py``. |
|
125 | up IPython in pylab mode and type ``run parallelpi.py``. This will download | |
|
126 | the pi files via ftp the first time you run it, if they are not | |||
|
127 | present in the Engines' working directory. | |||
129 |
|
128 | |||
130 | When run on our 8 core cluster, we observe a speedup of 7.7x. This is slightly |
|
129 | When run on our 8 core cluster, we observe a speedup of 7.7x. This is slightly | |
131 | less than linear scaling (8x) because the controller is also running on one of |
|
130 | less than linear scaling (8x) because the controller is also running on one of | |
@@ -138,55 +137,55 b' calculation can also be run by simply typing the commands from' | |||||
138 | .. sourcecode:: ipython |
|
137 | .. sourcecode:: ipython | |
139 |
|
138 | |||
140 | In [1]: from IPython.zmq.parallel import client |
|
139 | In [1]: from IPython.zmq.parallel import client | |
141 | 2009-11-19 11:32:38-0800 [-] Log opened. |
|
|||
142 |
|
140 | |||
143 |
# The |
|
141 | # The Client allows us to use the engines interactively. | |
144 |
# We simply pass |
|
142 | # We simply pass Client the name of the cluster profile we | |
145 | # are using. |
|
143 | # are using. | |
146 | In [2]: c = client.Client(profile='mycluster') |
|
144 | In [2]: c = client.Client(profile='mycluster') | |
147 | 2009-11-19 11:32:44-0800 [-] Connecting [0] |
|
|||
148 | 2009-11-19 11:32:44-0800 [Negotiation,client] Connected: ./ipcontroller-mec.furl |
|
|||
149 |
|
145 | |||
150 |
In [3]: |
|
146 | In [3]: c.ids | |
151 | Out[3]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] |
|
147 | Out[3]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] | |
152 |
|
148 | |||
153 | In [4]: run pidigits.py |
|
149 | In [4]: run pidigits.py | |
154 |
|
150 | |||
155 |
In [5]: filestring = 'pi200m |
|
151 | In [5]: filestring = 'pi200m.ascii.%(i)02dof20' | |
156 |
|
152 | |||
157 | # Create the list of files to process. |
|
153 | # Create the list of files to process. | |
158 | In [6]: files = [filestring % {'i':i} for i in range(1,16)] |
|
154 | In [6]: files = [filestring % {'i':i} for i in range(1,16)] | |
159 |
|
155 | |||
160 | In [7]: files |
|
156 | In [7]: files | |
161 | Out[7]: |
|
157 | Out[7]: | |
162 |
['pi200m |
|
158 | ['pi200m.ascii.01of20', | |
163 |
'pi200m |
|
159 | 'pi200m.ascii.02of20', | |
164 |
'pi200m |
|
160 | 'pi200m.ascii.03of20', | |
165 |
'pi200m |
|
161 | 'pi200m.ascii.04of20', | |
166 |
'pi200m |
|
162 | 'pi200m.ascii.05of20', | |
167 |
'pi200m |
|
163 | 'pi200m.ascii.06of20', | |
168 |
'pi200m |
|
164 | 'pi200m.ascii.07of20', | |
169 |
'pi200m |
|
165 | 'pi200m.ascii.08of20', | |
170 |
'pi200m |
|
166 | 'pi200m.ascii.09of20', | |
171 |
'pi200m |
|
167 | 'pi200m.ascii.10of20', | |
172 |
'pi200m |
|
168 | 'pi200m.ascii.11of20', | |
173 |
'pi200m |
|
169 | 'pi200m.ascii.12of20', | |
174 |
'pi200m |
|
170 | 'pi200m.ascii.13of20', | |
175 |
'pi200m |
|
171 | 'pi200m.ascii.14of20', | |
176 |
'pi200m |
|
172 | 'pi200m.ascii.15of20'] | |
177 |
|
173 | |||
178 | # This is the parallel calculation using the MultiEngineClient.map method |
|
174 | # download the data files if they don't already exist: | |
|
175 | In [8]: c.map(fetch_pi_file, files) | |||
|
176 | ||||
|
177 | # This is the parallel calculation using the Client.map method | |||
179 | # which applies compute_two_digit_freqs to each file in files in parallel. |
|
178 | # which applies compute_two_digit_freqs to each file in files in parallel. | |
180 |
In [ |
|
179 | In [9]: freqs_all = c.map(compute_two_digit_freqs, files) | |
181 |
|
180 | |||
182 | # Add up the frequencies from each engine. |
|
181 | # Add up the frequencies from each engine. | |
183 |
In [ |
|
182 | In [10]: freqs = reduce_freqs(freqs_all) | |
184 |
|
183 | |||
185 |
In [ |
|
184 | In [11]: plot_two_digit_freqs(freqs) | |
186 |
Out[ |
|
185 | Out[11]: <matplotlib.image.AxesImage object at 0x18beb110> | |
187 |
|
186 | |||
188 |
In [1 |
|
187 | In [12]: plt.title('2 digit counts of 150m digits of pi') | |
189 |
Out[1 |
|
188 | Out[12]: <matplotlib.text.Text object at 0x18d1f9b0> | |
190 |
|
189 | |||
191 | The resulting plot generated by Matplotlib is shown below. The colors indicate |
|
190 | The resulting plot generated by Matplotlib is shown below. The colors indicate | |
192 | which two digit sequences are more (red) or less (blue) likely to occur in the |
|
191 | which two digit sequences are more (red) or less (blue) likely to occur in the | |
@@ -195,7 +194,7 b' most likely and that "06" and "07" are least likely. Further analysis would' | |||||
195 | show that the relative size of the statistical fluctuations have decreased |
|
194 | show that the relative size of the statistical fluctuations have decreased | |
196 | compared to the 10,000 digit calculation. |
|
195 | compared to the 10,000 digit calculation. | |
197 |
|
196 | |||
198 | .. image:: two_digit_counts.* |
|
197 | .. image:: ../parallel/two_digit_counts.* | |
199 |
|
198 | |||
200 |
|
199 | |||
201 | Parallel options pricing |
|
200 | Parallel options pricing | |
@@ -224,10 +223,10 b' the NumPy package and is shown here:' | |||||
224 | .. literalinclude:: ../../examples/kernel/mcpricer.py |
|
223 | .. literalinclude:: ../../examples/kernel/mcpricer.py | |
225 | :language: python |
|
224 | :language: python | |
226 |
|
225 | |||
227 |
To run this code in parallel, we will use IPython's :class:` |
|
226 | To run this code in parallel, we will use IPython's :class:`LoadBalancedView` class, | |
228 | which distributes work to the engines using dynamic load balancing. This |
|
227 | which distributes work to the engines using dynamic load balancing. This | |
229 |
|
|
228 | view is a wrapper of the :class:`Client` class shown in | |
230 |
the previous example. The parallel calculation using :class:` |
|
229 | the previous example. The parallel calculation using :class:`LoadBalancedView` can | |
231 | be found in the file :file:`mcpricer.py`. The code in this file creates a |
|
230 | be found in the file :file:`mcpricer.py`. The code in this file creates a | |
232 | :class:`TaskClient` instance and then submits a set of tasks using |
|
231 | :class:`TaskClient` instance and then submits a set of tasks using | |
233 | :meth:`TaskClient.run` that calculate the option prices for different |
|
232 | :meth:`TaskClient.run` that calculate the option prices for different | |
@@ -264,9 +263,9 b' entire calculation (10 strike prices, 10 volatilities, 100,000 paths for each)' | |||||
264 | took 30 seconds in parallel, giving a speedup of 7.7x, which is comparable |
|
263 | took 30 seconds in parallel, giving a speedup of 7.7x, which is comparable | |
265 | to the speedup observed in our previous example. |
|
264 | to the speedup observed in our previous example. | |
266 |
|
265 | |||
267 | .. image:: asian_call.* |
|
266 | .. image:: ../parallel/asian_call.* | |
268 |
|
267 | |||
269 | .. image:: asian_put.* |
|
268 | .. image:: ../parallel/asian_put.* | |
270 |
|
269 | |||
271 | Conclusion |
|
270 | Conclusion | |
272 | ========== |
|
271 | ========== | |
@@ -275,7 +274,7 b" To conclude these examples, we summarize the key features of IPython's" | |||||
275 | parallel architecture that have been demonstrated: |
|
274 | parallel architecture that have been demonstrated: | |
276 |
|
275 | |||
277 | * Serial code can be parallelized often with only a few extra lines of code. |
|
276 | * Serial code can be parallelized often with only a few extra lines of code. | |
278 |
We have used the :class:` |
|
277 | We have used the :class:`DirectView` and :class:`LoadBalancedView` classes | |
279 | for this purpose. |
|
278 | for this purpose. | |
280 | * The resulting parallel code can be run without ever leaving the IPython's |
|
279 | * The resulting parallel code can be run without ever leaving the IPython's | |
281 | interactive shell. |
|
280 | interactive shell. | |
@@ -284,3 +283,8 b' parallel architecture that have been demonstrated:' | |||||
284 | * We have run these examples on a cluster running Windows HPC Server 2008. |
|
283 | * We have run these examples on a cluster running Windows HPC Server 2008. | |
285 | IPython's built in support for the Windows HPC job scheduler makes it |
|
284 | IPython's built in support for the Windows HPC job scheduler makes it | |
286 | easy to get started with IPython's parallel capabilities. |
|
285 | easy to get started with IPython's parallel capabilities. | |
|
286 | ||||
|
287 | .. note:: | |||
|
288 | ||||
|
289 | The newparallel code has never been run on Windows HPC Server, so the last | |||
|
290 | conclusion is untested. |
@@ -7,6 +7,7 b' Using MPI with IPython' | |||||
7 | .. note:: |
|
7 | .. note:: | |
8 |
|
8 | |||
9 | Not adapted to zmq yet |
|
9 | Not adapted to zmq yet | |
|
10 | This is out of date wrt ipcluster in general as well | |||
10 |
|
11 | |||
11 | Often, a parallel algorithm will require moving data between the engines. One |
|
12 | Often, a parallel algorithm will require moving data between the engines. One | |
12 | way of accomplishing this is by doing a pull and then a push using the |
|
13 | way of accomplishing this is by doing a pull and then a push using the |
General Comments 0
You need to be logged in to leave comments.
Login now