Show More
@@ -0,0 +1,144 | |||
|
1 | #!/usr/bin/env python | |
|
2 | """Run a Monte-Carlo options pricer in parallel.""" | |
|
3 | ||
|
4 | #----------------------------------------------------------------------------- | |
|
5 | # Imports | |
|
6 | #----------------------------------------------------------------------------- | |
|
7 | ||
|
8 | import sys | |
|
9 | import time | |
|
10 | from IPython.zmq.parallel import client | |
|
11 | import numpy as np | |
|
12 | from mcpricer import price_options | |
|
13 | from matplotlib import pyplot as plt | |
|
14 | ||
|
15 | #----------------------------------------------------------------------------- | |
|
16 | # Setup parameters for the run | |
|
17 | #----------------------------------------------------------------------------- | |
|
18 | ||
|
def ask_question(text, the_type, default):
    """Prompt on stdin and return the reply coerced to *the_type*.

    The prompt shows the (coerced) default in brackets; an empty reply
    falls back to that default, also coerced.
    """
    prompt = '%s [%r]: ' % (text, the_type(default))
    answer = raw_input(prompt)
    return the_type(answer) if answer else the_type(default)
|
26 | ||
|
# Interactively gather all run parameters, falling back to the shown
# defaults when the user just presses return.
cluster_profile = ask_question("Cluster profile", str, "default")
price = ask_question("Initial price", float, 100.0)
rate = ask_question("Interest rate", float, 0.05)
days = ask_question("Days to expiration", int, 260)
paths = ask_question("Number of MC paths", int, 10000)
n_strikes = ask_question("Number of strike values", int, 5)
min_strike = ask_question("Min strike price", float, 90.0)
max_strike = ask_question("Max strike price", float, 110.0)
n_sigmas = ask_question("Number of volatility values", int, 5)
min_sigma = ask_question("Min volatility", float, 0.1)
max_sigma = ask_question("Max volatility", float, 0.4)

# The (strike, volatility) grid over which the options will be priced.
strike_vals = np.linspace(min_strike, max_strike, n_strikes)
sigma_vals = np.linspace(min_sigma, max_sigma, n_sigmas)

#-----------------------------------------------------------------------------
# Setup for parallel calculation
#-----------------------------------------------------------------------------

# The Client is used to setup the calculation and works with all
# engines.
c = client.Client(profile=cluster_profile)

# A LoadBalancedView is an interface to the engines that provides dynamic load
# balancing at the expense of not knowing which engine will execute the code.
# NOTE(review): indexing the client with None presumably yields the
# load-balanced view in this API version -- confirm against IPython docs.
view = c[None]

# Initialize the common code on the engines. This Python module has the
# price_options function that prices the options.

#-----------------------------------------------------------------------------
# Perform parallel calculation
#-----------------------------------------------------------------------------

print "Running parallel calculation over strike prices and volatilities..."
print "Strike prices: ", strike_vals
print "Volatilities: ", sigma_vals
sys.stdout.flush()

# Submit one asynchronous task per (strike, sigma) pair; each returns a
# 4-tuple of option prices from price_options.
t1 = time.time()
async_results = []
for strike in strike_vals:
    for sigma in sigma_vals:
        ar = view.apply_async(price_options, price, strike, sigma, rate, days, paths)
        async_results.append(ar)

print "Submitted tasks: ", len(async_results)
sys.stdout.flush()

# Block until all tasks are completed.
c.barrier(async_results)
t2 = time.time()
t = t2-t1

print "Parallel calculation completed, time = %s s" % t
print "Collecting results..."

# Get the results using TaskClient.get_task_result.
results = [ar.get() for ar in async_results]

# Assemble the result into a structured NumPy array with one field per
# option flavor: European/Asian call/put.
prices = np.empty(n_strikes*n_sigmas,
    dtype=[('ecall',float),('eput',float),('acall',float),('aput',float)]
)

# NOTE(review): the loop variable `price` shadows the initial stock price
# read above; harmless here because `price` is not used again afterwards,
# but worth renaming if this script grows.
for i, price in enumerate(results):
    prices[i] = tuple(price)

# Reshape the flat results onto the grid and build the meshes that
# plot_options uses for contour plots.
prices.shape = (n_strikes, n_sigmas)
strike_mesh, sigma_mesh = np.meshgrid(strike_vals, sigma_vals)

print "Results are available: strike_mesh, sigma_mesh, prices"
print "To plot results type 'plot_options(sigma_mesh, strike_mesh, prices)'"
|
101 | ||
|
102 | #----------------------------------------------------------------------------- | |
|
103 | # Utilities | |
|
104 | #----------------------------------------------------------------------------- | |
|
105 | ||
|
def plot_options(sigma_mesh, strike_mesh, prices):
    """
    Make a contour plot of the option price in (sigma, strike) space.

    Draws a 2x2 grid of filled contour plots, one panel per option
    flavor stored in the structured array *prices*.
    """
    plt.figure(1)

    # (subplot position, field name, panel title, x label?, y label?)
    panels = [
        (221, 'ecall', 'European Call', False, True),
        (222, 'acall', 'Asian Call', False, False),
        (223, 'eput', 'European Put', True, True),
        (224, 'aput', 'Asian Put', True, False),
    ]
    for position, field, panel_title, label_x, label_y in panels:
        plt.subplot(position)
        plt.contourf(sigma_mesh, strike_mesh, prices[field])
        plt.axis('tight')
        plt.colorbar()
        plt.title(panel_title)
        # Only the outer panels carry axis labels, as in a shared-axes grid.
        if label_x:
            plt.xlabel("Volatility")
        if label_y:
            plt.ylabel("Strike Price")
|
139 | ||
|
140 | ||
|
141 | ||
|
142 | ||
|
143 | ||
|
144 |
@@ -0,0 +1,45 | |||
|
1 | ||
|
def price_options(S=100.0, K=100.0, sigma=0.25, r=0.05, days=260, paths=10000):
    """
    Price European and Asian options using a Monte Carlo method.

    Simulates geometric-Brownian-motion stock paths one day at a time
    and discounts the average payoff at expiry.

    Parameters
    ----------
    S : float
        The initial price of the stock.
    K : float
        The strike price of the option.
    sigma : float
        The volatility of the stock.
    r : float
        The risk free interest rate.
    days : int
        The number of days until the option expires.
    paths : int
        The number of Monte Carlo paths used to price the option.

    Returns
    -------
    A tuple of (E. call, E. put, A. call, A. put) option prices.
    """
    import numpy as np
    from math import exp, sqrt

    step = 1.0/days
    # Deterministic drift factor per step and volatility scale per step.
    drift = exp((r-0.5*sigma**2)*step)
    vol_step = sigma*sqrt(step)

    spot = S*np.ones(paths, dtype='float64')
    running_total = np.zeros(paths, dtype='float64')
    for _ in range(days):
        # One lognormal growth factor per path for this day.
        growth = drift*np.exp(vol_step*np.random.standard_normal(paths))
        spot = spot*growth
        running_total += spot
    # Path average used for the (path-dependent) Asian payoffs.
    average_spot = running_total/days

    floor = np.zeros(paths, dtype='float64')
    discount = exp(-r*step*days)
    euro_put = discount*np.mean(np.maximum(floor, K-spot))
    asian_put = discount*np.mean(np.maximum(floor, K-average_spot))
    euro_call = discount*np.mean(np.maximum(floor, spot-K))
    asian_call = discount*np.mean(np.maximum(floor, average_spot-K))
    return (euro_call, euro_put, asian_call, asian_put)
|
45 |
@@ -0,0 +1,63 | |||
|
1 | """Calculate statistics on the digits of pi in parallel. | |
|
2 | ||
|
3 | This program uses the functions in :file:`pidigits.py` to calculate | |
|
4 | the frequencies of 2 digit sequences in the digits of pi. The | |
|
5 | results are plotted using matplotlib. | |
|
6 | ||
|
7 | To run, text files from http://www.super-computing.org/ | |
|
8 | must be installed in the working directory of the IPython engines. | |
|
9 | The actual filenames to be used can be set with the ``filestring`` | |
|
10 | variable below. | |
|
11 | ||
|
12 | The dataset we have been using for this is the 200 million digit one here: | |
|
13 | ftp://pi.super-computing.org/.2/pi200m/ | |
|
14 | ||
|
15 | and the files used will be downloaded if they are not in the working directory | |
|
16 | of the IPython engines. | |
|
17 | """ | |
|
18 | ||
|
19 | from IPython.zmq.parallel import client | |
|
20 | from matplotlib import pyplot as plt | |
|
21 | import numpy as np | |
|
22 | from pidigits import * | |
|
23 | from timeit import default_timer as clock | |
|
24 | ||
|
# Files with digits of pi (10m digits each); the %(i)02d slot selects
# which of the 20 segments a name refers to.
filestring = 'pi200m.ascii.%(i)02dof20'
files = [filestring % {'i':i} for i in range(1,16)]

# Connect to the IPython cluster
c = client.Client()
# Run pidigits.py on the engines so they all have the helper functions.
c.run('pidigits.py')

# the number of engines
n = len(c.ids)
id0 = list(c.ids)[0]
# fetch the pi-files (no-op for files already present on the engines)
print "downloading %i files of pi"%n
c.map(fetch_pi_file, files[:n])
print "done"

# Run 10m digits on 1 engine to get the serial baseline timing.
t1 = clock()
freqs10m = c[id0].apply_sync_bound(compute_two_digit_freqs, files[0])
t2 = clock()
digits_per_second1 = 10.0e6/(t2-t1)
print "Digits per second (1 core, 10m digits): ", digits_per_second1


# Run n*10m digits on all engines
t1 = clock()
# Block so map returns results directly instead of async handles.
c.block=True
freqs_all = c.map(compute_two_digit_freqs, files[:n])
# Sum the per-engine count arrays into the grand total.
freqs150m = reduce_freqs(freqs_all)
t2 = clock()
digits_per_second8 = n*10.0e6/(t2-t1)
print "Digits per second (%i engines, %i0m digits): "%(n,n), digits_per_second8

print "Speedup: ", digits_per_second8/digits_per_second1

plot_two_digit_freqs(freqs150m)
plt.title("2 digit sequences in %i0m digits of pi"%n)
plt.show()
|
63 |
@@ -0,0 +1,159 | |||
|
1 | """Compute statistics on the digits of pi. | |
|
2 | ||
|
3 | This uses precomputed digits of pi from the website | |
|
4 | of Professor Yasumasa Kanada at the University of | |
|
5 | Tokyo: http://www.super-computing.org/ | |
|
6 | ||
|
7 | Currently, there are only functions to read the | |
|
8 | .txt (non-compressed, non-binary) files, but adding | |
|
9 | support for compression and binary files would be | |
|
10 | straightforward. | |
|
11 | ||
|
12 | This focuses on computing the number of times that | |
|
13 | all 1, 2, n digits sequences occur in the digits of pi. | |
|
14 | If the digits of pi are truly random, these frequencies | |
|
15 | should be equal. | |
|
16 | """ | |
|
17 | ||
|
18 | # Import statements | |
|
19 | from __future__ import division, with_statement | |
|
20 | ||
|
21 | import os | |
|
22 | import urllib | |
|
23 | ||
|
24 | import numpy as np | |
|
25 | from matplotlib import pyplot as plt | |
|
26 | ||
|
27 | # Top-level functions | |
|
28 | ||
|
def fetch_pi_file(filename):
    """Download a segment of pi from super-computing.org unless
    *filename* is already present in the working directory.
    """
    ftp_root = "ftp://pi.super-computing.org/.2/pi200m/"
    if not os.path.exists(filename):
        # Not cached locally -- fetch it over ftp.
        urllib.urlretrieve(ftp_root + filename, filename)
|
40 | ||
|
def compute_one_digit_freqs(filename):
    """
    Read digits of pi from a file and compute the 1 digit frequencies.
    """
    digit_stream = txt_file_to_digits(filename)
    return one_digit_freqs(digit_stream)
|
48 | ||
|
def compute_two_digit_freqs(filename):
    """
    Read digits of pi from a file and compute the 2 digit frequencies.
    """
    digit_stream = txt_file_to_digits(filename)
    return two_digit_freqs(digit_stream)
|
56 | ||
|
def reduce_freqs(freqlist):
    """
    Add up a list of freq counts to get the total counts.

    The first array in *freqlist* fixes the shape and dtype of the
    result; the input arrays are not modified.
    """
    total = np.zeros_like(freqlist[0])
    for counts in freqlist:
        total = total + counts
    return total
|
65 | ||
|
def compute_n_digit_freqs(filename, n):
    """
    Read digits of pi from a file and compute the n digit frequencies.
    """
    digit_stream = txt_file_to_digits(filename)
    return n_digit_freqs(digit_stream, n)
|
73 | ||
|
74 | # Read digits from a txt file | |
|
75 | ||
|
def txt_file_to_digits(filename, the_type=str):
    """
    Yield the digits of pi read from a .txt file.

    Spaces and newlines are skipped; every other character is converted
    with *the_type* and yielded one at a time.

    Parameters
    ----------
    filename : str
        Path of the text file to read.
    the_type : callable
        Conversion applied to each digit character (default: str).
    """
    with open(filename, 'r') as f:
        # Iterate the file object directly instead of f.readlines() so a
        # 10-million-digit file is streamed line by line rather than
        # loaded into memory all at once.
        for line in f:
            for c in line:
                if c != '\n' and c != ' ':
                    yield the_type(c)
|
85 | ||
|
86 | # Actual counting functions | |
|
87 | ||
|
def one_digit_freqs(digits, normalize=False):
    """
    Consume digits of pi and compute 1 digit freq. counts.

    Returns a length-10 array where index d counts occurrences of
    digit d; with normalize=True, relative frequencies instead.
    """
    counts = np.zeros(10, dtype='i4')
    for digit in digits:
        counts[int(digit)] += 1
    if normalize:
        return counts/counts.sum()
    return counts
|
98 | ||
|
def two_digit_freqs(digits, normalize=False):
    """
    Consume digits of pi and compute 2 digits freq. counts.

    Every adjacent pair of digits is counted, so a stream of m digits
    yields m-1 pair observations.

    Parameters
    ----------
    digits : iterator
        Iterator of single-digit characters (strings).
    normalize : bool
        If True, return relative frequencies instead of raw counts.

    Returns
    -------
    A length-100 array where index ij counts the pair "ij".
    """
    freqs = np.zeros(100, dtype='i4')
    # Built-in next() instead of the Python-2-only .next() method.
    last = next(digits)
    this = next(digits)
    for d in digits:
        index = int(last + this)
        freqs[index] += 1
        last = this
        this = d
    # Count the final pair as well; the previous loop dropped it.
    freqs[int(last + this)] += 1
    if normalize:
        freqs = freqs/freqs.sum()
    return freqs
|
114 | ||
|
def n_digit_freqs(digits, n, normalize=False):
    """
    Consume digits of pi and compute n digits freq. counts.

    This should only be used for 1-6 digits.

    Every window of n consecutive digits is counted, so a stream of m
    digits yields m-n+1 window observations.

    Parameters
    ----------
    digits : iterator
        Iterator of single-digit characters (strings).
    n : int
        Window length.
    normalize : bool
        If True, return relative frequencies instead of raw counts.
    """
    freqs = np.zeros(pow(10,n), dtype='i4')
    # Seed the sliding window with the first n digits; keep them in a
    # plain list (the old int ndarray relied on implicit str->int casts
    # and next() replaces the Python-2-only .next() method).
    current = [next(digits) for _ in range(n)]
    for d in digits:
        index = int(''.join(map(str, current)))
        freqs[index] += 1
        current[0:-1] = current[1:]
        current[-1] = d
    # Count the final window as well; the previous loop dropped it.
    freqs[int(''.join(map(str, current)))] += 1
    if normalize:
        freqs = freqs/freqs.sum()
    return freqs
|
133 | ||
|
134 | # Plotting functions | |
|
135 | ||
|
def plot_two_digit_freqs(f2):
    """
    Plot two digits frequency counts using matplotlib.

    Shows the 100 counts as a 10x10 matrix image, annotating each
    cell with the two-digit sequence it represents, and returns the
    image object.
    """
    grid = f2.copy()
    grid.shape = (10, 10)
    image = plt.matshow(grid)
    plt.colorbar()
    # Label every cell; rows are the first digit, columns the second.
    for second in range(10):
        for first in range(10):
            plt.text(second - 0.2, first + 0.2, str(first) + str(second))
    plt.ylabel('First digit')
    plt.xlabel('Second digit')
    return image
|
150 | ||
|
def plot_one_digit_freqs(f1):
    """
    Plot one digit frequency counts using matplotlib.

    Draws the counts as blue dots connected by lines, labels the axes,
    and returns the plotted line objects.
    """
    line_handles = plt.plot(f1, 'bo-')
    plt.title('Single digit counts in pi')
    plt.xlabel('Digit')
    plt.ylabel('Count')
    return line_handles
@@ -1,286 +1,290 | |||
|
1 | 1 | ================= |
|
2 | 2 | Parallel examples |
|
3 | 3 | ================= |
|
4 | 4 | |
|
5 | 5 | .. note:: |
|
6 | 6 | |
|
7 | Not adapted to zmq yet | |
|
7 | Performance numbers from ``IPython.kernel``, not newparallel | |
|
8 | 8 | |
|
9 | 9 | In this section we describe two more involved examples of using an IPython |
|
10 | 10 | cluster to perform a parallel computation. In these examples, we will be using |
|
11 | 11 | IPython's "pylab" mode, which enables interactive plotting using the |
|
12 | 12 | Matplotlib package. IPython can be started in this mode by typing:: |
|
13 | 13 | |
|
14 |
ipython - |
|
|
14 | ipython --pylab | |
|
15 | 15 | |
|
16 | 16 | at the system command line. If this prints an error message, you will |
|
17 | 17 | need to install the default profiles from within IPython by doing, |
|
18 | 18 | |
|
19 | 19 | .. sourcecode:: ipython |
|
20 | 20 | |
|
21 | 21 | In [1]: %install_profiles |
|
22 | 22 | |
|
23 | 23 | and then restarting IPython. |
|
24 | 24 | |
|
25 | 25 | 150 million digits of pi |
|
26 | 26 | ======================== |
|
27 | 27 | |
|
28 | 28 | In this example we would like to study the distribution of digits in the |
|
29 | 29 | number pi (in base 10). While it is not known if pi is a normal number (a |
|
30 | 30 | number is normal in base 10 if 0-9 occur with equal likelihood) numerical |
|
31 | 31 | investigations suggest that it is. We will begin with a serial calculation on |
|
32 | 32 | 10,000 digits of pi and then perform a parallel calculation involving 150 |
|
33 | 33 | million digits. |
|
34 | 34 | |
|
35 | 35 | In both the serial and parallel calculation we will be using functions defined |
|
36 | 36 | in the :file:`pidigits.py` file, which is available in the |
|
37 | 37 | :file:`docs/examples/kernel` directory of the IPython source distribution. |
|
38 | 38 | These functions provide basic facilities for working with the digits of pi and |
|
39 | 39 | can be loaded into IPython by putting :file:`pidigits.py` in your current |
|
40 | 40 | working directory and then doing: |
|
41 | 41 | |
|
42 | 42 | .. sourcecode:: ipython |
|
43 | 43 | |
|
44 | 44 | In [1]: run pidigits.py |
|
45 | 45 | |
|
46 | 46 | Serial calculation |
|
47 | 47 | ------------------ |
|
48 | 48 | |
|
49 | 49 | For the serial calculation, we will use SymPy (http://www.sympy.org) to |
|
50 | 50 | calculate 10,000 digits of pi and then look at the frequencies of the digits |
|
51 | 51 | 0-9. Out of 10,000 digits, we expect each digit to occur 1,000 times. While |
|
52 | 52 | SymPy is capable of calculating many more digits of pi, our purpose here is to |
|
53 | 53 | set the stage for the much larger parallel calculation. |
|
54 | 54 | |
|
55 | 55 | In this example, we use two functions from :file:`pidigits.py`: |
|
56 | 56 | :func:`one_digit_freqs` (which calculates how many times each digit occurs) |
|
57 | 57 | and :func:`plot_one_digit_freqs` (which uses Matplotlib to plot the result). |
|
58 | 58 | Here is an interactive IPython session that uses these functions with |
|
59 | 59 | SymPy: |
|
60 | 60 | |
|
61 | 61 | .. sourcecode:: ipython |
|
62 | 62 | |
|
63 | 63 | In [7]: import sympy |
|
64 | 64 | |
|
65 | 65 | In [8]: pi = sympy.pi.evalf(40) |
|
66 | 66 | |
|
67 | 67 | In [9]: pi |
|
68 | 68 | Out[9]: 3.141592653589793238462643383279502884197 |
|
69 | 69 | |
|
70 | 70 | In [10]: pi = sympy.pi.evalf(10000) |
|
71 | 71 | |
|
72 | 72 | In [11]: digits = (d for d in str(pi)[2:]) # create a sequence of digits |
|
73 | 73 | |
|
74 | 74 | In [12]: run pidigits.py # load one_digit_freqs/plot_one_digit_freqs |
|
75 | 75 | |
|
76 | 76 | In [13]: freqs = one_digit_freqs(digits) |
|
77 | 77 | |
|
78 | 78 | In [14]: plot_one_digit_freqs(freqs) |
|
79 | 79 | Out[14]: [<matplotlib.lines.Line2D object at 0x18a55290>] |
|
80 | 80 | |
|
81 | 81 | The resulting plot of the single digit counts shows that each digit occurs |
|
82 | 82 | approximately 1,000 times, but that with only 10,000 digits the |
|
83 | 83 | statistical fluctuations are still rather large: |
|
84 | 84 | |
|
85 | .. image:: single_digits.* | |
|
85 | .. image:: ../parallel/single_digits.* | |
|
86 | 86 | |
|
87 | 87 | It is clear that to reduce the relative fluctuations in the counts, we need |
|
88 | 88 | to look at many more digits of pi. That brings us to the parallel calculation. |
|
89 | 89 | |
|
90 | 90 | Parallel calculation |
|
91 | 91 | -------------------- |
|
92 | 92 | |
|
93 | 93 | Calculating many digits of pi is a challenging computational problem in itself. |
|
94 | 94 | Because we want to focus on the distribution of digits in this example, we |
|
95 | 95 | will use pre-computed digit of pi from the website of Professor Yasumasa |
|
96 |
Kanada at the University of Tok |
|
|
96 | Kanada at the University of Tokyo (http://www.super-computing.org). These | |
|
97 | 97 | digits come in a set of text files (ftp://pi.super-computing.org/.2/pi200m/) |
|
98 | 98 | that each have 10 million digits of pi. |
|
99 | 99 | |
|
100 | 100 | For the parallel calculation, we have copied these files to the local hard |
|
101 | 101 | drives of the compute nodes. A total of 15 of these files will be used, for a |
|
102 | 102 | total of 150 million digits of pi. To make things a little more interesting we |
|
103 | 103 | will calculate the frequencies of all 2 digits sequences (00-99) and then plot |
|
104 | 104 | the result using a 2D matrix in Matplotlib. |
|
105 | 105 | |
|
106 | 106 | The overall idea of the calculation is simple: each IPython engine will |
|
107 | 107 | compute the two digit counts for the digits in a single file. Then in a final |
|
108 | 108 | step the counts from each engine will be added up. To perform this |
|
109 | 109 | calculation, we will need two top-level functions from :file:`pidigits.py`: |
|
110 | 110 | |
|
111 |
.. literalinclude:: ../../examples/ |
|
|
111 | .. literalinclude:: ../../examples/newparallel/pidigits.py | |
|
112 | 112 | :language: python |
|
113 | 113 | :lines: 34-49 |
|
114 | 114 | |
|
115 | 115 | We will also use the :func:`plot_two_digit_freqs` function to plot the |
|
116 | 116 | results. The code to run this calculation in parallel is contained in |
|
117 |
:file:`docs/examples/ |
|
|
117 | :file:`docs/examples/newparallel/parallelpi.py`. This code can be run in parallel | |
|
118 | 118 | using IPython by following these steps: |
|
119 | 119 | |
|
120 | 1. Copy the text files with the digits of pi | |
|
121 | (ftp://pi.super-computing.org/.2/pi200m/) to the working directory of the | |
|
122 | engines on the compute nodes. | |
|
123 | 2. Use :command:`ipclusterz` to start 15 engines. We used an 8 core (2 quad | |
|
120 | 1. Use :command:`ipclusterz` to start 15 engines. We used an 8 core (2 quad | |
|
124 | 121 | core CPUs) cluster with hyperthreading enabled which makes the 8 cores |
|
125 | 122 | looks like 16 (1 controller + 15 engines) in the OS. However, the maximum |
|
126 | 123 | speedup we can observe is still only 8x. |
|
127 |
|
|
|
128 | up IPython in pylab mode and type ``run parallelpi.py``. | |
|
124 | 2. With the file :file:`parallelpi.py` in your current working directory, open | |
|
125 | up IPython in pylab mode and type ``run parallelpi.py``. This will download | |
|
126 | the pi files via ftp the first time you run it, if they are not | |
|
127 | present in the Engines' working directory. | |
|
129 | 128 | |
|
130 | 129 | When run on our 8 core cluster, we observe a speedup of 7.7x. This is slightly |
|
131 | 130 | less than linear scaling (8x) because the controller is also running on one of |
|
132 | 131 | the cores. |
|
133 | 132 | |
|
134 | 133 | To emphasize the interactive nature of IPython, we now show how the |
|
135 | 134 | calculation can also be run by simply typing the commands from |
|
136 | 135 | :file:`parallelpi.py` interactively into IPython: |
|
137 | 136 | |
|
138 | 137 | .. sourcecode:: ipython |
|
139 | 138 | |
|
140 | 139 | In [1]: from IPython.zmq.parallel import client |
|
141 | 2009-11-19 11:32:38-0800 [-] Log opened. | |
|
142 | 140 | |
|
143 |
# The |
|
|
144 |
# We simply pass |
|
|
141 | # The Client allows us to use the engines interactively. | |
|
142 | # We simply pass Client the name of the cluster profile we | |
|
145 | 143 | # are using. |
|
146 | 144 | In [2]: c = client.Client(profile='mycluster') |
|
147 | 2009-11-19 11:32:44-0800 [-] Connecting [0] | |
|
148 | 2009-11-19 11:32:44-0800 [Negotiation,client] Connected: ./ipcontroller-mec.furl | |
|
149 | 145 | |
|
150 |
In [3]: |
|
|
146 | In [3]: c.ids | |
|
151 | 147 | Out[3]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] |
|
152 | 148 | |
|
153 | 149 | In [4]: run pidigits.py |
|
154 | 150 | |
|
155 |
In [5]: filestring = 'pi200m |
|
|
151 | In [5]: filestring = 'pi200m.ascii.%(i)02dof20' | |
|
156 | 152 | |
|
157 | 153 | # Create the list of files to process. |
|
158 | 154 | In [6]: files = [filestring % {'i':i} for i in range(1,16)] |
|
159 | 155 | |
|
160 | 156 | In [7]: files |
|
161 | 157 | Out[7]: |
|
162 |
['pi200m |
|
|
163 |
'pi200m |
|
|
164 |
'pi200m |
|
|
165 |
'pi200m |
|
|
166 |
'pi200m |
|
|
167 |
'pi200m |
|
|
168 |
'pi200m |
|
|
169 |
'pi200m |
|
|
170 |
'pi200m |
|
|
171 |
'pi200m |
|
|
172 |
'pi200m |
|
|
173 |
'pi200m |
|
|
174 |
'pi200m |
|
|
175 |
'pi200m |
|
|
176 |
'pi200m |
|
|
177 | ||
|
178 | # This is the parallel calculation using the MultiEngineClient.map method | |
|
158 | ['pi200m.ascii.01of20', | |
|
159 | 'pi200m.ascii.02of20', | |
|
160 | 'pi200m.ascii.03of20', | |
|
161 | 'pi200m.ascii.04of20', | |
|
162 | 'pi200m.ascii.05of20', | |
|
163 | 'pi200m.ascii.06of20', | |
|
164 | 'pi200m.ascii.07of20', | |
|
165 | 'pi200m.ascii.08of20', | |
|
166 | 'pi200m.ascii.09of20', | |
|
167 | 'pi200m.ascii.10of20', | |
|
168 | 'pi200m.ascii.11of20', | |
|
169 | 'pi200m.ascii.12of20', | |
|
170 | 'pi200m.ascii.13of20', | |
|
171 | 'pi200m.ascii.14of20', | |
|
172 | 'pi200m.ascii.15of20'] | |
|
173 | ||
|
174 | # download the data files if they don't already exist: | |
|
175 | In [8]: c.map(fetch_pi_file, files) | |
|
176 | ||
|
177 | # This is the parallel calculation using the Client.map method | |
|
179 | 178 | # which applies compute_two_digit_freqs to each file in files in parallel. |
|
180 |
In [ |
|
|
179 | In [9]: freqs_all = c.map(compute_two_digit_freqs, files) | |
|
181 | 180 | |
|
182 | 181 | # Add up the frequencies from each engine. |
|
183 |
In [ |
|
|
182 | In [10]: freqs = reduce_freqs(freqs_all) | |
|
184 | 183 | |
|
185 |
In [ |
|
|
186 |
Out[ |
|
|
184 | In [11]: plot_two_digit_freqs(freqs) | |
|
185 | Out[11]: <matplotlib.image.AxesImage object at 0x18beb110> | |
|
187 | 186 | |
|
188 |
In [1 |
|
|
189 |
Out[1 |
|
|
187 | In [12]: plt.title('2 digit counts of 150m digits of pi') | |
|
188 | Out[12]: <matplotlib.text.Text object at 0x18d1f9b0> | |
|
190 | 189 | |
|
191 | 190 | The resulting plot generated by Matplotlib is shown below. The colors indicate |
|
192 | 191 | which two digit sequences are more (red) or less (blue) likely to occur in the |
|
193 | 192 | first 150 million digits of pi. We clearly see that the sequence "41" is |
|
194 | 193 | most likely and that "06" and "07" are least likely. Further analysis would |
|
195 | 194 | show that the relative size of the statistical fluctuations have decreased |
|
196 | 195 | compared to the 10,000 digit calculation. |
|
197 | 196 | |
|
198 | .. image:: two_digit_counts.* | |
|
197 | .. image:: ../parallel/two_digit_counts.* | |
|
199 | 198 | |
|
200 | 199 | |
|
201 | 200 | Parallel options pricing |
|
202 | 201 | ======================== |
|
203 | 202 | |
|
204 | 203 | An option is a financial contract that gives the buyer of the contract the |
|
205 | 204 | right to buy (a "call") or sell (a "put") a secondary asset (a stock for |
|
206 | 205 | example) at a particular date in the future (the expiration date) for a |
|
207 | 206 | pre-agreed upon price (the strike price). For this right, the buyer pays the |
|
208 | 207 | seller a premium (the option price). There are a wide variety of flavors of |
|
209 | 208 | options (American, European, Asian, etc.) that are useful for different |
|
210 | 209 | purposes: hedging against risk, speculation, etc. |
|
211 | 210 | |
|
212 | 211 | Much of modern finance is driven by the need to price these contracts |
|
213 | 212 | accurately based on what is known about the properties (such as volatility) of |
|
214 | 213 | the underlying asset. One method of pricing options is to use a Monte Carlo |
|
215 | 214 | simulation of the underlying asset price. In this example we use this approach |
|
216 | 215 | to price both European and Asian (path dependent) options for various strike |
|
217 | 216 | prices and volatilities. |
|
218 | 217 | |
|
219 | 218 | The code for this example can be found in the :file:`docs/examples/kernel` |
|
220 | 219 | directory of the IPython source. The function :func:`price_options` in |
|
221 | 220 | :file:`mcpricer.py` implements the basic Monte Carlo pricing algorithm using |
|
222 | 221 | the NumPy package and is shown here: |
|
223 | 222 | |
|
224 | 223 | .. literalinclude:: ../../examples/kernel/mcpricer.py |
|
225 | 224 | :language: python |
|
226 | 225 | |
|
227 |
To run this code in parallel, we will use IPython's :class:` |
|
|
226 | To run this code in parallel, we will use IPython's :class:`LoadBalancedView` class, | |
|
228 | 227 | which distributes work to the engines using dynamic load balancing. This |
|
229 |
|
|
|
230 |
the previous example. The parallel calculation using :class:` |
|
|
228 | view is a wrapper of the :class:`Client` class shown in | |
|
229 | the previous example. The parallel calculation using :class:`LoadBalancedView` can | |
|
231 | 230 | be found in the file :file:`mcpricer.py`. The code in this file creates a |
|
232 | 231 | :class:`TaskClient` instance and then submits a set of tasks using |
|
233 | 232 | :meth:`TaskClient.run` that calculate the option prices for different |
|
234 | 233 | volatilities and strike prices. The results are then plotted as a 2D contour |
|
235 | 234 | plot using Matplotlib. |
|
236 | 235 | |
|
237 | 236 | .. literalinclude:: ../../examples/kernel/mcdriver.py |
|
238 | 237 | :language: python |
|
239 | 238 | |
|
240 | 239 | To use this code, start an IPython cluster using :command:`ipclusterz`, open |
|
241 | 240 | IPython in the pylab mode with the file :file:`mcdriver.py` in your current |
|
242 | 241 | working directory and then type: |
|
243 | 242 | |
|
244 | 243 | .. sourcecode:: ipython |
|
245 | 244 | |
|
246 | 245 | In [7]: run mcdriver.py |
|
247 | 246 | Submitted tasks: [0, 1, 2, ...] |
|
248 | 247 | |
|
249 | 248 | Once all the tasks have finished, the results can be plotted using the |
|
250 | 249 | :func:`plot_options` function. Here we make contour plots of the Asian |
|
251 | 250 | call and Asian put options as function of the volatility and strike price: |
|
252 | 251 | |
|
253 | 252 | .. sourcecode:: ipython |
|
254 | 253 | |
|
255 | 254 | In [8]: plot_options(sigma_vals, K_vals, prices['acall']) |
|
256 | 255 | |
|
257 | 256 | In [9]: plt.figure() |
|
258 | 257 | Out[9]: <matplotlib.figure.Figure object at 0x18c178d0> |
|
259 | 258 | |
|
260 | 259 | In [10]: plot_options(sigma_vals, K_vals, prices['aput']) |
|
261 | 260 | |
|
262 | 261 | These results are shown in the two figures below. On a 8 core cluster the |
|
263 | 262 | entire calculation (10 strike prices, 10 volatilities, 100,000 paths for each) |
|
264 | 263 | took 30 seconds in parallel, giving a speedup of 7.7x, which is comparable |
|
265 | 264 | to the speedup observed in our previous example. |
|
266 | 265 | |
|
267 | .. image:: asian_call.* | |
|
266 | .. image:: ../parallel/asian_call.* | |
|
268 | 267 | |
|
269 | .. image:: asian_put.* | |
|
268 | .. image:: ../parallel/asian_put.* | |
|
270 | 269 | |
|
271 | 270 | Conclusion |
|
272 | 271 | ========== |
|
273 | 272 | |
|
274 | 273 | To conclude these examples, we summarize the key features of IPython's |
|
275 | 274 | parallel architecture that have been demonstrated: |
|
276 | 275 | |
|
277 | 276 | * Serial code can be parallelized often with only a few extra lines of code. |
|
278 |
We have used the :class:` |
|
|
277 | We have used the :class:`DirectView` and :class:`LoadBalancedView` classes | |
|
279 | 278 | for this purpose. |
|
280 | 279 | * The resulting parallel code can be run without ever leaving the IPython's |
|
281 | 280 | interactive shell. |
|
282 | 281 | * Any data computed in parallel can be explored interactively through |
|
283 | 282 | visualization or further numerical calculations. |
|
284 | 283 | * We have run these examples on a cluster running Windows HPC Server 2008. |
|
285 | 284 | IPython's built in support for the Windows HPC job scheduler makes it |
|
286 | 285 | easy to get started with IPython's parallel capabilities. |
|
286 | ||
|
287 | .. note:: | |
|
288 | ||
|
289 | The newparallel code has never been run on Windows HPC Server, so the last | |
|
290 | conclusion is untested. |
@@ -1,186 +1,187 | |||
|
1 | 1 | .. _parallelmpi: |
|
2 | 2 | |
|
3 | 3 | ======================= |
|
4 | 4 | Using MPI with IPython |
|
5 | 5 | ======================= |
|
6 | 6 | |
|
7 | 7 | .. note:: |
|
8 | 8 | |
|
9 | 9 | Not adapted to zmq yet |
|
10 | This is out of date wrt ipcluster in general as well | |
|
10 | 11 | |
|
11 | 12 | Often, a parallel algorithm will require moving data between the engines. One |
|
12 | 13 | way of accomplishing this is by doing a pull and then a push using the |
|
13 | 14 | multiengine client. However, this will be slow as all the data has to go |
|
14 | 15 | through the controller to the client and then back through the controller, to |
|
15 | 16 | its final destination. |
|
16 | 17 | |
|
17 | 18 | A much better way of moving data between engines is to use a message passing |
|
18 | 19 | library, such as the Message Passing Interface (MPI) [MPI]_. IPython's |
|
19 | 20 | parallel computing architecture has been designed from the ground up to |
|
20 | 21 | integrate with MPI. This document describes how to use MPI with IPython. |
|
21 | 22 | |
|
22 | 23 | Additional installation requirements |
|
23 | 24 | ==================================== |
|
24 | 25 | |
|
25 | 26 | If you want to use MPI with IPython, you will need to install: |
|
26 | 27 | |
|
27 | 28 | * A standard MPI implementation such as OpenMPI [OpenMPI]_ or MPICH. |
|
28 | 29 | * The mpi4py [mpi4py]_ package. |
|
29 | 30 | |
|
30 | 31 | .. note:: |
|
31 | 32 | |
|
32 | 33 | The mpi4py package is not a strict requirement. However, you need to |
|
33 | 34 | have *some* way of calling MPI from Python. You also need some way of |
|
34 | 35 | making sure that :func:`MPI_Init` is called when the IPython engines start |
|
35 | 36 | up. There are a number of ways of doing this and a good number of |
|
36 | 37 | associated subtleties. We highly recommend just using mpi4py as it |
|
37 | 38 | takes care of most of these problems. If you want to do something |
|
38 | 39 | different, let us know and we can help you get started. |
|
39 | 40 | |
|
40 | 41 | Starting the engines with MPI enabled |
|
41 | 42 | ===================================== |
|
42 | 43 | |
|
43 | 44 | To use code that calls MPI, there are typically two things that MPI requires. |
|
44 | 45 | |
|
45 | 46 | 1. The process that wants to call MPI must be started using |
|
46 | 47 | :command:`mpiexec` or a batch system (like PBS) that has MPI support. |
|
47 | 48 | 2. Once the process starts, it must call :func:`MPI_Init`. |
|
48 | 49 | |
|
49 | 50 | There are a couple of ways that you can start the IPython engines and get |
|
50 | 51 | these things to happen. |
|
51 | 52 | |
|
52 | 53 | Automatic starting using :command:`mpiexec` and :command:`ipclusterz` |
|
53 | 54 | ----------------------------------------------------------------------
|
54 | 55 | |
|
55 | 56 | The easiest approach is to use the `mpiexec` mode of :command:`ipclusterz`, |
|
56 | 57 | which will first start a controller and then a set of engines using |
|
57 | 58 | :command:`mpiexec`:: |
|
58 | 59 | |
|
59 | 60 | $ ipclusterz mpiexec -n 4 |
|
60 | 61 | |
|
61 | 62 | This approach is best as interrupting :command:`ipclusterz` will automatically |
|
62 | 63 | stop and clean up the controller and engines. |
|
63 | 64 | |
|
64 | 65 | Manual starting using :command:`mpiexec` |
|
65 | 66 | ---------------------------------------- |
|
66 | 67 | |
|
67 | 68 | If you want to start the IPython engines using the :command:`mpiexec`, just |
|
68 | 69 | do:: |
|
69 | 70 | |
|
70 | 71 | $ mpiexec -n 4 ipengine --mpi=mpi4py |
|
71 | 72 | |
|
72 | 73 | This requires that you already have a controller running and that the FURL |
|
73 | 74 | files for the engines are in place. We also have built in support for |
|
74 | 75 | PyTrilinos [PyTrilinos]_, which can be used (assuming it is installed) by
|
75 | 76 | starting the engines with:: |
|
76 | 77 | |
|
77 | 78 | mpiexec -n 4 ipengine --mpi=pytrilinos |
|
78 | 79 | |
|
79 | 80 | Automatic starting using PBS and :command:`ipclusterz` |
|
80 | 81 | -------------------------------------------------------
|
81 | 82 | |
|
82 | 83 | The :command:`ipclusterz` command also has built-in integration with PBS. For |
|
83 | 84 | more information on this approach, see our documentation on :ref:`ipclusterz |
|
84 | 85 | <parallel_process>`. |
|
85 | 86 | |
|
86 | 87 | Actually using MPI |
|
87 | 88 | ================== |
|
88 | 89 | |
|
89 | 90 | Once the engines are running with MPI enabled, you are ready to go. You can |
|
90 | 91 | now call any code that uses MPI in the IPython engines. And, all of this can |
|
91 | 92 | be done interactively. Here we show a simple example that uses mpi4py |
|
92 | 93 | [mpi4py]_ version 1.1.0 or later. |
|
93 | 94 | |
|
94 | 95 | First, let's define a simple function that uses MPI to calculate the sum of a
|
95 | 96 | distributed array. Save the following text in a file called :file:`psum.py`: |
|
96 | 97 | |
|
97 | 98 | .. sourcecode:: python |
|
98 | 99 | |
|
99 | 100 | from mpi4py import MPI |
|
100 | 101 | import numpy as np |
|
101 | 102 | |
|
102 | 103 | def psum(a): |
|
103 | 104 | s = np.sum(a) |
|
104 | 105 | rcvBuf = np.array(0.0,'d') |
|
105 | 106 | MPI.COMM_WORLD.Allreduce([s, MPI.DOUBLE], |
|
106 | 107 | [rcvBuf, MPI.DOUBLE], |
|
107 | 108 | op=MPI.SUM) |
|
108 | 109 | return rcvBuf |
|
109 | 110 | |
|
110 | 111 | Now, start an IPython cluster in the same directory as :file:`psum.py`:: |
|
111 | 112 | |
|
112 | 113 | $ ipclusterz mpiexec -n 4 |
|
113 | 114 | |
|
114 | 115 | Finally, connect to the cluster and use this function interactively. In this |
|
115 | 116 | case, we create a random array on each engine and sum up all the random arrays |
|
116 | 117 | using our :func:`psum` function: |
|
117 | 118 | |
|
118 | 119 | .. sourcecode:: ipython |
|
119 | 120 | |
|
120 | 121 | In [1]: from IPython.zmq.parallel import client |
|
121 | 122 | |
|
122 | 123 | In [2]: c = client.Client() |
|
123 | 124 | |
|
124 | 125 | In [3]: c.activate()
|
125 | 126 | |
|
126 | 127 | In [4]: px import numpy as np |
|
127 | 128 | Parallel execution on engines: all |
|
128 | 129 | Out[4]: |
|
129 | 130 | <Results List> |
|
130 | 131 | [0] In [13]: import numpy as np |
|
131 | 132 | [1] In [13]: import numpy as np |
|
132 | 133 | [2] In [13]: import numpy as np |
|
133 | 134 | [3] In [13]: import numpy as np |
|
134 | 135 | |
|
135 | 136 | In [6]: px a = np.random.rand(100) |
|
136 | 137 | Parallel execution on engines: all |
|
137 | 138 | Out[6]: |
|
138 | 139 | <Results List> |
|
139 | 140 | [0] In [15]: a = np.random.rand(100) |
|
140 | 141 | [1] In [15]: a = np.random.rand(100) |
|
141 | 142 | [2] In [15]: a = np.random.rand(100) |
|
142 | 143 | [3] In [15]: a = np.random.rand(100) |
|
143 | 144 | |
|
144 | 145 | In [7]: px from psum import psum |
|
145 | 146 | Parallel execution on engines: all |
|
146 | 147 | Out[7]: |
|
147 | 148 | <Results List> |
|
148 | 149 | [0] In [16]: from psum import psum |
|
149 | 150 | [1] In [16]: from psum import psum |
|
150 | 151 | [2] In [16]: from psum import psum |
|
151 | 152 | [3] In [16]: from psum import psum |
|
152 | 153 | |
|
153 | 154 | In [8]: px s = psum(a) |
|
154 | 155 | Parallel execution on engines: all |
|
155 | 156 | Out[8]: |
|
156 | 157 | <Results List> |
|
157 | 158 | [0] In [17]: s = psum(a) |
|
158 | 159 | [1] In [17]: s = psum(a) |
|
159 | 160 | [2] In [17]: s = psum(a) |
|
160 | 161 | [3] In [17]: s = psum(a) |
|
161 | 162 | |
|
162 | 163 | In [9]: px print s |
|
163 | 164 | Parallel execution on engines: all |
|
164 | 165 | Out[9]: |
|
165 | 166 | <Results List> |
|
166 | 167 | [0] In [18]: print s |
|
167 | 168 | [0] Out[18]: 187.451545803 |
|
168 | 169 | |
|
169 | 170 | [1] In [18]: print s |
|
170 | 171 | [1] Out[18]: 187.451545803 |
|
171 | 172 | |
|
172 | 173 | [2] In [18]: print s |
|
173 | 174 | [2] Out[18]: 187.451545803 |
|
174 | 175 | |
|
175 | 176 | [3] In [18]: print s |
|
176 | 177 | [3] Out[18]: 187.451545803 |
|
177 | 178 | |
|
178 | 179 | Any Python code that makes calls to MPI can be used in this manner, including |
|
179 | 180 | compiled C, C++ and Fortran libraries that have been exposed to Python. |
|
180 | 181 | |
|
181 | 182 | .. [MPI] Message Passing Interface. http://www-unix.mcs.anl.gov/mpi/ |
|
182 | 183 | .. [mpi4py] MPI for Python. mpi4py: http://mpi4py.scipy.org/ |
|
183 | 184 | .. [OpenMPI] Open MPI. http://www.open-mpi.org/ |
|
184 | 185 | .. [PyTrilinos] PyTrilinos. http://trilinos.sandia.gov/packages/pytrilinos/ |
|
185 | 186 | |
|
186 | 187 |
General Comments 0
You need to be logged in to leave comments.
Login now