Show More
@@ -0,0 +1,144 b'' | |||||
|
1 | #!/usr/bin/env python | |||
|
2 | """Run a Monte-Carlo options pricer in parallel.""" | |||
|
3 | ||||
|
4 | #----------------------------------------------------------------------------- | |||
|
5 | # Imports | |||
|
6 | #----------------------------------------------------------------------------- | |||
|
7 | ||||
|
8 | import sys | |||
|
9 | import time | |||
|
10 | from IPython.zmq.parallel import client | |||
|
11 | import numpy as np | |||
|
12 | from mcpricer import price_options | |||
|
13 | from matplotlib import pyplot as plt | |||
|
14 | ||||
|
15 | #----------------------------------------------------------------------------- | |||
|
16 | # Setup parameters for the run | |||
|
17 | #----------------------------------------------------------------------------- | |||
|
18 | ||||
|
def ask_question(text, the_type, default):
    """Prompt the user for a value, coerced to *the_type*.

    The default (shown in brackets in the prompt) is used when the user
    just hits return, so the caller always gets a value of the requested
    type back.
    """
    prompt = '%s [%r]: ' % (text, the_type(default))
    answer = raw_input(prompt)
    return the_type(answer) if answer else the_type(default)
|
26 | ||||
|
# Interactively gather run parameters; an empty reply takes the default
# shown in brackets (see ask_question above).
cluster_profile = ask_question("Cluster profile", str, "default")
price = ask_question("Initial price", float, 100.0)
rate = ask_question("Interest rate", float, 0.05)
days = ask_question("Days to expiration", int, 260)
paths = ask_question("Number of MC paths", int, 10000)
n_strikes = ask_question("Number of strike values", int, 5)
min_strike = ask_question("Min strike price", float, 90.0)
max_strike = ask_question("Max strike price", float, 110.0)
n_sigmas = ask_question("Number of volatility values", int, 5)
min_sigma = ask_question("Min volatility", float, 0.1)
max_sigma = ask_question("Max volatility", float, 0.4)

# The (strike, volatility) grid at which the options will be priced.
strike_vals = np.linspace(min_strike, max_strike, n_strikes)
sigma_vals = np.linspace(min_sigma, max_sigma, n_sigmas)

#-----------------------------------------------------------------------------
# Setup for parallel calculation
#-----------------------------------------------------------------------------

# The Client is used to setup the calculation and works with all
# engines.
c = client.Client(profile=cluster_profile)

# A LoadBalancedView is an interface to the engines that provides dynamic load
# balancing at the expense of not knowing which engine will execute the code.
view = c[None]

# Initialize the common code on the engines. This Python module has the
# price_options function that prices the options.
# NOTE(review): no initialization call actually follows this comment --
# presumably something like running mcpricer.py on the engines is missing
# here. Verify the engines can resolve price_options before submitting.

#-----------------------------------------------------------------------------
# Perform parallel calculation
#-----------------------------------------------------------------------------

print "Running parallel calculation over strike prices and volatilities..."
print "Strike prices: ", strike_vals
print "Volatilities: ", sigma_vals
sys.stdout.flush()

# Submit one task per (strike, sigma) grid point. apply_async returns an
# AsyncResult immediately, so all tasks are queued before any completes.
t1 = time.time()
async_results = []
for strike in strike_vals:
    for sigma in sigma_vals:
        ar = view.apply_async(price_options, price, strike, sigma, rate, days, paths)
        async_results.append(ar)

print "Submitted tasks: ", len(async_results)
sys.stdout.flush()

# Block until all tasks are completed.
c.barrier(async_results)
t2 = time.time()
t = t2-t1  # wall-clock time for the whole parallel run, in seconds

print "Parallel calculation completed, time = %s s" % t
print "Collecting results..."

# Each AsyncResult yields the (ecall, eput, acall, aput) tuple returned by
# price_options for one grid point, in submission order.
results = [ar.get() for ar in async_results]

# Assemble the result into a structured NumPy array.
prices = np.empty(n_strikes*n_sigmas,
    dtype=[('ecall',float),('eput',float),('acall',float),('aput',float)]
)

# NOTE(review): the loop variable rebinds the module-level name `price`
# (the initial stock price asked for above). Harmless because `price` is
# not used again afterwards, but a distinct name would be clearer.
for i, price in enumerate(results):
    prices[i] = tuple(price)

# Reshape to the grid: tasks were submitted with sigma varying fastest, so
# rows correspond to strikes and columns to volatilities.
prices.shape = (n_strikes, n_sigmas)
strike_mesh, sigma_mesh = np.meshgrid(strike_vals, sigma_vals)

print "Results are available: strike_mesh, sigma_mesh, prices"
print "To plot results type 'plot_options(sigma_mesh, strike_mesh, prices)'"
|
101 | ||||
|
102 | #----------------------------------------------------------------------------- | |||
|
103 | # Utilities | |||
|
104 | #----------------------------------------------------------------------------- | |||
|
105 | ||||
|
def plot_options(sigma_mesh, strike_mesh, prices):
    """
    Make a contour plot of the option price in (sigma, strike) space.

    One subplot is drawn per field of the structured ``prices`` array:
    European/Asian call on the top row, European/Asian put on the bottom.
    Axis labels are only drawn on the outer edges of the 2x2 grid.
    """
    # (subplot position, record field, title, draw xlabel, draw ylabel)
    panels = [
        (221, 'ecall', 'European Call', False, True),
        (222, 'acall', 'Asian Call', False, False),
        (223, 'eput', 'European Put', True, True),
        (224, 'aput', 'Asian Put', True, False),
    ]
    plt.figure(1)
    for position, field, title, label_x, label_y in panels:
        plt.subplot(position)
        plt.contourf(sigma_mesh, strike_mesh, prices[field])
        plt.axis('tight')
        plt.colorbar()
        plt.title(title)
        if label_x:
            plt.xlabel("Volatility")
        if label_y:
            plt.ylabel("Strike Price")
|
139 | ||||
|
140 | ||||
|
141 | ||||
|
142 | ||||
|
143 | ||||
|
144 |
@@ -0,0 +1,45 b'' | |||||
|
1 | ||||
|
def price_options(S=100.0, K=100.0, sigma=0.25, r=0.05, days=260, paths=10000):
    """
    Price European and Asian options using a Monte Carlo method.

    Parameters
    ----------
    S : float
        The initial price of the stock.
    K : float
        The strike price of the option.
    sigma : float
        The volatility of the stock.
    r : float
        The risk free interest rate.
    days : int
        The number of days until the option expires.
    paths : int
        The number of Monte Carlo paths used to price the option.

    Returns
    -------
    A tuple of (E. call, E. put, A. call, A. put) option prices.
    """
    # Imports are local so the whole function can be shipped to remote
    # IPython engines that have not imported anything themselves.
    import numpy as np
    from math import exp, sqrt

    step = 1.0/days
    drift = exp((r-0.5*sigma**2)*step)          # deterministic per-step growth
    vol_scale = sigma*sqrt(step)                # scales the daily Gaussian shock

    # Evolve all paths one trading day at a time under geometric Brownian
    # motion, accumulating the running sum needed for the Asian payoffs.
    spot = S*np.ones(paths, dtype='float64')
    spot_sum = np.zeros(paths, dtype='float64')
    for _ in range(days):
        spot = spot*(drift*np.exp(vol_scale*np.random.standard_normal(paths)))
        spot_sum = spot_sum + spot
    spot_avg = spot_sum/days

    # Discounted mean payoffs; the zero floor implements max(payoff, 0).
    floor = np.zeros(paths, dtype='float64')
    discount = exp(-r*step*days)
    euro_put = discount*np.mean(np.maximum(floor, K-spot))
    asian_put = discount*np.mean(np.maximum(floor, K-spot_avg))
    euro_call = discount*np.mean(np.maximum(floor, spot-K))
    asian_call = discount*np.mean(np.maximum(floor, spot_avg-K))
    return (euro_call, euro_put, asian_call, asian_put)
|
45 |
@@ -0,0 +1,63 b'' | |||||
|
1 | """Calculate statistics on the digits of pi in parallel. | |||
|
2 | ||||
|
3 | This program uses the functions in :file:`pidigits.py` to calculate | |||
|
4 | the frequencies of 2 digit sequences in the digits of pi. The | |||
|
5 | results are plotted using matplotlib. | |||
|
6 | ||||
|
7 | To run, text files from http://www.super-computing.org/ | |||
|
8 | must be installed in the working directory of the IPython engines. | |||
|
9 | The actual filenames to be used can be set with the ``filestring`` | |||
|
10 | variable below. | |||
|
11 | ||||
|
12 | The dataset we have been using for this is the 200 million digit one here: | |||
|
13 | ftp://pi.super-computing.org/.2/pi200m/ | |||
|
14 | ||||
|
15 | and the files used will be downloaded if they are not in the working directory | |||
|
16 | of the IPython engines. | |||
|
17 | """ | |||
|
18 | ||||
|
from IPython.zmq.parallel import client
from matplotlib import pyplot as plt
import numpy as np
from pidigits import *
from timeit import default_timer as clock

# Files with digits of pi (10m digits each)
filestring = 'pi200m.ascii.%(i)02dof20'
files = [filestring % {'i':i} for i in range(1,16)]

# Connect to the IPython cluster
c = client.Client()
# Run pidigits.py on the engines so they have the worker functions
# (fetch_pi_file, compute_two_digit_freqs, reduce_freqs, ...) defined.
c.run('pidigits.py')

# the number of engines
n = len(c.ids)
id0 = list(c.ids)[0]  # any single engine, used for the serial baseline below
# Fetch the pi files: each engine downloads into its own working directory
# if the file is not already present (see fetch_pi_file).
print "downloading %i files of pi"%n
c.map(fetch_pi_file, files[:n])
print "done"

# Run 10m digits on 1 engine to establish the serial baseline rate.
t1 = clock()
freqs10m = c[id0].apply_sync_bound(compute_two_digit_freqs, files[0])
t2 = clock()
digits_per_second1 = 10.0e6/(t2-t1)
print "Digits per second (1 core, 10m digits): ", digits_per_second1


# Run n*10m digits on all engines
t1 = clock()
c.block=True  # make c.map wait for, and return, the actual results
freqs_all = c.map(compute_two_digit_freqs, files[:n])
freqs150m = reduce_freqs(freqs_all)  # combine the per-engine count arrays
t2 = clock()
# NOTE(review): named after the original 8-engine benchmark, but this is
# really the n-engine throughput.
digits_per_second8 = n*10.0e6/(t2-t1)
# The "%i0m" format deliberately appends a literal "0m" to n, so n=15
# prints as "150m" (each file holds 10m digits).
print "Digits per second (%i engines, %i0m digits): "%(n,n), digits_per_second8

print "Speedup: ", digits_per_second8/digits_per_second1

plot_two_digit_freqs(freqs150m)
plt.title("2 digit sequences in %i0m digits of pi"%n)
plt.show()
|
63 |
@@ -0,0 +1,159 b'' | |||||
|
1 | """Compute statistics on the digits of pi. | |||
|
2 | ||||
|
3 | This uses precomputed digits of pi from the website | |||
|
4 | of Professor Yasumasa Kanada at the University of | |||
|
5 | Tokyo: http://www.super-computing.org/ | |||
|
6 | ||||
|
7 | Currently, there are only functions to read the | |||
|
8 | .txt (non-compressed, non-binary) files, but adding | |||
|
9 | support for compression and binary files would be | |||
|
10 | straightforward. | |||
|
11 | ||||
|
12 | This focuses on computing the number of times that | |||
|
13 | all 1, 2, n digits sequences occur in the digits of pi. | |||
|
14 | If the digits of pi are truly random, these frequencies | |||
|
15 | should be equal. | |||
|
16 | """ | |||
|
17 | ||||
|
18 | # Import statements | |||
|
19 | from __future__ import division, with_statement | |||
|
20 | ||||
|
21 | import os | |||
|
22 | import urllib | |||
|
23 | ||||
|
24 | import numpy as np | |||
|
25 | from matplotlib import pyplot as plt | |||
|
26 | ||||
|
27 | # Top-level functions | |||
|
28 | ||||
|
def fetch_pi_file(filename):
    """Download a segment of pi from super-computing.org
    if the file is not already present.
    """
    ftpdir = "ftp://pi.super-computing.org/.2/pi200m/"
    if os.path.exists(filename):
        # Already cached locally; nothing to do.
        return
    urllib.urlretrieve(ftpdir + filename, filename)
|
40 | ||||
|
def compute_one_digit_freqs(filename):
    """
    Read digits of pi from a file and compute the 1 digit frequencies.
    """
    return one_digit_freqs(txt_file_to_digits(filename))
|
48 | ||||
|
def compute_two_digit_freqs(filename):
    """
    Read digits of pi from a file and compute the 2 digit frequencies.
    """
    return two_digit_freqs(txt_file_to_digits(filename))
|
56 | ||||
|
def reduce_freqs(freqlist):
    """
    Add up a list of freq counts to get the total counts.

    Input arrays are left untouched; a fresh accumulator array (of the
    same shape/dtype as the first element) is returned.
    """
    return sum(freqlist, np.zeros_like(freqlist[0]))
|
65 | ||||
|
def compute_n_digit_freqs(filename, n):
    """
    Read digits of pi from a file and compute the n digit frequencies.
    """
    return n_digit_freqs(txt_file_to_digits(filename), n)
|
73 | ||||
|
74 | # Read digits from a txt file | |||
|
75 | ||||
|
def txt_file_to_digits(filename, the_type=str):
    """
    Yield the digits of pi read from a .txt file.

    Whitespace (spaces and newlines) used to format the source files is
    skipped; every remaining character is coerced with *the_type*.
    """
    with open(filename, 'r') as f:
        for line in f:
            for ch in line:
                if ch not in ('\n', ' '):
                    yield the_type(ch)
|
85 | ||||
|
86 | # Actual counting functions | |||
|
87 | ||||
|
def one_digit_freqs(digits, normalize=False):
    """
    Consume digits of pi and compute 1 digit freq. counts.

    Streams through *digits* (anything int() accepts) and returns a
    length-10 array of counts, or relative frequencies if *normalize*.
    """
    counts = np.zeros(10, dtype='i4')
    for digit in digits:
        counts[int(digit)] += 1
    return counts/counts.sum() if normalize else counts
|
98 | ||||
|
def two_digit_freqs(digits, normalize=False):
    """
    Consume digits of pi and compute 2 digits freq. counts.

    Parameters
    ----------
    digits : iterable of str
        Yields the digits of pi as one-character strings.
    normalize : bool
        If True, return relative frequencies instead of raw counts.

    Returns
    -------
    A length-100 NumPy array; the pair 'ab' is counted at index int('ab')
    (string concatenation keeps leading zeros: '0','7' -> index 7).
    """
    freqs = np.zeros(100, dtype='i4')
    it = iter(digits)
    try:
        last = next(it)  # next() works on Py2.6+ and Py3 (was digits.next())
    except StopIteration:
        return freqs  # fewer than two digits: nothing to count
    for this in it:
        # Count the pair ending at the current digit, then slide. Counting
        # before the shift (the old code counted one step behind) means the
        # final pair of the stream is no longer dropped.
        freqs[int(last + this)] += 1
        last = this
    if normalize:
        freqs = freqs/freqs.sum()
    return freqs
|
114 | ||||
|
def n_digit_freqs(digits, n, normalize=False):
    """
    Consume digits of pi and compute n digits freq. counts.

    This should only be used for 1-6 digits.

    Parameters
    ----------
    digits : iterable
        Yields the digits of pi one at a time (str or int).
    n : int
        Length of the digit sequences to count.
    normalize : bool
        If True, return relative frequencies instead of raw counts.

    Returns
    -------
    A length-10**n NumPy array; the sequence 'd1...dn' is counted at index
    int('d1...dn').
    """
    freqs = np.zeros(pow(10, n), dtype='i4')
    it = iter(digits)
    # Prime the sliding window with the first n digits, kept as strings so
    # leading zeros survive the join below (the old code stored them in an
    # int ndarray, which breaks under current NumPy and loses nothing here).
    window = []
    for _ in range(n):
        try:
            window.append(str(next(it)))  # next() works on Py2.6+/Py3 (was .next())
        except StopIteration:
            return freqs  # fewer than n digits available: nothing to count
    freqs[int(''.join(window))] += 1
    for d in it:
        # Slide the window one digit and count the new sequence. Counting
        # after the shift (the old code counted before it) means the final
        # window of the stream is no longer dropped.
        window.pop(0)
        window.append(str(d))
        freqs[int(''.join(window))] += 1
    if normalize:
        freqs = freqs/freqs.sum()
    return freqs
|
133 | ||||
|
134 | # Plotting functions | |||
|
135 | ||||
|
def plot_two_digit_freqs(f2):
    """
    Plot two digits frequency counts using matplotlib.

    The length-100 count array is shown as a 10x10 color matrix, with
    each cell annotated by the two-digit sequence it represents.
    """
    grid = f2.copy()
    grid.shape = (10, 10)
    ax = plt.matshow(grid)
    plt.colorbar()
    # The matrix is indexed [first digit][second digit], so the cell at
    # plot position (col, row) corresponds to the sequence str(row)+str(col).
    for col in range(10):
        for row in range(10):
            plt.text(col - 0.2, row + 0.2, str(row) + str(col))
    plt.ylabel('First digit')
    plt.xlabel('Second digit')
    return ax
|
150 | ||||
|
def plot_one_digit_freqs(f1):
    """
    Plot one digit frequency counts using matplotlib.

    Draws the ten counts as a connected line of blue dots and returns the
    line artists from the plot call.
    """
    axes = plt.plot(f1, 'bo-')
    plt.title('Single digit counts in pi')
    plt.xlabel('Digit')
    plt.ylabel('Count')
    return axes
@@ -4,14 +4,14 b' Parallel examples' | |||||
4 |
|
4 | |||
5 | .. note:: |
|
5 | .. note:: | |
6 |
|
6 | |||
7 | Not adapted to zmq yet |
|
7 | Performance numbers from ``IPython.kernel``, not newparallel | |
8 |
|
8 | |||
9 | In this section we describe two more involved examples of using an IPython |
|
9 | In this section we describe two more involved examples of using an IPython | |
10 | cluster to perform a parallel computation. In these examples, we will be using |
|
10 | cluster to perform a parallel computation. In these examples, we will be using | |
11 | IPython's "pylab" mode, which enables interactive plotting using the |
|
11 | IPython's "pylab" mode, which enables interactive plotting using the | |
12 | Matplotlib package. IPython can be started in this mode by typing:: |
|
12 | Matplotlib package. IPython can be started in this mode by typing:: | |
13 |
|
13 | |||
14 |
ipython - |
|
14 | ipython --pylab | |
15 |
|
15 | |||
16 | at the system command line. If this prints an error message, you will |
|
16 | at the system command line. If this prints an error message, you will | |
17 | need to install the default profiles from within IPython by doing, |
|
17 | need to install the default profiles from within IPython by doing, | |
@@ -82,7 +82,7 b' The resulting plot of the single digit counts shows that each digit occurs' | |||||
82 | approximately 1,000 times, but that with only 10,000 digits the |
|
82 | approximately 1,000 times, but that with only 10,000 digits the | |
83 | statistical fluctuations are still rather large: |
|
83 | statistical fluctuations are still rather large: | |
84 |
|
84 | |||
85 | .. image:: single_digits.* |
|
85 | .. image:: ../parallel/single_digits.* | |
86 |
|
86 | |||
87 | It is clear that to reduce the relative fluctuations in the counts, we need |
|
87 | It is clear that to reduce the relative fluctuations in the counts, we need | |
88 | to look at many more digits of pi. That brings us to the parallel calculation. |
|
88 | to look at many more digits of pi. That brings us to the parallel calculation. | |
@@ -93,7 +93,7 b' Parallel calculation' | |||||
93 | Calculating many digits of pi is a challenging computational problem in itself. |
|
93 | Calculating many digits of pi is a challenging computational problem in itself. | |
94 | Because we want to focus on the distribution of digits in this example, we |
|
94 | Because we want to focus on the distribution of digits in this example, we | |
95 | will use pre-computed digit of pi from the website of Professor Yasumasa |
|
95 | will use pre-computed digit of pi from the website of Professor Yasumasa | |
96 |
Kanada at the University of Tok |
|
96 | Kanada at the University of Tokyo (http://www.super-computing.org). These | |
97 | digits come in a set of text files (ftp://pi.super-computing.org/.2/pi200m/) |
|
97 | digits come in a set of text files (ftp://pi.super-computing.org/.2/pi200m/) | |
98 | that each have 10 million digits of pi. |
|
98 | that each have 10 million digits of pi. | |
99 |
|
99 | |||
@@ -108,24 +108,23 b' compute the two digit counts for the digits in a single file. Then in a final' | |||||
108 | step the counts from each engine will be added up. To perform this |
|
108 | step the counts from each engine will be added up. To perform this | |
109 | calculation, we will need two top-level functions from :file:`pidigits.py`: |
|
109 | calculation, we will need two top-level functions from :file:`pidigits.py`: | |
110 |
|
110 | |||
111 |
.. literalinclude:: ../../examples/ |
|
111 | .. literalinclude:: ../../examples/newparallel/pidigits.py | |
112 | :language: python |
|
112 | :language: python | |
113 | :lines: 34-49 |
|
113 | :lines: 34-49 | |
114 |
|
114 | |||
115 | We will also use the :func:`plot_two_digit_freqs` function to plot the |
|
115 | We will also use the :func:`plot_two_digit_freqs` function to plot the | |
116 | results. The code to run this calculation in parallel is contained in |
|
116 | results. The code to run this calculation in parallel is contained in | |
117 |
:file:`docs/examples/ |
|
117 | :file:`docs/examples/newparallel/parallelpi.py`. This code can be run in parallel | |
118 | using IPython by following these steps: |
|
118 | using IPython by following these steps: | |
119 |
|
119 | |||
120 | 1. Copy the text files with the digits of pi |
|
120 | 1. Use :command:`ipclusterz` to start 15 engines. We used an 8 core (2 quad | |
121 | (ftp://pi.super-computing.org/.2/pi200m/) to the working directory of the |
|
|||
122 | engines on the compute nodes. |
|
|||
123 | 2. Use :command:`ipclusterz` to start 15 engines. We used an 8 core (2 quad |
|
|||
124 | core CPUs) cluster with hyperthreading enabled which makes the 8 cores |
|
121 | core CPUs) cluster with hyperthreading enabled which makes the 8 cores | |
125 | looks like 16 (1 controller + 15 engines) in the OS. However, the maximum |
|
122 | looks like 16 (1 controller + 15 engines) in the OS. However, the maximum | |
126 | speedup we can observe is still only 8x. |
|
123 | speedup we can observe is still only 8x. | |
127 |
|
|
124 | 2. With the file :file:`parallelpi.py` in your current working directory, open | |
128 | up IPython in pylab mode and type ``run parallelpi.py``. |
|
125 | up IPython in pylab mode and type ``run parallelpi.py``. This will download | |
|
126 | the pi files via ftp the first time you run it, if they are not | |||
|
127 | present in the Engines' working directory. | |||
129 |
|
128 | |||
130 | When run on our 8 core cluster, we observe a speedup of 7.7x. This is slightly |
|
129 | When run on our 8 core cluster, we observe a speedup of 7.7x. This is slightly | |
131 | less than linear scaling (8x) because the controller is also running on one of |
|
130 | less than linear scaling (8x) because the controller is also running on one of | |
@@ -138,55 +137,55 b' calculation can also be run by simply typing the commands from' | |||||
138 | .. sourcecode:: ipython |
|
137 | .. sourcecode:: ipython | |
139 |
|
138 | |||
140 | In [1]: from IPython.zmq.parallel import client |
|
139 | In [1]: from IPython.zmq.parallel import client | |
141 | 2009-11-19 11:32:38-0800 [-] Log opened. |
|
|||
142 |
|
140 | |||
143 |
# The |
|
141 | # The Client allows us to use the engines interactively. | |
144 |
# We simply pass |
|
142 | # We simply pass Client the name of the cluster profile we | |
145 | # are using. |
|
143 | # are using. | |
146 | In [2]: c = client.Client(profile='mycluster') |
|
144 | In [2]: c = client.Client(profile='mycluster') | |
147 | 2009-11-19 11:32:44-0800 [-] Connecting [0] |
|
|||
148 | 2009-11-19 11:32:44-0800 [Negotiation,client] Connected: ./ipcontroller-mec.furl |
|
|||
149 |
|
145 | |||
150 |
In [3]: |
|
146 | In [3]: c.ids | |
151 | Out[3]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] |
|
147 | Out[3]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] | |
152 |
|
148 | |||
153 | In [4]: run pidigits.py |
|
149 | In [4]: run pidigits.py | |
154 |
|
150 | |||
155 |
In [5]: filestring = 'pi200m |
|
151 | In [5]: filestring = 'pi200m.ascii.%(i)02dof20' | |
156 |
|
152 | |||
157 | # Create the list of files to process. |
|
153 | # Create the list of files to process. | |
158 | In [6]: files = [filestring % {'i':i} for i in range(1,16)] |
|
154 | In [6]: files = [filestring % {'i':i} for i in range(1,16)] | |
159 |
|
155 | |||
160 | In [7]: files |
|
156 | In [7]: files | |
161 | Out[7]: |
|
157 | Out[7]: | |
162 |
['pi200m |
|
158 | ['pi200m.ascii.01of20', | |
163 |
'pi200m |
|
159 | 'pi200m.ascii.02of20', | |
164 |
'pi200m |
|
160 | 'pi200m.ascii.03of20', | |
165 |
'pi200m |
|
161 | 'pi200m.ascii.04of20', | |
166 |
'pi200m |
|
162 | 'pi200m.ascii.05of20', | |
167 |
'pi200m |
|
163 | 'pi200m.ascii.06of20', | |
168 |
'pi200m |
|
164 | 'pi200m.ascii.07of20', | |
169 |
'pi200m |
|
165 | 'pi200m.ascii.08of20', | |
170 |
'pi200m |
|
166 | 'pi200m.ascii.09of20', | |
171 |
'pi200m |
|
167 | 'pi200m.ascii.10of20', | |
172 |
'pi200m |
|
168 | 'pi200m.ascii.11of20', | |
173 |
'pi200m |
|
169 | 'pi200m.ascii.12of20', | |
174 |
'pi200m |
|
170 | 'pi200m.ascii.13of20', | |
175 |
'pi200m |
|
171 | 'pi200m.ascii.14of20', | |
176 |
'pi200m |
|
172 | 'pi200m.ascii.15of20'] | |
177 |
|
173 | |||
178 | # This is the parallel calculation using the MultiEngineClient.map method |
|
174 | # download the data files if they don't already exist: | |
|
175 | In [8]: c.map(fetch_pi_file, files) | |||
|
176 | ||||
|
177 | # This is the parallel calculation using the Client.map method | |||
179 | # which applies compute_two_digit_freqs to each file in files in parallel. |
|
178 | # which applies compute_two_digit_freqs to each file in files in parallel. | |
180 |
In [ |
|
179 | In [9]: freqs_all = c.map(compute_two_digit_freqs, files) | |
181 |
|
180 | |||
182 | # Add up the frequencies from each engine. |
|
181 | # Add up the frequencies from each engine. | |
183 |
In [ |
|
182 | In [10]: freqs = reduce_freqs(freqs_all) | |
184 |
|
183 | |||
185 |
In [ |
|
184 | In [11]: plot_two_digit_freqs(freqs) | |
186 |
Out[ |
|
185 | Out[11]: <matplotlib.image.AxesImage object at 0x18beb110> | |
187 |
|
186 | |||
188 |
In [1 |
|
187 | In [12]: plt.title('2 digit counts of 150m digits of pi') | |
189 |
Out[1 |
|
188 | Out[12]: <matplotlib.text.Text object at 0x18d1f9b0> | |
190 |
|
189 | |||
191 | The resulting plot generated by Matplotlib is shown below. The colors indicate |
|
190 | The resulting plot generated by Matplotlib is shown below. The colors indicate | |
192 | which two digit sequences are more (red) or less (blue) likely to occur in the |
|
191 | which two digit sequences are more (red) or less (blue) likely to occur in the | |
@@ -195,7 +194,7 b' most likely and that "06" and "07" are least likely. Further analysis would' | |||||
195 | show that the relative size of the statistical fluctuations have decreased |
|
194 | show that the relative size of the statistical fluctuations have decreased | |
196 | compared to the 10,000 digit calculation. |
|
195 | compared to the 10,000 digit calculation. | |
197 |
|
196 | |||
198 | .. image:: two_digit_counts.* |
|
197 | .. image:: ../parallel/two_digit_counts.* | |
199 |
|
198 | |||
200 |
|
199 | |||
201 | Parallel options pricing |
|
200 | Parallel options pricing | |
@@ -224,10 +223,10 b' the NumPy package and is shown here:' | |||||
224 | .. literalinclude:: ../../examples/kernel/mcpricer.py |
|
223 | .. literalinclude:: ../../examples/kernel/mcpricer.py | |
225 | :language: python |
|
224 | :language: python | |
226 |
|
225 | |||
227 |
To run this code in parallel, we will use IPython's :class:` |
|
226 | To run this code in parallel, we will use IPython's :class:`LoadBalancedView` class, | |
228 | which distributes work to the engines using dynamic load balancing. This |
|
227 | which distributes work to the engines using dynamic load balancing. This | |
229 |
|
|
228 | view is a wrapper of the :class:`Client` class shown in | |
230 |
the previous example. The parallel calculation using :class:` |
|
229 | the previous example. The parallel calculation using :class:`LoadBalancedView` can | |
231 | be found in the file :file:`mcpricer.py`. The code in this file creates a |
|
230 | be found in the file :file:`mcpricer.py`. The code in this file creates a | |
232 | :class:`TaskClient` instance and then submits a set of tasks using |
|
231 | :class:`TaskClient` instance and then submits a set of tasks using | |
233 | :meth:`TaskClient.run` that calculate the option prices for different |
|
232 | :meth:`TaskClient.run` that calculate the option prices for different | |
@@ -264,9 +263,9 b' entire calculation (10 strike prices, 10 volatilities, 100,000 paths for each)' | |||||
264 | took 30 seconds in parallel, giving a speedup of 7.7x, which is comparable |
|
263 | took 30 seconds in parallel, giving a speedup of 7.7x, which is comparable | |
265 | to the speedup observed in our previous example. |
|
264 | to the speedup observed in our previous example. | |
266 |
|
265 | |||
267 | .. image:: asian_call.* |
|
266 | .. image:: ../parallel/asian_call.* | |
268 |
|
267 | |||
269 | .. image:: asian_put.* |
|
268 | .. image:: ../parallel/asian_put.* | |
270 |
|
269 | |||
271 | Conclusion |
|
270 | Conclusion | |
272 | ========== |
|
271 | ========== | |
@@ -275,7 +274,7 b" To conclude these examples, we summarize the key features of IPython's" | |||||
275 | parallel architecture that have been demonstrated: |
|
274 | parallel architecture that have been demonstrated: | |
276 |
|
275 | |||
277 | * Serial code can be parallelized often with only a few extra lines of code. |
|
276 | * Serial code can be parallelized often with only a few extra lines of code. | |
278 |
We have used the :class:` |
|
277 | We have used the :class:`DirectView` and :class:`LoadBalancedView` classes | |
279 | for this purpose. |
|
278 | for this purpose. | |
280 | * The resulting parallel code can be run without ever leaving the IPython's |
|
279 | * The resulting parallel code can be run without ever leaving the IPython's | |
281 | interactive shell. |
|
280 | interactive shell. | |
@@ -284,3 +283,8 b' parallel architecture that have been demonstrated:' | |||||
284 | * We have run these examples on a cluster running Windows HPC Server 2008. |
|
283 | * We have run these examples on a cluster running Windows HPC Server 2008. | |
285 | IPython's built in support for the Windows HPC job scheduler makes it |
|
284 | IPython's built in support for the Windows HPC job scheduler makes it | |
286 | easy to get started with IPython's parallel capabilities. |
|
285 | easy to get started with IPython's parallel capabilities. | |
|
286 | ||||
|
287 | .. note:: | |||
|
288 | ||||
|
289 | The newparallel code has never been run on Windows HPC Server, so the last | |||
|
290 | conclusion is untested. |
@@ -7,6 +7,7 b' Using MPI with IPython' | |||||
7 | .. note:: |
|
7 | .. note:: | |
8 |
|
8 | |||
9 | Not adapted to zmq yet |
|
9 | Not adapted to zmq yet | |
|
10 | This is out of date wrt ipcluster in general as well | |||
10 |
|
11 | |||
11 | Often, a parallel algorithm will require moving data between the engines. One |
|
12 | Often, a parallel algorithm will require moving data between the engines. One | |
12 | way of accomplishing this is by doing a pull and then a push using the |
|
13 | way of accomplishing this is by doing a pull and then a push using the |
General Comments 0
You need to be logged in to leave comments.
Login now