##// END OF EJS Templates
update parallel docs with some changes from scipy tutorial...
MinRK -
Show More
@@ -0,0 +1,61 b''
1 """An example for handling results in a way that AsyncMapResult doesn't provide
2
3 Specifically, out-of-order results with some special handing of metadata.
4
5 This just submits a bunch of jobs, waits on the results, and prints the stdout
6 and results of each as they finish.
7
8 Authors
9 -------
10 * MinRK
11 """
12 import time
13 import random
14
15 from IPython import parallel
16
17 # create client & views
18 rc = parallel.Client()
19 dv = rc[:]
20 v = rc.load_balanced_view()
21
22
23 # scatter 'id', so id=0,1,2 on engines 0,1,2
24 dv.scatter('id', rc.ids, flatten=True)
25 print dv['id']
26
27
28 def sleep_here(count, t):
29 """simple function that takes args, prints a short message, sleeps for a time, and returns the same args"""
30 import time,sys
31 print "hi from engine %i" % id
32 sys.stdout.flush()
33 time.sleep(t)
34 return count,t
35
36 amr = v.map(sleep_here, range(100), [ random.random() for i in range(100) ], chunksize=2)
37
38 pending = set(amr.msg_ids)
39 while pending:
40 try:
41 rc.wait(pending, 1e-3)
42 except parallel.TimeoutError:
43 # ignore timeouterrors, since they only mean that at least one isn't done
44 pass
45 # finished is the set of msg_ids that are complete
46 finished = pending.difference(rc.outstanding)
47 # update pending to exclude those that just finished
48 pending = pending.difference(finished)
49 for msg_id in finished:
50 # we know these are done, so don't worry about blocking
51 ar = rc.get_result(msg_id)
52 print "job id %s finished on engine %i" % (msg_id, ar.engine_id)
53 print "with stdout:"
54 print ' ' + ar.stdout.replace('\n', '\n ').rstrip()
55 print "and results:"
56
57 # note that each job in a map always returns a list of length chunksize
58 # even if chunksize == 1
59 for (count,t) in ar.result:
60 print " item %i: slept for %.2fs" % (count, t)
61
@@ -0,0 +1,83 b''
1 """A script for watching all traffic on the IOPub channel (stdout/stderr/pyerr) of engines.
2
3 This connects to the default cluster, or you can pass the path to your ipcontroller-client.json
4
5 Try running this script, and then running a few jobs that print (and call sys.stdout.flush),
6 and you will see the print statements as they arrive, notably not waiting for the results
7 to finish.
8
9 You can use the zeromq SUBSCRIBE mechanism to only receive information from specific engines,
10 and easily filter by message type.
11
12 Authors
13 -------
14 * MinRK
15 """
16
17 import os
18 import sys
19 import json
20 import zmq
21
22 from IPython.zmq.session import Session
23 from IPython.parallel.util import disambiguate_url
24 from IPython.utils.py3compat import str_to_bytes
25 from IPython.utils.path import get_security_file
26
27 def main(connection_file):
28 """watch iopub channel, and print messages"""
29
30 ctx = zmq.Context.instance()
31
32 with open(connection_file) as f:
33 cfg = json.loads(f.read())
34
35 location = cfg['location']
36 reg_url = cfg['url']
37 session = Session(key=str_to_bytes(cfg['exec_key']))
38
39 query = ctx.socket(zmq.DEALER)
40 query.connect(disambiguate_url(cfg['url'], location))
41 session.send(query, "connection_request")
42 idents,msg = session.recv(query, mode=0)
43 c = msg['content']
44 iopub_url = disambiguate_url(c['iopub'], location)
45 sub = ctx.socket(zmq.SUB)
46 # This will subscribe to all messages:
47 sub.setsockopt(zmq.SUBSCRIBE, b'')
48 # replace with b'' with b'engine.1.stdout' to subscribe only to engine 1's stdout
49 # 0MQ subscriptions are simple 'foo*' matches, so 'engine.1.' subscribes
50 # to everything from engine 1, but there is no way to subscribe to
51 # just stdout from everyone.
52 # multiple calls to subscribe will add subscriptions, e.g. to subscribe to
53 # engine 1's stderr and engine 2's stdout:
54 # sub.setsockopt(zmq.SUBSCRIBE, b'engine.1.stderr')
55 # sub.setsockopt(zmq.SUBSCRIBE, b'engine.2.stdout')
56 sub.connect(iopub_url)
57 while True:
58 try:
59 idents,msg = session.recv(sub, mode=0)
60 except KeyboardInterrupt:
61 return
62 # ident always length 1 here
63 topic = idents[0]
64 if msg['msg_type'] == 'stream':
65 # stdout/stderr
66 # stream names are in msg['content']['name'], if you want to handle
67 # them differently
68 print "%s: %s" % (topic, msg['content']['data'])
69 elif msg['msg_type'] == 'pyerr':
70 # Python traceback
71 c = msg['content']
72 print topic + ':'
73 for line in c['traceback']:
74 # indent lines
75 print ' ' + line
76
if __name__ == '__main__':
    # connection file from the command line, or the default profile's
    # ipcontroller-client.json security file
    cf = sys.argv[1] if len(sys.argv) > 1 else get_security_file('ipcontroller-client.json')
    main(cf)
@@ -0,0 +1,52 b''
1 """Example of iteration through AsyncMapResult, without waiting for all results
2
3 Authors
4 -------
5 * MinRK
6 """
7 import time
8
9 from IPython import parallel
10
11 # create client & view
12 rc = parallel.Client()
13 dv = rc[:]
14 v = rc.load_balanced_view()
15
16 # scatter 'id', so id=0,1,2 on engines 0,1,2
17 dv.scatter('id', rc.ids, flatten=True)
18 print "Engine IDs: ", dv['id']
19
20 # create a Reference to `id`. This will be a different value on each engine
21 ref = parallel.Reference('id')
22 print "sleeping for `id` seconds on each engine"
23 tic = time.time()
24 ar = dv.apply(time.sleep, ref)
25 for i,r in enumerate(ar):
26 print "%i: %.3f"%(i, time.time()-tic)
27
28 def sleep_here(t):
29 import time
30 time.sleep(t)
31 return id,t
32
33 # one call per task
34 print "running with one call per task"
35 amr = v.map(sleep_here, [.01*t for t in range(100)])
36 tic = time.time()
37 for i,r in enumerate(amr):
38 print "task %i on engine %i: %.3f" % (i, r[0], time.time()-tic)
39
40 print "running with four calls per task"
41 # with chunksize, we can have four calls per task
42 amr = v.map(sleep_here, [.01*t for t in range(100)], chunksize=4)
43 tic = time.time()
44 for i,r in enumerate(amr):
45 print "task %i on engine %i: %.3f" % (i, r[0], time.time()-tic)
46
47 print "running with two calls per task, with unordered results"
48 # We can even iterate through faster results first, with ordered=False
49 amr = v.map(sleep_here, [.01*t for t in range(100,0,-1)], ordered=False, chunksize=2)
50 tic = time.time()
51 for i,r in enumerate(amr):
52 print "slept %.2fs on engine %i: %.3f" % (r[1], r[0], time.time()-tic)
1 NO CONTENT: new file 100644, binary diff hidden
@@ -31,6 +31,7 b' A Sample DAG'
31 31 Here, we have a very simple 5-node DAG:
32 32
33 33 .. figure:: figs/simpledag.*
34 :width: 600px
34 35
35 36 With NetworkX, an arrow is just a fattened bit on the edge. Here, we can see that task 0
36 37 depends on nothing, and can run immediately. 1 and 2 depend on 0; 3 depends on
@@ -121,7 +122,9 b' on which it depends:'
121 122 ...: # leading into this one as dependencies
122 123 ...: deps = [ results[n] for n in G.predecessors(node) ]
123 124 ...: # submit and store AsyncResult object
124 ...: results[node] = view.apply_with_flags(jobs[node], after=deps, block=False)
125 ...: with view.temp_flags(after=deps, block=False):
126 ...:     results[node] = view.apply(jobs[node])
127
125 128
126 129 Now that we have submitted all the jobs, we can wait for the results:
127 130
@@ -155,16 +158,17 b' will be at the top, and quick, small tasks will be at the bottom.'
155 158 In [12]: pos = {}; colors = {}
156 159
157 160 In [12]: for node in G:
158 ...: md = results[node].metadata
159 ...: start = date2num(md.started)
160 ...: runtime = date2num(md.completed) - start
161 ...: pos[node] = (start, runtime)
162 ...: colors[node] = md.engine_id
161 ....: md = results[node].metadata
162 ....: start = date2num(md.started)
163 ....: runtime = date2num(md.completed) - start
164 ....: pos[node] = (start, runtime)
165 ....: colors[node] = md.engine_id
163 166
164 167 In [13]: nx.draw(G, pos, node_list=colors.keys(), node_color=colors.values(),
165 ...: cmap=gist_rainbow)
168 ....: cmap=gist_rainbow)
166 169
167 170 .. figure:: figs/dagdeps.*
171 :width: 600px
168 172
169 173 Time started on x, runtime on y, and color-coded by engine-id (in this case there
170 174 were four engines). Edges denote dependencies.
@@ -60,6 +60,10 b' the ``I`` in IPython. The following are some example usage cases for IPython:'
60 60 Architecture overview
61 61 =====================
62 62
63 .. figure:: figs/wideView.png
64 :width: 300px
65
66
63 67 The IPython architecture consists of four components:
64 68
65 69 * The IPython engine.
@@ -99,7 +103,7 b' same machine as the Hub, but can be run anywhere from local threads or on remote'
99 103 The controller also provides a single point of contact for users who wish to
100 104 utilize the engines connected to the controller. There are different ways of
101 105 working with a controller. In IPython, all of these models are implemented via
102 the client's :meth:`.View.apply` method, with various arguments, or
106 the :meth:`.View.apply` method, after
103 107 constructing :class:`.View` objects to represent subsets of engines. The two
104 108 primary models for interacting with engines are:
105 109
@@ -181,6 +185,34 b' ipcontroller-client.json'
181 185 but since the controller may listen on different ports for clients and
182 186 engines, it is stored separately.
183 187
188 ipcontroller-client.json will look something like this, under default localhost
189 circumstances:
190
191 .. sourcecode:: python
192
193 {
194 "url":"tcp:\/\/127.0.0.1:54424",
195 "exec_key":"a361fe89-92fc-4762-9767-e2f0a05e3130",
196 "ssh":"",
197 "location":"10.19.1.135"
198 }
199
200 If, however, you are running the controller on a work node on a cluster, you will likely
201 need to use ssh tunnels to connect clients from your laptop to it. You will also
202 probably need to instruct the controller to listen for engines coming from other work nodes
203 on the cluster. An example of ipcontroller-client.json, as created by::
204
205 $> ipcontroller --ip=0.0.0.0 --ssh=login.mycluster.com
206
207
208 .. sourcecode:: python
209
210 {
211 "url":"tcp:\/\/*:54424",
212 "exec_key":"a361fe89-92fc-4762-9767-e2f0a05e3130",
213 "ssh":"login.mycluster.com",
214 "location":"10.0.0.2"
215 }
184 216 More details of how these JSON files are used are given below.
185 217
186 218 A detailed description of the security model and its implementation in IPython
@@ -248,7 +280,7 b' then you would connect to it with:'
248 280
249 281 .. sourcecode:: ipython
250 282
251 In [2]: c = Client(sshserver='myhub.example.com')
283 In [2]: c = Client('/path/to/my/ipcontroller-client.json', sshserver='me@myhub.example.com')
252 284
253 285 Where 'myhub.example.com' is the url or IP address of the machine on
254 286 which the Hub process is running (or another machine that has direct access to the Hub's ports).
@@ -24,8 +24,8 b' the :command:`ipcluster` command::'
24 24 For more detailed information about starting the controller and engines, see
25 25 our :ref:`introduction <parallel_overview>` to using IPython for parallel computing.
26 26
27 Creating a ``Client`` instance
28 ==============================
27 Creating a ``DirectView`` instance
28 ==================================
29 29
30 30 The first step is to import the IPython :mod:`IPython.parallel`
31 31 module and then create a :class:`.Client` instance:
@@ -117,10 +117,10 b' two decorators:'
117 117 .. sourcecode:: ipython
118 118
119 119 In [10]: @dview.remote(block=True)
120 ...: def getpid():
121 ...: import os
122 ...: return os.getpid()
123 ...:
120 ....: def getpid():
121 ....: import os
122 ....: return os.getpid()
123 ....:
124 124
125 125 In [11]: getpid()
126 126 Out[11]: [12345, 12346, 12347, 12348]
@@ -135,8 +135,8 b' operations and distribute them, reconstructing the result.'
135 135 In [13]: A = np.random.random((64,48))
136 136
137 137 In [14]: @dview.parallel(block=True)
138 ...: def pmul(A,B):
139 ...: return A*B
138 ....: def pmul(A,B):
139 ....: return A*B
140 140
141 141 In [15]: C_local = A*A
142 142
@@ -183,6 +183,8 b' dv.track : bool'
183 183 This is primarily useful for non-copying sends of numpy arrays that you plan to
184 184 edit in-place. You need to know when it becomes safe to edit the buffer
185 185 without corrupting the message.
186 dv.targets : int, list of ints
187 which targets this view is associated with.
186 188
187 189
188 190 Creating a view is simple: index-access on a client creates a :class:`.DirectView`.
@@ -260,10 +262,10 b' local Python/IPython session:'
260 262
261 263 # define our function
262 264 In [6]: def wait(t):
263 ...: import time
264 ...: tic = time.time()
265 ...: time.sleep(t)
266 ...: return time.time()-tic
265 ....: import time
266 ....: tic = time.time()
267 ....: time.sleep(t)
268 ....: return time.time()-tic
267 269
268 270 # In non-blocking mode
269 271 In [7]: ar = dview.apply_async(wait, 2)
@@ -326,7 +328,7 b' and blocks until all of the associated results are ready:'
326 328 The ``block`` and ``targets`` keyword arguments and attributes
327 329 --------------------------------------------------------------
328 330
329 Most DirectView methods (excluding :meth:`apply` and :meth:`map`) accept ``block`` and
331 Most DirectView methods (excluding :meth:`apply`) accept ``block`` and
330 332 ``targets`` as keyword arguments. As we have seen above, these keyword arguments control the
331 333 blocking mode and which engines the command is applied to. The :class:`View` class also has
332 334 :attr:`block` and :attr:`targets` attributes that control the default behavior when the keyword
@@ -362,11 +364,6 b' The :attr:`block` and :attr:`targets` instance attributes of the'
362 364 Parallel magic commands
363 365 -----------------------
364 366
365 .. warning::
366
367 The magics have not been changed to work with the zeromq system. The
368 magics do work, but *do not* print stdin/out like they used to in IPython.kernel.
369
370 367 We provide a few IPython magic commands (``%px``, ``%autopx`` and ``%result``)
371 368 that make it more pleasant to execute Python commands on the engines
372 369 interactively. These are simply shortcuts to :meth:`execute` and
@@ -376,9 +373,6 b' Python command on the engines specified by the :attr:`targets` attribute of the'
376 373
377 374 .. sourcecode:: ipython
378 375
379 # load the parallel magic extension:
380 In [21]: %load_ext parallelmagic
381
382 376 # Create a DirectView for all targets
383 377 In [22]: dv = rc[:]
384 378
@@ -387,10 +381,10 b' Python command on the engines specified by the :attr:`targets` attribute of the'
387 381
388 382 In [24]: dv.block=True
389 383
390 In [25]: import numpy
391
392 In [26]: %px import numpy
393 Parallel execution on engines: [0, 1, 2, 3]
384 # import numpy here and everywhere
385 In [25]: with dv.sync_imports():
386 ....: import numpy
387 importing numpy on engine(s)
394 388
395 389 In [27]: %px a = numpy.random.rand(2,2)
396 390 Parallel execution on engines: [0, 1, 2, 3]
@@ -400,10 +394,10 b' Python command on the engines specified by the :attr:`targets` attribute of the'
400 394
401 395 In [28]: dv['ev']
402 396 Out[28]: [ array([ 1.09522024, -0.09645227]),
403 array([ 1.21435496, -0.35546712]),
404 array([ 0.72180653, 0.07133042]),
405 array([ 1.46384341e+00, 1.04353244e-04])
406 ]
397 ....: array([ 1.21435496, -0.35546712]),
398 ....: array([ 0.72180653, 0.07133042]),
399 ....: array([ 1.46384341, 1.04353244e-04])
400 ....: ]
407 401
408 402 The ``%result`` magic gets the most recent result, or takes an argument
409 403 specifying the index of the result to be requested. It is simply a shortcut to the
@@ -415,9 +409,9 b' specifying the index of the result to be requested. It is simply a shortcut to t'
415 409
416 410 In [30]: %result
417 411 Out[30]: [ [ 1.28167017 0.14197338],
418 [-0.14093616 1.27877273],
419 [-0.37023573 1.06779409],
420 [ 0.83664764 -0.25602658] ]
412 ....: [-0.14093616 1.27877273],
413 ....: [-0.37023573 1.06779409],
414 ....: [ 0.83664764 -0.25602658] ]
421 415
422 416 The ``%autopx`` magic switches to a mode where everything you type is executed
423 417 on the engines given by the :attr:`targets` attribute:
@@ -452,9 +446,9 b' on the engines given by the :attr:`targets` attribute:'
452 446
453 447 In [37]: dv['ans']
454 448 Out[37]: [ 'Average max eigenvalue is: 10.1387247332',
455 'Average max eigenvalue is: 10.2076902286',
456 'Average max eigenvalue is: 10.1891484655',
457 'Average max eigenvalue is: 10.1158837784',]
449 ....: 'Average max eigenvalue is: 10.2076902286',
450 ....: 'Average max eigenvalue is: 10.1891484655',
451 ....: 'Average max eigenvalue is: 10.1158837784',]
458 452
459 453
460 454 Moving Python objects around
@@ -522,7 +516,7 b' follow that terminology. However, it is important to remember that in'
522 516 IPython's :class:`Client` class, :meth:`scatter` is from the
523 517 interactive IPython session to the engines and :meth:`gather` is from the
524 518 engines back to the interactive IPython session. For scatter/gather operations
525 between engines, MPI should be used:
519 between engines, MPI, pyzmq, or some other direct interconnect should be used.
526 520
527 521 .. sourcecode:: ipython
528 522
@@ -568,7 +562,7 b" created by a DirectView's :meth:`sync_imports` method:"
568 562 .. sourcecode:: ipython
569 563
570 564 In [69]: with dview.sync_imports():
571 ...: import numpy
565 ....: import numpy
572 566 importing numpy on engine(s)
573 567
574 568 Any imports made inside the block will also be performed on the view's engines.
@@ -588,15 +582,15 b' execution, and will fail with an UnmetDependencyError.'
588 582 In [69]: from IPython.parallel import require
589 583
590 584 In [70]: @require('re')
591 ...: def findall(pat, x):
592 ...: # re is guaranteed to be available
593 ...: return re.findall(pat, x)
585 ....: def findall(pat, x):
586 ....: # re is guaranteed to be available
587 ....: return re.findall(pat, x)
594 588
595 589 # you can also pass modules themselves, that you already have locally:
596 590 In [71]: @require(time)
597 ...: def wait(t):
598 ...: time.sleep(t)
599 ...: return t
591 ....: def wait(t):
592 ....: time.sleep(t)
593 ....: return t
600 594
601 595 .. _parallel_exceptions:
602 596
@@ -29,8 +29,8 b' the :command:`ipcluster` command::'
29 29 For more detailed information about starting the controller and engines, see
30 30 our :ref:`introduction <parallel_overview>` to using IPython for parallel computing.
31 31
32 Creating a ``Client`` instance
33 ==============================
32 Creating a ``LoadBalancedView`` instance
33 ========================================
34 34
35 35 The first step is to import the IPython :mod:`IPython.parallel`
36 36 module and then create a :class:`.Client` instance, and we will also be using
@@ -113,12 +113,24 b' that turns any Python function into a parallel function:'
113 113
114 114 .. _parallel_taskmap:
115 115
116 The AsyncMapResult
117 ==================
116 Map results are iterable!
117 -------------------------
118
119 When an AsyncResult object actually maps multiple results (e.g. the :class:`~AsyncMapResult`
120 object), you can actually iterate through them, and act on the results as they arrive:
121
122 .. literalinclude:: ../../examples/parallel/itermapresult.py
123 :language: python
124 :lines: 9-34
125
126 .. seealso::
127
128 When AsyncResult or the AsyncMapResult don't provide what you need (for instance,
129 handling individual results as they arrive, but with metadata), you can always
130 just split the original result's ``msg_ids`` attribute, and handle them as you like.
131
132 For an example of this, see :file:`docs/examples/parallel/customresult.py`
118 133
119 When you call ``lview.map_async(f, sequence)``, or just :meth:`map` with `block=True`, then
120 what you get in return will be an :class:`~AsyncMapResult` object. These are similar to
121 AsyncResult objects, but with one key difference
122 134
123 135 .. _parallel_dependencies:
124 136
@@ -166,8 +178,8 b' you specify are importable:'
166 178 .. sourcecode:: ipython
167 179
168 180 In [10]: @require('numpy', 'zmq')
169 ...: def myfunc():
170 ...: return dostuff()
181 ....: def myfunc():
182 ....: return dostuff()
171 183
172 184 Now, any time you apply :func:`myfunc`, the task will only run on a machine that has
173 185 numpy and pyzmq available, and when :func:`myfunc` is called, numpy and zmq will be imported.
@@ -184,16 +196,16 b' will be assigned to another engine. If the dependency returns *anything other th'
184 196 .. sourcecode:: ipython
185 197
186 198 In [10]: def platform_specific(plat):
187 ...: import sys
188 ...: return sys.platform == plat
199 ....: import sys
200 ....: return sys.platform == plat
189 201
190 202 In [11]: @depend(platform_specific, 'darwin')
191 ...: def mactask():
192 ...: do_mac_stuff()
203 ....: def mactask():
204 ....: do_mac_stuff()
193 205
194 206 In [12]: @depend(platform_specific, 'nt')
195 ...: def wintask():
196 ...: do_windows_stuff()
207 ....: def wintask():
208 ....: do_windows_stuff()
197 209
198 210 In this case, any time you apply ``mytask``, it will only run on an OSX machine.
199 211 ``@depend`` is just like ``apply``, in that it has a ``@depend(f,*args,**kwargs)``
@@ -209,7 +221,7 b' the :class:`dependent` object that the decorators use:'
209 221 .. sourcecode:: ipython
210 222
211 223 In [13]: def mytask(*args):
212 ...: dostuff()
224 ....: dostuff()
213 225
214 226 In [14]: mactask = dependent(mytask, platform_specific, 'darwin')
215 227 # this is the same as decorating the declaration of mytask with @depend
@@ -222,8 +234,8 b' the :class:`dependent` object that the decorators use:'
222 234
223 235 # is equivalent to:
224 236 In [17]: @depend(g, *dargs, **dkwargs)
225 ...: def t(a,b,c):
226 ...: # contents of f
237 ....: def t(a,b,c):
238 ....: # contents of f
227 239
228 240 Graph Dependencies
229 241 ------------------
@@ -287,10 +299,11 b' you can skip using Dependency objects, and just pass msg_ids or AsyncResult obje'
287 299
288 300 In [16]: ar2 = lview.apply(f2)
289 301
290 In [17]: ar3 = lview.apply_with_flags(f3, after=[ar,ar2])
291
292 In [17]: ar4 = lview.apply_with_flags(f3, follow=[ar], timeout=2.5)
302 In [17]: with lview.temp_flags(after=[ar,ar2]):
303 ....: ar3 = lview.apply(f3)
293 304
305 In [18]: with lview.temp_flags(follow=[ar], timeout=2.5):
306 ....: ar4 = lview.apply(f3)
294 307
295 308 .. seealso::
296 309
@@ -440,7 +453,7 b' The following is an overview of how to use these classes together:'
440 453 2. Define some functions to be run as tasks
441 454 3. Submit your tasks to using the :meth:`apply` method of your
442 455 :class:`LoadBalancedView` instance.
443 4. Use :meth:`Client.get_result` to get the results of the
456 4. Use :meth:`.Client.get_result` to get the results of the
444 457 tasks, or use the :meth:`AsyncResult.get` method of the results to wait
445 458 for and then receive the results.
446 459
General Comments 0
You need to be logged in to leave comments. Login now