update parallel docs with some changes from scipy tutorial...
MinRK
@@ -0,0 +1,61 b''
1 """An example for handling results in a way that AsyncMapResult doesn't provide
2
3 Specifically, out-of-order results with some special handling of metadata.
4
5 This just submits a bunch of jobs, waits on the results, and prints the stdout
6 and results of each as they finish.
7
8 Authors
9 -------
10 * MinRK
11 """
12 import time
13 import random
14
15 from IPython import parallel
16
17 # create client & views
18 rc = parallel.Client()
19 dv = rc[:]
20 v = rc.load_balanced_view()
21
22
23 # scatter 'id', so id=0,1,2 on engines 0,1,2
24 dv.scatter('id', rc.ids, flatten=True)
25 print dv['id']
26
27
28 def sleep_here(count, t):
29 """simple function that takes args, prints a short message, sleeps for a time, and returns the same args"""
30 import time,sys
31 print "hi from engine %i" % id
32 sys.stdout.flush()
33 time.sleep(t)
34 return count,t
35
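# 100 jobs with chunksize=2 gives 50 tasks; each task runs sleep_here twice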
36 amr = v.map(sleep_here, range(100), [ random.random() for i in range(100) ], chunksize=2)
37
38 pending = set(amr.msg_ids)
39 while pending:
40 try:
41 rc.wait(pending, 1e-3)
42 except parallel.TimeoutError:
43         # ignore TimeoutErrors, since they only mean that at least one task isn't done
44 pass
45 # finished is the set of msg_ids that are complete
46 finished = pending.difference(rc.outstanding)
47 # update pending to exclude those that just finished
48 pending = pending.difference(finished)
49 for msg_id in finished:
50 # we know these are done, so don't worry about blocking
51 ar = rc.get_result(msg_id)
52 print "job id %s finished on engine %i" % (msg_id, ar.engine_id)
53 print "with stdout:"
54 print ' ' + ar.stdout.replace('\n', '\n ').rstrip()
55 print "and results:"
56
57 # note that each job in a map always returns a list of length chunksize
58 # even if chunksize == 1
59 for (count,t) in ar.result:
60 print " item %i: slept for %.2fs" % (count, t)
61
@@ -0,0 +1,83 b''
1 """A script for watching all traffic on the IOPub channel (stdout/stderr/pyerr) of engines.
2
3 This connects to the default cluster, or you can pass the path to your ipcontroller-client.json
4
5 Try running this script, and then running a few jobs that print (and call sys.stdout.flush),
6 and you will see the print output as it arrives, notably without waiting for the results
7 to finish.
8
9 You can use the zeromq SUBSCRIBE mechanism to only receive information from specific engines,
10 and easily filter by message type.
11
12 Authors
13 -------
14 * MinRK
15 """
16
17 import os
18 import sys
19 import json
20 import zmq
21
22 from IPython.zmq.session import Session
23 from IPython.parallel.util import disambiguate_url
24 from IPython.utils.py3compat import str_to_bytes
25 from IPython.utils.path import get_security_file
26
27 def main(connection_file):
28 """watch iopub channel, and print messages"""
29
30 ctx = zmq.Context.instance()
31
32 with open(connection_file) as f:
33         cfg = json.load(f)
34
35 location = cfg['location']
36 reg_url = cfg['url']
37 session = Session(key=str_to_bytes(cfg['exec_key']))
38
39 query = ctx.socket(zmq.DEALER)
40     query.connect(disambiguate_url(reg_url, location))
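    # ask the Hub for its connection info; the reply content includes the iopub url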
41 session.send(query, "connection_request")
42 idents,msg = session.recv(query, mode=0)
43 c = msg['content']
44 iopub_url = disambiguate_url(c['iopub'], location)
45 sub = ctx.socket(zmq.SUB)
46 # This will subscribe to all messages:
47 sub.setsockopt(zmq.SUBSCRIBE, b'')
48     # replace b'' with b'engine.1.stdout' to subscribe only to engine 1's stdout
49     # 0MQ subscriptions are simple prefix matches, so 'engine.1.' subscribes
50 # to everything from engine 1, but there is no way to subscribe to
51 # just stdout from everyone.
52 # multiple calls to subscribe will add subscriptions, e.g. to subscribe to
53 # engine 1's stderr and engine 2's stdout:
54 # sub.setsockopt(zmq.SUBSCRIBE, b'engine.1.stderr')
55 # sub.setsockopt(zmq.SUBSCRIBE, b'engine.2.stdout')
56 sub.connect(iopub_url)
57 while True:
58 try:
59 idents,msg = session.recv(sub, mode=0)
60 except KeyboardInterrupt:
61 return
62 # ident always length 1 here
63 topic = idents[0]
64 if msg['msg_type'] == 'stream':
65 # stdout/stderr
66 # stream names are in msg['content']['name'], if you want to handle
67 # them differently
68 print "%s: %s" % (topic, msg['content']['data'])
69 elif msg['msg_type'] == 'pyerr':
70 # Python traceback
71 c = msg['content']
72 print topic + ':'
73 for line in c['traceback']:
74 # indent lines
75 print ' ' + line
76
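# optionally pass the path to a connection file (e.g. ipcontroller-client.json) as the first argument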
77 if __name__ == '__main__':
78 if len(sys.argv) > 1:
79 cf = sys.argv[1]
80 else:
81 # This gets the security file for the default profile:
82 cf = get_security_file('ipcontroller-client.json')
83     main(cf)
@@ -0,0 +1,52 b''
1 """Example of iteration through AsyncMapResult, without waiting for all results
2
3 Authors
4 -------
5 * MinRK
6 """
7 import time
8
9 from IPython import parallel
10
11 # create client & view
12 rc = parallel.Client()
13 dv = rc[:]
14 v = rc.load_balanced_view()
15
16 # scatter 'id', so id=0,1,2 on engines 0,1,2
17 dv.scatter('id', rc.ids, flatten=True)
18 print "Engine IDs: ", dv['id']
19
20 # create a Reference to `id`. This will be a different value on each engine
21 ref = parallel.Reference('id')
22 print "sleeping for `id` seconds on each engine"
23 tic = time.time()
24 ar = dv.apply(time.sleep, ref)
25 for i,r in enumerate(ar):
26 print "%i: %.3f"%(i, time.time()-tic)
27
28 def sleep_here(t):
29 import time
30 time.sleep(t)
31 return id,t
32
33 # one call per task
34 print "running with one call per task"
35 amr = v.map(sleep_here, [.01*t for t in range(100)])
36 tic = time.time()
37 for i,r in enumerate(amr):
38 print "task %i on engine %i: %.3f" % (i, r[0], time.time()-tic)
39
40 print "running with four calls per task"
41 # with chunksize, we can have four calls per task
42 amr = v.map(sleep_here, [.01*t for t in range(100)], chunksize=4)
43 tic = time.time()
44 for i,r in enumerate(amr):
45 print "task %i on engine %i: %.3f" % (i, r[0], time.time()-tic)
46
47 print "running with two calls per task, with unordered results"
48 # We can even iterate through faster results first, with ordered=False
49 amr = v.map(sleep_here, [.01*t for t in range(100,0,-1)], ordered=False, chunksize=2)
50 tic = time.time()
51 for i,r in enumerate(amr):
52 print "slept %.2fs on engine %i: %.3f" % (r[1], r[0], time.time()-tic)
@@ -1,173 +1,177 b''
1 1 .. _dag_dependencies:
2 2
3 3 ================
4 4 DAG Dependencies
5 5 ================
6 6
7 7 Often, a parallel workflow is described in terms of a `Directed Acyclic Graph
8 8 <http://en.wikipedia.org/wiki/Directed_acyclic_graph>`_ or DAG. A popular library
9 9 for working with graphs is NetworkX_. Here, we will walk through a demo mapping
10 10 a NetworkX DAG to task dependencies.
11 11
12 12 The full script that runs this demo can be found in
13 13 :file:`docs/examples/parallel/dagdeps.py`.
14 14
15 15 Why are DAGs good for task dependencies?
16 16 ----------------------------------------
17 17
18 18 The 'G' in DAG is 'Graph'. A Graph is a collection of **nodes** and **edges** that connect
19 19 the nodes. For our purposes, each node would be a task, and each edge would be a
20 20 dependency. The 'D' in DAG stands for 'Directed'. This means that each edge has a
21 21 direction associated with it. So we can interpret the edge (a,b) as meaning that b depends
22 22 on a, whereas the edge (b,a) would mean a depends on b. The 'A' is 'Acyclic', meaning that
23 23 there must not be any closed loops in the graph. This is important for dependencies,
24 24 because if a loop were closed, then a task could ultimately depend on itself, and never be
25 25 able to run. If your workflow can be described as a DAG, then it is impossible for your
26 26 dependencies to cause a deadlock.
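
NetworkX can check this property directly, which makes a cheap sanity check
before submitting work (a one-line sketch, assuming a ``DiGraph`` named ``G``
as constructed below):

.. sourcecode:: python

    import networkx as nx

    # a cycle in the graph could deadlock submission, so verify acyclicity
    assert nx.is_directed_acyclic_graph(G)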
27 27
28 28 A Sample DAG
29 29 ------------
30 30
31 31 Here, we have a very simple 5-node DAG:
32 32
33 33 .. figure:: figs/simpledag.*
34 :width: 600px
34 35
35 36 With NetworkX, an arrow is just a fattened bit on the edge. Here, we can see that task 0
36 37 depends on nothing, and can run immediately. 1 and 2 depend on 0; 3 depends on
37 38 1 and 2; and 4 depends only on 1.
38 39
39 40 A possible sequence of events for this workflow:
40 41
41 42 0. Task 0 can run right away
42 43 1. 0 finishes, so 1,2 can start
43 44 2. 1 finishes, 3 is still waiting on 2, but 4 can start right away
44 45 3. 2 finishes, and 3 can finally start
45 46
46 47
47 48 Further, taking failures into account, assuming all dependencies are run with the default
48 49 `success=True,failure=False`, the following cases would occur for each node's failure:
49 50
50 51 0. fails: all other tasks fail as Impossible
51 52 1. fails: 2 can still succeed, but 3,4 are unreachable
52 53 2. fails: 3 becomes unreachable, but 4 is unaffected
53 54 3. and 4. are terminal, and their failure can have no effect on other nodes
54 55
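These default flags can also be inverted, e.g. to run a cleanup task only when
something fails. A short sketch (``ar1``, ``ar2``, and ``cleanup`` are
hypothetical stand-ins; :meth:`temp_flags` appears again below):

.. sourcecode:: python

    from IPython.parallel import Dependency

    # run `cleanup` if at least one of the two parent tasks failed
    deps = Dependency([ar1, ar2], success=False, failure=True, all=False)
    with view.temp_flags(after=deps):
        view.apply_async(cleanup)
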
55 56 The code to generate the simple DAG:
56 57
57 58 .. sourcecode:: python
58 59
59 60 import networkx as nx
60 61
61 62 G = nx.DiGraph()
62 63
63 64 # add 5 nodes, labeled 0-4:
64 65 map(G.add_node, range(5))
65 66 # 1,2 depend on 0:
66 67 G.add_edge(0,1)
67 68 G.add_edge(0,2)
68 69 # 3 depends on 1,2
69 70 G.add_edge(1,3)
70 71 G.add_edge(2,3)
71 72 # 4 depends on 1
72 73 G.add_edge(1,4)
73 74
74 75 # now draw the graph:
75 76 pos = { 0 : (0,0), 1 : (1,1), 2 : (-1,1),
76 77 3 : (0,2), 4 : (2,2)}
77 78 nx.draw(G, pos, edge_color='r')
78 79
79 80
80 81 For demonstration purposes, we have a function that generates a random DAG with a given
81 82 number of nodes and edges.
82 83
83 84 .. literalinclude:: ../../examples/parallel/dagdeps.py
84 85 :language: python
85 86 :lines: 20-36
86 87
87 88 So first, we start with a graph of 32 nodes, with 128 edges:
88 89
89 90 .. sourcecode:: ipython
90 91
91 92 In [2]: G = random_dag(32,128)
92 93
93 94 Now, we need to build our dict of jobs corresponding to the nodes on the graph:
94 95
95 96 .. sourcecode:: ipython
96 97
97 98 In [3]: jobs = {}
98 99
99 100 # in reality, each job would presumably be different
100 101 # randomwait is just a function that sleeps for a random interval
101 102 In [4]: for node in G:
102 103 ...: jobs[node] = randomwait
103 104
104 105 Once we have a dict of jobs matching the nodes on the graph, we can start submitting jobs,
105 106 and linking up the dependencies. Since we don't know a job's msg_id until it is submitted,
106 107 which is necessary for building dependencies, it is critical that we don't submit a job
107 108 before the jobs it depends on. Fortunately, NetworkX provides a
108 109 :meth:`topological_sort` method which ensures exactly this. It presents an iterable that
109 110 guarantees that when you arrive at a node, you have already visited all the nodes
110 111 on which it depends:
111 112
112 113 .. sourcecode:: ipython
113 114
114 115 In [5]: rc = Client()
115 116 In [5]: view = rc.load_balanced_view()
116 117
117 118 In [6]: results = {}
118 119
119 120 In [7]: for node in G.topological_sort():
120 121 ...: # get list of AsyncResult objects from nodes
121 122 ...: # leading into this one as dependencies
122 123 ...: deps = [ results[n] for n in G.predecessors(node) ]
123 124 ...: # submit and store AsyncResult object
124 ...: results[node] = view.apply_with_flags(jobs[node], after=deps, block=False)
125 ...: with view.temp_flags(after=deps, block=False):
126 ...: results[node] = view.apply(jobs[node])
127
125 128
126 129 Now that we have submitted all the jobs, we can wait for the results:
127 130
128 131 .. sourcecode:: ipython
129 132
130 133 In [8]: view.wait(results.values())
131 134
132 135 Now, at least we know that all the jobs ran and did not fail (``r.get()`` would have
133 136 raised an error if a task failed). But we don't know that the ordering was properly
134 137 respected. For this, we can use the :attr:`metadata` attribute of each AsyncResult.
135 138
136 139 These objects store a variety of metadata about each task, including various timestamps.
137 140 We can validate that the dependencies were respected by checking that each task was
138 141 started after all of its predecessors were completed:
139 142
140 143 .. literalinclude:: ../../examples/parallel/dagdeps.py
141 144 :language: python
142 145 :lines: 64-70
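
In outline, the check is just a comparison of timestamps (a sketch reusing
``G`` and ``results`` from above):

.. sourcecode:: python

    for node in G:
        started = results[node].metadata.started
        for parent in G.predecessors(node):
            # each task must start only after all its predecessors completed
            assert started > results[parent].metadata.completed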
143 146
144 147 We can also validate the graph visually. By drawing the graph with each node's x-position
145 148 as its start time, all arrows must be pointing to the right if dependencies were respected.
146 149 For spreading, the y-position will be the runtime of the task, so long tasks
147 150 will be at the top, and quick, small tasks will be at the bottom.
148 151
149 152 .. sourcecode:: ipython
150 153
151 154 In [10]: from matplotlib.dates import date2num
152 155
153 156 In [11]: from matplotlib.cm import gist_rainbow
154 157
155 158 In [12]: pos = {}; colors = {}
156 159
157 160 In [12]: for node in G:
158 ...: md = results[node].metadata
159 ...: start = date2num(md.started)
160 ...: runtime = date2num(md.completed) - start
161 ...: pos[node] = (start, runtime)
162 ...: colors[node] = md.engine_id
161 ....: md = results[node].metadata
162 ....: start = date2num(md.started)
163 ....: runtime = date2num(md.completed) - start
164 ....: pos[node] = (start, runtime)
165 ....: colors[node] = md.engine_id
163 166
164 167 In [13]: nx.draw(G, pos, node_list=colors.keys(), node_color=colors.values(),
165 ...: cmap=gist_rainbow)
168 ....: cmap=gist_rainbow)
166 169
167 170 .. figure:: figs/dagdeps.*
171 :width: 600px
168 172
169 173 Time started on x, runtime on y, and color-coded by engine-id (in this case there
170 174 were four engines). Edges denote dependencies.
171 175
172 176
173 177 .. _NetworkX: http://networkx.lanl.gov/
@@ -1,263 +1,295 b''
1 1 .. _parallel_overview:
2 2
3 3 ============================
4 4 Overview and getting started
5 5 ============================
6 6
7 7 Introduction
8 8 ============
9 9
10 10 This section gives an overview of IPython's sophisticated and powerful
11 11 architecture for parallel and distributed computing. This architecture
12 12 abstracts out parallelism in a very general way, which enables IPython to
13 13 support many different styles of parallelism including:
14 14
15 15 * Single program, multiple data (SPMD) parallelism.
16 16 * Multiple program, multiple data (MPMD) parallelism.
17 17 * Message passing using MPI.
18 18 * Task farming.
19 19 * Data parallel.
20 20 * Combinations of these approaches.
21 21 * Custom user defined approaches.
22 22
23 23 Most importantly, IPython enables all types of parallel applications to
24 24 be developed, executed, debugged and monitored *interactively*. Hence,
25 25 the ``I`` in IPython. The following are some example use cases for IPython:
26 26
27 27 * Quickly parallelize algorithms that are embarrassingly parallel
28 28 using a number of simple approaches. Many simple things can be
29 29 parallelized interactively in one or two lines of code.
30 30
31 31 * Steer traditional MPI applications on a supercomputer from an
32 32 IPython session on your laptop.
33 33
34 34 * Analyze and visualize large datasets (that could be remote and/or
35 35 distributed) interactively using IPython and tools like
36 36 matplotlib/TVTK.
37 37
38 38 * Develop, test and debug new parallel algorithms
39 39 (that may use MPI) interactively.
40 40
41 41 * Tie together multiple MPI jobs running on different systems into
42 42 one giant distributed and parallel system.
43 43
44 44 * Start a parallel job on your cluster and then have a remote
45 45 collaborator connect to it and pull back data into their
46 46 local IPython session for plotting and analysis.
47 47
48 48 * Run a set of tasks on a set of CPUs using dynamic load balancing.
49 49
50 50 .. tip::
51 51
52 52 At the SciPy 2011 conference in Austin, Min Ragan-Kelley presented a
53 53 complete 4-hour tutorial on the use of these features, and all the materials
54 54 for the tutorial are now `available online`__. That tutorial provides an
55 55 excellent, hands-on oriented complement to the reference documentation
56 56 presented here.
57 57
58 58 .. __: http://minrk.github.com/scipy-tutorial-2011
59 59
60 60 Architecture overview
61 61 =====================
62 62
63 .. figure:: figs/wideView.png
64 :width: 300px
65
66
63 67 The IPython architecture consists of four components:
64 68
65 69 * The IPython engine.
66 70 * The IPython hub.
67 71 * The IPython schedulers.
68 72 * The controller client.
69 73
70 74 These components live in the :mod:`IPython.parallel` package and are
71 75 installed with IPython. They do, however, have additional dependencies
72 76 that must be installed. For more information, see our
73 77 :ref:`installation documentation <install_index>`.
74 78
75 79 .. TODO: include zmq in install_index
76 80
77 81 IPython engine
78 82 ---------------
79 83
80 84 The IPython engine is a Python instance that takes Python commands over a
81 85 network connection. Eventually, the IPython engine will be a full IPython
82 86 interpreter, but for now, it is a regular Python interpreter. The engine
83 87 can also handle incoming and outgoing Python objects sent over a network
84 88 connection. When multiple engines are started, parallel and distributed
85 89 computing becomes possible. An important feature of an IPython engine is
86 90 that it blocks while user code is being executed. Read on for how the
87 91 IPython controller solves this problem to expose a clean asynchronous API
88 92 to the user.
89 93
90 94 IPython controller
91 95 ------------------
92 96
93 97 The IPython controller processes provide an interface for working with a set of engines.
94 98 At a general level, the controller is a collection of processes to which IPython engines
95 99 and clients can connect. The controller is composed of a :class:`Hub` and a collection of
96 100 :class:`Schedulers`. These Schedulers typically run in separate processes on the
97 101 same machine as the Hub, but can run anywhere, from local threads to remote machines.
98 102
99 103 The controller also provides a single point of contact for users who wish to
100 104 utilize the engines connected to the controller. There are different ways of
101 105 working with a controller. In IPython, all of these models are implemented via
102 the client's :meth:`.View.apply` method, with various arguments, or
106 the :meth:`.View.apply` method, after
103 107 constructing :class:`.View` objects to represent subsets of engines. The two
104 108 primary models for interacting with engines are:
105 109
106 110 * A **Direct** interface, where engines are addressed explicitly.
107 111 * A **LoadBalanced** interface, where the Scheduler is trusted with assigning work to
108 112 appropriate engines.
109 113
110 114 Advanced users can readily extend the View models to enable other
111 115 styles of parallelism.
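
Both models start from the same :class:`~.parallel.Client`; a minimal sketch:

.. sourcecode:: ipython

    In [1]: from IPython.parallel import Client

    In [2]: rc = Client()

    In [3]: dview = rc[:]                     # DirectView: engines addressed explicitly

    In [4]: lview = rc.load_balanced_view()   # LoadBalancedView: the Scheduler assigns work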
112 116
113 117 .. note::
114 118
115 119 A single controller and set of engines can be used with multiple models
116 120 simultaneously. This opens the door for lots of interesting things.
117 121
118 122
119 123 The Hub
120 124 *******
121 125
122 126 The center of an IPython cluster is the Hub. This is the process that keeps
123 127 track of engine connections, schedulers, clients, as well as all task requests and
124 128 results. The primary role of the Hub is to facilitate queries of the cluster state, and
125 129 minimize the necessary information required to establish the many connections involved in
126 130 connecting new clients and engines.
127 131
128 132
129 133 Schedulers
130 134 **********
131 135
132 136 All actions that can be performed on the engine go through a Scheduler. While the engines
133 137 themselves block when user code is run, the schedulers hide that from the user to provide
134 138 a fully asynchronous interface to a set of engines.
135 139
136 140
137 141 IPython client and views
138 142 ------------------------
139 143
140 144 There is one primary object, the :class:`~.parallel.Client`, for connecting to a cluster.
141 145 For each execution model, there is a corresponding :class:`~.parallel.View`. These views
142 146 allow users to interact with a set of engines through the interface. Here are the two default
143 147 views:
144 148
145 149 * The :class:`DirectView` class for explicit addressing.
146 150 * The :class:`LoadBalancedView` class for destination-agnostic scheduling.
147 151
148 152 Security
149 153 --------
150 154
151 155 IPython uses ZeroMQ for networking, which has provided many advantages, but
152 156 one of the setbacks is its utter lack of security [ZeroMQ]_. By default, no IPython
153 157 connections are encrypted, but open ports only listen on localhost. The only
154 158 source of security for IPython is via ssh-tunnel. IPython supports both shell
155 159 (`openssh`) and `paramiko` based tunnels for connections. There is a key necessary
156 160 to submit requests, but due to the lack of encryption, it does not provide
157 161 significant security if loopback traffic is compromised.
158 162
159 163 In our architecture, the controller is the only process that listens on
160 164 network ports, and is thus the main point of vulnerability. The standard model
161 165 for secure connections is to designate that the controller listen on
162 166 localhost, and use ssh-tunnels to connect clients and/or
163 167 engines.
164 168
165 169 To connect and authenticate to the controller an engine or client needs
166 170 some information that the controller has stored in a JSON file.
167 171 Thus, the JSON files need to be copied to a location where
168 172 the clients and engines can find them. Typically, this is the
169 173 :file:`~/.ipython/profile_default/security` directory on the host where the
170 174 client/engine is running (which could be a different host than the controller).
171 175 Once the JSON files are copied over, everything should work fine.
172 176
173 177 Currently, there are two JSON files that the controller creates:
174 178
175 179 ipcontroller-engine.json
176 180 This JSON file has the information necessary for an engine to connect
177 181 to a controller.
178 182
179 183 ipcontroller-client.json
180 184 The client's connection information. This may not differ from the engine's,
181 185 but since the controller may listen on different ports for clients and
182 186 engines, it is stored separately.
183 187
188 ipcontroller-client.json will look something like this, under default localhost
189 circumstances:
190
191 .. sourcecode:: python
192
193 {
194 "url":"tcp:\/\/127.0.0.1:54424",
195 "exec_key":"a361fe89-92fc-4762-9767-e2f0a05e3130",
196 "ssh":"",
197 "location":"10.19.1.135"
198 }
199
200 If, however, you are running the controller on a worker node of a cluster, you will likely
201 need to use ssh tunnels to connect clients from your laptop to it. You will also
202 probably need to instruct the controller to listen for engines coming from other worker nodes
203 on the cluster. An example of ipcontroller-client.json, as created by::
204
205 $> ipcontroller --ip=0.0.0.0 --ssh=login.mycluster.com
206
207
208 .. sourcecode:: python
209
210 {
211 "url":"tcp:\/\/*:54424",
212 "exec_key":"a361fe89-92fc-4762-9767-e2f0a05e3130",
213 "ssh":"login.mycluster.com",
214 "location":"10.0.0.2"
215 }
184 216 More details of how these JSON files are used are given below.
185 217
186 218 A detailed description of the security model and its implementation in IPython
187 219 can be found :ref:`here <parallelsecurity>`.
188 220
189 221 .. warning::
190 222
191 223 Even at its most secure, the Controller listens on ports on localhost, and
192 224 every time you make a tunnel, you open a localhost port on the connecting
193 225 machine that points to the Controller. If localhost on the Controller's
194 226 machine, or the machine of any client or engine, is untrusted, then your
195 227 Controller is insecure. There is no way around this with ZeroMQ.
196 228
197 229
198 230
199 231 Getting Started
200 232 ===============
201 233
202 234 To use IPython for parallel computing, you need to start one instance of the
203 235 controller and one or more instances of the engine. Initially, it is best to
204 236 simply start a controller and engines on a single host using the
205 237 :command:`ipcluster` command. To start a controller and 4 engines on your
206 238 localhost, just do::
207 239
208 240 $ ipcluster start -n 4
209 241
210 242 More details about starting the IPython controller and engines can be found
211 243 :ref:`here <parallel_process>`.
212 244
213 245 Once you have started the IPython controller and one or more engines, you
214 246 are ready to use the engines to do something useful. To make sure
215 247 everything is working correctly, try the following commands:
216 248
217 249 .. sourcecode:: ipython
218 250
219 251 In [1]: from IPython.parallel import Client
220 252
221 253 In [2]: c = Client()
222 254
223 255 In [4]: c.ids
224 256 Out[4]: set([0, 1, 2, 3])
225 257
226 258 In [5]: c[:].apply_sync(lambda : "Hello, World")
227 259 Out[5]: [ 'Hello, World', 'Hello, World', 'Hello, World', 'Hello, World' ]
228 260
229 261
230 262 When a client is created with no arguments, it tries to find the corresponding JSON file
231 263 in the local `~/.ipython/profile_default/security` directory. If you created the cluster with
232 264 a specific profile, you can use that profile with the Client. This should cover most cases:
233 265
234 266 .. sourcecode:: ipython
235 267
236 268 In [2]: c = Client(profile='myprofile')
237 269
238 270 If you have put the JSON file in a different location or it has a different name, create the
239 271 client like this:
240 272
241 273 .. sourcecode:: ipython
242 274
243 275 In [2]: c = Client('/path/to/my/ipcontroller-client.json')
244 276
245 277 Remember, a client needs to be able to see the Hub's ports to connect. So if the Hub is on a
246 278 different machine, you may need to use an ssh server to tunnel access to that machine,
247 279 and then connect to it with:
248 280
249 281 .. sourcecode:: ipython
250 282
251 In [2]: c = Client(sshserver='myhub.example.com')
283 In [2]: c = Client('/path/to/my/ipcontroller-client.json', sshserver='me@myhub.example.com')
252 284
253 285 Where 'myhub.example.com' is the url or IP address of the machine on
254 286 which the Hub process is running (or another machine that has direct access to the Hub's ports).
255 287
256 288 The SSH server may already be specified in ipcontroller-client.json, if the controller was
257 289 given the ``--ssh`` option at launch time.
258 290
259 291 You are now ready to learn more about the :ref:`Direct
260 292 <parallel_multiengine>` and :ref:`LoadBalanced <parallel_task>` interfaces to the
261 293 controller.
262 294
263 295 .. [ZeroMQ] ZeroMQ. http://www.zeromq.org
@@ -1,847 +1,841 b''
1 1 .. _parallel_multiengine:
2 2
3 3 ==========================
4 4 IPython's Direct interface
5 5 ==========================
6 6
7 7 The direct, or multiengine, interface represents one possible way of working with a set of
8 8 IPython engines. The basic idea behind the multiengine interface is that the
9 9 capabilities of each engine are directly and explicitly exposed to the user.
10 10 Thus, in the multiengine interface, each engine is given an id that is used to
11 11 identify the engine and give it work to do. This interface is very intuitive
12 12 and is designed with interactive usage in mind, and is the best place for
13 13 new users of IPython to begin.
14 14
15 15 Starting the IPython controller and engines
16 16 ===========================================
17 17
18 18 To follow along with this tutorial, you will need to start the IPython
19 19 controller and four IPython engines. The simplest way of doing this is to use
20 20 the :command:`ipcluster` command::
21 21
22 22 $ ipcluster start -n 4
23 23
24 24 For more detailed information about starting the controller and engines, see
25 25 our :ref:`introduction <parallel_overview>` to using IPython for parallel computing.
26 26
27 Creating a ``Client`` instance
28 ==============================
27 Creating a ``DirectView`` instance
28 ==================================
29 29
30 30 The first step is to import the :mod:`IPython.parallel`
31 31 module and then create a :class:`.Client` instance:
32 32
33 33 .. sourcecode:: ipython
34 34
35 35 In [1]: from IPython.parallel import Client
36 36
37 37 In [2]: rc = Client()
38 38
39 39 This form assumes that the default connection information (stored in
40 40 :file:`ipcontroller-client.json` found in :file:`IPYTHON_DIR/profile_default/security`) is
41 41 accurate. If the controller was started on a remote machine, you must copy that connection
42 42 file to the client machine, or enter its contents as arguments to the Client constructor:
43 43
44 44 .. sourcecode:: ipython
45 45
46 46 # If you have copied the json connector file from the controller:
47 47 In [2]: rc = Client('/path/to/ipcontroller-client.json')
48 48 # or to connect with a specific profile you have set up:
49 49 In [3]: rc = Client(profile='mpi')
50 50
51 51
52 52 To make sure there are engines connected to the controller, users can get a list
53 53 of engine ids:
54 54
55 55 .. sourcecode:: ipython
56 56
57 57 In [3]: rc.ids
58 58 Out[3]: [0, 1, 2, 3]
59 59
60 60 Here we see that there are four engines ready to do work for us.
61 61
62 62 For direct execution, we will make use of a :class:`DirectView` object, which can be
63 63 constructed via list-access to the client:
64 64
65 65 .. sourcecode:: ipython
66 66
67 67 In [4]: dview = rc[:] # use all engines
68 68
69 69 .. seealso::
70 70
71 71 For more information, see the in-depth explanation of :ref:`Views <parallel_details>`.
72 72
73 73
74 74 Quick and easy parallelism
75 75 ==========================
76 76
77 77 In many cases, you simply want to apply a Python function to a sequence of
78 78 objects, but *in parallel*. The client interface provides a simple way
79 79 of accomplishing this: using the DirectView's :meth:`~DirectView.map` method.
80 80
81 81 Parallel map
82 82 ------------
83 83
84 84 Python's builtin :func:`map` function allows a function to be applied to a
85 85 sequence element-by-element. This type of code is typically trivial to
86 86 parallelize. In fact, since IPython's interface is all about functions anyway,
87 87 you can just use the builtin :func:`map` with a :class:`RemoteFunction`, or a
88 88 DirectView's :meth:`map` method:
89 89
90 90 .. sourcecode:: ipython
91 91
92 92 In [62]: serial_result = map(lambda x:x**10, range(32))
93 93
94 94 In [63]: parallel_result = dview.map_sync(lambda x: x**10, range(32))
95 95
96 96 In [67]: serial_result==parallel_result
97 97 Out[67]: True
98 98
99 99
100 100 .. note::
101 101
102 102 The :class:`DirectView`'s version of :meth:`map` does
103 103 not do dynamic load balancing. For a load balanced version, use a
104 104 :class:`LoadBalancedView`.
105 105
106 106 .. seealso::
107 107
108 108 :meth:`map` is implemented via :class:`ParallelFunction`.
109 109
110 110 Remote function decorators
111 111 --------------------------
112 112
113 113 Remote functions are just like normal functions, but when they are called,
114 114 they execute on one or more engines, rather than locally. IPython provides
115 115 two decorators:
116 116
117 117 .. sourcecode:: ipython
118 118
119 119 In [10]: @dview.remote(block=True)
120 ...: def getpid():
121 ...: import os
122 ...: return os.getpid()
123 ...:
120 ....: def getpid():
121 ....: import os
122 ....: return os.getpid()
123 ....:
124 124
125 125 In [11]: getpid()
126 126 Out[11]: [12345, 12346, 12347, 12348]
127 127
128 128 The ``@parallel`` decorator creates parallel functions that break up element-wise
129 129 operations and distribute them, reconstructing the result.
130 130
131 131 .. sourcecode:: ipython
132 132
133 133 In [12]: import numpy as np
134 134
135 135 In [13]: A = np.random.random((64,48))
136 136
137 137 In [14]: @dview.parallel(block=True)
138 ...: def pmul(A,B):
139 ...: return A*B
138 ....: def pmul(A,B):
139 ....: return A*B
140 140
141 141 In [15]: C_local = A*A
142 142
143 143 In [16]: C_remote = pmul(A,A)
144 144
145 145 In [17]: (C_local == C_remote).all()
146 146 Out[17]: True
147 147
148 148 .. seealso::
149 149
150 150 See the docstrings for the :func:`parallel` and :func:`remote` decorators for
151 151 options.
152 152
153 153 Calling Python functions
154 154 ========================
155 155
156 156 The most basic type of operation that can be performed on the engines is to
157 157 execute Python code or call Python functions. Executing Python code can be
158 158 done in blocking or non-blocking mode (non-blocking is default) using the
159 159 :meth:`.View.execute` method, and calling functions can be done via the
160 160 :meth:`.View.apply` method.
161 161
162 162 apply
163 163 -----
164 164
165 165 The main method for doing remote execution (in fact, all methods that
166 166 communicate with the engines are built on top of it), is :meth:`View.apply`.
167 167
168 168 We strive to provide the cleanest interface we can, so `apply` has the following
169 169 signature:
170 170
171 171 .. sourcecode:: python
172 172
173 173 view.apply(f, *args, **kwargs)
174 174
175 175 There are various ways to call functions with IPython, and these flags are set as
176 176 attributes of the View. The ``DirectView`` has just two of these flags:
177 177
178 178 dv.block : bool
179 179 whether to wait for the result, or return an :class:`AsyncResult` object
180 180 immediately
181 181 dv.track : bool
182 182 whether to instruct pyzmq to track when zeromq is done sending the message.
183 183 This is primarily useful for non-copying sends of numpy arrays that you plan to
184 184 edit in-place. You need to know when it becomes safe to edit the buffer
185 185 without corrupting the message.
186 dv.targets : int, list of ints
187 which targets this view is associated with.
186 188
187 189
188 190 Creating a view is simple: index-access on a client creates a :class:`.DirectView`.
189 191
190 192 .. sourcecode:: ipython
191 193
192 194 In [4]: view = rc[1:3]
193 195 Out[4]: <DirectView [1, 2]>
194 196
195 197 In [5]: view.apply<tab>
196 198 view.apply view.apply_async view.apply_sync
197 199
198 200 For convenience, you can set block temporarily for a single call with the extra sync/async methods.
199 201
200 202 Blocking execution
201 203 ------------------
202 204
203 205 In blocking mode, the :class:`.DirectView` object (called ``dview`` in
204 206 these examples) submits the command to the controller, which places the
205 207 command in the engines' queues for execution. The :meth:`apply` call then
206 208 blocks until the engines are done executing the command:
207 209
208 210 .. sourcecode:: ipython
209 211
210 212 In [2]: dview = rc[:] # A DirectView of all engines
211 213 In [3]: dview.block=True
212 214 In [4]: dview['a'] = 5
213 215
214 216 In [5]: dview['b'] = 10
215 217
216 218 In [6]: dview.apply(lambda x: a+b+x, 27)
217 219 Out[6]: [42, 42, 42, 42]
218 220
219 221 You can also select blocking execution on a call-by-call basis with the :meth:`apply_sync`
220 222 method:
221 223
222 224 In [7]: dview.block=False
223 225
224 226 In [8]: dview.apply_sync(lambda x: a+b+x, 27)
225 227 Out[8]: [42, 42, 42, 42]
226 228
227 229 Python commands can be executed as strings on specific engines by using a View's ``execute``
228 230 method:
229 231
230 232 .. sourcecode:: ipython
231 233
232 234 In [6]: rc[::2].execute('c=a+b')
233 235
234 236 In [7]: rc[1::2].execute('c=a-b')
235 237
236 238 In [8]: dview['c'] # shorthand for dview.pull('c', block=True)
237 239 Out[8]: [15, -5, 15, -5]
238 240
239 241
240 242 Non-blocking execution
241 243 ----------------------
242 244
243 245 In non-blocking mode, :meth:`apply` submits the command to be executed and
244 246 then returns a :class:`AsyncResult` object immediately. The
245 247 :class:`AsyncResult` object gives you a way of getting a result at a later
246 248 time through its :meth:`get` method.
247 249
248 250 .. Note::
249 251
250 252 The :class:`AsyncResult` object provides a superset of the interface in
251 253 :py:class:`multiprocessing.pool.AsyncResult`. See the
252 254 `official Python documentation <http://docs.python.org/library/multiprocessing#multiprocessing.pool.AsyncResult>`_
253 255 for more.
254 256
255 257
256 258 This allows you to quickly submit long running commands without blocking your
257 259 local Python/IPython session:
258 260
259 261 .. sourcecode:: ipython
260 262
261 263 # define our function
262 264 In [6]: def wait(t):
263 ...: import time
264 ...: tic = time.time()
265 ...: time.sleep(t)
266 ...: return time.time()-tic
265 ....: import time
266 ....: tic = time.time()
267 ....: time.sleep(t)
268 ....: return time.time()-tic
267 269
268 270 # In non-blocking mode
269 271 In [7]: ar = dview.apply_async(wait, 2)
270 272
271 273 # Now block for the result
272 274 In [8]: ar.get()
273 275 Out[8]: [2.0006198883056641, 1.9997570514678955, 1.9996809959411621, 2.0003249645233154]
274 276
275 277 # Again in non-blocking mode
276 278 In [9]: ar = dview.apply_async(wait, 10)
277 279
278 280 # Poll to see if the result is ready
279 281 In [10]: ar.ready()
280 282 Out[10]: False
281 283
282 284 # ask for the result, but wait a maximum of 1 second:
283 285 In [45]: ar.get(1)
284 286 ---------------------------------------------------------------------------
285 287 TimeoutError Traceback (most recent call last)
286 288 /home/you/<ipython-input-45-7cd858bbb8e0> in <module>()
287 289 ----> 1 ar.get(1)
288 290
289 291 /path/to/site-packages/IPython/parallel/asyncresult.pyc in get(self, timeout)
290 292 62 raise self._exception
291 293 63 else:
292 294 ---> 64 raise error.TimeoutError("Result not ready.")
293 295 65
294 296 66 def ready(self):
295 297
296 298 TimeoutError: Result not ready.
297 299
298 300 .. Note::
299 301
300 302 Note the import inside the function. This is a common model, to ensure
301 303 that the appropriate modules are imported where the task is run. You can
302 304 also manually import modules into the engine(s) namespace(s) via
303 305 :meth:`view.execute('import numpy')`.
304 306
305 307 Often, it is desirable to wait until a set of :class:`AsyncResult` objects
306 308 are done. For this, there is the method :meth:`wait`. This method takes a
307 309 tuple of :class:`AsyncResult` objects (or `msg_ids` or indices to the client's History),
308 310 and blocks until all of the associated results are ready:
309 311
310 312 .. sourcecode:: ipython
311 313
312 314 In [72]: dview.block=False
313 315
314 316 # A trivial list of AsyncResults objects
315 317 In [73]: pr_list = [dview.apply_async(wait, 3) for i in range(10)]
316 318
317 319 # Wait until all of them are done
318 320 In [74]: dview.wait(pr_list)
319 321
320 322 # Then, their results are ready using get() or the `.r` attribute
321 323 In [75]: pr_list[0].get()
322 324 Out[75]: [2.9982571601867676, 2.9982588291168213, 2.9987530708312988, 2.9990990161895752]
323 325
324 326
325 327
326 328 The ``block`` and ``targets`` keyword arguments and attributes
327 329 --------------------------------------------------------------
328 330
329 Most DirectView methods (excluding :meth:`apply` and :meth:`map`) accept ``block`` and
331 Most DirectView methods (excluding :meth:`apply`) accept ``block`` and
330 332 ``targets`` as keyword arguments. As we have seen above, these keyword arguments control the
331 333 blocking mode and which engines the command is applied to. The :class:`View` class also has
332 334 :attr:`block` and :attr:`targets` attributes that control the default behavior when the keyword
333 335 arguments are not provided. Thus the following logic is used for :attr:`block` and :attr:`targets`:
334 336
335 337 * If no keyword argument is provided, the instance attributes are used.
336 338 * Keyword arguments, if provided, override the instance attributes for
337 339 the duration of a single call.
338 340
339 341 The following examples demonstrate how to use the instance attributes:
340 342
341 343 .. sourcecode:: ipython
342 344
343 345 In [16]: dview.targets = [0,2]
344 346
345 347 In [17]: dview.block = False
346 348
347 349 In [18]: ar = dview.apply(lambda : 10)
348 350
349 351 In [19]: ar.get()
350 352 Out[19]: [10, 10]
351 353
352 354 In [16]: dview.targets = v.client.ids # all engines (4)
353 355
354 356 In [21]: dview.block = True
355 357
356 358 In [22]: dview.apply(lambda : 42)
357 359 Out[22]: [42, 42, 42, 42]
358 360
359 361 The :attr:`block` and :attr:`targets` instance attributes of the
360 362 :class:`.DirectView` also determine the behavior of the parallel magic commands.
361 363
362 364 Parallel magic commands
363 365 -----------------------
364 366
365 .. warning::
366
367 The magics have not been changed to work with the zeromq system. The
368 magics do work, but *do not* print stdin/out like they used to in IPython.kernel.
369
370 367 We provide a few IPython magic commands (``%px``, ``%autopx`` and ``%result``)
371 368 that make it more pleasant to execute Python commands on the engines
372 369 interactively. These are simply shortcuts to :meth:`execute` and
373 370 :meth:`get_result` of the :class:`DirectView`. The ``%px`` magic executes a single
374 371 Python command on the engines specified by the :attr:`targets` attribute of the
375 372 :class:`DirectView` instance:
376 373
377 374 .. sourcecode:: ipython
378 375
379 # load the parallel magic extension:
380 In [21]: %load_ext parallelmagic
381
382 376 # Create a DirectView for all targets
383 377 In [22]: dv = rc[:]
384 378
385 379 # Make this DirectView active for parallel magic commands
386 380 In [23]: dv.activate()
387 381
388 382 In [24]: dv.block=True
389 383
390 In [25]: import numpy
391
392 In [26]: %px import numpy
393 Parallel execution on engines: [0, 1, 2, 3]
384 # import numpy here and everywhere
385 In [25]: with dv.sync_imports():
386 ....: import numpy
387 importing numpy on engine(s)
394 388
395 389 In [27]: %px a = numpy.random.rand(2,2)
396 390 Parallel execution on engines: [0, 1, 2, 3]
397 391
398 392 In [28]: %px ev = numpy.linalg.eigvals(a)
399 393 Parallel execution on engines: [0, 1, 2, 3]
400 394
401 395 In [28]: dv['ev']
402 396 Out[28]: [ array([ 1.09522024, -0.09645227]),
403 array([ 1.21435496, -0.35546712]),
404 array([ 0.72180653, 0.07133042]),
405 array([ 1.46384341e+00, 1.04353244e-04])
406 ]
397 ....: array([ 1.21435496, -0.35546712]),
398 ....: array([ 0.72180653, 0.07133042]),
399 ....: array([ 1.46384341, 1.04353244e-04])
400 ....: ]
407 401
408 402 The ``%result`` magic gets the most recent result, or takes an argument
409 403 specifying the index of the result to be requested. It is simply a shortcut to the
410 404 :meth:`get_result` method:
411 405
412 406 .. sourcecode:: ipython
413 407
414 408 In [29]: dv.apply_async(lambda : ev)
415 409
416 410 In [30]: %result
417 411 Out[30]: [ [ 1.28167017 0.14197338],
418 [-0.14093616 1.27877273],
419 [-0.37023573 1.06779409],
420 [ 0.83664764 -0.25602658] ]
412 ....: [-0.14093616 1.27877273],
413 ....: [-0.37023573 1.06779409],
414 ....: [ 0.83664764 -0.25602658] ]
421 415
422 416 The ``%autopx`` magic switches to a mode where everything you type is executed
423 417 on the engines given by the :attr:`targets` attribute:
424 418
425 419 .. sourcecode:: ipython
426 420
427 421 In [30]: dv.block=False
428 422
429 423 In [31]: %autopx
430 424 Auto Parallel Enabled
431 425 Type %autopx to disable
432 426
433 427 In [32]: max_evals = []
434 428 <IPython.parallel.AsyncResult object at 0x17b8a70>
435 429
436 430 In [33]: for i in range(100):
437 431 ....: a = numpy.random.rand(10,10)
438 432 ....: a = a+a.transpose()
439 433 ....: evals = numpy.linalg.eigvals(a)
440 434 ....: max_evals.append(evals[0].real)
441 435 ....:
442 436 ....:
443 437 <IPython.parallel.AsyncResult object at 0x17af8f0>
444 438
445 439 In [34]: %autopx
446 440 Auto Parallel Disabled
447 441
448 442 In [35]: dv.block=True
449 443
450 444 In [36]: px ans= "Average max eigenvalue is: %f"%(sum(max_evals)/len(max_evals))
451 445 Parallel execution on engines: [0, 1, 2, 3]
452 446
453 447 In [37]: dv['ans']
454 448 Out[37]: [ 'Average max eigenvalue is: 10.1387247332',
455 'Average max eigenvalue is: 10.2076902286',
456 'Average max eigenvalue is: 10.1891484655',
457 'Average max eigenvalue is: 10.1158837784',]
449 ....: 'Average max eigenvalue is: 10.2076902286',
450 ....: 'Average max eigenvalue is: 10.1891484655',
451 ....: 'Average max eigenvalue is: 10.1158837784',]
458 452
459 453
460 454 Moving Python objects around
461 455 ============================
462 456
463 457 In addition to calling functions and executing code on engines, you can
464 458 transfer Python objects to and from your IPython session and the engines. In
465 459 IPython, these operations are called :meth:`push` (sending an object to the
466 460 engines) and :meth:`pull` (getting an object from the engines).
467 461
468 462 Basic push and pull
469 463 -------------------
470 464
471 465 Here are some examples of how you use :meth:`push` and :meth:`pull`:
472 466
473 467 .. sourcecode:: ipython
474 468
475 469 In [38]: dview.push(dict(a=1.03234,b=3453))
476 470 Out[38]: [None,None,None,None]
477 471
478 472 In [39]: dview.pull('a')
479 473 Out[39]: [ 1.03234, 1.03234, 1.03234, 1.03234]
480 474
481 475 In [40]: dview.pull('b', targets=0)
482 476 Out[40]: 3453
483 477
484 478 In [41]: dview.pull(('a','b'))
485 479 Out[41]: [ [1.03234, 3453], [1.03234, 3453], [1.03234, 3453], [1.03234, 3453] ]
486 480
487 481 In [43]: dview.push(dict(c='speed'))
488 482 Out[43]: [None,None,None,None]
489 483
490 484 In non-blocking mode :meth:`push` and :meth:`pull` also return
491 485 :class:`AsyncResult` objects:
492 486
493 487 .. sourcecode:: ipython
494 488
495 489 In [48]: ar = dview.pull('a', block=False)
496 490
497 491 In [49]: ar.get()
498 492 Out[49]: [1.03234, 1.03234, 1.03234, 1.03234]
499 493
500 494
501 495 Dictionary interface
502 496 --------------------
503 497
504 498 Since a Python namespace is just a :class:`dict`, :class:`DirectView` objects provide
505 499 dictionary-style access by key and methods such as :meth:`get` and
506 500 :meth:`update` for convenience. This makes the remote namespaces of the engines
507 501 appear as a local dictionary. Underneath, these methods call :meth:`apply`:
508 502
509 503 .. sourcecode:: ipython
510 504
511 505 In [51]: dview['a']=['foo','bar']
512 506
513 507 In [52]: dview['a']
514 508 Out[52]: [ ['foo', 'bar'], ['foo', 'bar'], ['foo', 'bar'], ['foo', 'bar'] ]
515 509
516 510 Scatter and gather
517 511 ------------------
518 512
519 513 Sometimes it is useful to partition a sequence and push the partitions to
520 514 different engines. In MPI language, this is known as scatter/gather and we
521 515 follow that terminology. However, it is important to remember that in
522 516 IPython's :class:`Client` class, :meth:`scatter` is from the
523 517 interactive IPython session to the engines and :meth:`gather` is from the
524 518 engines back to the interactive IPython session. For scatter/gather operations
525 between engines, MPI should be used:
519 between engines, MPI, pyzmq, or some other direct interconnect should be used.
526 520
527 521 .. sourcecode:: ipython
528 522
529 523 In [58]: dview.scatter('a',range(16))
530 524 Out[58]: [None,None,None,None]
531 525
532 526 In [59]: dview['a']
533 527 Out[59]: [ [0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15] ]
534 528
535 529 In [60]: dview.gather('a')
536 530 Out[60]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
537 531
538 532 Other things to look at
539 533 =======================
540 534
541 535 How to do parallel list comprehensions
542 536 --------------------------------------
543 537
544 538 In many cases list comprehensions are nicer than using the map function. While
545 539 we don't have fully parallel list comprehensions, it is simple to get the
546 540 basic effect using :meth:`scatter` and :meth:`gather`:
547 541
548 542 .. sourcecode:: ipython
549 543
550 544 In [66]: dview.scatter('x',range(64))
551 545
552 546 In [67]: %px y = [i**10 for i in x]
553 547 Parallel execution on engines: [0, 1, 2, 3]
554 548 Out[67]:
555 549
556 550 In [68]: y = dview.gather('y')
557 551
558 552 In [69]: print y
559 553 [0, 1, 1024, 59049, 1048576, 9765625, 60466176, 282475249, 1073741824,...]
560 554
561 555 Remote imports
562 556 --------------
563 557
564 558 Sometimes you will want to import packages both in your interactive session
565 559 and on your remote engines. This can be done with the :class:`ContextManager`
566 560 created by a DirectView's :meth:`sync_imports` method:
567 561
568 562 .. sourcecode:: ipython
569 563
570 564 In [69]: with dview.sync_imports():
571 ...: import numpy
565 ....: import numpy
572 566 importing numpy on engine(s)
573 567
574 568 Any imports made inside the block will also be performed on the view's engines.
575 569 sync_imports also takes a `local` boolean flag that defaults to True, which specifies
576 570 whether the local imports should also be performed. However, support for `local=False`
577 571 has not been implemented, so only packages that can be imported locally will work
578 572 this way.
579 573
580 574 You can also specify imports via the ``@require`` decorator. This is a decorator
581 575 designed for use in Dependencies, but can be used to handle remote imports as well.
582 576 Modules or module names passed to ``@require`` will be imported before the decorated
583 577 function is called. If they cannot be imported, the decorated function will never
584 578 execute, and will fail with an UnmetDependencyError.
585 579
586 580 .. sourcecode:: ipython
587 581
588 582 In [69]: from IPython.parallel import require
589 583
590 584 In [70]: @require('re')
591 ...: def findall(pat, x):
592 ...: # re is guaranteed to be available
593 ...: return re.findall(pat, x)
585 ....: def findall(pat, x):
586 ....: # re is guaranteed to be available
587 ....: return re.findall(pat, x)
594 588
595 589 # you can also pass modules themselves, that you already have locally:
596 590 In [71]: @require(time)
597 ...: def wait(t):
598 ...: time.sleep(t)
599 ...: return t
591 ....: def wait(t):
592 ....: time.sleep(t)
593 ....: return t
600 594
601 595 .. _parallel_exceptions:
602 596
603 597 Parallel exceptions
604 598 -------------------
605 599
606 600 In the multiengine interface, parallel commands can raise Python exceptions,
607 601 just like serial commands. But, it is a little subtle, because a single
608 602 parallel command can actually raise multiple exceptions (one for each engine
609 603 the command was run on). To express this idea, we have a
610 604 :exc:`CompositeError` exception class that will be raised in most cases. The
611 605 :exc:`CompositeError` class is a special type of exception that wraps one or
612 606 more other types of exceptions. Here is how it works:
613 607
614 608 .. sourcecode:: ipython
615 609
616 610 In [76]: dview.block=True
617 611
618 612 In [77]: dview.execute('1/0')
619 613 ---------------------------------------------------------------------------
620 614 CompositeError Traceback (most recent call last)
621 615 /home/user/<ipython-input-10-5d56b303a66c> in <module>()
622 616 ----> 1 dview.execute('1/0')
623 617
624 618 /path/to/site-packages/IPython/parallel/client/view.pyc in execute(self, code, targets, block)
625 619 591 default: self.block
626 620 592 """
627 621 --> 593 return self._really_apply(util._execute, args=(code,), block=block, targets=targets)
628 622 594
629 623 595 def run(self, filename, targets=None, block=None):
630 624
631 625 /home/user/<string> in _really_apply(self, f, args, kwargs, targets, block, track)
632 626
633 627 /path/to/site-packages/IPython/parallel/client/view.pyc in sync_results(f, self, *args, **kwargs)
634 628 55 def sync_results(f, self, *args, **kwargs):
635 629 56 """sync relevant results from self.client to our results attribute."""
636 630 ---> 57 ret = f(self, *args, **kwargs)
637 631 58 delta = self.outstanding.difference(self.client.outstanding)
638 632 59 completed = self.outstanding.intersection(delta)
639 633
640 634 /home/user/<string> in _really_apply(self, f, args, kwargs, targets, block, track)
641 635
642 636 /path/to/site-packages/IPython/parallel/client/view.pyc in save_ids(f, self, *args, **kwargs)
643 637 44 n_previous = len(self.client.history)
644 638 45 try:
645 639 ---> 46 ret = f(self, *args, **kwargs)
646 640 47 finally:
647 641 48 nmsgs = len(self.client.history) - n_previous
648 642
649 643 /path/to/site-packages/IPython/parallel/client/view.pyc in _really_apply(self, f, args, kwargs, targets, block, track)
650 644 529 if block:
651 645 530 try:
652 646 --> 531 return ar.get()
653 647 532 except KeyboardInterrupt:
654 648 533 pass
655 649
656 650 /path/to/site-packages/IPython/parallel/client/asyncresult.pyc in get(self, timeout)
657 651 101 return self._result
658 652 102 else:
659 653 --> 103 raise self._exception
660 654 104 else:
661 655 105 raise error.TimeoutError("Result not ready.")
662 656
663 657 CompositeError: one or more exceptions from call to method: _execute
664 658 [0:apply]: ZeroDivisionError: integer division or modulo by zero
665 659 [1:apply]: ZeroDivisionError: integer division or modulo by zero
666 660 [2:apply]: ZeroDivisionError: integer division or modulo by zero
667 661 [3:apply]: ZeroDivisionError: integer division or modulo by zero
668 662
669 663 Notice how the error message printed when :exc:`CompositeError` is raised has
670 664 information about the individual exceptions that were raised on each engine.
671 665 If you want, you can even raise one of these original exceptions:
672 666
673 667 .. sourcecode:: ipython
674 668
675 669 In [80]: try:
676 670 ....: dview.execute('1/0')
677 671 ....: except parallel.error.CompositeError, e:
678 672 ....: e.raise_exception()
679 673 ....:
680 674 ....:
681 675 ---------------------------------------------------------------------------
682 676 RemoteError Traceback (most recent call last)
683 677 /home/user/<ipython-input-17-8597e7e39858> in <module>()
684 678 2 dview.execute('1/0')
685 679 3 except CompositeError as e:
686 680 ----> 4 e.raise_exception()
687 681
688 682 /path/to/site-packages/IPython/parallel/error.pyc in raise_exception(self, excid)
689 683 266 raise IndexError("an exception with index %i does not exist"%excid)
690 684 267 else:
691 685 --> 268 raise RemoteError(en, ev, etb, ei)
692 686 269
693 687 270
694 688
695 689 RemoteError: ZeroDivisionError(integer division or modulo by zero)
696 690 Traceback (most recent call last):
697 691 File "/path/to/site-packages/IPython/parallel/engine/streamkernel.py", line 330, in apply_request
698 692 exec code in working,working
699 693 File "<string>", line 1, in <module>
700 694 File "/path/to/site-packages/IPython/parallel/util.py", line 354, in _execute
701 695 exec code in globals()
702 696 File "<string>", line 1, in <module>
703 697 ZeroDivisionError: integer division or modulo by zero
704 698
705 699 If you are working in IPython, you can simply type ``%debug`` after one of
706 700 these :exc:`CompositeError` exceptions is raised, and inspect the exception
707 701 instance:
708 702
709 703 .. sourcecode:: ipython
710 704
711 705 In [81]: dview.execute('1/0')
712 706 ---------------------------------------------------------------------------
713 707 CompositeError Traceback (most recent call last)
714 708 /home/user/<ipython-input-10-5d56b303a66c> in <module>()
715 709 ----> 1 dview.execute('1/0')
716 710
717 711 /path/to/site-packages/IPython/parallel/client/view.pyc in execute(self, code, targets, block)
718 712 591 default: self.block
719 713 592 """
720 714 --> 593 return self._really_apply(util._execute, args=(code,), block=block, targets=targets)
721 715 594
722 716 595 def run(self, filename, targets=None, block=None):
723 717
724 718 /home/user/<string> in _really_apply(self, f, args, kwargs, targets, block, track)
725 719
726 720 /path/to/site-packages/IPython/parallel/client/view.pyc in sync_results(f, self, *args, **kwargs)
727 721 55 def sync_results(f, self, *args, **kwargs):
728 722 56 """sync relevant results from self.client to our results attribute."""
729 723 ---> 57 ret = f(self, *args, **kwargs)
730 724 58 delta = self.outstanding.difference(self.client.outstanding)
731 725 59 completed = self.outstanding.intersection(delta)
732 726
733 727 /home/user/<string> in _really_apply(self, f, args, kwargs, targets, block, track)
734 728
735 729 /path/to/site-packages/IPython/parallel/client/view.pyc in save_ids(f, self, *args, **kwargs)
736 730 44 n_previous = len(self.client.history)
737 731 45 try:
738 732 ---> 46 ret = f(self, *args, **kwargs)
739 733 47 finally:
740 734 48 nmsgs = len(self.client.history) - n_previous
741 735
742 736 /path/to/site-packages/IPython/parallel/client/view.pyc in _really_apply(self, f, args, kwargs, targets, block, track)
743 737 529 if block:
744 738 530 try:
745 739 --> 531 return ar.get()
746 740 532 except KeyboardInterrupt:
747 741 533 pass
748 742
749 743 /path/to/site-packages/IPython/parallel/client/asyncresult.pyc in get(self, timeout)
750 744 101 return self._result
751 745 102 else:
752 746 --> 103 raise self._exception
753 747 104 else:
754 748 105 raise error.TimeoutError("Result not ready.")
755 749
756 750 CompositeError: one or more exceptions from call to method: _execute
757 751 [0:apply]: ZeroDivisionError: integer division or modulo by zero
758 752 [1:apply]: ZeroDivisionError: integer division or modulo by zero
759 753 [2:apply]: ZeroDivisionError: integer division or modulo by zero
760 754 [3:apply]: ZeroDivisionError: integer division or modulo by zero
761 755
762 756 In [82]: %debug
763 757 > /path/to/site-packages/IPython/parallel/client/asyncresult.py(103)get()
764 758 102 else:
765 759 --> 103 raise self._exception
766 760 104 else:
767 761
768 762     # With the debugger running, self._exception is the exception instance. We can
769 763     # tab-complete on it and see the extra methods that are available.
770 764 ipdb> self._exception.<tab>
771 765 e.__class__ e.__getitem__ e.__new__ e.__setstate__ e.args
772 766 e.__delattr__ e.__getslice__ e.__reduce__ e.__str__ e.elist
773 767 e.__dict__ e.__hash__ e.__reduce_ex__ e.__weakref__ e.message
774 768 e.__doc__ e.__init__ e.__repr__ e._get_engine_str e.print_tracebacks
775 769 e.__getattribute__ e.__module__ e.__setattr__ e._get_traceback e.raise_exception
776 770 ipdb> self._exception.print_tracebacks()
777 771 [0:apply]:
778 772 Traceback (most recent call last):
779 773 File "/path/to/site-packages/IPython/parallel/engine/streamkernel.py", line 330, in apply_request
780 774 exec code in working,working
781 775 File "<string>", line 1, in <module>
782 776 File "/path/to/site-packages/IPython/parallel/util.py", line 354, in _execute
783 777 exec code in globals()
784 778 File "<string>", line 1, in <module>
785 779 ZeroDivisionError: integer division or modulo by zero
786 780
787 781
788 782 [1:apply]:
789 783 Traceback (most recent call last):
790 784 File "/path/to/site-packages/IPython/parallel/engine/streamkernel.py", line 330, in apply_request
791 785 exec code in working,working
792 786 File "<string>", line 1, in <module>
793 787 File "/path/to/site-packages/IPython/parallel/util.py", line 354, in _execute
794 788 exec code in globals()
795 789 File "<string>", line 1, in <module>
796 790 ZeroDivisionError: integer division or modulo by zero
797 791
798 792
799 793 [2:apply]:
800 794 Traceback (most recent call last):
801 795 File "/path/to/site-packages/IPython/parallel/engine/streamkernel.py", line 330, in apply_request
802 796 exec code in working,working
803 797 File "<string>", line 1, in <module>
804 798 File "/path/to/site-packages/IPython/parallel/util.py", line 354, in _execute
805 799 exec code in globals()
806 800 File "<string>", line 1, in <module>
807 801 ZeroDivisionError: integer division or modulo by zero
808 802
809 803
810 804 [3:apply]:
811 805 Traceback (most recent call last):
812 806 File "/path/to/site-packages/IPython/parallel/engine/streamkernel.py", line 330, in apply_request
813 807 exec code in working,working
814 808 File "<string>", line 1, in <module>
815 809 File "/path/to/site-packages/IPython/parallel/util.py", line 354, in _execute
816 810 exec code in globals()
817 811 File "<string>", line 1, in <module>
818 812 ZeroDivisionError: integer division or modulo by zero
819 813
820 814
821 815 All of this same error handling magic even works in non-blocking mode:
822 816
823 817 .. sourcecode:: ipython
824 818
825 819 In [83]: dview.block=False
826 820
827 821 In [84]: ar = dview.execute('1/0')
828 822
829 823 In [85]: ar.get()
830 824 ---------------------------------------------------------------------------
831 825 CompositeError Traceback (most recent call last)
832 826 /home/user/<ipython-input-21-8531eb3d26fb> in <module>()
833 827 ----> 1 ar.get()
834 828
835 829 /path/to/site-packages/IPython/parallel/client/asyncresult.pyc in get(self, timeout)
836 830 101 return self._result
837 831 102 else:
838 832 --> 103 raise self._exception
839 833 104 else:
840 834 105 raise error.TimeoutError("Result not ready.")
841 835
842 836 CompositeError: one or more exceptions from call to method: _execute
843 837 [0:apply]: ZeroDivisionError: integer division or modulo by zero
844 838 [1:apply]: ZeroDivisionError: integer division or modulo by zero
845 839 [2:apply]: ZeroDivisionError: integer division or modulo by zero
846 840 [3:apply]: ZeroDivisionError: integer division or modulo by zero
847 841
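If you prefer to handle the error programmatically instead of letting it print, the
same methods shown above apply. A minimal sketch, catching the error from the
non-blocking example and printing the remote tracebacks:

.. sourcecode:: ipython

    In [86]: try:
       ....:     ar.get()
       ....: except parallel.error.CompositeError as e:
       ....:     e.print_tracebacks()
       ....:
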
@@ -1,449 +1,462 b''
1 1 .. _parallel_task:
2 2
3 3 ==========================
4 4 The IPython task interface
5 5 ==========================
6 6
7 7 The task interface to the cluster presents the engines as a fault-tolerant,
8 8 dynamic load-balanced system of workers. Unlike the multiengine interface, in
9 9 the task interface the user has no direct access to individual engines. By
10 10 allowing the IPython scheduler to assign work, this interface is simultaneously
11 11 simpler and more powerful.
12 12
13 13 Best of all, you can use both of these interfaces at the same time, taking
14 14 advantage of their respective strengths. When you can break your work up into
15 15 segments that do not depend on previous execution, the task interface is ideal.
16 16 But it also has more power and flexibility, allowing
17 17 you to guide the distribution of jobs, without having to assign tasks to
18 18 engines explicitly.
19 19
20 20 Starting the IPython controller and engines
21 21 ===========================================
22 22
23 23 To follow along with this tutorial, you will need to start the IPython
24 24 controller and four IPython engines. The simplest way of doing this is to use
25 25 the :command:`ipcluster` command::
26 26
27 27 $ ipcluster start -n 4
28 28
29 29 For more detailed information about starting the controller and engines, see
30 30 our :ref:`introduction <parallel_overview>` to using IPython for parallel computing.
31 31
32 Creating a ``Client`` instance
33 ==============================
32 Creating a ``LoadBalancedView`` instance
33 ========================================
34 34
35 35 The first step is to import the :mod:`IPython.parallel` module
36 36 and then create a :class:`.Client` instance; we will also be using
37 37 a :class:`LoadBalancedView`, here called `lview`:
38 38
39 39 .. sourcecode:: ipython
40 40
41 41 In [1]: from IPython.parallel import Client
42 42
43 43 In [2]: rc = Client()
44 44
45 45
46 46 This form assumes that the controller was started on localhost with default
47 47 configuration. If not, the location of the controller must be given as an
48 48 argument to the constructor:
49 49
50 50 .. sourcecode:: ipython
51 51
52 52 # for a visible LAN controller listening on an external port:
53 53 In [2]: rc = Client('tcp://192.168.1.16:10101')
54 54 # or to connect with a specific profile you have set up:
55 55 In [3]: rc = Client(profile='mpi')
56 56
57 57 For load-balanced execution, we will make use of a :class:`LoadBalancedView` object, which can
58 58 be constructed via the client's :meth:`load_balanced_view` method:
59 59
60 60 .. sourcecode:: ipython
61 61
62 62 In [4]: lview = rc.load_balanced_view() # default load-balanced view
63 63
64 64 .. seealso::
65 65
66 66 For more information, see the in-depth explanation of :ref:`Views <parallel_details>`.
67 67
68 68
69 69 Quick and easy parallelism
70 70 ==========================
71 71
72 72 In many cases, you simply want to apply a Python function to a sequence of
73 73 objects, but *in parallel*. Like the multiengine interface, these can be
74 74 implemented via the task interface. The exact same tools can perform these
75 75 actions in load-balanced ways as well as multiplexed ways: a parallel version
76 76 of :func:`map` and the :func:`@parallel` function decorator. When used through
77 77 a :class:`LoadBalancedView`, they are dynamically load balanced. Thus, if the
78 78 execution time per item varies significantly, you should use the versions in
79 79 the task interface.
80 80
81 81 Parallel map
82 82 ------------
83 83
84 84 To load-balance :meth:`map`, simply use a :class:`LoadBalancedView`:
85 85
86 86 .. sourcecode:: ipython
87 87
88 88 In [62]: lview.block = True
89 89
90 90 In [63]: serial_result = map(lambda x:x**10, range(32))
91 91
92 92 In [64]: parallel_result = lview.map(lambda x:x**10, range(32))
93 93
94 94 In [65]: serial_result==parallel_result
95 95 Out[65]: True
96 96
97 97 Parallel function decorator
98 98 ---------------------------
99 99
100 100 Parallel functions are just like normal functions, but they can be called on
101 101 sequences and *in parallel*. The multiengine interface provides a decorator
102 102 that turns any Python function into a parallel function:
103 103
104 104 .. sourcecode:: ipython
105 105
106 106 In [10]: @lview.parallel()
107 107 ....: def f(x):
108 108 ....: return 10.0*x**4
109 109 ....:
110 110
111 111 In [11]: f.map(range(32)) # this is done in parallel
112 112 Out[11]: [0.0,10.0,160.0,...]
113 113
114 114 .. _parallel_taskmap:
115 115
116 The AsyncMapResult
117 ==================
116 Map results are iterable!
117 -------------------------
118
119 When an AsyncResult object maps multiple results (e.g. the :class:`~AsyncMapResult`
120 object), you can iterate through them, and act on the results as they arrive:
121
122 .. literalinclude:: ../../examples/parallel/itermapresult.py
123 :language: python
124 :lines: 9-34
125
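In short, a minimal sketch of the same pattern:

.. sourcecode:: ipython

    In [66]: amr = lview.map_async(lambda x: 2*x, range(8))

    In [67]: for r in amr:
       ....:     print r
       ....:
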
126 .. seealso::
127
128 When the AsyncResult or AsyncMapResult objects don't provide what you need (for instance,
129 handling individual results as they arrive, but with metadata), you can always
130 just split the original result's ``msg_ids`` attribute, and handle them as you like.
131
132 For an example of this, see :file:`docs/examples/parallel/customresult.py`
118 133
119 When you call ``lview.map_async(f, sequence)``, or just :meth:`map` with `block=True`, then
120 what you get in return will be an :class:`~AsyncMapResult` object. These are similar to
121 AsyncResult objects, but with one key difference
122 134
123 135 .. _parallel_dependencies:
124 136
125 137 Dependencies
126 138 ============
127 139
128 140 Often, pure atomic load-balancing is too primitive for your work. In these cases, you
129 141 may want to associate some kind of `Dependency` that describes when, where, or whether
130 142 a task can be run. In IPython, we provide two types of dependencies:
131 143 `Functional Dependencies`_ and `Graph Dependencies`_.
132 144
133 145 .. note::
134 146
135 147 It is important to note that the pure ZeroMQ scheduler does not support dependencies,
136 148 and you will see errors or warnings if you try to use dependencies with the pure
137 149 scheduler.
138 150
139 151 Functional Dependencies
140 152 -----------------------
141 153
142 154 Functional dependencies are used to determine whether a given engine is capable of running
143 155 a particular task. This is implemented via a special :class:`Exception` class,
144 156 :class:`UnmetDependency`, found in `IPython.parallel.error`. Its use is very simple:
145 157 if a task fails with an UnmetDependency exception, then the scheduler, instead of relaying
146 158 the error up to the client like any other error, catches the error, and submits the task
147 159 to a different engine. This will be repeated as needed, but a task will never be submitted
148 160 to a given engine a second time.
149 161
150 162 You can raise the :class:`UnmetDependency` yourself, but IPython provides
151 163 decorators that facilitate this behavior.
152 164
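For instance, a minimal sketch of raising it by hand; ``pycuda`` and
``do_gpu_stuff`` are hypothetical placeholders here:

.. sourcecode:: ipython

    In [8]: def gpu_task(x):
       ...:     from IPython.parallel.error import UnmetDependency
       ...:     try:
       ...:         import pycuda  # hypothetical requirement
       ...:     except ImportError:
       ...:         # tell the scheduler to try this task on another engine
       ...:         raise UnmetDependency()
       ...:     return do_gpu_stuff(x)
       ...:
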
153 165 There are two decorators and a class used for functional dependencies:
154 166
155 167 .. sourcecode:: ipython
156 168
157 169 In [9]: from IPython.parallel import depend, require, dependent
158 170
159 171 @require
160 172 ********
161 173
162 174 The simplest sort of dependency is requiring that a Python module is available. The
163 175 ``@require`` decorator lets you define a function that will only run on engines where names
164 176 you specify are importable:
165 177
166 178 .. sourcecode:: ipython
167 179
168 180 In [10]: @require('numpy', 'zmq')
169 ...: def myfunc():
170 ...: return dostuff()
181 ....: def myfunc():
182 ....: return dostuff()
171 183
172 184 Now, any time you apply :func:`myfunc`, the task will only run on a machine that has
173 185 numpy and pyzmq available, and when :func:`myfunc` is called, numpy and zmq will be imported.
174 186
175 187 @depend
176 188 *******
177 189
178 190 The ``@depend`` decorator lets you decorate any function with any *other* function to
179 191 evaluate the dependency. The dependency function will be called at the start of the task,
180 192 and if it returns ``False``, then the dependency will be considered unmet, and the task
181 193 will be assigned to another engine. If the dependency returns *anything other than
182 194 ``False``*, the rest of the task will continue.
183 195
184 196 .. sourcecode:: ipython
185 197
186 198 In [10]: def platform_specific(plat):
187 ...: import sys
188 ...: return sys.platform == plat
199 ....: import sys
200 ....: return sys.platform == plat
189 201
190 202 In [11]: @depend(platform_specific, 'darwin')
191 ...: def mactask():
192 ...: do_mac_stuff()
203 ....: def mactask():
204 ....: do_mac_stuff()
193 205
194 206 In [12]: @depend(platform_specific, 'nt')
195 ...: def wintask():
196 ...: do_windows_stuff()
207 ....: def wintask():
208 ....: do_windows_stuff()
197 209
198 210 In this case, any time you apply ``mactask``, it will only run on an OSX machine.
199 211 ``@depend`` is just like ``apply``, in that it has a ``@depend(f,*args,**kwargs)``
200 212 signature.
201 213
202 214 dependents
203 215 **********
204 216
205 217 You don't have to use the decorators on your tasks. If, for instance, you want
206 218 to run tasks with a single function but varying dependencies, you can directly construct
207 219 the :class:`dependent` object that the decorators use:
208 220
209 221 .. sourcecode:: ipython
210 222
211 223 In [13]: def mytask(*args):
212 ...: dostuff()
224 ....: dostuff()
213 225
214 226 In [14]: mactask = dependent(mytask, platform_specific, 'darwin')
215 227 # this is the same as decorating the declaration of mytask with @depend
216 228 # but you can do it again:
217 229
218 230 In [15]: wintask = dependent(mytask, platform_specific, 'nt')
219 231
220 232 # in general:
221 233 In [16]: t = dependent(f, g, *dargs, **dkwargs)
222 234
223 235 # is equivalent to:
224 236 In [17]: @depend(g, *dargs, **dkwargs)
225 ...: def t(a,b,c):
226 ...: # contents of f
237 ....: def t(a,b,c):
238 ....: # contents of f
227 239
228 240 Graph Dependencies
229 241 ------------------
230 242
231 243 Sometimes you want to restrict the time and/or location to run a given task as a function
232 244 of the time and/or location of other tasks. This is implemented via a subclass of
233 245 :class:`set`, called a :class:`Dependency`. A Dependency is just a set of `msg_ids`
234 246 corresponding to tasks, and a few attributes to guide how to decide when the Dependency
235 247 has been met.
236 248
237 249 The switches we provide for interpreting whether a given dependency set has been met:
238 250
239 251 any|all
240 252 Whether the dependency is considered met if *any* of the dependencies are done, or
241 253 only after *all* of them have finished. This is set by a Dependency's :attr:`all`
242 254 boolean attribute, which defaults to ``True``.
243 255
244 256 success [default: True]
245 257 Whether to consider tasks that succeeded as fulfilling dependencies.
246 258
247 259 failure [default: False]
248 260 Whether to consider tasks that failed as fulfilling dependencies.
249 261 Using `failure=True,success=False` is useful for setting up cleanup tasks, to be run
250 262 only when tasks have failed.
251 263
252 264 Sometimes you want to run a task after another, but only if that task succeeded. In this case,
253 265 ``success`` should be ``True`` and ``failure`` should be ``False``. However sometimes you may
254 266 not care whether the task succeeds, and always want the second task to run, in which case you
255 267 should use `success=failure=True`. The default behavior is to only use successes.
256 268
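For the non-default cases, a sketch of constructing the :class:`Dependency`
directly (``ar1`` and ``ar2`` are assumed to be earlier :class:`AsyncResult`
objects); the resulting set can be passed wherever msg_ids are accepted:

.. sourcecode:: ipython

    In [13]: from IPython.parallel import Dependency

    # met as soon as *any* of the listed tasks finishes, pass or fail:
    In [14]: dep = Dependency(ar1.msg_ids + ar2.msg_ids,
       ....:                  all=False, success=True, failure=True)
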
257 269 There are other switches for interpretation that are made at the *task* level. These are
258 270 specified via keyword arguments to the client's :meth:`apply` method.
259 271
260 272 after,follow
261 273 You may want to run a task *after* a given set of dependencies have been run and/or
262 274 run it *where* another set of dependencies are met. To support this, every task has an
263 275 `after` dependency to restrict time, and a `follow` dependency to restrict
264 276 destination.
265 277
266 278 timeout
267 279 You may also want to set a time-limit for how long the scheduler should wait before a
268 280 task's dependencies are met. This is done via a `timeout`, which defaults to 0, which
269 281 indicates that the task should never timeout. If the timeout is reached, and the
270 282 scheduler still hasn't been able to assign the task to an engine, the task will fail
271 283 with a :class:`DependencyTimeout`.
272 284
273 285 .. note::
274 286
275 287 Dependencies only work within the task scheduler. You cannot instruct a load-balanced
276 288 task to run after a job submitted via the MUX interface.
277 289
278 290 The simplest form of Dependencies is with `all=True,success=True,failure=False`. In these cases,
279 291 you can skip using Dependency objects, and just pass msg_ids or AsyncResult objects as the
280 292 `follow` and `after` keywords to :meth:`client.apply`:
281 293
282 294 .. sourcecode:: ipython
283 295
284 296 In [14]: client.block=False
285 297
286 298 In [15]: ar = lview.apply(f, args, kwargs)
287 299
288 300 In [16]: ar2 = lview.apply(f2)
289 301
290 In [17]: ar3 = lview.apply_with_flags(f3, after=[ar,ar2])
291
292 In [17]: ar4 = lview.apply_with_flags(f3, follow=[ar], timeout=2.5)
302 In [17]: with lview.temp_flags(after=[ar,ar2]):
303 ....: ar3 = lview.apply(f3)
293 304
305 In [18]: with lview.temp_flags(follow=[ar], timeout=2.5):
306 ....: ar4 = lview.apply(f3)
294 307
295 308 .. seealso::
296 309
297 310 Some parallel workloads can be described as a `Directed Acyclic Graph
298 311 <http://en.wikipedia.org/wiki/Directed_acyclic_graph>`_, or DAG. See :ref:`DAG
299 312 Dependencies <dag_dependencies>` for an example demonstrating how to map a NetworkX DAG
300 313 onto task dependencies.
301 314
302 315
303 316 Impossible Dependencies
304 317 ***********************
305 318
306 319 The schedulers do perform some analysis on graph dependencies to determine whether they
307 320 can ever be met. If the scheduler discovers that a dependency cannot be
308 321 met, then the task will fail with an :class:`ImpossibleDependency` error. This way, a
309 322 task that the scheduler knows can never run won't sit indefinitely in the
310 323 scheduler clogging the pipeline.
311 324
312 325 The basic cases that are checked:
313 326
314 327 * depending on nonexistent messages
315 328 * `follow` dependencies were run on more than one machine and `all=True`
316 329 * any dependencies failed and `all=True,success=True,failure=False`
317 330 * all dependencies failed and `all=False,success=True,failure=False`
318 331
319 332 .. warning::
320 333
321 334 This analysis has not been proven to be rigorous, so it is possible for tasks
322 335 to become impossible to run in obscure situations; a timeout may be a good choice.
323 336
324 337
325 338 Retries and Resubmit
326 339 ====================
327 340
328 341 Retries
329 342 -------
330 343
331 344 Another flag for tasks is `retries`. This is an integer, specifying how many times
332 345 a task should be resubmitted after failure. This is useful for tasks that should still run
333 346 if their engine was shut down, or that have some statistical chance of failing. The default
334 347 is to not retry tasks.
335 348
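A sketch, using the same :meth:`temp_flags` pattern shown earlier
(``flaky_task`` is a hypothetical placeholder):

.. sourcecode:: ipython

    In [19]: with lview.temp_flags(retries=2):
       ....:     ar = lview.apply(flaky_task)
       ....:
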
336 349 Resubmit
337 350 --------
338 351
339 352 Sometimes you may want to re-run a task. This could be because it failed for some reason, and
340 353 you have fixed the error, or because you want to restore the cluster to an interrupted state.
341 354 For this, the :class:`Client` has a :meth:`rc.resubmit` method. This simply takes one or more
342 355 msg_ids, and returns an :class:`AsyncHubResult` for the result(s). You cannot resubmit
343 356 a task that is pending - only those that have finished, either successfully or unsuccessfully.
344 357
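A minimal sketch, reusing the placeholder function ``f`` from the examples above:

.. sourcecode:: ipython

    In [20]: ar = lview.apply_async(f)

    In [21]: ar.wait()  # resubmission requires the task to have finished

    In [22]: ar2 = rc.resubmit(ar.msg_ids)
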
345 358 .. _parallel_schedulers:
346 359
347 360 Schedulers
348 361 ==========
349 362
350 363 There are a variety of valid ways to determine where jobs should be assigned in a
351 364 load-balancing situation. In IPython, we support several standard schemes, and
352 365 even make it easy to define your own. The scheme can be selected via the ``scheme``
353 366 argument to :command:`ipcontroller`, or in the :attr:`TaskScheduler.schemename` attribute
354 367 of a controller config object.
355 368
356 369 The built-in routing schemes:
357 370
358 371 To select one of these schemes, simply do::
359 372
360 373 $ ipcontroller --scheme=<schemename>
361 374 for instance:
362 375 $ ipcontroller --scheme=lru
363 376
364 377 lru: Least Recently Used
365 378
366 379 Always assign work to the least-recently-used engine. A close relative of
367 380 round-robin, it will be fair with respect to the number of tasks, agnostic
368 381 with respect to runtime of each task.
369 382
370 383 plainrandom: Plain Random
371 384
372 385 Randomly picks an engine on which to run.
373 386
374 387 twobin: Two-Bin Random
375 388
376 389 **Requires numpy**
377 390
378 391 Pick two engines at random, and use the LRU of the two (sketched after this list).
379 392 This is known to be better than plain random in many cases, but requires a small amount of computation.
380 393
381 394 leastload: Least Load
382 395
383 396 **This is the default scheme**
384 397
385 398 Always assign tasks to the engine with the fewest outstanding tasks (LRU breaks ties).
386 399
387 400 weighted: Weighted Two-Bin Random
388 401
389 402 **Requires numpy**
390 403
391 404 Pick two engines at random using the number of outstanding tasks as inverse weights,
392 405 and use the one with the lower load.
393 406
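As a rough illustration of the two-bin idea (not IPython's actual
implementation, which also covers the weighted variant):

.. sourcecode:: python

    import random

    def two_bin(loads):
        """Pick two engine indices at random; return the less-loaded one."""
        a, b = random.sample(range(len(loads)), 2)
        return a if loads[a] <= loads[b] else b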
394 407
395 408 Pure ZMQ Scheduler
396 409 ------------------
397 410
398 411 For maximum throughput, the 'pure' scheme is not Python at all, but a C-level
399 412 :class:`MonitoredQueue` from PyZMQ, which uses a ZeroMQ ``DEALER`` socket to perform all
400 413 load-balancing. This scheduler does not support any of the advanced features of the Python
401 414 :class:`.Scheduler`.
402 415
403 416 Disabled features when using the ZMQ Scheduler:
404 417
405 418 * Engine unregistration
406 419 Task farming will be disabled if an engine unregisters.
407 420 Further, if an engine is unregistered during computation, the scheduler may not recover.
408 421 * Dependencies
409 422 Since there is no Python logic inside the Scheduler, routing decisions cannot be made
410 423 based on message content.
411 424 * Early destination notification
412 425 The Python schedulers know which engine gets which task, and notify the Hub. This
413 426 allows graceful handling of Engines coming and going. There is no way to know
414 427 where ZeroMQ messages have gone, so there is no way to know what tasks are on which
415 428 engine until they *finish*. This makes recovery from engine shutdown very difficult.
416 429
417 430
418 431 .. note::
419 432
420 433 TODO: performance comparisons
421 434
422 435
423 436
424 437
425 438 More details
426 439 ============
427 440
428 441 The :class:`LoadBalancedView` has many more powerful features that allow quite a bit
429 442 of flexibility in how tasks are defined and run. The next places to look are
430 443 in the following classes:
431 444
432 445 * :class:`~IPython.parallel.client.view.LoadBalancedView`
433 446 * :class:`~IPython.parallel.client.asyncresult.AsyncResult`
434 447 * :meth:`~IPython.parallel.client.view.LoadBalancedView.apply`
435 448 * :mod:`~IPython.parallel.controller.dependency`
436 449
437 450 The following is an overview of how to use these classes together:
438 451
439 452 1. Create a :class:`Client` and :class:`LoadBalancedView`
440 453 2. Define some functions to be run as tasks
441 454 3. Submit your tasks using the :meth:`apply` method of your
442 455 :class:`LoadBalancedView` instance.
443 4. Use :meth:`Client.get_result` to get the results of the
456 4. Use :meth:`.Client.get_result` to get the results of the
444 457 tasks, or use the :meth:`AsyncResult.get` method of the results to wait
445 458 for and then receive the results.
446 459
447 460 .. seealso::
448 461
449 462 A demo of :ref:`DAG Dependencies <dag_dependencies>` with NetworkX and IPython.