##// END OF EJS Templates
Beginning to organize the rst documentation.
Brian E Granger -
Show More
@@ -0,0 +1,70 b''
1 # Makefile for Sphinx documentation
2 #
3
4 # You can set these variables from the command line.
5 SPHINXOPTS =
6 SPHINXBUILD = sphinx-build
7 PAPER =
8
9 # Internal variables.
10 PAPEROPT_a4 = -D latex_paper_size=a4
11 PAPEROPT_letter = -D latex_paper_size=letter
12 ALLSPHINXOPTS = -d build/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
13
14 .PHONY: help clean html web pickle htmlhelp latex changes linkcheck
15
16 help:
17 @echo "Please use \`make <target>' where <target> is one of"
18 @echo " html to make standalone HTML files"
19 @echo " pickle to make pickle files (usable by e.g. sphinx-web)"
20 @echo " htmlhelp to make HTML files and a HTML help project"
21 @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
22 @echo " changes to make an overview over all changed/added/deprecated items"
23 @echo " linkcheck to check all external links for integrity"
24
25 clean:
26 -rm -rf build/*
27
28 html:
29 mkdir -p build/html build/doctrees
30 $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) build/html
31 @echo
32 @echo "Build finished. The HTML pages are in build/html."
33
34 pickle:
35 mkdir -p build/pickle build/doctrees
36 $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) build/pickle
37 @echo
38 @echo "Build finished; now you can process the pickle files or run"
39 @echo " sphinx-web build/pickle"
40 @echo "to start the sphinx-web server."
41
42 web: pickle
43
44 htmlhelp:
45 mkdir -p build/htmlhelp build/doctrees
46 $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) build/htmlhelp
47 @echo
48 @echo "Build finished; now you can run HTML Help Workshop with the" \
49 ".hhp project file in build/htmlhelp."
50
51 latex:
52 mkdir -p build/latex build/doctrees
53 $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) build/latex
54 @echo
55 @echo "Build finished; the LaTeX files are in build/latex."
56 @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \
57 "run these through (pdf)latex."
58
59 changes:
60 mkdir -p build/changes build/doctrees
61 $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) build/changes
62 @echo
63 @echo "The overview file is in build/changes."
64
65 linkcheck:
66 mkdir -p build/linkcheck build/doctrees
67 $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) build/linkcheck
68 @echo
69 @echo "Link check complete; look for any errors in the above output " \
70 "or in build/linkcheck/output.txt."
@@ -0,0 +1,104 b''
1 ===================
2 Changes in IPython
3 ===================
4
5 .. contents::
6
7 Release 0.3
8 ===========
9
10 New features
11 ------------
12
13 * Much improved ``setup.py`` and ``setupegg.py`` scripts. Because Twisted
14 and zope.interface are now easily installable, we can declare them as dependencies
15 in our setupegg.py script.
16 * IPython is now compatible with Twisted 2.5.0 and 8.x.
17 * Added a new example of how to use :mod:`ipython1.kernel.asynclient`.
18 * Initial draft of a process daemon in :mod:`ipython1.daemon`.
19 * The ``TaskController`` now has methods for getting the queue status.
20 * The ``TaskResult`` objects now have information about how long the task
21 took to run.
22 * We are attaching additional attributes to exceptions ``(_ipython_*)`` that
23 we use to carry additional info around.
24 * New top-level module :mod:`asynclient` that has asynchronous versions (that
25 return deferreds) of the client classes. This is designed for users who want
26 to run their own Twisted reactor.
27 * All the clients in :mod:`client` are now based on Twisted. This is done by
28 running the Twisted reactor in a separate thread and using the
29 :func:`blockingCallFromThread` function that is in recent versions of Twisted.
30 * Functions can now be pushed/pulled to/from engines using
31 :meth:`MultiEngineClient.push_function` and :meth:`MultiEngineClient.pull_function`.
32 * Gather/scatter are now implemented in the client to reduce the work load
33 of the controller and improve performance.
34 * Complete rewrite of the IPython documentation. All of the documentation
35 from the IPython website has been moved into docs/source as restructured
36 text documents. PDF and HTML documentation are being generated using
37 Sphinx.
38 * New developer oriented documentation: development guidelines and roadmap.
39 * Traditional ``ChangeLog`` has been changed to a more useful ``changes.txt`` file
40 that is organized by release and is meant to provide something more relevant
41 for users.
42
43 Bug fixes
44 ---------
45
46 * Created a proper ``MANIFEST.in`` file to create source distributions.
47 * Fixed a bug in the ``MultiEngine`` interface. Previously, multi-engine
48 actions were being collected with a :class:`DeferredList` with
49 ``fireononeerrback=1``. This meant that methods were returning
50 before all engines had given their results. This was causing extremely odd
51 bugs in certain cases. To fix this problem, we have 1) set
52 ``fireononeerrback=0`` to make sure all results (or exceptions) are in
53 before returning and 2) introduced a :exc:`CompositeError` exception
54 that wraps all of the engine exceptions. This is a huge change as it means
55 that users will have to catch :exc:`CompositeError` rather than the actual
56 exception.
57
58 Backwards incompatible changes
59 ------------------------------
60
61 * All names have been renamed to conform to the lowercase_with_underscore
62 convention. This will require users to change references to all names like
63 ``queueStatus`` to ``queue_status``.
64 * Previously, methods like :meth:`MultiEngineClient.push` and
65 :meth:`MultiEngineClient.pull` used ``*args`` and ``**kwargs``. This was
66 becoming a problem as we weren't able to introduce new keyword arguments into
67 the API. Now these methods simply take a dict or sequence. This has also allowed
68 us to get rid of the ``*All`` methods like :meth:`pushAll` and :meth:`pullAll`.
69 These things are now handled with the ``targets`` keyword argument that defaults
70 to ``'all'``.
71 * The :attr:`MultiEngineClient.magicTargets` has been renamed to
72 :attr:`MultiEngineClient.targets`.
73 * All methods in the MultiEngine interface now accept the optional keyword argument
74 ``block``.
75 * Renamed :class:`RemoteController` to :class:`MultiEngineClient` and
76 :class:`TaskController` to :class:`TaskClient`.
77 * Renamed the top-level module from :mod:`api` to :mod:`client`.
78 * Most methods in the multiengine interface now raise a :exc:`CompositeError` exception
79 that wraps the user's exceptions, rather than just raising the raw user's exception.
80 * Changed the ``setupNS`` and ``resultNames`` in the ``Task`` class to ``push``
81 and ``pull``.
82
83 Version 0.8.2
84 =============
85
86 Changes made since version 0.8.1 was released:
87
88 * %pushd/%popd behave differently; now "pushd /foo" pushes CURRENT directory
89 and jumps to /foo. The current behaviour is closer to the documented
90 behaviour, and should not trip anyone.
91
92 Version 0.8.3
93 =============
94
95 * pydb is now disabled by default (due to %run -d problems). You can enable
96 it by passing -pydb command line argument to IPython. Note that setting
97 it in config file won't work.
98
99 Releases prior to 0.3
100 =====================
101
102 Changes prior to version 0.3 of IPython are described in the older file ``ChangeLog``.
103 Please refer to this document for details.
104
@@ -0,0 +1,7 b''
1 IPython Documentation
2 =====================
3
4 .. toctree::
5 :maxdepth: 2
6
7 ipython.txt No newline at end of file
@@ -0,0 +1,315 b''
1 .. _development:
2
3 ==================================
4 IPython development guidelines
5 ==================================
6
7 .. contents::
8 ..
9 1 Overview
10 2 Project organization
11 2.1 Subpackages
12 2.2 Installation and dependencies
13 2.3 Specific subpackages
14 3 Version control
15 4 Documentation
16 4.1 Standalone documentation
17 4.2 Docstring format
18 5 Coding conventions
19 5.1 General
20 5.2 Naming conventions
21 6 Testing
22 7 Configuration
23 ..
24
25
26 Overview
27 ========
28
29 IPython is the next generation of IPython. It is named such for two reasons:
30
31 - Eventually, IPython will become IPython version 1.0.
32 - This new code base needs to be able to co-exist with the existing IPython until
33 it is a full replacement for it. Thus we needed a different name. We couldn't
34 use ``ipython`` (lowercase) as some file systems are case insensitive.
35
36 There are two, no three, main goals of the IPython effort:
37
38 1. Clean up the existing codebase and write lots of tests.
39 2. Separate the core functionality of IPython from the terminal to enable IPython
40 to be used from within a variety of GUI applications.
41 3. Implement a system for interactive parallel computing.
42
43 While the third goal may seem a bit unrelated to the main focus of IPython, it turns
44 out that the technologies required for this goal are nearly identical with those
45 required for goal two. This is the main reason the interactive parallel computing
46 capabilities are being put into IPython proper. Currently the third of these goals is
47 furthest along.
48
49 This document describes IPython from the perspective of developers.
50
51
52 Project organization
53 ====================
54
55 Subpackages
56 -----------
57
58 IPython is organized into semi self-contained subpackages. Each of the subpackages will have its own:
59
60 - **Dependencies**. One of the most important things to keep in mind in
61 partitioning code amongst subpackages, is that they should be used to cleanly
62 encapsulate dependencies.
63 - **Tests**. Each subpackage should have its own ``tests`` subdirectory that
64 contains all of the tests for that package. For information about writing tests
65 for IPython, see the `Testing System`_ section of this document.
66 - **Configuration**. Each subpackage should have its own ``config`` subdirectory
67 that contains the configuration information for the components of the
68 subpackage. For information about how the IPython configuration system
69 works, see the `Configuration System`_ section of this document.
70 - **Scripts**. Each subpackage should have its own ``scripts`` subdirectory that
71 contains all of the command line scripts associated with the subpackage.
72
73 Installation and dependencies
74 -----------------------------
75
76 IPython will not use `setuptools`_ for installation. Instead, we will use standard
77 ``setup.py`` scripts that use `distutils`_. While there are a number of extremely nice
78 features that `setuptools`_ has (like namespace packages), the current implementation
79 of `setuptools`_ has performance problems, particularly on shared file systems. In
80 particular, when Python packages are installed on NFS file systems, import times
81 become much too long (up towards 10 seconds).
82
83 Because IPython is being used extensively in the context of high performance
84 computing, where performance is critical but shared file systems are common, we feel
85 these performance hits are not acceptable. Thus, until the performance problems
86 associated with `setuptools`_ are addressed, we will stick with plain `distutils`_. We
87 are hopeful that these problems will be addressed and that we will eventually begin
88 using `setuptools`_. Because of this, we are trying to organize IPython in a way that
89 will make the eventual transition to `setuptools`_ as painless as possible.
90
91 Because we will be using `distutils`_, there will be no method for automatically installing dependencies. Instead, we are following the approach of `Matplotlib`_ which can be summarized as follows:
92
93 - Distinguish between required and optional dependencies. However, the required
94 dependencies for IPython should be only the Python standard library.
95 - Upon installation check to see which optional dependencies are present and tell
96 the user which parts of IPython need which optional dependencies.
97
98 It is absolutely critical that each subpackage of IPython has a clearly specified set
99 of dependencies and that dependencies are not carelessly inherited from other IPython
100 subpackages. Furthermore, tests that have certain dependencies should not fail if
101 those dependencies are not present. Instead they should be skipped and print a
102 message.
103
104 .. _setuptools: http://peak.telecommunity.com/DevCenter/setuptools
105 .. _distutils: http://docs.python.org/lib/module-distutils.html
106 .. _Matplotlib: http://matplotlib.sourceforge.net/
107
108 Specific subpackages
109 --------------------
110
111 ``core``
112 This is the core functionality of IPython that is independent of the
113 terminal, network and GUIs. Most of the code that is in the current
114 IPython trunk will be refactored, cleaned up and moved here.
115
116 ``kernel``
117 This enables the IPython core to be exposed to the network. This is
118 also where all of the parallel computing capabilities are to be found.
119
120 ``config``
121 The configuration package used by IPython.
122
123 ``frontends``
124 The various frontends for IPython. A frontend is the end-user application
125 that exposes the capabilities of IPython to the user. The most basic frontend
126 will simply be a terminal based application that looks just like today's
127 IPython. Other frontends will likely be more powerful and based on GUI toolkits.
128
129 ``notebook``
130 An application that allows users to work with IPython notebooks.
131
132 ``tools``
133 This is where general utilities go.
134
135
136 Version control
137 ===============
138
139 In the past, IPython development has been done using `Subversion`__. We are currently trying out `Bazaar`__ and `Launchpad`__.
140
141 .. __: http://subversion.tigris.org/
142 .. __: http://bazaar-vcs.org/
143 .. __: http://www.launchpad.net/ipython
144
145 Documentation
146 =============
147
148 Standalone documentation
149 ------------------------
150
151 All standalone documentation should be written in plain text (``.txt``) files using
152 `reStructuredText`_ for markup and formatting. All such documentation should be placed
153 in the top level directory ``docs`` of the IPython source tree. Or, when appropriate,
154 a suitably named subdirectory should be used. The documentation in this location will
155 serve as the main source for IPython documentation and all existing documentation
156 should be converted to this format.
157
158 In the future, the text files in the ``docs`` directory will be used to generate all
159 forms of documentation for IPython. This includes documentation on the IPython website
160 as well as *pdf* documentation.
161
162 .. _reStructuredText: http://docutils.sourceforge.net/rst.html
163
164 Docstring format
165 ----------------
166
167 Good docstrings are very important. All new code will use `Epydoc`_ for generating API
168 docs, so we will follow the `Epydoc`_ conventions. More specifically, we will use
169 `reStructuredText`_ for markup and formatting, since it is understood by a wide
170 variety of tools. This means that if in the future we have any reason to change from
171 `Epydoc`_ to something else, we'll have fewer transition pains.
172
173 Details about using `reStructuredText`_ for docstrings can be found `here
174 <http://epydoc.sourceforge.net/manual-othermarkup.html>`_.
175
176 .. _Epydoc: http://epydoc.sourceforge.net/
177
178 Additional PEPs of interest regarding documentation of code:
179
180 - `Docstring Conventions <http://www.python.org/peps/pep-0257.html>`_
181 - `Docstring Processing System Framework <http://www.python.org/peps/pep-0256.html>`_
182 - `Docutils Design Specification <http://www.python.org/peps/pep-0258.html>`_
183
184
185 Coding conventions
186 ==================
187
188 General
189 -------
190
191 In general, we'll try to follow the standard Python style conventions as described here:
192
193 - `Style Guide for Python Code <http://www.python.org/peps/pep-0008.html>`_
194
195
196 Other comments:
197
198 - In a large file, top level classes and functions should be
199 separated by 2-3 lines to make it easier to separate them visually.
200 - Use 4 spaces for indentation.
201 - Keep the ordering of methods the same in classes that have the same
202 methods. This is particularly true for classes that implement
203 similar interfaces and for interfaces that are similar.
204
205 Naming conventions
206 ------------------
207
208 In terms of naming conventions, we'll follow the guidelines from the `Style Guide for
209 Python Code`_.
210
211 For all new IPython code (and much existing code is being refactored), we'll use:
212
213 - All ``lowercase`` module names.
214
215 - ``CamelCase`` for class names.
216
217 - ``lowercase_with_underscores`` for methods, functions, variables and attributes.
218
219 This may be confusing as most of the existing IPython codebase uses a different convention (``lowerCamelCase`` for methods and attributes). Slowly, we will move IPython over to the new
220 convention, providing shadow names for backward compatibility in public interfaces.
221
222 There are, however, some important exceptions to these rules. In some cases, IPython
223 code will interface with packages (Twisted, Wx, Qt) that use other conventions. At some level this makes it impossible to adhere to our own standards at all times. In particular, when subclassing classes that use other naming conventions, you must follow their naming conventions. To deal with cases like this, we propose the following policy:
224
225 - If you are subclassing a class that uses different conventions, use its
226 naming conventions throughout your subclass. Thus, if you are creating a
227 Twisted Protocol class, use Twisted's ``namingSchemeForMethodsAndAttributes``.
228
229 - All IPython's official interfaces should use our conventions. In some cases
230 this will mean that you need to provide shadow names (first implement ``fooBar``
231 and then ``foo_bar = fooBar``). We want to avoid this at all costs, but it
232 will probably be necessary at times. But, please use this sparingly!
233
234 Implementation-specific *private* methods will use ``_single_underscore_prefix``.
235 Names with a leading double underscore will *only* be used in special cases, as they
236 make subclassing difficult (such names are not easily seen by child classes).
237
238 Occasionally some run-in lowercase names are used, but mostly for very short names or
239 where we are implementing methods very similar to existing ones in a base class (like
240 ``runlines()`` where ``runsource()`` and ``runcode()`` had established precedent).
241
242 The old IPython codebase has a big mix of classes and modules prefixed with an
243 explicit ``IP``. In Python this is mostly unnecessary, redundant and frowned upon, as
244 namespaces offer cleaner prefixing. The only case where this approach is justified is
245 for classes which are expected to be imported into external namespaces and a very
246 generic name (like Shell) is too likely to clash with something else. We'll need to
247 revisit this issue as we clean up and refactor the code, but in general we should
248 remove as many unnecessary ``IP``/``ip`` prefixes as possible. However, if a prefix
249 seems absolutely necessary the more specific ``IPY`` or ``ipy`` are preferred.
250
251 .. _devel_testing:
252
253 Testing system
254 ==============
255
256 It is extremely important that all code contributed to IPython has tests. Tests should
257 be written as unittests, doctests or as entities that the `Nose`_ testing package will
258 find. Regardless of how the tests are written, we will use `Nose`_ for discovering and
259 running the tests. `Nose`_ will be required to run the IPython test suite, but will
260 not be required to simply use IPython.
261
262 .. _Nose: http://code.google.com/p/python-nose/
263
264 Tests of `Twisted`__ using code should be written by subclassing the ``TestCase`` class
265 that comes with ``twisted.trial.unittest``. When this is done, `Nose`_ will be able to
266 run the tests and the twisted reactor will be handled correctly.
267
268 .. __: http://www.twistedmatrix.com
269
270 Each subpackage in IPython should have its own ``tests`` directory that contains all
271 of the tests for that subpackage. This allows each subpackage to be self-contained. If
272 a subpackage has any dependencies beyond the Python standard library, the tests for
273 that subpackage should be skipped if the dependencies are not found. This is very
274 important so users don't get tests failing simply because they don't have dependencies.
275
276 We also need to look into using Nose's ability to tag tests to allow a more modular
277 approach of running tests.
278
279 .. _devel_config:
280
281 Configuration system
282 ====================
283
284 IPython uses `.ini`_ files for configuration purposes. This represents a huge
285 improvement over the configuration system used in IPython. IPython works with these
286 files using the `ConfigObj`_ package, which IPython includes as
287 ``ipython1/external/configobj.py``.
288
289 Currently, we are using raw `ConfigObj`_ objects themselves. Each subpackage of IPython
290 should contain a ``config`` subdirectory that contains all of the configuration
291 information for the subpackage. To see how configuration information is defined (along
292 with defaults) see at the examples in ``ipython1/kernel/config`` and
293 ``ipython1/core/config``. Likewise, to see how the configuration information is used,
294 see examples in ``ipython1/kernel/scripts/ipengine.py``.
295
296 Eventually, we will add a new layer on top of the raw `ConfigObj`_ objects. We are
297 calling this new layer, ``tconfig``, as it will use a `Traits`_-like validation model.
298 We won't actually use `Traits`_, but will implement something similar in pure Python.
299 But, even in this new system, we will still use `ConfigObj`_ and `.ini`_ files
300 underneath the hood. Talk to Fernando if you are interested in working on this part of
301 IPython. The current prototype of ``tconfig`` is located in the IPython sandbox.
302
303 .. _.ini: http://docs.python.org/lib/module-ConfigParser.html
304 .. _ConfigObj: http://www.voidspace.org.uk/python/configobj.html
305 .. _Traits: http://code.enthought.com/traits/
306
307
308
309
310
311
312
313
314
315
@@ -0,0 +1,8 b''
1 Developing IPython
2 ==================
3
4 .. toctree::
5 :maxdepth: 2
6
7 development.txt
8 roadmap.txt
@@ -0,0 +1,96 b''
1 .. _roadmap:
2
3 ===================
4 Development roadmap
5 ===================
6
7 .. contents::
8
9 IPython is an ambitious project that is still under heavy development. However, we want IPython to become useful to as many people as possible, as quickly as possible. To help us accomplish this, we are laying out a roadmap of where we are headed and what needs to happen to get there. Hopefully, this will help the IPython developers figure out the best things to work on for each upcoming release.
10
11 Speaking of releases, we are going to begin releasing a new version of IPython every four weeks. We are hoping that a regular release schedule, along with a clear roadmap of where we are headed will propel the project forward.
12
13 Where are we headed
14 ===================
15
16 Our goal with IPython is simple: to provide a *powerful*, *robust* and *easy to use* framework for parallel computing. While there are other secondary goals you will hear us talking about at various times, this is the primary goal of IPython that frames the roadmap.
17
18 Steps along the way
19 ===================
20
21 Here we describe the various things that we need to work on to accomplish this goal.
22
23 Setting up for regular release schedule
24 ---------------------------------------
25
26 We would like to begin to release IPython regularly (probably a 4 week release cycle). To get ready for this, we need to revisit the development guidelines and put in information about releasing IPython.
27
28 Process startup and management
29 ------------------------------
30
31 IPython is implemented using a distributed set of processes that communicate using TCP/IP network channels. Currently, users have to start each of the various processes separately using command line scripts. This is both difficult and error prone. Furthermore, there are a number of things that often need to be managed once the processes have been started, such as the sending of signals and the shutting down and cleaning up of processes.
32
33 We need to build a system that makes it trivial for users to start and manage IPython processes. This system should have the following properties:
34
35 * It should be possible to do everything through an extremely simple API that users
36 can call from their own Python script. No shell commands should be needed.
37 * This simple API should be configured using standard .ini files.
38 * The system should make it possible to start processes using a number of different
39 approaches: SSH, PBS/Torque, Xgrid, Windows Server, mpirun, etc.
40 * The controller and engine processes should each have a daemon for monitoring,
41 signaling and clean up.
42 * The system should be secure.
43 * The system should work under all the major operating systems, including
44 Windows.
45
46 Initial work has begun on the daemon infrastructure, and some of the needed logic is contained in the ipcluster script.
47
48 Ease of use/high-level approaches to parallelism
49 ------------------------------------------------
50
51 While our current API for clients is well designed, we can still do a lot better in designing a user-facing API that is super simple. The main goal here is that it should take *almost no extra code* for users to get their code running in parallel. For this to be possible, we need to tie into Python's standard idioms that enable efficient coding. The biggest ones we are looking at are using context managers (i.e., Python 2.5's ``with`` statement) and decorators. Initial work on this front has begun, but more work is needed.
52
53 We also need to think about new models for expressing parallelism. This is fun work as most of the foundation has already been established.
54
55 Security
56 --------
57
58 Currently, IPython has no built in security or security model. Because we would like IPython to be usable on public computer systems and over wide area networks, we need to come up with a robust solution for security. Here are some of the specific things that need to be included:
59
60 * User authentication between all processes (engines, controller and clients).
61 * Optional TLS/SSL based encryption of all communication channels.
62 * A good way of picking network ports so multiple users on the same system can
63 run their own controller and engines without interfering with those of others.
64 * A clear model for security that enables users to evaluate the security risks
65 associated with using IPython in various manners.
66
67 For the implementation of this, we plan on using Twisted's support for SSL and authentication. One thing that we really should look at is the `Foolscap`_ network protocol, which provides many of these things out of the box.
68
69 .. _Foolscap: http://foolscap.lothar.com/trac
70
71 The security work needs to be done in conjunction with other network protocol stuff.
72
73 Latent performance issues
74 -------------------------
75
76 Currently, we have a number of performance issues that are waiting to bite users:
77
78 * The controller stores a large amount of state in Python dictionaries. Under heavy
79 usage, these dicts will get very large, causing memory usage problems. We need to
80 develop more scalable solutions to this problem, such as using a sqlite database
81 to store this state. This will also help the controller to be more fault tolerant.
82 * Currently, the client to controller connections are done through XML-RPC using
83 HTTP 1.0. This is very inefficient as XML-RPC is a very verbose protocol and
84 each request must be handled with a new connection. We need to move these network
85 connections over to PB or Foolscap.
86 * We currently don't have a good way of handling large objects in the controller.
87 The biggest problem is that because we don't have any way of streaming objects,
88 we get lots of temporary copies in the low-level buffers. We need to implement
89 a better serialization approach and true streaming support.
90 * The controller currently unpickles and repickles objects. We need to use the
91 [push|pull]_serialized methods instead.
92 * Currently the controller is a bottleneck. We need the ability to scale the
93 controller by aggregating multiple controllers into one effective controller.
94
95
96
1 NO CONTENT: new file 100644
@@ -0,0 +1,16 b''
1 IPython Documentation
2 =====================
3
4 .. toctree::
5 :maxdepth: 1
6
7 core/index.txt
8 dev/index.txt
9 kernel/index.txt
10
11 Indices and tables
12 ==================
13
14 * :ref:`genindex`
15 * :ref:`modindex`
16 * :ref:`search` No newline at end of file
@@ -0,0 +1,16 b''
1 IPython kernel documentation
2 ============================
3
4 User Documentation
5 ------------------
6
7 .. toctree::
8 :maxdepth: 2
9
10 install.txt
11 parallel_intro.txt
12 parallel_multiengine.txt
13 parallel_task.txt
14 parallel_mpi.txt
15 changes.txt
16 faq.txt
@@ -0,0 +1,16 b''
1 IPython Documentation
2 =====================
3
4 .. toctree::
5 :maxdepth: 1
6
7 core/index.txt
8 dev/index.txt
9 kernel/index.txt
10
11 Indices and tables
12 ==================
13
14 * :ref:`genindex`
15 * :ref:`modindex`
16 * :ref:`search` No newline at end of file
1 NO CONTENT: new file 100644
@@ -0,0 +1,115 b''
1 .. _faq:
2
3 ================
4 FAQ for IPython
5 ================
6
7 General questions
8 =================
9
10 What is the difference between IPython and IPython?
11 ----------------------------------------------------
12
13 IPython is the next generation of IPython. It is being created with three main goals in
14 mind:
15
16 1. Clean up the existing codebase and write lots of tests.
17 2. Separate the core functionality of IPython from the terminal to enable IPython
18 to be used from within a variety of GUI applications.
19 3. Implement a system for interactive parallel computing.
20
21 Currently, IPython is not a full replacement for IPython and until that happens,
22 IPython will be developed as a separate project. IPython currently provides a stable
23 and powerful architecture for parallel computing that can be used with IPython or even
24 the default Python shell. For more information, see our `introduction to parallel
25 computing with IPython`__.
26
27 .. __: ./parallel_intro
28
29 What is the history of IPython?
30 --------------------------------
31
32 Questions about parallel computing with IPython
33 ================================================
34
35 Will IPython speed my Python code up?
36 --------------------------------------
37
38 Yes and no. When converting a serial code to run in parallel, there are often many
39 difficult questions that need to be answered, such as:
40
41 * How should data be decomposed onto the set of processors?
42 * What are the data movement patterns?
43 * Can the algorithm be structured to minimize data movement?
44 * Is dynamic load balancing important?
45
46 We can't answer such questions for you. This is the hard (but fun) work of parallel
47 computing. But, once you understand these things IPython will make it easier for you to
48 implement a good solution quickly. Most importantly, you will be able to use the
49 resulting parallel code interactively.
50
51 With that said, if your problem is trivial to parallelize, IPython has a number of
52 different interfaces that will enable you to parallelize things in almost no time at
53 all. A good place to start is the ``map`` method of our `multiengine interface`_.
54
55 .. _multiengine interface: ./parallel_multiengine
56
57 What is the best way to use MPI from Python?
58 --------------------------------------------
59
60 What about all the other parallel computing packages in Python?
61 ---------------------------------------------------------------
62
63 Some of the unique characteristics of IPython are:
64
65 * IPython is the only architecture that abstracts out the notion of a
66 parallel computation in such a way that new models of parallel computing
67 can be explored quickly and easily. If you don't like the models we
68 provide, you can simply create your own using the capabilities we provide.
69 * IPython is asynchronous from the ground up (we use `Twisted`_).
70 * IPython's architecture is designed to avoid subtle problems
71 that emerge because of Python's global interpreter lock (GIL).
72 * While IPython's architecture is designed to support a wide range
73 of novel parallel computing models, it is fully interoperable with
74 traditional MPI applications.
75 * IPython has been used and tested extensively on modern supercomputers.
76 * IPython's networking layers are completely modular. Thus, it is
77 straightforward to replace our existing network protocols with
78 high performance alternatives (ones based upon Myrinet/InfiniBand).
79 * IPython is designed from the ground up to support collaborative
80 parallel computing. This enables multiple users to actively develop
81 and run the *same* parallel computation.
82 * Interactivity is a central goal for us. While IPython does not have
83 to be used interactively, it can be.
84
85 .. _Twisted: http://www.twistedmatrix.com
86
87 Why is the IPython controller a bottleneck in my parallel calculation?
88 -----------------------------------------------------------------------
89
90 A golden rule in parallel computing is that you should only move data around if you
91 absolutely need to. The main reason that the controller becomes a bottleneck is that
92 too much data is being pushed and pulled to and from the engines. If your algorithm
93 is structured in this way, you really should think about alternative ways of
94 handling the data movement. Here are some ideas:
95
96 1. Have the engines write data to files on the local disks of the engines.
97 2. Have the engines write data to files on a file system that is shared by
98 the engines.
99 3. Have the engines write data to a database that is shared by the engines.
100 4. Simply keep data in the persistent memory of the engines and move the
101 computation to the data (rather than the data to the computation).
102 5. See if you can pass data directly between engines using MPI.
103
104 Isn't Python slow to be used for high-performance parallel computing?
105 ---------------------------------------------------------------------
106
107
108
109
110
111
112
113
114
115
@@ -0,0 +1,16 b''
1 IPython kernel documentation
2 ============================
3
4 User Documentation
5 ------------------
6
7 .. toctree::
8 :maxdepth: 2
9
10 install.txt
11 parallel_intro.txt
12 parallel_multiengine.txt
13 parallel_task.txt
14 parallel_mpi.txt
15 changes.txt
16 faq.txt
@@ -0,0 +1,169 b''
1 .. _install:
2
3 ===================
4 Installing IPython
5 ===================
6
7 .. contents::
8
9 Introduction
10 ============
11
12 IPython enables parallel applications to be developed in Python. This document
13 describes the steps required to install IPython. For an overview of IPython's
14 architecture as it relates to parallel computing, see our :ref:`introduction to
15 parallel computing with IPython <ip1par>`.
16
17 Please let us know if you have problems installing IPython or any of its
18 dependencies. We have tested IPython extensively with Python 2.4 and 2.5.
19
20 .. warning::
21
22 IPython will not work with Python 2.3 or below.
23
24 IPython has four required dependencies:
25
26 1. `IPython`__
27 2. `Zope Interface`__
28 3. `Twisted`__
29 4. `Foolscap`__
30
31 .. __: http://ipython.scipy.org
32 .. __: http://pypi.python.org/pypi/zope.interface
33 .. __: http://twistedmatrix.com
34 .. __: http://foolscap.lothar.com/trac
35
36 It also has the following optional dependencies:
37
38 1. pexpect (used for certain tests)
39 2. nose (used to run our test suite)
40 3. sqlalchemy (used for database support)
41 4. mpi4py (for MPI support)
42 5. Sphinx and pygments (for building documentation)
43 6. pyOpenSSL (for security)
44
45 Getting IPython
46 ================
47
48 IPython development has been moved to `Launchpad`_. The development branch of IPython can be checked out using `Bazaar`_::
49
50 $ bzr branch lp:~ipython/ipython/ipython1-dev
51
52 .. _Launchpad: http://www.launchpad.net/ipython
53 .. _Bazaar: http://bazaar-vcs.org/
54
55 Installation using setuptools
56 =============================
57
58 The easiest way of installing IPython and its dependencies is using
59 `setuptools`_. If you have setuptools installed you can simply use the ``easy_install``
60 script that comes with setuptools (this should be on your path if you have setuptools)::
61
62 $ easy_install ipython1
63
64 This will download and install the latest version of IPython as well as all of its dependencies. For this to work, you will need to be connected to the internet when you run this command. This will install everything into the ``site-packages`` directory of your Python distribution. If this is the system wide Python, you will likely need admin privileges. For information about installing Python packages to other locations (that don't require admin privileges) see the `setuptools`_ documentation.
65
66 .. _setuptools: http://peak.telecommunity.com/DevCenter/setuptools
67
68 If you don't want `setuptools`_ to automatically install the dependencies, you can also get the dependencies yourself, using ``easy_install``::
69
70 $ easy_install IPython
71 $ easy_install zope.interface
72 $ easy_install Twisted
73 $ easy_install foolscap
74
75 or by simply downloading and installing the dependencies manually.
76
77 If you want to have secure (highly recommended) network connections, you will also
78 need to get `pyOpenSSL`__, version 0.6, or just do::
79
80 $ easy_install ipython1[security]
81
82 .. hint:: If you want to do development on IPython and want to always
83 run off your development branch, you can run
84 :command:`python setupegg.py develop` in the IPython source tree.
85
86 .. __: http://pyopenssl.sourceforge.net/
87
88 Installation using plain distutils
89 ==================================
90
91 If you don't have `setuptools`_ installed or don't want to use it, you can also install IPython and its dependencies using ``distutils``. In this approach, you will need to get the most recent stable releases of IPython's dependencies and install each of them by doing::
92
93 $ python setup.py install
94
95 The dependencies need to be installed before installing IPython. After installing the dependencies, install IPython by running::
96
97 $ cd ipython1-dev
98 $ python setup.py install
99
100 .. note:: Here we are using setup.py rather than setupegg.py.
101
102 .. _install_config:
103
104 Configuration
105 =============
106
107 IPython has a configuration system. When running IPython for the first time,
108 reasonable defaults are used for the configuration. The configuration of IPython
109 can be changed in two ways:
110
111 * Configuration files
112 * Commands line options (which override the configuration files)
113
114 IPython has a separate configuration file for each subpackage. Thus, the main
115 configuration files are (in your ``~/.ipython`` directory):
116
117 * ``ipython1.core.ini``
118 * ``ipython1.kernel.ini``
119 * ``ipython1.notebook.ini``
120
121 To create these files for the first time, do the following::
122
123 from ipython1.kernel.config import config_manager as kernel_config
124 kernel_config.write_default_config_file()
125
126 But, you should only need to do this if you need to modify the defaults. If needed
127 repeat this process with the ``notebook`` and ``core`` configuration as well. If you
128 are running into problems with IPython, you might try deleting these configuration
129 files.
130
131 .. _install_testing:
132
133 Testing
134 =======
135
136 Once you have completed the installation of the IPython kernel you can run our test suite
137 with the command::
138
139 trial ipython1
140
141 Or if you have `nose`__ installed::
142
143 nosetests -v ipython1
144
145 The ``trial`` command is part of Twisted and allows asynchronous network based
146 applications to be tested using Python's unittest framework. Please let us know
147 if the tests do not pass. The best way to get in touch with us is on the `IPython
148 developer mailing list`_.
149
150 .. __: http://somethingaboutorange.com/mrl/projects/nose/
151 .. _IPython developer mailing list: http://projects.scipy.org/mailman/listinfo/ipython-dev
152
153 MPI Support
154 ===========
155
156 IPython includes optional support for the Message Passing Interface (`MPI`_),
157 which enables the IPython Engines to pass data between each other using `MPI`_. To use MPI with IPython, the minimal requirements are:
158
159 * An MPI implementation (we recommend `Open MPI`_)
160 * A way to call MPI (we recommend `mpi4py`_)
161
162 But, IPython should work with any MPI implementation and with any code
163 (Python/C/C++/Fortran) that uses MPI. Please contact us for more information about
164 this.
165
166 .. _MPI: http://www-unix.mcs.anl.gov/mpi/
167 .. _mpi4py: http://mpi4py.scipy.org/
168 .. _Open MPI: http://www.open-mpi.org/
169
@@ -0,0 +1,270 b''
1 .. _ip1par:
2
3 ======================================
4 Using IPython for parallel computing
5 ======================================
6
7 .. contents::
8
9 Introduction
10 ============
11
12 This file gives an overview of IPython. IPython has a sophisticated and
13 powerful architecture for parallel and distributed computing. This
14 architecture abstracts out parallelism in a very general way, which
15 enables IPython to support many different styles of parallelism
16 including:
17
18 * Single program, multiple data (SPMD) parallelism.
19 * Multiple program, multiple data (MPMD) parallelism.
20 * Message passing using ``MPI``.
21 * Task farming.
22 * Data parallel.
23 * Combinations of these approaches.
24 * Custom user defined approaches.
25
26 Most importantly, IPython enables all types of parallel applications to
27 be developed, executed, debugged and monitored *interactively*. Hence,
28 the ``I`` in IPython. The following are some example usage cases for IPython:
29
30 * Quickly parallelize algorithms that are embarrassingly parallel
31 using a number of simple approaches. Many simple things can be
32 parallelized interactively in one or two lines of code.
33 * Steer traditional MPI applications on a supercomputer from an
34 IPython session on your laptop.
35 * Analyze and visualize large datasets (that could be remote and/or
36 distributed) interactively using IPython and tools like
37 matplotlib/TVTK.
38 * Develop, test and debug new parallel algorithms
39 (that may use MPI) interactively.
40 * Tie together multiple MPI jobs running on different systems into
41 one giant distributed and parallel system.
42 * Start a parallel job on your cluster and then have a remote
43 collaborator connect to it and pull back data into their
44 local IPython session for plotting and analysis.
45 * Run a set of tasks on a set of CPUs using dynamic load balancing.
46
47 Architecture overview
48 =====================
49
50 The IPython architecture consists of three components:
51
52 * The IPython engine.
53 * The IPython controller.
54 * Various controller Clients.
55
56 IPython engine
57 ---------------
58
59 The IPython engine is a Python instance that takes Python commands over a
60 network connection. Eventually, the IPython engine will be a full IPython
61 interpreter, but for now, it is a regular Python interpreter. The engine
62 can also handle incoming and outgoing Python objects sent over a network
63 connection. When multiple engines are started, parallel and distributed
64 computing becomes possible. An important feature of an IPython engine is
65 that it blocks while user code is being executed. Read on for how the
66 IPython controller solves this problem to expose a clean asynchronous API
67 to the user.
68
69 IPython controller
70 ------------------
71
72 The IPython controller provides an interface for working with a set of
73 engines. At a general level, the controller is a process to which
74 IPython engines can connect. For each connected engine, the controller
75 manages a queue. All actions that can be performed on the engine go
76 through this queue. While the engines themselves block when user code is
77 run, the controller hides that from the user to provide a fully
78 asynchronous interface to a set of engines. Because the controller
79 listens on a network port for engines to connect to it, it must be
80 started before any engines are started.
81
82 The controller also provides a single point of contact for users who wish
83 to utilize the engines connected to the controller. There are different
84 ways of working with a controller. In IPython these ways correspond to different interfaces that the controller is adapted to. Currently we have two default interfaces to the controller:
85
86 * The MultiEngine interface.
87 * The Task interface.
88
89 Advanced users can easily add new custom interfaces to enable other
90 styles of parallelism.
91
92 .. note::
93
94 A single controller and set of engines can be accessed
95 through multiple interfaces simultaneously. This opens the
96 door for lots of interesting things.
97
98 Controller clients
99 ------------------
100
101 For each controller interface, there is a corresponding client. These
102 clients allow users to interact with a set of engines through the
103 interface.
104
105 Security
106 --------
107
108 By default (as long as `pyOpenSSL` is installed) all network connections between the controller and engines and the controller and clients are secure. What does this mean? First of all, all of the connections will be encrypted using SSL. Second, the connections are authenticated. We handle authentication in a `capabilities`__ based security model. In this model, a "capability (known in some systems as a key) is a communicable, unforgeable token of authority". Put simply, a capability is like a key to your house. If you have the key to your house, you can get in, if not you can't.
109
110 .. __: http://en.wikipedia.org/wiki/Capability-based_security
111
112 In our architecture, the controller is the only process that listens on network ports, and is thus responsible for creating these keys. In IPython, these keys are known as Foolscap URLs, or FURLs, because of the underlying network protocol we are using. As a user, you don't need to know anything about the details of these FURLs, other than that when the controller starts, it saves a set of FURLs to files named something.furl. The default location of these files is your ~/.ipython directory.
113
114 To connect and authenticate to the controller an engine or client simply needs to present an appropriate furl (that was originally created by the controller) to the controller. Thus, the .furl files need to be copied to a location where the clients and engines can find them. Typically, this is the ~/.ipython directory on the host where the client/engine is running (which could be a different host than the controller). Once the .furl files are copied over, everything should work fine.
115
116 Getting Started
117 ===============
118
119 To use IPython for parallel computing, you need to start one instance of
120 the controller and one or more instances of the engine. The controller
121 and each engine can run on different machines or on the same machine.
122 Because of this, there are many different possibilities for setting up
123 the IP addresses and ports used by the various processes.
124
125 Starting the controller and engine on your local machine
126 --------------------------------------------------------
127
128 This is the simplest configuration that can be used and is useful for
129 testing the system and on machines that have multiple cores and/or
130 multiple CPUs. The easiest way of doing this is using the ``ipcluster``
131 command::
132
133 $ ipcluster -n 4
134
135 This will start an IPython controller and then 4 engines that connect to
136 the controller. Lastly, the script will print out the Python commands
137 that you can use to connect to the controller. It is that easy.
138
139 Underneath the hood, the ``ipcluster`` script uses two other top-level
140 scripts that you can also use yourself. These scripts are
141 ``ipcontroller``, which starts the controller and ``ipengine`` which
142 starts one engine. To use these scripts to start things on your local
143 machine, do the following.
144
145 First start the controller::
146
147 $ ipcontroller &
148
149 Next, start however many instances of the engine you want using (repeatedly) the command::
150
151 $ ipengine &
152
153 .. warning::
154
155 The order of the above operations is very important. You *must*
156 start the controller before the engines, since the engines connect
157 to the controller as they get started.
158
159 On some platforms you may need to give these commands in the form
160 ``(ipcontroller &)`` and ``(ipengine &)`` for them to work properly. The
161 engines should start and automatically connect to the controller on the
162 default ports, which are chosen for this type of setup. You are now ready
163 to use the controller and engines from IPython.
164
165 Starting the controller and engines on different machines
166 ---------------------------------------------------------
167
168 This section needs to be updated to reflect the new Foolscap capabilities based
169 model.
170
171 Specifying custom ports
172 -----------------------
173
174 This section needs to be updated to reflect the new Foolscap capabilities based
175 model.
176
177 Using ``ipcluster`` with ``ssh``
178 --------------------------------
179
180 The ``ipcluster`` command can also start a controller and engines using
181 ``ssh``. We need more documentation on this, but for now here is an
182 example startup script::
183
184 controller = dict(host='myhost',
185 engine_port=None, # default is 10105
186 control_port=None,
187 )
188
189 # keys are hostnames, values are the number of engine on that host
190 engines = dict(node1=2,
191 node2=2,
192 node3=2,
193 node4=2,
194 )
195
196 Starting engines using ``mpirun``
197 ---------------------------------
198
199 The IPython engines can be started using ``mpirun``/``mpiexec``, even if
200 the engines don't call MPI_Init() or use the MPI API in any way. This is
201 supported on modern MPI implementations like `Open MPI`_. This provides
202 a really nice way of starting a bunch of engines. On a system with MPI
203 installed you can do::
204
205 mpirun -n 4 ipengine --controller-port=10000 --controller-ip=host0
206
207 .. _Open MPI: http://www.open-mpi.org/
208
209 More details on using MPI with IPython can be found :ref:`here <parallelmpi>`.
210
211 Log files
212 ---------
213
214 All of the components of IPython have log files associated with them.
215 These log files can be extremely useful in debugging problems with
216 IPython and can be found in the directory ``~/.ipython/log``. Sending
217 the log files to us will often help us to debug any problems.
218
219 Security and firewalls
220 ----------------------
221
222 The only process in IPython's architecture that listens on a network
223 port is the controller. Thus the controller is the main security concern.
224 Through the controller, an attacker can execute arbitrary code on the
225 engines. Thus, we highly recommend taking the following precautions:
226
227 * Don't run the controller on a machine that is exposed to the
228 internet.
229 * Don't run the controller on a machine that could have hostile
230 users on it.
231 * If you need to connect to a controller that is behind a firewall,
232 tunnel everything through ssh.
233
234 Currently, IPython does not have any built-in security. Thus, it
235 is up to you to be aware of the security risks associated with using IPython and to take steps to mitigate those risks.
236
237 However, we do have plans to add security measures to IPython itself.
238 This will probably take the form of using SSL for encryption and some
239 authentication scheme.
240
241 Next Steps
242 ==========
243
244 Once you have started the IPython controller and one or more engines, you
245 are ready to use the engines to do something useful. To make sure
246 everything is working correctly, try the following commands::
247
248 In [1]: from ipython1.kernel import client
249
250 In [2]: mec = client.MultiEngineClient() # This looks for .furl files in ~/.ipython
251
252 In [4]: mec.get_ids()
253 Out[4]: [0, 1, 2, 3]
254
255 In [5]: mec.execute('print "Hello World"')
256 Out[5]:
257 <Results List>
258 [0] In [1]: print "Hello World"
259 [0] Out[1]: Hello World
260
261 [1] In [1]: print "Hello World"
262 [1] Out[1]: Hello World
263
264 [2] In [1]: print "Hello World"
265 [2] Out[1]: Hello World
266
267 [3] In [1]: print "Hello World"
268 [3] Out[1]: Hello World
269
270 If this works, you are ready to learn more about the :ref:`MultiEngine <parallelmultiengine>` and :ref:`Task <paralleltask>` interfaces to the controller.
@@ -0,0 +1,22 b''
1 .. _parallelmpi:
2
3 =======================
4 Using MPI with IPython
5 =======================
6
7 The simplest way of getting started with MPI is to install an MPI implementation
8 (we recommend `Open MPI`_) and `mpi4py`_ and then start the engines using the
9 ``mpirun`` command::
10
11 mpirun -n 4 ipengine --mpi=mpi4py
12
13 This will automatically import `mpi4py`_ and make sure that `MPI_Init` is called
14 at the right time. We also have built in support for `PyTrilinos`_, which can be
15 used (assuming `PyTrilinos`_ is installed) by starting the engines with::
16
17 mpirun -n 4 ipengine --mpi=pytrilinos
18
19 .. _MPI: http://www-unix.mcs.anl.gov/mpi/
20 .. _mpi4py: http://mpi4py.scipy.org/
21 .. _Open MPI: http://www.open-mpi.org/
22 .. _PyTrilinos: http://trilinos.sandia.gov/packages/pytrilinos/ No newline at end of file
This diff has been collapsed as it changes many lines, (728 lines changed) Show them Hide them
@@ -0,0 +1,728 b''
1 .. _parallelmultiengine:
2
3 =================================
4 IPython's MultiEngine interface
5 =================================
6
7 .. contents::
8
9 The MultiEngine interface represents one possible way of working with a
10 set of IPython engines. The basic idea behind the MultiEngine interface is
11 that the capabilities of each engine are explicitly exposed to the user.
12 Thus, in the MultiEngine interface, each engine is given an id that is
13 used to identify the engine and give it work to do. This interface is very
14 intuitive and is designed with interactive usage in mind, and is thus the
15 best place for new users of IPython to begin.
16
17 Starting the IPython controller and engines
18 ===========================================
19
20 To follow along with this tutorial, you will need to start the IPython
21 controller and four IPython engines. The simplest way of doing this is to
22 use the ``ipcluster`` command::
23
24 $ ipcluster -n 4
25
26 For more detailed information about starting the controller and engines, see our :ref:`introduction <ip1par>` to using IPython for parallel computing.
27
28 Creating a ``MultiEngineClient`` instance
29 =========================================
30
31 The first step is to import the IPython ``client`` module and then create a ``MultiEngineClient`` instance::
32
33 In [1]: from ipython1.kernel import client
34
35 In [2]: mec = client.MultiEngineClient()
36
37 To make sure there are engines connected to the controller, you can get a list of engine ids::
38
39 In [3]: mec.get_ids()
40 Out[3]: [0, 1, 2, 3]
41
42 Here we see that there are four engines ready to do work for us.
43
44 Running Python commands
45 =======================
46
47 The most basic type of operation that can be performed on the engines is to execute Python code. Executing Python code can be done in blocking or non-blocking mode (blocking is default) using the ``execute`` method.
48
49 Blocking execution
50 ------------------
51
52 In blocking mode, the ``MultiEngineClient`` object (called ``mec`` in
53 these examples) submits the command to the controller, which places the
54 command in the engines' queues for execution. The ``execute`` call then
55 blocks until the engines are done executing the command::
56
57 # The default is to run on all engines
58 In [4]: mec.execute('a=5')
59 Out[4]:
60 <Results List>
61 [0] In [1]: a=5
62 [1] In [1]: a=5
63 [2] In [1]: a=5
64 [3] In [1]: a=5
65
66 In [5]: mec.execute('b=10')
67 Out[5]:
68 <Results List>
69 [0] In [2]: b=10
70 [1] In [2]: b=10
71 [2] In [2]: b=10
72 [3] In [2]: b=10
73
74 Python commands can be executed on specific engines by calling execute using the ``targets`` keyword argument::
75
76 In [6]: mec.execute('c=a+b',targets=[0,2])
77 Out[6]:
78 <Results List>
79 [0] In [3]: c=a+b
80 [2] In [3]: c=a+b
81
82
83 In [7]: mec.execute('c=a-b',targets=[1,3])
84 Out[7]:
85 <Results List>
86 [1] In [3]: c=a-b
87 [3] In [3]: c=a-b
88
89
90 In [8]: mec.execute('print c')
91 Out[8]:
92 <Results List>
93 [0] In [4]: print c
94 [0] Out[4]: 15
95
96 [1] In [4]: print c
97 [1] Out[4]: -5
98
99 [2] In [4]: print c
100 [2] Out[4]: 15
101
102 [3] In [4]: print c
103 [3] Out[4]: -5
104
105 This example also shows one of the most important things about the IPython engines: they have persistent user namespaces. The ``execute`` method returns a Python ``dict`` that contains useful information::
106
107 In [9]: result_dict = mec.execute('d=10; print d')
108
109 In [10]: for r in result_dict:
110 ....: print r
111 ....:
112 ....:
113 {'input': {'translated': 'd=10; print d', 'raw': 'd=10; print d'}, 'number': 5, 'id': 0, 'stdout': '10\n'}
114 {'input': {'translated': 'd=10; print d', 'raw': 'd=10; print d'}, 'number': 5, 'id': 1, 'stdout': '10\n'}
115 {'input': {'translated': 'd=10; print d', 'raw': 'd=10; print d'}, 'number': 5, 'id': 2, 'stdout': '10\n'}
116 {'input': {'translated': 'd=10; print d', 'raw': 'd=10; print d'}, 'number': 5, 'id': 3, 'stdout': '10\n'}
117
118 Non-blocking execution
119 ----------------------
120
121 In non-blocking mode, ``execute`` submits the command to be executed and then returns a
122 ``PendingResult`` object immediately. The ``PendingResult`` object gives you a way of getting a
123 result at a later time through its ``get_result`` method or ``r`` attribute. This allows you to
124 quickly submit long running commands without blocking your local Python/IPython session::
125
126 # In blocking mode
127 In [6]: mec.execute('import time')
128 Out[6]:
129 <Results List>
130 [0] In [1]: import time
131 [1] In [1]: import time
132 [2] In [1]: import time
133 [3] In [1]: import time
134
135 # In non-blocking mode
136 In [7]: pr = mec.execute('time.sleep(10)',block=False)
137
138 # Now block for the result
139 In [8]: pr.get_result()
140 Out[8]:
141 <Results List>
142 [0] In [2]: time.sleep(10)
143 [1] In [2]: time.sleep(10)
144 [2] In [2]: time.sleep(10)
145 [3] In [2]: time.sleep(10)
146
147 # Again in non-blocking mode
148 In [9]: pr = mec.execute('time.sleep(10)',block=False)
149
150 # Poll to see if the result is ready
151 In [10]: pr.get_result(block=False)
152
153 # A shorthand for get_result(block=True)
154 In [11]: pr.r
155 Out[11]:
156 <Results List>
157 [0] In [3]: time.sleep(10)
158 [1] In [3]: time.sleep(10)
159 [2] In [3]: time.sleep(10)
160 [3] In [3]: time.sleep(10)
161
162 Often, it is desirable to wait until a set of ``PendingResult`` objects are done. For this, there is a the method ``barrier``. This method takes a tuple of ``PendingResult`` objects and blocks until all of the associated results are ready::
163
164 In [72]: mec.block=False
165
166 # A trivial list of PendingResults objects
167 In [73]: pr_list = [mec.execute('time.sleep(3)') for i in range(10)]
168
169 # Wait until all of them are done
170 In [74]: mec.barrier(pr_list)
171
172 # Then, their results are ready using get_result or the r attribute
173 In [75]: pr_list[0].r
174 Out[75]:
175 <Results List>
176 [0] In [20]: time.sleep(3)
177 [1] In [19]: time.sleep(3)
178 [2] In [20]: time.sleep(3)
179 [3] In [19]: time.sleep(3)
180
181
182 The ``block`` and ``targets`` keyword arguments and attributes
183 --------------------------------------------------------------
184
185 Most commands in the multiengine interface (like ``execute``) accept ``block`` and ``targets``
186 as keyword arguments. As we have seen above, these keyword arguments control the blocking mode
187 and which engines the command is applied to. The ``MultiEngineClient`` class also has ``block``
188 and ``targets`` attributes that control the default behavior when the keyword arguments are not
189 provided. Thus the following logic is used for ``block`` and ``targets``:
190
191 * If no keyword argument is provided, the instance attributes are used.
192 * Keyword argument, if provided override the instance attributes.
193
194 The following examples demonstrate how to use the instance attributes::
195
196 In [16]: mec.targets = [0,2]
197
198 In [17]: mec.block = False
199
200 In [18]: pr = mec.execute('a=5')
201
202 In [19]: pr.r
203 Out[19]:
204 <Results List>
205 [0] In [6]: a=5
206 [2] In [6]: a=5
207
208 # Note targets='all' means all engines
209 In [20]: mec.targets = 'all'
210
211 In [21]: mec.block = True
212
213 In [22]: mec.execute('b=10; print b')
214 Out[22]:
215 <Results List>
216 [0] In [7]: b=10; print b
217 [0] Out[7]: 10
218
219 [1] In [6]: b=10; print b
220 [1] Out[6]: 10
221
222 [2] In [7]: b=10; print b
223 [2] Out[7]: 10
224
225 [3] In [6]: b=10; print b
226 [3] Out[6]: 10
227
228 The ``block`` and ``targets`` instance attributes also determine the behavior of the parallel
229 magic commands...
230
231
232 Parallel magic commands
233 -----------------------
234
235 We provide a few IPython magic commands (``%px``, ``%autopx`` and ``%result``) that make it more pleasant to execute Python commands on the engines interactively. These are simply shortcuts to ``execute`` and ``get_result``. The ``%px`` magic executes a single Python command on the engines specified by the ``targets`` attribute of the ``MultiEngineClient`` instance (by default this is 'all')::
236
237 # Make this MultiEngineClient active for parallel magic commands
238 In [23]: mec.activate()
239
240 In [24]: mec.block=True
241
242 In [25]: import numpy
243
244 In [26]: %px import numpy
245 Executing command on Controller
246 Out[26]:
247 <Results List>
248 [0] In [8]: import numpy
249 [1] In [7]: import numpy
250 [2] In [8]: import numpy
251 [3] In [7]: import numpy
252
253
254 In [27]: %px a = numpy.random.rand(2,2)
255 Executing command on Controller
256 Out[27]:
257 <Results List>
258 [0] In [9]: a = numpy.random.rand(2,2)
259 [1] In [8]: a = numpy.random.rand(2,2)
260 [2] In [9]: a = numpy.random.rand(2,2)
261 [3] In [8]: a = numpy.random.rand(2,2)
262
263
264 In [28]: %px print numpy.linalg.eigvals(a)
265 Executing command on Controller
266 Out[28]:
267 <Results List>
268 [0] In [10]: print numpy.linalg.eigvals(a)
269 [0] Out[10]: [ 1.28167017 0.14197338]
270
271 [1] In [9]: print numpy.linalg.eigvals(a)
272 [1] Out[9]: [-0.14093616 1.27877273]
273
274 [2] In [10]: print numpy.linalg.eigvals(a)
275 [2] Out[10]: [-0.37023573 1.06779409]
276
277 [3] In [9]: print numpy.linalg.eigvals(a)
278 [3] Out[9]: [ 0.83664764 -0.25602658]
279
280 The ``%result`` magic gets and prints the stdin/stdout/stderr of the last command executed on each engine. It is simply a shortcut to the ``get_result`` method::
281
282 In [29]: %result
283 Out[29]:
284 <Results List>
285 [0] In [10]: print numpy.linalg.eigvals(a)
286 [0] Out[10]: [ 1.28167017 0.14197338]
287
288 [1] In [9]: print numpy.linalg.eigvals(a)
289 [1] Out[9]: [-0.14093616 1.27877273]
290
291 [2] In [10]: print numpy.linalg.eigvals(a)
292 [2] Out[10]: [-0.37023573 1.06779409]
293
294 [3] In [9]: print numpy.linalg.eigvals(a)
295 [3] Out[9]: [ 0.83664764 -0.25602658]
296
297 The ``%autopx`` magic switches to a mode where everything you type is executed on the engines given by the ``targets`` attribute::
298
299 In [30]: mec.block=False
300
301 In [31]: %autopx
302 Auto Parallel Enabled
303 Type %autopx to disable
304
305 In [32]: max_evals = []
306 <ipython1.kernel.multiengineclient.PendingResult object at 0x17b8a70>
307
308 In [33]: for i in range(100):
309 ....: a = numpy.random.rand(10,10)
310 ....: a = a+a.transpose()
311 ....: evals = numpy.linalg.eigvals(a)
312 ....: max_evals.append(evals[0].real)
313 ....:
314 ....:
315 <ipython1.kernel.multiengineclient.PendingResult object at 0x17af8f0>
316
317 In [34]: %autopx
318 Auto Parallel Disabled
319
320 In [35]: mec.block=True
321
322 In [36]: %px print "Average max eigenvalue is: ", sum(max_evals)/len(max_evals)
323 Executing command on Controller
324 Out[36]:
325 <Results List>
326 [0] In [13]: print "Average max eigenvalue is: ", sum(max_evals)/len(max_evals)
327 [0] Out[13]: Average max eigenvalue is: 10.1387247332
328
329 [1] In [12]: print "Average max eigenvalue is: ", sum(max_evals)/len(max_evals)
330 [1] Out[12]: Average max eigenvalue is: 10.2076902286
331
332 [2] In [13]: print "Average max eigenvalue is: ", sum(max_evals)/len(max_evals)
333 [2] Out[13]: Average max eigenvalue is: 10.1891484655
334
335 [3] In [12]: print "Average max eigenvalue is: ", sum(max_evals)/len(max_evals)
336 [3] Out[12]: Average max eigenvalue is: 10.1158837784
337
338 Using the ``with`` statement of Python 2.5
339 ------------------------------------------
340
341 Python 2.5 introduced the ``with`` statement. The ``MultiEngineClient`` can be used with the ``with`` statement to execute a block of code on the engines indicated by the ``targets`` attribute::
342
343 In [3]: with mec:
344 ...: client.remote() # Required so the following code is not run locally
345 ...: a = 10
346 ...: b = 30
347 ...: c = a+b
348 ...:
349 ...:
350
351 In [4]: mec.get_result()
352 Out[4]:
353 <Results List>
354 [0] In [1]: a = 10
355 b = 30
356 c = a+b
357
358 [1] In [1]: a = 10
359 b = 30
360 c = a+b
361
362 [2] In [1]: a = 10
363 b = 30
364 c = a+b
365
366 [3] In [1]: a = 10
367 b = 30
368 c = a+b
369
370 This is basically another way of calling execute, but one which allows you to avoid writing code in strings. When used in this way, the attributes ``targets`` and ``block`` are used to control how the code is executed. For now, if you run code in non-blocking mode you won't have access to the ``PendingResult``.
371
372 Moving Python objects around
373 ============================
374
375 In addition to executing code on engines, you can transfer Python objects to and from your
376 IPython session and the engines. In IPython, these operations are called ``push`` (sending an
377 object to the engines) and ``pull`` (getting an object from the engines).
378
379 Basic push and pull
380 -------------------
381
382 Here are some examples of how you use ``push`` and ``pull``::
383
384 In [38]: mec.push(dict(a=1.03234,b=3453))
385 Out[38]: [None, None, None, None]
386
387 In [39]: mec.pull('a')
388 Out[39]: [1.03234, 1.03234, 1.03234, 1.03234]
389
390 In [40]: mec.pull('b',targets=0)
391 Out[40]: [3453]
392
393 In [41]: mec.pull(('a','b'))
394 Out[41]: [[1.03234, 3453], [1.03234, 3453], [1.03234, 3453], [1.03234, 3453]]
395
396 In [42]: mec.zip_pull(('a','b'))
397 Out[42]: [(1.03234, 1.03234, 1.03234, 1.03234), (3453, 3453, 3453, 3453)]
398
399 In [43]: mec.push(dict(c='speed'))
400 Out[43]: [None, None, None, None]
401
402 In [44]: %px print c
403 Executing command on Controller
404 Out[44]:
405 <Results List>
406 [0] In [14]: print c
407 [0] Out[14]: speed
408
409 [1] In [13]: print c
410 [1] Out[13]: speed
411
412 [2] In [14]: print c
413 [2] Out[14]: speed
414
415 [3] In [13]: print c
416 [3] Out[13]: speed
417
418 In non-blocking mode ``push`` and ``pull`` also return ``PendingResult`` objects::
419
420 In [47]: mec.block=False
421
422 In [48]: pr = mec.pull('a')
423
424 In [49]: pr.r
425 Out[49]: [1.03234, 1.03234, 1.03234, 1.03234]
426
427
428 Push and pull for functions
429 ---------------------------
430
431 Functions can also be pushed and pulled using ``push_function`` and ``pull_function``::
432
433 In [53]: def f(x):
434 ....: return 2.0*x**4
435 ....:
436
437 In [54]: mec.push_function(dict(f=f))
438 Out[54]: [None, None, None, None]
439
440 In [55]: mec.execute('y = f(4.0)')
441 Out[55]:
442 <Results List>
443 [0] In [15]: y = f(4.0)
444 [1] In [14]: y = f(4.0)
445 [2] In [15]: y = f(4.0)
446 [3] In [14]: y = f(4.0)
447
448
449 In [56]: px print y
450 Executing command on Controller
451 Out[56]:
452 <Results List>
453 [0] In [16]: print y
454 [0] Out[16]: 512.0
455
456 [1] In [15]: print y
457 [1] Out[15]: 512.0
458
459 [2] In [16]: print y
460 [2] Out[16]: 512.0
461
462 [3] In [15]: print y
463 [3] Out[15]: 512.0
464
465
466 Dictionary interface
467 --------------------
468
469 As a shorthand to ``push`` and ``pull``, the ``MultiEngineClient`` class implements some of the Python dictionary interface. This make the remote namespaces of the engines appear as a local dictionary. Underneath, this uses ``push`` and ``pull``::
470
471 In [50]: mec.block=True
472
473 In [51]: mec['a']=['foo','bar']
474
475 In [52]: mec['a']
476 Out[52]: [['foo', 'bar'], ['foo', 'bar'], ['foo', 'bar'], ['foo', 'bar']]
477
478 Scatter and gather
479 ------------------
480
481 Sometimes it is useful to partition a sequence and push the partitions to different engines. In
482 MPI language, this is known as scatter/gather and we follow that terminology. However, it is
483 important to remember that in IPython ``scatter`` is from the interactive IPython session to
484 the engines and ``gather`` is from the engines back to the interactive IPython session. For
485 scatter/gather operations between engines, MPI should be used::
486
487 In [58]: mec.scatter('a',range(16))
488 Out[58]: [None, None, None, None]
489
490 In [59]: px print a
491 Executing command on Controller
492 Out[59]:
493 <Results List>
494 [0] In [17]: print a
495 [0] Out[17]: [0, 1, 2, 3]
496
497 [1] In [16]: print a
498 [1] Out[16]: [4, 5, 6, 7]
499
500 [2] In [17]: print a
501 [2] Out[17]: [8, 9, 10, 11]
502
503 [3] In [16]: print a
504 [3] Out[16]: [12, 13, 14, 15]
505
506
507 In [60]: mec.gather('a')
508 Out[60]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
509
510 Other things to look at
511 =======================
512
513 Parallel map
514 ------------
515
516 Python's builtin ``map`` function allows a function to be applied to a sequence element-by-element. This type of code is typically trivial to parallelize. In fact, the MultiEngine interface in IPython already has a parallel version of ``map`` that works just like its serial counterpart::
517
518 In [63]: serial_result = map(lambda x:x**10, range(32))
519
520 In [64]: parallel_result = mec.map(lambda x:x**10, range(32))
521
522 In [65]: serial_result==parallel_result
523 Out[65]: True
524
525 As you would expect, the parallel version of ``map`` is also influenced by the ``block`` and ``targets`` keyword arguments and attributes.
526
527 How to do parallel list comprehensions
528 --------------------------------------
529
530 In many cases list comprehensions are nicer than using the map function. While we don't have fully parallel list comprehensions, it is simple to get the basic effect using ``scatter`` and ``gather``::
531
532 In [66]: mec.scatter('x',range(64))
533 Out[66]: [None, None, None, None]
534
535 In [67]: px y = [i**10 for i in x]
536 Executing command on Controller
537 Out[67]:
538 <Results List>
539 [0] In [19]: y = [i**10 for i in x]
540 [1] In [18]: y = [i**10 for i in x]
541 [2] In [19]: y = [i**10 for i in x]
542 [3] In [18]: y = [i**10 for i in x]
543
544
545 In [68]: y = mec.gather('y')
546
547 In [69]: print y
548 [0, 1, 1024, 59049, 1048576, 9765625, 60466176, 282475249, 1073741824,...]
549
550 Parallel Exceptions
551 -------------------
552
553 In the MultiEngine interface, parallel commands can raise Python exceptions, just like serial commands. But, it is a little subtle, because a single parallel command can actually raise multiple exceptions (one for each engine the command was run on). To express this idea, the MultiEngine interface has a ``CompositeError`` exception class that will be raised in most cases. The ``CompositeError`` class is a special type of exception that wraps one or more other types of exceptions. Here is how it works::
554
555 In [76]: mec.block=True
556
557 In [77]: mec.execute('1/0')
558 ---------------------------------------------------------------------------
559 CompositeError Traceback (most recent call last)
560
561 /ipython1-client-r3021/docs/examples/<ipython console> in <module>()
562
563 /ipython1-client-r3021/ipython1/kernel/multiengineclient.pyc in execute(self, lines, targets, block)
564 432 targets, block = self._findTargetsAndBlock(targets, block)
565 433 result = blockingCallFromThread(self.smultiengine.execute, lines,
566 --> 434 targets=targets, block=block)
567 435 if block:
568 436 result = ResultList(result)
569
570 /ipython1-client-r3021/ipython1/kernel/twistedutil.pyc in blockingCallFromThread(f, *a, **kw)
571 72 result.raiseException()
572 73 except Exception, e:
573 ---> 74 raise e
574 75 return result
575 76
576
577 CompositeError: one or more exceptions from call to method: execute
578 [0:execute]: ZeroDivisionError: integer division or modulo by zero
579 [1:execute]: ZeroDivisionError: integer division or modulo by zero
580 [2:execute]: ZeroDivisionError: integer division or modulo by zero
581 [3:execute]: ZeroDivisionError: integer division or modulo by zero
582
583 Notice how the error message printed when ``CompositeError`` is raised has information about the individual exceptions that were raised on each engine. If you want, you can even raise one of these original exceptions::
584
585 In [80]: try:
586 ....: mec.execute('1/0')
587 ....: except client.CompositeError, e:
588 ....: e.raise_exception()
589 ....:
590 ....:
591 ---------------------------------------------------------------------------
592 ZeroDivisionError Traceback (most recent call last)
593
594 /ipython1-client-r3021/docs/examples/<ipython console> in <module>()
595
596 /ipython1-client-r3021/ipython1/kernel/error.pyc in raise_exception(self, excid)
597 156 raise IndexError("an exception with index %i does not exist"%excid)
598 157 else:
599 --> 158 raise et, ev, etb
600 159
601 160 def collect_exceptions(rlist, method):
602
603 ZeroDivisionError: integer division or modulo by zero
604
605 If you are working in IPython, you can simply type ``%debug`` after one of these ``CompositeError`` exceptions is raised, and inspect the exception instance::
606
607 In [81]: mec.execute('1/0')
608 ---------------------------------------------------------------------------
609 CompositeError Traceback (most recent call last)
610
611 /ipython1-client-r3021/docs/examples/<ipython console> in <module>()
612
613 /ipython1-client-r3021/ipython1/kernel/multiengineclient.pyc in execute(self, lines, targets, block)
614 432 targets, block = self._findTargetsAndBlock(targets, block)
615 433 result = blockingCallFromThread(self.smultiengine.execute, lines,
616 --> 434 targets=targets, block=block)
617 435 if block:
618 436 result = ResultList(result)
619
620 /ipython1-client-r3021/ipython1/kernel/twistedutil.pyc in blockingCallFromThread(f, *a, **kw)
621 72 result.raiseException()
622 73 except Exception, e:
623 ---> 74 raise e
624 75 return result
625 76
626
627 CompositeError: one or more exceptions from call to method: execute
628 [0:execute]: ZeroDivisionError: integer division or modulo by zero
629 [1:execute]: ZeroDivisionError: integer division or modulo by zero
630 [2:execute]: ZeroDivisionError: integer division or modulo by zero
631 [3:execute]: ZeroDivisionError: integer division or modulo by zero
632
633 In [82]: %debug
634 >
635
636 /ipython1-client-r3021/ipython1/kernel/twistedutil.py(74)blockingCallFromThread()
637 73 except Exception, e:
638 ---> 74 raise e
639 75 return result
640
641 # With the debugger running, e is the exceptions instance. We can tab complete
642 # on it and see the extra methods that are available.
643 ipdb> e.
644 e.__class__ e.__getitem__ e.__new__ e.__setstate__ e.args
645 e.__delattr__ e.__getslice__ e.__reduce__ e.__str__ e.elist
646 e.__dict__ e.__hash__ e.__reduce_ex__ e.__weakref__ e.message
647 e.__doc__ e.__init__ e.__repr__ e._get_engine_str e.print_tracebacks
648 e.__getattribute__ e.__module__ e.__setattr__ e._get_traceback e.raise_exception
649 ipdb> e.print_tracebacks()
650 [0:execute]:
651 ---------------------------------------------------------------------------
652 ZeroDivisionError Traceback (most recent call last)
653
654 /ipython1-client-r3021/docs/examples/<string> in <module>()
655
656 ZeroDivisionError: integer division or modulo by zero
657
658 [1:execute]:
659 ---------------------------------------------------------------------------
660 ZeroDivisionError Traceback (most recent call last)
661
662 /ipython1-client-r3021/docs/examples/<string> in <module>()
663
664 ZeroDivisionError: integer division or modulo by zero
665
666 [2:execute]:
667 ---------------------------------------------------------------------------
668 ZeroDivisionError Traceback (most recent call last)
669
670 /ipython1-client-r3021/docs/examples/<string> in <module>()
671
672 ZeroDivisionError: integer division or modulo by zero
673
674 [3:execute]:
675 ---------------------------------------------------------------------------
676 ZeroDivisionError Traceback (most recent call last)
677
678 /ipython1-client-r3021/docs/examples/<string> in <module>()
679
680 ZeroDivisionError: integer division or modulo by zero
681
682 All of this same error handling magic even works in non-blocking mode::
683
684 In [83]: mec.block=False
685
686 In [84]: pr = mec.execute('1/0')
687
688 In [85]: pr.r
689 ---------------------------------------------------------------------------
690 CompositeError Traceback (most recent call last)
691
692 /ipython1-client-r3021/docs/examples/<ipython console> in <module>()
693
694 /ipython1-client-r3021/ipython1/kernel/multiengineclient.pyc in _get_r(self)
695 170
696 171 def _get_r(self):
697 --> 172 return self.get_result(block=True)
698 173
699 174 r = property(_get_r)
700
701 /ipython1-client-r3021/ipython1/kernel/multiengineclient.pyc in get_result(self, default, block)
702 131 return self.result
703 132 try:
704 --> 133 result = self.client.get_pending_deferred(self.result_id, block)
705 134 except error.ResultNotCompleted:
706 135 return default
707
708 /ipython1-client-r3021/ipython1/kernel/multiengineclient.pyc in get_pending_deferred(self, deferredID, block)
709 385
710 386 def get_pending_deferred(self, deferredID, block):
711 --> 387 return blockingCallFromThread(self.smultiengine.get_pending_deferred, deferredID, block)
712 388
713 389 def barrier(self, pendingResults):
714
715 /ipython1-client-r3021/ipython1/kernel/twistedutil.pyc in blockingCallFromThread(f, *a, **kw)
716 72 result.raiseException()
717 73 except Exception, e:
718 ---> 74 raise e
719 75 return result
720 76
721
722 CompositeError: one or more exceptions from call to method: execute
723 [0:execute]: ZeroDivisionError: integer division or modulo by zero
724 [1:execute]: ZeroDivisionError: integer division or modulo by zero
725 [2:execute]: ZeroDivisionError: integer division or modulo by zero
726 [3:execute]: ZeroDivisionError: integer division or modulo by zero
727
728
@@ -0,0 +1,240 b''
1 .. _paralleltask:
2
3 =================================
4 The IPython Task interface
5 =================================
6
7 .. contents::
8
9 The ``Task`` interface to the controller presents the engines as a fault tolerant, dynamic load-balanced system of workers. Unlike the ``MultiEngine`` interface, in the ``Task`` interface, the user has no direct access to individual engines. In some ways, this interface is simpler, but in other ways it is more powerful. Best of all, the user can use both of these interfaces at the same time to take advantage of both of their strengths. When the user can break up their work into segments that do not depend on previous execution, the ``Task`` interface is ideal. But it also has more power and flexibility, allowing the user to guide the distribution of jobs, without having to assign Tasks to engines explicitly.
10
11 Starting the IPython controller and engines
12 ===========================================
13
14 To follow along with this tutorial, the user will need to start the IPython
15 controller and four IPython engines. The simplest way of doing this is to
16 use the ``ipcluster`` command::
17
18 $ ipcluster -n 4
19
20 For more detailed information about starting the controller and engines, see our :ref:`introduction <ip1par>` to using IPython for parallel computing.
21
22 The magic here is that this single controller and set of engines is running both the MultiEngine and ``Task`` interfaces simultaneously.
23
24 QuickStart Task Farming
25 =======================
26
27 First, a quick example of how to start running the most basic Tasks.
28 The first step is to import the IPython ``client`` module and then create a ``TaskClient`` instance::
29
30 In [1]: from ipython1.kernel import client
31
32 In [2]: tc = client.TaskClient()
33
34 Then the user wraps the commands they want to run in Tasks::
35
36 In [3]: tasklist = []
37 In [4]: for n in range(1000):
38 ... tasklist.append(client.Task("a = %i"%n, pull="a"))
39
40 The first argument of the ``Task`` constructor is a string, the command to be executed. The most important optional keyword argument is ``pull``, which can be a string or list of strings, and it specifies the variable names to be saved as results of the ``Task``.
41
42 Next, the user needs to submit the Tasks to the ``TaskController`` with the ``TaskClient``::
43
44 In [5]: taskids = [ tc.run(t) for t in tasklist ]
45
46 This will give the user a list of the TaskIDs used by the controller to keep track of the Tasks and their results. Now at some point the user is going to want to get those results back. The ``barrier`` method allows the user to wait for the Tasks to finish running::
47
48 In [6]: tc.barrier(taskids)
49
50 This command will block until all the Tasks in ``taskids`` have finished. Now, the user probably wants to look at their results::
51
52 In [7]: task_results = [ tc.get_task_result(taskid) for taskid in taskids ]
53
54 Now the user has a list of ``TaskResult`` objects, which have the actual result as a dictionary, but also keep track of some useful metadata about the ``Task``::
55
56 In [8]: tr = task_results[73]
57
58 In [9]: tr
59 Out[9]: TaskResult[ID:73]:{'a':73}
60
61 In [10]: tr.engineid
62 Out[10]: 1
63
64 In [11]: tr.submitted, tr.completed, tr.duration
65 Out[11]: ("2008/03/08 03:41:42", "2008/03/08 03:41:44", 2.12345)
66
67 The actual results are stored in a dictionary, ``tr.results``, and a namespace object ``tr.ns`` which accesses the result keys by attribute::
68
69 In [12]: tr.results['a']
70 Out[12]: 73
71
72 In [13]: tr.ns.a
73 Out[13]: 73
74
75 That should cover the basics of running simple Tasks. There are several more powerful things the user can do with Tasks covered later. The most useful probably being using a ``MultiEngineClient`` interface to initialize all the engines with the import dependencies necessary to run their Tasks.
76
77 There are many options for running and managing Tasks. The best way to learn further about the ``Task`` interface is to study the examples in ``docs/examples``. If the user does so and learns a lot about this interface, we encourage them to expand this documentation about the ``Task`` system.
78
79 Overview of the Task System
80 ===========================
81
82 The user's view of the ``Task`` system has three basic objects: The ``TaskClient``, the ``Task``, and the ``TaskResult``. The names of these three objects well indicate their role.
83
84 The ``TaskClient`` is the user's ``Task`` farming connection to the IPython cluster. Unlike the ``MultiEngineClient``, the ``TaskController`` handles all the scheduling and distribution of work, so the ``TaskClient`` has no notion of engines, it just submits Tasks and requests their results. The Tasks are described as ``Task`` objects, and their results are wrapped in ``TaskResult`` objects. Thus, there are very few necessary methods for the user to manage.
85
86 Inside the task system is a Scheduler object, which assigns tasks to workers. The default scheduler is a simple FIFO queue. Subclassing the Scheduler should be easy, just implementing your own priority system.
87
88 The TaskClient
89 ==============
90
91 The ``TaskClient`` is the object the user uses to connect to the ``Controller`` that is managing their Tasks. It is the analog of the ``MultiEngineClient`` for the standard IPython multiplexing interface. As with all client interfaces, the first step is to import the IPython Client Module::
92
93 In [1]: from ipython1.kernel import client
94
95 Just as with the ``MultiEngineClient``, the user creates the ``TaskClient`` with a tuple, containing the ip-address and port of the ``Controller``. The ``client`` module conveniently has the default address of the ``Task`` interface of the controller. Creating a default ``TaskClient`` object would be done with this::
96
97 In [2]: tc = client.TaskClient(client.default_task_address)
98
99 or, if the user wants to specify a non-default location of the ``Controller``, they can specify it explicitly::
100
101 In [3]: tc = client.TaskClient(("192.168.1.1", 10113))
102
103 As discussed earlier, the ``TaskClient`` only has a few basic methods.
104
105 * ``tc.run(task)``
106 ``run`` is the method by which the user submits Tasks. It takes exactly one argument, a ``Task`` object. All the advanced control of ``Task`` behavior is handled by properties of the ``Task`` object, rather than the submission command, so they will be discussed later in the `Task`_ section. ``run`` returns an integer, the taskID by which the ``Task`` and its results can be tracked and retrieved::
107
108 In [4]: taskid = tc.run(task)
109
110 * ``tc.get_task_result(taskid, block=False)``
111 ``get_task_result`` is the method by which results are retrieved. It takes a single integer argument, the taskID of the result the user wishes to retrieve. ``get_task_result`` also takes a keyword argument ``block``. ``block`` specifies whether the user actually wants to wait for the result. If ``block`` is false, as it is by default, ``get_task_result`` will return immediately. If the ``Task`` has completed, it will return the ``TaskResult`` object for that ``Task``. But if the ``Task`` has not completed, it will return ``None``. If the user specifies ``block=True``, then ``get_task_result`` will wait for the ``Task`` to complete, and always return the ``TaskResult`` for the requested ``Task``.
112 * ``tc.barrier(taskid(s))``
113 ``barrier`` is a synchronization method. It takes exactly one argument, a taskID or list of taskIDs. ``barrier`` will block until all the specified Tasks have completed. In practice, a barrier is often called between the ``Task`` submission section of the code and the result gathering section::
114
115 In [5]: taskIDs = [ tc.run(task) for task in myTasks ]
116
117 In [6]: tc.get_task_result(taskIDs[-1]) is None
118 Out[6]: True
119
120 In [7]: tc.barrier(taskIDs)
121
122 In [8]: results = [ tc.get_task_result(tid) for tid in taskIDs ]
123
124 * ``tc.queue_status(verbose=False)``
125 ``queue_status`` is a method for querying the state of the ``TaskController``. ``queue_status`` returns a dict of the form::
126
127 {'scheduled': Tasks that have been submitted but yet run
128 'pending' : Tasks that are currently running
129 'succeeded': Tasks that have completed successfully
130 'failed' : Tasks that have finished with a failure
131 }
132
133 if @verbose is not specified (or is ``False``), then the values of the dict are integers - the number of Tasks in each state. if @verbose is ``True``, then each element in the dict is a list of the taskIDs in that state::
134
135 In [8]: tc.queue_status()
136 Out[8]: {'scheduled': 4,
137 'pending' : 2,
138 'succeeded': 5,
139 'failed' : 1
140 }
141
142 In [9]: tc.queue_status(verbose=True)
143 Out[9]: {'scheduled': [8,9,10,11],
144 'pending' : [6,7],
145 'succeeded': [0,1,2,4,5],
146 'failed' : [3]
147 }
148
149 * ``tc.abort(taskid)``
150 ``abort`` allows the user to abort Tasks that have already been submitted. ``abort`` will always return immediately. If the ``Task`` has completed, ``abort`` will raise an ``IndexError: Task Already Completed``. An obvious case for ``abort`` would be where the user submits a long-running ``Task`` with a number of retries (see the `Task`_ section for how to specify retries) in an interactive session, but realizes there has been a typo. The user can then abort the ``Task``, preventing certain failures from cluttering up the queue. It can also be used for parallel search-type problems, where only one ``Task`` will give the solution, so once the user finds the solution, they would want to abort all remaining Tasks to prevent wasted work.
151 * ``tc.spin()``
152 ``spin`` simply triggers the scheduler in the ``TaskController``. Under most normal circumstances, this will do nothing. The primary known usage case involves the ``Task`` dependency (see `Dependencies`_). The dependency is a function of an Engine's ``properties``, but changing the ``properties`` via the ``MultiEngineClient`` does not trigger a reschedule event. The main example case for this requires the following event sequence:
153 * ``engine`` is available, ``Task`` is submitted, but ``engine`` does not have ``Task``'s dependencies.
154 * ``engine`` gets necessary dependencies while no new Tasks are submitted or completed.
155 * now ``engine`` can run ``Task``, but a ``Task`` event is required for the ``TaskController`` to try scheduling ``Task`` again.
156
157 ``spin`` is just an empty ping method to ensure that the Controller has scheduled all available Tasks, and should not be needed under most normal circumstances.
158
159 That covers the ``TaskClient``, a simple interface to the cluster. With this, the user can submit jobs (and abort if necessary), request their results, synchronize on arbitrary subsets of jobs.
160
161 .. _task:
162
163 The Task Object
164 ===============
165
166 The ``Task`` is the basic object for describing a job. It can be used in a very simple manner, where the user just specifies a command string to be executed as the ``Task``. The usage of this first argument is exactly the same as the ``execute`` method of the ``MultiEngine`` (in fact, ``execute`` is called to run the code)::
167
168 In [1]: t = client.Task("a = str(id)")
169
170 This ``Task`` would run, and store the string representation of the ``id`` element in ``a`` in each worker's namespace, but it is fairly useless because the user does not know anything about the state of the ``worker`` on which it ran at the time of retrieving results. It is important that each ``Task`` not expect the state of the ``worker`` to persist after the ``Task`` is completed.
171 There are many different situations for using ``Task`` Farming, and the ``Task`` object has many attributes for use in customizing the ``Task`` behavior. All of a ``Task``'s attributes may be specified in the constructor, through keyword arguments, or after ``Task`` construction through attribute assignment.
172
173 Data Attributes
174 ***************
175 It is likely that the user may want to move data around before or after executing the ``Task``. We provide methods of sending data to initialize the worker's namespace, and specifying what data to bring back as the ``Task``'s results.
176
177 * pull = []
178 The obvious case is as above, where ``t`` would execute and store the result of ``myfunc`` in ``a``, it is likely that the user would want to bring ``a`` back to their namespace. This is done through the ``pull`` attribute. ``pull`` can be a string or list of strings, and it specifies the names of variables to be retrieved. The ``TaskResult`` object retrieved by ``get_task_result`` will have a dictionary of keys and values, and the ``Task``'s ``pull`` attribute determines what goes into it::
179
180 In [2]: t = client.Task("a = str(id)", pull = "a")
181
182 In [3]: t = client.Task("a = str(id)", pull = ["a", "id"])
183
184 * push = {}
185 A user might also want to initialize some data into the namespace before the code part of the ``Task`` is run. Enter ``push``. ``push`` is a dictionary of key/value pairs to be loaded from the user's namespace into the worker's immediately before execution::
186
187 In [4]: t = client.Task("a = f(submitted)", push=dict(submitted=time.time()), pull="a")
188
189 push and pull result directly in calling an ``engine``'s ``push`` and ``pull`` methods before and after ``Task`` execution respectively, and thus their api is the same.
190
191 Namespace Cleaning
192 ******************
193 When a user is running a large number of Tasks, it is likely that the workers' namespaces could become cluttered. Some Tasks might be sensitive to clutter, while others might be known to cause namespace pollution. For these reasons, Tasks have two boolean attributes for cleaning up the namespace.
194
195 * ``clear_after``
196 if clear_after is specified ``True``, the worker on which the ``Task`` was run will be reset (via ``engine.reset``) upon completion of the ``Task``. This can be useful for both Tasks that produce clutter or Tasks whose intermediate data one might wish to be kept private::
197
198 In [5]: t = client.Task("a = range(1e10)", pull = "a",clear_after=True)
199
200
201 * ``clear_before``
202 as one might guess, clear_before is identical to ``clear_after``, but it takes place before the ``Task`` is run. This ensures that the ``Task`` runs on a fresh worker::
203
204 In [6]: t = client.Task("a = globals()", pull = "a",clear_before=True)
205
206 Of course, a user can use both at the same time, ensuring that all workers are clear except when they are currently running a job. Both of these default to ``False``.
207
208 Fault Tolerance
209 ***************
210 It is possible that Tasks might fail, and there are a variety of reasons this could happen. One might be that the worker it was running on disconnected, and there was nothing wrong with the ``Task`` itself. With the fault tolerance attributes of the ``Task``, the user can specify how many times to resubmit the ``Task``, and what to do if it never succeeds.
211
212 * ``retries``
213 ``retries`` is an integer, specifying the number of times a ``Task`` is to be retried. It defaults to zero. It is often a good idea for this number to be 1 or 2, to protect the ``Task`` from disconnecting engines, but not a large number. If a ``Task`` is failing 100 times, there is probably something wrong with the ``Task``. The canonical bad example:
214
215 In [7]: t = client.Task("os.kill(os.getpid(), 9)", retries=99)
216
217 This would actually take down 100 workers.
218
219 * ``recovery_task``
220 ``recovery_task`` is another ``Task`` object, to be run in the event of the original ``Task`` still failing after running out of retries. Since ``recovery_task`` is another ``Task`` object, it can have its own ``recovery_task``. The chain of Tasks is limitless, except loops are not allowed (that would be bad!).
221
222 Dependencies
223 ************
224 Dependencies are the most powerful part of the ``Task`` farming system, because it allows the user to do some classification of the workers, and guide the ``Task`` distribution without meddling with the controller directly. It makes use of two objects - the ``Task``'s ``depend`` attribute, and the engine's ``properties``. See the `MultiEngine`_ reference for how to use engine properties. The engine properties api exists for extending IPython, allowing conditional execution and new controllers that make decisions based on properties of its engines. Currently the ``Task`` dependency is the only internal use of the properties api.
225
226 .. _MultiEngine: ./parallel_multiengine
227
228 The ``depend`` attribute of a ``Task`` must be a function of exactly one argument, the worker's properties dictionary, and it should return ``True`` if the ``Task`` should be allowed to run on the worker and ``False`` if not. The usage in the controller is fault tolerant, so exceptions raised by ``Task.depend`` will be ignored and functionally equivalent to always returning ``False``. Tasks with invalid ``depend`` functions will never be assigned to a worker::
229
230 In [8]: def dep(properties):
231 ... return properties["RAM"] > 2**32 # have at least 4GB
232 In [9]: t = client.Task("a = bigfunc()", depend=dep)
233
234 It is important to note that assignment of values to the properties dict is done entirely by the user, either locally (in the engine) using the EngineAPI, or remotely, through the ``MultiEngineClient``'s get/set_properties methods.
235
236
237
238
239
240
@@ -1,7 +1,7 b''
1 1 # -*- coding: utf-8 -*-
2 2 #
3 3 # IPython documentation build configuration file, created by
4 # sphinx-quickstart.py on Sat Mar 29 15:36:13 2008.
4 # sphinx-quickstart on Thu May 8 16:45:02 2008.
5 5 #
6 6 # This file is execfile()d with the current directory set to its containing dir.
7 7 #
@@ -11,38 +11,40 b''
11 11 # All configuration values have a default value; values that are commented out
12 12 # serve to show the default value.
13 13
14 import sys
14 import sys, os
15 15
16 # If your extensions are in another directory, add it here.
17 #sys.path.append('some/directory')
16 # If your extensions are in another directory, add it here. If the directory
17 # is relative to the documentation root, use os.path.abspath to make it
18 # absolute, like shown here.
19 #sys.path.append(os.path.abspath('some/directory'))
18 20
19 21 # General configuration
20 22 # ---------------------
21 23
22 24 # Add any Sphinx extension module names here, as strings. They can be extensions
23 # coming with Sphinx (named 'sphinx.addons.*') or your custom ones.
25 # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
24 26 #extensions = []
25 27
26 28 # Add any paths that contain templates here, relative to this directory.
27 29 templates_path = ['_templates']
28 30
29 31 # The suffix of source filenames.
30 source_suffix = '.rst'
32 source_suffix = '.txt'
31 33
32 34 # The master toctree document.
33 master_doc = 'ipython'
35 master_doc = 'index'
34 36
35 37 # General substitutions.
36 38 project = 'IPython'
37 copyright = '2008, Fernando Perez'
39 copyright = '2008, The IPython Development Team'
38 40
39 41 # The default replacements for |version| and |release|, also used in various
40 42 # other places throughout the built documents.
41 43 #
42 44 # The short X.Y version.
43 version = '0.8.3'
45 version = '0.8.4'
44 46 # The full version, including alpha/beta/rc tags.
45 release = '0.8.3'
47 release = '0.8.4'
46 48
47 49 # There are two options for replacing |today|: either, you set today to some
48 50 # non-false value, then it is used:
@@ -53,6 +55,10 b" today_fmt = '%B %d, %Y'"
53 55 # List of documents that shouldn't be included in the build.
54 56 #unused_docs = []
55 57
58 # List of directories, relative to source directories, that shouldn't be searched
59 # for source files.
60 #exclude_dirs = []
61
56 62 # If true, '()' will be appended to :func: etc. cross-reference text.
57 63 #add_function_parentheses = True
58 64
@@ -76,6 +82,14 b" pygments_style = 'sphinx'"
76 82 # given in html_static_path.
77 83 html_style = 'default.css'
78 84
85 # The name for this set of Sphinx documents. If None, it defaults to
86 # "<project> v<release> documentation".
87 #html_title = None
88
89 # The name of an image file (within the static path) to place at the top of
90 # the sidebar.
91 #html_logo = None
92
79 93 # Add any paths that contain custom static files (such as style sheets) here,
80 94 # relative to this directory. They are copied after the builtin static files,
81 95 # so a file named "default.css" will overwrite the builtin "default.css".
@@ -89,9 +103,6 b" html_last_updated_fmt = '%b %d, %Y'"
89 103 # typographically correct entities.
90 104 #html_use_smartypants = True
91 105
92 # Content template for the index page.
93 #html_index = ''
94
95 106 # Custom sidebar templates, maps document names to template names.
96 107 #html_sidebars = {}
97 108
@@ -105,6 +116,14 b" html_last_updated_fmt = '%b %d, %Y'"
105 116 # If true, the reST sources are included in the HTML build as _sources/<name>.
106 117 #html_copy_source = True
107 118
119 # If true, an OpenSearch description file will be output, and all pages will
120 # contain a <link> tag referring to it. The value of this option must be the
121 # base URL from which the finished HTML is served.
122 #html_use_opensearch = ''
123
124 # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
125 #html_file_suffix = ''
126
108 127 # Output file base name for HTML help builder.
109 128 htmlhelp_basename = 'IPythondoc'
110 129
@@ -113,14 +132,24 b" htmlhelp_basename = 'IPythondoc'"
113 132 # ------------------------
114 133
115 134 # The paper size ('letter' or 'a4').
116 latex_paper_size = 'a4'
135 latex_paper_size = 'letter'
117 136
118 137 # The font size ('10pt', '11pt' or '12pt').
119 138 latex_font_size = '10pt'
120 139
121 140 # Grouping the document tree into LaTeX files. List of tuples
122 141 # (source start file, target name, title, author, document class [howto/manual]).
123 latex_documents = [('ipython','ipython.tex','IPython Documentation','Fernando Perez (and contributors)','manual')]
142 latex_documents = [
143 ('index', 'IPython.tex', 'IPython Documentation', 'The IPython Development Team', 'manual'),
144 ]
145
146 # The name of an image file (relative to this directory) to place at the top of
147 # the title page.
148 #latex_logo = None
149
150 # For "manual" documents, if this is true, then toplevel headings are parts,
151 # not chapters.
152 #latex_use_parts = False
124 153
125 154 # Additional stuff for the LaTeX preamble.
126 155 #latex_preamble = ''
1 NO CONTENT: file renamed from docs/source/core/ipython.txt to docs/source/interactive/ipython.txt
1 NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now