Fixed most of the examples. A few still don't work, but this is a start.
Brian E Granger
@@ -1,23 +1,23 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # encoding: utf-8
2 # encoding: utf-8
3
3
4 # A super simple example showing how to use all of this in a fully
4 # A super simple example showing how to use all of this in a fully
5 # asynchronous manner. The TaskClient also works in this mode.
5 # asynchronous manner. The TaskClient also works in this mode.
6
6
7 from twisted.internet import reactor, defer
7 from twisted.internet import reactor, defer
8 from ipython1.kernel import asyncclient
8 from IPython.kernel import asyncclient
9
9
10 def printer(r):
10 def printer(r):
11 print r
11 print r
12 return r
12 return r
13
13
14 def submit(client):
14 def submit(client):
15 d = client.push(dict(a=5, b='asdf', c=[1,2,3]),targets=0,block=True)
15 d = client.push(dict(a=5, b='asdf', c=[1,2,3]),targets=0,block=True)
16 d.addCallback(lambda _: client.pull(('a','b','c'),targets=0,block=True))
16 d.addCallback(lambda _: client.pull(('a','b','c'),targets=0,block=True))
17 d.addBoth(printer)
17 d.addBoth(printer)
18 d.addCallback(lambda _: reactor.stop())
18 d.addCallback(lambda _: reactor.stop())
19
19
20 d = asyncclient.get_multiengine_client()
20 d = asyncclient.get_multiengine_client()
21 d.addCallback(submit)
21 d.addCallback(submit)
22
22
23 reactor.run() No newline at end of file
23 reactor.run()
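The comment above notes that the TaskClient also works in this fully asynchronous mode, but only the multiengine client is exercised here. A minimal sketch of the equivalent task-based flow, pieced together from the async task example that follows (the ``AsyncTaskClient`` constructor, port 10113 and the deferred chain are taken from that example, which is itself marked as currently broken, so treat this as the intended shape rather than a verified recipe)::

    from twisted.internet import reactor
    from IPython.kernel import asyncclient

    def show(r):
        # Simple callback that prints whatever fired the deferred.
        print r
        return r

    tc = asyncclient.AsyncTaskClient(('localhost', 10113))
    t = asyncclient.Task("a = 2 + 2", pull=['a'])

    d = tc.run(t)                                           # fires with the task id
    d.addCallback(lambda tid: tc.get_task_result(tid, block=True))
    d.addCallback(show)                                     # prints the TaskResult
    d.addCallback(lambda _: reactor.stop())
    reactor.run()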
@@ -1,31 +1,32 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # encoding: utf-8
2 # encoding: utf-8
3
3
4 # This example shows how the AsynTaskClient can be used
4 # This example shows how the AsynTaskClient can be used
5 # This example is currently broken
5
6
6 from twisted.internet import reactor, defer
7 from twisted.internet import reactor, defer
7 from ipython1.kernel import asynclient
8 from IPython.kernel import asyncclient
8
9
9 mec = asynclient.AsynMultiEngineClient(('localhost', 10105))
10 mec = asyncclient.AsyncMultiEngineClient(('localhost', 10105))
10 tc = asynclient.AsynTaskClient(('localhost',10113))
11 tc = asyncclient.AsyncTaskClient(('localhost',10113))
11
12
12 cmd1 = """\
13 cmd1 = """\
13 a = 5
14 a = 5
14 b = 10*d
15 b = 10*d
15 c = a*b*d
16 c = a*b*d
16 """
17 """
17
18
18 t1 = asynclient.Task(cmd1, clear_before=False, clear_after=True, pull=['a','b','c'])
19 t1 = asyncclient.Task(cmd1, clear_before=False, clear_after=True, pull=['a','b','c'])
19
20
20 d = mec.push(dict(d=30))
21 d = mec.push(dict(d=30))
21
22
22 def raise_and_print(tr):
23 def raise_and_print(tr):
23 tr.raiseException()
24 tr.raiseException()
24 print "a, b: ", tr.ns.a, tr.ns.b
25 print "a, b: ", tr.ns.a, tr.ns.b
25 return tr
26 return tr
26
27
27 d.addCallback(lambda _: tc.run(t1))
28 d.addCallback(lambda _: tc.run(t1))
28 d.addCallback(lambda tid: tc.get_task_result(tid,block=True))
29 d.addCallback(lambda tid: tc.get_task_result(tid,block=True))
29 d.addCallback(raise_and_print)
30 d.addCallback(raise_and_print)
30 d.addCallback(lambda _: reactor.stop())
31 d.addCallback(lambda _: reactor.stop())
31 reactor.run()
32 reactor.run()
@@ -1,90 +1,90 b''
1 """
1 """
2 An exceptionally lousy site spider
2 An exceptionally lousy site spider
3 Ken Kinder <ken@kenkinder.com>
3 Ken Kinder <ken@kenkinder.com>
4
4
5 This module gives an example of how the TaskClient interface to the
5 This module gives an example of how the TaskClient interface to the
6 IPython controller works. Before running this script, start the IPython controller
6 IPython controller works. Before running this script, start the IPython controller
7 and some engines using something like::
7 and some engines using something like::
8
8
9 ipcluster -n 4
9 ipcluster -n 4
10 """
10 """
11 from twisted.python.failure import Failure
11 from twisted.python.failure import Failure
12 from ipython1.kernel import client
12 from IPython.kernel import client
13 import time
13 import time
14
14
15 fetchParse = """
15 fetchParse = """
16 from twisted.web import microdom
16 from twisted.web import microdom
17 import urllib2
17 import urllib2
18 import urlparse
18 import urlparse
19
19
20 def fetchAndParse(url, data=None):
20 def fetchAndParse(url, data=None):
21 links = []
21 links = []
22 try:
22 try:
23 page = urllib2.urlopen(url, data=data)
23 page = urllib2.urlopen(url, data=data)
24 except Exception:
24 except Exception:
25 return links
25 return links
26 else:
26 else:
27 if page.headers.type == 'text/html':
27 if page.headers.type == 'text/html':
28 doc = microdom.parseString(page.read(), beExtremelyLenient=True)
28 doc = microdom.parseString(page.read(), beExtremelyLenient=True)
29 for node in doc.getElementsByTagName('a'):
29 for node in doc.getElementsByTagName('a'):
30 if node.getAttribute('href'):
30 if node.getAttribute('href'):
31 links.append(urlparse.urljoin(url, node.getAttribute('href')))
31 links.append(urlparse.urljoin(url, node.getAttribute('href')))
32 return links
32 return links
33 """
33 """
34
34
35 class DistributedSpider(object):
35 class DistributedSpider(object):
36
36
37 # Time to wait between polling for task results.
37 # Time to wait between polling for task results.
38 pollingDelay = 0.5
38 pollingDelay = 0.5
39
39
40 def __init__(self, site):
40 def __init__(self, site):
41 self.tc = client.TaskClient()
41 self.tc = client.TaskClient()
42 self.rc = client.MultiEngineClient()
42 self.rc = client.MultiEngineClient()
43 self.rc.execute(fetchParse)
43 self.rc.execute(fetchParse)
44
44
45 self.allLinks = []
45 self.allLinks = []
46 self.linksWorking = {}
46 self.linksWorking = {}
47 self.linksDone = {}
47 self.linksDone = {}
48
48
49 self.site = site
49 self.site = site
50
50
51 def visitLink(self, url):
51 def visitLink(self, url):
52 if url not in self.allLinks:
52 if url not in self.allLinks:
53 self.allLinks.append(url)
53 self.allLinks.append(url)
54 if url.startswith(self.site):
54 if url.startswith(self.site):
55 print ' ', url
55 print ' ', url
56 self.linksWorking[url] = self.tc.run(client.Task('links = fetchAndParse(url)', pull=['links'], push={'url': url}))
56 self.linksWorking[url] = self.tc.run(client.Task('links = fetchAndParse(url)', pull=['links'], push={'url': url}))
57
57
58 def onVisitDone(self, result, url):
58 def onVisitDone(self, result, url):
59 print url, ':'
59 print url, ':'
60 self.linksDone[url] = None
60 self.linksDone[url] = None
61 del self.linksWorking[url]
61 del self.linksWorking[url]
62 if isinstance(result.failure, Failure):
62 if isinstance(result.failure, Failure):
63 txt = result.failure.getTraceback()
63 txt = result.failure.getTraceback()
64 for line in txt.split('\n'):
64 for line in txt.split('\n'):
65 print ' ', line
65 print ' ', line
66 else:
66 else:
67 for link in result.ns.links:
67 for link in result.ns.links:
68 self.visitLink(link)
68 self.visitLink(link)
69
69
70 def run(self):
70 def run(self):
71 self.visitLink(self.site)
71 self.visitLink(self.site)
72 while self.linksWorking:
72 while self.linksWorking:
73 print len(self.linksWorking), 'pending...'
73 print len(self.linksWorking), 'pending...'
74 self.synchronize()
74 self.synchronize()
75 time.sleep(self.pollingDelay)
75 time.sleep(self.pollingDelay)
76
76
77 def synchronize(self):
77 def synchronize(self):
78 for url, taskId in self.linksWorking.items():
78 for url, taskId in self.linksWorking.items():
79 # Calling get_task_result with block=False will return None if the
79 # Calling get_task_result with block=False will return None if the
80 # task is not done yet. This provides a simple way of polling.
80 # task is not done yet. This provides a simple way of polling.
81 result = self.tc.get_task_result(taskId, block=False)
81 result = self.tc.get_task_result(taskId, block=False)
82 if result is not None:
82 if result is not None:
83 self.onVisitDone(result, url)
83 self.onVisitDone(result, url)
84
84
85 def main():
85 def main():
86 distributedSpider = DistributedSpider(raw_input('Enter site to crawl: '))
86 distributedSpider = DistributedSpider(raw_input('Enter site to crawl: '))
87 distributedSpider.run()
87 distributedSpider.run()
88
88
89 if __name__ == '__main__':
89 if __name__ == '__main__':
90 main()
90 main()
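The polling loop in ``synchronize`` is what keeps the spider submitting new work as results trickle in. When nothing needs to happen between submission and collection, the same wait can be written with ``barrier``, as the options-pricer example elsewhere in this changeset does. A small sketch using the same ``client`` API (``urls`` is just a placeholder list, and ``fetchAndParse`` is assumed to have been pushed to the engines exactly as ``DistributedSpider.__init__`` does above)::

    from IPython.kernel import client

    tc = client.TaskClient()
    # Submit a batch of fetch tasks, block until all complete, then collect.
    taskids = [tc.run(client.Task('links = fetchAndParse(url)',
                                  pull=['links'], push={'url': u}))
               for u in urls]
    tc.barrier(taskids)              # block until every task has finished
    results = [tc.get_task_result(tid) for tid in taskids]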
@@ -1,14 +1,14 b''
1 """
1 """
2 A Distributed Hello world
2 A Distributed Hello world
3 Ken Kinder <ken@kenkinder.com>
3 Ken Kinder <ken@kenkinder.com>
4 """
4 """
5 from ipython1.kernel import client
5 from IPython.kernel import client
6
6
7 tc = client.TaskClient()
7 tc = client.TaskClient()
8 mec = client.MultiEngineClient()
8 mec = client.MultiEngineClient()
9
9
10 mec.execute('import time')
10 mec.execute('import time')
11 hello_taskid = tc.run(client.Task('time.sleep(3) ; word = "Hello,"', pull=('word')))
11 hello_taskid = tc.run(client.Task('time.sleep(3) ; word = "Hello,"', pull=('word')))
12 world_taskid = tc.run(client.Task('time.sleep(3) ; word = "World!"', pull=('word')))
12 world_taskid = tc.run(client.Task('time.sleep(3) ; word = "World!"', pull=('word')))
13 print "Submitted tasks:", hello_taskid, world_taskid
13 print "Submitted tasks:", hello_taskid, world_taskid
14 print tc.get_task_result(hello_taskid,block=True).ns.word, tc.get_task_result(world_taskid,block=True).ns.word
14 print tc.get_task_result(hello_taskid,block=True).ns.word, tc.get_task_result(world_taskid,block=True).ns.word
@@ -1,70 +1,70 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # encoding: utf-8
2 # encoding: utf-8
3 """Run a Monte-Carlo options pricer in parallel."""
3 """Run a Monte-Carlo options pricer in parallel."""
4
4
5 from ipython1.kernel import client
5 from IPython.kernel import client
6 import numpy as N
6 import numpy as N
7 from mcpricer import MCOptionPricer
7 from mcpricer import MCOptionPricer
8
8
9
9
10 tc = client.TaskClient()
10 tc = client.TaskClient()
11 rc = client.MultiEngineClient()
11 rc = client.MultiEngineClient()
12
12
13 # Initialize the common code on the engines
13 # Initialize the common code on the engines
14 rc.run('mcpricer.py')
14 rc.run('mcpricer.py')
15
15
16 # Push the variables that won't change (stock price, interest rate, days and MC paths)
16 # Push the variables that won't change (stock price, interest rate, days and MC paths)
17 rc.push(dict(S=100.0, r=0.05, days=260, paths=10000))
17 rc.push(dict(S=100.0, r=0.05, days=260, paths=10000))
18
18
19 task_string = """\
19 task_string = """\
20 op = MCOptionPricer(S,K,sigma,r,days,paths)
20 op = MCOptionPricer(S,K,sigma,r,days,paths)
21 op.run()
21 op.run()
22 vp, ap, vc, ac = op.vanilla_put, op.asian_put, op.vanilla_call, op.asian_call
22 vp, ap, vc, ac = op.vanilla_put, op.asian_put, op.vanilla_call, op.asian_call
23 """
23 """
24
24
25 # Create arrays of strike prices and volatilities
25 # Create arrays of strike prices and volatilities
26 K_vals = N.arange(90.0,110.0,2.0)
26 K_vals = N.arange(90.0,110.0,2.0)
27 sigma_vals = N.arange(0.02, 0.3, 0.02)
27 sigma_vals = N.arange(0.02, 0.3, 0.02)
28
28
29 # Submit tasks
29 # Submit tasks
30 taskids = []
30 taskids = []
31 for K in K_vals:
31 for K in K_vals:
32 for sigma in sigma_vals:
32 for sigma in sigma_vals:
33 t = client.Task(task_string,
33 t = client.Task(task_string,
34 push=dict(sigma=sigma,K=K),
34 push=dict(sigma=sigma,K=K),
35 pull=('vp','ap','vc','ac','sigma','K'))
35 pull=('vp','ap','vc','ac','sigma','K'))
36 taskids.append(tc.run(t))
36 taskids.append(tc.run(t))
37
37
38 print "Submitted tasks: ", taskids
38 print "Submitted tasks: ", taskids
39
39
40 # Block until tasks are completed
40 # Block until tasks are completed
41 tc.barrier(taskids)
41 tc.barrier(taskids)
42
42
43 # Get the results
43 # Get the results
44 results = [tc.get_task_result(tid) for tid in taskids]
44 results = [tc.get_task_result(tid) for tid in taskids]
45
45
46 # Assemble the result
46 # Assemble the result
47 vc = N.empty(K_vals.shape[0]*sigma_vals.shape[0],dtype='float64')
47 vc = N.empty(K_vals.shape[0]*sigma_vals.shape[0],dtype='float64')
48 vp = N.empty(K_vals.shape[0]*sigma_vals.shape[0],dtype='float64')
48 vp = N.empty(K_vals.shape[0]*sigma_vals.shape[0],dtype='float64')
49 ac = N.empty(K_vals.shape[0]*sigma_vals.shape[0],dtype='float64')
49 ac = N.empty(K_vals.shape[0]*sigma_vals.shape[0],dtype='float64')
50 ap = N.empty(K_vals.shape[0]*sigma_vals.shape[0],dtype='float64')
50 ap = N.empty(K_vals.shape[0]*sigma_vals.shape[0],dtype='float64')
51 for i, tr in enumerate(results):
51 for i, tr in enumerate(results):
52 ns = tr.ns
52 ns = tr.ns
53 vc[i] = ns.vc
53 vc[i] = ns.vc
54 vp[i] = ns.vp
54 vp[i] = ns.vp
55 ac[i] = ns.ac
55 ac[i] = ns.ac
56 ap[i] = ns.ap
56 ap[i] = ns.ap
57 vc.shape = (K_vals.shape[0],sigma_vals.shape[0])
57 vc.shape = (K_vals.shape[0],sigma_vals.shape[0])
58 vp.shape = (K_vals.shape[0],sigma_vals.shape[0])
58 vp.shape = (K_vals.shape[0],sigma_vals.shape[0])
59 ac.shape = (K_vals.shape[0],sigma_vals.shape[0])
59 ac.shape = (K_vals.shape[0],sigma_vals.shape[0])
60 ap.shape = (K_vals.shape[0],sigma_vals.shape[0])
60 ap.shape = (K_vals.shape[0],sigma_vals.shape[0])
61
61
62
62
63 def plot_options(K_vals, sigma_vals, prices):
63 def plot_options(K_vals, sigma_vals, prices):
64 """Make a contour plot of the option prices."""
64 """Make a contour plot of the option prices."""
65 import pylab
65 import pylab
66 pylab.contourf(sigma_vals, K_vals, prices)
66 pylab.contourf(sigma_vals, K_vals, prices)
67 pylab.colorbar()
67 pylab.colorbar()
68 pylab.title("Option Price")
68 pylab.title("Option Price")
69 pylab.xlabel("Volatility")
69 pylab.xlabel("Volatility")
70 pylab.ylabel("Strike Price")
70 pylab.ylabel("Strike Price")
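The script assembles the ``vp``/``ap``/``vc``/``ac`` surfaces and defines ``plot_options`` but never calls it. A minimal follow-up, assuming a working matplotlib install, might look like::

    # Illustrative only: contour-plot the assembled vanilla-call surface
    # using the helper defined above.
    import pylab
    plot_options(K_vals, sigma_vals, vc)
    pylab.show()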
@@ -1,227 +1,227 b''
1 #-------------------------------------------------------------------------------
1 #-------------------------------------------------------------------------------
2 # Imports
2 # Imports
3 #-------------------------------------------------------------------------------
3 #-------------------------------------------------------------------------------
4
4
5 import time
5 import time
6 import numpy
6 import numpy
7
7
8 import ipython1.kernel.magic
8 import IPython.kernel.magic
9 from ipython1.kernel import client
9 from IPython.kernel import client
10 from ipython1.kernel.error import *
10 from IPython.kernel.error import *
11
11
12 mec = client.MultiEngineClient()
12 mec = client.MultiEngineClient()
13
13
14 #-------------------------------------------------------------------------------
14 #-------------------------------------------------------------------------------
15 # Setup
15 # Setup
16 #-------------------------------------------------------------------------------
16 #-------------------------------------------------------------------------------
17
17
18 mec.reset()
18 mec.reset()
19 mec.activate()
19 mec.activate()
20 mec.block = True
20 mec.block = True
21 mec.get_ids()
21 mec.get_ids()
22 n = len(mec)
22 n = len(mec)
23 assert n >= 4, "Not Enough Engines: %i, 4 needed for this script"%n
23 assert n >= 4, "Not Enough Engines: %i, 4 needed for this script"%n
24
24
25 values = [
25 values = [
26 10,
26 10,
27 1.0,
27 1.0,
28 range(100),
28 range(100),
29 ('asdf', 1000),
29 ('asdf', 1000),
30 {'a': 10, 'b': 20}
30 {'a': 10, 'b': 20}
31 ]
31 ]
32
32
33 keys = ['a','b','c','d','e']
33 keys = ['a','b','c','d','e']
34
34
35 sequences = [
35 sequences = [
36 range(100),
36 range(100),
37 numpy.arange(100)
37 numpy.arange(100)
38 ]
38 ]
39
39
40 #-------------------------------------------------------------------------------
40 #-------------------------------------------------------------------------------
41 # Blocking execution
41 # Blocking execution
42 #-------------------------------------------------------------------------------
42 #-------------------------------------------------------------------------------
43
43
44 # Execute
44 # Execute
45
45
46 mec.execute('import math')
46 mec.execute('import math')
47 mec.execute('a = 2.0*math.pi')
47 mec.execute('a = 2.0*math.pi')
48 mec.execute('print a')
48 mec.execute('print a')
49
49
50 for id in mec.get_ids():
50 for id in mec.get_ids():
51 mec.execute('b=%d' % id, targets=id)
51 mec.execute('b=%d' % id, targets=id)
52
52
53
53
54 mec.execute('print b')
54 mec.execute('print b')
55
55
56 try:
56 try:
57 mec.execute('b = 10',targets=-1)
57 mec.execute('b = 10',targets=-1)
58 except InvalidEngineID:
58 except InvalidEngineID:
59 print "Caught invalid engine ID OK."
59 print "Caught invalid engine ID OK."
60
60
61 try:
61 try:
62 mec.execute('a=5; 1/0')
62 mec.execute('a=5; 1/0')
63 except CompositeError:
63 except CompositeError:
64 print "Caught 1/0 correctly."
64 print "Caught 1/0 correctly."
65
65
66
66
67
67
68 %px print a, b
68 %px print a, b
69 try:
69 try:
70 %px 1/0
70 %px 1/0
71 except CompositeError:
71 except CompositeError:
72 print "Caught 1/0 correctly."
72 print "Caught 1/0 correctly."
73
73
74
74
75 %autopx
75 %autopx
76
76
77 import numpy
77 import numpy
78 a = numpy.random.rand(4,4)
78 a = numpy.random.rand(4,4)
79 a = a+a.transpose()
79 a = a+a.transpose()
80 print numpy.linalg.eigvals(a)
80 print numpy.linalg.eigvals(a)
81
81
82 %autopx
82 %autopx
83
83
84
84
85 mec.targets = [0,2]
85 mec.targets = [0,2]
86 %px a = 5
86 %px a = 5
87 mec.targets = [1,3]
87 mec.targets = [1,3]
88 %px a = 10
88 %px a = 10
89 mec.targets = 'all'
89 mec.targets = 'all'
90 %px print a
90 %px print a
91
91
92
92
93 # Push/Pull
93 # Push/Pull
94
94
95 mec.push(dict(a=10, b=30, c={'f':range(10)}))
95 mec.push(dict(a=10, b=30, c={'f':range(10)}))
96 mec.pull(('a', 'b'))
96 mec.pull(('a', 'b'))
97 mec.zip_pull(('a', 'b'))
97 mec.zip_pull(('a', 'b'))
98
98
99 for id in mec.get_ids():
99 for id in mec.get_ids():
100 mec.push(dict(a=id), targets=id)
100 mec.push(dict(a=id), targets=id)
101
101
102
102
103 for id in mec.get_ids():
103 for id in mec.get_ids():
104 mec.pull('a', targets=id)
104 mec.pull('a', targets=id)
105
105
106
106
107 mec.pull('a')
107 mec.pull('a')
108
108
109
109
110 mec['a'] = 100
110 mec['a'] = 100
111 mec['a']
111 mec['a']
112
112
113 # get_result/reset/keys
113 # get_result/reset/keys
114
114
115 mec.get_result()
115 mec.get_result()
116 %result
116 %result
117 mec.keys()
117 mec.keys()
118 mec.reset()
118 mec.reset()
119 mec.keys()
119 mec.keys()
120
120
121 try:
121 try:
122 %result
122 %result
123 except CompositeError:
123 except CompositeError:
124 print "Caught IndexError ok."
124 print "Caught IndexError ok."
125
125
126
126
127 %px a = 5
127 %px a = 5
128 mec.get_result(1)
128 mec.get_result(1)
129 mec.keys()
129 mec.keys()
130
130
131 # Queue management methods
131 # Queue management methods
132
132
133 %px import time
133 %px import time
134 prs = [mec.execute('time.sleep(2.0)',block=False) for x in range(5)]
134 prs = [mec.execute('time.sleep(2.0)',block=False) for x in range(5)]
135
135
136
136
137 mec.queue_status()
137 mec.queue_status()
138 time.sleep(3.0)
138 time.sleep(3.0)
139 mec.clear_queue()
139 mec.clear_queue()
140 mec.queue_status()
140 mec.queue_status()
141 time.sleep(2.0)
141 time.sleep(2.0)
142 mec.queue_status()
142 mec.queue_status()
143
143
144 mec.barrier(prs)
144 mec.barrier(prs)
145
145
146 for pr in prs:
146 for pr in prs:
147 try:
147 try:
148 pr.r
148 pr.r
149 except CompositeError:
149 except CompositeError:
150 print "Caught QueueCleared OK."
150 print "Caught QueueCleared OK."
151
151
152
152
153 # scatter/gather
153 # scatter/gather
154
154
155 mec.scatter('a', range(10))
155 mec.scatter('a', range(10))
156 mec.gather('a')
156 mec.gather('a')
157 mec.scatter('b', numpy.arange(10))
157 mec.scatter('b', numpy.arange(10))
158 mec.gather('b')
158 mec.gather('b')
159
159
160 #-------------------------------------------------------------------------------
160 #-------------------------------------------------------------------------------
161 # Non-Blocking execution
161 # Non-Blocking execution
162 #-------------------------------------------------------------------------------
162 #-------------------------------------------------------------------------------
163
163
164 mec.block = False
164 mec.block = False
165
165
166 # execute
166 # execute
167
167
168 pr1 = mec.execute('a=5')
168 pr1 = mec.execute('a=5')
169 pr2 = mec.execute('import sets')
169 pr2 = mec.execute('import sets')
170
170
171 mec.barrier((pr1, pr2))
171 mec.barrier((pr1, pr2))
172
172
173 pr1 = mec.execute('1/0')
173 pr1 = mec.execute('1/0')
174 pr2 = mec.execute('c = sets.Set()')
174 pr2 = mec.execute('c = sets.Set()')
175
175
176 mec.barrier((pr1, pr2))
176 mec.barrier((pr1, pr2))
177 try:
177 try:
178 pr1.r
178 pr1.r
179 except CompositeError:
179 except CompositeError:
180 print "Caught ZeroDivisionError OK."
180 print "Caught ZeroDivisionError OK."
181
181
182 pr = mec.execute("print 'hi'")
182 pr = mec.execute("print 'hi'")
183 pr.r
183 pr.r
184
184
185 pr = mec.execute('1/0')
185 pr = mec.execute('1/0')
186 try:
186 try:
187 pr.r
187 pr.r
188 except CompositeError:
188 except CompositeError:
189 print "Caught ZeroDivisionError OK."
189 print "Caught ZeroDivisionError OK."
190
190
191 # Make sure we can reraise it!
191 # Make sure we can reraise it!
192 try:
192 try:
193 pr.r
193 pr.r
194 except CompositeError:
194 except CompositeError:
195 print "Caught ZeroDivisionError OK."
195 print "Caught ZeroDivisionError OK."
196
196
197 # push/pull
197 # push/pull
198
198
199 pr1 = mec.push(dict(a=10))
199 pr1 = mec.push(dict(a=10))
200 pr1.get_result()
200 pr1.get_result()
201 pr2 = mec.pull('a')
201 pr2 = mec.pull('a')
202 pr2.r
202 pr2.r
203
203
204 # flush
204 # flush
205
205
206 mec.flush()
206 mec.flush()
207 pd1 = mec.execute('a=30')
207 pd1 = mec.execute('a=30')
208 pd2 = mec.pull('a')
208 pd2 = mec.pull('a')
209 mec.flush()
209 mec.flush()
210
210
211 try:
211 try:
212 pd1.get_result()
212 pd1.get_result()
213 except InvalidDeferredID:
213 except InvalidDeferredID:
214 print "PendingResult object was cleared OK."
214 print "PendingResult object was cleared OK."
215
215
216
216
217 try:
217 try:
218 pd2.get_result()
218 pd2.get_result()
219 except InvalidDeferredID:
219 except InvalidDeferredID:
220 print "PendingResult object was cleared OK."
220 print "PendingResult object was cleared OK."
221
221
222
222
223
223
224 # This is a command to make sure the end of the file is happy.
224 # This is a command to make sure the end of the file is happy.
225
225
226 print "The tests are done!"
226 print "The tests are done!"
227
227
@@ -1,38 +1,38 b''
1 #-------------------------------------------------------------------------------
1 #-------------------------------------------------------------------------------
2 # Imports
2 # Imports
3 #-------------------------------------------------------------------------------
3 #-------------------------------------------------------------------------------
4
4
5 import time
5 import time
6 import numpy
6 import numpy
7
7
8 import ipython1.kernel.magic
8 import IPython.kernel.magic
9 from ipython1.kernel import client
9 from IPython.kernel import client
10 from ipython1.kernel.error import *
10 from IPython.kernel.error import *
11
11
12 mec = client.MultiEngineClient()
12 mec = client.MultiEngineClient()
13
13
14 #-------------------------------------------------------------------------------
14 #-------------------------------------------------------------------------------
15 # Setup
15 # Setup
16 #-------------------------------------------------------------------------------
16 #-------------------------------------------------------------------------------
17
17
18 mec.reset()
18 mec.reset()
19 # print mec.keys()
19 # print mec.keys()
20 mec.activate()
20 mec.activate()
21 # mec.block=True
21 # mec.block=True
22 mec.get_ids()
22 mec.get_ids()
23 n = len(mec)
23 n = len(mec)
24 assert n >= 4, "Not Enough Engines: %i, 4 needed for this script"%n
24 assert n >= 4, "Not Enough Engines: %i, 4 needed for this script"%n
25
25
26 mec.block=False
26 mec.block=False
27
27
28 pr1 = mec.execute('import time')
28 pr1 = mec.execute('import time')
29 pr2 = mec.execute('time.sleep(5)')
29 pr2 = mec.execute('time.sleep(5)')
30 pr3 = mec.push(dict(a=10,b=30,c=range(20000),d='The dog went swimming.'))
30 pr3 = mec.push(dict(a=10,b=30,c=range(20000),d='The dog went swimming.'))
31 pr4 = mec.pull(('a','b','d'))
31 pr4 = mec.pull(('a','b','d'))
32
32
33 print "Try a non-blocking get_result"
33 print "Try a non-blocking get_result"
34 assert pr4.get_result(block=False, default='not done')=='not done'
34 assert pr4.get_result(block=False, default='not done')=='not done'
35
35
36 print "Now wait for all the results"
36 print "Now wait for all the results"
37 mec.barrier((pr1,pr2,pr3,pr4))
37 mec.barrier((pr1,pr2,pr3,pr4))
38 print "The last pull got:", pr4.r
38 print "The last pull got:", pr4.r
@@ -1,120 +1,120 b''
1 """Example showing how to merge multiple remote data streams.
1 """Example showing how to merge multiple remote data streams.
2 """
2 """
3 # Slightly modified version of:
3 # Slightly modified version of:
4 # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/511509
4 # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/511509
5
5
6 import heapq
6 import heapq
7 from ipython1.kernel.error import CompositeError
7 from IPython.kernel.error import CompositeError
8
8
9 def mergesort(list_of_lists, key=None):
9 def mergesort(list_of_lists, key=None):
10 """ Perform an N-way merge operation on sorted lists.
10 """ Perform an N-way merge operation on sorted lists.
11
11
12 @param list_of_lists: (really iterable of iterable) of sorted elements
12 @param list_of_lists: (really iterable of iterable) of sorted elements
13 (either naturally or by C{key})
13 (either naturally or by C{key})
14 @param key: specify sort key function (like C{sort()}, C{sorted()})
14 @param key: specify sort key function (like C{sort()}, C{sorted()})
15
15
16 Yields tuples of the form C{(item, iterator)}, where the iterator is the
16 Yields tuples of the form C{(item, iterator)}, where the iterator is the
17 built-in list iterator or something you pass in, if you pre-generate the
17 built-in list iterator or something you pass in, if you pre-generate the
18 iterators.
18 iterators.
19
19
20 This is a stable merge; complexity O(N lg N)
20 This is a stable merge; complexity O(N lg N)
21
21
22 Examples::
22 Examples::
23
23
24 >>> print list(mergesort([[1,2,3,4],
24 >>> print list(mergesort([[1,2,3,4],
25 ... [2,3.25,3.75,4.5,6,7],
25 ... [2,3.25,3.75,4.5,6,7],
26 ... [2.625,3.625,6.625,9]]))
26 ... [2.625,3.625,6.625,9]]))
27 [1, 2, 2, 2.625, 3, 3.25, 3.625, 3.75, 4, 4.5, 6, 6.625, 7, 9]
27 [1, 2, 2, 2.625, 3, 3.25, 3.625, 3.75, 4, 4.5, 6, 6.625, 7, 9]
28
28
29 # note stability
29 # note stability
30 >>> print list(mergesort([[1,2,3,4],
30 >>> print list(mergesort([[1,2,3,4],
31 ... [2,3.25,3.75,4.5,6,7],
31 ... [2,3.25,3.75,4.5,6,7],
32 ... [2.625,3.625,6.625,9]],
32 ... [2.625,3.625,6.625,9]],
33 ... key=int))
33 ... key=int))
34 [1, 2, 2, 2.625, 3, 3.25, 3.75, 3.625, 4, 4.5, 6, 6.625, 7, 9]
34 [1, 2, 2, 2.625, 3, 3.25, 3.75, 3.625, 4, 4.5, 6, 6.625, 7, 9]
35
35
36
36
37 >>> print list(mergesort([[4, 3, 2, 1],
37 >>> print list(mergesort([[4, 3, 2, 1],
38 ... [7, 6, 4.5, 3.75, 3.25, 2],
38 ... [7, 6, 4.5, 3.75, 3.25, 2],
39 ... [9, 6.625, 3.625, 2.625]],
39 ... [9, 6.625, 3.625, 2.625]],
40 ... key=lambda x: -x))
40 ... key=lambda x: -x))
41 [9, 7, 6.625, 6, 4.5, 4, 3.75, 3.625, 3.25, 3, 2.625, 2, 2, 1]
41 [9, 7, 6.625, 6, 4.5, 4, 3.75, 3.625, 3.25, 3, 2.625, 2, 2, 1]
42 """
42 """
43
43
44 heap = []
44 heap = []
45 for i, itr in enumerate(iter(pl) for pl in list_of_lists):
45 for i, itr in enumerate(iter(pl) for pl in list_of_lists):
46 try:
46 try:
47 item = itr.next()
47 item = itr.next()
48 toadd = (key(item), i, item, itr) if key else (item, i, itr)
48 toadd = (key(item), i, item, itr) if key else (item, i, itr)
49 heap.append(toadd)
49 heap.append(toadd)
50 except StopIteration:
50 except StopIteration:
51 pass
51 pass
52 heapq.heapify(heap)
52 heapq.heapify(heap)
53
53
54 if key:
54 if key:
55 while heap:
55 while heap:
56 _, idx, item, itr = heap[0]
56 _, idx, item, itr = heap[0]
57 yield item
57 yield item
58 try:
58 try:
59 item = itr.next()
59 item = itr.next()
60 heapq.heapreplace(heap, (key(item), idx, item, itr) )
60 heapq.heapreplace(heap, (key(item), idx, item, itr) )
61 except StopIteration:
61 except StopIteration:
62 heapq.heappop(heap)
62 heapq.heappop(heap)
63
63
64 else:
64 else:
65 while heap:
65 while heap:
66 item, idx, itr = heap[0]
66 item, idx, itr = heap[0]
67 yield item
67 yield item
68 try:
68 try:
69 heapq.heapreplace(heap, (itr.next(), idx, itr))
69 heapq.heapreplace(heap, (itr.next(), idx, itr))
70 except StopIteration:
70 except StopIteration:
71 heapq.heappop(heap)
71 heapq.heappop(heap)
72
72
73
73
74 def remote_iterator(rc,engine,name):
74 def remote_iterator(rc,engine,name):
75 """Return an iterator on an object living on a remote engine.
75 """Return an iterator on an object living on a remote engine.
76 """
76 """
77 # Check that the object exists on the engine and pin a reference to it
77 # Check that the object exists on the engine and pin a reference to it
78 iter_name = '_%s_rmt_iter_' % name
78 iter_name = '_%s_rmt_iter_' % name
79 rc.execute('%s = iter(%s)' % (iter_name,name), targets=engine)
79 rc.execute('%s = iter(%s)' % (iter_name,name), targets=engine)
80 tpl = '_tmp = %s.next()' % iter_name
80 tpl = '_tmp = %s.next()' % iter_name
81 while True:
81 while True:
82 try:
82 try:
83 rc.execute(tpl, targets=engine)
83 rc.execute(tpl, targets=engine)
84 result = rc.pull('_tmp', targets=engine)[0]
84 result = rc.pull('_tmp', targets=engine)[0]
85 # This causes the StopIteration exception to be raised.
85 # This causes the StopIteration exception to be raised.
86 except CompositeError, e:
86 except CompositeError, e:
87 e.raise_exception()
87 e.raise_exception()
88 else:
88 else:
89 yield result
89 yield result
90
90
91 # Main, interactive testing
91 # Main, interactive testing
92 if __name__ == '__main__':
92 if __name__ == '__main__':
93
93
94 from ipython1.kernel import client
94 from IPython.kernel import client
95 ipc = client.MultiEngineClient()
95 ipc = client.MultiEngineClient()
96 print 'Engine IDs:',ipc.get_ids()
96 print 'Engine IDs:',ipc.get_ids()
97
97
98 # Make a set of 'sorted datasets'
98 # Make a set of 'sorted datasets'
99 a0 = range(5,20)
99 a0 = range(5,20)
100 a1 = range(10)
100 a1 = range(10)
101 a2 = range(15,25)
101 a2 = range(15,25)
102
102
103 # Now, imagine these had been created in the remote engines by some long
103 # Now, imagine these had been created in the remote engines by some long
104 # computation. In this simple example, we just send them over into the
104 # computation. In this simple example, we just send them over into the
105 # remote engines. They will all be called 'a' in each engine.
105 # remote engines. They will all be called 'a' in each engine.
106 ipc.push(dict(a=a0), targets=0)
106 ipc.push(dict(a=a0), targets=0)
107 ipc.push(dict(a=a1), targets=1)
107 ipc.push(dict(a=a1), targets=1)
108 ipc.push(dict(a=a2), targets=2)
108 ipc.push(dict(a=a2), targets=2)
109
109
110 # And we now make a local object which represents the remote iterator
110 # And we now make a local object which represents the remote iterator
111 aa0 = remote_iterator(ipc,0,'a')
111 aa0 = remote_iterator(ipc,0,'a')
112 aa1 = remote_iterator(ipc,1,'a')
112 aa1 = remote_iterator(ipc,1,'a')
113 aa2 = remote_iterator(ipc,2,'a')
113 aa2 = remote_iterator(ipc,2,'a')
114
114
115 # Let's merge them, both locally and remotely:
115 # Let's merge them, both locally and remotely:
116 print 'Merge the local datasets:'
116 print 'Merge the local datasets:'
117 print list(mergesort([a0,a1,a2]))
117 print list(mergesort([a0,a1,a2]))
118
118
119 print 'Locally merge the remote sets:'
119 print 'Locally merge the remote sets:'
120 print list(mergesort([aa0,aa1,aa2]))
120 print list(mergesort([aa0,aa1,aa2]))
@@ -1,46 +1,46 b''
1 """Example of how to use pylab to plot parallel data.
1 """Example of how to use pylab to plot parallel data.
2
2
3 The idea here is to run matplotlib in the same IPython session
3 The idea here is to run matplotlib in the same IPython session
4 as an IPython RemoteController client. That way matplotlib
4 as an IPython RemoteController client. That way matplotlib
5 can be used to plot parallel data that is gathered using
5 can be used to plot parallel data that is gathered using
6 RemoteController.
6 RemoteController.
7
7
8 To run this example, first start the IPython controller and 4
8 To run this example, first start the IPython controller and 4
9 engines::
9 engines::
10
10
11 ipcluster -n 4
11 ipcluster -n 4
12
12
13 Then start ipython in pylab mode::
13 Then start ipython in pylab mode::
14
14
15 ipython -pylab
15 ipython -pylab
16
16
17 Then a simple "run parallel_pylab.ipy" in IPython will run the
17 Then a simple "run parallel_pylab.ipy" in IPython will run the
18 example.
18 example.
19 """
19 """
20
20
21 import numpy as N
21 import numpy as N
22 from pylab import *
22 from pylab import *
23 from ipython1.kernel import client
23 from IPython.kernel import client
24
24
25 # Get an IPython1 client
25 # Get an IPython1 client
26 rc = client.MultiEngineClient()
26 rc = client.MultiEngineClient()
27 rc.get_ids()
27 rc.get_ids()
28 rc.activate()
28 rc.activate()
29
29
30 # Create random arrays on the engines
30 # Create random arrays on the engines
31 # This is to simulate arrays that you have calculated in parallel
31 # This is to simulate arrays that you have calculated in parallel
32 # on the engines.
32 # on the engines.
33 # For arrays longer than about 10000 elements, matplotlib starts to be slow
33 # For arrays longer than about 10000 elements, matplotlib starts to be slow
34 %px import numpy as N
34 %px import numpy as N
35 %px x = N.random.standard_normal(10000)
35 %px x = N.random.standard_normal(10000)
36 %px y = N.random.standard_normal(10000)
36 %px y = N.random.standard_normal(10000)
37
37
38 %px print x[0:10]
38 %px print x[0:10]
39 %px print y[0:10]
39 %px print y[0:10]
40
40
41 # Bring back the data
41 # Bring back the data
42 x_local = rc.gather('x')
42 x_local = rc.gather('x')
43 y_local = rc.gather('y')
43 y_local = rc.gather('y')
44
44
45 # Make a scatter plot of the gathered data
45 # Make a scatter plot of the gathered data
46 plot(x_local, y_local,'ro')
46 plot(x_local, y_local,'ro')
@@ -1,52 +1,52 b''
1 """An example of how to use IPython1 for plotting remote parallel data
1 """An example of how to use IPython1 for plotting remote parallel data
2
2
3 The two files plotting_frontend.ipy and plotting_backend.py go together.
3 The two files plotting_frontend.ipy and plotting_backend.py go together.
4
4
5 To run this example, first start the IPython controller and 4
5 To run this example, first start the IPython controller and 4
6 engines::
6 engines::
7
7
8 ipcluster -n 4
8 ipcluster -n 4
9
9
10 Then start ipython in pylab mode::
10 Then start ipython in pylab mode::
11
11
12 ipython -pylab
12 ipython -pylab
13
13
14 Then a simple "run plotting_frontend.ipy" in IPython will run the
14 Then a simple "run plotting_frontend.ipy" in IPython will run the
15 example. When this is done, all the variables (such as number, downx, etc.)
15 example. When this is done, all the variables (such as number, downx, etc.)
16 are available in IPython, so for example you can make additional plots.
16 are available in IPython, so for example you can make additional plots.
17 """
17 """
18
18
19 import numpy as N
19 import numpy as N
20 from pylab import *
20 from pylab import *
21 from ipython1.kernel import client
21 from IPython.kernel import client
22
22
23 # Get an IPython1 client
23 # Get an IPython1 client
24 rc = client.MultiEngineClient()
24 rc = client.MultiEngineClient()
25 rc.get_ids()
25 rc.get_ids()
26
26
27 # Run the simulation on all the engines
27 # Run the simulation on all the engines
28 rc.run('plotting_backend.py')
28 rc.run('plotting_backend.py')
29
29
30 # Bring back the data
30 # Bring back the data
31 number = rc.pull('number')
31 number = rc.pull('number')
32 d_number = rc.pull('d_number')
32 d_number = rc.pull('d_number')
33 downx = rc.gather('downx')
33 downx = rc.gather('downx')
34 downy = rc.gather('downy')
34 downy = rc.gather('downy')
35 downpx = rc.gather('downpx')
35 downpx = rc.gather('downpx')
36 downpy = rc.gather('downpy')
36 downpy = rc.gather('downpy')
37
37
38 print "number: ", sum(number)
38 print "number: ", sum(number)
39 print "downsampled number: ", sum(d_number)
39 print "downsampled number: ", sum(d_number)
40
40
41 # Make a scatter plot of the gathered data
41 # Make a scatter plot of the gathered data
42 # These calls to matplotlib could be replaced by calls to pygist or
42 # These calls to matplotlib could be replaced by calls to pygist or
43 # another plotting package.
43 # another plotting package.
44 figure(1)
44 figure(1)
45 scatter(downx, downy)
45 scatter(downx, downy)
46 xlabel('x')
46 xlabel('x')
47 ylabel('y')
47 ylabel('y')
48 figure(2)
48 figure(2)
49 scatter(downpx, downpy)
49 scatter(downpx, downpy)
50 xlabel('px')
50 xlabel('px')
51 ylabel('py')
51 ylabel('py')
52 show() No newline at end of file
52 show()
@@ -1,46 +1,46 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 """Parallel word frequency counter."""
2 """Parallel word frequency counter."""
3
3
4
4
5 from itertools import repeat
5 from itertools import repeat
6 from wordfreq import print_wordfreq, wordfreq
6 from wordfreq import print_wordfreq, wordfreq
7
7
8 def pwordfreq(rc, text):
8 def pwordfreq(rc, text):
9 """Parallel word frequency counter.
9 """Parallel word frequency counter.
10
10
11 rc - An IPython RemoteController
11 rc - An IPython RemoteController
12 text - The name of a string on the engines to do the freq count on.
12 text - The name of a string on the engines to do the freq count on.
13 """
13 """
14
14
15 rc.execute('freqs = wordfreq(%s)' %text)
15 rc.execute('freqs = wordfreq(%s)' %text)
16 freqs_list = rc.pull('freqs')
16 freqs_list = rc.pull('freqs')
17 word_set = set()
17 word_set = set()
18 for f in freqs_list:
18 for f in freqs_list:
19 word_set.update(f.keys())
19 word_set.update(f.keys())
20 freqs = dict(zip(word_set, repeat(0)))
20 freqs = dict(zip(word_set, repeat(0)))
21 for f in freqs_list:
21 for f in freqs_list:
22 for word, count in f.iteritems():
22 for word, count in f.iteritems():
23 freqs[word] += count
23 freqs[word] += count
24 return freqs
24 return freqs
25
25
26 if __name__ == '__main__':
26 if __name__ == '__main__':
27 # Create a MultiEngineClient
27 # Create a MultiEngineClient
28 from ipython1.kernel import client
28 from IPython.kernel import client
29 ipc = client.MultiEngineClient()
29 ipc = client.MultiEngineClient()
30
30
31 # Run the wordfreq script on the engines.
31 # Run the wordfreq script on the engines.
32 ipc.run('wordfreq.py')
32 ipc.run('wordfreq.py')
33
33
34 # Run the serial version
34 # Run the serial version
35 print "Serial word frequency count:"
35 print "Serial word frequency count:"
36 text = open('davinci.txt').read()
36 text = open('davinci.txt').read()
37 freqs = wordfreq(text)
37 freqs = wordfreq(text)
38 print_wordfreq(freqs, 10)
38 print_wordfreq(freqs, 10)
39
39
40 # The parallel version
40 # The parallel version
41 print "\nParallel word frequency count:"
41 print "\nParallel word frequency count:"
42 files = ['davinci%i.txt' % i for i in range(4)]
42 files = ['davinci%i.txt' % i for i in range(4)]
43 ipc.scatter('textfile', files)
43 ipc.scatter('textfile', files)
44 ipc.execute('text = open(textfile[0]).read()')
44 ipc.execute('text = open(textfile[0]).read()')
45 pfreqs = pwordfreq(ipc,'text')
45 pfreqs = pwordfreq(ipc,'text')
46 print_wordfreq(pfreqs, 10)
46 print_wordfreq(pfreqs, 10)
@@ -1,35 +1,35 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 """Parallel word frequency counter."""
2 """Parallel word frequency counter."""
3
3
4
4
5 from itertools import repeat
5 from itertools import repeat
6 from wordfreq import print_wordfreq, wordfreq
6 from wordfreq import print_wordfreq, wordfreq
7
7
8 def pwordfreq(rc, text):
8 def pwordfreq(rc, text):
9 """Parallel word frequency counter.
9 """Parallel word frequency counter.
10
10
11 rc - An IPython RemoteController
11 rc - An IPython RemoteController
12 text - The name of a string on the engines to do the freq count on.
12 text - The name of a string on the engines to do the freq count on.
13 """
13 """
14
14
15 if __name__ == '__main__':
15 if __name__ == '__main__':
16 # Create a MultiEngineClient
16 # Create a MultiEngineClient
17 from ipython1.kernel import client
17 from IPython.kernel import client
18 ipc = client.MultiEngineClient()
18 ipc = client.MultiEngineClient()
19
19
20 # Run the wordfreq script on the engines.
20 # Run the wordfreq script on the engines.
21 ipc.run('wordfreq.py')
21 ipc.run('wordfreq.py')
22
22
23 # Run the serial version
23 # Run the serial version
24 print "Serial word frequency count:"
24 print "Serial word frequency count:"
25 text = open('davinci.txt').read()
25 text = open('davinci.txt').read()
26 freqs = wordfreq(text)
26 freqs = wordfreq(text)
27 print_wordfreq(freqs, 10)
27 print_wordfreq(freqs, 10)
28
28
29 # The parallel version
29 # The parallel version
30 print "\nParallel word frequency count:"
30 print "\nParallel word frequency count:"
31 files = ['davinci%i.txt' % i for i in range(4)]
31 files = ['davinci%i.txt' % i for i in range(4)]
32 ipc.scatter('textfile', files)
32 ipc.scatter('textfile', files)
33 ipc.execute('text = open(textfile[0]).read()')
33 ipc.execute('text = open(textfile[0]).read()')
34 pfreqs = pwordfreq(ipc,'text')
34 pfreqs = pwordfreq(ipc,'text')
35 print_wordfreq(freqs)
35 print_wordfreq(freqs)
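In this shorter variant the body of ``pwordfreq`` is missing, so ``pfreqs`` ends up as ``None`` and only the serial counts are printed. The fuller copy of this script earlier in the changeset fills the function in roughly as follows (a sketch of that body, not new behaviour)::

    # Body of pwordfreq as it appears in the fuller version above,
    # indented as the function body under the docstring.
    rc.execute('freqs = wordfreq(%s)' % text)
    freqs_list = rc.pull('freqs')
    word_set = set()
    for f in freqs_list:
        word_set.update(f.keys())
    freqs = dict(zip(word_set, repeat(0)))
    for f in freqs_list:
        for word, count in f.iteritems():
            freqs[word] += count
    return freqs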
@@ -1,57 +1,57 b''
1 #-------------------------------------------------------------------------------
1 #-------------------------------------------------------------------------------
2 # Driver code that the client runs.
2 # Driver code that the client runs.
3 #-------------------------------------------------------------------------------
3 #-------------------------------------------------------------------------------
4 # To run this code start a controller and engines using:
4 # To run this code start a controller and engines using:
5 # ipcluster -n 2
5 # ipcluster -n 2
6 # Then run the scripts by doing irunner rmt.ipy or by starting ipython and
6 # Then run the scripts by doing irunner rmt.ipy or by starting ipython and
7 # doing run rmt.ipy.
7 # doing run rmt.ipy.
8
8
9 from rmtkernel import *
9 from rmtkernel import *
10 from ipython1.kernel import client
10 from IPython.kernel import client
11
11
12
12
13 def wignerDistribution(s):
13 def wignerDistribution(s):
14 """Returns (s, rho(s)) for the Wigner GOE distribution."""
14 """Returns (s, rho(s)) for the Wigner GOE distribution."""
15 return (numpy.pi*s/2.0) * numpy.exp(-numpy.pi*s**2/4.)
15 return (numpy.pi*s/2.0) * numpy.exp(-numpy.pi*s**2/4.)
16
16
17
17
18 def generateWignerData():
18 def generateWignerData():
19 s = numpy.linspace(0.0,4.0,400)
19 s = numpy.linspace(0.0,4.0,400)
20 rhos = wignerDistribution(s)
20 rhos = wignerDistribution(s)
21 return s, rhos
21 return s, rhos
22
22
23
23
24 def serialDiffs(num, N):
24 def serialDiffs(num, N):
25 diffs = ensembleDiffs(num, N)
25 diffs = ensembleDiffs(num, N)
26 normalizedDiffs = normalizeDiffs(diffs)
26 normalizedDiffs = normalizeDiffs(diffs)
27 return normalizedDiffs
27 return normalizedDiffs
28
28
29
29
30 def parallelDiffs(rc, num, N):
30 def parallelDiffs(rc, num, N):
31 nengines = len(rc.get_ids())
31 nengines = len(rc.get_ids())
32 num_per_engine = num/nengines
32 num_per_engine = num/nengines
33 print "Running with", num_per_engine, "per engine."
33 print "Running with", num_per_engine, "per engine."
34 rc.push(dict(num_per_engine=num_per_engine, N=N))
34 rc.push(dict(num_per_engine=num_per_engine, N=N))
35 rc.execute('diffs = ensembleDiffs(num_per_engine, N)')
35 rc.execute('diffs = ensembleDiffs(num_per_engine, N)')
36 # gather blocks always for now
36 # gather blocks always for now
37 pr = rc.gather('diffs')
37 pr = rc.gather('diffs')
38 return pr.r
38 return pr.r
39
39
40
40
41 # Main code
41 # Main code
42 if __name__ == '__main__':
42 if __name__ == '__main__':
43 rc = client.MultiEngineClient()
43 rc = client.MultiEngineClient()
44 print "Distributing code to engines..."
44 print "Distributing code to engines..."
45 r = rc.run('rmtkernel.py')
45 r = rc.run('rmtkernel.py')
46 rc.block = False
46 rc.block = False
47
47
48 # Simulation parameters
48 # Simulation parameters
49 nmats = 100
49 nmats = 100
50 matsize = 30
50 matsize = 30
51
51
52 %timeit -n1 -r1 serialDiffs(nmats,matsize)
52 %timeit -n1 -r1 serialDiffs(nmats,matsize)
53 %timeit -n1 -r1 parallelDiffs(rc, nmats, matsize)
53 %timeit -n1 -r1 parallelDiffs(rc, nmats, matsize)
54
54
55 # Uncomment these to plot the histogram
55 # Uncomment these to plot the histogram
56 import pylab
56 import pylab
57 pylab.hist(parallelDiffs(rc,matsize,matsize))
57 pylab.hist(parallelDiffs(rc,matsize,matsize))
@@ -1,18 +1,18 b''
1 from ipython1.kernel import client
1 from IPython.kernel import client
2
2
3 tc = client.TaskClient()
3 tc = client.TaskClient()
4 rc = client.MultiEngineClient()
4 rc = client.MultiEngineClient()
5
5
6 rc.push(dict(d=30))
6 rc.push(dict(d=30))
7
7
8 cmd1 = """\
8 cmd1 = """\
9 a = 5
9 a = 5
10 b = 10*d
10 b = 10*d
11 c = a*b*d
11 c = a*b*d
12 """
12 """
13
13
14 t1 = client.Task(cmd1, clear_before=False, clear_after=True, pull=['a','b','c'])
14 t1 = client.Task(cmd1, clear_before=False, clear_after=True, pull=['a','b','c'])
15 tid1 = tc.run(t1)
15 tid1 = tc.run(t1)
16 tr1 = tc.get_task_result(tid1,block=True)
16 tr1 = tc.get_task_result(tid1,block=True)
17 tr1.raiseException()
17 tr1.raiseException()
18 print "a, b: ", tr1.ns.a, tr1.ns.b No newline at end of file
18 print "a, b: ", tr1.ns.a, tr1.ns.b
@@ -1,44 +1,44 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # encoding: utf-8
2 # encoding: utf-8
3
3
4 from ipython1.kernel import client
4 from IPython.kernel import client
5 import time
5 import time
6
6
7 tc = client.TaskClient()
7 tc = client.TaskClient()
8 mec = client.MultiEngineClient()
8 mec = client.MultiEngineClient()
9
9
10 mec.execute('import time')
10 mec.execute('import time')
11
11
12 for i in range(24):
12 for i in range(24):
13 tc.irun('time.sleep(1)')
13 tc.irun('time.sleep(1)')
14
14
15 for i in range(6):
15 for i in range(6):
16 time.sleep(1.0)
16 time.sleep(1.0)
17 print "Queue status (verbose=False)"
17 print "Queue status (verbose=False)"
18 print tc.queue_status()
18 print tc.queue_status()
19
19
20 for i in range(24):
20 for i in range(24):
21 tc.irun('time.sleep(1)')
21 tc.irun('time.sleep(1)')
22
22
23 for i in range(6):
23 for i in range(6):
24 time.sleep(1.0)
24 time.sleep(1.0)
25 print "Queue status (verbose=True)"
25 print "Queue status (verbose=True)"
26 print tc.queue_status(True)
26 print tc.queue_status(True)
27
27
28 for i in range(12):
28 for i in range(12):
29 tc.irun('time.sleep(2)')
29 tc.irun('time.sleep(2)')
30
30
31 print "Queue status (verbose=True)"
31 print "Queue status (verbose=True)"
32 print tc.queue_status(True)
32 print tc.queue_status(True)
33
33
34 qs = tc.queue_status(True)
34 qs = tc.queue_status(True)
35 sched = qs['scheduled']
35 sched = qs['scheduled']
36
36
37 for tid in sched[-4:]:
37 for tid in sched[-4:]:
38 tc.abort(tid)
38 tc.abort(tid)
39
39
40 for i in range(6):
40 for i in range(6):
41 time.sleep(1.0)
41 time.sleep(1.0)
42 print "Queue status (verbose=True)"
42 print "Queue status (verbose=True)"
43 print tc.queue_status(True)
43 print tc.queue_status(True)
44
44
@@ -1,77 +1,77 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 """Test the performance of the task farming system.
2 """Test the performance of the task farming system.
3
3
4 This script submits a set of tasks to the TaskClient. The tasks
4 This script submits a set of tasks to the TaskClient. The tasks
5 are basically just a time.sleep(t), where t is a random number between
5 are basically just a time.sleep(t), where t is a random number between
6 two limits that can be configured at the command line. To run
6 two limits that can be configured at the command line. To run
7 the script there must first be an IPython controller and engines running::
7 the script there must first be an IPython controller and engines running::
8
8
9 ipcluster -n 16
9 ipcluster -n 16
10
10
11 A good test to run with 16 engines is::
11 A good test to run with 16 engines is::
12
12
13 python task_profiler.py -n 128 -t 0.01 -T 1.0
13 python task_profiler.py -n 128 -t 0.01 -T 1.0
14
14
15 This should show a speedup of 13-14x. The limitation here is that the
15 This should show a speedup of 13-14x. The limitation here is that the
16 overhead of a single task is about 0.001-0.01 seconds.
16 overhead of a single task is about 0.001-0.01 seconds.
17 """
17 """
18 import random, sys
18 import random, sys
19 from optparse import OptionParser
19 from optparse import OptionParser
20
20
21 from IPython.genutils import time
21 from IPython.genutils import time
22 from ipython1.kernel import client
22 from IPython.kernel import client
23
23
24 def main():
24 def main():
25 parser = OptionParser()
25 parser = OptionParser()
26 parser.set_defaults(n=100)
26 parser.set_defaults(n=100)
27 parser.set_defaults(tmin=1)
27 parser.set_defaults(tmin=1)
28 parser.set_defaults(tmax=60)
28 parser.set_defaults(tmax=60)
29 parser.set_defaults(controller='localhost')
29 parser.set_defaults(controller='localhost')
30 parser.set_defaults(meport=10105)
30 parser.set_defaults(meport=10105)
31 parser.set_defaults(tport=10113)
31 parser.set_defaults(tport=10113)
32
32
33 parser.add_option("-n", type='int', dest='n',
33 parser.add_option("-n", type='int', dest='n',
34 help='the number of tasks to run')
34 help='the number of tasks to run')
35 parser.add_option("-t", type='float', dest='tmin',
35 parser.add_option("-t", type='float', dest='tmin',
36 help='the minimum task length in seconds')
36 help='the minimum task length in seconds')
37 parser.add_option("-T", type='float', dest='tmax',
37 parser.add_option("-T", type='float', dest='tmax',
38 help='the maximum task length in seconds')
38 help='the maximum task length in seconds')
39 parser.add_option("-c", type='string', dest='controller',
39 parser.add_option("-c", type='string', dest='controller',
40 help='the address of the controller')
40 help='the address of the controller')
41 parser.add_option("-p", type='int', dest='meport',
41 parser.add_option("-p", type='int', dest='meport',
42 help="the port on which the controller listens for the MultiEngine/RemoteController client")
42 help="the port on which the controller listens for the MultiEngine/RemoteController client")
43 parser.add_option("-P", type='int', dest='tport',
43 parser.add_option("-P", type='int', dest='tport',
44 help="the port on which the controller listens for the TaskClient client")
44 help="the port on which the controller listens for the TaskClient client")
45
45
46 (opts, args) = parser.parse_args()
46 (opts, args) = parser.parse_args()
47 assert opts.tmax >= opts.tmin, "tmax must not be smaller than tmin"
47 assert opts.tmax >= opts.tmin, "tmax must not be smaller than tmin"
48
48
49 rc = client.MultiEngineClient()
49 rc = client.MultiEngineClient()
50 tc = client.TaskClient()
50 tc = client.TaskClient()
51 print tc.task_controller
51 print tc.task_controller
52 rc.block=True
52 rc.block=True
53 nengines = len(rc.get_ids())
53 nengines = len(rc.get_ids())
54 rc.execute('from IPython.genutils import time')
54 rc.execute('from IPython.genutils import time')
55
55
56 # the jobs should take a random time within a range
56 # the jobs should take a random time within a range
57 times = [random.random()*(opts.tmax-opts.tmin)+opts.tmin for i in range(opts.n)]
57 times = [random.random()*(opts.tmax-opts.tmin)+opts.tmin for i in range(opts.n)]
58 tasks = [client.Task("time.sleep(%f)"%t) for t in times]
58 tasks = [client.Task("time.sleep(%f)"%t) for t in times]
59 stime = sum(times)
59 stime = sum(times)
60
60
61 print "executing %i tasks, totalling %.1f secs on %i engines"%(opts.n, stime, nengines)
61 print "executing %i tasks, totalling %.1f secs on %i engines"%(opts.n, stime, nengines)
62 time.sleep(1)
62 time.sleep(1)
63 start = time.time()
63 start = time.time()
64 taskids = [tc.run(t) for t in tasks]
64 taskids = [tc.run(t) for t in tasks]
65 tc.barrier(taskids)
65 tc.barrier(taskids)
66 stop = time.time()
66 stop = time.time()
67
67
68 ptime = stop-start
68 ptime = stop-start
69 scale = stime/ptime
69 scale = stime/ptime
70
70
71 print "executed %.1f secs in %.1f secs"%(stime, ptime)
71 print "executed %.1f secs in %.1f secs"%(stime, ptime)
72 print "%.3fx parallel performance on %i engines"%(scale, nengines)
72 print "%.3fx parallel performance on %i engines"%(scale, nengines)
73 print "%.1f%% of theoretical max"%(100*scale/nengines)
73 print "%.1f%% of theoretical max"%(100*scale/nengines)
74
74
75
75
76 if __name__ == '__main__':
76 if __name__ == '__main__':
77 main()
77 main()
@@ -1,27 +1,27 b''
1 ========================
1 ========================
2 New configuration system
2 New configuration system
3 ========================
3 ========================
4
4
5 IPython has a configuration system. When running IPython for the first time,
5 IPython has a configuration system. When running IPython for the first time,
6 reasonable defaults are used for the configuration. The configuration of IPython
6 reasonable defaults are used for the configuration. The configuration of IPython
7 can be changed in two ways:
7 can be changed in two ways:
8
8
9 * Configuration files
9 * Configuration files
10 * Command line options (which override the configuration files)
10 * Command line options (which override the configuration files)
11
11
12 IPython has a separate configuration file for each subpackage. Thus, the main
12 IPython has a separate configuration file for each subpackage. Thus, the main
13 configuration files are (in your ``~/.ipython`` directory):
13 configuration files are (in your ``~/.ipython`` directory):
14
14
15 * ``ipython1.core.ini``
15 * ``ipython1.core.ini``
16 * ``ipython1.kernel.ini``
16 * ``ipython1.kernel.ini``
17 * ``ipython1.notebook.ini``
17 * ``ipython1.notebook.ini``
18
18
19 To create these files for the first time, do the following::
19 To create these files for the first time, do the following::
20
20
21 from ipython1.kernel.config import config_manager as kernel_config
21 from IPython.kernel.config import config_manager as kernel_config
22 kernel_config.write_default_config_file()
22 kernel_config.write_default_config_file()
23
23
24 But, you should only need to do this if you need to modify the defaults. If needed
24 But, you should only need to do this if you need to modify the defaults. If needed
25 repeat this process with the ``notebook`` and ``core`` configuration as well. If you
25 repeat this process with the ``notebook`` and ``core`` configuration as well. If you
26 are running into problems with IPython, you might try deleting these configuration
26 are running into problems with IPython, you might try deleting these configuration
27 files. No newline at end of file
27 files.
@@ -1,242 +1,242 b''
1 .. _ip1par:
1 .. _ip1par:
2
2
3 ======================================
3 ======================================
4 Using IPython for parallel computing
4 Using IPython for parallel computing
5 ======================================
5 ======================================
6
6
7 .. contents::
7 .. contents::
8
8
9 Introduction
9 Introduction
10 ============
10 ============
11
11
12 This file gives an overview of IPython. IPython has a sophisticated and
12 This file gives an overview of IPython. IPython has a sophisticated and
13 powerful architecture for parallel and distributed computing. This
13 powerful architecture for parallel and distributed computing. This
14 architecture abstracts out parallelism in a very general way, which
14 architecture abstracts out parallelism in a very general way, which
15 enables IPython to support many different styles of parallelism
15 enables IPython to support many different styles of parallelism
16 including:
16 including:
17
17
18 * Single program, multiple data (SPMD) parallelism.
18 * Single program, multiple data (SPMD) parallelism.
19 * Multiple program, multiple data (MPMD) parallelism.
19 * Multiple program, multiple data (MPMD) parallelism.
20 * Message passing using ``MPI``.
20 * Message passing using ``MPI``.
21 * Task farming.
21 * Task farming.
22 * Data parallel.
22 * Data parallel.
23 * Combinations of these approaches.
23 * Combinations of these approaches.
24 * Custom user defined approaches.
24 * Custom user defined approaches.
25
25
26 Most importantly, IPython enables all types of parallel applications to
26 Most importantly, IPython enables all types of parallel applications to
27 be developed, executed, debugged and monitored *interactively*. Hence,
27 be developed, executed, debugged and monitored *interactively*. Hence,
28 the ``I`` in IPython. The following are some example usage cases for IPython:
28 the ``I`` in IPython. The following are some example usage cases for IPython:
29
29
30 * Quickly parallelize algorithms that are embarrassingly parallel
30 * Quickly parallelize algorithms that are embarrassingly parallel
31 using a number of simple approaches. Many simple things can be
31 using a number of simple approaches. Many simple things can be
32 parallelized interactively in one or two lines of code.
32 parallelized interactively in one or two lines of code.
33 * Steer traditional MPI applications on a supercomputer from an
33 * Steer traditional MPI applications on a supercomputer from an
34 IPython session on your laptop.
34 IPython session on your laptop.
35 * Analyze and visualize large datasets (that could be remote and/or
35 * Analyze and visualize large datasets (that could be remote and/or
36 distributed) interactively using IPython and tools like
36 distributed) interactively using IPython and tools like
37 matplotlib/TVTK.
37 matplotlib/TVTK.
38 * Develop, test and debug new parallel algorithms
38 * Develop, test and debug new parallel algorithms
39 (that may use MPI) interactively.
39 (that may use MPI) interactively.
40 * Tie together multiple MPI jobs running on different systems into
40 * Tie together multiple MPI jobs running on different systems into
41 one giant distributed and parallel system.
41 one giant distributed and parallel system.
42 * Start a parallel job on your cluster and then have a remote
42 * Start a parallel job on your cluster and then have a remote
43 collaborator connect to it and pull back data into their
43 collaborator connect to it and pull back data into their
44 local IPython session for plotting and analysis.
44 local IPython session for plotting and analysis.
45 * Run a set of tasks on a set of CPUs using dynamic load balancing.
45 * Run a set of tasks on a set of CPUs using dynamic load balancing.
46
46
47 Architecture overview
47 Architecture overview
48 =====================
48 =====================
49
49
50 The IPython architecture consists of three components:
50 The IPython architecture consists of three components:
51
51
52 * The IPython engine.
52 * The IPython engine.
53 * The IPython controller.
53 * The IPython controller.
54 * Various controller Clients.
54 * Various controller Clients.
55
55
56 IPython engine
56 IPython engine
57 ---------------
57 ---------------
58
58
59 The IPython engine is a Python instance that takes Python commands over a
59 The IPython engine is a Python instance that takes Python commands over a
60 network connection. Eventually, the IPython engine will be a full IPython
60 network connection. Eventually, the IPython engine will be a full IPython
61 interpreter, but for now, it is a regular Python interpreter. The engine
61 interpreter, but for now, it is a regular Python interpreter. The engine
62 can also handle incoming and outgoing Python objects sent over a network
62 can also handle incoming and outgoing Python objects sent over a network
63 connection. When multiple engines are started, parallel and distributed
63 connection. When multiple engines are started, parallel and distributed
64 computing becomes possible. An important feature of an IPython engine is
64 computing becomes possible. An important feature of an IPython engine is
65 that it blocks while user code is being executed. Read on for how the
65 that it blocks while user code is being executed. Read on for how the
66 IPython controller solves this problem to expose a clean asynchronous API
66 IPython controller solves this problem to expose a clean asynchronous API
67 to the user.
67 to the user.
68
68
69 IPython controller
69 IPython controller
70 ------------------
70 ------------------
71
71
72 The IPython controller provides an interface for working with a set of
72 The IPython controller provides an interface for working with a set of
73 engines. At a general level, the controller is a process to which
73 engines. At a general level, the controller is a process to which
74 IPython engines can connect. For each connected engine, the controller
74 IPython engines can connect. For each connected engine, the controller
75 manages a queue. All actions that can be performed on the engine go
75 manages a queue. All actions that can be performed on the engine go
76 through this queue. While the engines themselves block when user code is
76 through this queue. While the engines themselves block when user code is
77 run, the controller hides that from the user to provide a fully
77 run, the controller hides that from the user to provide a fully
78 asynchronous interface to a set of engines. Because the controller
78 asynchronous interface to a set of engines. Because the controller
79 listens on a network port for engines to connect to it, it must be
79 listens on a network port for engines to connect to it, it must be
80 started before any engines are started.
80 started before any engines are started.
81
81
82 The controller also provides a single point of contact for users who wish
82 The controller also provides a single point of contact for users who wish
83 to utilize the engines connected to the controller. There are different
83 to utilize the engines connected to the controller. There are different
84 ways of working with a controller. In IPython these ways correspond to different interfaces that the controller is adapted to. Currently we have two default interfaces to the controller:
84 ways of working with a controller. In IPython these ways correspond to different interfaces that the controller is adapted to. Currently we have two default interfaces to the controller:
85
85
86 * The MultiEngine interface.
86 * The MultiEngine interface.
87 * The Task interface.
87 * The Task interface.
88
88
89 Advanced users can easily add new custom interfaces to enable other
89 Advanced users can easily add new custom interfaces to enable other
90 styles of parallelism.
90 styles of parallelism.
91
91
92 .. note::
92 .. note::
93
93
94 A single controller and set of engines can be accessed
94 A single controller and set of engines can be accessed
95 through multiple interfaces simultaneously. This opens the
95 through multiple interfaces simultaneously. This opens the
96 door for lots of interesting things.
96 door for lots of interesting things.
97
97
98 Controller clients
98 Controller clients
99 ------------------
99 ------------------
100
100
101 For each controller interface, there is a corresponding client. These
101 For each controller interface, there is a corresponding client. These
102 clients allow users to interact with a set of engines through the
102 clients allow users to interact with a set of engines through the
103 interface.
103 interface.
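
For illustration, here is a brief sketch of creating both default clients against a single running controller. The ``MultiEngineClient`` name is used elsewhere in this documentation; the ``TaskClient`` name is assumed here by analogy, so check the Task interface documentation for the exact class name::

    from IPython.kernel import client

    # Each client talks to the same controller through a different interface.
    mec = client.MultiEngineClient()   # MultiEngine interface
    tc = client.TaskClient()           # Task interface (class name assumed)

Because a single controller can be accessed through multiple interfaces at once, both clients can be used side by side in the same session.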
104
104
105 Security
105 Security
106 --------
106 --------
107
107
108 By default (as long as `pyOpenSSL` is installed) all network connections between the controller and the engines, and between the controller and the clients, are secure. What does this mean? First of all, all of the connections are encrypted using SSL. Second, the connections are authenticated. We handle authentication in a `capabilities`__ based security model. In this model, a "capability (known in some systems as a key) is a communicable, unforgeable token of authority". Put simply, a capability is like a key to your house: if you have the key, you can get in; if not, you can't.
108 By default (as long as `pyOpenSSL` is installed) all network connections between the controller and the engines, and between the controller and the clients, are secure. What does this mean? First of all, all of the connections are encrypted using SSL. Second, the connections are authenticated. We handle authentication in a `capabilities`__ based security model. In this model, a "capability (known in some systems as a key) is a communicable, unforgeable token of authority". Put simply, a capability is like a key to your house: if you have the key, you can get in; if not, you can't.
109
109
110 .. __: http://en.wikipedia.org/wiki/Capability-based_security
110 .. __: http://en.wikipedia.org/wiki/Capability-based_security
111
111
112 In our architecture, the controller is the only process that listens on network ports and is thus responsible for creating these keys. In IPython, these keys are known as Foolscap URLs, or FURLs, because of the underlying network protocol we are using. As a user, you don't need to know anything about the details of these FURLs, other than that when the controller starts, it saves a set of FURLs to files named something.furl. The default location of these files is your ~/.ipython directory.
112 In our architecture, the controller is the only process that listens on network ports and is thus responsible for creating these keys. In IPython, these keys are known as Foolscap URLs, or FURLs, because of the underlying network protocol we are using. As a user, you don't need to know anything about the details of these FURLs, other than that when the controller starts, it saves a set of FURLs to files named something.furl. The default location of these files is your ~/.ipython directory.
113
113
114 To connect and authenticate to the controller, an engine or client simply needs to present an appropriate FURL (that was originally created by the controller) to the controller. Thus, the .furl files need to be copied to a location where the clients and engines can find them. Typically, this is the ~/.ipython directory on the host where the client/engine is running (which could be a different host than the controller). Once the .furl files are copied over, everything should work fine.
114 To connect and authenticate to the controller, an engine or client simply needs to present an appropriate FURL (that was originally created by the controller) to the controller. Thus, the .furl files need to be copied to a location where the clients and engines can find them. Typically, this is the ~/.ipython directory on the host where the client/engine is running (which could be a different host than the controller). Once the .furl files are copied over, everything should work fine.
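
As a hedged sketch of connecting from a different host (the ``.furl`` filename and the ability to pass a path to the client constructor are assumptions; check your installation), a client might be pointed at a copied FURL file explicitly::

    from IPython.kernel import client

    # Path to a FURL file copied over from the controller host (filename assumed).
    mec = client.MultiEngineClient('/home/me/.ipython/ipcontroller-mec.furl')

If the FURL files are in the default ~/.ipython location, the clients and engines should find them automatically, as in the examples below.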
115
115
116 Getting Started
116 Getting Started
117 ===============
117 ===============
118
118
119 To use IPython for parallel computing, you need to start one instance of
119 To use IPython for parallel computing, you need to start one instance of
120 the controller and one or more instances of the engine. The controller
120 the controller and one or more instances of the engine. The controller
121 and each engine can run on different machines or on the same machine.
121 and each engine can run on different machines or on the same machine.
122 Because of this, there are many different possibilities for setting up
122 Because of this, there are many different possibilities for setting up
123 the IP addresses and ports used by the various processes.
123 the IP addresses and ports used by the various processes.
124
124
125 Starting the controller and engine on your local machine
125 Starting the controller and engine on your local machine
126 --------------------------------------------------------
126 --------------------------------------------------------
127
127
128 This is the simplest configuration that can be used and is useful for
128 This is the simplest configuration that can be used and is useful for
129 testing the system and on machines that have multiple cores and/or
129 testing the system and on machines that have multiple cores and/or
130 multiple CPUs. The easiest way of doing this is using the ``ipcluster``
130 multiple CPUs. The easiest way of doing this is using the ``ipcluster``
131 command::
131 command::
132
132
133 $ ipcluster -n 4
133 $ ipcluster -n 4
134
134
135 This will start an IPython controller and then 4 engines that connect to
135 This will start an IPython controller and then 4 engines that connect to
136 the controller. Lastly, the script will print out the Python commands
136 the controller. Lastly, the script will print out the Python commands
137 that you can use to connect to the controller. It is that easy.
137 that you can use to connect to the controller. It is that easy.
138
138
139 Underneath the hood, the ``ipcluster`` script uses two other top-level
139 Underneath the hood, the ``ipcluster`` script uses two other top-level
140 scripts that you can also use yourself. These scripts are
140 scripts that you can also use yourself. These scripts are
141 ``ipcontroller``, which starts the controller, and ``ipengine``, which
141 ``ipcontroller``, which starts the controller, and ``ipengine``, which
142 starts one engine. To use these scripts to start things on your local
142 starts one engine. To use these scripts to start things on your local
143 machine, do the following.
143 machine, do the following.
144
144
145 First start the controller::
145 First start the controller::
146
146
147 $ ipcontroller &
147 $ ipcontroller &
148
148
149 Next, start however many instances of the engine you want using (repeatedly) the command::
149 Next, start however many instances of the engine you want using (repeatedly) the command::
150
150
151 $ ipengine &
151 $ ipengine &
152
152
153 .. warning::
153 .. warning::
154
154
155 The order of the above operations is very important. You *must*
155 The order of the above operations is very important. You *must*
156 start the controller before the engines, since the engines connect
156 start the controller before the engines, since the engines connect
157 to the controller as they get started.
157 to the controller as they get started.
158
158
159 On some platforms you may need to give these commands in the form
159 On some platforms you may need to give these commands in the form
160 ``(ipcontroller &)`` and ``(ipengine &)`` for them to work properly. The
160 ``(ipcontroller &)`` and ``(ipengine &)`` for them to work properly. The
161 engines should start and automatically connect to the controller on the
161 engines should start and automatically connect to the controller on the
162 default ports, which are chosen for this type of setup. You are now ready
162 default ports, which are chosen for this type of setup. You are now ready
163 to use the controller and engines from IPython.
163 to use the controller and engines from IPython.
164
164
165 Starting the controller and engines on different machines
165 Starting the controller and engines on different machines
166 ---------------------------------------------------------
166 ---------------------------------------------------------
167
167
168 This section needs to be updated to reflect the new Foolscap capabilities based
168 This section needs to be updated to reflect the new Foolscap capabilities based
169 model.
169 model.
170
170
171 Using ``ipcluster`` with ``ssh``
171 Using ``ipcluster`` with ``ssh``
172 --------------------------------
172 --------------------------------
173
173
174 The ``ipcluster`` command can also start a controller and engines using
174 The ``ipcluster`` command can also start a controller and engines using
175 ``ssh``. We need more documentation on this, but for now here is an
175 ``ssh``. We need more documentation on this, but for now here is an
176 example startup script::
176 example startup script::
177
177
178 controller = dict(host='myhost',
178 controller = dict(host='myhost',
179 engine_port=None, # default is 10105
179 engine_port=None, # default is 10105
180 control_port=None,
180 control_port=None,
181 )
181 )
182
182
183 # keys are hostnames, values are the number of engines on that host
183 # keys are hostnames, values are the number of engines on that host
184 engines = dict(node1=2,
184 engines = dict(node1=2,
185 node2=2,
185 node2=2,
186 node3=2,
186 node3=2,
187 node4=2,
187 node4=2,
188 )
188 )
189
189
190 Starting engines using ``mpirun``
190 Starting engines using ``mpirun``
191 ---------------------------------
191 ---------------------------------
192
192
193 The IPython engines can be started using ``mpirun``/``mpiexec``, even if
193 The IPython engines can be started using ``mpirun``/``mpiexec``, even if
194 the engines don't call MPI_Init() or use the MPI API in any way. This is
194 the engines don't call MPI_Init() or use the MPI API in any way. This is
195 supported on modern MPI implementations like `Open MPI`_. This provides
195 supported on modern MPI implementations like `Open MPI`_. This provides
196 a really nice way of starting a bunch of engines. On a system with MPI
196 a really nice way of starting a bunch of engines. On a system with MPI
197 installed, you can do::
197 installed, you can do::
198
198
199 mpirun -n 4 ipengine --controller-port=10000 --controller-ip=host0
199 mpirun -n 4 ipengine --controller-port=10000 --controller-ip=host0
200
200
201 .. _Open MPI: http://www.open-mpi.org/
201 .. _Open MPI: http://www.open-mpi.org/
202
202
203 More details on using MPI with IPython can be found :ref:`here <parallelmpi>`.
203 More details on using MPI with IPython can be found :ref:`here <parallelmpi>`.
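
As a quick sketch of what this enables (assuming the engines were started under ``mpirun`` as above and that the mpi4py package is installed on them), MPI can then be used from the MultiEngine interface::

    from IPython.kernel import client

    mec = client.MultiEngineClient()
    # mpi4py calls MPI_Init on import; each engine reports its own rank.
    mec.execute('from mpi4py import MPI')
    mec.execute('rank = MPI.COMM_WORLD.Get_rank()')
    print mec.pull('rank')   # e.g. [0, 1, 2, 3]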
204
204
205 Log files
205 Log files
206 ---------
206 ---------
207
207
208 All of the components of IPython have log files associated with them.
208 All of the components of IPython have log files associated with them.
209 These log files can be extremely useful in debugging problems with
209 These log files can be extremely useful in debugging problems with
210 IPython and can be found in the directory ``~/.ipython/log``. Sending
210 IPython and can be found in the directory ``~/.ipython/log``. Sending
211 the log files to us will often help us to debug any problems.
211 the log files to us will often help us to debug any problems.
212
212
213 Next Steps
213 Next Steps
214 ==========
214 ==========
215
215
216 Once you have started the IPython controller and one or more engines, you
216 Once you have started the IPython controller and one or more engines, you
217 are ready to use the engines to do something useful. To make sure
217 are ready to use the engines to do something useful. To make sure
218 everything is working correctly, try the following commands::
218 everything is working correctly, try the following commands::
219
219
220 In [1]: from ipython1.kernel import client
220 In [1]: from IPython.kernel import client
221
221
222 In [2]: mec = client.MultiEngineClient() # This looks for .furl files in ~/.ipython
222 In [2]: mec = client.MultiEngineClient() # This looks for .furl files in ~/.ipython
223
223
224 In [4]: mec.get_ids()
224 In [4]: mec.get_ids()
225 Out[4]: [0, 1, 2, 3]
225 Out[4]: [0, 1, 2, 3]
226
226
227 In [5]: mec.execute('print "Hello World"')
227 In [5]: mec.execute('print "Hello World"')
228 Out[5]:
228 Out[5]:
229 <Results List>
229 <Results List>
230 [0] In [1]: print "Hello World"
230 [0] In [1]: print "Hello World"
231 [0] Out[1]: Hello World
231 [0] Out[1]: Hello World
232
232
233 [1] In [1]: print "Hello World"
233 [1] In [1]: print "Hello World"
234 [1] Out[1]: Hello World
234 [1] Out[1]: Hello World
235
235
236 [2] In [1]: print "Hello World"
236 [2] In [1]: print "Hello World"
237 [2] Out[1]: Hello World
237 [2] Out[1]: Hello World
238
238
239 [3] In [1]: print "Hello World"
239 [3] In [1]: print "Hello World"
240 [3] Out[1]: Hello World
240 [3] Out[1]: Hello World
241
241
242 If this works, you are ready to learn more about the :ref:`MultiEngine <parallelmultiengine>` and :ref:`Task <paralleltask>` interfaces to the controller.
242 If this works, you are ready to learn more about the :ref:`MultiEngine <parallelmultiengine>` and :ref:`Task <paralleltask>` interfaces to the controller.
@@ -1,728 +1,728 b''
1 .. _parallelmultiengine:
1 .. _parallelmultiengine:
2
2
3 =================================
3 =================================
4 IPython's MultiEngine interface
4 IPython's MultiEngine interface
5 =================================
5 =================================
6
6
7 .. contents::
7 .. contents::
8
8
9 The MultiEngine interface represents one possible way of working with a
9 The MultiEngine interface represents one possible way of working with a
10 set of IPython engines. The basic idea behind the MultiEngine interface is
10 set of IPython engines. The basic idea behind the MultiEngine interface is
11 that the capabilities of each engine are explicitly exposed to the user.
11 that the capabilities of each engine are explicitly exposed to the user.
12 Thus, in the MultiEngine interface, each engine is given an id that is
12 Thus, in the MultiEngine interface, each engine is given an id that is
13 used to identify the engine and give it work to do. This interface is very
13 used to identify the engine and give it work to do. This interface is very
14 intuitive and is designed with interactive usage in mind, and is thus the
14 intuitive and is designed with interactive usage in mind, and is thus the
15 best place for new users of IPython to begin.
15 best place for new users of IPython to begin.
16
16
17 Starting the IPython controller and engines
17 Starting the IPython controller and engines
18 ===========================================
18 ===========================================
19
19
20 To follow along with this tutorial, you will need to start the IPython
20 To follow along with this tutorial, you will need to start the IPython
21 controller and four IPython engines. The simplest way of doing this is to
21 controller and four IPython engines. The simplest way of doing this is to
22 use the ``ipcluster`` command::
22 use the ``ipcluster`` command::
23
23
24 $ ipcluster -n 4
24 $ ipcluster -n 4
25
25
26 For more detailed information about starting the controller and engines, see our :ref:`introduction <ip1par>` to using IPython for parallel computing.
26 For more detailed information about starting the controller and engines, see our :ref:`introduction <ip1par>` to using IPython for parallel computing.
27
27
28 Creating a ``MultiEngineClient`` instance
28 Creating a ``MultiEngineClient`` instance
29 =========================================
29 =========================================
30
30
31 The first step is to import the IPython ``client`` module and then create a ``MultiEngineClient`` instance::
31 The first step is to import the IPython ``client`` module and then create a ``MultiEngineClient`` instance::
32
32
33 In [1]: from ipython1.kernel import client
33 In [1]: from IPython.kernel import client
34
34
35 In [2]: mec = client.MultiEngineClient()
35 In [2]: mec = client.MultiEngineClient()
36
36
37 To make sure there are engines connected to the controller, you can get a list of engine ids::
37 To make sure there are engines connected to the controller, you can get a list of engine ids::
38
38
39 In [3]: mec.get_ids()
39 In [3]: mec.get_ids()
40 Out[3]: [0, 1, 2, 3]
40 Out[3]: [0, 1, 2, 3]
41
41
42 Here we see that there are four engines ready to do work for us.
42 Here we see that there are four engines ready to do work for us.
43
43
44 Running Python commands
44 Running Python commands
45 =======================
45 =======================
46
46
47 The most basic type of operation that can be performed on the engines is to execute Python code. Executing Python code can be done in blocking or non-blocking mode (blocking is default) using the ``execute`` method.
47 The most basic type of operation that can be performed on the engines is to execute Python code. Executing Python code can be done in blocking or non-blocking mode (blocking is default) using the ``execute`` method.
48
48
49 Blocking execution
49 Blocking execution
50 ------------------
50 ------------------
51
51
52 In blocking mode, the ``MultiEngineClient`` object (called ``mec`` in
52 In blocking mode, the ``MultiEngineClient`` object (called ``mec`` in
53 these examples) submits the command to the controller, which places the
53 these examples) submits the command to the controller, which places the
54 command in the engines' queues for execution. The ``execute`` call then
54 command in the engines' queues for execution. The ``execute`` call then
55 blocks until the engines are done executing the command::
55 blocks until the engines are done executing the command::
56
56
57 # The default is to run on all engines
57 # The default is to run on all engines
58 In [4]: mec.execute('a=5')
58 In [4]: mec.execute('a=5')
59 Out[4]:
59 Out[4]:
60 <Results List>
60 <Results List>
61 [0] In [1]: a=5
61 [0] In [1]: a=5
62 [1] In [1]: a=5
62 [1] In [1]: a=5
63 [2] In [1]: a=5
63 [2] In [1]: a=5
64 [3] In [1]: a=5
64 [3] In [1]: a=5
65
65
66 In [5]: mec.execute('b=10')
66 In [5]: mec.execute('b=10')
67 Out[5]:
67 Out[5]:
68 <Results List>
68 <Results List>
69 [0] In [2]: b=10
69 [0] In [2]: b=10
70 [1] In [2]: b=10
70 [1] In [2]: b=10
71 [2] In [2]: b=10
71 [2] In [2]: b=10
72 [3] In [2]: b=10
72 [3] In [2]: b=10
73
73
74 Python commands can be executed on specific engines by calling execute using the ``targets`` keyword argument::
74 Python commands can be executed on specific engines by calling execute using the ``targets`` keyword argument::
75
75
76 In [6]: mec.execute('c=a+b',targets=[0,2])
76 In [6]: mec.execute('c=a+b',targets=[0,2])
77 Out[6]:
77 Out[6]:
78 <Results List>
78 <Results List>
79 [0] In [3]: c=a+b
79 [0] In [3]: c=a+b
80 [2] In [3]: c=a+b
80 [2] In [3]: c=a+b
81
81
82
82
83 In [7]: mec.execute('c=a-b',targets=[1,3])
83 In [7]: mec.execute('c=a-b',targets=[1,3])
84 Out[7]:
84 Out[7]:
85 <Results List>
85 <Results List>
86 [1] In [3]: c=a-b
86 [1] In [3]: c=a-b
87 [3] In [3]: c=a-b
87 [3] In [3]: c=a-b
88
88
89
89
90 In [8]: mec.execute('print c')
90 In [8]: mec.execute('print c')
91 Out[8]:
91 Out[8]:
92 <Results List>
92 <Results List>
93 [0] In [4]: print c
93 [0] In [4]: print c
94 [0] Out[4]: 15
94 [0] Out[4]: 15
95
95
96 [1] In [4]: print c
96 [1] In [4]: print c
97 [1] Out[4]: -5
97 [1] Out[4]: -5
98
98
99 [2] In [4]: print c
99 [2] In [4]: print c
100 [2] Out[4]: 15
100 [2] Out[4]: 15
101
101
102 [3] In [4]: print c
102 [3] In [4]: print c
103 [3] Out[4]: -5
103 [3] Out[4]: -5
104
104
105 This example also shows one of the most important things about the IPython engines: they have persistent user namespaces. The ``execute`` method returns a list of Python ``dict`` objects (one per engine) that contain useful information::
105 This example also shows one of the most important things about the IPython engines: they have persistent user namespaces. The ``execute`` method returns a list of Python ``dict`` objects (one per engine) that contain useful information::
106
106
107 In [9]: result_dict = mec.execute('d=10; print d')
107 In [9]: result_dict = mec.execute('d=10; print d')
108
108
109 In [10]: for r in result_dict:
109 In [10]: for r in result_dict:
110 ....: print r
110 ....: print r
111 ....:
111 ....:
112 ....:
112 ....:
113 {'input': {'translated': 'd=10; print d', 'raw': 'd=10; print d'}, 'number': 5, 'id': 0, 'stdout': '10\n'}
113 {'input': {'translated': 'd=10; print d', 'raw': 'd=10; print d'}, 'number': 5, 'id': 0, 'stdout': '10\n'}
114 {'input': {'translated': 'd=10; print d', 'raw': 'd=10; print d'}, 'number': 5, 'id': 1, 'stdout': '10\n'}
114 {'input': {'translated': 'd=10; print d', 'raw': 'd=10; print d'}, 'number': 5, 'id': 1, 'stdout': '10\n'}
115 {'input': {'translated': 'd=10; print d', 'raw': 'd=10; print d'}, 'number': 5, 'id': 2, 'stdout': '10\n'}
115 {'input': {'translated': 'd=10; print d', 'raw': 'd=10; print d'}, 'number': 5, 'id': 2, 'stdout': '10\n'}
116 {'input': {'translated': 'd=10; print d', 'raw': 'd=10; print d'}, 'number': 5, 'id': 3, 'stdout': '10\n'}
116 {'input': {'translated': 'd=10; print d', 'raw': 'd=10; print d'}, 'number': 5, 'id': 3, 'stdout': '10\n'}
117
117
118 Non-blocking execution
118 Non-blocking execution
119 ----------------------
119 ----------------------
120
120
121 In non-blocking mode, ``execute`` submits the command to be executed and then returns a
121 In non-blocking mode, ``execute`` submits the command to be executed and then returns a
122 ``PendingResult`` object immediately. The ``PendingResult`` object gives you a way of getting a
122 ``PendingResult`` object immediately. The ``PendingResult`` object gives you a way of getting a
123 result at a later time through its ``get_result`` method or ``r`` attribute. This allows you to
123 result at a later time through its ``get_result`` method or ``r`` attribute. This allows you to
124 quickly submit long running commands without blocking your local Python/IPython session::
124 quickly submit long running commands without blocking your local Python/IPython session::
125
125
126 # In blocking mode
126 # In blocking mode
127 In [6]: mec.execute('import time')
127 In [6]: mec.execute('import time')
128 Out[6]:
128 Out[6]:
129 <Results List>
129 <Results List>
130 [0] In [1]: import time
130 [0] In [1]: import time
131 [1] In [1]: import time
131 [1] In [1]: import time
132 [2] In [1]: import time
132 [2] In [1]: import time
133 [3] In [1]: import time
133 [3] In [1]: import time
134
134
135 # In non-blocking mode
135 # In non-blocking mode
136 In [7]: pr = mec.execute('time.sleep(10)',block=False)
136 In [7]: pr = mec.execute('time.sleep(10)',block=False)
137
137
138 # Now block for the result
138 # Now block for the result
139 In [8]: pr.get_result()
139 In [8]: pr.get_result()
140 Out[8]:
140 Out[8]:
141 <Results List>
141 <Results List>
142 [0] In [2]: time.sleep(10)
142 [0] In [2]: time.sleep(10)
143 [1] In [2]: time.sleep(10)
143 [1] In [2]: time.sleep(10)
144 [2] In [2]: time.sleep(10)
144 [2] In [2]: time.sleep(10)
145 [3] In [2]: time.sleep(10)
145 [3] In [2]: time.sleep(10)
146
146
147 # Again in non-blocking mode
147 # Again in non-blocking mode
148 In [9]: pr = mec.execute('time.sleep(10)',block=False)
148 In [9]: pr = mec.execute('time.sleep(10)',block=False)
149
149
150 # Poll to see if the result is ready
150 # Poll to see if the result is ready
151 In [10]: pr.get_result(block=False)
151 In [10]: pr.get_result(block=False)
152
152
153 # A shorthand for get_result(block=True)
153 # A shorthand for get_result(block=True)
154 In [11]: pr.r
154 In [11]: pr.r
155 Out[11]:
155 Out[11]:
156 <Results List>
156 <Results List>
157 [0] In [3]: time.sleep(10)
157 [0] In [3]: time.sleep(10)
158 [1] In [3]: time.sleep(10)
158 [1] In [3]: time.sleep(10)
159 [2] In [3]: time.sleep(10)
159 [2] In [3]: time.sleep(10)
160 [3] In [3]: time.sleep(10)
160 [3] In [3]: time.sleep(10)
161
161
162 Often, it is desirable to wait until a set of ``PendingResult`` objects are done. For this, there is the ``barrier`` method. This method takes a tuple of ``PendingResult`` objects and blocks until all of the associated results are ready::
162 Often, it is desirable to wait until a set of ``PendingResult`` objects are done. For this, there is the ``barrier`` method. This method takes a tuple of ``PendingResult`` objects and blocks until all of the associated results are ready::
163
163
164 In [72]: mec.block=False
164 In [72]: mec.block=False
165
165
166 # A trivial list of PendingResults objects
166 # A trivial list of PendingResults objects
167 In [73]: pr_list = [mec.execute('time.sleep(3)') for i in range(10)]
167 In [73]: pr_list = [mec.execute('time.sleep(3)') for i in range(10)]
168
168
169 # Wait until all of them are done
169 # Wait until all of them are done
170 In [74]: mec.barrier(pr_list)
170 In [74]: mec.barrier(pr_list)
171
171
172 # Then, their results are ready using get_result or the r attribute
172 # Then, their results are ready using get_result or the r attribute
173 In [75]: pr_list[0].r
173 In [75]: pr_list[0].r
174 Out[75]:
174 Out[75]:
175 <Results List>
175 <Results List>
176 [0] In [20]: time.sleep(3)
176 [0] In [20]: time.sleep(3)
177 [1] In [19]: time.sleep(3)
177 [1] In [19]: time.sleep(3)
178 [2] In [20]: time.sleep(3)
178 [2] In [20]: time.sleep(3)
179 [3] In [19]: time.sleep(3)
179 [3] In [19]: time.sleep(3)
180
180
181
181
182 The ``block`` and ``targets`` keyword arguments and attributes
182 The ``block`` and ``targets`` keyword arguments and attributes
183 --------------------------------------------------------------
183 --------------------------------------------------------------
184
184
185 Most commands in the multiengine interface (like ``execute``) accept ``block`` and ``targets``
185 Most commands in the multiengine interface (like ``execute``) accept ``block`` and ``targets``
186 as keyword arguments. As we have seen above, these keyword arguments control the blocking mode
186 as keyword arguments. As we have seen above, these keyword arguments control the blocking mode
187 and which engines the command is applied to. The ``MultiEngineClient`` class also has ``block``
187 and which engines the command is applied to. The ``MultiEngineClient`` class also has ``block``
188 and ``targets`` attributes that control the default behavior when the keyword arguments are not
188 and ``targets`` attributes that control the default behavior when the keyword arguments are not
189 provided. Thus the following logic is used for ``block`` and ``targets``:
189 provided. Thus the following logic is used for ``block`` and ``targets``:
190
190
191 * If no keyword argument is provided, the instance attributes are used.
191 * If no keyword argument is provided, the instance attributes are used.
192 * Keyword arguments, if provided, override the instance attributes.
192 * Keyword arguments, if provided, override the instance attributes.
193
193
194 The following examples demonstrate how to use the instance attributes::
194 The following examples demonstrate how to use the instance attributes::
195
195
196 In [16]: mec.targets = [0,2]
196 In [16]: mec.targets = [0,2]
197
197
198 In [17]: mec.block = False
198 In [17]: mec.block = False
199
199
200 In [18]: pr = mec.execute('a=5')
200 In [18]: pr = mec.execute('a=5')
201
201
202 In [19]: pr.r
202 In [19]: pr.r
203 Out[19]:
203 Out[19]:
204 <Results List>
204 <Results List>
205 [0] In [6]: a=5
205 [0] In [6]: a=5
206 [2] In [6]: a=5
206 [2] In [6]: a=5
207
207
208 # Note targets='all' means all engines
208 # Note targets='all' means all engines
209 In [20]: mec.targets = 'all'
209 In [20]: mec.targets = 'all'
210
210
211 In [21]: mec.block = True
211 In [21]: mec.block = True
212
212
213 In [22]: mec.execute('b=10; print b')
213 In [22]: mec.execute('b=10; print b')
214 Out[22]:
214 Out[22]:
215 <Results List>
215 <Results List>
216 [0] In [7]: b=10; print b
216 [0] In [7]: b=10; print b
217 [0] Out[7]: 10
217 [0] Out[7]: 10
218
218
219 [1] In [6]: b=10; print b
219 [1] In [6]: b=10; print b
220 [1] Out[6]: 10
220 [1] Out[6]: 10
221
221
222 [2] In [7]: b=10; print b
222 [2] In [7]: b=10; print b
223 [2] Out[7]: 10
223 [2] Out[7]: 10
224
224
225 [3] In [6]: b=10; print b
225 [3] In [6]: b=10; print b
226 [3] Out[6]: 10
226 [3] Out[6]: 10
227
227
228 The ``block`` and ``targets`` instance attributes also determine the behavior of the parallel
228 The ``block`` and ``targets`` instance attributes also determine the behavior of the parallel
229 magic commands...
229 magic commands...
230
230
231
231
232 Parallel magic commands
232 Parallel magic commands
233 -----------------------
233 -----------------------
234
234
235 We provide a few IPython magic commands (``%px``, ``%autopx`` and ``%result``) that make it more pleasant to execute Python commands on the engines interactively. These are simply shortcuts to ``execute`` and ``get_result``. The ``%px`` magic executes a single Python command on the engines specified by the ``targets`` attribute of the ``MultiEngineClient`` instance (by default this is 'all')::
235 We provide a few IPython magic commands (``%px``, ``%autopx`` and ``%result``) that make it more pleasant to execute Python commands on the engines interactively. These are simply shortcuts to ``execute`` and ``get_result``. The ``%px`` magic executes a single Python command on the engines specified by the ``targets`` attribute of the ``MultiEngineClient`` instance (by default this is 'all')::
236
236
237 # Make this MultiEngineClient active for parallel magic commands
237 # Make this MultiEngineClient active for parallel magic commands
238 In [23]: mec.activate()
238 In [23]: mec.activate()
239
239
240 In [24]: mec.block=True
240 In [24]: mec.block=True
241
241
242 In [25]: import numpy
242 In [25]: import numpy
243
243
244 In [26]: %px import numpy
244 In [26]: %px import numpy
245 Executing command on Controller
245 Executing command on Controller
246 Out[26]:
246 Out[26]:
247 <Results List>
247 <Results List>
248 [0] In [8]: import numpy
248 [0] In [8]: import numpy
249 [1] In [7]: import numpy
249 [1] In [7]: import numpy
250 [2] In [8]: import numpy
250 [2] In [8]: import numpy
251 [3] In [7]: import numpy
251 [3] In [7]: import numpy
252
252
253
253
254 In [27]: %px a = numpy.random.rand(2,2)
254 In [27]: %px a = numpy.random.rand(2,2)
255 Executing command on Controller
255 Executing command on Controller
256 Out[27]:
256 Out[27]:
257 <Results List>
257 <Results List>
258 [0] In [9]: a = numpy.random.rand(2,2)
258 [0] In [9]: a = numpy.random.rand(2,2)
259 [1] In [8]: a = numpy.random.rand(2,2)
259 [1] In [8]: a = numpy.random.rand(2,2)
260 [2] In [9]: a = numpy.random.rand(2,2)
260 [2] In [9]: a = numpy.random.rand(2,2)
261 [3] In [8]: a = numpy.random.rand(2,2)
261 [3] In [8]: a = numpy.random.rand(2,2)
262
262
263
263
264 In [28]: %px print numpy.linalg.eigvals(a)
264 In [28]: %px print numpy.linalg.eigvals(a)
265 Executing command on Controller
265 Executing command on Controller
266 Out[28]:
266 Out[28]:
267 <Results List>
267 <Results List>
268 [0] In [10]: print numpy.linalg.eigvals(a)
268 [0] In [10]: print numpy.linalg.eigvals(a)
269 [0] Out[10]: [ 1.28167017 0.14197338]
269 [0] Out[10]: [ 1.28167017 0.14197338]
270
270
271 [1] In [9]: print numpy.linalg.eigvals(a)
271 [1] In [9]: print numpy.linalg.eigvals(a)
272 [1] Out[9]: [-0.14093616 1.27877273]
272 [1] Out[9]: [-0.14093616 1.27877273]
273
273
274 [2] In [10]: print numpy.linalg.eigvals(a)
274 [2] In [10]: print numpy.linalg.eigvals(a)
275 [2] Out[10]: [-0.37023573 1.06779409]
275 [2] Out[10]: [-0.37023573 1.06779409]
276
276
277 [3] In [9]: print numpy.linalg.eigvals(a)
277 [3] In [9]: print numpy.linalg.eigvals(a)
278 [3] Out[9]: [ 0.83664764 -0.25602658]
278 [3] Out[9]: [ 0.83664764 -0.25602658]
279
279
280 The ``%result`` magic gets and prints the stdin/stdout/stderr of the last command executed on each engine. It is simply a shortcut to the ``get_result`` method::
280 The ``%result`` magic gets and prints the stdin/stdout/stderr of the last command executed on each engine. It is simply a shortcut to the ``get_result`` method::
281
281
282 In [29]: %result
282 In [29]: %result
283 Out[29]:
283 Out[29]:
284 <Results List>
284 <Results List>
285 [0] In [10]: print numpy.linalg.eigvals(a)
285 [0] In [10]: print numpy.linalg.eigvals(a)
286 [0] Out[10]: [ 1.28167017 0.14197338]
286 [0] Out[10]: [ 1.28167017 0.14197338]
287
287
288 [1] In [9]: print numpy.linalg.eigvals(a)
288 [1] In [9]: print numpy.linalg.eigvals(a)
289 [1] Out[9]: [-0.14093616 1.27877273]
289 [1] Out[9]: [-0.14093616 1.27877273]
290
290
291 [2] In [10]: print numpy.linalg.eigvals(a)
291 [2] In [10]: print numpy.linalg.eigvals(a)
292 [2] Out[10]: [-0.37023573 1.06779409]
292 [2] Out[10]: [-0.37023573 1.06779409]
293
293
294 [3] In [9]: print numpy.linalg.eigvals(a)
294 [3] In [9]: print numpy.linalg.eigvals(a)
295 [3] Out[9]: [ 0.83664764 -0.25602658]
295 [3] Out[9]: [ 0.83664764 -0.25602658]
296
296
297 The ``%autopx`` magic switches to a mode where everything you type is executed on the engines given by the ``targets`` attribute::
297 The ``%autopx`` magic switches to a mode where everything you type is executed on the engines given by the ``targets`` attribute::
298
298
299 In [30]: mec.block=False
299 In [30]: mec.block=False
300
300
301 In [31]: %autopx
301 In [31]: %autopx
302 Auto Parallel Enabled
302 Auto Parallel Enabled
303 Type %autopx to disable
303 Type %autopx to disable
304
304
305 In [32]: max_evals = []
305 In [32]: max_evals = []
306 <ipython1.kernel.multiengineclient.PendingResult object at 0x17b8a70>
306 <IPython.kernel.multiengineclient.PendingResult object at 0x17b8a70>
307
307
308 In [33]: for i in range(100):
308 In [33]: for i in range(100):
309 ....: a = numpy.random.rand(10,10)
309 ....: a = numpy.random.rand(10,10)
310 ....: a = a+a.transpose()
310 ....: a = a+a.transpose()
311 ....: evals = numpy.linalg.eigvals(a)
311 ....: evals = numpy.linalg.eigvals(a)
312 ....: max_evals.append(evals[0].real)
312 ....: max_evals.append(evals[0].real)
313 ....:
313 ....:
314 ....:
314 ....:
315 <ipython1.kernel.multiengineclient.PendingResult object at 0x17af8f0>
315 <IPython.kernel.multiengineclient.PendingResult object at 0x17af8f0>
316
316
317 In [34]: %autopx
317 In [34]: %autopx
318 Auto Parallel Disabled
318 Auto Parallel Disabled
319
319
320 In [35]: mec.block=True
320 In [35]: mec.block=True
321
321
322 In [36]: px print "Average max eigenvalue is: ", sum(max_evals)/len(max_evals)
322 In [36]: px print "Average max eigenvalue is: ", sum(max_evals)/len(max_evals)
323 Executing command on Controller
323 Executing command on Controller
324 Out[36]:
324 Out[36]:
325 <Results List>
325 <Results List>
326 [0] In [13]: print "Average max eigenvalue is: ", sum(max_evals)/len(max_evals)
326 [0] In [13]: print "Average max eigenvalue is: ", sum(max_evals)/len(max_evals)
327 [0] Out[13]: Average max eigenvalue is: 10.1387247332
327 [0] Out[13]: Average max eigenvalue is: 10.1387247332
328
328
329 [1] In [12]: print "Average max eigenvalue is: ", sum(max_evals)/len(max_evals)
329 [1] In [12]: print "Average max eigenvalue is: ", sum(max_evals)/len(max_evals)
330 [1] Out[12]: Average max eigenvalue is: 10.2076902286
330 [1] Out[12]: Average max eigenvalue is: 10.2076902286
331
331
332 [2] In [13]: print "Average max eigenvalue is: ", sum(max_evals)/len(max_evals)
332 [2] In [13]: print "Average max eigenvalue is: ", sum(max_evals)/len(max_evals)
333 [2] Out[13]: Average max eigenvalue is: 10.1891484655
333 [2] Out[13]: Average max eigenvalue is: 10.1891484655
334
334
335 [3] In [12]: print "Average max eigenvalue is: ", sum(max_evals)/len(max_evals)
335 [3] In [12]: print "Average max eigenvalue is: ", sum(max_evals)/len(max_evals)
336 [3] Out[12]: Average max eigenvalue is: 10.1158837784
336 [3] Out[12]: Average max eigenvalue is: 10.1158837784
337
337
338 Using the ``with`` statement of Python 2.5
338 Using the ``with`` statement of Python 2.5
339 ------------------------------------------
339 ------------------------------------------
340
340
341 Python 2.5 introduced the ``with`` statement. The ``MultiEngineClient`` can be used with the ``with`` statement to execute a block of code on the engines indicated by the ``targets`` attribute::
341 Python 2.5 introduced the ``with`` statement. The ``MultiEngineClient`` can be used with the ``with`` statement to execute a block of code on the engines indicated by the ``targets`` attribute::
342
342
343 In [3]: with mec:
343 In [3]: with mec:
344 ...: client.remote() # Required so the following code is not run locally
344 ...: client.remote() # Required so the following code is not run locally
345 ...: a = 10
345 ...: a = 10
346 ...: b = 30
346 ...: b = 30
347 ...: c = a+b
347 ...: c = a+b
348 ...:
348 ...:
349 ...:
349 ...:
350
350
351 In [4]: mec.get_result()
351 In [4]: mec.get_result()
352 Out[4]:
352 Out[4]:
353 <Results List>
353 <Results List>
354 [0] In [1]: a = 10
354 [0] In [1]: a = 10
355 b = 30
355 b = 30
356 c = a+b
356 c = a+b
357
357
358 [1] In [1]: a = 10
358 [1] In [1]: a = 10
359 b = 30
359 b = 30
360 c = a+b
360 c = a+b
361
361
362 [2] In [1]: a = 10
362 [2] In [1]: a = 10
363 b = 30
363 b = 30
364 c = a+b
364 c = a+b
365
365
366 [3] In [1]: a = 10
366 [3] In [1]: a = 10
367 b = 30
367 b = 30
368 c = a+b
368 c = a+b
369
369
370 This is basically another way of calling execute, but one which allows you to avoid writing code in strings. When used in this way, the attributes ``targets`` and ``block`` are used to control how the code is executed. For now, if you run code in non-blocking mode you won't have access to the ``PendingResult``.
370 This is basically another way of calling execute, but one which allows you to avoid writing code in strings. When used in this way, the attributes ``targets`` and ``block`` are used to control how the code is executed. For now, if you run code in non-blocking mode you won't have access to the ``PendingResult``.
371
371
372 Moving Python objects around
372 Moving Python objects around
373 ============================
373 ============================
374
374
375 In addition to executing code on engines, you can transfer Python objects to and from your
375 In addition to executing code on engines, you can transfer Python objects to and from your
376 IPython session and the engines. In IPython, these operations are called ``push`` (sending an
376 IPython session and the engines. In IPython, these operations are called ``push`` (sending an
377 object to the engines) and ``pull`` (getting an object from the engines).
377 object to the engines) and ``pull`` (getting an object from the engines).
378
378
379 Basic push and pull
379 Basic push and pull
380 -------------------
380 -------------------
381
381
382 Here are some examples of how you use ``push`` and ``pull``::
382 Here are some examples of how you use ``push`` and ``pull``::
383
383
384 In [38]: mec.push(dict(a=1.03234,b=3453))
384 In [38]: mec.push(dict(a=1.03234,b=3453))
385 Out[38]: [None, None, None, None]
385 Out[38]: [None, None, None, None]
386
386
387 In [39]: mec.pull('a')
387 In [39]: mec.pull('a')
388 Out[39]: [1.03234, 1.03234, 1.03234, 1.03234]
388 Out[39]: [1.03234, 1.03234, 1.03234, 1.03234]
389
389
390 In [40]: mec.pull('b',targets=0)
390 In [40]: mec.pull('b',targets=0)
391 Out[40]: [3453]
391 Out[40]: [3453]
392
392
393 In [41]: mec.pull(('a','b'))
393 In [41]: mec.pull(('a','b'))
394 Out[41]: [[1.03234, 3453], [1.03234, 3453], [1.03234, 3453], [1.03234, 3453]]
394 Out[41]: [[1.03234, 3453], [1.03234, 3453], [1.03234, 3453], [1.03234, 3453]]
395
395
396 In [42]: mec.zip_pull(('a','b'))
396 In [42]: mec.zip_pull(('a','b'))
397 Out[42]: [(1.03234, 1.03234, 1.03234, 1.03234), (3453, 3453, 3453, 3453)]
397 Out[42]: [(1.03234, 1.03234, 1.03234, 1.03234), (3453, 3453, 3453, 3453)]
398
398
399 In [43]: mec.push(dict(c='speed'))
399 In [43]: mec.push(dict(c='speed'))
400 Out[43]: [None, None, None, None]
400 Out[43]: [None, None, None, None]
401
401
402 In [44]: %px print c
402 In [44]: %px print c
403 Executing command on Controller
403 Executing command on Controller
404 Out[44]:
404 Out[44]:
405 <Results List>
405 <Results List>
406 [0] In [14]: print c
406 [0] In [14]: print c
407 [0] Out[14]: speed
407 [0] Out[14]: speed
408
408
409 [1] In [13]: print c
409 [1] In [13]: print c
410 [1] Out[13]: speed
410 [1] Out[13]: speed
411
411
412 [2] In [14]: print c
412 [2] In [14]: print c
413 [2] Out[14]: speed
413 [2] Out[14]: speed
414
414
415 [3] In [13]: print c
415 [3] In [13]: print c
416 [3] Out[13]: speed
416 [3] Out[13]: speed
417
417
418 In non-blocking mode ``push`` and ``pull`` also return ``PendingResult`` objects::
418 In non-blocking mode ``push`` and ``pull`` also return ``PendingResult`` objects::
419
419
420 In [47]: mec.block=False
420 In [47]: mec.block=False
421
421
422 In [48]: pr = mec.pull('a')
422 In [48]: pr = mec.pull('a')
423
423
424 In [49]: pr.r
424 In [49]: pr.r
425 Out[49]: [1.03234, 1.03234, 1.03234, 1.03234]
425 Out[49]: [1.03234, 1.03234, 1.03234, 1.03234]
426
426
427
427
428 Push and pull for functions
428 Push and pull for functions
429 ---------------------------
429 ---------------------------
430
430
431 Functions can also be pushed and pulled using ``push_function`` and ``pull_function``::
431 Functions can also be pushed and pulled using ``push_function`` and ``pull_function``::
432
432
433 In [53]: def f(x):
433 In [53]: def f(x):
434 ....: return 2.0*x**4
434 ....: return 2.0*x**4
435 ....:
435 ....:
436
436
437 In [54]: mec.push_function(dict(f=f))
437 In [54]: mec.push_function(dict(f=f))
438 Out[54]: [None, None, None, None]
438 Out[54]: [None, None, None, None]
439
439
440 In [55]: mec.execute('y = f(4.0)')
440 In [55]: mec.execute('y = f(4.0)')
441 Out[55]:
441 Out[55]:
442 <Results List>
442 <Results List>
443 [0] In [15]: y = f(4.0)
443 [0] In [15]: y = f(4.0)
444 [1] In [14]: y = f(4.0)
444 [1] In [14]: y = f(4.0)
445 [2] In [15]: y = f(4.0)
445 [2] In [15]: y = f(4.0)
446 [3] In [14]: y = f(4.0)
446 [3] In [14]: y = f(4.0)
447
447
448
448
449 In [56]: px print y
449 In [56]: px print y
450 Executing command on Controller
450 Executing command on Controller
451 Out[56]:
451 Out[56]:
452 <Results List>
452 <Results List>
453 [0] In [16]: print y
453 [0] In [16]: print y
454 [0] Out[16]: 512.0
454 [0] Out[16]: 512.0
455
455
456 [1] In [15]: print y
456 [1] In [15]: print y
457 [1] Out[15]: 512.0
457 [1] Out[15]: 512.0
458
458
459 [2] In [16]: print y
459 [2] In [16]: print y
460 [2] Out[16]: 512.0
460 [2] Out[16]: 512.0
461
461
462 [3] In [15]: print y
462 [3] In [15]: print y
463 [3] Out[15]: 512.0
463 [3] Out[15]: 512.0
464
464
465
465
466 Dictionary interface
466 Dictionary interface
467 --------------------
467 --------------------
468
468
469 As a shorthand to ``push`` and ``pull``, the ``MultiEngineClient`` class implements some of the Python dictionary interface. This makes the remote namespaces of the engines appear as a local dictionary. Underneath, this uses ``push`` and ``pull``::
469 As a shorthand to ``push`` and ``pull``, the ``MultiEngineClient`` class implements some of the Python dictionary interface. This makes the remote namespaces of the engines appear as a local dictionary. Underneath, this uses ``push`` and ``pull``::
470
470
471 In [50]: mec.block=True
471 In [50]: mec.block=True
472
472
473 In [51]: mec['a']=['foo','bar']
473 In [51]: mec['a']=['foo','bar']
474
474
475 In [52]: mec['a']
475 In [52]: mec['a']
476 Out[52]: [['foo', 'bar'], ['foo', 'bar'], ['foo', 'bar'], ['foo', 'bar']]
476 Out[52]: [['foo', 'bar'], ['foo', 'bar'], ['foo', 'bar'], ['foo', 'bar']]
477
477
478 Scatter and gather
478 Scatter and gather
479 ------------------
479 ------------------
480
480
481 Sometimes it is useful to partition a sequence and push the partitions to different engines. In
481 Sometimes it is useful to partition a sequence and push the partitions to different engines. In
482 MPI language, this is known as scatter/gather and we follow that terminology. However, it is
482 MPI language, this is known as scatter/gather and we follow that terminology. However, it is
483 important to remember that in IPython ``scatter`` is from the interactive IPython session to
483 important to remember that in IPython ``scatter`` is from the interactive IPython session to
484 the engines and ``gather`` is from the engines back to the interactive IPython session. For
484 the engines and ``gather`` is from the engines back to the interactive IPython session. For
485 scatter/gather operations between engines, MPI should be used::
485 scatter/gather operations between engines, MPI should be used::
486
486
487 In [58]: mec.scatter('a',range(16))
487 In [58]: mec.scatter('a',range(16))
488 Out[58]: [None, None, None, None]
488 Out[58]: [None, None, None, None]
489
489
490 In [59]: px print a
490 In [59]: px print a
491 Executing command on Controller
491 Executing command on Controller
492 Out[59]:
492 Out[59]:
493 <Results List>
493 <Results List>
494 [0] In [17]: print a
494 [0] In [17]: print a
495 [0] Out[17]: [0, 1, 2, 3]
495 [0] Out[17]: [0, 1, 2, 3]
496
496
497 [1] In [16]: print a
497 [1] In [16]: print a
498 [1] Out[16]: [4, 5, 6, 7]
498 [1] Out[16]: [4, 5, 6, 7]
499
499
500 [2] In [17]: print a
500 [2] In [17]: print a
501 [2] Out[17]: [8, 9, 10, 11]
501 [2] Out[17]: [8, 9, 10, 11]
502
502
503 [3] In [16]: print a
503 [3] In [16]: print a
504 [3] Out[16]: [12, 13, 14, 15]
504 [3] Out[16]: [12, 13, 14, 15]
505
505
506
506
507 In [60]: mec.gather('a')
507 In [60]: mec.gather('a')
508 Out[60]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
508 Out[60]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
509
509
510 Other things to look at
510 Other things to look at
511 =======================
511 =======================
512
512
513 Parallel map
513 Parallel map
514 ------------
514 ------------
515
515
516 Python's builtin ``map`` function allows a function to be applied to a sequence element-by-element. This type of code is typically trivial to parallelize. In fact, the MultiEngine interface in IPython already has a parallel version of ``map`` that works just like its serial counterpart::
516 Python's builtin ``map`` function allows a function to be applied to a sequence element-by-element. This type of code is typically trivial to parallelize. In fact, the MultiEngine interface in IPython already has a parallel version of ``map`` that works just like its serial counterpart::
517
517
518 In [63]: serial_result = map(lambda x:x**10, range(32))
518 In [63]: serial_result = map(lambda x:x**10, range(32))
519
519
520 In [64]: parallel_result = mec.map(lambda x:x**10, range(32))
520 In [64]: parallel_result = mec.map(lambda x:x**10, range(32))
521
521
522 In [65]: serial_result==parallel_result
522 In [65]: serial_result==parallel_result
523 Out[65]: True
523 Out[65]: True
524
524
525 As you would expect, the parallel version of ``map`` is also influenced by the ``block`` and ``targets`` keyword arguments and attributes.
525 As you would expect, the parallel version of ``map`` is also influenced by the ``block`` and ``targets`` keyword arguments and attributes.
526
526
527 How to do parallel list comprehensions
527 How to do parallel list comprehensions
528 --------------------------------------
528 --------------------------------------
529
529
530 In many cases list comprehensions are nicer than using the map function. While we don't have fully parallel list comprehensions, it is simple to get the basic effect using ``scatter`` and ``gather``::
530 In many cases list comprehensions are nicer than using the map function. While we don't have fully parallel list comprehensions, it is simple to get the basic effect using ``scatter`` and ``gather``::
531
531
532 In [66]: mec.scatter('x',range(64))
532 In [66]: mec.scatter('x',range(64))
533 Out[66]: [None, None, None, None]
533 Out[66]: [None, None, None, None]
534
534
535 In [67]: px y = [i**10 for i in x]
535 In [67]: px y = [i**10 for i in x]
536 Executing command on Controller
536 Executing command on Controller
537 Out[67]:
537 Out[67]:
538 <Results List>
538 <Results List>
539 [0] In [19]: y = [i**10 for i in x]
539 [0] In [19]: y = [i**10 for i in x]
540 [1] In [18]: y = [i**10 for i in x]
540 [1] In [18]: y = [i**10 for i in x]
541 [2] In [19]: y = [i**10 for i in x]
541 [2] In [19]: y = [i**10 for i in x]
542 [3] In [18]: y = [i**10 for i in x]
542 [3] In [18]: y = [i**10 for i in x]
543
543
544
544
545 In [68]: y = mec.gather('y')
545 In [68]: y = mec.gather('y')
546
546
547 In [69]: print y
547 In [69]: print y
548 [0, 1, 1024, 59049, 1048576, 9765625, 60466176, 282475249, 1073741824,...]
548 [0, 1, 1024, 59049, 1048576, 9765625, 60466176, 282475249, 1073741824,...]
549
549
550 Parallel Exceptions
550 Parallel Exceptions
551 -------------------
551 -------------------
552
552
553 In the MultiEngine interface, parallel commands can raise Python exceptions, just like serial commands. But, it is a little subtle, because a single parallel command can actually raise multiple exceptions (one for each engine the command was run on). To express this idea, the MultiEngine interface has a ``CompositeError`` exception class that will be raised in most cases. The ``CompositeError`` class is a special type of exception that wraps one or more other types of exceptions. Here is how it works::
553 In the MultiEngine interface, parallel commands can raise Python exceptions, just like serial commands. But, it is a little subtle, because a single parallel command can actually raise multiple exceptions (one for each engine the command was run on). To express this idea, the MultiEngine interface has a ``CompositeError`` exception class that will be raised in most cases. The ``CompositeError`` class is a special type of exception that wraps one or more other types of exceptions. Here is how it works::
554
554
555 In [76]: mec.block=True
555 In [76]: mec.block=True
556
556
557 In [77]: mec.execute('1/0')
557 In [77]: mec.execute('1/0')
558 ---------------------------------------------------------------------------
558 ---------------------------------------------------------------------------
559 CompositeError Traceback (most recent call last)
559 CompositeError Traceback (most recent call last)
560
560
561 /ipython1-client-r3021/docs/examples/<ipython console> in <module>()
561 /ipython1-client-r3021/docs/examples/<ipython console> in <module>()
562
562
563 /ipython1-client-r3021/ipython1/kernel/multiengineclient.pyc in execute(self, lines, targets, block)
563 /ipython1-client-r3021/ipython1/kernel/multiengineclient.pyc in execute(self, lines, targets, block)
564 432 targets, block = self._findTargetsAndBlock(targets, block)
564 432 targets, block = self._findTargetsAndBlock(targets, block)
565 433 result = blockingCallFromThread(self.smultiengine.execute, lines,
565 433 result = blockingCallFromThread(self.smultiengine.execute, lines,
566 --> 434 targets=targets, block=block)
566 --> 434 targets=targets, block=block)
567 435 if block:
567 435 if block:
568 436 result = ResultList(result)
568 436 result = ResultList(result)
569
569
570 /ipython1-client-r3021/ipython1/kernel/twistedutil.pyc in blockingCallFromThread(f, *a, **kw)
570 /ipython1-client-r3021/ipython1/kernel/twistedutil.pyc in blockingCallFromThread(f, *a, **kw)
571 72 result.raiseException()
571 72 result.raiseException()
572 73 except Exception, e:
572 73 except Exception, e:
573 ---> 74 raise e
573 ---> 74 raise e
574 75 return result
574 75 return result
575 76
575 76
576
576
577 CompositeError: one or more exceptions from call to method: execute
577 CompositeError: one or more exceptions from call to method: execute
578 [0:execute]: ZeroDivisionError: integer division or modulo by zero
578 [0:execute]: ZeroDivisionError: integer division or modulo by zero
579 [1:execute]: ZeroDivisionError: integer division or modulo by zero
579 [1:execute]: ZeroDivisionError: integer division or modulo by zero
580 [2:execute]: ZeroDivisionError: integer division or modulo by zero
580 [2:execute]: ZeroDivisionError: integer division or modulo by zero
581 [3:execute]: ZeroDivisionError: integer division or modulo by zero
581 [3:execute]: ZeroDivisionError: integer division or modulo by zero
582
582
583 Notice how the error message printed when ``CompositeError`` is raised has information about the individual exceptions that were raised on each engine. If you want, you can even raise one of these original exceptions::
583 Notice how the error message printed when ``CompositeError`` is raised has information about the individual exceptions that were raised on each engine. If you want, you can even raise one of these original exceptions::
584
584
585 In [80]: try:
585 In [80]: try:
586 ....: mec.execute('1/0')
586 ....: mec.execute('1/0')
587 ....: except client.CompositeError, e:
587 ....: except client.CompositeError, e:
588 ....: e.raise_exception()
588 ....: e.raise_exception()
589 ....:
589 ....:
590 ....:
590 ....:
591 ---------------------------------------------------------------------------
591 ---------------------------------------------------------------------------
592 ZeroDivisionError Traceback (most recent call last)
592 ZeroDivisionError Traceback (most recent call last)
593
593
594 /ipython1-client-r3021/docs/examples/<ipython console> in <module>()
594 /ipython1-client-r3021/docs/examples/<ipython console> in <module>()
595
595
596 /ipython1-client-r3021/ipython1/kernel/error.pyc in raise_exception(self, excid)
596 /ipython1-client-r3021/ipython1/kernel/error.pyc in raise_exception(self, excid)
597 156 raise IndexError("an exception with index %i does not exist"%excid)
597 156 raise IndexError("an exception with index %i does not exist"%excid)
598 157 else:
598 157 else:
599 --> 158 raise et, ev, etb
599 --> 158 raise et, ev, etb
600 159
600 159
601 160 def collect_exceptions(rlist, method):
601 160 def collect_exceptions(rlist, method):
602
602
603 ZeroDivisionError: integer division or modulo by zero
603 ZeroDivisionError: integer division or modulo by zero
604
604
605 If you are working in IPython, you can simply type ``%debug`` after one of these ``CompositeError`` exceptions is raised, and inspect the exception instance::
605 If you are working in IPython, you can simply type ``%debug`` after one of these ``CompositeError`` exceptions is raised, and inspect the exception instance::
606
606
607 In [81]: mec.execute('1/0')
607 In [81]: mec.execute('1/0')
608 ---------------------------------------------------------------------------
608 ---------------------------------------------------------------------------
609 CompositeError Traceback (most recent call last)
609 CompositeError Traceback (most recent call last)
610
610
611 /ipython1-client-r3021/docs/examples/<ipython console> in <module>()
611 /ipython1-client-r3021/docs/examples/<ipython console> in <module>()
612
612
613 /ipython1-client-r3021/ipython1/kernel/multiengineclient.pyc in execute(self, lines, targets, block)
613 /ipython1-client-r3021/ipython1/kernel/multiengineclient.pyc in execute(self, lines, targets, block)
614 432 targets, block = self._findTargetsAndBlock(targets, block)
614 432 targets, block = self._findTargetsAndBlock(targets, block)
615 433 result = blockingCallFromThread(self.smultiengine.execute, lines,
615 433 result = blockingCallFromThread(self.smultiengine.execute, lines,
616 --> 434 targets=targets, block=block)
616 --> 434 targets=targets, block=block)
617 435 if block:
617 435 if block:
618 436 result = ResultList(result)
618 436 result = ResultList(result)
619
619
620 /ipython1-client-r3021/ipython1/kernel/twistedutil.pyc in blockingCallFromThread(f, *a, **kw)
620 /ipython1-client-r3021/ipython1/kernel/twistedutil.pyc in blockingCallFromThread(f, *a, **kw)
621 72 result.raiseException()
621 72 result.raiseException()
622 73 except Exception, e:
622 73 except Exception, e:
623 ---> 74 raise e
623 ---> 74 raise e
624 75 return result
624 75 return result
625 76
625 76
626
626
627 CompositeError: one or more exceptions from call to method: execute
627 CompositeError: one or more exceptions from call to method: execute
628 [0:execute]: ZeroDivisionError: integer division or modulo by zero
628 [0:execute]: ZeroDivisionError: integer division or modulo by zero
629 [1:execute]: ZeroDivisionError: integer division or modulo by zero
629 [1:execute]: ZeroDivisionError: integer division or modulo by zero
630 [2:execute]: ZeroDivisionError: integer division or modulo by zero
630 [2:execute]: ZeroDivisionError: integer division or modulo by zero
631 [3:execute]: ZeroDivisionError: integer division or modulo by zero
631 [3:execute]: ZeroDivisionError: integer division or modulo by zero
632
632
633 In [82]: %debug
633 In [82]: %debug
634 >
634 >
635
635
636 /ipython1-client-r3021/ipython1/kernel/twistedutil.py(74)blockingCallFromThread()
636 /ipython1-client-r3021/ipython1/kernel/twistedutil.py(74)blockingCallFromThread()
637 73 except Exception, e:
637 73 except Exception, e:
638 ---> 74 raise e
638 ---> 74 raise e
639 75 return result
639 75 return result
640
640
641 # With the debugger running, e is the exception instance. We can tab-complete
641 # With the debugger running, e is the exception instance. We can tab-complete
642 # on it and see the extra methods that are available.
642 # on it and see the extra methods that are available.
643 ipdb> e.
643 ipdb> e.
644 e.__class__ e.__getitem__ e.__new__ e.__setstate__ e.args
644 e.__class__ e.__getitem__ e.__new__ e.__setstate__ e.args
645 e.__delattr__ e.__getslice__ e.__reduce__ e.__str__ e.elist
645 e.__delattr__ e.__getslice__ e.__reduce__ e.__str__ e.elist
646 e.__dict__ e.__hash__ e.__reduce_ex__ e.__weakref__ e.message
646 e.__dict__ e.__hash__ e.__reduce_ex__ e.__weakref__ e.message
647 e.__doc__ e.__init__ e.__repr__ e._get_engine_str e.print_tracebacks
647 e.__doc__ e.__init__ e.__repr__ e._get_engine_str e.print_tracebacks
648 e.__getattribute__ e.__module__ e.__setattr__ e._get_traceback e.raise_exception
648 e.__getattribute__ e.__module__ e.__setattr__ e._get_traceback e.raise_exception
649 ipdb> e.print_tracebacks()
649 ipdb> e.print_tracebacks()
650 [0:execute]:
650 [0:execute]:
651 ---------------------------------------------------------------------------
651 ---------------------------------------------------------------------------
652 ZeroDivisionError Traceback (most recent call last)
652 ZeroDivisionError Traceback (most recent call last)
653
653
654 /ipython1-client-r3021/docs/examples/<string> in <module>()
654 /ipython1-client-r3021/docs/examples/<string> in <module>()
655
655
656 ZeroDivisionError: integer division or modulo by zero
656 ZeroDivisionError: integer division or modulo by zero
657
657
658 [1:execute]:
658 [1:execute]:
659 ---------------------------------------------------------------------------
659 ---------------------------------------------------------------------------
660 ZeroDivisionError Traceback (most recent call last)
660 ZeroDivisionError Traceback (most recent call last)
661
661
662 /ipython1-client-r3021/docs/examples/<string> in <module>()
662 /ipython1-client-r3021/docs/examples/<string> in <module>()
663
663
664 ZeroDivisionError: integer division or modulo by zero
664 ZeroDivisionError: integer division or modulo by zero
665
665
666 [2:execute]:
666 [2:execute]:
667 ---------------------------------------------------------------------------
667 ---------------------------------------------------------------------------
668 ZeroDivisionError Traceback (most recent call last)
668 ZeroDivisionError Traceback (most recent call last)
669
669
670 /ipython1-client-r3021/docs/examples/<string> in <module>()
670 /ipython1-client-r3021/docs/examples/<string> in <module>()
671
671
672 ZeroDivisionError: integer division or modulo by zero
672 ZeroDivisionError: integer division or modulo by zero
673
673
674 [3:execute]:
674 [3:execute]:
675 ---------------------------------------------------------------------------
675 ---------------------------------------------------------------------------
676 ZeroDivisionError Traceback (most recent call last)
676 ZeroDivisionError Traceback (most recent call last)
677
677
678 /ipython1-client-r3021/docs/examples/<string> in <module>()
678 /ipython1-client-r3021/docs/examples/<string> in <module>()
679
679
680 ZeroDivisionError: integer division or modulo by zero
680 ZeroDivisionError: integer division or modulo by zero
681
681
682 All of this same error handling magic even works in non-blocking mode::
682 All of this same error handling magic even works in non-blocking mode::
683
683
684 In [83]: mec.block=False
684 In [83]: mec.block=False
685
685
686 In [84]: pr = mec.execute('1/0')
686 In [84]: pr = mec.execute('1/0')
687
687
688 In [85]: pr.r
688 In [85]: pr.r
689 ---------------------------------------------------------------------------
689 ---------------------------------------------------------------------------
690 CompositeError Traceback (most recent call last)
690 CompositeError Traceback (most recent call last)
691
691
692 /ipython1-client-r3021/docs/examples/<ipython console> in <module>()
692 /ipython1-client-r3021/docs/examples/<ipython console> in <module>()
693
693
694 /ipython1-client-r3021/ipython1/kernel/multiengineclient.pyc in _get_r(self)
694 /ipython1-client-r3021/ipython1/kernel/multiengineclient.pyc in _get_r(self)
695 170
695 170
696 171 def _get_r(self):
696 171 def _get_r(self):
697 --> 172 return self.get_result(block=True)
697 --> 172 return self.get_result(block=True)
698 173
698 173
699 174 r = property(_get_r)
699 174 r = property(_get_r)
700
700
701 /ipython1-client-r3021/ipython1/kernel/multiengineclient.pyc in get_result(self, default, block)
701 /ipython1-client-r3021/ipython1/kernel/multiengineclient.pyc in get_result(self, default, block)
702 131 return self.result
702 131 return self.result
703 132 try:
703 132 try:
704 --> 133 result = self.client.get_pending_deferred(self.result_id, block)
704 --> 133 result = self.client.get_pending_deferred(self.result_id, block)
705 134 except error.ResultNotCompleted:
705 134 except error.ResultNotCompleted:
706 135 return default
706 135 return default
707
707
708 /ipython1-client-r3021/ipython1/kernel/multiengineclient.pyc in get_pending_deferred(self, deferredID, block)
708 /ipython1-client-r3021/ipython1/kernel/multiengineclient.pyc in get_pending_deferred(self, deferredID, block)
709 385
709 385
710 386 def get_pending_deferred(self, deferredID, block):
710 386 def get_pending_deferred(self, deferredID, block):
711 --> 387 return blockingCallFromThread(self.smultiengine.get_pending_deferred, deferredID, block)
711 --> 387 return blockingCallFromThread(self.smultiengine.get_pending_deferred, deferredID, block)
712 388
712 388
713 389 def barrier(self, pendingResults):
713 389 def barrier(self, pendingResults):
714
714
715 /ipython1-client-r3021/ipython1/kernel/twistedutil.pyc in blockingCallFromThread(f, *a, **kw)
715 /ipython1-client-r3021/ipython1/kernel/twistedutil.pyc in blockingCallFromThread(f, *a, **kw)
716 72 result.raiseException()
716 72 result.raiseException()
717 73 except Exception, e:
717 73 except Exception, e:
718 ---> 74 raise e
718 ---> 74 raise e
719 75 return result
719 75 return result
720 76
720 76
721
721
722 CompositeError: one or more exceptions from call to method: execute
722 CompositeError: one or more exceptions from call to method: execute
723 [0:execute]: ZeroDivisionError: integer division or modulo by zero
723 [0:execute]: ZeroDivisionError: integer division or modulo by zero
724 [1:execute]: ZeroDivisionError: integer division or modulo by zero
724 [1:execute]: ZeroDivisionError: integer division or modulo by zero
725 [2:execute]: ZeroDivisionError: integer division or modulo by zero
725 [2:execute]: ZeroDivisionError: integer division or modulo by zero
726 [3:execute]: ZeroDivisionError: integer division or modulo by zero
726 [3:execute]: ZeroDivisionError: integer division or modulo by zero
727
727
728
728
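A short sketch of handling these errors programmatically, reusing the connected ``mec`` client from the session above and only the ``CompositeError`` methods already demonstrated (``print_tracebacks`` and ``raise_exception``)::

    from IPython.kernel import client

    mec.block = True
    try:
        mec.execute('1/0')
    except client.CompositeError, e:
        # Print the remote traceback collected from each engine...
        e.print_tracebacks()
        # ...or uncomment to re-raise one of the wrapped exceptions locally.
        # e.raise_exception()
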
@@ -1,240 +1,240 b''
1 .. _paralleltask:
1 .. _paralleltask:
2
2
3 =================================
3 =================================
4 The IPython Task interface
4 The IPython Task interface
5 =================================
5 =================================
6
6
7 .. contents::
7 .. contents::
8
8
9 The ``Task`` interface to the controller presents the engines as a fault tolerant, dynamically load-balanced system of workers. Unlike the ``MultiEngine`` interface, in the ``Task`` interface the user has no direct access to individual engines. In some ways, this interface is simpler, but in other ways it is more powerful. Best of all, the user can use both of these interfaces at the same time to take advantage of both of their strengths. When the user can break their work up into segments that do not depend on previous execution, the ``Task`` interface is ideal. But it also has more power and flexibility, allowing the user to guide the distribution of jobs without having to assign Tasks to engines explicitly.
9 The ``Task`` interface to the controller presents the engines as a fault tolerant, dynamically load-balanced system of workers. Unlike the ``MultiEngine`` interface, in the ``Task`` interface the user has no direct access to individual engines. In some ways, this interface is simpler, but in other ways it is more powerful. Best of all, the user can use both of these interfaces at the same time to take advantage of both of their strengths. When the user can break their work up into segments that do not depend on previous execution, the ``Task`` interface is ideal. But it also has more power and flexibility, allowing the user to guide the distribution of jobs without having to assign Tasks to engines explicitly.
10
10
11 Starting the IPython controller and engines
11 Starting the IPython controller and engines
12 ===========================================
12 ===========================================
13
13
14 To follow along with this tutorial, the user will need to start the IPython
14 To follow along with this tutorial, the user will need to start the IPython
15 controller and four IPython engines. The simplest way of doing this is to
15 controller and four IPython engines. The simplest way of doing this is to
16 use the ``ipcluster`` command::
16 use the ``ipcluster`` command::
17
17
18 $ ipcluster -n 4
18 $ ipcluster -n 4
19
19
20 For more detailed information about starting the controller and engines, see our :ref:`introduction <ip1par>` to using IPython for parallel computing.
20 For more detailed information about starting the controller and engines, see our :ref:`introduction <ip1par>` to using IPython for parallel computing.
21
21
22 The magic here is that this single controller and set of engines is running both the MultiEngine and ``Task`` interfaces simultaneously.
22 The magic here is that this single controller and set of engines is running both the MultiEngine and ``Task`` interfaces simultaneously.
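
In practice this just means both kinds of client can be created against the same cluster. A minimal sketch (the no-argument ``MultiEngineClient`` constructor is assumed to use defaults, just like the ``TaskClient`` constructor shown below)::

    from IPython.kernel import client

    mec = client.MultiEngineClient()   # multiplexing interface
    tc = client.TaskClient()           # load-balanced Task interface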
23
23
24 QuickStart Task Farming
24 QuickStart Task Farming
25 =======================
25 =======================
26
26
27 First, a quick example of how to start running the most basic Tasks.
27 First, a quick example of how to start running the most basic Tasks.
28 The first step is to import the IPython ``client`` module and then create a ``TaskClient`` instance::
28 The first step is to import the IPython ``client`` module and then create a ``TaskClient`` instance::
29
29
30 In [1]: from ipython1.kernel import client
30 In [1]: from IPython.kernel import client
31
31
32 In [2]: tc = client.TaskClient()
32 In [2]: tc = client.TaskClient()
33
33
34 Then the user wraps the commands they want to run in Tasks::
34 Then the user wraps the commands they want to run in Tasks::
35
35
36 In [3]: tasklist = []
36 In [3]: tasklist = []
37 In [4]: for n in range(1000):
37 In [4]: for n in range(1000):
38 ... tasklist.append(client.Task("a = %i"%n, pull="a"))
38 ... tasklist.append(client.Task("a = %i"%n, pull="a"))
39
39
40 The first argument of the ``Task`` constructor is a string, the command to be executed. The most important optional keyword argument is ``pull``, which can be a string or list of strings, and it specifies the variable names to be saved as results of the ``Task``.
40 The first argument of the ``Task`` constructor is a string, the command to be executed. The most important optional keyword argument is ``pull``, which can be a string or list of strings, and it specifies the variable names to be saved as results of the ``Task``.
41
41
42 Next, the user needs to submit the Tasks to the ``TaskController`` with the ``TaskClient``::
42 Next, the user needs to submit the Tasks to the ``TaskController`` with the ``TaskClient``::
43
43
44 In [5]: taskids = [ tc.run(t) for t in tasklist ]
44 In [5]: taskids = [ tc.run(t) for t in tasklist ]
45
45
46 This will give the user a list of the task IDs used by the controller to keep track of the Tasks and their results. Now at some point the user is going to want to get those results back. The ``barrier`` method allows the user to wait for the Tasks to finish running::
46 This will give the user a list of the task IDs used by the controller to keep track of the Tasks and their results. Now at some point the user is going to want to get those results back. The ``barrier`` method allows the user to wait for the Tasks to finish running::
47
47
48 In [6]: tc.barrier(taskids)
48 In [6]: tc.barrier(taskids)
49
49
50 This command will block until all the Tasks in ``taskids`` have finished. Now, the user probably wants to look at their results::
50 This command will block until all the Tasks in ``taskids`` have finished. Now, the user probably wants to look at their results::
51
51
52 In [7]: task_results = [ tc.get_task_result(taskid) for taskid in taskids ]
52 In [7]: task_results = [ tc.get_task_result(taskid) for taskid in taskids ]
53
53
54 Now the user has a list of ``TaskResult`` objects, which have the actual result as a dictionary, but also keep track of some useful metadata about the ``Task``::
54 Now the user has a list of ``TaskResult`` objects, which have the actual result as a dictionary, but also keep track of some useful metadata about the ``Task``::
55
55
56 In [8]: tr = task_results[73]
56 In [8]: tr = task_results[73]
57
57
58 In [9]: tr
58 In [9]: tr
59 Out[9]: TaskResult[ID:73]:{'a':73}
59 Out[9]: TaskResult[ID:73]:{'a':73}
60
60
61 In [10]: tr.engineid
61 In [10]: tr.engineid
62 Out[10]: 1
62 Out[10]: 1
63
63
64 In [11]: tr.submitted, tr.completed, tr.duration
64 In [11]: tr.submitted, tr.completed, tr.duration
65 Out[11]: ("2008/03/08 03:41:42", "2008/03/08 03:41:44", 2.12345)
65 Out[11]: ("2008/03/08 03:41:42", "2008/03/08 03:41:44", 2.12345)
66
66
67 The actual results are stored in a dictionary, ``tr.results``, and a namespace object ``tr.ns`` which accesses the result keys by attribute::
67 The actual results are stored in a dictionary, ``tr.results``, and a namespace object ``tr.ns`` which accesses the result keys by attribute::
68
68
69 In [12]: tr.results['a']
69 In [12]: tr.results['a']
70 Out[12]: 73
70 Out[12]: 73
71
71
72 In [13]: tr.ns.a
72 In [13]: tr.ns.a
73 Out[13]: 73
73 Out[13]: 73
74
74
75 That should cover the basics of running simple Tasks. There are several more powerful things the user can do with Tasks, which are covered later. The most useful is probably using a ``MultiEngineClient`` interface to initialize all the engines with the import dependencies necessary to run their Tasks.
75 That should cover the basics of running simple Tasks. There are several more powerful things the user can do with Tasks, which are covered later. The most useful is probably using a ``MultiEngineClient`` interface to initialize all the engines with the import dependencies necessary to run their Tasks.
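
A sketch of that pattern (assuming NumPy is installed on the engines, and that the default ``MultiEngineClient`` constructor connects like the ``TaskClient`` one used above)::

    from IPython.kernel import client

    mec = client.MultiEngineClient()
    tc = client.TaskClient()

    # Initialize every engine with the import dependencies once...
    mec.execute('import numpy')

    # ...then farm out Tasks that rely on them.
    t = client.Task("a = numpy.arange(10).sum()", pull="a")
    tid = tc.run(t)
    tr = tc.get_task_result(tid, block=True)
    print tr.ns.a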
76
76
77 There are many options for running and managing Tasks. The best way to learn more about the ``Task`` interface is to study the examples in ``docs/examples``. If the user does so and learns a lot about this interface, we encourage them to expand this documentation of the ``Task`` system.
77 There are many options for running and managing Tasks. The best way to learn more about the ``Task`` interface is to study the examples in ``docs/examples``. If the user does so and learns a lot about this interface, we encourage them to expand this documentation of the ``Task`` system.
78
78
79 Overview of the Task System
79 Overview of the Task System
80 ===========================
80 ===========================
81
81
82 The user's view of the ``Task`` system has three basic objects: the ``TaskClient``, the ``Task``, and the ``TaskResult``. The names of these three objects indicate their roles well.
82 The user's view of the ``Task`` system has three basic objects: the ``TaskClient``, the ``Task``, and the ``TaskResult``. The names of these three objects indicate their roles well.
83
83
84 The ``TaskClient`` is the user's ``Task`` farming connection to the IPython cluster. Unlike the ``MultiEngineClient``, the ``TaskController`` handles all the scheduling and distribution of work, so the ``TaskClient`` has no notion of engines; it just submits Tasks and requests their results. The Tasks are described as ``Task`` objects, and their results are wrapped in ``TaskResult`` objects. Thus, there are very few necessary methods for the user to manage.
84 The ``TaskClient`` is the user's ``Task`` farming connection to the IPython cluster. Unlike the ``MultiEngineClient``, the ``TaskController`` handles all the scheduling and distribution of work, so the ``TaskClient`` has no notion of engines; it just submits Tasks and requests their results. The Tasks are described as ``Task`` objects, and their results are wrapped in ``TaskResult`` objects. Thus, there are very few necessary methods for the user to manage.
85
85
86 Inside the task system is a Scheduler object, which assigns tasks to workers. The default scheduler is a simple FIFO queue. Subclassing the Scheduler should be easy; it only requires implementing your own priority system.
86 Inside the task system is a Scheduler object, which assigns tasks to workers. The default scheduler is a simple FIFO queue. Subclassing the Scheduler should be easy; it only requires implementing your own priority system.
87
87
88 The TaskClient
88 The TaskClient
89 ==============
89 ==============
90
90
91 The ``TaskClient`` is the object the user uses to connect to the ``Controller`` that is managing their Tasks. It is the analog of the ``MultiEngineClient`` for the standard IPython multiplexing interface. As with all client interfaces, the first step is to import the IPython ``client`` module::
91 The ``TaskClient`` is the object the user uses to connect to the ``Controller`` that is managing their Tasks. It is the analog of the ``MultiEngineClient`` for the standard IPython multiplexing interface. As with all client interfaces, the first step is to import the IPython ``client`` module::
92
92
93 In [1]: from ipython1.kernel import client
93 In [1]: from IPython.kernel import client
94
94
95 Just as with the ``MultiEngineClient``, the user creates the ``TaskClient`` with a tuple containing the IP address and port of the ``Controller``. The ``client`` module conveniently provides the default address of the ``Task`` interface of the controller. Creating a default ``TaskClient`` object is done like this::
95 Just as with the ``MultiEngineClient``, the user creates the ``TaskClient`` with a tuple containing the IP address and port of the ``Controller``. The ``client`` module conveniently provides the default address of the ``Task`` interface of the controller. Creating a default ``TaskClient`` object is done like this::
96
96
97 In [2]: tc = client.TaskClient(client.default_task_address)
97 In [2]: tc = client.TaskClient(client.default_task_address)
98
98
99 or, if the user wants to specify a non-default location of the ``Controller``, they can do so explicitly::
99 or, if the user wants to specify a non-default location of the ``Controller``, they can do so explicitly::
100
100
101 In [3]: tc = client.TaskClient(("192.168.1.1", 10113))
101 In [3]: tc = client.TaskClient(("192.168.1.1", 10113))
102
102
103 As discussed earlier, the ``TaskClient`` only has a few basic methods.
103 As discussed earlier, the ``TaskClient`` only has a few basic methods.
104
104
105 * ``tc.run(task)``
105 * ``tc.run(task)``
106 ``run`` is the method by which the user submits Tasks. It takes exactly one argument, a ``Task`` object. All the advanced control of ``Task`` behavior is handled by properties of the ``Task`` object, rather than the submission command, so they will be discussed later in the `Task`_ section. ``run`` returns an integer, the task ID by which the ``Task`` and its results can be tracked and retrieved::
106 ``run`` is the method by which the user submits Tasks. It takes exactly one argument, a ``Task`` object. All the advanced control of ``Task`` behavior is handled by properties of the ``Task`` object, rather than the submission command, so they will be discussed later in the `Task`_ section. ``run`` returns an integer, the task ID by which the ``Task`` and its results can be tracked and retrieved::
107
107
108 In [4]: taskid = tc.run(task)
108 In [4]: taskid = tc.run(task)
109
109
110 * ``tc.get_task_result(taskid, block=False)``
110 * ``tc.get_task_result(taskid, block=False)``
111 ``get_task_result`` is the method by which results are retrieved. It takes a single integer argument, the task ID of the result the user wishes to retrieve. ``get_task_result`` also takes a keyword argument ``block``. ``block`` specifies whether the user actually wants to wait for the result. If ``block`` is false, as it is by default, ``get_task_result`` will return immediately. If the ``Task`` has completed, it will return the ``TaskResult`` object for that ``Task``. But if the ``Task`` has not completed, it will return ``None``. If the user specifies ``block=True``, then ``get_task_result`` will wait for the ``Task`` to complete, and will always return the ``TaskResult`` for the requested ``Task``.
111 ``get_task_result`` is the method by which results are retrieved. It takes a single integer argument, the task ID of the result the user wishes to retrieve. ``get_task_result`` also takes a keyword argument ``block``. ``block`` specifies whether the user actually wants to wait for the result. If ``block`` is false, as it is by default, ``get_task_result`` will return immediately. If the ``Task`` has completed, it will return the ``TaskResult`` object for that ``Task``. But if the ``Task`` has not completed, it will return ``None``. If the user specifies ``block=True``, then ``get_task_result`` will wait for the ``Task`` to complete, and will always return the ``TaskResult`` for the requested ``Task``.
112 * ``tc.barrier(taskid(s))``
112 * ``tc.barrier(taskid(s))``
113 ``barrier`` is a synchronization method. It takes exactly one argument, a task ID or list of task IDs. ``barrier`` will block until all the specified Tasks have completed. In practice, a barrier is often called between the ``Task`` submission section of the code and the result gathering section::
113 ``barrier`` is a synchronization method. It takes exactly one argument, a task ID or list of task IDs. ``barrier`` will block until all the specified Tasks have completed. In practice, a barrier is often called between the ``Task`` submission section of the code and the result gathering section::
114
114
115 In [5]: taskIDs = [ tc.run(task) for task in myTasks ]
115 In [5]: taskIDs = [ tc.run(task) for task in myTasks ]
116
116
117 In [6]: tc.get_task_result(taskIDs[-1]) is None
117 In [6]: tc.get_task_result(taskIDs[-1]) is None
118 Out[6]: True
118 Out[6]: True
119
119
120 In [7]: tc.barrier(taskIDs)
120 In [7]: tc.barrier(taskIDs)
121
121
122 In [8]: results = [ tc.get_task_result(tid) for tid in taskIDs ]
122 In [8]: results = [ tc.get_task_result(tid) for tid in taskIDs ]
123
123
124 * ``tc.queue_status(verbose=False)``
124 * ``tc.queue_status(verbose=False)``
125 ``queue_status`` is a method for querying the state of the ``TaskController``. ``queue_status`` returns a dict of the form::
125 ``queue_status`` is a method for querying the state of the ``TaskController``. ``queue_status`` returns a dict of the form::
126
126
127 {'scheduled': Tasks that have been submitted but not yet run
127 {'scheduled': Tasks that have been submitted but not yet run
128 'pending' : Tasks that are currently running
128 'pending' : Tasks that are currently running
129 'succeeded': Tasks that have completed successfully
129 'succeeded': Tasks that have completed successfully
130 'failed' : Tasks that have finished with a failure
130 'failed' : Tasks that have finished with a failure
131 }
131 }
132
132
133 If ``verbose`` is not specified (or is ``False``), then the values of the dict are integers, giving the number of Tasks in each state. If ``verbose`` is ``True``, then each element in the dict is a list of the task IDs in that state::
133 If ``verbose`` is not specified (or is ``False``), then the values of the dict are integers, giving the number of Tasks in each state. If ``verbose`` is ``True``, then each element in the dict is a list of the task IDs in that state::
134
134
135 In [8]: tc.queue_status()
135 In [8]: tc.queue_status()
136 Out[8]: {'scheduled': 4,
136 Out[8]: {'scheduled': 4,
137 'pending' : 2,
137 'pending' : 2,
138 'succeeded': 5,
138 'succeeded': 5,
139 'failed' : 1
139 'failed' : 1
140 }
140 }
141
141
142 In [9]: tc.queue_status(verbose=True)
142 In [9]: tc.queue_status(verbose=True)
143 Out[9]: {'scheduled': [8,9,10,11],
143 Out[9]: {'scheduled': [8,9,10,11],
144 'pending' : [6,7],
144 'pending' : [6,7],
145 'succeeded': [0,1,2,4,5],
145 'succeeded': [0,1,2,4,5],
146 'failed' : [3]
146 'failed' : [3]
147 }
147 }
148
148
149 * ``tc.abort(taskid)``
149 * ``tc.abort(taskid)``
150 ``abort`` allows the user to abort Tasks that have already been submitted. ``abort`` will always return immediately. If the ``Task`` has already completed, ``abort`` will raise an ``IndexError`` ("Task Already Completed"). An obvious case for ``abort`` would be where the user submits a long-running ``Task`` with a number of retries (see the `Task`_ section for how to specify retries) in an interactive session, but realizes there has been a typo. The user can then abort the ``Task``, preventing certain failures from cluttering up the queue. It can also be used for parallel search-type problems, where only one ``Task`` will give the solution, so once the user finds the solution, they would want to abort all remaining Tasks to prevent wasted work.
150 ``abort`` allows the user to abort Tasks that have already been submitted. ``abort`` will always return immediately. If the ``Task`` has already completed, ``abort`` will raise an ``IndexError`` ("Task Already Completed"). An obvious case for ``abort`` would be where the user submits a long-running ``Task`` with a number of retries (see the `Task`_ section for how to specify retries) in an interactive session, but realizes there has been a typo. The user can then abort the ``Task``, preventing certain failures from cluttering up the queue. It can also be used for parallel search-type problems, where only one ``Task`` will give the solution, so once the user finds the solution, they would want to abort all remaining Tasks to prevent wasted work.
151 * ``tc.spin()``
151 * ``tc.spin()``
152 ``spin`` simply triggers the scheduler in the ``TaskController``. Under most normal circumstances, this will do nothing. The primary known usage case involves the ``Task`` dependency (see `Dependencies`_). The dependency is a function of an Engine's ``properties``, but changing the ``properties`` via the ``MultiEngineClient`` does not trigger a reschedule event. The main example case for this requires the following event sequence:
152 ``spin`` simply triggers the scheduler in the ``TaskController``. Under most normal circumstances, this will do nothing. The primary known usage case involves the ``Task`` dependency (see `Dependencies`_). The dependency is a function of an Engine's ``properties``, but changing the ``properties`` via the ``MultiEngineClient`` does not trigger a reschedule event. The main example case for this requires the following event sequence:
153 * ``engine`` is available, ``Task`` is submitted, but ``engine`` does not have ``Task``'s dependencies.
153 * ``engine`` is available, ``Task`` is submitted, but ``engine`` does not have ``Task``'s dependencies.
154 * ``engine`` gets necessary dependencies while no new Tasks are submitted or completed.
154 * ``engine`` gets necessary dependencies while no new Tasks are submitted or completed.
155 * now ``engine`` can run ``Task``, but a ``Task`` event is required for the ``TaskController`` to try scheduling ``Task`` again.
155 * now ``engine`` can run ``Task``, but a ``Task`` event is required for the ``TaskController`` to try scheduling ``Task`` again.
156
156
157 ``spin`` is just an empty ping method to ensure that the Controller has scheduled all available Tasks, and should not be needed under most normal circumstances.
157 ``spin`` is just an empty ping method to ensure that the Controller has scheduled all available Tasks, and should not be needed under most normal circumstances.
158
158
159 That covers the ``TaskClient``, a simple interface to the cluster. With it, the user can submit jobs (and abort them if necessary), request their results, and synchronize on arbitrary subsets of jobs.
159 That covers the ``TaskClient``, a simple interface to the cluster. With it, the user can submit jobs (and abort them if necessary), request their results, and synchronize on arbitrary subsets of jobs.
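
Putting those methods together, a typical submit/monitor/collect cycle might look like the following sketch (``tc`` is assumed to be a connected ``TaskClient``)::

    tasks = [client.Task("b = %i**2" % i, pull="b") for i in range(100)]
    taskids = [tc.run(t) for t in tasks]

    # A coarse look at how far along the queue is.
    print tc.queue_status()

    # Block until every submitted Task has finished, then collect results.
    tc.barrier(taskids)
    results = [tc.get_task_result(tid) for tid in taskids]
    squares = [tr.ns.b for tr in results]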
160
160
161 .. _task: The Task Object
161 .. _task: The Task Object
162
162
163 The Task Object
163 The Task Object
164 ===============
164 ===============
165
165
166 The ``Task`` is the basic object for describing a job. It can be used in a very simple manner, where the user just specifies a command string to be executed as the ``Task``. The usage of this first argument is exactly the same as the ``execute`` method of the ``MultiEngine`` (in fact, ``execute`` is called to run the code)::
166 The ``Task`` is the basic object for describing a job. It can be used in a very simple manner, where the user just specifies a command string to be executed as the ``Task``. The usage of this first argument is exactly the same as the ``execute`` method of the ``MultiEngine`` (in fact, ``execute`` is called to run the code)::
167
167
168 In [1]: t = client.Task("a = str(id)")
168 In [1]: t = client.Task("a = str(id)")
169
169
170 This ``Task`` would run, and store the string representation of the ``id`` element in ``a`` in each worker's namespace, but it is fairly useless because the user does not know anything about the state of the ``worker`` on which it ran at the time of retrieving results. It is important that each ``Task`` not expect the state of the ``worker`` to persist after the ``Task`` is completed.
170 This ``Task`` would run, and store the string representation of the ``id`` element in ``a`` in each worker's namespace, but it is fairly useless because the user does not know anything about the state of the ``worker`` on which it ran at the time of retrieving results. It is important that each ``Task`` not expect the state of the ``worker`` to persist after the ``Task`` is completed.
171 There are many different situations for using ``Task`` Farming, and the ``Task`` object has many attributes for use in customizing the ``Task`` behavior. All of a ``Task``'s attributes may be specified in the constructor, through keyword arguments, or after ``Task`` construction through attribute assignment.
171 There are many different situations for using ``Task`` Farming, and the ``Task`` object has many attributes for use in customizing the ``Task`` behavior. All of a ``Task``'s attributes may be specified in the constructor, through keyword arguments, or after ``Task`` construction through attribute assignment.
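
For instance (a small sketch; ``f`` and ``x`` are placeholders), these two ways of building the same ``Task`` are equivalent::

    t1 = client.Task("a = f(x)", push=dict(x=10), pull="a", retries=1)

    t2 = client.Task("a = f(x)")
    t2.push = dict(x=10)
    t2.pull = "a"
    t2.retries = 1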
172
172
173 Data Attributes
173 Data Attributes
174 ***************
174 ***************
175 It is likely that the user will want to move data around before or after executing the ``Task``. We provide methods for sending data to initialize the worker's namespace, and for specifying what data to bring back as the ``Task``'s results.
175 It is likely that the user will want to move data around before or after executing the ``Task``. We provide methods for sending data to initialize the worker's namespace, and for specifying what data to bring back as the ``Task``'s results.
176
176
177 * pull = []
177 * pull = []
178 The obvious case is as above, where ``t`` would execute and store a result in ``a``; it is likely that the user would want to bring ``a`` back to their namespace. This is done through the ``pull`` attribute. ``pull`` can be a string or list of strings, and it specifies the names of variables to be retrieved. The ``TaskResult`` object retrieved by ``get_task_result`` will have a dictionary of keys and values, and the ``Task``'s ``pull`` attribute determines what goes into it::
178 The obvious case is as above, where ``t`` would execute and store a result in ``a``; it is likely that the user would want to bring ``a`` back to their namespace. This is done through the ``pull`` attribute. ``pull`` can be a string or list of strings, and it specifies the names of variables to be retrieved. The ``TaskResult`` object retrieved by ``get_task_result`` will have a dictionary of keys and values, and the ``Task``'s ``pull`` attribute determines what goes into it::
179
179
180 In [2]: t = client.Task("a = str(id)", pull = "a")
180 In [2]: t = client.Task("a = str(id)", pull = "a")
181
181
182 In [3]: t = client.Task("a = str(id)", pull = ["a", "id"])
182 In [3]: t = client.Task("a = str(id)", pull = ["a", "id"])
183
183
184 * push = {}
184 * push = {}
185 A user might also want to initialize some data into the namespace before the code part of the ``Task`` is run. Enter ``push``. ``push`` is a dictionary of key/value pairs to be loaded from the user's namespace into the worker's namespace immediately before execution::
185 A user might also want to initialize some data into the namespace before the code part of the ``Task`` is run. Enter ``push``. ``push`` is a dictionary of key/value pairs to be loaded from the user's namespace into the worker's namespace immediately before execution::
186
186
187 In [4]: t = client.Task("a = f(submitted)", push=dict(submitted=time.time()), pull="a")
187 In [4]: t = client.Task("a = f(submitted)", push=dict(submitted=time.time()), pull="a")
188
188
189 ``push`` and ``pull`` result directly in calling an ``engine``'s ``push`` and ``pull`` methods before and after ``Task`` execution respectively, and thus their API is the same.
189 ``push`` and ``pull`` result directly in calling an ``engine``'s ``push`` and ``pull`` methods before and after ``Task`` execution respectively, and thus their API is the same.
190
190
191 Namespace Cleaning
191 Namespace Cleaning
192 ******************
192 ******************
193 When a user is running a large number of Tasks, it is likely that the workers' namespaces could become cluttered. Some Tasks might be sensitive to clutter, while others might be known to cause namespace pollution. For these reasons, Tasks have two boolean attributes for cleaning up the namespace.
193 When a user is running a large number of Tasks, it is likely that the workers' namespaces could become cluttered. Some Tasks might be sensitive to clutter, while others might be known to cause namespace pollution. For these reasons, Tasks have two boolean attributes for cleaning up the namespace.
194
194
195 * ``clear_after``
195 * ``clear_after``
196 If ``clear_after`` is set to ``True``, the worker on which the ``Task`` was run will be reset (via ``engine.reset``) upon completion of the ``Task``. This can be useful both for Tasks that produce clutter and for Tasks whose intermediate data one might wish to keep private::
196 If ``clear_after`` is set to ``True``, the worker on which the ``Task`` was run will be reset (via ``engine.reset``) upon completion of the ``Task``. This can be useful both for Tasks that produce clutter and for Tasks whose intermediate data one might wish to keep private::
197
197
198 In [5]: t = client.Task("a = range(1e10)", pull="a", clear_after=True)
198 In [5]: t = client.Task("a = range(1e10)", pull="a", clear_after=True)
199
199
200
200
201 * ``clear_before``
201 * ``clear_before``
202 As one might guess, ``clear_before`` is identical to ``clear_after``, but it takes place before the ``Task`` is run. This ensures that the ``Task`` runs on a fresh worker::
202 As one might guess, ``clear_before`` is identical to ``clear_after``, but it takes place before the ``Task`` is run. This ensures that the ``Task`` runs on a fresh worker::
203
203
204 In [6]: t = client.Task("a = globals()", pull="a", clear_before=True)
204 In [6]: t = client.Task("a = globals()", pull="a", clear_before=True)
205
205
206 Of course, a user can use both at the same time, ensuring that all workers are clear except when they are currently running a job. Both of these default to ``False``.
206 Of course, a user can use both at the same time, ensuring that all workers are clear except when they are currently running a job. Both of these default to ``False``.
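
A sketch of combining the two, so the worker is wiped both before and after the job::

    t = client.Task("import os; a = os.getpid()", pull="a",
                    clear_before=True, clear_after=True)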
207
207
208 Fault Tolerance
208 Fault Tolerance
209 ***************
209 ***************
210 It is possible that Tasks might fail, and there are a variety of reasons this could happen. One might be that the worker a ``Task`` was running on disconnected, even though there was nothing wrong with the ``Task`` itself. With the fault tolerance attributes of the ``Task``, the user can specify how many times to resubmit the ``Task``, and what to do if it never succeeds.
210 It is possible that Tasks might fail, and there are a variety of reasons this could happen. One might be that the worker a ``Task`` was running on disconnected, even though there was nothing wrong with the ``Task`` itself. With the fault tolerance attributes of the ``Task``, the user can specify how many times to resubmit the ``Task``, and what to do if it never succeeds.
211
211
212 * ``retries``
212 * ``retries``
213 ``retries`` is an integer, specifying the number of times a ``Task`` is to be retried. It defaults to zero. It is often a good idea for this number to be 1 or 2, to protect the ``Task`` from disconnecting engines, but not a large number. If a ``Task`` is failing 100 times, there is probably something wrong with the ``Task`` itself. The canonical bad example::
213 ``retries`` is an integer, specifying the number of times a ``Task`` is to be retried. It defaults to zero. It is often a good idea for this number to be 1 or 2, to protect the ``Task`` from disconnecting engines, but not a large number. If a ``Task`` is failing 100 times, there is probably something wrong with the ``Task`` itself. The canonical bad example::
214
214
215 In [7]: t = client.Task("os.kill(os.getpid(), 9)", retries=99)
215 In [7]: t = client.Task("os.kill(os.getpid(), 9)", retries=99)
216
216
217 This would actually take down 100 workers.
217 This would actually take down 100 workers.
218
218
219 * ``recovery_task``
219 * ``recovery_task``
220 ``recovery_task`` is another ``Task`` object, to be run in the event that the original ``Task`` still fails after running out of retries. Since ``recovery_task`` is another ``Task`` object, it can have its own ``recovery_task``. The chain of Tasks is limitless, except loops are not allowed (that would be bad!).
220 ``recovery_task`` is another ``Task`` object, to be run in the event that the original ``Task`` still fails after running out of retries. Since ``recovery_task`` is another ``Task`` object, it can have its own ``recovery_task``. The chain of Tasks is limitless, except loops are not allowed (that would be bad!).
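
For example, a sketch of chaining a fallback computation onto a flaky one (``flaky_calc`` and ``safe_calc`` are hypothetical functions assumed to already exist on the engines)::

    fallback = client.Task("x = safe_calc()", pull="x")
    t = client.Task("x = flaky_calc()", pull="x",
                    retries=2, recovery_task=fallback)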
221
221
222 Dependencies
222 Dependencies
223 ************
223 ************
224 Dependencies are the most powerful part of the ``Task`` farming system, because they allow the user to do some classification of the workers and to guide the ``Task`` distribution without meddling with the controller directly. They make use of two objects: the ``Task``'s ``depend`` attribute and the engine's ``properties``. See the `MultiEngine`_ reference for how to use engine properties. The engine properties API exists for extending IPython, allowing conditional execution and new controllers that make decisions based on the properties of their engines. Currently the ``Task`` dependency is the only internal use of the properties API.
224 Dependencies are the most powerful part of the ``Task`` farming system, because they allow the user to do some classification of the workers and to guide the ``Task`` distribution without meddling with the controller directly. They make use of two objects: the ``Task``'s ``depend`` attribute and the engine's ``properties``. See the `MultiEngine`_ reference for how to use engine properties. The engine properties API exists for extending IPython, allowing conditional execution and new controllers that make decisions based on the properties of their engines. Currently the ``Task`` dependency is the only internal use of the properties API.
225
225
226 .. _MultiEngine: ./parallel_multiengine
226 .. _MultiEngine: ./parallel_multiengine
227
227
228 The ``depend`` attribute of a ``Task`` must be a function of exactly one argument, the worker's properties dictionary, and it should return ``True`` if the ``Task`` should be allowed to run on the worker and ``False`` if not. The usage in the controller is fault tolerant, so exceptions raised by ``Task.depend`` will be ignored and are functionally equivalent to always returning ``False``. Tasks with invalid ``depend`` functions will never be assigned to a worker::
228 The ``depend`` attribute of a ``Task`` must be a function of exactly one argument, the worker's properties dictionary, and it should return ``True`` if the ``Task`` should be allowed to run on the worker and ``False`` if not. The usage in the controller is fault tolerant, so exceptions raised by ``Task.depend`` will be ignored and are functionally equivalent to always returning ``False``. Tasks with invalid ``depend`` functions will never be assigned to a worker::
229
229
230 In [8]: def dep(properties):
230 In [8]: def dep(properties):
231 ... return properties["RAM"] > 2**32 # have at least 4GB
231 ... return properties["RAM"] > 2**32 # have at least 4GB
232 In [9]: t = client.Task("a = bigfunc()", depend=dep)
232 In [9]: t = client.Task("a = bigfunc()", depend=dep)
233
233
234 It is important to note that assignment of values to the properties dict is done entirely by the user, either locally (in the engine) using the EngineAPI, or remotely, through the ``MultiEngineClient``'s get/set_properties methods.
234 It is important to note that assignment of values to the properties dict is done entirely by the user, either locally (in the engine) using the EngineAPI, or remotely, through the ``MultiEngineClient``'s get/set_properties methods.
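
A sketch of the remote route (``mec`` is a connected ``MultiEngineClient``, and the exact ``set_properties`` signature used here is an assumption; see the `MultiEngine`_ reference for the authoritative API)::

    # Hypothetical: tag engines 0 and 1 as having plenty of memory, then
    # submit a Task whose ``depend`` function checks that property.
    mec.set_properties(dict(RAM=2**34), targets=[0, 1])

    def dep(properties):
        return properties.get("RAM", 0) > 2**32

    t = client.Task("a = bigfunc()", depend=dep)
    tc.run(t)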
235
235
236
236
237
237
238
238
239
239
240
240