##// END OF EJS Templates
Update the parallel-computing API following SageDays 29.
MinRK
Show More
@@ -0,0 +1,69 b''
1 """Tests for asyncresult.py"""
2
3 #-------------------------------------------------------------------------------
4 # Copyright (C) 2011 The IPython Development Team
5 #
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING, distributed as part of this software.
8 #-------------------------------------------------------------------------------
9
10 #-------------------------------------------------------------------------------
11 # Imports
12 #-------------------------------------------------------------------------------
13
14
15 from IPython.zmq.parallel.error import TimeoutError
16
17 from IPython.zmq.parallel.tests import add_engines
18 from .clienttest import ClusterTestCase
19
def setup():
    """Module-level setup run once by nose: ensure 2 engines are registered."""
    add_engines(2)
22
def wait(n):
    """Remote-friendly helper: sleep for `n` seconds, then return `n`."""
    from time import sleep
    sleep(n)
    return n
27
class AsyncResultTest(ClusterTestCase):
    """Exercise the AsyncResult interface against a live test cluster."""

    def test_single_result(self):
        """single-engine views return scalars, list-style views return lists"""
        last = self.client.ids[-1]
        ar = self.client[last].apply_async(lambda : 42)
        self.assertEquals(ar.get(), 42)
        # explicit-list and slice views wrap even a single result in a list
        ar = self.client[[last]].apply_async(lambda : 42)
        self.assertEquals(ar.get(), [42])
        ar = self.client[-1:].apply_async(lambda : 42)
        self.assertEquals(ar.get(), [42])

    def test_get_after_done(self):
        """get() can be called repeatedly once the result has arrived"""
        ar = self.client[-1].apply_async(lambda : 42)
        self.assertFalse(ar.ready())
        ar.wait()
        self.assertTrue(ar.ready())
        # the cached result is returned on every subsequent call
        for _ in range(2):
            self.assertEquals(ar.get(), 42)

    def test_get_before_done(self):
        """get(0) on a still-pending result raises TimeoutError"""
        ar = self.client[-1].apply_async(wait, 0.1)
        self.assertRaises(TimeoutError, ar.get, 0)
        ar.wait(0)
        self.assertFalse(ar.ready())
        self.assertEquals(ar.get(), 0.1)

    def test_get_after_error(self):
        """remote exceptions are re-raised by every get()/get_dict() call"""
        ar = self.client[-1].apply_async(lambda : 1/0)
        ar.wait()
        for _ in range(2):
            self.assertRaisesRemote(ZeroDivisionError, ar.get)
        self.assertRaisesRemote(ZeroDivisionError, ar.get_dict)

    def test_get_dict(self):
        """get_dict() keys one result per engine by engine id"""
        n = len(self.client)
        ar = self.client[:].apply_async(lambda : 5)
        self.assertEquals(ar.get(), [5]*n)
        d = ar.get_dict()
        self.assertEquals(sorted(d.keys()), sorted(self.client.ids))
        for r in d.itervalues():
            self.assertEquals(r, 5)
69
@@ -0,0 +1,101 b''
1 """Tests for dependency.py"""
2
3 __docformat__ = "restructuredtext en"
4
5 #-------------------------------------------------------------------------------
6 # Copyright (C) 2011 The IPython Development Team
7 #
8 # Distributed under the terms of the BSD License. The full license is in
9 # the file COPYING, distributed as part of this software.
10 #-------------------------------------------------------------------------------
11
12 #-------------------------------------------------------------------------------
13 # Imports
14 #-------------------------------------------------------------------------------
15
16 # import
17 import os
18
19 from IPython.utils.pickleutil import can, uncan
20
21 from IPython.zmq.parallel import dependency as dmod
22 from IPython.zmq.parallel.util import interactive
23
24 from IPython.zmq.parallel.tests import add_engines
25 from .clienttest import ClusterTestCase
26
def setup():
    """Module-level setup run once by nose: ensure at least 1 engine is registered."""
    add_engines(1)
29
@dmod.require('time')
def wait(n):
    """Sleep for `n` seconds, then return `n`.

    Note: `time` is deliberately NOT imported here -- the @require
    decorator injects it into the function's namespace when it runs
    on an engine.
    """
    time.sleep(n)
    return n
34
# msg_id fixtures for the dependency checks below: all ten ids,
# the even ("completed") ids, and the odd ("failed") ids.
mixed = [str(i) for i in range(10)]
completed = [str(i) for i in range(0, 10, 2)]
failed = [str(i) for i in range(1, 10, 2)]
38
class DependencyTest(ClusterTestCase):
    """Validate Dependency.check()/unreachable() against fixed outcome sets."""

    def setUp(self):
        ClusterTestCase.setUp(self)
        self.user_ns = {'__builtins__' : __builtins__}
        self.view = self.client.load_balanced_view()
        self.dview = self.client[-1]
        # even msg_ids succeeded, odd msg_ids failed
        self.succeeded = set([str(i) for i in range(0, 25, 2)])
        self.failed = set([str(i) for i in range(1, 25, 2)])

    def assertMet(self, dep):
        """`dep` must be satisfied by the recorded outcomes"""
        self.assertTrue(dep.check(self.succeeded, self.failed), "Dependency should be met")

    def assertUnmet(self, dep):
        """`dep` must not (yet) be satisfied"""
        self.assertFalse(dep.check(self.succeeded, self.failed), "Dependency should not be met")

    def assertUnreachable(self, dep):
        """`dep` can never become satisfied"""
        self.assertTrue(dep.unreachable(self.succeeded, self.failed), "Dependency should be unreachable")

    def assertReachable(self, dep):
        """`dep` could still become satisfied"""
        self.assertFalse(dep.unreachable(self.succeeded, self.failed), "Dependency should be reachable")

    def cancan(self, f):
        """decorator to pass through canning into self.user_ns"""
        return uncan(can(f), self.user_ns)

    def test_require_imports(self):
        """test that @require imports names"""
        @self.cancan
        @dmod.require('urllib')
        @interactive
        def encode(dikt):
            return urllib.urlencode(dikt)
        # must pass through canning to properly connect namespaces
        self.assertEquals(encode(dict(a=5)), 'a=5')

    def test_success_only(self):
        """success-only dependency: met iff enough jobs succeeded"""
        dep = dmod.Dependency(mixed, success=True, failure=False)
        # all-mode over mixed outcomes: some failed, so never satisfiable
        self.assertUnmet(dep)
        self.assertUnreachable(dep)
        dep.all = False
        # any-mode: a single success suffices
        self.assertMet(dep)
        self.assertReachable(dep)
        dep = dmod.Dependency(completed, success=True, failure=False)
        self.assertMet(dep)
        self.assertReachable(dep)
        dep.all = False
        self.assertMet(dep)
        self.assertReachable(dep)

    def test_failure_only(self):
        """failure-only dependency: met iff enough jobs failed"""
        dep = dmod.Dependency(mixed, success=False, failure=True)
        self.assertUnmet(dep)
        self.assertUnreachable(dep)
        dep.all = False
        self.assertMet(dep)
        self.assertReachable(dep)
        # every job in `completed` succeeded: a failure dep can never be met
        dep = dmod.Dependency(completed, success=False, failure=True)
        self.assertUnmet(dep)
        self.assertUnreachable(dep)
        dep.all = False
        self.assertUnmet(dep)
        self.assertUnreachable(dep)
@@ -0,0 +1,287 b''
1 """test View objects"""
2 #-------------------------------------------------------------------------------
3 # Copyright (C) 2011 The IPython Development Team
4 #
5 # Distributed under the terms of the BSD License. The full license is in
6 # the file COPYING, distributed as part of this software.
7 #-------------------------------------------------------------------------------
8
9 #-------------------------------------------------------------------------------
10 # Imports
11 #-------------------------------------------------------------------------------
12
13 import time
14 from tempfile import mktemp
15
16 import zmq
17
18 from IPython.zmq.parallel import client as clientmod
19 from IPython.zmq.parallel import error
20 from IPython.zmq.parallel.asyncresult import AsyncResult, AsyncHubResult, AsyncMapResult
21 from IPython.zmq.parallel.view import LoadBalancedView, DirectView
22 from IPython.zmq.parallel.util import interactive
23
24 from IPython.zmq.parallel.tests import add_engines
25
26 from .clienttest import ClusterTestCase, segfault, wait, skip_without
27
28 def setup():
29 add_engines(3)
30
class TestView(ClusterTestCase):
    # Integration tests for DirectView / LoadBalancedView against the
    # live 3-engine test cluster started by setup() above.

    def test_segfault_task(self):
        """test graceful handling of engine death (balanced)"""
        # self.add_engines(1)
        ar = self.client[-1].apply_async(segfault)
        self.assertRaisesRemote(error.EngineError, ar.get)
        eid = ar.engine_id
        # spin until the Hub drops the dead engine from the registry
        while eid in self.client.ids:
            time.sleep(.01)
            self.client.spin()

    def test_segfault_mux(self):
        """test graceful handling of engine death (direct)"""
        # self.add_engines(1)
        eid = self.client.ids[-1]
        ar = self.client[eid].apply_async(segfault)
        self.assertRaisesRemote(error.EngineError, ar.get)
        eid = ar.engine_id
        # spin until the Hub drops the dead engine from the registry
        while eid in self.client.ids:
            time.sleep(.01)
            self.client.spin()

    def test_push_pull(self):
        """test pushing and pulling"""
        data = dict(a=10, b=1.05, c=range(10), d={'e':(1,2),'f':'hi'})
        t = self.client.ids[-1]
        v = self.client[t]
        push = v.push
        pull = v.pull
        v.block=True
        nengines = len(self.client)
        push({'data':data})
        d = pull('data')
        self.assertEquals(d, data)
        # broadcast to all engines; pull returns one value per engine
        self.client[:].push({'data':data})
        d = self.client[:].pull('data', block=True)
        self.assertEquals(d, nengines*[data])
        # non-blocking variants return AsyncResults
        ar = push({'data':data}, block=False)
        self.assertTrue(isinstance(ar, AsyncResult))
        r = ar.get()
        ar = self.client[:].pull('data', block=False)
        self.assertTrue(isinstance(ar, AsyncResult))
        r = ar.get()
        self.assertEquals(r, nengines*[data])
        # pulling a tuple of names returns a list of values per engine
        self.client[:].push(dict(a=10,b=20))
        r = self.client[:].pull(('a','b'))
        self.assertEquals(r, nengines*[[10,20]])

    def test_push_pull_function(self):
        "test pushing and pulling functions"
        def testf(x):
            return 2.0*x

        t = self.client.ids[-1]
        self.client[t].block=True
        push = self.client[t].push
        pull = self.client[t].pull
        execute = self.client[t].execute
        push({'testf':testf})
        r = pull('testf')
        self.assertEqual(r(1.0), testf(1.0))
        # the pushed function is callable from remotely executed code too
        execute('r = testf(10)')
        r = pull('r')
        self.assertEquals(r, testf(10))
        ar = self.client[:].push({'testf':testf}, block=False)
        ar.get()
        ar = self.client[:].pull('testf', block=False)
        rlist = ar.get()
        for r in rlist:
            self.assertEqual(r(1.0), testf(1.0))
        execute("def g(x): return x*x")
        r = pull(('testf','g'))
        self.assertEquals((r[0](10),r[1](10)), (testf(10), 100))

    def test_push_function_globals(self):
        """test that pushed functions have access to globals"""
        @interactive
        def geta():
            return a
        # self.add_engines(1)
        v = self.client[-1]
        v.block=True
        v['f'] = geta
        # 'a' is not yet defined in the engine's namespace
        self.assertRaisesRemote(NameError, v.execute, 'b=f()')
        v.execute('a=5')
        v.execute('b=f()')
        self.assertEquals(v['b'], 5)

    def test_push_function_defaults(self):
        """test that pushed functions preserve default args"""
        def echo(a=10):
            return a
        v = self.client[-1]
        v.block=True
        v['f'] = echo
        v.execute('b=f()')
        self.assertEquals(v['b'], 10)

    def test_get_result(self):
        """test getting results from the Hub."""
        c = clientmod.Client(profile='iptest')
        # self.add_engines(1)
        t = c.ids[-1]
        v = c[t]
        v2 = self.client[t]
        ar = v.apply_async(wait, 1)
        # give the monitor time to notice the message
        time.sleep(.25)
        # a second client only knows the task via the Hub -> AsyncHubResult
        ahr = v2.get_result(ar.msg_ids)
        self.assertTrue(isinstance(ahr, AsyncHubResult))
        self.assertEquals(ahr.get(), ar.get())
        # once fetched, the result is cached locally -> plain AsyncResult
        ar2 = v2.get_result(ar.msg_ids)
        self.assertFalse(isinstance(ar2, AsyncHubResult))
        c.spin()
        c.close()

    def test_run_newline(self):
        """test that run appends newline to files"""
        tmpfile = mktemp()
        with open(tmpfile, 'w') as f:
            f.write("""def g():
                return 5
                """)
        v = self.client[-1]
        v.run(tmpfile, block=True)
        self.assertEquals(v.apply_sync(lambda f: f(), clientmod.Reference('g')), 5)

    def test_apply_tracked(self):
        """test tracking for apply"""
        # self.add_engines(1)
        t = self.client.ids[-1]
        v = self.client[t]
        v.block=False
        def echo(n=1024*1024, **kwargs):
            with v.temp_flags(**kwargs):
                return v.apply(lambda x: x, 'x'*n)
        ar = echo(1, track=False)
        self.assertTrue(isinstance(ar._tracker, zmq.MessageTracker))
        self.assertTrue(ar.sent)
        ar = echo(track=True)
        self.assertTrue(isinstance(ar._tracker, zmq.MessageTracker))
        self.assertEquals(ar.sent, ar._tracker.done)
        ar._tracker.wait()
        self.assertTrue(ar.sent)

    def test_push_tracked(self):
        """test tracking for push"""
        t = self.client.ids[-1]
        # 1MB payload so the zmq send is unlikely to complete instantly
        ns = dict(x='x'*1024*1024)
        v = self.client[t]
        ar = v.push(ns, block=False, track=False)
        self.assertTrue(isinstance(ar._tracker, zmq.MessageTracker))
        self.assertTrue(ar.sent)

        ar = v.push(ns, block=False, track=True)
        self.assertTrue(isinstance(ar._tracker, zmq.MessageTracker))
        self.assertEquals(ar.sent, ar._tracker.done)
        ar._tracker.wait()
        self.assertTrue(ar.sent)
        ar.get()

    def test_scatter_tracked(self):
        """test tracking for scatter"""
        t = self.client.ids
        x='x'*1024*1024
        ar = self.client[t].scatter('x', x, block=False, track=False)
        self.assertTrue(isinstance(ar._tracker, zmq.MessageTracker))
        self.assertTrue(ar.sent)

        ar = self.client[t].scatter('x', x, block=False, track=True)
        self.assertTrue(isinstance(ar._tracker, zmq.MessageTracker))
        self.assertEquals(ar.sent, ar._tracker.done)
        ar._tracker.wait()
        self.assertTrue(ar.sent)
        ar.get()

    def test_remote_reference(self):
        """test passing a Reference as an apply argument"""
        v = self.client[-1]
        v['a'] = 123
        ra = clientmod.Reference('a')
        b = v.apply_sync(lambda x: x, ra)
        self.assertEquals(b, 123)


    def test_scatter_gather(self):
        """test a scatter/gather round-trip"""
        view = self.client[:]
        seq1 = range(16)
        view.scatter('a', seq1)
        seq2 = view.gather('a', block=True)
        self.assertEquals(seq2, seq1)
        self.assertRaisesRemote(NameError, view.gather, 'asdf', block=True)

    @skip_without('numpy')
    def test_scatter_gather_numpy(self):
        """scatter/gather should round-trip numpy arrays"""
        import numpy
        from numpy.testing.utils import assert_array_equal, assert_array_almost_equal
        view = self.client[:]
        a = numpy.arange(64)
        view.scatter('a', a)
        b = view.gather('a', block=True)
        assert_array_equal(b, a)

    def test_map(self):
        """map_sync should agree with the builtin map"""
        view = self.client[:]
        def f(x):
            return x**2
        data = range(16)
        r = view.map_sync(f, data)
        self.assertEquals(r, map(f, data))

    def test_scatterGatherNonblocking(self):
        """non-blocking scatter/gather round-trip"""
        data = range(16)
        view = self.client[:]
        view.scatter('a', data, block=False)
        ar = view.gather('a', block=False)
        self.assertEquals(ar.get(), data)

    @skip_without('numpy')
    def test_scatter_gather_numpy_nonblocking(self):
        """non-blocking scatter/gather of a numpy array"""
        import numpy
        from numpy.testing.utils import assert_array_equal, assert_array_almost_equal
        a = numpy.arange(64)
        view = self.client[:]
        ar = view.scatter('a', a, block=False)
        self.assertTrue(isinstance(ar, AsyncResult))
        amr = view.gather('a', block=False)
        # gather yields an AsyncMapResult so partitions can be rejoined
        self.assertTrue(isinstance(amr, AsyncMapResult))
        assert_array_equal(amr.get(), a)

    def test_execute(self):
        """execute returns one result per engine"""
        view = self.client[:]
        # self.client.debug=True
        execute = view.execute
        ar = execute('c=30', block=False)
        self.assertTrue(isinstance(ar, AsyncResult))
        ar = execute('d=[0,1,2]', block=False)
        self.client.wait(ar, 1)
        self.assertEquals(len(ar.get()), len(self.client))
        for c in view['c']:
            self.assertEquals(c, 30)

    def test_abort(self):
        """abort cancels tasks queued behind a running one"""
        view = self.client[-1]
        # keep the engine busy so the next two tasks stay queued
        ar = view.execute('import time; time.sleep(0.25)', block=False)
        ar2 = view.apply_async(lambda : 2)
        ar3 = view.apply_async(lambda : 3)
        view.abort(ar2)
        view.abort(ar3.msg_ids)
        self.assertRaises(error.TaskAborted, ar2.get)
        self.assertRaises(error.TaskAborted, ar3.get)

    def test_temp_flags(self):
        """temp_flags restores view flags on exit"""
        view = self.client[-1]
        view.block=True
        with view.temp_flags(block=False):
            self.assertFalse(view.block)
        self.assertTrue(view.block)
287
@@ -0,0 +1,97 b''
1 """
2 An exceptionally lousy site spider
3 Ken Kinder <ken@kenkinder.com>
4
5 Updated for newparallel by Min Ragan-Kelley <benjaminrk@gmail.com>
6
7 This module gives an example of how the task interface to the
8 IPython controller works. Before running this script start the IPython controller
9 and some engines using something like::
10
11 ipclusterz start -n 4
12 """
13 import sys
14 from IPython.zmq.parallel import client, error
15 import time
16 import BeautifulSoup # this isn't necessary, but it helps throw the dependency error earlier
17
def fetchAndParse(url, data=None):
    """Download `url` and return the absolute links found on the page.

    Returns an empty list if the page cannot be fetched or is not HTML.
    Imports are local so the function can be shipped to remote engines.
    """
    import urllib2
    import urlparse
    import BeautifulSoup
    links = []
    try:
        page = urllib2.urlopen(url, data=data)
    except Exception:
        # unreachable/bad URL: treat as a leaf with no outgoing links
        return links
    if page.headers.type == 'text/html':
        doc = BeautifulSoup.BeautifulSoup(page.read())
        for anchor in doc.findAll('a'):
            target = anchor.get('href', None)
            if target:
                links.append(urlparse.urljoin(url, target))
    return links
35
class DistributedSpider(object):
    # Crawls a single site by farming page fetches out to engines
    # through a load-balanced view, polling for completed tasks.

    # Time to wait between polling for task results.
    pollingDelay = 0.5

    def __init__(self, site):
        self.client = client.Client()
        self.view = self.client.load_balanced_view()
        self.mux = self.client[:]

        self.allLinks = []       # every URL ever seen (visited or not)
        self.linksWorking = {}   # url -> AsyncResult of a pending fetch
        self.linksDone = {}      # urls whose fetch has completed (or failed)

        self.site = site

    def visitLink(self, url):
        """Queue `url` for fetching if it is new and belongs to the target site."""
        if url not in self.allLinks:
            self.allLinks.append(url)
            if url.startswith(self.site):
                print '    ', url
                self.linksWorking[url] = self.view.apply(fetchAndParse, url)

    def onVisitDone(self, links, url):
        """Record `url` as done and queue every link found on its page."""
        print url, ':'
        self.linksDone[url] = None
        del self.linksWorking[url]
        for link in links:
            self.visitLink(link)

    def run(self):
        """Crawl from the root URL until no fetches remain outstanding."""
        self.visitLink(self.site)
        while self.linksWorking:
            print len(self.linksWorking), 'pending...'
            self.synchronize()
            time.sleep(self.pollingDelay)

    def synchronize(self):
        """Harvest finished fetch tasks without blocking on pending ones."""
        # iterate over a snapshot (.items()) because onVisitDone mutates
        # linksWorking during the loop
        for url, ar in self.linksWorking.items():
            # Calling get_task_result with block=False will return None if the
            # task is not done yet. This provides a simple way of polling.
            try:
                links = ar.get(0)
            except error.TimeoutError:
                continue
            except Exception as e:
                # remote failure: mark done and show the remote traceback
                self.linksDone[url] = None
                del self.linksWorking[url]
                print url, ':', e.traceback
            else:
                self.onVisitDone(links, url)
87
def main():
    """Crawl the site named on the command line, prompting if absent."""
    if len(sys.argv) > 1:
        site = sys.argv[1]
    else:
        site = raw_input('Enter site to crawl: ')
    DistributedSpider(site).run()

if __name__ == '__main__':
    main()
@@ -0,0 +1,19 b''
1 """
2 A Distributed Hello world
3 Ken Kinder <ken@kenkinder.com>
4 """
5 from IPython.zmq.parallel import client
6
7 rc = client.Client()
8
def sleep_and_echo(t, msg):
    """Sleep for `t` seconds on the engine, then echo `msg` back unchanged."""
    from time import sleep
    sleep(t)
    return msg
13
# submit both tasks to the load-balanced scheduler; they run concurrently,
# and the blocking get() calls below reassemble the message in order
view = rc.load_balanced_view()

world = view.apply_async(sleep_and_echo, 3, 'World!')
hello = view.apply_async(sleep_and_echo, 2, 'Hello')
print "Submitted tasks:", hello.msg_ids, world.msg_ids
print hello.get(), world.get()
@@ -1,143 +1,153 b''
1 1 # encoding: utf-8
2 2
3 3 """Pickle related utilities. Perhaps this should be called 'can'."""
4 4
5 5 __docformat__ = "restructuredtext en"
6 6
7 7 #-------------------------------------------------------------------------------
8 8 # Copyright (C) 2008 The IPython Development Team
9 9 #
10 10 # Distributed under the terms of the BSD License. The full license is in
11 11 # the file COPYING, distributed as part of this software.
12 12 #-------------------------------------------------------------------------------
13 13
14 14 #-------------------------------------------------------------------------------
15 15 # Imports
16 16 #-------------------------------------------------------------------------------
17 17
18 from types import FunctionType
19 18 import copy
20
21 from IPython.zmq.parallel.dependency import dependent
19 import sys
20 from types import FunctionType
22 21
23 22 import codeutil
24 23
25 24 #-------------------------------------------------------------------------------
26 25 # Classes
27 26 #-------------------------------------------------------------------------------
28 27
29 28
class CannedObject(object):
    """Wrap an object for pickling, canning selected attributes.

    Parameters
    ----------
    obj : object
        the object to wrap; a shallow copy is stored
    keys : sequence of str, optional
        attribute names of `obj` that must themselves be canned
        (e.g. function-valued attributes)
    """
    def __init__(self, obj, keys=None):
        # fresh list per instance -- the old mutable default ([]) was
        # shared across every CannedObject created with no keys
        self.keys = [] if keys is None else keys
        self.obj = copy.copy(obj)
        for key in self.keys:
            setattr(self.obj, key, can(getattr(obj, key)))


    def getObject(self, g=None):
        """Restore the wrapped object, uncanning attributes into namespace `g`."""
        if g is None:
            g = globals()
        for key in self.keys:
            setattr(self.obj, key, uncan(getattr(self.obj, key), g))
        return self.obj
44 43
class Reference(CannedObject):
    """object for wrapping a remote reference by name."""

    def __init__(self, name):
        if not isinstance(name, basestring):
            raise TypeError("illegal name: %r"%name)
        self.name = name

    def __repr__(self):
        return "<Reference: %r>"%self.name

    def getObject(self, g=None):
        """Look `self.name` up in namespace `g` (defaults to globals())."""
        namespace = globals() if g is None else g
        try:
            return namespace[self.name]
        except KeyError:
            raise NameError("name %r is not defined"%self.name)
62 61
63 62
class CannedFunction(CannedObject):
    # Pickles a plain function by capturing its code object, defaults,
    # and defining module.  NOTE(review): the closure is not captured --
    # closures will not survive canning.

    def __init__(self, f):
        """Can function `f`, which must be a plain FunctionType."""
        self._checkType(f)
        self.code = f.func_code
        self.defaults = f.func_defaults
        # remember where f was defined, so getObject can restore it there
        self.module = f.__module__ or '__main__'
        self.__name__ = f.__name__

    def _checkType(self, obj):
        """Raise AssertionError unless `obj` is a plain function."""
        assert isinstance(obj, FunctionType), "Not a function type"

    def getObject(self, g=None):
        """Rebuild the function, preferring its original module's namespace."""
        # try to load function back into its module:
        if not self.module.startswith('__'):
            try:
                __import__(self.module)
            except ImportError:
                # module not importable here; fall back to the given namespace
                pass
            else:
                g = sys.modules[self.module].__dict__

        if g is None:
            g = globals()
        newFunc = FunctionType(self.code, g, self.__name__, self.defaults)
        return newFunc
80 89
81 90 #-------------------------------------------------------------------------------
82 91 # Functions
83 92 #-------------------------------------------------------------------------------
84 93
85
def can(obj):
    """Convert `obj` into a picklable form, canning functions and
    recursing into dicts, lists, and tuples."""
    # import here to prevent module-level circular imports
    from IPython.zmq.parallel.dependency import dependent
    # NB: check `dependent` before FunctionType -- dependents are callable too
    if isinstance(obj, dependent):
        return CannedObject(obj, keys=('f', 'df'))
    if isinstance(obj, FunctionType):
        return CannedFunction(obj)
    if isinstance(obj, dict):
        return canDict(obj)
    if isinstance(obj, (list, tuple)):
        return canSequence(obj)
    return obj
98 108
def canDict(obj):
    """Return a copy of dict `obj` with every value canned; non-dicts pass through."""
    if not isinstance(obj, dict):
        return obj
    return dict((k, can(v)) for k, v in obj.iteritems())
107 117
def canSequence(obj):
    """Can each element of a list/tuple, preserving the container type."""
    if isinstance(obj, (list, tuple)):
        return type(obj)(can(item) for item in obj)
    return obj
114 124
def uncan(obj, g=None):
    """Invert `can`: restore canned objects, recursing into containers.

    `g` is the namespace handed to CannedObject.getObject for lookups.
    """
    if isinstance(obj, CannedObject):
        return obj.getObject(g)
    if isinstance(obj, dict):
        return uncanDict(obj, g)
    if isinstance(obj, (list, tuple)):
        return uncanSequence(obj, g)
    return obj
124 134
def uncanDict(obj, g=None):
    """Uncan every value of dict `obj`; non-dicts pass through."""
    if not isinstance(obj, dict):
        return obj
    return dict((k, uncan(v, g)) for k, v in obj.iteritems())
133 143
def uncanSequence(obj, g=None):
    """Uncan each element of a list/tuple, preserving the container type."""
    if isinstance(obj, (list, tuple)):
        return type(obj)(uncan(item, g) for item in obj)
    return obj
140 150
141 151
def rebindFunctionGlobals(f, glbls):
    """Return a copy of function `f` whose global namespace is `glbls`.

    The previous implementation passed only (code, globals) to FunctionType,
    silently dropping the function's name, default arguments, and closure;
    preserve all three so the rebound function behaves like `f`.
    """
    return FunctionType(f.func_code, glbls, f.func_name, f.func_defaults, f.func_closure)
@@ -1,322 +1,340 b''
1 1 """AsyncResult objects for the client"""
2 2 #-----------------------------------------------------------------------------
3 3 # Copyright (C) 2010-2011 The IPython Development Team
4 4 #
5 5 # Distributed under the terms of the BSD License. The full license is in
6 6 # the file COPYING, distributed as part of this software.
7 7 #-----------------------------------------------------------------------------
8 8
9 9 #-----------------------------------------------------------------------------
10 10 # Imports
11 11 #-----------------------------------------------------------------------------
12 12
13 13 import time
14 14
15 from zmq import MessageTracker
16
15 17 from IPython.external.decorator import decorator
16 18 from . import error
17 19
18 20 #-----------------------------------------------------------------------------
19 21 # Classes
20 22 #-----------------------------------------------------------------------------
21 23
# A module-level MessageTracker with no pending messages: used as the
# default tracker, so `sent` is True for results that need no tracking.
finished_tracker = MessageTracker()

@decorator
def check_ready(method, self, *args, **kwargs):
    """Sync state with a zero-timeout wait, then require the result to be ready."""
    self.wait(0)
    if not self._ready:
        raise error.TimeoutError("result not ready")
    return method(self, *args, **kwargs)
29 34
30 35 class AsyncResult(object):
31 36 """Class for representing results of non-blocking calls.
32 37
33 38 Provides the same interface as :py:class:`multiprocessing.pool.AsyncResult`.
34 39 """
35 40
36 41 msg_ids = None
37 42 _targets = None
38 43 _tracker = None
44 _single_result = False
39 45
40 46 def __init__(self, client, msg_ids, fname='unknown', targets=None, tracker=None):
41 self._client = client
42 47 if isinstance(msg_ids, basestring):
48 # always a list
43 49 msg_ids = [msg_ids]
50 if tracker is None:
51 # default to always done
52 tracker = finished_tracker
53 self._client = client
44 54 self.msg_ids = msg_ids
45 55 self._fname=fname
46 56 self._targets = targets
47 57 self._tracker = tracker
48 58 self._ready = False
49 59 self._success = None
50 self._single_result = len(msg_ids) == 1
60 if len(msg_ids) == 1:
61 self._single_result = not isinstance(targets, (list, tuple))
62 else:
63 self._single_result = False
51 64
52 65 def __repr__(self):
53 66 if self._ready:
54 67 return "<%s: finished>"%(self.__class__.__name__)
55 68 else:
56 69 return "<%s: %s>"%(self.__class__.__name__,self._fname)
57 70
58 71
59 72 def _reconstruct_result(self, res):
60 73 """Reconstruct our result from actual result list (always a list)
61 74
62 75 Override me in subclasses for turning a list of results
63 76 into the expected form.
64 77 """
65 78 if self._single_result:
66 79 return res[0]
67 80 else:
68 81 return res
69 82
70 83 def get(self, timeout=-1):
71 84 """Return the result when it arrives.
72 85
73 86 If `timeout` is not ``None`` and the result does not arrive within
74 87 `timeout` seconds then ``TimeoutError`` is raised. If the
75 88 remote call raised an exception then that exception will be reraised
76 89 by get() inside a `RemoteError`.
77 90 """
78 91 if not self.ready():
79 92 self.wait(timeout)
80 93
81 94 if self._ready:
82 95 if self._success:
83 96 return self._result
84 97 else:
85 98 raise self._exception
86 99 else:
87 100 raise error.TimeoutError("Result not ready.")
88 101
89 102 def ready(self):
90 103 """Return whether the call has completed."""
91 104 if not self._ready:
92 105 self.wait(0)
93 106 return self._ready
94 107
95 108 def wait(self, timeout=-1):
96 109 """Wait until the result is available or until `timeout` seconds pass.
97 110
98 111 This method always returns None.
99 112 """
100 113 if self._ready:
101 114 return
102 self._ready = self._client.barrier(self.msg_ids, timeout)
115 self._ready = self._client.wait(self.msg_ids, timeout)
103 116 if self._ready:
104 117 try:
105 118 results = map(self._client.results.get, self.msg_ids)
106 119 self._result = results
107 120 if self._single_result:
108 121 r = results[0]
109 122 if isinstance(r, Exception):
110 123 raise r
111 124 else:
112 125 results = error.collect_exceptions(results, self._fname)
113 126 self._result = self._reconstruct_result(results)
114 127 except Exception, e:
115 128 self._exception = e
116 129 self._success = False
117 130 else:
118 131 self._success = True
119 132 finally:
120 133 self._metadata = map(self._client.metadata.get, self.msg_ids)
121 134
122 135
123 136 def successful(self):
124 137 """Return whether the call completed without raising an exception.
125 138
126 139 Will raise ``AssertionError`` if the result is not ready.
127 140 """
128 141 assert self.ready()
129 142 return self._success
130 143
131 144 #----------------------------------------------------------------
132 145 # Extra methods not in mp.pool.AsyncResult
133 146 #----------------------------------------------------------------
134 147
135 148 def get_dict(self, timeout=-1):
136 149 """Get the results as a dict, keyed by engine_id.
137 150
138 151 timeout behavior is described in `get()`.
139 152 """
140 153
141 154 results = self.get(timeout)
142 155 engine_ids = [ md['engine_id'] for md in self._metadata ]
143 156 bycount = sorted(engine_ids, key=lambda k: engine_ids.count(k))
144 157 maxcount = bycount.count(bycount[-1])
145 158 if maxcount > 1:
146 159 raise ValueError("Cannot build dict, %i jobs ran on engine #%i"%(
147 160 maxcount, bycount[-1]))
148 161
149 162 return dict(zip(engine_ids,results))
150 163
151 164 @property
152 @check_ready
153 165 def result(self):
154 166 """result property wrapper for `get(timeout=0)`."""
155 return self._result
167 return self.get()
156 168
157 169 # abbreviated alias:
158 170 r = result
159 171
160 172 @property
161 173 @check_ready
162 174 def metadata(self):
163 175 """property for accessing execution metadata."""
164 176 if self._single_result:
165 177 return self._metadata[0]
166 178 else:
167 179 return self._metadata
168 180
169 181 @property
170 182 def result_dict(self):
171 183 """result property as a dict."""
172 return self.get_dict(0)
184 return self.get_dict()
173 185
174 186 def __dict__(self):
175 187 return self.get_dict(0)
176 188
177 189 def abort(self):
178 190 """abort my tasks."""
179 191 assert not self.ready(), "Can't abort, I am already done!"
180 192 return self.client.abort(self.msg_ids, targets=self._targets, block=True)
181 193
182 194 @property
183 195 def sent(self):
184 """check whether my messages have been sent"""
185 if self._tracker is None:
186 return True
187 else:
196 """check whether my messages have been sent."""
188 197 return self._tracker.done
189 198
199 def wait_for_send(self, timeout=-1):
200 """wait for pyzmq send to complete.
201
202 This is necessary when sending arrays that you intend to edit in-place.
203 `timeout` is in seconds, and will raise TimeoutError if it is reached
204 before the send completes.
205 """
206 return self._tracker.wait(timeout)
207
190 208 #-------------------------------------
191 209 # dict-access
192 210 #-------------------------------------
193 211
194 212 @check_ready
195 213 def __getitem__(self, key):
196 214 """getitem returns result value(s) if keyed by int/slice, or metadata if key is str.
197 215 """
198 216 if isinstance(key, int):
199 217 return error.collect_exceptions([self._result[key]], self._fname)[0]
200 218 elif isinstance(key, slice):
201 219 return error.collect_exceptions(self._result[key], self._fname)
202 220 elif isinstance(key, basestring):
203 221 values = [ md[key] for md in self._metadata ]
204 222 if self._single_result:
205 223 return values[0]
206 224 else:
207 225 return values
208 226 else:
209 227 raise TypeError("Invalid key type %r, must be 'int','slice', or 'str'"%type(key))
210 228
211 229 @check_ready
212 230 def __getattr__(self, key):
213 231 """getattr maps to getitem for convenient attr access to metadata."""
214 232 if key not in self._metadata[0].keys():
215 233 raise AttributeError("%r object has no attribute %r"%(
216 234 self.__class__.__name__, key))
217 235 return self.__getitem__(key)
218 236
219 237 # asynchronous iterator:
220 238 def __iter__(self):
221 239 if self._single_result:
222 240 raise TypeError("AsyncResults with a single result are not iterable.")
223 241 try:
224 242 rlist = self.get(0)
225 243 except error.TimeoutError:
226 244 # wait for each result individually
227 245 for msg_id in self.msg_ids:
228 246 ar = AsyncResult(self._client, msg_id, self._fname)
229 247 yield ar.get()
230 248 else:
231 249 # already done
232 250 for r in rlist:
233 251 yield r
234 252
235 253
236 254
class AsyncMapResult(AsyncResult):
    """Class for representing results of non-blocking gathers.

    This will properly reconstruct the gather.
    """

    def __init__(self, client, msg_ids, mapObject, fname=''):
        AsyncResult.__init__(self, client, msg_ids, fname=fname)
        self._mapObject = mapObject
        # a map result is never a scalar, even for a single message
        self._single_result = False

    def _reconstruct_result(self, res):
        """Perform the gather on the actual results."""
        return self._mapObject.joinPartitions(res)

    def __iter__(self):
        """Iterate over results as they arrive, partition by partition."""
        try:
            chunks = self.get(0)
        except error.TimeoutError:
            # not all done yet: block on each message in submission order
            for msg_id in self.msg_ids:
                chunk = AsyncResult(self._client, msg_id, self._fname).get()
                try:
                    for item in chunk:
                        yield item
                except TypeError:
                    # flattened, not a list
                    # this could get broken by flattened data that returns iterables
                    # but most calls to map do not expose the `flatten` argument
                    yield chunk
        else:
            # already done
            for item in chunks:
                yield item
273 291
274 292
class AsyncHubResult(AsyncResult):
    """Class to wrap pending results that must be requested from the Hub.

    Note that waiting/polling on these objects requires polling the Hub over the network,
    so use `AsyncHubResult.wait()` sparingly.
    """

    def wait(self, timeout=-1):
        """wait for result to complete."""
        start = time.time()
        if self._ready:
            return
        # phase 1: wait locally for any of my messages still in flight
        local_ids = filter(lambda msg_id: msg_id in self._client.outstanding, self.msg_ids)
        local_ready = self._client.wait(local_ids, timeout)
        if local_ready:
            # phase 2: poll the Hub for results this client never saw
            remote_ids = filter(lambda msg_id: msg_id not in self._client.results, self.msg_ids)
            if not remote_ids:
                self._ready = True
            else:
                rdict = self._client.result_status(remote_ids, status_only=False)
                pending = rdict['pending']
                # poll every 0.1s until done or the overall timeout expires
                while pending and (timeout < 0 or time.time() < start+timeout):
                    rdict = self._client.result_status(remote_ids, status_only=False)
                    pending = rdict['pending']
                    if pending:
                        time.sleep(0.1)
                if not pending:
                    self._ready = True
        if self._ready:
            try:
                results = map(self._client.results.get, self.msg_ids)
                self._result = results
                if self._single_result:
                    r = results[0]
                    if isinstance(r, Exception):
                        raise r
                else:
                    results = error.collect_exceptions(results, self._fname)
                    self._result = self._reconstruct_result(results)
            except Exception, e:
                self._exception = e
                self._success = False
            else:
                self._success = True
            finally:
                # metadata is recorded whether the call succeeded or not
                self._metadata = map(self._client.metadata.get, self.msg_ids)
321 339
322 340 __all__ = ['AsyncResult', 'AsyncMapResult', 'AsyncHubResult'] No newline at end of file
This diff has been collapsed as it changes many lines, (632 lines changed) Show them Hide them
@@ -1,1591 +1,1343 b''
1 """A semi-synchronous Client for the ZMQ controller"""
1 """A semi-synchronous Client for the ZMQ cluster"""
2 2 #-----------------------------------------------------------------------------
3 3 # Copyright (C) 2010 The IPython Development Team
4 4 #
5 5 # Distributed under the terms of the BSD License. The full license is in
6 6 # the file COPYING, distributed as part of this software.
7 7 #-----------------------------------------------------------------------------
8 8
9 9 #-----------------------------------------------------------------------------
10 10 # Imports
11 11 #-----------------------------------------------------------------------------
12 12
13 13 import os
14 14 import json
15 15 import time
16 16 import warnings
17 17 from datetime import datetime
18 18 from getpass import getpass
19 19 from pprint import pprint
20 20
21 21 pjoin = os.path.join
22 22
23 23 import zmq
24 24 # from zmq.eventloop import ioloop, zmqstream
25 25
26 26 from IPython.utils.path import get_ipython_dir
27 27 from IPython.utils.pickleutil import Reference
28 28 from IPython.utils.traitlets import (HasTraits, Int, Instance, CUnicode,
29 29 Dict, List, Bool, Str, Set)
30 30 from IPython.external.decorator import decorator
31 31 from IPython.external.ssh import tunnel
32 32
33 33 from . import error
34 from . import map as Map
35 34 from . import util
36 35 from . import streamsession as ss
37 36 from .asyncresult import AsyncResult, AsyncMapResult, AsyncHubResult
38 37 from .clusterdir import ClusterDir, ClusterDirError
39 38 from .dependency import Dependency, depend, require, dependent
40 39 from .remotefunction import remote, parallel, ParallelFunction, RemoteFunction
41 from .util import ReverseDict, validate_url, disambiguate_url
42 40 from .view import DirectView, LoadBalancedView
43 41
44 42 #--------------------------------------------------------------------------
45 # helpers for implementing old MEC API via client.apply
46 #--------------------------------------------------------------------------
47
48 def _push(user_ns, **ns):
49 """helper method for implementing `client.push` via `client.apply`"""
50 user_ns.update(ns)
51
52 def _pull(user_ns, keys):
53 """helper method for implementing `client.pull` via `client.apply`"""
54 if isinstance(keys, (list,tuple, set)):
55 for key in keys:
56 if not user_ns.has_key(key):
57 raise NameError("name '%s' is not defined"%key)
58 return map(user_ns.get, keys)
59 else:
60 if not user_ns.has_key(keys):
61 raise NameError("name '%s' is not defined"%keys)
62 return user_ns.get(keys)
63
64 def _clear(user_ns):
65 """helper method for implementing `client.clear` via `client.apply`"""
66 user_ns.clear()
67
68 def _execute(user_ns, code):
69 """helper method for implementing `client.execute` via `client.apply`"""
70 exec code in user_ns
71
72
73 #--------------------------------------------------------------------------
74 43 # Decorators for Client methods
75 44 #--------------------------------------------------------------------------
76 45
77 46 @decorator
78 def spinfirst(f, self, *args, **kwargs):
47 def spin_first(f, self, *args, **kwargs):
79 48 """Call spin() to sync state prior to calling the method."""
80 49 self.spin()
81 50 return f(self, *args, **kwargs)
82 51
83 52 @decorator
84 def defaultblock(f, self, *args, **kwargs):
53 def default_block(f, self, *args, **kwargs):
85 54 """Default to self.block; preserve self.block."""
86 55 block = kwargs.get('block',None)
87 56 block = self.block if block is None else block
88 57 saveblock = self.block
89 58 self.block = block
90 59 try:
91 60 ret = f(self, *args, **kwargs)
92 61 finally:
93 62 self.block = saveblock
94 63 return ret
95 64
96 65
97 66 #--------------------------------------------------------------------------
98 67 # Classes
99 68 #--------------------------------------------------------------------------
100 69
101 70 class Metadata(dict):
102 71 """Subclass of dict for initializing metadata values.
103 72
104 73 Attribute access works on keys.
105 74
106 75 These objects have a strict set of keys - errors will raise if you try
107 76 to add new keys.
108 77 """
109 78 def __init__(self, *args, **kwargs):
110 79 dict.__init__(self)
111 80 md = {'msg_id' : None,
112 81 'submitted' : None,
113 82 'started' : None,
114 83 'completed' : None,
115 84 'received' : None,
116 85 'engine_uuid' : None,
117 86 'engine_id' : None,
118 87 'follow' : None,
119 88 'after' : None,
120 89 'status' : None,
121 90
122 91 'pyin' : None,
123 92 'pyout' : None,
124 93 'pyerr' : None,
125 94 'stdout' : '',
126 95 'stderr' : '',
127 96 }
128 97 self.update(md)
129 98 self.update(dict(*args, **kwargs))
130 99
131 100 def __getattr__(self, key):
132 101 """getattr aliased to getitem"""
133 102 if key in self.iterkeys():
134 103 return self[key]
135 104 else:
136 105 raise AttributeError(key)
137 106
138 107 def __setattr__(self, key, value):
139 108 """setattr aliased to setitem, with strict"""
140 109 if key in self.iterkeys():
141 110 self[key] = value
142 111 else:
143 112 raise AttributeError(key)
144 113
145 114 def __setitem__(self, key, value):
146 115 """strict static key enforcement"""
147 116 if key in self.iterkeys():
148 117 dict.__setitem__(self, key, value)
149 118 else:
150 119 raise KeyError(key)
151 120
152 121
153 122 class Client(HasTraits):
154 """A semi-synchronous client to the IPython ZMQ controller
123 """A semi-synchronous client to the IPython ZMQ cluster
155 124
156 125 Parameters
157 126 ----------
158 127
159 128 url_or_file : bytes; zmq url or path to ipcontroller-client.json
160 129 Connection information for the Hub's registration. If a json connector
161 130 file is given, then likely no further configuration is necessary.
162 131 [Default: use profile]
163 132 profile : bytes
164 133 The name of the Cluster profile to be used to find connector information.
165 134 [Default: 'default']
166 135 context : zmq.Context
167 136 Pass an existing zmq.Context instance, otherwise the client will create its own.
168 137 username : bytes
169 138 set username to be passed to the Session object
170 139 debug : bool
171 140 flag for lots of message printing for debug purposes
172 141
173 142 #-------------- ssh related args ----------------
174 143 # These are args for configuring the ssh tunnel to be used
175 144 # credentials are used to forward connections over ssh to the Controller
176 145 # Note that the ip given in `addr` needs to be relative to sshserver
177 146 # The most basic case is to leave addr as pointing to localhost (127.0.0.1),
178 147 # and set sshserver as the same machine the Controller is on. However,
179 148 # the only requirement is that sshserver is able to see the Controller
180 149 # (i.e. is within the same trusted network).
181 150
182 151 sshserver : str
183 152 A string of the form passed to ssh, i.e. 'server.tld' or 'user@server.tld:port'
184 153 If keyfile or password is specified, and this is not, it will default to
185 154 the ip given in addr.
186 155 sshkey : str; path to public ssh key file
187 156 This specifies a key to be used in ssh login, default None.
188 157 Regular default ssh keys will be used without specifying this argument.
189 158 password : str
190 159 Your ssh password to sshserver. Note that if this is left None,
191 160 you will be prompted for it if passwordless key based login is unavailable.
192 161 paramiko : bool
193 162 flag for whether to use paramiko instead of shell ssh for tunneling.
194 163 [default: True on win32, False else]
195 164
196 #------- exec authentication args -------
197 # If even localhost is untrusted, you can have some protection against
198 # unauthorized execution by using a key. Messages are still sent
199 # as cleartext, so if someone can snoop your loopback traffic this will
200 # not help against malicious attacks.
165 ------- exec authentication args -------
166 If even localhost is untrusted, you can have some protection against
167 unauthorized execution by using a key. Messages are still sent
168 as cleartext, so if someone can snoop your loopback traffic this will
169 not help against malicious attacks.
201 170
202 171 exec_key : str
203 172 an authentication key or file containing a key
204 173 default: None
205 174
206 175
207 176 Attributes
208 177 ----------
209 178
210 ids : set of int engine IDs
179 ids : list of int engine IDs
211 180 requesting the ids attribute always synchronizes
212 181 the registration state. To request ids without synchronization,
213 182 use semi-private _ids attributes.
214 183
215 184 history : list of msg_ids
216 185 a list of msg_ids, keeping track of all the execution
217 186 messages you have submitted in order.
218 187
219 188 outstanding : set of msg_ids
220 189 a set of msg_ids that have been submitted, but whose
221 190 results have not yet been received.
222 191
223 192 results : dict
224 193 a dict of all our results, keyed by msg_id
225 194
226 195 block : bool
227 196 determines default behavior when block not specified
228 197 in execution methods
229 198
230 199 Methods
231 200 -------
232 201
233 202 spin
234 203 flushes incoming results and registration state changes
235 204 control methods spin, and requesting `ids` also ensures up to date
236 205
237 barrier
206 wait
238 207 wait on one or more msg_ids
239 208
240 209 execution methods
241 210 apply
242 211 legacy: execute, run
243 212
213 data movement
214 push, pull, scatter, gather
215
244 216 query methods
245 queue_status, get_result, purge
217 queue_status, get_result, purge, result_status
246 218
247 219 control methods
248 220 abort, shutdown
249 221
250 222 """
251 223
252 224
253 225 block = Bool(False)
254 226 outstanding = Set()
255 227 results = Instance('collections.defaultdict', (dict,))
256 228 metadata = Instance('collections.defaultdict', (Metadata,))
257 229 history = List()
258 230 debug = Bool(False)
259 231 profile=CUnicode('default')
260 232
261 233 _outstanding_dict = Instance('collections.defaultdict', (set,))
262 234 _ids = List()
263 235 _connected=Bool(False)
264 236 _ssh=Bool(False)
265 237 _context = Instance('zmq.Context')
266 238 _config = Dict()
267 _engines=Instance(ReverseDict, (), {})
239 _engines=Instance(util.ReverseDict, (), {})
268 240 # _hub_socket=Instance('zmq.Socket')
269 241 _query_socket=Instance('zmq.Socket')
270 242 _control_socket=Instance('zmq.Socket')
271 243 _iopub_socket=Instance('zmq.Socket')
272 244 _notification_socket=Instance('zmq.Socket')
273 _apply_socket=Instance('zmq.Socket')
274 _mux_ident=Str()
275 _task_ident=Str()
245 _mux_socket=Instance('zmq.Socket')
246 _task_socket=Instance('zmq.Socket')
276 247 _task_scheme=Str()
277 248 _balanced_views=Dict()
278 249 _direct_views=Dict()
279 250 _closed = False
251 _ignored_control_replies=Int(0)
252 _ignored_hub_replies=Int(0)
280 253
281 254 def __init__(self, url_or_file=None, profile='default', cluster_dir=None, ipython_dir=None,
282 255 context=None, username=None, debug=False, exec_key=None,
283 256 sshserver=None, sshkey=None, password=None, paramiko=None,
257 timeout=10
284 258 ):
285 259 super(Client, self).__init__(debug=debug, profile=profile)
286 260 if context is None:
287 261 context = zmq.Context.instance()
288 262 self._context = context
289 263
290 264
291 265 self._setup_cluster_dir(profile, cluster_dir, ipython_dir)
292 266 if self._cd is not None:
293 267 if url_or_file is None:
294 268 url_or_file = pjoin(self._cd.security_dir, 'ipcontroller-client.json')
295 assert url_or_file is not None, "I can't find enough information to connect to a controller!"\
269 assert url_or_file is not None, "I can't find enough information to connect to a hub!"\
296 270 " Please specify at least one of url_or_file or profile."
297 271
298 272 try:
299 validate_url(url_or_file)
273 util.validate_url(url_or_file)
300 274 except AssertionError:
301 275 if not os.path.exists(url_or_file):
302 276 if self._cd:
303 277 url_or_file = os.path.join(self._cd.security_dir, url_or_file)
304 278 assert os.path.exists(url_or_file), "Not a valid connection file or url: %r"%url_or_file
305 279 with open(url_or_file) as f:
306 280 cfg = json.loads(f.read())
307 281 else:
308 282 cfg = {'url':url_or_file}
309 283
310 284 # sync defaults from args, json:
311 285 if sshserver:
312 286 cfg['ssh'] = sshserver
313 287 if exec_key:
314 288 cfg['exec_key'] = exec_key
315 289 exec_key = cfg['exec_key']
316 290 sshserver=cfg['ssh']
317 291 url = cfg['url']
318 292 location = cfg.setdefault('location', None)
319 cfg['url'] = disambiguate_url(cfg['url'], location)
293 cfg['url'] = util.disambiguate_url(cfg['url'], location)
320 294 url = cfg['url']
321 295
322 296 self._config = cfg
323 297
324 298 self._ssh = bool(sshserver or sshkey or password)
325 299 if self._ssh and sshserver is None:
326 300 # default to ssh via localhost
327 301 sshserver = url.split('://')[1].split(':')[0]
328 302 if self._ssh and password is None:
329 303 if tunnel.try_passwordless_ssh(sshserver, sshkey, paramiko):
330 304 password=False
331 305 else:
332 306 password = getpass("SSH Password for %s: "%sshserver)
333 307 ssh_kwargs = dict(keyfile=sshkey, password=password, paramiko=paramiko)
334 308 if exec_key is not None and os.path.isfile(exec_key):
335 309 arg = 'keyfile'
336 310 else:
337 311 arg = 'key'
338 312 key_arg = {arg:exec_key}
339 313 if username is None:
340 314 self.session = ss.StreamSession(**key_arg)
341 315 else:
342 316 self.session = ss.StreamSession(username, **key_arg)
343 317 self._query_socket = self._context.socket(zmq.XREQ)
344 318 self._query_socket.setsockopt(zmq.IDENTITY, self.session.session)
345 319 if self._ssh:
346 320 tunnel.tunnel_connection(self._query_socket, url, sshserver, **ssh_kwargs)
347 321 else:
348 322 self._query_socket.connect(url)
349 323
350 324 self.session.debug = self.debug
351 325
352 326 self._notification_handlers = {'registration_notification' : self._register_engine,
353 327 'unregistration_notification' : self._unregister_engine,
328 'shutdown_notification' : lambda msg: self.close(),
354 329 }
355 330 self._queue_handlers = {'execute_reply' : self._handle_execute_reply,
356 331 'apply_reply' : self._handle_apply_reply}
357 self._connect(sshserver, ssh_kwargs)
332 self._connect(sshserver, ssh_kwargs, timeout)
358 333
359 334 def __del__(self):
360 335 """cleanup sockets, but _not_ context."""
361 336 self.close()
362 337
363 338 def _setup_cluster_dir(self, profile, cluster_dir, ipython_dir):
364 339 if ipython_dir is None:
365 340 ipython_dir = get_ipython_dir()
366 341 if cluster_dir is not None:
367 342 try:
368 343 self._cd = ClusterDir.find_cluster_dir(cluster_dir)
369 344 return
370 345 except ClusterDirError:
371 346 pass
372 347 elif profile is not None:
373 348 try:
374 349 self._cd = ClusterDir.find_cluster_dir_by_profile(
375 350 ipython_dir, profile)
376 351 return
377 352 except ClusterDirError:
378 353 pass
379 354 self._cd = None
380 355
381 @property
382 def ids(self):
383 """Always up-to-date ids property."""
384 self._flush_notifications()
385 # always copy:
386 return list(self._ids)
387
388 def close(self):
389 if self._closed:
390 return
391 snames = filter(lambda n: n.endswith('socket'), dir(self))
392 for socket in map(lambda name: getattr(self, name), snames):
393 if isinstance(socket, zmq.Socket) and not socket.closed:
394 socket.close()
395 self._closed = True
396
397 356 def _update_engines(self, engines):
398 357 """Update our engines dict and _ids from a dict of the form: {id:uuid}."""
399 358 for k,v in engines.iteritems():
400 359 eid = int(k)
401 360 self._engines[eid] = bytes(v) # force not unicode
402 361 self._ids.append(eid)
403 362 self._ids = sorted(self._ids)
404 363 if sorted(self._engines.keys()) != range(len(self._engines)) and \
405 self._task_scheme == 'pure' and self._task_ident:
364 self._task_scheme == 'pure' and self._task_socket:
406 365 self._stop_scheduling_tasks()
407 366
408 367 def _stop_scheduling_tasks(self):
409 368 """Stop scheduling tasks because an engine has been unregistered
410 369 from a pure ZMQ scheduler.
411 370 """
412 self._task_ident = ''
413 # self._task_socket.close()
414 # self._task_socket = None
371 self._task_socket.close()
372 self._task_socket = None
415 373 msg = "An engine has been unregistered, and we are using pure " +\
416 374 "ZMQ task scheduling. Task farming will be disabled."
417 375 if self.outstanding:
418 376 msg += " If you were running tasks when this happened, " +\
419 377 "some `outstanding` msg_ids may never resolve."
420 378 warnings.warn(msg, RuntimeWarning)
421 379
422 380 def _build_targets(self, targets):
423 381 """Turn valid target IDs or 'all' into two lists:
424 382 (int_ids, uuids).
425 383 """
426 384 if targets is None:
427 385 targets = self._ids
428 386 elif isinstance(targets, str):
429 387 if targets.lower() == 'all':
430 388 targets = self._ids
431 389 else:
432 390 raise TypeError("%r not valid str target, must be 'all'"%(targets))
433 391 elif isinstance(targets, int):
434 392 targets = [targets]
435 393 return [self._engines[t] for t in targets], list(targets)
436 394
437 def _connect(self, sshserver, ssh_kwargs):
438 """setup all our socket connections to the controller. This is called from
395 def _connect(self, sshserver, ssh_kwargs, timeout):
396 """setup all our socket connections to the cluster. This is called from
439 397 __init__."""
440 398
441 399 # Maybe allow reconnecting?
442 400 if self._connected:
443 401 return
444 402 self._connected=True
445 403
446 404 def connect_socket(s, url):
447 url = disambiguate_url(url, self._config['location'])
405 url = util.disambiguate_url(url, self._config['location'])
448 406 if self._ssh:
449 407 return tunnel.tunnel_connection(s, url, sshserver, **ssh_kwargs)
450 408 else:
451 409 return s.connect(url)
452 410
453 411 self.session.send(self._query_socket, 'connection_request')
412 r,w,x = zmq.select([self._query_socket],[],[], timeout)
413 if not r:
414 raise error.TimeoutError("Hub connection request timed out")
454 415 idents,msg = self.session.recv(self._query_socket,mode=0)
455 416 if self.debug:
456 417 pprint(msg)
457 418 msg = ss.Message(msg)
458 419 content = msg.content
459 420 self._config['registration'] = dict(content)
460 421 if content.status == 'ok':
461 self._apply_socket = self._context.socket(zmq.XREP)
462 self._apply_socket.setsockopt(zmq.IDENTITY, self.session.session)
463 422 if content.mux:
464 # self._mux_socket = self._context.socket(zmq.XREQ)
465 self._mux_ident = 'mux'
466 connect_socket(self._apply_socket, content.mux)
423 self._mux_socket = self._context.socket(zmq.XREQ)
424 self._mux_socket.setsockopt(zmq.IDENTITY, self.session.session)
425 connect_socket(self._mux_socket, content.mux)
467 426 if content.task:
468 427 self._task_scheme, task_addr = content.task
469 # self._task_socket = self._context.socket(zmq.XREQ)
470 # self._task_socket.setsockopt(zmq.IDENTITY, self.session.session)
471 connect_socket(self._apply_socket, task_addr)
472 self._task_ident = 'task'
428 self._task_socket = self._context.socket(zmq.XREQ)
429 self._task_socket.setsockopt(zmq.IDENTITY, self.session.session)
430 connect_socket(self._task_socket, task_addr)
473 431 if content.notification:
474 432 self._notification_socket = self._context.socket(zmq.SUB)
475 433 connect_socket(self._notification_socket, content.notification)
476 434 self._notification_socket.setsockopt(zmq.SUBSCRIBE, b'')
477 435 # if content.query:
478 436 # self._query_socket = self._context.socket(zmq.XREQ)
479 437 # self._query_socket.setsockopt(zmq.IDENTITY, self.session.session)
480 438 # connect_socket(self._query_socket, content.query)
481 439 if content.control:
482 440 self._control_socket = self._context.socket(zmq.XREQ)
483 441 self._control_socket.setsockopt(zmq.IDENTITY, self.session.session)
484 442 connect_socket(self._control_socket, content.control)
485 443 if content.iopub:
486 444 self._iopub_socket = self._context.socket(zmq.SUB)
487 445 self._iopub_socket.setsockopt(zmq.SUBSCRIBE, b'')
488 446 self._iopub_socket.setsockopt(zmq.IDENTITY, self.session.session)
489 447 connect_socket(self._iopub_socket, content.iopub)
490 448 self._update_engines(dict(content.engines))
491 # give XREP apply_socket some time to connect
492 time.sleep(0.25)
493 449 else:
494 450 self._connected = False
495 451 raise Exception("Failed to connect!")
496 452
497 453 #--------------------------------------------------------------------------
498 454 # handlers and callbacks for incoming messages
499 455 #--------------------------------------------------------------------------
500 456
501 457 def _unwrap_exception(self, content):
502 """unwrap exception, and remap engineid to int."""
458 """unwrap exception, and remap engine_id to int."""
503 459 e = error.unwrap_exception(content)
504 460 # print e.traceback
505 461 if e.engine_info:
506 462 e_uuid = e.engine_info['engine_uuid']
507 463 eid = self._engines[e_uuid]
508 464 e.engine_info['engine_id'] = eid
509 465 return e
510 466
511 467 def _extract_metadata(self, header, parent, content):
512 468 md = {'msg_id' : parent['msg_id'],
513 469 'received' : datetime.now(),
514 470 'engine_uuid' : header.get('engine', None),
515 471 'follow' : parent.get('follow', []),
516 472 'after' : parent.get('after', []),
517 473 'status' : content['status'],
518 474 }
519 475
520 476 if md['engine_uuid'] is not None:
521 477 md['engine_id'] = self._engines.get(md['engine_uuid'], None)
522 478
523 479 if 'date' in parent:
524 480 md['submitted'] = datetime.strptime(parent['date'], util.ISO8601)
525 481 if 'started' in header:
526 482 md['started'] = datetime.strptime(header['started'], util.ISO8601)
527 483 if 'date' in header:
528 484 md['completed'] = datetime.strptime(header['date'], util.ISO8601)
529 485 return md
530 486
531 487 def _register_engine(self, msg):
532 488 """Register a new engine, and update our connection info."""
533 489 content = msg['content']
534 490 eid = content['id']
535 491 d = {eid : content['queue']}
536 492 self._update_engines(d)
537 493
538 494 def _unregister_engine(self, msg):
539 495 """Unregister an engine that has died."""
540 496 content = msg['content']
541 497 eid = int(content['id'])
542 498 if eid in self._ids:
543 499 self._ids.remove(eid)
544 500 uuid = self._engines.pop(eid)
545 501
546 502 self._handle_stranded_msgs(eid, uuid)
547 503
548 if self._task_ident and self._task_scheme == 'pure':
504 if self._task_socket and self._task_scheme == 'pure':
549 505 self._stop_scheduling_tasks()
550 506
551 507 def _handle_stranded_msgs(self, eid, uuid):
552 508 """Handle messages known to be on an engine when the engine unregisters.
553 509
554 510 It is possible that this will fire prematurely - that is, an engine will
555 511 go down after completing a result, and the client will be notified
556 512 of the unregistration and later receive the successful result.
557 513 """
558 514
559 515 outstanding = self._outstanding_dict[uuid]
560 516
561 517 for msg_id in list(outstanding):
562 518 if msg_id in self.results:
563 519 # we already
564 520 continue
565 521 try:
566 522 raise error.EngineError("Engine %r died while running task %r"%(eid, msg_id))
567 523 except:
568 524 content = error.wrap_exception()
569 525 # build a fake message:
570 526 parent = {}
571 527 header = {}
572 528 parent['msg_id'] = msg_id
573 529 header['engine'] = uuid
574 530 header['date'] = datetime.now().strftime(util.ISO8601)
575 531 msg = dict(parent_header=parent, header=header, content=content)
576 532 self._handle_apply_reply(msg)
577 533
578 534 def _handle_execute_reply(self, msg):
579 535 """Save the reply to an execute_request into our results.
580 536
581 537 execute messages are never actually used. apply is used instead.
582 538 """
583 539
584 540 parent = msg['parent_header']
585 541 msg_id = parent['msg_id']
586 542 if msg_id not in self.outstanding:
587 543 if msg_id in self.history:
588 544 print ("got stale result: %s"%msg_id)
589 545 else:
590 546 print ("got unknown result: %s"%msg_id)
591 547 else:
592 548 self.outstanding.remove(msg_id)
593 549 self.results[msg_id] = self._unwrap_exception(msg['content'])
594 550
595 551 def _handle_apply_reply(self, msg):
596 552 """Save the reply to an apply_request into our results."""
597 553 parent = msg['parent_header']
598 554 msg_id = parent['msg_id']
599 555 if msg_id not in self.outstanding:
600 556 if msg_id in self.history:
601 557 print ("got stale result: %s"%msg_id)
602 558 print self.results[msg_id]
603 559 print msg
604 560 else:
605 561 print ("got unknown result: %s"%msg_id)
606 562 else:
607 563 self.outstanding.remove(msg_id)
608 564 content = msg['content']
609 565 header = msg['header']
610 566
611 567 # construct metadata:
612 568 md = self.metadata[msg_id]
613 569 md.update(self._extract_metadata(header, parent, content))
614 570 # is this redundant?
615 571 self.metadata[msg_id] = md
616 572
617 573 e_outstanding = self._outstanding_dict[md['engine_uuid']]
618 574 if msg_id in e_outstanding:
619 575 e_outstanding.remove(msg_id)
620 576
621 577 # construct result:
622 578 if content['status'] == 'ok':
623 579 self.results[msg_id] = util.unserialize_object(msg['buffers'])[0]
624 580 elif content['status'] == 'aborted':
625 self.results[msg_id] = error.AbortedTask(msg_id)
581 self.results[msg_id] = error.TaskAborted(msg_id)
626 582 elif content['status'] == 'resubmitted':
627 583 # TODO: handle resubmission
628 584 pass
629 585 else:
630 586 self.results[msg_id] = self._unwrap_exception(content)
631 587
632 588 def _flush_notifications(self):
633 589 """Flush notifications of engine registrations waiting
634 590 in ZMQ queue."""
635 591 msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)
636 592 while msg is not None:
637 593 if self.debug:
638 594 pprint(msg)
639 595 msg = msg[-1]
640 596 msg_type = msg['msg_type']
641 597 handler = self._notification_handlers.get(msg_type, None)
642 598 if handler is None:
643 599 raise Exception("Unhandled message type: %s"%msg.msg_type)
644 600 else:
645 601 handler(msg)
646 602 msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)
647 603
648 604 def _flush_results(self, sock):
649 605 """Flush task or queue results waiting in ZMQ queue."""
650 606 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
651 607 while msg is not None:
652 608 if self.debug:
653 609 pprint(msg)
654 610 msg = msg[-1]
655 611 msg_type = msg['msg_type']
656 612 handler = self._queue_handlers.get(msg_type, None)
657 613 if handler is None:
658 614 raise Exception("Unhandled message type: %s"%msg.msg_type)
659 615 else:
660 616 handler(msg)
661 617 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
662 618
663 619 def _flush_control(self, sock):
664 620 """Flush replies from the control channel waiting
665 621 in the ZMQ queue.
666 622
667 623 Currently: ignore them."""
624 if self._ignored_control_replies <= 0:
625 return
668 626 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
669 627 while msg is not None:
628 self._ignored_control_replies -= 1
670 629 if self.debug:
671 630 pprint(msg)
672 631 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
673 632
633 def _flush_ignored_control(self):
634 """flush ignored control replies"""
635 while self._ignored_control_replies > 0:
636 self.session.recv(self._control_socket)
637 self._ignored_control_replies -= 1
638
639 def _flush_ignored_hub_replies(self):
640 msg = self.session.recv(self._query_socket, mode=zmq.NOBLOCK)
641 while msg is not None:
642 msg = self.session.recv(self._query_socket, mode=zmq.NOBLOCK)
643
674 644 def _flush_iopub(self, sock):
675 645 """Flush replies from the iopub channel waiting
676 646 in the ZMQ queue.
677 647 """
678 648 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
679 649 while msg is not None:
680 650 if self.debug:
681 651 pprint(msg)
682 652 msg = msg[-1]
683 653 parent = msg['parent_header']
684 654 msg_id = parent['msg_id']
685 655 content = msg['content']
686 656 header = msg['header']
687 657 msg_type = msg['msg_type']
688 658
689 659 # init metadata:
690 660 md = self.metadata[msg_id]
691 661
692 662 if msg_type == 'stream':
693 663 name = content['name']
694 664 s = md[name] or ''
695 665 md[name] = s + content['data']
696 666 elif msg_type == 'pyerr':
697 667 md.update({'pyerr' : self._unwrap_exception(content)})
698 668 else:
699 669 md.update({msg_type : content['data']})
700 670
701 671 # reduntant?
702 672 self.metadata[msg_id] = md
703 673
704 674 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
705 675
706 676 #--------------------------------------------------------------------------
707 677 # len, getitem
708 678 #--------------------------------------------------------------------------
709 679
710 680 def __len__(self):
711 681 """len(client) returns # of engines."""
712 682 return len(self.ids)
713 683
714 684 def __getitem__(self, key):
715 685 """index access returns DirectView multiplexer objects
716 686
717 687 Must be int, slice, or list/tuple/xrange of ints"""
718 688 if not isinstance(key, (int, slice, tuple, list, xrange)):
719 689 raise TypeError("key by int/slice/iterable of ints only, not %s"%(type(key)))
720 690 else:
721 return self.view(key, balanced=False)
691 return self._get_view(key, balanced=False)
722 692
723 693 #--------------------------------------------------------------------------
724 694 # Begin public methods
725 695 #--------------------------------------------------------------------------
726 696
697 @property
698 def ids(self):
699 """Always up-to-date ids property."""
700 self._flush_notifications()
701 # always copy:
702 return list(self._ids)
703
704 def close(self):
705 if self._closed:
706 return
707 snames = filter(lambda n: n.endswith('socket'), dir(self))
708 for socket in map(lambda name: getattr(self, name), snames):
709 if isinstance(socket, zmq.Socket) and not socket.closed:
710 socket.close()
711 self._closed = True
712
727 713 def spin(self):
728 714 """Flush any registration notifications and execution results
729 715 waiting in the ZMQ queue.
730 716 """
731 717 if self._notification_socket:
732 718 self._flush_notifications()
733 if self._apply_socket:
734 self._flush_results(self._apply_socket)
719 if self._mux_socket:
720 self._flush_results(self._mux_socket)
721 if self._task_socket:
722 self._flush_results(self._task_socket)
735 723 if self._control_socket:
736 724 self._flush_control(self._control_socket)
737 725 if self._iopub_socket:
738 726 self._flush_iopub(self._iopub_socket)
727 if self._query_socket:
728 self._flush_ignored_hub_replies()
739 729
740 def barrier(self, jobs=None, timeout=-1):
730 def wait(self, jobs=None, timeout=-1):
741 731 """waits on one or more `jobs`, for up to `timeout` seconds.
742 732
743 733 Parameters
744 734 ----------
745 735
746 736 jobs : int, str, or list of ints and/or strs, or one or more AsyncResult objects
747 737 ints are indices to self.history
748 738 strs are msg_ids
749 739 default: wait on all outstanding messages
750 740 timeout : float
751 741 a time in seconds, after which to give up.
752 742 default is -1, which means no timeout
753 743
754 744 Returns
755 745 -------
756 746
757 747 True : when all msg_ids are done
758 748 False : timeout reached, some msg_ids still outstanding
759 749 """
760 750 tic = time.time()
761 751 if jobs is None:
762 752 theids = self.outstanding
763 753 else:
764 754 if isinstance(jobs, (int, str, AsyncResult)):
765 755 jobs = [jobs]
766 756 theids = set()
767 757 for job in jobs:
768 758 if isinstance(job, int):
769 759 # index access
770 760 job = self.history[job]
771 761 elif isinstance(job, AsyncResult):
772 762 map(theids.add, job.msg_ids)
773 763 continue
774 764 theids.add(job)
775 765 if not theids.intersection(self.outstanding):
776 766 return True
777 767 self.spin()
778 768 while theids.intersection(self.outstanding):
779 769 if timeout >= 0 and ( time.time()-tic ) > timeout:
780 770 break
781 771 time.sleep(1e-3)
782 772 self.spin()
783 773 return len(theids.intersection(self.outstanding)) == 0
784 774
785 775 #--------------------------------------------------------------------------
786 776 # Control methods
787 777 #--------------------------------------------------------------------------
788 778
789 @spinfirst
790 @defaultblock
779 @spin_first
780 @default_block
791 781 def clear(self, targets=None, block=None):
792 782 """Clear the namespace in target(s)."""
793 783 targets = self._build_targets(targets)[0]
794 784 for t in targets:
795 785 self.session.send(self._control_socket, 'clear_request', content={}, ident=t)
796 786 error = False
797 787 if self.block:
788 self._flush_ignored_control()
798 789 for i in range(len(targets)):
799 790 idents,msg = self.session.recv(self._control_socket,0)
800 791 if self.debug:
801 792 pprint(msg)
802 793 if msg['content']['status'] != 'ok':
803 794 error = self._unwrap_exception(msg['content'])
795 else:
796 self._ignored_control_replies += len(targets)
804 797 if error:
805 798 raise error
806 799
807 800
808 @spinfirst
809 @defaultblock
801 @spin_first
802 @default_block
810 803 def abort(self, jobs=None, targets=None, block=None):
811 804 """Abort specific jobs from the execution queues of target(s).
812 805
813 806 This is a mechanism to prevent jobs that have already been submitted
814 807 from executing.
815 808
816 809 Parameters
817 810 ----------
818 811
819 812 jobs : msg_id, list of msg_ids, or AsyncResult
820 813 The jobs to be aborted
821 814
822 815
823 816 """
824 817 targets = self._build_targets(targets)[0]
825 818 msg_ids = []
826 819 if isinstance(jobs, (basestring,AsyncResult)):
827 820 jobs = [jobs]
828 821 bad_ids = filter(lambda obj: not isinstance(obj, (basestring, AsyncResult)), jobs)
829 822 if bad_ids:
830 823 raise TypeError("Invalid msg_id type %r, expected str or AsyncResult"%bad_ids[0])
831 824 for j in jobs:
832 825 if isinstance(j, AsyncResult):
833 826 msg_ids.extend(j.msg_ids)
834 827 else:
835 828 msg_ids.append(j)
836 829 content = dict(msg_ids=msg_ids)
837 830 for t in targets:
838 831 self.session.send(self._control_socket, 'abort_request',
839 832 content=content, ident=t)
840 833 error = False
841 834 if self.block:
835 self._flush_ignored_control()
842 836 for i in range(len(targets)):
843 837 idents,msg = self.session.recv(self._control_socket,0)
844 838 if self.debug:
845 839 pprint(msg)
846 840 if msg['content']['status'] != 'ok':
847 841 error = self._unwrap_exception(msg['content'])
842 else:
843 self._ignored_control_replies += len(targets)
848 844 if error:
849 845 raise error
850 846
851 @spinfirst
852 @defaultblock
853 def shutdown(self, targets=None, restart=False, controller=False, block=None):
854 """Terminates one or more engine processes, optionally including the controller."""
855 if controller:
847 @spin_first
848 @default_block
849 def shutdown(self, targets=None, restart=False, hub=False, block=None):
850 """Terminates one or more engine processes, optionally including the hub."""
851 if hub:
856 852 targets = 'all'
857 853 targets = self._build_targets(targets)[0]
858 854 for t in targets:
859 855 self.session.send(self._control_socket, 'shutdown_request',
860 856 content={'restart':restart},ident=t)
861 857 error = False
862 if block or controller:
858 if block or hub:
859 self._flush_ignored_control()
863 860 for i in range(len(targets)):
864 861 idents,msg = self.session.recv(self._control_socket,0)
865 862 if self.debug:
866 863 pprint(msg)
867 864 if msg['content']['status'] != 'ok':
868 865 error = self._unwrap_exception(msg['content'])
866 else:
867 self._ignored_control_replies += len(targets)
869 868
870 if controller:
869 if hub:
871 870 time.sleep(0.25)
872 871 self.session.send(self._query_socket, 'shutdown_request')
873 872 idents,msg = self.session.recv(self._query_socket, 0)
874 873 if self.debug:
875 874 pprint(msg)
876 875 if msg['content']['status'] != 'ok':
877 876 error = self._unwrap_exception(msg['content'])
878 877
879 878 if error:
880 879 raise error
881 880
882 881 #--------------------------------------------------------------------------
883 882 # Execution methods
884 883 #--------------------------------------------------------------------------
885 884
886 @defaultblock
887 def execute(self, code, targets='all', block=None):
885 @default_block
886 def _execute(self, code, targets='all', block=None):
888 887 """Executes `code` on `targets` in blocking or nonblocking manner.
889 888
890 889 ``execute`` is always `bound` (affects engine namespace)
891 890
892 891 Parameters
893 892 ----------
894 893
895 894 code : str
896 895 the code string to be executed
897 896 targets : int/str/list of ints/strs
898 897 the engines on which to execute
899 898 default : all
900 899 block : bool
901 900 whether or not to wait until done to return
902 901 default: self.block
903 902 """
904 result = self.apply(_execute, (code,), targets=targets, block=block, bound=True, balanced=False)
905 if not block:
906 return result
907
908 def run(self, filename, targets='all', block=None):
909 """Execute contents of `filename` on engine(s).
910
911 This simply reads the contents of the file and calls `execute`.
912
913 Parameters
914 ----------
915
916 filename : str
917 The path to the file
918 targets : int/str/list of ints/strs
919 the engines on which to execute
920 default : all
921 block : bool
922 whether or not to wait until done
923 default: self.block
924
925 """
926 with open(filename, 'r') as f:
927 # add newline in case of trailing indented whitespace
928 # which will cause SyntaxError
929 code = f.read()+'\n'
930 return self.execute(code, targets=targets, block=block)
903 return self[targets].execute(code, block=block)
931 904
932 905 def _maybe_raise(self, result):
933 906 """wrapper for maybe raising an exception if apply failed."""
934 907 if isinstance(result, error.RemoteError):
935 908 raise result
936 909
937 910 return result
938 911
939 def _build_dependency(self, dep):
940 """helper for building jsonable dependencies from various input forms"""
941 if isinstance(dep, Dependency):
942 return dep.as_dict()
943 elif isinstance(dep, AsyncResult):
944 return dep.msg_ids
945 elif dep is None:
946 return []
947 else:
948 # pass to Dependency constructor
949 return list(Dependency(dep))
950
951 @defaultblock
952 def apply(self, f, args=None, kwargs=None, bound=False, block=None,
953 targets=None, balanced=None,
954 after=None, follow=None, timeout=None,
955 track=False):
956 """Call `f(*args, **kwargs)` on a remote engine(s), returning the result.
957
958 This is the central execution command for the client.
959
960 Parameters
961 ----------
962
963 f : function
964 The fuction to be called remotely
965 args : tuple/list
966 The positional arguments passed to `f`
967 kwargs : dict
968 The keyword arguments passed to `f`
969 bound : bool (default: False)
970 Whether to pass the Engine(s) Namespace as the first argument to `f`.
971 block : bool (default: self.block)
972 Whether to wait for the result, or return immediately.
973 False:
974 returns AsyncResult
975 True:
976 returns actual result(s) of f(*args, **kwargs)
977 if multiple targets:
978 list of results, matching `targets`
979 track : bool
980 whether to track non-copying sends.
981 [default False]
982
983 targets : int,list of ints, 'all', None
984 Specify the destination of the job.
985 if None:
986 Submit via Task queue for load-balancing.
987 if 'all':
988 Run on all active engines
989 if list:
990 Run on each specified engine
991 if int:
992 Run on single engine
993 Note:
994 that if `balanced=True`, and `targets` is specified,
995 then the load-balancing will be limited to balancing
996 among `targets`.
997
998 balanced : bool, default None
999 whether to load-balance. This will default to True
1000 if targets is unspecified, or False if targets is specified.
1001
1002 If `balanced` and `targets` are both specified, the task will
1003 be assigne to *one* of the targets by the scheduler.
1004
1005 The following arguments are only used when balanced is True:
1006
1007 after : Dependency or collection of msg_ids
1008 Only for load-balanced execution (targets=None)
1009 Specify a list of msg_ids as a time-based dependency.
1010 This job will only be run *after* the dependencies
1011 have been met.
1012
1013 follow : Dependency or collection of msg_ids
1014 Only for load-balanced execution (targets=None)
1015 Specify a list of msg_ids as a location-based dependency.
1016 This job will only be run on an engine where this dependency
1017 is met.
1018
1019 timeout : float/int or None
1020 Only for load-balanced execution (targets=None)
1021 Specify an amount of time (in seconds) for the scheduler to
1022 wait for dependencies to be met before failing with a
1023 DependencyTimeout.
912 def send_apply_message(self, socket, f, args=None, kwargs=None, subheader=None, track=False,
913 ident=None):
914 """construct and send an apply message via a socket.
1024 915
1025 Returns
1026 -------
1027
1028 if block is False:
1029 return AsyncResult wrapping msg_ids
1030 output of AsyncResult.get() is identical to that of `apply(...block=True)`
1031 else:
1032 if single target (or balanced):
1033 return result of `f(*args, **kwargs)`
1034 else:
1035 return list of results, matching `targets`
916 This is the principal method with which all engine execution is performed by views.
1036 917 """
918
1037 919 assert not self._closed, "cannot use me anymore, I'm closed!"
1038 920 # defaults:
1039 block = block if block is not None else self.block
1040 921 args = args if args is not None else []
1041 922 kwargs = kwargs if kwargs is not None else {}
923 subheader = subheader if subheader is not None else {}
1042 924
1043 if not self._ids:
1044 # flush notification socket if no engines yet
1045 any_ids = self.ids
1046 if not any_ids:
1047 raise error.NoEnginesRegistered("Can't execute without any connected engines.")
1048
1049 if balanced is None:
1050 if targets is None:
1051 # default to balanced if targets unspecified
1052 balanced = True
1053 else:
1054 # otherwise default to multiplexing
1055 balanced = False
1056
1057 if targets is None and balanced is False:
1058 # default to all if *not* balanced, and targets is unspecified
1059 targets = 'all'
1060
1061 # enforce types of f,args,kwrags
925 # validate arguments
1062 926 if not callable(f):
1063 927 raise TypeError("f must be callable, not %s"%type(f))
1064 928 if not isinstance(args, (tuple, list)):
1065 929 raise TypeError("args must be tuple or list, not %s"%type(args))
1066 930 if not isinstance(kwargs, dict):
1067 931 raise TypeError("kwargs must be dict, not %s"%type(kwargs))
932 if not isinstance(subheader, dict):
933 raise TypeError("subheader must be dict, not %s"%type(subheader))
1068 934
1069 options = dict(bound=bound, block=block, targets=targets, track=track)
1070
1071 if balanced:
1072 return self._apply_balanced(f, args, kwargs, timeout=timeout,
1073 after=after, follow=follow, **options)
1074 elif follow or after or timeout:
1075 msg = "follow, after, and timeout args are only used for"
1076 msg += " load-balanced execution."
1077 raise ValueError(msg)
1078 else:
1079 return self._apply_direct(f, args, kwargs, **options)
1080
1081 def _apply_balanced(self, f, args, kwargs, bound=None, block=None, targets=None,
1082 after=None, follow=None, timeout=None, track=None):
1083 """call f(*args, **kwargs) remotely in a load-balanced manner.
1084
1085 This is a private method, see `apply` for details.
1086 Not to be called directly!
1087 """
1088
1089 loc = locals()
1090 for name in ('bound', 'block', 'track'):
1091 assert loc[name] is not None, "kwarg %r must be specified!"%name
1092
1093 if not self._task_ident:
1094 msg = "Task farming is disabled"
1095 if self._task_scheme == 'pure':
1096 msg += " because the pure ZMQ scheduler cannot handle"
1097 msg += " disappearing engines."
1098 raise RuntimeError(msg)
1099
1100 if self._task_scheme == 'pure':
1101 # pure zmq scheme doesn't support dependencies
1102 msg = "Pure ZMQ scheduler doesn't support dependencies"
1103 if (follow or after):
1104 # hard fail on DAG dependencies
1105 raise RuntimeError(msg)
1106 if isinstance(f, dependent):
1107 # soft warn on functional dependencies
1108 warnings.warn(msg, RuntimeWarning)
1109
1110 # defaults:
1111 args = args if args is not None else []
1112 kwargs = kwargs if kwargs is not None else {}
1113
1114 if targets:
1115 idents,_ = self._build_targets(targets)
1116 else:
1117 idents = []
935 if not self._ids:
936 # flush notification socket if no engines yet
937 any_ids = self.ids
938 if not any_ids:
939 raise error.NoEnginesRegistered("Can't execute without any connected engines.")
940 # enforce types of f,args,kwargs
1118 941
1119 after = self._build_dependency(after)
1120 follow = self._build_dependency(follow)
1121 subheader = dict(after=after, follow=follow, timeout=timeout, targets=idents)
1122 942 bufs = util.pack_apply_message(f,args,kwargs)
1123 content = dict(bound=bound)
1124 943
1125 msg = self.session.send(self._apply_socket, "apply_request", ident=self._task_ident,
1126 content=content, buffers=bufs, subheader=subheader, track=track)
1127 msg_id = msg['msg_id']
1128 self.outstanding.add(msg_id)
1129 self.history.append(msg_id)
1130 self.metadata[msg_id]['submitted'] = datetime.now()
1131 tracker = None if track is False else msg['tracker']
1132 ar = AsyncResult(self, [msg_id], fname=f.__name__, targets=targets, tracker=tracker)
1133 if block:
1134 try:
1135 return ar.get()
1136 except KeyboardInterrupt:
1137 return ar
1138 else:
1139 return ar
1140
1141 def _apply_direct(self, f, args, kwargs, bound=None, block=None, targets=None,
1142 track=None):
1143 """Then underlying method for applying functions to specific engines
1144 via the MUX queue.
1145
1146 This is a private method, see `apply` for details.
1147 Not to be called directly!
1148 """
1149
1150 if not self._mux_ident:
1151 msg = "Multiplexing is disabled"
1152 raise RuntimeError(msg)
1153
1154 loc = locals()
1155 for name in ('bound', 'block', 'targets', 'track'):
1156 assert loc[name] is not None, "kwarg %r must be specified!"%name
944 msg = self.session.send(socket, "apply_request", buffers=bufs, ident=ident,
945 subheader=subheader, track=track)
1157 946
1158 idents,targets = self._build_targets(targets)
1159
1160 subheader = {}
1161 content = dict(bound=bound)
1162 bufs = util.pack_apply_message(f,args,kwargs)
1163
1164 msg_ids = []
1165 trackers = []
1166 for ident in idents:
1167 msg = self.session.send(self._apply_socket, "apply_request",
1168 content=content, buffers=bufs, ident=[self._mux_ident, ident], subheader=subheader,
1169 track=track)
1170 if track:
1171 trackers.append(msg['tracker'])
1172 947 msg_id = msg['msg_id']
1173 948 self.outstanding.add(msg_id)
949 if ident:
950 # possibly routed to a specific engine
951 if isinstance(ident, list):
952 ident = ident[-1]
953 if ident in self._engines.values():
954 # save for later, in case of engine death
1174 955 self._outstanding_dict[ident].add(msg_id)
1175 956 self.history.append(msg_id)
1176 msg_ids.append(msg_id)
1177
1178 tracker = None if track is False else zmq.MessageTracker(*trackers)
1179 ar = AsyncResult(self, msg_ids, fname=f.__name__, targets=targets, tracker=tracker)
957 self.metadata[msg_id]['submitted'] = datetime.now()
1180 958
1181 if block:
1182 try:
1183 return ar.get()
1184 except KeyboardInterrupt:
1185 return ar
1186 else:
1187 return ar
959 return msg
1188 960
1189 961 #--------------------------------------------------------------------------
1190 962 # construct a View object
1191 963 #--------------------------------------------------------------------------
1192 964
1193 @defaultblock
1194 def remote(self, bound=False, block=None, targets=None, balanced=None):
1195 """Decorator for making a RemoteFunction"""
1196 return remote(self, bound=bound, targets=targets, block=block, balanced=balanced)
1197
1198 @defaultblock
1199 def parallel(self, dist='b', bound=False, block=None, targets=None, balanced=None):
1200 """Decorator for making a ParallelFunction"""
1201 return parallel(self, bound=bound, targets=targets, block=block, balanced=balanced)
1202
1203 965 def _cache_view(self, targets, balanced):
1204 966 """save views, so subsequent requests don't create new objects."""
1205 967 if balanced:
968 # validate whether we can run
969 if not self._task_socket:
970 msg = "Task farming is disabled"
971 if self._task_scheme == 'pure':
972 msg += " because the pure ZMQ scheduler cannot handle"
973 msg += " disappearing engines."
974 raise RuntimeError(msg)
975 socket = self._task_socket
1206 976 view_class = LoadBalancedView
1207 977 view_cache = self._balanced_views
1208 978 else:
979 socket = self._mux_socket
1209 980 view_class = DirectView
1210 981 view_cache = self._direct_views
1211 982
1212 983 # use str, since often targets will be a list
1213 984 key = str(targets)
1214 985 if key not in view_cache:
1215 view_cache[key] = view_class(client=self, targets=targets)
986 view_cache[key] = view_class(client=self, socket=socket, targets=targets)
1216 987
1217 988 return view_cache[key]
1218 989
1219 def view(self, targets=None, balanced=None):
990 def load_balanced_view(self, targets=None):
991 """construct a DirectView object.
992
993 If no arguments are specified, create a LoadBalancedView
994 using all engines.
995
996 Parameters
997 ----------
998
999 targets: list,slice,int,etc. [default: use all engines]
1000 The subset of engines across which to load-balance
1001 """
1002 return self._get_view(targets, balanced=True)
1003
1004 def direct_view(self, targets='all'):
1005 """construct a DirectView object.
1006
1007 If no targets are specified, create a DirectView
1008 using all engines.
1009
1010 Parameters
1011 ----------
1012
1013 targets: list,slice,int,etc. [default: use all engines]
1014 The engines to use for the View
1015 """
1016 return self._get_view(targets, balanced=False)
1017
1018 def _get_view(self, targets, balanced):
1220 1019 """Method for constructing View objects.
1221 1020
1222 1021 If no arguments are specified, create a LoadBalancedView
1223 1022 using all engines. If only `targets` specified, it will
1224 1023 be a DirectView. This method is the underlying implementation
1225 1024 of ``client.__getitem__``.
1226 1025
1227 1026 Parameters
1228 1027 ----------
1229 1028
1230 1029 targets: list,slice,int,etc. [default: use all engines]
1231 1030 The engines to use for the View
1232 1031 balanced : bool [default: False if targets specified, True else]
1233 1032 whether to build a LoadBalancedView or a DirectView
1234 1033
1235 1034 """
1236 1035
1237 balanced = (targets is None) if balanced is None else balanced
1238
1239 if targets is None:
1036 if targets in (None,'all'):
1240 1037 if balanced:
1241 1038 return self._cache_view(None,True)
1242 1039 else:
1243 1040 targets = slice(None)
1244 1041
1245 1042 if isinstance(targets, int):
1246 1043 if targets < 0:
1247 1044 targets = self.ids[targets]
1248 1045 if targets not in self.ids:
1249 1046 raise IndexError("No such engine: %i"%targets)
1250 1047 return self._cache_view(targets, balanced)
1251 1048
1252 1049 if isinstance(targets, slice):
1253 1050 indices = range(len(self.ids))[targets]
1254 1051 ids = sorted(self._ids)
1255 1052 targets = [ ids[i] for i in indices ]
1256 1053
1257 1054 if isinstance(targets, (tuple, list, xrange)):
1258 1055 _,targets = self._build_targets(list(targets))
1259 1056 return self._cache_view(targets, balanced)
1260 1057 else:
1261 1058 raise TypeError("targets by int/slice/collection of ints only, not %s"%(type(targets)))
1262 1059
1263 1060 #--------------------------------------------------------------------------
1264 # Data movement
1061 # Data movement (TO BE REMOVED)
1265 1062 #--------------------------------------------------------------------------
1266 1063
1267 @defaultblock
1268 def push(self, ns, targets='all', block=None, track=False):
1064 @default_block
1065 def _push(self, ns, targets='all', block=None, track=False):
1269 1066 """Push the contents of `ns` into the namespace on `target`"""
1270 1067 if not isinstance(ns, dict):
1271 1068 raise TypeError("Must be a dict, not %s"%type(ns))
1272 result = self.apply(_push, kwargs=ns, targets=targets, block=block, bound=True, balanced=False, track=track)
1069 result = self.apply(util._push, kwargs=ns, targets=targets, block=block, bound=True, balanced=False, track=track)
1273 1070 if not block:
1274 1071 return result
1275 1072
1276 @defaultblock
1277 def pull(self, keys, targets='all', block=None):
1073 @default_block
1074 def _pull(self, keys, targets='all', block=None):
1278 1075 """Pull objects from `target`'s namespace by `keys`"""
1279 1076 if isinstance(keys, basestring):
1280 1077 pass
1281 1078 elif isinstance(keys, (list,tuple,set)):
1282 1079 for key in keys:
1283 1080 if not isinstance(key, basestring):
1284 1081 raise TypeError("keys must be str, not type %r"%type(key))
1285 1082 else:
1286 1083 raise TypeError("keys must be strs, not %r"%keys)
1287 result = self.apply(_pull, (keys,), targets=targets, block=block, bound=True, balanced=False)
1084 result = self.apply(util._pull, (keys,), targets=targets, block=block, bound=True, balanced=False)
1288 1085 return result
1289 1086
1290 @defaultblock
1291 def scatter(self, key, seq, dist='b', flatten=False, targets='all', block=None, track=False):
1292 """
1293 Partition a Python sequence and send the partitions to a set of engines.
1294 """
1295 targets = self._build_targets(targets)[-1]
1296 mapObject = Map.dists[dist]()
1297 nparts = len(targets)
1298 msg_ids = []
1299 trackers = []
1300 for index, engineid in enumerate(targets):
1301 partition = mapObject.getPartition(seq, index, nparts)
1302 if flatten and len(partition) == 1:
1303 r = self.push({key: partition[0]}, targets=engineid, block=False, track=track)
1304 else:
1305 r = self.push({key: partition}, targets=engineid, block=False, track=track)
1306 msg_ids.extend(r.msg_ids)
1307 if track:
1308 trackers.append(r._tracker)
1309
1310 if track:
1311 tracker = zmq.MessageTracker(*trackers)
1312 else:
1313 tracker = None
1314
1315 r = AsyncResult(self, msg_ids, fname='scatter', targets=targets, tracker=tracker)
1316 if block:
1317 r.wait()
1318 else:
1319 return r
1320
1321 @defaultblock
1322 def gather(self, key, dist='b', targets='all', block=None):
1323 """
1324 Gather a partitioned sequence on a set of engines as a single local seq.
1325 """
1326
1327 targets = self._build_targets(targets)[-1]
1328 mapObject = Map.dists[dist]()
1329 msg_ids = []
1330 for index, engineid in enumerate(targets):
1331 msg_ids.extend(self.pull(key, targets=engineid,block=False).msg_ids)
1332
1333 r = AsyncMapResult(self, msg_ids, mapObject, fname='gather')
1334 if block:
1335 return r.get()
1336 else:
1337 return r
1338
1339 1087 #--------------------------------------------------------------------------
1340 1088 # Query methods
1341 1089 #--------------------------------------------------------------------------
1342 1090
1343 @spinfirst
1344 @defaultblock
1091 @spin_first
1092 @default_block
1345 1093 def get_result(self, indices_or_msg_ids=None, block=None):
1346 1094 """Retrieve a result by msg_id or history index, wrapped in an AsyncResult object.
1347 1095
1348 1096 If the client already has the results, no request to the Hub will be made.
1349 1097
1350 1098 This is a convenient way to construct AsyncResult objects, which are wrappers
1351 1099 that include metadata about execution, and allow for awaiting results that
1352 1100 were not submitted by this Client.
1353 1101
1354 1102 It can also be a convenient way to retrieve the metadata associated with
1355 1103 blocking execution, since it always retrieves
1356 1104
1357 1105 Examples
1358 1106 --------
1359 1107 ::
1360 1108
1361 1109 In [10]: r = client.apply()
1362 1110
1363 1111 Parameters
1364 1112 ----------
1365 1113
1366 1114 indices_or_msg_ids : integer history index, str msg_id, or list of either
1367 1115 The indices or msg_ids of indices to be retrieved
1368 1116
1369 1117 block : bool
1370 1118 Whether to wait for the result to be done
1371 1119
1372 1120 Returns
1373 1121 -------
1374 1122
1375 1123 AsyncResult
1376 1124 A single AsyncResult object will always be returned.
1377 1125
1378 1126 AsyncHubResult
1379 1127 A subclass of AsyncResult that retrieves results from the Hub
1380 1128
1381 1129 """
1382 1130 if indices_or_msg_ids is None:
1383 1131 indices_or_msg_ids = -1
1384 1132
1385 1133 if not isinstance(indices_or_msg_ids, (list,tuple)):
1386 1134 indices_or_msg_ids = [indices_or_msg_ids]
1387 1135
1388 1136 theids = []
1389 1137 for id in indices_or_msg_ids:
1390 1138 if isinstance(id, int):
1391 1139 id = self.history[id]
1392 1140 if not isinstance(id, str):
1393 1141 raise TypeError("indices must be str or int, not %r"%id)
1394 1142 theids.append(id)
1395 1143
1396 1144 local_ids = filter(lambda msg_id: msg_id in self.history or msg_id in self.results, theids)
1397 1145 remote_ids = filter(lambda msg_id: msg_id not in local_ids, theids)
1398 1146
1399 1147 if remote_ids:
1400 1148 ar = AsyncHubResult(self, msg_ids=theids)
1401 1149 else:
1402 1150 ar = AsyncResult(self, msg_ids=theids)
1403 1151
1404 1152 if block:
1405 1153 ar.wait()
1406 1154
1407 1155 return ar
1408 1156
1409 @spinfirst
1157 @spin_first
1410 1158 def result_status(self, msg_ids, status_only=True):
1411 1159 """Check on the status of the result(s) of the apply request with `msg_ids`.
1412 1160
1413 1161 If status_only is False, then the actual results will be retrieved, else
1414 1162 only the status of the results will be checked.
1415 1163
1416 1164 Parameters
1417 1165 ----------
1418 1166
1419 1167 msg_ids : list of msg_ids
1420 1168 if int:
1421 1169 Passed as index to self.history for convenience.
1422 1170 status_only : bool (default: True)
1423 1171 if False:
1424 1172 Retrieve the actual results of completed tasks.
1425 1173
1426 1174 Returns
1427 1175 -------
1428 1176
1429 1177 results : dict
1430 1178 There will always be the keys 'pending' and 'completed', which will
1431 1179 be lists of msg_ids that are incomplete or complete. If `status_only`
1432 1180 is False, then completed results will be keyed by their `msg_id`.
1433 1181 """
1434 1182 if not isinstance(msg_ids, (list,tuple)):
1435 1183 msg_ids = [msg_ids]
1436 1184
1437 1185 theids = []
1438 1186 for msg_id in msg_ids:
1439 1187 if isinstance(msg_id, int):
1440 1188 msg_id = self.history[msg_id]
1441 1189 if not isinstance(msg_id, basestring):
1442 1190 raise TypeError("msg_ids must be str, not %r"%msg_id)
1443 1191 theids.append(msg_id)
1444 1192
1445 1193 completed = []
1446 1194 local_results = {}
1447 1195
1448 1196 # comment this block out to temporarily disable local shortcut:
1449 1197 for msg_id in theids:
1450 1198 if msg_id in self.results:
1451 1199 completed.append(msg_id)
1452 1200 local_results[msg_id] = self.results[msg_id]
1453 1201 theids.remove(msg_id)
1454 1202
1455 1203 if theids: # some not locally cached
1456 1204 content = dict(msg_ids=theids, status_only=status_only)
1457 1205 msg = self.session.send(self._query_socket, "result_request", content=content)
1458 1206 zmq.select([self._query_socket], [], [])
1459 1207 idents,msg = self.session.recv(self._query_socket, zmq.NOBLOCK)
1460 1208 if self.debug:
1461 1209 pprint(msg)
1462 1210 content = msg['content']
1463 1211 if content['status'] != 'ok':
1464 1212 raise self._unwrap_exception(content)
1465 1213 buffers = msg['buffers']
1466 1214 else:
1467 1215 content = dict(completed=[],pending=[])
1468 1216
1469 1217 content['completed'].extend(completed)
1470 1218
1471 1219 if status_only:
1472 1220 return content
1473 1221
1474 1222 failures = []
1475 1223 # load cached results into result:
1476 1224 content.update(local_results)
1477 1225 # update cache with results:
1478 1226 for msg_id in sorted(theids):
1479 1227 if msg_id in content['completed']:
1480 1228 rec = content[msg_id]
1481 1229 parent = rec['header']
1482 1230 header = rec['result_header']
1483 1231 rcontent = rec['result_content']
1484 1232 iodict = rec['io']
1485 1233 if isinstance(rcontent, str):
1486 1234 rcontent = self.session.unpack(rcontent)
1487 1235
1488 1236 md = self.metadata[msg_id]
1489 1237 md.update(self._extract_metadata(header, parent, rcontent))
1490 1238 md.update(iodict)
1491 1239
1492 1240 if rcontent['status'] == 'ok':
1493 1241 res,buffers = util.unserialize_object(buffers)
1494 1242 else:
1495 1243 print rcontent
1496 1244 res = self._unwrap_exception(rcontent)
1497 1245 failures.append(res)
1498 1246
1499 1247 self.results[msg_id] = res
1500 1248 content[msg_id] = res
1501 1249
1502 1250 if len(theids) == 1 and failures:
1503 1251 raise failures[0]
1504 1252
1505 1253 error.collect_exceptions(failures, "result_status")
1506 1254 return content
1507 1255
1508 @spinfirst
1256 @spin_first
1509 1257 def queue_status(self, targets='all', verbose=False):
1510 1258 """Fetch the status of engine queues.
1511 1259
1512 1260 Parameters
1513 1261 ----------
1514 1262
1515 1263 targets : int/str/list of ints/strs
1516 1264 the engines whose states are to be queried.
1517 1265 default : all
1518 1266 verbose : bool
1519 1267 Whether to return lengths only, or lists of ids for each element
1520 1268 """
1521 targets = self._build_targets(targets)[1]
1522 content = dict(targets=targets, verbose=verbose)
1269 engine_ids = self._build_targets(targets)[1]
1270 content = dict(targets=engine_ids, verbose=verbose)
1523 1271 self.session.send(self._query_socket, "queue_request", content=content)
1524 1272 idents,msg = self.session.recv(self._query_socket, 0)
1525 1273 if self.debug:
1526 1274 pprint(msg)
1527 1275 content = msg['content']
1528 1276 status = content.pop('status')
1529 1277 if status != 'ok':
1530 1278 raise self._unwrap_exception(content)
1531 return util.rekey(content)
1279 content = util.rekey(content)
1280 if isinstance(targets, int):
1281 return content[targets]
1282 else:
1283 return content
1532 1284
1533 @spinfirst
1285 @spin_first
1534 1286 def purge_results(self, jobs=[], targets=[]):
1535 """Tell the controller to forget results.
1287 """Tell the Hub to forget results.
1536 1288
1537 1289 Individual results can be purged by msg_id, or the entire
1538 1290 history of specific targets can be purged.
1539 1291
1540 1292 Parameters
1541 1293 ----------
1542 1294
1543 jobs : str or list of strs or AsyncResult objects
1295 jobs : str or list of str or AsyncResult objects
1544 1296 the msg_ids whose results should be forgotten.
1545 1297 targets : int/str/list of ints/strs
1546 1298 The targets, by uuid or int_id, whose entire history is to be purged.
1547 Use `targets='all'` to scrub everything from the controller's memory.
1299 Use `targets='all'` to scrub everything from the Hub's memory.
1548 1300
1549 1301 default : None
1550 1302 """
1551 1303 if not targets and not jobs:
1552 1304 raise ValueError("Must specify at least one of `targets` and `jobs`")
1553 1305 if targets:
1554 1306 targets = self._build_targets(targets)[1]
1555 1307
1556 1308 # construct msg_ids from jobs
1557 1309 msg_ids = []
1558 1310 if isinstance(jobs, (basestring,AsyncResult)):
1559 1311 jobs = [jobs]
1560 1312 bad_ids = filter(lambda obj: not isinstance(obj, (basestring, AsyncResult)), jobs)
1561 1313 if bad_ids:
1562 1314 raise TypeError("Invalid msg_id type %r, expected str or AsyncResult"%bad_ids[0])
1563 1315 for j in jobs:
1564 1316 if isinstance(j, AsyncResult):
1565 1317 msg_ids.extend(j.msg_ids)
1566 1318 else:
1567 1319 msg_ids.append(j)
1568 1320
1569 1321 content = dict(targets=targets, msg_ids=msg_ids)
1570 1322 self.session.send(self._query_socket, "purge_request", content=content)
1571 1323 idents, msg = self.session.recv(self._query_socket, 0)
1572 1324 if self.debug:
1573 1325 pprint(msg)
1574 1326 content = msg['content']
1575 1327 if content['status'] != 'ok':
1576 1328 raise self._unwrap_exception(content)
1577 1329
1578 1330
1579 1331 __all__ = [ 'Client',
1580 1332 'depend',
1581 1333 'require',
1582 1334 'remote',
1583 1335 'parallel',
1584 1336 'RemoteFunction',
1585 1337 'ParallelFunction',
1586 1338 'DirectView',
1587 1339 'LoadBalancedView',
1588 1340 'AsyncResult',
1589 1341 'AsyncMapResult',
1590 1342 'Reference'
1591 1343 ]
@@ -1,159 +1,184 b''
1 1 """Dependency utilities"""
2 2 #-----------------------------------------------------------------------------
3 3 # Copyright (C) 2010-2011 The IPython Development Team
4 4 #
5 5 # Distributed under the terms of the BSD License. The full license is in
6 6 # the file COPYING, distributed as part of this software.
7 7 #-----------------------------------------------------------------------------
8 8
9 from IPython.external.decorator import decorator
10
11 9 from .asyncresult import AsyncResult
12 10 from .error import UnmetDependency
13
11 from .util import interactive
14 12
15 13 class depend(object):
16 14 """Dependency decorator, for use with tasks.
17 15
18 16 `@depend` lets you define a function for engine dependencies
19 17 just like you use `apply` for tasks.
20 18
21 19
22 20 Examples
23 21 --------
24 22 ::
25 23
26 24 @depend(df, a,b, c=5)
27 25 def f(m,n,p)
28 26
29 27 view.apply(f, 1,2,3)
30 28
31 29 will call df(a,b,c=5) on the engine, and if it returns False or
32 30 raises an UnmetDependency error, then the task will not be run
33 31 and another engine will be tried.
34 32 """
35 33 def __init__(self, f, *args, **kwargs):
36 34 self.f = f
37 35 self.args = args
38 36 self.kwargs = kwargs
39 37
40 38 def __call__(self, f):
41 39 return dependent(f, self.f, *self.args, **self.kwargs)
42 40
43 41 class dependent(object):
44 42 """A function that depends on another function.
45 43 This is an object to prevent the closure used
46 44 in traditional decorators, which are not picklable.
47 45 """
48 46
49 47 def __init__(self, f, df, *dargs, **dkwargs):
50 48 self.f = f
51 49 self.func_name = getattr(f, '__name__', 'f')
52 50 self.df = df
53 51 self.dargs = dargs
54 52 self.dkwargs = dkwargs
55 53
56 54 def __call__(self, *args, **kwargs):
55 # if hasattr(self.f, 'func_globals') and hasattr(self.df, 'func_globals'):
56 # self.df.func_globals = self.f.func_globals
57 57 if self.df(*self.dargs, **self.dkwargs) is False:
58 58 raise UnmetDependency()
59 59 return self.f(*args, **kwargs)
60 60
61 61 @property
62 62 def __name__(self):
63 63 return self.func_name
64 64
65 @interactive
65 66 def _require(*names):
66 67 """Helper for @require decorator."""
68 from IPython.zmq.parallel.error import UnmetDependency
69 user_ns = globals()
67 70 for name in names:
71 if name in user_ns:
72 continue
68 73 try:
69 __import__(name)
74 exec 'import %s'%name in user_ns
70 75 except ImportError:
71 return False
76 raise UnmetDependency(name)
72 77 return True
73 78
74 79 def require(*names):
75 80 """Simple decorator for requiring names to be importable.
76 81
77 82 Examples
78 83 --------
79 84
80 85 In [1]: @require('numpy')
81 86 ...: def norm(a):
82 87 ...: import numpy
83 88 ...: return numpy.linalg.norm(a,2)
84 89 """
85 90 return depend(_require, *names)
86 91
87 92 class Dependency(set):
88 93 """An object for representing a set of msg_id dependencies.
89 94
90 95 Subclassed from set().
91 96
92 97 Parameters
93 98 ----------
94 99 dependencies: list/set of msg_ids or AsyncResult objects or output of Dependency.as_dict()
95 100 The msg_ids to depend on
96 101 all : bool [default True]
97 102 Whether the dependency should be considered met when *all* depending tasks have completed
98 103 or only when *any* have been completed.
99 success_only : bool [default True]
100 Whether to consider only successes for Dependencies, or consider failures as well.
101 If `all=success_only=True`, then this task will fail with an ImpossibleDependency
104 success : bool [default True]
105 Whether to consider successes as fulfilling dependencies.
106 failure : bool [default False]
107 Whether to consider failures as fulfilling dependencies.
108
109 If `all=success=True` and `failure=False`, then the task will fail with an ImpossibleDependency
102 110 as soon as the first depended-upon task fails.
103 111 """
104 112
105 113 all=True
106 success_only=True
114 success=True
115 failure=True
107 116
108 def __init__(self, dependencies=[], all=True, success_only=True):
117 def __init__(self, dependencies=[], all=True, success=True, failure=False):
109 118 if isinstance(dependencies, dict):
110 119 # load from dict
111 120 all = dependencies.get('all', True)
112 success_only = dependencies.get('success_only', success_only)
121 success = dependencies.get('success', success)
122 failure = dependencies.get('failure', failure)
113 123 dependencies = dependencies.get('dependencies', [])
114 124 ids = []
115 if isinstance(dependencies, AsyncResult):
116 ids.extend(AsyncResult.msg_ids)
117 else:
125
126 # extract ids from various sources:
127 if isinstance(dependencies, (basestring, AsyncResult)):
128 dependencies = [dependencies]
118 129 for d in dependencies:
119 130 if isinstance(d, basestring):
120 131 ids.append(d)
121 132 elif isinstance(d, AsyncResult):
122 133 ids.extend(d.msg_ids)
123 134 else:
124 135 raise TypeError("invalid dependency type: %r"%type(d))
136
125 137 set.__init__(self, ids)
126 138 self.all = all
127 self.success_only=success_only
139 if not (success or failure):
140 raise ValueError("Must depend on at least one of successes or failures!")
141 self.success=success
142 self.failure = failure
128 143
129 144 def check(self, completed, failed=None):
130 if failed is not None and not self.success_only:
131 completed = completed.union(failed)
145 """check whether our dependencies have been met."""
132 146 if len(self) == 0:
133 147 return True
148 against = set()
149 if self.success:
150 against = completed
151 if failed is not None and self.failure:
152 against = against.union(failed)
134 153 if self.all:
135 return self.issubset(completed)
154 return self.issubset(against)
136 155 else:
137 return not self.isdisjoint(completed)
156 return not self.isdisjoint(against)
138 157
139 def unreachable(self, failed):
140 if len(self) == 0 or len(failed) == 0 or not self.success_only:
158 def unreachable(self, completed, failed=None):
159 """return whether this dependency has become impossible."""
160 if len(self) == 0:
141 161 return False
142 # print self, self.success_only, self.all, failed
162 against = set()
163 if not self.success:
164 against = completed
165 if failed is not None and not self.failure:
166 against = against.union(failed)
143 167 if self.all:
144 return not self.isdisjoint(failed)
168 return not self.isdisjoint(against)
145 169 else:
146 return self.issubset(failed)
170 return self.issubset(against)
147 171
148 172
149 173 def as_dict(self):
150 174 """Represent this dependency as a dict. For json compatibility."""
151 175 return dict(
152 176 dependencies=list(self),
153 177 all=self.all,
154 success_only=self.success_only,
178 success=self.success,
179 failure=self.failure
155 180 )
156 181
157 182
158 183 __all__ = ['depend', 'require', 'dependent', 'Dependency']
159 184
@@ -1,1039 +1,1035 b''
1 1 #!/usr/bin/env python
2 2 """The IPython Controller Hub with 0MQ
3 3 This is the master object that handles connections from engines and clients,
4 4 and monitors traffic through the various queues.
5 5 """
6 6 #-----------------------------------------------------------------------------
7 7 # Copyright (C) 2010 The IPython Development Team
8 8 #
9 9 # Distributed under the terms of the BSD License. The full license is in
10 10 # the file COPYING, distributed as part of this software.
11 11 #-----------------------------------------------------------------------------
12 12
13 13 #-----------------------------------------------------------------------------
14 14 # Imports
15 15 #-----------------------------------------------------------------------------
16 16 from __future__ import print_function
17 17
18 18 import sys
19 19 import time
20 20 from datetime import datetime
21 21
22 22 import zmq
23 23 from zmq.eventloop import ioloop
24 24 from zmq.eventloop.zmqstream import ZMQStream
25 25
26 26 # internal:
27 27 from IPython.utils.importstring import import_item
28 28 from IPython.utils.traitlets import HasTraits, Instance, Int, CStr, Str, Dict, Set, List, Bool
29 29
30 30 from .entry_point import select_random_ports
31 31 from .factory import RegistrationFactory, LoggingFactory
32 32
33 33 from . import error
34 34 from .heartmonitor import HeartMonitor
35 35 from .util import validate_url_container, ISO8601
36 36
37 37 #-----------------------------------------------------------------------------
38 38 # Code
39 39 #-----------------------------------------------------------------------------
40 40
41 41 def _passer(*args, **kwargs):
42 42 return
43 43
44 44 def _printer(*args, **kwargs):
45 45 print (args)
46 46 print (kwargs)
47 47
48 48 def init_record(msg):
49 49 """Initialize a TaskRecord based on a request."""
50 50 header = msg['header']
51 51 return {
52 52 'msg_id' : header['msg_id'],
53 53 'header' : header,
54 54 'content': msg['content'],
55 55 'buffers': msg['buffers'],
56 56 'submitted': datetime.strptime(header['date'], ISO8601),
57 57 'client_uuid' : None,
58 58 'engine_uuid' : None,
59 59 'started': None,
60 60 'completed': None,
61 61 'resubmitted': None,
62 62 'result_header' : None,
63 63 'result_content' : None,
64 64 'result_buffers' : None,
65 65 'queue' : None,
66 66 'pyin' : None,
67 67 'pyout': None,
68 68 'pyerr': None,
69 69 'stdout': '',
70 70 'stderr': '',
71 71 }
72 72
73 73
74 74 class EngineConnector(HasTraits):
75 75 """A simple object for accessing the various zmq connections of an object.
76 76 Attributes are:
77 77 id (int): engine ID
78 78 uuid (str): uuid (unused?)
79 79 queue (str): identity of queue's XREQ socket
80 80 registration (str): identity of registration XREQ socket
81 81 heartbeat (str): identity of heartbeat XREQ socket
82 82 """
83 83 id=Int(0)
84 84 queue=Str()
85 85 control=Str()
86 86 registration=Str()
87 87 heartbeat=Str()
88 88 pending=Set()
89 89
90 90 class HubFactory(RegistrationFactory):
91 91 """The Configurable for setting up a Hub."""
92 92
93 93 # name of a scheduler scheme
94 94 scheme = Str('leastload', config=True)
95 95
96 96 # port-pairs for monitoredqueues:
97 97 hb = Instance(list, config=True)
98 98 def _hb_default(self):
99 99 return select_random_ports(2)
100 100
101 101 mux = Instance(list, config=True)
102 102 def _mux_default(self):
103 103 return select_random_ports(2)
104 104
105 105 task = Instance(list, config=True)
106 106 def _task_default(self):
107 107 return select_random_ports(2)
108 108
109 109 control = Instance(list, config=True)
110 110 def _control_default(self):
111 111 return select_random_ports(2)
112 112
113 113 iopub = Instance(list, config=True)
114 114 def _iopub_default(self):
115 115 return select_random_ports(2)
116 116
117 117 # single ports:
118 118 mon_port = Instance(int, config=True)
119 119 def _mon_port_default(self):
120 120 return select_random_ports(1)[0]
121 121
122 122 notifier_port = Instance(int, config=True)
123 123 def _notifier_port_default(self):
124 124 return select_random_ports(1)[0]
125 125
126 126 ping = Int(1000, config=True) # ping frequency
127 127
128 128 engine_ip = CStr('127.0.0.1', config=True)
129 129 engine_transport = CStr('tcp', config=True)
130 130
131 131 client_ip = CStr('127.0.0.1', config=True)
132 132 client_transport = CStr('tcp', config=True)
133 133
134 134 monitor_ip = CStr('127.0.0.1', config=True)
135 135 monitor_transport = CStr('tcp', config=True)
136 136
137 137 monitor_url = CStr('')
138 138
139 139 db_class = CStr('IPython.zmq.parallel.dictdb.DictDB', config=True)
140 140
141 141 # not configurable
142 142 db = Instance('IPython.zmq.parallel.dictdb.BaseDB')
143 143 heartmonitor = Instance('IPython.zmq.parallel.heartmonitor.HeartMonitor')
144 144 subconstructors = List()
145 145 _constructed = Bool(False)
146 146
147 147 def _ip_changed(self, name, old, new):
148 148 self.engine_ip = new
149 149 self.client_ip = new
150 150 self.monitor_ip = new
151 151 self._update_monitor_url()
152 152
153 153 def _update_monitor_url(self):
154 154 self.monitor_url = "%s://%s:%i"%(self.monitor_transport, self.monitor_ip, self.mon_port)
155 155
156 156 def _transport_changed(self, name, old, new):
157 157 self.engine_transport = new
158 158 self.client_transport = new
159 159 self.monitor_transport = new
160 160 self._update_monitor_url()
161 161
162 162 def __init__(self, **kwargs):
163 163 super(HubFactory, self).__init__(**kwargs)
164 164 self._update_monitor_url()
165 165 # self.on_trait_change(self._sync_ips, 'ip')
166 166 # self.on_trait_change(self._sync_transports, 'transport')
167 167 self.subconstructors.append(self.construct_hub)
168 168
169 169
170 170 def construct(self):
171 171 assert not self._constructed, "already constructed!"
172 172
173 173 for subc in self.subconstructors:
174 174 subc()
175 175
176 176 self._constructed = True
177 177
178 178
179 179 def start(self):
180 180 assert self._constructed, "must be constructed by self.construct() first!"
181 181 self.heartmonitor.start()
182 182 self.log.info("Heartmonitor started")
183 183
184 184 def construct_hub(self):
185 185 """construct"""
186 186 client_iface = "%s://%s:"%(self.client_transport, self.client_ip) + "%i"
187 187 engine_iface = "%s://%s:"%(self.engine_transport, self.engine_ip) + "%i"
188 188
189 189 ctx = self.context
190 190 loop = self.loop
191 191
192 192 # Registrar socket
193 193 q = ZMQStream(ctx.socket(zmq.XREP), loop)
194 194 q.bind(client_iface % self.regport)
195 195 self.log.info("Hub listening on %s for registration."%(client_iface%self.regport))
196 196 if self.client_ip != self.engine_ip:
197 197 q.bind(engine_iface % self.regport)
198 198 self.log.info("Hub listening on %s for registration."%(engine_iface%self.regport))
199 199
200 200 ### Engine connections ###
201 201
202 202 # heartbeat
203 203 hpub = ctx.socket(zmq.PUB)
204 204 hpub.bind(engine_iface % self.hb[0])
205 205 hrep = ctx.socket(zmq.XREP)
206 206 hrep.bind(engine_iface % self.hb[1])
207 207 self.heartmonitor = HeartMonitor(loop=loop, pingstream=ZMQStream(hpub,loop), pongstream=ZMQStream(hrep,loop),
208 208 period=self.ping, logname=self.log.name)
209 209
210 210 ### Client connections ###
211 211 # Notifier socket
212 212 n = ZMQStream(ctx.socket(zmq.PUB), loop)
213 213 n.bind(client_iface%self.notifier_port)
214 214
215 215 ### build and launch the queues ###
216 216
217 217 # monitor socket
218 218 sub = ctx.socket(zmq.SUB)
219 219 sub.setsockopt(zmq.SUBSCRIBE, "")
220 220 sub.bind(self.monitor_url)
221 221 sub.bind('inproc://monitor')
222 222 sub = ZMQStream(sub, loop)
223 223
224 224 # connect the db
225 225 self.log.info('Hub using DB backend: %r'%(self.db_class.split()[-1]))
226 226 # cdir = self.config.Global.cluster_dir
227 227 self.db = import_item(self.db_class)(session=self.session.session, config=self.config)
228 228 time.sleep(.25)
229 229
230 230 # build connection dicts
231 231 self.engine_info = {
232 232 'control' : engine_iface%self.control[1],
233 233 'mux': engine_iface%self.mux[1],
234 234 'heartbeat': (engine_iface%self.hb[0], engine_iface%self.hb[1]),
235 235 'task' : engine_iface%self.task[1],
236 236 'iopub' : engine_iface%self.iopub[1],
237 237 # 'monitor' : engine_iface%self.mon_port,
238 238 }
239 239
240 240 self.client_info = {
241 241 'control' : client_iface%self.control[0],
242 242 'mux': client_iface%self.mux[0],
243 243 'task' : (self.scheme, client_iface%self.task[0]),
244 244 'iopub' : client_iface%self.iopub[0],
245 245 'notification': client_iface%self.notifier_port
246 246 }
247 247 self.log.debug("Hub engine addrs: %s"%self.engine_info)
248 248 self.log.debug("Hub client addrs: %s"%self.client_info)
249 249 self.hub = Hub(loop=loop, session=self.session, monitor=sub, heartmonitor=self.heartmonitor,
250 250 query=q, notifier=n, db=self.db,
251 251 engine_info=self.engine_info, client_info=self.client_info,
252 252 logname=self.log.name)
253 253
254 254
255 255 class Hub(LoggingFactory):
256 256 """The IPython Controller Hub with 0MQ connections
257 257
258 258 Parameters
259 259 ==========
260 260 loop: zmq IOLoop instance
261 261 session: StreamSession object
262 262 <removed> context: zmq context for creating new connections (?)
263 263 queue: ZMQStream for monitoring the command queue (SUB)
264 264 query: ZMQStream for engine registration and client queries requests (XREP)
265 265 heartbeat: HeartMonitor object checking the pulse of the engines
266 266 notifier: ZMQStream for broadcasting engine registration changes (PUB)
267 267 db: connection to db for out of memory logging of commands
268 268 NotImplemented
269 269 engine_info: dict of zmq connection information for engines to connect
270 270 to the queues.
271 271 client_info: dict of zmq connection information for engines to connect
272 272 to the queues.
273 273 """
274 274 # internal data structures:
275 275 ids=Set() # engine IDs
276 276 keytable=Dict()
277 277 by_ident=Dict()
278 278 engines=Dict()
279 279 clients=Dict()
280 280 hearts=Dict()
281 281 pending=Set()
282 282 queues=Dict() # pending msg_ids keyed by engine_id
283 283 tasks=Dict() # pending msg_ids submitted as tasks, keyed by client_id
284 284 completed=Dict() # completed msg_ids keyed by engine_id
285 285 all_completed=Set() # completed msg_ids keyed by engine_id
286 286 # mia=None
287 287 incoming_registrations=Dict()
288 288 registration_timeout=Int()
289 289 _idcounter=Int(0)
290 290
291 291 # objects from constructor:
292 292 loop=Instance(ioloop.IOLoop)
293 293 query=Instance(ZMQStream)
294 294 monitor=Instance(ZMQStream)
295 295 heartmonitor=Instance(HeartMonitor)
296 296 notifier=Instance(ZMQStream)
297 297 db=Instance(object)
298 298 client_info=Dict()
299 299 engine_info=Dict()
300 300
301 301
302 302 def __init__(self, **kwargs):
303 303 """
304 304 # universal:
305 305 loop: IOLoop for creating future connections
306 306 session: streamsession for sending serialized data
307 307 # engine:
308 308 queue: ZMQStream for monitoring queue messages
309 309 query: ZMQStream for engine+client registration and client requests
310 310 heartbeat: HeartMonitor object for tracking engines
311 311 # extra:
312 312 db: ZMQStream for db connection (NotImplemented)
313 313 engine_info: zmq address/protocol dict for engine connections
314 314 client_info: zmq address/protocol dict for client connections
315 315 """
316 316
317 317 super(Hub, self).__init__(**kwargs)
318 318 self.registration_timeout = max(5000, 2*self.heartmonitor.period)
319 319
320 320 # validate connection dicts:
321 321 for k,v in self.client_info.iteritems():
322 322 if k == 'task':
323 323 validate_url_container(v[1])
324 324 else:
325 325 validate_url_container(v)
326 326 # validate_url_container(self.client_info)
327 327 validate_url_container(self.engine_info)
328 328
329 329 # register our callbacks
330 330 self.query.on_recv(self.dispatch_query)
331 331 self.monitor.on_recv(self.dispatch_monitor_traffic)
332 332
333 333 self.heartmonitor.add_heart_failure_handler(self.handle_heart_failure)
334 334 self.heartmonitor.add_new_heart_handler(self.handle_new_heart)
335 335
336 336 self.monitor_handlers = { 'in' : self.save_queue_request,
337 337 'out': self.save_queue_result,
338 338 'intask': self.save_task_request,
339 339 'outtask': self.save_task_result,
340 340 'tracktask': self.save_task_destination,
341 341 'incontrol': _passer,
342 342 'outcontrol': _passer,
343 343 'iopub': self.save_iopub_message,
344 344 }
345 345
346 346 self.query_handlers = {'queue_request': self.queue_status,
347 347 'result_request': self.get_results,
348 348 'purge_request': self.purge_results,
349 349 'load_request': self.check_load,
350 350 'resubmit_request': self.resubmit_task,
351 351 'shutdown_request': self.shutdown_request,
352 352 'registration_request' : self.register_engine,
353 353 'unregistration_request' : self.unregister_engine,
354 354 'connection_request': self.connection_request,
355 355 }
356 356
357 357 self.log.info("hub::created hub")
358 358
359 359 @property
360 360 def _next_id(self):
361 361 """gemerate a new ID.
362 362
363 363 No longer reuse old ids, just count from 0."""
364 364 newid = self._idcounter
365 365 self._idcounter += 1
366 366 return newid
367 367 # newid = 0
368 368 # incoming = [id[0] for id in self.incoming_registrations.itervalues()]
369 369 # # print newid, self.ids, self.incoming_registrations
370 370 # while newid in self.ids or newid in incoming:
371 371 # newid += 1
372 372 # return newid
373 373
374 374 #-----------------------------------------------------------------------------
375 375 # message validation
376 376 #-----------------------------------------------------------------------------
377 377
378 378 def _validate_targets(self, targets):
379 379 """turn any valid targets argument into a list of integer ids"""
380 380 if targets is None:
381 381 # default to all
382 382 targets = self.ids
383 383
384 384 if isinstance(targets, (int,str,unicode)):
385 385 # only one target specified
386 386 targets = [targets]
387 387 _targets = []
388 388 for t in targets:
389 389 # map raw identities to ids
390 390 if isinstance(t, (str,unicode)):
391 391 t = self.by_ident.get(t, t)
392 392 _targets.append(t)
393 393 targets = _targets
394 394 bad_targets = [ t for t in targets if t not in self.ids ]
395 395 if bad_targets:
396 396 raise IndexError("No Such Engine: %r"%bad_targets)
397 397 if not targets:
398 398 raise IndexError("No Engines Registered")
399 399 return targets
400 400
401 401 #-----------------------------------------------------------------------------
402 402 # dispatch methods (1 per stream)
403 403 #-----------------------------------------------------------------------------
404 404
405 405 # def dispatch_registration_request(self, msg):
406 406 # """"""
407 407 # self.log.debug("registration::dispatch_register_request(%s)"%msg)
408 408 # idents,msg = self.session.feed_identities(msg)
409 409 # if not idents:
410 410 # self.log.error("Bad Query Message: %s"%msg, exc_info=True)
411 411 # return
412 412 # try:
413 413 # msg = self.session.unpack_message(msg,content=True)
414 414 # except:
415 415 # self.log.error("registration::got bad registration message: %s"%msg, exc_info=True)
416 416 # return
417 417 #
418 418 # msg_type = msg['msg_type']
419 419 # content = msg['content']
420 420 #
421 421 # handler = self.query_handlers.get(msg_type, None)
422 422 # if handler is None:
423 423 # self.log.error("registration::got bad registration message: %s"%msg)
424 424 # else:
425 425 # handler(idents, msg)
426 426
427 427 def dispatch_monitor_traffic(self, msg):
428 428 """all ME and Task queue messages come through here, as well as
429 429 IOPub traffic."""
430 430 self.log.debug("monitor traffic: %s"%msg[:2])
431 431 switch = msg[0]
432 432 idents, msg = self.session.feed_identities(msg[1:])
433 433 if not idents:
434 434 self.log.error("Bad Monitor Message: %s"%msg)
435 435 return
436 436 handler = self.monitor_handlers.get(switch, None)
437 437 if handler is not None:
438 438 handler(idents, msg)
439 439 else:
440 440 self.log.error("Invalid monitor topic: %s"%switch)
441 441
442 442
443 443 def dispatch_query(self, msg):
444 444 """Route registration requests and queries from clients."""
445 445 idents, msg = self.session.feed_identities(msg)
446 446 if not idents:
447 447 self.log.error("Bad Query Message: %s"%msg)
448 448 return
449 449 client_id = idents[0]
450 450 try:
451 451 msg = self.session.unpack_message(msg, content=True)
452 452 except:
453 453 content = error.wrap_exception()
454 454 self.log.error("Bad Query Message: %s"%msg, exc_info=True)
455 455 self.session.send(self.query, "hub_error", ident=client_id,
456 456 content=content)
457 457 return
458 458
459 459 # print client_id, header, parent, content
460 460 #switch on message type:
461 461 msg_type = msg['msg_type']
462 462 self.log.info("client::client %s requested %s"%(client_id, msg_type))
463 463 handler = self.query_handlers.get(msg_type, None)
464 464 try:
465 465 assert handler is not None, "Bad Message Type: %s"%msg_type
466 466 except:
467 467 content = error.wrap_exception()
468 468 self.log.error("Bad Message Type: %s"%msg_type, exc_info=True)
469 469 self.session.send(self.query, "hub_error", ident=client_id,
470 470 content=content)
471 471 return
472 472 else:
473 473 handler(idents, msg)
474 474
475 475 def dispatch_db(self, msg):
476 476 """"""
477 477 raise NotImplementedError
478 478
479 479 #---------------------------------------------------------------------------
480 480 # handler methods (1 per event)
481 481 #---------------------------------------------------------------------------
482 482
483 483 #----------------------- Heartbeat --------------------------------------
484 484
485 485 def handle_new_heart(self, heart):
486 486 """handler to attach to heartbeater.
487 487 Called when a new heart starts to beat.
488 488 Triggers completion of registration."""
489 489 self.log.debug("heartbeat::handle_new_heart(%r)"%heart)
490 490 if heart not in self.incoming_registrations:
491 491 self.log.info("heartbeat::ignoring new heart: %r"%heart)
492 492 else:
493 493 self.finish_registration(heart)
494 494
495 495
496 496 def handle_heart_failure(self, heart):
497 497 """handler to attach to heartbeater.
498 498 called when a previously registered heart fails to respond to beat request.
499 499 triggers unregistration"""
500 500 self.log.debug("heartbeat::handle_heart_failure(%r)"%heart)
501 501 eid = self.hearts.get(heart, None)
502 502 queue = self.engines[eid].queue
503 503 if eid is None:
504 504 self.log.info("heartbeat::ignoring heart failure %r"%heart)
505 505 else:
506 506 self.unregister_engine(heart, dict(content=dict(id=eid, queue=queue)))
507 507
508 508 #----------------------- MUX Queue Traffic ------------------------------
509 509
510 510 def save_queue_request(self, idents, msg):
511 511 if len(idents) < 2:
512 512 self.log.error("invalid identity prefix: %s"%idents)
513 513 return
514 514 queue_id, client_id = idents[:2]
515 515 try:
516 516 msg = self.session.unpack_message(msg, content=False)
517 517 except:
518 518 self.log.error("queue::client %r sent invalid message to %r: %s"%(client_id, queue_id, msg), exc_info=True)
519 519 return
520 520
521 521 eid = self.by_ident.get(queue_id, None)
522 522 if eid is None:
523 523 self.log.error("queue::target %r not registered"%queue_id)
524 524 self.log.debug("queue:: valid are: %s"%(self.by_ident.keys()))
525 525 return
526 526
527 527 header = msg['header']
528 528 msg_id = header['msg_id']
529 529 record = init_record(msg)
530 530 record['engine_uuid'] = queue_id
531 531 record['client_uuid'] = client_id
532 532 record['queue'] = 'mux'
533 533
534 534 self.pending.add(msg_id)
535 535 self.queues[eid].append(msg_id)
536 536 self.db.add_record(msg_id, record)
537 537
538 538 def save_queue_result(self, idents, msg):
539 539 if len(idents) < 2:
540 540 self.log.error("invalid identity prefix: %s"%idents)
541 541 return
542 542
543 543 client_id, queue_id = idents[:2]
544 544 try:
545 545 msg = self.session.unpack_message(msg, content=False)
546 546 except:
547 547 self.log.error("queue::engine %r sent invalid message to %r: %s"%(
548 548 queue_id,client_id, msg), exc_info=True)
549 549 return
550 550
551 551 eid = self.by_ident.get(queue_id, None)
552 552 if eid is None:
553 553 self.log.error("queue::unknown engine %r is sending a reply: "%queue_id)
554 554 self.log.debug("queue:: %s"%msg[2:])
555 555 return
556 556
557 557 parent = msg['parent_header']
558 558 if not parent:
559 559 return
560 560 msg_id = parent['msg_id']
561 561 if msg_id in self.pending:
562 562 self.pending.remove(msg_id)
563 563 self.all_completed.add(msg_id)
564 564 self.queues[eid].remove(msg_id)
565 565 self.completed[eid].append(msg_id)
566 566 elif msg_id not in self.all_completed:
567 567 # it could be a result from a dead engine that died before delivering the
568 568 # result
569 569 self.log.warn("queue:: unknown msg finished %s"%msg_id)
570 570 return
571 571 # update record anyway, because the unregistration could have been premature
572 572 rheader = msg['header']
573 573 completed = datetime.strptime(rheader['date'], ISO8601)
574 574 started = rheader.get('started', None)
575 575 if started is not None:
576 576 started = datetime.strptime(started, ISO8601)
577 577 result = {
578 578 'result_header' : rheader,
579 579 'result_content': msg['content'],
580 580 'started' : started,
581 581 'completed' : completed
582 582 }
583 583
584 584 result['result_buffers'] = msg['buffers']
585 585 self.db.update_record(msg_id, result)
586 586
587 587
588 588 #--------------------- Task Queue Traffic ------------------------------
589 589
590 590 def save_task_request(self, idents, msg):
591 591 """Save the submission of a task."""
592 592 client_id = idents[0]
593 593
594 594 try:
595 595 msg = self.session.unpack_message(msg, content=False)
596 596 except:
597 597 self.log.error("task::client %r sent invalid task message: %s"%(
598 598 client_id, msg), exc_info=True)
599 599 return
600 600 record = init_record(msg)
601 601
602 602 record['client_uuid'] = client_id
603 603 record['queue'] = 'task'
604 604 header = msg['header']
605 605 msg_id = header['msg_id']
606 606 self.pending.add(msg_id)
607 607 self.db.add_record(msg_id, record)
608 608
609 609 def save_task_result(self, idents, msg):
610 610 """save the result of a completed task."""
611 611 client_id = idents[0]
612 612 try:
613 613 msg = self.session.unpack_message(msg, content=False)
614 614 except:
615 615 self.log.error("task::invalid task result message send to %r: %s"%(
616 616 client_id, msg), exc_info=True)
617 617 raise
618 618 return
619 619
620 620 parent = msg['parent_header']
621 621 if not parent:
622 622 # print msg
623 623 self.log.warn("Task %r had no parent!"%msg)
624 624 return
625 625 msg_id = parent['msg_id']
626 626
627 627 header = msg['header']
628 628 engine_uuid = header.get('engine', None)
629 629 eid = self.by_ident.get(engine_uuid, None)
630 630
631 631 if msg_id in self.pending:
632 632 self.pending.remove(msg_id)
633 633 self.all_completed.add(msg_id)
634 634 if eid is not None:
635 635 self.completed[eid].append(msg_id)
636 636 if msg_id in self.tasks[eid]:
637 637 self.tasks[eid].remove(msg_id)
638 638 completed = datetime.strptime(header['date'], ISO8601)
639 639 started = header.get('started', None)
640 640 if started is not None:
641 641 started = datetime.strptime(started, ISO8601)
642 642 result = {
643 643 'result_header' : header,
644 644 'result_content': msg['content'],
645 645 'started' : started,
646 646 'completed' : completed,
647 647 'engine_uuid': engine_uuid
648 648 }
649 649
650 650 result['result_buffers'] = msg['buffers']
651 651 self.db.update_record(msg_id, result)
652 652
653 653 else:
654 654 self.log.debug("task::unknown task %s finished"%msg_id)
655 655
656 656 def save_task_destination(self, idents, msg):
657 657 try:
658 658 msg = self.session.unpack_message(msg, content=True)
659 659 except:
660 660 self.log.error("task::invalid task tracking message", exc_info=True)
661 661 return
662 662 content = msg['content']
663 663 # print (content)
664 664 msg_id = content['msg_id']
665 665 engine_uuid = content['engine_id']
666 666 eid = self.by_ident[engine_uuid]
667 667
668 668 self.log.info("task::task %s arrived on %s"%(msg_id, eid))
669 669 # if msg_id in self.mia:
670 670 # self.mia.remove(msg_id)
671 671 # else:
672 672 # self.log.debug("task::task %s not listed as MIA?!"%(msg_id))
673 673
674 674 self.tasks[eid].append(msg_id)
675 675 # self.pending[msg_id][1].update(received=datetime.now(),engine=(eid,engine_uuid))
676 676 self.db.update_record(msg_id, dict(engine_uuid=engine_uuid))
677 677
678 678 def mia_task_request(self, idents, msg):
679 679 raise NotImplementedError
680 680 client_id = idents[0]
681 681 # content = dict(mia=self.mia,status='ok')
682 682 # self.session.send('mia_reply', content=content, idents=client_id)
683 683
684 684
685 685 #--------------------- IOPub Traffic ------------------------------
686 686
687 687 def save_iopub_message(self, topics, msg):
688 688 """save an iopub message into the db"""
689 689 # print (topics)
690 690 try:
691 691 msg = self.session.unpack_message(msg, content=True)
692 692 except:
693 693 self.log.error("iopub::invalid IOPub message", exc_info=True)
694 694 return
695 695
696 696 parent = msg['parent_header']
697 697 if not parent:
698 698 self.log.error("iopub::invalid IOPub message: %s"%msg)
699 699 return
700 700 msg_id = parent['msg_id']
701 701 msg_type = msg['msg_type']
702 702 content = msg['content']
703 703
704 704 # ensure msg_id is in db
705 705 try:
706 706 rec = self.db.get_record(msg_id)
707 707 except:
708 708 self.log.error("iopub::IOPub message has invalid parent", exc_info=True)
709 709 return
710 710 # stream
711 711 d = {}
712 712 if msg_type == 'stream':
713 713 name = content['name']
714 714 s = rec[name] or ''
715 715 d[name] = s + content['data']
716 716
717 717 elif msg_type == 'pyerr':
718 718 d['pyerr'] = content
719 719 else:
720 720 d[msg_type] = content['data']
721 721
722 722 self.db.update_record(msg_id, d)
723 723
724 724
725 725
726 726 #-------------------------------------------------------------------------
727 727 # Registration requests
728 728 #-------------------------------------------------------------------------
729 729
730 730 def connection_request(self, client_id, msg):
731 731 """Reply with connection addresses for clients."""
732 732 self.log.info("client::client %s connected"%client_id)
733 733 content = dict(status='ok')
734 734 content.update(self.client_info)
735 735 jsonable = {}
736 736 for k,v in self.keytable.iteritems():
737 737 jsonable[str(k)] = v
738 738 content['engines'] = jsonable
739 739 self.session.send(self.query, 'connection_reply', content, parent=msg, ident=client_id)
740 740
741 741 def register_engine(self, reg, msg):
742 742 """Register a new engine."""
743 743 content = msg['content']
744 744 try:
745 745 queue = content['queue']
746 746 except KeyError:
747 747 self.log.error("registration::queue not specified", exc_info=True)
748 748 return
749 749 heart = content.get('heartbeat', None)
750 750 """register a new engine, and create the socket(s) necessary"""
751 751 eid = self._next_id
752 752 # print (eid, queue, reg, heart)
753 753
754 754 self.log.debug("registration::register_engine(%i, %r, %r, %r)"%(eid, queue, reg, heart))
755 755
756 756 content = dict(id=eid,status='ok')
757 757 content.update(self.engine_info)
758 758 # check if requesting available IDs:
759 759 if queue in self.by_ident:
760 760 try:
761 761 raise KeyError("queue_id %r in use"%queue)
762 762 except:
763 763 content = error.wrap_exception()
764 764 self.log.error("queue_id %r in use"%queue, exc_info=True)
765 765 elif heart in self.hearts: # need to check unique hearts?
766 766 try:
767 767 raise KeyError("heart_id %r in use"%heart)
768 768 except:
769 769 self.log.error("heart_id %r in use"%heart, exc_info=True)
770 770 content = error.wrap_exception()
771 771 else:
772 772 for h, pack in self.incoming_registrations.iteritems():
773 773 if heart == h:
774 774 try:
775 775 raise KeyError("heart_id %r in use"%heart)
776 776 except:
777 777 self.log.error("heart_id %r in use"%heart, exc_info=True)
778 778 content = error.wrap_exception()
779 779 break
780 780 elif queue == pack[1]:
781 781 try:
782 782 raise KeyError("queue_id %r in use"%queue)
783 783 except:
784 784 self.log.error("queue_id %r in use"%queue, exc_info=True)
785 785 content = error.wrap_exception()
786 786 break
787 787
788 788 msg = self.session.send(self.query, "registration_reply",
789 789 content=content,
790 790 ident=reg)
791 791
792 792 if content['status'] == 'ok':
793 793 if heart in self.heartmonitor.hearts:
794 794 # already beating
795 795 self.incoming_registrations[heart] = (eid,queue,reg[0],None)
796 796 self.finish_registration(heart)
797 797 else:
798 798 purge = lambda : self._purge_stalled_registration(heart)
799 799 dc = ioloop.DelayedCallback(purge, self.registration_timeout, self.loop)
800 800 dc.start()
801 801 self.incoming_registrations[heart] = (eid,queue,reg[0],dc)
802 802 else:
803 803 self.log.error("registration::registration %i failed: %s"%(eid, content['evalue']))
804 804 return eid
805 805
806 806 def unregister_engine(self, ident, msg):
807 807 """Unregister an engine that explicitly requested to leave."""
808 808 try:
809 809 eid = msg['content']['id']
810 810 except:
811 811 self.log.error("registration::bad engine id for unregistration: %s"%ident, exc_info=True)
812 812 return
813 813 self.log.info("registration::unregister_engine(%s)"%eid)
814 814 # print (eid)
815 815 content=dict(id=eid, queue=self.engines[eid].queue)
816 816 self.ids.remove(eid)
817 817 uuid = self.keytable.pop(eid)
818 818 ec = self.engines.pop(eid)
819 819 self.hearts.pop(ec.heartbeat)
820 820 self.by_ident.pop(ec.queue)
821 821 self.completed.pop(eid)
822 822 self._handle_stranded_msgs(eid, uuid)
823 823 ############## TODO: HANDLE IT ################
824 824
825 825 if self.notifier:
826 826 self.session.send(self.notifier, "unregistration_notification", content=content)
827 827
828 828 def _handle_stranded_msgs(self, eid, uuid):
829 829 """Handle messages known to be on an engine when the engine unregisters.
830 830
831 831 It is possible that this will fire prematurely - that is, an engine will
832 832 go down after completing a result, and the client will be notified
833 833 that the result failed and later receive the actual result.
834 834 """
835 835
836 836 outstanding = self.queues.pop(eid)
837 837
838 838 for msg_id in outstanding:
839 839 self.pending.remove(msg_id)
840 840 self.all_completed.add(msg_id)
841 841 try:
842 842 raise error.EngineError("Engine %r died while running task %r"%(eid, msg_id))
843 843 except:
844 844 content = error.wrap_exception()
845 845 # build a fake header:
846 846 header = {}
847 847 header['engine'] = uuid
848 848 header['date'] = datetime.now().strftime(ISO8601)
849 849 rec = dict(result_content=content, result_header=header, result_buffers=[])
850 850 rec['completed'] = header['date']
851 851 rec['engine_uuid'] = uuid
852 852 self.db.update_record(msg_id, rec)
853 853
854 854 def finish_registration(self, heart):
855 855 """Second half of engine registration, called after our HeartMonitor
856 856 has received a beat from the Engine's Heart."""
857 857 try:
858 858 (eid,queue,reg,purge) = self.incoming_registrations.pop(heart)
859 859 except KeyError:
860 860 self.log.error("registration::tried to finish nonexistant registration", exc_info=True)
861 861 return
862 862 self.log.info("registration::finished registering engine %i:%r"%(eid,queue))
863 863 if purge is not None:
864 864 purge.stop()
865 865 control = queue
866 866 self.ids.add(eid)
867 867 self.keytable[eid] = queue
868 868 self.engines[eid] = EngineConnector(id=eid, queue=queue, registration=reg,
869 869 control=control, heartbeat=heart)
870 870 self.by_ident[queue] = eid
871 871 self.queues[eid] = list()
872 872 self.tasks[eid] = list()
873 873 self.completed[eid] = list()
874 874 self.hearts[heart] = eid
875 875 content = dict(id=eid, queue=self.engines[eid].queue)
876 876 if self.notifier:
877 877 self.session.send(self.notifier, "registration_notification", content=content)
878 878 self.log.info("engine::Engine Connected: %i"%eid)
879 879
880 880 def _purge_stalled_registration(self, heart):
881 881 if heart in self.incoming_registrations:
882 882 eid = self.incoming_registrations.pop(heart)[0]
883 883 self.log.info("registration::purging stalled registration: %i"%eid)
884 884 else:
885 885 pass
886 886
887 887 #-------------------------------------------------------------------------
888 888 # Client Requests
889 889 #-------------------------------------------------------------------------
890 890
891 891 def shutdown_request(self, client_id, msg):
892 892 """handle shutdown request."""
893 # s = self.context.socket(zmq.XREQ)
894 # s.connect(self.client_connections['mux'])
895 # time.sleep(0.1)
896 # for eid,ec in self.engines.iteritems():
897 # self.session.send(s, 'shutdown_request', content=dict(restart=False), ident=ec.queue)
898 # time.sleep(1)
899 893 self.session.send(self.query, 'shutdown_reply', content={'status': 'ok'}, ident=client_id)
894 # also notify other clients of shutdown
895 self.session.send(self.notifier, 'shutdown_notice', content={'status': 'ok'})
900 896 dc = ioloop.DelayedCallback(lambda : self._shutdown(), 1000, self.loop)
901 897 dc.start()
902 898
903 899 def _shutdown(self):
904 900 self.log.info("hub::hub shutting down.")
905 901 time.sleep(0.1)
906 902 sys.exit(0)
907 903
908 904
909 905 def check_load(self, client_id, msg):
910 906 content = msg['content']
911 907 try:
912 908 targets = content['targets']
913 909 targets = self._validate_targets(targets)
914 910 except:
915 911 content = error.wrap_exception()
916 912 self.session.send(self.query, "hub_error",
917 913 content=content, ident=client_id)
918 914 return
919 915
920 916 content = dict(status='ok')
921 917 # loads = {}
922 918 for t in targets:
923 919 content[bytes(t)] = len(self.queues[t])+len(self.tasks[t])
924 920 self.session.send(self.query, "load_reply", content=content, ident=client_id)
925 921
926 922
927 923 def queue_status(self, client_id, msg):
928 924 """Return the Queue status of one or more targets.
929 925 if verbose: return the msg_ids
930 926 else: return len of each type.
931 927 keys: queue (pending MUX jobs)
932 928 tasks (pending Task jobs)
933 929 completed (finished jobs from both queues)"""
934 930 content = msg['content']
935 931 targets = content['targets']
936 932 try:
937 933 targets = self._validate_targets(targets)
938 934 except:
939 935 content = error.wrap_exception()
940 936 self.session.send(self.query, "hub_error",
941 937 content=content, ident=client_id)
942 938 return
943 939 verbose = content.get('verbose', False)
944 940 content = dict(status='ok')
945 941 for t in targets:
946 942 queue = self.queues[t]
947 943 completed = self.completed[t]
948 944 tasks = self.tasks[t]
949 945 if not verbose:
950 946 queue = len(queue)
951 947 completed = len(completed)
952 948 tasks = len(tasks)
953 949 content[bytes(t)] = {'queue': queue, 'completed': completed , 'tasks': tasks}
954 950 # pending
955 951 self.session.send(self.query, "queue_reply", content=content, ident=client_id)
956 952
957 953 def purge_results(self, client_id, msg):
958 954 """Purge results from memory. This method is more valuable before we move
959 955 to a DB based message storage mechanism."""
960 956 content = msg['content']
961 957 msg_ids = content.get('msg_ids', [])
962 958 reply = dict(status='ok')
963 959 if msg_ids == 'all':
964 960 self.db.drop_matching_records(dict(completed={'$ne':None}))
965 961 else:
966 962 for msg_id in msg_ids:
967 963 if msg_id in self.all_completed:
968 964 self.db.drop_record(msg_id)
969 965 else:
970 966 if msg_id in self.pending:
971 967 try:
972 968 raise IndexError("msg pending: %r"%msg_id)
973 969 except:
974 970 reply = error.wrap_exception()
975 971 else:
976 972 try:
977 973 raise IndexError("No such msg: %r"%msg_id)
978 974 except:
979 975 reply = error.wrap_exception()
980 976 break
981 977 eids = content.get('engine_ids', [])
982 978 for eid in eids:
983 979 if eid not in self.engines:
984 980 try:
985 981 raise IndexError("No such engine: %i"%eid)
986 982 except:
987 983 reply = error.wrap_exception()
988 984 break
989 985 msg_ids = self.completed.pop(eid)
990 986 uid = self.engines[eid].queue
991 987 self.db.drop_matching_records(dict(engine_uuid=uid, completed={'$ne':None}))
992 988
993 989 self.session.send(self.query, 'purge_reply', content=reply, ident=client_id)
994 990
995 991 def resubmit_task(self, client_id, msg, buffers):
996 992 """Resubmit a task."""
997 993 raise NotImplementedError
998 994
999 995 def get_results(self, client_id, msg):
1000 996 """Get the result of 1 or more messages."""
1001 997 content = msg['content']
1002 998 msg_ids = sorted(set(content['msg_ids']))
1003 999 statusonly = content.get('status_only', False)
1004 1000 pending = []
1005 1001 completed = []
1006 1002 content = dict(status='ok')
1007 1003 content['pending'] = pending
1008 1004 content['completed'] = completed
1009 1005 buffers = []
1010 1006 if not statusonly:
1011 1007 content['results'] = {}
1012 1008 records = self.db.find_records(dict(msg_id={'$in':msg_ids}))
1013 1009 for msg_id in msg_ids:
1014 1010 if msg_id in self.pending:
1015 1011 pending.append(msg_id)
1016 1012 elif msg_id in self.all_completed:
1017 1013 completed.append(msg_id)
1018 1014 if not statusonly:
1019 1015 rec = records[msg_id]
1020 1016 io_dict = {}
1021 1017 for key in 'pyin pyout pyerr stdout stderr'.split():
1022 1018 io_dict[key] = rec[key]
1023 1019 content[msg_id] = { 'result_content': rec['result_content'],
1024 1020 'header': rec['header'],
1025 1021 'result_header' : rec['result_header'],
1026 1022 'io' : io_dict,
1027 1023 }
1028 1024 if rec['result_buffers']:
1029 1025 buffers.extend(map(str, rec['result_buffers']))
1030 1026 else:
1031 1027 try:
1032 1028 raise KeyError('No such message: '+msg_id)
1033 1029 except:
1034 1030 content = error.wrap_exception()
1035 1031 break
1036 1032 self.session.send(self.query, "result_reply", content=content,
1037 1033 parent=msg, ident=client_id,
1038 1034 buffers=buffers)
1039 1035
@@ -1,203 +1,200 b''
1 """Remote Functions and decorators for the client."""
1 """Remote Functions and decorators for Views."""
2 2 #-----------------------------------------------------------------------------
3 3 # Copyright (C) 2010 The IPython Development Team
4 4 #
5 5 # Distributed under the terms of the BSD License. The full license is in
6 6 # the file COPYING, distributed as part of this software.
7 7 #-----------------------------------------------------------------------------
8 8
9 9 #-----------------------------------------------------------------------------
10 10 # Imports
11 11 #-----------------------------------------------------------------------------
12 12
13 13 import warnings
14 14
15 15 from IPython.testing import decorators as testdec
16 16
17 17 from . import map as Map
18 18 from .asyncresult import AsyncMapResult
19 19
20 20 #-----------------------------------------------------------------------------
21 21 # Decorators
22 22 #-----------------------------------------------------------------------------
23 23
24 24 @testdec.skip_doctest
25 def remote(client, bound=False, block=None, targets=None, balanced=None):
25 def remote(view, block=None, **flags):
26 26 """Turn a function into a remote function.
27 27
28 28 This method can be used for map:
29 29
30 In [1]: @remote(client,block=True)
30 In [1]: @remote(view,block=True)
31 31 ...: def func(a):
32 32 ...: pass
33 33 """
34 34
35 35 def remote_function(f):
36 return RemoteFunction(client, f, bound, block, targets, balanced)
36 return RemoteFunction(view, f, block=block, **flags)
37 37 return remote_function
38 38
39 39 @testdec.skip_doctest
40 def parallel(client, dist='b', bound=False, block=None, targets='all', balanced=None):
40 def parallel(view, dist='b', block=None, **flags):
41 41 """Turn a function into a parallel remote function.
42 42
43 43 This method can be used for map:
44 44
45 In [1]: @parallel(client,block=True)
45 In [1]: @parallel(view, block=True)
46 46 ...: def func(a):
47 47 ...: pass
48 48 """
49 49
50 50 def parallel_function(f):
51 return ParallelFunction(client, f, dist, bound, block, targets, balanced)
51 return ParallelFunction(view, f, dist=dist, block=block, **flags)
52 52 return parallel_function
53 53
54 54 #--------------------------------------------------------------------------
55 55 # Classes
56 56 #--------------------------------------------------------------------------
57 57
58 58 class RemoteFunction(object):
59 59 """Turn an existing function into a remote function.
60 60
61 61 Parameters
62 62 ----------
63 63
64 client : Client instance
65 The client to be used to connect to engines
64 view : View instance
65 The view to be used for execution
66 66 f : callable
67 67 The function to be wrapped into a remote function
68 bound : bool [default: False]
69 Whether the affect the remote namespace when called
70 68 block : bool [default: None]
71 69 Whether to wait for results or not. The default behavior is
72 to use the current `block` attribute of `client`
73 targets : valid target list [default: all]
74 The targets on which to execute.
75 balanced : bool
76 Whether to load-balance with the Task scheduler or not
70 to use the current `block` attribute of `view`
71
72 **flags : remaining kwargs are passed to View.temp_flags
77 73 """
78 74
79 client = None # the remote connection
75 view = None # the remote connection
80 76 func = None # the wrapped function
81 77 block = None # whether to block
82 bound = None # whether to affect the namespace
83 targets = None # where to execute
84 balanced = None # whether to load-balance
78 flags = None # dict of extra kwargs for temp_flags
85 79
86 def __init__(self, client, f, bound=False, block=None, targets=None, balanced=None):
87 self.client = client
80 def __init__(self, view, f, block=None, **flags):
81 self.view = view
88 82 self.func = f
89 83 self.block=block
90 self.bound=bound
91 self.targets=targets
92 if balanced is None:
93 if targets is None:
94 balanced = True
95 else:
96 balanced = False
97 self.balanced = balanced
84 self.flags=flags
98 85
99 86 def __call__(self, *args, **kwargs):
100 return self.client.apply(self.func, args=args, kwargs=kwargs,
101 block=self.block, targets=self.targets, bound=self.bound, balanced=self.balanced)
87 block = self.view.block if self.block is None else self.block
88 with self.view.temp_flags(block=block, **self.flags):
89 return self.view.apply(self.func, *args, **kwargs)
102 90
103 91
104 92 class ParallelFunction(RemoteFunction):
105 93 """Class for mapping a function to sequences.
106 94
107 95 This will distribute the sequences according the a mapper, and call
108 96 the function on each sub-sequence. If called via map, then the function
109 97 will be called once on each element, rather that each sub-sequence.
110 98
111 99 Parameters
112 100 ----------
113 101
114 client : Client instance
115 The client to be used to connect to engines
102 view : View instance
103 The view to be used for execution
116 104 f : callable
117 105 The function to be wrapped into a remote function
118 bound : bool [default: False]
119 Whether the affect the remote namespace when called
106 dist : str [default: 'b']
107 The key for which mapObject to use to distribute sequences
108 options are:
109 * 'b' : use contiguous chunks in order
110 * 'r' : use round-robin striping
120 111 block : bool [default: None]
121 112 Whether to wait for results or not. The default behavior is
122 to use the current `block` attribute of `client`
123 targets : valid target list [default: all]
124 The targets on which to execute.
125 balanced : bool
126 Whether to load-balance with the Task scheduler or not
127 chunk_size : int or None
113 to use the current `block` attribute of `view`
114 chunksize : int or None
128 115 The size of chunk to use when breaking up sequences in a load-balanced manner
116 **flags : remaining kwargs are passed to View.temp_flags
129 117 """
130 def __init__(self, client, f, dist='b', bound=False, block=None, targets='all', balanced=None, chunk_size=None):
131 super(ParallelFunction, self).__init__(client,f,bound,block,targets,balanced)
132 self.chunk_size = chunk_size
118
119 chunksize=None
120 mapObject=None
121
122 def __init__(self, view, f, dist='b', block=None, chunksize=None, **flags):
123 super(ParallelFunction, self).__init__(view, f, block=block, **flags)
124 self.chunksize = chunksize
133 125
134 126 mapClass = Map.dists[dist]
135 127 self.mapObject = mapClass()
136 128
137 129 def __call__(self, *sequences):
130 # check that the length of sequences match
138 131 len_0 = len(sequences[0])
139 132 for s in sequences:
140 133 if len(s)!=len_0:
141 134 msg = 'all sequences must have equal length, but %i!=%i'%(len_0,len(s))
142 135 raise ValueError(msg)
143
144 if self.balanced:
145 if self.chunk_size:
146 nparts = len_0/self.chunk_size + int(len_0%self.chunk_size > 0)
136 balanced = 'Balanced' in self.view.__class__.__name__
137 if balanced:
138 if self.chunksize:
139 nparts = len_0/self.chunksize + int(len_0%self.chunksize > 0)
147 140 else:
148 141 nparts = len_0
149 targets = [self.targets]*nparts
142 targets = [None]*nparts
150 143 else:
151 if self.chunk_size:
152 warnings.warn("`chunk_size` is ignored when `balanced=False", UserWarning)
144 if self.chunksize:
145 warnings.warn("`chunksize` is ignored unless load balancing", UserWarning)
153 146 # multiplexed:
154 targets = self.client._build_targets(self.targets)[-1]
147 targets = self.view.targets
155 148 nparts = len(targets)
156 149
157 150 msg_ids = []
158 151 # my_f = lambda *a: map(self.func, *a)
152 client = self.view.client
159 153 for index, t in enumerate(targets):
160 154 args = []
161 155 for seq in sequences:
162 156 part = self.mapObject.getPartition(seq, index, nparts)
163 157 if len(part) == 0:
164 158 continue
165 159 else:
166 160 args.append(part)
167 161 if not args:
168 162 continue
169 163
170 164 # print (args)
171 165 if hasattr(self, '_map'):
172 166 f = map
173 167 args = [self.func]+args
174 168 else:
175 169 f=self.func
176 ar = self.client.apply(f, args=args, block=False, bound=self.bound,
177 targets=t, balanced=self.balanced)
170
171 view = self.view if balanced else client[t]
172 with view.temp_flags(block=False, **self.flags):
173 ar = view.apply(f, *args)
178 174
179 175 msg_ids.append(ar.msg_ids[0])
180 176
181 r = AsyncMapResult(self.client, msg_ids, self.mapObject, fname=self.func.__name__)
177 r = AsyncMapResult(self.view.client, msg_ids, self.mapObject, fname=self.func.__name__)
178
182 179 if self.block:
183 180 try:
184 181 return r.get()
185 182 except KeyboardInterrupt:
186 183 return r
187 184 else:
188 185 return r
189 186
190 187 def map(self, *sequences):
191 188 """call a function on each element of a sequence remotely.
192 189 This should behave very much like the builtin map, but return an AsyncMapResult
193 190 if self.block is False.
194 191 """
195 192 # set _map as a flag for use inside self.__call__
196 193 self._map = True
197 194 try:
198 195 ret = self.__call__(*sequences)
199 196 finally:
200 197 del self._map
201 198 return ret
202 199
203 200 __all__ = ['remote', 'parallel', 'RemoteFunction', 'ParallelFunction'] No newline at end of file
@@ -1,590 +1,592 b''
1 1 """The Python scheduler for rich scheduling.
2 2
3 3 The Pure ZMQ scheduler does not allow routing schemes other than LRU,
4 4 nor does it check msg_id DAG dependencies. For those, a slightly slower
5 5 Python Scheduler exists.
6 6 """
7 7 #-----------------------------------------------------------------------------
8 8 # Copyright (C) 2010-2011 The IPython Development Team
9 9 #
10 10 # Distributed under the terms of the BSD License. The full license is in
11 11 # the file COPYING, distributed as part of this software.
12 12 #-----------------------------------------------------------------------------
13 13
14 14 #----------------------------------------------------------------------
15 15 # Imports
16 16 #----------------------------------------------------------------------
17 17
18 18 from __future__ import print_function
19 19
20 20 import logging
21 21 import sys
22 22
23 23 from datetime import datetime, timedelta
24 24 from random import randint, random
25 25 from types import FunctionType
26 26
27 27 try:
28 28 import numpy
29 29 except ImportError:
30 30 numpy = None
31 31
32 32 import zmq
33 33 from zmq.eventloop import ioloop, zmqstream
34 34
35 35 # local imports
36 36 from IPython.external.decorator import decorator
37 37 from IPython.utils.traitlets import Instance, Dict, List, Set
38 38
39 39 from . import error
40 40 from .dependency import Dependency
41 41 from .entry_point import connect_logger, local_logger
42 42 from .factory import SessionFactory
43 43
44 44
45 45 @decorator
46 46 def logged(f,self,*args,**kwargs):
47 47 # print ("#--------------------")
48 48 self.log.debug("scheduler::%s(*%s,**%s)"%(f.func_name, args, kwargs))
49 49 # print ("#--")
50 50 return f(self,*args, **kwargs)
51 51
52 52 #----------------------------------------------------------------------
53 53 # Chooser functions
54 54 #----------------------------------------------------------------------
55 55
56 56 def plainrandom(loads):
57 57 """Plain random pick."""
58 58 n = len(loads)
59 59 return randint(0,n-1)
60 60
61 61 def lru(loads):
62 62 """Always pick the front of the line.
63 63
64 64 The content of `loads` is ignored.
65 65
66 66 Assumes LRU ordering of loads, with oldest first.
67 67 """
68 68 return 0
69 69
70 70 def twobin(loads):
71 71 """Pick two at random, use the LRU of the two.
72 72
73 73 The content of loads is ignored.
74 74
75 75 Assumes LRU ordering of loads, with oldest first.
76 76 """
77 77 n = len(loads)
78 78 a = randint(0,n-1)
79 79 b = randint(0,n-1)
80 80 return min(a,b)
81 81
82 82 def weighted(loads):
83 83 """Pick two at random using inverse load as weight.
84 84
85 85 Return the less loaded of the two.
86 86 """
87 87 # weight 0 a million times more than 1:
88 88 weights = 1./(1e-6+numpy.array(loads))
89 89 sums = weights.cumsum()
90 90 t = sums[-1]
91 91 x = random()*t
92 92 y = random()*t
93 93 idx = 0
94 94 idy = 0
95 95 while sums[idx] < x:
96 96 idx += 1
97 97 while sums[idy] < y:
98 98 idy += 1
99 99 if weights[idy] > weights[idx]:
100 100 return idy
101 101 else:
102 102 return idx
103 103
104 104 def leastload(loads):
105 105 """Always choose the lowest load.
106 106
107 107 If the lowest load occurs more than once, the first
108 108 occurance will be used. If loads has LRU ordering, this means
109 109 the LRU of those with the lowest load is chosen.
110 110 """
111 111 return loads.index(min(loads))
112 112
113 113 #---------------------------------------------------------------------
114 114 # Classes
115 115 #---------------------------------------------------------------------
116 116 # store empty default dependency:
117 117 MET = Dependency([])
118 118
119 119 class TaskScheduler(SessionFactory):
120 120 """Python TaskScheduler object.
121 121
122 122 This is the simplest object that supports msg_id based
123 123 DAG dependencies. *Only* task msg_ids are checked, not
124 124 msg_ids of jobs submitted via the MUX queue.
125 125
126 126 """
127 127
128 128 # input arguments:
129 129 scheme = Instance(FunctionType, default=leastload) # function for determining the destination
130 130 client_stream = Instance(zmqstream.ZMQStream) # client-facing stream
131 131 engine_stream = Instance(zmqstream.ZMQStream) # engine-facing stream
132 132 notifier_stream = Instance(zmqstream.ZMQStream) # hub-facing sub stream
133 133 mon_stream = Instance(zmqstream.ZMQStream) # hub-facing pub stream
134 134
135 135 # internals:
136 136 graph = Dict() # dict by msg_id of [ msg_ids that depend on key ]
137 137 depending = Dict() # dict by msg_id of (msg_id, raw_msg, after, follow)
138 138 pending = Dict() # dict by engine_uuid of submitted tasks
139 139 completed = Dict() # dict by engine_uuid of completed tasks
140 140 failed = Dict() # dict by engine_uuid of failed tasks
141 141 destinations = Dict() # dict by msg_id of engine_uuids where jobs ran (reverse of completed+failed)
142 142 clients = Dict() # dict by msg_id for who submitted the task
143 143 targets = List() # list of target IDENTs
144 144 loads = List() # list of engine loads
145 145 all_completed = Set() # set of all completed tasks
146 146 all_failed = Set() # set of all failed tasks
147 147 all_done = Set() # set of all finished tasks=union(completed,failed)
148 148 all_ids = Set() # set of all submitted task IDs
149 149 blacklist = Dict() # dict by msg_id of locations where a job has encountered UnmetDependency
150 150 auditor = Instance('zmq.eventloop.ioloop.PeriodicCallback')
151 151
152 152
153 153 def start(self):
154 154 self.engine_stream.on_recv(self.dispatch_result, copy=False)
155 155 self._notification_handlers = dict(
156 156 registration_notification = self._register_engine,
157 157 unregistration_notification = self._unregister_engine
158 158 )
159 159 self.notifier_stream.on_recv(self.dispatch_notification)
160 160 self.auditor = ioloop.PeriodicCallback(self.audit_timeouts, 2e3, self.loop) # 1 Hz
161 161 self.auditor.start()
162 162 self.log.info("Scheduler started...%r"%self)
163 163
164 164 def resume_receiving(self):
165 165 """Resume accepting jobs."""
166 166 self.client_stream.on_recv(self.dispatch_submission, copy=False)
167 167
168 168 def stop_receiving(self):
169 169 """Stop accepting jobs while there are no engines.
170 170 Leave them in the ZMQ queue."""
171 171 self.client_stream.on_recv(None)
172 172
173 173 #-----------------------------------------------------------------------
174 174 # [Un]Registration Handling
175 175 #-----------------------------------------------------------------------
176 176
177 177 def dispatch_notification(self, msg):
178 178 """dispatch register/unregister events."""
179 179 idents,msg = self.session.feed_identities(msg)
180 180 msg = self.session.unpack_message(msg)
181 181 msg_type = msg['msg_type']
182 182 handler = self._notification_handlers.get(msg_type, None)
183 183 if handler is None:
184 184 raise Exception("Unhandled message type: %s"%msg_type)
185 185 else:
186 186 try:
187 187 handler(str(msg['content']['queue']))
188 188 except KeyError:
189 189 self.log.error("task::Invalid notification msg: %s"%msg)
190 190
191 191 @logged
192 192 def _register_engine(self, uid):
193 193 """New engine with ident `uid` became available."""
194 194 # head of the line:
195 195 self.targets.insert(0,uid)
196 196 self.loads.insert(0,0)
197 197 # initialize sets
198 198 self.completed[uid] = set()
199 199 self.failed[uid] = set()
200 200 self.pending[uid] = {}
201 201 if len(self.targets) == 1:
202 202 self.resume_receiving()
203 203
204 204 def _unregister_engine(self, uid):
205 205 """Existing engine with ident `uid` became unavailable."""
206 206 if len(self.targets) == 1:
207 207 # this was our only engine
208 208 self.stop_receiving()
209 209
210 210 # handle any potentially finished tasks:
211 211 self.engine_stream.flush()
212 212
213 213 self.completed.pop(uid)
214 214 self.failed.pop(uid)
215 215 # don't pop destinations, because it might be used later
216 216 # map(self.destinations.pop, self.completed.pop(uid))
217 217 # map(self.destinations.pop, self.failed.pop(uid))
218 218
219 219 idx = self.targets.index(uid)
220 220 self.targets.pop(idx)
221 221 self.loads.pop(idx)
222 222
223 223 # wait 5 seconds before cleaning up pending jobs, since the results might
224 224 # still be incoming
225 225 if self.pending[uid]:
226 226 dc = ioloop.DelayedCallback(lambda : self.handle_stranded_tasks(uid), 5000, self.loop)
227 227 dc.start()
228 228
229 229 @logged
230 230 def handle_stranded_tasks(self, engine):
231 231 """Deal with jobs resident in an engine that died."""
232 232 lost = self.pending.pop(engine)
233 233
234 234 for msg_id, (raw_msg, targets, MET, follow, timeout) in lost.iteritems():
235 235 self.all_failed.add(msg_id)
236 236 self.all_done.add(msg_id)
237 237 idents,msg = self.session.feed_identities(raw_msg, copy=False)
238 238 msg = self.session.unpack_message(msg, copy=False, content=False)
239 239 parent = msg['header']
240 240 idents = [idents[0],engine]+idents[1:]
241 print (idents)
241 # print (idents)
242 242 try:
243 243 raise error.EngineError("Engine %r died while running task %r"%(engine, msg_id))
244 244 except:
245 245 content = error.wrap_exception()
246 246 msg = self.session.send(self.client_stream, 'apply_reply', content,
247 247 parent=parent, ident=idents)
248 248 self.session.send(self.mon_stream, msg, ident=['outtask']+idents)
249 249 self.update_graph(msg_id)
250 250
251 251
252 252 #-----------------------------------------------------------------------
253 253 # Job Submission
254 254 #-----------------------------------------------------------------------
255 255 @logged
256 256 def dispatch_submission(self, raw_msg):
257 257 """Dispatch job submission to appropriate handlers."""
258 258 # ensure targets up to date:
259 259 self.notifier_stream.flush()
260 260 try:
261 261 idents, msg = self.session.feed_identities(raw_msg, copy=False)
262 262 msg = self.session.unpack_message(msg, content=False, copy=False)
263 263 except:
264 264 self.log.error("task::Invaid task: %s"%raw_msg, exc_info=True)
265 265 return
266 266
267 267 # send to monitor
268 268 self.mon_stream.send_multipart(['intask']+raw_msg, copy=False)
269 269
270 270 header = msg['header']
271 271 msg_id = header['msg_id']
272 272 self.all_ids.add(msg_id)
273 273
274 274 # targets
275 275 targets = set(header.get('targets', []))
276 276
277 277 # time dependencies
278 278 after = Dependency(header.get('after', []))
279 279 if after.all:
280 if after.success:
280 281 after.difference_update(self.all_completed)
281 if not after.success_only:
282 if after.failure:
282 283 after.difference_update(self.all_failed)
283 284 if after.check(self.all_completed, self.all_failed):
284 285 # recast as empty set, if `after` already met,
285 286 # to prevent unnecessary set comparisons
286 287 after = MET
287 288
288 289 # location dependencies
289 290 follow = Dependency(header.get('follow', []))
290 291
291 292 # turn timeouts into datetime objects:
292 293 timeout = header.get('timeout', None)
293 294 if timeout:
294 295 timeout = datetime.now() + timedelta(0,timeout,0)
295 296
296 297 args = [raw_msg, targets, after, follow, timeout]
297 298
298 299 # validate and reduce dependencies:
299 300 for dep in after,follow:
300 301 # check valid:
301 302 if msg_id in dep or dep.difference(self.all_ids):
302 303 self.depending[msg_id] = args
303 304 return self.fail_unreachable(msg_id, error.InvalidDependency)
304 305 # check if unreachable:
305 if dep.unreachable(self.all_failed):
306 if dep.unreachable(self.all_completed, self.all_failed):
306 307 self.depending[msg_id] = args
307 308 return self.fail_unreachable(msg_id)
308 309
309 310 if after.check(self.all_completed, self.all_failed):
310 311 # time deps already met, try to run
311 312 if not self.maybe_run(msg_id, *args):
312 313 # can't run yet
313 314 self.save_unmet(msg_id, *args)
314 315 else:
315 316 self.save_unmet(msg_id, *args)
316 317
317 318 # @logged
318 319 def audit_timeouts(self):
319 320 """Audit all waiting tasks for expired timeouts."""
320 321 now = datetime.now()
321 322 for msg_id in self.depending.keys():
322 323 # must recheck, in case one failure cascaded to another:
323 324 if msg_id in self.depending:
324 325 raw,after,targets,follow,timeout = self.depending[msg_id]
325 326 if timeout and timeout < now:
326 327 self.fail_unreachable(msg_id, timeout=True)
327 328
328 329 @logged
329 330 def fail_unreachable(self, msg_id, why=error.ImpossibleDependency):
330 331 """a task has become unreachable, send a reply with an ImpossibleDependency
331 332 error."""
332 333 if msg_id not in self.depending:
333 334 self.log.error("msg %r already failed!"%msg_id)
334 335 return
335 336 raw_msg,targets,after,follow,timeout = self.depending.pop(msg_id)
336 337 for mid in follow.union(after):
337 338 if mid in self.graph:
338 339 self.graph[mid].remove(msg_id)
339 340
340 341 # FIXME: unpacking a message I've already unpacked, but didn't save:
341 342 idents,msg = self.session.feed_identities(raw_msg, copy=False)
342 343 msg = self.session.unpack_message(msg, copy=False, content=False)
343 344 header = msg['header']
344 345
345 346 try:
346 347 raise why()
347 348 except:
348 349 content = error.wrap_exception()
349 350
350 351 self.all_done.add(msg_id)
351 352 self.all_failed.add(msg_id)
352 353
353 354 msg = self.session.send(self.client_stream, 'apply_reply', content,
354 355 parent=header, ident=idents)
355 356 self.session.send(self.mon_stream, msg, ident=['outtask']+idents)
356 357
357 358 self.update_graph(msg_id, success=False)
358 359
359 360 @logged
360 361 def maybe_run(self, msg_id, raw_msg, targets, after, follow, timeout):
361 362 """check location dependencies, and run if they are met."""
362 363 blacklist = self.blacklist.setdefault(msg_id, set())
363 364 if follow or targets or blacklist:
364 365 # we need a can_run filter
365 366 def can_run(idx):
366 367 target = self.targets[idx]
367 368 # check targets
368 369 if targets and target not in targets:
369 370 return False
370 371 # check blacklist
371 372 if target in blacklist:
372 373 return False
373 374 # check follow
374 375 return follow.check(self.completed[target], self.failed[target])
375 376
376 377 indices = filter(can_run, range(len(self.targets)))
377 378 if not indices:
378 379 # couldn't run
379 380 if follow.all:
380 381 # check follow for impossibility
381 382 dests = set()
382 relevant = self.all_completed if follow.success_only else self.all_done
383 relevant = set()
384 if follow.success:
385 relevant = self.all_completed
386 if follow.failure:
387 relevant = relevant.union(self.all_failed)
383 388 for m in follow.intersection(relevant):
384 389 dests.add(self.destinations[m])
385 390 if len(dests) > 1:
386 391 self.fail_unreachable(msg_id)
387 392 return False
388 393 if targets:
389 394 # check blacklist+targets for impossibility
390 395 targets.difference_update(blacklist)
391 396 if not targets or not targets.intersection(self.targets):
392 397 self.fail_unreachable(msg_id)
393 398 return False
394 399 return False
395 400 else:
396 401 indices = None
397 402
398 403 self.submit_task(msg_id, raw_msg, targets, follow, timeout, indices)
399 404 return True
400 405
401 406 @logged
402 407 def save_unmet(self, msg_id, raw_msg, targets, after, follow, timeout):
403 408 """Save a message for later submission when its dependencies are met."""
404 409 self.depending[msg_id] = [raw_msg,targets,after,follow,timeout]
405 410 # track the ids in follow or after, but not those already finished
406 411 for dep_id in after.union(follow).difference(self.all_done):
407 412 if dep_id not in self.graph:
408 413 self.graph[dep_id] = set()
409 414 self.graph[dep_id].add(msg_id)
410 415
411 416 @logged
412 417 def submit_task(self, msg_id, raw_msg, targets, follow, timeout, indices=None):
413 418 """Submit a task to any of a subset of our targets."""
414 419 if indices:
415 420 loads = [self.loads[i] for i in indices]
416 421 else:
417 422 loads = self.loads
418 423 idx = self.scheme(loads)
419 424 if indices:
420 425 idx = indices[idx]
421 426 target = self.targets[idx]
422 427 # print (target, map(str, msg[:3]))
423 428 self.engine_stream.send(target, flags=zmq.SNDMORE, copy=False)
424 429 self.engine_stream.send_multipart(raw_msg, copy=False)
425 430 self.add_job(idx)
426 431 self.pending[target][msg_id] = (raw_msg, targets, MET, follow, timeout)
427 432 content = dict(msg_id=msg_id, engine_id=target)
428 433 self.session.send(self.mon_stream, 'task_destination', content=content,
429 434 ident=['tracktask',self.session.session])
430 435
431 436 #-----------------------------------------------------------------------
432 437 # Result Handling
433 438 #-----------------------------------------------------------------------
434 439 @logged
435 440 def dispatch_result(self, raw_msg):
436 441 """dispatch method for result replies"""
437 442 try:
438 443 idents,msg = self.session.feed_identities(raw_msg, copy=False)
439 444 msg = self.session.unpack_message(msg, content=False, copy=False)
440 445 except:
441 446 self.log.error("task::Invaid result: %s"%raw_msg, exc_info=True)
442 447 return
443 448
444 449 header = msg['header']
445 450 if header.get('dependencies_met', True):
446 451 success = (header['status'] == 'ok')
447 452 self.handle_result(idents, msg['parent_header'], raw_msg, success)
448 453 # send to Hub monitor
449 454 self.mon_stream.send_multipart(['outtask']+raw_msg, copy=False)
450 455 else:
451 456 self.handle_unmet_dependency(idents, msg['parent_header'])
452 457
453 458 @logged
454 459 def handle_result(self, idents, parent, raw_msg, success=True):
455 460 """handle a real task result, either success or failure"""
456 461 # first, relay result to client
457 462 engine = idents[0]
458 463 client = idents[1]
459 464 # swap_ids for XREP-XREP mirror
460 465 raw_msg[:2] = [client,engine]
461 466 # print (map(str, raw_msg[:4]))
462 467 self.client_stream.send_multipart(raw_msg, copy=False)
463 468 # now, update our data structures
464 469 msg_id = parent['msg_id']
465 470 self.blacklist.pop(msg_id, None)
466 471 self.pending[engine].pop(msg_id)
467 472 if success:
468 473 self.completed[engine].add(msg_id)
469 474 self.all_completed.add(msg_id)
470 475 else:
471 476 self.failed[engine].add(msg_id)
472 477 self.all_failed.add(msg_id)
473 478 self.all_done.add(msg_id)
474 479 self.destinations[msg_id] = engine
475 480
476 481 self.update_graph(msg_id, success)
477 482
478 483 @logged
479 484 def handle_unmet_dependency(self, idents, parent):
480 485 """handle an unmet dependency"""
481 486 engine = idents[0]
482 487 msg_id = parent['msg_id']
483 488
484 489 if msg_id not in self.blacklist:
485 490 self.blacklist[msg_id] = set()
486 491 self.blacklist[msg_id].add(engine)
487 492
488 493 args = self.pending[engine].pop(msg_id)
489 494 raw,targets,after,follow,timeout = args
490 495
491 496 if self.blacklist[msg_id] == targets:
492 497 self.depending[msg_id] = args
493 498 return self.fail_unreachable(msg_id)
494 499
495 500 elif not self.maybe_run(msg_id, *args):
496 501 # resubmit failed, put it back in our dependency tree
497 502 self.save_unmet(msg_id, *args)
498 503
499 504
500 505 @logged
501 506 def update_graph(self, dep_id, success=True):
502 507 """dep_id just finished. Update our dependency
503 508 graph and submit any jobs that just became runable."""
504 509 # print ("\n\n***********")
505 510 # pprint (dep_id)
506 511 # pprint (self.graph)
507 512 # pprint (self.depending)
508 513 # pprint (self.all_completed)
509 514 # pprint (self.all_failed)
510 515 # print ("\n\n***********\n\n")
511 516 if dep_id not in self.graph:
512 517 return
513 518 jobs = self.graph.pop(dep_id)
514 519
515 520 for msg_id in jobs:
516 521 raw_msg, targets, after, follow, timeout = self.depending[msg_id]
517 # if dep_id in after:
518 # if after.all and (success or not after.success_only):
519 # after.remove(dep_id)
520 522
521 if after.unreachable(self.all_failed) or follow.unreachable(self.all_failed):
523 if after.unreachable(self.all_completed, self.all_failed) or follow.unreachable(self.all_completed, self.all_failed):
522 524 self.fail_unreachable(msg_id)
523 525
524 526 elif after.check(self.all_completed, self.all_failed): # time deps met, maybe run
525 527 if self.maybe_run(msg_id, raw_msg, targets, MET, follow, timeout):
526 528
527 529 self.depending.pop(msg_id)
528 530 for mid in follow.union(after):
529 531 if mid in self.graph:
530 532 self.graph[mid].remove(msg_id)
531 533
532 534 #----------------------------------------------------------------------
533 535 # methods to be overridden by subclasses
534 536 #----------------------------------------------------------------------
535 537
536 538 def add_job(self, idx):
537 539 """Called after self.targets[idx] just got the job with header.
538 540 Override with subclasses. The default ordering is simple LRU.
539 541 The default loads are the number of outstanding jobs."""
540 542 self.loads[idx] += 1
541 543 for lis in (self.targets, self.loads):
542 544 lis.append(lis.pop(idx))
543 545
544 546
545 547 def finish_job(self, idx):
546 548 """Called after self.targets[idx] just finished a job.
547 549 Override with subclasses."""
548 550 self.loads[idx] -= 1
549 551
550 552
551 553
552 554 def launch_scheduler(in_addr, out_addr, mon_addr, not_addr, config=None,logname='ZMQ',
553 555 log_addr=None, loglevel=logging.DEBUG, scheme='lru',
554 556 identity=b'task'):
555 557 from zmq.eventloop import ioloop
556 558 from zmq.eventloop.zmqstream import ZMQStream
557 559
558 560 ctx = zmq.Context()
559 561 loop = ioloop.IOLoop()
560 562 print (in_addr, out_addr, mon_addr, not_addr)
561 563 ins = ZMQStream(ctx.socket(zmq.XREP),loop)
562 564 ins.setsockopt(zmq.IDENTITY, identity)
563 565 ins.bind(in_addr)
564 566
565 567 outs = ZMQStream(ctx.socket(zmq.XREP),loop)
566 568 outs.setsockopt(zmq.IDENTITY, identity)
567 569 outs.bind(out_addr)
568 570 mons = ZMQStream(ctx.socket(zmq.PUB),loop)
569 571 mons.connect(mon_addr)
570 572 nots = ZMQStream(ctx.socket(zmq.SUB),loop)
571 573 nots.setsockopt(zmq.SUBSCRIBE, '')
572 574 nots.connect(not_addr)
573 575
574 576 scheme = globals().get(scheme, None)
575 577 # setup logging
576 578 if log_addr:
577 579 connect_logger(logname, ctx, log_addr, root="scheduler", loglevel=loglevel)
578 580 else:
579 581 local_logger(logname, loglevel)
580 582
581 583 scheduler = TaskScheduler(client_stream=ins, engine_stream=outs,
582 584 mon_stream=mons, notifier_stream=nots,
583 585 scheme=scheme, loop=loop, logname=logname,
584 586 config=config)
585 587 scheduler.start()
586 588 try:
587 589 loop.start()
588 590 except KeyboardInterrupt:
589 591 print ("interrupted, exiting...", file=sys.__stderr__)
590 592
@@ -1,493 +1,489 b''
1 1 #!/usr/bin/env python
2 2 """
3 3 Kernel adapted from kernel.py to use ZMQ Streams
4 4 """
5 5 #-----------------------------------------------------------------------------
6 6 # Copyright (C) 2010-2011 The IPython Development Team
7 7 #
8 8 # Distributed under the terms of the BSD License. The full license is in
9 9 # the file COPYING, distributed as part of this software.
10 10 #-----------------------------------------------------------------------------
11 11
12 12 #-----------------------------------------------------------------------------
13 13 # Imports
14 14 #-----------------------------------------------------------------------------
15 15
16 16 # Standard library imports.
17 17 from __future__ import print_function
18 18
19 19 import sys
20 20 import time
21 21
22 22 from code import CommandCompiler
23 23 from datetime import datetime
24 24 from pprint import pprint
25 25 from signal import SIGTERM, SIGKILL
26 26
27 27 # System library imports.
28 28 import zmq
29 29 from zmq.eventloop import ioloop, zmqstream
30 30
31 31 # Local imports.
32 32 from IPython.core import ultratb
33 33 from IPython.utils.traitlets import Instance, List, Int, Dict, Set, Str
34 34 from IPython.zmq.completer import KernelCompleter
35 35 from IPython.zmq.iostream import OutStream
36 36 from IPython.zmq.displayhook import DisplayHook
37 37
38 38 from . import heartmonitor
39 39 from .client import Client
40 40 from .error import wrap_exception
41 41 from .factory import SessionFactory
42 42 from .streamsession import StreamSession
43 43 from .util import serialize_object, unpack_apply_message, ISO8601, Namespace
44 44
45 45 def printer(*args):
46 46 pprint(args, stream=sys.__stdout__)
47 47
48 48
49 49 class _Passer:
50 50 """Empty class that implements `send()` that does nothing."""
51 51 def send(self, *args, **kwargs):
52 52 pass
53 53 send_multipart = send
54 54
55 55
56 56 #-----------------------------------------------------------------------------
57 57 # Main kernel class
58 58 #-----------------------------------------------------------------------------
59 59
60 60 class Kernel(SessionFactory):
61 61
62 62 #---------------------------------------------------------------------------
63 63 # Kernel interface
64 64 #---------------------------------------------------------------------------
65 65
66 66 # kwargs:
67 67 int_id = Int(-1, config=True)
68 68 user_ns = Dict(config=True)
69 69 exec_lines = List(config=True)
70 70
71 71 control_stream = Instance(zmqstream.ZMQStream)
72 72 task_stream = Instance(zmqstream.ZMQStream)
73 73 iopub_stream = Instance(zmqstream.ZMQStream)
74 74 client = Instance('IPython.zmq.parallel.client.Client')
75 75
76 76 # internals
77 77 shell_streams = List()
78 78 compiler = Instance(CommandCompiler, (), {})
79 79 completer = Instance(KernelCompleter)
80 80
81 81 aborted = Set()
82 82 shell_handlers = Dict()
83 83 control_handlers = Dict()
84 84
85 85 def _set_prefix(self):
86 86 self.prefix = "engine.%s"%self.int_id
87 87
88 88 def _connect_completer(self):
89 89 self.completer = KernelCompleter(self.user_ns)
90 90
91 91 def __init__(self, **kwargs):
92 92 super(Kernel, self).__init__(**kwargs)
93 93 self._set_prefix()
94 94 self._connect_completer()
95 95
96 96 self.on_trait_change(self._set_prefix, 'id')
97 97 self.on_trait_change(self._connect_completer, 'user_ns')
98 98
99 99 # Build dict of handlers for message types
100 100 for msg_type in ['execute_request', 'complete_request', 'apply_request',
101 101 'clear_request']:
102 102 self.shell_handlers[msg_type] = getattr(self, msg_type)
103 103
104 104 for msg_type in ['shutdown_request', 'abort_request']+self.shell_handlers.keys():
105 105 self.control_handlers[msg_type] = getattr(self, msg_type)
106 106
107 107 self._initial_exec_lines()
108 108
109 109 def _wrap_exception(self, method=None):
110 110 e_info = dict(engine_uuid=self.ident, engine_id=self.int_id, method=method)
111 111 content=wrap_exception(e_info)
112 112 return content
113 113
114 114 def _initial_exec_lines(self):
115 115 s = _Passer()
116 116 content = dict(silent=True, user_variable=[],user_expressions=[])
117 117 for line in self.exec_lines:
118 118 self.log.debug("executing initialization: %s"%line)
119 119 content.update({'code':line})
120 120 msg = self.session.msg('execute_request', content)
121 121 self.execute_request(s, [], msg)
122 122
123 123
124 124 #-------------------- control handlers -----------------------------
125 125 def abort_queues(self):
126 126 for stream in self.shell_streams:
127 127 if stream:
128 128 self.abort_queue(stream)
129 129
130 130 def abort_queue(self, stream):
131 131 while True:
132 132 try:
133 133 msg = self.session.recv(stream, zmq.NOBLOCK,content=True)
134 134 except zmq.ZMQError as e:
135 135 if e.errno == zmq.EAGAIN:
136 136 break
137 137 else:
138 138 return
139 139 else:
140 140 if msg is None:
141 141 return
142 142 else:
143 143 idents,msg = msg
144 144
145 145 # assert self.reply_socketly_socket.rcvmore(), "Unexpected missing message part."
146 146 # msg = self.reply_socket.recv_json()
147 147 self.log.info("Aborting:")
148 148 self.log.info(str(msg))
149 149 msg_type = msg['msg_type']
150 150 reply_type = msg_type.split('_')[0] + '_reply'
151 151 # reply_msg = self.session.msg(reply_type, {'status' : 'aborted'}, msg)
152 152 # self.reply_socket.send(ident,zmq.SNDMORE)
153 153 # self.reply_socket.send_json(reply_msg)
154 154 reply_msg = self.session.send(stream, reply_type,
155 155 content={'status' : 'aborted'}, parent=msg, ident=idents)[0]
156 156 self.log.debug(str(reply_msg))
157 157 # We need to wait a bit for requests to come in. This can probably
158 158 # be set shorter for true asynchronous clients.
159 159 time.sleep(0.05)
160 160
161 161 def abort_request(self, stream, ident, parent):
162 162 """abort a specifig msg by id"""
163 163 msg_ids = parent['content'].get('msg_ids', None)
164 164 if isinstance(msg_ids, basestring):
165 165 msg_ids = [msg_ids]
166 166 if not msg_ids:
167 167 self.abort_queues()
168 168 for mid in msg_ids:
169 169 self.aborted.add(str(mid))
170 170
171 171 content = dict(status='ok')
172 172 reply_msg = self.session.send(stream, 'abort_reply', content=content,
173 parent=parent, ident=ident)[0]
173 parent=parent, ident=ident)
174 174 self.log.debug(str(reply_msg))
175 175
176 176 def shutdown_request(self, stream, ident, parent):
177 177 """kill ourself. This should really be handled in an external process"""
178 178 try:
179 179 self.abort_queues()
180 180 except:
181 181 content = self._wrap_exception('shutdown')
182 182 else:
183 183 content = dict(parent['content'])
184 184 content['status'] = 'ok'
185 185 msg = self.session.send(stream, 'shutdown_reply',
186 186 content=content, parent=parent, ident=ident)
187 # msg = self.session.send(self.pub_socket, 'shutdown_reply',
188 # content, parent, ident)
189 # print >> sys.__stdout__, msg
190 # time.sleep(0.2)
187 self.log.debug(str(msg))
191 188 dc = ioloop.DelayedCallback(lambda : sys.exit(0), 1000, self.loop)
192 189 dc.start()
193 190
194 191 def dispatch_control(self, msg):
195 192 idents,msg = self.session.feed_identities(msg, copy=False)
196 193 try:
197 194 msg = self.session.unpack_message(msg, content=True, copy=False)
198 195 except:
199 196 self.log.error("Invalid Message", exc_info=True)
200 197 return
201 198
202 199 header = msg['header']
203 200 msg_id = header['msg_id']
204 201
205 202 handler = self.control_handlers.get(msg['msg_type'], None)
206 203 if handler is None:
207 204 self.log.error("UNKNOWN CONTROL MESSAGE TYPE: %r"%msg['msg_type'])
208 205 else:
209 206 handler(self.control_stream, idents, msg)
210 207
211 208
212 209 #-------------------- queue helpers ------------------------------
213 210
214 211 def check_dependencies(self, dependencies):
215 212 if not dependencies:
216 213 return True
217 214 if len(dependencies) == 2 and dependencies[0] in 'any all'.split():
218 215 anyorall = dependencies[0]
219 216 dependencies = dependencies[1]
220 217 else:
221 218 anyorall = 'all'
222 219 results = self.client.get_results(dependencies,status_only=True)
223 220 if results['status'] != 'ok':
224 221 return False
225 222
226 223 if anyorall == 'any':
227 224 if not results['completed']:
228 225 return False
229 226 else:
230 227 if results['pending']:
231 228 return False
232 229
233 230 return True
234 231
235 232 def check_aborted(self, msg_id):
236 233 return msg_id in self.aborted
237 234
238 235 #-------------------- queue handlers -----------------------------
239 236
240 237 def clear_request(self, stream, idents, parent):
241 238 """Clear our namespace."""
242 239 self.user_ns = {}
243 240 msg = self.session.send(stream, 'clear_reply', ident=idents, parent=parent,
244 241 content = dict(status='ok'))
245 242 self._initial_exec_lines()
246 243
247 244 def execute_request(self, stream, ident, parent):
248 245 self.log.debug('execute request %s'%parent)
249 246 try:
250 247 code = parent[u'content'][u'code']
251 248 except:
252 249 self.log.error("Got bad msg: %s"%parent, exc_info=True)
253 250 return
254 251 self.session.send(self.iopub_stream, u'pyin', {u'code':code},parent=parent,
255 252 ident='%s.pyin'%self.prefix)
256 253 started = datetime.now().strftime(ISO8601)
257 254 try:
258 255 comp_code = self.compiler(code, '<zmq-kernel>')
259 256 # allow for not overriding displayhook
260 257 if hasattr(sys.displayhook, 'set_parent'):
261 258 sys.displayhook.set_parent(parent)
262 259 sys.stdout.set_parent(parent)
263 260 sys.stderr.set_parent(parent)
264 261 exec comp_code in self.user_ns, self.user_ns
265 262 except:
266 263 exc_content = self._wrap_exception('execute')
267 264 # exc_msg = self.session.msg(u'pyerr', exc_content, parent)
268 265 self.session.send(self.iopub_stream, u'pyerr', exc_content, parent=parent,
269 266 ident='%s.pyerr'%self.prefix)
270 267 reply_content = exc_content
271 268 else:
272 269 reply_content = {'status' : 'ok'}
273 270
274 271 reply_msg = self.session.send(stream, u'execute_reply', reply_content, parent=parent,
275 272 ident=ident, subheader = dict(started=started))
276 273 self.log.debug(str(reply_msg))
277 274 if reply_msg['content']['status'] == u'error':
278 275 self.abort_queues()
279 276
280 277 def complete_request(self, stream, ident, parent):
281 278 matches = {'matches' : self.complete(parent),
282 279 'status' : 'ok'}
283 280 completion_msg = self.session.send(stream, 'complete_reply',
284 281 matches, parent, ident)
285 282 # print >> sys.__stdout__, completion_msg
286 283
287 284 def complete(self, msg):
288 285 return self.completer.complete(msg.content.line, msg.content.text)
289 286
290 287 def apply_request(self, stream, ident, parent):
291 288 # flush previous reply, so this request won't block it
292 289 stream.flush(zmq.POLLOUT)
293 290
294 291 try:
295 292 content = parent[u'content']
296 293 bufs = parent[u'buffers']
297 294 msg_id = parent['header']['msg_id']
298 bound = content.get('bound', False)
295 # bound = parent['header'].get('bound', False)
299 296 except:
300 297 self.log.error("Got bad msg: %s"%parent, exc_info=True)
301 298 return
302 299 # pyin_msg = self.session.msg(u'pyin',{u'code':code}, parent=parent)
303 300 # self.iopub_stream.send(pyin_msg)
304 301 # self.session.send(self.iopub_stream, u'pyin', {u'code':code},parent=parent)
305 302 sub = {'dependencies_met' : True, 'engine' : self.ident,
306 303 'started': datetime.now().strftime(ISO8601)}
307 304 try:
308 305 # allow for not overriding displayhook
309 306 if hasattr(sys.displayhook, 'set_parent'):
310 307 sys.displayhook.set_parent(parent)
311 308 sys.stdout.set_parent(parent)
312 309 sys.stderr.set_parent(parent)
313 310 # exec "f(*args,**kwargs)" in self.user_ns, self.user_ns
314 311 working = self.user_ns
315 312 # suffix =
316 313 prefix = "_"+str(msg_id).replace("-","")+"_"
317 # if bound:
318 #
319 # else:
320 # working = dict()
321 # suffix = prefix = "_" # prevent keyword collisions with lambda
314
322 315 f,args,kwargs = unpack_apply_message(bufs, working, copy=False)
323 if bound:
324 bound_ns = Namespace(working)
325 args = [bound_ns]+list(args)
326 # if f.fun
316 # if bound:
317 # bound_ns = Namespace(working)
318 # args = [bound_ns]+list(args)
319
327 320 fname = getattr(f, '__name__', 'f')
328 321
329 322 fname = prefix+"f"
330 323 argname = prefix+"args"
331 324 kwargname = prefix+"kwargs"
332 325 resultname = prefix+"result"
333 326
334 327 ns = { fname : f, argname : args, kwargname : kwargs , resultname : None }
335 328 # print ns
336 329 working.update(ns)
337 330 code = "%s=%s(*%s,**%s)"%(resultname, fname, argname, kwargname)
338 331 try:
339 332 exec code in working,working
340 333 result = working.get(resultname)
341 334 finally:
342 335 for key in ns.iterkeys():
343 336 working.pop(key)
344 if bound:
345 working.update(bound_ns)
337 # if bound:
338 # working.update(bound_ns)
346 339
347 340 packed_result,buf = serialize_object(result)
348 341 result_buf = [packed_result]+buf
349 342 except:
350 343 exc_content = self._wrap_exception('apply')
351 344 # exc_msg = self.session.msg(u'pyerr', exc_content, parent)
352 345 self.session.send(self.iopub_stream, u'pyerr', exc_content, parent=parent,
353 346 ident='%s.pyerr'%self.prefix)
354 347 reply_content = exc_content
355 348 result_buf = []
356 349
357 350 if exc_content['ename'] == 'UnmetDependency':
358 351 sub['dependencies_met'] = False
359 352 else:
360 353 reply_content = {'status' : 'ok'}
361 354
362 355 # put 'ok'/'error' status in header, for scheduler introspection:
363 356 sub['status'] = reply_content['status']
364 357
365 358 reply_msg = self.session.send(stream, u'apply_reply', reply_content,
366 359 parent=parent, ident=ident,buffers=result_buf, subheader=sub)
367 360
368 # if reply_msg['content']['status'] == u'error':
369 # self.abort_queues()
361 # flush i/o
362 # should this be before reply_msg is sent, like in the single-kernel code,
363 # or should nothing get in the way of real results?
364 sys.stdout.flush()
365 sys.stderr.flush()
370 366
371 367 def dispatch_queue(self, stream, msg):
372 368 self.control_stream.flush()
373 369 idents,msg = self.session.feed_identities(msg, copy=False)
374 370 try:
375 371 msg = self.session.unpack_message(msg, content=True, copy=False)
376 372 except:
377 373 self.log.error("Invalid Message", exc_info=True)
378 374 return
379 375
380 376
381 377 header = msg['header']
382 378 msg_id = header['msg_id']
383 379 if self.check_aborted(msg_id):
384 380 self.aborted.remove(msg_id)
385 381 # is it safe to assume a msg_id will not be resubmitted?
386 382 reply_type = msg['msg_type'].split('_')[0] + '_reply'
387 383 reply_msg = self.session.send(stream, reply_type,
388 384 content={'status' : 'aborted'}, parent=msg, ident=idents)
389 385 return
390 386 handler = self.shell_handlers.get(msg['msg_type'], None)
391 387 if handler is None:
392 388 self.log.error("UNKNOWN MESSAGE TYPE: %r"%msg['msg_type'])
393 389 else:
394 390 handler(stream, idents, msg)
395 391
396 392 def start(self):
397 393 #### stream mode:
398 394 if self.control_stream:
399 395 self.control_stream.on_recv(self.dispatch_control, copy=False)
400 396 self.control_stream.on_err(printer)
401 397
402 398 def make_dispatcher(stream):
403 399 def dispatcher(msg):
404 400 return self.dispatch_queue(stream, msg)
405 401 return dispatcher
406 402
407 403 for s in self.shell_streams:
408 404 s.on_recv(make_dispatcher(s), copy=False)
409 405 s.on_err(printer)
410 406
411 407 if self.iopub_stream:
412 408 self.iopub_stream.on_err(printer)
413 409
414 410 #### while True mode:
415 411 # while True:
416 412 # idle = True
417 413 # try:
418 414 # msg = self.shell_stream.socket.recv_multipart(
419 415 # zmq.NOBLOCK, copy=False)
420 416 # except zmq.ZMQError, e:
421 417 # if e.errno != zmq.EAGAIN:
422 418 # raise e
423 419 # else:
424 420 # idle=False
425 421 # self.dispatch_queue(self.shell_stream, msg)
426 422 #
427 423 # if not self.task_stream.empty():
428 424 # idle=False
429 425 # msg = self.task_stream.recv_multipart()
430 426 # self.dispatch_queue(self.task_stream, msg)
431 427 # if idle:
432 428 # # don't busywait
433 429 # time.sleep(1e-3)
434 430
435 431 def make_kernel(int_id, identity, control_addr, shell_addrs, iopub_addr, hb_addrs,
436 432 client_addr=None, loop=None, context=None, key=None,
437 433 out_stream_factory=OutStream, display_hook_factory=DisplayHook):
438 434 """NO LONGER IN USE"""
439 435 # create loop, context, and session:
440 436 if loop is None:
441 437 loop = ioloop.IOLoop.instance()
442 438 if context is None:
443 439 context = zmq.Context()
444 440 c = context
445 441 session = StreamSession(key=key)
446 442 # print (session.key)
447 443 # print (control_addr, shell_addrs, iopub_addr, hb_addrs)
448 444
449 445 # create Control Stream
450 446 control_stream = zmqstream.ZMQStream(c.socket(zmq.PAIR), loop)
451 447 control_stream.setsockopt(zmq.IDENTITY, identity)
452 448 control_stream.connect(control_addr)
453 449
454 450 # create Shell Streams (MUX, Task, etc.):
455 451 shell_streams = []
456 452 for addr in shell_addrs:
457 453 stream = zmqstream.ZMQStream(c.socket(zmq.PAIR), loop)
458 454 stream.setsockopt(zmq.IDENTITY, identity)
459 455 stream.connect(addr)
460 456 shell_streams.append(stream)
461 457
462 458 # create iopub stream:
463 459 iopub_stream = zmqstream.ZMQStream(c.socket(zmq.PUB), loop)
464 460 iopub_stream.setsockopt(zmq.IDENTITY, identity)
465 461 iopub_stream.connect(iopub_addr)
466 462
467 463 # Redirect input streams and set a display hook.
468 464 if out_stream_factory:
469 465 sys.stdout = out_stream_factory(session, iopub_stream, u'stdout')
470 466 sys.stdout.topic = 'engine.%i.stdout'%int_id
471 467 sys.stderr = out_stream_factory(session, iopub_stream, u'stderr')
472 468 sys.stderr.topic = 'engine.%i.stderr'%int_id
473 469 if display_hook_factory:
474 470 sys.displayhook = display_hook_factory(session, iopub_stream)
475 471 sys.displayhook.topic = 'engine.%i.pyout'%int_id
476 472
477 473
478 474 # launch heartbeat
479 475 heart = heartmonitor.Heart(*map(str, hb_addrs), heart_id=identity)
480 476 heart.start()
481 477
482 478 # create (optional) Client
483 479 if client_addr:
484 480 client = Client(client_addr, username=identity)
485 481 else:
486 482 client = None
487 483
488 484 kernel = Kernel(id=int_id, session=session, control_stream=control_stream,
489 485 shell_streams=shell_streams, iopub_stream=iopub_stream,
490 486 client=client, loop=loop)
491 487 kernel.start()
492 488 return loop, c, kernel
493 489
@@ -1,48 +1,69 b''
1 1 """toplevel setup/teardown for parallel tests."""
2 2
3 #-------------------------------------------------------------------------------
4 # Copyright (C) 2011 The IPython Development Team
5 #
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING, distributed as part of this software.
8 #-------------------------------------------------------------------------------
9
10 #-------------------------------------------------------------------------------
11 # Imports
12 #-------------------------------------------------------------------------------
13
3 14 import tempfile
4 15 import time
5 16 from subprocess import Popen, PIPE, STDOUT
6 17
7 18 from IPython.zmq.parallel import client
8 19
9 20 processes = []
10 21 blackhole = tempfile.TemporaryFile()
11 22
12 23 # nose setup/teardown
13 24
14 25 def setup():
15 26 cp = Popen('ipcontrollerz --profile iptest -r --log-level 10 --log-to-file'.split(), stdout=blackhole, stderr=STDOUT)
16 27 processes.append(cp)
17 28 time.sleep(.5)
18 add_engine()
29 add_engines(1)
19 30 c = client.Client(profile='iptest')
20 31 while not c.ids:
21 32 time.sleep(.1)
22 33 c.spin()
34 c.close()
23 35
24 def add_engine(profile='iptest'):
36 def add_engines(n=1, profile='iptest'):
37 rc = client.Client(profile=profile)
38 base = len(rc)
39 eps = []
40 for i in range(n):
25 41 ep = Popen(['ipenginez']+ ['--profile', profile, '--log-level', '10', '--log-to-file'], stdout=blackhole, stderr=STDOUT)
26 42 # ep.start()
27 43 processes.append(ep)
28 return ep
44 eps.append(ep)
45 while len(rc) < base+n:
46 time.sleep(.1)
47 rc.spin()
48 rc.close()
49 return eps
29 50
30 51 def teardown():
31 52 time.sleep(1)
32 53 while processes:
33 54 p = processes.pop()
34 55 if p.poll() is None:
35 56 try:
36 57 p.terminate()
37 58 except Exception, e:
38 59 print e
39 60 pass
40 61 if p.poll() is None:
41 62 time.sleep(.25)
42 63 if p.poll() is None:
43 64 try:
44 65 print 'killing'
45 66 p.kill()
46 67 except:
47 68 print "couldn't shutdown process: ", p
48 69
@@ -1,106 +1,119 b''
1 """base class for parallel client tests"""
2
3 #-------------------------------------------------------------------------------
4 # Copyright (C) 2011 The IPython Development Team
5 #
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING, distributed as part of this software.
8 #-------------------------------------------------------------------------------
9
1 10 import sys
2 11 import tempfile
3 12 import time
4 13 from signal import SIGINT
5 14 from multiprocessing import Process
6 15
7 16 from nose import SkipTest
8 17
18 import zmq
9 19 from zmq.tests import BaseZMQTestCase
10 20
11 21 from IPython.external.decorator import decorator
12 22
13 23 from IPython.zmq.parallel import error
14 24 from IPython.zmq.parallel.client import Client
15 25 from IPython.zmq.parallel.ipcluster import launch_process
16 26 from IPython.zmq.parallel.entry_point import select_random_ports
17 from IPython.zmq.parallel.tests import processes,add_engine
27 from IPython.zmq.parallel.tests import processes,add_engines
18 28
19 29 # simple tasks for use in apply tests
20 30
21 31 def segfault():
22 32 """this will segfault"""
23 33 import ctypes
24 34 ctypes.memset(-1,0,1)
25 35
26 36 def wait(n):
27 37 """sleep for a time"""
28 38 import time
29 39 time.sleep(n)
30 40 return n
31 41
32 42 def raiser(eclass):
33 43 """raise an exception"""
34 44 raise eclass()
35 45
36 46 # test decorator for skipping tests when libraries are unavailable
37 47 def skip_without(*names):
38 48 """skip a test if some names are not importable"""
39 49 @decorator
40 50 def skip_without_names(f, *args, **kwargs):
41 51 """decorator to skip tests in the absence of numpy."""
42 52 for name in names:
43 53 try:
44 54 __import__(name)
45 55 except ImportError:
46 56 raise SkipTest
47 57 return f(*args, **kwargs)
48 58 return skip_without_names
49 59
50
51 60 class ClusterTestCase(BaseZMQTestCase):
52 61
53 62 def add_engines(self, n=1, block=True):
54 63 """add multiple engines to our cluster"""
55 for i in range(n):
56 self.engines.append(add_engine())
64 self.engines.extend(add_engines(n))
57 65 if block:
58 66 self.wait_on_engines()
59 67
60 68 def wait_on_engines(self, timeout=5):
61 69 """wait for our engines to connect."""
62 70 n = len(self.engines)+self.base_engine_count
63 71 tic = time.time()
64 72 while time.time()-tic < timeout and len(self.client.ids) < n:
65 73 time.sleep(0.1)
66 74
67 75 assert not len(self.client.ids) < n, "waiting for engines timed out"
68 76
69 77 def connect_client(self):
70 78 """connect a client with my Context, and track its sockets for cleanup"""
71 79 c = Client(profile='iptest',context=self.context)
72
73 # for name in filter(lambda n:n.endswith('socket'), dir(c)):
74 # self.sockets.append(getattr(c, name))
80 for name in filter(lambda n:n.endswith('socket'), dir(c)):
81 s = getattr(c, name)
82 s.setsockopt(zmq.LINGER, 0)
83 self.sockets.append(s)
75 84 return c
76 85
77 86 def assertRaisesRemote(self, etype, f, *args, **kwargs):
78 87 try:
79 88 try:
80 89 f(*args, **kwargs)
81 90 except error.CompositeError as e:
82 91 e.raise_exception()
83 92 except error.RemoteError as e:
84 93 self.assertEquals(etype.__name__, e.ename, "Should have raised %r, but raised %r"%(e.ename, etype.__name__))
85 94 else:
86 95 self.fail("should have raised a RemoteError")
87 96
88 97 def setUp(self):
89 98 BaseZMQTestCase.setUp(self)
90 99 self.client = self.connect_client()
91 100 self.base_engine_count=len(self.client.ids)
92 101 self.engines=[]
93 102
94 103 def tearDown(self):
95
104 # self.client.clear(block=True)
96 105 # close fds:
97 106 for e in filter(lambda e: e.poll() is not None, processes):
98 107 processes.remove(e)
99 108
109 # allow flushing of incoming messages to prevent crash on socket close
110 self.client.wait(timeout=2)
111 # time.sleep(2)
112 self.client.spin()
100 113 self.client.close()
101 114 BaseZMQTestCase.tearDown(self)
102 # this will be superfluous when pyzmq merges PR #88
103 self.context.term()
115 # this will be redundant when pyzmq merges PR #88
116 # self.context.term()
104 117 # print tempfile.TemporaryFile().fileno(),
105 118 # sys.stdout.flush()
106 119 No newline at end of file
@@ -1,262 +1,147 b''
1 """Tests for parallel client.py"""
2
3 #-------------------------------------------------------------------------------
4 # Copyright (C) 2011 The IPython Development Team
5 #
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING, distributed as part of this software.
8 #-------------------------------------------------------------------------------
9
10 #-------------------------------------------------------------------------------
11 # Imports
12 #-------------------------------------------------------------------------------
13
1 14 import time
2 15 from tempfile import mktemp
3 16
4 17 import zmq
5 18
6 19 from IPython.zmq.parallel import client as clientmod
7 20 from IPython.zmq.parallel import error
8 21 from IPython.zmq.parallel.asyncresult import AsyncResult, AsyncHubResult
9 22 from IPython.zmq.parallel.view import LoadBalancedView, DirectView
10 23
11 from clienttest import ClusterTestCase, segfault, wait
24 from clienttest import ClusterTestCase, segfault, wait, add_engines
25
26 def setup():
27 add_engines(4)
12 28
13 29 class TestClient(ClusterTestCase):
14 30
15 31 def test_ids(self):
16 32 n = len(self.client.ids)
17 33 self.add_engines(3)
18 34 self.assertEquals(len(self.client.ids), n+3)
19 35
20 def test_segfault_task(self):
21 """test graceful handling of engine death (balanced)"""
22 self.add_engines(1)
23 ar = self.client.apply(segfault, block=False)
24 self.assertRaisesRemote(error.EngineError, ar.get)
25 eid = ar.engine_id
26 while eid in self.client.ids:
27 time.sleep(.01)
28 self.client.spin()
29
30 def test_segfault_mux(self):
31 """test graceful handling of engine death (direct)"""
32 self.add_engines(1)
33 eid = self.client.ids[-1]
34 ar = self.client[eid].apply_async(segfault)
35 self.assertRaisesRemote(error.EngineError, ar.get)
36 eid = ar.engine_id
37 while eid in self.client.ids:
38 time.sleep(.01)
39 self.client.spin()
40
41 36 def test_view_indexing(self):
42 37 """test index access for views"""
43 38 self.add_engines(2)
44 39 targets = self.client._build_targets('all')[-1]
45 40 v = self.client[:]
46 41 self.assertEquals(v.targets, targets)
47 42 t = self.client.ids[2]
48 43 v = self.client[t]
49 44 self.assert_(isinstance(v, DirectView))
50 45 self.assertEquals(v.targets, t)
51 46 t = self.client.ids[2:4]
52 47 v = self.client[t]
53 48 self.assert_(isinstance(v, DirectView))
54 49 self.assertEquals(v.targets, t)
55 50 v = self.client[::2]
56 51 self.assert_(isinstance(v, DirectView))
57 52 self.assertEquals(v.targets, targets[::2])
58 53 v = self.client[1::3]
59 54 self.assert_(isinstance(v, DirectView))
60 55 self.assertEquals(v.targets, targets[1::3])
61 56 v = self.client[:-3]
62 57 self.assert_(isinstance(v, DirectView))
63 58 self.assertEquals(v.targets, targets[:-3])
64 59 v = self.client[-1]
65 60 self.assert_(isinstance(v, DirectView))
66 61 self.assertEquals(v.targets, targets[-1])
67 62 self.assertRaises(TypeError, lambda : self.client[None])
68 63
69 64 def test_view_cache(self):
70 65 """test that multiple view requests return the same object"""
71 66 v = self.client[:2]
72 67 v2 =self.client[:2]
73 68 self.assertTrue(v is v2)
74 v = self.client.view()
75 v2 = self.client.view(balanced=True)
69 v = self.client.load_balanced_view()
70 v2 = self.client.load_balanced_view(targets=None)
76 71 self.assertTrue(v is v2)
77 72
78 73 def test_targets(self):
79 74 """test various valid targets arguments"""
80 75 build = self.client._build_targets
81 76 ids = self.client.ids
82 77 idents,targets = build(None)
83 78 self.assertEquals(ids, targets)
84 79
85 80 def test_clear(self):
86 81 """test clear behavior"""
87 self.add_engines(2)
88 self.client.block=True
89 self.client.push(dict(a=5))
90 self.client.pull('a')
82 # self.add_engines(2)
83 v = self.client[:]
84 v.block=True
85 v.push(dict(a=5))
86 v.pull('a')
91 87 id0 = self.client.ids[-1]
92 88 self.client.clear(targets=id0)
93 self.client.pull('a', targets=self.client.ids[:-1])
94 self.assertRaisesRemote(NameError, self.client.pull, 'a')
95 self.client.clear()
89 self.client[:-1].pull('a')
90 self.assertRaisesRemote(NameError, self.client[id0].get, 'a')
91 self.client.clear(block=True)
96 92 for i in self.client.ids:
97 self.assertRaisesRemote(NameError, self.client.pull, 'a', targets=i)
98
99
100 def test_push_pull(self):
101 """test pushing and pulling"""
102 data = dict(a=10, b=1.05, c=range(10), d={'e':(1,2),'f':'hi'})
103 t = self.client.ids[-1]
104 self.add_engines(2)
105 push = self.client.push
106 pull = self.client.pull
107 self.client.block=True
108 nengines = len(self.client)
109 push({'data':data}, targets=t)
110 d = pull('data', targets=t)
111 self.assertEquals(d, data)
112 push({'data':data})
113 d = pull('data')
114 self.assertEquals(d, nengines*[data])
115 ar = push({'data':data}, block=False)
116 self.assertTrue(isinstance(ar, AsyncResult))
117 r = ar.get()
118 ar = pull('data', block=False)
119 self.assertTrue(isinstance(ar, AsyncResult))
120 r = ar.get()
121 self.assertEquals(r, nengines*[data])
122 push(dict(a=10,b=20))
123 r = pull(('a','b'))
124 self.assertEquals(r, nengines*[[10,20]])
125
126 def test_push_pull_function(self):
127 "test pushing and pulling functions"
128 def testf(x):
129 return 2.0*x
130
131 self.add_engines(4)
132 t = self.client.ids[-1]
133 self.client.block=True
134 push = self.client.push
135 pull = self.client.pull
136 execute = self.client.execute
137 push({'testf':testf}, targets=t)
138 r = pull('testf', targets=t)
139 self.assertEqual(r(1.0), testf(1.0))
140 execute('r = testf(10)', targets=t)
141 r = pull('r', targets=t)
142 self.assertEquals(r, testf(10))
143 ar = push({'testf':testf}, block=False)
144 ar.get()
145 ar = pull('testf', block=False)
146 rlist = ar.get()
147 for r in rlist:
148 self.assertEqual(r(1.0), testf(1.0))
149 execute("def g(x): return x*x", targets=t)
150 r = pull(('testf','g'),targets=t)
151 self.assertEquals((r[0](10),r[1](10)), (testf(10), 100))
152
153 def test_push_function_globals(self):
154 """test that pushed functions have access to globals"""
155 def geta():
156 return a
157 self.add_engines(1)
158 v = self.client[-1]
159 v.block=True
160 v['f'] = geta
161 self.assertRaisesRemote(NameError, v.execute, 'b=f()')
162 v.execute('a=5')
163 v.execute('b=f()')
164 self.assertEquals(v['b'], 5)
165
166 def test_push_function_defaults(self):
167 """test that pushed functions preserve default args"""
168 def echo(a=10):
169 return a
170 self.add_engines(1)
171 v = self.client[-1]
172 v.block=True
173 v['f'] = echo
174 v.execute('b=f()')
175 self.assertEquals(v['b'], 10)
93 # print i
94 self.assertRaisesRemote(NameError, self.client[i].get, 'a')
176 95
177 96 def test_get_result(self):
178 97 """test getting results from the Hub."""
179 98 c = clientmod.Client(profile='iptest')
180 self.add_engines(1)
99 # self.add_engines(1)
181 100 t = c.ids[-1]
182 ar = c.apply(wait, (1,), block=False, targets=t)
101 ar = c[t].apply_async(wait, 1)
183 102 # give the monitor time to notice the message
184 103 time.sleep(.25)
185 104 ahr = self.client.get_result(ar.msg_ids)
186 105 self.assertTrue(isinstance(ahr, AsyncHubResult))
187 106 self.assertEquals(ahr.get(), ar.get())
188 107 ar2 = self.client.get_result(ar.msg_ids)
189 108 self.assertFalse(isinstance(ar2, AsyncHubResult))
109 c.close()
190 110
191 111 def test_ids_list(self):
192 112 """test client.ids"""
193 self.add_engines(2)
113 # self.add_engines(2)
194 114 ids = self.client.ids
195 115 self.assertEquals(ids, self.client._ids)
196 116 self.assertFalse(ids is self.client._ids)
197 117 ids.remove(ids[-1])
198 118 self.assertNotEquals(ids, self.client._ids)
199 119
200 def test_run_newline(self):
201 """test that run appends newline to files"""
202 tmpfile = mktemp()
203 with open(tmpfile, 'w') as f:
204 f.write("""def g():
205 return 5
206 """)
207 v = self.client[-1]
208 v.run(tmpfile, block=True)
209 self.assertEquals(v.apply_sync(lambda : g()), 5)
210
211 def test_apply_tracked(self):
212 """test tracking for apply"""
213 # self.add_engines(1)
214 t = self.client.ids[-1]
215 self.client.block=False
216 def echo(n=1024*1024, **kwargs):
217 return self.client.apply(lambda x: x, args=('x'*n,), targets=t, **kwargs)
218 ar = echo(1)
219 self.assertTrue(ar._tracker is None)
220 self.assertTrue(ar.sent)
221 ar = echo(track=True)
222 self.assertTrue(isinstance(ar._tracker, zmq.MessageTracker))
223 self.assertEquals(ar.sent, ar._tracker.done)
224 ar._tracker.wait()
225 self.assertTrue(ar.sent)
226
227 def test_push_tracked(self):
228 t = self.client.ids[-1]
229 ns = dict(x='x'*1024*1024)
230 ar = self.client.push(ns, targets=t, block=False)
231 self.assertTrue(ar._tracker is None)
232 self.assertTrue(ar.sent)
233
234 ar = self.client.push(ns, targets=t, block=False, track=True)
235 self.assertTrue(isinstance(ar._tracker, zmq.MessageTracker))
236 self.assertEquals(ar.sent, ar._tracker.done)
237 ar._tracker.wait()
238 self.assertTrue(ar.sent)
239 ar.get()
240
241 def test_scatter_tracked(self):
242 t = self.client.ids
243 x='x'*1024*1024
244 ar = self.client.scatter('x', x, targets=t, block=False)
245 self.assertTrue(ar._tracker is None)
246 self.assertTrue(ar.sent)
247
248 ar = self.client.scatter('x', x, targets=t, block=False, track=True)
249 self.assertTrue(isinstance(ar._tracker, zmq.MessageTracker))
250 self.assertEquals(ar.sent, ar._tracker.done)
251 ar._tracker.wait()
252 self.assertTrue(ar.sent)
253 ar.get()
254
255 def test_remote_reference(self):
256 v = self.client[-1]
257 v['a'] = 123
258 ra = clientmod.Reference('a')
259 b = v.apply_sync(lambda x: x, ra)
260 self.assertEquals(b, 123)
120 def test_queue_status(self):
121 # self.addEngine(4)
122 ids = self.client.ids
123 id0 = ids[0]
124 qs = self.client.queue_status(targets=id0)
125 self.assertTrue(isinstance(qs, dict))
126 self.assertEquals(sorted(qs.keys()), ['completed', 'queue', 'tasks'])
127 allqs = self.client.queue_status()
128 self.assertTrue(isinstance(allqs, dict))
129 self.assertEquals(sorted(allqs.keys()), self.client.ids)
130 for eid,qs in allqs.items():
131 self.assertTrue(isinstance(qs, dict))
132 self.assertEquals(sorted(qs.keys()), ['completed', 'queue', 'tasks'])
133
134 def test_shutdown(self):
135 # self.addEngine(4)
136 ids = self.client.ids
137 id0 = ids[0]
138 self.client.shutdown(id0, block=True)
139 while id0 in self.client.ids:
140 time.sleep(0.1)
141 self.client.spin()
261 142
143 self.assertRaises(IndexError, lambda : self.client[id0])
262 144
145 def test_result_status(self):
146 pass
147 # to be written
@@ -1,87 +1,108 b''
1 1 """test serialization with newserialized"""
2 2
3 #-------------------------------------------------------------------------------
4 # Copyright (C) 2011 The IPython Development Team
5 #
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING, distributed as part of this software.
8 #-------------------------------------------------------------------------------
9
10 #-------------------------------------------------------------------------------
11 # Imports
12 #-------------------------------------------------------------------------------
13
3 14 from unittest import TestCase
4 15
5 16 from IPython.testing.parametric import parametric
6 17 from IPython.utils import newserialized as ns
7 18 from IPython.utils.pickleutil import can, uncan, CannedObject, CannedFunction
8 19 from IPython.zmq.parallel.tests.clienttest import skip_without
9 20
10 21
11 22 class CanningTestCase(TestCase):
12 23 def test_canning(self):
13 24 d = dict(a=5,b=6)
14 25 cd = can(d)
15 26 self.assertTrue(isinstance(cd, dict))
16 27
17 28 def test_canned_function(self):
18 29 f = lambda : 7
19 30 cf = can(f)
20 31 self.assertTrue(isinstance(cf, CannedFunction))
21 32
22 33 @parametric
23 34 def test_can_roundtrip(cls):
24 35 objs = [
25 36 dict(),
26 37 set(),
27 38 list(),
28 39 ['a',1,['a',1],u'e'],
29 40 ]
30 41 return map(cls.run_roundtrip, objs)
31 42
32 43 @classmethod
33 44 def run_roundtrip(self, obj):
34 45 o = uncan(can(obj))
35 46 assert o == obj, "failed assertion: %r == %r"%(o,obj)
36 47
37 48 def test_serialized_interfaces(self):
38 49
39 50 us = {'a':10, 'b':range(10)}
40 51 s = ns.serialize(us)
41 52 uus = ns.unserialize(s)
42 53 self.assertTrue(isinstance(s, ns.SerializeIt))
43 54 self.assertEquals(uus, us)
44 55
45 56 def test_pickle_serialized(self):
46 57 obj = {'a':1.45345, 'b':'asdfsdf', 'c':10000L}
47 58 original = ns.UnSerialized(obj)
48 59 originalSer = ns.SerializeIt(original)
49 60 firstData = originalSer.getData()
50 61 firstTD = originalSer.getTypeDescriptor()
51 62 firstMD = originalSer.getMetadata()
52 63 self.assertEquals(firstTD, 'pickle')
53 64 self.assertEquals(firstMD, {})
54 65 unSerialized = ns.UnSerializeIt(originalSer)
55 66 secondObj = unSerialized.getObject()
56 67 for k, v in secondObj.iteritems():
57 68 self.assertEquals(obj[k], v)
58 69 secondSer = ns.SerializeIt(ns.UnSerialized(secondObj))
59 70 self.assertEquals(firstData, secondSer.getData())
60 71 self.assertEquals(firstTD, secondSer.getTypeDescriptor() )
61 72 self.assertEquals(firstMD, secondSer.getMetadata())
62 73
63 74 @skip_without('numpy')
64 75 def test_ndarray_serialized(self):
65 76 import numpy
66 77 a = numpy.linspace(0.0, 1.0, 1000)
67 78 unSer1 = ns.UnSerialized(a)
68 79 ser1 = ns.SerializeIt(unSer1)
69 80 td = ser1.getTypeDescriptor()
70 81 self.assertEquals(td, 'ndarray')
71 82 md = ser1.getMetadata()
72 83 self.assertEquals(md['shape'], a.shape)
73 84 self.assertEquals(md['dtype'], a.dtype.str)
74 85 buff = ser1.getData()
75 86 self.assertEquals(buff, numpy.getbuffer(a))
76 87 s = ns.Serialized(buff, td, md)
77 88 final = ns.unserialize(s)
78 89 self.assertEquals(numpy.getbuffer(a), numpy.getbuffer(final))
79 90 self.assertTrue((a==final).all())
80 91 self.assertEquals(a.dtype.str, final.dtype.str)
81 92 self.assertEquals(a.shape, final.shape)
82 93 # test non-copying:
83 94 a[2] = 1e9
84 95 self.assertTrue((a==final).all())
85 96
97 def test_uncan_function_globals(self):
98 """test that uncanning a module function restores it into its module"""
99 from re import search
100 cf = can(search)
101 csearch = uncan(cf)
102 self.assertEqual(csearch.__module__, search.__module__)
103 self.assertNotEqual(csearch('asd', 'asdf'), None)
104 csearch = uncan(cf, dict(a=5))
105 self.assertEqual(csearch.__module__, search.__module__)
106 self.assertNotEqual(csearch('asd', 'asdf'), None)
86 107
87 108 No newline at end of file
@@ -1,99 +1,111 b''
1 """test building messages with streamsession"""
2
3 #-------------------------------------------------------------------------------
4 # Copyright (C) 2011 The IPython Development Team
5 #
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING, distributed as part of this software.
8 #-------------------------------------------------------------------------------
9
10 #-------------------------------------------------------------------------------
11 # Imports
12 #-------------------------------------------------------------------------------
1 13
2 14 import os
3 15 import uuid
4 16 import zmq
5 17
6 18 from zmq.tests import BaseZMQTestCase
7 19 from zmq.eventloop.zmqstream import ZMQStream
8 20 # from IPython.zmq.tests import SessionTestCase
9 21 from IPython.zmq.parallel import streamsession as ss
10 22
11 23 class SessionTestCase(BaseZMQTestCase):
12 24
13 25 def setUp(self):
14 26 BaseZMQTestCase.setUp(self)
15 27 self.session = ss.StreamSession()
16 28
17 29 class TestSession(SessionTestCase):
18 30
19 31 def test_msg(self):
20 32 """message format"""
21 33 msg = self.session.msg('execute')
22 34 thekeys = set('header msg_id parent_header msg_type content'.split())
23 35 s = set(msg.keys())
24 36 self.assertEquals(s, thekeys)
25 37 self.assertTrue(isinstance(msg['content'],dict))
26 38 self.assertTrue(isinstance(msg['header'],dict))
27 39 self.assertTrue(isinstance(msg['parent_header'],dict))
28 40 self.assertEquals(msg['msg_type'], 'execute')
29 41
30 42
31 43
32 44 def test_args(self):
33 45 """initialization arguments for StreamSession"""
34 46 s = self.session
35 47 self.assertTrue(s.pack is ss.default_packer)
36 48 self.assertTrue(s.unpack is ss.default_unpacker)
37 49 self.assertEquals(s.username, os.environ.get('USER', 'username'))
38 50
39 51 s = ss.StreamSession(username=None)
40 52 self.assertEquals(s.username, os.environ.get('USER', 'username'))
41 53
42 54 self.assertRaises(TypeError, ss.StreamSession, packer='hi')
43 55 self.assertRaises(TypeError, ss.StreamSession, unpacker='hi')
44 56 u = str(uuid.uuid4())
45 57 s = ss.StreamSession(username='carrot', session=u)
46 58 self.assertEquals(s.session, u)
47 59 self.assertEquals(s.username, 'carrot')
48 60
49 61 def test_tracking(self):
50 62 """test tracking messages"""
51 63 a,b = self.create_bound_pair(zmq.PAIR, zmq.PAIR)
52 64 s = self.session
53 65 stream = ZMQStream(a)
54 66 msg = s.send(a, 'hello', track=False)
55 67 self.assertTrue(msg['tracker'] is None)
56 68 msg = s.send(a, 'hello', track=True)
57 69 self.assertTrue(isinstance(msg['tracker'], zmq.MessageTracker))
58 70 M = zmq.Message(b'hi there', track=True)
59 71 msg = s.send(a, 'hello', buffers=[M], track=True)
60 72 t = msg['tracker']
61 73 self.assertTrue(isinstance(t, zmq.MessageTracker))
62 74 self.assertRaises(zmq.NotDone, t.wait, .1)
63 75 del M
64 76 t.wait(1) # this will raise
65 77
66 78
67 79 # def test_rekey(self):
68 80 # """rekeying dict around json str keys"""
69 81 # d = {'0': uuid.uuid4(), 0:uuid.uuid4()}
70 82 # self.assertRaises(KeyError, ss.rekey, d)
71 83 #
72 84 # d = {'0': uuid.uuid4(), 1:uuid.uuid4(), 'asdf':uuid.uuid4()}
73 85 # d2 = {0:d['0'],1:d[1],'asdf':d['asdf']}
74 86 # rd = ss.rekey(d)
75 87 # self.assertEquals(d2,rd)
76 88 #
77 89 # d = {'1.5':uuid.uuid4(),'1':uuid.uuid4()}
78 90 # d2 = {1.5:d['1.5'],1:d['1']}
79 91 # rd = ss.rekey(d)
80 92 # self.assertEquals(d2,rd)
81 93 #
82 94 # d = {'1.0':uuid.uuid4(),'1':uuid.uuid4()}
83 95 # self.assertRaises(KeyError, ss.rekey, d)
84 96 #
85 97 def test_unique_msg_ids(self):
86 98 """test that messages receive unique ids"""
87 99 ids = set()
88 100 for i in range(2**12):
89 101 h = self.session.msg_header('test')
90 102 msg_id = h['msg_id']
91 103 self.assertTrue(msg_id not in ids)
92 104 ids.add(msg_id)
93 105
94 106 def test_feed_identities(self):
95 107 """scrub the front for zmq IDENTITIES"""
96 108 theids = "engine client other".split()
97 109 content = dict(code='whoda',stuff=object())
98 110 themsg = self.session.msg('execute',content=content)
99 111 pmsg = theids
@@ -1,318 +1,354 b''
1 1 """some generic utilities for dealing with classes, urls, and serialization"""
2 2 #-----------------------------------------------------------------------------
3 3 # Copyright (C) 2010-2011 The IPython Development Team
4 4 #
5 5 # Distributed under the terms of the BSD License. The full license is in
6 6 # the file COPYING, distributed as part of this software.
7 7 #-----------------------------------------------------------------------------
8 8
9 9 #-----------------------------------------------------------------------------
10 10 # Imports
11 11 #-----------------------------------------------------------------------------
12 12
13 13 import re
14 14 import socket
15 15
16 16 try:
17 17 import cPickle
18 18 pickle = cPickle
19 19 except:
20 20 cPickle = None
21 21 import pickle
22 22
23 23
24 24 from IPython.utils.pickleutil import can, uncan, canSequence, uncanSequence
25 25 from IPython.utils.newserialized import serialize, unserialize
26 26
27 27 ISO8601="%Y-%m-%dT%H:%M:%S.%f"
28 28
29 29 #-----------------------------------------------------------------------------
30 30 # Classes
31 31 #-----------------------------------------------------------------------------
32 32
33 33 class Namespace(dict):
34 34 """Subclass of dict for attribute access to keys."""
35 35
36 36 def __getattr__(self, key):
37 37 """getattr aliased to getitem"""
38 38 if key in self.iterkeys():
39 39 return self[key]
40 40 else:
41 41 raise NameError(key)
42 42
43 43 def __setattr__(self, key, value):
44 44 """setattr aliased to setitem, with strict"""
45 45 if hasattr(dict, key):
46 46 raise KeyError("Cannot override dict keys %r"%key)
47 47 self[key] = value
48 48
49 49
50 50 class ReverseDict(dict):
51 51 """simple double-keyed subset of dict methods."""
52 52
53 53 def __init__(self, *args, **kwargs):
54 54 dict.__init__(self, *args, **kwargs)
55 55 self._reverse = dict()
56 56 for key, value in self.iteritems():
57 57 self._reverse[value] = key
58 58
59 59 def __getitem__(self, key):
60 60 try:
61 61 return dict.__getitem__(self, key)
62 62 except KeyError:
63 63 return self._reverse[key]
64 64
65 65 def __setitem__(self, key, value):
66 66 if key in self._reverse:
67 67 raise KeyError("Can't have key %r on both sides!"%key)
68 68 dict.__setitem__(self, key, value)
69 69 self._reverse[value] = key
70 70
71 71 def pop(self, key):
72 72 value = dict.pop(self, key)
73 73 self._reverse.pop(value)
74 74 return value
75 75
76 76 def get(self, key, default=None):
77 77 try:
78 78 return self[key]
79 79 except KeyError:
80 80 return default
81 81
82 82 #-----------------------------------------------------------------------------
83 83 # Functions
84 84 #-----------------------------------------------------------------------------
85 85
86 86 def validate_url(url):
87 87 """validate a url for zeromq"""
88 88 if not isinstance(url, basestring):
89 89 raise TypeError("url must be a string, not %r"%type(url))
90 90 url = url.lower()
91 91
92 92 proto_addr = url.split('://')
93 93 assert len(proto_addr) == 2, 'Invalid url: %r'%url
94 94 proto, addr = proto_addr
95 95 assert proto in ['tcp','pgm','epgm','ipc','inproc'], "Invalid protocol: %r"%proto
96 96
97 97 # domain pattern adapted from http://www.regexlib.com/REDetails.aspx?regexp_id=391
98 98 # author: Remi Sabourin
99 99 pat = re.compile(r'^([\w\d]([\w\d\-]{0,61}[\w\d])?\.)*[\w\d]([\w\d\-]{0,61}[\w\d])?$')
100 100
101 101 if proto == 'tcp':
102 102 lis = addr.split(':')
103 103 assert len(lis) == 2, 'Invalid url: %r'%url
104 104 addr,s_port = lis
105 105 try:
106 106 port = int(s_port)
107 107 except ValueError:
108 108 raise AssertionError("Invalid port %r in url: %r"%(port, url))
109 109
110 110 assert addr == '*' or pat.match(addr) is not None, 'Invalid url: %r'%url
111 111
112 112 else:
113 113 # only validate tcp urls currently
114 114 pass
115 115
116 116 return True
117 117
118 118
119 119 def validate_url_container(container):
120 120 """validate a potentially nested collection of urls."""
121 121 if isinstance(container, basestring):
122 122 url = container
123 123 return validate_url(url)
124 124 elif isinstance(container, dict):
125 125 container = container.itervalues()
126 126
127 127 for element in container:
128 128 validate_url_container(element)
129 129
130 130
131 131 def split_url(url):
132 132 """split a zmq url (tcp://ip:port) into ('tcp','ip','port')."""
133 133 proto_addr = url.split('://')
134 134 assert len(proto_addr) == 2, 'Invalid url: %r'%url
135 135 proto, addr = proto_addr
136 136 lis = addr.split(':')
137 137 assert len(lis) == 2, 'Invalid url: %r'%url
138 138 addr,s_port = lis
139 139 return proto,addr,s_port
140 140
141 141 def disambiguate_ip_address(ip, location=None):
142 142 """turn multi-ip interfaces '0.0.0.0' and '*' into connectable
143 143 ones, based on the location (default interpretation of location is localhost)."""
144 144 if ip in ('0.0.0.0', '*'):
145 145 external_ips = socket.gethostbyname_ex(socket.gethostname())[2]
146 146 if location is None or location in external_ips:
147 147 ip='127.0.0.1'
148 148 elif location:
149 149 return location
150 150 return ip
151 151
152 152 def disambiguate_url(url, location=None):
153 153 """turn multi-ip interfaces '0.0.0.0' and '*' into connectable
154 154 ones, based on the location (default interpretation is localhost).
155 155
156 156 This is for zeromq urls, such as tcp://*:10101."""
157 157 try:
158 158 proto,ip,port = split_url(url)
159 159 except AssertionError:
160 160 # probably not tcp url; could be ipc, etc.
161 161 return url
162 162
163 163 ip = disambiguate_ip_address(ip,location)
164 164
165 165 return "%s://%s:%s"%(proto,ip,port)
166 166
167 167
168 168 def rekey(dikt):
169 169 """Rekey a dict that has been forced to use str keys where there should be
170 170 ints by json. This belongs in the jsonutil added by fperez."""
171 171 for k in dikt.iterkeys():
172 172 if isinstance(k, str):
173 173 ik=fk=None
174 174 try:
175 175 ik = int(k)
176 176 except ValueError:
177 177 try:
178 178 fk = float(k)
179 179 except ValueError:
180 180 continue
181 181 if ik is not None:
182 182 nk = ik
183 183 else:
184 184 nk = fk
185 185 if nk in dikt:
186 186 raise KeyError("already have key %r"%nk)
187 187 dikt[nk] = dikt.pop(k)
188 188 return dikt
189 189
def serialize_object(obj, threshold=64e-6):
    """Serialize an object into a list of sendable buffers.

    Parameters
    ----------

    obj : object
        The object to be serialized
    threshold : float
        The threshold for not double-pickling the content.


    Returns
    -------
    ('pmd', [bufs]) :
        where pmd is the pickled metadata wrapper,
        bufs is a list of data buffers
    """
    # raw payloads to be shipped out-of-band instead of being re-pickled
    databuffers = []
    if isinstance(obj, (list, tuple)):
        # sequences: can + serialize each element individually
        clist = canSequence(obj)
        slist = map(serialize, clist)
        for s in slist:
            # buffer-like payloads, and anything whose getDataSize() exceeds
            # `threshold`, get detached (data=None) and sent as raw buffers
            if s.typeDescriptor in ('buffer', 'ndarray') or s.getDataSize() > threshold:
                databuffers.append(s.getData())
                s.data = None
        return pickle.dumps(slist,-1), databuffers
    elif isinstance(obj, dict):
        # dicts: serialize values key-by-key in sorted-key order, so the
        # detached buffers line up deterministically on the receiving side
        sobj = {}
        for k in sorted(obj.iterkeys()):
            s = serialize(can(obj[k]))
            if s.typeDescriptor in ('buffer', 'ndarray') or s.getDataSize() > threshold:
                databuffers.append(s.getData())
                s.data = None
            sobj[k] = s
        return pickle.dumps(sobj,-1),databuffers
    else:
        # any other object is canned and serialized whole
        s = serialize(can(obj))
        if s.typeDescriptor in ('buffer', 'ndarray') or s.getDataSize() > threshold:
            databuffers.append(s.getData())
            s.data = None
        return pickle.dumps(s,-1),databuffers
232 232
233 233
def unserialize_object(bufs):
    """reconstruct an object serialized by serialize_object from data buffers.

    Returns (obj, remaining_bufs): the rebuilt object plus any buffers
    that were not consumed.
    """
    bufs = list(bufs)
    # first buffer is the pickled metadata: a list/tuple, dict, or single wrapper
    sobj = pickle.loads(bufs.pop(0))
    if isinstance(sobj, (list, tuple)):
        # refill detached payloads (data=None) from the buffers, in order
        for s in sobj:
            if s.data is None:
                s.data = bufs.pop(0)
        return uncanSequence(map(unserialize, sobj)), bufs
    elif isinstance(sobj, dict):
        newobj = {}
        # sorted-key order must match serialize_object's packing order
        for k in sorted(sobj.iterkeys()):
            s = sobj[k]
            if s.data is None:
                s.data = bufs.pop(0)
            newobj[k] = uncan(unserialize(s))
        return newobj, bufs
    else:
        if sobj.data is None:
            sobj.data = bufs.pop(0)
        return uncan(unserialize(sobj)), bufs
255 255
def pack_apply_message(f, args, kwargs, threshold=64e-6):
    """pack up a function, args, and kwargs to be sent over the wire
    as a series of buffers. Any object whose data is larger than `threshold`
    will not have their data copied (currently only numpy arrays support zero-copy)"""
    serialized_args, arg_bufs = serialize_object(args, threshold)
    serialized_kwargs, kwarg_bufs = serialize_object(kwargs, threshold)
    # header buffers first (canned f, then args/kwargs metadata),
    # followed by all of the out-of-band data buffers, args before kwargs
    msg = [pickle.dumps(can(f), -1), serialized_args, serialized_kwargs]
    msg.extend(arg_bufs)
    msg.extend(kwarg_bufs)
    return msg
270 270
def _set_buffer_data(sa, m, copy):
    """Attach raw message data `m` to serialized wrapper `sa`.

    buffer/ndarray payloads get a buffer view; everything else gets bytes.
    With copy=False, `m` is a zmq message frame and its .buffer/.bytes
    attributes are used instead of copying.
    """
    if sa.getTypeDescriptor() in ('buffer', 'ndarray'):
        if copy:
            sa.data = buffer(m)
        else:
            sa.data = m.buffer
    else:
        if copy:
            sa.data = m
        else:
            sa.data = m.bytes

def unpack_apply_message(bufs, g=None, copy=True):
    """unpack f,args,kwargs from buffers packed by pack_apply_message()
    Returns: original f,args,kwargs"""
    bufs = list(bufs) # allow us to pop
    assert len(bufs) >= 3, "not enough buffers!"
    if not copy:
        # zmq message frames: extract bytes for the three pickled headers
        for i in range(3):
            bufs[i] = bufs[i].bytes
    cf = pickle.loads(bufs.pop(0))
    sargs = list(pickle.loads(bufs.pop(0)))
    skwargs = dict(pickle.loads(bufs.pop(0)))
    f = uncan(cf, g)
    # any serialized object whose data was shipped out-of-band has data=None;
    # refill from the remaining buffers, in the order they were packed
    for sa in sargs:
        if sa.data is None:
            _set_buffer_data(sa, bufs.pop(0), copy)
    args = uncanSequence(map(unserialize, sargs), g)
    kwargs = {}
    # sorted-key order must match the packing order used by serialize_object
    for k in sorted(skwargs.iterkeys()):
        sa = skwargs[k]
        if sa.data is None:
            _set_buffer_data(sa, bufs.pop(0), copy)
        kwargs[k] = uncan(unserialize(sa), g)
    return f,args,kwargs
318 318
319 #--------------------------------------------------------------------------
320 # helpers for implementing old MEC API via view.apply
321 #--------------------------------------------------------------------------
322
def interactive(f):
    """Decorator that marks a function as interactively defined.

    Rebinding __module__ to '__main__' links the function to the user_ns
    as its globals() instead of this module's globals().
    """
    setattr(f, '__module__', '__main__')
    return f
330
@interactive
def _push(ns):
    """Helper for implementing `client.push` via `client.apply`:
    merge the mapping `ns` into the calling module's global namespace.
    """
    remote_ns = globals()
    remote_ns.update(ns)
335
@interactive
def _pull(keys):
    """Helper for implementing `client.pull` via `client.apply`.

    keys : str, or list/tuple/set of str
        name(s) to look up in the calling module's global namespace.

    Returns the single value for a str key, or a list of values for a
    sequence of keys.  Raises NameError for any missing name.
    """
    user_ns = globals()
    if isinstance(keys, (list,tuple, set)):
        for key in keys:
            # dict.has_key is deprecated (and removed in py3); use `in`
            if key not in user_ns:
                raise NameError("name '%s' is not defined"%key)
        return map(user_ns.get, keys)
    else:
        if keys not in user_ns:
            raise NameError("name '%s' is not defined"%keys)
        return user_ns.get(keys)
349
@interactive
def _execute(code):
    """helper method for implementing `client.execute` via `client.apply`"""
    # py2 exec statement: run `code` with this module's globals as both
    # globals and locals, so assignments land in the engine namespace
    exec code in globals()
354
This diff has been collapsed as it changes many lines, (596 lines changed) Show them Hide them
@@ -1,670 +1,920 b''
1 1 """Views of remote engines."""
2 2 #-----------------------------------------------------------------------------
3 3 # Copyright (C) 2010 The IPython Development Team
4 4 #
5 5 # Distributed under the terms of the BSD License. The full license is in
6 6 # the file COPYING, distributed as part of this software.
7 7 #-----------------------------------------------------------------------------
8 8
9 9 #-----------------------------------------------------------------------------
10 10 # Imports
11 11 #-----------------------------------------------------------------------------
12 12
13 import warnings
14 from contextlib import contextmanager
15
16 import zmq
17
13 18 from IPython.testing import decorators as testdec
14 19 from IPython.utils.traitlets import HasTraits, Any, Bool, List, Dict, Set, Int, Instance
15 20
16 21 from IPython.external.decorator import decorator
17 22
18 from .asyncresult import AsyncResult
19 from .dependency import Dependency
23 from . import map as Map
24 from . import util
25 from .asyncresult import AsyncResult, AsyncMapResult
26 from .dependency import Dependency, dependent
20 27 from .remotefunction import ParallelFunction, parallel, remote
21 28
22 29 #-----------------------------------------------------------------------------
23 30 # Decorators
24 31 #-----------------------------------------------------------------------------
25 32
26 33 @decorator
27 def myblock(f, self, *args, **kwargs):
28 """override client.block with self.block during a call"""
29 block = self.client.block
30 self.client.block = self.block
31 try:
32 ret = f(self, *args, **kwargs)
33 finally:
34 self.client.block = block
35 return ret
36
37 @decorator
38 34 def save_ids(f, self, *args, **kwargs):
39 35 """Keep our history and outstanding attributes up to date after a method call."""
40 36 n_previous = len(self.client.history)
37 try:
41 38 ret = f(self, *args, **kwargs)
39 finally:
42 40 nmsgs = len(self.client.history) - n_previous
43 41 msg_ids = self.client.history[-nmsgs:]
44 42 self.history.extend(msg_ids)
45 43 map(self.outstanding.add, msg_ids)
46 44 return ret
47 45
48 46 @decorator
49 47 def sync_results(f, self, *args, **kwargs):
50 48 """sync relevant results from self.client to our results attribute."""
51 49 ret = f(self, *args, **kwargs)
52 50 delta = self.outstanding.difference(self.client.outstanding)
53 51 completed = self.outstanding.intersection(delta)
54 52 self.outstanding = self.outstanding.difference(completed)
55 53 for msg_id in completed:
56 54 self.results[msg_id] = self.client.results[msg_id]
57 55 return ret
58 56
59 57 @decorator
60 58 def spin_after(f, self, *args, **kwargs):
61 59 """call spin after the method."""
62 60 ret = f(self, *args, **kwargs)
63 61 self.spin()
64 62 return ret
65 63
66 64 #-----------------------------------------------------------------------------
67 65 # Classes
68 66 #-----------------------------------------------------------------------------
69 67
70 68 class View(HasTraits):
71 69 """Base View class for more convenint apply(f,*args,**kwargs) syntax via attributes.
72 70
73 71 Don't use this class, use subclasses.
72
73 Methods
74 -------
75
76 spin
77 flushes incoming results and registration state changes
78 control methods spin, and requesting `ids` also ensures up to date
79
80 wait
81 wait on one or more msg_ids
82
83 execution methods
84 apply
85 legacy: execute, run
86
87 data movement
88 push, pull, scatter, gather
89
90 query methods
91 get_result, queue_status, purge_results, result_status
92
93 control methods
94 abort, shutdown
95
74 96 """
75 97 block=Bool(False)
76 bound=Bool(False)
77 track=Bool(False)
98 track=Bool(True)
78 99 history=List()
79 100 outstanding = Set()
80 101 results = Dict()
81 102 client = Instance('IPython.zmq.parallel.client.Client')
82 103
104 _socket = Instance('zmq.Socket')
83 105 _ntargets = Int(1)
84 _balanced = Bool(False)
85 _default_names = List(['block', 'bound', 'track'])
106 _flag_names = List(['block', 'track'])
86 107 _targets = Any()
108 _idents = Any()
87 109
88 def __init__(self, client=None, targets=None):
89 super(View, self).__init__(client=client)
90 self._targets = targets
110 def __init__(self, client=None, socket=None, targets=None):
111 super(View, self).__init__(client=client, _socket=socket)
91 112 self._ntargets = 1 if isinstance(targets, (int,type(None))) else len(targets)
92 113 self.block = client.block
93 114
94 for name in self._default_names:
115 self._idents, self._targets = self.client._build_targets(targets)
116 if targets is None or isinstance(targets, int):
117 self._targets = targets
118 for name in self._flag_names:
119 # set flags, if they haven't been set yet
95 120 setattr(self, name, getattr(self, name, None))
96 121
97 122 assert not self.__class__ is View, "Don't use base View objects, use subclasses"
98 123
99 124
100 125 def __repr__(self):
101 126 strtargets = str(self._targets)
102 127 if len(strtargets) > 16:
103 128 strtargets = strtargets[:12]+'...]'
104 129 return "<%s %s>"%(self.__class__.__name__, strtargets)
105 130
106 131 @property
107 132 def targets(self):
108 133 return self._targets
109 134
110 135 @targets.setter
111 136 def targets(self, value):
112 137 raise AttributeError("Cannot set View `targets` after construction!")
113 138
114 @property
115 def balanced(self):
116 return self._balanced
117
118 @balanced.setter
119 def balanced(self, value):
120 raise AttributeError("Cannot set View `balanced` after construction!")
121
122 def _defaults(self, *excludes):
123 """return dict of our default attributes, excluding names given."""
124 d = dict(balanced=self._balanced, targets=self._targets)
125 for name in self._default_names:
126 if name not in excludes:
127 d[name] = getattr(self, name)
128 return d
129
130 139 def set_flags(self, **kwargs):
131 140 """set my attribute flags by keyword.
132 141
133 A View is a wrapper for the Client's apply method, but
134 with attributes that specify keyword arguments, those attributes
135 can be set by keyword argument with this method.
142 Views determine behavior with a few attributes (`block`, `track`, etc.).
143 These attributes can be set all at once by name with this method.
136 144
137 145 Parameters
138 146 ----------
139 147
140 148 block : bool
141 149 whether to wait for results
142 bound : bool
143 whether to pass the client's Namespace as the first argument
144 to functions called via `apply`.
145 150 track : bool
146 151 whether to create a MessageTracker to allow the user to
147 152 safely edit after arrays and buffers during non-copying
148 153 sends.
149 154 """
150 for key in kwargs:
151 if key not in self._default_names:
152 raise KeyError("Invalid name: %r"%key)
153 for name in ('block', 'bound'):
154 if name in kwargs:
155 setattr(self, name, kwargs[name])
155 for name, value in kwargs.iteritems():
156 if name not in self._flag_names:
157 raise KeyError("Invalid name: %r"%name)
158 else:
159 setattr(self, name, value)
160
161 @contextmanager
162 def temp_flags(self, **kwargs):
163 """temporarily set flags, for use in `with` statements.
164
165 See set_flags for permanent setting of flags
166
167 Examples
168 --------
169
170 >>> view.track=False
171 ...
172 >>> with view.temp_flags(track=True):
173 ... ar = view.apply(dostuff, my_big_array)
174 ... ar.tracker.wait() # wait for send to finish
175 >>> view.track
176 False
177
178 """
179 # preflight: save flags, and set temporaries
180 saved_flags = {}
181 for f in self._flag_names:
182 saved_flags[f] = getattr(self, f)
183 self.set_flags(**kwargs)
184 # yield to the with-statement block
185 yield
186 # postflight: restore saved flags
187 self.set_flags(**saved_flags)
188
156 189
157 190 #----------------------------------------------------------------
158 # wrappers for client methods:
191 # apply
159 192 #----------------------------------------------------------------
160 @sync_results
161 def spin(self):
162 """spin the client, and sync"""
163 self.client.spin()
164 193
165 194 @sync_results
166 195 @save_ids
196 def _really_apply(self, f, args, kwargs, block=None, **options):
197 """wrapper for client.send_apply_message"""
198 raise NotImplementedError("Implement in subclasses")
199
167 200 def apply(self, f, *args, **kwargs):
168 201 """calls f(*args, **kwargs) on remote engines, returning the result.
169 202
170 This method sets all of `client.apply`'s keyword arguments via this
171 View's attributes.
203 This method sets all apply flags via this View's attributes.
172 204
173 205 if self.block is False:
174 206 returns AsyncResult
175 207 else:
176 208 returns actual result of f(*args, **kwargs)
177 209 """
178 return self.client.apply(f, args, kwargs, **self._defaults())
210 return self._really_apply(f, args, kwargs)
179 211
180 @save_ids
181 212 def apply_async(self, f, *args, **kwargs):
182 213 """calls f(*args, **kwargs) on remote engines in a nonblocking manner.
183 214
184 215 returns AsyncResult
185 216 """
186 d = self._defaults('block', 'bound')
187 return self.client.apply(f,args,kwargs, block=False, bound=False, **d)
217 return self._really_apply(f, args, kwargs, block=False)
188 218
189 219 @spin_after
190 @save_ids
191 220 def apply_sync(self, f, *args, **kwargs):
192 221 """calls f(*args, **kwargs) on remote engines in a blocking manner,
193 222 returning the result.
194 223
195 224 returns: actual result of f(*args, **kwargs)
196 225 """
197 d = self._defaults('block', 'bound', 'track')
198 return self.client.apply(f,args,kwargs, block=True, bound=False, **d)
226 return self._really_apply(f, args, kwargs, block=True)
199 227
200 # @sync_results
201 # @save_ids
202 # def apply_bound(self, f, *args, **kwargs):
203 # """calls f(*args, **kwargs) bound to engine namespace(s).
204 #
205 # if self.block is False:
206 # returns msg_id
207 # else:
208 # returns actual result of f(*args, **kwargs)
209 #
210 # This method has access to the targets' namespace via globals()
211 #
212 # """
213 # d = self._defaults('bound')
214 # return self.client.apply(f, args, kwargs, bound=True, **d)
215 #
228 #----------------------------------------------------------------
229 # wrappers for client and control methods
230 #----------------------------------------------------------------
216 231 @sync_results
217 @save_ids
218 def apply_async_bound(self, f, *args, **kwargs):
219 """calls f(*args, **kwargs) bound to engine namespace(s)
220 in a nonblocking manner.
221
222 The first argument to `f` will be the Engine's Namespace
223
224 returns: AsyncResult
232 def spin(self):
233 """spin the client, and sync"""
234 self.client.spin()
225 235
226 """
227 d = self._defaults('block', 'bound')
228 return self.client.apply(f, args, kwargs, block=False, bound=True, **d)
236 @sync_results
237 def wait(self, jobs=None, timeout=-1):
238 """waits on one or more `jobs`, for up to `timeout` seconds.
229 239
230 @spin_after
231 @save_ids
232 def apply_sync_bound(self, f, *args, **kwargs):
233 """calls f(*args, **kwargs) bound to engine namespace(s), waiting for the result.
240 Parameters
241 ----------
234 242
235 The first argument to `f` will be the Engine's Namespace
243 jobs : int, str, or list of ints and/or strs, or one or more AsyncResult objects
244 ints are indices to self.history
245 strs are msg_ids
246 default: wait on all outstanding messages
247 timeout : float
248 a time in seconds, after which to give up.
249 default is -1, which means no timeout
236 250
237 returns: actual result of f(*args, **kwargs)
251 Returns
252 -------
238 253
254 True : when all msg_ids are done
255 False : timeout reached, some msg_ids still outstanding
239 256 """
240 d = self._defaults('block', 'bound')
241 return self.client.apply(f, args, kwargs, block=True, bound=True, **d)
257 if jobs is None:
258 jobs = self.history
259 return self.client.wait(jobs, timeout)
242 260
243 261 def abort(self, jobs=None, block=None):
244 262 """Abort jobs on my engines.
245 263
246 264 Parameters
247 265 ----------
248 266
249 267 jobs : None, str, list of strs, optional
250 268 if None: abort all jobs.
251 269 else: abort specific msg_id(s).
252 270 """
253 271 block = block if block is not None else self.block
254 272 return self.client.abort(jobs=jobs, targets=self._targets, block=block)
255 273
256 274 def queue_status(self, verbose=False):
257 275 """Fetch the Queue status of my engines"""
258 276 return self.client.queue_status(targets=self._targets, verbose=verbose)
259 277
260 278 def purge_results(self, jobs=[], targets=[]):
261 279 """Instruct the controller to forget specific results."""
262 280 if targets is None or targets == 'all':
263 281 targets = self._targets
264 282 return self.client.purge_results(jobs=jobs, targets=targets)
265 283
266 284 @spin_after
267 285 def get_result(self, indices_or_msg_ids=None):
268 286 """return one or more results, specified by history index or msg_id.
269 287
270 288 See client.get_result for details.
271 289
272 290 """
273 291
274 292 if indices_or_msg_ids is None:
275 293 indices_or_msg_ids = -1
276 294 if isinstance(indices_or_msg_ids, int):
277 295 indices_or_msg_ids = self.history[indices_or_msg_ids]
278 296 elif isinstance(indices_or_msg_ids, (list,tuple,set)):
279 297 indices_or_msg_ids = list(indices_or_msg_ids)
280 298 for i,index in enumerate(indices_or_msg_ids):
281 299 if isinstance(index, int):
282 300 indices_or_msg_ids[i] = self.history[index]
283 301 return self.client.get_result(indices_or_msg_ids)
284 302
285 303 #-------------------------------------------------------------------
286 304 # Map
287 305 #-------------------------------------------------------------------
288 306
289 307 def map(self, f, *sequences, **kwargs):
290 308 """override in subclasses"""
291 309 raise NotImplementedError
292 310
293 311 def map_async(self, f, *sequences, **kwargs):
294 312 """Parallel version of builtin `map`, using this view's engines.
295 313
296 314 This is equivalent to map(...block=False)
297 315
298 316 See `self.map` for details.
299 317 """
300 318 if 'block' in kwargs:
301 319 raise TypeError("map_async doesn't take a `block` keyword argument.")
302 320 kwargs['block'] = False
303 321 return self.map(f,*sequences,**kwargs)
304 322
305 323 def map_sync(self, f, *sequences, **kwargs):
306 324 """Parallel version of builtin `map`, using this view's engines.
307 325
308 326 This is equivalent to map(...block=True)
309 327
310 328 See `self.map` for details.
311 329 """
312 330 if 'block' in kwargs:
313 331 raise TypeError("map_sync doesn't take a `block` keyword argument.")
314 332 kwargs['block'] = True
315 333 return self.map(f,*sequences,**kwargs)
316 334
317 335 def imap(self, f, *sequences, **kwargs):
318 336 """Parallel version of `itertools.imap`.
319 337
320 338 See `self.map` for details.
339
321 340 """
322 341
323 342 return iter(self.map_async(f,*sequences, **kwargs))
324 343
325 344 #-------------------------------------------------------------------
326 345 # Decorators
327 346 #-------------------------------------------------------------------
328 347
329 def remote(self, bound=False, block=True):
348 def remote(self, block=True, **flags):
330 349 """Decorator for making a RemoteFunction"""
331 return remote(self.client, bound=bound, targets=self._targets, block=block, balanced=self._balanced)
350 block = self.block if block is None else block
351 return remote(self, block=block, **flags)
332 352
333 def parallel(self, dist='b', bound=False, block=None):
353 def parallel(self, dist='b', block=None, **flags):
334 354 """Decorator for making a ParallelFunction"""
335 355 block = self.block if block is None else block
336 return parallel(self.client, bound=bound, targets=self._targets, block=block, balanced=self._balanced)
356 return parallel(self, dist=dist, block=block, **flags)
337 357
338 358 @testdec.skip_doctest
339 359 class DirectView(View):
340 360 """Direct Multiplexer View of one or more engines.
341 361
342 362 These are created via indexed access to a client:
343 363
344 364 >>> dv_1 = client[1]
345 365 >>> dv_all = client[:]
346 366 >>> dv_even = client[::2]
347 367 >>> dv_some = client[1:3]
348 368
349 369 This object provides dictionary access to engine namespaces:
350 370
351 371 # push a=5:
352 372 >>> dv['a'] = 5
353 373 # pull 'foo':
354 374 >>> db['foo']
355 375
356 376 """
357 377
358 def __init__(self, client=None, targets=None):
359 super(DirectView, self).__init__(client=client, targets=targets)
360 self._balanced = False
378 def __init__(self, client=None, socket=None, targets=None):
379 super(DirectView, self).__init__(client=client, socket=socket, targets=targets)
361 380
362 @spin_after
381
382 @sync_results
363 383 @save_ids
384 def _really_apply(self, f, args=None, kwargs=None, block=None, track=None):
385 """calls f(*args, **kwargs) on remote engines, returning the result.
386
387 This method sets all of `apply`'s flags via this View's attributes.
388
389 Parameters
390 ----------
391
392 f : callable
393
394 args : list [default: empty]
395
396 kwargs : dict [default: empty]
397
398 block : bool [default: self.block]
399 whether to block
400 track : bool [default: self.track]
401 whether to ask zmq to track the message, for safe non-copying sends
402
403 Returns
404 -------
405
406 if self.block is False:
407 returns AsyncResult
408 else:
409 returns actual result of f(*args, **kwargs) on the engine(s)
410 This will be a list of self.targets is also a list (even length 1), or
411 the single result if self.targets is an integer engine id
412 """
413 args = [] if args is None else args
414 kwargs = {} if kwargs is None else kwargs
415 block = self.block if block is None else block
416 track = self.track if track is None else track
417 msg_ids = []
418 trackers = []
419 for ident in self._idents:
420 msg = self.client.send_apply_message(self._socket, f, args, kwargs, track=track,
421 ident=ident)
422 if track:
423 trackers.append(msg['tracker'])
424 msg_ids.append(msg['msg_id'])
425 tracker = None if track is False else zmq.MessageTracker(*trackers)
426 ar = AsyncResult(self.client, msg_ids, fname=f.__name__, targets=self._targets, tracker=tracker)
427 if block:
428 try:
429 return ar.get()
430 except KeyboardInterrupt:
431 pass
432 return ar
433
434 @spin_after
364 435 def map(self, f, *sequences, **kwargs):
365 """view.map(f, *sequences, block=self.block, bound=self.bound) => list|AsyncMapResult
436 """view.map(f, *sequences, block=self.block) => list|AsyncMapResult
366 437
367 438 Parallel version of builtin `map`, using this View's `targets`.
368 439
369 440 There will be one task per target, so work will be chunked
370 441 if the sequences are longer than `targets`.
371 442
372 443 Results can be iterated as they are ready, but will become available in chunks.
373 444
374 445 Parameters
375 446 ----------
376 447
377 448 f : callable
378 449 function to be mapped
379 450 *sequences: one or more sequences of matching length
380 451 the sequences to be distributed and passed to `f`
381 452 block : bool
382 453 whether to wait for the result or not [default self.block]
383 bound : bool
384 whether to pass the client's Namespace as the first argument to `f`
385 454
386 455 Returns
387 456 -------
388 457
389 458 if block=False:
390 459 AsyncMapResult
391 460 An object like AsyncResult, but which reassembles the sequence of results
392 461 into a single list. AsyncMapResults can be iterated through before all
393 462 results are complete.
394 463 else:
395 464 list
396 465 the result of map(f,*sequences)
397 466 """
398 467
399 block = kwargs.get('block', self.block)
400 bound = kwargs.get('bound', self.bound)
468 block = kwargs.pop('block', self.block)
401 469 for k in kwargs.keys():
402 if k not in ['block', 'bound']:
470 if k not in ['block', 'track']:
403 471 raise TypeError("invalid keyword arg, %r"%k)
404 472
405 473 assert len(sequences) > 0, "must have some sequences to map onto!"
406 pf = ParallelFunction(self.client, f, block=block, bound=bound,
407 targets=self._targets, balanced=False)
474 pf = ParallelFunction(self, f, block=block, **kwargs)
408 475 return pf.map(*sequences)
409 476
410 @sync_results
411 @save_ids
412 477 def execute(self, code, block=None):
413 """execute some code on my targets."""
478 """Executes `code` on `targets` in blocking or nonblocking manner.
414 479
415 block = block if block is not None else self.block
480 ``execute`` is always `bound` (affects engine namespace)
416 481
417 return self.client.execute(code, block=block, targets=self._targets)
482 Parameters
483 ----------
418 484
419 @sync_results
420 @save_ids
421 def run(self, fname, block=None):
422 """execute the code in a file on my targets."""
485 code : str
486 the code string to be executed
487 block : bool
488 whether or not to wait until done to return
489 default: self.block
490 """
491 return self._really_apply(util._execute, args=(code,), block=block)
423 492
424 block = block if block is not None else self.block
493 def run(self, filename, block=None):
494 """Execute contents of `filename` on my engine(s).
495
496 This simply reads the contents of the file and calls `execute`.
497
498 Parameters
499 ----------
500
501 filename : str
502 The path to the file
503 targets : int/str/list of ints/strs
504 the engines on which to execute
505 default : all
506 block : bool
507 whether or not to wait until done
508 default: self.block
425 509
426 return self.client.run(fname, block=block, targets=self._targets)
510 """
511 with open(filename, 'r') as f:
512 # add newline in case of trailing indented whitespace
513 # which will cause SyntaxError
514 code = f.read()+'\n'
515 return self.execute(code, block=block)
427 516
428 517 def update(self, ns):
429 """update remote namespace with dict `ns`"""
430 return self.client.push(ns, targets=self._targets, block=self.block)
518 """update remote namespace with dict `ns`
431 519
432 def push(self, ns, block=None):
433 """update remote namespace with dict `ns`"""
520 See `push` for details.
521 """
522 return self.push(ns, block=self.block, track=self.track)
434 523
435 block = block if block is not None else self.block
524 def push(self, ns, block=None, track=None):
525 """update remote namespace with dict `ns`
436 526
437 return self.client.push(ns, targets=self._targets, block=block)
527 Parameters
528 ----------
529
530 ns : dict
531 dict of keys with which to update engine namespace(s)
532 block : bool [default : self.block]
533 whether to wait to be notified of engine receipt
534
535 """
536
537 block = block if block is not None else self.block
538 track = track if track is not None else self.track
539 # applier = self.apply_sync if block else self.apply_async
540 if not isinstance(ns, dict):
541 raise TypeError("Must be a dict, not %s"%type(ns))
542 return self._really_apply(util._push, (ns,),block=block, track=track)
438 543
439 544 def get(self, key_s):
440 545 """get object(s) by `key_s` from remote namespace
441 will return one object if it is a key.
442 It also takes a list of keys, and will return a list of objects."""
546
547 see `pull` for details.
548 """
443 549 # block = block if block is not None else self.block
444 return self.client.pull(key_s, block=True, targets=self._targets)
550 return self.pull(key_s, block=True)
551
552 def pull(self, names, block=True):
553 """get object(s) by `name` from remote namespace
445 554
446 @sync_results
447 @save_ids
448 def pull(self, key_s, block=True):
449 """get object(s) by `key_s` from remote namespace
450 555 will return one object if it is a key.
451 It also takes a list of keys, and will return a list of objects."""
556 can also take a list of keys, in which case it will return a list of objects.
557 """
452 558 block = block if block is not None else self.block
453 return self.client.pull(key_s, block=block, targets=self._targets)
559 applier = self.apply_sync if block else self.apply_async
560 if isinstance(names, basestring):
561 pass
562 elif isinstance(names, (list,tuple,set)):
563 for key in names:
564 if not isinstance(key, basestring):
565 raise TypeError("keys must be str, not type %r"%type(key))
566 else:
567 raise TypeError("names must be strs, not %r"%names)
568 return applier(util._pull, names)
454 569
455 def scatter(self, key, seq, dist='b', flatten=False, block=None):
570 def scatter(self, key, seq, dist='b', flatten=False, block=None, track=None):
456 571 """
457 572 Partition a Python sequence and send the partitions to a set of engines.
458 573 """
459 574 block = block if block is not None else self.block
575 track = track if track is not None else self.track
576 targets = self._targets
577 mapObject = Map.dists[dist]()
578 nparts = len(targets)
579 msg_ids = []
580 trackers = []
581 for index, engineid in enumerate(targets):
582 push = self.client[engineid].push
583 partition = mapObject.getPartition(seq, index, nparts)
584 if flatten and len(partition) == 1:
585 r = push({key: partition[0]}, block=False, track=track)
586 else:
587 r = push({key: partition},block=False, track=track)
588 msg_ids.extend(r.msg_ids)
589 if track:
590 trackers.append(r._tracker)
460 591
461 return self.client.scatter(key, seq, dist=dist, flatten=flatten,
462 targets=self._targets, block=block)
592 if track:
593 tracker = zmq.MessageTracker(*trackers)
594 else:
595 tracker = None
596
597 r = AsyncResult(self.client, msg_ids, fname='scatter', targets=targets, tracker=tracker)
598 if block:
599 r.wait()
600 else:
601 return r
463 602
464 603 @sync_results
465 604 @save_ids
466 605 def gather(self, key, dist='b', block=None):
467 606 """
468 607 Gather a partitioned sequence on a set of engines as a single local seq.
469 608 """
470 609 block = block if block is not None else self.block
610 mapObject = Map.dists[dist]()
611 msg_ids = []
612 for index, engineid in enumerate(self._targets):
471 613
472 return self.client.gather(key, dist=dist, targets=self._targets, block=block)
614 msg_ids.extend(self.client[engineid].pull(key, block=False).msg_ids)
615
616 r = AsyncMapResult(self.client, msg_ids, mapObject, fname='gather')
617
618 if block:
619 try:
620 return r.get()
621 except KeyboardInterrupt:
622 pass
623 return r
473 624
474 625 def __getitem__(self, key):
475 626 return self.get(key)
476 627
477 628 def __setitem__(self,key, value):
478 629 self.update({key:value})
479 630
480 631 def clear(self, block=False):
481 632 """Clear the remote namespaces on my engines."""
482 633 block = block if block is not None else self.block
483 634 return self.client.clear(targets=self._targets, block=block)
484 635
485 636 def kill(self, block=True):
486 637 """Kill my engines."""
487 638 block = block if block is not None else self.block
488 639 return self.client.kill(targets=self._targets, block=block)
489 640
490 641 #----------------------------------------
491 642 # activate for %px,%autopx magics
492 643 #----------------------------------------
493 644 def activate(self):
494 645 """Make this `View` active for parallel magic commands.
495 646
496 647 IPython has a magic command syntax to work with `MultiEngineClient` objects.
497 648 In a given IPython session there is a single active one. While
498 649 there can be many `Views` created and used by the user,
499 650 there is only one active one. The active `View` is used whenever
500 651 the magic commands %px and %autopx are used.
501 652
502 653 The activate() method is called on a given `View` to make it
503 654 active. Once this has been done, the magic commands can be used.
504 655 """
505 656
506 657 try:
507 658 # This is injected into __builtins__.
508 659 ip = get_ipython()
509 660 except NameError:
510 661 print "The IPython parallel magics (%result, %px, %autopx) only work within IPython."
511 662 else:
512 663 pmagic = ip.plugin_manager.get_plugin('parallelmagic')
513 664 if pmagic is not None:
514 665 pmagic.active_multiengine_client = self
515 666 else:
516 667 print "You must first load the parallelmagic extension " \
517 668 "by doing '%load_ext parallelmagic'"
518 669
519 670
520 671 @testdec.skip_doctest
521 672 class LoadBalancedView(View):
522 673 """An load-balancing View that only executes via the Task scheduler.
523 674
524 675 Load-balanced views can be created with the client's `view` method:
525 676
526 >>> v = client.view(balanced=True)
677 >>> v = client.load_balanced_view()
527 678
528 679 or targets can be specified, to restrict the potential destinations:
529 680
530 >>> v = client.view([1,3],balanced=True)
681 >>> v = client.load_balanced_view([1,3])
531 682
532 683 which would restrict loadbalancing to between engines 1 and 3.
533 684
534 685 """
535 686
536 _default_names = ['block', 'bound', 'follow', 'after', 'timeout']
687 _flag_names = ['block', 'track', 'follow', 'after', 'timeout']
537 688
538 def __init__(self, client=None, targets=None):
539 super(LoadBalancedView, self).__init__(client=client, targets=targets)
689 def __init__(self, client=None, socket=None, targets=None):
690 super(LoadBalancedView, self).__init__(client=client, socket=socket, targets=targets)
540 691 self._ntargets = 1
541 self._balanced = True
692 self._task_scheme=client._task_scheme
693 if targets is None:
694 self._targets = None
695 self._idents=[]
542 696
543 697 def _validate_dependency(self, dep):
544 698 """validate a dependency.
545 699
546 700 For use in `set_flags`.
547 701 """
548 702 if dep is None or isinstance(dep, (str, AsyncResult, Dependency)):
549 703 return True
550 704 elif isinstance(dep, (list,set, tuple)):
551 705 for d in dep:
552 if not isinstance(d, str, AsyncResult):
706 if not isinstance(d, (str, AsyncResult)):
553 707 return False
554 708 elif isinstance(dep, dict):
555 709 if set(dep.keys()) != set(Dependency().as_dict().keys()):
556 710 return False
557 711 if not isinstance(dep['msg_ids'], list):
558 712 return False
559 713 for d in dep['msg_ids']:
560 714 if not isinstance(d, str):
561 715 return False
562 716 else:
563 717 return False
564 718
719 return True
720
721 def _render_dependency(self, dep):
722 """helper for building jsonable dependencies from various input forms."""
723 if isinstance(dep, Dependency):
724 return dep.as_dict()
725 elif isinstance(dep, AsyncResult):
726 return dep.msg_ids
727 elif dep is None:
728 return []
729 else:
730 # pass to Dependency constructor
731 return list(Dependency(dep))
732
565 733 def set_flags(self, **kwargs):
566 734 """set my attribute flags by keyword.
567 735
568 736 A View is a wrapper for the Client's apply method, but with attributes
569 737 that specify keyword arguments, those attributes can be set by keyword
570 738 argument with this method.
571 739
572 740 Parameters
573 741 ----------
574 742
575 743 block : bool
576 744 whether to wait for results
577 bound : bool
578 whether to pass the client's Namespace as the first argument
579 to functions called via `apply`.
580 745 track : bool
581 746 whether to create a MessageTracker to allow the user to
582 747 safely edit after arrays and buffers during non-copying
583 748 sends.
584 follow : Dependency, list, msg_id, AsyncResult
585 the location dependencies of tasks
586 after : Dependency, list, msg_id, AsyncResult
587 the time dependencies of tasks
588 timeout : int,None
589 the timeout to be used for tasks
749 #
750 after : Dependency or collection of msg_ids
751 Only for load-balanced execution (targets=None)
752 Specify a list of msg_ids as a time-based dependency.
753 This job will only be run *after* the dependencies
754 have been met.
755
756 follow : Dependency or collection of msg_ids
757 Only for load-balanced execution (targets=None)
758 Specify a list of msg_ids as a location-based dependency.
759 This job will only be run on an engine where this dependency
760 is met.
761
762 timeout : float/int or None
763 Only for load-balanced execution (targets=None)
764 Specify an amount of time (in seconds) for the scheduler to
765 wait for dependencies to be met before failing with a
766 DependencyTimeout.
590 767 """
591 768
592 769 super(LoadBalancedView, self).set_flags(**kwargs)
593 770 for name in ('follow', 'after'):
594 771 if name in kwargs:
595 772 value = kwargs[name]
596 773 if self._validate_dependency(value):
597 774 setattr(self, name, value)
598 775 else:
599 776 raise ValueError("Invalid dependency: %r"%value)
600 777 if 'timeout' in kwargs:
601 778 t = kwargs['timeout']
602 if not isinstance(t, (int, long, float, None)):
779 if not isinstance(t, (int, long, float, type(None))):
603 780 raise TypeError("Invalid type for timeout: %r"%type(t))
604 781 if t is not None:
605 782 if t < 0:
606 783 raise ValueError("Invalid timeout: %s"%t)
607 784 self.timeout = t
608 785
786 @sync_results
787 @save_ids
788 def _really_apply(self, f, args=None, kwargs=None, block=None, track=None,
789 after=None, follow=None, timeout=None):
790 """calls f(*args, **kwargs) on a remote engine, returning the result.
791
792 This method temporarily sets all of `apply`'s flags for a single call.
793
794 Parameters
795 ----------
796
797 f : callable
798
799 args : list [default: empty]
800
801 kwargs : dict [default: empty]
802
803 block : bool [default: self.block]
804 whether to block
805 track : bool [default: self.track]
806 whether to ask zmq to track the message, for safe non-copying sends
807
808 after, follow, timeout : see `set_flags` (TODO: document the remaining flags here)
809
810 Returns
811 -------
812
813 if self.block is False:
814 returns AsyncResult
815 else:
816 returns actual result of f(*args, **kwargs) on the engine(s)
817 This will be a list of self.targets is also a list (even length 1), or
818 the single result if self.targets is an integer engine id
819 """
820
821 # validate whether we can run
822 if self._socket.closed:
823 msg = "Task farming is disabled"
824 if self._task_scheme == 'pure':
825 msg += " because the pure ZMQ scheduler cannot handle"
826 msg += " disappearing engines."
827 raise RuntimeError(msg)
828
829 if self._task_scheme == 'pure':
830 # pure zmq scheme doesn't support dependencies
831 msg = "Pure ZMQ scheduler doesn't support dependencies"
832 if (follow or after):
833 # hard fail on DAG dependencies
834 raise RuntimeError(msg)
835 if isinstance(f, dependent):
836 # soft warn on functional dependencies
837 warnings.warn(msg, RuntimeWarning)
838
839 # build args
840 args = [] if args is None else args
841 kwargs = {} if kwargs is None else kwargs
842 block = self.block if block is None else block
843 track = self.track if track is None else track
844 after = self.after if after is None else after
845 follow = self.follow if follow is None else follow
846 timeout = self.timeout if timeout is None else timeout
847 after = self._render_dependency(after)
848 follow = self._render_dependency(follow)
849 subheader = dict(after=after, follow=follow, timeout=timeout, targets=self._idents)
850
851 msg = self.client.send_apply_message(self._socket, f, args, kwargs, track=track,
852 subheader=subheader)
853 tracker = None if track is False else msg['tracker']
854
855 ar = AsyncResult(self.client, msg['msg_id'], fname=f.__name__, targets=None, tracker=tracker)
856
857 if block:
858 try:
859 return ar.get()
860 except KeyboardInterrupt:
861 pass
862 return ar
863
609 864 @spin_after
610 865 @save_ids
611 866 def map(self, f, *sequences, **kwargs):
612 """view.map(f, *sequences, block=self.block, bound=self.bound, chunk_size=1) => list|AsyncMapResult
867 """view.map(f, *sequences, block=self.block, chunksize=1) => list|AsyncMapResult
613 868
614 869 Parallel version of builtin `map`, load-balanced by this View.
615 870
616 `block`, `bound`, and `chunk_size` can be specified by keyword only.
871 `block`, and `chunksize` can be specified by keyword only.
617 872
618 Each `chunk_size` elements will be a separate task, and will be
873 Each `chunksize` elements will be a separate task, and will be
619 874 load-balanced. This lets individual elements be available for iteration
620 875 as soon as they arrive.
621 876
622 877 Parameters
623 878 ----------
624 879
625 880 f : callable
626 881 function to be mapped
627 882 *sequences: one or more sequences of matching length
628 883 the sequences to be distributed and passed to `f`
629 884 block : bool
630 885 whether to wait for the result or not [default self.block]
631 bound : bool
632 whether to pass the client's Namespace as the first argument to `f`
633 886 track : bool
634 887 whether to create a MessageTracker to allow the user to
635 888 safely edit after arrays and buffers during non-copying
636 889 sends.
637 chunk_size : int
890 chunksize : int
638 891 how many elements should be in each task [default 1]
639 892
640 893 Returns
641 894 -------
642 895
643 896 if block=False:
644 897 AsyncMapResult
645 898 An object like AsyncResult, but which reassembles the sequence of results
646 899 into a single list. AsyncMapResults can be iterated through before all
647 900 results are complete.
648 901 else:
649 902 the result of map(f,*sequences)
650 903
651 904 """
652 905
653 906 # default
654 907 block = kwargs.get('block', self.block)
655 bound = kwargs.get('bound', self.bound)
656 chunk_size = kwargs.get('chunk_size', 1)
908 chunksize = kwargs.get('chunksize', 1)
657 909
658 910 keyset = set(kwargs.keys())
659 extra_keys = keyset.difference_update(set(['block', 'bound', 'chunk_size']))
911 extra_keys = keyset.difference(set(['block', 'chunksize']))
660 912 if extra_keys:
661 913 raise TypeError("Invalid kwargs: %s"%list(extra_keys))
662 914
663 915 assert len(sequences) > 0, "must have some sequences to map onto!"
664 916
665 pf = ParallelFunction(self.client, f, block=block, bound=bound,
666 targets=self._targets, balanced=True,
667 chunk_size=chunk_size)
917 pf = ParallelFunction(self, f, block=block, chunksize=chunksize)
668 918 return pf.map(*sequences)
669 919
670 920 __all__ = ['LoadBalancedView', 'DirectView'] No newline at end of file
@@ -1,119 +1,120 b''
1 1 """Example for generating an arbitrary DAG as a dependency map.
2 2
3 3 This demo uses networkx to generate the graph.
4 4
5 5 Authors
6 6 -------
7 7 * MinRK
8 8 """
9 9 import networkx as nx
10 10 from random import randint, random
11 11 from IPython.zmq.parallel import client as cmod
12 12
13 13 def randomwait():
14 14 import time
15 15 from random import random
16 16 time.sleep(random())
17 17 return time.time()
18 18
19 19
20 20 def random_dag(nodes, edges):
21 21 """Generate a random Directed Acyclic Graph (DAG) with a given number of nodes and edges."""
22 22 G = nx.DiGraph()
23 23 for i in range(nodes):
24 24 G.add_node(i)
25 25 while edges > 0:
26 26 a = randint(0,nodes-1)
27 27 b=a
28 28 while b==a:
29 29 b = randint(0,nodes-1)
30 30 G.add_edge(a,b)
31 31 if nx.is_directed_acyclic_graph(G):
32 32 edges -= 1
33 33 else:
34 34 # we closed a loop!
35 35 G.remove_edge(a,b)
36 36 return G
37 37
38 38 def add_children(G, parent, level, n=2):
39 39 """Add children recursively to a binary tree."""
40 40 if level == 0:
41 41 return
42 42 for i in range(n):
43 43 child = parent+str(i)
44 44 G.add_node(child)
45 45 G.add_edge(parent,child)
46 46 add_children(G, child, level-1, n)
47 47
48 48 def make_bintree(levels):
49 49 """Make a symmetrical binary tree with @levels"""
50 50 G = nx.DiGraph()
51 51 root = '0'
52 52 G.add_node(root)
53 53 add_children(G, root, levels, 2)
54 54 return G
55 55
56 def submit_jobs(client, G, jobs):
56 def submit_jobs(view, G, jobs):
57 57 """Submit jobs via client where G describes the time dependencies."""
58 58 results = {}
59 59 for node in nx.topological_sort(G):
60 deps = [ results[n] for n in G.predecessors(node) ]
61 results[node] = client.apply(jobs[node], after=deps)
60 with view.temp_flags(after=[ results[n] for n in G.predecessors(node) ]):
61 results[node] = view.apply(jobs[node])
62 62 return results
63 63
64 64 def validate_tree(G, results):
65 65 """Validate that jobs executed after their dependencies."""
66 66 for node in G:
67 67 started = results[node].metadata.started
68 68 for parent in G.predecessors(node):
69 69 finished = results[parent].metadata.completed
70 70 assert started > finished, "%s should have happened after %s"%(node, parent)
71 71
72 72 def main(nodes, edges):
73 73 """Generate a random graph, submit jobs, then validate that the
74 74 dependency order was enforced.
75 75 Finally, plot the graph, with time on the x-axis, and
76 76 in-degree on the y (just for spread). All arrows must
77 77 point at least slightly to the right if the graph is valid.
78 78 """
79 import pylab
79 from matplotlib import pyplot as plt
80 80 from matplotlib.dates import date2num
81 81 from matplotlib.cm import gist_rainbow
82 82 print "building DAG"
83 83 G = random_dag(nodes, edges)
84 84 jobs = {}
85 85 pos = {}
86 86 colors = {}
87 87 for node in G:
88 88 jobs[node] = randomwait
89 89
90 90 client = cmod.Client()
91 view = client.load_balanced_view()
91 92 print "submitting %i tasks with %i dependencies"%(nodes,edges)
92 results = submit_jobs(client, G, jobs)
93 results = submit_jobs(view, G, jobs)
93 94 print "waiting for results"
94 client.barrier()
95 view.wait()
95 96 print "done"
96 97 for node in G:
97 98 md = results[node].metadata
98 99 start = date2num(md.started)
99 100 runtime = date2num(md.completed) - start
100 101 pos[node] = (start, runtime)
101 102 colors[node] = md.engine_id
102 103 validate_tree(G, results)
103 104 nx.draw(G, pos, node_list=colors.keys(), node_color=colors.values(), cmap=gist_rainbow,
104 105 with_labels=False)
105 106 x,y = zip(*pos.values())
106 107 xmin,ymin = map(min, (x,y))
107 108 xmax,ymax = map(max, (x,y))
108 109 xscale = xmax-xmin
109 110 yscale = ymax-ymin
110 pylab.xlim(xmin-xscale*.1,xmax+xscale*.1)
111 pylab.ylim(ymin-yscale*.1,ymax+yscale*.1)
111 plt.xlim(xmin-xscale*.1,xmax+xscale*.1)
112 plt.ylim(ymin-yscale*.1,ymax+yscale*.1)
112 113 return G,results
113 114
114 115 if __name__ == '__main__':
115 import pylab
116 from matplotlib import pyplot as plt
116 117 # main(5,10)
117 118 main(32,96)
118 pylab.show()
119 plt.show()
119 120 No newline at end of file
@@ -1,118 +1,130 b''
1 1 from IPython.zmq.parallel import error
2 2 from IPython.zmq.parallel.dependency import Dependency
3 3 from IPython.zmq.parallel.client import *
4 4
5 5 client = Client()
6 6
7 7 # this will only run on machines that can import numpy:
8 8 @require('numpy')
9 9 def norm(A):
10 10 from numpy.linalg import norm
11 11 return norm(A,2)
12 12
13 13 def checkpid(pid):
14 14 """return the pid of the engine"""
15 15 import os
16 16 return os.getpid() == pid
17 17
18 18 def checkhostname(host):
19 19 import socket
20 20 return socket.gethostname() == host
21 21
22 22 def getpid():
23 23 import os
24 24 return os.getpid()
25 25
26 26 pid0 = client[0].apply_sync(getpid)
27 27
28 28 # this will depend on the pid being that of target 0:
29 29 @depend(checkpid, pid0)
30 30 def getpid2():
31 31 import os
32 32 return os.getpid()
33 33
34 view = client[None]
34 view = client.load_balanced_view()
35 35 view.block=True
36 36
37 37 # will run on anything:
38 38 pids1 = [ view.apply(getpid) for i in range(len(client.ids)) ]
39 39 print pids1
40 40 # will only run on e0:
41 41 pids2 = [ view.apply(getpid2) for i in range(len(client.ids)) ]
42 42 print pids2
43 43
44 44 print "now test some dependency behaviors"
45 45
46 46 def wait(t):
47 47 import time
48 48 time.sleep(t)
49 49 return t
50 50
51 51 # fail after some time:
52 52 def wait_and_fail(t):
53 53 import time
54 54 time.sleep(t)
55 55 return 1/0
56 56
57 57 successes = [ view.apply_async(wait, 1).msg_ids[0] for i in range(len(client.ids)) ]
58 58 failures = [ view.apply_async(wait_and_fail, 1).msg_ids[0] for i in range(len(client.ids)) ]
59 59
60 60 mixed = [failures[0],successes[0]]
61 d1a = Dependency(mixed, mode='any', success_only=False) # yes
62 d1b = Dependency(mixed, mode='any', success_only=True) # yes
63 d2a = Dependency(mixed, mode='all', success_only=False) # yes after / no follow
64 d2b = Dependency(mixed, mode='all', success_only=True) # no
65 d3 = Dependency(failures, mode='any', success_only=True) # no
66 d4 = Dependency(failures, mode='any', success_only=False) # yes
67 d5 = Dependency(failures, mode='all', success_only=False) # yes after / no follow
68 d6 = Dependency(successes, mode='all', success_only=False) # yes after / no follow
69
70 client.block = False
71
72 r1a = client.apply(getpid, after=d1a)
73 r1b = client.apply(getpid, follow=d1b)
74 r2a = client.apply(getpid, after=d2b, follow=d2a)
75 r2b = client.apply(getpid, after=d2a, follow=d2b)
76 r3 = client.apply(getpid, after=d3)
77 r4a = client.apply(getpid, after=d4)
78 r4b = client.apply(getpid, follow=d4)
79 r4c = client.apply(getpid, after=d3, follow=d4)
80 r5 = client.apply(getpid, after=d5)
81 r5b = client.apply(getpid, follow=d5, after=d3)
82 r6 = client.apply(getpid, follow=d6)
83 r6b = client.apply(getpid, after=d6, follow=d2b)
61 d1a = Dependency(mixed, all=False, failure=True) # yes
62 d1b = Dependency(mixed, all=False) # yes
63 d2a = Dependency(mixed, all=True, failure=True) # yes after / no follow
64 d2b = Dependency(mixed, all=True) # no
65 d3 = Dependency(failures, all=False) # no
66 d4 = Dependency(failures, all=False, failure=True) # yes
67 d5 = Dependency(failures, all=True, failure=True) # yes after / no follow
68 d6 = Dependency(successes, all=True, failure=True) # yes after / no follow
69
70 view.block = False
71 flags = view.temp_flags
72 with flags(after=d1a):
73 r1a = view.apply(getpid)
74 with flags(follow=d1b):
75 r1b = view.apply(getpid)
76 with flags(after=d2b, follow=d2a):
77 r2a = view.apply(getpid)
78 with flags(after=d2a, follow=d2b):
79 r2b = view.apply(getpid)
80 with flags(after=d3):
81 r3 = view.apply(getpid)
82 with flags(after=d4):
83 r4a = view.apply(getpid)
84 with flags(follow=d4):
85 r4b = view.apply(getpid)
86 with flags(after=d3, follow=d4):
87 r4c = view.apply(getpid)
88 with flags(after=d5):
89 r5 = view.apply(getpid)
90 with flags(follow=d5, after=d3):
91 r5b = view.apply(getpid)
92 with flags(follow=d6):
93 r6 = view.apply(getpid)
94 with flags(after=d6, follow=d2b):
95 r6b = view.apply(getpid)
84 96
85 97 def should_fail(f):
86 98 try:
87 99 f()
88 100 except error.KernelError:
89 101 pass
90 102 else:
91 103 print 'should have raised'
92 104 # raise Exception("should have raised")
93 105
94 106 # print r1a.msg_ids
95 107 r1a.get()
96 108 # print r1b.msg_ids
97 109 r1b.get()
98 110 # print r2a.msg_ids
99 111 should_fail(r2a.get)
100 112 # print r2b.msg_ids
101 113 should_fail(r2b.get)
102 114 # print r3.msg_ids
103 115 should_fail(r3.get)
104 116 # print r4a.msg_ids
105 117 r4a.get()
106 118 # print r4b.msg_ids
107 119 r4b.get()
108 120 # print r4c.msg_ids
109 121 should_fail(r4c.get)
110 122 # print r5.msg_ids
111 123 r5.get()
112 124 # print r5b.msg_ids
113 125 should_fail(r5b.get)
114 126 # print r6.msg_ids
115 127 should_fail(r6.get) # assuming > 1 engine
116 128 # print r6b.msg_ids
117 129 should_fail(r6b.get)
118 130 print 'done'
@@ -1,36 +1,37 b''
1 1 from IPython.zmq.parallel.client import *
2 2
3 3 client = Client()
4 view = client[:]
4 5
5 @remote(client, block=True)
6 @view.remote(block=True)
6 7 def square(a):
7 8 """return square of a number"""
8 9 return a*a
9 10
10 11 squares = map(square, range(42))
11 12
12 13 # but that blocked between each result; not exactly useful
13 14
14 15 square.block = False
15 16
16 17 arlist = map(square, range(42))
17 18 # submitted very fast
18 19
19 20 # wait for the results:
20 21 squares2 = [ r.get() for r in arlist ]
21 22
22 23 # now the more convenient @parallel decorator, which has a map method:
23 24
24 @parallel(client, block=False)
25 @view.parallel(block=False)
25 26 def psquare(a):
26 27 """return square of a number"""
27 28 return a*a
28 29
29 30 # this chunks the data into n-negines jobs, not 42 jobs:
30 31 ar = psquare.map(range(42))
31 32
32 33 # wait for the results to be done:
33 34 squares3 = ar.get()
34 35
35 36 print squares == squares2, squares3==squares
36 37 # True No newline at end of file
@@ -1,15 +1,15 b''
1 1 from IPython.zmq.parallel.client import *
2 2
3 3 client = Client()
4 4
5 5 for id in client.ids:
6 client.push(dict(ids=id*id), targets=id)
6 client[id].push(dict(ids=id*id))
7 7
8 rns = client[0]
9 rns['a'] = 5
8 v = client[0]
9 v['a'] = 5
10 10
11 print rns['a']
11 print v['a']
12 12
13 13 remotes = client[:]
14 14
15 15 print remotes['ids'] No newline at end of file
@@ -1,144 +1,144 b''
1 1 #!/usr/bin/env python
2 2 """Run a Monte-Carlo options pricer in parallel."""
3 3
4 4 #-----------------------------------------------------------------------------
5 5 # Imports
6 6 #-----------------------------------------------------------------------------
7 7
8 8 import sys
9 9 import time
10 10 from IPython.zmq.parallel import client
11 11 import numpy as np
12 12 from mcpricer import price_options
13 13 from matplotlib import pyplot as plt
14 14
15 15 #-----------------------------------------------------------------------------
16 16 # Setup parameters for the run
17 17 #-----------------------------------------------------------------------------
18 18
19 19 def ask_question(text, the_type, default):
20 20 s = '%s [%r]: ' % (text, the_type(default))
21 21 result = raw_input(s)
22 22 if result:
23 23 return the_type(result)
24 24 else:
25 25 return the_type(default)
26 26
27 27 cluster_profile = ask_question("Cluster profile", str, "default")
28 28 price = ask_question("Initial price", float, 100.0)
29 29 rate = ask_question("Interest rate", float, 0.05)
30 30 days = ask_question("Days to expiration", int, 260)
31 31 paths = ask_question("Number of MC paths", int, 10000)
32 32 n_strikes = ask_question("Number of strike values", int, 5)
33 33 min_strike = ask_question("Min strike price", float, 90.0)
34 34 max_strike = ask_question("Max strike price", float, 110.0)
35 35 n_sigmas = ask_question("Number of volatility values", int, 5)
36 36 min_sigma = ask_question("Min volatility", float, 0.1)
37 37 max_sigma = ask_question("Max volatility", float, 0.4)
38 38
39 39 strike_vals = np.linspace(min_strike, max_strike, n_strikes)
40 40 sigma_vals = np.linspace(min_sigma, max_sigma, n_sigmas)
41 41
42 42 #-----------------------------------------------------------------------------
43 43 # Setup for parallel calculation
44 44 #-----------------------------------------------------------------------------
45 45
46 46 # The Client is used to setup the calculation and works with all
47 47 # engines.
48 48 c = client.Client(profile=cluster_profile)
49 49
50 50 # A LoadBalancedView is an interface to the engines that provides dynamic load
51 51 # balancing at the expense of not knowing which engine will execute the code.
52 view = c.view()
52 view = c.load_balanced_view()
53 53
54 54 # Initialize the common code on the engines. This Python module has the
55 55 # price_options function that prices the options.
56 56
57 57 #-----------------------------------------------------------------------------
58 58 # Perform parallel calculation
59 59 #-----------------------------------------------------------------------------
60 60
61 61 print "Running parallel calculation over strike prices and volatilities..."
62 62 print "Strike prices: ", strike_vals
63 63 print "Volatilities: ", sigma_vals
64 64 sys.stdout.flush()
65 65
66 66 # Submit tasks to the TaskClient for each (strike, sigma) pair as a MapTask.
67 67 t1 = time.time()
68 68 async_results = []
69 69 for strike in strike_vals:
70 70 for sigma in sigma_vals:
71 71 ar = view.apply_async(price_options, price, strike, sigma, rate, days, paths)
72 72 async_results.append(ar)
73 73
74 74 print "Submitted tasks: ", len(async_results)
75 75 sys.stdout.flush()
76 76
77 77 # Block until all tasks are completed.
78 c.barrier(async_results)
78 c.wait(async_results)
79 79 t2 = time.time()
80 80 t = t2-t1
81 81
82 82 print "Parallel calculation completed, time = %s s" % t
83 83 print "Collecting results..."
84 84
85 85 # Get the results using TaskClient.get_task_result.
86 86 results = [ar.get() for ar in async_results]
87 87
88 88 # Assemble the result into a structured NumPy array.
89 89 prices = np.empty(n_strikes*n_sigmas,
90 90 dtype=[('ecall',float),('eput',float),('acall',float),('aput',float)]
91 91 )
92 92
93 93 for i, price in enumerate(results):
94 94 prices[i] = tuple(price)
95 95
96 96 prices.shape = (n_strikes, n_sigmas)
97 97 strike_mesh, sigma_mesh = np.meshgrid(strike_vals, sigma_vals)
98 98
99 99 print "Results are available: strike_mesh, sigma_mesh, prices"
100 100 print "To plot results type 'plot_options(sigma_mesh, strike_mesh, prices)'"
101 101
102 102 #-----------------------------------------------------------------------------
103 103 # Utilities
104 104 #-----------------------------------------------------------------------------
105 105
106 106 def plot_options(sigma_mesh, strike_mesh, prices):
107 107 """
108 108 Make a contour plot of the option price in (sigma, strike) space.
109 109 """
110 110 plt.figure(1)
111 111
112 112 plt.subplot(221)
113 113 plt.contourf(sigma_mesh, strike_mesh, prices['ecall'])
114 114 plt.axis('tight')
115 115 plt.colorbar()
116 116 plt.title('European Call')
117 117 plt.ylabel("Strike Price")
118 118
119 119 plt.subplot(222)
120 120 plt.contourf(sigma_mesh, strike_mesh, prices['acall'])
121 121 plt.axis('tight')
122 122 plt.colorbar()
123 123 plt.title("Asian Call")
124 124
125 125 plt.subplot(223)
126 126 plt.contourf(sigma_mesh, strike_mesh, prices['eput'])
127 127 plt.axis('tight')
128 128 plt.colorbar()
129 129 plt.title("European Put")
130 130 plt.xlabel("Volatility")
131 131 plt.ylabel("Strike Price")
132 132
133 133 plt.subplot(224)
134 134 plt.contourf(sigma_mesh, strike_mesh, prices['aput'])
135 135 plt.axis('tight')
136 136 plt.colorbar()
137 137 plt.title("Asian Put")
138 138 plt.xlabel("Volatility")
139 139
140 140
141 141
142 142
143 143
144 144
@@ -1,64 +1,64 b''
1 1 """Calculate statistics on the digits of pi in parallel.
2 2
3 3 This program uses the functions in :file:`pidigits.py` to calculate
4 4 the frequencies of 2 digit sequences in the digits of pi. The
5 5 results are plotted using matplotlib.
6 6
7 7 To run, text files from http://www.super-computing.org/
8 8 must be installed in the working directory of the IPython engines.
9 9 The actual filenames to be used can be set with the ``filestring``
10 10 variable below.
11 11
12 12 The dataset we have been using for this is the 200 million digit one here:
13 13 ftp://pi.super-computing.org/.2/pi200m/
14 14
15 15 and the files used will be downloaded if they are not in the working directory
16 16 of the IPython engines.
17 17 """
18 18
19 19 from IPython.zmq.parallel import client
20 20 from matplotlib import pyplot as plt
21 21 import numpy as np
22 22 from pidigits import *
23 23 from timeit import default_timer as clock
24 24
25 25 # Files with digits of pi (10m digits each)
26 26 filestring = 'pi200m.ascii.%(i)02dof20'
27 27 files = [filestring % {'i':i} for i in range(1,16)]
28 28
29 29 # Connect to the IPython cluster
30 c = client.Client(profile='edison')
31 c.run('pidigits.py')
30 c = client.Client()
31 c[:].run('pidigits.py')
32 32
33 33 # the number of engines
34 34 n = len(c)
35 35 id0 = c.ids[0]
36 36 v = c[:]
37 v.set_flags(bound=True,block=True)
37 v.block=True
38 38 # fetch the pi-files
39 39 print "downloading %i files of pi"%n
40 40 v.map(fetch_pi_file, files[:n])
41 41 print "done"
42 42
43 43 # Run 10m digits on 1 engine
44 44 t1 = clock()
45 45 freqs10m = c[id0].apply_sync(compute_two_digit_freqs, files[0])
46 46 t2 = clock()
47 47 digits_per_second1 = 10.0e6/(t2-t1)
48 48 print "Digits per second (1 core, 10m digits): ", digits_per_second1
49 49
50 50
51 51 # Run n*10m digits on all engines
52 52 t1 = clock()
53 53 freqs_all = v.map(compute_two_digit_freqs, files[:n])
54 54 freqs150m = reduce_freqs(freqs_all)
55 55 t2 = clock()
56 56 digits_per_second8 = n*10.0e6/(t2-t1)
57 57 print "Digits per second (%i engines, %i0m digits): "%(n,n), digits_per_second8
58 58
59 59 print "Speedup: ", digits_per_second8/digits_per_second1
60 60
61 61 plot_two_digit_freqs(freqs150m)
62 62 plt.title("2 digit sequences in %i0m digits of pi"%n)
63 63 plt.show()
64 64
@@ -1,203 +1,205 b''
1 1 #!/usr/bin/env python
2 2 """
3 3 A simple python program of solving a 2D wave equation in parallel.
4 4 Domain partitioning and inter-processor communication
5 5 are done by an object of class MPIRectPartitioner2D
6 6 (which is a subclass of RectPartitioner2D and uses MPI via mpi4py)
7 7
8 8 An example of running the program is (8 processors, 4x2 partition,
9 9 400x100 grid cells)::
10 10
11 11 $ ipclusterz start --profile mpi -n 8 # start 8 engines (assuming mpi profile has been configured)
12 12 $ ./parallelwave-mpi.py --grid 400 100 --partition 4 2 --profile mpi
13 13
14 14 See also parallelwave, which runs the same program, but uses 0MQ
15 15 (via pyzmq) for the inter-engine communication.
16 16
17 17 Authors
18 18 -------
19 19
20 20 * Xing Cai
21 21 * Min Ragan-Kelley
22 22
23 23 """
24 24
25 25 import sys
26 26 import time
27 27
28 28 from numpy import exp, zeros, newaxis, sqrt
29 29
30 30 from IPython.external import argparse
31 31 from IPython.zmq.parallel.client import Client, Reference
32 32
33 def setup_partitioner(ns, index, num_procs, gnum_cells, parts):
33 def setup_partitioner(index, num_procs, gnum_cells, parts):
34 34 """create a partitioner in the engine namespace"""
35 global partitioner
35 36 p = MPIRectPartitioner2D(my_id=index, num_procs=num_procs)
36 37 p.redim(global_num_cells=gnum_cells, num_parts=parts)
37 38 p.prepare_communication()
38 39 # put the partitioner into the global namespace:
39 ns.partitioner=p
40 partitioner=p
40 41
41 def setup_solver(ns, *args, **kwargs):
42 def setup_solver(*args, **kwargs):
42 43 """create a WaveSolver in the engine namespace"""
43 ns.solver = WaveSolver(*args, **kwargs)
44 global solver
45 solver = WaveSolver(*args, **kwargs)
44 46
45 47 def wave_saver(u, x, y, t):
46 48 """save the wave log"""
47 49 global u_hist
48 50 global t_hist
49 51 t_hist.append(t)
50 52 u_hist.append(1.0*u)
51 53
52 54
53 55 # main program:
54 56 if __name__ == '__main__':
55 57
56 58 parser = argparse.ArgumentParser()
57 59 paa = parser.add_argument
58 60 paa('--grid', '-g',
59 61 type=int, nargs=2, default=[100,100], dest='grid',
60 62 help="Cells in the grid, e.g. --grid 100 200")
61 63 paa('--partition', '-p',
62 64 type=int, nargs=2, default=None,
63 65 help="Process partition grid, e.g. --partition 4 2 for 4x2")
64 66 paa('-c',
65 67 type=float, default=1.,
66 68 help="Wave speed (I think)")
67 69 paa('-Ly',
68 70 type=float, default=1.,
69 71 help="system size (in y)")
70 72 paa('-Lx',
71 73 type=float, default=1.,
72 74 help="system size (in x)")
73 75 paa('-t', '--tstop',
74 76 type=float, default=1.,
75 77 help="Time units to run")
76 78 paa('--profile',
77 79 type=unicode, default=u'default',
78 80 help="Specify the ipcluster profile for the client to connect to.")
79 81 paa('--save',
80 82 action='store_true',
81 83 help="Add this flag to save the time/wave history during the run.")
82 84 paa('--scalar',
83 85 action='store_true',
84 86 help="Also run with scalar interior implementation, to see vector speedup.")
85 87
86 88 ns = parser.parse_args()
87 89 # set up arguments
88 90 grid = ns.grid
89 91 partition = ns.partition
90 92 Lx = ns.Lx
91 93 Ly = ns.Ly
92 94 c = ns.c
93 95 tstop = ns.tstop
94 96 if ns.save:
95 97 user_action = wave_saver
96 98 else:
97 99 user_action = None
98 100
99 101 num_cells = 1.0*(grid[0]-1)*(grid[1]-1)
100 102 final_test = True
101 103
102 104 # create the Client
103 105 rc = Client(profile=ns.profile)
104 106 num_procs = len(rc.ids)
105 107
106 108 if partition is None:
107 109 partition = [1,num_procs]
108 110
109 111 assert partition[0]*partition[1] == num_procs, "can't map partition %s to %i engines"%(partition, num_procs)
110 112
111 113 view = rc[:]
112 114 print "Running %s system on %s processes until %f"%(grid, partition, tstop)
113 115
114 116 # functions defining initial/boundary/source conditions
115 117 def I(x,y):
116 118 from numpy import exp
117 119 return 1.5*exp(-100*((x-0.5)**2+(y-0.5)**2))
118 120 def f(x,y,t):
119 121 return 0.0
120 122 # from numpy import exp,sin
121 123 # return 10*exp(-(x - sin(100*t))**2)
122 124 def bc(x,y,t):
123 125 return 0.0
124 126
125 127 # initial imports, setup rank
126 128 view.execute('\n'.join([
127 129 "from mpi4py import MPI",
128 130 "import numpy",
129 131 "mpi = MPI.COMM_WORLD",
130 132 "my_id = MPI.COMM_WORLD.Get_rank()"]), block=True)
131 133
132 134 # initialize t_hist/u_hist for saving the state at each step (optional)
133 135 view['t_hist'] = []
134 136 view['u_hist'] = []
135 137
136 138 # set vector/scalar implementation details
137 139 impl = {}
138 140 impl['ic'] = 'vectorized'
139 141 impl['inner'] = 'scalar'
140 142 impl['bc'] = 'vectorized'
141 143
142 144 # execute some files so that the classes we need will be defined on the engines:
143 145 view.run('RectPartitioner.py')
144 146 view.run('wavesolver.py')
145 147
146 148 # setup remote partitioner
147 149 # note that Reference means that the argument passed to setup_partitioner will be the
148 150 # object named 'my_id' in the engine's namespace
149 view.apply_sync_bound(setup_partitioner, Reference('my_id'), num_procs, grid, partition)
151 view.apply_sync(setup_partitioner, Reference('my_id'), num_procs, grid, partition)
150 152 # wait for initial communication to complete
151 153 view.execute('mpi.barrier()')
152 154 # setup remote solvers
153 view.apply_sync_bound(setup_solver, I,f,c,bc,Lx,Ly,partitioner=Reference('partitioner'), dt=0,implementation=impl)
155 view.apply_sync(setup_solver, I,f,c,bc,Lx,Ly,partitioner=Reference('partitioner'), dt=0,implementation=impl)
154 156
155 157 # lambda for calling solver.solve:
156 158 _solve = lambda *args, **kwargs: solver.solve(*args, **kwargs)
157 159
158 160 if ns.scalar:
159 161 impl['inner'] = 'scalar'
160 162 # run first with element-wise Python operations for each cell
161 163 t0 = time.time()
162 164 ar = view.apply_async(_solve, tstop, dt=0, verbose=True, final_test=final_test, user_action=user_action)
163 165 if final_test:
164 166 # this sum is performed element-wise as results finish
165 167 s = sum(ar)
166 168 # the L2 norm (RMS) of the result:
167 169 norm = sqrt(s/num_cells)
168 170 else:
169 171 norm = -1
170 172 t1 = time.time()
171 173 print 'scalar inner-version, Wtime=%g, norm=%g'%(t1-t0, norm)
172 174
173 175 impl['inner'] = 'vectorized'
174 176 # setup new solvers
175 view.apply_sync_bound(setup_solver, I,f,c,bc,Lx,Ly,partitioner=Reference('partitioner'), dt=0,implementation=impl)
177 view.apply_sync(setup_solver, I,f,c,bc,Lx,Ly,partitioner=Reference('partitioner'), dt=0,implementation=impl)
176 178 view.execute('mpi.barrier()')
177 179
178 180 # run again with numpy vectorized inner-implementation
179 181 t0 = time.time()
180 182 ar = view.apply_async(_solve, tstop, dt=0, verbose=True, final_test=final_test)#, user_action=wave_saver)
181 183 if final_test:
182 184 # this sum is performed element-wise as results finish
183 185 s = sum(ar)
184 186 # the L2 norm (RMS) of the result:
185 187 norm = sqrt(s/num_cells)
186 188 else:
187 189 norm = -1
188 190 t1 = time.time()
189 191 print 'vector inner-version, Wtime=%g, norm=%g'%(t1-t0, norm)
190 192
191 193 # if ns.save is True, then u_hist stores the history of u as a list
192 194 # If the partition scheme is Nx1, then u can be reconstructed via 'gather':
193 195 if ns.save and partition[-1] == 1:
194 196 import pylab
195 197 view.execute('u_last=u_hist[-1]')
196 198 # map mpi IDs to IPython IDs, which may not match
197 199 ranks = view['my_id']
198 200 targets = range(len(ranks))
199 201 for idx in range(len(ranks)):
200 202 targets[idx] = ranks.index(idx)
201 203 u_last = rc[targets].gather('u_last', block=True)
202 204 pylab.pcolor(u_last)
203 205 pylab.show()
@@ -1,207 +1,209 b''
1 1 #!/usr/bin/env python
2 2 """
3 3 A simple python program of solving a 2D wave equation in parallel.
4 4 Domain partitioning and inter-processor communication
5 5 are done by an object of class ZMQRectPartitioner2D
6 6 (which is a subclass of RectPartitioner2D and uses 0MQ via pyzmq)
7 7
8 8 An example of running the program is (8 processors, 4x2 partition,
9 9 200x200 grid cells)::
10 10
11 11 $ ipclusterz start -n 8 # start 8 engines
12 12 $ ./parallelwave.py --grid 200 200 --partition 4 2
13 13
14 14 See also parallelwave-mpi, which runs the same program, but uses MPI
15 15 (via mpi4py) for the inter-engine communication.
16 16
17 17 Authors
18 18 -------
19 19
20 20 * Xing Cai
21 21 * Min Ragan-Kelley
22 22
23 23 """
24 24 #
25 25 import sys
26 26 import time
27 27
28 28 from numpy import exp, zeros, newaxis, sqrt
29 29
30 30 from IPython.external import argparse
31 31 from IPython.zmq.parallel.client import Client, Reference
32 32
33 def setup_partitioner(ns, comm, addrs, index, num_procs, gnum_cells, parts):
33 def setup_partitioner(comm, addrs, index, num_procs, gnum_cells, parts):
34 34 """create a partitioner in the engine namespace"""
35 global partitioner
35 36 p = ZMQRectPartitioner2D(comm, addrs, my_id=index, num_procs=num_procs)
36 37 p.redim(global_num_cells=gnum_cells, num_parts=parts)
37 38 p.prepare_communication()
38 39 # put the partitioner into the global namespace:
39 ns.partitioner=p
40 partitioner=p
40 41
41 def setup_solver(ns, *args, **kwargs):
42 def setup_solver(*args, **kwargs):
42 43 """create a WaveSolver in the engine namespace."""
43 ns.solver = WaveSolver(*args, **kwargs)
44 global solver
45 solver = WaveSolver(*args, **kwargs)
44 46
45 47 def wave_saver(u, x, y, t):
46 48 """save the wave state for each timestep."""
47 49 global u_hist
48 50 global t_hist
49 51 t_hist.append(t)
50 52 u_hist.append(1.0*u)
51 53
52 54
53 55 # main program:
54 56 if __name__ == '__main__':
55 57
56 58 parser = argparse.ArgumentParser()
57 59 paa = parser.add_argument
58 60 paa('--grid', '-g',
59 61 type=int, nargs=2, default=[100,100], dest='grid',
60 62 help="Cells in the grid, e.g. --grid 100 200")
61 63 paa('--partition', '-p',
62 64 type=int, nargs=2, default=None,
63 65 help="Process partition grid, e.g. --partition 4 2 for 4x2")
64 66 paa('-c',
65 67 type=float, default=1.,
66 68 help="Wave speed (I think)")
67 69 paa('-Ly',
68 70 type=float, default=1.,
69 71 help="system size (in y)")
70 72 paa('-Lx',
71 73 type=float, default=1.,
72 74 help="system size (in x)")
73 75 paa('-t', '--tstop',
74 76 type=float, default=1.,
75 77 help="Time units to run")
76 78 paa('--profile',
77 79 type=unicode, default=u'default',
78 80 help="Specify the ipcluster profile for the client to connect to.")
79 81 paa('--save',
80 82 action='store_true',
81 83 help="Add this flag to save the time/wave history during the run.")
82 84 paa('--scalar',
83 85 action='store_true',
84 86 help="Also run with scalar interior implementation, to see vector speedup.")
85 87
86 88 ns = parser.parse_args()
87 89 # set up arguments
88 90 grid = ns.grid
89 91 partition = ns.partition
90 92 Lx = ns.Lx
91 93 Ly = ns.Ly
92 94 c = ns.c
93 95 tstop = ns.tstop
94 96 if ns.save:
95 97 user_action = wave_saver
96 98 else:
97 99 user_action = None
98 100
99 101 num_cells = 1.0*(grid[0]-1)*(grid[1]-1)
100 102 final_test = True
101 103
102 104 # create the Client
103 105 rc = Client(profile=ns.profile)
104 106 num_procs = len(rc.ids)
105 107
106 108 if partition is None:
107 109 partition = [num_procs,1]
108 110 else:
109 111 num_procs = min(num_procs, partition[0]*partition[1])
110 112
111 113 assert partition[0]*partition[1] == num_procs, "can't map partition %s to %i engines"%(partition, num_procs)
112 114
113 115 # construct the View:
114 116 view = rc[:num_procs]
115 117 print "Running %s system on %s processes until %f"%(grid, partition, tstop)
116 118
117 119 # functions defining initial/boundary/source conditions
118 120 def I(x,y):
119 121 from numpy import exp
120 122 return 1.5*exp(-100*((x-0.5)**2+(y-0.5)**2))
121 123 def f(x,y,t):
122 124 return 0.0
123 125 # from numpy import exp,sin
124 126 # return 10*exp(-(x - sin(100*t))**2)
125 127 def bc(x,y,t):
126 128 return 0.0
127 129
128 130 # initialize t_hist/u_hist for saving the state at each step (optional)
129 131 view['t_hist'] = []
130 132 view['u_hist'] = []
131 133
132 134 # set vector/scalar implementation details
133 135 impl = {}
134 136 impl['ic'] = 'vectorized'
135 137 impl['inner'] = 'scalar'
136 138 impl['bc'] = 'vectorized'
137 139
138 140 # execute some files so that the classes we need will be defined on the engines:
139 141 view.execute('import numpy')
140 142 view.run('communicator.py')
141 143 view.run('RectPartitioner.py')
142 144 view.run('wavesolver.py')
143 145
144 146 # scatter engine IDs
145 147 view.scatter('my_id', range(num_procs), flatten=True)
146 148
147 149 # create the engine connectors
148 150 view.execute('com = EngineCommunicator()')
149 151
150 152 # gather the connection information into a single dict
151 153 ar = view.apply_async(lambda : com.info)
152 154 peers = ar.get_dict()
153 155 # print peers
154 156 # this is a dict, keyed by engine ID, of the connection info for the EngineCommunicators
155 157
156 158 # setup remote partitioner
157 159 # note that Reference means that the argument passed to setup_partitioner will be the
158 160 # object named 'com' in the engine's namespace
159 view.apply_sync_bound(setup_partitioner, Reference('com'), peers, Reference('my_id'), num_procs, grid, partition)
161 view.apply_sync(setup_partitioner, Reference('com'), peers, Reference('my_id'), num_procs, grid, partition)
160 162 time.sleep(1)
161 163 # convenience lambda to call solver.solve:
162 164 _solve = lambda *args, **kwargs: solver.solve(*args, **kwargs)
163 165
164 166 if ns.scalar:
165 167 impl['inner'] = 'scalar'
166 168 # setup remote solvers
167 view.apply_sync_bound(setup_solver, I,f,c,bc,Lx,Ly, partitioner=Reference('partitioner'), dt=0,implementation=impl)
169 view.apply_sync(setup_solver, I,f,c,bc,Lx,Ly, partitioner=Reference('partitioner'), dt=0,implementation=impl)
168 170
169 171 # run first with element-wise Python operations for each cell
170 172 t0 = time.time()
171 173 ar = view.apply_async(_solve, tstop, dt=0, verbose=True, final_test=final_test, user_action=user_action)
172 174 if final_test:
173 175 # this sum is performed element-wise as results finish
174 176 s = sum(ar)
175 177 # the L2 norm (RMS) of the result:
176 178 norm = sqrt(s/num_cells)
177 179 else:
178 180 norm = -1
179 181 t1 = time.time()
180 182 print 'scalar inner-version, Wtime=%g, norm=%g'%(t1-t0, norm)
181 183
182 184 # run again with faster numpy-vectorized inner implementation:
183 185 impl['inner'] = 'vectorized'
184 186 # setup remote solvers
185 view.apply_sync_bound(setup_solver, I,f,c,bc,Lx,Ly,partitioner=Reference('partitioner'), dt=0,implementation=impl)
187 view.apply_sync(setup_solver, I,f,c,bc,Lx,Ly,partitioner=Reference('partitioner'), dt=0,implementation=impl)
186 188
187 189 t0 = time.time()
188 190
189 191 ar = view.apply_async(_solve, tstop, dt=0, verbose=True, final_test=final_test)#, user_action=wave_saver)
190 192 if final_test:
191 193 # this sum is performed element-wise as results finish
192 194 s = sum(ar)
193 195 # the L2 norm (RMS) of the result:
194 196 norm = sqrt(s/num_cells)
195 197 else:
196 198 norm = -1
197 199 t1 = time.time()
198 200 print 'vector inner-version, Wtime=%g, norm=%g'%(t1-t0, norm)
199 201
200 202 # if ns.save is True, then u_hist stores the history of u as a list
201 203 # If the partition scheme is Nx1, then u can be reconstructed via 'gather':
202 204 if ns.save and partition[-1] == 1:
203 205 import pylab
204 206 view.execute('u_last=u_hist[-1]')
205 207 u_last = view.gather('u_last', block=True)
206 208 pylab.pcolor(u_last)
207 209 pylab.show() No newline at end of file
@@ -1,34 +1,34 b''
1 1 =====================
2 2 IPython Documentation
3 3 =====================
4 4
5 5 .. htmlonly::
6 6
7 7 :Release: |release|
8 8 :Date: |today|
9 9
10 10 Welcome to the official IPython documentation.
11 11
12 12 Contents
13 13 ========
14 14
15 15 .. toctree::
16 16 :maxdepth: 1
17 17
18 18 overview.txt
19 19 whatsnew/index.txt
20 20 install/index.txt
21 21 interactive/index.txt
22 parallel/index.txt
22 .. parallel/index.txt
23 23 parallelz/index.txt
24 24 config/index.txt
25 25 development/index.txt
26 26 api/index.txt
27 27 faq.txt
28 28 about/index.txt
29 29
30 30 .. htmlonly::
31 31 * :ref:`genindex`
32 32 * :ref:`modindex`
33 33 * :ref:`search`
34 34
@@ -1,172 +1,173 b''
1 1 .. _dag_dependencies:
2 2
3 3 ================
4 4 DAG Dependencies
5 5 ================
6 6
7 7 Often, parallel workflow is described in terms of a `Directed Acyclic Graph
8 8 <http://en.wikipedia.org/wiki/Directed_acyclic_graph>`_ or DAG. A popular library
9 9 for working with Graphs is NetworkX_. Here, we will walk through a demo mapping
10 10 a nx DAG to task dependencies.
11 11
12 12 The full script that runs this demo can be found in
13 13 :file:`docs/examples/newparallel/dagdeps.py`.
14 14
15 15 Why are DAGs good for task dependencies?
16 16 ----------------------------------------
17 17
18 18 The 'G' in DAG is 'Graph'. A Graph is a collection of **nodes** and **edges** that connect
19 19 the nodes. For our purposes, each node would be a task, and each edge would be a
20 20 dependency. The 'D' in DAG stands for 'Directed'. This means that each edge has a
21 21 direction associated with it. So we can interpret the edge (a,b) as meaning that b depends
22 22 on a, whereas the edge (b,a) would mean a depends on b. The 'A' is 'Acyclic', meaning that
23 23 there must not be any closed loops in the graph. This is important for dependencies,
24 24 because if a loop were closed, then a task could ultimately depend on itself, and never be
25 25 able to run. If your workflow can be described as a DAG, then it is impossible for your
26 26 dependencies to cause a deadlock.
27 27
28 28 A Sample DAG
29 29 ------------
30 30
31 31 Here, we have a very simple 5-node DAG:
32 32
33 33 .. figure:: simpledag.*
34 34
35 35 With NetworkX, an arrow is just a fattened bit on the edge. Here, we can see that task 0
36 36 depends on nothing, and can run immediately. 1 and 2 depend on 0; 3 depends on
37 37 1 and 2; and 4 depends only on 1.
38 38
39 39 A possible sequence of events for this workflow:
40 40
41 41 0. Task 0 can run right away
42 42 1. 0 finishes, so 1,2 can start
43 43 2. 1 finishes, 3 is still waiting on 2, but 4 can start right away
44 44 3. 2 finishes, and 3 can finally start
45 45
46 46
47 47 Further, taking failures into account, assuming all dependencies are run with the default
48 `success_only=True`, the following cases would occur for each node's failure:
48 `success=True,failure=False`, the following cases would occur for each node's failure:
49 49
50 50 0. fails: all other tasks fail as Impossible
51 51 1. 2 can still succeed, but 3,4 are unreachable
52 52 2. 3 becomes unreachable, but 4 is unaffected
53 53 3. and 4. are terminal, and can have no effect on other nodes
54 54
55 55 The code to generate the simple DAG:
56 56
57 57 .. sourcecode:: python
58 58
59 59 import networkx as nx
60 60
61 61 G = nx.DiGraph()
62 62
63 63 # add 5 nodes, labeled 0-4:
64 64 map(G.add_node, range(5))
65 65 # 1,2 depend on 0:
66 66 G.add_edge(0,1)
67 67 G.add_edge(0,2)
68 68 # 3 depends on 1,2
69 69 G.add_edge(1,3)
70 70 G.add_edge(2,3)
71 71 # 4 depends on 1
72 72 G.add_edge(1,4)
73 73
74 74 # now draw the graph:
75 75 pos = { 0 : (0,0), 1 : (1,1), 2 : (-1,1),
76 76 3 : (0,2), 4 : (2,2)}
77 77 nx.draw(G, pos, edge_color='r')
78 78
79 79
80 80 For demonstration purposes, we have a function that generates a random DAG with a given
81 81 number of nodes and edges.
82 82
83 83 .. literalinclude:: ../../examples/newparallel/dagdeps.py
84 84 :language: python
85 85 :lines: 20-36
86 86
87 87 So first, we start with a graph of 32 nodes, with 128 edges:
88 88
89 89 .. sourcecode:: ipython
90 90
91 91 In [2]: G = random_dag(32,128)
92 92
93 93 Now, we need to build our dict of jobs corresponding to the nodes on the graph:
94 94
95 95 .. sourcecode:: ipython
96 96
97 97 In [3]: jobs = {}
98 98
99 99 # in reality, each job would presumably be different
100 100 # randomwait is just a function that sleeps for a random interval
101 101 In [4]: for node in G:
102 102 ...: jobs[node] = randomwait
103 103
104 104 Once we have a dict of jobs matching the nodes on the graph, we can start submitting jobs,
105 105 and linking up the dependencies. Since we don't know a job's msg_id until it is submitted,
106 106 which is necessary for building dependencies, it is critical that we don't submit any jobs
107 107 before other jobs it may depend on. Fortunately, NetworkX provides a
108 108 :meth:`topological_sort` method which ensures exactly this. It presents an iterable that
109 109 guarantees that when you arrive at a node, you have already visited all the nodes
110 110 on which it depends:
111 111
112 112 .. sourcecode:: ipython
113 113
114 In [5]: c = client.Client()
114 In [5]: rc = client.Client()
115 In [5]: view = rc.load_balanced_view()
115 116
116 117 In [6]: results = {}
117 118
118 119 In [7]: for node in G.topological_sort():
119 120 ...: # get list of AsyncResult objects from nodes
120 121 ...: # leading into this one as dependencies
121 122 ...: deps = [ results[n] for n in G.predecessors(node) ]
122 123 ...: # submit and store AsyncResult object
123 ...: results[node] = client.apply(jobs[node], after=deps, block=False)
124 ...: results[node] = view.apply_with_flags(jobs[node], after=deps, block=False)
124 125
125 126 Now that we have submitted all the jobs, we can wait for the results:
126 127
127 128 .. sourcecode:: ipython
128 129
129 In [8]: [ r.get() for r in results.values() ]
130 In [8]: view.wait(results.values())
130 131
131 132 Now, at least we know that all the jobs ran and did not fail (``r.get()`` would have
132 133 raised an error if a task failed). But we don't know that the ordering was properly
133 134 respected. For this, we can use the :attr:`metadata` attribute of each AsyncResult.
134 135
135 136 These objects store a variety of metadata about each task, including various timestamps.
136 137 We can validate that the dependencies were respected by checking that each task was
137 138 started after all of its predecessors were completed:
138 139
139 140 .. literalinclude:: ../../examples/newparallel/dagdeps.py
140 141 :language: python
141 142 :lines: 64-70
142 143
143 144 We can also validate the graph visually. By drawing the graph with each node's x-position
144 145 as its start time, all arrows must be pointing to the right if dependencies were respected.
145 146 For spreading, the y-position will be the runtime of the task, so long tasks
146 147 will be at the top, and quick, small tasks will be at the bottom.
147 148
148 149 .. sourcecode:: ipython
149 150
150 151 In [10]: from matplotlib.dates import date2num
151 152
152 153 In [11]: from matplotlib.cm import gist_rainbow
153 154
154 155 In [12]: pos = {}; colors = {}
155 156
156 157 In [12]: for node in G:
157 158 ...: md = results[node].metadata
158 159 ...: start = date2num(md.started)
159 160 ...: runtime = date2num(md.completed) - start
160 161 ...: pos[node] = (start, runtime)
161 162 ...: colors[node] = md.engine_id
162 163
163 164 In [13]: nx.draw(G, pos, node_list=colors.keys(), node_color=colors.values(),
164 165 ...: cmap=gist_rainbow)
165 166
166 167 .. figure:: dagdeps.*
167 168
168 169 Time started on x, runtime on y, and color-coded by engine-id (in this case there
169 170 were four engines). Edges denote dependencies.
170 171
171 172
172 173 .. _NetworkX: http://networkx.lanl.gov/
@@ -1,21 +1,22 b''
1 1 .. _parallelz_index:
2 2
3 3 ==========================================
4 4 Using IPython for parallel computing (ZMQ)
5 5 ==========================================
6 6
7 7 .. toctree::
8 8 :maxdepth: 2
9 9
10 10 parallel_intro.txt
11 11 parallel_process.txt
12 12 parallel_multiengine.txt
13 13 parallel_task.txt
14 14 parallel_mpi.txt
15 15 parallel_security.txt
16 16 parallel_winhpc.txt
17 17 parallel_demos.txt
18 18 dag_dependencies.txt
19 19 parallel_details.txt
20 parallel_transition.txt
20 21
21 22
@@ -1,284 +1,284 b''
1 1 =================
2 2 Parallel examples
3 3 =================
4 4
5 5 .. note::
6 6
7 7 Performance numbers from ``IPython.kernel``, not newparallel.
8 8
9 9 In this section we describe two more involved examples of using an IPython
10 10 cluster to perform a parallel computation. In these examples, we will be using
11 11 IPython's "pylab" mode, which enables interactive plotting using the
12 12 Matplotlib package. IPython can be started in this mode by typing::
13 13
14 14 ipython --pylab
15 15
16 16 at the system command line.
17 17
18 18 150 million digits of pi
19 19 ========================
20 20
21 21 In this example we would like to study the distribution of digits in the
22 22 number pi (in base 10). While it is not known if pi is a normal number (a
23 23 number is normal in base 10 if 0-9 occur with equal likelihood) numerical
24 24 investigations suggest that it is. We will begin with a serial calculation on
25 25 10,000 digits of pi and then perform a parallel calculation involving 150
26 26 million digits.
27 27
28 28 In both the serial and parallel calculation we will be using functions defined
29 29 in the :file:`pidigits.py` file, which is available in the
30 30 :file:`docs/examples/newparallel` directory of the IPython source distribution.
31 31 These functions provide basic facilities for working with the digits of pi and
32 32 can be loaded into IPython by putting :file:`pidigits.py` in your current
33 33 working directory and then doing:
34 34
35 35 .. sourcecode:: ipython
36 36
37 37 In [1]: run pidigits.py
38 38
39 39 Serial calculation
40 40 ------------------
41 41
42 42 For the serial calculation, we will use `SymPy <http://www.sympy.org>`_ to
43 43 calculate 10,000 digits of pi and then look at the frequencies of the digits
44 44 0-9. Out of 10,000 digits, we expect each digit to occur 1,000 times. While
45 45 SymPy is capable of calculating many more digits of pi, our purpose here is to
46 46 set the stage for the much larger parallel calculation.
47 47
48 48 In this example, we use two functions from :file:`pidigits.py`:
49 49 :func:`one_digit_freqs` (which calculates how many times each digit occurs)
50 50 and :func:`plot_one_digit_freqs` (which uses Matplotlib to plot the result).
51 51 Here is an interactive IPython session that uses these functions with
52 52 SymPy:
53 53
54 54 .. sourcecode:: ipython
55 55
56 56 In [7]: import sympy
57 57
58 58 In [8]: pi = sympy.pi.evalf(40)
59 59
60 60 In [9]: pi
61 61 Out[9]: 3.141592653589793238462643383279502884197
62 62
63 63 In [10]: pi = sympy.pi.evalf(10000)
64 64
65 65 In [11]: digits = (d for d in str(pi)[2:]) # create a sequence of digits
66 66
67 67 In [12]: run pidigits.py # load one_digit_freqs/plot_one_digit_freqs
68 68
69 69 In [13]: freqs = one_digit_freqs(digits)
70 70
71 71 In [14]: plot_one_digit_freqs(freqs)
72 72 Out[14]: [<matplotlib.lines.Line2D object at 0x18a55290>]
73 73
74 74 The resulting plot of the single digit counts shows that each digit occurs
75 75 approximately 1,000 times, but that with only 10,000 digits the
76 76 statistical fluctuations are still rather large:
77 77
78 78 .. image:: ../parallel/single_digits.*
79 79
80 80 It is clear that to reduce the relative fluctuations in the counts, we need
81 81 to look at many more digits of pi. That brings us to the parallel calculation.
82 82
83 83 Parallel calculation
84 84 --------------------
85 85
86 86 Calculating many digits of pi is a challenging computational problem in itself.
87 87 Because we want to focus on the distribution of digits in this example, we
88 88 will use pre-computed digits of pi from the website of Professor Yasumasa
89 89 Kanada at the University of Tokyo (http://www.super-computing.org). These
90 90 digits come in a set of text files (ftp://pi.super-computing.org/.2/pi200m/)
91 91 that each have 10 million digits of pi.
92 92
93 93 For the parallel calculation, we have copied these files to the local hard
94 94 drives of the compute nodes. A total of 15 of these files will be used, for a
95 95 total of 150 million digits of pi. To make things a little more interesting we
96 96 will calculate the frequencies of all 2 digits sequences (00-99) and then plot
97 97 the result using a 2D matrix in Matplotlib.
98 98
99 99 The overall idea of the calculation is simple: each IPython engine will
100 100 compute the two digit counts for the digits in a single file. Then in a final
101 101 step the counts from each engine will be added up. To perform this
102 102 calculation, we will need two top-level functions from :file:`pidigits.py`:
103 103
104 104 .. literalinclude:: ../../examples/newparallel/pidigits.py
105 105 :language: python
106 106 :lines: 41-56
107 107
108 108 We will also use the :func:`plot_two_digit_freqs` function to plot the
109 109 results. The code to run this calculation in parallel is contained in
110 110 :file:`docs/examples/newparallel/parallelpi.py`. This code can be run in parallel
111 111 using IPython by following these steps:
112 112
113 113 1. Use :command:`ipclusterz` to start 15 engines. We used an 8 core (2 quad
114 114 core CPUs) cluster with hyperthreading enabled which makes the 8 cores
115 115 looks like 16 (1 controller + 15 engines) in the OS. However, the maximum
116 116 speedup we can observe is still only 8x.
117 117 2. With the file :file:`parallelpi.py` in your current working directory, open
118 118 up IPython in pylab mode and type ``run parallelpi.py``. This will download
119 119 the pi files via ftp the first time you run it, if they are not
120 120 present in the Engines' working directory.
121 121
122 122 When run on our 8 core cluster, we observe a speedup of 7.7x. This is slightly
123 123 less than linear scaling (8x) because the controller is also running on one of
124 124 the cores.
125 125
126 126 To emphasize the interactive nature of IPython, we now show how the
127 127 calculation can also be run by simply typing the commands from
128 128 :file:`parallelpi.py` interactively into IPython:
129 129
130 130 .. sourcecode:: ipython
131 131
132 132 In [1]: from IPython.zmq.parallel import client
133 133
134 134 # The Client allows us to use the engines interactively.
135 135 # We simply pass Client the name of the cluster profile we
136 136 # are using.
137 137 In [2]: c = client.Client(profile='mycluster')
138 In [3]: view = c.view(balanced=True)
138 In [3]: view = c.load_balanced_view()
139 139
140 140 In [3]: c.ids
141 141 Out[3]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
142 142
143 143 In [4]: run pidigits.py
144 144
145 145 In [5]: filestring = 'pi200m.ascii.%(i)02dof20'
146 146
147 147 # Create the list of files to process.
148 148 In [6]: files = [filestring % {'i':i} for i in range(1,16)]
149 149
150 150 In [7]: files
151 151 Out[7]:
152 152 ['pi200m.ascii.01of20',
153 153 'pi200m.ascii.02of20',
154 154 'pi200m.ascii.03of20',
155 155 'pi200m.ascii.04of20',
156 156 'pi200m.ascii.05of20',
157 157 'pi200m.ascii.06of20',
158 158 'pi200m.ascii.07of20',
159 159 'pi200m.ascii.08of20',
160 160 'pi200m.ascii.09of20',
161 161 'pi200m.ascii.10of20',
162 162 'pi200m.ascii.11of20',
163 163 'pi200m.ascii.12of20',
164 164 'pi200m.ascii.13of20',
165 165 'pi200m.ascii.14of20',
166 166 'pi200m.ascii.15of20']
167 167
168 168 # download the data files if they don't already exist:
169 In [8]: c.map(fetch_pi_file, files)
169 In [8]: view.map(fetch_pi_file, files)
170 170
171 171 # This is the parallel calculation using the Client.map method
172 172 # which applies compute_two_digit_freqs to each file in files in parallel.
173 In [9]: freqs_all = c.map(compute_two_digit_freqs, files)
173 In [9]: freqs_all = view.map(compute_two_digit_freqs, files)
174 174
175 175 # Add up the frequencies from each engine.
176 176 In [10]: freqs = reduce_freqs(freqs_all)
177 177
178 178 In [11]: plot_two_digit_freqs(freqs)
179 179 Out[11]: <matplotlib.image.AxesImage object at 0x18beb110>
180 180
181 181 In [12]: plt.title('2 digit counts of 150m digits of pi')
182 182 Out[12]: <matplotlib.text.Text object at 0x18d1f9b0>
183 183
184 184 The resulting plot generated by Matplotlib is shown below. The colors indicate
185 185 which two digit sequences are more (red) or less (blue) likely to occur in the
186 186 first 150 million digits of pi. We clearly see that the sequence "41" is
187 187 most likely and that "06" and "07" are least likely. Further analysis would
188 188 show that the relative size of the statistical fluctuations have decreased
189 189 compared to the 10,000 digit calculation.
190 190
191 191 .. image:: ../parallel/two_digit_counts.*
192 192
193 193
194 194 Parallel options pricing
195 195 ========================
196 196
197 197 An option is a financial contract that gives the buyer of the contract the
198 198 right to buy (a "call") or sell (a "put") a secondary asset (a stock for
199 199 example) at a particular date in the future (the expiration date) for a
200 200 pre-agreed upon price (the strike price). For this right, the buyer pays the
201 201 seller a premium (the option price). There are a wide variety of flavors of
202 202 options (American, European, Asian, etc.) that are useful for different
203 203 purposes: hedging against risk, speculation, etc.
204 204
205 205 Much of modern finance is driven by the need to price these contracts
206 206 accurately based on what is known about the properties (such as volatility) of
207 207 the underlying asset. One method of pricing options is to use a Monte Carlo
208 208 simulation of the underlying asset price. In this example we use this approach
209 209 to price both European and Asian (path dependent) options for various strike
210 210 prices and volatilities.
211 211
212 212 The code for this example can be found in the :file:`docs/examples/newparallel`
213 213 directory of the IPython source. The function :func:`price_options` in
214 214 :file:`mcpricer.py` implements the basic Monte Carlo pricing algorithm using
215 215 the NumPy package and is shown here:
216 216
217 217 .. literalinclude:: ../../examples/newparallel/mcpricer.py
218 218 :language: python
219 219
220 220 To run this code in parallel, we will use IPython's :class:`LoadBalancedView` class,
221 221 which distributes work to the engines using dynamic load balancing. This
222 222 view is a wrapper of the :class:`Client` class shown in
223 223 the previous example. The parallel calculation using :class:`LoadBalancedView` can
224 224 be found in the file :file:`mcpricer.py`. The code in this file creates a
 225 225 :class:`LoadBalancedView` instance and then submits a set of tasks using
 226 226 its :meth:`map` method that calculate the option prices for different
227 227 volatilities and strike prices. The results are then plotted as a 2D contour
228 228 plot using Matplotlib.
229 229
230 230 .. literalinclude:: ../../examples/newparallel/mcdriver.py
231 231 :language: python
232 232
233 233 To use this code, start an IPython cluster using :command:`ipclusterz`, open
234 234 IPython in the pylab mode with the file :file:`mcdriver.py` in your current
235 235 working directory and then type:
236 236
237 237 .. sourcecode:: ipython
238 238
239 239 In [7]: run mcdriver.py
240 240 Submitted tasks: [0, 1, 2, ...]
241 241
242 242 Once all the tasks have finished, the results can be plotted using the
243 243 :func:`plot_options` function. Here we make contour plots of the Asian
244 244 call and Asian put options as function of the volatility and strike price:
245 245
246 246 .. sourcecode:: ipython
247 247
248 248 In [8]: plot_options(sigma_vals, K_vals, prices['acall'])
249 249
250 250 In [9]: plt.figure()
251 251 Out[9]: <matplotlib.figure.Figure object at 0x18c178d0>
252 252
253 253 In [10]: plot_options(sigma_vals, K_vals, prices['aput'])
254 254
 255 255 These results are shown in the two figures below. On an 8 core cluster the
256 256 entire calculation (10 strike prices, 10 volatilities, 100,000 paths for each)
257 257 took 30 seconds in parallel, giving a speedup of 7.7x, which is comparable
258 258 to the speedup observed in our previous example.
259 259
260 260 .. image:: ../parallel/asian_call.*
261 261
262 262 .. image:: ../parallel/asian_put.*
263 263
264 264 Conclusion
265 265 ==========
266 266
267 267 To conclude these examples, we summarize the key features of IPython's
268 268 parallel architecture that have been demonstrated:
269 269
270 270 * Serial code can be parallelized often with only a few extra lines of code.
271 271 We have used the :class:`DirectView` and :class:`LoadBalancedView` classes
272 272 for this purpose.
273 273 * The resulting parallel code can be run without ever leaving the IPython's
274 274 interactive shell.
275 275 * Any data computed in parallel can be explored interactively through
276 276 visualization or further numerical calculations.
277 277 * We have run these examples on a cluster running Windows HPC Server 2008.
278 278 IPython's built in support for the Windows HPC job scheduler makes it
279 279 easy to get started with IPython's parallel capabilities.
280 280
281 281 .. note::
282 282
283 283 The newparallel code has never been run on Windows HPC Server, so the last
284 284 conclusion is untested.
@@ -1,438 +1,493 b''
1 1 .. _parallel_details:
2 2
3 3 ==========================================
4 4 Details of Parallel Computing with IPython
5 5 ==========================================
6 6
7 7 .. note::
8 8
9 9 There are still many sections to fill out
10 10
11 11
12 12 Caveats
13 13 =======
14 14
15 15 First, some caveats about the detailed workings of parallel computing with 0MQ and IPython.
16 16
17 17 Non-copying sends and numpy arrays
18 18 ----------------------------------
19 19
20 20 When numpy arrays are passed as arguments to apply or via data-movement methods, they are not
21 copied. This means that you must be careful if you are sending an array that you intend to work on.
22 PyZMQ does allow you to track when a message has been sent so you can know when it is safe to edit the buffer, but
23 IPython only allows for this.
21 copied. This means that you must be careful if you are sending an array that you intend to work
22 on. PyZMQ does allow you to track when a message has been sent so you can know when it is safe
 23 to edit the buffer, but IPython only allows for this if you request it with the `track` flag (see below).
24 24
25 25 It is also important to note that the non-copying receive of a message is *read-only*. That
26 means that if you intend to work in-place on an array that you have sent or received, you must copy
27 it. This is true for both numpy arrays sent to engines and numpy arrays retrieved as results.
26 means that if you intend to work in-place on an array that you have sent or received, you must
27 copy it. This is true for both numpy arrays sent to engines and numpy arrays retrieved as
28 results.
28 29
29 30 The following will fail:
30 31
31 32 .. sourcecode:: ipython
32 33
33 34 In [3]: A = numpy.zeros(2)
34 35
35 36 In [4]: def setter(a):
36 37 ...: a[0]=1
37 38 ...: return a
38 39
39 40 In [5]: rc[0].apply_sync(setter, A)
40 41 ---------------------------------------------------------------------------
41 42 RemoteError Traceback (most recent call last)
42 43 ...
43 44 RemoteError: RuntimeError(array is not writeable)
44 45 Traceback (most recent call last):
45 46 File "/Users/minrk/dev/ip/mine/IPython/zmq/parallel/streamkernel.py", line 329, in apply_request
46 47 exec code in working, working
47 48 File "<string>", line 1, in <module>
48 49 File "<ipython-input-14-736187483856>", line 2, in setter
49 50 RuntimeError: array is not writeable
50 51
51 52 If you do need to edit the array in-place, just remember to copy the array if it's read-only.
52 53 The :attr:`ndarray.flags.writeable` flag will tell you if you can write to an array.
53 54
54 55 .. sourcecode:: ipython
55 56
56 57 In [3]: A = numpy.zeros(2)
57 58
58 59 In [4]: def setter(a):
59 60 ...: """only copy read-only arrays"""
60 61 ...: if not a.flags.writeable:
61 62 ...: a=a.copy()
62 63 ...: a[0]=1
63 64 ...: return a
64 65
65 66 In [5]: rc[0].apply_sync(setter, A)
66 67 Out[5]: array([ 1., 0.])
67 68
68 69 # note that results will also be read-only:
69 70 In [6]: _.flags.writeable
70 71 Out[6]: False
71 72
73 If you want to safely edit an array in-place after *sending* it, you must use the `track=True` flag. IPython always performs non-copying sends of arrays, which return immediately. You
 74 must instruct IPython to track those messages *at send time* in order to know for sure that the send has completed. AsyncResults have a :attr:`sent` property, and :meth:`wait_on_send` method
75 for checking and waiting for 0MQ to finish with a buffer.
76
77 .. sourcecode:: ipython
78
79 In [5]: A = numpy.random.random((1024,1024))
80
81 In [6]: view.track=True
82
83 In [7]: ar = view.apply_async(lambda x: 2*x, A)
84
85 In [8]: ar.sent
86 Out[8]: False
87
88 In [9]: ar.wait_on_send() # blocks until sent is True
89
90
72 91 What is sendable?
73 92 -----------------
74 93
75 94 If IPython doesn't know what to do with an object, it will pickle it. There is a short list of
76 95 objects that are not pickled: ``buffers``, ``str/bytes`` objects, and ``numpy``
77 96 arrays. These are handled specially by IPython in order to prevent the copying of data. Sending
78 97 bytes or numpy arrays will result in exactly zero in-memory copies of your data (unless the data
79 98 is very small).
80 99
81 100 If you have an object that provides a Python buffer interface, then you can always send that
82 101 buffer without copying - and reconstruct the object on the other side in your own code. It is
83 102 possible that the object reconstruction will become extensible, so you can add your own
84 103 non-copying types, but this does not yet exist.
85 104
105 Closures
106 ********
107
108 Just about anything in Python is pickleable. The one notable exception is objects (generally
 109 functions) with *closures*. Closures can be a complicated topic, but the basic principle is that
110 functions that refer to variables in their parent scope have closures.
111
112 An example of a function that uses a closure:
113
114 .. sourcecode:: python
115
116 def f(a):
117 def inner():
118 # inner will have a closure
119 return a
 120 return inner
121
122 f1 = f(1)
123 f2 = f(2)
124 f1() # returns 1
125 f2() # returns 2
126
127 f1 and f2 will have closures referring to the scope in which `inner` was defined, because they
128 use the variable 'a'. As a result, you would not be able to send ``f1`` or ``f2`` with IPython.
129 Note that you *would* be able to send `f`. This is only true for interactively defined
130 functions (as are often used in decorators), and only when there are variables used inside the
131 inner function, that are defined in the outer function. If the names are *not* in the outer
132 function, then there will not be a closure, and the generated function will look in
133 ``globals()`` for the name:
134
135 .. sourcecode:: python
136
137 def g(b):
138 # note that `b` is not referenced in inner's scope
139 def inner():
140 # this inner will *not* have a closure
141 return a
 142 return inner
143 g1 = g(1)
144 g2 = g(2)
145 g1() # raises NameError on 'a'
146 a=5
147 g2() # returns 5
148
149 `g1` and `g2` *will* be sendable with IPython, and will treat the engine's namespace as
 150 globals(). The :meth:`pull` method is implemented based on this principle. If we did not
151 provide pull, you could implement it yourself with `apply`, by simply returning objects out
152 of the global namespace:
153
154 .. sourcecode:: ipython
155
156 In [10]: view.apply(lambda : a)
157
158 # is equivalent to
159 In [11]: view.pull('a')
86 160
87 161 Running Code
88 162 ============
89 163
90 164 There are two principal units of execution in Python: strings of Python code (e.g. 'a=5'),
91 165 and Python functions. IPython is designed around the use of functions via the core
92 166 Client method, called `apply`.
93 167
94 168 Apply
95 169 -----
96 170
97 The principal method of remote execution is :meth:`apply`, of Client and View objects. The Client provides the full execution and communication API for engines via its apply method.
171 The principal method of remote execution is :meth:`apply`, of View objects. The Client provides
172 the full execution and communication API for engines via its low-level
173 :meth:`send_apply_message` method.
98 174
99 175 f : function
 100 176 The function to be called remotely
101 177 args : tuple/list
102 178 The positional arguments passed to `f`
103 179 kwargs : dict
104 180 The keyword arguments passed to `f`
105 bound : bool (default: False)
106 Whether to pass the Engine(s) Namespace as the first argument to `f`.
107 181 block : bool (default: self.block)
108 182 Whether to wait for the result, or return immediately.
109 183 False:
110 184 returns AsyncResult
111 185 True:
112 186 returns actual result(s) of f(*args, **kwargs)
113 187 if multiple targets:
114 188 list of results, matching `targets`
115 189 track : bool
116 190 whether to track non-copying sends.
117 191 [default False]
118 192
119 193 targets : int,list of ints, 'all', None
120 194 Specify the destination of the job.
121 195 if None:
122 196 Submit via Task queue for load-balancing.
123 197 if 'all':
124 198 Run on all active engines
125 199 if list:
126 200 Run on each specified engine
127 201 if int:
128 202 Run on single engine
 129 203 Not yet implemented.
130 204
131 205 balanced : bool, default None
132 206 whether to load-balance. This will default to True
133 207 if targets is unspecified, or False if targets is specified.
134 208
135 209 If `balanced` and `targets` are both specified, the task will
 136 210 be assigned to *one* of the targets by the scheduler.
137 211
138 The following arguments are only used when balanced is True:
139
140 212 after : Dependency or collection of msg_ids
141 213 Only for load-balanced execution (targets=None)
142 214 Specify a list of msg_ids as a time-based dependency.
143 215 This job will only be run *after* the dependencies
144 216 have been met.
145 217
146 218 follow : Dependency or collection of msg_ids
147 219 Only for load-balanced execution (targets=None)
148 220 Specify a list of msg_ids as a location-based dependency.
149 221 This job will only be run on an engine where this dependency
150 222 is met.
151 223
152 224 timeout : float/int or None
153 225 Only for load-balanced execution (targets=None)
154 226 Specify an amount of time (in seconds) for the scheduler to
155 227 wait for dependencies to be met before failing with a
156 228 DependencyTimeout.
157 229
158 230 execute and run
159 231 ---------------
160 232
161 For executing strings of Python code, Clients also provide an :meth:`execute` and a :meth:`run`
162 method, which rather than take functions and arguments, take simple strings. `execute` simply
163 takes a string of Python code to execute, and sends it to the Engine(s). `run` is the same as
164 `execute`, but for a *file*, rather than a string. It is simply a wrapper that does something
165 very similar to ``execute(open(f).read())``.
233 For executing strings of Python code, :class:`DirectView`s also provide an :meth:`execute` and a
234 :meth:`run` method, which rather than take functions and arguments, take simple strings.
235 `execute` simply takes a string of Python code to execute, and sends it to the Engine(s). `run`
236 is the same as `execute`, but for a *file*, rather than a string. It is simply a wrapper that
237 does something very similar to ``execute(open(f).read())``.
166 238
167 239 .. note::
168 240
169 241 TODO: Example
170 242
171 243 Views
172 244 =====
173 245
174 246 The principal extension of the :class:`~parallel.client.Client` is the
175 :class:`~parallel.view.View` class. The client is a fairly stateless object with respect to
176 execution patterns, where you must specify everything about the execution as keywords to each
177 call to :meth:`apply`. For users who want to more conveniently specify various options for
178 several similar calls, we have the :class:`~parallel.view.View` objects. The basic principle of
179 the views is to encapsulate the keyword arguments to :meth:`client.apply` as attributes,
180 allowing users to specify them once and apply to any subsequent calls until the attribute is
181 changed.
247 :class:`~parallel.view.View` class. The client
182 248
183 249 Two of apply's keyword arguments are set at the construction of the View, and are immutable for
184 250 a given View: `balanced` and `targets`. `balanced` determines whether the View will be a
185 251 :class:`.LoadBalancedView` or a :class:`.DirectView`, and `targets` will be the View's `targets`
186 252 attribute. Attempts to change this will raise errors.
187 253
188 Views are cached by targets+balanced combinations, so requesting a view multiple times will always return the *same object*, not create a new one:
254 Views are cached by targets/class, so requesting a view multiple times will always return the
255 *same object*, not create a new one:
189 256
190 257 .. sourcecode:: ipython
191 258
192 In [3]: v1 = rc.view([1,2,3], balanced=True)
193 In [4]: v2 = rc.view([1,2,3], balanced=True)
259 In [3]: v1 = rc.load_balanced_view([1,2,3])
260 In [4]: v2 = rc.load_balanced_view([1,2,3])
194 261
195 262 In [5]: v2 is v1
196 263 Out[5]: True
197 264
198 265
199 A :class:`View` always uses its `targets` attribute, and it will use its `bound`
200 and `block` attributes in its :meth:`apply` method, but the suffixed :meth:`apply_x`
201 methods allow overriding `bound` and `block` for a single call.
202
203 ================== ========== ==========
204 method block bound
205 ================== ========== ==========
206 apply self.block self.bound
207 apply_sync True False
208 apply_async False False
209 apply_sync_bound True True
210 apply_async_bound False True
211 ================== ========== ==========
212
213 266 DirectView
214 267 ----------
215 268
216 269 The :class:`.DirectView` is the class for the IPython :ref:`Multiplexing Interface
217 270 <parallel_multiengine>`.
218 271
219 272 Creating a DirectView
220 273 *********************
221 274
222 275 DirectViews can be created in two ways, by index access to a client, or by a client's
223 276 :meth:`view` method. Index access to a Client works in a few ways. First, you can create
224 277 DirectViews to single engines simply by accessing the client by engine id:
225 278
226 279 .. sourcecode:: ipython
227 280
228 281 In [2]: rc[0]
229 282 Out[2]: <DirectView 0>
230 283
231 284 You can also create a DirectView with a list of engines:
232 285
233 286 .. sourcecode:: ipython
234 287
235 288 In [2]: rc[0,1,2]
236 289 Out[2]: <DirectView [0,1,2]>
237 290
238 291 Other methods for accessing elements, such as slicing and negative indexing, work by passing
239 292 the index directly to the client's :attr:`ids` list, so:
240 293
241 294 .. sourcecode:: ipython
242 295
243 296 # negative index
244 297 In [2]: rc[-1]
245 298 Out[2]: <DirectView 3>
246 299
247 300 # or slicing:
248 301 In [3]: rc[::2]
249 302 Out[3]: <DirectView [0,2]>
250 303
251 304 are always the same as:
252 305
253 306 .. sourcecode:: ipython
254 307
255 308 In [2]: rc[rc.ids[-1]]
256 309 Out[2]: <DirectView 3>
257 310
258 311 In [3]: rc[rc.ids[::2]]
259 312 Out[3]: <DirectView [0,2]>
260 313
261 314 Also note that the slice is evaluated at the time of construction of the DirectView, so the
262 315 targets will not change over time if engines are added/removed from the cluster. Requesting
263 316 two views with the same slice at different times will *not* necessarily return the same View
264 317 if the number of engines has changed.
265 318
266 319 Execution via DirectView
267 320 ************************
268 321
269 322 The DirectView is the simplest way to work with one or more engines directly (hence the name).
270 323
271 324
272 325 Data movement via DirectView
273 326 ****************************
274 327
275 328 Since a Python namespace is just a :class:`dict`, :class:`DirectView` objects provide
276 329 dictionary-style access by key and methods such as :meth:`get` and
277 330 :meth:`update` for convenience. This make the remote namespaces of the engines
278 331 appear as a local dictionary. Underneath, these methods call :meth:`apply`:
279 332
280 333 .. sourcecode:: ipython
281 334
282 335 In [51]: dview['a']=['foo','bar']
283 336
284 337 In [52]: dview['a']
285 338 Out[52]: [ ['foo', 'bar'], ['foo', 'bar'], ['foo', 'bar'], ['foo', 'bar'] ]
286 339
287 340 Scatter and gather
288 341 ------------------
289 342
290 343 Sometimes it is useful to partition a sequence and push the partitions to
 292 345 different engines. In MPI language, this is known as scatter/gather and we
292 345 follow that terminology. However, it is important to remember that in
293 346 IPython's :class:`Client` class, :meth:`scatter` is from the
294 347 interactive IPython session to the engines and :meth:`gather` is from the
295 348 engines back to the interactive IPython session. For scatter/gather operations
296 349 between engines, MPI should be used:
297 350
298 351 .. sourcecode:: ipython
299 352
300 353 In [58]: dview.scatter('a',range(16))
301 354 Out[58]: [None,None,None,None]
302 355
303 356 In [59]: dview['a']
304 357 Out[59]: [ [0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15] ]
305 358
306 359 In [60]: dview.gather('a')
307 360 Out[60]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
308 361
309 362
310 363
311 364 LoadBalancedView
312 365 ----------------
313 366
314 367 The :class:`.LoadBalancedView`
315 368
316 369
317 370 Data Movement
318 371 =============
319 372
320 373 push
321 374
322 375 pull
323 376
324 377 Reference
325 378
326 379 Results
327 380 =======
328 381
329 382 AsyncResults are the primary class
330 383
331 384 get_result
332 385
333 386 results,metadata
334 387
335 388 Querying the Hub
336 389 ================
337 390
338 391 The Hub sees all traffic that may pass through the schedulers between engines and clients.
339 392 It does this so that it can track state, allowing multiple clients to retrieve results of
340 393 computations submitted by their peers, as well as persisting the state to a database.
341 394
342 395 queue_status
343 396
344 397 You can check the status of the queues of the engines with this command.
345 398
346 399 result_status
347 400
348 401 purge_results
349 402
350 403 Controlling the Engines
351 404 =======================
352 405
353 406 There are a few actions you can do with Engines that do not involve execution. These
354 407 messages are sent via the Control socket, and bypass any long queues of waiting execution
355 408 jobs
356 409
357 410 abort
358 411
359 412 Sometimes you may want to prevent a job you have submitted from actually running. The method
360 413 for this is :meth:`abort`. It takes a container of msg_ids, and instructs the Engines to not
361 414 run the jobs if they arrive. The jobs will then fail with an AbortedTask error.
362 415
363 416 clear
364 417
365 418 You may want to purge the Engine(s) namespace of any data you have left in it. After
366 419 running `clear`, there will be no names in the Engine's namespace
367 420
368 421 shutdown
369 422
370 423 You can also instruct engines (and the Controller) to terminate from a Client. This
371 424 can be useful when a job is finished, since you can shutdown all the processes with a
372 425 single command.
373 426
374 427 Synchronization
375 428 ===============
376 429
377 430 Since the Client is a synchronous object, events do not automatically trigger in your
378 431 interactive session - you must poll the 0MQ sockets for incoming messages. Note that
379 432 this polling *does not* actually make any network requests. It simply performs a `select`
380 433 operation, to check if messages are already in local memory, waiting to be handled.
381 434
382 The method that handles incoming messages is :meth:`spin`. This method flushes any waiting messages on the various incoming sockets, and updates the state of the Client.
435 The method that handles incoming messages is :meth:`spin`. This method flushes any waiting
436 messages on the various incoming sockets, and updates the state of the Client.
383 437
384 If you need to wait for particular results to finish, you can use the :meth:`barrier` method,
438 If you need to wait for particular results to finish, you can use the :meth:`wait` method,
385 439 which will call :meth:`spin` until the messages are no longer outstanding. Anything that
386 440 represents a collection of messages, such as a list of msg_ids or one or more AsyncResult
387 objects, can be passed as argument to barrier. A timeout can be specified, which will prevent
388 the barrier from blocking for more than a specified time, but the default behavior is to wait
441 objects, can be passed as argument to wait. A timeout can be specified, which will prevent
442 the call from blocking for more than a specified time, but the default behavior is to wait
389 443 forever.
390 444
391 445
392 446
393 447 The client also has an `outstanding` attribute - a ``set`` of msg_ids that are awaiting replies.
394 This is the default if barrier is called with no arguments - i.e. barrier on *all* outstanding messages.
448 This is the default if wait is called with no arguments - i.e. wait on *all* outstanding
449 messages.
395 450
396 451
397 452 .. note::
398 453
399 TODO barrier example
454 TODO wait example
400 455
401 456 Map
402 457 ===
403 458
404 459 Many parallel computing problems can be expressed as a `map`, or running a single program with a
 405 460 variety of different inputs. Python has a built-in :func:`map`, which does exactly this, and
 406 461 many parallel execution tools in Python, such as the built-in :class:`multiprocessing.Pool`
407 462 object provide implementations of `map`. All View objects provide a :meth:`map` method as well,
408 463 but the load-balanced and direct implementations differ.
409 464
410 465 Views' map methods can be called on any number of sequences, but they can also take the `block`
411 466 and `bound` keyword arguments, just like :meth:`~client.apply`, but *only as keywords*.
412 467
413 468 .. sourcecode:: python
414 469
415 470 dview.map(*sequences, block=None)
416 471
417 472
418 473 * iter, map_async, reduce
419 474
420 475 Decorators and RemoteFunctions
421 476 ==============================
422 477
423 478 @parallel
424 479
425 480 @remote
426 481
427 482 RemoteFunction
428 483
429 484 ParallelFunction
430 485
431 486 Dependencies
432 487 ============
433 488
434 489 @depend
435 490
436 491 @require
437 492
438 493 Dependency
@@ -1,243 +1,244 b''
1 1 .. _ip1par:
2 2
3 3 ============================
4 4 Overview and getting started
5 5 ============================
6 6
7 7 Introduction
8 8 ============
9 9
10 10 This section gives an overview of IPython's sophisticated and powerful
11 11 architecture for parallel and distributed computing. This architecture
12 12 abstracts out parallelism in a very general way, which enables IPython to
13 13 support many different styles of parallelism including:
14 14
15 15 * Single program, multiple data (SPMD) parallelism.
16 16 * Multiple program, multiple data (MPMD) parallelism.
17 17 * Message passing using MPI.
18 18 * Task farming.
19 19 * Data parallel.
20 20 * Combinations of these approaches.
21 21 * Custom user defined approaches.
22 22
23 23 Most importantly, IPython enables all types of parallel applications to
24 24 be developed, executed, debugged and monitored *interactively*. Hence,
25 25 the ``I`` in IPython. The following are some example usage cases for IPython:
26 26
27 27 * Quickly parallelize algorithms that are embarrassingly parallel
28 28 using a number of simple approaches. Many simple things can be
29 29 parallelized interactively in one or two lines of code.
30 30
31 31 * Steer traditional MPI applications on a supercomputer from an
32 32 IPython session on your laptop.
33 33
34 34 * Analyze and visualize large datasets (that could be remote and/or
35 35 distributed) interactively using IPython and tools like
36 36 matplotlib/TVTK.
37 37
38 38 * Develop, test and debug new parallel algorithms
39 39 (that may use MPI) interactively.
40 40
41 41 * Tie together multiple MPI jobs running on different systems into
42 42 one giant distributed and parallel system.
43 43
44 44 * Start a parallel job on your cluster and then have a remote
45 45 collaborator connect to it and pull back data into their
46 46 local IPython session for plotting and analysis.
47 47
48 48 * Run a set of tasks on a set of CPUs using dynamic load balancing.
49 49
50 50 Architecture overview
51 51 =====================
52 52
53 53 The IPython architecture consists of four components:
54 54
55 55 * The IPython engine.
56 56 * The IPython hub.
57 57 * The IPython schedulers.
58 58 * The controller client.
59 59
60 60 These components live in the :mod:`IPython.zmq.parallel` package and are
61 61 installed with IPython. They do, however, have additional dependencies
62 62 that must be installed. For more information, see our
63 63 :ref:`installation documentation <install_index>`.
64 64
65 65 .. TODO: include zmq in install_index
66 66
67 67 IPython engine
68 68 ---------------
69 69
70 70 The IPython engine is a Python instance that takes Python commands over a
71 71 network connection. Eventually, the IPython engine will be a full IPython
72 72 interpreter, but for now, it is a regular Python interpreter. The engine
73 73 can also handle incoming and outgoing Python objects sent over a network
74 74 connection. When multiple engines are started, parallel and distributed
75 75 computing becomes possible. An important feature of an IPython engine is
76 76 that it blocks while user code is being executed. Read on for how the
77 77 IPython controller solves this problem to expose a clean asynchronous API
78 78 to the user.
79 79
80 80 IPython controller
81 81 ------------------
82 82
83 83 The IPython controller processes provide an interface for working with a set of engines.
84 84 At a general level, the controller is a collection of processes to which IPython engines
85 85 and clients can connect. The controller is composed of a :class:`Hub` and a collection of
86 86 :class:`Schedulers`. These Schedulers are typically run in separate processes but on the
87 87 same machine as the Hub, but can be run anywhere from local threads or on remote machines.
88 88
89 89 The controller also provides a single point of contact for users who wish to
90 90 utilize the engines connected to the controller. There are different ways of
91 91 working with a controller. In IPython, all of these models are implemented via
92 the client's :meth:`.Client.apply` method, with various arguments, or
92 the client's :meth:`.View.apply` method, with various arguments, or
93 93 constructing :class:`.View` objects to represent subsets of engines. The two
94 94 primary models for interacting with engines are:
95 95
96 96 * A **Direct** interface, where engines are addressed explicitly.
97 97 * A **LoadBalanced** interface, where the Scheduler is trusted with assigning work to
98 98 appropriate engines.
99 99
100 100 Advanced users can readily extend the View models to enable other
101 101 styles of parallelism.
102 102
103 103 .. note::
104 104
105 105 A single controller and set of engines can be used with multiple models
106 106 simultaneously. This opens the door for lots of interesting things.
107 107
108 108
109 109 The Hub
110 110 *******
111 111
112 112 The center of an IPython cluster is the Hub. This is the process that keeps
113 113 track of engine connections, schedulers, clients, as well as all task requests and
114 114 results. The primary role of the Hub is to facilitate queries of the cluster state, and
115 115 minimize the necessary information required to establish the many connections involved in
116 116 connecting new clients and engines.
117 117
118 118
119 119 Schedulers
120 120 **********
121 121
122 122 All actions that can be performed on the engine go through a Scheduler. While the engines
123 123 themselves block when user code is run, the schedulers hide that from the user to provide
124 124 a fully asynchronous interface to a set of engines.
125 125
126 126
127 IPython client
128 --------------
127 IPython client and views
128 ------------------------
129 129
130 There is one primary object, the :class:`~.parallel.client.Client`, for connecting to a
131 controller. For each model, there is a corresponding view. These views allow users to
132 interact with a set of engines through the interface. Here are the two default views:
130 There is one primary object, the :class:`~.parallel.client.Client`, for connecting to a cluster.
131 For each execution model, there is a corresponding :class:`~.parallel.view.View`. These views
132 allow users to interact with a set of engines through the interface. Here are the two default
133 views:
133 134
134 135 * The :class:`DirectView` class for explicit addressing.
135 136 * The :class:`LoadBalancedView` class for destination-agnostic scheduling.
136 137
137 138 Security
138 139 --------
139 140
140 141 IPython uses ZeroMQ for networking, which has provided many advantages, but
141 142 one of the setbacks is its utter lack of security [ZeroMQ]_. By default, no IPython
142 143 connections are encrypted, but open ports only listen on localhost. The only
143 144 source of security for IPython is via ssh-tunnel. IPython supports both shell
144 145 (`openssh`) and `paramiko` based tunnels for connections. There is a key necessary
145 146 to submit requests, but due to the lack of encryption, it does not provide
146 147 significant security if loopback traffic is compromised.
147 148
148 149 In our architecture, the controller is the only process that listens on
149 150 network ports, and is thus the main point of vulnerability. The standard model
150 151 for secure connections is to designate that the controller listen on
151 152 localhost, and use ssh-tunnels to connect clients and/or
152 153 engines.
153 154
154 155 To connect and authenticate to the controller an engine or client needs
155 156 some information that the controller has stored in a JSON file.
156 157 Thus, the JSON files need to be copied to a location where
157 158 the clients and engines can find them. Typically, this is the
158 159 :file:`~/.ipython/clusterz_default/security` directory on the host where the
159 160 client/engine is running (which could be a different host than the controller).
160 161 Once the JSON files are copied over, everything should work fine.
161 162
162 163 Currently, there are two JSON files that the controller creates:
163 164
164 165 ipcontroller-engine.json
165 166 This JSON file has the information necessary for an engine to connect
166 167 to a controller.
167 168
168 169 ipcontroller-client.json
169 170 The client's connection information. This may not differ from the engine's,
170 171 but since the controller may listen on different ports for clients and
171 172 engines, it is stored separately.
172 173
173 174 More details of how these JSON files are used are given below.
174 175
175 176 A detailed description of the security model and its implementation in IPython
176 177 can be found :ref:`here <parallelsecurity>`.
177 178
178 179 .. warning::
179 180
180 181 Even at its most secure, the Controller listens on ports on localhost, and
181 182 every time you make a tunnel, you open a localhost port on the connecting
182 183 machine that points to the Controller. If localhost on the Controller's
183 184 machine, or the machine of any client or engine, is untrusted, then your
184 185 Controller is insecure. There is no way around this with ZeroMQ.
185 186
186 187
187 188
188 189 Getting Started
189 190 ===============
190 191
191 192 To use IPython for parallel computing, you need to start one instance of the
192 193 controller and one or more instances of the engine. Initially, it is best to
193 194 simply start a controller and engines on a single host using the
194 195 :command:`ipclusterz` command. To start a controller and 4 engines on your
195 196 localhost, just do::
196 197
197 198 $ ipclusterz start -n 4
198 199
199 200 More details about starting the IPython controller and engines can be found
200 201 :ref:`here <parallel_process>`
201 202
202 203 Once you have started the IPython controller and one or more engines, you
203 204 are ready to use the engines to do something useful. To make sure
204 205 everything is working correctly, try the following commands:
205 206
206 207 .. sourcecode:: ipython
207 208
208 209 In [1]: from IPython.zmq.parallel import client
209 210
210 211 In [2]: c = client.Client()
211 212
212 213 In [4]: c.ids
213 214 Out[4]: set([0, 1, 2, 3])
214 215
215 In [5]: c.apply(lambda : "Hello, World", targets='all', block=True)
216 In [5]: c[:].apply_sync(lambda : "Hello, World")
216 217 Out[5]: [ 'Hello, World', 'Hello, World', 'Hello, World', 'Hello, World' ]
217 218
218 219
219 220 When a client is created with no arguments, the client tries to find the corresponding
220 221 JSON file in the local `~/.ipython/clusterz_default/security` directory. If it finds it,
221 222 you are set. If you have put the JSON file in a different location or it has a different
222 223 name, create the client like this:
223 224
224 225 .. sourcecode:: ipython
225 226
226 227 In [2]: c = client.Client('/path/to/my/ipcontroller-client.json')
227 228
228 229 Remember, a client needs to be able to see the Hub's ports to connect. So if they are on a
229 230 different machine, you may need to use an ssh server to tunnel access to that machine,
230 231 then you would connect to it with:
231 232
232 233 .. sourcecode:: ipython
233 234
234 235 In [2]: c = client.Client(sshserver='myhub.example.com')
235 236
236 237 Where 'myhub.example.com' is the url or IP address of the machine on
237 which the Hub process is running.
238 which the Hub process is running (or another machine that has direct access to the Hub's ports).
238 239
239 240 You are now ready to learn more about the :ref:`Direct
240 <parallelmultiengine>` and :ref:`LoadBalanced <paralleltask>` interfaces to the
241 <parallel_multiengine>` and :ref:`LoadBalanced <parallel_task>` interfaces to the
241 242 controller.
242 243
243 244 .. [ZeroMQ] ZeroMQ. http://www.zeromq.org
@@ -1,845 +1,799 b''
1 .. _parallelmultiengine:
1 .. _parallel_multiengine:
2 2
3 3 ==========================
4 4 IPython's Direct interface
5 5 ==========================
6 6
7 7 The direct, or multiengine, interface represents one possible way of working with a set of
8 8 IPython engines. The basic idea behind the multiengine interface is that the
9 9 capabilities of each engine are directly and explicitly exposed to the user.
10 10 Thus, in the multiengine interface, each engine is given an id that is used to
11 11 identify the engine and give it work to do. This interface is very intuitive
12 and is designed with interactive usage in mind, and is thus the best place for
12 and is designed with interactive usage in mind, and is the best place for
13 13 new users of IPython to begin.
14 14
15 15 Starting the IPython controller and engines
16 16 ===========================================
17 17
18 18 To follow along with this tutorial, you will need to start the IPython
19 19 controller and four IPython engines. The simplest way of doing this is to use
20 20 the :command:`ipclusterz` command::
21 21
22 22 $ ipclusterz start -n 4
23 23
24 24 For more detailed information about starting the controller and engines, see
25 25 our :ref:`introduction <ip1par>` to using IPython for parallel computing.
26 26
27 27 Creating a ``Client`` instance
28 28 ==============================
29 29
30 30 The first step is to import the IPython :mod:`IPython.zmq.parallel.client`
31 31 module and then create a :class:`.Client` instance:
32 32
33 33 .. sourcecode:: ipython
34 34
35 35 In [1]: from IPython.zmq.parallel import client
36 36
37 37 In [2]: rc = client.Client()
38 38
39 39 This form assumes that the default connection information (stored in
40 40 :file:`ipcontroller-client.json` found in :file:`IPYTHON_DIR/clusterz_default/security`) is
41 41 accurate. If the controller was started on a remote machine, you must copy that connection
42 42 file to the client machine, or enter its contents as arguments to the Client constructor:
43 43
44 44 .. sourcecode:: ipython
45 45
46 46 # If you have copied the json connector file from the controller:
47 47 In [2]: rc = client.Client('/path/to/ipcontroller-client.json')
48 48 # or to connect with a specific profile you have set up:
49 49 In [3]: rc = client.Client(profile='mpi')
50 50
51 51
52 52 To make sure there are engines connected to the controller, users can get a list
53 53 of engine ids:
54 54
55 55 .. sourcecode:: ipython
56 56
57 57 In [3]: rc.ids
58 58 Out[3]: [0, 1, 2, 3]
59 59
60 60 Here we see that there are four engines ready to do work for us.
61 61
62 62 For direct execution, we will make use of a :class:`DirectView` object, which can be
63 63 constructed via list-access to the client:
64 64
65 65 .. sourcecode:: ipython
66 66
67 67 In [4]: dview = rc[:] # use all engines
68 68
69 69 .. seealso::
70 70
71 71 For more information, see the in-depth explanation of :ref:`Views <parallel_details>`.
72 72
73 73
74 74 Quick and easy parallelism
75 75 ==========================
76 76
77 77 In many cases, you simply want to apply a Python function to a sequence of
78 78 objects, but *in parallel*. The client interface provides a simple way
79 79 of accomplishing this: using the DirectView's :meth:`~DirectView.map` method.
80 80
81 81 Parallel map
82 82 ------------
83 83
84 84 Python's builtin :func:`map` function allows a function to be applied to a
85 85 sequence element-by-element. This type of code is typically trivial to
86 86 parallelize. In fact, since IPython's interface is all about functions anyway,
87 87 you can just use the builtin :func:`map` with a :class:`RemoteFunction`, or a
88 88 DirectView's :meth:`map` method:
89 89
90 90 .. sourcecode:: ipython
91 91
92 92 In [62]: serial_result = map(lambda x:x**10, range(32))
93 93
94 In [63]: dview.block = True
95
96 In [66]: parallel_result = dview.map(lambda x: x**10, range(32))
94 In [63]: parallel_result = dview.map_sync(lambda x: x**10, range(32))
97 95
98 96 In [67]: serial_result==parallel_result
99 97 Out[67]: True
100 98
101 99
102 100 .. note::
103 101
104 102 The :class:`DirectView`'s version of :meth:`map` does
105 103 not do dynamic load balancing. For a load balanced version, use a
106 :class:`LoadBalancedView`, or a :class:`ParallelFunction` with
107 `balanced=True`.
104 :class:`LoadBalancedView`.
108 105
109 106 .. seealso::
110 107
111 108 :meth:`map` is implemented via :class:`ParallelFunction`.
112 109
113 110 Remote function decorators
114 111 --------------------------
115 112
116 113 Remote functions are just like normal functions, but when they are called,
117 114 they execute on one or more engines, rather than locally. IPython provides
118 115 two decorators:
119 116
120 117 .. sourcecode:: ipython
121 118
122 In [10]: @rc.remote(block=True, targets='all')
119 In [10]: @dview.remote(block=True)
123 120 ...: def getpid():
124 121 ...: import os
125 122 ...: return os.getpid()
126 123 ...:
127 124
128 125 In [11]: getpid()
129 126 Out[11]: [12345, 12346, 12347, 12348]
130 127
131 A ``@parallel`` decorator creates parallel functions, that break up an element-wise
128 The ``@parallel`` decorator creates parallel functions, that break up an element-wise
132 129 operations and distribute them, reconstructing the result.
133 130
134 131 .. sourcecode:: ipython
135 132
136 133 In [12]: import numpy as np
137 134
138 135 In [13]: A = np.random.random((64,48))
139 136
140 In [14]: @rc.parallel(block=True, targets='all')
137 In [14]: @dview.parallel(block=True)
141 138 ...: def pmul(A,B):
142 139 ...: return A*B
143 140
144 141 In [15]: C_local = A*A
145 142
146 In [16]: C_remote_partial = pmul(A,A)
143 In [16]: C_remote = pmul(A,A)
147 144
148 145 In [17]: (C_local == C_remote).all()
149 146 Out[17]: True
150 147
151 148 .. seealso::
152 149
153 150 See the docstrings for the :func:`parallel` and :func:`remote` decorators for
154 151 options.
155 152
156 153 Calling Python functions
157 154 ========================
158 155
159 156 The most basic type of operation that can be performed on the engines is to
160 157 execute Python code or call Python functions. Executing Python code can be
161 158 done in blocking or non-blocking mode (non-blocking is default) using the
162 :meth:`execute` method, and calling functions can be done via the
159 :meth:`.View.execute` method, and calling functions can be done via the
163 160 :meth:`.View.apply` method.
164 161
165 162 apply
166 163 -----
167 164
168 165 The main method for doing remote execution (in fact, all methods that
169 communicate with the engines are built on top of it), is :meth:`Client.apply`.
170 Ideally, :meth:`apply` would have the signature ``apply(f,*args,**kwargs)``,
171 which would call ``f(*args,**kwargs)`` remotely. However, since :class:`Clients`
172 require some more options, they cannot easily provide this interface.
173 Instead, they provide the signature:
174
175 .. sourcecode:: python
166 communicate with the engines are built on top of it), is :meth:`View.apply`.
176 167
177 c.apply(f, args=None, kwargs=None, bound=True, block=None, targets=None,
178 after=None, follow=None, timeout=None)
168 We strive to provide the cleanest interface we can, so `apply` has the following
169 signature:
179 170
180 Where various behavior is controlled via keyword arguments. This means that in the client,
181 you must pass `args` as a tuple, and `kwargs` as a dict.
171 .. sourcecode:: python
182 172
183 In order to provide the nicer interface, we have :class:`View` classes, which wrap
184 :meth:`Client.apply` by using attributes and extra :meth:`apply_x` methods to determine
185 the extra keyword arguments. This means that the views can have the desired pattern:
173 view.apply(f, *args, **kwargs)
186 174
187 .. sourcecode:: python
175 There are various ways to call functions with IPython, and these flags are set as
176 attributes of the View. The ``DirectView`` has just two of these flags:
188 177
189 v.apply(f, *args, **kwargs)
178 dv.block : bool
179 whether to wait for the result, or return an :class:`AsyncResult` object
180 immediately
181 dv.track : bool
182 whether to instruct pyzmq to track when the message is actually sent.
183 This is primarily useful for non-copying sends of numpy arrays that you plan to
184 edit in-place. You need to know when it becomes safe to edit the buffer
185 without corrupting the message.
190 186
191 187
192 For instance, performing index-access on a client creates a
193 :class:`.DirectView`.
188 Creating a view is simple: index-access on a client creates a :class:`.DirectView`.
194 189
195 190 .. sourcecode:: ipython
196 191
197 192 In [4]: view = rc[1:3]
198 193 Out[4]: <DirectView [1, 2]>
199 194
200 195 In [5]: view.apply<tab>
201 view.apply view.apply_async view.apply_async_bound view.apply_sync view.apply_sync_bound
202
203 A :class:`DirectView` always uses its `targets` attribute, and it will use its `bound`
204 and `block` attributes in its :meth:`apply` method, but the suffixed :meth:`apply_x`
205 methods allow specifying `bound` and `block` via the different methods.
196 view.apply view.apply_async view.apply_sync view.apply_with_flags
206 197
207 ================== ========== ==========
208 method block bound
209 ================== ========== ==========
210 apply self.block self.bound
211 apply_sync True False
212 apply_async False False
213 apply_sync_bound True True
214 apply_async_bound False True
215 ================== ========== ==========
216
217 For explanation of these values, read on.
198 For convenience, you can set block temporarily for a single call with the extra sync/async methods.
218 199
219 200 Blocking execution
220 201 ------------------
221 202
222 203 In blocking mode, the :class:`.DirectView` object (called ``dview`` in
223 204 these examples) submits the command to the controller, which places the
224 205 command in the engines' queues for execution. The :meth:`apply` call then
225 206 blocks until the engines are done executing the command:
226 207
227 208 .. sourcecode:: ipython
228 209
229 210 In [2]: dview = rc[:] # A DirectView of all engines
230 211 In [3]: dview.block=True
231 212 In [4]: dview['a'] = 5
232 213
233 214 In [5]: dview['b'] = 10
234 215
235 In [6]: dview.apply_sync(lambda x: a+b+x, 27)
216 In [6]: dview.apply(lambda x: a+b+x, 27)
236 217 Out[6]: [42, 42, 42, 42]
237 218
238 Python commands can be executed on specific engines by calling execute using the ``targets``
239 keyword argument in :meth:`client.execute`, or creating a :class:`DirectView` instance by
240 index-access to the client:
241
242 .. sourcecode:: ipython
243
244 In [6]: rc.execute('c=a+b', targets=[0,2])
245
246 In [7]: rc[1::2].execute('c=a-b') # shorthand for rc.execute('c=a-b',targets=[1,3])
247
248 In [8]: rc[:]['c'] # shorthand for rc.pull('c',targets='all')
249 Out[8]: [15, -5, 15, -5]
250
251 .. note::
219 You can also select blocking execution on a call-by-call basis with the :meth:`apply_sync`
220 method:
252 221
253 Note that every call to ``rc.<meth>(...,targets=x)`` can be made via
254 ``rc[<x>].<meth>(...)``, which constructs a View object. The only place
255 where this differs in in :meth:`apply`. The :class:`Client` takes many
256 arguments to apply, so it requires `args` and `kwargs` to be passed as
257 individual arguments. Extended options such as `bound`,`targets`, and
258 `block` are controlled by the attributes of the :class:`View` objects, so
259 they can provide the much more convenient
260 :meth:`View.apply(f,*args,**kwargs)`, which simply calls
261 ``f(*args,**kwargs)`` remotely.
222 In [7]: dview.block=False
262 223
263 Bound and unbound execution
264 ---------------------------
224 In [8]: dview.apply_sync(lambda x: a+b+x, 27)
225 Out[8]: [42, 42, 42, 42]
265 226
266 The previous example also shows one of the most important things about the IPython
267 engines: they have a persistent user namespaces. The :meth:`apply` method can
268 be run in either a bound or unbound manner.
269
270 When applying a function in a `bound` manner, the first argument to that function
271 will be the Engine's namespace, which is a :class:`Namespace` object, a dictionary
272 also providing attribute-access to keys.
273
274 In all (unbound and bound) execution
227 Python commands can be executed as strings on specific engines by using a View's ``execute``
228 method:
275 229
276 230 .. sourcecode:: ipython
277 231
278 In [9]: dview['b'] = 5 # assign b to 5 everywhere
279
280 In [10]: v0 = rc[0]
232 In [6]: rc[::2].execute('c=a+b')
281 233
282 # multiply b*2 inplace
283 In [12]: v0.apply_sync_bound(lambda ns: ns.b*=2)
234 In [7]: rc[1::2].execute('c=a-b')
284 235
285 # b is still available in globals during unbound execution
286 In [13]: v0.apply_sync(lambda a: a*b, 3)
287 Out[13]: 30
236 In [8]: rc[:]['c'] # shorthand for rc[:].pull('c', block=True)
237 Out[8]: [15, -5, 15, -5]
288 238
289 `bound=True` specifies that the engine's namespace is to be passed as the first argument when
290 the function is called, and the default `bound=False` specifies that the normal behavior, but
291 the engine's namespace will be available as the globals() when the function is called.
292 239
293 240 Non-blocking execution
294 241 ----------------------
295 242
296 243 In non-blocking mode, :meth:`apply` submits the command to be executed and
297 244 then returns a :class:`AsyncResult` object immediately. The
298 245 :class:`AsyncResult` object gives you a way of getting a result at a later
299 246 time through its :meth:`get` method.
300 247
301 248 .. Note::
302 249
303 250 The :class:`AsyncResult` object provides a superset of the interface in
304 251 :py:class:`multiprocessing.pool.AsyncResult`. See the
305 252 `official Python documentation <http://docs.python.org/library/multiprocessing#multiprocessing.pool.AsyncResult>`_
306 253 for more.
307 254
308 255
309 256 This allows you to quickly submit long running commands without blocking your
310 257 local Python/IPython session:
311 258
312 259 .. sourcecode:: ipython
313 260
314 261 # define our function
315 262 In [6]: def wait(t):
316 263 ...: import time
317 264 ...: tic = time.time()
318 265 ...: time.sleep(t)
319 266 ...: return time.time()-tic
320 267
321 268 # In non-blocking mode
322 269 In [7]: ar = dview.apply_async(wait, 2)
323 270
324 271 # Now block for the result
325 272 In [8]: ar.get()
326 273 Out[8]: [2.0006198883056641, 1.9997570514678955, 1.9996809959411621, 2.0003249645233154]
327 274
328 275 # Again in non-blocking mode
329 276 In [9]: ar = dview.apply_async(wait, 10)
330 277
331 278 # Poll to see if the result is ready
332 279 In [10]: ar.ready()
333 280 Out[10]: False
334 281
335 282 # ask for the result, but wait a maximum of 1 second:
336 283 In [45]: ar.get(1)
337 284 ---------------------------------------------------------------------------
338 285 TimeoutError Traceback (most recent call last)
339 286 /home/you/<ipython-input-45-7cd858bbb8e0> in <module>()
340 287 ----> 1 ar.get(1)
341 288
342 289 /path/to/site-packages/IPython/zmq/parallel/asyncresult.pyc in get(self, timeout)
343 290 62 raise self._exception
344 291 63 else:
345 292 ---> 64 raise error.TimeoutError("Result not ready.")
346 293 65
347 294 66 def ready(self):
348 295
349 296 TimeoutError: Result not ready.
350 297
351 298 .. Note::
352 299
353 300 Note the import inside the function. This is a common model, to ensure
354 that the appropriate modules are imported where the task is run.
301 that the appropriate modules are imported where the task is run. You can
302 also manually import modules into the engine(s) namespace(s) via
303 ``view.execute('import numpy')``.
355 304
356 305 Often, it is desirable to wait until a set of :class:`AsyncResult` objects
357 are done. For this, there is a the method :meth:`barrier`. This method takes a
306 are done. For this, there is the method :meth:`wait`. This method takes a
358 307 tuple of :class:`AsyncResult` objects (or `msg_ids` or indices to the client's History),
359 308 and blocks until all of the associated results are ready:
360 309
361 310 .. sourcecode:: ipython
362 311
363 In [72]: rc.block=False
312 In [72]: dview.block=False
364 313
365 314 # A trivial list of AsyncResults objects
366 315 In [73]: pr_list = [dview.apply_async(wait, 3) for i in range(10)]
367 316
368 317 # Wait until all of them are done
369 In [74]: rc.barrier(pr_list)
318 In [74]: dview.wait(pr_list)
370 319
371 320 # Then, their results are ready using get() or the `.r` attribute
372 321 In [75]: pr_list[0].get()
373 322 Out[75]: [2.9982571601867676, 2.9982588291168213, 2.9987530708312988, 2.9990990161895752]
374 323
375 324
376 325
377 The ``block`` keyword argument and attributes
378 ---------------------------------------------
326 The ``block`` attribute
327 -----------------------
379 328
380 Most client methods(like :meth:`apply`) accept
329 Many View methods (excluding :meth:`apply`) accept
381 330 ``block`` as a keyword argument. As we have seen above, these
382 keyword arguments control the blocking mode. The :class:`Client` class also has
331 keyword arguments control the blocking mode. The :class:`View` class also has
383 332 a :attr:`block` attribute that controls the default behavior when the keyword
384 333 argument is not provided. Thus the following logic is used for :attr:`block`:
385 334
386 335 * If no keyword argument is provided, the instance attributes are used.
387 336 * Keyword argument, if provided override the instance attributes for
388 337 the duration of a single call.
389 338
390 DirectView objects also have a ``bound`` attribute, which is used in the same way.
391
392 339 The following examples demonstrate how to use the instance attributes:
393 340
394 341 .. sourcecode:: ipython
395 342
396 In [17]: rc.block = False
343 In [17]: dview.block = False
397 344
398 In [18]: ar = rc.apply(lambda : 10, targets=[0,2])
345 In [18]: ar = dview.apply(lambda : 10)
399 346
400 347 In [19]: ar.get()
401 Out[19]: [10,10]
348 Out[19]: [10, 10, 10, 10]
402 349
403 In [21]: rc.block = True
350 In [21]: dview.block = True
404 351
405 352 # Note: dview targets all engines
406 In [22]: rc.apply(lambda : 42, targets='all')
353 In [22]: dview.apply(lambda : 42)
407 354 Out[22]: [42, 42, 42, 42]
408 355
409 The :attr:`block`, :attr:`bound`, and :attr:`targets` instance attributes of the
356 The :attr:`block` and :attr:`targets` instance attributes of the
410 357 :class:`.DirectView` also determine the behavior of the parallel magic commands.
411 358
412
413 359 Parallel magic commands
414 360 -----------------------
415 361
416 362 .. warning::
417 363
418 The magics have not been changed to work with the zeromq system. ``%px``
419 and ``%autopx`` do work, but ``%result`` does not. %px and %autopx *do
420 not* print stdin/out.
364 The magics have not been changed to work with the zeromq system. The
365 magics do work, but *do not* print stdin/out like they used to in IPython.kernel.
421 366
422 367 We provide a few IPython magic commands (``%px``, ``%autopx`` and ``%result``)
423 368 that make it more pleasant to execute Python commands on the engines
424 369 interactively. These are simply shortcuts to :meth:`execute` and
425 370 :meth:`get_result` of the :class:`DirectView`. The ``%px`` magic executes a single
426 371 Python command on the engines specified by the :attr:`targets` attribute of the
427 372 :class:`DirectView` instance:
428 373
429 374 .. sourcecode:: ipython
430 375
376 # load the parallel magic extension:
377 In [21]: %load_ext parallelmagic
378
431 379 # Create a DirectView for all targets
432 380 In [22]: dv = rc[:]
433 381
434 382 # Make this DirectView active for parallel magic commands
435 383 In [23]: dv.activate()
436 384
437 385 In [24]: dv.block=True
438 386
439 387 In [25]: import numpy
440 388
441 389 In [26]: %px import numpy
442 390 Parallel execution on engines: [0, 1, 2, 3]
443 391
444 392 In [27]: %px a = numpy.random.rand(2,2)
445 393 Parallel execution on engines: [0, 1, 2, 3]
446 394
447 395 In [28]: %px ev = numpy.linalg.eigvals(a)
448 396 Parallel execution on engines: [0, 1, 2, 3]
449 397
450 398 In [28]: dv['ev']
451 399 Out[28]: [ array([ 1.09522024, -0.09645227]),
452 400 array([ 1.21435496, -0.35546712]),
453 401 array([ 0.72180653, 0.07133042]),
454 402 array([ 1.46384341e+00, 1.04353244e-04])
455 403 ]
456 404
457 405 The ``%result`` magic gets the most recent result, or takes an argument
458 406 specifying the index of the result to be requested. It is simply a shortcut to the
459 407 :meth:`get_result` method:
460 408
461 409 .. sourcecode:: ipython
462 410
463 411 In [29]: dv.apply_async(lambda : ev)
464 412
465 413 In [30]: %result
466 414 Out[30]: [ [ 1.28167017 0.14197338],
467 415 [-0.14093616 1.27877273],
468 416 [-0.37023573 1.06779409],
469 417 [ 0.83664764 -0.25602658] ]
470 418
471 419 The ``%autopx`` magic switches to a mode where everything you type is executed
472 420 on the engines given by the :attr:`targets` attribute:
473 421
474 422 .. sourcecode:: ipython
475 423
476 424 In [30]: dv.block=False
477 425
478 426 In [31]: %autopx
479 427 Auto Parallel Enabled
480 428 Type %autopx to disable
481 429
482 430 In [32]: max_evals = []
483 431 <IPython.zmq.parallel.asyncresult.AsyncResult object at 0x17b8a70>
484 432
485 433 In [33]: for i in range(100):
486 434 ....: a = numpy.random.rand(10,10)
487 435 ....: a = a+a.transpose()
488 436 ....: evals = numpy.linalg.eigvals(a)
489 437 ....: max_evals.append(evals[0].real)
490 438 ....:
491 439 ....:
492 440 <IPython.zmq.parallel.asyncresult.AsyncResult object at 0x17af8f0>
493 441
494 442 In [34]: %autopx
495 443 Auto Parallel Disabled
496 444
497 445 In [35]: dv.block=True
498 446
499 447 In [36]: px ans= "Average max eigenvalue is: %f"%(sum(max_evals)/len(max_evals))
500 448 Parallel execution on engines: [0, 1, 2, 3]
501 449
502 450 In [37]: dv['ans']
503 451 Out[37]: [ 'Average max eigenvalue is: 10.1387247332',
504 452 'Average max eigenvalue is: 10.2076902286',
505 453 'Average max eigenvalue is: 10.1891484655',
506 454 'Average max eigenvalue is: 10.1158837784',]
507 455
508 456
509 457 Moving Python objects around
510 458 ============================
511 459
512 460 In addition to calling functions and executing code on engines, you can
513 461 transfer Python objects to and from your IPython session and the engines. In
514 462 IPython, these operations are called :meth:`push` (sending an object to the
515 463 engines) and :meth:`pull` (getting an object from the engines).
516 464
517 465 Basic push and pull
518 466 -------------------
519 467
520 468 Here are some examples of how you use :meth:`push` and :meth:`pull`:
521 469
522 470 .. sourcecode:: ipython
523 471
524 In [38]: rc.push(dict(a=1.03234,b=3453))
472 In [38]: dview.push(dict(a=1.03234,b=3453))
525 473 Out[38]: [None,None,None,None]
526 474
527 In [39]: rc.pull('a')
475 In [39]: dview.pull('a')
528 476 Out[39]: [ 1.03234, 1.03234, 1.03234, 1.03234]
529 477
530 In [40]: rc.pull('b',targets=0)
478 In [40]: rc[0].pull('b')
531 479 Out[40]: 3453
532 480
533 In [41]: rc.pull(('a','b'))
481 In [41]: dview.pull(('a','b'))
534 482 Out[41]: [ [1.03234, 3453], [1.03234, 3453], [1.03234, 3453], [1.03234, 3453] ]
535 483
536 # zmq client does not have zip_pull
537 In [42]: rc.zip_pull(('a','b'))
538 Out[42]: [(1.03234, 1.03234, 1.03234, 1.03234), (3453, 3453, 3453, 3453)]
539
540 In [43]: rc.push(dict(c='speed'))
484 In [43]: dview.push(dict(c='speed'))
541 485 Out[43]: [None,None,None,None]
542 486
543 487 In non-blocking mode :meth:`push` and :meth:`pull` also return
544 488 :class:`AsyncResult` objects:
545 489
546 490 .. sourcecode:: ipython
547 491
548 In [47]: rc.block=False
549
550 In [48]: ar = rc.pull('a')
492 In [48]: ar = dview.pull('a', block=False)
551 493
552 494 In [49]: ar.get()
553 495 Out[49]: [1.03234, 1.03234, 1.03234, 1.03234]
554 496
555 497
556 498 Dictionary interface
557 499 --------------------
558 500
559 501 Since a Python namespace is just a :class:`dict`, :class:`DirectView` objects provide
560 502 dictionary-style access by key and methods such as :meth:`get` and
561 503 :meth:`update` for convenience. This make the remote namespaces of the engines
562 504 appear as a local dictionary. Underneath, these methods call :meth:`apply`:
563 505
564 506 .. sourcecode:: ipython
565 507
566 In [50]: dview.block=True
567
568 508 In [51]: dview['a']=['foo','bar']
569 509
570 510 In [52]: dview['a']
571 511 Out[52]: [ ['foo', 'bar'], ['foo', 'bar'], ['foo', 'bar'], ['foo', 'bar'] ]
572 512
573 513 Scatter and gather
574 514 ------------------
575 515
576 516 Sometimes it is useful to partition a sequence and push the partitions to
577 517 different engines. In MPI language, this is known as scatter/gather and we
578 518 follow that terminology. However, it is important to remember that in
579 519 IPython's :class:`Client` class, :meth:`scatter` is from the
580 520 interactive IPython session to the engines and :meth:`gather` is from the
581 521 engines back to the interactive IPython session. For scatter/gather operations
582 522 between engines, MPI should be used:
583 523
584 524 .. sourcecode:: ipython
585 525
586 526 In [58]: dview.scatter('a',range(16))
587 527 Out[58]: [None,None,None,None]
588 528
589 529 In [59]: dview['a']
590 530 Out[59]: [ [0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15] ]
591 531
592 532 In [60]: dview.gather('a')
593 533 Out[60]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
594 534
595 535 Other things to look at
596 536 =======================
597 537
598 538 How to do parallel list comprehensions
599 539 --------------------------------------
600 540
601 541 In many cases list comprehensions are nicer than using the map function. While
602 542 we don't have fully parallel list comprehensions, it is simple to get the
603 543 basic effect using :meth:`scatter` and :meth:`gather`:
604 544
605 545 .. sourcecode:: ipython
606 546
607 547 In [66]: dview.scatter('x',range(64))
608 548
609 In [67]: px y = [i**10 for i in x]
549 In [67]: %px y = [i**10 for i in x]
610 550 Parallel execution on engines: [0, 1, 2, 3]
611 551 Out[67]:
612 552
613 553 In [68]: y = dview.gather('y')
614 554
615 555 In [69]: print y
616 556 [0, 1, 1024, 59049, 1048576, 9765625, 60466176, 282475249, 1073741824,...]
617 557
618 558 Parallel exceptions
619 559 -------------------
620 560
621 561 In the multiengine interface, parallel commands can raise Python exceptions,
622 562 just like serial commands. But, it is a little subtle, because a single
623 563 parallel command can actually raise multiple exceptions (one for each engine
624 564 the command was run on). To express this idea, we have a
625 565 :exc:`CompositeError` exception class that will be raised in most cases. The
626 566 :exc:`CompositeError` class is a special type of exception that wraps one or
627 567 more other types of exceptions. Here is how it works:
628 568
629 569 .. sourcecode:: ipython
630 570
631 571 In [76]: dview.block=True
632 572
633 573 In [77]: dview.execute('1/0')
634 574 ---------------------------------------------------------------------------
635 575 CompositeError Traceback (most recent call last)
636 /Users/minrk/<ipython-input-10-5d56b303a66c> in <module>()
637 ----> 1 dview.execute('1/0')
638
639 ...
576 /home/you/<ipython-input-10-15c2c22dec39> in <module>()
577 ----> 1 dview.execute('1/0', block=True)
578
579 /path/to/site-packages/IPython/zmq/parallel/view.py in execute(self, code, block)
580 460 default: self.block
581 461 """
582 --> 462 return self.apply_with_flags(util._execute, args=(code,), block=block)
583 463
584 464 def run(self, filename, block=None):
585
586 /home/you/<string> in apply_with_flags(self, f, args, kwargs, block, track)
587
588 /path/to/site-packages/IPython/zmq/parallel/view.py in sync_results(f, self, *args, **kwargs)
589 46 def sync_results(f, self, *args, **kwargs):
590 47 """sync relevant results from self.client to our results attribute."""
591 ---> 48 ret = f(self, *args, **kwargs)
592 49 delta = self.outstanding.difference(self.client.outstanding)
593 50 completed = self.outstanding.intersection(delta)
594
595 /home/you/<string> in apply_with_flags(self, f, args, kwargs, block, track)
596
597 /path/to/site-packages/IPython/zmq/parallel/view.py in save_ids(f, self, *args, **kwargs)
598 35 n_previous = len(self.client.history)
599 36 try:
600 ---> 37 ret = f(self, *args, **kwargs)
601 38 finally:
602 39 nmsgs = len(self.client.history) - n_previous
603
604 /path/to/site-packages/IPython/zmq/parallel/view.py in apply_with_flags(self, f, args, kwargs, block, track)
605 398 if block:
606 399 try:
607 --> 400 return ar.get()
608 401 except KeyboardInterrupt:
609 402 pass
640 610
641 /Users/minrk/dev/ip/mine/IPython/zmq/parallel/client.pyc in apply(self, f, args, kwargs, bound, block, targets, balanced, after, follow, timeout)
642 1012 raise ValueError(msg)
643 1013 else:
644 -> 1014 return self._apply_direct(f, args, kwargs, **options)
645 1015
646 1016 def _apply_balanced(self, f, args, kwargs, bound=None, block=None, targets=None,
647
648 /Users/minrk/dev/ip/mine/IPython/zmq/parallel/client.pyc in _apply_direct(self, f, args, kwargs, bound, block, targets)
649 1100 if block:
650 1101 try:
651 -> 1102 return ar.get()
652 1103 except KeyboardInterrupt:
653 1104 return ar
654
655 /Users/minrk/dev/ip/mine/IPython/zmq/parallel/asyncresult.pyc in get(self, timeout)
656 78 return self._result
657 79 else:
658 ---> 80 raise self._exception
659 81 else:
660 82 raise error.TimeoutError("Result not ready.")
611 /path/to/site-packages/IPython/zmq/parallel/asyncresult.pyc in get(self, timeout)
612 87 return self._result
613 88 else:
614 ---> 89 raise self._exception
615 90 else:
616 91 raise error.TimeoutError("Result not ready.")
661 617
662 618 CompositeError: one or more exceptions from call to method: _execute
663 619 [0:apply]: ZeroDivisionError: integer division or modulo by zero
664 620 [1:apply]: ZeroDivisionError: integer division or modulo by zero
665 621 [2:apply]: ZeroDivisionError: integer division or modulo by zero
666 622 [3:apply]: ZeroDivisionError: integer division or modulo by zero
667 623
624
668 625 Notice how the error message printed when :exc:`CompositeError` is raised has
669 626 information about the individual exceptions that were raised on each engine.
670 627 If you want, you can even raise one of these original exceptions:
671 628
672 629 .. sourcecode:: ipython
673 630
674 631 In [80]: try:
675 ....: rc.execute('1/0')
632 ....: dview.execute('1/0')
676 633 ....: except client.CompositeError, e:
677 634 ....: e.raise_exception()
678 635 ....:
679 636 ....:
680 637 ---------------------------------------------------------------------------
681 638 ZeroDivisionError Traceback (most recent call last)
682 639
683 640 /ipython1-client-r3021/docs/examples/<ipython console> in <module>()
684 641
685 642 /ipython1-client-r3021/ipython1/kernel/error.pyc in raise_exception(self, excid)
686 643 156 raise IndexError("an exception with index %i does not exist"%excid)
687 644 157 else:
688 645 --> 158 raise et, ev, etb
689 646 159
690 647 160 def collect_exceptions(rlist, method):
691 648
692 649 ZeroDivisionError: integer division or modulo by zero
693 650
694 651 If you are working in IPython, you can simply type ``%debug`` after one of
695 652 these :exc:`CompositeError` exceptions is raised, and inspect the exception
696 653 instance:
697 654
698 655 .. sourcecode:: ipython
699 656
700 In [81]: rc.execute('1/0')
657 In [81]: dview.execute('1/0')
701 658 ---------------------------------------------------------------------------
702 659 CompositeError Traceback (most recent call last)
703 /Users/minrk/<ipython-input-5-b0c7a2b62c52> in <module>()
704 ----> 1 rc.execute('1/0')
705
706 /Users/minrk/<string> in execute(self, code, targets, block)
707
708 /Users/minrk/dev/ip/mine/IPython/zmq/parallel/client.pyc in defaultblock(f, self, *args, **kwargs)
709 88 self.block = block
710 89 try:
711 ---> 90 ret = f(self, *args, **kwargs)
712 91 finally:
713 92 self.block = saveblock
714
715 /Users/minrk/dev/ip/mine/IPython/zmq/parallel/client.pyc in execute(self, code, targets, block)
716 855 default: self.block
717 856 """
718 --> 857 result = self.apply(_execute, (code,), targets=targets, block=block, bound=True, balanced=False)
719 858 if not block:
720 859 return result
721
722 /Users/minrk/<string> in apply(self, f, args, kwargs, bound, block, targets, balanced, after, follow, timeout)
723
724 /Users/minrk/dev/ip/mine/IPython/zmq/parallel/client.pyc in defaultblock(f, self, *args, **kwargs)
725 88 self.block = block
726 89 try:
727 ---> 90 ret = f(self, *args, **kwargs)
728 91 finally:
729 92 self.block = saveblock
730
731 /Users/minrk/dev/ip/mine/IPython/zmq/parallel/client.pyc in apply(self, f, args, kwargs, bound, block, targets, balanced, after, follow, timeout)
732 1012 raise ValueError(msg)
733 1013 else:
734 -> 1014 return self._apply_direct(f, args, kwargs, **options)
735 1015
736 1016 def _apply_balanced(self, f, args, kwargs, bound=None, block=None, targets=None,
737
738 /Users/minrk/dev/ip/mine/IPython/zmq/parallel/client.pyc in _apply_direct(self, f, args, kwargs, bound, block, targets)
739 1100 if block:
740 1101 try:
741 -> 1102 return ar.get()
742 1103 except KeyboardInterrupt:
743 1104 return ar
660 /home/you/<ipython-input-10-15c2c22dec39> in <module>()
661 ----> 1 dview.execute('1/0', block=True)
662
663 /path/to/site-packages/IPython/zmq/parallel/view.py in execute(self, code, block)
664 460 default: self.block
665 461 """
666 --> 462 return self.apply_with_flags(util._execute, args=(code,), block=block)
667 463
668 464 def run(self, filename, block=None):
669
670 /home/you/<string> in apply_with_flags(self, f, args, kwargs, block, track)
671
672 /path/to/site-packages/IPython/zmq/parallel/view.py in sync_results(f, self, *args, **kwargs)
673 46 def sync_results(f, self, *args, **kwargs):
674 47 """sync relevant results from self.client to our results attribute."""
675 ---> 48 ret = f(self, *args, **kwargs)
676 49 delta = self.outstanding.difference(self.client.outstanding)
677 50 completed = self.outstanding.intersection(delta)
678
679 /home/you/<string> in apply_with_flags(self, f, args, kwargs, block, track)
680
681 /path/to/site-packages/IPython/zmq/parallel/view.py in save_ids(f, self, *args, **kwargs)
682 35 n_previous = len(self.client.history)
683 36 try:
684 ---> 37 ret = f(self, *args, **kwargs)
685 38 finally:
686 39 nmsgs = len(self.client.history) - n_previous
687
688 /path/to/site-packages/IPython/zmq/parallel/view.py in apply_with_flags(self, f, args, kwargs, block, track)
689 398 if block:
690 399 try:
691 --> 400 return ar.get()
692 401 except KeyboardInterrupt:
693 402 pass
744 694
745 /Users/minrk/dev/ip/mine/IPython/zmq/parallel/asyncresult.pyc in get(self, timeout)
746 78 return self._result
747 79 else:
748 ---> 80 raise self._exception
749 81 else:
750 82 raise error.TimeoutError("Result not ready.")
695 /path/to/site-packages/IPython/zmq/parallel/asyncresult.pyc in get(self, timeout)
696 87 return self._result
697 88 else:
698 ---> 89 raise self._exception
699 90 else:
700 91 raise error.TimeoutError("Result not ready.")
751 701
752 702 CompositeError: one or more exceptions from call to method: _execute
753 703 [0:apply]: ZeroDivisionError: integer division or modulo by zero
754 704 [1:apply]: ZeroDivisionError: integer division or modulo by zero
755 705 [2:apply]: ZeroDivisionError: integer division or modulo by zero
756 706 [3:apply]: ZeroDivisionError: integer division or modulo by zero
757 707
758 708 In [82]: %debug
759 709 > /Users/minrk/dev/ip/mine/IPython/zmq/parallel/asyncresult.py(80)get()
760 710 79 else:
761 711 ---> 80 raise self._exception
762 712 81 else:
763 713
764 714
765 715 # With the debugger running, e is the exceptions instance. We can tab complete
766 716 # on it and see the extra methods that are available.
767 717 ipdb> e.
768 718 e.__class__ e.__getitem__ e.__new__ e.__setstate__ e.args
769 719 e.__delattr__ e.__getslice__ e.__reduce__ e.__str__ e.elist
770 720 e.__dict__ e.__hash__ e.__reduce_ex__ e.__weakref__ e.message
771 721 e.__doc__ e.__init__ e.__repr__ e._get_engine_str e.print_tracebacks
772 722 e.__getattribute__ e.__module__ e.__setattr__ e._get_traceback e.raise_exception
773 723 ipdb> e.print_tracebacks()
774 724 [0:apply]:
775 725 Traceback (most recent call last):
776 726 File "/Users/minrk/dev/ip/mine/IPython/zmq/parallel/streamkernel.py", line 332, in apply_request
777 727 exec code in working, working
778 728 File "<string>", line 1, in <module>
779 729 File "/Users/minrk/dev/ip/mine/IPython/zmq/parallel/client.py", line 69, in _execute
780 730 exec code in globals()
781 731 File "<string>", line 1, in <module>
782 732 ZeroDivisionError: integer division or modulo by zero
783 733
784 734
785 735 [1:apply]:
786 736 Traceback (most recent call last):
787 737 File "/Users/minrk/dev/ip/mine/IPython/zmq/parallel/streamkernel.py", line 332, in apply_request
788 738 exec code in working, working
789 739 File "<string>", line 1, in <module>
790 740 File "/Users/minrk/dev/ip/mine/IPython/zmq/parallel/client.py", line 69, in _execute
791 741 exec code in globals()
792 742 File "<string>", line 1, in <module>
793 743 ZeroDivisionError: integer division or modulo by zero
794 744
795 745
796 746 [2:apply]:
797 747 Traceback (most recent call last):
798 748 File "/Users/minrk/dev/ip/mine/IPython/zmq/parallel/streamkernel.py", line 332, in apply_request
799 749 exec code in working, working
800 750 File "<string>", line 1, in <module>
801 751 File "/Users/minrk/dev/ip/mine/IPython/zmq/parallel/client.py", line 69, in _execute
802 752 exec code in globals()
803 753 File "<string>", line 1, in <module>
804 754 ZeroDivisionError: integer division or modulo by zero
805 755
806 756
807 757 [3:apply]:
808 758 Traceback (most recent call last):
809 759 File "/Users/minrk/dev/ip/mine/IPython/zmq/parallel/streamkernel.py", line 332, in apply_request
810 760 exec code in working, working
811 761 File "<string>", line 1, in <module>
812 762 File "/Users/minrk/dev/ip/mine/IPython/zmq/parallel/client.py", line 69, in _execute
813 763 exec code in globals()
814 764 File "<string>", line 1, in <module>
815 765 ZeroDivisionError: integer division or modulo by zero
816 766
817 767
768 .. note::
769
770 TODO: The above tracebacks are not up to date
771
818 772
819 773 All of this same error handling magic even works in non-blocking mode:
820 774
821 775 .. sourcecode:: ipython
822 776
823 In [83]: rc.block=False
777 In [83]: dview.block=False
824 778
825 In [84]: ar = rc.execute('1/0')
779 In [84]: ar = dview.execute('1/0')
826 780
827 781 In [85]: ar.get()
828 782 ---------------------------------------------------------------------------
829 783 CompositeError Traceback (most recent call last)
830 784 /Users/minrk/<ipython-input-3-8531eb3d26fb> in <module>()
831 785 ----> 1 ar.get()
832 786
833 787 /Users/minrk/dev/ip/mine/IPython/zmq/parallel/asyncresult.pyc in get(self, timeout)
834 788 78 return self._result
835 789 79 else:
836 790 ---> 80 raise self._exception
837 791 81 else:
838 792 82 raise error.TimeoutError("Result not ready.")
839 793
840 794 CompositeError: one or more exceptions from call to method: _execute
841 795 [0:apply]: ZeroDivisionError: integer division or modulo by zero
842 796 [1:apply]: ZeroDivisionError: integer division or modulo by zero
843 797 [2:apply]: ZeroDivisionError: integer division or modulo by zero
844 798 [3:apply]: ZeroDivisionError: integer division or modulo by zero
845 799
@@ -1,412 +1,419 b''
1 .. _paralleltask:
1 .. _parallel_task:
2 2
3 3 ==========================
4 4 The IPython task interface
5 5 ==========================
6 6
7 7 The task interface to the cluster presents the engines as a fault tolerant,
8 8 dynamic load-balanced system of workers. Unlike the multiengine interface, in
9 9 the task interface the user has no direct access to individual engines. By
10 10 allowing the IPython scheduler to assign work, this interface is simultaneously
11 11 simpler and more powerful.
12 12
13 13 Best of all, the user can use both of these interfaces running at the same time
14 14 to take advantage of their respective strengths. When the user can break up
15 15 the work into segments that do not depend on previous execution, the
16 16 task interface is ideal. But it also has more power and flexibility, allowing
17 17 the user to guide the distribution of jobs, without having to assign tasks to
18 18 engines explicitly.
19 19
20 20 Starting the IPython controller and engines
21 21 ===========================================
22 22
23 23 To follow along with this tutorial, you will need to start the IPython
24 24 controller and four IPython engines. The simplest way of doing this is to use
25 25 the :command:`ipclusterz` command::
26 26
27 27 $ ipclusterz start -n 4
28 28
29 29 For more detailed information about starting the controller and engines, see
30 30 our :ref:`introduction <ip1par>` to using IPython for parallel computing.
31 31
32 32 Creating a ``Client`` instance
33 33 ==============================
34 34
35 35 The first step is to import the IPython :mod:`IPython.zmq.parallel.client`
36 36 module and then create a :class:`.Client` instance, and we will also be using
37 37 a :class:`LoadBalancedView`, here called `lview`:
38 38
39 39 .. sourcecode:: ipython
40 40
41 41 In [1]: from IPython.zmq.parallel import client
42 42
43 43 In [2]: rc = client.Client()
44 44
45 45
46 46 This form assumes that the controller was started on localhost with default
47 47 configuration. If not, the location of the controller must be given as an
48 48 argument to the constructor:
49 49
50 50 .. sourcecode:: ipython
51 51
52 52 # for a visible LAN controller listening on an external port:
53 53 In [2]: rc = client.Client('tcp://192.168.1.16:10101')
54 54 # or to connect with a specific profile you have set up:
55 55 In [3]: rc = client.Client(profile='mpi')
56 56
57 For load-balanced execution, we will make use of a :class:`LoadBalancedView` object, which can be constructed via the client's :meth:`view` method:
57 For load-balanced execution, we will make use of a :class:`LoadBalancedView` object, which can
58 be constructed via the client's :meth:`load_balanced_view` method:
58 59
59 60 .. sourcecode:: ipython
60 61
61 In [4]: lview = rc.view() # default load-balanced view
62 In [4]: lview = rc.load_balanced_view() # default load-balanced view
62 63
63 64 .. seealso::
64 65
65 66 For more information, see the in-depth explanation of :ref:`Views <parallel_details>`.
66 67
67 68
68 69 Quick and easy parallelism
69 70 ==========================
70 71
71 72 In many cases, you simply want to apply a Python function to a sequence of
72 73 objects, but *in parallel*. Like the multiengine interface, these can be
73 74 implemented via the task interface. The exact same tools can perform these
74 75 actions in load-balanced ways as well as multiplexed ways: a parallel version
75 76 of :func:`map` and :func:`@parallel` function decorator. If one specifies the
76 77 argument `balanced=True`, then they are dynamically load balanced. Thus, if the
77 78 execution time per item varies significantly, you should use the versions in
78 79 the task interface.
79 80
80 81 Parallel map
81 82 ------------
82 83
83 84 To load-balance :meth:`map`, simply use a LoadBalancedView:
84 85
85 86 .. sourcecode:: ipython
86 87
87 88 In [62]: lview.block = True
88 89
89 90 In [63]: serial_result = map(lambda x:x**10, range(32))
90 91
91 92 In [64]: parallel_result = lview.map(lambda x:x**10, range(32))
92 93
93 94 In [65]: serial_result==parallel_result
94 95 Out[65]: True
95 96
96 97 Parallel function decorator
97 98 ---------------------------
98 99
99 100 Parallel functions are just like normal functions, but they can be called on
100 101 sequences and *in parallel*. The multiengine interface provides a decorator
101 102 that turns any Python function into a parallel function:
102 103
103 104 .. sourcecode:: ipython
104 105
105 106 In [10]: @lview.parallel()
106 107 ....: def f(x):
107 108 ....: return 10.0*x**4
108 109 ....:
109 110
110 111 In [11]: f.map(range(32)) # this is done in parallel
111 112 Out[11]: [0.0,10.0,160.0,...]
112 113
114 .. _parallel_dependencies:
115
113 116 Dependencies
114 117 ============
115 118
116 119 Often, pure atomic load-balancing is too primitive for your work. In these cases, you
117 120 may want to associate some kind of `Dependency` that describes when, where, or whether
118 121 a task can be run. In IPython, we provide two types of dependencies:
119 122 `Functional Dependencies`_ and `Graph Dependencies`_
120 123
121 124 .. note::
122 125
123 126 It is important to note that the pure ZeroMQ scheduler does not support dependencies,
124 127 and you will see errors or warnings if you try to use dependencies with the pure
125 128 scheduler.
126 129
127 130 Functional Dependencies
128 131 -----------------------
129 132
130 133 Functional dependencies are used to determine whether a given engine is capable of running
131 134 a particular task. This is implemented via a special :class:`Exception` class,
132 135 :class:`UnmetDependency`, found in `IPython.zmq.parallel.error`. Its use is very simple:
133 136 if a task fails with an UnmetDependency exception, then the scheduler, instead of relaying
134 137 the error up to the client like any other error, catches the error, and submits the task
135 138 to a different engine. This will repeat indefinitely, and a task will never be submitted
136 139 to a given engine a second time.
137 140
138 141 You can manually raise the :class:`UnmetDependency` yourself, but IPython has provided
139 142 some decorators for facilitating this behavior.
140 143
141 144 There are two decorators and a class used for functional dependencies:
142 145
143 146 .. sourcecode:: ipython
144 147
145 148 In [9]: from IPython.zmq.parallel.dependency import depend, require, dependent
146 149
147 150 @require
148 151 ********
149 152
150 153 The simplest sort of dependency is requiring that a Python module is available. The
151 154 ``@require`` decorator lets you define a function that will only run on engines where names
152 155 you specify are importable:
153 156
154 157 .. sourcecode:: ipython
155 158
156 159 In [10]: @require('numpy', 'zmq')
157 160 ...: def myfunc():
158 161 ...: import numpy,zmq
159 162 ...: return dostuff()
160 163
161 164 Now, any time you apply :func:`myfunc`, the task will only run on a machine that has
162 165 numpy and pyzmq available.
163 166
164 167 @depend
165 168 *******
166 169
167 170 The ``@depend`` decorator lets you decorate any function with any *other* function to
168 171 evaluate the dependency. The dependency function will be called at the start of the task,
169 172 and if it returns ``False``, then the dependency will be considered unmet, and the task
170 173 will be assigned to another engine. If the dependency returns *anything other than
171 174 ``False``*, the rest of the task will continue.
172 175
173 176 .. sourcecode:: ipython
174 177
175 178 In [10]: def platform_specific(plat):
176 179 ...: import sys
177 180 ...: return sys.platform == plat
178 181
179 182 In [11]: @depend(platform_specific, 'darwin')
180 183 ...: def mactask():
181 184 ...: do_mac_stuff()
182 185
183 186 In [12]: @depend(platform_specific, 'nt')
184 187 ...: def wintask():
185 188 ...: do_windows_stuff()
186 189
187 190 In this case, any time you apply ``mytask``, it will only run on an OSX machine.
188 191 ``@depend`` is just like ``apply``, in that it has a ``@depend(f,*args,**kwargs)``
189 192 signature.
190 193
191 194 dependents
192 195 **********
193 196
194 197 You don't have to use the decorators on your tasks, if for instance you may want
195 198 to run tasks with a single function but varying dependencies, you can directly construct
196 199 the :class:`dependent` object that the decorators use:
197 200
198 201 .. sourcecode:: ipython
199 202
200 203 In [13]: def mytask(*args):
201 204 ...: dostuff()
202 205
203 206 In [14]: mactask = dependent(mytask, platform_specific, 'darwin')
204 207 # this is the same as decorating the declaration of mytask with @depend
205 208 # but you can do it again:
206 209
207 210 In [15]: wintask = dependent(mytask, platform_specific, 'nt')
208 211
209 212 # in general:
210 213 In [16]: t = dependent(f, g, *dargs, **dkwargs)
211 214
212 215 # is equivalent to:
213 216 In [17]: @depend(g, *dargs, **dkwargs)
214 217 ...: def t(a,b,c):
215 218 ...: # contents of f
216 219
217 220 Graph Dependencies
218 221 ------------------
219 222
220 223 Sometimes you want to restrict the time and/or location to run a given task as a function
221 224 of the time and/or location of other tasks. This is implemented via a subclass of
222 225 :class:`set`, called a :class:`Dependency`. A Dependency is just a set of `msg_ids`
223 226 corresponding to tasks, and a few attributes to guide how to decide when the Dependency
224 227 has been met.
225 228
226 229 The switches we provide for interpreting whether a given dependency set has been met:
227 230
228 231 any|all
229 232 Whether the dependency is considered met if *any* of the dependencies are done, or
230 233 only after *all* of them have finished. This is set by a Dependency's :attr:`all`
231 234 boolean attribute, which defaults to ``True``.
232 235
233 success_only
234 Whether to consider only tasks that did not raise an error as being fulfilled.
235 Sometimes you want to run a task after another, but only if that task succeeded. In
236 this case, ``success_only`` should be ``True``. However sometimes you may not care
237 whether the task succeeds, and always want the second task to run, in which case
238 you should use `success_only=False`. The default behavior is to only use successes.
236 success [default: True]
237 Whether to consider tasks that succeeded as fulfilling dependencies.
238
239 failure [default: False]
240 Whether to consider tasks that failed as fulfilling dependencies.
241 using `failure=True,success=False` is useful for setting up cleanup tasks, to be run
242 only when tasks have failed.
243
244 Sometimes you want to run a task after another, but only if that task succeeded. In this case,
245 ``success`` should be ``True`` and ``failure`` should be ``False``. However sometimes you may
246 not care whether the task succeeds, and always want the second task to run, in which case you
247 should use `success=failure=True`. The default behavior is to only use successes.
239 248
240 249 There are other switches for interpretation that are made at the *task* level. These are
241 250 specified via keyword arguments to the client's :meth:`apply` method.
242 251
243 252 after,follow
244 253 You may want to run a task *after* a given set of dependencies have been run and/or
245 254 run it *where* another set of dependencies are met. To support this, every task has an
246 255 `after` dependency to restrict time, and a `follow` dependency to restrict
247 256 destination.
248 257
249 258 timeout
250 259 You may also want to set a time-limit for how long the scheduler should wait before a
251 260 task's dependencies are met. This is done via a `timeout`, which defaults to 0, which
252 261 indicates that the task should never timeout. If the timeout is reached, and the
253 262 scheduler still hasn't been able to assign the task to an engine, the task will fail
254 263 with a :class:`DependencyTimeout`.
255 264
256 265 .. note::
257 266
258 267 Dependencies only work within the task scheduler. You cannot instruct a load-balanced
259 268 task to run after a job submitted via the MUX interface.
260 269
261 The simplest form of Dependencies is with `all=True,success_only=True`. In these cases,
270 The simplest form of Dependencies is with `all=True,success=True,failure=False`. In these cases,
262 271 you can skip using Dependency objects, and just pass msg_ids or AsyncResult objects as the
263 272 `follow` and `after` keywords to :meth:`client.apply`:
264 273
265 274 .. sourcecode:: ipython
266 275
267 276 In [14]: client.block=False
268 277
269 In [15]: ar = client.apply(f, args, kwargs, balanced=True)
278 In [15]: ar = lview.apply(f, args, kwargs)
270 279
271 In [16]: ar2 = client.apply(f2, balanced=True)
280 In [16]: ar2 = lview.apply(f2)
272 281
273 In [17]: ar3 = client.apply(f3, after=[ar,ar2], balanced=True)
282 In [17]: ar3 = lview.apply_with_flags(f3, after=[ar,ar2])
274 283
275 In [17]: ar4 = client.apply(f3, follow=[ar], timeout=2.5, balanced=True)
284 In [17]: ar4 = lview.apply_with_flags(f3, follow=[ar], timeout=2.5)
276 285
277 286
278 287 .. seealso::
279 288
280 289 Some parallel workloads can be described as a `Directed Acyclic Graph
281 290 <http://en.wikipedia.org/wiki/Directed_acyclic_graph>`_, or DAG. See :ref:`DAG
282 291 Dependencies <dag_dependencies>` for an example demonstrating how to use map a NetworkX DAG
283 292 onto task dependencies.
284 293
285 294
286 295
287 296 Impossible Dependencies
288 297 ***********************
289 298
290 299 The schedulers do perform some analysis on graph dependencies to determine whether they
291 300 are impossible to meet. If the scheduler discovers that a dependency cannot be
292 301 met, then the task will fail with an :class:`ImpossibleDependency` error. This way, if the
293 302 scheduler realizes that a task can never be run, it won't sit indefinitely in the
294 303 scheduler clogging the pipeline.
295 304
296 305 The basic cases that are checked:
297 306
298 307 * depending on nonexistent messages
299 308 * `follow` dependencies were run on more than one machine and `all=True`
300 * any dependencies failed and `all=True,success_only=True`
301 * all dependencies failed and `all=False,success_only=True`
309 * any dependencies failed and `all=True,success=True,failure=False`
310 * all dependencies failed and `all=False,success=True,failure=False`
302 311
303 312 .. warning::
304 313
305 314 This analysis has not been proven to be rigorous, so it is likely possible for tasks
306 315 to become impossible to run in obscure situations, so a timeout may be a good choice.
307 316
308 317 .. _parallel_schedulers:
309 318
310 319 Schedulers
311 320 ==========
312 321
313 322 There are a variety of valid ways to determine where jobs should be assigned in a
314 323 load-balancing situation. In IPython, we support several standard schemes, and
315 324 even make it easy to define your own. The scheme can be selected via the ``--scheme``
316 325 argument to :command:`ipcontrollerz`, or in the :attr:`HubFactory.scheme` attribute
317 326 of a controller config object.
318 327
319 328 The built-in routing schemes:
320 329
321 330 To select one of these schemes, simply do::
322 331
323 332 $ ipcontrollerz --scheme <schemename>
324 333 for instance:
325 334 $ ipcontrollerz --scheme lru
326 335
327 336 lru: Least Recently Used
328 337
329 338 Always assign work to the least-recently-used engine. A close relative of
330 339 round-robin, it will be fair with respect to the number of tasks, agnostic
331 340 with respect to runtime of each task.
332 341
333 342 plainrandom: Plain Random
334 343
335 344 Randomly picks an engine on which to run.
336 345
337 346 twobin: Two-Bin Random
338 347
339 348 **Requires numpy**
340 349
341 350 Pick two engines at random, and use the LRU of the two. This is known to be better
342 351 than plain random in many cases, but requires a small amount of computation.
343 352
344 353 leastload: Least Load
345 354
346 355 **This is the default scheme**
347 356
348 357 Always assign tasks to the engine with the fewest outstanding tasks (LRU breaks tie).
349 358
350 359 weighted: Weighted Two-Bin Random
351 360
352 361 **Requires numpy**
353 362
354 363 Pick two engines at random using the number of outstanding tasks as inverse weights,
355 364 and use the one with the lower load.
356 365
357 366
358 367 Pure ZMQ Scheduler
359 368 ------------------
360 369
361 370 For maximum throughput, the 'pure' scheme is not Python at all, but a C-level
362 371 :class:`MonitoredQueue` from PyZMQ, which uses a ZeroMQ ``XREQ`` socket to perform all
363 372 load-balancing. This scheduler does not support any of the advanced features of the Python
364 373 :class:`.Scheduler`.
365 374
366 375 Disabled features when using the ZMQ Scheduler:
367 376
368 377 * Engine unregistration
369 378 Task farming will be disabled if an engine unregisters.
370 379 Further, if an engine is unregistered during computation, the scheduler may not recover.
371 380 * Dependencies
372 381 Since there is no Python logic inside the Scheduler, routing decisions cannot be made
373 382 based on message content.
374 383 * Early destination notification
375 384 The Python schedulers know which engine gets which task, and notify the Hub. This
376 385 allows graceful handling of Engines coming and going. There is no way to know
377 386 where ZeroMQ messages have gone, so there is no way to know what tasks are on which
378 387 engine until they *finish*. This makes recovery from engine shutdown very difficult.
379 388
380 389
381 390 .. note::
382 391
383 392 TODO: performance comparisons
384 393
385 394
386 395 More details
387 396 ============
388 397
389 The :class:`Client` has many more powerful features that allow quite a bit
398 The :class:`LoadBalancedView` has many more powerful features that allow quite a bit
390 399 of flexibility in how tasks are defined and run. The next places to look are
391 400 in the following classes:
392 401
393 * :class:`IPython.zmq.parallel.client.Client`
402 * :class:`IPython.zmq.parallel.view.LoadBalancedView`
394 403 * :class:`IPython.zmq.parallel.client.AsyncResult`
395 * :meth:`IPython.zmq.parallel.client.Client.apply`
404 * :meth:`IPython.zmq.parallel.view.LoadBalancedView.apply`
396 405 * :mod:`IPython.zmq.parallel.dependency`
397 406
398 407 The following is an overview of how to use these classes together:
399 408
400 1. Create a :class:`Client`.
409 1. Create a :class:`Client` and :class:`LoadBalancedView`
401 410 2. Define some functions to be run as tasks
402 411 3. Submit your tasks using the :meth:`apply` method of your
403 :class:`Client` instance, specifying `balanced=True`. This signals
404 the :class:`Client` to entrust the Scheduler with assigning tasks to engines.
405 4. Use :meth:`Client.get_results` to get the results of the
412 :class:`LoadBalancedView` instance.
413 4. Use :meth:`Client.get_result` to get the results of the
406 414 tasks, or use the :meth:`AsyncResult.get` method of the results to wait
407 415 for and then receive the results.
408 416
409
410 417 .. seealso::
411 418
412 419 A demo of :ref:`DAG Dependencies <dag_dependencies>` with NetworkX and IPython.
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now