##// END OF EJS Templates
use ROUTER/DEALER socket names instead of XREP/XREQ...
MinRK -
Show More
@@ -1,46 +1,40 b''
1 1 """The IPython ZMQ-based parallel computing interface.
2 2
3 3 Authors:
4 4
5 5 * MinRK
6 6 """
7 7 #-----------------------------------------------------------------------------
8 8 # Copyright (C) 2011 The IPython Development Team
9 9 #
10 10 # Distributed under the terms of the BSD License. The full license is in
11 11 # the file COPYING, distributed as part of this software.
12 12 #-----------------------------------------------------------------------------
13 13
14 14 #-----------------------------------------------------------------------------
15 15 # Imports
16 16 #-----------------------------------------------------------------------------
17 17
18 18 import os
19 19 import warnings
20 20
21 21 import zmq
22 22
23 from IPython.zmq import check_for_zmq
23 24
# Windows requires a newer pyzmq than the other platforms.
min_pyzmq = '2.1.7' if os.name == 'nt' else '2.1.4'

# Hand the actual version check to the shared helper imported above.
check_for_zmq(min_pyzmq, 'IPython.parallel')
37 31
38 32 from IPython.utils.pickleutil import Reference
39 33
40 34 from .client.asyncresult import *
41 35 from .client.client import Client
42 36 from .client.remotefunction import *
43 37 from .client.view import *
44 38 from .controller.dependency import *
45 39
46 40
@@ -1,441 +1,441 b''
1 1 #!/usr/bin/env python
2 2 # encoding: utf-8
3 3 """
4 4 The IPython controller application.
5 5
6 6 Authors:
7 7
8 8 * Brian Granger
9 9 * MinRK
10 10
11 11 """
12 12
13 13 #-----------------------------------------------------------------------------
14 14 # Copyright (C) 2008-2011 The IPython Development Team
15 15 #
16 16 # Distributed under the terms of the BSD License. The full license is in
17 17 # the file COPYING, distributed as part of this software.
18 18 #-----------------------------------------------------------------------------
19 19
20 20 #-----------------------------------------------------------------------------
21 21 # Imports
22 22 #-----------------------------------------------------------------------------
23 23
24 24 from __future__ import with_statement
25 25
26 26 import os
27 27 import socket
28 28 import stat
29 29 import sys
30 30 import uuid
31 31
32 32 from multiprocessing import Process
33 33
34 34 import zmq
35 35 from zmq.devices import ProcessMonitoredQueue
36 36 from zmq.log.handlers import PUBHandler
37 37 from zmq.utils import jsonapi as json
38 38
39 39 from IPython.config.application import boolean_flag
40 40 from IPython.core.profiledir import ProfileDir
41 41
42 42 from IPython.parallel.apps.baseapp import (
43 43 BaseParallelApplication,
44 44 base_aliases,
45 45 base_flags,
46 46 )
47 47 from IPython.utils.importstring import import_item
48 48 from IPython.utils.traitlets import Instance, Unicode, Bool, List, Dict
49 49
50 50 # from IPython.parallel.controller.controller import ControllerFactory
51 51 from IPython.zmq.session import Session
52 52 from IPython.parallel.controller.heartmonitor import HeartMonitor
53 53 from IPython.parallel.controller.hub import HubFactory
54 54 from IPython.parallel.controller.scheduler import TaskScheduler,launch_scheduler
55 55 from IPython.parallel.controller.sqlitedb import SQLiteDB
56 56
57 57 from IPython.parallel.util import signal_children, split_url, asbytes
58 58
59 59 # conditional import of MongoDB backend class
60 60
61 61 try:
62 62 from IPython.parallel.controller.mongodb import MongoDB
63 63 except ImportError:
64 64 maybe_mongo = []
65 65 else:
66 66 maybe_mongo = [MongoDB]
67 67
68 68
69 69 #-----------------------------------------------------------------------------
70 70 # Module level variables
71 71 #-----------------------------------------------------------------------------
72 72
73 73
74 74 #: The default config file name for this application
75 75 default_config_file_name = u'ipcontroller_config.py'
76 76
77 77
78 78 _description = """Start the IPython controller for parallel computing.
79 79
80 80 The IPython controller provides a gateway between the IPython engines and
81 81 clients. The controller needs to be started before the engines and can be
82 82 configured using command line options or using a cluster directory. Cluster
83 83 directories contain config, log and security files and are usually located in
84 84 your ipython directory and named as "profile_name". See the `profile`
85 85 and `profile-dir` options for details.
86 86 """
87 87
88 88 _examples = """
89 89 ipcontroller --ip=192.168.0.1 --port=1000 # listen on ip, port for engines
90 90 ipcontroller --scheme=pure # use the pure zeromq scheduler
91 91 """
92 92
93 93
94 94 #-----------------------------------------------------------------------------
95 95 # The main application
96 96 #-----------------------------------------------------------------------------
# Command-line flags: begin with the flags shared by all parallel apps, then
# layer the controller-specific switches on top.
flags = dict(base_flags)
flags.update({
    'usethreads' : ( {'IPControllerApp' : {'use_threads' : True}},
        'Use threads instead of processes for the schedulers'),
    'sqlitedb' : ({'HubFactory' : {'db_class' : 'IPython.parallel.controller.sqlitedb.SQLiteDB'}},
        'use the SQLiteDB backend'),
    'mongodb' : ({'HubFactory' : {'db_class' : 'IPython.parallel.controller.mongodb.MongoDB'}},
        'use the MongoDB backend'),
    'dictdb' : ({'HubFactory' : {'db_class' : 'IPython.parallel.controller.dictdb.DictDB'}},
        'use the in-memory DictDB backend'),
    'reuse' : ({'IPControllerApp' : {'reuse_files' : True}},
        'reuse existing json connection files'),
})
# --secure / --no-secure pair
flags.update(boolean_flag(
    'secure', 'IPControllerApp.secure',
    "Use HMAC digests for authentication of messages.",
    "Don't authenticate messages.",
))

# Command-line aliases: short option name -> configurable trait.
aliases = {
    # controller application
    'secure' : 'IPControllerApp.secure',
    'ssh' : 'IPControllerApp.ssh_server',
    'enginessh' : 'IPControllerApp.engine_ssh_server',
    'location' : 'IPControllerApp.location',

    # session
    'ident' : 'Session.session',
    'user' : 'Session.username',
    'keyfile' : 'Session.keyfile',

    # hub
    'url' : 'HubFactory.url',
    'ip' : 'HubFactory.ip',
    'transport' : 'HubFactory.transport',
    'port' : 'HubFactory.regport',

    # heartbeat
    'ping' : 'HeartMonitor.period',

    # task scheduler
    'scheme' : 'TaskScheduler.scheme_name',
    'hwm' : 'TaskScheduler.hwm',
}
aliases.update(base_aliases)
137 137
138 138
class IPControllerApp(BaseParallelApplication):
    """Application that builds the Hub and its scheduler/relay children.

    ``initialize`` sets up log forwarding, the Hub (via ``HubFactory``) and
    the monitored queues / task scheduler; ``start`` launches the children
    and then runs the Hub's event loop.
    """

    name = u'ipcontroller'
    description = _description
    examples = _examples
    config_file_name = Unicode(default_config_file_name)
    classes = [ProfileDir, Session, HubFactory, TaskScheduler, HeartMonitor, SQLiteDB] + maybe_mongo

    # change default to True
    auto_create = Bool(True, config=True,
        help="""Whether to create profile dir if it doesn't exist.""")

    reuse_files = Bool(False, config=True,
        help='Whether to reuse existing json connection files.'
    )
    secure = Bool(True, config=True,
        help='Whether to use HMAC digests for extra message authentication.'
    )
    ssh_server = Unicode(u'', config=True,
        help="""ssh url for clients to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller's listening addresses must be accessible from the ssh server""",
    )
    engine_ssh_server = Unicode(u'', config=True,
        help="""ssh url for engines to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller's listening addresses must be accessible from the ssh server""",
    )
    location = Unicode(u'', config=True,
        help="""The external IP or domain name of the Controller, used for disambiguating
        engine and client connections.""",
    )
    import_statements = List([], config=True,
        help="import statements to be run at startup.  Necessary in some environments"
    )

    use_threads = Bool(False, config=True,
        help='Use threads instead of processes for the schedulers',
    )

    # internal
    children = List()
    mq_class = Unicode('zmq.devices.ProcessMonitoredQueue')

    def _use_threads_changed(self, name, old, new):
        """Keep ``mq_class`` in step with ``use_threads``."""
        self.mq_class = 'zmq.devices.%sMonitoredQueue'%('Thread' if new else 'Process')

    aliases = Dict(aliases)
    flags = Dict(flags)


    def save_connection_dict(self, fname, cdict):
        """Save a connection dict to a json file in the security dir.

        If ``cdict['location']`` is empty and the url parses, the location
        is filled in (in place) with this machine's IP, falling back to
        127.0.0.1 when it cannot be resolved.  The file is made readable
        and writable by the owner only.
        """
        url = cdict['url']
        location = cdict['location']
        if not location:
            try:
                # only used as a validity check; the parts are not needed
                split_url(url)
            except AssertionError:
                pass
            else:
                try:
                    location = socket.gethostbyname_ex(socket.gethostname())[2][-1]
                except (socket.gaierror, IndexError):
                    self.log.warn("Could not identify this machine's IP, assuming 127.0.0.1."
                    " You may need to specify '--location=<external_ip_address>' to help"
                    " IPython decide when to connect via loopback.")
                    location = '127.0.0.1'
            cdict['location'] = location
        fname = os.path.join(self.profile_dir.security_dir, fname)
        with open(fname, 'wb') as f:
            f.write(json.dumps(cdict, indent=2))
        os.chmod(fname, stat.S_IRUSR|stat.S_IWUSR)

    def load_config_from_json(self):
        """Load config from existing json connector files.

        Raises
        ------
        IOError
            if either connection file cannot be opened.
        AssertionError
            if the engine and client files disagree on the exec_key or on
            the registration port.  Raised explicitly (rather than via
            ``assert``) so the check survives ``python -O``.
        """
        c = self.config
        security_dir = self.profile_dir.security_dir

        # load from engine config
        with open(os.path.join(security_dir, 'ipcontroller-engine.json')) as f:
            cfg = json.loads(f.read())
        key = c.Session.key = asbytes(cfg['exec_key'])
        xport,addr = cfg['url'].split('://')
        c.HubFactory.engine_transport = xport
        ip,ports = addr.split(':')
        c.HubFactory.engine_ip = ip
        c.HubFactory.regport = int(ports)
        self.location = cfg['location']
        if not self.engine_ssh_server:
            self.engine_ssh_server = cfg['ssh']

        # load client config
        with open(os.path.join(security_dir, 'ipcontroller-client.json')) as f:
            cfg = json.loads(f.read())
        # normalise both sides the same way before comparing
        if key != asbytes(cfg['exec_key']):
            raise AssertionError("exec_key mismatch between engine and client keys")
        xport,addr = cfg['url'].split('://')
        c.HubFactory.client_transport = xport
        ip,ports = addr.split(':')
        c.HubFactory.client_ip = ip
        if not self.ssh_server:
            self.ssh_server = cfg['ssh']
        if int(ports) != c.HubFactory.regport:
            raise AssertionError("regport mismatch")

    def init_hub(self):
        """Construct the Hub and, unless reusing them, write connection files."""
        c = self.config

        self.do_import_statements()
        reusing = self.reuse_files
        if reusing:
            try:
                self.load_config_from_json()
            except (AssertionError,IOError):
                reusing=False
        # check again, because reusing may have failed:
        if reusing:
            pass
        elif self.secure:
            key = str(uuid.uuid4())
            c.Session.key = asbytes(key)
        else:
            key = c.Session.key = b''

        try:
            self.factory = HubFactory(config=c, log=self.log)
            self.factory.init_hub()
        except Exception:
            self.log.error("Couldn't construct the Controller", exc_info=True)
            self.exit(1)

        if not reusing:
            # save to new json config files
            f = self.factory
            cdict = {'exec_key' : key,
                    'ssh' : self.ssh_server,
                    'url' : "%s://%s:%s"%(f.client_transport, f.client_ip, f.regport),
                    'location' : self.location
                    }
            self.save_connection_dict('ipcontroller-client.json', cdict)
            # copy, so the engine file does not alias the client dict; the
            # location filled in by the first save is carried over
            edict = dict(cdict)
            # engines connect on the engine-side interface, matching what
            # load_config_from_json and save_urls use for the engine url
            edict['url'] = "%s://%s:%s"%(f.engine_transport, f.engine_ip, f.regport)
            edict['ssh'] = self.engine_ssh_server
            self.save_connection_dict('ipcontroller-engine.json', edict)

    def init_schedulers(self):
        """Create the monitored queues and the task scheduler.

        Queue devices (and a scheduler Process, if any) are appended to
        ``self.children``; they are started later by ``start``.  When the
        Python scheduler is used with threads it is launched immediately.
        """
        children = self.children
        mq = import_item(str(self.mq_class))

        hub = self.factory
        # IOPub relay (in a Process)
        q = mq(zmq.PUB, zmq.SUB, zmq.PUB, b'N/A',b'iopub')
        q.bind_in(hub.client_info['iopub'])
        q.bind_out(hub.engine_info['iopub'])
        q.setsockopt_out(zmq.SUBSCRIBE, b'')
        q.connect_mon(hub.monitor_url)
        q.daemon=True
        children.append(q)

        # Multiplexer Queue (in a Process)
        q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'in', b'out')
        q.bind_in(hub.client_info['mux'])
        q.setsockopt_in(zmq.IDENTITY, b'mux')
        q.bind_out(hub.engine_info['mux'])
        q.connect_mon(hub.monitor_url)
        q.daemon=True
        children.append(q)

        # Control Queue (in a Process)
        q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'incontrol', b'outcontrol')
        q.bind_in(hub.client_info['control'])
        q.setsockopt_in(zmq.IDENTITY, b'control')
        q.bind_out(hub.engine_info['control'])
        q.connect_mon(hub.monitor_url)
        q.daemon=True
        children.append(q)
        try:
            scheme = self.config.TaskScheduler.scheme_name
        except AttributeError:
            scheme = TaskScheduler.scheme_name.get_default_value()
        # Task Queue (in a Process)
        if scheme == 'pure':
            self.log.warn("task::using pure XREQ Task scheduler")
            q = mq(zmq.ROUTER, zmq.DEALER, zmq.PUB, b'intask', b'outtask')
            q.bind_in(hub.client_info['task'][1])
            q.setsockopt_in(zmq.IDENTITY, b'task')
            q.bind_out(hub.engine_info['task'])
            q.connect_mon(hub.monitor_url)
            q.daemon=True
            children.append(q)
        elif scheme == 'none':
            self.log.warn("task::using no Task scheduler")

        else:
            self.log.info("task::using Python %s Task scheduler"%scheme)
            sargs = (hub.client_info['task'][1], hub.engine_info['task'],
                                hub.monitor_url, hub.client_info['notification'])
            kwargs = dict(logname='scheduler', loglevel=self.log_level,
                            log_url = self.log_url, config=dict(self.config))
            if 'Process' in self.mq_class:
                # run the Python scheduler in a Process
                q = Process(target=launch_scheduler, args=sargs, kwargs=kwargs)
                q.daemon=True
                children.append(q)
            else:
                # single-threaded Controller
                kwargs['in_thread'] = True
                launch_scheduler(*sargs, **kwargs)


    def save_urls(self):
        """Save the engine and client registration urls to files."""
        sec_dir = self.profile_dir.security_dir
        cf = self.factory

        with open(os.path.join(sec_dir, 'ipcontroller-engine.url'), 'w') as f:
            f.write("%s://%s:%s"%(cf.engine_transport, cf.engine_ip, cf.regport))

        with open(os.path.join(sec_dir, 'ipcontroller-client.url'), 'w') as f:
            f.write("%s://%s:%s"%(cf.client_transport, cf.client_ip, cf.regport))


    def do_import_statements(self):
        """Execute each configured import statement, logging any failure.

        A failing statement is logged with its traceback and skipped, so
        the remaining statements still run.
        """
        for s in self.import_statements:
            try:
                self.log.info("Executing statement: '%s'" % s)
                # function-call form of exec is valid on Python 2 and 3
                exec(s, globals(), locals())
            except Exception:
                self.log.error("Error running statement: %s" % s, exc_info=True)

    def forward_logging(self):
        """If ``log_url`` is set, replace the log handler with a zmq PUBHandler."""
        if self.log_url:
            self.log.info("Forwarding logging to %s"%self.log_url)
            context = zmq.Context.instance()
            lsock = context.socket(zmq.PUB)
            lsock.connect(self.log_url)
            handler = PUBHandler(lsock)
            self.log.removeHandler(self._log_handler)
            handler.root_topic = 'controller'
            handler.setLevel(self.log_level)
            self.log.addHandler(handler)
            self._log_handler = handler

    def initialize(self, argv=None):
        """Run base initialization, then set up logging, the hub and schedulers."""
        super(IPControllerApp, self).initialize(argv)
        self.forward_logging()
        self.init_hub()
        self.init_schedulers()

    def start(self):
        """Start the hub and child devices, then run the hub's event loop."""
        # Start the subprocesses:
        self.factory.start()
        child_procs = []
        for child in self.children:
            child.start()
            if isinstance(child, ProcessMonitoredQueue):
                child_procs.append(child.launcher)
            elif isinstance(child, Process):
                child_procs.append(child)
        if child_procs:
            signal_children(child_procs)

        self.write_pid_file(overwrite=True)

        try:
            self.factory.loop.start()
        except KeyboardInterrupt:
            self.log.critical("Interrupted, Exiting...\n")
416 416
417 417
418 418
def launch_new_instance():
    """Create and run the IPython controller"""
    if sys.platform == 'win32':
        # Windows has no proper fork, so a multiprocessing child re-imports
        # this entry point; starting a Controller there would spawn
        # Controllers without end.  This only comes up when IPython has been
        # installed using vanilla setuptools, and *not* distribute.
        import multiprocessing
        # the main process is named 'MainProcess'; children get names
        # like 'Process-1'
        if multiprocessing.current_process().name != 'MainProcess':
            # we are a subprocess, don't start another Controller!
            return
    controller = IPControllerApp.instance()
    controller.initialize()
    controller.start()


if __name__ == '__main__':
    launch_new_instance()
@@ -1,1435 +1,1435 b''
1 1 """A semi-synchronous Client for the ZMQ cluster
2 2
3 3 Authors:
4 4
5 5 * MinRK
6 6 """
7 7 #-----------------------------------------------------------------------------
8 8 # Copyright (C) 2010-2011 The IPython Development Team
9 9 #
10 10 # Distributed under the terms of the BSD License. The full license is in
11 11 # the file COPYING, distributed as part of this software.
12 12 #-----------------------------------------------------------------------------
13 13
14 14 #-----------------------------------------------------------------------------
15 15 # Imports
16 16 #-----------------------------------------------------------------------------
17 17
18 18 import os
19 19 import json
20 20 import sys
21 21 import time
22 22 import warnings
23 23 from datetime import datetime
24 24 from getpass import getpass
25 25 from pprint import pprint
26 26
27 27 pjoin = os.path.join
28 28
29 29 import zmq
30 30 # from zmq.eventloop import ioloop, zmqstream
31 31
32 32 from IPython.config.configurable import MultipleInstanceError
33 33 from IPython.core.application import BaseIPythonApplication
34 34
35 35 from IPython.utils.jsonutil import rekey
36 36 from IPython.utils.localinterfaces import LOCAL_IPS
37 37 from IPython.utils.path import get_ipython_dir
38 38 from IPython.utils.traitlets import (HasTraits, Int, Instance, Unicode,
39 39 Dict, List, Bool, Set)
40 40 from IPython.external.decorator import decorator
41 41 from IPython.external.ssh import tunnel
42 42
43 43 from IPython.parallel import error
44 44 from IPython.parallel import util
45 45
46 46 from IPython.zmq.session import Session, Message
47 47
48 48 from .asyncresult import AsyncResult, AsyncHubResult
49 49 from IPython.core.profiledir import ProfileDir, ProfileDirError
50 50 from .view import DirectView, LoadBalancedView
51 51
52 52 if sys.version_info[0] >= 3:
53 53 # xrange is used in a couple 'isinstance' tests in py2
54 54 # should be just 'range' in 3k
55 55 xrange = range
56 56
57 57 #--------------------------------------------------------------------------
58 58 # Decorators for Client methods
59 59 #--------------------------------------------------------------------------
60 60
@decorator
def spin_first(f, self, *args, **kwargs):
    """Sync state with ``self.spin()`` first, then invoke the wrapped method."""
    self.spin()
    result = f(self, *args, **kwargs)
    return result
66 66
67 67
68 68 #--------------------------------------------------------------------------
69 69 # Classes
70 70 #--------------------------------------------------------------------------
71 71
class Metadata(dict):
    """Subclass of dict for initializing metadata values.

    Attribute access works on keys.

    These objects have a strict set of keys - errors will raise if you try
    to add new keys through item or attribute assignment.  Values passed to
    the constructor are merged with ``dict.update`` on top of the defaults.
    """
    def __init__(self, *args, **kwargs):
        dict.__init__(self)
        md = {'msg_id' : None,
              'submitted' : None,
              'started' : None,
              'completed' : None,
              'received' : None,
              'engine_uuid' : None,
              'engine_id' : None,
              'follow' : None,
              'after' : None,
              'status' : None,

              'pyin' : None,
              'pyout' : None,
              'pyerr' : None,
              'stdout' : '',
              'stderr' : '',
            }
        self.update(md)
        self.update(dict(*args, **kwargs))

    # The membership tests below use ``key in self`` rather than
    # ``self.iterkeys()``: it is a hash lookup instead of a scan, and
    # ``iterkeys`` does not exist on Python 3, where looking it up would
    # re-enter __getattr__ and recurse without end.

    def __getattr__(self, key):
        """getattr aliased to getitem"""
        if key in self:
            return self[key]
        raise AttributeError(key)

    def __setattr__(self, key, value):
        """setattr aliased to setitem, with strict"""
        if key in self:
            self[key] = value
        else:
            raise AttributeError(key)

    def __setitem__(self, key, value):
        """strict static key enforcement"""
        if key in self:
            dict.__setitem__(self, key, value)
        else:
            raise KeyError(key)
122 122
123 123
124 124 class Client(HasTraits):
125 125 """A semi-synchronous client to the IPython ZMQ cluster
126 126
127 127 Parameters
128 128 ----------
129 129
130 130 url_or_file : bytes or unicode; zmq url or path to ipcontroller-client.json
131 131 Connection information for the Hub's registration. If a json connector
132 132 file is given, then likely no further configuration is necessary.
133 133 [Default: use profile]
134 134 profile : bytes
135 135 The name of the Cluster profile to be used to find connector information.
136 136 If run from an IPython application, the default profile will be the same
137 137 as the running application, otherwise it will be 'default'.
138 138 context : zmq.Context
139 139 Pass an existing zmq.Context instance, otherwise the client will create its own.
140 140 debug : bool
141 141 flag for lots of message printing for debug purposes
142 142 timeout : int/float
143 143 time (in seconds) to wait for connection replies from the Hub
144 144 [Default: 10]
145 145
146 146 #-------------- session related args ----------------
147 147
148 148 config : Config object
149 149 If specified, this will be relayed to the Session for configuration
150 150 username : str
151 151 set username for the session object
152 152 packer : str (import_string) or callable
153 153 Can be either the simple keyword 'json' or 'pickle', or an import_string to a
154 154 function to serialize messages. Must support same input as
155 155 JSON, and output must be bytes.
156 156 You can pass a callable directly as `pack`
157 157 unpacker : str (import_string) or callable
158 158 The inverse of packer. Only necessary if packer is specified as *not* one
159 159 of 'json' or 'pickle'.
160 160
161 161 #-------------- ssh related args ----------------
162 162 # These are args for configuring the ssh tunnel to be used
163 163 # credentials are used to forward connections over ssh to the Controller
164 164 # Note that the ip given in `addr` needs to be relative to sshserver
165 165 # The most basic case is to leave addr as pointing to localhost (127.0.0.1),
166 166 # and set sshserver as the same machine the Controller is on. However,
167 167 # the only requirement is that sshserver is able to see the Controller
168 168 # (i.e. is within the same trusted network).
169 169
170 170 sshserver : str
171 171 A string of the form passed to ssh, i.e. 'server.tld' or 'user@server.tld:port'
172 172 If keyfile or password is specified, and this is not, it will default to
173 173 the ip given in addr.
174 174 sshkey : str; path to ssh private key file
175 175 This specifies a key to be used in ssh login, default None.
176 176 Regular default ssh keys will be used without specifying this argument.
177 177 password : str
178 178 Your ssh password to sshserver. Note that if this is left None,
179 179 you will be prompted for it if passwordless key based login is unavailable.
180 180 paramiko : bool
181 181 flag for whether to use paramiko instead of shell ssh for tunneling.
182 182 [default: True on win32, False else]
183 183
184 184 ------- exec authentication args -------
185 185 If even localhost is untrusted, you can have some protection against
186 186 unauthorized execution by signing messages with HMAC digests.
187 187 Messages are still sent as cleartext, so if someone can snoop your
188 188 loopback traffic this will not protect your privacy, but will prevent
189 189 unauthorized execution.
190 190
191 191 exec_key : str
192 192 an authentication key or file containing a key
193 193 default: None
194 194
195 195
196 196 Attributes
197 197 ----------
198 198
199 199 ids : list of int engine IDs
200 200 requesting the ids attribute always synchronizes
201 201 the registration state. To request ids without synchronization,
202 202 use semi-private _ids attributes.
203 203
204 204 history : list of msg_ids
205 205 a list of msg_ids, keeping track of all the execution
206 206 messages you have submitted in order.
207 207
208 208 outstanding : set of msg_ids
209 209 a set of msg_ids that have been submitted, but whose
210 210 results have not yet been received.
211 211
212 212 results : dict
213 213 a dict of all our results, keyed by msg_id
214 214
215 215 block : bool
216 216 determines default behavior when block not specified
217 217 in execution methods
218 218
219 219 Methods
220 220 -------
221 221
222 222 spin
223 223 flushes incoming results and registration state changes
224 224 control methods spin, and requesting `ids` also ensures up to date
225 225
226 226 wait
227 227 wait on one or more msg_ids
228 228
229 229 execution methods
230 230 apply
231 231 legacy: execute, run
232 232
233 233 data movement
234 234 push, pull, scatter, gather
235 235
236 236 query methods
237 237 queue_status, get_result, purge, result_status
238 238
239 239 control methods
240 240 abort, shutdown
241 241
242 242 """
243 243
244 244
245 245 block = Bool(False)
246 246 outstanding = Set()
247 247 results = Instance('collections.defaultdict', (dict,))
248 248 metadata = Instance('collections.defaultdict', (Metadata,))
249 249 history = List()
250 250 debug = Bool(False)
251 251
252 252 profile=Unicode()
253 253 def _profile_default(self):
254 254 if BaseIPythonApplication.initialized():
255 255 # an IPython app *might* be running, try to get its profile
256 256 try:
257 257 return BaseIPythonApplication.instance().profile
258 258 except (AttributeError, MultipleInstanceError):
259 259 # could be a *different* subclass of config.Application,
260 260 # which would raise one of these two errors.
261 261 return u'default'
262 262 else:
263 263 return u'default'
264 264
265 265
266 266 _outstanding_dict = Instance('collections.defaultdict', (set,))
267 267 _ids = List()
268 268 _connected=Bool(False)
269 269 _ssh=Bool(False)
270 270 _context = Instance('zmq.Context')
271 271 _config = Dict()
272 272 _engines=Instance(util.ReverseDict, (), {})
273 273 # _hub_socket=Instance('zmq.Socket')
274 274 _query_socket=Instance('zmq.Socket')
275 275 _control_socket=Instance('zmq.Socket')
276 276 _iopub_socket=Instance('zmq.Socket')
277 277 _notification_socket=Instance('zmq.Socket')
278 278 _mux_socket=Instance('zmq.Socket')
279 279 _task_socket=Instance('zmq.Socket')
280 280 _task_scheme=Unicode()
281 281 _closed = False
282 282 _ignored_control_replies=Int(0)
283 283 _ignored_hub_replies=Int(0)
284 284
285 285 def __new__(self, *args, **kw):
286 286 # don't raise on positional args
287 287 return HasTraits.__new__(self, **kw)
288 288
289 289 def __init__(self, url_or_file=None, profile=None, profile_dir=None, ipython_dir=None,
290 290 context=None, debug=False, exec_key=None,
291 291 sshserver=None, sshkey=None, password=None, paramiko=None,
292 292 timeout=10, **extra_args
293 293 ):
294 294 if profile:
295 295 super(Client, self).__init__(debug=debug, profile=profile)
296 296 else:
297 297 super(Client, self).__init__(debug=debug)
298 298 if context is None:
299 299 context = zmq.Context.instance()
300 300 self._context = context
301 301
302 302 self._setup_profile_dir(self.profile, profile_dir, ipython_dir)
303 303 if self._cd is not None:
304 304 if url_or_file is None:
305 305 url_or_file = pjoin(self._cd.security_dir, 'ipcontroller-client.json')
306 306 assert url_or_file is not None, "I can't find enough information to connect to a hub!"\
307 307 " Please specify at least one of url_or_file or profile."
308 308
309 309 try:
310 310 util.validate_url(url_or_file)
311 311 except AssertionError:
312 312 if not os.path.exists(url_or_file):
313 313 if self._cd:
314 314 url_or_file = os.path.join(self._cd.security_dir, url_or_file)
315 315 assert os.path.exists(url_or_file), "Not a valid connection file or url: %r"%url_or_file
316 316 with open(url_or_file) as f:
317 317 cfg = json.loads(f.read())
318 318 else:
319 319 cfg = {'url':url_or_file}
320 320
321 321 # sync defaults from args, json:
322 322 if sshserver:
323 323 cfg['ssh'] = sshserver
324 324 if exec_key:
325 325 cfg['exec_key'] = exec_key
326 326 exec_key = cfg['exec_key']
327 327 location = cfg.setdefault('location', None)
328 328 cfg['url'] = util.disambiguate_url(cfg['url'], location)
329 329 url = cfg['url']
330 330 proto,addr,port = util.split_url(url)
331 331 if location is not None and addr == '127.0.0.1':
332 332 # location specified, and connection is expected to be local
333 333 if location not in LOCAL_IPS and not sshserver:
334 334 # load ssh from JSON *only* if the controller is not on
335 335 # this machine
336 336 sshserver=cfg['ssh']
337 337 if location not in LOCAL_IPS and not sshserver:
338 338 # warn if no ssh specified, but SSH is probably needed
339 339 # This is only a warning, because the most likely cause
340 340 # is a local Controller on a laptop whose IP is dynamic
341 341 warnings.warn("""
342 342 Controller appears to be listening on localhost, but not on this machine.
343 343 If this is true, you should specify Client(...,sshserver='you@%s')
344 344 or instruct your controller to listen on an external IP."""%location,
345 345 RuntimeWarning)
346 346 elif not sshserver:
347 347 # otherwise sync with cfg
348 348 sshserver = cfg['ssh']
349 349
350 350 self._config = cfg
351 351
352 352 self._ssh = bool(sshserver or sshkey or password)
353 353 if self._ssh and sshserver is None:
354 354 # default to ssh via localhost
355 355 sshserver = url.split('://')[1].split(':')[0]
356 356 if self._ssh and password is None:
357 357 if tunnel.try_passwordless_ssh(sshserver, sshkey, paramiko):
358 358 password=False
359 359 else:
360 360 password = getpass("SSH Password for %s: "%sshserver)
361 361 ssh_kwargs = dict(keyfile=sshkey, password=password, paramiko=paramiko)
362 362
363 363 # configure and construct the session
364 364 if exec_key is not None:
365 365 if os.path.isfile(exec_key):
366 366 extra_args['keyfile'] = exec_key
367 367 else:
368 368 exec_key = util.asbytes(exec_key)
369 369 extra_args['key'] = exec_key
370 370 self.session = Session(**extra_args)
371 371
372 self._query_socket = self._context.socket(zmq.XREQ)
372 self._query_socket = self._context.socket(zmq.DEALER)
373 373 self._query_socket.setsockopt(zmq.IDENTITY, util.asbytes(self.session.session))
374 374 if self._ssh:
375 375 tunnel.tunnel_connection(self._query_socket, url, sshserver, **ssh_kwargs)
376 376 else:
377 377 self._query_socket.connect(url)
378 378
379 379 self.session.debug = self.debug
380 380
381 381 self._notification_handlers = {'registration_notification' : self._register_engine,
382 382 'unregistration_notification' : self._unregister_engine,
383 383 'shutdown_notification' : lambda msg: self.close(),
384 384 }
385 385 self._queue_handlers = {'execute_reply' : self._handle_execute_reply,
386 386 'apply_reply' : self._handle_apply_reply}
387 387 self._connect(sshserver, ssh_kwargs, timeout)
388 388
def __del__(self):
    """Finalizer: close this client's sockets via ``close()``.

    Only sockets are cleaned up here; the zmq context is deliberately
    left untouched.
    """
    self.close()
392 392
def _setup_profile_dir(self, profile, profile_dir, ipython_dir):
    """Resolve the profile directory and store it on ``self._cd``.

    ``profile_dir`` (an explicit path) takes precedence over ``profile``
    (a name looked up under ``ipython_dir``).  When ``ipython_dir`` is None
    it is obtained from ``get_ipython_dir()``.  If neither is given, or the
    lookup raises ``ProfileDirError``, ``self._cd`` is set to None.
    """
    if ipython_dir is None:
        ipython_dir = get_ipython_dir()

    located = None
    try:
        if profile_dir is not None:
            located = ProfileDir.find_profile_dir(profile_dir)
        elif profile is not None:
            located = ProfileDir.find_profile_dir_by_name(ipython_dir, profile)
    except ProfileDirError:
        # a missing profile is not fatal: fall back to "no profile dir"
        located = None
    self._cd = located
410 410
def _update_engines(self, engines):
    """Merge newly registered engines into ``self._engines`` and ``self._ids``.

    ``engines`` is a dict of the form ``{id: uuid}``; ids are coerced to int.
    After merging, ``self._ids`` is kept sorted.  If the engine ids are no
    longer the contiguous sequence 0..n-1 while a 'pure' task scheduler is
    in use with a live task socket, task scheduling is stopped.
    """
    for raw_id, engine_uuid in engines.items():
        engine_id = int(raw_id)
        self._engines[engine_id] = engine_uuid
        self._ids.append(engine_id)
    self._ids = sorted(self._ids)

    contiguous = sorted(self._engines.keys()) == list(range(len(self._engines)))
    if not contiguous and self._task_scheme == 'pure' and self._task_socket:
        self._stop_scheduling_tasks()
421 421
def _stop_scheduling_tasks(self):
    """Disable task farming after an engine unregistered under pure ZMQ scheduling.

    Closes and drops the task socket, then emits a RuntimeWarning.  The
    warning gains an extra sentence when there are still outstanding msg_ids.
    """
    self._task_socket.close()
    self._task_socket = None

    pieces = ["An engine has been unregistered, and we are using pure "
              "ZMQ task scheduling. Task farming will be disabled."]
    if self.outstanding:
        pieces.append(" If you were running tasks when this happened, "
                      "some `outstanding` msg_ids may never resolve.")
    warnings.warn("".join(pieces), RuntimeWarning)
434 434
def _build_targets(self, targets):
    """Normalize a target specification into engine identities and ids.

    Accepted forms of ``targets``:

    * None or the string 'all' (case-insensitive): every known engine
    * int: a single engine id; a negative value indexes into the id list
    * slice: applied to the positions of the current id list
    * tuple/list/xrange of engine ids

    Returns a 2-tuple ``(uuids, int_ids)``: the engine identities (as bytes)
    first, then the list of integer engine ids.

    Raises NoEnginesRegistered when no engines are known, TypeError for an
    unsupported target type or string, and IndexError for an unknown int id.
    """
    # `self.ids` flushes pending notifications, so only give up if the
    # list is still empty after that.
    if not self._ids and not self.ids:
        raise error.NoEnginesRegistered("Can't build targets without any engines")

    if targets is None:
        targets = self._ids
    elif isinstance(targets, basestring):
        if targets.lower() != 'all':
            raise TypeError("%r not valid str target, must be 'all'"%(targets))
        targets = self._ids
    elif isinstance(targets, int):
        if targets < 0:
            targets = self.ids[targets]
        if targets not in self._ids:
            raise IndexError("No such engine: %i"%targets)
        targets = [targets]

    if isinstance(targets, slice):
        positions = range(len(self._ids))[targets]
        known_ids = self.ids
        targets = [known_ids[pos] for pos in positions]

    if not isinstance(targets, (tuple, list, xrange)):
        raise TypeError("targets by int/slice/collection of ints only, not %s"%(type(targets)))

    uuids = [util.asbytes(self._engines[eid]) for eid in targets]
    return uuids, list(targets)
467 467
def _connect(self, sshserver, ssh_kwargs, timeout):
    """Set up all socket connections to the cluster.  Called from __init__.

    Sends a 'connection_request' on the query socket, waits up to
    ``timeout`` seconds for the Hub's reply, and then creates and connects
    one socket per channel advertised in the reply (mux, task, notification,
    control, iopub).  Finally the engine table is seeded from the reply.

    Parameters
    ----------
    sshserver : passed through to ``tunnel.tunnel_connection`` when
        ``self._ssh`` is set
    ssh_kwargs : dict
        extra keyword arguments for ``tunnel.tunnel_connection``
    timeout : number
        seconds to wait for the Hub's reply

    Raises
    ------
    error.TimeoutError
        if the Hub does not answer within ``timeout`` seconds
    Exception
        if the Hub's reply status is not 'ok'

    On either failure ``self._connected`` is reset to False, so the client
    is never left claiming a connection it does not have.
    """

    # Maybe allow reconnecting?
    if self._connected:
        return
    self._connected = True

    def connect_socket(s, url):
        url = util.disambiguate_url(url, self._config['location'])
        if self._ssh:
            return tunnel.tunnel_connection(s, url, sshserver, **ssh_kwargs)
        else:
            return s.connect(url)

    self.session.send(self._query_socket, 'connection_request')
    # use Poller because zmq.select has wrong units in pyzmq 2.1.7
    poller = zmq.Poller()
    poller.register(self._query_socket, zmq.POLLIN)
    # poll expects milliseconds, timeout is seconds
    evts = poller.poll(timeout*1000)
    if not evts:
        # mirror the failed-status branch below: we are not connected
        self._connected = False
        raise error.TimeoutError("Hub connection request timed out")
    idents,msg = self.session.recv(self._query_socket,mode=0)
    if self.debug:
        pprint(msg)
    msg = Message(msg)
    content = msg.content
    self._config['registration'] = dict(content)
    if content.status == 'ok':
        ident = util.asbytes(self.session.session)
        if content.mux:
            self._mux_socket = self._context.socket(zmq.DEALER)
            self._mux_socket.setsockopt(zmq.IDENTITY, ident)
            connect_socket(self._mux_socket, content.mux)
        if content.task:
            self._task_scheme, task_addr = content.task
            self._task_socket = self._context.socket(zmq.DEALER)
            self._task_socket.setsockopt(zmq.IDENTITY, ident)
            connect_socket(self._task_socket, task_addr)
        if content.notification:
            self._notification_socket = self._context.socket(zmq.SUB)
            connect_socket(self._notification_socket, content.notification)
            self._notification_socket.setsockopt(zmq.SUBSCRIBE, b'')
        # NOTE: content.query is not used here; the query socket is the one
        # this method already talks to the Hub with.
        if content.control:
            self._control_socket = self._context.socket(zmq.DEALER)
            self._control_socket.setsockopt(zmq.IDENTITY, ident)
            connect_socket(self._control_socket, content.control)
        if content.iopub:
            self._iopub_socket = self._context.socket(zmq.SUB)
            self._iopub_socket.setsockopt(zmq.SUBSCRIBE, b'')
            self._iopub_socket.setsockopt(zmq.IDENTITY, ident)
            connect_socket(self._iopub_socket, content.iopub)
        self._update_engines(dict(content.engines))
    else:
        self._connected = False
        raise Exception("Failed to connect!")
530 530
531 531 #--------------------------------------------------------------------------
532 532 # handlers and callbacks for incoming messages
533 533 #--------------------------------------------------------------------------
534 534
def _unwrap_exception(self, content):
    """Rebuild an exception from message ``content`` via ``error.unwrap_exception``.

    When the exception carries engine info, its 'engine_uuid' is looked up
    in ``self._engines`` and the result stored under 'engine_id'.
    Returns the exception object (it is not raised here).
    """
    exc = error.unwrap_exception(content)
    info = exc.engine_info
    if info:
        info['engine_id'] = self._engines[info['engine_uuid']]
    return exc
544 544
def _extract_metadata(self, header, parent, content):
    """Build a metadata dict for a reply from its header, parent header and content.

    Always present: 'msg_id', 'received' (current time), 'engine_uuid',
    'follow', 'after', 'status'.  Added when available: 'engine_id'
    (looked up from the engine uuid), 'submitted' (parent date),
    'started' and 'completed' (reply header).
    """
    msg_id = parent['msg_id']
    received = datetime.now()
    engine_uuid = header.get('engine', None)
    follow = parent.get('follow', [])
    after = parent.get('after', [])

    md = dict(msg_id=msg_id, received=received, engine_uuid=engine_uuid,
              follow=follow, after=after, status=content['status'])

    if engine_uuid is not None:
        md['engine_id'] = self._engines.get(engine_uuid, None)

    # timestamps are optional in both headers
    if 'date' in parent:
        md['submitted'] = parent['date']
    if 'started' in header:
        md['started'] = header['started']
    if 'date' in header:
        md['completed'] = header['date']
    return md
564 564
def _register_engine(self, msg):
    """Handle a registration notification by adding the new engine.

    The engine id and its queue identity are read from ``msg['content']``
    and handed to ``_update_engines`` as a one-entry ``{id: uuid}`` dict.
    """
    info = msg['content']
    engine_id = info['id']
    self._update_engines({engine_id: info['queue']})
571 571
def _unregister_engine(self, msg):
    """Handle an unregistration notification for an engine that has died.

    A known engine is removed from ``self._ids`` and ``self._engines`` and
    its stranded messages are resolved.  With a live task socket under the
    'pure' scheme, task scheduling is stopped afterwards.
    """
    engine_id = int(msg['content']['id'])
    if engine_id in self._ids:
        self._ids.remove(engine_id)
        engine_uuid = self._engines.pop(engine_id)

        self._handle_stranded_msgs(engine_id, engine_uuid)

    if self._task_socket and self._task_scheme == 'pure':
        self._stop_scheduling_tasks()
584 584
def _handle_stranded_msgs(self, eid, uuid):
    """Resolve messages that were on an engine when it unregistered.

    Every msg_id still outstanding for ``uuid`` that has no result yet gets
    a synthetic error reply (an EngineError), fed through
    ``_handle_apply_reply``.

    This may fire prematurely: an engine can go down right after finishing
    a task, in which case the client learns of the unregistration first and
    receives the successful result later.
    """
    # iterate over a copy: handling a reply removes entries from the set
    for msg_id in list(self._outstanding_dict[uuid]):
        if msg_id in self.results:
            # a result has already arrived for this one
            continue
        try:
            raise error.EngineError("Engine %r died while running task %r"%(eid, msg_id))
        except:
            # raised only so wrap_exception() can capture it as reply content
            content = error.wrap_exception()
        # build a fake message:
        fake_reply = dict(
            parent_header={'msg_id': msg_id},
            header={'engine': uuid, 'date': datetime.now()},
            content=content,
        )
        self._handle_apply_reply(fake_reply)
611 611
def _handle_execute_reply(self, msg):
    """Store the reply to an execute_request in ``self.results``.

    execute messages are never actually used; apply is used instead.
    A reply for a msg_id that is not outstanding is reported on stdout as
    stale (known from history) or unknown, and is stored all the same.
    """
    msg_id = msg['parent_header']['msg_id']
    if msg_id in self.outstanding:
        self.outstanding.remove(msg_id)
    elif msg_id in self.history:
        print ("got stale result: %s"%msg_id)
    else:
        print ("got unknown result: %s"%msg_id)
    self.results[msg_id] = self._unwrap_exception(msg['content'])
628 628
def _handle_apply_reply(self, msg):
    """Store the reply to an apply_request in ``self.results`` and ``self.metadata``.

    A reply whose msg_id is not outstanding is reported on stdout (stale if
    it is in the history, unknown otherwise) but is processed anyway.
    The stored result depends on the reply status: 'ok' stores the
    unserialized object, 'aborted' stores a TaskAborted, 'resubmitted'
    stores nothing, anything else stores the unwrapped exception.
    """
    parent = msg['parent_header']
    msg_id = parent['msg_id']
    if msg_id in self.outstanding:
        self.outstanding.remove(msg_id)
    elif msg_id in self.history:
        print ("got stale result: %s"%msg_id)
        print (self.results[msg_id])
        print (msg)
    else:
        print ("got unknown result: %s"%msg_id)

    content = msg['content']
    header = msg['header']

    # construct metadata:
    md = self.metadata[msg_id]
    md.update(self._extract_metadata(header, parent, content))
    # written back explicitly; possibly redundant
    self.metadata[msg_id] = md

    # the engine no longer owes us this message
    engine_pending = self._outstanding_dict[md['engine_uuid']]
    if msg_id in engine_pending:
        engine_pending.remove(msg_id)

    # construct result:
    status = content['status']
    if status == 'ok':
        self.results[msg_id] = util.unserialize_object(msg['buffers'])[0]
    elif status == 'aborted':
        self.results[msg_id] = error.TaskAborted(msg_id)
    elif status == 'resubmitted':
        # TODO: handle resubmission
        pass
    else:
        self.results[msg_id] = self._unwrap_exception(content)
665 665
def _flush_notifications(self):
    """Flush notifications of engine registrations waiting in the ZMQ queue.

    Messages are received from the notification socket without blocking
    until none are left; each is dispatched to the handler registered for
    its msg_type in ``self._notification_handlers``.

    Raises
    ------
    Exception
        if a message arrives whose msg_type has no registered handler
    """
    idents,msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)
    while msg is not None:
        if self.debug:
            pprint(msg)
        msg_type = msg['header']['msg_type']
        handler = self._notification_handlers.get(msg_type, None)
        if handler is None:
            # msg is a plain dict here, so report the msg_type read from
            # its header rather than reaching for an attribute on it
            raise Exception("Unhandled message type: %s"%msg_type)
        else:
            handler(msg)
        idents,msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)
680 680
def _flush_results(self, sock):
    """Flush task or queue results waiting in the ZMQ queue.

    Messages are received from ``sock`` without blocking until none are
    left; each is dispatched to the handler registered for its msg_type in
    ``self._queue_handlers``.

    Raises
    ------
    Exception
        if a message arrives whose msg_type has no registered handler
    """
    idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
    while msg is not None:
        if self.debug:
            pprint(msg)
        msg_type = msg['header']['msg_type']
        handler = self._queue_handlers.get(msg_type, None)
        if handler is None:
            # msg is a plain dict here, so report the msg_type read from
            # its header rather than reaching for an attribute on it
            raise Exception("Unhandled message type: %s"%msg_type)
        else:
            handler(msg)
        idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
694 694
def _flush_control(self, sock):
    """Drain replies waiting on the control channel without blocking.

    Nothing is read unless some control replies are being ignored.  Each
    drained reply decrements ``self._ignored_control_replies``; the replies
    themselves are discarded (only pretty-printed in debug mode).
    """
    if self._ignored_control_replies <= 0:
        return
    while True:
        idents, msg = self.session.recv(sock, mode=zmq.NOBLOCK)
        if msg is None:
            break
        self._ignored_control_replies -= 1
        if self.debug:
            pprint(msg)
708 708
def _flush_ignored_control(self):
    """Receive and discard every control reply that is being ignored.

    One message is received from the control socket per ignored reply,
    and the counter is decremented after each receive.
    """
    remaining = self._ignored_control_replies
    while remaining > 0:
        self.session.recv(self._control_socket)
        remaining -= 1
        self._ignored_control_replies = remaining
714 714
def _flush_ignored_hub_replies(self):
    """Drain and discard any replies waiting on the query socket, without blocking."""
    while True:
        ident, msg = self.session.recv(self._query_socket, mode=zmq.NOBLOCK)
        if msg is None:
            break
719 719
def _flush_iopub(self, sock):
    """Drain messages waiting on the iopub channel into ``self.metadata``.

    Messages are read without blocking until none are left.  Each one is
    filed under the msg_id of its parent header:

    * 'stream': the data is appended to the entry named by content['name']
    * 'pyerr': the unwrapped exception is stored under 'pyerr'
    * 'pyin': the code is stored under 'pyin'
    * anything else: content['data'] (or '') is stored under the msg_type
    """
    while True:
        idents, msg = self.session.recv(sock, mode=zmq.NOBLOCK)
        if msg is None:
            break
        if self.debug:
            pprint(msg)
        msg_id = msg['parent_header']['msg_id']
        content = msg['content']
        msg_type = msg['header']['msg_type']

        # init metadata:
        md = self.metadata[msg_id]

        if msg_type == 'stream':
            stream_name = content['name']
            so_far = md[stream_name] or ''
            md[stream_name] = so_far + content['data']
        elif msg_type == 'pyerr':
            md.update({'pyerr' : self._unwrap_exception(content)})
        elif msg_type == 'pyin':
            md.update({'pyin' : content['code']})
        else:
            md.update({msg_type : content.get('data', '')})

        # written back explicitly; possibly redundant
        self.metadata[msg_id] = md
752 752
753 753 #--------------------------------------------------------------------------
754 754 # len, getitem
755 755 #--------------------------------------------------------------------------
756 756
def __len__(self):
    """Return the number of engines, i.e. the length of ``self.ids``."""
    engine_ids = self.ids
    return len(engine_ids)
760 760
def __getitem__(self, key):
    """Index access: return the result of ``self.direct_view(key)``.

    ``key`` must be an int, a slice, or a list/tuple/xrange of ints;
    anything else raises TypeError.
    """
    if isinstance(key, (int, slice, tuple, list, xrange)):
        return self.direct_view(key)
    raise TypeError("key by int/slice/iterable of ints only, not %s"%(type(key)))
769 769
770 770 #--------------------------------------------------------------------------
771 771 # Begin public methods
772 772 #--------------------------------------------------------------------------
773 773
@property
def ids(self):
    """Up-to-date list of engine ids.

    Pending notifications are flushed first, and a fresh copy of the
    internal list is returned so callers cannot mutate client state.
    """
    self._flush_notifications()
    snapshot = list(self._ids)
    return snapshot
780 780
def close(self):
    """Close every open zmq socket held by this client.

    Each attribute whose name ends in 'socket' is inspected and closed if
    it is a ``zmq.Socket`` that is still open.  Does nothing if the client
    is already closed.
    """
    if self._closed:
        return
    for attr_name in dir(self):
        if not attr_name.endswith('socket'):
            continue
        candidate = getattr(self, attr_name)
        if isinstance(candidate, zmq.Socket) and not candidate.closed:
            candidate.close()
    self._closed = True
789 789
def spin(self):
    """Flush everything waiting in the ZMQ queues.

    In order: registration notifications, mux results, task results,
    control replies, iopub messages, and ignored Hub replies.  A channel
    is skipped when its socket is not set.
    """
    if self._notification_socket:
        self._flush_notifications()
    # both result channels share one flush routine
    for attr_name in ('_mux_socket', '_task_socket'):
        result_socket = getattr(self, attr_name)
        if result_socket:
            self._flush_results(result_socket)
    if self._control_socket:
        self._flush_control(self._control_socket)
    if self._iopub_socket:
        self._flush_iopub(self._iopub_socket)
    if self._query_socket:
        self._flush_ignored_hub_replies()
806 806
def wait(self, jobs=None, timeout=-1):
    """Wait on one or more `jobs`, for up to `timeout` seconds.

    Parameters
    ----------

    jobs : int, str, or list of ints and/or strs, or one or more AsyncResult objects
            ints are indices to self.history
            strs are msg_ids
            default: wait on all outstanding messages
    timeout : float
            a time in seconds, after which to give up.
            default is -1, which means no timeout

    Returns
    -------

    True : when all msg_ids are done
    False : timeout reached, some msg_ids still outstanding
    """
    started = time.time()
    if jobs is None:
        theids = self.outstanding
    else:
        if isinstance(jobs, (int, basestring, AsyncResult)):
            jobs = [jobs]
        theids = set()
        for job in jobs:
            if isinstance(job, AsyncResult):
                theids.update(job.msg_ids)
            elif isinstance(job, int):
                # index access
                theids.add(self.history[job])
            else:
                theids.add(job)

    if not theids.intersection(self.outstanding):
        return True

    self.spin()
    while theids.intersection(self.outstanding):
        timed_out = timeout >= 0 and (time.time() - started) > timeout
        if timed_out:
            break
        # brief pause so polling does not spin the CPU
        time.sleep(1e-3)
        self.spin()
    return not theids.intersection(self.outstanding)
851 851
852 852 #--------------------------------------------------------------------------
853 853 # Control methods
854 854 #--------------------------------------------------------------------------
855 855
@spin_first
def clear(self, targets=None, block=None):
    """Clear the namespace in target(s).

    A 'clear_request' is sent to every target over the control socket.
    When blocking, one reply per target is collected and the last error
    reply (if any) is raised; otherwise the replies are marked to be
    ignored later.
    """
    if block is None:
        block = self.block
    idents_to_clear = self._build_targets(targets)[0]
    for engine_ident in idents_to_clear:
        self.session.send(self._control_socket, 'clear_request', content={}, ident=engine_ident)

    failure = False
    if not block:
        self._ignored_control_replies += len(idents_to_clear)
    else:
        self._flush_ignored_control()
        for _ in range(len(idents_to_clear)):
            idents, msg = self.session.recv(self._control_socket, 0)
            if self.debug:
                pprint(msg)
            reply = msg['content']
            if reply['status'] != 'ok':
                failure = self._unwrap_exception(reply)
    if failure:
        raise failure
876 876
877 877
@spin_first
def abort(self, jobs=None, targets=None, block=None):
    """Abort specific jobs from the execution queues of target(s).

    This is a mechanism to prevent jobs that have already been submitted
    from executing.

    Parameters
    ----------

    jobs : msg_id, list of msg_ids, or AsyncResult
        The jobs to be aborted.
        If unspecified/None: every msg_id currently in `self.outstanding`.
    targets : target specification accepted by `_build_targets`
        The engines to send the abort request to (default: all engines).
    block : bool
        Whether to wait for the engines' replies (default: self.block).

    Raises
    ------
    TypeError
        if any job is neither a str msg_id nor an AsyncResult
    """
    block = self.block if block is None else block
    if jobs is None:
        # the documented default used to crash when None was iterated below
        jobs = list(self.outstanding)
    targets = self._build_targets(targets)[0]
    msg_ids = []
    if isinstance(jobs, (basestring,AsyncResult)):
        jobs = [jobs]
    bad_ids = [obj for obj in jobs if not isinstance(obj, (basestring, AsyncResult))]
    if bad_ids:
        raise TypeError("Invalid msg_id type %r, expected str or AsyncResult"%bad_ids[0])
    for j in jobs:
        if isinstance(j, AsyncResult):
            msg_ids.extend(j.msg_ids)
        else:
            msg_ids.append(j)
    content = dict(msg_ids=msg_ids)
    for t in targets:
        self.session.send(self._control_socket, 'abort_request',
                content=content, ident=t)
    error = False
    if block:
        # drop replies to earlier non-blocking requests before reading ours
        self._flush_ignored_control()
        for i in range(len(targets)):
            idents,msg = self.session.recv(self._control_socket,0)
            if self.debug:
                pprint(msg)
            if msg['content']['status'] != 'ok':
                error = self._unwrap_exception(msg['content'])
    else:
        self._ignored_control_replies += len(targets)
    if error:
        raise error
923 923
@spin_first
def shutdown(self, targets=None, restart=False, hub=False, block=None):
    """Terminate one or more engine processes, optionally including the hub.

    A 'shutdown_request' is sent to each target over the control socket.
    With ``hub=True`` all engines are targeted, the engine replies are
    always awaited, and the Hub itself is then asked to shut down over the
    query socket.  The last error reply seen (if any) is raised.
    """
    if block is None:
        block = self.block
    if hub:
        targets = 'all'
    engine_idents = self._build_targets(targets)[0]
    for engine_ident in engine_idents:
        self.session.send(self._control_socket, 'shutdown_request',
                          content={'restart':restart}, ident=engine_ident)

    failure = False
    if block or hub:
        self._flush_ignored_control()
        for _ in range(len(engine_idents)):
            idents, msg = self.session.recv(self._control_socket, 0)
            if self.debug:
                pprint(msg)
            reply = msg['content']
            if reply['status'] != 'ok':
                failure = self._unwrap_exception(reply)
    else:
        self._ignored_control_replies += len(engine_idents)

    if hub:
        # short pause before asking the Hub itself to go down
        time.sleep(0.25)
        self.session.send(self._query_socket, 'shutdown_request')
        idents, msg = self.session.recv(self._query_socket, 0)
        if self.debug:
            pprint(msg)
        hub_reply = msg['content']
        if hub_reply['status'] != 'ok':
            failure = self._unwrap_exception(hub_reply)

    if failure:
        raise failure
957 957
958 958 #--------------------------------------------------------------------------
959 959 # Execution related methods
960 960 #--------------------------------------------------------------------------
961 961
def _maybe_raise(self, result):
    """Return ``result`` unchanged, unless it is a RemoteError, in which case raise it."""
    if not isinstance(result, error.RemoteError):
        return result
    raise result
968 968
def send_apply_message(self, socket, f, args=None, kwargs=None, subheader=None, track=False,
                        ident=None):
    """Construct and send an apply message via a socket.

    This is the principal method with which all engine execution is
    performed by views.

    ``f`` must be callable, ``args`` a tuple or list, and ``kwargs`` and
    ``subheader`` dicts (each defaults to an empty container); otherwise
    TypeError is raised.  The msg_id of the sent message is recorded as
    outstanding, appended to the history, and timestamped in the metadata.
    Returns the message as returned by ``session.send``.
    """

    assert not self._closed, "cannot use me anymore, I'm closed!"
    # defaults:
    if args is None:
        args = []
    if kwargs is None:
        kwargs = {}
    if subheader is None:
        subheader = {}

    # validate arguments
    if not callable(f):
        raise TypeError("f must be callable, not %s"%type(f))
    if not isinstance(args, (tuple, list)):
        raise TypeError("args must be tuple or list, not %s"%type(args))
    if not isinstance(kwargs, dict):
        raise TypeError("kwargs must be dict, not %s"%type(kwargs))
    if not isinstance(subheader, dict):
        raise TypeError("subheader must be dict, not %s"%type(subheader))

    packed = util.pack_apply_message(f, args, kwargs)
    sent = self.session.send(socket, "apply_request", buffers=packed, ident=ident,
                             subheader=subheader, track=track)

    msg_id = sent['header']['msg_id']
    self.outstanding.add(msg_id)
    if ident:
        # possibly routed to a specific engine: the last ident is the engine's
        engine_ident = ident[-1] if isinstance(ident, list) else ident
        if engine_ident in self._engines.values():
            # save for later, in case of engine death
            self._outstanding_dict[engine_ident].add(msg_id)
    self.history.append(msg_id)
    self.metadata[msg_id]['submitted'] = datetime.now()

    return sent
1010 1010
1011 1011 #--------------------------------------------------------------------------
1012 1012 # construct a View object
1013 1013 #--------------------------------------------------------------------------
1014 1014
def load_balanced_view(self, targets=None):
    """Construct a LoadBalancedView object.

    If no arguments are specified, create a LoadBalancedView
    using all engines.

    Parameters
    ----------

    targets: list,slice,int,etc. [default: use all engines]
        The subset of engines across which to load-balance
    """
    if targets is None or targets == 'all':
        engine_ids = None
    else:
        engine_ids = self._build_targets(targets)[1]
    return LoadBalancedView(client=self, socket=self._task_socket, targets=engine_ids)
1032 1032
def direct_view(self, targets='all'):
    """Construct a DirectView object.

    If no targets are specified, create a DirectView
    using all engines.

    Parameters
    ----------

    targets: list,slice,int,etc. [default: use all engines]
        The engines to use for the View
    """
    wants_single = isinstance(targets, int)
    # 'all' is passed through untouched so it is evaluated lazily at each execution
    if targets != 'all':
        resolved = self._build_targets(targets)[1]
        targets = resolved[0] if wants_single else resolved
    return DirectView(client=self, socket=self._mux_socket, targets=targets)
1052 1052
1053 1053 #--------------------------------------------------------------------------
1054 1054 # Query methods
1055 1055 #--------------------------------------------------------------------------
1056 1056
@spin_first
def get_result(self, indices_or_msg_ids=None, block=None):
    """Retrieve a result by msg_id or history index, wrapped in an AsyncResult object.

    If the client already has the results, no request to the Hub will be made.

    This is a convenient way to construct AsyncResult objects, which are wrappers
    that include metadata about execution, and allow for awaiting results that
    were not submitted by this Client.

    It can also be a convenient way to retrieve the metadata associated with
    blocking execution.

    Examples
    --------
    ::

        In [10]: r = client.apply()

    Parameters
    ----------

    indices_or_msg_ids : integer history index, str msg_id, or list of either
        The indices or msg_ids of results to be retrieved
        (default: the most recent entry in the history)

    block : bool
        Whether to wait for the result to be done

    Returns
    -------

    AsyncResult
        A single AsyncResult object will always be returned.

    AsyncHubResult
        A subclass of AsyncResult that retrieves results from the Hub;
        used when any requested msg_id is unknown to this client.

    """
    if block is None:
        block = self.block
    if indices_or_msg_ids is None:
        indices_or_msg_ids = -1
    if not isinstance(indices_or_msg_ids, (list,tuple)):
        indices_or_msg_ids = [indices_or_msg_ids]

    theids = []
    for entry in indices_or_msg_ids:
        if isinstance(entry, int):
            entry = self.history[entry]
        if not isinstance(entry, basestring):
            raise TypeError("indices must be str or int, not %r"%entry)
        theids.append(entry)

    # anything this client has never seen must be fetched from the Hub
    needs_hub = any(msg_id not in self.history and msg_id not in self.results
                    for msg_id in theids)
    result_class = AsyncHubResult if needs_hub else AsyncResult
    ar = result_class(self, msg_ids=theids)

    if block:
        ar.wait()

    return ar
1122 1122
@spin_first
def resubmit(self, indices_or_msg_ids=None, subheader=None, block=None):
    """Resubmit one or more tasks.

    in-flight tasks may not be resubmitted.

    Parameters
    ----------

    indices_or_msg_ids : integer history index, str msg_id, or list of either
        The indices or msg_ids of the tasks to be resubmitted
        (default: the most recent entry in the history)

    block : bool
        Whether to wait for the result to be done

    Returns
    -------

    AsyncHubResult
        A subclass of AsyncResult that retrieves results from the Hub

    """
    if block is None:
        block = self.block
    if indices_or_msg_ids is None:
        indices_or_msg_ids = -1
    if not isinstance(indices_or_msg_ids, (list,tuple)):
        indices_or_msg_ids = [indices_or_msg_ids]

    theids = []
    for entry in indices_or_msg_ids:
        if isinstance(entry, int):
            entry = self.history[entry]
        if not isinstance(entry, basestring):
            raise TypeError("indices must be str or int, not %r"%entry)
        theids.append(entry)

    # forget everything known locally about the tasks being resubmitted
    for msg_id in theids:
        self.outstanding.discard(msg_id)
        if msg_id in self.history:
            self.history.remove(msg_id)
        self.results.pop(msg_id, None)
        self.metadata.pop(msg_id, None)

    request = dict(msg_ids = theids)
    self.session.send(self._query_socket, 'resubmit_request', request)

    # block until the Hub's reply is readable, then fetch it
    zmq.select([self._query_socket], [], [])
    idents, msg = self.session.recv(self._query_socket, zmq.NOBLOCK)
    if self.debug:
        pprint(msg)
    reply = msg['content']
    if reply['status'] != 'ok':
        raise self._unwrap_exception(reply)

    ar = AsyncHubResult(self, msg_ids=theids)

    if block:
        ar.wait()

    return ar
1184 1184
@spin_first
def result_status(self, msg_ids, status_only=True):
    """Check on the status of the result(s) of the apply request with `msg_ids`.

    If status_only is False, then the actual results will be retrieved, else
    only the status of the results will be checked.

    Parameters
    ----------

    msg_ids : list of msg_ids
        if int:
            Passed as index to self.history for convenience.
    status_only : bool (default: True)
        if False:
            Retrieve the actual results of completed tasks.

    Returns
    -------

    results : dict
        There will always be the keys 'pending' and 'completed', which will
        be lists of msg_ids that are incomplete or complete. If `status_only`
        is False, then completed results will be keyed by their `msg_id`.

    Raises
    ------

    TypeError
        If an entry of `msg_ids` is neither an int nor a string.

    The exception built by `self._unwrap_exception` is raised when the Hub
    replies with a non-'ok' status, or when exactly one result was fetched
    from the Hub and it failed.  Multiple failures are handed to
    `error.collect_exceptions`.
    """
    if not isinstance(msg_ids, (list,tuple)):
        msg_ids = [msg_ids]

    theids = []
    for msg_id in msg_ids:
        if isinstance(msg_id, int):
            msg_id = self.history[msg_id]
        if not isinstance(msg_id, basestring):
            raise TypeError("msg_ids must be str, not %r"%msg_id)
        theids.append(msg_id)

    completed = []
    local_results = {}

    # Local shortcut: results already cached on this client are answered
    # without asking the Hub.  The ids are partitioned into a *new* list,
    # because removing from `theids` while iterating over it skips the
    # element following each removal.
    # (to temporarily disable the shortcut, skip this partitioning.)
    remote_ids = []
    for msg_id in theids:
        if msg_id in self.results:
            completed.append(msg_id)
            local_results[msg_id] = self.results[msg_id]
        else:
            remote_ids.append(msg_id)
    theids = remote_ids

    if theids: # some not locally cached
        content = dict(msg_ids=theids, status_only=status_only)
        self.session.send(self._query_socket, "result_request", content=content)
        # block until the reply is readable, then fetch it without blocking
        zmq.select([self._query_socket], [], [])
        idents,msg = self.session.recv(self._query_socket, zmq.NOBLOCK)
        if self.debug:
            pprint(msg)
        content = msg['content']
        if content['status'] != 'ok':
            raise self._unwrap_exception(content)
        buffers = msg['buffers']
    else:
        content = dict(completed=[],pending=[])

    content['completed'].extend(completed)

    if status_only:
        return content

    failures = []
    # load cached results into result:
    content.update(local_results)

    # update cache with results:
    # (`buffers` is only bound when something was requested from the Hub,
    # which is exactly when this loop has anything to iterate over)
    for msg_id in sorted(theids):
        if msg_id in content['completed']:
            rec = content[msg_id]
            parent = rec['header']
            header = rec['result_header']
            rcontent = rec['result_content']
            iodict = rec['io']
            if isinstance(rcontent, str):
                rcontent = self.session.unpack(rcontent)

            md = self.metadata[msg_id]
            md.update(self._extract_metadata(header, parent, rcontent))
            md.update(iodict)

            if rcontent['status'] == 'ok':
                # each successful result consumes its share of the buffers
                res,buffers = util.unserialize_object(buffers)
            else:
                # only dump the failed reply when debugging was requested
                if self.debug:
                    pprint(rcontent)
                res = self._unwrap_exception(rcontent)
                failures.append(res)

            self.results[msg_id] = res
            content[msg_id] = res

    if len(theids) == 1 and failures:
        raise failures[0]

    error.collect_exceptions(failures, "result_status")
    return content
1284 1284
@spin_first
def queue_status(self, targets='all', verbose=False):
    """Ask the Hub for the status of engine queues.

    Parameters
    ----------

    targets : int/str/list of ints/strs
        the engines whose states are to be queried.
        default : all
    verbose : bool
        Whether to return lengths only, or lists of ids for each element

    Returns
    -------

    The re-keyed reply content; when `targets` is a single int, only the
    entry for that engine is returned.
    """
    request = dict(targets=self._build_targets(targets)[1], verbose=verbose)
    self.session.send(self._query_socket, "queue_request", content=request)
    idents, reply = self.session.recv(self._query_socket, 0)
    if self.debug:
        pprint(reply)
    reply_content = reply['content']
    # 'status' is stripped from the reply before it is re-keyed / unwrapped
    if reply_content.pop('status') != 'ok':
        raise self._unwrap_exception(reply_content)
    by_engine = rekey(reply_content)
    return by_engine[targets] if isinstance(targets, int) else by_engine
1313 1313
@spin_first
def purge_results(self, jobs=[], targets=[]):
    """Ask the Hub to forget results.

    Results can be dropped one by one (by msg_id), or the whole history
    of particular targets can be dropped.

    Use `purge_results('all')` to scrub everything from the Hub's db.

    Parameters
    ----------

    jobs : str or list of str or AsyncResult objects
        the msg_ids whose results should be forgotten.
    targets : int/str/list of ints/strs
        The targets, by int_id, whose entire history is to be purged.

    Raises
    ------

    ValueError
        If neither `jobs` nor `targets` is given.
    TypeError
        If an element of `jobs` is neither a string nor an AsyncResult.
    """
    if not (targets or jobs):
        raise ValueError("Must specify at least one of `targets` and `jobs`")
    if targets:
        targets = self._build_targets(targets)[1]

    # construct msg_ids from jobs
    if jobs == 'all':
        msg_ids = jobs
    else:
        if isinstance(jobs, (basestring,AsyncResult)):
            jobs = [jobs]
        # reject the first entry that is neither a msg_id nor an AsyncResult
        for job in jobs:
            if not isinstance(job, (basestring, AsyncResult)):
                raise TypeError("Invalid msg_id type %r, expected str or AsyncResult"%job)
        msg_ids = []
        for job in jobs:
            if isinstance(job, AsyncResult):
                msg_ids.extend(job.msg_ids)
            else:
                msg_ids.append(job)

    request = dict(engine_ids=targets, msg_ids=msg_ids)
    self.session.send(self._query_socket, "purge_request", content=request)
    idents, reply = self.session.recv(self._query_socket, 0)
    if self.debug:
        pprint(reply)
    reply_content = reply['content']
    if reply_content['status'] != 'ok':
        raise self._unwrap_exception(reply_content)
1362 1362
@spin_first
def hub_history(self):
    """Fetch the Hub's history.

    Like the Client, the Hub keeps a history, which is a list of msg_ids.
    It covers the history of all clients and, depending on configuration,
    may span multiple cluster sessions.

    Any msg_id returned here is a valid argument to `get_result`.

    Returns
    -------

    msg_ids : list of strs
        list of all msg_ids, ordered by task submission time.
    """
    self.session.send(self._query_socket, "history_request", content={})
    idents, reply = self.session.recv(self._query_socket, 0)

    if self.debug:
        pprint(reply)
    reply_content = reply['content']
    if reply_content['status'] == 'ok':
        return reply_content['history']
    raise self._unwrap_exception(reply_content)
1390 1390
@spin_first
def db_query(self, query, keys=None):
    """Run a query against the Hub's TaskRecord database.

    Returns the list of task record dicts that match `query`.

    Parameters
    ----------

    query : mongodb query dict
        The search dict. See mongodb query docs for details.
    keys : list of strs [optional]
        The subset of keys to be returned.  The default is to fetch everything but buffers.
        'msg_id' will *always* be included.
    """
    if isinstance(keys, basestring):
        keys = [keys]
    self.session.send(self._query_socket, "db_request",
                      content=dict(query=query, keys=keys))
    idents, reply = self.session.recv(self._query_socket, 0)
    if self.debug:
        pprint(reply)
    reply_content = reply['content']
    if reply_content['status'] != 'ok':
        raise self._unwrap_exception(reply_content)

    records = reply_content['records']
    buffer_lens = reply_content['buffer_lens']
    result_buffer_lens = reply_content['result_buffer_lens']
    remaining = reply['buffers']

    # relink buffers: the reply carries one flat buffer list; each record
    # takes its request buffers first, then its result buffers
    for index, record in enumerate(records):
        if buffer_lens is not None:
            count = buffer_lens[index]
            record['buffers'] = remaining[:count]
            remaining = remaining[count:]
        if result_buffer_lens is not None:
            count = result_buffer_lens[index]
            record['result_buffers'] = remaining[:count]
            remaining = remaining[count:]

    return records
1434 1434
1435 1435 __all__ = [ 'Client' ]
@@ -1,173 +1,173 b''
1 1 #!/usr/bin/env python
2 2 """
3 3 A multi-heart Heartbeat system using PUB and XREP sockets. pings are sent out on the PUB,
4 4 and hearts are tracked based on their XREQ identities.
5 5
6 6 Authors:
7 7
8 8 * Min RK
9 9 """
10 10 #-----------------------------------------------------------------------------
11 11 # Copyright (C) 2010-2011 The IPython Development Team
12 12 #
13 13 # Distributed under the terms of the BSD License. The full license is in
14 14 # the file COPYING, distributed as part of this software.
15 15 #-----------------------------------------------------------------------------
16 16
17 17 from __future__ import print_function
18 18 import time
19 19 import uuid
20 20
21 21 import zmq
22 22 from zmq.devices import ThreadDevice
23 23 from zmq.eventloop import ioloop, zmqstream
24 24
25 25 from IPython.config.configurable import LoggingConfigurable
26 26 from IPython.utils.traitlets import Set, Instance, CFloat
27 27
28 28 from IPython.parallel.util import asbytes
29 29
class Heart(object):
    """A basic heart object for responding to a HeartMonitor.

    Thin wrapper, with defaults, around the most common Device model
    for answering heartbeats: a threaded zmq.FORWARDER device that uses
    SUB/DEALER sockets for in/out unless told otherwise.

    The DEALER socket's IDENTITY can be given through the optional
    `heart_id` argument; a random uuid4 is used when it is omitted."""
    device = None
    id = None

    def __init__(self, in_addr, out_addr, in_type=zmq.SUB, out_type=zmq.DEALER, heart_id=None):
        forwarder = ThreadDevice(zmq.FORWARDER, in_type, out_type)
        self.device = forwarder
        forwarder.daemon = True
        forwarder.connect_in(in_addr)
        forwarder.connect_out(out_addr)
        if in_type == zmq.SUB:
            # empty prefix: subscribe to everything
            forwarder.setsockopt_in(zmq.SUBSCRIBE, b"")
        if heart_id is None:
            heart_id = uuid.uuid4().bytes
        forwarder.setsockopt_out(zmq.IDENTITY, heart_id)
        self.id = heart_id

    def start(self):
        """Start the underlying device and return what its start() returns."""
        return self.device.start()
55 55
class HeartMonitor(LoggingConfigurable):
    """A basic HeartMonitor class

    pingstream: a PUB stream
    pongstream: a ROUTER (formerly XREP) stream
    period: the period of the heartbeat in milliseconds

    Every `period` ms `beat` publishes the current `lifetime` on the
    pingstream; `handle_pong` records which identities echo it back on the
    pongstream.  A known heart that misses two consecutive beats is
    reported to the failure handlers and dropped."""

    period=CFloat(1000, config=True,
        help='The frequency at which the Hub pings the engines for heartbeats '
        ' (in ms) [default: 1000]',
    )

    pingstream=Instance('zmq.eventloop.zmqstream.ZMQStream')
    pongstream=Instance('zmq.eventloop.zmqstream.ZMQStream')
    loop = Instance('zmq.eventloop.ioloop.IOLoop')
    def _loop_default(self):
        return ioloop.IOLoop.instance()

    # not settable:
    hearts=Set()            # identities currently considered alive
    responses=Set()         # identities that answered the latest ping(s)
    on_probation=Set()      # hearts that missed the most recent beat
    last_ping=CFloat(0)     # `lifetime` value sent with the previous ping
    _new_handlers = Set()
    _failure_handlers = Set()
    lifetime = CFloat(0)    # accumulated time between beats; used as ping payload
    tic = CFloat(0)         # wall-clock time of the latest beat

    def __init__(self, **kwargs):
        super(HeartMonitor, self).__init__(**kwargs)

        self.pongstream.on_recv(self.handle_pong)

    def start(self):
        """Schedule `beat` to run every `period` ms on `self.loop`."""
        self.caller = ioloop.PeriodicCallback(self.beat, self.period, self.loop)
        self.caller.start()

    def add_new_heart_handler(self, handler):
        """add a new handler for new hearts"""
        self.log.debug("heartbeat::new_heart_handler: %s"%handler)
        self._new_handlers.add(handler)

    def add_heart_failure_handler(self, handler):
        """add a new handler for heart failure"""
        self.log.debug("heartbeat::new heart failure handler: %s"%handler)
        self._failure_handlers.add(handler)

    def beat(self):
        """Evaluate the responses to the previous ping, then send a new one."""
        self.pongstream.flush()
        self.last_ping = self.lifetime

        toc = time.time()
        self.lifetime += toc-self.tic
        self.tic = toc
        goodhearts = self.hearts.intersection(self.responses)
        missed_beats = self.hearts.difference(goodhearts)
        # a failure is a heart that was already on probation and missed again
        heartfailures = self.on_probation.intersection(missed_beats)
        newhearts = self.responses.difference(goodhearts)
        # explicit loops: the handlers are called for their side effects only
        for heart in newhearts:
            self.handle_new_heart(heart)
        for heart in heartfailures:
            self.handle_heart_failure(heart)
        # failed hearts were just removed from self.hearts, so they drop out here
        self.on_probation = missed_beats.intersection(self.hearts)
        self.responses = set()
        self.pingstream.send(asbytes(str(self.lifetime)))

    def handle_new_heart(self, heart):
        """Notify the new-heart handlers (or log), then start tracking `heart`."""
        if self._new_handlers:
            for handler in self._new_handlers:
                handler(heart)
        else:
            self.log.info("heartbeat::yay, got new heart %s!"%heart)
        self.hearts.add(heart)

    def handle_heart_failure(self, heart):
        """Notify the failure handlers (or log), then stop tracking `heart`.

        A handler that raises is logged and does not prevent the remaining
        handlers from running."""
        if self._failure_handlers:
            for handler in self._failure_handlers:
                try:
                    handler(heart)
                except Exception:
                    self.log.error("heartbeat::Bad Handler! %s"%handler, exc_info=True)
        else:
            self.log.info("heartbeat::Heart %s failed :("%heart)
        self.hearts.remove(heart)


    def handle_pong(self, msg):
        "a heart just beat"
        # msg[0] is the responding identity, msg[1] the echoed ping payload
        current = asbytes(str(self.lifetime))
        last = asbytes(str(self.last_ping))
        if msg[1] == current:
            self.responses.add(msg[0])
        elif msg[1] == last:
            # late answer to the previous ping: still counts, but warn
            delta = time.time()-self.tic + (self.lifetime-self.last_ping)
            self.log.warn("heartbeat::heart %r missed a beat, and took %.2f ms to respond"%(msg[0], 1000*delta))
            self.responses.add(msg[0])
        else:
            self.log.warn("heartbeat::got bad heartbeat (possibly old?): %s (current=%.3f)"%
            (msg[1],self.lifetime))
158 158
159 159
if __name__ == '__main__':
    # Stand-alone demo: publish pings on :5555, collect pongs on :5556.
    loop = ioloop.IOLoop.instance()
    context = zmq.Context()
    pub = context.socket(zmq.PUB)
    pub.bind('tcp://127.0.0.1:5555')
    router = context.socket(zmq.ROUTER)
    router.bind('tcp://127.0.0.1:5556')

    outstream = zmqstream.ZMQStream(pub, loop)
    instream = zmqstream.ZMQStream(router, loop)

    # HeartMonitor.__init__ only accepts keyword arguments
    hb = HeartMonitor(loop=loop, pingstream=outstream, pongstream=instream)
    # without start() the periodic beat is never scheduled
    hb.start()

    loop.start()
@@ -1,1290 +1,1290 b''
1 1 """The IPython Controller Hub with 0MQ
2 2 This is the master object that handles connections from engines and clients,
3 3 and monitors traffic through the various queues.
4 4
5 5 Authors:
6 6
7 7 * Min RK
8 8 """
9 9 #-----------------------------------------------------------------------------
10 10 # Copyright (C) 2010 The IPython Development Team
11 11 #
12 12 # Distributed under the terms of the BSD License. The full license is in
13 13 # the file COPYING, distributed as part of this software.
14 14 #-----------------------------------------------------------------------------
15 15
16 16 #-----------------------------------------------------------------------------
17 17 # Imports
18 18 #-----------------------------------------------------------------------------
19 19 from __future__ import print_function
20 20
21 21 import sys
22 22 import time
23 23 from datetime import datetime
24 24
25 25 import zmq
26 26 from zmq.eventloop import ioloop
27 27 from zmq.eventloop.zmqstream import ZMQStream
28 28
29 29 # internal:
30 30 from IPython.utils.importstring import import_item
31 31 from IPython.utils.traitlets import (
32 32 HasTraits, Instance, Int, Unicode, Dict, Set, Tuple, CBytes, DottedObjectName
33 33 )
34 34
35 35 from IPython.parallel import error, util
36 36 from IPython.parallel.factory import RegistrationFactory
37 37
38 38 from IPython.zmq.session import SessionFactory
39 39
40 40 from .heartmonitor import HeartMonitor
41 41
42 42 #-----------------------------------------------------------------------------
43 43 # Code
44 44 #-----------------------------------------------------------------------------
45 45
def _passer(*args, **kwargs):
    """Accept any arguments, do nothing, and return None."""
    pass
48 48
def _printer(*args, **kwargs):
    """Print the positional-argument tuple, then the keyword-argument dict."""
    for received in (args, kwargs):
        print (received)
52 52
def empty_record():
    """Return a fresh dict holding every record key with its blank value.

    All keys map to None, except 'stdout' and 'stderr' which start out as
    empty strings."""
    record = dict.fromkeys((
        'msg_id', 'header', 'content', 'buffers', 'submitted',
        'client_uuid', 'engine_uuid',
        'started', 'completed', 'resubmitted',
        'result_header', 'result_content', 'result_buffers',
        'queue', 'pyin', 'pyout', 'pyerr',
    ))
    record['stdout'] = ''
    record['stderr'] = ''
    return record
76 76
def init_record(msg):
    """Build the initial TaskRecord dict for a request message.

    'msg_id', 'header' and 'submitted' are taken from msg['header'],
    'content' and 'buffers' from `msg` itself; 'stdout'/'stderr' start as
    empty strings and every other key starts as None."""
    header = msg['header']
    record = dict.fromkeys((
        'client_uuid', 'engine_uuid',
        'started', 'completed', 'resubmitted',
        'result_header', 'result_content', 'result_buffers',
        'queue', 'pyin', 'pyout', 'pyerr',
    ))
    record.update(
        msg_id=header['msg_id'],
        header=header,
        content=msg['content'],
        buffers=msg['buffers'],
        submitted=header['date'],
        stdout='',
        stderr='',
    )
    return record
101 101
102 102
class EngineConnector(HasTraits):
    """Record of the zmq connection identities belonging to one engine.

    Attributes are:
    id (int): engine ID
    queue (bytes): identity of queue's DEALER (formerly XREQ) socket
    control (bytes): identity used on the control channel (presumably the
        control DEALER socket -- TODO confirm)
    registration (bytes): identity of registration DEALER socket
    heartbeat (bytes): identity of heartbeat DEALER socket
    pending (set): NOTE(review): not described by the original docs;
        looks like a set of pending msg_ids -- confirm against users
    """
    id = Int(0)
    queue = CBytes()
    control = CBytes()
    registration = CBytes()
    heartbeat = CBytes()
    pending = Set()
118 118
class HubFactory(RegistrationFactory):
    """The Configurable for setting up a Hub.

    Holds the configurable ports/addresses, and `init_hub` creates the
    sockets, the HeartMonitor, the DB backend and finally the Hub itself."""

    # port-pairs for monitoredqueues:
    hb = Tuple(Int,Int,config=True,
        help="""XREQ/SUB Port pair for Engine heartbeats""")
    def _hb_default(self):
        return tuple(util.select_random_ports(2))

    mux = Tuple(Int,Int,config=True,
        help="""Engine/Client Port pair for MUX queue""")

    def _mux_default(self):
        return tuple(util.select_random_ports(2))

    task = Tuple(Int,Int,config=True,
        help="""Engine/Client Port pair for Task queue""")
    def _task_default(self):
        return tuple(util.select_random_ports(2))

    control = Tuple(Int,Int,config=True,
        help="""Engine/Client Port pair for Control queue""")

    def _control_default(self):
        return tuple(util.select_random_ports(2))

    iopub = Tuple(Int,Int,config=True,
        help="""Engine/Client Port pair for IOPub relay""")

    def _iopub_default(self):
        return tuple(util.select_random_ports(2))

    # single ports:
    mon_port = Int(config=True,
        help="""Monitor (SUB) port for queue traffic""")

    def _mon_port_default(self):
        return util.select_random_ports(1)[0]

    notifier_port = Int(config=True,
        help="""PUB port for sending engine status notifications""")

    def _notifier_port_default(self):
        return util.select_random_ports(1)[0]

    engine_ip = Unicode('127.0.0.1', config=True,
        help="IP on which to listen for engine connections. [default: loopback]")
    engine_transport = Unicode('tcp', config=True,
        help="0MQ transport for engine connections. [default: tcp]")

    client_ip = Unicode('127.0.0.1', config=True,
        help="IP on which to listen for client connections. [default: loopback]")
    client_transport = Unicode('tcp', config=True,
        help="0MQ transport for client connections. [default : tcp]")

    monitor_ip = Unicode('127.0.0.1', config=True,
        help="IP on which to listen for monitor messages. [default: loopback]")
    monitor_transport = Unicode('tcp', config=True,
        help="0MQ transport for monitor messages. [default : tcp]")

    monitor_url = Unicode('')

    db_class = DottedObjectName('IPython.parallel.controller.dictdb.DictDB',
        config=True, help="""The class to use for the DB backend""")

    # not configurable
    db = Instance('IPython.parallel.controller.dictdb.BaseDB')
    heartmonitor = Instance('IPython.parallel.controller.heartmonitor.HeartMonitor')

    def _ip_changed(self, name, old, new):
        """Propagate a new ip to the engine, client and monitor addresses.

        NOTE(review): presumably invoked by traitlets when an inherited
        `ip` trait changes -- the trait itself is not declared here."""
        self.engine_ip = new
        self.client_ip = new
        self.monitor_ip = new
        self._update_monitor_url()

    def _update_monitor_url(self):
        """Rebuild `monitor_url` from the monitor transport, ip and port."""
        self.monitor_url = "%s://%s:%i"%(self.monitor_transport, self.monitor_ip, self.mon_port)

    def _transport_changed(self, name, old, new):
        """Propagate a new transport to engine, client and monitor.

        NOTE(review): presumably the traitlets change hook for an inherited
        `transport` trait -- confirm."""
        self.engine_transport = new
        self.client_transport = new
        self.monitor_transport = new
        self._update_monitor_url()

    def __init__(self, **kwargs):
        super(HubFactory, self).__init__(**kwargs)
        self._update_monitor_url()


    def construct(self):
        """Build the Hub (see `init_hub`)."""
        self.init_hub()

    def start(self):
        """Start the heart monitor created by `init_hub`."""
        self.heartmonitor.start()
        self.log.info("Heartmonitor started")

    def init_hub(self):
        """Create all sockets and helper objects, then construct `self.hub`."""
        # url templates; the trailing "%i" is filled with a port later
        client_iface = "%s://%s:"%(self.client_transport, self.client_ip) + "%i"
        engine_iface = "%s://%s:"%(self.engine_transport, self.engine_ip) + "%i"

        ctx = self.context
        loop = self.loop

        # Registrar socket
        q = ZMQStream(ctx.socket(zmq.ROUTER), loop)
        q.bind(client_iface % self.regport)
        self.log.info("Hub listening on %s for registration."%(client_iface%self.regport))
        if self.client_ip != self.engine_ip:
            q.bind(engine_iface % self.regport)
            self.log.info("Hub listening on %s for registration."%(engine_iface%self.regport))

        ### Engine connections ###

        # heartbeat
        hpub = ctx.socket(zmq.PUB)
        hpub.bind(engine_iface % self.hb[0])
        hrep = ctx.socket(zmq.ROUTER)
        hrep.bind(engine_iface % self.hb[1])
        self.heartmonitor = HeartMonitor(loop=loop, config=self.config, log=self.log,
                                pingstream=ZMQStream(hpub,loop),
                                pongstream=ZMQStream(hrep,loop)
                            )

        ### Client connections ###
        # Notifier socket
        n = ZMQStream(ctx.socket(zmq.PUB), loop)
        n.bind(client_iface%self.notifier_port)

        ### build and launch the queues ###

        # monitor socket
        sub = ctx.socket(zmq.SUB)
        sub.setsockopt(zmq.SUBSCRIBE, b"")
        sub.bind(self.monitor_url)
        sub.bind('inproc://monitor')
        sub = ZMQStream(sub, loop)

        # connect the db
        # log only the class name, i.e. the last component of the dotted path
        self.log.info('Hub using DB backend: %r'%(self.db_class.split('.')[-1]))
        # cdir = self.config.Global.cluster_dir
        self.db = import_item(str(self.db_class))(session=self.session.session,
                                        config=self.config, log=self.log)
        # NOTE(review): short pause kept from the original; its purpose is
        # not evident from this file
        time.sleep(.25)
        try:
            scheme = self.config.TaskScheduler.scheme_name
        except AttributeError:
            # scheme not configured: fall back to the scheduler's default
            from .scheduler import TaskScheduler
            scheme = TaskScheduler.scheme_name.get_default_value()
        # build connection dicts
        self.engine_info = {
            'control' : engine_iface%self.control[1],
            'mux': engine_iface%self.mux[1],
            'heartbeat': (engine_iface%self.hb[0], engine_iface%self.hb[1]),
            'task' : engine_iface%self.task[1],
            'iopub' : engine_iface%self.iopub[1],
            # 'monitor' : engine_iface%self.mon_port,
            }

        self.client_info = {
            'control' : client_iface%self.control[0],
            'mux': client_iface%self.mux[0],
            'task' : (scheme, client_iface%self.task[0]),
            'iopub' : client_iface%self.iopub[0],
            'notification': client_iface%self.notifier_port
            }
        self.log.debug("Hub engine addrs: %s"%self.engine_info)
        self.log.debug("Hub client addrs: %s"%self.client_info)

        # resubmit stream
        # connects to the client-facing task url, identified by the session id
        r = ZMQStream(ctx.socket(zmq.DEALER), loop)
        url = util.disambiguate_url(self.client_info['task'][-1])
        r.setsockopt(zmq.IDENTITY, util.asbytes(self.session.session))
        r.connect(url)

        self.hub = Hub(loop=loop, session=self.session, monitor=sub, heartmonitor=self.heartmonitor,
                query=q, notifier=n, resubmit=r, db=self.db,
                engine_info=self.engine_info, client_info=self.client_info,
                log=self.log)
298 298
299 299
300 300 class Hub(SessionFactory):
301 301 """The IPython Controller Hub with 0MQ connections
302 302
303 303 Parameters
304 304 ==========
305 305 loop: zmq IOLoop instance
306 306 session: Session object
307 307 <removed> context: zmq context for creating new connections (?)
308 308 queue: ZMQStream for monitoring the command queue (SUB)
309 309 query: ZMQStream for engine registration and client queries requests (XREP)
310 310 heartbeat: HeartMonitor object checking the pulse of the engines
311 311 notifier: ZMQStream for broadcasting engine registration changes (PUB)
312 312 db: connection to db for out of memory logging of commands
313 313 NotImplemented
314 314 engine_info: dict of zmq connection information for engines to connect
315 315 to the queues.
316 316 client_info: dict of zmq connection information for engines to connect
317 317 to the queues.
318 318 """
319 319 # internal data structures:
320 320 ids=Set() # engine IDs
321 321 keytable=Dict()
322 322 by_ident=Dict()
323 323 engines=Dict()
324 324 clients=Dict()
325 325 hearts=Dict()
326 326 pending=Set()
327 327 queues=Dict() # pending msg_ids keyed by engine_id
328 328 tasks=Dict() # pending msg_ids submitted as tasks, keyed by client_id
329 329 completed=Dict() # completed msg_ids keyed by engine_id
330 330 all_completed=Set() # completed msg_ids keyed by engine_id
331 331 dead_engines=Set() # completed msg_ids keyed by engine_id
332 332 unassigned=Set() # set of task msg_ds not yet assigned a destination
333 333 incoming_registrations=Dict()
334 334 registration_timeout=Int()
335 335 _idcounter=Int(0)
336 336
337 337 # objects from constructor:
338 338 query=Instance(ZMQStream)
339 339 monitor=Instance(ZMQStream)
340 340 notifier=Instance(ZMQStream)
341 341 resubmit=Instance(ZMQStream)
342 342 heartmonitor=Instance(HeartMonitor)
343 343 db=Instance(object)
344 344 client_info=Dict()
345 345 engine_info=Dict()
346 346
347 347
348 348 def __init__(self, **kwargs):
349 349 """
350 350 # universal:
351 351 loop: IOLoop for creating future connections
352 352 session: streamsession for sending serialized data
353 353 # engine:
354 354 queue: ZMQStream for monitoring queue messages
355 355 query: ZMQStream for engine+client registration and client requests
356 356 heartbeat: HeartMonitor object for tracking engines
357 357 # extra:
358 358 db: ZMQStream for db connection (NotImplemented)
359 359 engine_info: zmq address/protocol dict for engine connections
360 360 client_info: zmq address/protocol dict for client connections
361 361 """
362 362
363 363 super(Hub, self).__init__(**kwargs)
364 364 self.registration_timeout = max(5000, 2*self.heartmonitor.period)
365 365
366 366 # validate connection dicts:
367 367 for k,v in self.client_info.iteritems():
368 368 if k == 'task':
369 369 util.validate_url_container(v[1])
370 370 else:
371 371 util.validate_url_container(v)
372 372 # util.validate_url_container(self.client_info)
373 373 util.validate_url_container(self.engine_info)
374 374
375 375 # register our callbacks
376 376 self.query.on_recv(self.dispatch_query)
377 377 self.monitor.on_recv(self.dispatch_monitor_traffic)
378 378
379 379 self.heartmonitor.add_heart_failure_handler(self.handle_heart_failure)
380 380 self.heartmonitor.add_new_heart_handler(self.handle_new_heart)
381 381
382 382 self.monitor_handlers = {b'in' : self.save_queue_request,
383 383 b'out': self.save_queue_result,
384 384 b'intask': self.save_task_request,
385 385 b'outtask': self.save_task_result,
386 386 b'tracktask': self.save_task_destination,
387 387 b'incontrol': _passer,
388 388 b'outcontrol': _passer,
389 389 b'iopub': self.save_iopub_message,
390 390 }
391 391
392 392 self.query_handlers = {'queue_request': self.queue_status,
393 393 'result_request': self.get_results,
394 394 'history_request': self.get_history,
395 395 'db_request': self.db_query,
396 396 'purge_request': self.purge_results,
397 397 'load_request': self.check_load,
398 398 'resubmit_request': self.resubmit_task,
399 399 'shutdown_request': self.shutdown_request,
400 400 'registration_request' : self.register_engine,
401 401 'unregistration_request' : self.unregister_engine,
402 402 'connection_request': self.connection_request,
403 403 }
404 404
405 405 # ignore resubmit replies
406 406 self.resubmit.on_recv(lambda msg: None, copy=False)
407 407
408 408 self.log.info("hub::created hub")
409 409
410 410 @property
411 411 def _next_id(self):
412 412 """gemerate a new ID.
413 413
414 414 No longer reuse old ids, just count from 0."""
415 415 newid = self._idcounter
416 416 self._idcounter += 1
417 417 return newid
418 418 # newid = 0
419 419 # incoming = [id[0] for id in self.incoming_registrations.itervalues()]
420 420 # # print newid, self.ids, self.incoming_registrations
421 421 # while newid in self.ids or newid in incoming:
422 422 # newid += 1
423 423 # return newid
424 424
425 425 #-----------------------------------------------------------------------------
426 426 # message validation
427 427 #-----------------------------------------------------------------------------
428 428
429 429 def _validate_targets(self, targets):
430 430 """turn any valid targets argument into a list of integer ids"""
431 431 if targets is None:
432 432 # default to all
433 433 targets = self.ids
434 434
435 435 if isinstance(targets, (int,str,unicode)):
436 436 # only one target specified
437 437 targets = [targets]
438 438 _targets = []
439 439 for t in targets:
440 440 # map raw identities to ids
441 441 if isinstance(t, (str,unicode)):
442 442 t = self.by_ident.get(t, t)
443 443 _targets.append(t)
444 444 targets = _targets
445 445 bad_targets = [ t for t in targets if t not in self.ids ]
446 446 if bad_targets:
447 447 raise IndexError("No Such Engine: %r"%bad_targets)
448 448 if not targets:
449 449 raise IndexError("No Engines Registered")
450 450 return targets
451 451
452 452 #-----------------------------------------------------------------------------
453 453 # dispatch methods (1 per stream)
454 454 #-----------------------------------------------------------------------------
455 455
456 456
def dispatch_monitor_traffic(self, msg):
    """Dispatch one message received on the monitor stream.

    The first frame of ``msg`` selects a handler from
    ``self.monitor_handlers``.  The remaining frames are split by
    ``self.session.feed_identities`` into identities and message body,
    which are handed to that handler.  A message without identities, or
    with a first frame that has no handler, is logged as an error and
    dropped.
    """
    self.log.debug("monitor traffic: %r"%msg[:2])
    topic = msg[0]
    try:
        idents, msg = self.session.feed_identities(msg[1:])
    except ValueError:
        # handled like a message that carries no identities
        idents = []
    if not idents:
        self.log.error("Bad Monitor Message: %r"%msg)
        return

    handler = self.monitor_handlers.get(topic, None)
    if handler is None:
        self.log.error("Invalid monitor topic: %r"%topic)
        return
    handler(idents, msg)
474 474
475 475
def dispatch_query(self, msg):
    """Route registration requests and queries from clients.

    The raw frames are split into identities and message by
    ``self.session.feed_identities`` and the message is unserialized.
    The handler is looked up in ``self.query_handlers`` by the header's
    ``msg_type`` and called with ``(idents, msg)``.

    A message without identities is logged and dropped.  A message that
    cannot be unserialized, or whose type has no handler, is logged and
    answered with a ``hub_error`` reply sent to the first identity.
    """
    try:
        idents, msg = self.session.feed_identities(msg)
    except ValueError:
        idents = []
    if not idents:
        self.log.error("Bad Query Message: %r"%msg)
        return
    client_id = idents[0]
    try:
        msg = self.session.unserialize(msg, content=True)
    except Exception:
        content = error.wrap_exception()
        self.log.error("Bad Query Message: %r"%msg, exc_info=True)
        self.session.send(self.query, "hub_error", ident=client_id,
                content=content)
        return

    # switch on message type:
    msg_type = msg['header']['msg_type']
    self.log.info("client::client %r requested %r"%(client_id, msg_type))
    handler = self.query_handlers.get(msg_type, None)
    if handler is None:
        try:
            # Raised explicitly instead of via `assert`: assert statements
            # are removed under -O, which would let a None handler be
            # called below.  The exception type is kept as AssertionError
            # so the wrapped error content is unchanged.
            raise AssertionError("Bad Message Type: %r"%msg_type)
        except AssertionError:
            content = error.wrap_exception()
            self.log.error("Bad Message Type: %r"%msg_type, exc_info=True)
            self.session.send(self.query, "hub_error", ident=client_id,
                    content=content)
        return

    handler(idents, msg)
510 510
def dispatch_db(self, msg):
    """Placeholder dispatcher for ``msg``; always raises NotImplementedError."""
    raise NotImplementedError
514 514
515 515 #---------------------------------------------------------------------------
516 516 # handler methods (1 per event)
517 517 #---------------------------------------------------------------------------
518 518
519 519 #----------------------- Heartbeat --------------------------------------
520 520
def handle_new_heart(self, heart):
    """Handler to attach to the heartbeater, called when a new heart
    starts to beat.

    If a registration keyed by ``heart`` is waiting in
    ``self.incoming_registrations`` it is completed through
    ``finish_registration``; any other heart is only logged.
    """
    self.log.debug("heartbeat::handle_new_heart(%r)"%heart)
    if heart in self.incoming_registrations:
        self.finish_registration(heart)
        return
    self.log.info("heartbeat::ignoring new heart: %r"%heart)
530 530
531 531
def handle_heart_failure(self, heart):
    """Handler to attach to the heartbeater, called when a previously
    registered heart fails to respond to a beat request.

    A heart with no entry in ``self.hearts`` is logged and ignored.
    Otherwise ``unregister_engine`` is called with a message whose content
    holds the engine's id and queue identity.
    """
    self.log.debug("heartbeat::handle_heart_failure(%r)"%heart)
    eid = self.hearts.get(heart, None)
    if eid is None:
        # Check before touching self.engines: looking up engines[None]
        # would raise KeyError for an unknown heart.
        self.log.info("heartbeat::ignoring heart failure %r"%heart)
        return
    queue = self.engines[eid].queue
    self.unregister_engine(heart, dict(content=dict(id=eid, queue=queue)))
543 543
544 544 #----------------------- MUX Queue Traffic ------------------------------
545 545
def save_queue_request(self, idents, msg):
    """Record a request seen on the MUX queue.

    ``idents`` must start with the engine's queue identity followed by the
    client identity.  A task record is built with ``init_record`` and
    written to ``self.db``: merged into the existing record when one is
    already stored under the same msg_id, added otherwise.  The msg_id is
    then added to ``self.pending`` and to the engine's list in
    ``self.queues``.

    Malformed identities, messages that fail to unserialize, and unknown
    queue identities are logged and dropped.
    """
    if len(idents) < 2:
        self.log.error("invalid identity prefix: %r"%idents)
        return
    queue_id, client_id = idents[:2]
    try:
        msg = self.session.unserialize(msg)
    except Exception:
        self.log.error("queue::client %r sent invalid message to %r: %r"%(client_id, queue_id, msg), exc_info=True)
        return

    eid = self.by_ident.get(queue_id, None)
    if eid is None:
        self.log.error("queue::target %r not registered"%queue_id)
        self.log.debug("queue:: valid are: %r"%(self.by_ident.keys()))
        return

    record = init_record(msg)
    msg_id = record['msg_id']
    # Unicode in records
    record['engine_uuid'] = queue_id.decode('ascii')
    record['client_uuid'] = client_id.decode('ascii')
    record['queue'] = 'mux'

    try:
        # it's possible iopub arrived first:
        existing = self.db.get_record(msg_id)
        for key, stored in existing.iteritems():
            if not stored:
                continue
            fresh = record.get(key, None)
            if not fresh:
                # keep what is already stored when the new record has nothing
                record[key] = stored
            elif stored != fresh:
                self.log.warn("conflicting initial state for record: %r:%r <%r> %r"%(msg_id, fresh, key, stored))
        try:
            self.db.update_record(msg_id, record)
        except Exception:
            self.log.error("DB Error updating record %r"%msg_id, exc_info=True)
    except KeyError:
        # no record stored under this msg_id yet
        try:
            self.db.add_record(msg_id, record)
        except Exception:
            self.log.error("DB Error adding record %r"%msg_id, exc_info=True)

    self.pending.add(msg_id)
    self.queues[eid].append(msg_id)
591 591
def save_queue_result(self, idents, msg):
    """Record a reply seen on the MUX queue.

    ``idents`` must start with the client identity followed by the
    engine's queue identity.  If the parent msg_id is pending it is moved
    from ``self.pending`` / ``self.queues`` to ``self.all_completed`` /
    ``self.completed``.  The result header, content, buffers and
    timestamps are then written to the record in ``self.db``.

    Replies with no parent header are dropped silently; replies for
    msg_ids that are neither pending nor completed are logged and dropped.
    """
    if len(idents) < 2:
        self.log.error("invalid identity prefix: %r"%idents)
        return

    client_id, queue_id = idents[:2]
    try:
        msg = self.session.unserialize(msg)
    except Exception:
        self.log.error("queue::engine %r sent invalid message to %r: %r"%(
                queue_id,client_id, msg), exc_info=True)
        return

    eid = self.by_ident.get(queue_id, None)
    if eid is None:
        self.log.error("queue::unknown engine %r is sending a reply: "%queue_id)
        return

    parent = msg['parent_header']
    if not parent:
        return
    msg_id = parent['msg_id']
    if msg_id in self.pending:
        self.pending.remove(msg_id)
        self.all_completed.add(msg_id)
        self.queues[eid].remove(msg_id)
        self.completed[eid].append(msg_id)
    elif msg_id not in self.all_completed:
        # it could be a result from a dead engine that died before
        # delivering the result
        self.log.warn("queue:: unknown msg finished %r"%msg_id)
        return

    # update record anyway, because the unregistration could have been premature
    reply_header = msg['header']
    finished = reply_header['date']
    result = dict(
        result_header=reply_header,
        result_content=msg['content'],
        started=reply_header.get('started', None),
        completed=finished,
        result_buffers=msg['buffers'],
    )
    try:
        self.db.update_record(msg_id, result)
    except Exception:
        self.log.error("DB Error updating record %r"%msg_id, exc_info=True)
640 640
641 641
642 642 #--------------------- Task Queue Traffic ------------------------------
643 643
def save_task_request(self, idents, msg):
    """Save the submission of a task.

    A record is built with ``init_record`` and the msg_id is added to
    ``self.pending`` and ``self.unassigned``.  The record is merged into
    the one already stored in ``self.db`` under the same msg_id if there
    is one, and added as a new record otherwise.  DB failures are logged,
    not raised.
    """
    client_id = idents[0]

    try:
        msg = self.session.unserialize(msg)
    except Exception:
        self.log.error("task::client %r sent invalid task message: %r"%(
                client_id, msg), exc_info=True)
        return

    record = init_record(msg)
    record['client_uuid'] = client_id
    record['queue'] = 'task'
    msg_id = msg['header']['msg_id']
    self.pending.add(msg_id)
    self.unassigned.add(msg_id)
    try:
        # it's possible iopub arrived first:
        existing = self.db.get_record(msg_id)
        if existing['resubmitted']:
            # don't clobber these keys on resubmit:
            # submitted and client_uuid should be different,
            # and buffers might be big, and shouldn't have changed.
            # content and header are still checked below: they should
            # not change, and are cheaper to compare than buffers.
            for key in ('submitted', 'client_uuid', 'buffers'):
                record.pop(key)

        for key, stored in existing.iteritems():
            if key.endswith('buffers'):
                # don't compare buffers
                continue
            if not stored:
                continue
            fresh = record.get(key, None)
            if not fresh:
                record[key] = stored
            elif stored != fresh:
                self.log.warn("conflicting initial state for record: %r:%r <%r> %r"%(msg_id, fresh, key, stored))
        try:
            self.db.update_record(msg_id, record)
        except Exception:
            self.log.error("DB Error updating record %r"%msg_id, exc_info=True)
    except KeyError:
        # no record stored under this msg_id yet
        try:
            self.db.add_record(msg_id, record)
        except Exception:
            self.log.error("DB Error adding record %r"%msg_id, exc_info=True)
    except Exception:
        self.log.error("DB Error saving task request %r"%msg_id, exc_info=True)
694 694
def save_task_result(self, idents, msg):
    """Save the result of a completed task.

    The parent msg_id is dropped from ``self.unassigned`` if present.  If
    it is pending it is moved to ``self.all_completed``, bookkeeping for
    the reporting engine (looked up by the header's ``engine`` value) is
    updated when that engine is known, and the result is written to the
    record in ``self.db``.  Results for msg_ids that are not pending are
    only logged.
    """
    client_id = idents[0]
    try:
        msg = self.session.unserialize(msg)
    except Exception:
        self.log.error("task::invalid task result message send to %r: %r"%(
                client_id, msg), exc_info=True)
        return

    parent = msg['parent_header']
    if not parent:
        self.log.warn("Task %r had no parent!"%msg)
        return
    msg_id = parent['msg_id']
    self.unassigned.discard(msg_id)

    header = msg['header']
    engine_uuid = header.get('engine', None)
    eid = self.by_ident.get(engine_uuid, None)

    if msg_id not in self.pending:
        self.log.debug("task::unknown task %r finished"%msg_id)
        return

    self.pending.remove(msg_id)
    self.all_completed.add(msg_id)
    if eid is not None:
        self.completed[eid].append(msg_id)
        if msg_id in self.tasks[eid]:
            self.tasks[eid].remove(msg_id)

    finished = header['date']
    result = dict(
        result_header=header,
        result_content=msg['content'],
        started=header.get('started', None),
        completed=finished,
        engine_uuid=engine_uuid,
        result_buffers=msg['buffers'],
    )
    try:
        self.db.update_record(msg_id, result)
    except Exception:
        self.log.error("DB Error saving task request %r"%msg_id, exc_info=True)
743 743
def save_task_destination(self, idents, msg):
    """Record which engine a task has arrived on.

    The message content supplies ``msg_id`` and ``engine_id``.  The
    msg_id is dropped from ``self.unassigned`` if present, appended to the
    engine's list in ``self.tasks``, and the record in ``self.db`` is
    updated with the engine's uuid.
    """
    try:
        msg = self.session.unserialize(msg, content=True)
    except Exception:
        self.log.error("task::invalid task tracking message", exc_info=True)
        return

    info = msg['content']
    msg_id = info['msg_id']
    engine_uuid = info['engine_id']
    eid = self.by_ident[util.asbytes(engine_uuid)]

    self.log.info("task::task %r arrived on %r"%(msg_id, eid))
    self.unassigned.discard(msg_id)
    self.tasks[eid].append(msg_id)
    try:
        self.db.update_record(msg_id, dict(engine_uuid=engine_uuid))
    except Exception:
        self.log.error("DB Error saving task destination %r"%msg_id, exc_info=True)
768 768
769 769
def mia_task_request(self, idents, msg):
    """Not implemented; always raises NotImplementedError."""
    # The assignment that used to follow the raise was unreachable and has
    # been removed.  Sketch of the intended reply, kept from the original:
    # content = dict(mia=self.mia,status='ok')
    # self.session.send('mia_reply', content=content, idents=client_id)
    raise NotImplementedError
775 775
776 776
777 777 #--------------------- IOPub Traffic ------------------------------
778 778
def save_iopub_message(self, topics, msg):
    """Save an iopub message into the db.

    The record for the parent msg_id is fetched, or created from
    ``empty_record`` if it does not exist yet.  What is stored depends on
    the message type: ``stream`` data is appended to the text already
    stored under the stream's name, ``pyerr`` stores the whole content,
    ``pyin`` stores the code, and any other type stores the content's
    ``data`` value (or '') under the message type.
    """
    try:
        msg = self.session.unserialize(msg, content=True)
    except Exception:
        self.log.error("iopub::invalid IOPub message", exc_info=True)
        return

    parent = msg['parent_header']
    if not parent:
        self.log.error("iopub::invalid IOPub message: %r"%msg)
        return
    msg_id = parent['msg_id']
    msg_type = msg['header']['msg_type']
    content = msg['content']

    # ensure msg_id is in db
    try:
        rec = self.db.get_record(msg_id)
    except KeyError:
        rec = empty_record()
        rec['msg_id'] = msg_id
        self.db.add_record(msg_id, rec)

    update = {}
    if msg_type == 'stream':
        stream_name = content['name']
        previous = rec[stream_name] or ''
        update[stream_name] = previous + content['data']
    elif msg_type == 'pyerr':
        update['pyerr'] = content
    elif msg_type == 'pyin':
        update['pyin'] = content['code']
    else:
        update[msg_type] = content.get('data', '')

    try:
        self.db.update_record(msg_id, update)
    except Exception:
        self.log.error("DB Error saving iopub message %r"%msg_id, exc_info=True)
821 821
822 822
823 823
824 824 #-------------------------------------------------------------------------
825 825 # Registration requests
826 826 #-------------------------------------------------------------------------
827 827
def connection_request(self, client_id, msg):
    """Reply with connection addresses for clients.

    The ``connection_reply`` content is ``self.client_info`` plus status
    'ok' and an ``engines`` dict mapping each id in ``self.keytable``
    (as a string) to its decoded identity, leaving out identities listed
    in ``self.dead_engines``.
    """
    self.log.info("client::client %r connected"%client_id)
    engines = {}
    for eid, uuid in self.keytable.iteritems():
        if uuid in self.dead_engines:
            continue
        engines[str(eid)] = uuid.decode('ascii')
    content = dict(status='ok')
    content.update(self.client_info)
    content['engines'] = engines
    self.session.send(self.query, 'connection_reply', content, parent=msg, ident=client_id)
839 839
def register_engine(self, reg, msg):
    """Register a new engine.

    Reads ``queue`` (required) and ``heartbeat`` (optional) from the
    message content, takes an engine id from ``self._next_id`` and sends a
    ``registration_reply`` to ``reg``.  The reply is an error when the
    queue or heart identity is already used by a registered engine or by
    another registration that is still incoming.

    On success the registration is stored in
    ``self.incoming_registrations``.  It is finished right away if the
    heart is already known to the heart monitor; otherwise a
    DelayedCallback with ``self.registration_timeout`` is started that
    calls ``_purge_stalled_registration``.

    Returns the engine id, or None when no queue was specified.
    """
    request = msg['content']
    try:
        queue = util.asbytes(request['queue'])
    except KeyError:
        self.log.error("registration::queue not specified", exc_info=True)
        return
    heart = request.get('heartbeat', None)
    if heart:
        heart = util.asbytes(heart)
    eid = self._next_id

    self.log.debug("registration::register_engine(%i, %r, %r, %r)"%(eid, queue, reg, heart))

    reply = dict(id=eid,status='ok')
    reply.update(self.engine_info)
    # refuse identities that are already taken; each error is raised and
    # caught on the spot so that wrap_exception() is called from inside
    # an except block
    if queue in self.by_ident:
        try:
            raise KeyError("queue_id %r in use"%queue)
        except:
            reply = error.wrap_exception()
            self.log.error("queue_id %r in use"%queue, exc_info=True)
    elif heart in self.hearts: # need to check unique hearts?
        try:
            raise KeyError("heart_id %r in use"%heart)
        except:
            self.log.error("heart_id %r in use"%heart, exc_info=True)
            reply = error.wrap_exception()
    else:
        for other_heart, registration in self.incoming_registrations.iteritems():
            if heart == other_heart:
                try:
                    raise KeyError("heart_id %r in use"%heart)
                except:
                    self.log.error("heart_id %r in use"%heart, exc_info=True)
                    reply = error.wrap_exception()
                break
            elif queue == registration[1]:
                try:
                    raise KeyError("queue_id %r in use"%queue)
                except:
                    self.log.error("queue_id %r in use"%queue, exc_info=True)
                    reply = error.wrap_exception()
                break

    self.session.send(self.query, "registration_reply",
            content=reply,
            ident=reg)

    if reply['status'] != 'ok':
        self.log.error("registration::registration %i failed: %r"%(eid, reply['evalue']))
        return eid

    if heart in self.heartmonitor.hearts:
        # already beating
        self.incoming_registrations[heart] = (eid,queue,reg[0],None)
        self.finish_registration(heart)
    else:
        purge = lambda : self._purge_stalled_registration(heart)
        dc = ioloop.DelayedCallback(purge, self.registration_timeout, self.loop)
        dc.start()
        self.incoming_registrations[heart] = (eid,queue,reg[0],dc)
    return eid
906 906
def unregister_engine(self, ident, msg):
    """Unregister an engine that explicitly requested to leave.

    The engine id is read from the message content.  The engine's
    identity is added to ``self.dead_engines``, a DelayedCallback with
    ``self.registration_timeout`` is started that calls
    ``_handle_stranded_msgs``, and an ``unregistration_notification`` is
    sent if ``self.notifier`` is set.  A message without an engine id is
    logged and ignored.
    """
    try:
        eid = msg['content']['id']
    except:
        self.log.error("registration::bad engine id for unregistration: %r"%ident, exc_info=True)
        return
    self.log.info("registration::unregister_engine(%r)"%eid)

    uuid = self.keytable[eid]
    notice = dict(id=eid, queue=uuid.decode('ascii'))
    self.dead_engines.add(uuid)
    # self.ids.remove(eid)
    # uuid = self.keytable.pop(eid)
    #
    # ec = self.engines.pop(eid)
    # self.hearts.pop(ec.heartbeat)
    # self.by_ident.pop(ec.queue)
    # self.completed.pop(eid)
    stranded = lambda : self._handle_stranded_msgs(eid, uuid)
    dc = ioloop.DelayedCallback(stranded, self.registration_timeout, self.loop)
    dc.start()
    ############## TODO: HANDLE IT ################

    if self.notifier:
        self.session.send(self.notifier, "unregistration_notification", content=notice)
933 933
934 934 def _handle_stranded_msgs(self, eid, uuid):
935 935 """Handle messages known to be on an engine when the engine unregisters.
936 936
937 937 It is possible that this will fire prematurely - that is, an engine will
938 938 go down after completing a result, and the client will be notified
939 939 that the result failed and later receive the actual result.
940 940 """
941 941
942 942 outstanding = self.queues[eid]
943 943
944 944 for msg_id in outstanding:
945 945 self.pending.remove(msg_id)
946 946 self.all_completed.add(msg_id)
947 947 try:
948 948 raise error.EngineError("Engine %r died while running task %r"%(eid, msg_id))
949 949 except:
950 950 content = error.wrap_exception()
951 951 # build a fake header:
952 952 header = {}
953 953 header['engine'] = uuid
954 954 header['date'] = datetime.now()
955 955 rec = dict(result_content=content, result_header=header, result_buffers=[])
956 956 rec['completed'] = header['date']
957 957 rec['engine_uuid'] = uuid
958 958 try:
959 959 self.db.update_record(msg_id, rec)
960 960 except Exception:
961 961 self.log.error("DB Error handling stranded msg %r"%msg_id, exc_info=True)
962 962
963 963
def finish_registration(self, heart):
    """Second half of engine registration, called after our HeartMonitor
    has received a beat from the Engine's Heart.

    Pops the registration stored under ``heart``, stops its purge callback
    if it has one, records the engine in the hub's lookup tables and sends
    a ``registration_notification`` if ``self.notifier`` is set.  An
    unknown heart is logged and ignored.
    """
    try:
        registration = self.incoming_registrations.pop(heart)
    except KeyError:
        self.log.error("registration::tried to finish nonexistant registration", exc_info=True)
        return
    (eid, queue, reg, purge) = registration
    self.log.info("registration::finished registering engine %i:%r"%(eid,queue))
    if purge is not None:
        purge.stop()

    self.ids.add(eid)
    self.keytable[eid] = queue
    # the queue identity is passed as the control identity as well
    self.engines[eid] = EngineConnector(id=eid, queue=queue, registration=reg,
                control=queue, heartbeat=heart)
    self.by_ident[queue] = eid
    self.queues[eid] = []
    self.tasks[eid] = []
    self.completed[eid] = []
    self.hearts[heart] = eid

    notice = dict(id=eid, queue=self.engines[eid].queue.decode('ascii'))
    if self.notifier:
        self.session.send(self.notifier, "registration_notification", content=notice)
    self.log.info("engine::Engine Connected: %i"%eid)
989 989
990 990 def _purge_stalled_registration(self, heart):
991 991 if heart in self.incoming_registrations:
992 992 eid = self.incoming_registrations.pop(heart)[0]
993 993 self.log.info("registration::purging stalled registration: %i"%eid)
994 994 else:
995 995 pass
996 996
997 997 #-------------------------------------------------------------------------
998 998 # Client Requests
999 999 #-------------------------------------------------------------------------
1000 1000
def shutdown_request(self, client_id, msg):
    """Handle a shutdown request.

    Sends ``shutdown_reply`` to the requesting client and
    ``shutdown_notice`` on the notifier, then starts a DelayedCallback
    (created with a delay argument of 1000) that calls ``_shutdown``.
    """
    ok = {'status': 'ok'}
    self.session.send(self.query, 'shutdown_reply', content=ok, ident=client_id)
    # also notify other clients of shutdown
    self.session.send(self.notifier, 'shutdown_notice', content={'status': 'ok'})
    delayed = ioloop.DelayedCallback(lambda : self._shutdown(), 1000, self.loop)
    delayed.start()
1008 1008
1009 1009 def _shutdown(self):
1010 1010 self.log.info("hub::hub shutting down.")
1011 1011 time.sleep(0.1)
1012 1012 sys.exit(0)
1013 1013
1014 1014
def check_load(self, client_id, msg):
    """Reply with the load of the requested targets.

    The load of a target is the length of its list in ``self.queues``
    plus the length of its list in ``self.tasks``.  If the targets are
    missing or fail validation, a ``hub_error`` reply is sent instead of
    the ``load_reply``.
    """
    request = msg['content']
    try:
        targets = self._validate_targets(request['targets'])
    except:
        failure = error.wrap_exception()
        self.session.send(self.query, "hub_error",
                content=failure, ident=client_id)
        return

    reply = dict(status='ok')
    for target in targets:
        reply[bytes(target)] = len(self.queues[target]) + len(self.tasks[target])
    self.session.send(self.query, "load_reply", content=reply, ident=client_id)
1031 1031
1032 1032
def queue_status(self, client_id, msg):
    """Return the Queue status of one or more targets.

    if verbose: return the msg_ids
    else: return len of each type.

    keys: queue (pending MUX jobs)
          tasks (pending Task jobs)
          completed (finished jobs from both queues)

    The reply also carries ``unassigned``: the msg_ids in
    ``self.unassigned`` if verbose, their count otherwise.  If the
    targets fail validation a ``hub_error`` reply is sent instead.
    """
    request = msg['content']
    targets = request['targets']
    try:
        targets = self._validate_targets(targets)
    except:
        failure = error.wrap_exception()
        self.session.send(self.query, "hub_error",
                content=failure, ident=client_id)
        return

    verbose = request.get('verbose', False)
    reply = dict(status='ok')
    for target in targets:
        queue = self.queues[target]
        completed = self.completed[target]
        tasks = self.tasks[target]
        if verbose:
            status = {'queue': queue, 'completed': completed , 'tasks': tasks}
        else:
            status = {'queue': len(queue), 'completed': len(completed), 'tasks': len(tasks)}
        reply[str(target)] = status
    if verbose:
        reply['unassigned'] = list(self.unassigned)
    else:
        reply['unassigned'] = len(self.unassigned)
    self.session.send(self.query, "queue_reply", content=reply, ident=client_id)
1063 1063
def purge_results(self, client_id, msg):
    """Purge results from memory. This method is more valuable before we
    move to a DB based message storage mechanism.

    With ``msg_ids == 'all'`` every record that has a ``completed`` value
    is dropped.  Otherwise the listed msg_ids are dropped, unless one of
    them is still pending, and then the completed records of every engine
    listed in ``engine_ids`` are dropped.  The outcome (ok or the first
    error) is sent back as a ``purge_reply``.
    """
    request = msg['content']
    self.log.info("Dropping records with %s", request)
    msg_ids = request.get('msg_ids', [])
    reply = dict(status='ok')
    if msg_ids == 'all':
        try:
            self.db.drop_matching_records(dict(completed={'$ne':None}))
        except Exception:
            reply = error.wrap_exception()
    else:
        still_pending = [m for m in msg_ids if m in self.pending]
        if still_pending:
            try:
                raise IndexError("msg pending: %r"%still_pending[0])
            except:
                reply = error.wrap_exception()
        else:
            try:
                self.db.drop_matching_records(dict(msg_id={'$in':msg_ids}))
            except Exception:
                reply = error.wrap_exception()

        if reply['status'] == 'ok':
            for eid in request.get('engine_ids', []):
                if eid not in self.engines:
                    try:
                        raise IndexError("No such engine: %i"%eid)
                    except:
                        reply = error.wrap_exception()
                    break
                uid = self.engines[eid].queue
                try:
                    self.db.drop_matching_records(dict(engine_uuid=uid, completed={'$ne':None}))
                except Exception:
                    reply = error.wrap_exception()
                    break

    self.session.send(self.query, 'purge_reply', content=reply, ident=client_id)
1106 1106
def resubmit_task(self, client_id, msg):
    """Resubmit one or more tasks.

    The records for ``msg_ids`` are fetched from ``self.db``.  The request
    is refused with an error reply if the DB returns more or fewer records
    than requested, or if one of the tasks is still pending.  Otherwise
    the stored records are reset (keeping msg_id, header, content, buffers
    and submitted), marked as resubmitted, and the original messages are
    sent again on ``self.resubmit``.

    A ``resubmit_reply`` is always sent to the client: status 'ok' on
    success, the wrapped exception on any failure.
    """
    def finish(reply):
        self.session.send(self.query, 'resubmit_reply', content=reply, ident=client_id)

    content = msg['content']
    msg_ids = content['msg_ids']
    try:
        records = self.db.find_records({'msg_id' : {'$in' : msg_ids}}, keys=[
            'header', 'content', 'buffers'])
    except Exception:
        self.log.error('db::db error finding tasks to resubmit', exc_info=True)
        return finish(error.wrap_exception())

    # validate msg_ids
    found_ids = [ rec['msg_id'] for rec in records ]
    invalid_ids = [ m for m in found_ids if m in self.pending ]
    if len(records) > len(msg_ids):
        try:
            raise RuntimeError("DB appears to be in an inconsistent state. "
                "More matching records were found than should exist")
        except Exception:
            return finish(error.wrap_exception())
    elif len(records) < len(msg_ids):
        missing = [ m for m in msg_ids if m not in found_ids ]
        try:
            raise KeyError("No such msg(s): %r"%missing)
        except KeyError:
            return finish(error.wrap_exception())
    elif invalid_ids:
        msg_id = invalid_ids[0]
        try:
            raise ValueError("Task %r appears to be inflight"%(msg_id))
        except Exception:
            return finish(error.wrap_exception())

    # clear the existing records
    now = datetime.now()
    rec = empty_record()
    # these keys must survive the reset, so they are left out of the update
    for key in ('msg_id', 'header', 'content', 'buffers', 'submitted'):
        rec.pop(key)
    rec['resubmitted'] = now
    rec['queue'] = 'task'
    rec['client_uuid'] = client_id[0]
    try:
        for msg_id in msg_ids:
            self.all_completed.discard(msg_id)
            self.db.update_record(msg_id, rec)
    except Exception:
        self.log.error('db::db error upating record', exc_info=True)
        # report the failure instead of falling through to the 'ok' reply
        return finish(error.wrap_exception())

    # send the messages
    for rec in records:
        header = rec['header']
        # include resubmitted in header to prevent digest collision
        header['resubmitted'] = now
        msg = self.session.msg(header['msg_type'])
        msg['content'] = rec['content']
        msg['header'] = header
        msg['header']['msg_id'] = rec['msg_id']
        self.session.send(self.resubmit, msg, buffers=rec['buffers'])

    finish(dict(status='ok'))
1171 1171
1172 1172
1173 1173 def _extract_record(self, rec):
1174 1174 """decompose a TaskRecord dict into subsection of reply for get_result"""
1175 1175 io_dict = {}
1176 1176 for key in 'pyin pyout pyerr stdout stderr'.split():
1177 1177 io_dict[key] = rec[key]
1178 1178 content = { 'result_content': rec['result_content'],
1179 1179 'header': rec['header'],
1180 1180 'result_header' : rec['result_header'],
1181 1181 'io' : io_dict,
1182 1182 }
1183 1183 if rec['result_buffers']:
1184 1184 buffers = map(bytes, rec['result_buffers'])
1185 1185 else:
1186 1186 buffers = []
1187 1187
1188 1188 return content, buffers
1189 1189
def get_results(self, client_id, msg):
    """Get the result of 1 or more messages.

    The requested msg_ids are de-duplicated and sorted, then each is
    classified as pending or completed, first from the hub's in-memory
    sets and, failing that, from the ``completed`` value of its DB
    record.  Unless ``status_only`` is set, the extracted record of each
    completed message is added to the reply under its msg_id and its
    buffers are appended to the reply's buffers.

    An unknown msg_id, or a DB error while fetching the records, turns
    the ``result_reply`` into an error reply.
    """
    request = msg['content']
    msg_ids = sorted(set(request['msg_ids']))
    statusonly = request.get('status_only', False)
    pending = []
    completed = []
    content = dict(status='ok')
    content['pending'] = pending
    content['completed'] = completed
    buffers = []
    records = {}
    if not statusonly:
        try:
            matches = self.db.find_records(dict(msg_id={'$in':msg_ids}))
            # turn match list into dict, for faster lookup
            for match in matches:
                records[match['msg_id']] = match
        except Exception:
            content = error.wrap_exception()
            self.session.send(self.query, "result_reply", content=content,
                    parent=msg, ident=client_id)
            return

    for msg_id in msg_ids:
        if msg_id in self.pending:
            pending.append(msg_id)
        elif msg_id in self.all_completed:
            completed.append(msg_id)
            if not statusonly:
                c, bufs = self._extract_record(records[msg_id])
                content[msg_id] = c
                buffers.extend(bufs)
        elif msg_id in records:
            # Look at this message's own record.  The previous code read
            # a leftover loop variable, i.e. whichever record the DB
            # happened to return last.
            record = records[msg_id]
            if record['completed']:
                completed.append(msg_id)
                c, bufs = self._extract_record(record)
                content[msg_id] = c
                buffers.extend(bufs)
            else:
                pending.append(msg_id)
        else:
            try:
                raise KeyError('No such message: '+msg_id)
            except KeyError:
                content = error.wrap_exception()
            break
    self.session.send(self.query, "result_reply", content=content,
            parent=msg, ident=client_id,
            buffers=buffers)
1241 1241
def get_history(self, client_id, msg):
    """Get a list of all msg_ids in our DB records.

    Sends a ``history_reply`` holding the result of
    ``self.db.get_history()``, or the wrapped exception if that call
    fails.
    """
    try:
        history = self.db.get_history()
    except Exception:
        reply = error.wrap_exception()
    else:
        reply = dict(status='ok', history=history)

    self.session.send(self.query, "history_reply", content=reply,
            parent=msg, ident=client_id)
1253 1253
def db_query(self, client_id, msg):
    """Perform a raw query on the task record database.

    ``query`` and ``keys`` are read from the message content and passed
    to ``self.db.find_records``.  ``buffers`` and ``result_buffers`` are
    popped out of every returned record and sent as message buffers; the
    reply lists how many buffers each record contributed in
    ``buffer_lens`` and ``result_buffer_lens``.  A list is None when
    ``keys`` was given without the corresponding key.  On a DB error the
    ``db_reply`` carries the wrapped exception instead.
    """
    request = msg['content']
    query = request.get('query', {})
    keys = request.get('keys', None)
    buffers = []
    try:
        records = self.db.find_records(query, keys)
    except Exception:
        reply = error.wrap_exception()
    else:
        # extract buffers from reply content:
        want_buffers = keys is None or 'buffers' in keys
        want_result_buffers = keys is None or 'result_buffers' in keys
        buffer_lens = [] if want_buffers else None
        result_buffer_lens = [] if want_result_buffers else None

        for rec in records:
            # buffers may be None, so double check
            if want_buffers:
                request_bufs = rec.pop('buffers', None) or []
                buffer_lens.append(len(request_bufs))
                buffers.extend(request_bufs)
            if want_result_buffers:
                result_bufs = rec.pop('result_buffers', None) or []
                result_buffer_lens.append(len(result_bufs))
                buffers.extend(result_bufs)
        reply = dict(status='ok', records=records, buffer_lens=buffer_lens,
                     result_buffer_lens=result_buffer_lens)
    self.session.send(self.query, "db_reply", content=reply,
            parent=msg, ident=client_id,
            buffers=buffers)
1290 1290
@@ -1,714 +1,714 b''
1 1 """The Python scheduler for rich scheduling.
2 2
3 3 The Pure ZMQ scheduler does not allow routing schemes other than LRU,
4 4 nor does it check msg_id DAG dependencies. For those, a slightly slower
5 5 Python Scheduler exists.
6 6
7 7 Authors:
8 8
9 9 * Min RK
10 10 """
11 11 #-----------------------------------------------------------------------------
12 12 # Copyright (C) 2010-2011 The IPython Development Team
13 13 #
14 14 # Distributed under the terms of the BSD License. The full license is in
15 15 # the file COPYING, distributed as part of this software.
16 16 #-----------------------------------------------------------------------------
17 17
18 18 #----------------------------------------------------------------------
19 19 # Imports
20 20 #----------------------------------------------------------------------
21 21
22 22 from __future__ import print_function
23 23
24 24 import logging
25 25 import sys
26 26
27 27 from datetime import datetime, timedelta
28 28 from random import randint, random
29 29 from types import FunctionType
30 30
31 31 try:
32 32 import numpy
33 33 except ImportError:
34 34 numpy = None
35 35
36 36 import zmq
37 37 from zmq.eventloop import ioloop, zmqstream
38 38
39 39 # local imports
40 40 from IPython.external.decorator import decorator
41 41 from IPython.config.application import Application
42 42 from IPython.config.loader import Config
43 43 from IPython.utils.traitlets import Instance, Dict, List, Set, Int, Enum, CBytes
44 44
45 45 from IPython.parallel import error
46 46 from IPython.parallel.factory import SessionFactory
47 47 from IPython.parallel.util import connect_logger, local_logger, asbytes
48 48
49 49 from .dependency import Dependency
50 50
@decorator
def logged(f,self,*args,**kwargs):
    """Log a debug line naming the wrapped scheduler method, then call it.

    Uses ``f.__name__`` rather than the Python-2-only ``f.func_name``; both
    give the same value on Python 2.
    """
    self.log.debug("scheduler::%s(*%s,**%s)", f.__name__, args, kwargs)
    return f(self,*args, **kwargs)
57 57
58 58 #----------------------------------------------------------------------
59 59 # Chooser functions
60 60 #----------------------------------------------------------------------
61 61
def plainrandom(loads):
    """Return a uniformly random index into `loads` (values are ignored)."""
    return randint(0, len(loads) - 1)
66 66
def lru(loads):
    """Pick index 0, the front of the line.

    `loads` is assumed to be kept in LRU order (oldest first); its
    contents are never inspected.
    """
    front = 0
    return front
75 75
def twobin(loads):
    """Draw two random indices and return the smaller one.

    With `loads` kept in LRU order (oldest first) the smaller index is the
    less recently used of the two.  The load values are ignored.
    """
    last = len(loads) - 1
    first = randint(0, last)
    second = randint(0, last)
    return first if first <= second else second
87 87
def weighted(loads):
    """Pick two indices at random, weighted by inverse load.

    Returns whichever of the two has the larger weight (i.e. the smaller
    load); on a tie the first draw wins.
    """
    # the 1e-6 offset weights a load of 0 a million times more than 1
    weights = 1./(1e-6+numpy.array(loads))
    cumulative = weights.cumsum()
    total = cumulative[-1]

    def pick(threshold):
        # first position whose cumulative weight reaches `threshold`
        pos = 0
        while cumulative[pos] < threshold:
            pos += 1
        return pos

    first = pick(random()*total)
    second = pick(random()*total)
    if weights[second] > weights[first]:
        return second
    return first
109 109
def leastload(loads):
    """Return the index of the smallest load.

    When the minimum appears more than once the first occurrence wins, so
    with LRU-ordered loads the least recently used of the least loaded
    entries is chosen.
    """
    lowest = min(loads)
    return loads.index(lowest)
118 118
119 119 #---------------------------------------------------------------------
120 120 # Classes
121 121 #---------------------------------------------------------------------
122 122 # store empty default dependency:
123 123 MET = Dependency([])
124 124
class TaskScheduler(SessionFactory):
    """Python TaskScheduler object.

    This is the simplest object that supports msg_id based
    DAG dependencies. *Only* task msg_ids are checked, not
    msg_ids of jobs submitted via the MUX queue.

    Waiting tasks are stored in `depending` as five-item records
    ``(raw_msg, targets, after, follow, timeout)``; `graph` maps each
    unfinished dependency msg_id to the set of msg_ids waiting on it.
    """

    hwm = Int(0, config=True, shortname='hwm',
        help="""specify the High Water Mark (HWM) for the downstream
        socket in the Task scheduler. This is the maximum number
        of allowed outstanding tasks on each engine."""
    )
    scheme_name = Enum(('leastload', 'pure', 'lru', 'plainrandom', 'weighted', 'twobin'),
        'leastload', config=True, shortname='scheme', allow_none=False,
        help="""select the task scheduler scheme [default: Python LRU]
        Options are: 'pure', 'lru', 'plainrandom', 'weighted', 'twobin','leastload'"""
    )
    def _scheme_name_changed(self, old, new):
        # NOTE(review): only `new` is used. The traitlets notifier is believed
        # to call two-argument handlers as (name, new), so `old` may actually
        # receive the trait name -- confirm against IPython.utils.traitlets.
        self.log.debug("Using scheme %r"%new)
        # chooser functions live at module level under the scheme's name
        self.scheme = globals()[new]

    # input arguments:
    scheme = Instance(FunctionType) # function for determining the destination
    def _scheme_default(self):
        return leastload
    client_stream = Instance(zmqstream.ZMQStream) # client-facing stream
    engine_stream = Instance(zmqstream.ZMQStream) # engine-facing stream
    notifier_stream = Instance(zmqstream.ZMQStream) # hub-facing sub stream
    mon_stream = Instance(zmqstream.ZMQStream) # hub-facing pub stream

    # internals:
    graph = Dict() # dict by msg_id of [ msg_ids that depend on key ]
    retries = Dict() # dict by msg_id of retries remaining (non-neg ints)
    # waiting = List() # list of msg_ids ready to run, but haven't due to HWM
    depending = Dict() # dict by msg_id of (raw_msg, targets, after, follow, timeout)
    pending = Dict() # dict by engine_uuid of submitted tasks
    completed = Dict() # dict by engine_uuid of completed tasks
    failed = Dict() # dict by engine_uuid of failed tasks
    destinations = Dict() # dict by msg_id of engine_uuids where jobs ran (reverse of completed+failed)
    clients = Dict() # dict by msg_id for who submitted the task
    targets = List() # list of target IDENTs
    loads = List() # list of engine loads
    # full = Set() # set of IDENTs that have HWM outstanding tasks
    all_completed = Set() # set of all completed tasks
    all_failed = Set() # set of all failed tasks
    all_done = Set() # set of all finished tasks=union(completed,failed)
    all_ids = Set() # set of all submitted task IDs
    blacklist = Dict() # dict by msg_id of locations where a job has encountered UnmetDependency
    auditor = Instance('zmq.eventloop.ioloop.PeriodicCallback')

    ident = CBytes() # ZMQ identity. This should just be self.session.session
                     # but ensure Bytes
    def _ident_default(self):
        return asbytes(self.session.session)

    def start(self):
        """Attach stream handlers and start the periodic timeout audit.

        The client stream is not hooked up here; that happens in
        `resume_receiving` once the first engine registers.
        """
        self.engine_stream.on_recv(self.dispatch_result, copy=False)
        self._notification_handlers = dict(
            registration_notification = self._register_engine,
            unregistration_notification = self._unregister_engine
        )
        self.notifier_stream.on_recv(self.dispatch_notification)
        # period is 2e3 ms, i.e. one audit every two seconds (0.5 Hz)
        self.auditor = ioloop.PeriodicCallback(self.audit_timeouts, 2e3, self.loop)
        self.auditor.start()
        self.log.info("Scheduler started [%s]"%self.scheme_name)

    def resume_receiving(self):
        """Resume accepting jobs."""
        self.client_stream.on_recv(self.dispatch_submission, copy=False)

    def stop_receiving(self):
        """Stop accepting jobs while there are no engines.
        Leave them in the ZMQ queue."""
        self.client_stream.on_recv(None)

    #-----------------------------------------------------------------------
    # [Un]Registration Handling
    #-----------------------------------------------------------------------

    def dispatch_notification(self, msg):
        """dispatch register/unregister events."""
        try:
            idents,msg = self.session.feed_identities(msg)
        except ValueError:
            self.log.warn("task::Invalid Message: %r",msg)
            return
        try:
            msg = self.session.unserialize(msg)
        except ValueError:
            self.log.warn("task::Unauthorized message from: %r"%idents)
            return

        msg_type = msg['header']['msg_type']

        handler = self._notification_handlers.get(msg_type, None)
        if handler is None:
            self.log.error("Unhandled message type: %r"%msg_type)
        else:
            try:
                handler(asbytes(msg['content']['queue']))
            except Exception:
                # include the traceback: a failure inside the handler itself
                # would otherwise be indistinguishable from a malformed msg
                self.log.error("task::Invalid notification msg: %r", msg,
                               exc_info=True)

    def _register_engine(self, uid):
        """New engine with ident `uid` became available."""
        # head of the line:
        self.targets.insert(0,uid)
        self.loads.insert(0,0)

        # initialize sets
        self.completed[uid] = set()
        self.failed[uid] = set()
        self.pending[uid] = {}
        if len(self.targets) == 1:
            # first engine: start accepting submissions
            self.resume_receiving()
        # rescan the graph:
        self.update_graph(None)

    def _unregister_engine(self, uid):
        """Existing engine with ident `uid` became unavailable."""
        if len(self.targets) == 1:
            # this was our only engine
            self.stop_receiving()

        # handle any potentially finished tasks:
        self.engine_stream.flush()

        # don't pop destinations, because they might be used later
        # map(self.destinations.pop, self.completed.pop(uid))
        # map(self.destinations.pop, self.failed.pop(uid))

        # prevent this engine from receiving work
        idx = self.targets.index(uid)
        self.targets.pop(idx)
        self.loads.pop(idx)

        # wait 5 seconds before cleaning up pending jobs, since the results might
        # still be incoming
        if self.pending[uid]:
            dc = ioloop.DelayedCallback(lambda : self.handle_stranded_tasks(uid), 5000, self.loop)
            dc.start()
        else:
            self.completed.pop(uid)
            self.failed.pop(uid)

    def handle_stranded_tasks(self, engine):
        """Deal with jobs resident in an engine that died.

        A fake 'apply_reply' carrying an EngineError is built for every task
        still pending on `engine` and fed through `dispatch_result`.
        """
        lost = self.pending[engine]
        # snapshot the ids: dispatch_result removes entries from `lost`
        for msg_id in list(lost):
            if msg_id not in self.pending[engine]:
                # prevent double-handling of messages
                continue

            raw_msg = lost[msg_id][0]
            idents,msg = self.session.feed_identities(raw_msg, copy=False)
            parent = self.session.unpack(msg[1].bytes)
            idents = [engine, idents[0]]

            # build fake error reply
            try:
                raise error.EngineError("Engine %r died while running task %r"%(engine, msg_id))
            except:
                # raised only so that wrap_exception has an active exception
                content = error.wrap_exception()
            msg = self.session.msg('apply_reply', content, parent=parent, subheader={'status':'error'})
            # must be a real list: handle_result slice-assigns into it
            raw_reply = [zmq.Message(part)
                         for part in self.session.serialize(msg, ident=idents)]
            # and dispatch it
            self.dispatch_result(raw_reply)

        # finally scrub completed/failed lists
        self.completed.pop(engine)
        self.failed.pop(engine)

    #-----------------------------------------------------------------------
    # Job Submission
    #-----------------------------------------------------------------------
    def dispatch_submission(self, raw_msg):
        """Dispatch job submission to appropriate handlers."""
        # ensure targets up to date:
        self.notifier_stream.flush()
        try:
            idents, msg = self.session.feed_identities(raw_msg, copy=False)
            msg = self.session.unserialize(msg, content=False, copy=False)
        except Exception:
            self.log.error("task::Invaid task msg: %r"%raw_msg, exc_info=True)
            return

        # send to monitor
        self.mon_stream.send_multipart([b'intask']+raw_msg, copy=False)

        header = msg['header']
        msg_id = header['msg_id']
        self.all_ids.add(msg_id)

        # get targets as a set of bytes objects
        # from a list of unicode objects
        targets = set(map(asbytes, header.get('targets', [])))

        retries = header.get('retries', 0)
        self.retries[msg_id] = retries

        # time dependencies
        after = header.get('after', None)
        if after:
            after = Dependency(after)
            if after.all:
                # drop ids that have already satisfied the dependency
                if after.success:
                    after = Dependency(after.difference(self.all_completed),
                                success=after.success,
                                failure=after.failure,
                                all=after.all,
                    )
                if after.failure:
                    after = Dependency(after.difference(self.all_failed),
                                success=after.success,
                                failure=after.failure,
                                all=after.all,
                    )
            if after.check(self.all_completed, self.all_failed):
                # recast as empty set, if `after` already met,
                # to prevent unnecessary set comparisons
                after = MET
        else:
            after = MET

        # location dependencies
        follow = Dependency(header.get('follow', []))

        # turn timeouts into datetime objects:
        timeout = header.get('timeout', None)
        if timeout:
            timeout = datetime.now() + timedelta(0,timeout,0)

        args = [raw_msg, targets, after, follow, timeout]

        # validate and reduce dependencies:
        for dep in after,follow:
            if not dep: # empty dependency
                continue
            # check valid:
            if msg_id in dep or dep.difference(self.all_ids):
                # fail_unreachable expects the record to be in `depending`
                self.depending[msg_id] = args
                return self.fail_unreachable(msg_id, error.InvalidDependency)
            # check if unreachable:
            if dep.unreachable(self.all_completed, self.all_failed):
                self.depending[msg_id] = args
                return self.fail_unreachable(msg_id)

        if after.check(self.all_completed, self.all_failed):
            # time deps already met, try to run
            if not self.maybe_run(msg_id, *args):
                # can't run yet
                if msg_id not in self.all_failed:
                    # could have failed as unreachable
                    self.save_unmet(msg_id, *args)
        else:
            self.save_unmet(msg_id, *args)

    def audit_timeouts(self):
        """Audit all waiting tasks for expired timeouts."""
        now = datetime.now()
        # snapshot the ids: fail_unreachable removes entries while we iterate
        for msg_id in list(self.depending):
            # must recheck, in case one failure cascaded to another:
            if msg_id in self.depending:
                # record layout: (raw_msg, targets, after, follow, timeout)
                timeout = self.depending[msg_id][4]
                if timeout and timeout < now:
                    self.fail_unreachable(msg_id, error.TaskTimeout)

    def fail_unreachable(self, msg_id, why=error.ImpossibleDependency):
        """a task has become unreachable, send a reply with an ImpossibleDependency
        error.

        `msg_id` must be present in `depending`; `why` is the exception class
        to raise and wrap into the reply.
        """
        if msg_id not in self.depending:
            self.log.error("msg %r already failed!", msg_id)
            return
        raw_msg,targets,after,follow,timeout = self.depending.pop(msg_id)
        for mid in follow.union(after):
            if mid in self.graph:
                # discard, not remove: a task that fails on first submission
                # was never added to the graph by save_unmet
                self.graph[mid].discard(msg_id)

        # FIXME: unpacking a message I've already unpacked, but didn't save:
        idents,msg = self.session.feed_identities(raw_msg, copy=False)
        header = self.session.unpack(msg[1].bytes)

        try:
            raise why()
        except:
            # raised only so that wrap_exception has an active exception
            content = error.wrap_exception()

        self.all_done.add(msg_id)
        self.all_failed.add(msg_id)

        msg = self.session.send(self.client_stream, 'apply_reply', content,
                                parent=header, ident=idents)
        self.session.send(self.mon_stream, msg, ident=[b'outtask']+idents)

        # cascade: tasks depending on this one may now be unreachable too
        self.update_graph(msg_id, success=False)

    def maybe_run(self, msg_id, raw_msg, targets, after, follow, timeout):
        """check location dependencies, and run if they are met.

        Returns True if the task was submitted to an engine, False otherwise
        (in which case it may also have been failed as unreachable).
        """
        blacklist = self.blacklist.setdefault(msg_id, set())
        if follow or targets or blacklist or self.hwm:
            # we need a can_run filter
            def can_run(idx):
                # check hwm
                if self.hwm and self.loads[idx] == self.hwm:
                    return False
                target = self.targets[idx]
                # check blacklist
                if target in blacklist:
                    return False
                # check targets
                if targets and target not in targets:
                    return False
                # check follow
                return follow.check(self.completed[target], self.failed[target])

            # a real list: tested for emptiness here, indexed in submit_task
            indices = [idx for idx in range(len(self.targets)) if can_run(idx)]

            if not indices:
                # couldn't run
                if follow.all:
                    # check follow for impossibility
                    dests = set()
                    relevant = set()
                    if follow.success:
                        relevant = self.all_completed
                    if follow.failure:
                        relevant = relevant.union(self.all_failed)
                    for m in follow.intersection(relevant):
                        dests.add(self.destinations[m])
                    if len(dests) > 1:
                        # required tasks ran on different engines
                        self.depending[msg_id] = (raw_msg, targets, after, follow, timeout)
                        self.fail_unreachable(msg_id)
                        return False
                if targets:
                    # check blacklist+targets for impossibility
                    targets.difference_update(blacklist)
                    if not targets or not targets.intersection(self.targets):
                        self.depending[msg_id] = (raw_msg, targets, after, follow, timeout)
                        self.fail_unreachable(msg_id)
                        return False
                return False
        else:
            indices = None

        self.submit_task(msg_id, raw_msg, targets, follow, timeout, indices)
        return True

    def save_unmet(self, msg_id, raw_msg, targets, after, follow, timeout):
        """Save a message for later submission when its dependencies are met."""
        self.depending[msg_id] = [raw_msg,targets,after,follow,timeout]
        # track the ids in follow or after, but not those already finished
        for dep_id in after.union(follow).difference(self.all_done):
            if dep_id not in self.graph:
                self.graph[dep_id] = set()
            self.graph[dep_id].add(msg_id)

    def submit_task(self, msg_id, raw_msg, targets, follow, timeout, indices=None):
        """Submit a task to any of a subset of our targets."""
        if indices:
            loads = [self.loads[i] for i in indices]
        else:
            loads = self.loads
        idx = self.scheme(loads)
        if indices:
            # map the position in the filtered list back to a real index
            idx = indices[idx]
        target = self.targets[idx]
        # send job to the engine
        self.engine_stream.send(target, flags=zmq.SNDMORE, copy=False)
        self.engine_stream.send_multipart(raw_msg, copy=False)
        # update load
        self.add_job(idx)
        self.pending[target][msg_id] = (raw_msg, targets, MET, follow, timeout)
        # notify Hub
        content = dict(msg_id=msg_id, engine_id=target.decode('ascii'))
        self.session.send(self.mon_stream, 'task_destination', content=content,
                        ident=[b'tracktask',self.ident])

    #-----------------------------------------------------------------------
    # Result Handling
    #-----------------------------------------------------------------------
    def dispatch_result(self, raw_msg):
        """dispatch method for result replies"""
        try:
            idents,msg = self.session.feed_identities(raw_msg, copy=False)
            msg = self.session.unserialize(msg, content=False, copy=False)
            engine = idents[0]
            try:
                idx = self.targets.index(engine)
            except ValueError:
                pass # skip load-update for dead engines
            else:
                self.finish_job(idx)
        except Exception:
            self.log.error("task::Invaid result: %r", raw_msg, exc_info=True)
            return

        header = msg['header']
        parent = msg['parent_header']
        if header.get('dependencies_met', True):
            success = (header['status'] == 'ok')
            msg_id = parent['msg_id']
            retries = self.retries[msg_id]
            if not success and retries > 0:
                # failed, but retries remain: resubmit elsewhere
                self.retries[msg_id] = retries - 1
                self.handle_unmet_dependency(idents, parent)
            else:
                del self.retries[msg_id]
                # relay to client and update graph
                self.handle_result(idents, parent, raw_msg, success)
                # send to Hub monitor
                self.mon_stream.send_multipart([b'outtask']+raw_msg, copy=False)
        else:
            self.handle_unmet_dependency(idents, parent)

    def handle_result(self, idents, parent, raw_msg, success=True):
        """handle a real task result, either success or failure"""
        # first, relay result to client
        engine = idents[0]
        client = idents[1]
        # swap ids for the ROUTER-ROUTER mirror
        raw_msg[:2] = [client,engine]
        self.client_stream.send_multipart(raw_msg, copy=False)
        # now, update our data structures
        msg_id = parent['msg_id']
        self.blacklist.pop(msg_id, None)
        self.pending[engine].pop(msg_id)
        if success:
            self.completed[engine].add(msg_id)
            self.all_completed.add(msg_id)
        else:
            self.failed[engine].add(msg_id)
            self.all_failed.add(msg_id)
        self.all_done.add(msg_id)
        self.destinations[msg_id] = engine

        self.update_graph(msg_id, success)

    def handle_unmet_dependency(self, idents, parent):
        """handle an unmet dependency"""
        engine = idents[0]
        msg_id = parent['msg_id']

        if msg_id not in self.blacklist:
            self.blacklist[msg_id] = set()
        self.blacklist[msg_id].add(engine)

        args = self.pending[engine].pop(msg_id)
        # record layout: (raw_msg, targets, after, follow, timeout)
        targets = args[1]

        if self.blacklist[msg_id] == targets:
            # every requested target has refused the task
            self.depending[msg_id] = args
            self.fail_unreachable(msg_id)
        elif not self.maybe_run(msg_id, *args):
            # resubmit failed
            if msg_id not in self.all_failed:
                # put it back in our dependency tree
                self.save_unmet(msg_id, *args)

        if self.hwm:
            try:
                idx = self.targets.index(engine)
            except ValueError:
                pass # skip load-update for dead engines
            else:
                if self.loads[idx] == self.hwm-1:
                    self.update_graph(None)

    def update_graph(self, dep_id=None, success=True):
        """dep_id just finished. Update our dependency
        graph and submit any jobs that just became runnable.

        Called with dep_id=None to update entire graph for hwm, but without finishing
        a task.
        """
        # update any jobs that depended on the dependency
        jobs = self.graph.pop(dep_id, [])

        # recheck *all* jobs if
        # a) we have HWM and an engine just become no longer full
        # or b) dep_id was given as None
        if dep_id is None or (self.hwm and any(load == self.hwm-1 for load in self.loads)):
            # snapshot the ids: entries are removed from `depending` below
            jobs = list(self.depending)

        for msg_id in jobs:
            if msg_id not in self.depending:
                # already handled by a failure that cascaded from an
                # earlier job in this same pass
                continue
            raw_msg, targets, after, follow, timeout = self.depending[msg_id]

            if after.unreachable(self.all_completed, self.all_failed)\
                    or follow.unreachable(self.all_completed, self.all_failed):
                self.fail_unreachable(msg_id)

            elif after.check(self.all_completed, self.all_failed): # time deps met, maybe run
                if self.maybe_run(msg_id, raw_msg, targets, MET, follow, timeout):

                    self.depending.pop(msg_id)
                    for mid in follow.union(after):
                        if mid in self.graph:
                            self.graph[mid].discard(msg_id)

    #----------------------------------------------------------------------
    # methods to be overridden by subclasses
    #----------------------------------------------------------------------

    def add_job(self, idx):
        """Called after self.targets[idx] just got the job with header.
        Override with subclasses. The default ordering is simple LRU.
        The default loads are the number of outstanding jobs."""
        self.loads[idx] += 1
        # move the engine to the back of the line in both parallel lists
        for lis in (self.targets, self.loads):
            lis.append(lis.pop(idx))

    def finish_job(self, idx):
        """Called after self.targets[idx] just finished a job.
        Override with subclasses."""
        self.loads[idx] -= 1
660 660
661 661
662 662
def launch_scheduler(in_addr, out_addr, mon_addr, not_addr, config=None,
                        logname='root', log_url=None, loglevel=logging.DEBUG,
                        identity=b'task', in_thread=False):
    """Create the scheduler's sockets, build a TaskScheduler and start it.

    Two ROUTER sockets are bound, at `in_addr` (used as the client stream)
    and `out_addr` (used as the engine stream), both with ZMQ identity
    `identity`.  A PUB socket connects to `mon_addr` and a SUB socket,
    subscribed to everything, connects to `not_addr`.

    With `in_thread` true the shared Context/IOLoop instances and the
    Application logger are used and the loop is not started here; otherwise
    fresh ones are created and the loop is run until interrupted.
    """
    ZMQStream = zmqstream.ZMQStream

    if config:
        # unwrap dict back into Config
        config = Config(config)

    if in_thread:
        # use instance() to get the same Context/Loop as our parent
        ctx = zmq.Context.instance()
        loop = ioloop.IOLoop.instance()
    else:
        # in a process, don't use instance()
        # for safety with multiprocessing
        ctx = zmq.Context()
        loop = ioloop.IOLoop()

    # client-facing socket
    ins = ZMQStream(ctx.socket(zmq.ROUTER),loop)
    ins.setsockopt(zmq.IDENTITY, identity)
    ins.bind(in_addr)

    # engine-facing socket
    outs = ZMQStream(ctx.socket(zmq.ROUTER),loop)
    outs.setsockopt(zmq.IDENTITY, identity)
    outs.bind(out_addr)

    mons = ZMQStream(ctx.socket(zmq.PUB),loop)
    mons.connect(mon_addr)
    nots = ZMQStream(ctx.socket(zmq.SUB),loop)
    nots.setsockopt(zmq.SUBSCRIBE, b'')
    nots.connect(not_addr)

    # setup logging.
    if in_thread:
        log = Application.instance().log
    else:
        if log_url:
            log = connect_logger(logname, ctx, log_url, root="scheduler", loglevel=loglevel)
        else:
            log = local_logger(logname, loglevel)

    scheduler = TaskScheduler(client_stream=ins, engine_stream=outs,
                            mon_stream=mons, notifier_stream=nots,
                            loop=loop, log=log,
                            config=config)
    scheduler.start()
    if not in_thread:
        try:
            loop.start()
        except KeyboardInterrupt:
            print ("interrupted, exiting...", file=sys.__stderr__)
714 714
@@ -1,226 +1,226 b''
1 1 """A simple engine that talks to a controller over 0MQ.
2 2 it handles registration, etc. and launches a kernel
3 3 connected to the Controller's Schedulers.
4 4
5 5 Authors:
6 6
7 7 * Min RK
8 8 """
9 9 #-----------------------------------------------------------------------------
10 10 # Copyright (C) 2010-2011 The IPython Development Team
11 11 #
12 12 # Distributed under the terms of the BSD License. The full license is in
13 13 # the file COPYING, distributed as part of this software.
14 14 #-----------------------------------------------------------------------------
15 15
16 16 from __future__ import print_function
17 17
18 18 import sys
19 19 import time
20 20 from getpass import getpass
21 21
22 22 import zmq
23 23 from zmq.eventloop import ioloop, zmqstream
24 24
25 25 from IPython.external.ssh import tunnel
26 26 # internal
27 27 from IPython.utils.traitlets import (
28 28 Instance, Dict, Int, Type, CFloat, Unicode, CBytes, Bool
29 29 )
30 30 # from IPython.utils.localinterfaces import LOCALHOST
31 31
32 32 from IPython.parallel.controller.heartmonitor import Heart
33 33 from IPython.parallel.factory import RegistrationFactory
34 34 from IPython.parallel.util import disambiguate_url, asbytes
35 35
36 36 from IPython.zmq.session import Message
37 37
38 38 from .streamkernel import Kernel
39 39
40 40 class EngineFactory(RegistrationFactory):
41 41 """IPython engine"""
42 42
43 43 # configurables:
44 44 out_stream_factory=Type('IPython.zmq.iostream.OutStream', config=True,
45 45 help="""The OutStream for handling stdout/err.
46 46 Typically 'IPython.zmq.iostream.OutStream'""")
47 47 display_hook_factory=Type('IPython.zmq.displayhook.ZMQDisplayHook', config=True,
48 48 help="""The class for handling displayhook.
49 49 Typically 'IPython.zmq.displayhook.ZMQDisplayHook'""")
50 50 location=Unicode(config=True,
51 51 help="""The location (an IP address) of the controller. This is
52 52 used for disambiguating URLs, to determine whether
53 53 loopback should be used to connect or the public address.""")
54 54 timeout=CFloat(2,config=True,
55 55 help="""The time (in seconds) to wait for the Controller to respond
56 56 to registration requests before giving up.""")
57 57 sshserver=Unicode(config=True,
58 58 help="""The SSH server to use for tunneling connections to the Controller.""")
59 59 sshkey=Unicode(config=True,
60 60 help="""The SSH private key file to use when tunneling connections to the Controller.""")
61 61 paramiko=Bool(sys.platform == 'win32', config=True,
62 62 help="""Whether to use paramiko instead of openssh for tunnels.""")
63 63
64 64 # not configurable:
65 65 user_ns=Dict()
66 66 id=Int(allow_none=True)
67 67 registrar=Instance('zmq.eventloop.zmqstream.ZMQStream')
68 68 kernel=Instance(Kernel)
69 69
70 70 bident = CBytes()
71 71 ident = Unicode()
72 72 def _ident_changed(self, name, old, new):
73 73 self.bident = asbytes(new)
74 74 using_ssh=Bool(False)
75 75
76 76
def __init__(self, **kwargs):
    """Initialise the factory, then take the session id as our identity."""
    super(EngineFactory, self).__init__(**kwargs)
    # assigning `ident` also refreshes `bident` via _ident_changed
    session_id = self.session.session
    self.ident = session_id
80 80
def init_connector(self):
    """Build the two connection helpers, tunnelling over SSH when configured.

    Returns ``(connect, maybe_tunnel)``: ``connect(s, url)`` connects socket
    `s` to `url`; ``maybe_tunnel(url)`` only opens the tunnel (if any) and
    returns the url to connect to, for use by the heartbeat.
    """
    self.using_ssh = bool(self.sshkey or self.sshserver)

    if self.sshkey and not self.sshserver:
        # We are using ssh directly to the controller, tunneling localhost to localhost
        self.sshserver = self.url.split('://')[1].split(':')[0]

    # only prompt when ssh is in use and passwordless login does not work
    password = False
    if self.using_ssh and not tunnel.try_passwordless_ssh(
            self.sshserver, self.sshkey, self.paramiko):
        password = getpass("SSH Password for %s: "%self.sshserver)

    def connect(s, url):
        url = disambiguate_url(url, self.location)
        if not self.using_ssh:
            return s.connect(url)
        self.log.debug("Tunneling connection to %s via %s"%(url, self.sshserver))
        return tunnel.tunnel_connection(s, url, self.sshserver,
                    keyfile=self.sshkey, paramiko=self.paramiko,
                    password=password,
        )

    def maybe_tunnel(url):
        """like connect, but don't complete the connection (for use by heartbeat)"""
        url = disambiguate_url(url, self.location)
        if self.using_ssh:
            self.log.debug("Tunneling connection to %s via %s"%(url, self.sshserver))
            url, _tunnel = tunnel.open_tunnel(url, self.sshserver,
                        keyfile=self.sshkey, paramiko=self.paramiko,
                        password=password,
            )
        return url

    return connect, maybe_tunnel
119 119
def register(self):
    """send the registration_request

    Opens a DEALER socket with our identity, connects it to the
    controller url (through a tunnel if configured), and sends a
    "registration_request"; the reply is handled by
    `complete_registration`.
    """
    self.log.info("Registering with controller at %s"%self.url)
    ctx = self.context
    connect,maybe_tunnel = self.init_connector()
    # a single DEALER socket; the superseded XREQ creation is dropped
    reg = ctx.socket(zmq.DEALER)
    reg.setsockopt(zmq.IDENTITY, self.bident)
    connect(reg, self.url)
    self.registrar = zmqstream.ZMQStream(reg, self.loop)

    content = dict(queue=self.ident, heartbeat=self.ident, control=self.ident)
    self.registrar.on_recv(lambda msg: self.complete_registration(msg, connect, maybe_tunnel))
    self.session.send(self.registrar, "registration_request",content=content)
136 136
def complete_registration(self, msg, connect, maybe_tunnel):
    """Handle the controller's reply to the registration request.

    ``msg`` is the raw reply received on the registrar stream; ``connect``
    and ``maybe_tunnel`` are the helpers returned by ``init_connector``.

    On a reply with status 'ok' this stores the engine id, starts the
    heartbeat, creates and connects the shell, control and iopub streams,
    optionally replaces sys.stdout/sys.stderr/sys.displayhook, and starts
    the Kernel. Any other status is logged as fatal and raised as an
    Exception.
    """
    # print msg
    # a reply arrived, so the registration timeout must not fire
    self._abort_dc.stop()
    ctx = self.context
    loop = self.loop
    identity = self.bident
    idents,msg = self.session.feed_identities(msg)
    msg = Message(self.session.unserialize(msg))

    if msg.content.status == 'ok':
        self.id = int(msg.content.id)

        # launch heartbeat
        hb_addrs = msg.content.heartbeat

        # possibly forward hb ports with tunnels
        hb_addrs = [ maybe_tunnel(addr) for addr in hb_addrs ]
        heart = Heart(*map(str, hb_addrs), heart_id=identity)
        heart.start()

        # create Shell Streams (MUX, Task, etc.):
        queue_addr = msg.content.mux
        shell_addrs = [ str(queue_addr) ]
        task_addr = msg.content.task
        # the task address is optional in the reply
        if task_addr:
            shell_addrs.append(str(task_addr))

        # Uncomment this to go back to two-socket model
        # shell_streams = []
        # for addr in shell_addrs:
        #     stream = zmqstream.ZMQStream(ctx.socket(zmq.ROUTER), loop)
        #     stream.setsockopt(zmq.IDENTITY, identity)
        #     stream.connect(disambiguate_url(addr, self.location))
        #     shell_streams.append(stream)

        # Now use only one shell stream for mux and tasks
        stream = zmqstream.ZMQStream(ctx.socket(zmq.ROUTER), loop)
        stream.setsockopt(zmq.IDENTITY, identity)
        shell_streams = [stream]
        for addr in shell_addrs:
            connect(stream, addr)
        # end single stream-socket

        # control stream:
        control_addr = str(msg.content.control)
        control_stream = zmqstream.ZMQStream(ctx.socket(zmq.ROUTER), loop)
        control_stream.setsockopt(zmq.IDENTITY, identity)
        connect(control_stream, control_addr)

        # create iopub stream:
        iopub_addr = msg.content.iopub
        iopub_stream = zmqstream.ZMQStream(ctx.socket(zmq.PUB), loop)
        iopub_stream.setsockopt(zmq.IDENTITY, identity)
        connect(iopub_stream, iopub_addr)

        # Redirect output streams and set a display hook; each replacement
        # publishes on iopub under a topic that includes this engine's id.
        if self.out_stream_factory:
            sys.stdout = self.out_stream_factory(self.session, iopub_stream, u'stdout')
            sys.stdout.topic = 'engine.%i.stdout'%self.id
            sys.stderr = self.out_stream_factory(self.session, iopub_stream, u'stderr')
            sys.stderr.topic = 'engine.%i.stderr'%self.id
        if self.display_hook_factory:
            sys.displayhook = self.display_hook_factory(self.session, iopub_stream)
            sys.displayhook.topic = 'engine.%i.pyout'%self.id

        self.kernel = Kernel(config=self.config, int_id=self.id, ident=self.ident, session=self.session,
                control_stream=control_stream, shell_streams=shell_streams, iopub_stream=iopub_stream,
                loop=loop, user_ns = self.user_ns, log=self.log)
        self.kernel.start()


    else:
        self.log.fatal("Registration Failed: %s"%msg)
        raise Exception("Registration Failed: %s"%msg)

    # only reached on success; the failure branch above raises
    self.log.info("Completed registration with id %i"%self.id)
213 213
214 214
def abort(self):
    """Give up on registration: log, ask to be unregistered, and exit with status 255."""
    timeout_message = "Registration timed out after %.1f seconds"%self.timeout
    self.log.fatal(timeout_message)
    unregistration = {'id': self.id}
    self.session.send(self.registrar, "unregistration_request", content=unregistration)
    # pause for a second before the process exits
    time.sleep(1)
    sys.exit(255)
220 220
def start(self):
    """Schedule registration on the event loop and arm the registration timeout."""
    # run register() as soon as the loop is running
    register_callback = ioloop.DelayedCallback(self.register, 0, self.loop)
    register_callback.start()
    # self.timeout is in seconds, DelayedCallback is given milliseconds
    timeout_ms = self.timeout*1000
    self._abort_dc = ioloop.DelayedCallback(self.abort, timeout_ms, self.loop)
    self._abort_dc.start()
226 226
@@ -1,230 +1,230 b''
1 1 """KernelStarter class that intercepts Control Queue messages, and handles process management.
2 2
3 3 Authors:
4 4
5 5 * Min RK
6 6 """
7 7 #-----------------------------------------------------------------------------
8 8 # Copyright (C) 2010-2011 The IPython Development Team
9 9 #
10 10 # Distributed under the terms of the BSD License. The full license is in
11 11 # the file COPYING, distributed as part of this software.
12 12 #-----------------------------------------------------------------------------
13 13
import signal
import sys
import time

import zmq
from zmq.eventloop import ioloop, zmqstream

from IPython.utils.localinterfaces import LOCAL_IPS
from IPython.zmq.session import Session
17 17
class KernelStarter(object):
    """Object for resetting/killing the Kernel.

    Sits between an ``upstream`` and a ``downstream`` stream. A message
    received on one side is forwarded unchanged to the other side, unless
    its ``msg_type`` has an entry in ``self.handlers``; in that case the
    handler is called with the unserialized message instead.
    """

    def __init__(self, session, upstream, downstream, *kernel_args, **kernel_kwargs):
        self.session = session
        self.upstream = upstream
        self.downstream = downstream
        self.kernel_args = kernel_args
        self.kernel_kwargs = kernel_kwargs
        # Process-management state read by has_kernel/restart_kernel/
        # kill_kernel; initialised here so those paths do not fail with
        # AttributeError before a kernel has been started.
        self.kernel = None
        self._launch_args = None
        self._hb_channel = None
        self.handlers = {}
        for method in 'shutdown_request shutdown_reply'.split():
            # only register handlers that are actually implemented
            handler = getattr(self, method, None)
            if handler is not None:
                self.handlers[method] = handler

    def start(self):
        """Attach the dispatch methods as receive callbacks of both streams."""
        self.upstream.on_recv(self.dispatch_request)
        self.downstream.on_recv(self.dispatch_reply)

    #--------------------------------------------------------------------------
    # Dispatch methods
    #--------------------------------------------------------------------------

    def _dispatch(self, raw_msg, forward_to):
        """Hand raw_msg to its handler, or forward it untouched to forward_to."""
        idents, msg = self.session.feed_identities(raw_msg)
        try:
            msg = self.session.unserialize(msg, content=False)
        except Exception:
            # an unreadable message cannot be routed by type; report and drop it
            print ("bad msg: %s"%msg)
            return

        msgtype = msg['header']['msg_type']
        handler = self.handlers.get(msgtype, None)
        if handler is None:
            forward_to.send_multipart(raw_msg, copy=False)
        else:
            handler(msg)

    def dispatch_request(self, raw_msg):
        """Route a message received from upstream (default: forward downstream)."""
        self._dispatch(raw_msg, self.downstream)

    def dispatch_reply(self, raw_msg):
        """Route a message received from downstream (default: forward upstream)."""
        self._dispatch(raw_msg, self.upstream)

    #--------------------------------------------------------------------------
    # Handlers
    #--------------------------------------------------------------------------

    def shutdown_request(self, msg):
        """Pass a shutdown_request on to the downstream stream."""
        # NOTE(review): msg is the unserialized message here, whereas the
        # forwarding path sends the raw frame list -- confirm this is intended.
        self.downstream.send_multipart(msg)

    #--------------------------------------------------------------------------
    # Kernel process management methods, from KernelManager:
    #--------------------------------------------------------------------------

    @staticmethod
    def _check_local(addr):
        """Return whether addr (a host, or a tuple whose first item is the host) is in LOCAL_IPS."""
        if isinstance(addr, tuple):
            addr = addr[0]
        return addr in LOCAL_IPS

    def start_kernel(self, **kw):
        """Create the kernel Process object from the stored kernel args.

        The keyword arguments are remembered so that restart_kernel can
        replay them.
        """
        self._launch_args = kw.copy()
        # NOTE(review): Process and make_kernel are not imported in the
        # visible part of this module -- confirm where they come from.
        self.kernel = Process(target=make_kernel, args=self.kernel_args,
                                    kwargs=self.kernel_kwargs)

    def shutdown_kernel(self, restart=False):
        """ Attempts to stop the kernel process cleanly. If the kernel
        cannot be stopped, it is killed, if possible.
        """
        # FIXME: Shutdown does not work on Windows due to ZMQ errors!
        if sys.platform == 'win32':
            self.kill_kernel()
            return

        # Don't send any additional kernel kill messages immediately, to give
        # the kernel a chance to properly execute shutdown actions. Wait for at
        # most 1s, checking every 0.1s.
        # NOTE(review): xreq_channel is not set anywhere in this class.
        self.xreq_channel.shutdown(restart=restart)
        for i in range(10):
            if self.is_alive:
                time.sleep(0.1)
            else:
                break
        else:
            # OK, we've waited long enough.
            if self.has_kernel:
                self.kill_kernel()

    def restart_kernel(self, now=False):
        """Restarts a kernel with the same arguments that were used to launch
        it.

        Parameters
        ----------
        now : bool, optional
            If True, the kernel is forcefully restarted *immediately*, without
            having a chance to do any cleanup action.  Otherwise the kernel is
            given 1s to clean up before a forceful restart is issued.

            In all cases the kernel is restarted, the only difference is whether
            it is given a chance to perform a clean shutdown or not.

        Raises
        ------
        RuntimeError
            If start_kernel has never been called.
        """
        if self._launch_args is None:
            raise RuntimeError("Cannot restart the kernel. "
                               "No previous call to 'start_kernel'.")

        if self.has_kernel:
            if now:
                self.kill_kernel()
            else:
                self.shutdown_kernel(restart=True)
        self.start_kernel(**self._launch_args)

        # FIXME: Messages get dropped in Windows due to probable ZMQ bug
        # unless there is some delay here.
        if sys.platform == 'win32':
            time.sleep(0.2)

    @property
    def has_kernel(self):
        """Returns whether a kernel process has been specified for the kernel
        manager.
        """
        return self.kernel is not None

    def kill_kernel(self):
        """ Kill the running kernel.

        Raises RuntimeError when there is no kernel to kill.
        """
        if not self.has_kernel:
            raise RuntimeError("Cannot kill kernel. No kernel is running!")

        # Pause the heart beat channel if it exists.
        if self._hb_channel is not None:
            self._hb_channel.pause()

        # Attempt to kill the kernel.
        try:
            self.kernel.kill()
        except OSError as e:
            # In Windows, we will get an Access Denied error if the process
            # has already terminated. Ignore it.
            if not (sys.platform == 'win32' and e.winerror == 5):
                raise
        self.kernel = None

    def interrupt_kernel(self):
        """ Interrupts the kernel. Unlike ``signal_kernel``, this operation is
        well supported on all platforms.
        """
        if self.has_kernel:
            if sys.platform == 'win32':
                from parentpoller import ParentPollerWindows as Poller
                Poller.send_interrupt(self.kernel.win32_interrupt_event)
            else:
                self.kernel.send_signal(signal.SIGINT)
        else:
            raise RuntimeError("Cannot interrupt kernel. No kernel is running!")

    def signal_kernel(self, signum):
        """ Sends a signal to the kernel. Note that since only SIGTERM is
        supported on Windows, this function is only useful on Unix systems.
        """
        if self.has_kernel:
            self.kernel.send_signal(signum)
        else:
            raise RuntimeError("Cannot signal kernel. No kernel is running!")

    @property
    def is_alive(self):
        """Is the kernel process still running?"""
        # FIXME: not using a heartbeat means this method is broken for any
        # remote kernel, it's only capable of handling local kernels.
        if self.has_kernel:
            # poll() returning None is taken to mean "has not exited"
            return self.kernel.poll() is None
        # We didn't start the kernel with this KernelManager so we don't
        # know if it is running. We should use a heartbeat for this case.
        return True
215 215
216 216
def make_starter(up_addr, down_addr, *args, **kwargs):
    """Entry point function for launching a kernelstarter in a subprocess.

    Connects one DEALER stream to each address, wraps them in a
    KernelStarter (extra arguments are passed through to it) and runs the
    event loop.
    """
    loop = ioloop.IOLoop.instance()
    ctx = zmq.Context()
    session = Session()

    def dealer_stream(addr):
        # one DEALER socket per side, driven by the shared loop
        stream = zmqstream.ZMQStream(ctx.socket(zmq.DEALER), loop)
        stream.connect(addr)
        return stream

    upstream = dealer_stream(up_addr)
    downstream = dealer_stream(down_addr)

    KernelStarter(session, upstream, downstream, *args, **kwargs).start()
    loop.start()
230 230
@@ -1,34 +1,42 b''
1 1 #-----------------------------------------------------------------------------
2 2 # Copyright (C) 2010 The IPython Development Team
3 3 #
4 4 # Distributed under the terms of the BSD License. The full license is in
5 5 # the file COPYING.txt, distributed as part of this software.
6 6 #-----------------------------------------------------------------------------
7 7
8 8 #-----------------------------------------------------------------------------
9 9 # Verify zmq version dependency >= 2.1.4
10 10 #-----------------------------------------------------------------------------
11 11
12 import re
12 13 import warnings
13 14
14 minimum_pyzmq_version = "2.1.4"
def check_for_zmq(minimum_version, module='IPython.zmq'):
    """Verify the installed pyzmq and patch in the DEALER/ROUTER aliases.

    Parameters
    ----------
    minimum_version : str
        Lowest acceptable pyzmq version, as dotted integers (e.g. '2.1.4').
    module : str, optional
        Name of the requiring module, used in the error messages.

    Raises
    ------
    ImportError
        If pyzmq cannot be imported, or its version is below
        ``minimum_version`` (versions containing 'dev' are not checked).

    A RuntimeWarning is issued when the underlying libzmq has major
    version 4 or higher.
    """
    min_vlist = [int(n) for n in minimum_version.split('.')]

    try:
        import zmq
    except ImportError:
        raise ImportError("%s requires pyzmq >= %s"%(module, minimum_version))

    pyzmq_version = zmq.__version__
    vlist = [int(n) for n in re.findall(r'\d+', pyzmq_version)]

    if 'dev' not in pyzmq_version and vlist < min_vlist:
        raise ImportError("%s requires pyzmq >= %s, but you have %s"%(
                        module, minimum_version, pyzmq_version))

    # fix missing DEALER/ROUTER aliases in pyzmq < 2.1.9
    if not hasattr(zmq, 'DEALER'):
        zmq.DEALER = zmq.XREQ
    if not hasattr(zmq, 'ROUTER'):
        zmq.ROUTER = zmq.XREP

    # Compare the libzmq major version as a number: as strings,
    # '10.0.0' sorts before '4.0.0' and would slip through.
    libzmq_vlist = [int(n) for n in re.findall(r'\d+', zmq.zmq_version())]
    if libzmq_vlist and libzmq_vlist[0] >= 4:
        warnings.warn("""libzmq 4 detected.
        It is unlikely that IPython's zmq code will work properly.
        Please install libzmq stable, which is 2.1.x or 2.2.x""",
            RuntimeWarning)
34 41
42 check_for_zmq('2.1.4')
@@ -1,196 +1,196 b''
1 1 #!/usr/bin/env python
2 2 """A simple interactive frontend that talks to a kernel over 0MQ.
3 3 """
4 4
5 5 #-----------------------------------------------------------------------------
6 6 # Imports
7 7 #-----------------------------------------------------------------------------
8 8 # stdlib
9 9 import cPickle as pickle
10 10 import code
11 11 import readline
12 12 import sys
13 13 import time
14 14 import uuid
15 15
16 16 # our own
17 17 import zmq
18 18 import session
19 19 import completer
20 20 from IPython.utils.localinterfaces import LOCALHOST
21 21
22 22 #-----------------------------------------------------------------------------
23 23 # Classes and functions
24 24 #-----------------------------------------------------------------------------
25 25
26 26 class Console(code.InteractiveConsole):
27 27
28 28 def __init__(self, locals=None, filename="<console>",
29 29 session = session,
30 30 request_socket=None,
31 31 sub_socket=None):
32 32 code.InteractiveConsole.__init__(self, locals, filename)
33 33 self.session = session
34 34 self.request_socket = request_socket
35 35 self.sub_socket = sub_socket
36 36 self.backgrounded = 0
37 37 self.messages = {}
38 38
39 39 # Set tab completion
40 40 self.completer = completer.ClientCompleter(self, session, request_socket)
41 41 readline.parse_and_bind('tab: complete')
42 42 readline.parse_and_bind('set show-all-if-ambiguous on')
43 43 readline.set_completer(self.completer.complete)
44 44
45 45 # Set system prompts
46 46 sys.ps1 = 'Py>>> '
47 47 sys.ps2 = ' ... '
48 48 sys.ps3 = 'Out : '
49 49 # Build dict of handlers for message types
50 50 self.handlers = {}
51 51 for msg_type in ['pyin', 'pyout', 'pyerr', 'stream']:
52 52 self.handlers[msg_type] = getattr(self, 'handle_%s' % msg_type)
53 53
54 54 def handle_pyin(self, omsg):
55 55 if omsg.parent_header.session == self.session.session:
56 56 return
57 57 c = omsg.content.code.rstrip()
58 58 if c:
59 59 print '[IN from %s]' % omsg.parent_header.username
60 60 print c
61 61
62 62 def handle_pyout(self, omsg):
63 63 #print omsg # dbg
64 64 if omsg.parent_header.session == self.session.session:
65 65 print "%s%s" % (sys.ps3, omsg.content.data)
66 66 else:
67 67 print '[Out from %s]' % omsg.parent_header.username
68 68 print omsg.content.data
69 69
70 70 def print_pyerr(self, err):
71 71 print >> sys.stderr, err.etype,':', err.evalue
72 72 print >> sys.stderr, ''.join(err.traceback)
73 73
74 74 def handle_pyerr(self, omsg):
75 75 if omsg.parent_header.session == self.session.session:
76 76 return
77 77 print >> sys.stderr, '[ERR from %s]' % omsg.parent_header.username
78 78 self.print_pyerr(omsg.content)
79 79
80 80 def handle_stream(self, omsg):
81 81 if omsg.content.name == 'stdout':
82 82 outstream = sys.stdout
83 83 else:
84 84 outstream = sys.stderr
85 85 print >> outstream, '*ERR*',
86 86 print >> outstream, omsg.content.data,
87 87
88 88 def handle_output(self, omsg):
89 89 handler = self.handlers.get(omsg.msg_type, None)
90 90 if handler is not None:
91 91 handler(omsg)
92 92
93 93 def recv_output(self):
94 94 while True:
95 95 ident,msg = self.session.recv(self.sub_socket)
96 96 if msg is None:
97 97 break
98 98 self.handle_output(Message(msg))
99 99
100 100 def handle_reply(self, rep):
101 101 # Handle any side effects on output channels
102 102 self.recv_output()
103 103 # Now, dispatch on the possible reply types we must handle
104 104 if rep is None:
105 105 return
106 106 if rep.content.status == 'error':
107 107 self.print_pyerr(rep.content)
108 108 elif rep.content.status == 'aborted':
109 109 print >> sys.stderr, "ERROR: ABORTED"
110 110 ab = self.messages[rep.parent_header.msg_id].content
111 111 if 'code' in ab:
112 112 print >> sys.stderr, ab.code
113 113 else:
114 114 print >> sys.stderr, ab
115 115
116 116 def recv_reply(self):
117 117 ident,rep = self.session.recv(self.request_socket)
118 118 mrep = Message(rep)
119 119 self.handle_reply(mrep)
120 120 return mrep
121 121
122 122 def runcode(self, code):
123 123 # We can't pickle code objects, so fetch the actual source
124 124 src = '\n'.join(self.buffer)
125 125
126 126 # for non-background inputs, if we do have previoiusly backgrounded
127 127 # jobs, check to see if they've produced results
128 128 if not src.endswith(';'):
129 129 while self.backgrounded > 0:
130 130 #print 'checking background'
131 131 rep = self.recv_reply()
132 132 if rep:
133 133 self.backgrounded -= 1
134 134 time.sleep(0.05)
135 135
136 136 # Send code execution message to kernel
137 137 omsg = self.session.send(self.request_socket,
138 138 'execute_request', dict(code=src))
139 139 self.messages[omsg.header.msg_id] = omsg
140 140
141 141 # Fake asynchronicity by letting the user put ';' at the end of the line
142 142 if src.endswith(';'):
143 143 self.backgrounded += 1
144 144 return
145 145
146 146 # For foreground jobs, wait for reply
147 147 while True:
148 148 rep = self.recv_reply()
149 149 if rep is not None:
150 150 break
151 151 self.recv_output()
152 152 time.sleep(0.05)
153 153 else:
154 154 # We exited without hearing back from the kernel!
155 155 print >> sys.stderr, 'ERROR!!! kernel never got back to us!!!'
156 156
157 157
class InteractiveClient(object):
    """User-facing client: owns a Console wired to the given session and sockets."""

    def __init__(self, session, request_socket, sub_socket):
        self.session = session
        self.request_socket = request_socket
        self.sub_socket = sub_socket
        self.console = Console(
            locals=None,
            filename='<zmq-console>',
            session=session,
            request_socket=request_socket,
            sub_socket=sub_socket,
        )

    def interact(self):
        """Run the console's interactive loop."""
        self.console.interact()
168 168
169 169
def main():
    """Connect to a kernel on LOCALHOST (ports 5575/5576) and start the console."""
    # Defaults
    #ip = '192.168.2.109'
    ip = LOCALHOST
    #ip = '99.146.222.252'
    port_base = 5575
    # the request port is port_base, the subscription port is the next one
    url_template = ('tcp://%s' % ip) + ':%i'
    req_conn = url_template % port_base
    sub_conn = url_template % (port_base+1)

    # Create initial sockets
    context = zmq.Context()
    request_socket = context.socket(zmq.DEALER)
    request_socket.connect(req_conn)

    sub_socket = context.socket(zmq.SUB)
    sub_socket.connect(sub_conn)
    # an empty subscription prefix receives every published message
    sub_socket.setsockopt(zmq.SUBSCRIBE, '')

    # Make session and user-facing client
    InteractiveClient(session.Session(), request_socket, sub_socket).interact()


if __name__ == '__main__':
    main()
@@ -1,226 +1,226 b''
1 1 """An Application for launching a kernel
2 2
3 3 Authors
4 4 -------
5 5 * MinRK
6 6 """
7 7 #-----------------------------------------------------------------------------
8 8 # Copyright (C) 2011 The IPython Development Team
9 9 #
10 10 # Distributed under the terms of the BSD License. The full license is in
11 11 # the file COPYING.txt, distributed as part of this software.
12 12 #-----------------------------------------------------------------------------
13 13
14 14 #-----------------------------------------------------------------------------
15 15 # Imports
16 16 #-----------------------------------------------------------------------------
17 17
18 18 # Standard library imports.
19 19 import os
20 20 import sys
21 21
22 22 # System library imports.
23 23 import zmq
24 24
25 25 # IPython imports.
26 26 from IPython.core.ultratb import FormattedTB
27 27 from IPython.core.application import (
28 28 BaseIPythonApplication, base_flags, base_aliases
29 29 )
30 30 from IPython.utils import io
31 31 from IPython.utils.localinterfaces import LOCALHOST
32 32 from IPython.utils.traitlets import (Any, Instance, Dict, Unicode, Int, Bool,
33 33 DottedObjectName)
34 34 from IPython.utils.importstring import import_item
35 35 # local imports
36 36 from IPython.zmq.heartbeat import Heartbeat
37 37 from IPython.zmq.parentpoller import ParentPollerUnix, ParentPollerWindows
38 38 from IPython.zmq.session import Session
39 39
40 40
41 41 #-----------------------------------------------------------------------------
42 42 # Flags and Aliases
43 43 #-----------------------------------------------------------------------------
44 44
# Command-line aliases: start from a copy of the base application's aliases
# and add short names for the KernelApp connection traits.
kernel_aliases = dict(base_aliases)
kernel_aliases.update({
    'ip' : 'KernelApp.ip',
    'hb' : 'KernelApp.hb_port',
    'shell' : 'KernelApp.shell_port',
    'iopub' : 'KernelApp.iopub_port',
    'stdin' : 'KernelApp.stdin_port',
    'parent': 'KernelApp.parent',
})
# the interrupt alias is only registered on Windows platforms
if sys.platform.startswith('win'):
    kernel_aliases['interrupt'] = 'KernelApp.interrupt'

# Command-line flags: a copy of the base flags plus switches that set the
# KernelApp.no_stdout / KernelApp.no_stderr traits.
kernel_flags = dict(base_flags)
kernel_flags.update({
    'no-stdout' : (
            {'KernelApp' : {'no_stdout' : True}},
            "redirect stdout to the null device"),
    'no-stderr' : (
            {'KernelApp' : {'no_stderr' : True}},
            "redirect stderr to the null device"),
})
66 66
67 67
68 68 #-----------------------------------------------------------------------------
69 69 # Application class for starting a Kernel
70 70 #-----------------------------------------------------------------------------
71 71
class KernelApp(BaseIPythonApplication):
    """Application that binds the kernel's sockets and starts a Kernel.

    ``initialize`` sets up, in order: output blackholing, the session, the
    parent poller, the sockets, stdout/stderr/displayhook redirection and
    the kernel object. ``start`` then runs heartbeat, poller and kernel.
    """
    name='pykernel'
    aliases = Dict(kernel_aliases)
    flags = Dict(kernel_flags)
    classes = [Session]
    # the kernel class, as an importstring
    kernel_class = DottedObjectName('IPython.zmq.pykernel.Kernel')
    kernel = Any()
    poller = Any() # don't restrict this even though current pollers are all Threads
    heartbeat = Instance(Heartbeat)
    session = Instance('IPython.zmq.session.Session')
    ports = Dict()

    # inherit config file name from parent:
    parent_appname = Unicode(config=True)
    def _parent_appname_changed(self, name, old, new):
        """Derive the config file name from the new parent app name."""
        if self.config_file_specified:
            # it was manually specified, ignore
            return
        self.config_file_name = new.replace('-','_') + u'_config.py'
        # don't let this count as specifying the config file
        self.config_file_specified = False

    # connection info:
    ip = Unicode(LOCALHOST, config=True,
        help="Set the IP or interface on which the kernel will listen.")
    hb_port = Int(0, config=True, help="set the heartbeat port [default: random]")
    shell_port = Int(0, config=True, help="set the shell (XREP) port [default: random]")
    iopub_port = Int(0, config=True, help="set the iopub (PUB) port [default: random]")
    stdin_port = Int(0, config=True, help="set the stdin (XREQ) port [default: random]")

    # streams, etc.
    no_stdout = Bool(False, config=True, help="redirect stdout to the null device")
    no_stderr = Bool(False, config=True, help="redirect stderr to the null device")
    outstream_class = DottedObjectName('IPython.zmq.iostream.OutStream',
        config=True, help="The importstring for the OutStream factory")
    displayhook_class = DottedObjectName('IPython.zmq.displayhook.ZMQDisplayHook',
        config=True, help="The importstring for the DisplayHook factory")

    # polling
    parent = Int(0, config=True,
        help="""kill this process if its parent dies.  On Windows, the argument
        specifies the HANDLE of the parent process, otherwise it is simply boolean.
        """)
    interrupt = Int(0, config=True,
        help="""ONLY USED ON WINDOWS
        Interrupt this process when the parent is signalled.
        """)

    def init_crash_handler(self):
        """Install a verbose, uncolored traceback printer as sys.excepthook."""
        # Install minimal exception handling
        sys.excepthook = FormattedTB(mode='Verbose', color_scheme='NoColor',
                                     ostream=sys.__stdout__)

    def init_poller(self):
        """Create the parent poller if parent/interrupt monitoring was requested."""
        if sys.platform == 'win32':
            if self.interrupt or self.parent:
                self.poller = ParentPollerWindows(self.interrupt, self.parent)
        elif self.parent:
            self.poller = ParentPollerUnix()

    def _bind_socket(self, s, port):
        """Bind s on self.ip and return the port actually used.

        A port <= 0 means "pick a random port".
        """
        iface = 'tcp://%s' % self.ip
        if port <= 0:
            port = s.bind_to_random_port(iface)
        else:
            s.bind(iface + ':%i'%port)
        return port

    def init_sockets(self):
        """Create and bind the shell, iopub and stdin sockets and the heartbeat."""
        # Create a context, a session, and the kernel sockets.
        self.log.info("Starting the kernel at pid: %i", os.getpid())
        context = zmq.Context.instance()
        # Uncomment this to try closing the context.
        # atexit.register(context.term)

        self.shell_socket = context.socket(zmq.ROUTER)
        self.shell_port = self._bind_socket(self.shell_socket, self.shell_port)
        self.log.debug("shell ROUTER Channel on port: %i"%self.shell_port)

        self.iopub_socket = context.socket(zmq.PUB)
        self.iopub_port = self._bind_socket(self.iopub_socket, self.iopub_port)
        self.log.debug("iopub PUB Channel on port: %i"%self.iopub_port)

        self.stdin_socket = context.socket(zmq.XREQ)
        self.stdin_port = self._bind_socket(self.stdin_socket, self.stdin_port)
        self.log.debug("stdin XREQ Channel on port: %i"%self.stdin_port)

        self.heartbeat = Heartbeat(context, (self.ip, self.hb_port))
        # the heartbeat reports the port it ended up with
        self.hb_port = self.heartbeat.port
        self.log.debug("Heartbeat REP Channel on port: %i"%self.hb_port)

        # Helper to make it easier to connect to an existing kernel, until we have
        # single-port connection negotiation fully implemented.
        # set log-level to critical, to make sure it is output
        self.log.critical("To connect another client to this kernel, use:")
        self.log.critical("--existing --shell={0} --iopub={1} --stdin={2} --hb={3}".format(
            self.shell_port, self.iopub_port, self.stdin_port, self.hb_port))


        self.ports = dict(shell=self.shell_port, iopub=self.iopub_port,
                                stdin=self.stdin_port, hb=self.hb_port)

    def init_session(self):
        """create our session object"""
        self.session = Session(config=self.config, username=u'kernel')

    def init_blackhole(self):
        """redirects stdout/stderr to devnull if necessary"""
        if self.no_stdout or self.no_stderr:
            # open() rather than the Python-2-only file() builtin
            blackhole = open(os.devnull, 'w')
            if self.no_stdout:
                sys.stdout = sys.__stdout__ = blackhole
            if self.no_stderr:
                sys.stderr = sys.__stderr__ = blackhole

    def init_io(self):
        """Redirect output streams to the iopub socket and set a display hook."""
        if self.outstream_class:
            outstream_factory = import_item(str(self.outstream_class))
            sys.stdout = outstream_factory(self.session, self.iopub_socket, u'stdout')
            sys.stderr = outstream_factory(self.session, self.iopub_socket, u'stderr')
        if self.displayhook_class:
            displayhook_factory = import_item(str(self.displayhook_class))
            sys.displayhook = displayhook_factory(self.session, self.iopub_socket)

    def init_kernel(self):
        """Create the Kernel object itself"""
        kernel_factory = import_item(str(self.kernel_class))
        self.kernel = kernel_factory(config=self.config, session=self.session,
                                shell_socket=self.shell_socket,
                                iopub_socket=self.iopub_socket,
                                stdin_socket=self.stdin_socket,
                                log=self.log
        )
        self.kernel.record_ports(self.ports)

    def initialize(self, argv=None):
        """Run the base initialization, then every init_* step in order."""
        super(KernelApp, self).initialize(argv)
        self.init_blackhole()
        self.init_session()
        self.init_poller()
        self.init_sockets()
        self.init_io()
        self.init_kernel()

    def start(self):
        """Start the heartbeat and poller, then run the kernel until interrupted."""
        self.heartbeat.start()
        if self.poller is not None:
            self.poller.start()
        try:
            self.kernel.start()
        except KeyboardInterrupt:
            pass
226 226
@@ -1,983 +1,983 b''
1 1 """Base classes to manage the interaction with a running kernel.
2 2
3 3 TODO
4 4 * Create logger to handle debugging and console messages.
5 5 """
6 6
7 7 #-----------------------------------------------------------------------------
8 8 # Copyright (C) 2008-2010 The IPython Development Team
9 9 #
10 10 # Distributed under the terms of the BSD License. The full license is in
11 11 # the file COPYING, distributed as part of this software.
12 12 #-----------------------------------------------------------------------------
13 13
14 14 #-----------------------------------------------------------------------------
15 15 # Imports
16 16 #-----------------------------------------------------------------------------
17 17
18 18 # Standard library imports.
19 19 import atexit
20 20 import errno
21 21 from Queue import Queue, Empty
22 22 from subprocess import Popen
23 23 import signal
24 24 import sys
25 25 from threading import Thread
26 26 import time
27 27 import logging
28 28
29 29 # System library imports.
30 30 import zmq
31 31 from zmq import POLLIN, POLLOUT, POLLERR
32 32 from zmq.eventloop import ioloop
33 33
34 34 # Local imports.
35 35 from IPython.config.loader import Config
36 36 from IPython.utils import io
37 37 from IPython.utils.localinterfaces import LOCALHOST, LOCAL_IPS
38 38 from IPython.utils.traitlets import HasTraits, Any, Instance, Type, TCPAddress
39 39 from session import Session, Message
40 40
41 41 #-----------------------------------------------------------------------------
42 42 # Constants and exceptions
43 43 #-----------------------------------------------------------------------------
44 44
class InvalidPortNumber(Exception):
    """Raised when a channel is created with an unusable port number."""
47 47
48 48 #-----------------------------------------------------------------------------
49 49 # Utility functions
50 50 #-----------------------------------------------------------------------------
51 51
52 52 # some utilities to validate message structure, these might get moved elsewhere
53 53 # if they prove to have more generic utility
54 54
def validate_string_list(lst):
    """Validate that the input is a list of strings.

    Parameters
    ----------
    lst : object
        The value to check.

    Raises
    ------
    ValueError
        If `lst` is not a list, or if any of its elements is not a string.
    """
    if not isinstance(lst, list):
        # Wrap the operand in a 1-tuple: a bare tuple on the right of ``%``
        # is unpacked as the argument list, which would raise TypeError (or
        # print the wrong value) instead of the intended ValueError.
        raise ValueError('input %r must be a list' % (lst,))
    for x in lst:
        if not isinstance(x, basestring):
            raise ValueError('element %r in list must be a string' % (x,))
64 64
65 65
def validate_string_dict(dct):
    """Validate that the input is a dict with string keys and values.

    Parameters
    ----------
    dct : dict
        The mapping to check; it is walked with ``iteritems()``.

    Raises
    ------
    ValueError
        If any key or any value is not a string.
    """
    for k, v in dct.iteritems():
        # Operands are wrapped in 1-tuples so that a tuple key or value is
        # reported in the message instead of being unpacked by ``%``.
        if not isinstance(k, basestring):
            raise ValueError('key %r in dict must be a string' % (k,))
        if not isinstance(v, basestring):
            raise ValueError('value %r in dict must be a string' % (v,))
75 75
76 76
77 77 #-----------------------------------------------------------------------------
78 78 # ZMQ Socket Channel classes
79 79 #-----------------------------------------------------------------------------
80 80
class ZMQSocketChannel(Thread):
    """Base class for the channels that communicate over ZMQ sockets.

    Every channel is a daemon thread.  This base class stores the context,
    session and address, and provides helpers for running an event loop
    and for changing the poll flags registered for the channel's socket.
    """
    context = None
    session = None
    socket = None
    ioloop = None
    iostate = None
    _address = None

    def __init__(self, context, session, address):
        """Create a channel.

        Parameters
        ----------
        context : :class:`zmq.Context`
            The ZMQ context to use.
        session : :class:`session.Session`
            The session to use.
        address : tuple
            Standard (ip, port) tuple that the kernel is listening on.

        Raises
        ------
        InvalidPortNumber
            If the port in `address` is 0.
        """
        super(ZMQSocketChannel, self).__init__()
        self.daemon = True

        self.context = context
        self.session = session
        port = address[1]
        if port == 0:
            raise InvalidPortNumber('The port number for a channel cannot be 0.')
        self._address = address

    def _run_loop(self):
        """Run ``self.ioloop`` until it returns, restarting it on EINTR."""
        while True:
            try:
                self.ioloop.start()
            except zmq.ZMQError as err:
                # Only an interrupted system call is retried.
                if err.errno != errno.EINTR:
                    raise
            else:
                return

    def stop(self):
        """Stop the channel's activity.

        This joins the thread and returns once it has terminated.  As with
        any thread, calling :method:`self.start` afterwards raises
        :class:`RuntimeError`.
        """
        self.join()

    @property
    def address(self):
        """The (ip, port) tuple this channel was created with."""
        return self._address

    def add_io_state(self, state):
        """Add IO state to the eventloop.

        Parameters
        ----------
        state : zmq.POLLIN|zmq.POLLOUT|zmq.POLLERR
            The IO state flag to set.

        The change is applied inside a callback scheduled with
        ``IOLoop.add_callback``, which is what makes it thread safe.
        """
        def enable_flag():
            if self.iostate & state:
                return
            self.iostate = self.iostate | state
            self.ioloop.update_handler(self.socket, self.iostate)
        self.ioloop.add_callback(enable_flag)

    def drop_io_state(self, state):
        """Drop IO state from the eventloop.

        Parameters
        ----------
        state : zmq.POLLIN|zmq.POLLOUT|zmq.POLLERR
            The IO state flag to clear.

        The change is applied inside a callback scheduled with
        ``IOLoop.add_callback``, which is what makes it thread safe.
        """
        def disable_flag():
            if not self.iostate & state:
                return
            self.iostate = self.iostate & (~state)
            self.ioloop.update_handler(self.socket, self.iostate)
        self.ioloop.add_callback(disable_flag)
175 175
176 176
class ShellSocketChannel(ZMQSocketChannel):
    """The DEALER channel for issuing requests to, and receiving replies
    from, the kernel.
    """

    command_queue = None

    def __init__(self, context, session, address):
        super(ShellSocketChannel, self).__init__(context, session, address)
        # Requests wait here until the event loop reports the socket writable.
        self.command_queue = Queue()
        self.ioloop = ioloop.IOLoop()

    def run(self):
        """The thread's main activity.  Call start() instead."""
        sock = self.socket = self.context.socket(zmq.DEALER)
        sock.setsockopt(zmq.IDENTITY, self.session.session)
        sock.connect('tcp://%s:%i' % self.address)
        self.iostate = POLLERR|POLLIN
        self.ioloop.add_handler(sock, self._handle_events, self.iostate)
        self._run_loop()

    def stop(self):
        """Stop the event loop, then wait for the thread to finish."""
        self.ioloop.stop()
        super(ShellSocketChannel, self).stop()

    def call_handlers(self, msg):
        """Hook invoked from the ioloop thread for every received message.

        Subclasses must override this.  Because it runs in the channel's
        thread, an override has to arrange for application-level handlers
        to be run in the application thread.
        """
        raise NotImplementedError('call_handlers must be defined in a subclass.')

    def execute(self, code, silent=False,
                user_variables=None, user_expressions=None):
        """Execute code in the kernel.

        Parameters
        ----------
        code : str
            A string of Python code.

        silent : bool, optional (default False)
            If set, the kernel will execute the code as quietly as possible.

        user_variables : list, optional
            A list of variable names to pull from the user's namespace.

        user_expressions : dict, optional
            A dict with string keys and string values, giving expressions
            to be evaluated in the user's namespace.

        Returns
        -------
        The msg_id of the message sent.

        Raises
        ------
        ValueError
            If `code` is not a string or the optional arguments fail
            validation.
        """
        user_variables = [] if user_variables is None else user_variables
        user_expressions = {} if user_expressions is None else user_expressions

        # Reject malformed input locally rather than sending it to the kernel.
        if not isinstance(code, basestring):
            raise ValueError('code %r must be a string' % code)
        validate_string_list(user_variables)
        validate_string_dict(user_expressions)

        content = {
            'code': code,
            'silent': silent,
            'user_variables': user_variables,
            'user_expressions': user_expressions,
        }
        request = self.session.msg('execute_request', content)
        self._queue_request(request)
        return request['header']['msg_id']

    def complete(self, text, line, cursor_pos, block=None):
        """Tab complete text in the kernel's namespace.

        Parameters
        ----------
        text : str
            The text to complete.
        line : str
            The full line of text that is the surrounding context for the
            text to complete.
        cursor_pos : int
            The position of the cursor in the line where the completion was
            requested.
        block : str, optional
            The full block of code in which the completion is being requested.

        Returns
        -------
        The msg_id of the message sent.
        """
        content = {
            'text': text,
            'line': line,
            'block': block,
            'cursor_pos': cursor_pos,
        }
        request = self.session.msg('complete_request', content)
        self._queue_request(request)
        return request['header']['msg_id']

    def object_info(self, oname):
        """Get metadata information about an object.

        Parameters
        ----------
        oname : str
            A string specifying the object name.

        Returns
        -------
        The msg_id of the message sent.
        """
        request = self.session.msg('object_info_request', {'oname': oname})
        self._queue_request(request)
        return request['header']['msg_id']

    def history(self, raw=True, output=False, hist_access_type='range', **kwargs):
        """Get entries from the history list.

        Parameters
        ----------
        raw : bool
            If True, return the raw input.
        output : bool
            If True, then return the output as well.
        hist_access_type : str
            'range' (fill in session, start and stop params), 'tail' (fill in n)
            or 'search' (fill in pattern param).

        session : int
            For a range request, the session from which to get lines. Session
            numbers are positive integers; negative ones count back from the
            current session.
        start : int
            The first line number of a history range.
        stop : int
            The final (excluded) line number of a history range.

        n : int
            The number of lines of history to get for a tail request.

        pattern : str
            The glob-syntax pattern for a search request.

        Returns
        -------
        The msg_id of the message sent.
        """
        content = {
            'raw': raw,
            'output': output,
            'hist_access_type': hist_access_type,
        }
        # The extra keyword arguments travel in the same content dict.
        content.update(kwargs)
        request = self.session.msg('history_request', content)
        self._queue_request(request)
        return request['header']['msg_id']

    def shutdown(self, restart=False):
        """Request an immediate kernel shutdown.

        Upon receipt of the (empty) reply, client code can safely assume that
        the kernel has shut down and it's safe to forcefully terminate it if
        it's still alive.

        The kernel will send the reply via a function registered with Python's
        atexit module, ensuring it's truly done as the kernel is done with all
        normal operation.

        Returns
        -------
        The msg_id of the message sent.
        """
        # Send quit message to kernel. Once we implement kernel-side setattr,
        # this should probably be done that way, but for now this will do.
        request = self.session.msg('shutdown_request', {'restart':restart})
        self._queue_request(request)
        return request['header']['msg_id']

    def _handle_events(self, socket, events):
        """Dispatch poll events: errors first, then sends, then receives."""
        dispatch = (
            (POLLERR, self._handle_err),
            (POLLOUT, self._handle_send),
            (POLLIN, self._handle_recv),
        )
        for flag, handler in dispatch:
            if events & flag:
                handler()

    def _handle_recv(self):
        """Receive one message and pass it to :method:`call_handlers`."""
        ident, reply = self.session.recv(self.socket, 0)
        self.call_handlers(reply)

    def _handle_send(self):
        """Send at most one queued request; stop polling for output once
        the queue is empty."""
        pending = self.command_queue
        try:
            outgoing = pending.get(False)
        except Empty:
            pass
        else:
            self.session.send(self.socket, outgoing)
        if pending.empty():
            self.drop_io_state(POLLOUT)

    def _handle_err(self):
        # We don't want to let this go silently, so eventually we should log.
        raise zmq.ZMQError()

    def _queue_request(self, msg):
        """Queue `msg` and ask the event loop to poll for writability."""
        self.command_queue.put(msg)
        self.add_io_state(POLLOUT)
384 384
385 385
class SubSocketChannel(ZMQSocketChannel):
    """The SUB channel, which listens for the messages the kernel publishes.
    """

    def __init__(self, context, session, address):
        super(SubSocketChannel, self).__init__(context, session, address)
        self.ioloop = ioloop.IOLoop()

    def run(self):
        """The thread's main activity.  Call start() instead."""
        sock = self.socket = self.context.socket(zmq.SUB)
        # An empty prefix subscribes to everything.
        sock.setsockopt(zmq.SUBSCRIBE,'')
        sock.setsockopt(zmq.IDENTITY, self.session.session)
        sock.connect('tcp://%s:%i' % self.address)
        self.iostate = POLLIN|POLLERR
        self.ioloop.add_handler(sock, self._handle_events, self.iostate)
        self._run_loop()

    def stop(self):
        """Stop the event loop, then wait for the thread to finish."""
        self.ioloop.stop()
        super(SubSocketChannel, self).stop()

    def call_handlers(self, msg):
        """Hook invoked from the ioloop thread for every received message.

        Subclasses must override this.  Because it runs in the channel's
        thread, an override has to arrange for application-level handlers
        to be run in the application thread.
        """
        raise NotImplementedError('call_handlers must be defined in a subclass.')

    def flush(self, timeout=1.0):
        """Immediately processes all pending messages on the SUB channel.

        Callers should use this method to ensure that :method:`call_handlers`
        has been called for all messages that have been received on the
        0MQ SUB socket of this channel.

        This method is thread safe.

        Parameters
        ----------
        timeout : float, optional
            The maximum amount of time to spend flushing, in seconds. The
            default is one second.
        """
        deadline = time.time() + timeout
        # Two rounds of the callback handshake, so that the IOLoop gets to
        # perform at least one full poll in between.
        for _ in range(2):
            self._flushed = False
            self.ioloop.add_callback(self._flush)
            while not self._flushed and time.time() < deadline:
                time.sleep(0.01)

    def _handle_events(self, socket, events):
        """Dispatch poll events: errors first, then receives."""
        for flag, handler in ((POLLERR, self._handle_err),
                              (POLLIN, self._handle_recv)):
            if events & flag:
                handler()

    def _handle_err(self):
        # We don't want to let this go silently, so eventually we should log.
        raise zmq.ZMQError()

    def _handle_recv(self):
        """Hand messages to :method:`call_handlers` until receiving raises
        a ZMQError or yields ``None``."""
        while True:
            try:
                ident, msg = self.session.recv(self.socket)
            except zmq.ZMQError:
                # Check the errno?
                # Will this trigger POLLERR?
                return
            if msg is None:
                return
            self.call_handlers(msg)

    def _flush(self):
        """Callback for :method:`self.flush`."""
        self._flushed = True
471 471
472 472
class StdInSocketChannel(ZMQSocketChannel):
    """A reply channel to handle raw_input requests that the kernel makes."""

    msg_queue = None

    def __init__(self, context, session, address):
        super(StdInSocketChannel, self).__init__(context, session, address)
        self.ioloop = ioloop.IOLoop()
        # Replies wait here until the event loop reports the socket writable.
        self.msg_queue = Queue()

    def run(self):
        """The thread's main activity.  Call start() instead."""
        sock = self.socket = self.context.socket(zmq.DEALER)
        sock.setsockopt(zmq.IDENTITY, self.session.session)
        sock.connect('tcp://%s:%i' % self.address)
        self.iostate = POLLERR|POLLIN
        self.ioloop.add_handler(sock, self._handle_events, self.iostate)
        self._run_loop()

    def stop(self):
        """Stop the event loop, then wait for the thread to finish."""
        self.ioloop.stop()
        super(StdInSocketChannel, self).stop()

    def call_handlers(self, msg):
        """Hook invoked from the ioloop thread for every received message.

        Subclasses must override this.  Because it runs in the channel's
        thread, an override has to arrange for application-level handlers
        to be run in the application thread.
        """
        raise NotImplementedError('call_handlers must be defined in a subclass.')

    def input(self, string):
        """Send a string of raw input to the kernel."""
        reply = self.session.msg('input_reply', {'value': string})
        self._queue_reply(reply)

    def _handle_events(self, socket, events):
        """Dispatch poll events: errors first, then sends, then receives."""
        dispatch = (
            (POLLERR, self._handle_err),
            (POLLOUT, self._handle_send),
            (POLLIN, self._handle_recv),
        )
        for flag, handler in dispatch:
            if events & flag:
                handler()

    def _handle_recv(self):
        """Receive one message and pass it to :method:`call_handlers`."""
        ident, request = self.session.recv(self.socket, 0)
        self.call_handlers(request)

    def _handle_send(self):
        """Send at most one queued reply; stop polling for output once the
        queue is empty."""
        pending = self.msg_queue
        try:
            outgoing = pending.get(False)
        except Empty:
            pass
        else:
            self.session.send(self.socket, outgoing)
        if pending.empty():
            self.drop_io_state(POLLOUT)

    def _handle_err(self):
        # We don't want to let this go silently, so eventually we should log.
        raise zmq.ZMQError()

    def _queue_reply(self, msg):
        """Queue `msg` and ask the event loop to poll for writability."""
        self.msg_queue.put(msg)
        self.add_io_state(POLLOUT)
542 542
543 543
class HBSocketChannel(ZMQSocketChannel):
    """The heartbeat channel which monitors the kernel heartbeat.

    Note that the heartbeat channel is paused by default. As long as you start
    this channel, the kernel manager will ensure that it is paused and un-paused
    as appropriate.
    """

    # Seconds without a reply after which call_handlers is invoked.
    time_to_dead = 3.0
    socket = None
    poller = None
    _running = None
    _pause = None

    def __init__(self, context, session, address):
        super(HBSocketChannel, self).__init__(context, session, address)
        self._running = False
        self._pause = True

    def _create_socket(self):
        """(Re)create the REQ socket and a poller watching it for input."""
        if self.socket is not None:
            # run() calls this again after an EFSM error; close the stale
            # socket first so that it is not leaked when it is replaced.
            self.socket.close()
        self.socket = self.context.socket(zmq.REQ)
        self.socket.setsockopt(zmq.IDENTITY, self.session.session)
        self.socket.connect('tcp://%s:%i' % self.address)
        self.poller = zmq.Poller()
        self.poller.register(self.socket, zmq.POLLIN)

    def run(self):
        """The thread's main activity.  Call start() instead."""
        self._create_socket()
        self._running = True
        while self._running:
            if self._pause:
                time.sleep(self.time_to_dead)
            else:
                since_last_heartbeat = 0.0
                request_time = time.time()
                try:
                    #io.rprint('Ping from HB channel') # dbg
                    self.socket.send(b'ping')
                except zmq.ZMQError as e:
                    #io.rprint('*** HB Error:', e) # dbg
                    if e.errno == zmq.EFSM:
                        # The REQ socket is still waiting for a reply to an
                        # earlier ping: wait, then start over on a new socket.
                        #io.rprint('sleep...', self.time_to_dead) # dbg
                        time.sleep(self.time_to_dead)
                        self._create_socket()
                    else:
                        raise
                else:
                    while True:
                        try:
                            self.socket.recv(zmq.NOBLOCK)
                        except zmq.ZMQError as e:
                            #io.rprint('*** HB Error 2:', e) # dbg
                            if e.errno == zmq.EAGAIN:
                                # No reply yet: wait for one until the
                                # time_to_dead budget of this ping is used up.
                                before_poll = time.time()
                                until_dead = self.time_to_dead - (before_poll -
                                                                  request_time)

                                # When the return value of poll() is an empty
                                # list, that is when things have gone wrong
                                # (zeromq bug). As long as it is not an empty
                                # list, poll is working correctly even if it
                                # returns quickly. Note: poll timeout is in
                                # milliseconds.
                                if until_dead > 0.0:
                                    while True:
                                        try:
                                            self.poller.poll(1000 * until_dead)
                                        except zmq.ZMQError as e:
                                            if e.errno == errno.EINTR:
                                                continue
                                            else:
                                                raise
                                        else:
                                            break

                                since_last_heartbeat = time.time()-request_time
                                if since_last_heartbeat > self.time_to_dead:
                                    self.call_handlers(since_last_heartbeat)
                                    break
                            else:
                                # FIXME: We should probably log this instead.
                                raise
                        else:
                            # Reply received: sleep out the rest of the
                            # interval before sending the next ping.
                            until_dead = self.time_to_dead - (time.time() -
                                                              request_time)
                            if until_dead > 0.0:
                                #io.rprint('sleep...', self.time_to_dead) # dbg
                                time.sleep(until_dead)
                            break

    def pause(self):
        """Pause the heartbeat."""
        self._pause = True

    def unpause(self):
        """Unpause the heartbeat."""
        self._pause = False

    def is_beating(self):
        """Is the heartbeat running and not paused."""
        if self.is_alive() and not self._pause:
            return True
        else:
            return False

    def stop(self):
        """Ask the run() loop to exit, then wait for the thread to finish."""
        self._running = False
        super(HBSocketChannel, self).stop()

    def call_handlers(self, since_last_heartbeat):
        """This method is called in the channel's thread when no heartbeat
        reply has been received within ``time_to_dead`` seconds.

        Subclasses should override this method to react to the missed
        heartbeat.  It is important to remember that this method is called
        in the thread, so that some logic must be done to ensure that the
        application level handlers are called in the application thread.

        Parameters
        ----------
        since_last_heartbeat : float
            Seconds elapsed since the unanswered ping was sent.
        """
        raise NotImplementedError('call_handlers must be defined in a subclass.')
663 663
664 664
665 665 #-----------------------------------------------------------------------------
666 666 # Main kernel manager class
667 667 #-----------------------------------------------------------------------------
668 668
class KernelManager(HasTraits):
    """ Manages a kernel for a frontend.

    The manager owns four channels to the kernel:

    * the shell channel, on which the frontend makes requests of the kernel;
    * the SUB channel, on which the frontend receives messages published by
      the kernel;
    * the stdin channel, on which the kernel requests stdin (raw_input) from
      the frontend;
    * the heartbeat channel.
    """
    # config object for passing to child configurables
    config = Instance(Config)

    # The PyZMQ Context to use for communication with the kernel.
    context = Instance(zmq.Context)
    def _context_default(self):
        return zmq.Context.instance()

    # The Session to use for communication with the kernel.
    session = Instance(Session)

    # The kernel process with which the KernelManager is communicating.
    kernel = Instance(Popen)

    # The addresses for the communication channels.
    shell_address = TCPAddress((LOCALHOST, 0))
    sub_address = TCPAddress((LOCALHOST, 0))
    stdin_address = TCPAddress((LOCALHOST, 0))
    hb_address = TCPAddress((LOCALHOST, 0))

    # The classes to use for the various channels.
    shell_channel_class = Type(ShellSocketChannel)
    sub_channel_class = Type(SubSocketChannel)
    stdin_channel_class = Type(StdInSocketChannel)
    hb_channel_class = Type(HBSocketChannel)

    # Protected traits.
    _launch_args = Any
    _shell_channel = Any
    _sub_channel = Any
    _stdin_channel = Any
    _hb_channel = Any

    def __init__(self, **kwargs):
        super(KernelManager, self).__init__(**kwargs)
        # Fall back to a fresh Session when the caller did not supply one.
        if self.session is None:
            self.session = Session(config=self.config)
        # Uncomment this to try closing the context.
        # atexit.register(self.context.term)

    #--------------------------------------------------------------------------
    # Channel management methods:
    #--------------------------------------------------------------------------

    def start_channels(self, shell=True, sub=True, stdin=True, hb=True):
        """Starts the channels for this kernel.

        This will create the channels if they do not exist and then start
        them. If port numbers of 0 are being used (random ports) then you
        must first call :method:`start_kernel`. If the channels have been
        stopped and you call this, :class:`RuntimeError` will be raised.
        """
        requested = (
            (shell, 'shell_channel'),
            (sub, 'sub_channel'),
            (stdin, 'stdin_channel'),
            (hb, 'hb_channel'),
        )
        for wanted, name in requested:
            if wanted:
                getattr(self, name).start()

    def stop_channels(self):
        """Stops all the running channels for this kernel.
        """
        for name in ('shell_channel', 'sub_channel',
                     'stdin_channel', 'hb_channel'):
            channel = getattr(self, name)
            if channel.is_alive():
                channel.stop()

    @property
    def channels_running(self):
        """Are any of the channels created and running?"""
        names = ('shell_channel', 'sub_channel', 'stdin_channel', 'hb_channel')
        return any(getattr(self, name).is_alive() for name in names)

    #--------------------------------------------------------------------------
    # Kernel process management methods:
    #--------------------------------------------------------------------------

    def start_kernel(self, **kw):
        """Starts a kernel process and configures the manager to use it.

        If random ports (port=0) are being used, this method must be called
        before the channels are created.

        Parameters
        ----------
        ipython : bool, optional (default True)
             Whether to use an IPython kernel instead of a plain Python kernel.

        launcher : callable, optional (default None)
             A custom function for launching the kernel process (generally a
             wrapper around ``entry_point.base_launch_kernel``). In most cases,
             it should not be necessary to use this parameter.

        **kw : optional
             See respective options for IPython and Python kernels.

        Raises
        ------
        RuntimeError
            If any of the configured addresses is not on a local interface.
        """
        shell = self.shell_address
        sub = self.sub_address
        stdin = self.stdin_address
        hb = self.hb_address
        if any(addr[0] not in LOCAL_IPS for addr in (shell, sub, stdin, hb)):
            raise RuntimeError("Can only launch a kernel on a local interface. "
                               "Make sure that the '*_address' attributes are "
                               "configured properly. "
                               "Currently valid addresses are: %s"%LOCAL_IPS
                               )

        # Remember the full argument set so restart_kernel can reuse it.
        self._launch_args = kw.copy()
        launch_kernel = kw.pop('launcher', None)
        if launch_kernel is None:
            if kw.pop('ipython', True):
                from ipkernel import launch_kernel
            else:
                from pykernel import launch_kernel
        launched = launch_kernel(
            shell_port=shell[1], iopub_port=sub[1],
            stdin_port=stdin[1], hb_port=hb[1], **kw)
        self.kernel, shell_port, iopub_port, stdin_port, hb_port = launched
        # Record the ports the launcher reported back.
        self.shell_address = (shell[0], shell_port)
        self.sub_address = (sub[0], iopub_port)
        self.stdin_address = (stdin[0], stdin_port)
        self.hb_address = (hb[0], hb_port)

    def shutdown_kernel(self, restart=False):
        """ Attempts to stop the kernel process cleanly. If the kernel
        cannot be stopped, it is killed, if possible.
        """
        # FIXME: Shutdown does not work on Windows due to ZMQ errors!
        if sys.platform == 'win32':
            self.kill_kernel()
            return

        # Pause the heart beat channel if it exists.
        heartbeat = self._hb_channel
        if heartbeat is not None:
            heartbeat.pause()

        # Don't send any additional kernel kill messages immediately, to give
        # the kernel a chance to properly execute shutdown actions. Wait for at
        # most 1s, checking every 0.1s.
        self.shell_channel.shutdown(restart=restart)
        for _ in range(10):
            if not self.is_alive:
                break
            time.sleep(0.1)
        else:
            # OK, we've waited long enough.
            if self.has_kernel:
                self.kill_kernel()

    def restart_kernel(self, now=False, **kw):
        """Restarts a kernel with the arguments that were used to launch it.

        If the old kernel was launched with random ports, the same ports will be
        used for the new kernel.

        Parameters
        ----------
        now : bool, optional
            If True, the kernel is forcefully restarted *immediately*, without
            having a chance to do any cleanup action.  Otherwise the kernel is
            given 1s to clean up before a forceful restart is issued.

            In all cases the kernel is restarted, the only difference is whether
            it is given a chance to perform a clean shutdown or not.

        **kw : optional
            Any options specified here will replace those used to launch the
            kernel.

        Raises
        ------
        RuntimeError
            If :method:`start_kernel` has not been called before.
        """
        if self._launch_args is None:
            raise RuntimeError("Cannot restart the kernel. "
                               "No previous call to 'start_kernel'.")

        # Stop currently running kernel.
        if self.has_kernel:
            if now:
                self.kill_kernel()
            else:
                self.shutdown_kernel(restart=True)

        # Start new kernel.
        self._launch_args.update(kw)
        self.start_kernel(**self._launch_args)

        # FIXME: Messages get dropped in Windows due to probable ZMQ bug
        # unless there is some delay here.
        if sys.platform == 'win32':
            time.sleep(0.2)

    @property
    def has_kernel(self):
        """Returns whether a kernel process has been specified for the kernel
        manager.
        """
        return self.kernel is not None

    def kill_kernel(self):
        """ Kill the running kernel.

        Raises :class:`RuntimeError` if no kernel is running.
        """
        if not self.has_kernel:
            raise RuntimeError("Cannot kill kernel. No kernel is running!")

        # Pause the heart beat channel if it exists.
        heartbeat = self._hb_channel
        if heartbeat is not None:
            heartbeat.pause()

        # Attempt to kill the kernel.
        try:
            self.kernel.kill()
        except OSError as e:
            if sys.platform == 'win32':
                # In Windows, we will get an Access Denied error if the process
                # has already terminated. Ignore it.
                if e.winerror != 5:
                    raise
            elif e.errno != errno.ESRCH:
                # On Unix, we may get an ESRCH error if the process has already
                # terminated. Ignore it; anything else is re-raised.
                raise
        self.kernel = None

    def interrupt_kernel(self):
        """ Interrupts the kernel. Unlike ``signal_kernel``, this operation is
        well supported on all platforms.

        Raises :class:`RuntimeError` if no kernel is running.
        """
        if not self.has_kernel:
            raise RuntimeError("Cannot interrupt kernel. No kernel is running!")
        if sys.platform == 'win32':
            from parentpoller import ParentPollerWindows as Poller
            Poller.send_interrupt(self.kernel.win32_interrupt_event)
        else:
            self.kernel.send_signal(signal.SIGINT)

    def signal_kernel(self, signum):
        """ Sends a signal to the kernel. Note that since only SIGTERM is
        supported on Windows, this function is only useful on Unix systems.

        Raises :class:`RuntimeError` if no kernel is running.
        """
        if not self.has_kernel:
            raise RuntimeError("Cannot signal kernel. No kernel is running!")
        self.kernel.send_signal(signum)

    @property
    def is_alive(self):
        """Is the kernel process still running?"""
        # FIXME: not using a heartbeat means this method is broken for any
        # remote kernel, it's only capable of handling local kernels.
        if not self.has_kernel:
            # We didn't start the kernel with this KernelManager so we don't
            # know if it is running. We should use a heartbeat for this case.
            return True
        return self.kernel.poll() is None

    #--------------------------------------------------------------------------
    # Channels used for communication with the kernel:
    #--------------------------------------------------------------------------

    @property
    def shell_channel(self):
        """Get the shell channel object used to make requests of the kernel.

        The channel is created on first access and reused afterwards.
        """
        channel = self._shell_channel
        if channel is None:
            channel = self.shell_channel_class(self.context,
                                               self.session,
                                               self.shell_address)
            self._shell_channel = channel
        return self._shell_channel

    @property
    def sub_channel(self):
        """Get the SUB socket channel object.

        The channel is created on first access and reused afterwards.
        """
        channel = self._sub_channel
        if channel is None:
            channel = self.sub_channel_class(self.context,
                                             self.session,
                                             self.sub_address)
            self._sub_channel = channel
        return self._sub_channel

    @property
    def stdin_channel(self):
        """Get the channel object that handles stdin (raw_input) requests.

        The channel is created on first access and reused afterwards.
        """
        channel = self._stdin_channel
        if channel is None:
            channel = self.stdin_channel_class(self.context,
                                               self.session,
                                               self.stdin_address)
            self._stdin_channel = channel
        return self._stdin_channel

    @property
    def hb_channel(self):
        """Get the heartbeat socket channel object to check that the
        kernel is alive.

        The channel is created on first access and reused afterwards.
        """
        channel = self._hb_channel
        if channel is None:
            channel = self.hb_channel_class(self.context,
                                            self.session,
                                            self.hb_address)
            self._hb_channel = channel
        return self._hb_channel
@@ -1,114 +1,114 b''
1 1 .. _ipython_qt:
2 2
3 3 ====================
4 4 IPython Qt interface
5 5 ====================
6 6
7 7 Abstract
8 8 --------
9 9
10 10 This is about the implementation of a Qt-based Graphical User Interface (GUI)
11 11 to execute Python code with an interpreter that runs in a separate process and
12 12 the two systems (GUI frontend and interpreter kernel) communicating via the
13 13 ZeroMQ Messaging library. The bulk of the implementation will be done without
14 14 dependencies on IPython (only on Zmq). Once the key features are ready,
15 15 IPython-specific features can be added using the IPython codebase.
16 16
17 17
18 18 Project details
19 19 ---------------
20 20
21 21 For a long time there has been demand for a graphical user interface for
22 22 IPython, and the project already ships Wx-based prototypes thereof. But these
23 23 run all code in a single process, making them extremely brittle, as a crash of
24 24 the Python interpreter kills the entire user session. Here I propose to build
25 25 a Qt-based GUI that will communicate with a separate process for the code
26 26 execution, so that if the interpreter kernel dies, the frontend can continue to
27 27 function after restarting a new kernel (and offering the user the option to
28 28 re-execute all inputs, which the frontend can know).
29 29
30 30 This GUI will allow for the easy editing of multi-line input and the convenient
31 31 re-editing of previous blocks of input, which can be displayed in a 2-d
32 32 workspace instead of a line-driven one like today's IPython. This makes it much
33 33 easier to incrementally build and tune a code, by combining the rapid feedback
34 34 cycle of IPython with the ability to edit multiline code with good graphical
35 35 support.
36 36
37 37
38 38 2-process model pyzmq base
39 39 ~~~~~~~~~~~~~~~~~~~~~~~~~~
40 40
41 41 Because users need to keep their data safe, the design is based on a
42 42 2-process model that will be achieved with a simple client/server system with
43 43 `pyzmq <http://www.zeromq.org/bindings:python>`_, so the GUI session does not
44 44 crash if the kernel process does. This will be achieved using this test
45 45 `code
46 46 <http://github.com/fperez/pyzmq/blob/completer/examples/kernel/kernel.py>`_ and
47 47 customizing it to the necessities of the GUI such as queue management with
48 48 discrimination for different frontends connected to the same kernel and tab
49 49 completion. A piece of drafted code for the kernel (server) should look like
50 50 this::
51 51
52 52 def main():
53 53 c = zmq.Context(1, 1)
54 54 rep_conn = connection % port_base
55 55 pub_conn = connection % (port_base+1)
56 56 print >>sys.__stdout__, "Starting the kernel..."
57 57 print >>sys.__stdout__, "On:",rep_conn, pub_conn
58 58 session = Session(username=u'kernel')
59 reply_socket = c.socket(zmq.XREP)
59 reply_socket = c.socket(zmq.ROUTER)
60 60 reply_socket.bind(rep_conn)
61 61 pub_socket = c.socket(zmq.PUB)
62 62 pub_socket.bind(pub_conn)
63 63 stdout = OutStream(session, pub_socket, u'stdout')
64 64 stderr = OutStream(session, pub_socket, u'stderr')
65 65 sys.stdout = stdout
66 66 sys.stderr = stderr
67 67 display_hook = DisplayHook(session, pub_socket)
68 68 sys.displayhook = display_hook
69 69 kernel = Kernel(session, reply_socket, pub_socket)
70 70
71 71 This kernel will use two queues (output and input), the input queue will have
72 72 the id of the process(frontend) making the request, type(execute, complete,
73 73 help, etc) and id of the request itself and the string of code to be executed,
74 74 the output queue will have basically the same information just that the string
75 75 is the to be displayed. This model is because the kernel needs to maintain
76 76 control of timeouts when multiple requests are sent and keep them indexed.
77 77
78 78 Qt based GUI
79 79 ~~~~~~~~~~~~
80 80
81 81 Design of the interface is going to be based on cells of code executed on the
82 82 previously defined kernel. It will also have GUI facilities such as toolboxes,
83 83 tooltips to autocomplete code and function summary, highlighting and
84 84 autoindentation. It will have the cell kind of multiline edition mode so each
85 85 block of code can be edited and executed independently, this can be achieved
86 86 queuing QTextEdit objects (the cell) giving them format so we can discriminate
87 87 outputs from inputs. One of the main characteristics will be the debug support
88 88 that will show the requested outputs as the debugger (that will be on a popup
89 89 widget) "walks" through the code, this design is to be reviewed with the
90 90 mentor. `This <http://gfif.udea.edu.co/IPythonQt_snapshot.png>`_ is a
91 91 tentative view of the main window.
92 92
93 93 The GUI will check continuously the output queue from the kernel for new
94 94 information to handle. This information has to be handled with care since any
95 95 output may come at any time and possibly in a different order than requested, or
96 96 may not appear at all; this can happen for a variety of reasons (for
97 97 example a tab completion request while the kernel is busy processing another
98 98 frontend's request). That is, if the kernel is busy it won't be possible to
99 99 fulfill the request for a while so the GUI will be prepared to abandon waiting
100 100 for the reply if the user moves on or a certain timeout expires.
101 101
102 102
103 103 POSSIBLE FUTURE DIRECTIONS
104 104 ---------------------------
105 105
106 106 The near future will bring the feature of saving and loading sessions, also
107 107 importing and exporting to different formats like rst, html, pdf and
108 108 python/ipython code, a discussion about this is taking place in the ipython-dev
109 109 mailing list. Also the interaction with a remote kernel and distributed
110 110 computation which is an IPython's project already in development.
111 111
112 112 The idea of a mathematica-like help widget (i.e. there will be parts of it that
113 113 will execute as a native session of IPythonQt) is still to be discussed in the
114 114 development mailing list but it's definitely a great idea.
@@ -1,937 +1,937 b''
1 1 .. _messaging:
2 2
3 3 ======================
4 4 Messaging in IPython
5 5 ======================
6 6
7 7
8 8 Introduction
9 9 ============
10 10
11 11 This document explains the basic communications design and messaging
12 12 specification for how the various IPython objects interact over a network
13 13 transport. The current implementation uses the ZeroMQ_ library for messaging
14 14 within and between hosts.
15 15
16 16 .. Note::
17 17
18 18 This document should be considered the authoritative description of the
19 19 IPython messaging protocol, and all developers are strongly encouraged to
20 20 keep it updated as the implementation evolves, so that we have a single
21 21 common reference for all protocol details.
22 22
23 23 The basic design is explained in the following diagram:
24 24
25 25 .. image:: figs/frontend-kernel.png
26 26 :width: 450px
27 27 :alt: IPython kernel/frontend messaging architecture.
28 28 :align: center
29 29 :target: ../_images/frontend-kernel.png
30 30
31 31 A single kernel can be simultaneously connected to one or more frontends. The
32 32 kernel has three sockets that serve the following functions:
33 33
34 34 1. REQ: this socket is connected to a *single* frontend at a time, and it allows
35 35 the kernel to request input from a frontend when :func:`raw_input` is called.
36 36 The frontend holding the matching REP socket acts as a 'virtual keyboard'
37 37 for the kernel while this communication is happening (illustrated in the
38 38 figure by the black outline around the central keyboard). In practice,
39 39 frontends may display such kernel requests using a special input widget or
40 40 otherwise indicating that the user is to type input for the kernel instead
41 41 of normal commands in the frontend.
42 42
43 2. XREP: this single sockets allows multiple incoming connections from
43 2. ROUTER: this single socket allows multiple incoming connections from
44 44 frontends, and this is the socket where requests for code execution, object
45 45 information, prompts, etc. are made to the kernel by any frontend. The
46 46 communication on this socket is a sequence of request/reply actions from
47 47 each frontend and the kernel.
48 48
49 49 3. PUB: this socket is the 'broadcast channel' where the kernel publishes all
50 50 side effects (stdout, stderr, etc.) as well as the requests coming from any
51 client over the XREP socket and its own requests on the REP socket. There
51 client over the ROUTER socket and its own requests on the REP socket. There
52 52 are a number of actions in Python which generate side effects: :func:`print`
53 53 writes to ``sys.stdout``, errors generate tracebacks, etc. Additionally, in
54 54 a multi-client scenario, we want all frontends to be able to know what each
55 55 other has sent to the kernel (this can be useful in collaborative scenarios,
56 56 for example). This socket allows both side effects and the information
57 about communications taking place with one client over the XREQ/XREP channel
57 about communications taking place with one client over the ROUTER/DEALER channel
58 58 to be made available to all clients in a uniform manner.
59 59
60 60 All messages are tagged with enough information (details below) for clients
61 61 to know which messages come from their own interaction with the kernel and
62 62 which ones are from other clients, so they can display each type
63 63 appropriately.
64 64
65 65 The actual format of the messages allowed on each of these channels is
66 66 specified below. Messages are dicts of dicts with string keys and values that
67 67 are reasonably representable in JSON. Our current implementation uses JSON
68 68 explicitly as its message format, but this shouldn't be considered a permanent
69 69 feature. As we've discovered that JSON has non-trivial performance issues due
70 70 to excessive copying, we may in the future move to a pure pickle-based raw
71 71 message format. However, it should be possible to easily convert from the raw
72 72 objects to JSON, since we may have non-python clients (e.g. a web frontend).
73 73 As long as it's easy to make a JSON version of the objects that is a faithful
74 74 representation of all the data, we can communicate with such clients.
75 75
76 76 .. Note::
77 77
78 78 Not all of these have yet been fully fleshed out, but the key ones are, see
79 79 kernel and frontend files for actual implementation details.
80 80
81 81
82 82 Python functional API
83 83 =====================
84 84
85 85 As messages are dicts, they map naturally to a ``func(**kw)`` call form. We
86 86 should develop, at a few key points, functional forms of all the requests that
87 87 take arguments in this manner and automatically construct the necessary dict
88 88 for sending.
89 89
90 90
91 91 General Message Format
92 92 ======================
93 93
94 94 All messages sent or received by any IPython process should have the following
95 95 generic structure::
96 96
97 97 {
98 98 # The message header contains a pair of unique identifiers for the
99 99 # originating session and the actual message id, in addition to the
100 100 # username for the process that generated the message. This is useful in
101 101 # collaborative settings where multiple users may be interacting with the
102 102 # same kernel simultaneously, so that frontends can label the various
103 103 # messages in a meaningful way.
104 104 'header' : {
105 105 'msg_id' : uuid,
106 106 'username' : str,
107 107 'session' : uuid,
108 108 # All recognized message type strings are listed below.
109 109 'msg_type' : str,
110 110 },
111 111
112 112 # In a chain of messages, the header from the parent is copied so that
113 113 # clients can track where messages come from.
114 114 'parent_header' : dict,
115 115
116 116 # The actual content of the message must be a dict, whose structure
117 117 # depends on the message type.
118 118 'content' : dict,
119 119 }
120 120
121 121 For each message type, the actual content will differ and all existing message
122 122 types are specified in what follows of this document.
123 123
124 124
125 Messages on the XREP/XREQ socket
125 Messages on the ROUTER/DEALER socket
126 126 ====================================
127 127
128 128 .. _execute:
129 129
130 130 Execute
131 131 -------
132 132
133 133 This message type is used by frontends to ask the kernel to execute code on
134 134 behalf of the user, in a namespace reserved to the user's variables (and thus
135 135 separate from the kernel's own internal code and variables).
136 136
137 137 Message type: ``execute_request``::
138 138
139 139 content = {
140 140 # Source code to be executed by the kernel, one or more lines.
141 141 'code' : str,
142 142
143 143 # A boolean flag which, if True, signals the kernel to execute this
144 144 # code as quietly as possible. This means that the kernel will compile
145 145 # the code with 'exec' instead of 'single' (so
146 146 # sys.displayhook will not fire), and will *not*:
147 147 # - broadcast exceptions on the PUB socket
148 148 # - do any logging
149 149 # - populate any history
150 150 #
151 151 # The default is False.
152 152 'silent' : bool,
153 153
154 154 # A list of variable names from the user's namespace to be retrieved. What
155 155 # returns is a JSON string of the variable's repr(), not a python object.
156 156 'user_variables' : list,
157 157
158 158 # Similarly, a dict mapping names to expressions to be evaluated in the
159 159 # user's dict.
160 160 'user_expressions' : dict,
161 161 }
162 162
163 163 The ``code`` field contains a single string (possibly multiline). The kernel
164 164 is responsible for splitting this into one or more independent execution blocks
165 165 and deciding whether to compile these in 'single' or 'exec' mode (see below for
166 166 detailed execution semantics).
167 167
168 168 The ``user_`` fields deserve a detailed explanation. In the past, IPython had
169 169 the notion of a prompt string that allowed arbitrary code to be evaluated, and
170 170 this was put to good use by many in creating prompts that displayed system
171 171 status, path information, and even more esoteric uses like remote instrument
172 172 status acquired over the network. But now that IPython has a clean separation
173 173 between the kernel and the clients, the kernel has no prompt knowledge; prompts
174 174 are a frontend-side feature, and it should be even possible for different
175 175 frontends to display different prompts while interacting with the same kernel.
176 176
177 177 The kernel now provides the ability to retrieve data from the user's namespace
178 178 after the execution of the main ``code``, thanks to two fields in the
179 179 ``execute_request`` message:
180 180
181 181 - ``user_variables``: If only variables from the user's namespace are needed, a
182 182 list of variable names can be passed and a dict with these names as keys and
183 183 their :func:`repr()` as values will be returned.
184 184
185 185 - ``user_expressions``: For more complex expressions that require function
186 186 evaluations, a dict can be provided with string keys and arbitrary python
187 187 expressions as values. The return message will contain also a dict with the
188 188 same keys and the :func:`repr()` of the evaluated expressions as value.
189 189
190 190 With this information, frontends can display any status information they wish
191 191 in the form that best suits each frontend (a status line, a popup, inline for a
192 192 terminal, etc).
193 193
194 194 .. Note::
195 195
196 196 In order to obtain the current execution counter for the purposes of
197 197 displaying input prompts, frontends simply make an execution request with an
198 198 empty code string and ``silent=True``.
199 199
200 200 Execution semantics
201 201 ~~~~~~~~~~~~~~~~~~~
202 202
203 203 When the silent flag is false, the execution of user code consists of the
204 204 following phases (in silent mode, only the ``code`` field is executed):
205 205
206 206 1. Run the ``pre_runcode_hook``.
207 207
208 208 2. Execute the ``code`` field, see below for details.
209 209
210 210 3. If #2 succeeds, ``user_variables`` and ``user_expressions`` are
211 211 computed. This ensures that any error in the latter doesn't harm the main
212 212 code execution.
213 213
214 214 4. Call any method registered with :meth:`register_post_execute`.
215 215
216 216 .. warning::
217 217
218 218 The API for running code before/after the main code block is likely to
219 219 change soon. Both the ``pre_runcode_hook`` and the
220 220 :meth:`register_post_execute` are susceptible to modification, as we find a
221 221 consistent model for both.
222 222
223 223 To understand how the ``code`` field is executed, one must know that Python
224 224 code can be compiled in one of three modes (controlled by the ``mode`` argument
225 225 to the :func:`compile` builtin):
226 226
227 227 *single*
228 228 Valid for a single interactive statement (though the source can contain
229 229 multiple lines, such as a for loop). When compiled in this mode, the
230 230 generated bytecode contains special instructions that trigger the calling of
231 231 :func:`sys.displayhook` for any expression in the block that returns a value.
232 232 This means that a single statement can actually produce multiple calls to
233 233 :func:`sys.displayhook`, if for example it contains a loop where each
234 234 iteration computes an unassigned expression; the following would generate 10 calls::
235 235
236 236 for i in range(10):
237 237 i**2
238 238
239 239 *exec*
240 240 An arbitrary amount of source code, this is how modules are compiled.
241 241 :func:`sys.displayhook` is *never* implicitly called.
242 242
243 243 *eval*
244 244 A single expression that returns a value. :func:`sys.displayhook` is *never*
245 245 implicitly called.
246 246
247 247
248 248 The ``code`` field is split into individual blocks each of which is valid for
249 249 execution in 'single' mode, and then:
250 250
251 251 - If there is only a single block: it is executed in 'single' mode.
252 252
253 253 - If there is more than one block:
254 254
255 255 * if the last one is a single line long, run all but the last in 'exec' mode
256 256 and the very last one in 'single' mode. This makes it easy to type simple
257 257 expressions at the end to see computed values.
258 258
259 259 * if the last one is no more than two lines long, run all but the last in
260 260 'exec' mode and the very last one in 'single' mode. This makes it easy to
261 261 type simple expressions at the end to see computed values.
262 262
263 263
264 264 * otherwise (last one is also multiline), run all in 'exec' mode as a single
265 265 unit.
266 266
267 267 Any error in retrieving the ``user_variables`` or evaluating the
268 268 ``user_expressions`` will result in a simple error message in the return fields
269 269 of the form::
270 270
271 271 [ERROR] ExceptionType: Exception message
272 272
273 273 The user can simply send the same variable name or expression for evaluation to
274 274 see a regular traceback.
275 275
276 276 Errors in any registered post_execute functions are also reported similarly,
277 277 and the failing function is removed from the post_execution set so that it does
278 278 not continue triggering failures.
279 279
280 280 Upon completion of the execution request, the kernel *always* sends a reply,
281 281 with a status code indicating what happened and additional data depending on
282 282 the outcome. See :ref:`below <execution_results>` for the possible return
283 283 codes and associated data.
284 284
285 285
286 286 Execution counter (old prompt number)
287 287 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
288 288
289 289 The kernel has a single, monotonically increasing counter of all execution
290 290 requests that are made with ``silent=False``. This counter is used to populate
291 291 the ``In[n]``, ``Out[n]`` and ``_n`` variables, so clients will likely want to
292 292 display it in some form to the user, which will typically (but not necessarily)
293 293 be done in the prompts. The value of this counter will be returned as the
294 294 ``execution_count`` field of all ``execute_reply`` messages.
295 295
296 296 .. _execution_results:
297 297
298 298 Execution results
299 299 ~~~~~~~~~~~~~~~~~
300 300
301 301 Message type: ``execute_reply``::
302 302
303 303 content = {
304 304 # One of: 'ok' OR 'error' OR 'abort'
305 305 'status' : str,
306 306
307 307 # The global kernel counter that increases by one with each non-silent
308 308 # executed request. This will typically be used by clients to display
309 309 # prompt numbers to the user. If the request was a silent one, this will
310 310 # be the current value of the counter in the kernel.
311 311 'execution_count' : int,
312 312 }
313 313
314 314 When status is 'ok', the following extra fields are present::
315 315
316 316 {
317 317 # The execution payload is a dict with string keys that may have been
318 318 # produced by the code being executed. It is retrieved by the kernel at
319 319 # the end of the execution and sent back to the front end, which can take
320 320 # action on it as needed. See main text for further details.
321 321 'payload' : dict,
322 322
323 323 # Results for the user_variables and user_expressions.
324 324 'user_variables' : dict,
325 325 'user_expressions' : dict,
326 326
327 327 # The kernel will often transform the input provided to it. If the
328 328 # '---->' transform had been applied, this is filled, otherwise it's the
329 329 # empty string. So transformations like magics don't appear here, only
330 330 # autocall ones.
331 331 'transformed_code' : str,
332 332 }
333 333
334 334 .. admonition:: Execution payloads
335 335
336 336 The notion of an 'execution payload' is different from a return value of a
337 337 given set of code, which normally is just displayed on the pyout stream
338 338 through the PUB socket. The idea of a payload is to allow special types of
339 339 code, typically magics, to populate a data container in the IPython kernel
340 340 that will be shipped back to the caller via this channel. The kernel will
341 341 have an API for this, probably something along the lines of::
342 342
343 343 ip.exec_payload_add(key, value)
344 344
345 345 though this API is still in the design stages. The data returned in this
346 346 payload will allow frontends to present special views of what just happened.
347 347
348 348
349 349 When status is 'error', the following extra fields are present::
350 350
351 351 {
352 352 'exc_name' : str, # Exception name, as a string
353 353 'exc_value' : str, # Exception value, as a string
354 354
355 355 # The traceback will contain a list of frames, represented each as a
356 356 # string. For now we'll stick to the existing design of ultraTB, which
357 357 # controls exception level of detail statefully. But eventually we'll
358 358 # want to grow into a model where more information is collected and
359 359 # packed into the traceback object, with clients deciding how little or
360 360 # how much of it to unpack. But for now, let's start with a simple list
361 361 # of strings, since that requires only minimal changes to ultratb as
362 362 # written.
363 363 'traceback' : list,
364 364 }
365 365
366 366
367 367 When status is 'abort', there are for now no additional data fields. This
368 368 happens when the kernel was interrupted by a signal.
369 369
370 370 Kernel attribute access
371 371 -----------------------
372 372
373 373 .. warning::
374 374
375 375 This part of the messaging spec is not actually implemented in the kernel
376 376 yet.
377 377
378 378 While this protocol does not specify full RPC access to arbitrary methods of
379 379 the kernel object, the kernel does allow read (and in some cases write) access
380 380 to certain attributes.
381 381
382 382 The policy for which attributes can be read is: any attribute of the kernel, or
383 383 its sub-objects, that belongs to a :class:`Configurable` object and has been
384 384 declared at the class-level with Traits validation, is in principle accessible
385 385 as long as its name does not begin with a leading underscore. The attribute
386 386 itself will have metadata indicating whether it allows remote read and/or write
387 387 access. The message spec follows for attribute read and write requests.
388 388
389 389 Message type: ``getattr_request``::
390 390
391 391 content = {
392 392 # The (possibly dotted) name of the attribute
393 393 'name' : str,
394 394 }
395 395
396 396 When a ``getattr_request`` fails, there are two possible error types:
397 397
398 398 - AttributeError: this type of error was raised when trying to access the
399 399 given name by the kernel itself. This means that the attribute likely
400 400 doesn't exist.
401 401
402 402 - AccessError: the attribute exists but its value is not readable remotely.
403 403
404 404
405 405 Message type: ``getattr_reply``::
406 406
407 407 content = {
408 408 # One of ['ok', 'AttributeError', 'AccessError'].
409 409 'status' : str,
410 410 # If status is 'ok', a JSON object.
411 411 'value' : object,
412 412 }
413 413
414 414 Message type: ``setattr_request``::
415 415
416 416 content = {
417 417 # The (possibly dotted) name of the attribute
418 418 'name' : str,
419 419
420 420 # A JSON-encoded object, that will be validated by the Traits
421 421 # information in the kernel
422 422 'value' : object,
423 423 }
424 424
425 425 When a ``setattr_request`` fails, there are also two possible error types with
426 426 similar meanings as those of the ``getattr_request`` case, but for writing.
427 427
428 428 Message type: ``setattr_reply``::
429 429
430 430 content = {
431 431 # One of ['ok', 'AttributeError', 'AccessError'].
432 432 'status' : str,
433 433 }
434 434
435 435
436 436
437 437 Object information
438 438 ------------------
439 439
440 440 One of IPython's most used capabilities is the introspection of Python objects
441 441 in the user's namespace, typically invoked via the ``?`` and ``??`` characters
442 442 (which in reality are shorthands for the ``%pinfo`` magic). This is used often
443 443 enough that it warrants an explicit message type, especially because frontends
444 444 may want to get object information in response to user keystrokes (like Tab or
445 445 F1) besides from the user explicitly typing code like ``x??``.
446 446
447 447 Message type: ``object_info_request``::
448 448
449 449 content = {
450 450 # The (possibly dotted) name of the object to be searched in all
451 451 # relevant namespaces
452 452 'name' : str,
453 453
454 454 # The level of detail desired. The default (0) is equivalent to typing
455 455 # 'x?' at the prompt, 1 is equivalent to 'x??'.
456 456 'detail_level' : int,
457 457 }
458 458
459 459 The returned information will be a dictionary with keys very similar to the
460 460 field names that IPython prints at the terminal.
461 461
462 462 Message type: ``object_info_reply``::
463 463
464 464 content = {
465 465 # The name the object was requested under
466 466 'name' : str,
467 467
468 468 # Boolean flag indicating whether the named object was found or not. If
469 469 # it's false, all other fields will be empty.
470 470 'found' : bool,
471 471
472 472 # Flags for magics and system aliases
473 473 'ismagic' : bool,
474 474 'isalias' : bool,
475 475
476 476 # The name of the namespace where the object was found ('builtin',
477 477 # 'magics', 'alias', 'interactive', etc.)
478 478 'namespace' : str,
479 479
480 480 # The type name will be type.__name__ for normal Python objects, but it
481 481 # can also be a string like 'Magic function' or 'System alias'
482 482 'type_name' : str,
483 483
484 484 # The string form of the object, possibly truncated for length if
485 485 # detail_level is 0
486 486 'string_form' : str,
487 487
488 488 # For objects with a __class__ attribute this will be set
489 489 'base_class' : str,
490 490
491 491 # For objects with a __len__ attribute this will be set
492 492 'length' : int,
493 493
494 494 # If the object is a function, class or method whose file we can find,
495 495 # we give its full path
496 496 'file' : str,
497 497
498 498 # For pure Python callable objects, we can reconstruct the object
499 499 # definition line which provides its call signature. For convenience this
500 500 # is returned as a single 'definition' field, but below the raw parts that
501 501 # compose it are also returned as the argspec field.
502 502 'definition' : str,
503 503
504 504 # The individual parts that together form the definition string. Clients
505 505 # with rich display capabilities may use this to provide a richer and more
506 506 # precise representation of the definition line (e.g. by highlighting
507 507 # arguments based on the user's cursor position). For non-callable
508 508 # objects, this field is empty.
509 509 'argspec' : { # The names of all the arguments
510 510 args : list,
511 511 # The name of the varargs (*args), if any
512 512 varargs : str,
513 513 # The name of the varkw (**kw), if any
514 514 varkw : str,
515 515 # The values (as strings) of all default arguments. Note
516 516 # that these must be matched *in reverse* with the 'args'
517 517 # list above, since the first positional args have no default
518 518 # value at all.
519 519 defaults : list,
520 520 },
521 521
522 522 # For instances, provide the constructor signature (the definition of
523 523 # the __init__ method):
524 524 'init_definition' : str,
525 525
526 526 # Docstrings: for any object (function, method, module, package) with a
527 527 # docstring, we show it. But in addition, we may provide additional
528 528 # docstrings. For example, for instances we will show the constructor
529 529 # and class docstrings as well, if available.
530 530 'docstring' : str,
531 531
532 532 # For instances, provide the constructor and class docstrings
533 533 'init_docstring' : str,
534 534 'class_docstring' : str,
535 535
536 536 # If it's a callable object whose call method has a separate docstring and
537 537 # definition line:
538 538 'call_def' : str,
539 539 'call_docstring' : str,
540 540
541 541 # If detail_level was 1, we also try to find the source code that
542 542 # defines the object, if possible. The string 'None' will indicate
543 543 # that no source was found.
544 544 'source' : str,
545 545 }
546 546
547 547
548 548 Complete
549 549 --------
550 550
551 551 Message type: ``complete_request``::
552 552
553 553 content = {
554 554 # The text to be completed, such as 'a.is'
555 555 'text' : str,
556 556
557 557 # The full line, such as 'print a.is'. This allows completers to
558 558 # make decisions that may require information about more than just the
559 559 # current word.
560 560 'line' : str,
561 561
562 562 # The entire block of text where the line is. This may be useful in the
563 563 # case of multiline completions where more context may be needed. Note: if
564 564 # in practice this field proves unnecessary, remove it to lighten the
565 565 # messages.
566 566
567 567 'block' : str,
568 568
569 569 # The position of the cursor where the user hit 'TAB' on the line.
570 570 'cursor_pos' : int,
571 571 }
572 572
573 573 Message type: ``complete_reply``::
574 574
575 575 content = {
576 576 # The list of all matches to the completion request, such as
577 577 # ['a.isalnum', 'a.isalpha'] for the above example.
578 578 'matches' : list
579 579 }
580 580
581 581
582 582 History
583 583 -------
584 584
585 585 For clients to explicitly request history from a kernel. The kernel has all
586 586 the actual execution history stored in a single location, so clients can
587 587 request it from the kernel when needed.
588 588
589 589 Message type: ``history_request``::
590 590
591 591 content = {
592 592
593 593 # If True, also return output history in the resulting dict.
594 594 'output' : bool,
595 595
596 596 # If True, return the raw input history, else the transformed input.
597 597 'raw' : bool,
598 598
599 599 # So far, this can be 'range', 'tail' or 'search'.
600 600 'hist_access_type' : str,
601 601
602 602 # If hist_access_type is 'range', get a range of input cells. session can
603 603 # be a positive session number, or a negative number to count back from
604 604 # the current session.
605 605 'session' : int,
606 606 # start and stop are line numbers within that session.
607 607 'start' : int,
608 608 'stop' : int,
609 609
610 610 # If hist_access_type is 'tail', get the last n cells.
611 611 'n' : int,
612 612
613 613 # If hist_access_type is 'search', get cells matching the specified glob
614 614 # pattern (with * and ? as wildcards).
615 615 'pattern' : str,
616 616
617 617 }
618 618
619 619 Message type: ``history_reply``::
620 620
621 621 content = {
622 622 # A list of 3-tuples, either:
623 623 # (session, line_number, input) or
624 624 # (session, line_number, (input, output)),
625 625 # depending on whether output was False or True, respectively.
626 626 'history' : list,
627 627 }
628 628
629 629
630 630 Connect
631 631 -------
632 632
633 633 When a client connects to the request/reply socket of the kernel, it can issue
634 634 a connect request to get basic information about the kernel, such as the ports
635 635 the other ZeroMQ sockets are listening on. This allows clients to only have
636 to know about a single port (the XREQ/XREP channel) to connect to a kernel.
636 to know about a single port (the DEALER/ROUTER channel) to connect to a kernel.
637 637
638 638 Message type: ``connect_request``::
639 639
640 640 content = {
641 641 }
642 642
643 643 Message type: ``connect_reply``::
644 644
645 645 content = {
646 'xrep_port' : int # The port the XREP socket is listening on.
646 'xrep_port' : int # The port the ROUTER socket is listening on.
647 647 'pub_port' : int # The port the PUB socket is listening on.
648 648 'req_port' : int # The port the REQ socket is listening on.
649 649 'hb_port' : int # The port the heartbeat socket is listening on.
650 650 }
651 651
652 652
653 653
654 654 Kernel shutdown
655 655 ---------------
656 656
657 657 The clients can request the kernel to shut itself down; this is used in
658 658 multiple cases:
659 659
660 660 - when the user chooses to close the client application via a menu or window
661 661 control.
662 662 - when the user types 'exit' or 'quit' (or their uppercase magic equivalents).
663 663 - when the user chooses a GUI method (like the 'Ctrl-C' shortcut in the
664 664 IPythonQt client) to force a kernel restart to get a clean kernel without
665 665 losing client-side state like history or inlined figures.
666 666
667 667 The client sends a shutdown request to the kernel, and once it receives the
668 668 reply message (which is otherwise empty), it can assume that the kernel has
669 669 completed shutdown safely.
670 670
671 671 Upon their own shutdown, client applications will typically execute a last
672 672 minute sanity check and forcefully terminate any kernel that is still alive, to
673 673 avoid leaving stray processes in the user's machine.
674 674
675 675 For both shutdown request and reply, there is no actual content that needs to
676 676 be sent, so the content dict is empty.
677 677
678 678 Message type: ``shutdown_request``::
679 679
680 680 content = {
681 681 'restart' : bool # whether the shutdown is final, or precedes a restart
682 682 }
683 683
684 684 Message type: ``shutdown_reply``::
685 685
686 686 content = {
687 687 'restart' : bool # whether the shutdown is final, or precedes a restart
688 688 }
689 689
690 690 .. Note::
691 691
692 692 When the clients detect a dead kernel thanks to inactivity on the heartbeat
693 693 socket, they simply send a forceful process termination signal, since a dead
694 694 process is unlikely to respond in any useful way to messages.
695 695
696 696
697 697 Messages on the PUB/SUB socket
698 698 ==============================
699 699
700 700 Streams (stdout, stderr, etc)
701 701 ------------------------------
702 702
703 703 Message type: ``stream``::
704 704
705 705 content = {
706 706 # The name of the stream is one of 'stdin', 'stdout', 'stderr'
707 707 'name' : str,
708 708
709 709 # The data is an arbitrary string to be written to that stream
710 710 'data' : str,
711 711 }
712 712
713 713 When a kernel receives a raw_input call, it should also broadcast it on the pub
714 714 socket with the names 'stdin' and 'stdin_reply'. This will allow other clients
715 715 to monitor/display kernel interactions and possibly replay them to their user
716 716 or otherwise expose them.
717 717
718 718 Display Data
719 719 ------------
720 720
721 721 This type of message is used to bring back data that should be displayed (text,
722 722 html, svg, etc.) in the frontends. This data is published to all frontends.
723 723 Each message can have multiple representations of the data; it is up to the
724 724 frontend to decide which to use and how. A single message should contain all
725 725 possible representations of the same information. Each representation should
726 726 be a JSON'able data structure, and should be a valid MIME type.
727 727
728 728 Some questions remain about this design:
729 729
730 730 * Do we use this message type for pyout/displayhook? Probably not, because
731 731 the displayhook also has to handle the Out prompt display. On the other hand
732 732 we could put that information into the metadata section.
733 733
734 734 Message type: ``display_data``::
735 735
736 736 content = {
737 737
738 738 # Who created the data
739 739 'source' : str,
740 740
741 741 # The data dict contains key/value pairs, where the keys are MIME
742 742 # types and the values are the raw data of the representation in that
743 743 # format. The data dict must minimally contain the ``text/plain``
744 744 # MIME type which is used as a backup representation.
745 745 'data' : dict,
746 746
747 747 # Any metadata that describes the data
748 748 'metadata' : dict
749 749 }
750 750
751 751 Python inputs
752 752 -------------
753 753
754 754 These messages are the re-broadcast of the ``execute_request``.
755 755
756 756 Message type: ``pyin``::
757 757
758 758 content = {
759 759 'code' : str # Source code to be executed, one or more lines
760 760 }
761 761
762 762 Python outputs
763 763 --------------
764 764
765 765 When Python produces output from code that has been compiled in with the
766 766 'single' flag to :func:`compile`, any expression that produces a value (such as
767 767 ``1+1``) is passed to ``sys.displayhook``, which is a callable that can do with
768 768 this value whatever it wants. The default behavior of ``sys.displayhook`` in
769 769 the Python interactive prompt is to print to ``sys.stdout`` the :func:`repr` of
770 770 the value as long as it is not ``None`` (which isn't printed at all). In our
771 771 case, the kernel instantiates as ``sys.displayhook`` an object which has
772 772 similar behavior, but which instead of printing to stdout, broadcasts these
773 773 values as ``pyout`` messages for clients to display appropriately.
774 774
775 775 IPython's displayhook can handle multiple simultaneous formats depending on its
776 776 configuration. The default pretty-printed repr text is always given with the
777 777 ``data`` entry in this message. Any other formats are provided in the
778 778 ``extra_formats`` list. Frontends are free to display any or all of these
779 779 according to their capabilities. The ``extra_formats`` list contains 3-tuples of an ID
780 780 string, a type string, and the data. The ID is unique to the formatter
781 781 implementation that created the data. Frontends will typically ignore the ID
782 782 unless they have requested a particular formatter. The type string tells the
783 783 frontend how to interpret the data. It is often, but not always, a MIME type.
784 784 Frontends should ignore types that they do not understand. The data itself is
785 785 any JSON object and depends on the format. It is often, but not always, a string.
786 786
787 787 Message type: ``pyout``::
788 788
789 789 content = {
790 790
791 791 # The counter for this execution is also provided so that clients can
792 792 # display it, since IPython automatically creates variables called _N
793 793 # (for prompt N).
794 794 'execution_count' : int,
795 795
796 796 # The data dict contains key/value pairs, where the keys are MIME
797 797 # types and the values are the raw data of the representation in that
798 798 # format. The data dict must minimally contain the ``text/plain``
799 799 # MIME type which is used as a backup representation.
800 800 'data' : dict,
801 801
802 802 }
803 803
804 804 Python errors
805 805 -------------
806 806
807 807 When an error occurs during code execution, the kernel broadcasts a ``pyerr`` message.
808 808
809 809 Message type: ``pyerr``::
810 810
811 811 content = {
812 812 # Similar content to the execute_reply messages for the 'error' case,
813 813 # except the 'status' field is omitted.
814 814 }
815 815
816 816 Kernel status
817 817 -------------
818 818
819 819 This message type is used by frontends to monitor the status of the kernel.
820 820
821 821 Message type: ``status``::
822 822
823 823 content = {
824 824 # When the kernel starts to execute code, it will enter the 'busy'
825 825 # state and when it finishes, it will enter the 'idle' state.
826 826 execution_state : ('busy', 'idle')
827 827 }
828 828
829 829 Kernel crashes
830 830 --------------
831 831
832 832 When the kernel has an unexpected exception, caught by the last-resort
833 833 sys.excepthook, we should broadcast the crash handler's output before exiting.
834 834 This will allow clients to notice that a kernel died, inform the user and
835 835 propose further actions.
836 836
837 837 Message type: ``crash``::
838 838
839 839 content = {
840 840 # Similarly to the 'error' case for execute_reply messages, this will
841 841 # contain exc_name, exc_type and traceback fields.
842 842
843 843 # An additional field with supplementary information such as where to
844 844 # send the crash message
845 845 'info' : str,
846 846 }
847 847
848 848
849 849 Future ideas
850 850 ------------
851 851
852 852 Other potential message types, currently unimplemented, listed below as ideas.
853 853
854 854 Message type: ``file``::
855 855
856 856 content = {
857 857 'path' : 'cool.jpg',
858 858 'mimetype' : str,
859 859 'data' : str,
860 860 }
861 861
862 862
863 863 Messages on the REQ/REP socket
864 864 ==============================
865 865
866 866 This is a socket that goes in the opposite direction: from the kernel to a
867 867 *single* frontend, and its purpose is to allow ``raw_input`` and similar
868 868 operations that read from ``sys.stdin`` on the kernel to be fulfilled by the
869 869 client. For now we will keep these messages as simple as possible, since they
870 870 basically only mean to convey the ``raw_input(prompt)`` call.
871 871
872 872 Message type: ``input_request``::
873 873
874 874 content = { 'prompt' : str }
875 875
876 876 Message type: ``input_reply``::
877 877
878 878 content = { 'value' : str }
879 879
880 880 .. Note::
881 881
882 882 We do not explicitly try to forward the raw ``sys.stdin`` object, because in
883 883 practice the kernel should behave like an interactive program. When a
884 884 program is opened on the console, the keyboard effectively takes over the
885 885 ``stdin`` file descriptor, and it can't be used for raw reading anymore.
886 886 Since the IPython kernel effectively behaves like a console program (albeit
887 887 one whose "keyboard" is actually living in a separate process and
888 888 transported over the zmq connection), raw ``stdin`` isn't expected to be
889 889 available.
890 890
891 891
892 892 Heartbeat for kernels
893 893 =====================
894 894
895 895 Initially we had considered using messages like those above over ZMQ for a
896 896 kernel 'heartbeat' (a way to detect quickly and reliably whether a kernel is
897 897 alive at all, even if it may be busy executing user code). But this has the
898 898 problem that if the kernel is locked inside extension code, it wouldn't execute
899 899 the python heartbeat code. But it turns out that we can implement a basic
900 900 heartbeat with pure ZMQ, without using any Python messaging at all.
901 901
902 902 The monitor sends out a single zmq message (right now, it is a str of the
903 903 monitor's lifetime in seconds), and gets the same message right back, prefixed
904 with the zmq identity of the XREQ socket in the heartbeat process. This can be
904 with the zmq identity of the DEALER socket in the heartbeat process. This can be
905 905 a uuid, or even a full message, but there doesn't seem to be a need for packing
906 906 up a message when the sender and receiver are the exact same Python object.
907 907
908 908 The model is this::
909 909
910 910 monitor.send(str(self.lifetime)) # '1.2345678910'
911 911
912 912 and the monitor receives some number of messages of the form::
913 913
914 914 ['uuid-abcd-dead-beef', '1.2345678910']
915 915
916 where the first part is the zmq.IDENTITY of the heart's XREQ on the engine, and
916 where the first part is the zmq.IDENTITY of the heart's DEALER on the engine, and
917 917 the rest is the message sent by the monitor. No Python code ever has any
918 918 access to the message between the monitor's send, and the monitor's recv.
919 919
920 920
921 921 ToDo
922 922 ====
923 923
924 924 Missing things include:
925 925
926 926 * Important: finish thinking through the payload concept and API.
927 927
928 928 * Important: ensure that we have a good solution for magics like %edit. It's
929 929 likely that with the payload concept we can build a full solution, but not
930 930 100% clear yet.
931 931
932 932 * Finishing the details of the heartbeat protocol.
933 933
934 934 * Signal handling: specify what kind of information kernel should broadcast (or
935 935 not) when it receives signals.
936 936
937 937 .. include:: ../links.rst
@@ -1,154 +1,154 b''
1 1 .. _parallel_connections:
2 2
3 3 ==============================================
4 4 Connection Diagrams of The IPython ZMQ Cluster
5 5 ==============================================
6 6
7 7 This is a quick summary and illustration of the connections involved in the ZeroMQ based
8 8 IPython cluster for parallel computing.
9 9
10 10 All Connections
11 11 ===============
12 12
13 13 The IPython cluster consists of a Controller, and one or more each of clients and engines.
14 14 The goal of the Controller is to manage and monitor the connections and communications
15 15 between the clients and the engines. The Controller is no longer a single process entity,
16 16 but rather a collection of processes - specifically one Hub, and 4 (or more) Schedulers.
17 17
18 18 It is important for security/practicality reasons that all connections be inbound to the
19 19 controller processes. The arrows in the figures indicate the direction of the
20 20 connection.
21 21
22 22
23 23 .. figure:: figs/allconnections.png
24 24 :width: 432px
25 25 :alt: IPython cluster connections
26 26 :align: center
27 27
28 28 All the connections involved in connecting one client to one engine.
29 29
30 30 The Controller consists of 1-4 processes. Central to the cluster is the **Hub**, which monitors
31 31 engine state, execution traffic, and handles registration and notification. The Hub includes a
32 32 Heartbeat Monitor for keeping track of engines that are alive. Outside the Hub are 4
33 33 **Schedulers**. These devices are very small pure-C MonitoredQueue processes (or optionally
34 34 threads) that relay messages very fast, but also send a copy of each message along a side socket
35 35 to the Hub. The MUX queue and Control queue are MonitoredQueue ØMQ devices which relay
36 36 explicitly addressed messages from clients to engines, and their replies back up. The Balanced
37 37 queue performs load-balancing destination-agnostic scheduling. It may be a MonitoredQueue
38 38 device, but may also be a Python Scheduler that behaves externally in an identical fashion to MQ
39 39 devices, but with additional internal logic. stdout/err are also propagated from the Engines to
40 40 the clients via a PUB/SUB MonitoredQueue.
41 41
42 42
43 43 Registration
44 44 ------------
45 45
46 46 .. figure:: figs/queryfade.png
47 47 :width: 432px
48 48 :alt: IPython Registration connections
49 49 :align: center
50 50
51 Engines and Clients only need to know where the Query ``XREP`` is located to start
51 Engines and Clients only need to know where the Query ``ROUTER`` is located to start
52 52 connecting.
53 53
54 54 Once a controller is launched, the only information needed for connecting clients and/or
55 engines is the IP/port of the Hub's ``XREP`` socket called the Registrar. This socket
55 engines is the IP/port of the Hub's ``ROUTER`` socket called the Registrar. This socket
56 56 handles connections from both clients and engines, and replies with the remaining
57 57 information necessary to establish the remaining connections. Clients use this same socket for
58 58 querying the Hub for state information.
59 59
60 60 Heartbeat
61 61 ---------
62 62
63 63 .. figure:: figs/hbfade.png
64 64 :width: 432px
65 65 :alt: IPython Heartbeat connections
66 66 :align: center
67 67
68 68 The heartbeat sockets.
69 69
70 70 The heartbeat process has been described elsewhere. To summarize: the Heartbeat Monitor
71 71 publishes a distinct message periodically via a ``PUB`` socket. Each engine has a
72 ``zmq.FORWARDER`` device with a ``SUB`` socket for input, and ``XREQ`` socket for output.
73 The ``SUB`` socket is connected to the ``PUB`` socket labeled *ping*, and the ``XREQ`` is
74 connected to the ``XREP`` labeled *pong*. This results in the same message being relayed
75 back to the Heartbeat Monitor with the addition of the ``XREQ`` prefix. The Heartbeat
76 Monitor receives all the replies via an ``XREP`` socket, and identifies which hearts are
77 still beating by the ``zmq.IDENTITY`` prefix of the ``XREQ`` sockets, which information
72 ``zmq.FORWARDER`` device with a ``SUB`` socket for input, and ``DEALER`` socket for output.
73 The ``SUB`` socket is connected to the ``PUB`` socket labeled *ping*, and the ``DEALER`` is
74 connected to the ``ROUTER`` labeled *pong*. This results in the same message being relayed
75 back to the Heartbeat Monitor with the addition of the ``DEALER`` prefix. The Heartbeat
76 Monitor receives all the replies via a ``ROUTER`` socket, and identifies which hearts are
77 still beating by the ``zmq.IDENTITY`` prefix of the ``DEALER`` sockets, which information
78 78 the Hub uses to notify clients of any changes in the available engines.
79 79
80 80 Schedulers
81 81 ----------
82 82
83 83 .. figure:: figs/queuefade.png
84 84 :width: 432px
85 85 :alt: IPython Queue connections
86 86 :align: center
87 87
88 88 Control message scheduler on the left, execution (apply) schedulers on the right.
89 89
90 90 The controller has at least three Schedulers. These devices are primarily for
91 91 relaying messages between clients and engines, but the Hub needs to see those
92 92 messages for its own purposes. Since no Python code may exist between the two sockets in a
93 93 queue, all messages sent through these queues (both directions) are also sent via a
94 94 ``PUB`` socket to a monitor, which allows the Hub to monitor queue traffic without
95 95 interfering with it.
96 96
97 For tasks, the engine need not be specified. Messages sent to the ``XREP`` socket from the
98 client side are assigned to an engine via ZMQ's ``XREQ`` round-robin load balancing.
97 For tasks, the engine need not be specified. Messages sent to the ``ROUTER`` socket from the
98 client side are assigned to an engine via ZMQ's ``DEALER`` round-robin load balancing.
99 99 Engine replies are directed to specific clients via the IDENTITY of the client, which is
100 100 received as a prefix at the Engine.
101 101
102 For Multiplexing, ``XREP`` is used for both in and output sockets in the device. Clients must
103 specify the destination by the ``zmq.IDENTITY`` of the ``XREP`` socket connected to
102 For Multiplexing, ``ROUTER`` is used for both the input and output sockets in the device. Clients must
103 specify the destination by the ``zmq.IDENTITY`` of the ``ROUTER`` socket connected to
104 104 the downstream end of the device.
105 105
106 At the Kernel level, both of these ``XREP`` sockets are treated in the same way as the ``REP``
106 At the Kernel level, both of these ``ROUTER`` sockets are treated in the same way as the ``REP``
107 107 socket in the serial version (except using ZMQStreams instead of explicit sockets).
108 108
109 109 Execution can be done in a load-balanced (engine-agnostic) or multiplexed (engine-specified)
110 110 manner. The sockets on the Client and Engine are the same for these two actions, but the
111 111 scheduler used determines the actual behavior. This routing is done via the ``zmq.IDENTITY`` of
112 112 the upstream sockets in each MonitoredQueue.
113 113
114 114 IOPub
115 115 -----
116 116
117 117 .. figure:: figs/iopubfade.png
118 118 :width: 432px
119 119 :alt: IOPub connections
120 120 :align: center
121 121
122 122 stdout/err are published via a ``PUB/SUB`` MonitoredQueue
123 123
124 124
125 125 On the kernels, stdout/stderr are captured and published via a ``PUB`` socket. These ``PUB``
126 126 sockets all connect to a ``SUB`` socket input of a MonitoredQueue, which subscribes to all
127 127 messages. They are then republished via another ``PUB`` socket, which can be
128 128 subscribed by the clients.
129 129
130 130 Client connections
131 131 ------------------
132 132
133 133 .. figure:: figs/queryfade.png
134 134 :width: 432px
135 135 :alt: IPython client query connections
136 136 :align: center
137 137
138 Clients connect to an ``XREP`` socket to query the hub.
138 Clients connect to a ``ROUTER`` socket to query the hub.
139 139
140 The hub's registrar ``XREP`` socket also listens for queries from clients as to queue status,
141 and control instructions. Clients connect to this socket via an ``XREQ`` during registration.
140 The hub's registrar ``ROUTER`` socket also listens for queries from clients as to queue status,
141 and control instructions. Clients connect to this socket via a ``DEALER`` during registration.
142 142
143 143 .. figure:: figs/notiffade.png
144 144 :width: 432px
145 145 :alt: IPython Registration connections
146 146 :align: center
147 147
148 148 Engine registration events are published via a ``PUB`` socket.
149 149
150 150 The Hub publishes all registration/unregistration events via a ``PUB`` socket. This
151 151 allows clients to stay up to date with what engines are available by subscribing to the
152 152 feed with a ``SUB`` socket. Other processes could selectively subscribe to just
153 153 registration or unregistration events.
154 154
@@ -1,378 +1,378 b''
1 1 .. _parallel_messages:
2 2
3 3 Messaging for Parallel Computing
4 4 ================================
5 5
6 6 This is an extension of the :ref:`messaging <messaging>` doc. Diagrams of the connections
7 7 can be found in the :ref:`parallel connections <parallel_connections>` doc.
8 8
9 9
10 10 ZMQ messaging is also used in the parallel computing IPython system. All messages to/from
11 11 kernels remain the same as the single kernel model, and are forwarded through a ZMQ Queue
12 12 device. The controller receives all messages and replies in these channels, and saves
13 13 results for future use.
14 14
15 15 The Controller
16 16 --------------
17 17
18 18 The controller is the central collection of processes in the IPython parallel computing
19 19 model. It has two major components:
20 20
21 21 * The Hub
22 22 * A collection of Schedulers
23 23
24 24 The Hub
25 25 -------
26 26
27 27 The Hub is the central process for monitoring the state of the engines, and all task
28 28 requests and results. It has no role in execution and does no relay of messages, so
29 29 large blocking requests or database actions in the Hub do not have the ability to impede
30 30 job submission and results.
31 31
32 Registration (``XREP``)
32 Registration (``ROUTER``)
33 33 *************************
34 34
35 35 The first function of the Hub is to facilitate and monitor connections of clients
36 36 and engines. Both client and engine registration are handled by the same socket, so only
37 37 one ip/port pair is needed to connect any number of engines and clients.
38 38
39 Engines register with the ``zmq.IDENTITY`` of their two ``XREQ`` sockets, one for the
39 Engines register with the ``zmq.IDENTITY`` of their two ``DEALER`` sockets, one for the
40 40 queue, which receives execute requests, and one for the heartbeat, which is used to
41 41 monitor the survival of the Engine process.
42 42
43 43 Message type: ``registration_request``::
44 44
45 45 content = {
46 46 'queue' : 'abcd-1234-...', # the MUX queue zmq.IDENTITY
47 47 'control' : 'abcd-1234-...', # the control queue zmq.IDENTITY
48 48 'heartbeat' : 'abcd-1234-...' # the heartbeat zmq.IDENTITY
49 49 }
50 50
51 51 .. note::
52 52
53 53 these are always the same, at least for now.
54 54
55 55 The Controller replies to an Engine's registration request with the engine's integer ID,
56 56 and all the remaining connection information for connecting the heartbeat process, and
57 57 kernel queue socket(s). The message status will be an error if the Engine requests IDs that
58 58 are already in use.
59 59
60 60 Message type: ``registration_reply``::
61 61
62 62 content = {
63 63 'status' : 'ok', # or 'error'
64 64 # if ok:
65 65 'id' : 0, # int, the engine id
66 66 'queue' : 'tcp://127.0.0.1:12345', # connection for engine side of the queue
67 67 'control' : 'tcp://...', # addr for control queue
68 68 'heartbeat' : ('tcp://...','tcp://...'), # tuple containing two interfaces needed for heartbeat
69 69 'task' : 'tcp://...', # addr for task queue, or None if no task queue running
70 70 }
71 71
72 72 Clients use the same socket as engines to start their connections. Connection requests
73 73 from clients need no information:
74 74
75 75 Message type: ``connection_request``::
76 76
77 77 content = {}
78 78
79 79 The reply to a Client registration request contains the connection information for the
80 80 multiplexer and load balanced queues, as well as the address for direct hub
81 81 queries. If any of these addresses is `None`, that functionality is not available.
82 82
83 83 Message type: ``connection_reply``::
84 84
85 85 content = {
86 86 'status' : 'ok', # or 'error'
87 87 # if ok:
88 88 'queue' : 'tcp://127.0.0.1:12345', # connection for client side of the MUX queue
89 89 'task' : ('lru','tcp...'), # routing scheme and addr for task queue (len 2 tuple)
90 90 'query' : 'tcp...', # addr for methods to query the hub, like queue_request, etc.
91 91 'control' : 'tcp...', # addr for control methods, like abort, etc.
92 92 }
93 93
94 94 Heartbeat
95 95 *********
96 96
97 97 The hub uses a heartbeat system to monitor engines, and track when they become
98 98 unresponsive, as described in :ref:`messaging <messaging>` and shown in :ref:`connections
99 99 <parallel_connections>`.
100 100
101 101 Notification (``PUB``)
102 102 **********************
103 103
104 104 The hub publishes all engine registration/unregistration events on a ``PUB`` socket.
105 105 This allows clients to have up-to-date engine ID sets without polling. Registration
106 106 notifications contain both the integer engine ID and the queue ID, which is necessary for
107 107 sending messages via the Multiplexer Queue and Control Queues.
108 108
109 109 Message type: ``registration_notification``::
110 110
111 111 content = {
112 112 'id' : 0, # engine ID that has been registered
113 113 'queue' : 'engine_id' # the IDENT for the engine's queue
114 114 }
115 115
116 116 Message type : ``unregistration_notification``::
117 117
118 118 content = {
119 119 'id' : 0 # engine ID that has been unregistered
120 120 }
121 121
122 122
123 Client Queries (``XREP``)
123 Client Queries (``ROUTER``)
124 124 ***************************
125 125
126 126 The hub monitors and logs all queue traffic, so that clients can retrieve past
127 127 results or monitor pending tasks. This information may reside in-memory on the Hub, or
128 128 on disk in a database (SQLite and MongoDB are currently supported). These requests are
129 129 handled by the same socket as registration.
130 130
131 131
132 132 :func:`queue_request` requests can specify multiple engines to query via the `targets`
133 133 element. A verbose flag can be passed, to determine whether the result should be the list
134 134 of `msg_ids` in the queue or simply the length of each list.
135 135
136 136 Message type: ``queue_request``::
137 137
138 138 content = {
139 139 'verbose' : True, # whether return should be lists themselves or just lens
140 140 'targets' : [0,3,1] # list of ints
141 141 }
142 142
143 143 The content of a reply to a :func:`queue_request` request is a dict, keyed by the engine
144 144 IDs. Note that they will be the string representation of the integer keys, since JSON
145 145 cannot handle number keys. The three keys of each dict are::
146 146
147 147 'completed' : messages submitted via any queue that ran on the engine
148 148 'queue' : jobs submitted via MUX queue, whose results have not been received
149 149 'tasks' : tasks that are known to have been submitted to the engine, but
150 150 have not completed. Note that with the pure zmq scheduler, this will
151 151 always be 0/[].
152 152
153 153 Message type: ``queue_reply``::
154 154
155 155 content = {
156 156 'status' : 'ok', # or 'error'
157 157 # if verbose=False:
158 158 '0' : {'completed' : 1, 'queue' : 7, 'tasks' : 0},
159 159 # if verbose=True:
160 160 '1' : {'completed' : ['abcd-...','1234-...'], 'queue' : ['58008-'], 'tasks' : []},
161 161 }
162 162
163 163 Clients can request individual results directly from the hub. This is primarily for
164 164 gathering results of executions not submitted by the requesting client, as the client
165 165 will have all its own results already. Requests are made by msg_id, and can contain one or
166 166 more msg_id. An additional boolean key 'statusonly' can be used to not request the
167 167 results, but simply poll the status of the jobs.
168 168
169 169 Message type: ``result_request``::
170 170
171 171 content = {
172 172 'msg_ids' : ['uuid','...'], # list of strs
173 173 'targets' : [1,2,3], # list of int ids or uuids
174 174 'statusonly' : False, # bool
175 175 }
176 176
177 177 The :func:`result_request` reply contains the content objects of the actual execution
178 178 reply messages. If `statusonly=True`, then there will be only the 'pending' and
179 179 'completed' lists.
180 180
181 181
182 182 Message type: ``result_reply``::
183 183
184 184 content = {
185 185 'status' : 'ok', # else error
186 186 # if ok:
187 187 'acbd-...' : msg, # the content dict is keyed by msg_ids,
188 188 # values are the result messages
189 189 # there will be none of these if `statusonly=True`
190 190 'pending' : ['msg_id','...'], # msg_ids still pending
191 191 'completed' : ['msg_id','...'], # list of completed msg_ids
192 192 }
193 193 buffers = ['bufs','...'] # the buffers that contained the results of the objects.
194 194 # this will be empty if no messages are complete, or if
195 195 # statusonly is True.
196 196
197 197 For memory management purposes, Clients can also instruct the hub to forget the
198 198 results of messages. This can be done by message ID or engine ID. Individual messages are
199 199 dropped by msg_id, and all messages completed on an engine are dropped by engine ID. This
200 200 may no longer be necessary with the mongodb-based message logging backend.
201 201
202 202 If the msg_ids element is the string ``'all'`` instead of a list, then all completed
203 203 results are forgotten.
204 204
205 205 Message type: ``purge_request``::
206 206
207 207 content = {
208 208 'msg_ids' : ['id1', 'id2',...], # list of msg_ids or 'all'
209 209 'engine_ids' : [0,2,4] # list of engine IDs
210 210 }
211 211
212 212 The reply to a purge request is simply the status 'ok' if the request succeeded, or an
213 213 explanation of why it failed, such as requesting the purge of a nonexistent or pending
214 214 message.
215 215
216 216 Message type: ``purge_reply``::
217 217
218 218 content = {
219 219 'status' : 'ok', # or 'error'
220 220 }
221 221
222 222
223 223 Schedulers
224 224 ----------
225 225
226 226 There are three basic schedulers:
227 227
228 228 * Task Scheduler
229 229 * MUX Scheduler
230 230 * Control Scheduler
231 231
232 The MUX and Control schedulers are simple MonitoredQueue ØMQ devices, with ``XREP``
232 The MUX and Control schedulers are simple MonitoredQueue ØMQ devices, with ``ROUTER``
233 233 sockets on either side. This allows the queue to relay individual messages to particular
234 234 targets via ``zmq.IDENTITY`` routing. The Task scheduler may be a MonitoredQueue ØMQ
235 device, in which case the client-facing socket is ``XREP``, and the engine-facing socket
236 is ``XREQ``. The result of this is that client-submitted messages are load-balanced via
237 the ``XREQ`` socket, but the engine's replies to each message go to the requesting client.
235 device, in which case the client-facing socket is ``ROUTER``, and the engine-facing socket
236 is ``DEALER``. The result of this is that client-submitted messages are load-balanced via
237 the ``DEALER`` socket, but the engine's replies to each message go to the requesting client.
238 238
239 Raw ``XREQ`` scheduling is quite primitive, and doesn't allow message introspection, so
239 Raw ``DEALER`` scheduling is quite primitive, and doesn't allow message introspection, so
240 240 there are also Python Schedulers that can be used. These Schedulers behave in much the
241 241 same way as a MonitoredQueue does from the outside, but have rich internal logic to
242 242 determine destinations, as well as handle dependency graphs. Their sockets are always
243 ``XREP`` on both sides.
243 ``ROUTER`` on both sides.
244 244
245 245 The Python task schedulers have an additional message type, which informs the Hub of
246 246 the destination of a task as soon as that destination is known.
247 247
248 248 Message type: ``task_destination``::
249 249
250 250 content = {
251 251 'msg_id' : 'abcd-1234-...', # the msg's uuid
252 252 'engine_id' : '1234-abcd-...', # the destination engine's zmq.IDENTITY
253 253 }
254 254
255 255 :func:`apply` and :func:`apply_bound`
256 256 *************************************
257 257
258 258 In terms of message classes, the MUX scheduler and Task scheduler relay the exact same
259 259 message types. Their only difference lies in how the destination is selected.
260 260
261 261 The `Namespace <http://gist.github.com/483294>`_ model suggests that execution be able to
262 262 use the model::
263 263
264 264 ns.apply(f, *args, **kwargs)
265 265
266 266 which takes `f`, a function in the user's namespace, and executes ``f(*args, **kwargs)``
267 267 on a remote engine, returning the result (or, for non-blocking, information facilitating
268 268 later retrieval of the result). This model, unlike the execute message which just uses a
269 269 code string, must be able to send arbitrary (pickleable) Python objects. And ideally, copy
270 270 as little data as we can. The `buffers` property of a Message was introduced for this
271 271 purpose.
272 272
273 273 Utility method :func:`build_apply_message` in :mod:`IPython.zmq.streamsession` wraps a
274 274 function signature and builds a sendable buffer format for minimal data copying (exactly
275 275 zero copies of numpy array data or buffers or large strings).
276 276
277 277 Message type: ``apply_request``::
278 278
279 279 content = {
280 280 'bound' : True, # whether to execute in the engine's namespace or unbound
281 281 'after' : ['msg_id',...], # list of msg_ids or output of Dependency.as_dict()
282 282 'follow' : ['msg_id',...], # list of msg_ids or output of Dependency.as_dict()
283 283
284 284 }
285 285 buffers = ['...'] # at least 3 in length
286 286 # as built by build_apply_message(f,args,kwargs)
287 287
288 288 after/follow represent task dependencies. 'after' corresponds to a time dependency. The
289 289 request will not arrive at an engine until the 'after' dependency tasks have completed.
290 290 'follow' corresponds to a location dependency. The task will be submitted to the same
291 291 engine as these msg_ids (see :class:`Dependency` docs for details).
292 292
293 293 Message type: ``apply_reply``::
294 294
295 295 content = {
296 296 'status' : 'ok' # 'ok' or 'error'
297 297 # other error info here, as in other messages
298 298 }
299 299 buffers = ['...'] # either 1 or 2 in length
300 300 # a serialization of the return value of f(*args,**kwargs)
301 301 # only populated if status is 'ok'
302 302
303 303 All engine execution and data movement is performed via apply messages.
304 304
305 305 Control Messages
306 306 ----------------
307 307
308 308 Messages that interact with the engines, but are not meant to execute code, are submitted
309 309 via the Control queue. These messages have high priority, and are thus received and
310 310 handled before any execution requests.
311 311
312 312 Clients may want to clear the namespace on the engine. There are no arguments nor
313 313 information involved in this request, so the content is empty.
314 314
315 315 Message type: ``clear_request``::
316 316
317 317 content = {}
318 318
319 319 Message type: ``clear_reply``::
320 320
321 321 content = {
322 322 'status' : 'ok' # 'ok' or 'error'
323 323 # other error info here, as in other messages
324 324 }
325 325
326 326 Clients may want to abort tasks that have not yet run. This can be done by message id, or
327 327 all enqueued messages can be aborted if None is specified.
328 328
329 329 Message type: ``abort_request``::
330 330
331 331 content = {
332 332 'msg_ids' : ['1234-...', '...'] # list of msg_ids or None
333 333 }
334 334
335 335 Message type: ``abort_reply``::
336 336
337 337 content = {
338 338 'status' : 'ok' # 'ok' or 'error'
339 339 # other error info here, as in other messages
340 340 }
341 341
342 342 The last action a client may want to do is shut down the kernel. If a kernel receives a
343 343 shutdown request, then it aborts all queued messages, replies to the request, and exits.
344 344
345 345 Message type: ``shutdown_request``::
346 346
347 347 content = {}
348 348
349 349 Message type: ``shutdown_reply``::
350 350
351 351 content = {
352 352 'status' : 'ok' # 'ok' or 'error'
353 353 # other error info here, as in other messages
354 354 }
355 355
356 356
357 357 Implementation
358 358 --------------
359 359
360 360 There are a few differences in implementation between the `StreamSession` object used in
361 361 the newparallel branch and the `Session` object, the main one being that messages are
362 362 sent in parts, rather than as a single serialized object. `StreamSession` objects also
363 363 take pack/unpack functions, which are to be used when serializing/deserializing objects.
364 364 These can be any functions that translate to/from formats that ZMQ sockets can send
365 365 (buffers, bytes, etc.).
366 366
367 367 Split Sends
368 368 ***********
369 369
370 370 Previously, messages were bundled as a single json object and one call to
371 371 :func:`socket.send_json`. Since the hub inspects all messages, and doesn't need to
372 372 see the content of the messages, which can be large, messages are now serialized and sent in
373 373 pieces. All messages are sent in at least 3 parts: the header, the parent header, and the
374 374 content. This allows the controller to unpack and inspect the (always small) header,
375 375 without spending time unpacking the content unless the message is bound for the
376 376 controller. Buffers are added on to the end of the message, and can be any objects that
377 377 present the buffer interface.
378 378
@@ -1,442 +1,442 b''
1 1 .. _parallel_task:
2 2
3 3 ==========================
4 4 The IPython task interface
5 5 ==========================
6 6
7 7 The task interface to the cluster presents the engines as a fault tolerant,
8 8 dynamic load-balanced system of workers. Unlike the multiengine interface, in
9 9 the task interface the user has no direct access to individual engines. By
10 10 allowing the IPython scheduler to assign work, this interface is simultaneously
11 11 simpler and more powerful.
12 12
13 13 Best of all, the user can use both of these interfaces running at the same time
14 14 to take advantage of their respective strengths. When the user can break up
15 15 the user's work into segments that do not depend on previous execution, the
16 16 task interface is ideal. But it also has more power and flexibility, allowing
17 17 the user to guide the distribution of jobs, without having to assign tasks to
18 18 engines explicitly.
19 19
20 20 Starting the IPython controller and engines
21 21 ===========================================
22 22
23 23 To follow along with this tutorial, you will need to start the IPython
24 24 controller and four IPython engines. The simplest way of doing this is to use
25 25 the :command:`ipcluster` command::
26 26
27 27 $ ipcluster start -n 4
28 28
29 29 For more detailed information about starting the controller and engines, see
30 30 our :ref:`introduction <parallel_overview>` to using IPython for parallel computing.
31 31
32 32 Creating a ``Client`` instance
33 33 ==============================
34 34
35 35 The first step is to import the IPython :mod:`IPython.parallel`
36 36 module and then create a :class:`.Client` instance, and we will also be using
37 37 a :class:`LoadBalancedView`, here called `lview`:
38 38
39 39 .. sourcecode:: ipython
40 40
41 41 In [1]: from IPython.parallel import Client
42 42
43 43 In [2]: rc = Client()
44 44
45 45
46 46 This form assumes that the controller was started on localhost with default
47 47 configuration. If not, the location of the controller must be given as an
48 48 argument to the constructor:
49 49
50 50 .. sourcecode:: ipython
51 51
52 52 # for a visible LAN controller listening on an external port:
53 53 In [2]: rc = Client('tcp://192.168.1.16:10101')
54 54 # or to connect with a specific profile you have set up:
55 55 In [3]: rc = Client(profile='mpi')
56 56
57 57 For load-balanced execution, we will make use of a :class:`LoadBalancedView` object, which can
58 58 be constructed via the client's :meth:`load_balanced_view` method:
59 59
60 60 .. sourcecode:: ipython
61 61
62 62 In [4]: lview = rc.load_balanced_view() # default load-balanced view
63 63
64 64 .. seealso::
65 65
66 66 For more information, see the in-depth explanation of :ref:`Views <parallel_details>`.
67 67
68 68
69 69 Quick and easy parallelism
70 70 ==========================
71 71
72 72 In many cases, you simply want to apply a Python function to a sequence of
73 73 objects, but *in parallel*. Like the multiengine interface, these can be
74 74 implemented via the task interface. The exact same tools can perform these
75 75 actions in load-balanced ways as well as multiplexed ways: a parallel version
76 76 of :func:`map` and :func:`@parallel` function decorator. If one specifies the
77 77 argument `balanced=True`, then they are dynamically load balanced. Thus, if the
78 78 execution time per item varies significantly, you should use the versions in
79 79 the task interface.
80 80
81 81 Parallel map
82 82 ------------
83 83
84 84 To load-balance :meth:`map`, simply use a LoadBalancedView:
85 85
86 86 .. sourcecode:: ipython
87 87
88 88 In [62]: lview.block = True
89 89
90 90 In [63]: serial_result = map(lambda x:x**10, range(32))
91 91
92 92 In [64]: parallel_result = lview.map(lambda x:x**10, range(32))
93 93
94 94 In [65]: serial_result==parallel_result
95 95 Out[65]: True
96 96
97 97 Parallel function decorator
98 98 ---------------------------
99 99
100 100 Parallel functions are just like normal functions, but they can be called on
101 101 sequences and *in parallel*. The multiengine interface provides a decorator
102 102 that turns any Python function into a parallel function:
103 103
104 104 .. sourcecode:: ipython
105 105
106 106 In [10]: @lview.parallel()
107 107 ....: def f(x):
108 108 ....: return 10.0*x**4
109 109 ....:
110 110
111 111 In [11]: f.map(range(32)) # this is done in parallel
112 112 Out[11]: [0.0,10.0,160.0,...]
113 113
114 114 .. _parallel_dependencies:
115 115
116 116 Dependencies
117 117 ============
118 118
119 119 Often, pure atomic load-balancing is too primitive for your work. In these cases, you
120 120 may want to associate some kind of `Dependency` that describes when, where, or whether
121 121 a task can be run. In IPython, we provide two types of dependencies:
122 122 `Functional Dependencies`_ and `Graph Dependencies`_
123 123
124 124 .. note::
125 125
126 126 It is important to note that the pure ZeroMQ scheduler does not support dependencies,
127 127 and you will see errors or warnings if you try to use dependencies with the pure
128 128 scheduler.
129 129
130 130 Functional Dependencies
131 131 -----------------------
132 132
133 133 Functional dependencies are used to determine whether a given engine is capable of running
134 134 a particular task. This is implemented via a special :class:`Exception` class,
135 135 :class:`UnmetDependency`, found in `IPython.parallel.error`. Its use is very simple:
136 136 if a task fails with an UnmetDependency exception, then the scheduler, instead of relaying
137 137 the error up to the client like any other error, catches the error, and submits the task
138 138 to a different engine. This will repeat indefinitely, and a task will never be submitted
139 139 to a given engine a second time.
140 140
141 141 You can manually raise the :class:`UnmetDependency` yourself, but IPython has provided
142 142 some decorators for facilitating this behavior.
143 143
144 144 There are two decorators and a class used for functional dependencies:
145 145
146 146 .. sourcecode:: ipython
147 147
148 148 In [9]: from IPython.parallel import depend, require, dependent
149 149
150 150 @require
151 151 ********
152 152
153 153 The simplest sort of dependency is requiring that a Python module is available. The
154 154 ``@require`` decorator lets you define a function that will only run on engines where names
155 155 you specify are importable:
156 156
157 157 .. sourcecode:: ipython
158 158
159 159 In [10]: @require('numpy', 'zmq')
160 160 ...: def myfunc():
161 161 ...: return dostuff()
162 162
163 163 Now, any time you apply :func:`myfunc`, the task will only run on a machine that has
164 164 numpy and pyzmq available, and when :func:`myfunc` is called, numpy and zmq will be imported.
165 165
166 166 @depend
167 167 *******
168 168
169 169 The ``@depend`` decorator lets you decorate any function with any *other* function to
170 170 evaluate the dependency. The dependency function will be called at the start of the task,
171 171 and if it returns ``False``, then the dependency will be considered unmet, and the task
172 172 will be assigned to another engine. If the dependency returns *anything other than
173 173 ``False``*, the rest of the task will continue.
174 174
175 175 .. sourcecode:: ipython
176 176
177 177 In [10]: def platform_specific(plat):
178 178 ...: import sys
179 179 ...: return sys.platform == plat
180 180
181 181 In [11]: @depend(platform_specific, 'darwin')
182 182 ...: def mactask():
183 183 ...: do_mac_stuff()
184 184
185 185 In [12]: @depend(platform_specific, 'nt')
186 186 ...: def wintask():
187 187 ...: do_windows_stuff()
188 188
189 189 In this case, any time you apply ``mactask``, it will only run on an OSX machine.
190 190 ``@depend`` is just like ``apply``, in that it has a ``@depend(f,*args,**kwargs)``
191 191 signature.
192 192
193 193 dependents
194 194 **********
195 195
196 196 You don't have to use the decorators on your tasks. If, for instance, you want
197 197 to run tasks with a single function but varying dependencies, you can directly construct
198 198 the :class:`dependent` object that the decorators use:
199 199
200 200 .. sourcecode:: ipython
201 201
202 202 In [13]: def mytask(*args):
203 203 ...: dostuff()
204 204
205 205 In [14]: mactask = dependent(mytask, platform_specific, 'darwin')
206 206 # this is the same as decorating the declaration of mytask with @depend
207 207 # but you can do it again:
208 208
209 209 In [15]: wintask = dependent(mytask, platform_specific, 'nt')
210 210
211 211 # in general:
212 212 In [16]: t = dependent(f, g, *dargs, **dkwargs)
213 213
214 214 # is equivalent to:
215 215 In [17]: @depend(g, *dargs, **dkwargs)
216 216 ...: def t(a,b,c):
217 217 ...: # contents of f
218 218
219 219 Graph Dependencies
220 220 ------------------
221 221
222 222 Sometimes you want to restrict the time and/or location to run a given task as a function
223 223 of the time and/or location of other tasks. This is implemented via a subclass of
224 224 :class:`set`, called a :class:`Dependency`. A Dependency is just a set of `msg_ids`
225 225 corresponding to tasks, and a few attributes to guide how to decide when the Dependency
226 226 has been met.
227 227
228 228 The switches we provide for interpreting whether a given dependency set has been met:
229 229
230 230 any|all
231 231 Whether the dependency is considered met if *any* of the dependencies are done, or
232 232 only after *all* of them have finished. This is set by a Dependency's :attr:`all`
233 233 boolean attribute, which defaults to ``True``.
234 234
235 235 success [default: True]
236 236 Whether to consider tasks that succeeded as fulfilling dependencies.
237 237
238 238 failure [default : False]
239 239 Whether to consider tasks that failed as fulfilling dependencies.
240 240 using `failure=True,success=False` is useful for setting up cleanup tasks, to be run
241 241 only when tasks have failed.
242 242
243 243 Sometimes you want to run a task after another, but only if that task succeeded. In this case,
244 244 ``success`` should be ``True`` and ``failure`` should be ``False``. However sometimes you may
245 245 not care whether the task succeeds, and always want the second task to run, in which case you
246 246 should use `success=failure=True`. The default behavior is to only use successes.
247 247
248 248 There are other switches for interpretation that are made at the *task* level. These are
249 249 specified via keyword arguments to the client's :meth:`apply` method.
250 250
251 251 after,follow
252 252 You may want to run a task *after* a given set of dependencies have been run and/or
253 253 run it *where* another set of dependencies are met. To support this, every task has an
254 254 `after` dependency to restrict time, and a `follow` dependency to restrict
255 255 destination.
256 256
257 257 timeout
258 258 You may also want to set a time-limit for how long the scheduler should wait before a
259 259 task's dependencies are met. This is done via a `timeout`, which defaults to 0, which
260 260 indicates that the task should never timeout. If the timeout is reached, and the
261 261 scheduler still hasn't been able to assign the task to an engine, the task will fail
262 262 with a :class:`DependencyTimeout`.
263 263
264 264 .. note::
265 265
266 266 Dependencies only work within the task scheduler. You cannot instruct a load-balanced
267 267 task to run after a job submitted via the MUX interface.
268 268
269 269 The simplest form of Dependencies is with `all=True,success=True,failure=False`. In these cases,
270 270 you can skip using Dependency objects, and just pass msg_ids or AsyncResult objects as the
271 271 `follow` and `after` keywords to :meth:`client.apply`:
272 272
273 273 .. sourcecode:: ipython
274 274
275 275 In [14]: client.block=False
276 276
277 277 In [15]: ar = lview.apply(f, args, kwargs)
278 278
279 279 In [16]: ar2 = lview.apply(f2)
280 280
281 281 In [17]: ar3 = lview.apply_with_flags(f3, after=[ar,ar2])
282 282
283 283 In [18]: ar4 = lview.apply_with_flags(f3, follow=[ar], timeout=2.5)
284 284
285 285
286 286 .. seealso::
287 287
288 288 Some parallel workloads can be described as a `Directed Acyclic Graph
289 289 <http://en.wikipedia.org/wiki/Directed_acyclic_graph>`_, or DAG. See :ref:`DAG
290 290 Dependencies <dag_dependencies>` for an example demonstrating how to map a NetworkX DAG
291 291 onto task dependencies.
292 292
293 293
294 294
295 295
296 296 Impossible Dependencies
297 297 ***********************
298 298
299 299 The schedulers do perform some analysis on graph dependencies to determine whether they
300 300 are not possible to be met. If the scheduler does discover that a dependency cannot be
301 301 met, then the task will fail with an :class:`ImpossibleDependency` error. This way, if the
302 302 scheduler realizes that a task can never be run, it won't sit indefinitely in the
303 303 scheduler clogging the pipeline.
304 304
305 305 The basic cases that are checked:
306 306
307 307 * depending on nonexistent messages
308 308 * `follow` dependencies were run on more than one machine and `all=True`
309 309 * any dependencies failed and `all=True,success=True,failure=False`
310 310 * all dependencies failed and `all=False,success=True,failure=False`
311 311
312 312 .. warning::
313 313
314 314 This analysis has not been proven to be rigorous, so it is likely possible for tasks
315 315 to become impossible to run in obscure situations, so a timeout may be a good choice.
316 316
317 317
318 318 Retries and Resubmit
319 319 ====================
320 320
321 321 Retries
322 322 -------
323 323
324 324 Another flag for tasks is `retries`. This is an integer, specifying how many times
325 325 a task should be resubmitted after failure. This is useful for tasks that should still run
326 326 if their engine was shutdown, or may have some statistical chance of failing. The default
327 327 is to not retry tasks.
328 328
329 329 Resubmit
330 330 --------
331 331
332 332 Sometimes you may want to re-run a task. This could be because it failed for some reason, and
333 333 you have fixed the error, or because you want to restore the cluster to an interrupted state.
334 334 For this, the :class:`Client` has a :meth:`rc.resubmit` method. This simply takes one or more
335 335 msg_ids, and returns an :class:`AsyncHubResult` for the result(s). You cannot resubmit
336 336 a task that is pending - only those that have finished, either successful or unsuccessful.
337 337
338 338 .. _parallel_schedulers:
339 339
340 340 Schedulers
341 341 ==========
342 342
343 343 There are a variety of valid ways to determine where jobs should be assigned in a
344 344 load-balancing situation. In IPython, we support several standard schemes, and
345 345 even make it easy to define your own. The scheme can be selected via the ``scheme``
346 346 argument to :command:`ipcontroller`, or in the :attr:`TaskScheduler.schemename` attribute
347 347 of a controller config object.
348 348
349 349 The built-in routing schemes:
350 350
351 351 To select one of these schemes, simply do::
352 352
353 353 $ ipcontroller --scheme=<schemename>
354 354 for instance:
355 355 $ ipcontroller --scheme=lru
356 356
357 357 lru: Least Recently Used
358 358
359 359 Always assign work to the least-recently-used engine. A close relative of
360 360 round-robin, it will be fair with respect to the number of tasks, agnostic
361 361 with respect to runtime of each task.
362 362
363 363 plainrandom: Plain Random
364 364
365 365 Randomly picks an engine on which to run.
366 366
367 367 twobin: Two-Bin Random
368 368
369 369 **Requires numpy**
370 370
371 371 Pick two engines at random, and use the LRU of the two. This is known to be better
372 372 than plain random in many cases, but requires a small amount of computation.
373 373
374 374 leastload: Least Load
375 375
376 376 **This is the default scheme**
377 377
378 378 Always assign tasks to the engine with the fewest outstanding tasks (LRU breaks tie).
379 379
380 380 weighted: Weighted Two-Bin Random
381 381
382 382 **Requires numpy**
383 383
384 384 Pick two engines at random using the number of outstanding tasks as inverse weights,
385 385 and use the one with the lower load.
386 386
387 387
388 388 Pure ZMQ Scheduler
389 389 ------------------
390 390
391 391 For maximum throughput, the 'pure' scheme is not Python at all, but a C-level
392 :class:`MonitoredQueue` from PyZMQ, which uses a ZeroMQ ``XREQ`` socket to perform all
392 :class:`MonitoredQueue` from PyZMQ, which uses a ZeroMQ ``DEALER`` socket to perform all
393 393 load-balancing. This scheduler does not support any of the advanced features of the Python
394 394 :class:`.Scheduler`.
395 395
396 396 Disabled features when using the ZMQ Scheduler:
397 397
398 398 * Engine unregistration
399 399 Task farming will be disabled if an engine unregisters.
400 400 Further, if an engine is unregistered during computation, the scheduler may not recover.
401 401 * Dependencies
402 402 Since there is no Python logic inside the Scheduler, routing decisions cannot be made
403 403 based on message content.
404 404 * Early destination notification
405 405 The Python schedulers know which engine gets which task, and notify the Hub. This
406 406 allows graceful handling of Engines coming and going. There is no way to know
407 407 where ZeroMQ messages have gone, so there is no way to know what tasks are on which
408 408 engine until they *finish*. This makes recovery from engine shutdown very difficult.
409 409
410 410
411 411 .. note::
412 412
413 413 TODO: performance comparisons
414 414
415 415
416 416
417 417
418 418 More details
419 419 ============
420 420
421 421 The :class:`LoadBalancedView` has many more powerful features that allow quite a bit
422 422 of flexibility in how tasks are defined and run. The next places to look are
423 423 in the following classes:
424 424
425 425 * :class:`~IPython.parallel.client.view.LoadBalancedView`
426 426 * :class:`~IPython.parallel.client.asyncresult.AsyncResult`
427 427 * :meth:`~IPython.parallel.client.view.LoadBalancedView.apply`
428 428 * :mod:`~IPython.parallel.controller.dependency`
429 429
430 430 The following is an overview of how to use these classes together:
431 431
432 432 1. Create a :class:`Client` and :class:`LoadBalancedView`
433 433 2. Define some functions to be run as tasks
434 434 3. Submit your tasks using the :meth:`apply` method of your
435 435 :class:`LoadBalancedView` instance.
436 436 4. Use :meth:`Client.get_result` to get the results of the
437 437 tasks, or use the :meth:`AsyncResult.get` method of the results to wait
438 438 for and then receive the results.
439 439
440 440 .. seealso::
441 441
442 442 A demo of :ref:`DAG Dependencies <dag_dependencies>` with NetworkX and IPython.
General Comments 0
You need to be logged in to leave comments. Login now