##// END OF EJS Templates
Renaming unpack_message to unserialize and updating docstrings.
Brian E. Granger -
Show More
@@ -1,1291 +1,1291 b''
1 1 #!/usr/bin/env python
2 2 """The IPython Controller Hub with 0MQ
3 3 This is the master object that handles connections from engines and clients,
4 4 and monitors traffic through the various queues.
5 5
6 6 Authors:
7 7
8 8 * Min RK
9 9 """
10 10 #-----------------------------------------------------------------------------
11 11 # Copyright (C) 2010 The IPython Development Team
12 12 #
13 13 # Distributed under the terms of the BSD License. The full license is in
14 14 # the file COPYING, distributed as part of this software.
15 15 #-----------------------------------------------------------------------------
16 16
17 17 #-----------------------------------------------------------------------------
18 18 # Imports
19 19 #-----------------------------------------------------------------------------
20 20 from __future__ import print_function
21 21
22 22 import sys
23 23 import time
24 24 from datetime import datetime
25 25
26 26 import zmq
27 27 from zmq.eventloop import ioloop
28 28 from zmq.eventloop.zmqstream import ZMQStream
29 29
30 30 # internal:
31 31 from IPython.utils.importstring import import_item
32 32 from IPython.utils.traitlets import (
33 33 HasTraits, Instance, Int, Unicode, Dict, Set, Tuple, CBytes, DottedObjectName
34 34 )
35 35
36 36 from IPython.parallel import error, util
37 37 from IPython.parallel.factory import RegistrationFactory
38 38
39 39 from IPython.zmq.session import SessionFactory
40 40
41 41 from .heartmonitor import HeartMonitor
42 42
43 43 #-----------------------------------------------------------------------------
44 44 # Code
45 45 #-----------------------------------------------------------------------------
46 46
47 47 def _passer(*args, **kwargs):
48 48 return
49 49
50 50 def _printer(*args, **kwargs):
51 51 print (args)
52 52 print (kwargs)
53 53
54 54 def empty_record():
55 55 """Return an empty dict with all record keys."""
56 56 return {
57 57 'msg_id' : None,
58 58 'header' : None,
59 59 'content': None,
60 60 'buffers': None,
61 61 'submitted': None,
62 62 'client_uuid' : None,
63 63 'engine_uuid' : None,
64 64 'started': None,
65 65 'completed': None,
66 66 'resubmitted': None,
67 67 'result_header' : None,
68 68 'result_content' : None,
69 69 'result_buffers' : None,
70 70 'queue' : None,
71 71 'pyin' : None,
72 72 'pyout': None,
73 73 'pyerr': None,
74 74 'stdout': '',
75 75 'stderr': '',
76 76 }
77 77
78 78 def init_record(msg):
79 79 """Initialize a TaskRecord based on a request."""
80 80 header = msg['header']
81 81 return {
82 82 'msg_id' : header['msg_id'],
83 83 'header' : header,
84 84 'content': msg['content'],
85 85 'buffers': msg['buffers'],
86 86 'submitted': header['date'],
87 87 'client_uuid' : None,
88 88 'engine_uuid' : None,
89 89 'started': None,
90 90 'completed': None,
91 91 'resubmitted': None,
92 92 'result_header' : None,
93 93 'result_content' : None,
94 94 'result_buffers' : None,
95 95 'queue' : None,
96 96 'pyin' : None,
97 97 'pyout': None,
98 98 'pyerr': None,
99 99 'stdout': '',
100 100 'stderr': '',
101 101 }
102 102
103 103
104 104 class EngineConnector(HasTraits):
105 105 """A simple object for accessing the various zmq connections of an object.
106 106 Attributes are:
107 107 id (int): engine ID
108 108 uuid (str): uuid (unused?)
109 109 queue (str): identity of queue's XREQ socket
110 110 registration (str): identity of registration XREQ socket
111 111 heartbeat (str): identity of heartbeat XREQ socket
112 112 """
113 113 id=Int(0)
114 114 queue=CBytes()
115 115 control=CBytes()
116 116 registration=CBytes()
117 117 heartbeat=CBytes()
118 118 pending=Set()
119 119
120 120 class HubFactory(RegistrationFactory):
121 121 """The Configurable for setting up a Hub."""
122 122
123 123 # port-pairs for monitoredqueues:
124 124 hb = Tuple(Int,Int,config=True,
125 125 help="""XREQ/SUB Port pair for Engine heartbeats""")
126 126 def _hb_default(self):
127 127 return tuple(util.select_random_ports(2))
128 128
129 129 mux = Tuple(Int,Int,config=True,
130 130 help="""Engine/Client Port pair for MUX queue""")
131 131
132 132 def _mux_default(self):
133 133 return tuple(util.select_random_ports(2))
134 134
135 135 task = Tuple(Int,Int,config=True,
136 136 help="""Engine/Client Port pair for Task queue""")
137 137 def _task_default(self):
138 138 return tuple(util.select_random_ports(2))
139 139
140 140 control = Tuple(Int,Int,config=True,
141 141 help="""Engine/Client Port pair for Control queue""")
142 142
143 143 def _control_default(self):
144 144 return tuple(util.select_random_ports(2))
145 145
146 146 iopub = Tuple(Int,Int,config=True,
147 147 help="""Engine/Client Port pair for IOPub relay""")
148 148
149 149 def _iopub_default(self):
150 150 return tuple(util.select_random_ports(2))
151 151
152 152 # single ports:
153 153 mon_port = Int(config=True,
154 154 help="""Monitor (SUB) port for queue traffic""")
155 155
156 156 def _mon_port_default(self):
157 157 return util.select_random_ports(1)[0]
158 158
159 159 notifier_port = Int(config=True,
160 160 help="""PUB port for sending engine status notifications""")
161 161
162 162 def _notifier_port_default(self):
163 163 return util.select_random_ports(1)[0]
164 164
165 165 engine_ip = Unicode('127.0.0.1', config=True,
166 166 help="IP on which to listen for engine connections. [default: loopback]")
167 167 engine_transport = Unicode('tcp', config=True,
168 168 help="0MQ transport for engine connections. [default: tcp]")
169 169
170 170 client_ip = Unicode('127.0.0.1', config=True,
171 171 help="IP on which to listen for client connections. [default: loopback]")
172 172 client_transport = Unicode('tcp', config=True,
173 173 help="0MQ transport for client connections. [default : tcp]")
174 174
175 175 monitor_ip = Unicode('127.0.0.1', config=True,
176 176 help="IP on which to listen for monitor messages. [default: loopback]")
177 177 monitor_transport = Unicode('tcp', config=True,
178 178 help="0MQ transport for monitor messages. [default : tcp]")
179 179
180 180 monitor_url = Unicode('')
181 181
182 182 db_class = DottedObjectName('IPython.parallel.controller.dictdb.DictDB',
183 183 config=True, help="""The class to use for the DB backend""")
184 184
185 185 # not configurable
186 186 db = Instance('IPython.parallel.controller.dictdb.BaseDB')
187 187 heartmonitor = Instance('IPython.parallel.controller.heartmonitor.HeartMonitor')
188 188
189 189 def _ip_changed(self, name, old, new):
190 190 self.engine_ip = new
191 191 self.client_ip = new
192 192 self.monitor_ip = new
193 193 self._update_monitor_url()
194 194
195 195 def _update_monitor_url(self):
196 196 self.monitor_url = "%s://%s:%i"%(self.monitor_transport, self.monitor_ip, self.mon_port)
197 197
198 198 def _transport_changed(self, name, old, new):
199 199 self.engine_transport = new
200 200 self.client_transport = new
201 201 self.monitor_transport = new
202 202 self._update_monitor_url()
203 203
204 204 def __init__(self, **kwargs):
205 205 super(HubFactory, self).__init__(**kwargs)
206 206 self._update_monitor_url()
207 207
208 208
209 209 def construct(self):
210 210 self.init_hub()
211 211
212 212 def start(self):
213 213 self.heartmonitor.start()
214 214 self.log.info("Heartmonitor started")
215 215
216 216 def init_hub(self):
217 217 """construct"""
218 218 client_iface = "%s://%s:"%(self.client_transport, self.client_ip) + "%i"
219 219 engine_iface = "%s://%s:"%(self.engine_transport, self.engine_ip) + "%i"
220 220
221 221 ctx = self.context
222 222 loop = self.loop
223 223
224 224 # Registrar socket
225 225 q = ZMQStream(ctx.socket(zmq.XREP), loop)
226 226 q.bind(client_iface % self.regport)
227 227 self.log.info("Hub listening on %s for registration."%(client_iface%self.regport))
228 228 if self.client_ip != self.engine_ip:
229 229 q.bind(engine_iface % self.regport)
230 230 self.log.info("Hub listening on %s for registration."%(engine_iface%self.regport))
231 231
232 232 ### Engine connections ###
233 233
234 234 # heartbeat
235 235 hpub = ctx.socket(zmq.PUB)
236 236 hpub.bind(engine_iface % self.hb[0])
237 237 hrep = ctx.socket(zmq.XREP)
238 238 hrep.bind(engine_iface % self.hb[1])
239 239 self.heartmonitor = HeartMonitor(loop=loop, config=self.config, log=self.log,
240 240 pingstream=ZMQStream(hpub,loop),
241 241 pongstream=ZMQStream(hrep,loop)
242 242 )
243 243
244 244 ### Client connections ###
245 245 # Notifier socket
246 246 n = ZMQStream(ctx.socket(zmq.PUB), loop)
247 247 n.bind(client_iface%self.notifier_port)
248 248
249 249 ### build and launch the queues ###
250 250
251 251 # monitor socket
252 252 sub = ctx.socket(zmq.SUB)
253 253 sub.setsockopt(zmq.SUBSCRIBE, b"")
254 254 sub.bind(self.monitor_url)
255 255 sub.bind('inproc://monitor')
256 256 sub = ZMQStream(sub, loop)
257 257
258 258 # connect the db
259 259 self.log.info('Hub using DB backend: %r'%(self.db_class.split()[-1]))
260 260 # cdir = self.config.Global.cluster_dir
261 261 self.db = import_item(str(self.db_class))(session=self.session.session,
262 262 config=self.config, log=self.log)
263 263 time.sleep(.25)
264 264 try:
265 265 scheme = self.config.TaskScheduler.scheme_name
266 266 except AttributeError:
267 267 from .scheduler import TaskScheduler
268 268 scheme = TaskScheduler.scheme_name.get_default_value()
269 269 # build connection dicts
270 270 self.engine_info = {
271 271 'control' : engine_iface%self.control[1],
272 272 'mux': engine_iface%self.mux[1],
273 273 'heartbeat': (engine_iface%self.hb[0], engine_iface%self.hb[1]),
274 274 'task' : engine_iface%self.task[1],
275 275 'iopub' : engine_iface%self.iopub[1],
276 276 # 'monitor' : engine_iface%self.mon_port,
277 277 }
278 278
279 279 self.client_info = {
280 280 'control' : client_iface%self.control[0],
281 281 'mux': client_iface%self.mux[0],
282 282 'task' : (scheme, client_iface%self.task[0]),
283 283 'iopub' : client_iface%self.iopub[0],
284 284 'notification': client_iface%self.notifier_port
285 285 }
286 286 self.log.debug("Hub engine addrs: %s"%self.engine_info)
287 287 self.log.debug("Hub client addrs: %s"%self.client_info)
288 288
289 289 # resubmit stream
290 290 r = ZMQStream(ctx.socket(zmq.XREQ), loop)
291 291 url = util.disambiguate_url(self.client_info['task'][-1])
292 292 r.setsockopt(zmq.IDENTITY, util.asbytes(self.session.session))
293 293 r.connect(url)
294 294
295 295 self.hub = Hub(loop=loop, session=self.session, monitor=sub, heartmonitor=self.heartmonitor,
296 296 query=q, notifier=n, resubmit=r, db=self.db,
297 297 engine_info=self.engine_info, client_info=self.client_info,
298 298 log=self.log)
299 299
300 300
301 301 class Hub(SessionFactory):
302 302 """The IPython Controller Hub with 0MQ connections
303 303
304 304 Parameters
305 305 ==========
306 306 loop: zmq IOLoop instance
307 307 session: Session object
308 308 <removed> context: zmq context for creating new connections (?)
309 309 queue: ZMQStream for monitoring the command queue (SUB)
310 310 query: ZMQStream for engine registration and client queries requests (XREP)
311 311 heartbeat: HeartMonitor object checking the pulse of the engines
312 312 notifier: ZMQStream for broadcasting engine registration changes (PUB)
313 313 db: connection to db for out of memory logging of commands
314 314 NotImplemented
315 315 engine_info: dict of zmq connection information for engines to connect
316 316 to the queues.
317 317 client_info: dict of zmq connection information for engines to connect
318 318 to the queues.
319 319 """
320 320 # internal data structures:
321 321 ids=Set() # engine IDs
322 322 keytable=Dict()
323 323 by_ident=Dict()
324 324 engines=Dict()
325 325 clients=Dict()
326 326 hearts=Dict()
327 327 pending=Set()
328 328 queues=Dict() # pending msg_ids keyed by engine_id
329 329 tasks=Dict() # pending msg_ids submitted as tasks, keyed by client_id
330 330 completed=Dict() # completed msg_ids keyed by engine_id
331 331 all_completed=Set() # completed msg_ids keyed by engine_id
332 332 dead_engines=Set() # completed msg_ids keyed by engine_id
333 333 unassigned=Set() # set of task msg_ds not yet assigned a destination
334 334 incoming_registrations=Dict()
335 335 registration_timeout=Int()
336 336 _idcounter=Int(0)
337 337
338 338 # objects from constructor:
339 339 query=Instance(ZMQStream)
340 340 monitor=Instance(ZMQStream)
341 341 notifier=Instance(ZMQStream)
342 342 resubmit=Instance(ZMQStream)
343 343 heartmonitor=Instance(HeartMonitor)
344 344 db=Instance(object)
345 345 client_info=Dict()
346 346 engine_info=Dict()
347 347
348 348
349 349 def __init__(self, **kwargs):
350 350 """
351 351 # universal:
352 352 loop: IOLoop for creating future connections
353 353 session: streamsession for sending serialized data
354 354 # engine:
355 355 queue: ZMQStream for monitoring queue messages
356 356 query: ZMQStream for engine+client registration and client requests
357 357 heartbeat: HeartMonitor object for tracking engines
358 358 # extra:
359 359 db: ZMQStream for db connection (NotImplemented)
360 360 engine_info: zmq address/protocol dict for engine connections
361 361 client_info: zmq address/protocol dict for client connections
362 362 """
363 363
364 364 super(Hub, self).__init__(**kwargs)
365 365 self.registration_timeout = max(5000, 2*self.heartmonitor.period)
366 366
367 367 # validate connection dicts:
368 368 for k,v in self.client_info.iteritems():
369 369 if k == 'task':
370 370 util.validate_url_container(v[1])
371 371 else:
372 372 util.validate_url_container(v)
373 373 # util.validate_url_container(self.client_info)
374 374 util.validate_url_container(self.engine_info)
375 375
376 376 # register our callbacks
377 377 self.query.on_recv(self.dispatch_query)
378 378 self.monitor.on_recv(self.dispatch_monitor_traffic)
379 379
380 380 self.heartmonitor.add_heart_failure_handler(self.handle_heart_failure)
381 381 self.heartmonitor.add_new_heart_handler(self.handle_new_heart)
382 382
383 383 self.monitor_handlers = {b'in' : self.save_queue_request,
384 384 b'out': self.save_queue_result,
385 385 b'intask': self.save_task_request,
386 386 b'outtask': self.save_task_result,
387 387 b'tracktask': self.save_task_destination,
388 388 b'incontrol': _passer,
389 389 b'outcontrol': _passer,
390 390 b'iopub': self.save_iopub_message,
391 391 }
392 392
393 393 self.query_handlers = {'queue_request': self.queue_status,
394 394 'result_request': self.get_results,
395 395 'history_request': self.get_history,
396 396 'db_request': self.db_query,
397 397 'purge_request': self.purge_results,
398 398 'load_request': self.check_load,
399 399 'resubmit_request': self.resubmit_task,
400 400 'shutdown_request': self.shutdown_request,
401 401 'registration_request' : self.register_engine,
402 402 'unregistration_request' : self.unregister_engine,
403 403 'connection_request': self.connection_request,
404 404 }
405 405
406 406 # ignore resubmit replies
407 407 self.resubmit.on_recv(lambda msg: None, copy=False)
408 408
409 409 self.log.info("hub::created hub")
410 410
411 411 @property
412 412 def _next_id(self):
413 413 """gemerate a new ID.
414 414
415 415 No longer reuse old ids, just count from 0."""
416 416 newid = self._idcounter
417 417 self._idcounter += 1
418 418 return newid
419 419 # newid = 0
420 420 # incoming = [id[0] for id in self.incoming_registrations.itervalues()]
421 421 # # print newid, self.ids, self.incoming_registrations
422 422 # while newid in self.ids or newid in incoming:
423 423 # newid += 1
424 424 # return newid
425 425
426 426 #-----------------------------------------------------------------------------
427 427 # message validation
428 428 #-----------------------------------------------------------------------------
429 429
430 430 def _validate_targets(self, targets):
431 431 """turn any valid targets argument into a list of integer ids"""
432 432 if targets is None:
433 433 # default to all
434 434 targets = self.ids
435 435
436 436 if isinstance(targets, (int,str,unicode)):
437 437 # only one target specified
438 438 targets = [targets]
439 439 _targets = []
440 440 for t in targets:
441 441 # map raw identities to ids
442 442 if isinstance(t, (str,unicode)):
443 443 t = self.by_ident.get(t, t)
444 444 _targets.append(t)
445 445 targets = _targets
446 446 bad_targets = [ t for t in targets if t not in self.ids ]
447 447 if bad_targets:
448 448 raise IndexError("No Such Engine: %r"%bad_targets)
449 449 if not targets:
450 450 raise IndexError("No Engines Registered")
451 451 return targets
452 452
453 453 #-----------------------------------------------------------------------------
454 454 # dispatch methods (1 per stream)
455 455 #-----------------------------------------------------------------------------
456 456
457 457
458 458 def dispatch_monitor_traffic(self, msg):
459 459 """all ME and Task queue messages come through here, as well as
460 460 IOPub traffic."""
461 461 self.log.debug("monitor traffic: %r"%msg[:2])
462 462 switch = msg[0]
463 463 try:
464 464 idents, msg = self.session.feed_identities(msg[1:])
465 465 except ValueError:
466 466 idents=[]
467 467 if not idents:
468 468 self.log.error("Bad Monitor Message: %r"%msg)
469 469 return
470 470 handler = self.monitor_handlers.get(switch, None)
471 471 if handler is not None:
472 472 handler(idents, msg)
473 473 else:
474 474 self.log.error("Invalid monitor topic: %r"%switch)
475 475
476 476
477 477 def dispatch_query(self, msg):
478 478 """Route registration requests and queries from clients."""
479 479 try:
480 480 idents, msg = self.session.feed_identities(msg)
481 481 except ValueError:
482 482 idents = []
483 483 if not idents:
484 484 self.log.error("Bad Query Message: %r"%msg)
485 485 return
486 486 client_id = idents[0]
487 487 try:
488 msg = self.session.unpack_message(msg, content=True)
488 msg = self.session.unserialize(msg, content=True)
489 489 except Exception:
490 490 content = error.wrap_exception()
491 491 self.log.error("Bad Query Message: %r"%msg, exc_info=True)
492 492 self.session.send(self.query, "hub_error", ident=client_id,
493 493 content=content)
494 494 return
495 495 # print client_id, header, parent, content
496 496 #switch on message type:
497 497 msg_type = msg['header']['msg_type']
498 498 self.log.info("client::client %r requested %r"%(client_id, msg_type))
499 499 handler = self.query_handlers.get(msg_type, None)
500 500 try:
501 501 assert handler is not None, "Bad Message Type: %r"%msg_type
502 502 except:
503 503 content = error.wrap_exception()
504 504 self.log.error("Bad Message Type: %r"%msg_type, exc_info=True)
505 505 self.session.send(self.query, "hub_error", ident=client_id,
506 506 content=content)
507 507 return
508 508
509 509 else:
510 510 handler(idents, msg)
511 511
512 512 def dispatch_db(self, msg):
513 513 """"""
514 514 raise NotImplementedError
515 515
516 516 #---------------------------------------------------------------------------
517 517 # handler methods (1 per event)
518 518 #---------------------------------------------------------------------------
519 519
520 520 #----------------------- Heartbeat --------------------------------------
521 521
522 522 def handle_new_heart(self, heart):
523 523 """handler to attach to heartbeater.
524 524 Called when a new heart starts to beat.
525 525 Triggers completion of registration."""
526 526 self.log.debug("heartbeat::handle_new_heart(%r)"%heart)
527 527 if heart not in self.incoming_registrations:
528 528 self.log.info("heartbeat::ignoring new heart: %r"%heart)
529 529 else:
530 530 self.finish_registration(heart)
531 531
532 532
533 533 def handle_heart_failure(self, heart):
534 534 """handler to attach to heartbeater.
535 535 called when a previously registered heart fails to respond to beat request.
536 536 triggers unregistration"""
537 537 self.log.debug("heartbeat::handle_heart_failure(%r)"%heart)
538 538 eid = self.hearts.get(heart, None)
539 539 queue = self.engines[eid].queue
540 540 if eid is None:
541 541 self.log.info("heartbeat::ignoring heart failure %r"%heart)
542 542 else:
543 543 self.unregister_engine(heart, dict(content=dict(id=eid, queue=queue)))
544 544
545 545 #----------------------- MUX Queue Traffic ------------------------------
546 546
547 547 def save_queue_request(self, idents, msg):
548 548 if len(idents) < 2:
549 549 self.log.error("invalid identity prefix: %r"%idents)
550 550 return
551 551 queue_id, client_id = idents[:2]
552 552 try:
553 msg = self.session.unpack_message(msg)
553 msg = self.session.unserialize(msg)
554 554 except Exception:
555 555 self.log.error("queue::client %r sent invalid message to %r: %r"%(client_id, queue_id, msg), exc_info=True)
556 556 return
557 557
558 558 eid = self.by_ident.get(queue_id, None)
559 559 if eid is None:
560 560 self.log.error("queue::target %r not registered"%queue_id)
561 561 self.log.debug("queue:: valid are: %r"%(self.by_ident.keys()))
562 562 return
563 563 record = init_record(msg)
564 564 msg_id = record['msg_id']
565 565 # Unicode in records
566 566 record['engine_uuid'] = queue_id.decode('ascii')
567 567 record['client_uuid'] = client_id.decode('ascii')
568 568 record['queue'] = 'mux'
569 569
570 570 try:
571 571 # it's posible iopub arrived first:
572 572 existing = self.db.get_record(msg_id)
573 573 for key,evalue in existing.iteritems():
574 574 rvalue = record.get(key, None)
575 575 if evalue and rvalue and evalue != rvalue:
576 576 self.log.warn("conflicting initial state for record: %r:%r <%r> %r"%(msg_id, rvalue, key, evalue))
577 577 elif evalue and not rvalue:
578 578 record[key] = evalue
579 579 try:
580 580 self.db.update_record(msg_id, record)
581 581 except Exception:
582 582 self.log.error("DB Error updating record %r"%msg_id, exc_info=True)
583 583 except KeyError:
584 584 try:
585 585 self.db.add_record(msg_id, record)
586 586 except Exception:
587 587 self.log.error("DB Error adding record %r"%msg_id, exc_info=True)
588 588
589 589
590 590 self.pending.add(msg_id)
591 591 self.queues[eid].append(msg_id)
592 592
593 593 def save_queue_result(self, idents, msg):
594 594 if len(idents) < 2:
595 595 self.log.error("invalid identity prefix: %r"%idents)
596 596 return
597 597
598 598 client_id, queue_id = idents[:2]
599 599 try:
600 msg = self.session.unpack_message(msg)
600 msg = self.session.unserialize(msg)
601 601 except Exception:
602 602 self.log.error("queue::engine %r sent invalid message to %r: %r"%(
603 603 queue_id,client_id, msg), exc_info=True)
604 604 return
605 605
606 606 eid = self.by_ident.get(queue_id, None)
607 607 if eid is None:
608 608 self.log.error("queue::unknown engine %r is sending a reply: "%queue_id)
609 609 return
610 610
611 611 parent = msg['parent_header']
612 612 if not parent:
613 613 return
614 614 msg_id = parent['msg_id']
615 615 if msg_id in self.pending:
616 616 self.pending.remove(msg_id)
617 617 self.all_completed.add(msg_id)
618 618 self.queues[eid].remove(msg_id)
619 619 self.completed[eid].append(msg_id)
620 620 elif msg_id not in self.all_completed:
621 621 # it could be a result from a dead engine that died before delivering the
622 622 # result
623 623 self.log.warn("queue:: unknown msg finished %r"%msg_id)
624 624 return
625 625 # update record anyway, because the unregistration could have been premature
626 626 rheader = msg['header']
627 627 completed = rheader['date']
628 628 started = rheader.get('started', None)
629 629 result = {
630 630 'result_header' : rheader,
631 631 'result_content': msg['content'],
632 632 'started' : started,
633 633 'completed' : completed
634 634 }
635 635
636 636 result['result_buffers'] = msg['buffers']
637 637 try:
638 638 self.db.update_record(msg_id, result)
639 639 except Exception:
640 640 self.log.error("DB Error updating record %r"%msg_id, exc_info=True)
641 641
642 642
643 643 #--------------------- Task Queue Traffic ------------------------------
644 644
645 645 def save_task_request(self, idents, msg):
646 646 """Save the submission of a task."""
647 647 client_id = idents[0]
648 648
649 649 try:
650 msg = self.session.unpack_message(msg)
650 msg = self.session.unserialize(msg)
651 651 except Exception:
652 652 self.log.error("task::client %r sent invalid task message: %r"%(
653 653 client_id, msg), exc_info=True)
654 654 return
655 655 record = init_record(msg)
656 656
657 657 record['client_uuid'] = client_id
658 658 record['queue'] = 'task'
659 659 header = msg['header']
660 660 msg_id = header['msg_id']
661 661 self.pending.add(msg_id)
662 662 self.unassigned.add(msg_id)
663 663 try:
664 664 # it's posible iopub arrived first:
665 665 existing = self.db.get_record(msg_id)
666 666 if existing['resubmitted']:
667 667 for key in ('submitted', 'client_uuid', 'buffers'):
668 668 # don't clobber these keys on resubmit
669 669 # submitted and client_uuid should be different
670 670 # and buffers might be big, and shouldn't have changed
671 671 record.pop(key)
672 672 # still check content,header which should not change
673 673 # but are not expensive to compare as buffers
674 674
675 675 for key,evalue in existing.iteritems():
676 676 if key.endswith('buffers'):
677 677 # don't compare buffers
678 678 continue
679 679 rvalue = record.get(key, None)
680 680 if evalue and rvalue and evalue != rvalue:
681 681 self.log.warn("conflicting initial state for record: %r:%r <%r> %r"%(msg_id, rvalue, key, evalue))
682 682 elif evalue and not rvalue:
683 683 record[key] = evalue
684 684 try:
685 685 self.db.update_record(msg_id, record)
686 686 except Exception:
687 687 self.log.error("DB Error updating record %r"%msg_id, exc_info=True)
688 688 except KeyError:
689 689 try:
690 690 self.db.add_record(msg_id, record)
691 691 except Exception:
692 692 self.log.error("DB Error adding record %r"%msg_id, exc_info=True)
693 693 except Exception:
694 694 self.log.error("DB Error saving task request %r"%msg_id, exc_info=True)
695 695
696 696 def save_task_result(self, idents, msg):
697 697 """save the result of a completed task."""
698 698 client_id = idents[0]
699 699 try:
700 msg = self.session.unpack_message(msg)
700 msg = self.session.unserialize(msg)
701 701 except Exception:
702 702 self.log.error("task::invalid task result message send to %r: %r"%(
703 703 client_id, msg), exc_info=True)
704 704 return
705 705
706 706 parent = msg['parent_header']
707 707 if not parent:
708 708 # print msg
709 709 self.log.warn("Task %r had no parent!"%msg)
710 710 return
711 711 msg_id = parent['msg_id']
712 712 if msg_id in self.unassigned:
713 713 self.unassigned.remove(msg_id)
714 714
715 715 header = msg['header']
716 716 engine_uuid = header.get('engine', None)
717 717 eid = self.by_ident.get(engine_uuid, None)
718 718
719 719 if msg_id in self.pending:
720 720 self.pending.remove(msg_id)
721 721 self.all_completed.add(msg_id)
722 722 if eid is not None:
723 723 self.completed[eid].append(msg_id)
724 724 if msg_id in self.tasks[eid]:
725 725 self.tasks[eid].remove(msg_id)
726 726 completed = header['date']
727 727 started = header.get('started', None)
728 728 result = {
729 729 'result_header' : header,
730 730 'result_content': msg['content'],
731 731 'started' : started,
732 732 'completed' : completed,
733 733 'engine_uuid': engine_uuid
734 734 }
735 735
736 736 result['result_buffers'] = msg['buffers']
737 737 try:
738 738 self.db.update_record(msg_id, result)
739 739 except Exception:
740 740 self.log.error("DB Error saving task request %r"%msg_id, exc_info=True)
741 741
742 742 else:
743 743 self.log.debug("task::unknown task %r finished"%msg_id)
744 744
745 745 def save_task_destination(self, idents, msg):
746 746 try:
747 msg = self.session.unpack_message(msg, content=True)
747 msg = self.session.unserialize(msg, content=True)
748 748 except Exception:
749 749 self.log.error("task::invalid task tracking message", exc_info=True)
750 750 return
751 751 content = msg['content']
752 752 # print (content)
753 753 msg_id = content['msg_id']
754 754 engine_uuid = content['engine_id']
755 755 eid = self.by_ident[util.asbytes(engine_uuid)]
756 756
757 757 self.log.info("task::task %r arrived on %r"%(msg_id, eid))
758 758 if msg_id in self.unassigned:
759 759 self.unassigned.remove(msg_id)
760 760 # else:
761 761 # self.log.debug("task::task %r not listed as MIA?!"%(msg_id))
762 762
763 763 self.tasks[eid].append(msg_id)
764 764 # self.pending[msg_id][1].update(received=datetime.now(),engine=(eid,engine_uuid))
765 765 try:
766 766 self.db.update_record(msg_id, dict(engine_uuid=engine_uuid))
767 767 except Exception:
768 768 self.log.error("DB Error saving task destination %r"%msg_id, exc_info=True)
769 769
770 770
771 771 def mia_task_request(self, idents, msg):
772 772 raise NotImplementedError
773 773 client_id = idents[0]
774 774 # content = dict(mia=self.mia,status='ok')
775 775 # self.session.send('mia_reply', content=content, idents=client_id)
776 776
777 777
778 778 #--------------------- IOPub Traffic ------------------------------
779 779
780 780 def save_iopub_message(self, topics, msg):
781 781 """save an iopub message into the db"""
782 782 # print (topics)
783 783 try:
784 msg = self.session.unpack_message(msg, content=True)
784 msg = self.session.unserialize(msg, content=True)
785 785 except Exception:
786 786 self.log.error("iopub::invalid IOPub message", exc_info=True)
787 787 return
788 788
789 789 parent = msg['parent_header']
790 790 if not parent:
791 791 self.log.error("iopub::invalid IOPub message: %r"%msg)
792 792 return
793 793 msg_id = parent['msg_id']
794 794 msg_type = msg['header']['msg_type']
795 795 content = msg['content']
796 796
797 797 # ensure msg_id is in db
798 798 try:
799 799 rec = self.db.get_record(msg_id)
800 800 except KeyError:
801 801 rec = empty_record()
802 802 rec['msg_id'] = msg_id
803 803 self.db.add_record(msg_id, rec)
804 804 # stream
805 805 d = {}
806 806 if msg_type == 'stream':
807 807 name = content['name']
808 808 s = rec[name] or ''
809 809 d[name] = s + content['data']
810 810
811 811 elif msg_type == 'pyerr':
812 812 d['pyerr'] = content
813 813 elif msg_type == 'pyin':
814 814 d['pyin'] = content['code']
815 815 else:
816 816 d[msg_type] = content.get('data', '')
817 817
818 818 try:
819 819 self.db.update_record(msg_id, d)
820 820 except Exception:
821 821 self.log.error("DB Error saving iopub message %r"%msg_id, exc_info=True)
822 822
823 823
824 824
825 825 #-------------------------------------------------------------------------
826 826 # Registration requests
827 827 #-------------------------------------------------------------------------
828 828
829 829 def connection_request(self, client_id, msg):
830 830 """Reply with connection addresses for clients."""
831 831 self.log.info("client::client %r connected"%client_id)
832 832 content = dict(status='ok')
833 833 content.update(self.client_info)
834 834 jsonable = {}
835 835 for k,v in self.keytable.iteritems():
836 836 if v not in self.dead_engines:
837 837 jsonable[str(k)] = v.decode('ascii')
838 838 content['engines'] = jsonable
839 839 self.session.send(self.query, 'connection_reply', content, parent=msg, ident=client_id)
840 840
841 841 def register_engine(self, reg, msg):
842 842 """Register a new engine."""
843 843 content = msg['content']
844 844 try:
845 845 queue = util.asbytes(content['queue'])
846 846 except KeyError:
847 847 self.log.error("registration::queue not specified", exc_info=True)
848 848 return
849 849 heart = content.get('heartbeat', None)
850 850 if heart:
851 851 heart = util.asbytes(heart)
852 852 """register a new engine, and create the socket(s) necessary"""
853 853 eid = self._next_id
854 854 # print (eid, queue, reg, heart)
855 855
856 856 self.log.debug("registration::register_engine(%i, %r, %r, %r)"%(eid, queue, reg, heart))
857 857
858 858 content = dict(id=eid,status='ok')
859 859 content.update(self.engine_info)
860 860 # check if requesting available IDs:
861 861 if queue in self.by_ident:
862 862 try:
863 863 raise KeyError("queue_id %r in use"%queue)
864 864 except:
865 865 content = error.wrap_exception()
866 866 self.log.error("queue_id %r in use"%queue, exc_info=True)
867 867 elif heart in self.hearts: # need to check unique hearts?
868 868 try:
869 869 raise KeyError("heart_id %r in use"%heart)
870 870 except:
871 871 self.log.error("heart_id %r in use"%heart, exc_info=True)
872 872 content = error.wrap_exception()
873 873 else:
874 874 for h, pack in self.incoming_registrations.iteritems():
875 875 if heart == h:
876 876 try:
877 877 raise KeyError("heart_id %r in use"%heart)
878 878 except:
879 879 self.log.error("heart_id %r in use"%heart, exc_info=True)
880 880 content = error.wrap_exception()
881 881 break
882 882 elif queue == pack[1]:
883 883 try:
884 884 raise KeyError("queue_id %r in use"%queue)
885 885 except:
886 886 self.log.error("queue_id %r in use"%queue, exc_info=True)
887 887 content = error.wrap_exception()
888 888 break
889 889
890 890 msg = self.session.send(self.query, "registration_reply",
891 891 content=content,
892 892 ident=reg)
893 893
894 894 if content['status'] == 'ok':
895 895 if heart in self.heartmonitor.hearts:
896 896 # already beating
897 897 self.incoming_registrations[heart] = (eid,queue,reg[0],None)
898 898 self.finish_registration(heart)
899 899 else:
900 900 purge = lambda : self._purge_stalled_registration(heart)
901 901 dc = ioloop.DelayedCallback(purge, self.registration_timeout, self.loop)
902 902 dc.start()
903 903 self.incoming_registrations[heart] = (eid,queue,reg[0],dc)
904 904 else:
905 905 self.log.error("registration::registration %i failed: %r"%(eid, content['evalue']))
906 906 return eid
907 907
908 908 def unregister_engine(self, ident, msg):
909 909 """Unregister an engine that explicitly requested to leave."""
910 910 try:
911 911 eid = msg['content']['id']
912 912 except:
913 913 self.log.error("registration::bad engine id for unregistration: %r"%ident, exc_info=True)
914 914 return
915 915 self.log.info("registration::unregister_engine(%r)"%eid)
916 916 # print (eid)
917 917 uuid = self.keytable[eid]
918 918 content=dict(id=eid, queue=uuid.decode('ascii'))
919 919 self.dead_engines.add(uuid)
920 920 # self.ids.remove(eid)
921 921 # uuid = self.keytable.pop(eid)
922 922 #
923 923 # ec = self.engines.pop(eid)
924 924 # self.hearts.pop(ec.heartbeat)
925 925 # self.by_ident.pop(ec.queue)
926 926 # self.completed.pop(eid)
927 927 handleit = lambda : self._handle_stranded_msgs(eid, uuid)
928 928 dc = ioloop.DelayedCallback(handleit, self.registration_timeout, self.loop)
929 929 dc.start()
930 930 ############## TODO: HANDLE IT ################
931 931
932 932 if self.notifier:
933 933 self.session.send(self.notifier, "unregistration_notification", content=content)
934 934
935 935 def _handle_stranded_msgs(self, eid, uuid):
936 936 """Handle messages known to be on an engine when the engine unregisters.
937 937
938 938 It is possible that this will fire prematurely - that is, an engine will
939 939 go down after completing a result, and the client will be notified
940 940 that the result failed and later receive the actual result.
941 941 """
942 942
943 943 outstanding = self.queues[eid]
944 944
945 945 for msg_id in outstanding:
946 946 self.pending.remove(msg_id)
947 947 self.all_completed.add(msg_id)
948 948 try:
949 949 raise error.EngineError("Engine %r died while running task %r"%(eid, msg_id))
950 950 except:
951 951 content = error.wrap_exception()
952 952 # build a fake header:
953 953 header = {}
954 954 header['engine'] = uuid
955 955 header['date'] = datetime.now()
956 956 rec = dict(result_content=content, result_header=header, result_buffers=[])
957 957 rec['completed'] = header['date']
958 958 rec['engine_uuid'] = uuid
959 959 try:
960 960 self.db.update_record(msg_id, rec)
961 961 except Exception:
962 962 self.log.error("DB Error handling stranded msg %r"%msg_id, exc_info=True)
963 963
964 964
965 965 def finish_registration(self, heart):
966 966 """Second half of engine registration, called after our HeartMonitor
967 967 has received a beat from the Engine's Heart."""
968 968 try:
969 969 (eid,queue,reg,purge) = self.incoming_registrations.pop(heart)
970 970 except KeyError:
971 971 self.log.error("registration::tried to finish nonexistant registration", exc_info=True)
972 972 return
973 973 self.log.info("registration::finished registering engine %i:%r"%(eid,queue))
974 974 if purge is not None:
975 975 purge.stop()
976 976 control = queue
977 977 self.ids.add(eid)
978 978 self.keytable[eid] = queue
979 979 self.engines[eid] = EngineConnector(id=eid, queue=queue, registration=reg,
980 980 control=control, heartbeat=heart)
981 981 self.by_ident[queue] = eid
982 982 self.queues[eid] = list()
983 983 self.tasks[eid] = list()
984 984 self.completed[eid] = list()
985 985 self.hearts[heart] = eid
986 986 content = dict(id=eid, queue=self.engines[eid].queue.decode('ascii'))
987 987 if self.notifier:
988 988 self.session.send(self.notifier, "registration_notification", content=content)
989 989 self.log.info("engine::Engine Connected: %i"%eid)
990 990
991 991 def _purge_stalled_registration(self, heart):
992 992 if heart in self.incoming_registrations:
993 993 eid = self.incoming_registrations.pop(heart)[0]
994 994 self.log.info("registration::purging stalled registration: %i"%eid)
995 995 else:
996 996 pass
997 997
998 998 #-------------------------------------------------------------------------
999 999 # Client Requests
1000 1000 #-------------------------------------------------------------------------
1001 1001
1002 1002 def shutdown_request(self, client_id, msg):
1003 1003 """handle shutdown request."""
1004 1004 self.session.send(self.query, 'shutdown_reply', content={'status': 'ok'}, ident=client_id)
1005 1005 # also notify other clients of shutdown
1006 1006 self.session.send(self.notifier, 'shutdown_notice', content={'status': 'ok'})
1007 1007 dc = ioloop.DelayedCallback(lambda : self._shutdown(), 1000, self.loop)
1008 1008 dc.start()
1009 1009
1010 1010 def _shutdown(self):
1011 1011 self.log.info("hub::hub shutting down.")
1012 1012 time.sleep(0.1)
1013 1013 sys.exit(0)
1014 1014
1015 1015
1016 1016 def check_load(self, client_id, msg):
1017 1017 content = msg['content']
1018 1018 try:
1019 1019 targets = content['targets']
1020 1020 targets = self._validate_targets(targets)
1021 1021 except:
1022 1022 content = error.wrap_exception()
1023 1023 self.session.send(self.query, "hub_error",
1024 1024 content=content, ident=client_id)
1025 1025 return
1026 1026
1027 1027 content = dict(status='ok')
1028 1028 # loads = {}
1029 1029 for t in targets:
1030 1030 content[bytes(t)] = len(self.queues[t])+len(self.tasks[t])
1031 1031 self.session.send(self.query, "load_reply", content=content, ident=client_id)
1032 1032
1033 1033
1034 1034 def queue_status(self, client_id, msg):
1035 1035 """Return the Queue status of one or more targets.
1036 1036 if verbose: return the msg_ids
1037 1037 else: return len of each type.
1038 1038 keys: queue (pending MUX jobs)
1039 1039 tasks (pending Task jobs)
1040 1040 completed (finished jobs from both queues)"""
1041 1041 content = msg['content']
1042 1042 targets = content['targets']
1043 1043 try:
1044 1044 targets = self._validate_targets(targets)
1045 1045 except:
1046 1046 content = error.wrap_exception()
1047 1047 self.session.send(self.query, "hub_error",
1048 1048 content=content, ident=client_id)
1049 1049 return
1050 1050 verbose = content.get('verbose', False)
1051 1051 content = dict(status='ok')
1052 1052 for t in targets:
1053 1053 queue = self.queues[t]
1054 1054 completed = self.completed[t]
1055 1055 tasks = self.tasks[t]
1056 1056 if not verbose:
1057 1057 queue = len(queue)
1058 1058 completed = len(completed)
1059 1059 tasks = len(tasks)
1060 1060 content[str(t)] = {'queue': queue, 'completed': completed , 'tasks': tasks}
1061 1061 content['unassigned'] = list(self.unassigned) if verbose else len(self.unassigned)
1062 1062 # print (content)
1063 1063 self.session.send(self.query, "queue_reply", content=content, ident=client_id)
1064 1064
1065 1065 def purge_results(self, client_id, msg):
1066 1066 """Purge results from memory. This method is more valuable before we move
1067 1067 to a DB based message storage mechanism."""
1068 1068 content = msg['content']
1069 1069 self.log.info("Dropping records with %s", content)
1070 1070 msg_ids = content.get('msg_ids', [])
1071 1071 reply = dict(status='ok')
1072 1072 if msg_ids == 'all':
1073 1073 try:
1074 1074 self.db.drop_matching_records(dict(completed={'$ne':None}))
1075 1075 except Exception:
1076 1076 reply = error.wrap_exception()
1077 1077 else:
1078 1078 pending = filter(lambda m: m in self.pending, msg_ids)
1079 1079 if pending:
1080 1080 try:
1081 1081 raise IndexError("msg pending: %r"%pending[0])
1082 1082 except:
1083 1083 reply = error.wrap_exception()
1084 1084 else:
1085 1085 try:
1086 1086 self.db.drop_matching_records(dict(msg_id={'$in':msg_ids}))
1087 1087 except Exception:
1088 1088 reply = error.wrap_exception()
1089 1089
1090 1090 if reply['status'] == 'ok':
1091 1091 eids = content.get('engine_ids', [])
1092 1092 for eid in eids:
1093 1093 if eid not in self.engines:
1094 1094 try:
1095 1095 raise IndexError("No such engine: %i"%eid)
1096 1096 except:
1097 1097 reply = error.wrap_exception()
1098 1098 break
1099 1099 uid = self.engines[eid].queue
1100 1100 try:
1101 1101 self.db.drop_matching_records(dict(engine_uuid=uid, completed={'$ne':None}))
1102 1102 except Exception:
1103 1103 reply = error.wrap_exception()
1104 1104 break
1105 1105
1106 1106 self.session.send(self.query, 'purge_reply', content=reply, ident=client_id)
1107 1107
1108 1108 def resubmit_task(self, client_id, msg):
1109 1109 """Resubmit one or more tasks."""
1110 1110 def finish(reply):
1111 1111 self.session.send(self.query, 'resubmit_reply', content=reply, ident=client_id)
1112 1112
1113 1113 content = msg['content']
1114 1114 msg_ids = content['msg_ids']
1115 1115 reply = dict(status='ok')
1116 1116 try:
1117 1117 records = self.db.find_records({'msg_id' : {'$in' : msg_ids}}, keys=[
1118 1118 'header', 'content', 'buffers'])
1119 1119 except Exception:
1120 1120 self.log.error('db::db error finding tasks to resubmit', exc_info=True)
1121 1121 return finish(error.wrap_exception())
1122 1122
1123 1123 # validate msg_ids
1124 1124 found_ids = [ rec['msg_id'] for rec in records ]
1125 1125 invalid_ids = filter(lambda m: m in self.pending, found_ids)
1126 1126 if len(records) > len(msg_ids):
1127 1127 try:
1128 1128 raise RuntimeError("DB appears to be in an inconsistent state."
1129 1129 "More matching records were found than should exist")
1130 1130 except Exception:
1131 1131 return finish(error.wrap_exception())
1132 1132 elif len(records) < len(msg_ids):
1133 1133 missing = [ m for m in msg_ids if m not in found_ids ]
1134 1134 try:
1135 1135 raise KeyError("No such msg(s): %r"%missing)
1136 1136 except KeyError:
1137 1137 return finish(error.wrap_exception())
1138 1138 elif invalid_ids:
1139 1139 msg_id = invalid_ids[0]
1140 1140 try:
1141 1141 raise ValueError("Task %r appears to be inflight"%(msg_id))
1142 1142 except Exception:
1143 1143 return finish(error.wrap_exception())
1144 1144
1145 1145 # clear the existing records
1146 1146 now = datetime.now()
1147 1147 rec = empty_record()
1148 1148 map(rec.pop, ['msg_id', 'header', 'content', 'buffers', 'submitted'])
1149 1149 rec['resubmitted'] = now
1150 1150 rec['queue'] = 'task'
1151 1151 rec['client_uuid'] = client_id[0]
1152 1152 try:
1153 1153 for msg_id in msg_ids:
1154 1154 self.all_completed.discard(msg_id)
1155 1155 self.db.update_record(msg_id, rec)
1156 1156 except Exception:
1157 1157 self.log.error('db::db error upating record', exc_info=True)
1158 1158 reply = error.wrap_exception()
1159 1159 else:
1160 1160 # send the messages
1161 1161 for rec in records:
1162 1162 header = rec['header']
1163 1163 # include resubmitted in header to prevent digest collision
1164 1164 header['resubmitted'] = now
1165 1165 msg = self.session.msg(header['msg_type'])
1166 1166 msg['content'] = rec['content']
1167 1167 msg['header'] = header
1168 1168 msg['msg_id'] = rec['msg_id']
1169 1169 self.session.send(self.resubmit, msg, buffers=rec['buffers'])
1170 1170
1171 1171 finish(dict(status='ok'))
1172 1172
1173 1173
1174 1174 def _extract_record(self, rec):
1175 1175 """decompose a TaskRecord dict into subsection of reply for get_result"""
1176 1176 io_dict = {}
1177 1177 for key in 'pyin pyout pyerr stdout stderr'.split():
1178 1178 io_dict[key] = rec[key]
1179 1179 content = { 'result_content': rec['result_content'],
1180 1180 'header': rec['header'],
1181 1181 'result_header' : rec['result_header'],
1182 1182 'io' : io_dict,
1183 1183 }
1184 1184 if rec['result_buffers']:
1185 1185 buffers = map(bytes, rec['result_buffers'])
1186 1186 else:
1187 1187 buffers = []
1188 1188
1189 1189 return content, buffers
1190 1190
1191 1191 def get_results(self, client_id, msg):
1192 1192 """Get the result of 1 or more messages."""
1193 1193 content = msg['content']
1194 1194 msg_ids = sorted(set(content['msg_ids']))
1195 1195 statusonly = content.get('status_only', False)
1196 1196 pending = []
1197 1197 completed = []
1198 1198 content = dict(status='ok')
1199 1199 content['pending'] = pending
1200 1200 content['completed'] = completed
1201 1201 buffers = []
1202 1202 if not statusonly:
1203 1203 try:
1204 1204 matches = self.db.find_records(dict(msg_id={'$in':msg_ids}))
1205 1205 # turn match list into dict, for faster lookup
1206 1206 records = {}
1207 1207 for rec in matches:
1208 1208 records[rec['msg_id']] = rec
1209 1209 except Exception:
1210 1210 content = error.wrap_exception()
1211 1211 self.session.send(self.query, "result_reply", content=content,
1212 1212 parent=msg, ident=client_id)
1213 1213 return
1214 1214 else:
1215 1215 records = {}
1216 1216 for msg_id in msg_ids:
1217 1217 if msg_id in self.pending:
1218 1218 pending.append(msg_id)
1219 1219 elif msg_id in self.all_completed:
1220 1220 completed.append(msg_id)
1221 1221 if not statusonly:
1222 1222 c,bufs = self._extract_record(records[msg_id])
1223 1223 content[msg_id] = c
1224 1224 buffers.extend(bufs)
1225 1225 elif msg_id in records:
1226 1226 if rec['completed']:
1227 1227 completed.append(msg_id)
1228 1228 c,bufs = self._extract_record(records[msg_id])
1229 1229 content[msg_id] = c
1230 1230 buffers.extend(bufs)
1231 1231 else:
1232 1232 pending.append(msg_id)
1233 1233 else:
1234 1234 try:
1235 1235 raise KeyError('No such message: '+msg_id)
1236 1236 except:
1237 1237 content = error.wrap_exception()
1238 1238 break
1239 1239 self.session.send(self.query, "result_reply", content=content,
1240 1240 parent=msg, ident=client_id,
1241 1241 buffers=buffers)
1242 1242
1243 1243 def get_history(self, client_id, msg):
1244 1244 """Get a list of all msg_ids in our DB records"""
1245 1245 try:
1246 1246 msg_ids = self.db.get_history()
1247 1247 except Exception as e:
1248 1248 content = error.wrap_exception()
1249 1249 else:
1250 1250 content = dict(status='ok', history=msg_ids)
1251 1251
1252 1252 self.session.send(self.query, "history_reply", content=content,
1253 1253 parent=msg, ident=client_id)
1254 1254
1255 1255 def db_query(self, client_id, msg):
1256 1256 """Perform a raw query on the task record database."""
1257 1257 content = msg['content']
1258 1258 query = content.get('query', {})
1259 1259 keys = content.get('keys', None)
1260 1260 buffers = []
1261 1261 empty = list()
1262 1262 try:
1263 1263 records = self.db.find_records(query, keys)
1264 1264 except Exception as e:
1265 1265 content = error.wrap_exception()
1266 1266 else:
1267 1267 # extract buffers from reply content:
1268 1268 if keys is not None:
1269 1269 buffer_lens = [] if 'buffers' in keys else None
1270 1270 result_buffer_lens = [] if 'result_buffers' in keys else None
1271 1271 else:
1272 1272 buffer_lens = []
1273 1273 result_buffer_lens = []
1274 1274
1275 1275 for rec in records:
1276 1276 # buffers may be None, so double check
1277 1277 if buffer_lens is not None:
1278 1278 b = rec.pop('buffers', empty) or empty
1279 1279 buffer_lens.append(len(b))
1280 1280 buffers.extend(b)
1281 1281 if result_buffer_lens is not None:
1282 1282 rb = rec.pop('result_buffers', empty) or empty
1283 1283 result_buffer_lens.append(len(rb))
1284 1284 buffers.extend(rb)
1285 1285 content = dict(status='ok', records=records, buffer_lens=buffer_lens,
1286 1286 result_buffer_lens=result_buffer_lens)
1287 1287 # self.log.debug (content)
1288 1288 self.session.send(self.query, "db_reply", content=content,
1289 1289 parent=msg, ident=client_id,
1290 1290 buffers=buffers)
1291 1291
@@ -1,714 +1,714 b''
1 1 """The Python scheduler for rich scheduling.
2 2
3 3 The Pure ZMQ scheduler does not allow routing schemes other than LRU,
4 4 nor does it check msg_id DAG dependencies. For those, a slightly slower
5 5 Python Scheduler exists.
6 6
7 7 Authors:
8 8
9 9 * Min RK
10 10 """
11 11 #-----------------------------------------------------------------------------
12 12 # Copyright (C) 2010-2011 The IPython Development Team
13 13 #
14 14 # Distributed under the terms of the BSD License. The full license is in
15 15 # the file COPYING, distributed as part of this software.
16 16 #-----------------------------------------------------------------------------
17 17
18 18 #----------------------------------------------------------------------
19 19 # Imports
20 20 #----------------------------------------------------------------------
21 21
22 22 from __future__ import print_function
23 23
24 24 import logging
25 25 import sys
26 26
27 27 from datetime import datetime, timedelta
28 28 from random import randint, random
29 29 from types import FunctionType
30 30
31 31 try:
32 32 import numpy
33 33 except ImportError:
34 34 numpy = None
35 35
36 36 import zmq
37 37 from zmq.eventloop import ioloop, zmqstream
38 38
39 39 # local imports
40 40 from IPython.external.decorator import decorator
41 41 from IPython.config.application import Application
42 42 from IPython.config.loader import Config
43 43 from IPython.utils.traitlets import Instance, Dict, List, Set, Int, Enum, CBytes
44 44
45 45 from IPython.parallel import error
46 46 from IPython.parallel.factory import SessionFactory
47 47 from IPython.parallel.util import connect_logger, local_logger, asbytes
48 48
49 49 from .dependency import Dependency
50 50
51 51 @decorator
52 52 def logged(f,self,*args,**kwargs):
53 53 # print ("#--------------------")
54 54 self.log.debug("scheduler::%s(*%s,**%s)", f.func_name, args, kwargs)
55 55 # print ("#--")
56 56 return f(self,*args, **kwargs)
57 57
58 58 #----------------------------------------------------------------------
59 59 # Chooser functions
60 60 #----------------------------------------------------------------------
61 61
62 62 def plainrandom(loads):
63 63 """Plain random pick."""
64 64 n = len(loads)
65 65 return randint(0,n-1)
66 66
67 67 def lru(loads):
68 68 """Always pick the front of the line.
69 69
70 70 The content of `loads` is ignored.
71 71
72 72 Assumes LRU ordering of loads, with oldest first.
73 73 """
74 74 return 0
75 75
76 76 def twobin(loads):
77 77 """Pick two at random, use the LRU of the two.
78 78
79 79 The content of loads is ignored.
80 80
81 81 Assumes LRU ordering of loads, with oldest first.
82 82 """
83 83 n = len(loads)
84 84 a = randint(0,n-1)
85 85 b = randint(0,n-1)
86 86 return min(a,b)
87 87
88 88 def weighted(loads):
89 89 """Pick two at random using inverse load as weight.
90 90
91 91 Return the less loaded of the two.
92 92 """
93 93 # weight 0 a million times more than 1:
94 94 weights = 1./(1e-6+numpy.array(loads))
95 95 sums = weights.cumsum()
96 96 t = sums[-1]
97 97 x = random()*t
98 98 y = random()*t
99 99 idx = 0
100 100 idy = 0
101 101 while sums[idx] < x:
102 102 idx += 1
103 103 while sums[idy] < y:
104 104 idy += 1
105 105 if weights[idy] > weights[idx]:
106 106 return idy
107 107 else:
108 108 return idx
109 109
110 110 def leastload(loads):
111 111 """Always choose the lowest load.
112 112
113 113 If the lowest load occurs more than once, the first
114 114 occurance will be used. If loads has LRU ordering, this means
115 115 the LRU of those with the lowest load is chosen.
116 116 """
117 117 return loads.index(min(loads))
118 118
119 119 #---------------------------------------------------------------------
120 120 # Classes
121 121 #---------------------------------------------------------------------
122 122 # store empty default dependency:
123 123 MET = Dependency([])
124 124
125 125 class TaskScheduler(SessionFactory):
126 126 """Python TaskScheduler object.
127 127
128 128 This is the simplest object that supports msg_id based
129 129 DAG dependencies. *Only* task msg_ids are checked, not
130 130 msg_ids of jobs submitted via the MUX queue.
131 131
132 132 """
133 133
134 134 hwm = Int(0, config=True, shortname='hwm',
135 135 help="""specify the High Water Mark (HWM) for the downstream
136 136 socket in the Task scheduler. This is the maximum number
137 137 of allowed outstanding tasks on each engine."""
138 138 )
139 139 scheme_name = Enum(('leastload', 'pure', 'lru', 'plainrandom', 'weighted', 'twobin'),
140 140 'leastload', config=True, shortname='scheme', allow_none=False,
141 141 help="""select the task scheduler scheme [default: Python LRU]
142 142 Options are: 'pure', 'lru', 'plainrandom', 'weighted', 'twobin','leastload'"""
143 143 )
144 144 def _scheme_name_changed(self, old, new):
145 145 self.log.debug("Using scheme %r"%new)
146 146 self.scheme = globals()[new]
147 147
148 148 # input arguments:
149 149 scheme = Instance(FunctionType) # function for determining the destination
150 150 def _scheme_default(self):
151 151 return leastload
152 152 client_stream = Instance(zmqstream.ZMQStream) # client-facing stream
153 153 engine_stream = Instance(zmqstream.ZMQStream) # engine-facing stream
154 154 notifier_stream = Instance(zmqstream.ZMQStream) # hub-facing sub stream
155 155 mon_stream = Instance(zmqstream.ZMQStream) # hub-facing pub stream
156 156
157 157 # internals:
158 158 graph = Dict() # dict by msg_id of [ msg_ids that depend on key ]
159 159 retries = Dict() # dict by msg_id of retries remaining (non-neg ints)
160 160 # waiting = List() # list of msg_ids ready to run, but haven't due to HWM
161 161 depending = Dict() # dict by msg_id of (msg_id, raw_msg, after, follow)
162 162 pending = Dict() # dict by engine_uuid of submitted tasks
163 163 completed = Dict() # dict by engine_uuid of completed tasks
164 164 failed = Dict() # dict by engine_uuid of failed tasks
165 165 destinations = Dict() # dict by msg_id of engine_uuids where jobs ran (reverse of completed+failed)
166 166 clients = Dict() # dict by msg_id for who submitted the task
167 167 targets = List() # list of target IDENTs
168 168 loads = List() # list of engine loads
169 169 # full = Set() # set of IDENTs that have HWM outstanding tasks
170 170 all_completed = Set() # set of all completed tasks
171 171 all_failed = Set() # set of all failed tasks
172 172 all_done = Set() # set of all finished tasks=union(completed,failed)
173 173 all_ids = Set() # set of all submitted task IDs
174 174 blacklist = Dict() # dict by msg_id of locations where a job has encountered UnmetDependency
175 175 auditor = Instance('zmq.eventloop.ioloop.PeriodicCallback')
176 176
177 177 ident = CBytes() # ZMQ identity. This should just be self.session.session
178 178 # but ensure Bytes
179 179 def _ident_default(self):
180 180 return asbytes(self.session.session)
181 181
182 182 def start(self):
183 183 self.engine_stream.on_recv(self.dispatch_result, copy=False)
184 184 self._notification_handlers = dict(
185 185 registration_notification = self._register_engine,
186 186 unregistration_notification = self._unregister_engine
187 187 )
188 188 self.notifier_stream.on_recv(self.dispatch_notification)
189 189 self.auditor = ioloop.PeriodicCallback(self.audit_timeouts, 2e3, self.loop) # 1 Hz
190 190 self.auditor.start()
191 191 self.log.info("Scheduler started [%s]"%self.scheme_name)
192 192
193 193 def resume_receiving(self):
194 194 """Resume accepting jobs."""
195 195 self.client_stream.on_recv(self.dispatch_submission, copy=False)
196 196
197 197 def stop_receiving(self):
198 198 """Stop accepting jobs while there are no engines.
199 199 Leave them in the ZMQ queue."""
200 200 self.client_stream.on_recv(None)
201 201
202 202 #-----------------------------------------------------------------------
203 203 # [Un]Registration Handling
204 204 #-----------------------------------------------------------------------
205 205
206 206 def dispatch_notification(self, msg):
207 207 """dispatch register/unregister events."""
208 208 try:
209 209 idents,msg = self.session.feed_identities(msg)
210 210 except ValueError:
211 211 self.log.warn("task::Invalid Message: %r",msg)
212 212 return
213 213 try:
214 msg = self.session.unpack_message(msg)
214 msg = self.session.unserialize(msg)
215 215 except ValueError:
216 216 self.log.warn("task::Unauthorized message from: %r"%idents)
217 217 return
218 218
219 219 msg_type = msg['header']['msg_type']
220 220
221 221 handler = self._notification_handlers.get(msg_type, None)
222 222 if handler is None:
223 223 self.log.error("Unhandled message type: %r"%msg_type)
224 224 else:
225 225 try:
226 226 handler(asbytes(msg['content']['queue']))
227 227 except Exception:
228 228 self.log.error("task::Invalid notification msg: %r",msg)
229 229
230 230 def _register_engine(self, uid):
231 231 """New engine with ident `uid` became available."""
232 232 # head of the line:
233 233 self.targets.insert(0,uid)
234 234 self.loads.insert(0,0)
235 235
236 236 # initialize sets
237 237 self.completed[uid] = set()
238 238 self.failed[uid] = set()
239 239 self.pending[uid] = {}
240 240 if len(self.targets) == 1:
241 241 self.resume_receiving()
242 242 # rescan the graph:
243 243 self.update_graph(None)
244 244
245 245 def _unregister_engine(self, uid):
246 246 """Existing engine with ident `uid` became unavailable."""
247 247 if len(self.targets) == 1:
248 248 # this was our only engine
249 249 self.stop_receiving()
250 250
251 251 # handle any potentially finished tasks:
252 252 self.engine_stream.flush()
253 253
254 254 # don't pop destinations, because they might be used later
255 255 # map(self.destinations.pop, self.completed.pop(uid))
256 256 # map(self.destinations.pop, self.failed.pop(uid))
257 257
258 258 # prevent this engine from receiving work
259 259 idx = self.targets.index(uid)
260 260 self.targets.pop(idx)
261 261 self.loads.pop(idx)
262 262
263 263 # wait 5 seconds before cleaning up pending jobs, since the results might
264 264 # still be incoming
265 265 if self.pending[uid]:
266 266 dc = ioloop.DelayedCallback(lambda : self.handle_stranded_tasks(uid), 5000, self.loop)
267 267 dc.start()
268 268 else:
269 269 self.completed.pop(uid)
270 270 self.failed.pop(uid)
271 271
272 272
273 273 def handle_stranded_tasks(self, engine):
274 274 """Deal with jobs resident in an engine that died."""
275 275 lost = self.pending[engine]
276 276 for msg_id in lost.keys():
277 277 if msg_id not in self.pending[engine]:
278 278 # prevent double-handling of messages
279 279 continue
280 280
281 281 raw_msg = lost[msg_id][0]
282 282 idents,msg = self.session.feed_identities(raw_msg, copy=False)
283 283 parent = self.session.unpack(msg[1].bytes)
284 284 idents = [engine, idents[0]]
285 285
286 286 # build fake error reply
287 287 try:
288 288 raise error.EngineError("Engine %r died while running task %r"%(engine, msg_id))
289 289 except:
290 290 content = error.wrap_exception()
291 291 msg = self.session.msg('apply_reply', content, parent=parent, subheader={'status':'error'})
292 292 raw_reply = map(zmq.Message, self.session.serialize(msg, ident=idents))
293 293 # and dispatch it
294 294 self.dispatch_result(raw_reply)
295 295
296 296 # finally scrub completed/failed lists
297 297 self.completed.pop(engine)
298 298 self.failed.pop(engine)
299 299
300 300
301 301 #-----------------------------------------------------------------------
302 302 # Job Submission
303 303 #-----------------------------------------------------------------------
304 304 def dispatch_submission(self, raw_msg):
305 305 """Dispatch job submission to appropriate handlers."""
306 306 # ensure targets up to date:
307 307 self.notifier_stream.flush()
308 308 try:
309 309 idents, msg = self.session.feed_identities(raw_msg, copy=False)
310 msg = self.session.unpack_message(msg, content=False, copy=False)
310 msg = self.session.unserialize(msg, content=False, copy=False)
311 311 except Exception:
312 312 self.log.error("task::Invaid task msg: %r"%raw_msg, exc_info=True)
313 313 return
314 314
315 315
316 316 # send to monitor
317 317 self.mon_stream.send_multipart([b'intask']+raw_msg, copy=False)
318 318
319 319 header = msg['header']
320 320 msg_id = header['msg_id']
321 321 self.all_ids.add(msg_id)
322 322
323 323 # get targets as a set of bytes objects
324 324 # from a list of unicode objects
325 325 targets = header.get('targets', [])
326 326 targets = map(asbytes, targets)
327 327 targets = set(targets)
328 328
329 329 retries = header.get('retries', 0)
330 330 self.retries[msg_id] = retries
331 331
332 332 # time dependencies
333 333 after = header.get('after', None)
334 334 if after:
335 335 after = Dependency(after)
336 336 if after.all:
337 337 if after.success:
338 338 after = Dependency(after.difference(self.all_completed),
339 339 success=after.success,
340 340 failure=after.failure,
341 341 all=after.all,
342 342 )
343 343 if after.failure:
344 344 after = Dependency(after.difference(self.all_failed),
345 345 success=after.success,
346 346 failure=after.failure,
347 347 all=after.all,
348 348 )
349 349 if after.check(self.all_completed, self.all_failed):
350 350 # recast as empty set, if `after` already met,
351 351 # to prevent unnecessary set comparisons
352 352 after = MET
353 353 else:
354 354 after = MET
355 355
356 356 # location dependencies
357 357 follow = Dependency(header.get('follow', []))
358 358
359 359 # turn timeouts into datetime objects:
360 360 timeout = header.get('timeout', None)
361 361 if timeout:
362 362 timeout = datetime.now() + timedelta(0,timeout,0)
363 363
364 364 args = [raw_msg, targets, after, follow, timeout]
365 365
366 366 # validate and reduce dependencies:
367 367 for dep in after,follow:
368 368 if not dep: # empty dependency
369 369 continue
370 370 # check valid:
371 371 if msg_id in dep or dep.difference(self.all_ids):
372 372 self.depending[msg_id] = args
373 373 return self.fail_unreachable(msg_id, error.InvalidDependency)
374 374 # check if unreachable:
375 375 if dep.unreachable(self.all_completed, self.all_failed):
376 376 self.depending[msg_id] = args
377 377 return self.fail_unreachable(msg_id)
378 378
379 379 if after.check(self.all_completed, self.all_failed):
380 380 # time deps already met, try to run
381 381 if not self.maybe_run(msg_id, *args):
382 382 # can't run yet
383 383 if msg_id not in self.all_failed:
384 384 # could have failed as unreachable
385 385 self.save_unmet(msg_id, *args)
386 386 else:
387 387 self.save_unmet(msg_id, *args)
388 388
389 389 def audit_timeouts(self):
390 390 """Audit all waiting tasks for expired timeouts."""
391 391 now = datetime.now()
392 392 for msg_id in self.depending.keys():
393 393 # must recheck, in case one failure cascaded to another:
394 394 if msg_id in self.depending:
395 395 raw,after,targets,follow,timeout = self.depending[msg_id]
396 396 if timeout and timeout < now:
397 397 self.fail_unreachable(msg_id, error.TaskTimeout)
398 398
399 399 def fail_unreachable(self, msg_id, why=error.ImpossibleDependency):
400 400 """a task has become unreachable, send a reply with an ImpossibleDependency
401 401 error."""
402 402 if msg_id not in self.depending:
403 403 self.log.error("msg %r already failed!", msg_id)
404 404 return
405 405 raw_msg,targets,after,follow,timeout = self.depending.pop(msg_id)
406 406 for mid in follow.union(after):
407 407 if mid in self.graph:
408 408 self.graph[mid].remove(msg_id)
409 409
410 410 # FIXME: unpacking a message I've already unpacked, but didn't save:
411 411 idents,msg = self.session.feed_identities(raw_msg, copy=False)
412 412 header = self.session.unpack(msg[1].bytes)
413 413
414 414 try:
415 415 raise why()
416 416 except:
417 417 content = error.wrap_exception()
418 418
419 419 self.all_done.add(msg_id)
420 420 self.all_failed.add(msg_id)
421 421
422 422 msg = self.session.send(self.client_stream, 'apply_reply', content,
423 423 parent=header, ident=idents)
424 424 self.session.send(self.mon_stream, msg, ident=[b'outtask']+idents)
425 425
426 426 self.update_graph(msg_id, success=False)
427 427
428 428 def maybe_run(self, msg_id, raw_msg, targets, after, follow, timeout):
429 429 """check location dependencies, and run if they are met."""
430 430 blacklist = self.blacklist.setdefault(msg_id, set())
431 431 if follow or targets or blacklist or self.hwm:
432 432 # we need a can_run filter
433 433 def can_run(idx):
434 434 # check hwm
435 435 if self.hwm and self.loads[idx] == self.hwm:
436 436 return False
437 437 target = self.targets[idx]
438 438 # check blacklist
439 439 if target in blacklist:
440 440 return False
441 441 # check targets
442 442 if targets and target not in targets:
443 443 return False
444 444 # check follow
445 445 return follow.check(self.completed[target], self.failed[target])
446 446
447 447 indices = filter(can_run, range(len(self.targets)))
448 448
449 449 if not indices:
450 450 # couldn't run
451 451 if follow.all:
452 452 # check follow for impossibility
453 453 dests = set()
454 454 relevant = set()
455 455 if follow.success:
456 456 relevant = self.all_completed
457 457 if follow.failure:
458 458 relevant = relevant.union(self.all_failed)
459 459 for m in follow.intersection(relevant):
460 460 dests.add(self.destinations[m])
461 461 if len(dests) > 1:
462 462 self.depending[msg_id] = (raw_msg, targets, after, follow, timeout)
463 463 self.fail_unreachable(msg_id)
464 464 return False
465 465 if targets:
466 466 # check blacklist+targets for impossibility
467 467 targets.difference_update(blacklist)
468 468 if not targets or not targets.intersection(self.targets):
469 469 self.depending[msg_id] = (raw_msg, targets, after, follow, timeout)
470 470 self.fail_unreachable(msg_id)
471 471 return False
472 472 return False
473 473 else:
474 474 indices = None
475 475
476 476 self.submit_task(msg_id, raw_msg, targets, follow, timeout, indices)
477 477 return True
478 478
479 479 def save_unmet(self, msg_id, raw_msg, targets, after, follow, timeout):
480 480 """Save a message for later submission when its dependencies are met."""
481 481 self.depending[msg_id] = [raw_msg,targets,after,follow,timeout]
482 482 # track the ids in follow or after, but not those already finished
483 483 for dep_id in after.union(follow).difference(self.all_done):
484 484 if dep_id not in self.graph:
485 485 self.graph[dep_id] = set()
486 486 self.graph[dep_id].add(msg_id)
487 487
488 488 def submit_task(self, msg_id, raw_msg, targets, follow, timeout, indices=None):
489 489 """Submit a task to any of a subset of our targets."""
490 490 if indices:
491 491 loads = [self.loads[i] for i in indices]
492 492 else:
493 493 loads = self.loads
494 494 idx = self.scheme(loads)
495 495 if indices:
496 496 idx = indices[idx]
497 497 target = self.targets[idx]
498 498 # print (target, map(str, msg[:3]))
499 499 # send job to the engine
500 500 self.engine_stream.send(target, flags=zmq.SNDMORE, copy=False)
501 501 self.engine_stream.send_multipart(raw_msg, copy=False)
502 502 # update load
503 503 self.add_job(idx)
504 504 self.pending[target][msg_id] = (raw_msg, targets, MET, follow, timeout)
505 505 # notify Hub
506 506 content = dict(msg_id=msg_id, engine_id=target.decode('ascii'))
507 507 self.session.send(self.mon_stream, 'task_destination', content=content,
508 508 ident=[b'tracktask',self.ident])
509 509
510 510
511 511 #-----------------------------------------------------------------------
512 512 # Result Handling
513 513 #-----------------------------------------------------------------------
514 514 def dispatch_result(self, raw_msg):
515 515 """dispatch method for result replies"""
516 516 try:
517 517 idents,msg = self.session.feed_identities(raw_msg, copy=False)
518 msg = self.session.unpack_message(msg, content=False, copy=False)
518 msg = self.session.unserialize(msg, content=False, copy=False)
519 519 engine = idents[0]
520 520 try:
521 521 idx = self.targets.index(engine)
522 522 except ValueError:
523 523 pass # skip load-update for dead engines
524 524 else:
525 525 self.finish_job(idx)
526 526 except Exception:
527 527 self.log.error("task::Invaid result: %r", raw_msg, exc_info=True)
528 528 return
529 529
530 530 header = msg['header']
531 531 parent = msg['parent_header']
532 532 if header.get('dependencies_met', True):
533 533 success = (header['status'] == 'ok')
534 534 msg_id = parent['msg_id']
535 535 retries = self.retries[msg_id]
536 536 if not success and retries > 0:
537 537 # failed
538 538 self.retries[msg_id] = retries - 1
539 539 self.handle_unmet_dependency(idents, parent)
540 540 else:
541 541 del self.retries[msg_id]
542 542 # relay to client and update graph
543 543 self.handle_result(idents, parent, raw_msg, success)
544 544 # send to Hub monitor
545 545 self.mon_stream.send_multipart([b'outtask']+raw_msg, copy=False)
546 546 else:
547 547 self.handle_unmet_dependency(idents, parent)
548 548
549 549 def handle_result(self, idents, parent, raw_msg, success=True):
550 550 """handle a real task result, either success or failure"""
551 551 # first, relay result to client
552 552 engine = idents[0]
553 553 client = idents[1]
554 554 # swap_ids for XREP-XREP mirror
555 555 raw_msg[:2] = [client,engine]
556 556 # print (map(str, raw_msg[:4]))
557 557 self.client_stream.send_multipart(raw_msg, copy=False)
558 558 # now, update our data structures
559 559 msg_id = parent['msg_id']
560 560 self.blacklist.pop(msg_id, None)
561 561 self.pending[engine].pop(msg_id)
562 562 if success:
563 563 self.completed[engine].add(msg_id)
564 564 self.all_completed.add(msg_id)
565 565 else:
566 566 self.failed[engine].add(msg_id)
567 567 self.all_failed.add(msg_id)
568 568 self.all_done.add(msg_id)
569 569 self.destinations[msg_id] = engine
570 570
571 571 self.update_graph(msg_id, success)
572 572
573 573 def handle_unmet_dependency(self, idents, parent):
574 574 """handle an unmet dependency"""
575 575 engine = idents[0]
576 576 msg_id = parent['msg_id']
577 577
578 578 if msg_id not in self.blacklist:
579 579 self.blacklist[msg_id] = set()
580 580 self.blacklist[msg_id].add(engine)
581 581
582 582 args = self.pending[engine].pop(msg_id)
583 583 raw,targets,after,follow,timeout = args
584 584
585 585 if self.blacklist[msg_id] == targets:
586 586 self.depending[msg_id] = args
587 587 self.fail_unreachable(msg_id)
588 588 elif not self.maybe_run(msg_id, *args):
589 589 # resubmit failed
590 590 if msg_id not in self.all_failed:
591 591 # put it back in our dependency tree
592 592 self.save_unmet(msg_id, *args)
593 593
594 594 if self.hwm:
595 595 try:
596 596 idx = self.targets.index(engine)
597 597 except ValueError:
598 598 pass # skip load-update for dead engines
599 599 else:
600 600 if self.loads[idx] == self.hwm-1:
601 601 self.update_graph(None)
602 602
603 603
604 604
605 605 def update_graph(self, dep_id=None, success=True):
606 606 """dep_id just finished. Update our dependency
607 607 graph and submit any jobs that just became runable.
608 608
609 609 Called with dep_id=None to update entire graph for hwm, but without finishing
610 610 a task.
611 611 """
612 612 # print ("\n\n***********")
613 613 # pprint (dep_id)
614 614 # pprint (self.graph)
615 615 # pprint (self.depending)
616 616 # pprint (self.all_completed)
617 617 # pprint (self.all_failed)
618 618 # print ("\n\n***********\n\n")
619 619 # update any jobs that depended on the dependency
620 620 jobs = self.graph.pop(dep_id, [])
621 621
622 622 # recheck *all* jobs if
623 623 # a) we have HWM and an engine just become no longer full
624 624 # or b) dep_id was given as None
625 625 if dep_id is None or self.hwm and any( [ load==self.hwm-1 for load in self.loads ]):
626 626 jobs = self.depending.keys()
627 627
628 628 for msg_id in jobs:
629 629 raw_msg, targets, after, follow, timeout = self.depending[msg_id]
630 630
631 631 if after.unreachable(self.all_completed, self.all_failed)\
632 632 or follow.unreachable(self.all_completed, self.all_failed):
633 633 self.fail_unreachable(msg_id)
634 634
635 635 elif after.check(self.all_completed, self.all_failed): # time deps met, maybe run
636 636 if self.maybe_run(msg_id, raw_msg, targets, MET, follow, timeout):
637 637
638 638 self.depending.pop(msg_id)
639 639 for mid in follow.union(after):
640 640 if mid in self.graph:
641 641 self.graph[mid].remove(msg_id)
642 642
643 643 #----------------------------------------------------------------------
644 644 # methods to be overridden by subclasses
645 645 #----------------------------------------------------------------------
646 646
647 647 def add_job(self, idx):
648 648 """Called after self.targets[idx] just got the job with header.
649 649 Override with subclasses. The default ordering is simple LRU.
650 650 The default loads are the number of outstanding jobs."""
651 651 self.loads[idx] += 1
652 652 for lis in (self.targets, self.loads):
653 653 lis.append(lis.pop(idx))
654 654
655 655
656 656 def finish_job(self, idx):
657 657 """Called after self.targets[idx] just finished a job.
658 658 Override with subclasses."""
659 659 self.loads[idx] -= 1
660 660
661 661
662 662
663 663 def launch_scheduler(in_addr, out_addr, mon_addr, not_addr, config=None,
664 664 logname='root', log_url=None, loglevel=logging.DEBUG,
665 665 identity=b'task', in_thread=False):
666 666
667 667 ZMQStream = zmqstream.ZMQStream
668 668
669 669 if config:
670 670 # unwrap dict back into Config
671 671 config = Config(config)
672 672
673 673 if in_thread:
674 674 # use instance() to get the same Context/Loop as our parent
675 675 ctx = zmq.Context.instance()
676 676 loop = ioloop.IOLoop.instance()
677 677 else:
678 678 # in a process, don't use instance()
679 679 # for safety with multiprocessing
680 680 ctx = zmq.Context()
681 681 loop = ioloop.IOLoop()
682 682 ins = ZMQStream(ctx.socket(zmq.XREP),loop)
683 683 ins.setsockopt(zmq.IDENTITY, identity)
684 684 ins.bind(in_addr)
685 685
686 686 outs = ZMQStream(ctx.socket(zmq.XREP),loop)
687 687 outs.setsockopt(zmq.IDENTITY, identity)
688 688 outs.bind(out_addr)
689 689 mons = zmqstream.ZMQStream(ctx.socket(zmq.PUB),loop)
690 690 mons.connect(mon_addr)
691 691 nots = zmqstream.ZMQStream(ctx.socket(zmq.SUB),loop)
692 692 nots.setsockopt(zmq.SUBSCRIBE, b'')
693 693 nots.connect(not_addr)
694 694
695 695 # setup logging.
696 696 if in_thread:
697 697 log = Application.instance().log
698 698 else:
699 699 if log_url:
700 700 log = connect_logger(logname, ctx, log_url, root="scheduler", loglevel=loglevel)
701 701 else:
702 702 log = local_logger(logname, loglevel)
703 703
704 704 scheduler = TaskScheduler(client_stream=ins, engine_stream=outs,
705 705 mon_stream=mons, notifier_stream=nots,
706 706 loop=loop, log=log,
707 707 config=config)
708 708 scheduler.start()
709 709 if not in_thread:
710 710 try:
711 711 loop.start()
712 712 except KeyboardInterrupt:
713 713 print ("interrupted, exiting...", file=sys.__stderr__)
714 714
@@ -1,174 +1,174 b''
1 1 #!/usr/bin/env python
2 2 """A simple engine that talks to a controller over 0MQ.
3 3 it handles registration, etc. and launches a kernel
4 4 connected to the Controller's Schedulers.
5 5
6 6 Authors:
7 7
8 8 * Min RK
9 9 """
10 10 #-----------------------------------------------------------------------------
11 11 # Copyright (C) 2010-2011 The IPython Development Team
12 12 #
13 13 # Distributed under the terms of the BSD License. The full license is in
14 14 # the file COPYING, distributed as part of this software.
15 15 #-----------------------------------------------------------------------------
16 16
17 17 from __future__ import print_function
18 18
19 19 import sys
20 20 import time
21 21
22 22 import zmq
23 23 from zmq.eventloop import ioloop, zmqstream
24 24
25 25 # internal
26 26 from IPython.utils.traitlets import Instance, Dict, Int, Type, CFloat, Unicode, CBytes
27 27 # from IPython.utils.localinterfaces import LOCALHOST
28 28
29 29 from IPython.parallel.controller.heartmonitor import Heart
30 30 from IPython.parallel.factory import RegistrationFactory
31 31 from IPython.parallel.util import disambiguate_url, asbytes
32 32
33 33 from IPython.zmq.session import Message
34 34
35 35 from .streamkernel import Kernel
36 36
37 37 class EngineFactory(RegistrationFactory):
38 38 """IPython engine"""
39 39
40 40 # configurables:
41 41 out_stream_factory=Type('IPython.zmq.iostream.OutStream', config=True,
42 42 help="""The OutStream for handling stdout/err.
43 43 Typically 'IPython.zmq.iostream.OutStream'""")
44 44 display_hook_factory=Type('IPython.zmq.displayhook.ZMQDisplayHook', config=True,
45 45 help="""The class for handling displayhook.
46 46 Typically 'IPython.zmq.displayhook.ZMQDisplayHook'""")
47 47 location=Unicode(config=True,
48 48 help="""The location (an IP address) of the controller. This is
49 49 used for disambiguating URLs, to determine whether
50 50 loopback should be used to connect or the public address.""")
51 51 timeout=CFloat(2,config=True,
52 52 help="""The time (in seconds) to wait for the Controller to respond
53 53 to registration requests before giving up.""")
54 54
55 55 # not configurable:
56 56 user_ns=Dict()
57 57 id=Int(allow_none=True)
58 58 registrar=Instance('zmq.eventloop.zmqstream.ZMQStream')
59 59 kernel=Instance(Kernel)
60 60
61 61 bident = CBytes()
62 62 ident = Unicode()
63 63 def _ident_changed(self, name, old, new):
64 64 self.bident = asbytes(new)
65 65
66 66
67 67 def __init__(self, **kwargs):
68 68 super(EngineFactory, self).__init__(**kwargs)
69 69 self.ident = self.session.session
70 70 ctx = self.context
71 71
72 72 reg = ctx.socket(zmq.XREQ)
73 73 reg.setsockopt(zmq.IDENTITY, self.bident)
74 74 reg.connect(self.url)
75 75 self.registrar = zmqstream.ZMQStream(reg, self.loop)
76 76
77 77 def register(self):
78 78 """send the registration_request"""
79 79
80 80 self.log.info("Registering with controller at %s"%self.url)
81 81 content = dict(queue=self.ident, heartbeat=self.ident, control=self.ident)
82 82 self.registrar.on_recv(self.complete_registration)
83 83 # print (self.session.key)
84 84 self.session.send(self.registrar, "registration_request",content=content)
85 85
86 86 def complete_registration(self, msg):
87 87 # print msg
88 88 self._abort_dc.stop()
89 89 ctx = self.context
90 90 loop = self.loop
91 91 identity = self.bident
92 92 idents,msg = self.session.feed_identities(msg)
93 msg = Message(self.session.unpack_message(msg))
93 msg = Message(self.session.unserialize(msg))
94 94
95 95 if msg.content.status == 'ok':
96 96 self.id = int(msg.content.id)
97 97
98 98 # create Shell Streams (MUX, Task, etc.):
99 99 queue_addr = msg.content.mux
100 100 shell_addrs = [ str(queue_addr) ]
101 101 task_addr = msg.content.task
102 102 if task_addr:
103 103 shell_addrs.append(str(task_addr))
104 104
105 105 # Uncomment this to go back to two-socket model
106 106 # shell_streams = []
107 107 # for addr in shell_addrs:
108 108 # stream = zmqstream.ZMQStream(ctx.socket(zmq.XREP), loop)
109 109 # stream.setsockopt(zmq.IDENTITY, identity)
110 110 # stream.connect(disambiguate_url(addr, self.location))
111 111 # shell_streams.append(stream)
112 112
113 113 # Now use only one shell stream for mux and tasks
114 114 stream = zmqstream.ZMQStream(ctx.socket(zmq.XREP), loop)
115 115 stream.setsockopt(zmq.IDENTITY, identity)
116 116 shell_streams = [stream]
117 117 for addr in shell_addrs:
118 118 stream.connect(disambiguate_url(addr, self.location))
119 119 # end single stream-socket
120 120
121 121 # control stream:
122 122 control_addr = str(msg.content.control)
123 123 control_stream = zmqstream.ZMQStream(ctx.socket(zmq.XREP), loop)
124 124 control_stream.setsockopt(zmq.IDENTITY, identity)
125 125 control_stream.connect(disambiguate_url(control_addr, self.location))
126 126
127 127 # create iopub stream:
128 128 iopub_addr = msg.content.iopub
129 129 iopub_stream = zmqstream.ZMQStream(ctx.socket(zmq.PUB), loop)
130 130 iopub_stream.setsockopt(zmq.IDENTITY, identity)
131 131 iopub_stream.connect(disambiguate_url(iopub_addr, self.location))
132 132
133 133 # launch heartbeat
134 134 hb_addrs = msg.content.heartbeat
135 135 # print (hb_addrs)
136 136
137 137 # # Redirect input streams and set a display hook.
138 138 if self.out_stream_factory:
139 139 sys.stdout = self.out_stream_factory(self.session, iopub_stream, u'stdout')
140 140 sys.stdout.topic = 'engine.%i.stdout'%self.id
141 141 sys.stderr = self.out_stream_factory(self.session, iopub_stream, u'stderr')
142 142 sys.stderr.topic = 'engine.%i.stderr'%self.id
143 143 if self.display_hook_factory:
144 144 sys.displayhook = self.display_hook_factory(self.session, iopub_stream)
145 145 sys.displayhook.topic = 'engine.%i.pyout'%self.id
146 146
147 147 self.kernel = Kernel(config=self.config, int_id=self.id, ident=self.ident, session=self.session,
148 148 control_stream=control_stream, shell_streams=shell_streams, iopub_stream=iopub_stream,
149 149 loop=loop, user_ns = self.user_ns, log=self.log)
150 150 self.kernel.start()
151 151 hb_addrs = [ disambiguate_url(addr, self.location) for addr in hb_addrs ]
152 152 heart = Heart(*map(str, hb_addrs), heart_id=identity)
153 153 heart.start()
154 154
155 155
156 156 else:
157 157 self.log.fatal("Registration Failed: %s"%msg)
158 158 raise Exception("Registration Failed: %s"%msg)
159 159
160 160 self.log.info("Completed registration with id %i"%self.id)
161 161
162 162
163 163 def abort(self):
164 164 self.log.fatal("Registration timed out after %.1f seconds"%self.timeout)
165 165 self.session.send(self.registrar, "unregistration_request", content=dict(id=self.id))
166 166 time.sleep(1)
167 167 sys.exit(255)
168 168
169 169 def start(self):
170 170 dc = ioloop.DelayedCallback(self.register, 0, self.loop)
171 171 dc.start()
172 172 self._abort_dc = ioloop.DelayedCallback(self.abort, self.timeout*1000, self.loop)
173 173 self._abort_dc.start()
174 174
@@ -1,230 +1,230 b''
1 1 """KernelStarter class that intercepts Control Queue messages, and handles process management.
2 2
3 3 Authors:
4 4
5 5 * Min RK
6 6 """
7 7 #-----------------------------------------------------------------------------
8 8 # Copyright (C) 2010-2011 The IPython Development Team
9 9 #
10 10 # Distributed under the terms of the BSD License. The full license is in
11 11 # the file COPYING, distributed as part of this software.
12 12 #-----------------------------------------------------------------------------
13 13
14 14 from zmq.eventloop import ioloop
15 15
16 16 from IPython.zmq.session import Session
17 17
18 18 class KernelStarter(object):
19 19 """Object for resetting/killing the Kernel."""
20 20
21 21
22 22 def __init__(self, session, upstream, downstream, *kernel_args, **kernel_kwargs):
23 23 self.session = session
24 24 self.upstream = upstream
25 25 self.downstream = downstream
26 26 self.kernel_args = kernel_args
27 27 self.kernel_kwargs = kernel_kwargs
28 28 self.handlers = {}
29 29 for method in 'shutdown_request shutdown_reply'.split():
30 30 self.handlers[method] = getattr(self, method)
31 31
32 32 def start(self):
33 33 self.upstream.on_recv(self.dispatch_request)
34 34 self.downstream.on_recv(self.dispatch_reply)
35 35
36 36 #--------------------------------------------------------------------------
37 37 # Dispatch methods
38 38 #--------------------------------------------------------------------------
39 39
40 40 def dispatch_request(self, raw_msg):
41 41 idents, msg = self.session.feed_identities()
42 42 try:
43 msg = self.session.unpack_message(msg, content=False)
43 msg = self.session.unserialize(msg, content=False)
44 44 except:
45 45 print ("bad msg: %s"%msg)
46 46
47 47 msgtype = msg['header']['msg_type']
48 48 handler = self.handlers.get(msgtype, None)
49 49 if handler is None:
50 50 self.downstream.send_multipart(raw_msg, copy=False)
51 51 else:
52 52 handler(msg)
53 53
54 54 def dispatch_reply(self, raw_msg):
55 55 idents, msg = self.session.feed_identities()
56 56 try:
57 msg = self.session.unpack_message(msg, content=False)
57 msg = self.session.unserialize(msg, content=False)
58 58 except:
59 59 print ("bad msg: %s"%msg)
60 60
61 61 msgtype = msg['header']['msg_type']
62 62 handler = self.handlers.get(msgtype, None)
63 63 if handler is None:
64 64 self.upstream.send_multipart(raw_msg, copy=False)
65 65 else:
66 66 handler(msg)
67 67
68 68 #--------------------------------------------------------------------------
69 69 # Handlers
70 70 #--------------------------------------------------------------------------
71 71
72 72 def shutdown_request(self, msg):
73 73 """"""
74 74 self.downstream.send_multipart(msg)
75 75
76 76 #--------------------------------------------------------------------------
77 77 # Kernel process management methods, from KernelManager:
78 78 #--------------------------------------------------------------------------
79 79
80 80 def _check_local(addr):
81 81 if isinstance(addr, tuple):
82 82 addr = addr[0]
83 83 return addr in LOCAL_IPS
84 84
85 85 def start_kernel(self, **kw):
86 86 """Starts a kernel process and configures the manager to use it.
87 87
88 88 If random ports (port=0) are being used, this method must be called
89 89 before the channels are created.
90 90
91 91 Parameters:
92 92 -----------
93 93 ipython : bool, optional (default True)
94 94 Whether to use an IPython kernel instead of a plain Python kernel.
95 95 """
96 96 self.kernel = Process(target=make_kernel, args=self.kernel_args,
97 97 kwargs=self.kernel_kwargs)
98 98
99 99 def shutdown_kernel(self, restart=False):
100 100 """ Attempts to the stop the kernel process cleanly. If the kernel
101 101 cannot be stopped, it is killed, if possible.
102 102 """
103 103 # FIXME: Shutdown does not work on Windows due to ZMQ errors!
104 104 if sys.platform == 'win32':
105 105 self.kill_kernel()
106 106 return
107 107
108 108 # Don't send any additional kernel kill messages immediately, to give
109 109 # the kernel a chance to properly execute shutdown actions. Wait for at
110 110 # most 1s, checking every 0.1s.
111 111 self.xreq_channel.shutdown(restart=restart)
112 112 for i in range(10):
113 113 if self.is_alive:
114 114 time.sleep(0.1)
115 115 else:
116 116 break
117 117 else:
118 118 # OK, we've waited long enough.
119 119 if self.has_kernel:
120 120 self.kill_kernel()
121 121
122 122 def restart_kernel(self, now=False):
123 123 """Restarts a kernel with the same arguments that were used to launch
124 124 it. If the old kernel was launched with random ports, the same ports
125 125 will be used for the new kernel.
126 126
127 127 Parameters
128 128 ----------
129 129 now : bool, optional
130 130 If True, the kernel is forcefully restarted *immediately*, without
131 131 having a chance to do any cleanup action. Otherwise the kernel is
132 132 given 1s to clean up before a forceful restart is issued.
133 133
134 134 In all cases the kernel is restarted, the only difference is whether
135 135 it is given a chance to perform a clean shutdown or not.
136 136 """
137 137 if self._launch_args is None:
138 138 raise RuntimeError("Cannot restart the kernel. "
139 139 "No previous call to 'start_kernel'.")
140 140 else:
141 141 if self.has_kernel:
142 142 if now:
143 143 self.kill_kernel()
144 144 else:
145 145 self.shutdown_kernel(restart=True)
146 146 self.start_kernel(**self._launch_args)
147 147
148 148 # FIXME: Messages get dropped in Windows due to probable ZMQ bug
149 149 # unless there is some delay here.
150 150 if sys.platform == 'win32':
151 151 time.sleep(0.2)
152 152
153 153 @property
154 154 def has_kernel(self):
155 155 """Returns whether a kernel process has been specified for the kernel
156 156 manager.
157 157 """
158 158 return self.kernel is not None
159 159
160 160 def kill_kernel(self):
161 161 """ Kill the running kernel. """
162 162 if self.has_kernel:
163 163 # Pause the heart beat channel if it exists.
164 164 if self._hb_channel is not None:
165 165 self._hb_channel.pause()
166 166
167 167 # Attempt to kill the kernel.
168 168 try:
169 169 self.kernel.kill()
170 170 except OSError, e:
171 171 # In Windows, we will get an Access Denied error if the process
172 172 # has already terminated. Ignore it.
173 173 if not (sys.platform == 'win32' and e.winerror == 5):
174 174 raise
175 175 self.kernel = None
176 176 else:
177 177 raise RuntimeError("Cannot kill kernel. No kernel is running!")
178 178
179 179 def interrupt_kernel(self):
180 180 """ Interrupts the kernel. Unlike ``signal_kernel``, this operation is
181 181 well supported on all platforms.
182 182 """
183 183 if self.has_kernel:
184 184 if sys.platform == 'win32':
185 185 from parentpoller import ParentPollerWindows as Poller
186 186 Poller.send_interrupt(self.kernel.win32_interrupt_event)
187 187 else:
188 188 self.kernel.send_signal(signal.SIGINT)
189 189 else:
190 190 raise RuntimeError("Cannot interrupt kernel. No kernel is running!")
191 191
192 192 def signal_kernel(self, signum):
193 193 """ Sends a signal to the kernel. Note that since only SIGTERM is
194 194 supported on Windows, this function is only useful on Unix systems.
195 195 """
196 196 if self.has_kernel:
197 197 self.kernel.send_signal(signum)
198 198 else:
199 199 raise RuntimeError("Cannot signal kernel. No kernel is running!")
200 200
201 201 @property
202 202 def is_alive(self):
203 203 """Is the kernel process still running?"""
204 204 # FIXME: not using a heartbeat means this method is broken for any
205 205 # remote kernel, it's only capable of handling local kernels.
206 206 if self.has_kernel:
207 207 if self.kernel.poll() is None:
208 208 return True
209 209 else:
210 210 return False
211 211 else:
212 212 # We didn't start the kernel with this KernelManager so we don't
213 213 # know if it is running. We should use a heartbeat for this case.
214 214 return True
215 215
216 216
217 217 def make_starter(up_addr, down_addr, *args, **kwargs):
218 218 """entry point function for launching a kernelstarter in a subprocess"""
219 219 loop = ioloop.IOLoop.instance()
220 220 ctx = zmq.Context()
221 221 session = Session()
222 222 upstream = zmqstream.ZMQStream(ctx.socket(zmq.XREQ),loop)
223 223 upstream.connect(up_addr)
224 224 downstream = zmqstream.ZMQStream(ctx.socket(zmq.XREQ),loop)
225 225 downstream.connect(down_addr)
226 226
227 227 starter = KernelStarter(session, upstream, downstream, *args, **kwargs)
228 228 starter.start()
229 229 loop.start()
230 230
@@ -1,438 +1,438 b''
1 1 #!/usr/bin/env python
2 2 """
3 3 Kernel adapted from kernel.py to use ZMQ Streams
4 4
5 5 Authors:
6 6
7 7 * Min RK
8 8 * Brian Granger
9 9 * Fernando Perez
10 10 * Evan Patterson
11 11 """
12 12 #-----------------------------------------------------------------------------
13 13 # Copyright (C) 2010-2011 The IPython Development Team
14 14 #
15 15 # Distributed under the terms of the BSD License. The full license is in
16 16 # the file COPYING, distributed as part of this software.
17 17 #-----------------------------------------------------------------------------
18 18
19 19 #-----------------------------------------------------------------------------
20 20 # Imports
21 21 #-----------------------------------------------------------------------------
22 22
23 23 # Standard library imports.
24 24 from __future__ import print_function
25 25
26 26 import sys
27 27 import time
28 28
29 29 from code import CommandCompiler
30 30 from datetime import datetime
31 31 from pprint import pprint
32 32
33 33 # System library imports.
34 34 import zmq
35 35 from zmq.eventloop import ioloop, zmqstream
36 36
37 37 # Local imports.
38 38 from IPython.utils.traitlets import Instance, List, Int, Dict, Set, Unicode, CBytes
39 39 from IPython.zmq.completer import KernelCompleter
40 40
41 41 from IPython.parallel.error import wrap_exception
42 42 from IPython.parallel.factory import SessionFactory
43 43 from IPython.parallel.util import serialize_object, unpack_apply_message, asbytes
44 44
45 45 def printer(*args):
46 46 pprint(args, stream=sys.__stdout__)
47 47
48 48
49 49 class _Passer(zmqstream.ZMQStream):
50 50 """Empty class that implements `send()` that does nothing.
51 51
52 52 Subclass ZMQStream for Session typechecking
53 53
54 54 """
55 55 def __init__(self, *args, **kwargs):
56 56 pass
57 57
58 58 def send(self, *args, **kwargs):
59 59 pass
60 60 send_multipart = send
61 61
62 62
63 63 #-----------------------------------------------------------------------------
64 64 # Main kernel class
65 65 #-----------------------------------------------------------------------------
66 66
67 67 class Kernel(SessionFactory):
68 68
69 69 #---------------------------------------------------------------------------
70 70 # Kernel interface
71 71 #---------------------------------------------------------------------------
72 72
73 73 # kwargs:
74 74 exec_lines = List(Unicode, config=True,
75 75 help="List of lines to execute")
76 76
77 77 # identities:
78 78 int_id = Int(-1)
79 79 bident = CBytes()
80 80 ident = Unicode()
81 81 def _ident_changed(self, name, old, new):
82 82 self.bident = asbytes(new)
83 83
84 84 user_ns = Dict(config=True, help="""Set the user's namespace of the Kernel""")
85 85
86 86 control_stream = Instance(zmqstream.ZMQStream)
87 87 task_stream = Instance(zmqstream.ZMQStream)
88 88 iopub_stream = Instance(zmqstream.ZMQStream)
89 89 client = Instance('IPython.parallel.Client')
90 90
91 91 # internals
92 92 shell_streams = List()
93 93 compiler = Instance(CommandCompiler, (), {})
94 94 completer = Instance(KernelCompleter)
95 95
96 96 aborted = Set()
97 97 shell_handlers = Dict()
98 98 control_handlers = Dict()
99 99
100 100 def _set_prefix(self):
101 101 self.prefix = "engine.%s"%self.int_id
102 102
103 103 def _connect_completer(self):
104 104 self.completer = KernelCompleter(self.user_ns)
105 105
106 106 def __init__(self, **kwargs):
107 107 super(Kernel, self).__init__(**kwargs)
108 108 self._set_prefix()
109 109 self._connect_completer()
110 110
111 111 self.on_trait_change(self._set_prefix, 'id')
112 112 self.on_trait_change(self._connect_completer, 'user_ns')
113 113
114 114 # Build dict of handlers for message types
115 115 for msg_type in ['execute_request', 'complete_request', 'apply_request',
116 116 'clear_request']:
117 117 self.shell_handlers[msg_type] = getattr(self, msg_type)
118 118
119 119 for msg_type in ['shutdown_request', 'abort_request']+self.shell_handlers.keys():
120 120 self.control_handlers[msg_type] = getattr(self, msg_type)
121 121
122 122 self._initial_exec_lines()
123 123
124 124 def _wrap_exception(self, method=None):
125 125 e_info = dict(engine_uuid=self.ident, engine_id=self.int_id, method=method)
126 126 content=wrap_exception(e_info)
127 127 return content
128 128
129 129 def _initial_exec_lines(self):
130 130 s = _Passer()
131 131 content = dict(silent=True, user_variable=[],user_expressions=[])
132 132 for line in self.exec_lines:
133 133 self.log.debug("executing initialization: %s"%line)
134 134 content.update({'code':line})
135 135 msg = self.session.msg('execute_request', content)
136 136 self.execute_request(s, [], msg)
137 137
138 138
139 139 #-------------------- control handlers -----------------------------
140 140 def abort_queues(self):
141 141 for stream in self.shell_streams:
142 142 if stream:
143 143 self.abort_queue(stream)
144 144
145 145 def abort_queue(self, stream):
146 146 while True:
147 147 idents,msg = self.session.recv(stream, zmq.NOBLOCK, content=True)
148 148 if msg is None:
149 149 return
150 150
151 151 self.log.info("Aborting:")
152 152 self.log.info(str(msg))
153 153 msg_type = msg['header']['msg_type']
154 154 reply_type = msg_type.split('_')[0] + '_reply'
155 155 # reply_msg = self.session.msg(reply_type, {'status' : 'aborted'}, msg)
156 156 # self.reply_socket.send(ident,zmq.SNDMORE)
157 157 # self.reply_socket.send_json(reply_msg)
158 158 reply_msg = self.session.send(stream, reply_type,
159 159 content={'status' : 'aborted'}, parent=msg, ident=idents)
160 160 self.log.debug(str(reply_msg))
161 161 # We need to wait a bit for requests to come in. This can probably
162 162 # be set shorter for true asynchronous clients.
163 163 time.sleep(0.05)
164 164
165 165 def abort_request(self, stream, ident, parent):
166 166 """abort a specifig msg by id"""
167 167 msg_ids = parent['content'].get('msg_ids', None)
168 168 if isinstance(msg_ids, basestring):
169 169 msg_ids = [msg_ids]
170 170 if not msg_ids:
171 171 self.abort_queues()
172 172 for mid in msg_ids:
173 173 self.aborted.add(str(mid))
174 174
175 175 content = dict(status='ok')
176 176 reply_msg = self.session.send(stream, 'abort_reply', content=content,
177 177 parent=parent, ident=ident)
178 178 self.log.debug(str(reply_msg))
179 179
180 180 def shutdown_request(self, stream, ident, parent):
181 181 """kill ourself. This should really be handled in an external process"""
182 182 try:
183 183 self.abort_queues()
184 184 except:
185 185 content = self._wrap_exception('shutdown')
186 186 else:
187 187 content = dict(parent['content'])
188 188 content['status'] = 'ok'
189 189 msg = self.session.send(stream, 'shutdown_reply',
190 190 content=content, parent=parent, ident=ident)
191 191 self.log.debug(str(msg))
192 192 dc = ioloop.DelayedCallback(lambda : sys.exit(0), 1000, self.loop)
193 193 dc.start()
194 194
195 195 def dispatch_control(self, msg):
196 196 idents,msg = self.session.feed_identities(msg, copy=False)
197 197 try:
198 msg = self.session.unpack_message(msg, content=True, copy=False)
198 msg = self.session.unserialize(msg, content=True, copy=False)
199 199 except:
200 200 self.log.error("Invalid Message", exc_info=True)
201 201 return
202 202 else:
203 203 self.log.debug("Control received, %s", msg)
204 204
205 205 header = msg['header']
206 206 msg_id = header['msg_id']
207 207
208 208 handler = self.control_handlers.get(msg['header']['msg_type'], None)
209 209 if handler is None:
210 210 self.log.error("UNKNOWN CONTROL MESSAGE TYPE: %r"%msg['header']['msg_type'])
211 211 else:
212 212 handler(self.control_stream, idents, msg)
213 213
214 214
215 215 #-------------------- queue helpers ------------------------------
216 216
217 217 def check_dependencies(self, dependencies):
218 218 if not dependencies:
219 219 return True
220 220 if len(dependencies) == 2 and dependencies[0] in 'any all'.split():
221 221 anyorall = dependencies[0]
222 222 dependencies = dependencies[1]
223 223 else:
224 224 anyorall = 'all'
225 225 results = self.client.get_results(dependencies,status_only=True)
226 226 if results['status'] != 'ok':
227 227 return False
228 228
229 229 if anyorall == 'any':
230 230 if not results['completed']:
231 231 return False
232 232 else:
233 233 if results['pending']:
234 234 return False
235 235
236 236 return True
237 237
238 238 def check_aborted(self, msg_id):
239 239 return msg_id in self.aborted
240 240
241 241 #-------------------- queue handlers -----------------------------
242 242
243 243 def clear_request(self, stream, idents, parent):
244 244 """Clear our namespace."""
245 245 self.user_ns = {}
246 246 msg = self.session.send(stream, 'clear_reply', ident=idents, parent=parent,
247 247 content = dict(status='ok'))
248 248 self._initial_exec_lines()
249 249
250 250 def execute_request(self, stream, ident, parent):
251 251 self.log.debug('execute request %s'%parent)
252 252 try:
253 253 code = parent[u'content'][u'code']
254 254 except:
255 255 self.log.error("Got bad msg: %s"%parent, exc_info=True)
256 256 return
257 257 self.session.send(self.iopub_stream, u'pyin', {u'code':code},parent=parent,
258 258 ident=asbytes('%s.pyin'%self.prefix))
259 259 started = datetime.now()
260 260 try:
261 261 comp_code = self.compiler(code, '<zmq-kernel>')
262 262 # allow for not overriding displayhook
263 263 if hasattr(sys.displayhook, 'set_parent'):
264 264 sys.displayhook.set_parent(parent)
265 265 sys.stdout.set_parent(parent)
266 266 sys.stderr.set_parent(parent)
267 267 exec comp_code in self.user_ns, self.user_ns
268 268 except:
269 269 exc_content = self._wrap_exception('execute')
270 270 # exc_msg = self.session.msg(u'pyerr', exc_content, parent)
271 271 self.session.send(self.iopub_stream, u'pyerr', exc_content, parent=parent,
272 272 ident=asbytes('%s.pyerr'%self.prefix))
273 273 reply_content = exc_content
274 274 else:
275 275 reply_content = {'status' : 'ok'}
276 276
277 277 reply_msg = self.session.send(stream, u'execute_reply', reply_content, parent=parent,
278 278 ident=ident, subheader = dict(started=started))
279 279 self.log.debug(str(reply_msg))
280 280 if reply_msg['content']['status'] == u'error':
281 281 self.abort_queues()
282 282
283 283 def complete_request(self, stream, ident, parent):
284 284 matches = {'matches' : self.complete(parent),
285 285 'status' : 'ok'}
286 286 completion_msg = self.session.send(stream, 'complete_reply',
287 287 matches, parent, ident)
288 288 # print >> sys.__stdout__, completion_msg
289 289
290 290 def complete(self, msg):
291 291 return self.completer.complete(msg.content.line, msg.content.text)
292 292
293 293 def apply_request(self, stream, ident, parent):
294 294 # flush previous reply, so this request won't block it
295 295 stream.flush(zmq.POLLOUT)
296 296 try:
297 297 content = parent[u'content']
298 298 bufs = parent[u'buffers']
299 299 msg_id = parent['header']['msg_id']
300 300 # bound = parent['header'].get('bound', False)
301 301 except:
302 302 self.log.error("Got bad msg: %s"%parent, exc_info=True)
303 303 return
304 304 # pyin_msg = self.session.msg(u'pyin',{u'code':code}, parent=parent)
305 305 # self.iopub_stream.send(pyin_msg)
306 306 # self.session.send(self.iopub_stream, u'pyin', {u'code':code},parent=parent)
307 307 sub = {'dependencies_met' : True, 'engine' : self.ident,
308 308 'started': datetime.now()}
309 309 try:
310 310 # allow for not overriding displayhook
311 311 if hasattr(sys.displayhook, 'set_parent'):
312 312 sys.displayhook.set_parent(parent)
313 313 sys.stdout.set_parent(parent)
314 314 sys.stderr.set_parent(parent)
315 315 # exec "f(*args,**kwargs)" in self.user_ns, self.user_ns
316 316 working = self.user_ns
317 317 # suffix =
318 318 prefix = "_"+str(msg_id).replace("-","")+"_"
319 319
320 320 f,args,kwargs = unpack_apply_message(bufs, working, copy=False)
321 321 # if bound:
322 322 # bound_ns = Namespace(working)
323 323 # args = [bound_ns]+list(args)
324 324
325 325 fname = getattr(f, '__name__', 'f')
326 326
327 327 fname = prefix+"f"
328 328 argname = prefix+"args"
329 329 kwargname = prefix+"kwargs"
330 330 resultname = prefix+"result"
331 331
332 332 ns = { fname : f, argname : args, kwargname : kwargs , resultname : None }
333 333 # print ns
334 334 working.update(ns)
335 335 code = "%s=%s(*%s,**%s)"%(resultname, fname, argname, kwargname)
336 336 try:
337 337 exec code in working,working
338 338 result = working.get(resultname)
339 339 finally:
340 340 for key in ns.iterkeys():
341 341 working.pop(key)
342 342 # if bound:
343 343 # working.update(bound_ns)
344 344
345 345 packed_result,buf = serialize_object(result)
346 346 result_buf = [packed_result]+buf
347 347 except:
348 348 exc_content = self._wrap_exception('apply')
349 349 # exc_msg = self.session.msg(u'pyerr', exc_content, parent)
350 350 self.session.send(self.iopub_stream, u'pyerr', exc_content, parent=parent,
351 351 ident=asbytes('%s.pyerr'%self.prefix))
352 352 reply_content = exc_content
353 353 result_buf = []
354 354
355 355 if exc_content['ename'] == 'UnmetDependency':
356 356 sub['dependencies_met'] = False
357 357 else:
358 358 reply_content = {'status' : 'ok'}
359 359
360 360 # put 'ok'/'error' status in header, for scheduler introspection:
361 361 sub['status'] = reply_content['status']
362 362
363 363 reply_msg = self.session.send(stream, u'apply_reply', reply_content,
364 364 parent=parent, ident=ident,buffers=result_buf, subheader=sub)
365 365
366 366 # flush i/o
367 367 # should this be before reply_msg is sent, like in the single-kernel code,
368 368 # or should nothing get in the way of real results?
369 369 sys.stdout.flush()
370 370 sys.stderr.flush()
371 371
372 372 def dispatch_queue(self, stream, msg):
373 373 self.control_stream.flush()
374 374 idents,msg = self.session.feed_identities(msg, copy=False)
375 375 try:
376 msg = self.session.unpack_message(msg, content=True, copy=False)
376 msg = self.session.unserialize(msg, content=True, copy=False)
377 377 except:
378 378 self.log.error("Invalid Message", exc_info=True)
379 379 return
380 380 else:
381 381 self.log.debug("Message received, %s", msg)
382 382
383 383
384 384 header = msg['header']
385 385 msg_id = header['msg_id']
386 386 if self.check_aborted(msg_id):
387 387 self.aborted.remove(msg_id)
388 388 # is it safe to assume a msg_id will not be resubmitted?
389 389 reply_type = msg['header']['msg_type'].split('_')[0] + '_reply'
390 390 status = {'status' : 'aborted'}
391 391 reply_msg = self.session.send(stream, reply_type, subheader=status,
392 392 content=status, parent=msg, ident=idents)
393 393 return
394 394 handler = self.shell_handlers.get(msg['header']['msg_type'], None)
395 395 if handler is None:
396 396 self.log.error("UNKNOWN MESSAGE TYPE: %r"%msg['header']['msg_type'])
397 397 else:
398 398 handler(stream, idents, msg)
399 399
400 400 def start(self):
401 401 #### stream mode:
402 402 if self.control_stream:
403 403 self.control_stream.on_recv(self.dispatch_control, copy=False)
404 404 self.control_stream.on_err(printer)
405 405
406 406 def make_dispatcher(stream):
407 407 def dispatcher(msg):
408 408 return self.dispatch_queue(stream, msg)
409 409 return dispatcher
410 410
411 411 for s in self.shell_streams:
412 412 s.on_recv(make_dispatcher(s), copy=False)
413 413 s.on_err(printer)
414 414
415 415 if self.iopub_stream:
416 416 self.iopub_stream.on_err(printer)
417 417
418 418 #### while True mode:
419 419 # while True:
420 420 # idle = True
421 421 # try:
422 422 # msg = self.shell_stream.socket.recv_multipart(
423 423 # zmq.NOBLOCK, copy=False)
424 424 # except zmq.ZMQError, e:
425 425 # if e.errno != zmq.EAGAIN:
426 426 # raise e
427 427 # else:
428 428 # idle=False
429 429 # self.dispatch_queue(self.shell_stream, msg)
430 430 #
431 431 # if not self.task_stream.empty():
432 432 # idle=False
433 433 # msg = self.task_stream.recv_multipart()
434 434 # self.dispatch_queue(self.task_stream, msg)
435 435 # if idle:
436 436 # # don't busywait
437 437 # time.sleep(1e-3)
438 438
@@ -1,676 +1,691 b''
1 1 #!/usr/bin/env python
2 2 """Session object for building, serializing, sending, and receiving messages in
3 3 IPython. The Session object supports serialization, HMAC signatures, and
4 4 metadata on messages.
5 5
6 6 Also defined here are utilities for working with Sessions:
7 7 * A SessionFactory to be used as a base class for configurables that work with
8 8 Sessions.
9 9 * A Message object for convenience that allows attribute-access to the msg dict.
10 10
11 11 Authors:
12 12
13 13 * Min RK
14 14 * Brian Granger
15 15 * Fernando Perez
16 16 """
17 17 #-----------------------------------------------------------------------------
18 18 # Copyright (C) 2010-2011 The IPython Development Team
19 19 #
20 20 # Distributed under the terms of the BSD License. The full license is in
21 21 # the file COPYING, distributed as part of this software.
22 22 #-----------------------------------------------------------------------------
23 23
24 24 #-----------------------------------------------------------------------------
25 25 # Imports
26 26 #-----------------------------------------------------------------------------
27 27
28 28 import hmac
29 29 import logging
30 30 import os
31 31 import pprint
32 32 import uuid
33 33 from datetime import datetime
34 34
35 35 try:
36 36 import cPickle
37 37 pickle = cPickle
38 38 except:
39 39 cPickle = None
40 40 import pickle
41 41
42 42 import zmq
43 43 from zmq.utils import jsonapi
44 44 from zmq.eventloop.ioloop import IOLoop
45 45 from zmq.eventloop.zmqstream import ZMQStream
46 46
47 47 from IPython.config.configurable import Configurable, LoggingConfigurable
48 48 from IPython.utils.importstring import import_item
49 49 from IPython.utils.jsonutil import extract_dates, squash_dates, date_default
50 50 from IPython.utils.traitlets import (CBytes, Unicode, Bool, Any, Instance, Set,
51 51 DottedObjectName)
52 52
53 53 #-----------------------------------------------------------------------------
54 54 # utility functions
55 55 #-----------------------------------------------------------------------------
56 56
57 57 def squash_unicode(obj):
58 58 """coerce unicode back to bytestrings."""
59 59 if isinstance(obj,dict):
60 60 for key in obj.keys():
61 61 obj[key] = squash_unicode(obj[key])
62 62 if isinstance(key, unicode):
63 63 obj[squash_unicode(key)] = obj.pop(key)
64 64 elif isinstance(obj, list):
65 65 for i,v in enumerate(obj):
66 66 obj[i] = squash_unicode(v)
67 67 elif isinstance(obj, unicode):
68 68 obj = obj.encode('utf8')
69 69 return obj
70 70
71 71 #-----------------------------------------------------------------------------
72 72 # globals and defaults
73 73 #-----------------------------------------------------------------------------
74 74 key = 'on_unknown' if jsonapi.jsonmod.__name__ == 'jsonlib' else 'default'
75 75 json_packer = lambda obj: jsonapi.dumps(obj, **{key:date_default})
76 76 json_unpacker = lambda s: extract_dates(jsonapi.loads(s))
77 77
78 78 pickle_packer = lambda o: pickle.dumps(o,-1)
79 79 pickle_unpacker = pickle.loads
80 80
81 81 default_packer = json_packer
82 82 default_unpacker = json_unpacker
83 83
84 84
85 85 DELIM=b"<IDS|MSG>"
86 86
87 87 #-----------------------------------------------------------------------------
88 88 # Classes
89 89 #-----------------------------------------------------------------------------
90 90
91 91 class SessionFactory(LoggingConfigurable):
92 92 """The Base class for configurables that have a Session, Context, logger,
93 93 and IOLoop.
94 94 """
95 95
96 96 logname = Unicode('')
97 97 def _logname_changed(self, name, old, new):
98 98 self.log = logging.getLogger(new)
99 99
100 100 # not configurable:
101 101 context = Instance('zmq.Context')
102 102 def _context_default(self):
103 103 return zmq.Context.instance()
104 104
105 105 session = Instance('IPython.zmq.session.Session')
106 106
107 107 loop = Instance('zmq.eventloop.ioloop.IOLoop', allow_none=False)
108 108 def _loop_default(self):
109 109 return IOLoop.instance()
110 110
111 111 def __init__(self, **kwargs):
112 112 super(SessionFactory, self).__init__(**kwargs)
113 113
114 114 if self.session is None:
115 115 # construct the session
116 116 self.session = Session(**kwargs)
117 117
118 118
119 119 class Message(object):
120 120 """A simple message object that maps dict keys to attributes.
121 121
122 122 A Message can be created from a dict and a dict from a Message instance
123 123 simply by calling dict(msg_obj)."""
124 124
125 125 def __init__(self, msg_dict):
126 126 dct = self.__dict__
127 127 for k, v in dict(msg_dict).iteritems():
128 128 if isinstance(v, dict):
129 129 v = Message(v)
130 130 dct[k] = v
131 131
132 132 # Having this iterator lets dict(msg_obj) work out of the box.
133 133 def __iter__(self):
134 134 return iter(self.__dict__.iteritems())
135 135
136 136 def __repr__(self):
137 137 return repr(self.__dict__)
138 138
139 139 def __str__(self):
140 140 return pprint.pformat(self.__dict__)
141 141
142 142 def __contains__(self, k):
143 143 return k in self.__dict__
144 144
145 145 def __getitem__(self, k):
146 146 return self.__dict__[k]
147 147
148 148
149 149 def msg_header(msg_id, msg_type, username, session):
150 150 date = datetime.now()
151 151 return locals()
152 152
153 153 def extract_header(msg_or_header):
154 154 """Given a message or header, return the header."""
155 155 if not msg_or_header:
156 156 return {}
157 157 try:
158 158 # See if msg_or_header is the entire message.
159 159 h = msg_or_header['header']
160 160 except KeyError:
161 161 try:
162 162 # See if msg_or_header is just the header
163 163 h = msg_or_header['msg_id']
164 164 except KeyError:
165 165 raise
166 166 else:
167 167 h = msg_or_header
168 168 if not isinstance(h, dict):
169 169 h = dict(h)
170 170 return h
171 171
172 172 class Session(Configurable):
173 173 """Object for handling serialization and sending of messages.
174 174
175 175 The Session object handles building messages and sending them
176 176 with ZMQ sockets or ZMQStream objects. Objects can communicate with each
177 177 other over the network via Session objects, and only need to work with the
178 178 dict-based IPython message spec. The Session will handle
179 179 serialization/deserialization, security, and metadata.
180 180
181 181 Sessions support configurable serialiization via packer/unpacker traits,
182 182 and signing with HMAC digests via the key/keyfile traits.
183 183
184 184 Parameters
185 185 ----------
186 186
187 187 debug : bool
188 188 whether to trigger extra debugging statements
189 189 packer/unpacker : str : 'json', 'pickle' or import_string
190 190 importstrings for methods to serialize message parts. If just
191 191 'json' or 'pickle', predefined JSON and pickle packers will be used.
192 192 Otherwise, the entire importstring must be used.
193 193
194 194 The functions must accept at least valid JSON input, and output *bytes*.
195 195
196 196 For example, to use msgpack:
197 197 packer = 'msgpack.packb', unpacker='msgpack.unpackb'
198 198 pack/unpack : callables
199 199 You can also set the pack/unpack callables for serialization directly.
200 200 session : bytes
201 201 the ID of this Session object. The default is to generate a new UUID.
202 202 username : unicode
203 203 username added to message headers. The default is to ask the OS.
204 204 key : bytes
205 205 The key used to initialize an HMAC signature. If unset, messages
206 206 will not be signed or checked.
207 207 keyfile : filepath
208 208 The file containing a key. If this is set, `key` will be initialized
209 209 to the contents of the file.
210 210
211 211 """
212 212
213 213 debug=Bool(False, config=True, help="""Debug output in the Session""")
214 214
215 215 packer = DottedObjectName('json',config=True,
216 216 help="""The name of the packer for serializing messages.
217 217 Should be one of 'json', 'pickle', or an import name
218 218 for a custom callable serializer.""")
219 219 def _packer_changed(self, name, old, new):
220 220 if new.lower() == 'json':
221 221 self.pack = json_packer
222 222 self.unpack = json_unpacker
223 223 elif new.lower() == 'pickle':
224 224 self.pack = pickle_packer
225 225 self.unpack = pickle_unpacker
226 226 else:
227 227 self.pack = import_item(str(new))
228 228
229 229 unpacker = DottedObjectName('json', config=True,
230 230 help="""The name of the unpacker for unserializing messages.
231 231 Only used with custom functions for `packer`.""")
232 232 def _unpacker_changed(self, name, old, new):
233 233 if new.lower() == 'json':
234 234 self.pack = json_packer
235 235 self.unpack = json_unpacker
236 236 elif new.lower() == 'pickle':
237 237 self.pack = pickle_packer
238 238 self.unpack = pickle_unpacker
239 239 else:
240 240 self.unpack = import_item(str(new))
241 241
242 242 session = CBytes(b'', config=True,
243 243 help="""The UUID identifying this session.""")
244 244 def _session_default(self):
245 245 return bytes(uuid.uuid4())
246 246
247 247 username = Unicode(os.environ.get('USER','username'), config=True,
248 248 help="""Username for the Session. Default is your system username.""")
249 249
250 250 # message signature related traits:
251 251 key = CBytes(b'', config=True,
252 252 help="""execution key, for extra authentication.""")
253 253 def _key_changed(self, name, old, new):
254 254 if new:
255 255 self.auth = hmac.HMAC(new)
256 256 else:
257 257 self.auth = None
258 258 auth = Instance(hmac.HMAC)
259 259 digest_history = Set()
260 260
261 261 keyfile = Unicode('', config=True,
262 262 help="""path to file containing execution key.""")
263 263 def _keyfile_changed(self, name, old, new):
264 264 with open(new, 'rb') as f:
265 265 self.key = f.read().strip()
266 266
267 267 pack = Any(default_packer) # the actual packer function
268 268 def _pack_changed(self, name, old, new):
269 269 if not callable(new):
270 270 raise TypeError("packer must be callable, not %s"%type(new))
271 271
272 272 unpack = Any(default_unpacker) # the actual packer function
273 273 def _unpack_changed(self, name, old, new):
274 274 # unpacker is not checked - it is assumed to be
275 275 if not callable(new):
276 276 raise TypeError("unpacker must be callable, not %s"%type(new))
277 277
278 278 def __init__(self, **kwargs):
279 279 """create a Session object
280 280
281 281 Parameters
282 282 ----------
283 283
284 284 debug : bool
285 285 whether to trigger extra debugging statements
286 286 packer/unpacker : str : 'json', 'pickle' or import_string
287 287 importstrings for methods to serialize message parts. If just
288 288 'json' or 'pickle', predefined JSON and pickle packers will be used.
289 289 Otherwise, the entire importstring must be used.
290 290
291 291 The functions must accept at least valid JSON input, and output
292 292 *bytes*.
293 293
294 294 For example, to use msgpack:
295 295 packer = 'msgpack.packb', unpacker='msgpack.unpackb'
296 296 pack/unpack : callables
297 297 You can also set the pack/unpack callables for serialization
298 298 directly.
299 299 session : bytes
300 300 the ID of this Session object. The default is to generate a new
301 301 UUID.
302 302 username : unicode
303 303 username added to message headers. The default is to ask the OS.
304 304 key : bytes
305 305 The key used to initialize an HMAC signature. If unset, messages
306 306 will not be signed or checked.
307 307 keyfile : filepath
308 308 The file containing a key. If this is set, `key` will be
309 309 initialized to the contents of the file.
310 310 """
311 311 super(Session, self).__init__(**kwargs)
312 312 self._check_packers()
313 313 self.none = self.pack({})
314 314
315 315 @property
316 316 def msg_id(self):
317 317 """always return new uuid"""
318 318 return str(uuid.uuid4())
319 319
320 320 def _check_packers(self):
321 321 """check packers for binary data and datetime support."""
322 322 pack = self.pack
323 323 unpack = self.unpack
324 324
325 325 # check simple serialization
326 326 msg = dict(a=[1,'hi'])
327 327 try:
328 328 packed = pack(msg)
329 329 except Exception:
330 330 raise ValueError("packer could not serialize a simple message")
331 331
332 332 # ensure packed message is bytes
333 333 if not isinstance(packed, bytes):
334 334 raise ValueError("message packed to %r, but bytes are required"%type(packed))
335 335
336 336 # check that unpack is pack's inverse
337 337 try:
338 338 unpacked = unpack(packed)
339 339 except Exception:
340 340 raise ValueError("unpacker could not handle the packer's output")
341 341
342 342 # check datetime support
343 343 msg = dict(t=datetime.now())
344 344 try:
345 345 unpacked = unpack(pack(msg))
346 346 except Exception:
347 347 self.pack = lambda o: pack(squash_dates(o))
348 348 self.unpack = lambda s: extract_dates(unpack(s))
349 349
350 350 def msg_header(self, msg_type):
351 351 return msg_header(self.msg_id, msg_type, self.username, self.session)
352 352
353 353 def msg(self, msg_type, content=None, parent=None, subheader=None):
354 354 """Return the nested message dict.
355 355
356 356 This format is different from what is sent over the wire. The
357 357 self.serialize method converts this nested message dict to the wire
358 358 format, which uses a message list.
359 359 """
360 360 msg = {}
361 361 msg['header'] = self.msg_header(msg_type)
362 362 msg['parent_header'] = {} if parent is None else extract_header(parent)
363 363 msg['content'] = {} if content is None else content
364 364 sub = {} if subheader is None else subheader
365 365 msg['header'].update(sub)
366 366 return msg
367 367
368 368 def sign(self, msg_list):
369 369 """Sign a message with HMAC digest. If no auth, return b''.
370 370
371 371 Parameters
372 372 ----------
373 373 msg_list : list
374 374 The [p_header,p_parent,p_content] part of the message list.
375 375 """
376 376 if self.auth is None:
377 377 return b''
378 378 h = self.auth.copy()
379 379 for m in msg_list:
380 380 h.update(m)
381 381 return h.hexdigest()
382 382
383 383 def serialize(self, msg, ident=None):
384 384 """Serialize the message components to bytes.
385 385
386 This is roughly the inverse of unserialize. The serialize/unserialize
387 methods work with full message lists, whereas pack/unpack work with
388 the individual message parts in the message list.
389
386 390 Parameters
387 391 ----------
388 392 msg : dict or Message
389 393 The nexted message dict as returned by the self.msg method.
390 394
391 395 Returns
392 396 -------
393 397 msg_list : list
394 398 The list of bytes objects to be sent with the format:
395 399 [ident1,ident2,...,DELIM,HMAC,p_header,p_parent,p_content,
396 400 buffer1,buffer2,...]. In this list, the p_* entities are
397 401 the packed or serialized versions, so if JSON is used, these
398 402 are uft8 encoded JSON strings.
399 403 """
400 404 content = msg.get('content', {})
401 405 if content is None:
402 406 content = self.none
403 407 elif isinstance(content, dict):
404 408 content = self.pack(content)
405 409 elif isinstance(content, bytes):
406 410 # content is already packed, as in a relayed message
407 411 pass
408 412 elif isinstance(content, unicode):
409 413 # should be bytes, but JSON often spits out unicode
410 414 content = content.encode('utf8')
411 415 else:
412 416 raise TypeError("Content incorrect type: %s"%type(content))
413 417
414 418 real_message = [self.pack(msg['header']),
415 419 self.pack(msg['parent_header']),
416 420 content
417 421 ]
418 422
419 423 to_send = []
420 424
421 425 if isinstance(ident, list):
422 426 # accept list of idents
423 427 to_send.extend(ident)
424 428 elif ident is not None:
425 429 to_send.append(ident)
426 430 to_send.append(DELIM)
427 431
428 432 signature = self.sign(real_message)
429 433 to_send.append(signature)
430 434
431 435 to_send.extend(real_message)
432 436
433 437 return to_send
434 438
435 439 def send(self, stream, msg_or_type, content=None, parent=None, ident=None,
436 440 buffers=None, subheader=None, track=False):
437 441 """Build and send a message via stream or socket.
438 442
439 443 The message format used by this function internally is as follows:
440 444
441 445 [ident1,ident2,...,DELIM,HMAC,p_header,p_parent,p_content,
442 446 buffer1,buffer2,...]
443 447
444 448 The self.serialize method converts the nested message dict into this
445 449 format.
446 450
447 451 Parameters
448 452 ----------
449 453
450 454 stream : zmq.Socket or ZMQStream
451 455 the socket-like object used to send the data
452 456 msg_or_type : str or Message/dict
453 457 Normally, msg_or_type will be a msg_type unless a message is being
454 458 sent more than once.
455 459
456 460 content : dict or None
457 461 the content of the message (ignored if msg_or_type is a message)
458 462 parent : Message or dict or None
459 463 the parent or parent header describing the parent of this message
460 464 ident : bytes or list of bytes
461 465 the zmq.IDENTITY routing path
462 466 subheader : dict or None
463 467 extra header keys for this message's header
464 468 buffers : list or None
465 469 the already-serialized buffers to be appended to the message
466 470 track : bool
467 471 whether to track. Only for use with Sockets,
468 472 because ZMQStream objects cannot track messages.
469 473
470 474 Returns
471 475 -------
472 476 msg : message dict
473 477 the constructed message
474 478 (msg,tracker) : (message dict, MessageTracker)
475 479 if track=True, then a 2-tuple will be returned,
476 480 the first element being the constructed
477 481 message, and the second being the MessageTracker
478 482
479 483 """
480 484
481 485 if not isinstance(stream, (zmq.Socket, ZMQStream)):
482 486 raise TypeError("stream must be Socket or ZMQStream, not %r"%type(stream))
483 487 elif track and isinstance(stream, ZMQStream):
484 488 raise TypeError("ZMQStream cannot track messages")
485 489
486 490 if isinstance(msg_or_type, (Message, dict)):
487 491 # we got a Message, not a msg_type
488 492 # don't build a new Message
489 493 msg = msg_or_type
490 494 else:
491 495 msg = self.msg(msg_or_type, content, parent, subheader)
492 496
493 497 buffers = [] if buffers is None else buffers
494 498 to_send = self.serialize(msg, ident)
495 499 flag = 0
496 500 if buffers:
497 501 flag = zmq.SNDMORE
498 502 _track = False
499 503 else:
500 504 _track=track
501 505 if track:
502 506 tracker = stream.send_multipart(to_send, flag, copy=False, track=_track)
503 507 else:
504 508 tracker = stream.send_multipart(to_send, flag, copy=False)
505 509 for b in buffers[:-1]:
506 510 stream.send(b, flag, copy=False)
507 511 if buffers:
508 512 if track:
509 513 tracker = stream.send(buffers[-1], copy=False, track=track)
510 514 else:
511 515 tracker = stream.send(buffers[-1], copy=False)
512 516
513 517 # omsg = Message(msg)
514 518 if self.debug:
515 519 pprint.pprint(msg)
516 520 pprint.pprint(to_send)
517 521 pprint.pprint(buffers)
518 522
519 523 msg['tracker'] = tracker
520 524
521 525 return msg
522 526
523 527 def send_raw(self, stream, msg_list, flags=0, copy=True, ident=None):
524 528 """Send a raw message via ident path.
525 529
526 530 This method is used to send a already serialized message.
527 531
528 532 Parameters
529 533 ----------
530 534 stream : ZMQStream or Socket
531 535 The ZMQ stream or socket to use for sending the message.
532 536 msg_list : list
533 537 The serialized list of messages to send. This only includes the
534 538 [p_header,p_parent,p_content,buffer1,buffer2,...] portion of
535 539 the message.
536 540 ident : ident or list
537 541 A single ident or a list of idents to use in sending.
538 542 """
539 543 to_send = []
540 544 if isinstance(ident, bytes):
541 545 ident = [ident]
542 546 if ident is not None:
543 547 to_send.extend(ident)
544 548
545 549 to_send.append(DELIM)
546 550 to_send.append(self.sign(msg_list))
547 551 to_send.extend(msg_list)
548 552 stream.send_multipart(msg_list, flags, copy=copy)
549 553
550 554 def recv(self, socket, mode=zmq.NOBLOCK, content=True, copy=True):
551 555 """Receive and unpack a message.
552 556
553 557 Parameters
554 558 ----------
555 559 socket : ZMQStream or Socket
556 560 The socket or stream to use in receiving.
557 561
558 562 Returns
559 563 -------
560 564 [idents], msg
561 565 [idents] is a list of idents and msg is a nested message dict of
562 566 same format as self.msg returns.
563 567 """
564 568 if isinstance(socket, ZMQStream):
565 569 socket = socket.socket
566 570 try:
567 571 msg_list = socket.recv_multipart(mode)
568 572 except zmq.ZMQError as e:
569 573 if e.errno == zmq.EAGAIN:
570 574 # We can convert EAGAIN to None as we know in this case
571 575 # recv_multipart won't return None.
572 576 return None,None
573 577 else:
574 578 raise
575 579 # split multipart message into identity list and message dict
576 580 # invalid large messages can cause very expensive string comparisons
577 581 idents, msg_list = self.feed_identities(msg_list, copy)
578 582 try:
579 return idents, self.unpack_message(msg_list, content=content, copy=copy)
583 return idents, self.unserialize(msg_list, content=content, copy=copy)
580 584 except Exception as e:
581 585 print (idents, msg_list)
582 586 # TODO: handle it
583 587 raise e
584 588
585 589 def feed_identities(self, msg_list, copy=True):
586 590 """Split the identities from the rest of the message.
587 591
588 592 Feed until DELIM is reached, then return the prefix as idents and
589 593 remainder as msg_list. This is easily broken by setting an IDENT to DELIM,
590 594 but that would be silly.
591 595
592 596 Parameters
593 597 ----------
594 598 msg_list : a list of Message or bytes objects
595 599 The message to be split.
596 600 copy : bool
597 601 flag determining whether the arguments are bytes or Messages
598 602
599 603 Returns
600 604 -------
601 (idents,msg_list) : two lists
602 idents will always be a list of bytes - the indentity prefix
603 msg_list will be a list of bytes or Messages, unchanged from input
604 msg_list should be unpackable via self.unpack_message at this point.
605 (idents, msg_list) : two lists
606 idents will always be a list of bytes, each of which is a ZMQ
607 identity. msg_list will be a list of bytes or zmq.Messages of the
608 form [HMAC,p_header,p_parent,p_content,buffer1,buffer2,...] and
609 should be unpackable/unserializable via self.unserialize at this
610 point.
605 611 """
606 612 if copy:
607 613 idx = msg_list.index(DELIM)
608 614 return msg_list[:idx], msg_list[idx+1:]
609 615 else:
610 616 failed = True
611 617 for idx,m in enumerate(msg_list):
612 618 if m.bytes == DELIM:
613 619 failed = False
614 620 break
615 621 if failed:
616 622 raise ValueError("DELIM not in msg_list")
617 623 idents, msg_list = msg_list[:idx], msg_list[idx+1:]
618 624 return [m.bytes for m in idents], msg_list
619 625
620 def unpack_message(self, msg_list, content=True, copy=True):
621 """Return a message object from the format
622 sent by self.send.
623
626 def unserialize(self, msg_list, content=True, copy=True):
627 """Unserialize a msg_list to a nested message dict.
628
629 This is roughly the inverse of serialize. The serialize/unserialize
630 methods work with full message lists, whereas pack/unpack work with
631 the individual message parts in the message list.
632
624 633 Parameters:
625 634 -----------
626
635 msg_list : list of bytes or Message objects
636 The list of message parts of the form [HMAC,p_header,p_parent,
637 p_content,buffer1,buffer2,...].
627 638 content : bool (True)
628 whether to unpack the content dict (True),
629 or leave it serialized (False)
630
639 Whether to unpack the content dict (True), or leave it packed
640 (False).
631 641 copy : bool (True)
632 whether to return the bytes (True),
633 or the non-copying Message object in each place (False)
634
642 Whether to return the bytes (True), or the non-copying Message
643 object in each place (False).
644
645 Returns
646 -------
647 msg : dict
648 The nested message dict with top-level keys [header, parent_header,
649 content, buffers].
635 650 """
636 651 minlen = 4
637 652 message = {}
638 653 if not copy:
639 654 for i in range(minlen):
640 655 msg_list[i] = msg_list[i].bytes
641 656 if self.auth is not None:
642 657 signature = msg_list[0]
643 658 if signature in self.digest_history:
644 659 raise ValueError("Duplicate Signature: %r"%signature)
645 660 self.digest_history.add(signature)
646 661 check = self.sign(msg_list[1:4])
647 662 if not signature == check:
648 663 raise ValueError("Invalid Signature: %r"%signature)
649 664 if not len(msg_list) >= minlen:
650 665 raise TypeError("malformed message, must have at least %i elements"%minlen)
651 666 message['header'] = self.unpack(msg_list[1])
652 667 message['parent_header'] = self.unpack(msg_list[2])
653 668 if content:
654 669 message['content'] = self.unpack(msg_list[3])
655 670 else:
656 671 message['content'] = msg_list[3]
657 672
658 673 message['buffers'] = msg_list[4:]
659 674 return message
660 675
661 676 def test_msg2obj():
662 677 am = dict(x=1)
663 678 ao = Message(am)
664 679 assert ao.x == am['x']
665 680
666 681 am['y'] = dict(z=1)
667 682 ao = Message(am)
668 683 assert ao.y.z == am['y']['z']
669 684
670 685 k1, k2 = 'y', 'z'
671 686 assert ao[k1][k2] == am[k1][k2]
672 687
673 688 am2 = dict(ao)
674 689 assert am['x'] == am2['x']
675 690 assert am['y']['z'] == am2['y']['z']
676 691
General Comments 0
You need to be logged in to leave comments. Login now