restore msg_id/msg_type aliases in top level of msg dict...
MinRK
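This change restores msg_id and msg_type as top-level aliases of the corresponding header fields, both when building messages (Session.msg) and when unpacking them (Session.unserialize). A minimal sketch of the resulting dict shape (illustrative placeholder values, not output from a real run):

    msg = {
        'header' : {'msg_id': '<uuid>', 'msg_type': 'execute_request',
                    'username': 'user', 'session': '<uuid>', 'date': '<datetime>'},
        'msg_id' : '<uuid>',             # alias of header['msg_id']
        'msg_type' : 'execute_request',  # alias of header['msg_type']
        'parent_header' : {},
        'content' : {},
    }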
@@ -1,697 +1,703 b''
1 1 """Session object for building, serializing, sending, and receiving messages in
2 2 IPython. The Session object supports serialization, HMAC signatures, and
3 3 metadata on messages.
4 4
5 5 Also defined here are utilities for working with Sessions:
6 6 * A SessionFactory to be used as a base class for configurables that work with
7 7 Sessions.
8 8 * A Message object for convenience that allows attribute-access to the msg dict.
9 9
10 10 Authors:
11 11
12 12 * Min RK
13 13 * Brian Granger
14 14 * Fernando Perez
15 15 """
16 16 #-----------------------------------------------------------------------------
17 17 # Copyright (C) 2010-2011 The IPython Development Team
18 18 #
19 19 # Distributed under the terms of the BSD License. The full license is in
20 20 # the file COPYING, distributed as part of this software.
21 21 #-----------------------------------------------------------------------------
22 22
23 23 #-----------------------------------------------------------------------------
24 24 # Imports
25 25 #-----------------------------------------------------------------------------
26 26
27 27 import hmac
28 28 import logging
29 29 import os
30 30 import pprint
31 31 import uuid
32 32 from datetime import datetime
33 33
34 34 try:
35 35 import cPickle
36 36 pickle = cPickle
37 37 except ImportError:
38 38 cPickle = None
39 39 import pickle
40 40
41 41 import zmq
42 42 from zmq.utils import jsonapi
43 43 from zmq.eventloop.ioloop import IOLoop
44 44 from zmq.eventloop.zmqstream import ZMQStream
45 45
46 46 from IPython.config.configurable import Configurable, LoggingConfigurable
47 47 from IPython.utils.importstring import import_item
48 48 from IPython.utils.jsonutil import extract_dates, squash_dates, date_default
49 49 from IPython.utils.traitlets import (CBytes, Unicode, Bool, Any, Instance, Set,
50 50 DottedObjectName)
51 51
52 52 #-----------------------------------------------------------------------------
53 53 # utility functions
54 54 #-----------------------------------------------------------------------------
55 55
56 56 def squash_unicode(obj):
57 57 """coerce unicode back to bytestrings."""
58 58 if isinstance(obj,dict):
59 59 for key in obj.keys():
60 60 obj[key] = squash_unicode(obj[key])
61 61 if isinstance(key, unicode):
62 62 obj[squash_unicode(key)] = obj.pop(key)
63 63 elif isinstance(obj, list):
64 64 for i,v in enumerate(obj):
65 65 obj[i] = squash_unicode(v)
66 66 elif isinstance(obj, unicode):
67 67 obj = obj.encode('utf8')
68 68 return obj
69 69
70 70 #-----------------------------------------------------------------------------
71 71 # globals and defaults
72 72 #-----------------------------------------------------------------------------
73 73 key = 'on_unknown' if jsonapi.jsonmod.__name__ == 'jsonlib' else 'default'
74 74 json_packer = lambda obj: jsonapi.dumps(obj, **{key:date_default})
75 75 json_unpacker = lambda s: extract_dates(jsonapi.loads(s))
76 76
77 77 pickle_packer = lambda o: pickle.dumps(o,-1)
78 78 pickle_unpacker = pickle.loads
79 79
80 80 default_packer = json_packer
81 81 default_unpacker = json_unpacker
82 82
83 83
84 84 DELIM=b"<IDS|MSG>"
85 85
86 86 #-----------------------------------------------------------------------------
87 87 # Classes
88 88 #-----------------------------------------------------------------------------
89 89
90 90 class SessionFactory(LoggingConfigurable):
91 91 """The Base class for configurables that have a Session, Context, logger,
92 92 and IOLoop.
93 93 """
94 94
95 95 logname = Unicode('')
96 96 def _logname_changed(self, name, old, new):
97 97 self.log = logging.getLogger(new)
98 98
99 99 # not configurable:
100 100 context = Instance('zmq.Context')
101 101 def _context_default(self):
102 102 return zmq.Context.instance()
103 103
104 104 session = Instance('IPython.zmq.session.Session')
105 105
106 106 loop = Instance('zmq.eventloop.ioloop.IOLoop', allow_none=False)
107 107 def _loop_default(self):
108 108 return IOLoop.instance()
109 109
110 110 def __init__(self, **kwargs):
111 111 super(SessionFactory, self).__init__(**kwargs)
112 112
113 113 if self.session is None:
114 114 # construct the session
115 115 self.session = Session(**kwargs)
116 116
117 117
118 118 class Message(object):
119 119 """A simple message object that maps dict keys to attributes.
120 120
121 121 A Message can be created from a dict and a dict from a Message instance
122 122 simply by calling dict(msg_obj)."""
123 123
124 124 def __init__(self, msg_dict):
125 125 dct = self.__dict__
126 126 for k, v in dict(msg_dict).iteritems():
127 127 if isinstance(v, dict):
128 128 v = Message(v)
129 129 dct[k] = v
130 130
131 131 # Having this iterator lets dict(msg_obj) work out of the box.
132 132 def __iter__(self):
133 133 return iter(self.__dict__.iteritems())
134 134
135 135 def __repr__(self):
136 136 return repr(self.__dict__)
137 137
138 138 def __str__(self):
139 139 return pprint.pformat(self.__dict__)
140 140
141 141 def __contains__(self, k):
142 142 return k in self.__dict__
143 143
144 144 def __getitem__(self, k):
145 145 return self.__dict__[k]
146 146
147 147
148 148 def msg_header(msg_id, msg_type, username, session):
149 149 date = datetime.now()
150 150 return locals()
151 151
152 152 def extract_header(msg_or_header):
153 153 """Given a message or header, return the header."""
154 154 if not msg_or_header:
155 155 return {}
156 156 try:
157 157 # See if msg_or_header is the entire message.
158 158 h = msg_or_header['header']
159 159 except KeyError:
160 160 try:
161 161 # See if msg_or_header is just the header
162 162 h = msg_or_header['msg_id']
163 163 except KeyError:
164 164 raise
165 165 else:
166 166 h = msg_or_header
167 167 if not isinstance(h, dict):
168 168 h = dict(h)
169 169 return h
170 170
171 171 class Session(Configurable):
172 172 """Object for handling serialization and sending of messages.
173 173
174 174 The Session object handles building messages and sending them
175 175 with ZMQ sockets or ZMQStream objects. Objects can communicate with each
176 176 other over the network via Session objects, and only need to work with the
177 177 dict-based IPython message spec. The Session will handle
178 178 serialization/deserialization, security, and metadata.
179 179
180 180 Sessions support configurable serialization via packer/unpacker traits,
181 181 and signing with HMAC digests via the key/keyfile traits.
182 182
183 183 Parameters
184 184 ----------
185 185
186 186 debug : bool
187 187 whether to trigger extra debugging statements
188 188 packer/unpacker : str : 'json', 'pickle' or import_string
189 189 importstrings for methods to serialize message parts. If just
190 190 'json' or 'pickle', predefined JSON and pickle packers will be used.
191 191 Otherwise, the entire importstring must be used.
192 192
193 193 The functions must accept at least valid JSON input, and output *bytes*.
194 194
195 195 For example, to use msgpack:
196 196 packer = 'msgpack.packb', unpacker='msgpack.unpackb'
197 197 pack/unpack : callables
198 198 You can also set the pack/unpack callables for serialization directly.
199 199 session : bytes
200 200 the ID of this Session object. The default is to generate a new UUID.
201 201 username : unicode
202 202 username added to message headers. The default is to ask the OS.
203 203 key : bytes
204 204 The key used to initialize an HMAC signature. If unset, messages
205 205 will not be signed or checked.
206 206 keyfile : filepath
207 207 The file containing a key. If this is set, `key` will be initialized
208 208 to the contents of the file.
209 209
210 210 """
211 211
212 212 debug=Bool(False, config=True, help="""Debug output in the Session""")
213 213
214 214 packer = DottedObjectName('json',config=True,
215 215 help="""The name of the packer for serializing messages.
216 216 Should be one of 'json', 'pickle', or an import name
217 217 for a custom callable serializer.""")
218 218 def _packer_changed(self, name, old, new):
219 219 if new.lower() == 'json':
220 220 self.pack = json_packer
221 221 self.unpack = json_unpacker
222 222 elif new.lower() == 'pickle':
223 223 self.pack = pickle_packer
224 224 self.unpack = pickle_unpacker
225 225 else:
226 226 self.pack = import_item(str(new))
227 227
228 228 unpacker = DottedObjectName('json', config=True,
229 229 help="""The name of the unpacker for unserializing messages.
230 230 Only used with custom functions for `packer`.""")
231 231 def _unpacker_changed(self, name, old, new):
232 232 if new.lower() == 'json':
233 233 self.pack = json_packer
234 234 self.unpack = json_unpacker
235 235 elif new.lower() == 'pickle':
236 236 self.pack = pickle_packer
237 237 self.unpack = pickle_unpacker
238 238 else:
239 239 self.unpack = import_item(str(new))
240 240
241 241 session = CBytes(b'', config=True,
242 242 help="""The UUID identifying this session.""")
243 243 def _session_default(self):
244 244 return bytes(uuid.uuid4())
245 245
246 246 username = Unicode(os.environ.get('USER',u'username'), config=True,
247 247 help="""Username for the Session. Default is your system username.""")
248 248
249 249 # message signature related traits:
250 250 key = CBytes(b'', config=True,
251 251 help="""execution key, for extra authentication.""")
252 252 def _key_changed(self, name, old, new):
253 253 if new:
254 254 self.auth = hmac.HMAC(new)
255 255 else:
256 256 self.auth = None
257 257 auth = Instance(hmac.HMAC)
258 258 digest_history = Set()
259 259
260 260 keyfile = Unicode('', config=True,
261 261 help="""path to file containing execution key.""")
262 262 def _keyfile_changed(self, name, old, new):
263 263 with open(new, 'rb') as f:
264 264 self.key = f.read().strip()
265 265
266 266 pack = Any(default_packer) # the actual packer function
267 267 def _pack_changed(self, name, old, new):
268 268 if not callable(new):
269 269 raise TypeError("packer must be callable, not %s"%type(new))
270 270
271 271 unpack = Any(default_unpacker) # the actual unpacker function
272 272 def _unpack_changed(self, name, old, new):
273 273 # unpacker is not checked - it is assumed to be
274 274 if not callable(new):
275 275 raise TypeError("unpacker must be callable, not %s"%type(new))
276 276
277 277 def __init__(self, **kwargs):
278 278 """create a Session object
279 279
280 280 Parameters
281 281 ----------
282 282
283 283 debug : bool
284 284 whether to trigger extra debugging statements
285 285 packer/unpacker : str : 'json', 'pickle' or import_string
286 286 importstrings for methods to serialize message parts. If just
287 287 'json' or 'pickle', predefined JSON and pickle packers will be used.
288 288 Otherwise, the entire importstring must be used.
289 289
290 290 The functions must accept at least valid JSON input, and output
291 291 *bytes*.
292 292
293 293 For example, to use msgpack:
294 294 packer = 'msgpack.packb', unpacker='msgpack.unpackb'
295 295 pack/unpack : callables
296 296 You can also set the pack/unpack callables for serialization
297 297 directly.
298 298 session : bytes
299 299 the ID of this Session object. The default is to generate a new
300 300 UUID.
301 301 username : unicode
302 302 username added to message headers. The default is to ask the OS.
303 303 key : bytes
304 304 The key used to initialize an HMAC signature. If unset, messages
305 305 will not be signed or checked.
306 306 keyfile : filepath
307 307 The file containing a key. If this is set, `key` will be
308 308 initialized to the contents of the file.
309 309 """
310 310 super(Session, self).__init__(**kwargs)
311 311 self._check_packers()
312 312 self.none = self.pack({})
313 313
314 314 @property
315 315 def msg_id(self):
316 316 """always return new uuid"""
317 317 return str(uuid.uuid4())
318 318
319 319 def _check_packers(self):
320 320 """check packers for binary data and datetime support."""
321 321 pack = self.pack
322 322 unpack = self.unpack
323 323
324 324 # check simple serialization
325 325 msg = dict(a=[1,'hi'])
326 326 try:
327 327 packed = pack(msg)
328 328 except Exception:
329 329 raise ValueError("packer could not serialize a simple message")
330 330
331 331 # ensure packed message is bytes
332 332 if not isinstance(packed, bytes):
333 333 raise ValueError("message packed to %r, but bytes are required"%type(packed))
334 334
335 335 # check that unpack is pack's inverse
336 336 try:
337 337 unpacked = unpack(packed)
338 338 except Exception:
339 339 raise ValueError("unpacker could not handle the packer's output")
340 340
341 341 # check datetime support
342 342 msg = dict(t=datetime.now())
343 343 try:
344 344 unpacked = unpack(pack(msg))
345 345 except Exception:
346 346 self.pack = lambda o: pack(squash_dates(o))
347 347 self.unpack = lambda s: extract_dates(unpack(s))
348 348
349 349 def msg_header(self, msg_type):
350 350 return msg_header(self.msg_id, msg_type, self.username, self.session)
351 351
352 352 def msg(self, msg_type, content=None, parent=None, subheader=None, header=None):
353 353 """Return the nested message dict.
354 354
355 355 This format is different from what is sent over the wire. The
356 356 serialize/unserialize methods convert this nested message dict to the wire
357 357 format, which is a list of message parts.
358 358 """
359 359 msg = {}
360 msg['header'] = self.msg_header(msg_type) if header is None else header
360 header = self.msg_header(msg_type) if header is None else header
361 msg['header'] = header
362 msg['msg_id'] = header['msg_id']
363 msg['msg_type'] = header['msg_type']
361 364 msg['parent_header'] = {} if parent is None else extract_header(parent)
362 365 msg['content'] = {} if content is None else content
363 366 sub = {} if subheader is None else subheader
364 367 msg['header'].update(sub)
365 368 return msg
366 369
367 370 def sign(self, msg_list):
368 371 """Sign a message with HMAC digest. If no auth, return b''.
369 372
370 373 Parameters
371 374 ----------
372 375 msg_list : list
373 376 The [p_header,p_parent,p_content] part of the message list.
374 377 """
375 378 if self.auth is None:
376 379 return b''
377 380 h = self.auth.copy()
378 381 for m in msg_list:
379 382 h.update(m)
380 383 return h.hexdigest()
381 384
382 385 def serialize(self, msg, ident=None):
383 386 """Serialize the message components to bytes.
384 387
385 388 This is roughly the inverse of unserialize. The serialize/unserialize
386 389 methods work with full message lists, whereas pack/unpack work with
387 390 the individual message parts in the message list.
388 391
389 392 Parameters
390 393 ----------
391 394 msg : dict or Message
392 395 The nested message dict as returned by the self.msg method.
393 396
394 397 Returns
395 398 -------
396 399 msg_list : list
397 400 The list of bytes objects to be sent with the format:
398 401 [ident1,ident2,...,DELIM,HMAC,p_header,p_parent,p_content,
399 402 buffer1,buffer2,...]. In this list, the p_* entities are
400 403 the packed or serialized versions, so if JSON is used, these
401 404 are utf8 encoded JSON strings.
402 405 """
403 406 content = msg.get('content', {})
404 407 if content is None:
405 408 content = self.none
406 409 elif isinstance(content, dict):
407 410 content = self.pack(content)
408 411 elif isinstance(content, bytes):
409 412 # content is already packed, as in a relayed message
410 413 pass
411 414 elif isinstance(content, unicode):
412 415 # should be bytes, but JSON often spits out unicode
413 416 content = content.encode('utf8')
414 417 else:
415 418 raise TypeError("Content incorrect type: %s"%type(content))
416 419
417 420 real_message = [self.pack(msg['header']),
418 421 self.pack(msg['parent_header']),
419 422 content
420 423 ]
421 424
422 425 to_send = []
423 426
424 427 if isinstance(ident, list):
425 428 # accept list of idents
426 429 to_send.extend(ident)
427 430 elif ident is not None:
428 431 to_send.append(ident)
429 432 to_send.append(DELIM)
430 433
431 434 signature = self.sign(real_message)
432 435 to_send.append(signature)
433 436
434 437 to_send.extend(real_message)
435 438
436 439 return to_send
437 440
438 441 def send(self, stream, msg_or_type, content=None, parent=None, ident=None,
439 442 buffers=None, subheader=None, track=False, header=None):
440 443 """Build and send a message via stream or socket.
441 444
442 445 The message format used by this function internally is as follows:
443 446
444 447 [ident1,ident2,...,DELIM,HMAC,p_header,p_parent,p_content,
445 448 buffer1,buffer2,...]
446 449
447 450 The serialize/unserialize methods convert the nested message dict into this
448 451 format.
449 452
450 453 Parameters
451 454 ----------
452 455
453 456 stream : zmq.Socket or ZMQStream
454 457 The socket-like object used to send the data.
455 458 msg_or_type : str or Message/dict
456 459 Normally, msg_or_type will be a msg_type unless a message is being
457 460 sent more than once. If a header is supplied, this can be set to
458 461 None and the msg_type will be pulled from the header.
459 462
460 463 content : dict or None
461 464 The content of the message (ignored if msg_or_type is a message).
462 465 header : dict or None
463 466 The header dict for the message (ignored if msg_or_type is a message).
464 467 parent : Message or dict or None
465 468 The parent or parent header describing the parent of this message
466 469 (ignored if msg_or_type is a message).
467 470 ident : bytes or list of bytes
468 471 The zmq.IDENTITY routing path.
469 472 subheader : dict or None
470 473 Extra header keys for this message's header (ignored if msg_or_type
471 474 is a message).
472 475 buffers : list or None
473 476 The already-serialized buffers to be appended to the message.
474 477 track : bool
475 478 Whether to track. Only for use with Sockets, because ZMQStream
476 479 objects cannot track messages.
477 480
478 481 Returns
479 482 -------
480 483 msg : dict
481 484 The constructed message.
482 485 (msg,tracker) : (dict, MessageTracker)
483 486 if track=True, then a 2-tuple will be returned,
484 487 the first element being the constructed
485 488 message, and the second being the MessageTracker
486 489
487 490 """
488 491
489 492 if not isinstance(stream, (zmq.Socket, ZMQStream)):
490 493 raise TypeError("stream must be Socket or ZMQStream, not %r"%type(stream))
491 494 elif track and isinstance(stream, ZMQStream):
492 495 raise TypeError("ZMQStream cannot track messages")
493 496
494 497 if isinstance(msg_or_type, (Message, dict)):
495 498 # We got a Message or message dict, not a msg_type so don't
496 499 # build a new Message.
497 500 msg = msg_or_type
498 501 else:
499 502 msg = self.msg(msg_or_type, content=content, parent=parent,
500 503 subheader=subheader, header=header)
501 504
502 505 buffers = [] if buffers is None else buffers
503 506 to_send = self.serialize(msg, ident)
504 507 flag = 0
505 508 if buffers:
506 509 flag = zmq.SNDMORE
507 510 _track = False
508 511 else:
509 512 _track=track
510 513 if track:
511 514 tracker = stream.send_multipart(to_send, flag, copy=False, track=_track)
512 515 else:
513 516 tracker = stream.send_multipart(to_send, flag, copy=False)
514 517 for b in buffers[:-1]:
515 518 stream.send(b, flag, copy=False)
516 519 if buffers:
517 520 if track:
518 521 tracker = stream.send(buffers[-1], copy=False, track=track)
519 522 else:
520 523 tracker = stream.send(buffers[-1], copy=False)
521 524
522 525 # omsg = Message(msg)
523 526 if self.debug:
524 527 pprint.pprint(msg)
525 528 pprint.pprint(to_send)
526 529 pprint.pprint(buffers)
527 530
528 531 msg['tracker'] = tracker
529 532
530 533 return msg
531 534
532 535 def send_raw(self, stream, msg_list, flags=0, copy=True, ident=None):
533 536 """Send a raw message via ident path.
534 537
535 538 This method is used to send an already serialized message.
536 539
537 540 Parameters
538 541 ----------
539 542 stream : ZMQStream or Socket
540 543 The ZMQ stream or socket to use for sending the message.
541 544 msg_list : list
542 545 The serialized list of messages to send. This only includes the
543 546 [p_header,p_parent,p_content,buffer1,buffer2,...] portion of
544 547 the message.
545 548 ident : ident or list
546 549 A single ident or a list of idents to use in sending.
547 550 """
548 551 to_send = []
549 552 if isinstance(ident, bytes):
550 553 ident = [ident]
551 554 if ident is not None:
552 555 to_send.extend(ident)
553 556
554 557 to_send.append(DELIM)
555 558 to_send.append(self.sign(msg_list))
556 559 to_send.extend(msg_list)
557 560 stream.send_multipart(to_send, flags, copy=copy)
558 561
559 562 def recv(self, socket, mode=zmq.NOBLOCK, content=True, copy=True):
560 563 """Receive and unpack a message.
561 564
562 565 Parameters
563 566 ----------
564 567 socket : ZMQStream or Socket
565 568 The socket or stream to use in receiving.
566 569
567 570 Returns
568 571 -------
569 572 [idents], msg
570 573 [idents] is a list of idents and msg is a nested message dict of
571 574 same format as self.msg returns.
572 575 """
573 576 if isinstance(socket, ZMQStream):
574 577 socket = socket.socket
575 578 try:
576 579 msg_list = socket.recv_multipart(mode)
577 580 except zmq.ZMQError as e:
578 581 if e.errno == zmq.EAGAIN:
579 582 # We can convert EAGAIN to None as we know in this case
580 583 # recv_multipart won't return None.
581 584 return None,None
582 585 else:
583 586 raise
584 587 # split multipart message into identity list and message dict
585 588 # invalid large messages can cause very expensive string comparisons
586 589 idents, msg_list = self.feed_identities(msg_list, copy)
587 590 try:
588 591 return idents, self.unserialize(msg_list, content=content, copy=copy)
589 592 except Exception as e:
590 593 # TODO: handle it
591 594 raise e
592 595
593 596 def feed_identities(self, msg_list, copy=True):
594 597 """Split the identities from the rest of the message.
595 598
596 599 Feed until DELIM is reached, then return the prefix as idents and
597 600 remainder as msg_list. This is easily broken by setting an IDENT to DELIM,
598 601 but that would be silly.
599 602
600 603 Parameters
601 604 ----------
602 605 msg_list : a list of Message or bytes objects
603 606 The message to be split.
604 607 copy : bool
605 608 flag determining whether the arguments are bytes or Messages
606 609
607 610 Returns
608 611 -------
609 612 (idents, msg_list) : two lists
610 613 idents will always be a list of bytes, each of which is a ZMQ
611 614 identity. msg_list will be a list of bytes or zmq.Messages of the
612 615 form [HMAC,p_header,p_parent,p_content,buffer1,buffer2,...] and
613 616 should be unpackable/unserializable via self.unserialize at this
614 617 point.
615 618 """
616 619 if copy:
617 620 idx = msg_list.index(DELIM)
618 621 return msg_list[:idx], msg_list[idx+1:]
619 622 else:
620 623 failed = True
621 624 for idx,m in enumerate(msg_list):
622 625 if m.bytes == DELIM:
623 626 failed = False
624 627 break
625 628 if failed:
626 629 raise ValueError("DELIM not in msg_list")
627 630 idents, msg_list = msg_list[:idx], msg_list[idx+1:]
628 631 return [m.bytes for m in idents], msg_list
629 632
630 633 def unserialize(self, msg_list, content=True, copy=True):
631 634 """Unserialize a msg_list to a nested message dict.
632 635
633 636 This is roughly the inverse of serialize. The serialize/unserialize
634 637 methods work with full message lists, whereas pack/unpack work with
635 638 the individual message parts in the message list.
636 639
637 640 Parameters
638 641 ----------
639 642 msg_list : list of bytes or Message objects
640 643 The list of message parts of the form [HMAC,p_header,p_parent,
641 644 p_content,buffer1,buffer2,...].
642 645 content : bool (True)
643 646 Whether to unpack the content dict (True), or leave it packed
644 647 (False).
645 648 copy : bool (True)
646 649 Whether to return the bytes (True), or the non-copying Message
647 650 object in each place (False).
648 651
649 652 Returns
650 653 -------
651 654 msg : dict
652 655 The nested message dict with top-level keys [header, parent_header,
653 656 content, buffers].
654 657 """
655 658 minlen = 4
656 659 message = {}
657 660 if not copy:
658 661 for i in range(minlen):
659 662 msg_list[i] = msg_list[i].bytes
660 663 if self.auth is not None:
661 664 signature = msg_list[0]
662 665 if not signature:
663 666 raise ValueError("Unsigned Message")
664 667 if signature in self.digest_history:
665 668 raise ValueError("Duplicate Signature: %r"%signature)
666 669 self.digest_history.add(signature)
667 670 check = self.sign(msg_list[1:4])
668 671 if not signature == check:
669 672 raise ValueError("Invalid Signature: %r"%signature)
670 673 if not len(msg_list) >= minlen:
671 674 raise TypeError("malformed message, must have at least %i elements"%minlen)
672 message['header'] = self.unpack(msg_list[1])
675 header = self.unpack(msg_list[1])
676 message['header'] = header
677 message['msg_id'] = header['msg_id']
678 message['msg_type'] = header['msg_type']
673 679 message['parent_header'] = self.unpack(msg_list[2])
674 680 if content:
675 681 message['content'] = self.unpack(msg_list[3])
676 682 else:
677 683 message['content'] = msg_list[3]
678 684
679 685 message['buffers'] = msg_list[4:]
680 686 return message
681 687
682 688 def test_msg2obj():
683 689 am = dict(x=1)
684 690 ao = Message(am)
685 691 assert ao.x == am['x']
686 692
687 693 am['y'] = dict(z=1)
688 694 ao = Message(am)
689 695 assert ao.y.z == am['y']['z']
690 696
691 697 k1, k2 = 'y', 'z'
692 698 assert ao[k1][k2] == am[k1][k2]
693 699
694 700 am2 = dict(ao)
695 701 assert am['x'] == am2['x']
696 702 assert am['y']['z'] == am2['y']['z']
697 703
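For reference, a minimal usage sketch of the Session API above (not part of the diff; it assumes two connected PAIR sockets and the IPython.zmq.session module path used by the tests below):

    import zmq
    from IPython.zmq.session import Session

    ctx = zmq.Context.instance()
    a = ctx.socket(zmq.PAIR); a.bind('inproc://session-demo')
    b = ctx.socket(zmq.PAIR); b.connect('inproc://session-demo')

    session = Session()
    sent = session.send(a, 'execute_request', content=dict(code='1+1'))

    idents, received = session.recv(b, mode=0)       # mode=0 blocks until a message arrives
    assert received['msg_id'] == sent['msg_id']      # top-level aliases restored by this commit
    assert received['msg_type'] == sent['msg_type']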
@@ -1,177 +1,188 b''
1 1 """test building messages with streamsession"""
2 2
3 3 #-------------------------------------------------------------------------------
4 4 # Copyright (C) 2011 The IPython Development Team
5 5 #
6 6 # Distributed under the terms of the BSD License. The full license is in
7 7 # the file COPYING, distributed as part of this software.
8 8 #-------------------------------------------------------------------------------
9 9
10 10 #-------------------------------------------------------------------------------
11 11 # Imports
12 12 #-------------------------------------------------------------------------------
13 13
14 14 import os
15 15 import uuid
16 16 import zmq
17 17
18 18 from zmq.tests import BaseZMQTestCase
19 19 from zmq.eventloop.zmqstream import ZMQStream
20 20
21 21 from IPython.zmq import session as ss
22 22
23 23 class SessionTestCase(BaseZMQTestCase):
24 24
25 25 def setUp(self):
26 26 BaseZMQTestCase.setUp(self)
27 27 self.session = ss.Session()
28 28
29 29
30 30 class MockSocket(zmq.Socket):
31 31
32 32 def __init__(self, *args, **kwargs):
33 33 super(MockSocket,self).__init__(*args,**kwargs)
34 34 self.data = []
35 35
36 36 def send_multipart(self, msgparts, *args, **kwargs):
37 37 self.data.extend(msgparts)
38 38
39 39 def send(self, part, *args, **kwargs):
40 40 self.data.append(part)
41 41
42 42 def recv_multipart(self, *args, **kwargs):
43 43 return self.data
44 44
45 45 class TestSession(SessionTestCase):
46 46
47 47 def test_msg(self):
48 48 """message format"""
49 49 msg = self.session.msg('execute')
50 thekeys = set('header parent_header content'.split())
50 thekeys = set('header parent_header content msg_type msg_id'.split())
51 51 s = set(msg.keys())
52 52 self.assertEquals(s, thekeys)
53 53 self.assertTrue(isinstance(msg['content'],dict))
54 54 self.assertTrue(isinstance(msg['header'],dict))
55 55 self.assertTrue(isinstance(msg['parent_header'],dict))
56 self.assertTrue(isinstance(msg['msg_id'],str))
57 self.assertTrue(isinstance(msg['msg_type'],str))
56 58 self.assertEquals(msg['header']['msg_type'], 'execute')
59 self.assertEquals(msg['msg_type'], 'execute')
57 60
58 61 def test_serialize(self):
59 62 msg = self.session.msg('execute',content=dict(a=10))
60 63 msg_list = self.session.serialize(msg, ident=b'foo')
61 64 ident, msg_list = self.session.feed_identities(msg_list)
62 65 new_msg = self.session.unserialize(msg_list)
63 66 self.assertEquals(ident[0], b'foo')
67 self.assertEquals(new_msg['msg_id'],msg['msg_id'])
68 self.assertEquals(new_msg['msg_type'],msg['msg_type'])
64 69 self.assertEquals(new_msg['header'],msg['header'])
65 70 self.assertEquals(new_msg['content'],msg['content'])
66 71 self.assertEquals(new_msg['parent_header'],msg['parent_header'])
67 72
68 73 def test_send(self):
69 74 socket = MockSocket(zmq.Context.instance(),zmq.PAIR)
70 75
71 76 msg = self.session.msg('execute', content=dict(a=10))
72 77 self.session.send(socket, msg, ident=b'foo', buffers=[b'bar'])
73 78 ident, msg_list = self.session.feed_identities(socket.data)
74 79 new_msg = self.session.unserialize(msg_list)
75 80 self.assertEquals(ident[0], b'foo')
81 self.assertEquals(new_msg['msg_id'],msg['msg_id'])
82 self.assertEquals(new_msg['msg_type'],msg['msg_type'])
76 83 self.assertEquals(new_msg['header'],msg['header'])
77 84 self.assertEquals(new_msg['content'],msg['content'])
78 85 self.assertEquals(new_msg['parent_header'],msg['parent_header'])
79 86 self.assertEquals(new_msg['buffers'],[b'bar'])
80 87
81 88 socket.data = []
82 89
83 90 content = msg['content']
84 91 header = msg['header']
85 92 parent = msg['parent_header']
86 93 msg_type = header['msg_type']
87 94 self.session.send(socket, None, content=content, parent=parent,
88 95 header=header, ident=b'foo', buffers=[b'bar'])
89 96 ident, msg_list = self.session.feed_identities(socket.data)
90 97 new_msg = self.session.unserialize(msg_list)
91 98 self.assertEquals(ident[0], b'foo')
99 self.assertEquals(new_msg['msg_id'],msg['msg_id'])
100 self.assertEquals(new_msg['msg_type'],msg['msg_type'])
92 101 self.assertEquals(new_msg['header'],msg['header'])
93 102 self.assertEquals(new_msg['content'],msg['content'])
94 103 self.assertEquals(new_msg['parent_header'],msg['parent_header'])
95 104 self.assertEquals(new_msg['buffers'],[b'bar'])
96 105
97 106 socket.data = []
98 107
99 108 self.session.send(socket, msg, ident=b'foo', buffers=[b'bar'])
100 109 ident, new_msg = self.session.recv(socket)
101 110 self.assertEquals(ident[0], b'foo')
111 self.assertEquals(new_msg['msg_id'],msg['msg_id'])
112 self.assertEquals(new_msg['msg_type'],msg['msg_type'])
102 113 self.assertEquals(new_msg['header'],msg['header'])
103 114 self.assertEquals(new_msg['content'],msg['content'])
104 115 self.assertEquals(new_msg['parent_header'],msg['parent_header'])
105 116 self.assertEquals(new_msg['buffers'],[b'bar'])
106 117
107 118 socket.close()
108 119
109 120 def test_args(self):
110 121 """initialization arguments for Session"""
111 122 s = self.session
112 123 self.assertTrue(s.pack is ss.default_packer)
113 124 self.assertTrue(s.unpack is ss.default_unpacker)
114 125 self.assertEquals(s.username, os.environ.get('USER', u'username'))
115 126
116 127 s = ss.Session()
117 128 self.assertEquals(s.username, os.environ.get('USER', u'username'))
118 129
119 130 self.assertRaises(TypeError, ss.Session, pack='hi')
120 131 self.assertRaises(TypeError, ss.Session, unpack='hi')
121 132 u = str(uuid.uuid4())
122 133 s = ss.Session(username=u'carrot', session=u)
123 134 self.assertEquals(s.session, u)
124 135 self.assertEquals(s.username, u'carrot')
125 136
126 137 def test_tracking(self):
127 138 """test tracking messages"""
128 139 a,b = self.create_bound_pair(zmq.PAIR, zmq.PAIR)
129 140 s = self.session
130 141 stream = ZMQStream(a)
131 142 msg = s.send(a, 'hello', track=False)
132 143 self.assertTrue(msg['tracker'] is None)
133 144 msg = s.send(a, 'hello', track=True)
134 145 self.assertTrue(isinstance(msg['tracker'], zmq.MessageTracker))
135 146 M = zmq.Message(b'hi there', track=True)
136 147 msg = s.send(a, 'hello', buffers=[M], track=True)
137 148 t = msg['tracker']
138 149 self.assertTrue(isinstance(t, zmq.MessageTracker))
139 150 self.assertRaises(zmq.NotDone, t.wait, .1)
140 151 del M
141 152 t.wait(1) # this will raise
142 153
143 154
144 155 # def test_rekey(self):
145 156 # """rekeying dict around json str keys"""
146 157 # d = {'0': uuid.uuid4(), 0:uuid.uuid4()}
147 158 # self.assertRaises(KeyError, ss.rekey, d)
148 159 #
149 160 # d = {'0': uuid.uuid4(), 1:uuid.uuid4(), 'asdf':uuid.uuid4()}
150 161 # d2 = {0:d['0'],1:d[1],'asdf':d['asdf']}
151 162 # rd = ss.rekey(d)
152 163 # self.assertEquals(d2,rd)
153 164 #
154 165 # d = {'1.5':uuid.uuid4(),'1':uuid.uuid4()}
155 166 # d2 = {1.5:d['1.5'],1:d['1']}
156 167 # rd = ss.rekey(d)
157 168 # self.assertEquals(d2,rd)
158 169 #
159 170 # d = {'1.0':uuid.uuid4(),'1':uuid.uuid4()}
160 171 # self.assertRaises(KeyError, ss.rekey, d)
161 172 #
162 173 def test_unique_msg_ids(self):
163 174 """test that messages receive unique ids"""
164 175 ids = set()
165 176 for i in range(2**12):
166 177 h = self.session.msg_header('test')
167 178 msg_id = h['msg_id']
168 179 self.assertTrue(msg_id not in ids)
169 180 ids.add(msg_id)
170 181
171 182 def test_feed_identities(self):
172 183 """scrub the front for zmq IDENTITIES"""
173 184 theids = "engine client other".split()
174 185 content = dict(code='whoda',stuff=object())
175 186 themsg = self.session.msg('execute',content=content)
176 187 pmsg = theids
177 188
@@ -1,937 +1,941 b''
1 1 .. _messaging:
2 2
3 3 ======================
4 4 Messaging in IPython
5 5 ======================
6 6
7 7
8 8 Introduction
9 9 ============
10 10
11 11 This document explains the basic communications design and messaging
12 12 specification for how the various IPython objects interact over a network
13 13 transport. The current implementation uses the ZeroMQ_ library for messaging
14 14 within and between hosts.
15 15
16 16 .. Note::
17 17
18 18 This document should be considered the authoritative description of the
19 19 IPython messaging protocol, and all developers are strongly encouraged to
20 20 keep it updated as the implementation evolves, so that we have a single
21 21 common reference for all protocol details.
22 22
23 23 The basic design is explained in the following diagram:
24 24
25 25 .. image:: figs/frontend-kernel.png
26 26 :width: 450px
27 27 :alt: IPython kernel/frontend messaging architecture.
28 28 :align: center
29 29 :target: ../_images/frontend-kernel.png
30 30
31 31 A single kernel can be simultaneously connected to one or more frontends. The
32 32 kernel has three sockets that serve the following functions:
33 33
34 34 1. REQ: this socket is connected to a *single* frontend at a time, and it allows
35 35 the kernel to request input from a frontend when :func:`raw_input` is called.
36 36 The frontend holding the matching REP socket acts as a 'virtual keyboard'
37 37 for the kernel while this communication is happening (illustrated in the
38 38 figure by the black outline around the central keyboard). In practice,
39 39 frontends may display such kernel requests using a special input widget or
40 40 otherwise indicating that the user is to type input for the kernel instead
41 41 of normal commands in the frontend.
42 42
43 43 2. XREP: this single sockets allows multiple incoming connections from
44 44 frontends, and this is the socket where requests for code execution, object
45 45 information, prompts, etc. are made to the kernel by any frontend. The
46 46 communication on this socket is a sequence of request/reply actions from
47 47 each frontend and the kernel.
48 48
49 49 3. PUB: this socket is the 'broadcast channel' where the kernel publishes all
50 50 side effects (stdout, stderr, etc.) as well as the requests coming from any
51 51 client over the XREP socket and its own requests on the REP socket. There
52 52 are a number of actions in Python which generate side effects: :func:`print`
53 53 writes to ``sys.stdout``, errors generate tracebacks, etc. Additionally, in
54 54 a multi-client scenario, we want all frontends to be able to know what each
55 55 other has sent to the kernel (this can be useful in collaborative scenarios,
56 56 for example). This socket allows both side effects and the information
57 57 about communications taking place with one client over the XREQ/XREP channel
58 58 to be made available to all clients in a uniform manner.
59 59
60 60 All messages are tagged with enough information (details below) for clients
61 61 to know which messages come from their own interaction with the kernel and
62 62 which ones are from other clients, so they can display each type
63 63 appropriately.
64 64
65 65 The actual format of the messages allowed on each of these channels is
66 66 specified below. Messages are dicts of dicts with string keys and values that
67 67 are reasonably representable in JSON. Our current implementation uses JSON
68 68 explicitly as its message format, but this shouldn't be considered a permanent
69 69 feature. As we've discovered that JSON has non-trivial performance issues due
70 70 to excessive copying, we may in the future move to a pure pickle-based raw
71 71 message format. However, it should be possible to easily convert from the raw
72 72 objects to JSON, since we may have non-python clients (e.g. a web frontend).
73 73 As long as it's easy to make a JSON version of the objects that is a faithful
74 74 representation of all the data, we can communicate with such clients.
75 75
76 76 .. Note::
77 77
78 78 Not all of these have yet been fully fleshed out, but the key ones are, see
79 79 kernel and frontend files for actual implementation details.
80 80
81 81
82 82 Python functional API
83 83 =====================
84 84
85 85 As messages are dicts, they map naturally to a ``func(**kw)`` call form. We
86 86 should develop, at a few key points, functional forms of all the requests that
87 87 take arguments in this manner and automatically construct the necessary dict
88 88 for sending.
89 89
90 90
91 91 General Message Format
92 92 ======================
93 93
94 94 All messages send or received by any IPython process should have the following
95 95 generic structure::
96 96
97 97 {
98 98 # The message header contains a pair of unique identifiers for the
99 99 # originating session and the actual message id, in addition to the
100 100 # username for the process that generated the message. This is useful in
101 101 # collaborative settings where multiple users may be interacting with the
102 102 # same kernel simultaneously, so that frontends can label the various
103 103 # messages in a meaningful way.
104 104 'header' : {
105 105 'msg_id' : uuid,
106 106 'username' : str,
107 107 'session' : uuid
108 108 # All recognized message type strings are listed below.
109 109 'msg_type' : str,
110 110 },
111 # The msg's unique identifier and type are stored in the header, but
112 # are also accessible at the top-level for convenience.
113 'msg_id' : uuid,
114 'msg_type' : str,
111 115
112 116 # In a chain of messages, the header from the parent is copied so that
113 117 # clients can track where messages come from.
114 118 'parent_header' : dict,
115 119
116 120 # The actual content of the message must be a dict, whose structure
117 121 # depends on the message type.x
118 122 'content' : dict,
119 123 }
120 124
121 125 For each message type, the actual content will differ and all existing message
122 126 types are specified in what follows of this document.
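As a concrete illustration (placeholder identifiers, not captured from a live kernel), an ``execute_request`` following this generic structure could look like::

    {
        'header' : {
            'msg_id' : '<uuid>',
            'username' : 'user',
            'session' : '<uuid>',
            'msg_type' : 'execute_request',
        },
        'msg_id' : '<uuid>',              # duplicates header['msg_id']
        'msg_type' : 'execute_request',   # duplicates header['msg_type']
        'parent_header' : {},
        'content' : {
            'code' : '2+2',
            'silent' : False,
            'user_variables' : [],
            'user_expressions' : {},
        },
    }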
123 127
124 128
125 129 Messages on the XREP/XREQ socket
126 130 ================================
127 131
128 132 .. _execute:
129 133
130 134 Execute
131 135 -------
132 136
133 137 This message type is used by frontends to ask the kernel to execute code on
134 138 behalf of the user, in a namespace reserved to the user's variables (and thus
135 139 separate from the kernel's own internal code and variables).
136 140
137 141 Message type: ``execute_request``::
138 142
139 143 content = {
140 144 # Source code to be executed by the kernel, one or more lines.
141 145 'code' : str,
142 146
143 147 # A boolean flag which, if True, signals the kernel to execute this
144 148 # code as quietly as possible. This means that the kernel will compile
145 149 # the code with 'exec' instead of 'single' (so
146 150 # sys.displayhook will not fire), and will *not*:
147 151 # - broadcast exceptions on the PUB socket
148 152 # - do any logging
149 153 # - populate any history
150 154 #
151 155 # The default is False.
152 156 'silent' : bool,
153 157
154 158 # A list of variable names from the user's namespace to be retrieved. What
155 159 # is returned is a JSON string of the variable's repr(), not a python object.
156 160 'user_variables' : list,
157 161
158 162 # Similarly, a dict mapping names to expressions to be evaluated in the
159 163 # user's dict.
160 164 'user_expressions' : dict,
161 165 }
162 166
163 167 The ``code`` field contains a single string (possibly multiline). The kernel
164 168 is responsible for splitting this into one or more independent execution blocks
165 169 and deciding whether to compile these in 'single' or 'exec' mode (see below for
166 170 detailed execution semantics).
167 171
168 172 The ``user_`` fields deserve a detailed explanation. In the past, IPython had
169 173 the notion of a prompt string that allowed arbitrary code to be evaluated, and
170 174 this was put to good use by many in creating prompts that displayed system
171 175 status, path information, and even more esoteric uses like remote instrument
172 176 status acquired over the network. But now that IPython has a clean separation
173 177 between the kernel and the clients, the kernel has no prompt knowledge; prompts
174 178 are a frontend-side feature, and it should be even possible for different
175 179 frontends to display different prompts while interacting with the same kernel.
176 180
177 181 The kernel now provides the ability to retrieve data from the user's namespace
178 182 after the execution of the main ``code``, thanks to two fields in the
179 183 ``execute_request`` message:
180 184
181 185 - ``user_variables``: If only variables from the user's namespace are needed, a
182 186 list of variable names can be passed and a dict with these names as keys and
183 187 their :func:`repr()` as values will be returned.
184 188
185 189 - ``user_expressions``: For more complex expressions that require function
186 190 evaluations, a dict can be provided with string keys and arbitrary python
187 191 expressions as values. The return message will contain also a dict with the
188 192 same keys and the :func:`repr()` of the evaluated expressions as value.
189 193
190 194 With this information, frontends can display any status information they wish
191 195 in the form that best suits each frontend (a status line, a popup, inline for a
192 196 terminal, etc).
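For instance (an illustrative request, not a captured message), a frontend that wants to display the value of ``x`` and a derived quantity after running some code could send::

    content = {
        'code' : 'x = 10',
        'silent' : False,
        'user_variables' : ['x'],
        'user_expressions' : {'twice_x' : 'x * 2'},
    }

and would get back dicts of reprs such as ``{'x': '10'}`` and ``{'twice_x': '20'}`` in the ``user_variables`` and ``user_expressions`` fields of the reply.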
193 197
194 198 .. Note::
195 199
196 200 In order to obtain the current execution counter for the purposes of
197 201 displaying input prompts, frontends simply make an execution request with an
198 202 empty code string and ``silent=True``.
199 203
200 204 Execution semantics
201 205 ~~~~~~~~~~~~~~~~~~~
202 206
203 207 When the silent flag is false, the execution of user code consists of the
204 208 following phases (in silent mode, only the ``code`` field is executed):
205 209
206 210 1. Run the ``pre_runcode_hook``.
207 211
208 212 2. Execute the ``code`` field, see below for details.
209 213
210 214 3. If #2 succeeds, ``user_variables`` and ``user_expressions`` are
211 215 computed. This ensures that any error in the latter doesn't harm the main
212 216 code execution.
213 217
214 218 4. Call any method registered with :meth:`register_post_execute`.
215 219
216 220 .. warning::
217 221
218 222 The API for running code before/after the main code block is likely to
219 223 change soon. Both the ``pre_runcode_hook`` and the
220 224 :meth:`register_post_execute` are susceptible to modification, as we find a
221 225 consistent model for both.
222 226
223 227 To understand how the ``code`` field is executed, one must know that Python
224 228 code can be compiled in one of three modes (controlled by the ``mode`` argument
225 229 to the :func:`compile` builtin):
226 230
227 231 *single*
228 232 Valid for a single interactive statement (though the source can contain
229 233 multiple lines, such as a for loop). When compiled in this mode, the
230 234 generated bytecode contains special instructions that trigger the calling of
231 235 :func:`sys.displayhook` for any expression in the block that returns a value.
232 236 This means that a single statement can actually produce multiple calls to
233 237 :func:`sys.displayhook`, if for example it contains a loop where each
234 238 iteration computes an unassigned expression; the following generates 10 calls::
235 239
236 240 for i in range(10):
237 241 i**2
238 242
239 243 *exec*
240 244 An arbitrary amount of source code, this is how modules are compiled.
241 245 :func:`sys.displayhook` is *never* implicitly called.
242 246
243 247 *eval*
244 248 A single expression that returns a value. :func:`sys.displayhook` is *never*
245 249 implicitly called.
246 250
247 251
248 252 The ``code`` field is split into individual blocks each of which is valid for
249 253 execution in 'single' mode, and then:
250 254
251 255 - If there is only a single block: it is executed in 'single' mode.
252 256
253 257 - If there is more than one block:
254 258
255 259 * if the last one is a single line long, run all but the last in 'exec' mode
256 260 and the very last one in 'single' mode. This makes it easy to type simple
257 261 expressions at the end to see computed values.
258 262
259 263 * if the last one is no more than two lines long, run all but the last in
260 264 'exec' mode and the very last one in 'single' mode. This makes it easy to
261 265 type simple expressions at the end to see computed values.
263 267
264 268 * otherwise (last one is also multiline), run all in 'exec' mode as a single
265 269 unit.
266 270
267 271 Any error in retrieving the ``user_variables`` or evaluating the
268 272 ``user_expressions`` will result in a simple error message in the return fields
269 273 of the form::
270 274
271 275 [ERROR] ExceptionType: Exception message
272 276
273 277 The user can simply send the same variable name or expression for evaluation to
274 278 see a regular traceback.
275 279
276 280 Errors in any registered post_execute functions are also reported similarly,
277 281 and the failing function is removed from the post_execution set so that it does
278 282 not continue triggering failures.
279 283
280 284 Upon completion of the execution request, the kernel *always* sends a reply,
281 285 with a status code indicating what happened and additional data depending on
282 286 the outcome. See :ref:`below <execution_results>` for the possible return
283 287 codes and associated data.
284 288
285 289
286 290 Execution counter (old prompt number)
287 291 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
288 292
289 293 The kernel has a single, monotonically increasing counter of all execution
290 294 requests that are made with ``silent=False``. This counter is used to populate
291 295 the ``In[n]``, ``Out[n]`` and ``_n`` variables, so clients will likely want to
292 296 display it in some form to the user, which will typically (but not necessarily)
293 297 be done in the prompts. The value of this counter will be returned as the
294 298 ``execution_count`` field of all ``execute_reply`` messages.
295 299
296 300 .. _execution_results:
297 301
298 302 Execution results
299 303 ~~~~~~~~~~~~~~~~~
300 304
301 305 Message type: ``execute_reply``::
302 306
303 307 content = {
304 308 # One of: 'ok' OR 'error' OR 'abort'
305 309 'status' : str,
306 310
307 311 # The global kernel counter that increases by one with each non-silent
308 312 # executed request. This will typically be used by clients to display
309 313 # prompt numbers to the user. If the request was a silent one, this will
310 314 # be the current value of the counter in the kernel.
311 315 'execution_count' : int,
312 316 }
313 317
314 318 When status is 'ok', the following extra fields are present::
315 319
316 320 {
317 321 # The execution payload is a dict with string keys that may have been
318 322 # produced by the code being executed. It is retrieved by the kernel at
319 323 # the end of the execution and sent back to the front end, which can take
320 324 # action on it as needed. See main text for further details.
321 325 'payload' : dict,
322 326
323 327 # Results for the user_variables and user_expressions.
324 328 'user_variables' : dict,
325 329 'user_expressions' : dict,
326 330
327 331 # The kernel will often transform the input provided to it. If the
328 332 # '---->' transform had been applied, this is filled, otherwise it's the
329 333 # empty string. So transformations like magics don't appear here, only
330 334 # autocall ones.
331 335 'transformed_code' : str,
332 336 }
333 337
334 338 .. admonition:: Execution payloads
335 339
336 340 The notion of an 'execution payload' is different from a return value of a
337 341 given set of code, which normally is just displayed on the pyout stream
338 342 through the PUB socket. The idea of a payload is to allow special types of
339 343 code, typically magics, to populate a data container in the IPython kernel
340 344 that will be shipped back to the caller via this channel. The kernel will
341 345 have an API for this, probably something along the lines of::
342 346
343 347 ip.exec_payload_add(key, value)
344 348
345 349 though this API is still in the design stages. The data returned in this
346 350 payload will allow frontends to present special views of what just happened.
347 351
348 352
349 353 When status is 'error', the following extra fields are present::
350 354
351 355 {
352 356 'exc_name' : str, # Exception name, as a string
353 357 'exc_value' : str, # Exception value, as a string
354 358
355 359 # The traceback will contain a list of frames, represented each as a
356 360 # string. For now we'll stick to the existing design of ultraTB, which
357 361 # controls exception level of detail statefully. But eventually we'll
358 362 # want to grow into a model where more information is collected and
359 363 # packed into the traceback object, with clients deciding how little or
360 364 # how much of it to unpack. But for now, let's start with a simple list
361 365 # of strings, since that requires only minimal changes to ultratb as
362 366 # written.
363 367 'traceback' : list,
364 368 }
365 369
366 370
367 371 When status is 'abort', there are for now no additional data fields. This
368 372 happens when the kernel was interrupted by a signal.
369 373
370 374 Kernel attribute access
371 375 -----------------------
372 376
373 377 .. warning::
374 378
375 379 This part of the messaging spec is not actually implemented in the kernel
376 380 yet.
377 381
378 382 While this protocol does not specify full RPC access to arbitrary methods of
379 383 the kernel object, the kernel does allow read (and in some cases write) access
380 384 to certain attributes.
381 385
382 386 The policy for which attributes can be read is: any attribute of the kernel, or
383 387 its sub-objects, that belongs to a :class:`Configurable` object and has been
384 388 declared at the class-level with Traits validation, is in principle accessible
385 389 as long as its name does not begin with a leading underscore. The attribute
386 390 itself will have metadata indicating whether it allows remote read and/or write
387 391 access. The message spec follows for attribute read and write requests.
388 392
389 393 Message type: ``getattr_request``::
390 394
391 395 content = {
392 396 # The (possibly dotted) name of the attribute
393 397 'name' : str,
394 398 }
395 399
396 400 When a ``getattr_request`` fails, there are two possible error types:
397 401
398 402 - AttributeError: this type of error was raised when trying to access the
399 403 given name by the kernel itself. This means that the attribute likely
400 404 doesn't exist.
401 405
402 406 - AccessError: the attribute exists but its value is not readable remotely.
403 407
404 408
405 409 Message type: ``getattr_reply``::
406 410
407 411 content = {
408 412 # One of ['ok', 'AttributeError', 'AccessError'].
409 413 'status' : str,
410 414 # If status is 'ok', a JSON object.
411 415 'value' : object,
412 416 }
413 417
414 418 Message type: ``setattr_request``::
415 419
416 420 content = {
417 421 # The (possibly dotted) name of the attribute
418 422 'name' : str,
419 423
420 424 # A JSON-encoded object, that will be validated by the Traits
421 425 # information in the kernel
422 426 'value' : object,
423 427 }
424 428
425 429 When a ``setattr_request`` fails, there are also two possible error types with
426 430 similar meanings as those of the ``getattr_request`` case, but for writing.
427 431
428 432 Message type: ``setattr_reply``::
429 433
430 434 content = {
431 435 # One of ['ok', 'AttributeError', 'AccessError'].
432 436 'status' : str,
433 437 }
434 438
435 439
436 440
437 441 Object information
438 442 ------------------
439 443
440 444 One of IPython's most used capabilities is the introspection of Python objects
441 445 in the user's namespace, typically invoked via the ``?`` and ``??`` characters
442 446 (which in reality are shorthands for the ``%pinfo`` magic). This is used often
443 447 enough that it warrants an explicit message type, especially because frontends
444 448 may want to get object information in response to user keystrokes (like Tab or
445 449 F1), in addition to the user explicitly typing code like ``x??``.
446 450
447 451 Message type: ``object_info_request``::
448 452
449 453 content = {
450 454 # The (possibly dotted) name of the object to be searched in all
451 455 # relevant namespaces
452 456 'name' : str,
453 457
454 458 # The level of detail desired. The default (0) is equivalent to typing
455 459 # 'x?' at the prompt, 1 is equivalent to 'x??'.
456 460 'detail_level' : int,
457 461 }
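For example, a frontend reacting to Tab or F1 on ``a.isalpha`` might send (illustrative)::

    content = {'name' : 'a.isalpha', 'detail_level' : 0}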
458 462
459 463 The returned information will be a dictionary with keys very similar to the
460 464 field names that IPython prints at the terminal.
461 465
462 466 Message type: ``object_info_reply``::
463 467
464 468 content = {
465 469 # The name the object was requested under
466 470 'name' : str,
467 471
468 472 # Boolean flag indicating whether the named object was found or not. If
469 473 # it's false, all other fields will be empty.
470 474 'found' : bool,
471 475
472 476 # Flags for magics and system aliases
473 477 'ismagic' : bool,
474 478 'isalias' : bool,
475 479
476 480 # The name of the namespace where the object was found ('builtin',
477 481 # 'magics', 'alias', 'interactive', etc.)
478 482 'namespace' : str,
479 483
480 484 # The type name will be type.__name__ for normal Python objects, but it
481 485 # can also be a string like 'Magic function' or 'System alias'
482 486 'type_name' : str,
483 487
484 488 # The string form of the object, possibly truncated for length if
485 489 # detail_level is 0
486 490 'string_form' : str,
487 491
488 492 # For objects with a __class__ attribute this will be set
489 493 'base_class' : str,
490 494
491 495 # For objects with a __len__ attribute this will be set
492 496 'length' : int,
493 497
494 498 # If the object is a function, class or method whose file we can find,
495 499 # we give its full path
496 500 'file' : str,
497 501
498 502 # For pure Python callable objects, we can reconstruct the object
499 503 # definition line which provides its call signature. For convenience this
500 504 # is returned as a single 'definition' field, but below the raw parts that
501 505 # compose it are also returned as the argspec field.
502 506 'definition' : str,
503 507
504 508 # The individual parts that together form the definition string. Clients
505 509 # with rich display capabilities may use this to provide a richer and more
506 510 # precise representation of the definition line (e.g. by highlighting
507 511 # arguments based on the user's cursor position). For non-callable
508 512 # objects, this field is empty.
509 513 'argspec' : { # The names of all the arguments
510 514 args : list,
511 515 # The name of the varargs (*args), if any
512 516 varargs : str,
513 517 # The name of the varkw (**kw), if any
514 518 varkw : str,
515 519 # The values (as strings) of all default arguments. Note
516 520 # that these must be matched *in reverse* with the 'args'
517 521 # list above, since the first positional args have no default
518 522 # value at all.
519 523 defaults : list,
520 524 },
521 525
522 526 # For instances, provide the constructor signature (the definition of
523 527 # the __init__ method):
524 528 'init_definition' : str,
525 529
526 530 # Docstrings: for any object (function, method, module, package) with a
527 531 # docstring, we show it. But in addition, we may provide additional
528 532 # docstrings. For example, for instances we will show the constructor
529 533 # and class docstrings as well, if available.
530 534 'docstring' : str,
531 535
532 536 # For instances, provide the constructor and class docstrings
533 537 'init_docstring' : str,
534 538 'class_docstring' : str,
535 539
536 540 # If it's a callable object whose call method has a separate docstring and
537 541 # definition line:
538 542 'call_def' : str,
539 543 'call_docstring' : str,
540 544
541 545 # If detail_level was 1, we also try to find the source code that
542 546 # defines the object, if possible. The string 'None' will indicate
543 547 # that no source was found.
544 548 'source' : str,
545 549 }
546 550
547 551
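As an illustrative sketch only (none of these values come from a real kernel),
a request for a user-defined function ``foo`` and an abridged reply might look
like::

    # object_info_request
    content = {
        'name' : 'foo',        # a function defined in the user's namespace
        'detail_level' : 0,    # equivalent to the user typing 'foo?'
    }

    # object_info_reply (abridged: only a few of the fields listed above)
    content = {
        'name' : 'foo',
        'found' : True,
        'ismagic' : False,
        'isalias' : False,
        'namespace' : 'interactive',
        'type_name' : 'function',
        'definition' : 'foo(x, y=1)',
        'docstring' : 'Add x and y.',
    }
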
548 552 Complete
549 553 --------
550 554
551 555 Message type: ``complete_request``::
552 556
553 557 content = {
554 558 # The text to be completed, such as 'a.is'
555 559 'text' : str,
556 560
557 561 # The full line, such as 'print a.is'. This allows completers to
558 562 # make decisions that may require information about more than just the
559 563 # current word.
560 564 'line' : str,
561 565
562 566 # The entire block of text where the line is. This may be useful in the
563 567 # case of multiline completions where more context may be needed. Note: if
564 568 # in practice this field proves unnecessary, remove it to lighten the
565 569 # messages.
567 571 'block' : str,
568 572
569 573 # The position of the cursor where the user hit 'TAB' on the line.
570 574 'cursor_pos' : int,
571 575 }
572 576
573 577 Message type: ``complete_reply``::
574 578
575 579 content = {
576 580 # The list of all matches to the completion request, such as
577 581 # ['a.isalnum', 'a.isalpha'] for the above example.
578 582 'matches' : list
579 583 }
580 584
581 585
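As a concrete sketch (the matches shown are hypothetical), completing the text
``a.is`` where ``a`` is a string could produce the following exchange::

    # complete_request
    content = {
        'text' : 'a.is',
        'line' : 'print a.is',
        'block' : 'print a.is',
        'cursor_pos' : 10,
    }

    # complete_reply
    content = {
        'matches' : ['a.isalnum', 'a.isalpha', 'a.isdigit',
                     'a.islower', 'a.isspace', 'a.istitle', 'a.isupper'],
    }
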
582 586 History
583 587 -------
584 588
585 589 For clients to explicitly request history from a kernel. The kernel has all
586 590 the actual execution history stored in a single location, so clients can
587 591 request it from the kernel when needed.
588 592
589 593 Message type: ``history_request``::
590 594
591 595 content = {
592 596
593 597 # If True, also return output history in the resulting dict.
594 598 'output' : bool,
595 599
596 600 # If True, return the raw input history, else the transformed input.
597 601 'raw' : bool,
598 602
599 603 # So far, this can be 'range', 'tail' or 'search'.
600 604 'hist_access_type' : str,
601 605
602 606 # If hist_access_type is 'range', get a range of input cells. session can
603 607 # be a positive session number, or a negative number to count back from
604 608 # the current session.
605 609 'session' : int,
606 610 # start and stop are line numbers within that session.
607 611 'start' : int,
608 612 'stop' : int,
609 613
610 614 # If hist_access_type is 'tail', get the last n cells.
611 615 'n' : int,
612 616
613 617 # If hist_access_type is 'search', get cells matching the specified glob
614 618 # pattern (with * and ? as wildcards).
615 619 'pattern' : str,
616 620
617 621 }
618 622
619 623 Message type: ``history_reply``::
620 624
621 625 content = {
622 626 # A list of 3-tuples, either:
623 627 # (session, line_number, input) or
624 628 # (session, line_number, (input, output)),
625 629 # depending on whether output was False or True, respectively.
626 630 'history' : list,
627 631 }
628 632
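For example, a hypothetical request for the last two cells of the current
session, with output included, and one possible reply (values are
illustrative)::

    # history_request
    content = {
        'output' : True,
        'raw' : False,
        'hist_access_type' : 'tail',
        'n' : 2,
    }

    # history_reply
    content = {
        'history' : [
            (1, 3, ('a = 1', None)),
            (1, 4, ('a + 1', '2')),
        ],
    }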
629 633
630 634 Connect
631 635 -------
632 636
633 637 When a client connects to the request/reply socket of the kernel, it can issue
634 638 a connect request to get basic information about the kernel, such as the ports
635 639 the other ZeroMQ sockets are listening on. This allows clients to connect to a
636 640 kernel while knowing in advance only a single port (that of the XREQ/XREP channel).
637 641
638 642 Message type: ``connect_request``::
639 643
640 644 content = {
641 645 }
642 646
643 647 Message type: ``connect_reply``::
644 648
645 649 content = {
646 650 'xrep_port' : int, # The port the XREP socket is listening on.
647 651 'pub_port' : int, # The port the PUB socket is listening on.
648 652 'req_port' : int, # The port the REQ socket is listening on.
649 653 'hb_port' : int, # The port the heartbeat socket is listening on.
650 654 }
651 655
652 656
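A client that already knows the XREQ/XREP port can use such a reply to wire up
its remaining sockets. A minimal pyzmq sketch, assuming a local kernel and the
hypothetical port numbers shown::

    import zmq

    # values as they might arrive in a connect_reply
    reply_content = {'xrep_port': 5555, 'pub_port': 5556,
                     'req_port': 5557, 'hb_port': 5558}

    ctx = zmq.Context.instance()
    sub = ctx.socket(zmq.SUB)
    sub.setsockopt(zmq.SUBSCRIBE, b'')   # subscribe to all topics
    sub.connect('tcp://127.0.0.1:%i' % reply_content['pub_port'])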
653 657
654 658 Kernel shutdown
655 659 ---------------
656 660
657 661 Clients can request that the kernel shut itself down; this is used in
658 662 multiple cases:
659 663
660 664 - when the user chooses to close the client application via a menu or window
661 665 control.
662 666 - when the user types 'exit' or 'quit' (or their uppercase magic equivalents).
663 667 - when the user chooses a GUI method (like the 'Ctrl-C' shortcut in the
664 668 IPythonQt client) to force a kernel restart to get a clean kernel without
665 669 losing client-side state like history or inlined figures.
666 670
667 671 The client sends a shutdown request to the kernel, and once it receives the
668 672 reply message (which is otherwise empty), it can assume that the kernel has
669 673 completed shutdown safely.
670 674
671 675 Upon their own shutdown, client applications will typically perform a final
672 676 sanity check and forcefully terminate any kernel that is still alive, to
673 677 avoid leaving stray processes on the user's machine.
674 678
675 679 Both the shutdown request and its reply carry only a single ``restart`` flag in
676 680 their content, indicating whether the shutdown precedes a restart.
677 681
678 682 Message type: ``shutdown_request``::
679 683
680 684 content = {
681 685 'restart' : bool # whether the shutdown is final, or precedes a restart
682 686 }
683 687
684 688 Message type: ``shutdown_reply``::
685 689
686 690 content = {
687 691 'restart' : bool # whether the shutdown is final, or precedes a restart
688 692 }
689 693
690 694 .. Note::
691 695
692 696 When clients detect a dead kernel via inactivity on the heartbeat socket, they
693 697 simply send a forceful process-termination signal, since a dead process is
694 698 unlikely to respond in any useful way to messages.
695 699
696 700
697 701 Messages on the PUB/SUB socket
698 702 ==============================
699 703
700 704 Streams (stdout, stderr, etc)
701 705 ------------------------------
702 706
703 707 Message type: ``stream``::
704 708
705 709 content = {
706 710 # The name of the stream is one of 'stdin', 'stdout', 'stderr'
707 711 'name' : str,
708 712
709 713 # The data is an arbitrary string to be written to that stream
710 714 'data' : str,
711 715 }
712 716
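For example, a kernel writing a line to stdout would publish (the data value is
illustrative)::

    content = {
        'name' : 'stdout',
        'data' : 'hello from the kernel\n',
    }
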
713 717 When a kernel receives a raw_input call, it should also broadcast it on the pub
714 718 socket with the names 'stdin' and 'stdin_reply'. This will allow other clients
715 719 to monitor/display kernel interactions and possibly replay them to their user
716 720 or otherwise expose them.
717 721
718 722 Display Data
719 723 ------------
720 724
721 725 This type of message is used to bring back data that should be displayed (text,
722 726 html, svg, etc.) in the frontends. This data is published to all frontends.
723 727 Each message can have multiple representations of the data; it is up to the
724 728 frontend to decide which to use and how. A single message should contain all
725 729 possible representations of the same information. Each representation should
726 730 be a JSON'able data structure, stored under a valid MIME type as its key.
727 731
728 732 Some questions remain about this design:
729 733
730 734 * Do we use this message type for pyout/displayhook? Probably not, because
731 735 the displayhook also has to handle the Out prompt display. On the other hand
732 736 we could put that information into the metadata section.
733 737
734 738 Message type: ``display_data``::
735 739
736 740 content = {
737 741
738 742 # Who created the data
739 743 'source' : str,
740 744
741 745 # The data dict contains key/value pairs, where the keys are MIME
742 746 # types and the values are the raw data of the representation in that
743 747 # format. The data dict must minimally contain the ``text/plain``
744 748 # MIME type which is used as a backup representation.
745 749 'data' : dict,
746 750
747 751 # Any metadata that describes the data
748 752 'metadata' : dict
749 753 }
750 754
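For example, a hypothetical message carrying both a plain-text and an HTML
representation of the same small table might look like (source name and data
are illustrative)::

    content = {
        'source' : 'my.library.display',
        'data' : {
            'text/plain' : 'x  y\n1  2',
            'text/html'  : '<table><tr><td>1</td><td>2</td></tr></table>',
        },
        'metadata' : {},
    }
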
751 755 Python inputs
752 756 -------------
753 757
754 758 These messages are the re-broadcast of the ``execute_request``.
755 759
756 760 Message type: ``pyin``::
757 761
758 762 content = {
759 763 'code' : str # Source code to be executed, one or more lines
760 764 }
761 765
762 766 Python outputs
763 767 --------------
764 768
765 769 When Python produces output from code that has been compiled in with the
766 770 'single' flag to :func:`compile`, any expression that produces a value (such as
767 771 ``1+1``) is passed to ``sys.displayhook``, which is a callable that can do with
768 772 this value whatever it wants. The default behavior of ``sys.displayhook`` in
769 773 the Python interactive prompt is to print to ``sys.stdout`` the :func:`repr` of
770 774 the value as long as it is not ``None`` (which isn't printed at all). In our
771 775 case, the kernel instantiates as ``sys.displayhook`` an object which has
772 776 similar behavior, but which instead of printing to stdout, broadcasts these
773 777 values as ``pyout`` messages for clients to display appropriately.
774 778
775 779 IPython's displayhook can handle multiple simultaneous formats depending on its
776 780 configuration. The default pretty-printed repr text is always given with the
777 781 ``data`` entry in this message. Any other formats are provided in the
778 782 ``extra_formats`` list. Frontends are free to display any or all of these
779 783 according to their capabilities. The ``extra_formats`` list contains 3-tuples of
780 784 an ID string, a type string, and the data. The ID is unique to the formatter
781 785 implementation that created the data. Frontends will typically ignore the ID
782 786 unless they have requested a particular formatter. The type string tells the
783 787 frontend how to interpret the data. It is often, but not always, a MIME type.
784 788 Frontends should ignore types they do not understand. The data itself is any
785 789 JSON object and depends on the format. It is often, but not always, a string.
786 790
787 791 Message type: ``pyout``::
788 792
789 793 content = {
790 794
791 795 # The counter for this execution is also provided so that clients can
792 796 # display it, since IPython automatically creates variables called _N
793 797 # (for prompt N).
794 798 'execution_count' : int,
795 799
796 800 # The data dict contains key/value pairs, where the keys are MIME
797 801 # types and the values are the raw data of the representation in that
798 802 # format. The data dict must minimally contain the ``text/plain``
799 803 # MIME type which is used as a backup representation.
800 804 'data' : dict,
801 805
802 806 }
803 807
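For example, evaluating ``1+1`` at the third prompt of a session might be
broadcast as (values are illustrative)::

    content = {
        'execution_count' : 3,
        'data' : {
            'text/plain' : '2',
        },
    }
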
804 808 Python errors
805 809 -------------
806 810
807 811 When an error occurs during code execution, it is broadcast as a ``pyerr`` message.
808 812
809 813 Message type: ``pyerr``::
810 814
811 815 content = {
812 816 # Similar content to the execute_reply messages for the 'error' case,
813 817 # except the 'status' field is omitted.
814 818 }
815 819
816 820 Kernel status
817 821 -------------
818 822
819 823 This message type is used by frontends to monitor the status of the kernel.
820 824
821 825 Message type: ``status``::
822 826
823 827 content = {
824 828 # When the kernel starts to execute code, it will enter the 'busy'
825 829 # state and when it finishes, it will enter the 'idle' state.
826 830 execution_state : ('busy', 'idle')
827 831 }
828 832
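For example, when an ``execute_request`` arrives, the kernel publishes::

    content = {'execution_state' : 'busy'}

followed by the matching ``{'execution_state' : 'idle'}`` message once the
execution has finished.
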
829 833 Kernel crashes
830 834 --------------
831 835
832 836 When the kernel has an unexpected exception, caught by the last-resort
833 837 sys.excepthook, we should broadcast the crash handler's output before exiting.
834 838 This will allow clients to notice that a kernel died, inform the user and
835 839 propose further actions.
836 840
837 841 Message type: ``crash``::
838 842
839 843 content = {
840 844 # Similarly to the 'error' case for execute_reply messages, this will
841 845 # contain exc_name, exc_type and traceback fields.
842 846
843 847 # An additional field with supplementary information such as where to
844 848 # send the crash message
845 849 'info' : str,
846 850 }
847 851
848 852
849 853 Future ideas
850 854 ------------
851 855
852 856 Other potential message types, currently unimplemented, are listed below as ideas.
853 857
854 858 Message type: ``file``::
855 859
856 860 content = {
857 861 'path' : 'cool.jpg',
858 862 'mimetype' : str,
859 863 'data' : str,
860 864 }
861 865
862 866
863 867 Messages on the REQ/REP socket
864 868 ==============================
865 869
866 870 This is a socket that goes in the opposite direction: from the kernel to a
867 871 *single* frontend, and its purpose is to allow ``raw_input`` and similar
868 872 operations that read from ``sys.stdin`` on the kernel to be fulfilled by the
869 873 client. For now we will keep these messages as simple as possible, since they
870 874 basically only need to convey the ``raw_input(prompt)`` call.
871 875
872 876 Message type: ``input_request``::
873 877
874 878 content = { 'prompt' : str }
875 879
876 880 Message type: ``input_reply``::
877 881
878 882 content = { 'value' : str }
879 883
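For example, if user code calls ``raw_input('Name: ')``, the kernel sends the
request below and the frontend answers with whatever the user typed (the reply
value is illustrative)::

    # input_request
    content = { 'prompt' : 'Name: ' }

    # input_reply
    content = { 'value' : 'Fernando' }
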
880 884 .. Note::
881 885
882 886 We do not explicitly try to forward the raw ``sys.stdin`` object, because in
883 887 practice the kernel should behave like an interactive program. When a
884 888 program is opened on the console, the keyboard effectively takes over the
885 889 ``stdin`` file descriptor, and it can't be used for raw reading anymore.
886 890 Since the IPython kernel effectively behaves like a console program (albeit
887 891 one whose "keyboard" is actually living in a separate process and
888 892 transported over the zmq connection), raw ``stdin`` isn't expected to be
889 893 available.
890 894
891 895
892 896 Heartbeat for kernels
893 897 =====================
894 898
895 899 Initially we had considered using messages like those above over ZMQ for a
896 900 kernel 'heartbeat' (a way to detect quickly and reliably whether a kernel is
897 901 alive at all, even if it may be busy executing user code). But this has the
898 902 problem that if the kernel is locked inside extension code, it wouldn't execute
899 903 the Python heartbeat code. It turns out, however, that we can implement a basic
900 904 heartbeat with pure ZMQ, without using any Python messaging at all.
901 905
902 906 The monitor sends out a single zmq message (right now, it is a str of the
903 907 monitor's lifetime in seconds), and gets the same message right back, prefixed
904 908 with the zmq identity of the XREQ socket in the heartbeat process. This can be
905 909 a uuid, or even a full message, but there doesn't seem to be a need for packing
906 910 up a message when the sender and receiver are the exact same Python object.
907 911
908 912 The model is this::
909 913
910 914 monitor.send(str(self.lifetime)) # '1.2345678910'
911 915
912 916 and the monitor receives some number of messages of the form::
913 917
914 918 ['uuid-abcd-dead-beef', '1.2345678910']
915 919
916 920 where the first part is the zmq.IDENTITY of the heart's XREQ on the engine, and
917 921 the rest is the message sent by the monitor. No Python code ever has any
918 922 access to the message between the monitor's send and the monitor's recv.
919 923
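A minimal sketch of the echo idea with pyzmq (using plain REQ/REP in a single
process for brevity, and a hypothetical port; the real heartbeat uses an XREQ
socket on the kernel side, so the identity prefix appears as described above)::

    import time
    import zmq

    ctx = zmq.Context.instance()

    heart = ctx.socket(zmq.REP)             # lives in the kernel process
    heart.bind('tcp://127.0.0.1:5558')

    monitor = ctx.socket(zmq.REQ)           # lives in the client
    monitor.connect('tcp://127.0.0.1:5558')

    start = time.time()
    monitor.send(str(time.time() - start).encode())   # e.g. b'0.0001'

    heart.send(heart.recv())                # the heart just echoes the bytes

    if monitor.poll(timeout=1000):          # wait up to 1s for the echo
        print('kernel alive:', monitor.recv())
    else:
        print('no echo within 1s: consider the kernel dead')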
920 924
921 925 ToDo
922 926 ====
923 927
924 928 Missing things include:
925 929
926 930 * Important: finish thinking through the payload concept and API.
927 931
928 932 * Important: ensure that we have a good solution for magics like %edit. It's
929 933 likely that with the payload concept we can build a full solution, but not
930 934 100% clear yet.
931 935
932 936 * Finishing the details of the heartbeat protocol.
933 937
934 938 * Signal handling: specify what kind of information kernel should broadcast (or
935 939 not) when it receives signals.
936 940
937 941 .. include:: ../links.rst