##// END OF EJS Templates
Merge pull request #4722 from minrk/purge-outstanding...
Min RK -
r13920:c24b633d merge
parent child Browse files
Show More
@@ -1,1862 +1,1866 b''
1 1 """A semi-synchronous Client for the ZMQ cluster
2 2
3 3 Authors:
4 4
5 5 * MinRK
6 6 """
7 7 from __future__ import print_function
8 8 #-----------------------------------------------------------------------------
9 9 # Copyright (C) 2010-2011 The IPython Development Team
10 10 #
11 11 # Distributed under the terms of the BSD License. The full license is in
12 12 # the file COPYING, distributed as part of this software.
13 13 #-----------------------------------------------------------------------------
14 14
15 15 #-----------------------------------------------------------------------------
16 16 # Imports
17 17 #-----------------------------------------------------------------------------
18 18
19 19 import os
20 20 import json
21 21 import sys
22 22 from threading import Thread, Event
23 23 import time
24 24 import warnings
25 25 from datetime import datetime
26 26 from getpass import getpass
27 27 from pprint import pprint
28 28
29 29 pjoin = os.path.join
30 30
31 31 import zmq
32 32 # from zmq.eventloop import ioloop, zmqstream
33 33
34 34 from IPython.config.configurable import MultipleInstanceError
35 35 from IPython.core.application import BaseIPythonApplication
36 36 from IPython.core.profiledir import ProfileDir, ProfileDirError
37 37
38 38 from IPython.utils.capture import RichOutput
39 39 from IPython.utils.coloransi import TermColors
40 40 from IPython.utils.jsonutil import rekey, extract_dates, parse_date
41 41 from IPython.utils.localinterfaces import localhost, is_local_ip
42 42 from IPython.utils.path import get_ipython_dir
43 43 from IPython.utils.py3compat import cast_bytes, string_types, xrange, iteritems
44 44 from IPython.utils.traitlets import (HasTraits, Integer, Instance, Unicode,
45 45 Dict, List, Bool, Set, Any)
46 46 from IPython.external.decorator import decorator
47 47 from IPython.external.ssh import tunnel
48 48
49 49 from IPython.parallel import Reference
50 50 from IPython.parallel import error
51 51 from IPython.parallel import util
52 52
53 53 from IPython.kernel.zmq.session import Session, Message
54 54 from IPython.kernel.zmq import serialize
55 55
56 56 from .asyncresult import AsyncResult, AsyncHubResult
57 57 from .view import DirectView, LoadBalancedView
58 58
59 59 #--------------------------------------------------------------------------
60 60 # Decorators for Client methods
61 61 #--------------------------------------------------------------------------
62 62
@decorator
def spin_first(f, self, *args, **kwargs):
    """Sync client state with ``self.spin()``, then run the wrapped method.

    ``f`` is the decorated method and ``self`` the object it is bound to;
    all remaining arguments are forwarded to ``f`` untouched and its
    return value is passed back to the caller.
    """
    self.spin()
    outcome = f(self, *args, **kwargs)
    return outcome
68 68
69 69
70 70 #--------------------------------------------------------------------------
71 71 # Classes
72 72 #--------------------------------------------------------------------------
73 73
74 74
class ExecuteReply(RichOutput):
    """Wrapper for finished Execute results.

    Parameters
    ----------
    msg_id : the id of the request this reply belongs to
    content : dict
        reply content; must contain ``'execution_count'``
    metadata : mapping
        per-message metadata; must contain a ``'pyout'`` entry (``None`` or
        the pyout content dict) and, for pretty-printing, ``'engine_id'``.

    Item access and attribute access both fall through to ``metadata``.
    """
    def __init__(self, msg_id, content, metadata):
        self.msg_id = msg_id
        self._content = content
        self.execution_count = content['execution_count']
        self.metadata = metadata

    # RichOutput overrides

    @property
    def source(self):
        """Source recorded in the pyout message, '' when there is no pyout."""
        pyout = self.metadata['pyout']
        if pyout:
            return pyout.get('source', '')
        return ''

    @property
    def data(self):
        """mime-keyed data of the pyout message, {} when there is no pyout.

        Always returning a dict keeps ``mime in self.data`` (used by
        ``_repr_mime_``) from raising TypeError on replies without output.
        """
        pyout = self.metadata['pyout']
        if pyout:
            return pyout.get('data', {})
        return {}

    @property
    def _metadata(self):
        """mime-keyed metadata of the pyout message, {} when there is none."""
        pyout = self.metadata['pyout']
        if pyout:
            return pyout.get('metadata', {})
        return {}

    def display(self):
        """Publish this reply's data through IPython's display machinery."""
        from IPython.display import publish_display_data
        # NOTE(review): this forwards the full client-side metadata mapping,
        # while _repr_mime_ uses the pyout-level self._metadata. Looks like
        # self._metadata may have been intended here - confirm before changing.
        publish_display_data(self.source, self.data, self.metadata)

    def _repr_mime_(self, mime):
        """Return the data for `mime` (with its metadata, if any), else None."""
        if mime not in self.data:
            return
        data = self.data[mime]
        if mime in self._metadata:
            return data, self._metadata[mime]
        else:
            return data

    def __getitem__(self, key):
        return self.metadata[key]

    def __getattr__(self, key):
        # __getattr__ only runs after normal lookup failed. If 'metadata'
        # itself is missing (instance built without __init__), looking it up
        # below would re-enter __getattr__ forever, so fail right away.
        if key == 'metadata' or key not in self.metadata:
            raise AttributeError(key)
        return self.metadata[key]

    def __repr__(self):
        pyout = self.metadata['pyout'] or {'data':{}}
        text_out = pyout['data'].get('text/plain', '')
        # keep the one-line repr short
        if len(text_out) > 32:
            text_out = text_out[:29] + '...'

        return "<ExecuteReply[%i]: %s>" % (self.execution_count, text_out)

    def _repr_pretty_(self, p, cycle):
        """Pretty-print as ``Out[engine_id:execution_count]: text``.

        Nothing is printed when the reply has no text/plain output.
        """
        pyout = self.metadata['pyout'] or {'data':{}}
        text_out = pyout['data'].get('text/plain', '')

        if not text_out:
            return

        try:
            ip = get_ipython()
        except NameError:
            # get_ipython is not defined here: fall back to no colors
            colors = "NoColor"
        else:
            colors = ip.colors

        if colors == "NoColor":
            out = normal = ""
        else:
            out = TermColors.Red
            normal = TermColors.Normal

        if '\n' in text_out and not text_out.startswith('\n'):
            # add newline for multiline reprs
            text_out = '\n' + text_out

        p.text(
            out + u'Out[%i:%i]: ' % (
                self.metadata['engine_id'], self.execution_count
            ) + normal + text_out
        )
161 161
162 162
class Metadata(dict):
    """dict subclass pre-populated with the standard metadata fields.

    Keys can be read and written as attributes.

    The key set is fixed once the instance exists: assigning to an unknown
    key through item or attribute assignment raises (KeyError and
    AttributeError respectively). Values passed to the constructor are
    merged on top of the defaults.
    """
    def __init__(self, *args, **kwargs):
        dict.__init__(self)
        # built on every call so the mutable defaults are never shared
        defaults = dict(
            msg_id=None,
            submitted=None,
            started=None,
            completed=None,
            received=None,
            engine_uuid=None,
            engine_id=None,
            follow=None,
            after=None,
            status=None,

            pyin=None,
            pyout=None,
            pyerr=None,
            stdout='',
            stderr='',
            outputs=[],
            data={},
            outputs_ready=False,
        )
        self.update(defaults)
        overrides = dict(*args, **kwargs)
        self.update(overrides)

    def __getattr__(self, key):
        """Attribute reads are item reads; unknown names raise AttributeError."""
        if key not in self:
            raise AttributeError(key)
        return self[key]

    def __setattr__(self, key, value):
        """Attribute writes are item writes, restricted to existing keys."""
        if key not in self:
            raise AttributeError(key)
        self[key] = value

    def __setitem__(self, key, value):
        """Only keys that already exist may be assigned."""
        if key not in self:
            raise KeyError(key)
        dict.__setitem__(self, key, value)
216 216
217 217
218 218 class Client(HasTraits):
219 219 """A semi-synchronous client to the IPython ZMQ cluster
220 220
221 221 Parameters
222 222 ----------
223 223
224 224 url_file : str/unicode; path to ipcontroller-client.json
225 225 This JSON file should contain all the information needed to connect to a cluster,
226 226 and is likely the only argument needed.
227 227 Connection information for the Hub's registration. If a json connector
228 228 file is given, then likely no further configuration is necessary.
229 229 [Default: use profile]
230 230 profile : bytes
231 231 The name of the Cluster profile to be used to find connector information.
232 232 If run from an IPython application, the default profile will be the same
233 233 as the running application, otherwise it will be 'default'.
234 234 cluster_id : str
235 235 String id to be added to runtime files, to prevent name collisions when using
236 236 multiple clusters with a single profile simultaneously.
237 237 When set, will look for files named like: 'ipcontroller-<cluster_id>-client.json'
238 238 Since this is text inserted into filenames, typical recommendations apply:
239 239 Simple character strings are ideal, and spaces are not recommended (but
240 240 should generally work)
241 241 context : zmq.Context
242 242 Pass an existing zmq.Context instance, otherwise the client will create its own.
243 243 debug : bool
244 244 flag for lots of message printing for debug purposes
245 245 timeout : int/float
246 246 time (in seconds) to wait for connection replies from the Hub
247 247 [Default: 10]
248 248
249 249 #-------------- session related args ----------------
250 250
251 251 config : Config object
252 252 If specified, this will be relayed to the Session for configuration
253 253 username : str
254 254 set username for the session object
255 255
256 256 #-------------- ssh related args ----------------
257 257 # These are args for configuring the ssh tunnel to be used
258 258 # credentials are used to forward connections over ssh to the Controller
259 259 # Note that the ip given in `addr` needs to be relative to sshserver
260 260 # The most basic case is to leave addr as pointing to localhost (127.0.0.1),
261 261 # and set sshserver as the same machine the Controller is on. However,
262 262 # the only requirement is that sshserver is able to see the Controller
263 263 # (i.e. is within the same trusted network).
264 264
265 265 sshserver : str
266 266 A string of the form passed to ssh, i.e. 'server.tld' or 'user@server.tld:port'
267 267 If keyfile or password is specified, and this is not, it will default to
268 268 the ip given in addr.
269 269 sshkey : str; path to ssh private key file
270 270 This specifies a key to be used in ssh login, default None.
271 271 Regular default ssh keys will be used without specifying this argument.
272 272 password : str
273 273 Your ssh password to sshserver. Note that if this is left None,
274 274 you will be prompted for it if passwordless key based login is unavailable.
275 275 paramiko : bool
276 276 flag for whether to use paramiko instead of shell ssh for tunneling.
277 277 [default: True on win32, False else]
278 278
279 279
280 280 Attributes
281 281 ----------
282 282
283 283 ids : list of int engine IDs
284 284 requesting the ids attribute always synchronizes
285 285 the registration state. To request ids without synchronization,
286 286 use semi-private _ids attributes.
287 287
288 288 history : list of msg_ids
289 289 a list of msg_ids, keeping track of all the execution
290 290 messages you have submitted in order.
291 291
292 292 outstanding : set of msg_ids
293 293 a set of msg_ids that have been submitted, but whose
294 294 results have not yet been received.
295 295
296 296 results : dict
297 297 a dict of all our results, keyed by msg_id
298 298
299 299 block : bool
300 300 determines default behavior when block not specified
301 301 in execution methods
302 302
303 303 Methods
304 304 -------
305 305
306 306 spin
307 307 flushes incoming results and registration state changes
308 308 control methods spin, and requesting `ids` also ensures up to date
309 309
310 310 wait
311 311 wait on one or more msg_ids
312 312
313 313 execution methods
314 314 apply
315 315 legacy: execute, run
316 316
317 317 data movement
318 318 push, pull, scatter, gather
319 319
320 320 query methods
321 321 queue_status, get_result, purge, result_status
322 322
323 323 control methods
324 324 abort, shutdown
325 325
326 326 """
327 327
328 328
329 329 block = Bool(False)
330 330 outstanding = Set()
331 331 results = Instance('collections.defaultdict', (dict,))
332 332 metadata = Instance('collections.defaultdict', (Metadata,))
333 333 history = List()
334 334 debug = Bool(False)
335 335 _spin_thread = Any()
336 336 _stop_spinning = Any()
337 337
338 338 profile=Unicode()
def _profile_default(self):
    """Default for the ``profile`` trait.

    Uses the profile of the initialized IPython application when one can
    be obtained, otherwise ``u'default'``.
    """
    if not BaseIPythonApplication.initialized():
        return u'default'
    # an IPython app *might* be running, try to get its profile
    try:
        return BaseIPythonApplication.instance().profile
    except (AttributeError, MultipleInstanceError):
        # could be a *different* subclass of config.Application,
        # which would raise one of these two errors.
        return u'default'
350 350
351 351
352 352 _outstanding_dict = Instance('collections.defaultdict', (set,))
353 353 _ids = List()
354 354 _connected=Bool(False)
355 355 _ssh=Bool(False)
356 356 _context = Instance('zmq.Context')
357 357 _config = Dict()
358 358 _engines=Instance(util.ReverseDict, (), {})
359 359 # _hub_socket=Instance('zmq.Socket')
360 360 _query_socket=Instance('zmq.Socket')
361 361 _control_socket=Instance('zmq.Socket')
362 362 _iopub_socket=Instance('zmq.Socket')
363 363 _notification_socket=Instance('zmq.Socket')
364 364 _mux_socket=Instance('zmq.Socket')
365 365 _task_socket=Instance('zmq.Socket')
366 366 _task_scheme=Unicode()
367 367 _closed = False
368 368 _ignored_control_replies=Integer(0)
369 369 _ignored_hub_replies=Integer(0)
370 370
def __new__(self, *args, **kw):
    """Create the instance, forwarding only keyword arguments to HasTraits."""
    # positional args are deliberately not passed on, so they don't raise
    instance = HasTraits.__new__(self, **kw)
    return instance
374 374
def __init__(self, url_file=None, profile=None, profile_dir=None, ipython_dir=None,
        context=None, debug=False,
        sshserver=None, sshkey=None, password=None, paramiko=None,
        timeout=10, cluster_id=None, **extra_args
        ):
    """Load the connection file, build the Session and connect to the Hub.

    The connection file is `url_file` if given, otherwise it is looked up
    in the security dir of the resolved profile (using `cluster_id` in the
    file name when set). Remaining keyword arguments are passed on to
    `Session`.

    Raises
    ------
    ValueError
        if `url_file` is a url rather than a file, if no connection file
        can be determined, or if the connection file lacks one of the
        session keys (pack, unpack, key, signature_scheme).
    """
    if profile:
        super(Client, self).__init__(debug=debug, profile=profile)
    else:
        super(Client, self).__init__(debug=debug)
    if context is None:
        context = zmq.Context.instance()
    self._context = context
    self._stop_spinning = Event()

    if 'url_or_file' in extra_args:
        url_file = extra_args['url_or_file']
        warnings.warn("url_or_file arg no longer supported, use url_file", DeprecationWarning)

    if url_file and util.is_url(url_file):
        raise ValueError("single urls cannot be specified, url-files must be used.")

    self._setup_profile_dir(self.profile, profile_dir, ipython_dir)

    if self._cd is not None:
        if url_file is None:
            if not cluster_id:
                client_json = 'ipcontroller-client.json'
            else:
                client_json = 'ipcontroller-%s-client.json' % cluster_id
            url_file = pjoin(self._cd.security_dir, client_json)
    if url_file is None:
        raise ValueError(
            "I can't find enough information to connect to a hub!"
            " Please specify at least one of url_file or profile."
        )

    with open(url_file) as f:
        cfg = json.load(f)

    self._task_scheme = cfg['task_scheme']

    # sync defaults from args, json:
    if sshserver:
        cfg['ssh'] = sshserver

    location = cfg.setdefault('location', None)

    proto,addr = cfg['interface'].split('://')
    addr = util.disambiguate_ip_address(addr, location)
    cfg['interface'] = "%s://%s" % (proto, addr)

    # turn interface,port into full urls:
    for key in ('control', 'task', 'mux', 'iopub', 'notification', 'registration'):
        cfg[key] = cfg['interface'] + ':%i' % cfg[key]

    if location is not None and addr == localhost():
        # location specified, and connection is expected to be local
        if not is_local_ip(location) and not sshserver:
            # load ssh from JSON *only* if the controller is not on
            # this machine
            sshserver=cfg['ssh']
        if not is_local_ip(location) and not sshserver:
            # warn if no ssh specified, but SSH is probably needed
            # This is only a warning, because the most likely cause
            # is a local Controller on a laptop whose IP is dynamic
            warnings.warn("""
Controller appears to be listening on localhost, but not on this machine.
If this is true, you should specify Client(...,sshserver='you@%s')
or instruct your controller to listen on an external IP."""%location,
            RuntimeWarning)
    elif not sshserver:
        # otherwise sync with cfg
        sshserver = cfg['ssh']

    self._config = cfg

    # any ssh-related argument switches tunnelling on
    self._ssh = bool(sshserver or sshkey or password)
    if self._ssh and sshserver is None:
        # default to ssh via localhost
        sshserver = addr
    if self._ssh and password is None:
        if tunnel.try_passwordless_ssh(sshserver, sshkey, paramiko):
            password=False
        else:
            password = getpass("SSH Password for %s: "%sshserver)
    ssh_kwargs = dict(keyfile=sshkey, password=password, paramiko=paramiko)

    # configure and construct the session
    try:
        extra_args['packer'] = cfg['pack']
        extra_args['unpacker'] = cfg['unpack']
        extra_args['key'] = cast_bytes(cfg['key'])
        extra_args['signature_scheme'] = cfg['signature_scheme']
    except KeyError as exc:
        msg = '\n'.join([
            "Connection file is invalid (missing '{}'), possibly from an old version of IPython.",
            "If you are reusing connection files, remove them and start ipcontroller again."
        ])
        # exc.args[0] is the missing key; exc.message does not exist on
        # Python 3 and would turn this into an AttributeError.
        raise ValueError(msg.format(exc.args[0]))

    self.session = Session(**extra_args)

    self._query_socket = self._context.socket(zmq.DEALER)

    if self._ssh:
        tunnel.tunnel_connection(self._query_socket, cfg['registration'], sshserver, **ssh_kwargs)
    else:
        self._query_socket.connect(cfg['registration'])

    self.session.debug = self.debug

    self._notification_handlers = {'registration_notification' : self._register_engine,
                                'unregistration_notification' : self._unregister_engine,
                                'shutdown_notification' : lambda msg: self.close(),
                                }
    self._queue_handlers = {'execute_reply' : self._handle_execute_reply,
                            'apply_reply' : self._handle_apply_reply}

    try:
        self._connect(sshserver, ssh_kwargs, timeout)
    except:
        # bare except on purpose: release the sockets on *any* failure
        # (including KeyboardInterrupt), then re-raise unchanged
        self.close(linger=0)
        raise

    # last step: setup magics, if we are in IPython:

    try:
        ip = get_ipython()
    except NameError:
        return
    else:
        if 'px' not in ip.magics_manager.magics:
            # in IPython but we are the first Client.
            # activate a default view for parallel magics.
            self.activate()
512 512
def __del__(self):
    """Finalizer: delegate to ``self.close()``.

    Per the original note this is meant to clean up the sockets, but
    _not_ the context.
    """
    self.close()
    return None
516 516
def _setup_profile_dir(self, profile, profile_dir, ipython_dir):
    """Resolve the profile directory and store it in ``self._cd``.

    An explicit `profile_dir` takes precedence over a `profile` name
    (the name is not tried when `profile_dir` is given, even if that
    lookup fails). ``self._cd`` is None when nothing could be found.
    """
    if ipython_dir is None:
        ipython_dir = get_ipython_dir()

    located = None
    try:
        if profile_dir is not None:
            located = ProfileDir.find_profile_dir(profile_dir)
        elif profile is not None:
            located = ProfileDir.find_profile_dir_by_name(
                ipython_dir, profile)
    except ProfileDirError:
        # lookup failed: leave it unset
        located = None
    self._cd = located
534 534
def _update_engines(self, engines):
    """Merge a dict of the form {id: uuid} into ``_engines`` and ``_ids``.

    Ids are coerced to int and ``_ids`` is kept sorted. If the engine ids
    are no longer exactly 0..N-1 while the task scheme is 'pure' and a
    task socket is open, task scheduling is stopped.
    """
    for raw_id, engine_uuid in iteritems(engines):
        engine_id = int(raw_id)
        if engine_id not in self._engines:
            self._ids.append(engine_id)
        self._engines[engine_id] = engine_uuid
    self._ids = sorted(self._ids)

    known_ids = sorted(self._engines.keys())
    contiguous = known_ids == list(range(len(self._engines)))
    if not contiguous and self._task_scheme == 'pure' and self._task_socket:
        self._stop_scheduling_tasks()
546 546
def _stop_scheduling_tasks(self):
    """Close and drop the task socket, then emit a RuntimeWarning.

    Used when an engine has been unregistered while a pure ZMQ scheduler
    is in use. The warning gets an extra sentence when there are still
    outstanding msg_ids.
    """
    self._task_socket.close()
    self._task_socket = None

    pieces = [
        "An engine has been unregistered, and we are using pure ",
        "ZMQ task scheduling. Task farming will be disabled.",
    ]
    if self.outstanding:
        pieces.append(" If you were running tasks when this happened, ")
        pieces.append("some `outstanding` msg_ids may never resolve.")
    warnings.warn("".join(pieces), RuntimeWarning)
559 559
def _build_targets(self, targets):
    """Turn valid target IDs or 'all' into two lists: (uuids, int_ids).

    Parameters
    ----------
    targets : None, 'all', int, slice, or tuple/list/xrange of ints
        None and 'all' select every known engine. A negative int indexes
        ``self.ids`` from the end; a slice is applied to the list of ids.

    Returns
    -------
    (uuids, int_ids) : the engine uuids (passed through ``cast_bytes``)
        first, then the matching integer engine ids, in the same order.

    Raises
    ------
    error.NoEnginesRegistered : no engines are known
    TypeError : a string other than 'all', or an unsupported type
    IndexError : an int that is not a registered engine id
    """
    if not self._ids:
        # flush notification socket if no engines yet, just in case
        if not self.ids:
            raise error.NoEnginesRegistered("Can't build targets without any engines")

    if targets is None:
        targets = self._ids
    elif isinstance(targets, string_types):
        if targets.lower() == 'all':
            targets = self._ids
        else:
            raise TypeError("%r not valid str target, must be 'all'"%(targets))
    elif isinstance(targets, int):
        if targets < 0:
            # negative ints are positions counted from the end, not ids
            targets = self.ids[targets]
        if targets not in self._ids:
            raise IndexError("No such engine: %i"%targets)
        targets = [targets]

    if isinstance(targets, slice):
        # the slice selects positions in the id list, not engine ids
        indices = list(range(len(self._ids))[targets])
        ids = self.ids
        targets = [ ids[i] for i in indices ]

    if not isinstance(targets, (tuple, list, xrange)):
        raise TypeError("targets by int/slice/collection of ints only, not %s"%(type(targets)))

    return [cast_bytes(self._engines[t]) for t in targets], list(targets)
592 592
def _connect(self, sshserver, ssh_kwargs, timeout):
    """setup all our socket connections to the cluster. This is called from
    __init__.

    Sends a connection_request on the query socket, waits up to `timeout`
    seconds for the reply and, when its status is 'ok', creates and
    connects the mux, task, notification, control and iopub sockets
    (tunnelled through `sshserver` when ssh is in use) and loads the
    engines listed in the reply.

    Raises
    ------
    error.TimeoutError : no reply arrived within `timeout`
    Exception : the reply status was not 'ok'

    In both failure cases ``_connected`` is reset to False.
    """

    # Maybe allow reconnecting?
    if self._connected:
        return
    self._connected=True

    def connect_socket(s, url):
        if self._ssh:
            return tunnel.tunnel_connection(s, url, sshserver, **ssh_kwargs)
        else:
            return s.connect(url)

    self.session.send(self._query_socket, 'connection_request')
    # use Poller because zmq.select has wrong units in pyzmq 2.1.7
    poller = zmq.Poller()
    poller.register(self._query_socket, zmq.POLLIN)
    # poll expects milliseconds, timeout is seconds
    evts = poller.poll(timeout*1000)
    if not evts:
        # nothing was connected: don't leave the flag set, or a later
        # call would return early without ever connecting
        self._connected = False
        raise error.TimeoutError("Hub connection request timed out")
    idents,msg = self.session.recv(self._query_socket,mode=0)
    if self.debug:
        pprint(msg)
    content = msg['content']
    # self._config['registration'] = dict(content)
    cfg = self._config
    if content['status'] == 'ok':
        self._mux_socket = self._context.socket(zmq.DEALER)
        connect_socket(self._mux_socket, cfg['mux'])

        self._task_socket = self._context.socket(zmq.DEALER)
        connect_socket(self._task_socket, cfg['task'])

        self._notification_socket = self._context.socket(zmq.SUB)
        self._notification_socket.setsockopt(zmq.SUBSCRIBE, b'')
        connect_socket(self._notification_socket, cfg['notification'])

        self._control_socket = self._context.socket(zmq.DEALER)
        connect_socket(self._control_socket, cfg['control'])

        self._iopub_socket = self._context.socket(zmq.SUB)
        self._iopub_socket.setsockopt(zmq.SUBSCRIBE, b'')
        connect_socket(self._iopub_socket, cfg['iopub'])

        self._update_engines(dict(content['engines']))
    else:
        self._connected = False
        raise Exception("Failed to connect!")
644 644
645 645 #--------------------------------------------------------------------------
646 646 # handlers and callbacks for incoming messages
647 647 #--------------------------------------------------------------------------
648 648
def _unwrap_exception(self, content):
    """unwrap exception, and remap engine_id to int.

    When the unwrapped exception carries engine_info, its 'engine_id' is
    set to the integer id registered for the engine's uuid, or to None
    when that uuid is not (or no longer) in ``self._engines``.
    """
    e = error.unwrap_exception(content)
    # print e.traceback
    if e.engine_info:
        e_uuid = e.engine_info['engine_uuid']
        # .get, as in _extract_metadata: the engine may already have been
        # removed by _unregister_engine, and that must not hide the real
        # error behind a KeyError
        eid = self._engines.get(e_uuid, None)
        e.engine_info['engine_id'] = eid
    return e
658 658
def _extract_metadata(self, msg):
    """Build a plain dict of metadata fields from a reply message.

    Always present: msg_id (from the parent header), received (now),
    engine_uuid, follow, after and status. engine_id is added when the
    message names an engine; submitted, started and completed are added
    only when the corresponding dates are present in the message.
    """
    header = msg['header']
    parent = msg['parent_header']
    msg_meta = msg['metadata']
    content = msg['content']

    md = dict(
        msg_id=parent['msg_id'],
        received=datetime.now(),
        engine_uuid=msg_meta.get('engine', None),
        follow=msg_meta.get('follow', []),
        after=msg_meta.get('after', []),
        status=content['status'],
    )

    engine_uuid = md['engine_uuid']
    if engine_uuid is not None:
        md['engine_id'] = self._engines.get(engine_uuid, None)

    if 'date' in parent:
        md['submitted'] = parent['date']
    if 'started' in msg_meta:
        md['started'] = parse_date(msg_meta['started'])
    if 'date' in header:
        md['completed'] = header['date']
    return md
682 682
def _register_engine(self, msg):
    """Handle a registration notification: record the new engine.

    The notification content supplies the engine 'id' and 'uuid', which
    are handed to ``_update_engines`` as a one-entry {id: uuid} dict.
    """
    info = msg['content']
    engine_id = info['id']
    self._update_engines({engine_id: info['uuid']})
689 689
def _unregister_engine(self, msg):
    """Handle an unregistration notification for an engine that has died.

    A known engine is removed from ``_ids`` and ``_engines`` and its
    stranded messages are resolved. Whether or not the engine was known,
    task scheduling is stopped when a task socket is open and the task
    scheme is 'pure'.
    """
    engine_id = int(msg['content']['id'])
    if engine_id in self._ids:
        self._ids.remove(engine_id)
        engine_uuid = self._engines.pop(engine_id)
        self._handle_stranded_msgs(engine_id, engine_uuid)

    pure_scheduler_active = self._task_socket and self._task_scheme == 'pure'
    if pure_scheduler_active:
        self._stop_scheduling_tasks()
702 702
def _handle_stranded_msgs(self, eid, uuid):
    """Resolve messages still recorded as outstanding on a departed engine.

    Every such msg_id without a stored result gets a fabricated
    'apply_reply' carrying an EngineError, which is processed like a
    normal reply.

    It is possible that this will fire prematurely - that is, an engine will
    go down after completing a result, and the client will be notified
    of the unregistration and later receive the successful result.
    """
    pending = self._outstanding_dict[uuid]

    # iterate over a copy: handling a reply removes entries from the set
    for stranded_id in list(pending):
        if stranded_id in self.results:
            # we already have a result for this one
            continue
        # raise and immediately catch, so that wrap_exception() runs
        # while the EngineError is the exception being handled
        try:
            raise error.EngineError("Engine %r died while running task %r"%(eid, stranded_id))
        except:
            wrapped = error.wrap_exception()
        # build a fake message:
        fake = self.session.msg('apply_reply', content=wrapped)
        fake['parent_header']['msg_id'] = stranded_id
        fake['metadata']['engine'] = uuid
        self._handle_apply_reply(fake)
726 726
def _handle_execute_reply(self, msg):
    """Save the reply to an execute_request into our results.

    The msg_id is dropped from ``outstanding`` (a notice is printed for
    stale or unknown ids, and processing continues), the message metadata
    is merged into ``self.metadata`` and the result is stored according
    to the reply status. 'resubmitted' replies store no result.

    execute messages are never actually used. apply is used instead.
    """
    parent = msg['parent_header']
    msg_id = parent['msg_id']
    if msg_id in self.outstanding:
        self.outstanding.remove(msg_id)
    elif msg_id in self.history:
        print("got stale result: %s"%msg_id)
    else:
        print("got unknown result: %s"%msg_id)

    content = msg['content']
    header = msg['header']

    # construct metadata:
    md = self.metadata[msg_id]
    md.update(self._extract_metadata(msg))
    # is this redundant?
    self.metadata[msg_id] = md

    # the engine no longer holds this message
    self._outstanding_dict[md['engine_uuid']].discard(msg_id)

    # construct result:
    status = content['status']
    if status == 'resubmitted':
        # TODO: handle resubmission
        return
    if status == 'ok':
        result = ExecuteReply(msg_id, content, md)
    elif status == 'aborted':
        result = error.TaskAborted(msg_id)
    else:
        result = self._unwrap_exception(content)
    self.results[msg_id] = result
766 766
def _handle_apply_reply(self, msg):
    """Save the reply to an apply_request into our results.

    The msg_id is dropped from ``outstanding`` (stale or unknown ids are
    reported on stdout and processing continues), the message metadata is
    merged into ``self.metadata`` and the result is stored according to
    the reply status. 'resubmitted' replies store no result.
    """
    parent = msg['parent_header']
    msg_id = parent['msg_id']
    if msg_id in self.outstanding:
        self.outstanding.remove(msg_id)
    elif msg_id in self.history:
        print("got stale result: %s"%msg_id)
        print(self.results[msg_id])
        print(msg)
    else:
        print("got unknown result: %s"%msg_id)

    content = msg['content']
    header = msg['header']

    # construct metadata:
    md = self.metadata[msg_id]
    md.update(self._extract_metadata(msg))
    # is this redundant?
    self.metadata[msg_id] = md

    # the engine no longer holds this message
    self._outstanding_dict[md['engine_uuid']].discard(msg_id)

    # construct result:
    status = content['status']
    if status == 'resubmitted':
        # TODO: handle resubmission
        return
    if status == 'ok':
        result = serialize.unserialize_object(msg['buffers'])[0]
    elif status == 'aborted':
        result = error.TaskAborted(msg_id)
    else:
        result = self._unwrap_exception(content)
    self.results[msg_id] = result
803 803
def _flush_notifications(self):
    """Drain the notification socket without blocking.

    Each waiting message is dispatched to the handler registered for its
    msg_type in ``_notification_handlers``; an unregistered type raises
    Exception.
    """
    while True:
        idents, msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)
        if msg is None:
            # nothing left in the queue
            break
        if self.debug:
            pprint(msg)
        msg_type = msg['header']['msg_type']
        handler = self._notification_handlers.get(msg_type, None)
        if handler is None:
            raise Exception("Unhandled message type: %s" % msg_type)
        handler(msg)
818 818
def _flush_results(self, sock):
    """Drain task or queue results waiting on `sock` without blocking.

    Each waiting message is dispatched to the handler registered for its
    msg_type in ``_queue_handlers``; an unregistered type raises
    Exception.
    """
    while True:
        idents, msg = self.session.recv(sock, mode=zmq.NOBLOCK)
        if msg is None:
            # nothing left in the queue
            break
        if self.debug:
            pprint(msg)
        msg_type = msg['header']['msg_type']
        handler = self._queue_handlers.get(msg_type, None)
        if handler is None:
            raise Exception("Unhandled message type: %s" % msg_type)
        handler(msg)
832 832
def _flush_control(self, sock):
    """Drain replies waiting on the control channel without blocking.

    Nothing is read unless replies are expected, i.e.
    ``_ignored_control_replies`` is positive. Each reply read is
    discarded (printed first in debug mode) and decrements that counter.
    """
    if self._ignored_control_replies <= 0:
        return
    while True:
        idents, msg = self.session.recv(sock, mode=zmq.NOBLOCK)
        if msg is None:
            break
        self._ignored_control_replies -= 1
        if self.debug:
            pprint(msg)
846 846
def _flush_ignored_control(self):
    """Receive and discard every control reply we still expect.

    One message is received from the control socket per pending reply,
    until ``_ignored_control_replies`` is back at zero.
    """
    while True:
        if self._ignored_control_replies <= 0:
            break
        self.session.recv(self._control_socket)
        self._ignored_control_replies -= 1
852 852
def _flush_ignored_hub_replies(self):
    """Read and discard everything waiting on the query socket (non-blocking)."""
    while True:
        ident, msg = self.session.recv(self._query_socket, mode=zmq.NOBLOCK)
        if msg is None:
            break
857 857
def _flush_iopub(self, sock):
    """Drain the iopub channel (non-blocking) into ``self.metadata``.

    Each message is filed under its parent's msg_id: stream output is
    appended to the matching stream entry, pyerr / pyin / pyout replace
    their entry, display_data is appended to 'outputs', data_message
    payloads are merged into 'data', and an idle status marks the
    outputs as ready. Messages without a parent header are skipped.
    """
    while True:
        idents, msg = self.session.recv(sock, mode=zmq.NOBLOCK)
        if msg is None:
            break
        if self.debug:
            pprint(msg)
        parent = msg['parent_header']
        # ignore IOPub messages with no parent.
        # Caused by print statements or warnings from before the first execution.
        if not parent:
            continue
        msg_id = parent['msg_id']
        content = msg['content']
        header = msg['header']
        msg_type = msg['header']['msg_type']

        # init metadata:
        md = self.metadata[msg_id]

        if msg_type == 'stream':
            name = content['name']
            md[name] = (md[name] or '') + content['data']
        elif msg_type == 'pyerr':
            md.update(pyerr=self._unwrap_exception(content))
        elif msg_type == 'pyin':
            md.update(pyin=content['code'])
        elif msg_type == 'display_data':
            md['outputs'].append(content)
        elif msg_type == 'pyout':
            md['pyout'] = content
        elif msg_type == 'data_message':
            data, remainder = serialize.unserialize_object(msg['buffers'])
            md['data'].update(data)
        elif msg_type == 'status':
            # idle message comes after all outputs
            if content['execution_state'] == 'idle':
                md['outputs_ready'] = True
        else:
            # any other msg_type is ignored
            pass

        # redundant?
        self.metadata[msg_id] = md
907 907
908 908 #--------------------------------------------------------------------------
909 909 # len, getitem
910 910 #--------------------------------------------------------------------------
911 911
def __len__(self):
    """Return the number of engine ids reported by ``self.ids``."""
    engine_ids = self.ids
    return len(engine_ids)
915 915
def __getitem__(self, key):
    """Index access: return ``self.direct_view(key)``.

    `key` must be an int, a slice, or a list/tuple/xrange of ints;
    any other type raises TypeError.
    """
    if isinstance(key, (int, slice, tuple, list, xrange)):
        return self.direct_view(key)
    raise TypeError("key by int/slice/iterable of ints only, not %s"%(type(key)))
924 924
925 925 #--------------------------------------------------------------------------
926 926 # Begin public methods
927 927 #--------------------------------------------------------------------------
928 928
@property
def ids(self):
    """Engine ids, refreshed on every access.

    Pending notifications are flushed first, and a fresh list is built
    each time so callers never share the internal ``_ids`` container.
    """
    self._flush_notifications()
    return [engine_id for engine_id in self._ids]
935 935
def activate(self, targets='all', suffix=''):
    """Build a blocking DirectView on `targets` and activate it.

    The view's ``activate(suffix)`` registers it with the IPython
    parallel magics (`%px, %autopx, %pxresult, %%px`).

    Parameters
    ----------

    targets: int, list of ints, or 'all'
        The engines on which the view's magics will run
    suffix: str [default: '']
        The suffix, if any, for the magics.  Distinct suffixes let
        several views be bound to parallel magics at the same time.

        e.g. ``rc.activate(targets=0, suffix='0')`` will give you
        the magics ``%px0``, ``%pxresult0``, etc. for running magics just
        on engine 0.

    Returns
    -------

    The activated view, with ``block`` set to True.
    """
    dv = self.direct_view(targets)
    dv.block = True
    dv.activate(suffix)
    return dv
958 958
def close(self, linger=None):
    """Close every zmq socket held by this client.

    Stops the background spin thread, then closes each attribute whose
    trait name ends in ``"socket"`` that is set and still open.  When
    `linger` is given it is forwarded as the zmq LINGER option, which
    allows discarding of messages.  Calling this on an already closed
    client does nothing.
    """
    if self._closed:
        return
    self.stop_spin_thread()
    socket_names = [name for name in self.trait_names() if name.endswith("socket")]
    for name in socket_names:
        sock = getattr(self, name)
        if sock is None or sock.closed:
            continue
        if linger is None:
            sock.close()
        else:
            sock.close(linger=linger)
    self._closed = True
977 977
def _spin_every(self, interval=1):
    """Thread target used by spin_thread.

    Sleeps `interval` seconds and then calls ``self.spin()``, repeating
    until the ``_stop_spinning`` event is found set at the top of a cycle.
    """
    while not self._stop_spinning.is_set():
        time.sleep(interval)
        self.spin()
985 985
def spin_thread(self, interval=1):
    """Run ``Client.spin()`` periodically in a background daemon thread.

    This keeps messages from piling up in the zmq queue while you are
    working on other things, or just leaving an idle terminal.  It also
    limits potential padding of the `received` timestamp on AsyncResult
    objects, used for timings.

    Any spin thread that is already running is stopped first.

    Parameters
    ----------

    interval : float, optional
        The interval on which to spin the client in the background thread
        (simply passed to time.sleep).

    Notes
    -----

    For precision timing, you may want to use this method to put a bound
    on the jitter (in seconds) in `received` timestamps used
    in AsyncResult.wall_time.

    """
    already_spinning = self._spin_thread is not None
    if already_spinning:
        self.stop_spin_thread()
    self._stop_spinning.clear()
    self._spin_thread = Thread(target=self._spin_every, args=(interval,))
    spinner = self._spin_thread
    spinner.daemon = True
    spinner.start()
1016 1016
def stop_spin_thread(self):
    """Stop the background spin thread, if one is running.

    Sets the stop event, joins the thread, and resets ``_spin_thread``
    to None.  Does nothing when no thread is registered.
    """
    if self._spin_thread is None:
        return
    self._stop_spinning.set()
    self._spin_thread.join()
    self._spin_thread = None
1023 1023
def spin(self):
    """Flush whatever is waiting on each of the client's sockets.

    Sockets are visited in a fixed order (notification, iopub, mux, task,
    control, query); a socket that is unset/falsy is skipped.
    """
    # (socket attribute, flush method, whether the socket is passed along)
    schedule = (
        ('_notification_socket', '_flush_notifications', False),
        ('_iopub_socket', '_flush_iopub', True),
        ('_mux_socket', '_flush_results', True),
        ('_task_socket', '_flush_results', True),
        ('_control_socket', '_flush_control', True),
        ('_query_socket', '_flush_ignored_hub_replies', False),
    )
    for sock_attr, flush_name, wants_socket in schedule:
        sock = getattr(self, sock_attr)
        if not sock:
            continue
        flush = getattr(self, flush_name)
        if wants_socket:
            flush(sock)
        else:
            flush()
1040 1040
def wait(self, jobs=None, timeout=-1):
    """Block until `jobs` are done, or until `timeout` seconds have passed.

    Parameters
    ----------

    jobs : int, str, or list of ints and/or strs, or one or more AsyncResult objects
        ints are indices to self.history
        strs are msg_ids
        default: wait on all outstanding messages
    timeout : float
        a time in seconds, after which to give up.
        default is -1, which means no timeout

    Returns
    -------

    True : when all msg_ids are done
    False : timeout reached, some msg_ids still outstanding
    """
    started = time.time()
    if jobs is None:
        pending = self.outstanding
    else:
        if isinstance(jobs, string_types + (int, AsyncResult)):
            jobs = [jobs]
        pending = set()
        for job in jobs:
            if isinstance(job, AsyncResult):
                pending.update(job.msg_ids)
                continue
            if isinstance(job, int):
                # an int is an index into the submission history
                job = self.history[job]
            pending.add(job)

    if not pending.intersection(self.outstanding):
        return True

    self.spin()
    while pending.intersection(self.outstanding):
        timed_out = timeout >= 0 and (time.time() - started) > timeout
        if timed_out:
            break
        time.sleep(1e-3)
        self.spin()
    return not pending.intersection(self.outstanding)
1085 1085
1086 1086 #--------------------------------------------------------------------------
1087 1087 # Control methods
1088 1088 #--------------------------------------------------------------------------
1089 1089
@spin_first
def clear(self, targets=None, block=None):
    """Send a ``clear_request`` to each target engine.

    When blocking (`block`, defaulting to ``self.block``), one reply per
    target is awaited and the last non-ok reply, if any, is raised as an
    exception.  Otherwise the replies are merely counted as ignored.
    """
    if block is None:
        block = self.block
    targets = self._build_targets(targets)[0]
    for engine in targets:
        self.session.send(self._control_socket, 'clear_request', content={}, ident=engine)

    failure = False
    if not block:
        self._ignored_control_replies += len(targets)
    else:
        # drop stale replies first so the ones read below belong to us
        self._flush_ignored_control()
        for _ in range(len(targets)):
            _idents, reply = self.session.recv(self._control_socket, 0)
            if self.debug:
                pprint(reply)
            reply_content = reply['content']
            if reply_content['status'] != 'ok':
                failure = self._unwrap_exception(reply_content)
    if failure:
        raise failure
1110 1110
1111 1111
@spin_first
def abort(self, jobs=None, targets=None, block=None):
    """Abort specific jobs from the execution queues of target(s).

    This is a mechanism to prevent jobs that have already been submitted
    from executing.

    Parameters
    ----------

    jobs : msg_id, list of msg_ids, or AsyncResult
        The jobs to be aborted.  Any iterable of msg_ids / AsyncResults
        is accepted, including one-shot iterables such as generators.

        If unspecified/None: abort all outstanding jobs.
    targets : passed to ``self._build_targets`` to select the engines
        that receive the abort request.
    block : bool [default: self.block]
        Whether to wait for one reply per target.

    Raises
    ------

    TypeError : if an entry of `jobs` is neither a str nor an AsyncResult.
    The exception unwrapped from the last non-ok reply, when blocking.
    """
    block = self.block if block is None else block
    jobs = jobs if jobs is not None else list(self.outstanding)
    targets = self._build_targets(targets)[0]

    if isinstance(jobs, string_types + (AsyncResult,)):
        jobs = [jobs]
    msg_ids = []
    # Validate and collect in a single pass: iterating `jobs` twice would
    # exhaust a generator during validation and abort nothing.
    for job in jobs:
        if isinstance(job, AsyncResult):
            msg_ids.extend(job.msg_ids)
        elif isinstance(job, string_types):
            msg_ids.append(job)
        else:
            # wrap in a tuple so a tuple-valued `job` formats correctly
            raise TypeError("Invalid msg_id type %r, expected str or AsyncResult" % (job,))

    content = dict(msg_ids=msg_ids)
    for t in targets:
        self.session.send(self._control_socket, 'abort_request',
                          content=content, ident=t)
    failure = False
    if block:
        # drop stale replies first so the ones read below belong to us
        self._flush_ignored_control()
        for _ in range(len(targets)):
            _idents, msg = self.session.recv(self._control_socket, 0)
            if self.debug:
                pprint(msg)
            if msg['content']['status'] != 'ok':
                failure = self._unwrap_exception(msg['content'])
    else:
        self._ignored_control_replies += len(targets)
    if failure:
        raise failure
1160 1160
@spin_first
def shutdown(self, targets='all', restart=False, hub=False, block=None):
    """Terminate one or more engine processes, optionally including the hub.

    Parameters
    ----------

    targets: list of ints or 'all' [default: all]
        Which engines to shutdown.
    hub: bool [default: False]
        Whether to include the Hub.  hub=True implies targets='all'.
    block: bool [default: self.block]
        Whether to wait for clean shutdown replies or not.
    restart: bool [default: False]
        NOT IMPLEMENTED
        whether to restart engines after shutting them down.

    Raises
    ------

    NotImplementedError : if `restart` is requested.
    The exception unwrapped from the last non-ok reply, if any.
    """
    from IPython.parallel.error import NoEnginesRegistered
    if restart:
        raise NotImplementedError("Engine restart is not yet implemented")

    if block is None:
        block = self.block
    if hub:
        targets = 'all'
    try:
        targets = self._build_targets(targets)[0]
    except NoEnginesRegistered:
        # no engines registered: nothing to send shutdown requests to
        targets = []

    for engine in targets:
        self.session.send(self._control_socket, 'shutdown_request',
                          content={'restart':restart}, ident=engine)

    failure = False
    if block or hub:
        self._flush_ignored_control()
        for _ in range(len(targets)):
            _idents, reply = self.session.recv(self._control_socket, 0)
            if self.debug:
                pprint(reply)
            reply_content = reply['content']
            if reply_content['status'] != 'ok':
                failure = self._unwrap_exception(reply_content)
    else:
        self._ignored_control_replies += len(targets)

    if hub:
        time.sleep(0.25)
        self.session.send(self._query_socket, 'shutdown_request')
        _idents, reply = self.session.recv(self._query_socket, 0)
        if self.debug:
            pprint(reply)
        reply_content = reply['content']
        if reply_content['status'] != 'ok':
            failure = self._unwrap_exception(reply_content)

    if failure:
        raise failure
1215 1215
1216 1216 #--------------------------------------------------------------------------
1217 1217 # Execution related methods
1218 1218 #--------------------------------------------------------------------------
1219 1219
def _maybe_raise(self, result):
    """Return `result`, unless it is a RemoteError, which is raised instead."""
    if not isinstance(result, error.RemoteError):
        return result
    raise result
1226 1226
def send_apply_request(self, socket, f, args=None, kwargs=None, metadata=None, track=False,
                        ident=None):
    """Build an ``apply_request`` message and send it over `socket`.

    This is the principal method with which all engine execution is
    performed by views.  The sent message is returned, and its msg_id is
    recorded in ``outstanding``, ``history`` and ``metadata``.

    Raises
    ------

    RuntimeError : if the client's sockets have already been closed.
    TypeError : if `f`, `args`, `kwargs` or `metadata` has the wrong type.
    """
    if self._closed:
        raise RuntimeError("Client cannot be used after its sockets have been closed")

    # fill in defaults
    if args is None:
        args = []
    if kwargs is None:
        kwargs = {}
    if metadata is None:
        metadata = {}

    # validate arguments
    if not (callable(f) or isinstance(f, Reference)):
        raise TypeError("f must be callable, not %s"%type(f))
    if not isinstance(args, (tuple, list)):
        raise TypeError("args must be tuple or list, not %s"%type(args))
    if not isinstance(kwargs, dict):
        raise TypeError("kwargs must be dict, not %s"%type(kwargs))
    if not isinstance(metadata, dict):
        raise TypeError("metadata must be dict, not %s"%type(metadata))

    session = self.session
    bufs = serialize.pack_apply_message(
        f, args, kwargs,
        buffer_threshold=session.buffer_threshold,
        item_threshold=session.item_threshold,
    )

    msg = session.send(socket, "apply_request", buffers=bufs, ident=ident,
                       metadata=metadata, track=track)

    msg_id = msg['header']['msg_id']
    self.outstanding.add(msg_id)
    if ident:
        # possibly routed to a specific engine
        engine_ident = ident[-1] if isinstance(ident, list) else ident
        if engine_ident in self._engines.values():
            # save for later, in case of engine death
            self._outstanding_dict[engine_ident].add(msg_id)
    self.history.append(msg_id)
    self.metadata[msg_id]['submitted'] = datetime.now()

    return msg
1273 1273
def send_execute_request(self, socket, code, silent=True, metadata=None, ident=None):
    """Build an ``execute_request`` message and send it over `socket`.

    The sent message is returned, and its msg_id is recorded in
    ``outstanding``, ``history`` and ``metadata``.

    Raises
    ------

    RuntimeError : if the client's sockets have already been closed.
    TypeError : if `code` is not text or `metadata` is not a dict.
    """
    if self._closed:
        raise RuntimeError("Client cannot be used after its sockets have been closed")

    # fill in defaults
    if metadata is None:
        metadata = {}

    # validate arguments
    if not isinstance(code, string_types):
        raise TypeError("code must be text, not %s" % type(code))
    if not isinstance(metadata, dict):
        raise TypeError("metadata must be dict, not %s" % type(metadata))

    request = dict(code=code, silent=bool(silent), user_variables=[], user_expressions={})

    msg = self.session.send(socket, "execute_request", content=request, ident=ident,
                            metadata=metadata)

    msg_id = msg['header']['msg_id']
    self.outstanding.add(msg_id)
    if ident:
        # possibly routed to a specific engine
        engine_ident = ident[-1] if isinstance(ident, list) else ident
        if engine_ident in self._engines.values():
            # save for later, in case of engine death
            self._outstanding_dict[engine_ident].add(msg_id)
    self.history.append(msg_id)
    self.metadata[msg_id]['submitted'] = datetime.now()

    return msg
1310 1310
1311 1311 #--------------------------------------------------------------------------
1312 1312 # construct a View object
1313 1313 #--------------------------------------------------------------------------
1314 1314
def load_balanced_view(self, targets=None):
    """Construct a LoadBalancedView object on the task socket.

    If no arguments are specified (or `targets` is 'all'), the view is
    created with ``targets=None``, i.e. using all engines.

    Parameters
    ----------

    targets: list,slice,int,etc. [default: use all engines]
        The subset of engines across which to load-balance
    """
    if targets == 'all' or targets is None:
        engine_ids = None
    else:
        engine_ids = self._build_targets(targets)[1]
    return LoadBalancedView(client=self, socket=self._task_socket, targets=engine_ids)
1332 1332
def direct_view(self, targets='all'):
    """Construct a DirectView object on the mux socket.

    If no targets are specified, create a DirectView using all engines.

    rc.direct_view('all') is distinguished from rc[:] in that 'all' will
    evaluate the target engines at each execution, whereas rc[:] will connect to
    all *current* engines, and that list will not change.

    That is, 'all' will always use all engines, whereas rc[:] will not use
    engines added after the DirectView is constructed.

    Parameters
    ----------

    targets: list,slice,int,etc. [default: use all engines]
        The engines to use for the View.  An int yields a view on that
        single engine rather than on a one-element list.
    """
    wants_single = isinstance(targets, int)
    # 'all' is passed through untouched so it is evaluated lazily at each execution
    if targets != 'all':
        resolved = self._build_targets(targets)[1]
        targets = resolved[0] if wants_single else resolved
    return DirectView(client=self, socket=self._mux_socket, targets=targets)
1358 1358
1359 1359 #--------------------------------------------------------------------------
1360 1360 # Query methods
1361 1361 #--------------------------------------------------------------------------
1362 1362
@spin_first
def get_result(self, indices_or_msg_ids=None, block=None):
    """Retrieve a result by msg_id or history index, wrapped in an AsyncResult object.

    If every requested msg_id is either outstanding or already cached in
    ``self.results``, a plain AsyncResult is built; otherwise an
    AsyncHubResult is built so the results can be fetched from the Hub.

    This is a convenient way to construct AsyncResult objects, which are wrappers
    that include metadata about execution, and allow for awaiting results that
    were not submitted by this Client.

    Parameters
    ----------

    indices_or_msg_ids : integer history index, str msg_id, or list of either
        The history indices or msg_ids of the results to be retrieved.
        Defaults to -1, the most recent entry of the history.

    block : bool [default: self.block]
        Whether to wait for the result to be done

    Returns
    -------

    AsyncResult
        A single AsyncResult object will always be returned.

    AsyncHubResult
        A subclass of AsyncResult that retrieves results from the Hub

    Raises
    ------

    TypeError : if an entry is neither an int nor a str.
    """
    if block is None:
        block = self.block
    if indices_or_msg_ids is None:
        indices_or_msg_ids = -1

    single_result = not isinstance(indices_or_msg_ids, (list,tuple))
    if single_result:
        indices_or_msg_ids = [indices_or_msg_ids]

    theids = []
    for item in indices_or_msg_ids:
        if isinstance(item, int):
            item = self.history[item]
        if not isinstance(item, string_types):
            raise TypeError("indices must be str or int, not %r"%item)
        theids.append(item)

    # an id is "remote" when it is neither outstanding nor cached locally
    needs_hub = any(
        msg_id not in self.outstanding and msg_id not in self.results
        for msg_id in theids
    )

    # given a single msg_id initially, wrap the id itself,
    # not a length-one list
    wanted = theids[0] if single_result else theids

    result_class = AsyncHubResult if needs_hub else AsyncResult
    ar = result_class(self, msg_ids=wanted)

    if block:
        ar.wait()

    return ar
1435 1435
@spin_first
def resubmit(self, indices_or_msg_ids=None, metadata=None, block=None):
    """Resubmit one or more tasks.

    in-flight tasks may not be resubmitted.

    Parameters
    ----------

    indices_or_msg_ids : integer history index, str msg_id, or list of either
        The history indices or msg_ids of the tasks to be resubmitted.
        Defaults to -1, the most recent entry of the history.

    block : bool [default: self.block]
        Whether to wait for the result to be done

    Returns
    -------

    AsyncHubResult
        A subclass of AsyncResult that retrieves results from the Hub,
        built on the new msg_ids reported in the reply.

    Raises
    ------

    TypeError : if an entry is neither an int nor a str.
    The exception unwrapped from the reply, if its status is not ok.
    """
    if block is None:
        block = self.block
    if indices_or_msg_ids is None:
        indices_or_msg_ids = -1
    if not isinstance(indices_or_msg_ids, (list,tuple)):
        indices_or_msg_ids = [indices_or_msg_ids]

    theids = []
    for item in indices_or_msg_ids:
        if isinstance(item, int):
            item = self.history[item]
        if not isinstance(item, string_types):
            raise TypeError("indices must be str or int, not %r"%item)
        theids.append(item)

    request = dict(msg_ids = theids)
    self.session.send(self._query_socket, 'resubmit_request', request)

    # block until the query socket is readable, then fetch the reply
    zmq.select([self._query_socket], [], [])
    _idents, reply = self.session.recv(self._query_socket, zmq.NOBLOCK)
    if self.debug:
        pprint(reply)
    reply_content = reply['content']
    if reply_content['status'] != 'ok':
        raise self._unwrap_exception(reply_content)

    # the reply maps each original msg_id to the msg_id of its resubmission
    mapping = reply_content['resubmitted']
    new_ids = [ mapping[msg_id] for msg_id in theids ]

    ar = AsyncHubResult(self, msg_ids=new_ids)
    if block:
        ar.wait()
    return ar
1493 1493
@spin_first
def result_status(self, msg_ids, status_only=True):
    """Check on the status of the result(s) of the apply request with `msg_ids`.

    If status_only is False, then the actual results will be retrieved, else
    only the status of the results will be checked.

    Results already cached in ``self.results`` are answered locally; only
    the remaining msg_ids are sent to the Hub in a ``result_request``.

    Parameters
    ----------

    msg_ids : list of msg_ids
        if int:
            Passed as index to self.history for convenience.
    status_only : bool (default: True)
        if False:
            Retrieve the actual results of completed tasks.

    Returns
    -------

    results : dict
        There will always be the keys 'pending' and 'completed', which will
        be lists of msg_ids that are incomplete or complete. If `status_only`
        is False, then completed results will be keyed by their `msg_id`.

    Raises
    ------

    TypeError : if an entry is neither an int nor a str.
    The exception unwrapped from the Hub reply, if its status is not ok.
    With status_only False, failed results are raised as well.
    """
    if not isinstance(msg_ids, (list,tuple)):
        msg_ids = [msg_ids]

    theids = []
    for msg_id in msg_ids:
        if isinstance(msg_id, int):
            msg_id = self.history[msg_id]
        if not isinstance(msg_id, string_types):
            # wrap in a tuple so a tuple-valued msg_id formats correctly
            raise TypeError("msg_ids must be str, not %r" % (msg_id,))
        theids.append(msg_id)

    completed = []
    local_results = {}

    # Local shortcut: split the ids into locally cached ones and ones that
    # must be asked of the Hub.  A new list is built instead of removing
    # from `theids` while iterating it, which skipped the element after
    # every removal and left cached ids in the Hub request.
    # (To temporarily disable the shortcut, skip this loop.)
    remote_ids = []
    for msg_id in theids:
        if msg_id in self.results:
            completed.append(msg_id)
            local_results[msg_id] = self.results[msg_id]
        else:
            remote_ids.append(msg_id)
    theids = remote_ids

    if theids: # some not locally cached
        content = dict(msg_ids=theids, status_only=status_only)
        msg = self.session.send(self._query_socket, "result_request", content=content)
        # block until the query socket is readable, then fetch the reply
        zmq.select([self._query_socket], [], [])
        idents,msg = self.session.recv(self._query_socket, zmq.NOBLOCK)
        if self.debug:
            pprint(msg)
        content = msg['content']
        if content['status'] != 'ok':
            raise self._unwrap_exception(content)
        buffers = msg['buffers']
    else:
        content = dict(completed=[],pending=[])

    content['completed'].extend(completed)

    if status_only:
        return content

    failures = []
    # load cached results into result:
    content.update(local_results)

    # update cache with results:
    # (`buffers` is consumed in sorted msg_id order, one apply_reply at a time)
    for msg_id in sorted(theids):
        if msg_id in content['completed']:
            rec = content[msg_id]
            parent = extract_dates(rec['header'])
            header = extract_dates(rec['result_header'])
            rcontent = rec['result_content']
            iodict = rec['io']
            if isinstance(rcontent, str):
                rcontent = self.session.unpack(rcontent)

            md = self.metadata[msg_id]
            md_msg = dict(
                content=rcontent,
                parent_header=parent,
                header=header,
                metadata=rec['result_metadata'],
            )
            md.update(self._extract_metadata(md_msg))
            if rec.get('received'):
                md['received'] = parse_date(rec['received'])
            md.update(iodict)

            if rcontent['status'] == 'ok':
                if header['msg_type'] == 'apply_reply':
                    res,buffers = serialize.unserialize_object(buffers)
                elif header['msg_type'] == 'execute_reply':
                    res = ExecuteReply(msg_id, rcontent, md)
                else:
                    raise KeyError("unhandled msg type: %r" % header['msg_type'])
            else:
                res = self._unwrap_exception(rcontent)
                failures.append(res)

            self.results[msg_id] = res
            content[msg_id] = res

    # a single failed request raises its own exception directly
    if len(theids) == 1 and failures:
        raise failures[0]

    error.collect_exceptions(failures, "result_status")
    return content
1605 1605
@spin_first
def queue_status(self, targets='all', verbose=False):
    """Fetch the status of engine queues.

    Parameters
    ----------

    targets : int/str/list of ints/strs
        the engines whose states are to be queried.
        default : all
    verbose : bool
        Whether to return lengths only, or lists of ids for each element

    Returns
    -------

    The re-keyed reply content; when `targets` is a single int, only the
    entry for that engine.
    """
    # 'all' is sent as None and left for the receiver of the request to resolve
    engine_ids = None if targets == 'all' else self._build_targets(targets)[1]
    request = dict(targets=engine_ids, verbose=verbose)
    self.session.send(self._query_socket, "queue_request", content=request)
    _idents, reply = self.session.recv(self._query_socket, 0)
    if self.debug:
        pprint(reply)
    reply_content = reply['content']
    if reply_content.pop('status') != 'ok':
        raise self._unwrap_exception(reply_content)
    queues = rekey(reply_content)
    if isinstance(targets, int):
        return queues[targets]
    return queues
1638 1638
def _build_msgids_from_target(self, targets=None):
    """Build a list of msg_ids from the list of engine targets.

    Returns the msg_ids in ``self.metadata`` whose ``engine_uuid`` belongs
    to one of the engines selected by `targets`.

    An empty selection (None, ``[]``, ...) yields ``[]``; this guard is
    needed because ``_build_targets`` otherwise uses all engines.  The
    integer ``0`` is an engine id, not an empty selection, so it is
    passed on to ``_build_targets`` like any other id.
    """
    if not targets:
        is_engine_zero = isinstance(targets, int) and not isinstance(targets, bool)
        if not is_engine_zero:
            return []
    target_ids = self._build_targets(targets)[0]
    return [md_id for md_id in self.metadata
            if self.metadata[md_id]["engine_uuid"] in target_ids]
1645 1645
def _build_msgids_from_jobs(self, jobs=None):
    """Build a flat list of msg_ids from `jobs`.

    `jobs` may be a single msg_id (str), a single AsyncResult, or an
    iterable of either, including one-shot iterables such as generators.
    An AsyncResult contributes all of its ``msg_ids``.  A falsy `jobs`
    yields ``[]``.

    Raises
    ------

    TypeError : if an entry is neither a str nor an AsyncResult.
    """
    if not jobs:
        return []
    if isinstance(jobs, string_types + (AsyncResult,)):
        jobs = [jobs]
    msg_ids = []
    # Validate and collect in a single pass: iterating `jobs` twice would
    # exhaust a generator during validation and return nothing.
    for job in jobs:
        if isinstance(job, AsyncResult):
            msg_ids.extend(job.msg_ids)
        elif isinstance(job, string_types):
            msg_ids.append(job)
        else:
            # wrap in a tuple so a tuple-valued `job` formats correctly
            raise TypeError("Invalid msg_id type %r, expected str or AsyncResult" % (job,))
    return msg_ids
1662 1662
def purge_local_results(self, jobs=[], targets=[]):
    """Clears the client caches of results and their metadata.

    Individual results can be purged by msg_id, or the entire
    history of specific targets can be purged.

    Use `purge_local_results('all')` to scrub everything from the Client's
    results and metadata caches.

    After this call all `AsyncResults` are invalid and should be discarded.

    If you must "reget" the results, you can still do so by using
    `client.get_result(msg_id)` or `client.get_result(asyncresult)`. This will
    redownload the results from the hub if they are still available
    (i.e. `client.purge_hub_results(...)` has not been called).

    Parameters
    ----------

    jobs : str or list of str or AsyncResult objects
        the msg_ids whose results should be purged.
    targets : int/list of ints
        The engines, by integer ID, whose entire result histories are to be purged.

    Raises
    ------

    ValueError : if neither `targets` nor `jobs` is given.
    RuntimeError : if any of the tasks to be purged are still outstanding.

    """
    if not targets and not jobs:
        raise ValueError("Must specify at least one of `targets` and `jobs`")

    if jobs == 'all':
        if self.outstanding:
            raise RuntimeError("Can't purge outstanding tasks: %s" % self.outstanding)
        self.results.clear()
        self.metadata.clear()
        return

    msg_ids = set()
    msg_ids.update(self._build_msgids_from_target(targets))
    msg_ids.update(self._build_msgids_from_jobs(jobs))
    still_outstanding = self.outstanding.intersection(msg_ids)
    if still_outstanding:
        raise RuntimeError("Can't purge outstanding tasks: %s" % still_outstanding)
    for mid in msg_ids:
        # Tolerate ids missing from either cache (e.g. metadata recorded
        # but no result cached): a KeyError partway through would leave
        # the caches half purged.
        self.results.pop(mid, None)
        self.metadata.pop(mid, None)
1707 1711
1708 1712
@spin_first
def purge_hub_results(self, jobs=[], targets=[]):
    """Ask the Hub to forget results, via a ``purge_request``.

    Individual results can be purged by msg_id, or the entire
    history of specific targets can be purged.

    Passing ``'all'`` as `jobs` forwards it unchanged as the msg_ids of
    the request, to scrub everything from the Hub's db.

    Parameters
    ----------

    jobs : str or list of str or AsyncResult objects
        the msg_ids whose results should be forgotten.
    targets : int/str/list of ints/strs
        The targets, by int_id, whose entire history is to be purged.

        default : None

    Raises
    ------

    ValueError : if neither `targets` nor `jobs` is given.
    The exception unwrapped from the reply, if its status is not ok.
    """
    nothing_requested = not targets and not jobs
    if nothing_requested:
        raise ValueError("Must specify at least one of `targets` and `jobs`")
    if targets:
        targets = self._build_targets(targets)[1]

    # 'all' is forwarded as-is; anything else is flattened to msg_ids
    msg_ids = jobs if jobs == 'all' else self._build_msgids_from_jobs(jobs)

    request = dict(engine_ids=targets, msg_ids=msg_ids)
    self.session.send(self._query_socket, "purge_request", content=request)
    _idents, reply = self.session.recv(self._query_socket, 0)
    if self.debug:
        pprint(reply)
    reply_content = reply['content']
    if reply_content['status'] != 'ok':
        raise self._unwrap_exception(reply_content)
1747 1751
def purge_results(self, jobs=[], targets=[]):
    """Drop cached results from the local client and from the Hub.

    Results can be dropped one msg_id at a time, or wholesale for the
    complete history of particular targets.

    Use `purge_results('all')` to scrub every cached result from both the
    Hub's and the Client's db.

    This calls `purge_local_results()` and then `purge_hub_results()` with
    the same arguments.

    Parameters
    ----------

    jobs : str or list of str or AsyncResult objects
        the msg_ids whose results should be forgotten.
    targets : int/str/list of ints/strs
        The targets, by int_id, whose entire history is to be purged.
    """
    selection = dict(jobs=jobs, targets=targets)
    # local cache first, then the Hub
    self.purge_local_results(**selection)
    self.purge_hub_results(**selection)
1772 1776
def purge_everything(self):
    """Forget every trace of previous tasks, locally and on the Hub.

    Runs `purge_results("all")`, then resets the client's history list and
    empties the session's digest history.
    """
    self.purge_results("all")
    # client-side bookkeeping that purge_results leaves untouched
    self.history = []
    self.session.digest_history.clear()
1782 1786
@spin_first
def hub_history(self):
    """Fetch the Hub's history.

    The Hub keeps a history of msg_ids just as the Client does. It covers
    every client and, depending on configuration, may span several cluster
    sessions.

    Every msg_id returned here can be passed to `get_result`.

    Returns
    -------

    msg_ids : list of strs
        list of all msg_ids, ordered by task submission time.
    """
    self.session.send(self._query_socket, "history_request", content={})
    idents, reply = self.session.recv(self._query_socket, 0)

    if self.debug:
        pprint(reply)
    reply_content = reply['content']
    if reply_content['status'] == 'ok':
        return reply_content['history']
    raise self._unwrap_exception(reply_content)
1810 1814
@spin_first
def db_query(self, query, keys=None):
    """Query the Hub's TaskRecord database.

    Returns the list of task record dicts matching `query`.

    Parameters
    ----------

    query : mongodb query dict
        The search dict. See mongodb query docs for details.
    keys : list of strs [optional]
        The subset of keys to be returned.  The default is to fetch everything but buffers.
        'msg_id' will *always* be included.
    """
    if isinstance(keys, string_types):
        # a lone key name is treated as a one-element list
        keys = [keys]
    request = dict(query=query, keys=keys)
    self.session.send(self._query_socket, "db_request", content=request)
    idents, reply = self.session.recv(self._query_socket, 0)
    if self.debug:
        pprint(reply)
    reply_content = reply['content']
    if reply_content['status'] != 'ok':
        raise self._unwrap_exception(reply_content)

    records = reply_content['records']

    buffer_lens = reply_content['buffer_lens']
    result_buffer_lens = reply_content['result_buffer_lens']
    # flat list of buffers; each record takes its share off the front
    remaining = reply['buffers']
    for idx, rec in enumerate(records):
        # unpack datetime objects
        for hkey in ('header', 'result_header'):
            if hkey in rec:
                rec[hkey] = extract_dates(rec[hkey])
        for dtkey in ('submitted', 'started', 'completed', 'received'):
            if dtkey in rec:
                rec[dtkey] = parse_date(rec[dtkey])
        # relink buffers
        if buffer_lens is not None:
            count = buffer_lens[idx]
            rec['buffers'] = remaining[:count]
            remaining = remaining[count:]
        if result_buffer_lens is not None:
            count = result_buffer_lens[idx]
            rec['result_buffers'] = remaining[:count]
            remaining = remaining[count:]

    return records
1861 1865
1862 1866 __all__ = [ 'Client' ]
@@ -1,522 +1,547 b''
1 1 """Tests for parallel client.py
2 2
3 3 Authors:
4 4
5 5 * Min RK
6 6 """
7 7
8 8 #-------------------------------------------------------------------------------
9 9 # Copyright (C) 2011 The IPython Development Team
10 10 #
11 11 # Distributed under the terms of the BSD License. The full license is in
12 12 # the file COPYING, distributed as part of this software.
13 13 #-------------------------------------------------------------------------------
14 14
15 15 #-------------------------------------------------------------------------------
16 16 # Imports
17 17 #-------------------------------------------------------------------------------
18 18
19 19 from __future__ import division
20 20
21 21 import time
22 22 from datetime import datetime
23 23 from tempfile import mktemp
24 24
25 25 import zmq
26 26
27 27 from IPython import parallel
28 28 from IPython.parallel.client import client as clientmod
29 29 from IPython.parallel import error
30 30 from IPython.parallel import AsyncResult, AsyncHubResult
31 31 from IPython.parallel import LoadBalancedView, DirectView
32 32
33 33 from .clienttest import ClusterTestCase, segfault, wait, add_engines
34 34
def setup():
    """Module-level fixture: request engines via `add_engines(4, total=True)`."""
    add_engines(4, total=True)
37 37
38 38 class TestClient(ClusterTestCase):
39 39
def test_ids(self):
    """adding two engines grows client.ids by two"""
    before = len(self.client.ids)
    self.add_engines(2)
    self.assertEqual(len(self.client.ids), before + 2)
44 44
def test_view_indexing(self):
    """indexing the client with ints, lists and slices yields matching views"""
    self.minimum_engines(4)
    rc = self.client
    all_targets = rc._build_targets('all')[-1]
    self.assertEqual(rc[:].targets, all_targets)

    def check_direct(key, expected):
        # every remaining index form must give a DirectView on `expected`
        view = rc[key]
        self.assertIsInstance(view, DirectView)
        self.assertEqual(view.targets, expected)

    single = rc.ids[2]
    check_direct(single, single)
    pair = rc.ids[2:4]
    check_direct(pair, pair)
    check_direct(slice(None, None, 2), all_targets[::2])
    check_direct(slice(1, None, 3), all_targets[1::3])
    check_direct(slice(None, -3), all_targets[:-3])
    check_direct(-1, all_targets[-1])
    self.assertRaises(TypeError, lambda: rc[None])
72 72
def test_lbview_targets(self):
    """targets of load_balanced_view for default, -1 and 'all'"""
    rc = self.client
    self.assertEqual(rc.load_balanced_view().targets, None)
    self.assertEqual(rc.load_balanced_view(-1).targets, [rc.ids[-1]])
    self.assertEqual(rc.load_balanced_view('all').targets, None)
81 81
def test_dview_targets(self):
    """targets of direct_view for default, 'all' and -1"""
    rc = self.client
    self.assertEqual(rc.direct_view().targets, 'all')
    self.assertEqual(rc.direct_view('all').targets, 'all')
    self.assertEqual(rc.direct_view(-1).targets, rc.ids[-1])
90 90
def test_lazy_all_targets(self):
    """a direct_view on 'all' picks up engines added after it was created"""
    view = self.client.direct_view()
    self.assertEqual(view.targets, 'all')

    def double(x):
        return x*2
    seq = list(range(100))
    expected = [double(x) for x in seq]

    def check_round(engine_count):
        # plain apply: one result per engine
        self.assertEqual(view.apply_sync(lambda: 1), [1] * engine_count)
        # map goes through remotefunction
        self.assertEqual(view.map_sync(double, seq), expected)

    # add some engines, which should be used
    self.add_engines(1)
    first_count = len(self.client.ids)
    check_round(first_count)

    # add a couple more engines, and try again
    self.add_engines(2)
    second_count = len(self.client.ids)
    self.assertNotEqual(second_count, first_count)
    check_round(second_count)
125 125
def test_targets(self):
    """_build_targets(None) resolves to all of client.ids"""
    engine_ids = self.client.ids
    idents, targets = self.client._build_targets(None)
    self.assertEqual(engine_ids, targets)
132 132
def test_clear(self):
    """clear() removes pushed names, for one target or for all"""
    self.minimum_engines(2)
    rc = self.client
    everyone = rc[:]
    everyone.block = True
    everyone.push(dict(a=5))
    everyone.pull('a')
    last_id = rc.ids[-1]
    rc.clear(targets=last_id, block=True)
    # fetch 'a' from the other engines; the value itself is not inspected
    rc[:-1].get('a')
    self.assertRaisesRemote(NameError, rc[last_id].get, 'a')
    rc.clear(block=True)
    for eid in rc.ids:
        self.assertRaisesRemote(NameError, rc[eid].get, 'a')
147 147
def test_get_result(self):
    """results submitted by another client can be fetched from the Hub"""
    other = clientmod.Client(profile='iptest')
    target = other.ids[-1]
    ar = other[target].apply_async(wait, 1)
    # give the monitor time to notice the message
    time.sleep(.25)
    msg_id = ar.msg_ids[0]
    ahr = self.client.get_result(msg_id)
    self.assertIsInstance(ahr, AsyncHubResult)
    self.assertEqual(ahr.get(), ar.get())
    # a second request for the same msg_id must not be a hub result
    again = self.client.get_result(msg_id)
    self.assertNotIsInstance(again, AsyncHubResult)
    other.close()
161 161
def test_get_execute_result(self):
    """test getting execute results from the Hub."""
    c = clientmod.Client(profile='iptest')
    t = c.ids[-1]
    # NOTE(review): an unused multi-line `cell` (ending in the expression
    # `5`) used to be built here and was never executed; it has been
    # removed.  The code that actually runs ends in a statement, so the
    # pyout comparison below may be trivial -- consider executing a cell
    # that ends in an expression.
    ar = c[t].execute("import time; time.sleep(1)", silent=False)
    # give the monitor time to notice the message
    time.sleep(.25)
    msg_id = ar.msg_ids[0]
    ahr = self.client.get_result(msg_id)
    self.assertTrue(isinstance(ahr, AsyncHubResult))
    self.assertEqual(ahr.get().pyout, ar.get().pyout)
    # a second request for the same msg_id must not be a hub result
    ar2 = self.client.get_result(msg_id)
    self.assertFalse(isinstance(ar2, AsyncHubResult))
    c.close()
180 180
def test_ids_list(self):
    """client.ids is an equal copy of client._ids, not the same list"""
    ids = self.client.ids
    self.assertEqual(ids, self.client._ids)
    self.assertIsNot(ids, self.client._ids)
    # changing the returned list must leave the client's own list alone
    ids.remove(ids[-1])
    self.assertNotEqual(ids, self.client._ids)
188 188
def test_queue_status(self):
    """queue_status returns per-engine dicts plus an 'unassigned' entry"""
    expected_keys = ['completed', 'queue', 'tasks']
    rc = self.client
    first_id = rc.ids[0]
    single = rc.queue_status(targets=first_id)
    self.assertIsInstance(single, dict)
    self.assertEqual(sorted(single.keys()), expected_keys)
    everything = rc.queue_status()
    self.assertIsInstance(everything, dict)
    engine_keys = list(everything.keys())
    engine_keys.remove('unassigned')
    self.assertEqual(sorted(engine_keys), sorted(rc.ids))
    # drop the non-engine entry before checking each engine's status
    everything.pop('unassigned')
    for eid, status in everything.items():
        self.assertIsInstance(status, dict)
        self.assertEqual(sorted(status.keys()), expected_keys)
204 204
def test_shutdown(self):
    """an engine that was shut down can no longer be indexed"""
    rc = self.client
    victim = rc.ids[0]
    rc.shutdown(victim, block=True)
    # poll until the client no longer lists the engine
    while victim in rc.ids:
        time.sleep(0.1)
        rc.spin()

    self.assertRaises(IndexError, lambda: rc[victim])
214 214
def test_result_status(self):
    """placeholder: no checks yet"""
    # to be written
218 218
def test_db_query_dt(self):
    """db queries can compare against the 'submitted' date"""
    rc = self.client
    hist = rc.hub_history()
    middle = rc.db_query({'msg_id': hist[len(hist)//2]})[0]
    tic = middle['submitted']
    before = rc.db_query({'submitted': {'$lt': tic}})
    after = rc.db_query({'submitted': {'$gte': tic}})
    # the two halves together must account for the whole history
    self.assertEqual(len(before) + len(after), len(hist))
    for rec in before:
        self.assertLess(rec['submitted'], tic)
    for rec in after:
        self.assertGreaterEqual(rec['submitted'], tic)
    same = rc.db_query({'submitted': tic})
    for rec in same:
        self.assertEqual(rec['submitted'], tic)
234 234
def test_db_query_keys(self):
    """requesting a subset of keys returns exactly those plus msg_id"""
    expected = set(['msg_id', 'submitted', 'completed'])
    records = self.client.db_query({'msg_id': {'$ne': ''}},
                                   keys=['submitted', 'completed'])
    for rec in records:
        self.assertEqual(set(rec.keys()), expected)
240 240
def test_db_query_default_keys(self):
    """without explicit keys, records carry no buffer entries"""
    for rec in self.client.db_query({'msg_id': {'$ne': ''}}):
        keys = set(rec.keys())
        self.assertNotIn('buffers', keys, "'buffers' should not be in: %s" % keys)
        self.assertNotIn('result_buffers', keys, "'result_buffers' should not be in: %s" % keys)
248 248
def test_db_query_msg_id(self):
    """msg_id is present whichever keys are requested"""
    key_sets = (['submitted', 'completed'], ['submitted'], ['msg_id'])
    for keys in key_sets:
        found = self.client.db_query({'msg_id': {'$ne': ''}}, keys=keys)
        for rec in found:
            self.assertIn('msg_id', rec.keys())
260 260
def test_db_query_get_result(self):
    """a db_query must leave results retrievable through get_result"""
    self.client[:].apply_sync(lambda: 1)
    # only the side effect of querying matters; the records are discarded
    self.client.db_query({'msg_id': {'$ne': ''}})
    rc2 = clientmod.Client(profile='iptest')
    # If this bug is not fixed, this call will hang:
    ar = rc2.get_result(self.client.history[-1])
    ar.wait(2)
    self.assertTrue(ar.ready())
    ar.get()
    rc2.close()
272 272
def test_db_query_in(self):
    """'$in' and '$nin' select complementary halves of the history"""
    hist = self.client.hub_history()
    even, odd = hist[::2], hist[1::2]
    selected = self.client.db_query({'msg_id': {'$in': even}})
    self.assertEqual(set(even), set(rec['msg_id'] for rec in selected))
    excluded = self.client.db_query({'msg_id': {'$nin': even}})
    self.assertEqual(set(odd), set(rec['msg_id'] for rec in excluded))
284 284
def test_hub_history(self):
    """hub_history is ordered by submission and ends with the newest task"""
    hist = self.client.hub_history()
    recs = self.client.db_query({'msg_id': {"$ne": ''}})
    by_id = dict((rec['msg_id'], rec) for rec in recs)

    # submission times must never decrease along the history
    latest = datetime(1984, 1, 1)
    for msg_id in hist:
        submitted = by_id[msg_id]['submitted']
        self.assertGreaterEqual(submitted, latest)
        latest = submitted
    ar = self.client[-1].apply_async(lambda: 1)
    ar.get()
    time.sleep(0.25)
    self.assertEqual(self.client.hub_history()[-1:], ar.msg_ids)
302 302
def _wait_for_idle(self):
    """wait until both the client and the Hub consider the cluster idle."""
    rc = self.client

    # step 0. wait for local results
    # this should be sufficient 99% of the time.
    rc.wait(timeout=5)

    # step 1. wait for all requests to be noticed
    # timeout 5s, polling every 100ms
    msg_ids = set(rc.history)
    hub_hist = rc.hub_history()
    for _ in range(50):
        if not msg_ids.difference(hub_hist):
            break
        time.sleep(0.1)
        hub_hist = rc.hub_history()

    self.assertEqual(len(msg_ids.difference(hub_hist)), 0)

    def busy(status):
        # true while anything is unassigned, queued or running
        if status['unassigned']:
            return True
        return any(status[eid]['tasks'] + status[eid]['queue']
                   for eid in status if eid != 'unassigned')

    # step 2. wait for all requests to be done
    # timeout 5s, polling every 100ms
    qs = rc.queue_status()
    for _ in range(50):
        if not busy(qs):
            break
        time.sleep(0.1)
        qs = rc.queue_status()

    # ensure Hub up to date:
    self.assertEqual(qs['unassigned'], 0)
    for eid in rc.ids:
        engine_status = qs[eid]
        self.assertEqual(engine_status['tasks'], 0)
        self.assertEqual(engine_status['queue'], 0)
339 339
340 340
def test_resubmit(self):
    """a resubmitted task runs again and yields a fresh result"""
    def f():
        import random
        return random.random()
    view = self.client.load_balanced_view()
    ar = view.apply_async(f)
    first = ar.get(1)
    # give the Hub a chance to notice:
    self._wait_for_idle()
    ahr = self.client.resubmit(ar.msg_ids)
    second = ahr.get(1)
    self.assertFalse(first == second)
353 353
def test_resubmit_chain(self):
    """resubmit resubmitted tasks"""
    v = self.client.load_balanced_view()
    ar = v.apply_async(lambda x: x, 'x'*1024)
    ar.get()
    self._wait_for_idle()
    ars = [ar]

    for i in range(10):
        # Resubmit the most recent result and remember the new one, so each
        # round resubmits the previous resubmission.  Without the append,
        # the original task was simply resubmitted ten times.
        ars.append(self.client.resubmit(ars[-1].msg_ids))

    # wait for every link of the chain, the original included
    for result in ars:
        result.get()
367 367
def test_resubmit_header(self):
    """resubmission keeps every header field except msg_id and date"""
    def f():
        import random
        return random.random()
    view = self.client.load_balanced_view()
    view.retries = 1
    ar = view.apply_async(f)
    ar.get(1)
    # give the Hub a chance to notice:
    self._wait_for_idle()
    ahr = self.client.resubmit(ar.msg_ids)
    ahr.get(1)
    time.sleep(0.5)
    both = ar.msg_ids + ahr.msg_ids
    records = self.client.db_query({'msg_id': {'$in': both}}, keys='header')
    first, second = [rec['header'] for rec in records]
    changing = ('msg_id', 'date')
    for key in set(first.keys()) | set(second.keys()):
        compare = self.assertNotEqual if key in changing else self.assertEqual
        compare(first[key], second[key])
389 389
def test_resubmit_aborted(self):
    """an aborted task can be resubmitted"""
    def f():
        import random
        return random.random()
    view = self.client.load_balanced_view()
    # restrict to one engine, so we can put a sleep
    # ahead of the task, so it will get aborted
    view.targets = [self.client.ids[-1]]
    blocker = view.apply_async(time.sleep, 0.5)
    ar = view.apply_async(f)
    ar.abort()
    self.assertRaises(error.TaskAborted, ar.get)
    # Give the Hub a chance to get up to date:
    self._wait_for_idle()
    ahr = self.client.resubmit(ar.msg_ids)
    ahr.get(1)
407 407
def test_resubmit_inflight(self):
    """a task can be resubmitted while it is still running"""
    view = self.client.load_balanced_view()
    ar = view.apply_async(time.sleep, 1)
    # give the message a chance to arrive
    time.sleep(0.2)
    ahr = self.client.resubmit(ar.msg_ids)
    for result in (ar, ahr):
        result.get(2)
417 417
def test_resubmit_badkey(self):
    """ensure KeyError on resubmit of nonexistent task"""
    unknown_ids = ['invalid']
    self.assertRaisesRemote(KeyError, self.client.resubmit, unknown_ids)
421 421
def test_purge_hub_results(self):
    """purging one msg_id shortens the Hub's history by one"""
    rc = self.client
    # ensure there are some tasks
    for _ in range(5):
        rc[:].apply_sync(lambda: 1)
    # Wait for the Hub to realise the result is done:
    # This prevents a race condition, where we
    # might purge a result the Hub still thinks is pending.
    self._wait_for_idle()
    rc2 = clientmod.Client(profile='iptest')
    hist = rc.hub_history()
    newest = hist[-1]
    ahr = rc2.get_result([newest])
    ahr.wait(10)
    rc.purge_hub_results(newest)
    newhist = rc.hub_history()
    self.assertEqual(len(newhist) + 1, len(hist))
    rc2.spin()
    rc2.close()
439 439
def test_purge_local_results(self):
    """purging an AsyncResult removes its entries from results and metadata"""
    rc = self.client
    # ensure there are some tasks
    res = [rc[:].apply_async(lambda: 1) for _ in range(5)]
    self._wait_for_idle()
    rc.wait(10)  # wait for the results to come back
    before = len(rc.results)
    self.assertEqual(len(rc.metadata), before)
    last = res[-1]
    rc.purge_local_results(last)
    remaining = before - len(last)
    self.assertEqual(len(rc.results), remaining, msg="Not removed from results")
    self.assertEqual(len(rc.metadata), remaining, msg="Not removed from metadata")
452 452
def test_purge_local_results_outstanding(self):
    """finished results can be purged while another task is outstanding"""
    rc = self.client
    view = rc[-1]
    done = view.apply_async(lambda: 1)
    msg_id = done.msg_ids[0]
    done.get()
    self._wait_for_idle()
    pending = view.apply_async(time.sleep, 1)
    self.assertIn(msg_id, rc.results)
    self.assertIn(msg_id, rc.metadata)
    rc.purge_local_results(done)
    self.assertNotIn(msg_id, rc.results)
    self.assertNotIn(msg_id, rc.metadata)
    # the task that is still running must be refused ...
    self.assertRaises(RuntimeError, rc.purge_local_results, pending)
    # ... and accepted once it has finished
    pending.get()
    rc.purge_local_results(pending)
469
def test_purge_all_local_results_outstanding(self):
    """purging 'all' locally is refused while a task is outstanding"""
    rc = self.client
    pending = rc[-1].apply_async(time.sleep, 1)
    self.assertRaises(RuntimeError, rc.purge_local_results, 'all')
    pending.get()
    rc.purge_local_results('all')
477
def test_purge_all_hub_results(self):
    """purging 'all' on the Hub leaves an empty hub history"""
    self.client.purge_hub_results('all')
    self.assertEqual(len(self.client.hub_history()), 0)
457 482
def test_purge_all_local_results(self):
    """purging 'all' locally empties client.results and client.metadata"""
    rc = self.client
    rc.purge_local_results('all')
    self.assertEqual(len(rc.results), 0, msg="Results not empty")
    self.assertEqual(len(rc.metadata), 0, msg="metadata not empty")
462 487
def test_purge_all_results(self):
    """purge_results('all') empties both the client caches and the Hub"""
    rc = self.client
    # ensure there are some tasks
    for _ in range(5):
        rc[:].apply_sync(lambda: 1)
    rc.wait(10)
    self._wait_for_idle()
    rc.purge_results('all')
    self.assertEqual(len(rc.results), 0, msg="Results not empty")
    self.assertEqual(len(rc.metadata), 0, msg="metadata not empty")
    self.assertEqual(len(rc.hub_history()), 0, msg="hub history not empty")
474 499
def test_purge_everything(self):
    """purge_everything clears results, bookkeeping and the hub history"""
    rc = self.client
    # ensure there are some tasks
    for _ in range(5):
        rc[:].apply_sync(lambda: 1)
    rc.wait(10)
    self._wait_for_idle()
    rc.purge_everything()
    # The client results
    self.assertEqual(len(rc.results), 0, msg="Results not empty")
    self.assertEqual(len(rc.metadata), 0, msg="metadata not empty")
    # The client "bookkeeping"
    self.assertEqual(len(rc.session.digest_history), 0, msg="session digest not empty")
    self.assertEqual(len(rc.history), 0, msg="client history not empty")
    # the hub results
    self.assertEqual(len(rc.hub_history()), 0, msg="hub history not empty")
491 516
492 517
def test_spin_thread(self):
    """with the spin thread running, results arrive without explicit spins"""
    rc = self.client
    rc.spin_thread(0.01)
    ar = rc[-1].apply_async(lambda: 1)
    time.sleep(0.1)
    self.assertTrue(
        ar.wall_time < 0.1,
        "spin should have kept wall_time < 0.1, but got %f" % ar.wall_time
    )
500 525
def test_stop_spin_thread(self):
    """after stop_spin_thread, results are no longer collected in the background"""
    rc = self.client
    rc.spin_thread(0.01)
    rc.stop_spin_thread()
    ar = rc[-1].apply_async(lambda: 1)
    time.sleep(0.15)
    self.assertTrue(
        ar.wall_time > 0.1,
        "Shouldn't be spinning, but got wall_time=%f" % ar.wall_time
    )
509 534
def test_activate(self):
    """activate() adds suffixed px magics and returns a view on the targets"""
    ip = get_ipython()
    magics = ip.magics_manager.magics

    def assert_magic(name):
        # the magic must exist both as a line magic and as a cell magic
        self.assertIn(name, magics['line'])
        self.assertIn(name, magics['cell'])

    assert_magic('px')
    view = self.client.activate(-1, '0')
    assert_magic('px0')
    self.assertEqual(view.targets, self.client.ids[-1])
    view = self.client.activate('all', 'all')
    assert_magic('pxall')
    self.assertEqual(view.targets, 'all')
General Comments 0
You need to be logged in to leave comments. Login now