cleanup parallel traits...
MinRK
diff --git a/IPython/parallel/apps/clusterdir.py b/IPython/parallel/apps/clusterdir.py
@@ -1,539 +1,539 @@
1 1 #!/usr/bin/env python
2 2 # encoding: utf-8
3 3 """
4 4 The IPython cluster directory
5 5 """
6 6
7 7 #-----------------------------------------------------------------------------
8 8 # Copyright (C) 2008-2009 The IPython Development Team
9 9 #
10 10 # Distributed under the terms of the BSD License. The full license is in
11 11 # the file COPYING, distributed as part of this software.
12 12 #-----------------------------------------------------------------------------
13 13
14 14 #-----------------------------------------------------------------------------
15 15 # Imports
16 16 #-----------------------------------------------------------------------------
17 17
18 18 from __future__ import with_statement
19 19
20 20 import os
21 21 import logging
22 22 import re
23 23 import shutil
24 24 import sys
25 25
26 26 from subprocess import Popen, PIPE
27 27
28 28 from IPython.config.loader import PyFileConfigLoader, Config
29 29 from IPython.config.configurable import Configurable
30 30 from IPython.config.application import Application
31 31 from IPython.core.crashhandler import CrashHandler
32 32 from IPython.core.newapplication import BaseIPythonApplication
33 33 from IPython.core import release
34 34 from IPython.utils.path import (
35 35 get_ipython_package_dir,
36 36 get_ipython_dir,
37 37 expand_path
38 38 )
39 from IPython.utils.traitlets import Unicode, Bool, CStr, Instance, Dict
39 from IPython.utils.traitlets import Unicode, Bool, Instance, Dict
40 40
41 41 #-----------------------------------------------------------------------------
42 42 # Module errors
43 43 #-----------------------------------------------------------------------------
44 44
45 45 class ClusterDirError(Exception):
46 46 pass
47 47
48 48
49 49 class PIDFileError(Exception):
50 50 pass
51 51
52 52
53 53 #-----------------------------------------------------------------------------
54 54 # Class for managing cluster directories
55 55 #-----------------------------------------------------------------------------
56 56
57 57 class ClusterDir(Configurable):
58 58 """An object to manage the cluster directory and its resources.
59 59
60 60 The cluster directory is used by :command:`ipengine`,
61 61 :command:`ipcontroller` and :command:`ipcluster` to manage the
62 62 configuration, logging and security of these applications.
63 63
64 64 This object knows how to find, create and manage these directories. This
65 65 should be used by any code that wants to handle cluster directories.
66 66 """
67 67
68 68 security_dir_name = Unicode('security')
69 69 log_dir_name = Unicode('log')
70 70 pid_dir_name = Unicode('pid')
71 71 security_dir = Unicode(u'')
72 72 log_dir = Unicode(u'')
73 73 pid_dir = Unicode(u'')
74 74
75 75 auto_create = Bool(False,
76 76 help="""Whether to automatically create the ClusterDirectory if it does
77 77 not exist""")
78 78 overwrite = Bool(False,
79 79 help="""Whether to overwrite existing config files""")
80 80 location = Unicode(u'', config=True,
81 81 help="""Set the cluster dir. This overrides the logic used by the
82 82 `profile` option.""",
83 83 )
84 84 profile = Unicode(u'default', config=True,
85 85 help="""The string name of the profile to be used. This determines the name
86 86 of the cluster dir as: cluster_<profile>. The default profile is named
87 87 'default'. The cluster directory is resolved this way if the
88 88 `cluster_dir` option is not used."""
89 89 )
90 90
91 91 _location_isset = Bool(False) # flag for detecting multiply set location
92 92 _new_dir = Bool(False) # flag for whether a new dir was created
93 93
94 94 def __init__(self, **kwargs):
95 95 # make sure auto_create,overwrite are set *before* location
96 96 for name in ('auto_create', 'overwrite'):
97 97 v = kwargs.pop(name, None)
98 98 if v is not None:
99 99 setattr(self, name, v)
100 100 super(ClusterDir, self).__init__(**kwargs)
101 101 if not self.location:
102 102 self._profile_changed('profile', 'default', self.profile)
103 103
104 104 def _location_changed(self, name, old, new):
105 105 if self._location_isset:
106 106 raise RuntimeError("Cannot set ClusterDir more than once.")
107 107 self._location_isset = True
108 108 if not os.path.isdir(new):
109 109 if self.auto_create:# or self.config.ClusterDir.auto_create:
110 110 os.makedirs(new)
111 111 self._new_dir = True
112 112 else:
113 113 raise ClusterDirError('Directory not found: %s' % new)
114 114
115 115 # ensure config files exist:
116 116 self.copy_all_config_files(overwrite=self.overwrite)
117 117 self.security_dir = os.path.join(new, self.security_dir_name)
118 118 self.log_dir = os.path.join(new, self.log_dir_name)
119 119 self.pid_dir = os.path.join(new, self.pid_dir_name)
120 120 self.check_dirs()
121 121
122 122 def _profile_changed(self, name, old, new):
123 123 if self._location_isset:
124 124 raise RuntimeError("ClusterDir already set. Cannot set by profile.")
125 125 self.location = os.path.join(get_ipython_dir(), 'cluster_'+new)
126 126
127 127 def _log_dir_changed(self, name, old, new):
128 128 self.check_log_dir()
129 129
130 130 def check_log_dir(self):
131 131 if not os.path.isdir(self.log_dir):
132 132 os.mkdir(self.log_dir)
133 133
134 134 def _security_dir_changed(self, name, old, new):
135 135 self.check_security_dir()
136 136
137 137 def check_security_dir(self):
138 138 if not os.path.isdir(self.security_dir):
139 139 os.mkdir(self.security_dir, 0700)
140 140 os.chmod(self.security_dir, 0700)
141 141
142 142 def _pid_dir_changed(self, name, old, new):
143 143 self.check_pid_dir()
144 144
145 145 def check_pid_dir(self):
146 146 if not os.path.isdir(self.pid_dir):
147 147 os.mkdir(self.pid_dir, 0700)
148 148 os.chmod(self.pid_dir, 0700)
149 149
150 150 def check_dirs(self):
151 151 self.check_security_dir()
152 152 self.check_log_dir()
153 153 self.check_pid_dir()
154 154
155 155 def copy_config_file(self, config_file, path=None, overwrite=False):
156 156 """Copy a default config file into the active cluster directory.
157 157
158 158 Default configuration files are kept in :mod:`IPython.config.default`.
159 159 This function copies these from that location to the working cluster
160 160 directory.
161 161 """
162 162 if path is None:
163 163 import IPython.config.default
164 164 path = IPython.config.default.__file__.split(os.path.sep)[:-1]
165 165 path = os.path.sep.join(path)
166 166 src = os.path.join(path, config_file)
167 167 dst = os.path.join(self.location, config_file)
168 168 if not os.path.isfile(dst) or overwrite:
169 169 shutil.copy(src, dst)
170 170
171 171 def copy_all_config_files(self, path=None, overwrite=False):
172 172 """Copy all config files into the active cluster directory."""
173 173 for f in [u'ipcontroller_config.py', u'ipengine_config.py',
174 174 u'ipcluster_config.py']:
175 175 self.copy_config_file(f, path=path, overwrite=overwrite)
176 176
177 177 @classmethod
178 178 def create_cluster_dir(cls, cluster_dir):
179 179 """Create a new cluster directory given a full path.
180 180
181 181 Parameters
182 182 ----------
183 183 cluster_dir : str
184 184 The full path to the cluster directory. If it does exist, it will
185 185 be used. If not, it will be created.
186 186 """
187 187 return ClusterDir(location=cluster_dir)
188 188
189 189 @classmethod
190 190 def create_cluster_dir_by_profile(cls, path, profile=u'default'):
191 191 """Create a cluster dir by profile name and path.
192 192
193 193 Parameters
194 194 ----------
195 195 path : str
196 196 The path (directory) to put the cluster directory in.
197 197 profile : str
198 198 The name of the profile. The name of the cluster directory will
199 199 be "cluster_<profile>".
200 200 """
201 201 if not os.path.isdir(path):
202 202 raise ClusterDirError('Directory not found: %s' % path)
203 203 cluster_dir = os.path.join(path, u'cluster_' + profile)
204 204 return ClusterDir(location=cluster_dir)
205 205
206 206 @classmethod
207 207 def find_cluster_dir_by_profile(cls, ipython_dir, profile=u'default'):
208 208 """Find an existing cluster dir by profile name, return its ClusterDir.
209 209
210 210 This searches through a sequence of paths for a cluster dir. If it
211 211 is not found, a :class:`ClusterDirError` exception will be raised.
212 212
213 213 The search path algorithm is:
214 214 1. ``os.getcwd()``
215 215 2. ``ipython_dir``
216 216 3. The directories found in the ":" separated
217 217 :env:`IPCLUSTER_DIR_PATH` environment variable.
218 218
219 219 Parameters
220 220 ----------
221 221 ipython_dir : unicode or str
222 222 The IPython directory to use.
223 223 profile : unicode or str
224 224 The name of the profile. The name of the cluster directory
225 225 will be "cluster_<profile>".
226 226 """
227 227 dirname = u'cluster_' + profile
228 228 cluster_dir_paths = os.environ.get('IPCLUSTER_DIR_PATH','')
229 229 if cluster_dir_paths:
230 230 cluster_dir_paths = cluster_dir_paths.split(':')
231 231 else:
232 232 cluster_dir_paths = []
233 233 paths = [os.getcwd(), ipython_dir] + cluster_dir_paths
234 234 for p in paths:
235 235 cluster_dir = os.path.join(p, dirname)
236 236 if os.path.isdir(cluster_dir):
237 237 return ClusterDir(location=cluster_dir)
238 238 else:
239 239 raise ClusterDirError('Cluster directory not found in paths: %s' % dirname)
240 240
241 241 @classmethod
242 242 def find_cluster_dir(cls, cluster_dir):
243 243 """Find/create a cluster dir and return its ClusterDir.
244 244
245 245 This will create the cluster directory if it doesn't exist.
246 246
247 247 Parameters
248 248 ----------
249 249 cluster_dir : unicode or str
250 250 The path of the cluster directory. This is expanded using
251 251 :func:`IPython.utils.genutils.expand_path`.
252 252 """
253 253 cluster_dir = expand_path(cluster_dir)
254 254 if not os.path.isdir(cluster_dir):
255 255 raise ClusterDirError('Cluster directory not found: %s' % cluster_dir)
256 256 return ClusterDir(location=cluster_dir)
257 257
258 258
259 259 #-----------------------------------------------------------------------------
260 260 # Crash handler for this application
261 261 #-----------------------------------------------------------------------------
262 262
263 263
264 264 _message_template = """\
265 265 Oops, $self.app_name crashed. We do our best to make it stable, but...
266 266
267 267 A crash report was automatically generated with the following information:
268 268 - A verbatim copy of the crash traceback.
269 269 - Data on your current $self.app_name configuration.
270 270
271 271 It was left in the file named:
272 272 \t'$self.crash_report_fname'
273 273 If you can email this file to the developers, the information in it will help
274 274 them in understanding and correcting the problem.
275 275
276 276 You can mail it to: $self.contact_name at $self.contact_email
277 277 with the subject '$self.app_name Crash Report'.
278 278
279 279 If you want to do it now, the following command will work (under Unix):
280 280 mail -s '$self.app_name Crash Report' $self.contact_email < $self.crash_report_fname
281 281
282 282 To ensure accurate tracking of this issue, please file a report about it at:
283 283 $self.bug_tracker
284 284 """
285 285
286 286 class ClusterDirCrashHandler(CrashHandler):
287 287 """sys.excepthook for IPython itself, leaves a detailed report on disk."""
288 288
289 289 message_template = _message_template
290 290
291 291 def __init__(self, app):
292 292 contact_name = release.authors['Min'][0]
293 293 contact_email = release.authors['Min'][1]
294 294 bug_tracker = 'http://github.com/ipython/ipython/issues'
295 295 super(ClusterDirCrashHandler,self).__init__(
296 296 app, contact_name, contact_email, bug_tracker
297 297 )
298 298
299 299
300 300 #-----------------------------------------------------------------------------
301 301 # Main application
302 302 #-----------------------------------------------------------------------------
303 303 base_aliases = {
304 304 'profile' : "ClusterDir.profile",
305 305 'cluster_dir' : 'ClusterDir.location',
306 306 'auto_create' : 'ClusterDirApplication.auto_create',
307 307 'log_level' : 'ClusterApplication.log_level',
308 308 'work_dir' : 'ClusterApplication.work_dir',
309 309 'log_to_file' : 'ClusterApplication.log_to_file',
310 310 'clean_logs' : 'ClusterApplication.clean_logs',
311 311 'log_url' : 'ClusterApplication.log_url',
312 312 }
313 313
314 314 base_flags = {
315 315 'debug' : ( {"ClusterApplication" : {"log_level" : logging.DEBUG}}, "set loglevel to DEBUG"),
316 316 'quiet' : ( {"ClusterApplication" : {"log_level" : logging.CRITICAL}}, "set loglevel to CRITICAL (minimal output)"),
317 317 'log-to-file' : ( {"ClusterApplication" : {"log_to_file" : True}}, "redirect log output to a file"),
318 318 }
319 319 for k,v in base_flags.iteritems():
320 320 base_flags[k] = (Config(v[0]),v[1])
321 321
322 322 class ClusterApplication(BaseIPythonApplication):
323 323 """An application that puts everything into a cluster directory.
324 324
325 325 Instead of looking for things in the ipython_dir, this type of application
326 326 will use its own private directory called the "cluster directory"
327 327 for things like config files, log files, etc.
328 328
329 329 The cluster directory is resolved as follows:
330 330
331 331 * If the ``--cluster-dir`` option is given, it is used.
332 332 * If ``--cluster-dir`` is not given, the application directory is
333 333 resolved using the profile name as ``cluster_<profile>``. The search
334 334 path for this directory is then i) the cwd, if it is found there,
335 335 and ii) the ipython_dir otherwise.
336 336
337 337 The config file for the application is to be put in the cluster
338 338 dir and named after the value of the ``config_file_name`` class attribute.
339 339 """
340 340
341 341 crash_handler_class = ClusterDirCrashHandler
342 342 auto_create_cluster_dir = Bool(True, config=True,
343 343 help="whether to create the cluster_dir if it doesn't exist")
344 344 cluster_dir = Instance(ClusterDir)
345 345 classes = [ClusterDir]
346 346
347 347 def _log_level_default(self):
348 348 # temporarily override default_log_level to INFO
349 349 return logging.INFO
350 350
351 351 work_dir = Unicode(os.getcwdu(), config=True,
352 352 help='Set the working dir for the process.'
353 353 )
354 354 def _work_dir_changed(self, name, old, new):
355 355 self.work_dir = unicode(expand_path(new))
356 356
357 357 log_to_file = Bool(config=True,
358 358 help="whether to log to a file")
359 359
360 360 clean_logs = Bool(False, shortname='--clean-logs', config=True,
361 361 help="whether to cleanup old logfiles before starting")
362 362
363 log_url = CStr('', shortname='--log-url', config=True,
364 help="The ZMQ URL of the iplooger to aggregate logging.")
363 log_url = Unicode('', shortname='--log-url', config=True,
364 help="The ZMQ URL of the iplogger to aggregate logging.")
365 365
366 366 config_file = Unicode(u'', config=True,
367 367 help="""Path to ipcontroller configuration file. The default is to use
368 368 <appname>_config.py, as found by cluster-dir."""
369 369 )
370 370
371 371 loop = Instance('zmq.eventloop.ioloop.IOLoop')
372 372 def _loop_default(self):
373 373 from zmq.eventloop.ioloop import IOLoop
374 374 return IOLoop.instance()
375 375
376 376 aliases = Dict(base_aliases)
377 377 flags = Dict(base_flags)
378 378
379 379 def init_clusterdir(self):
380 380 """This resolves the cluster directory.
381 381
382 382 This tries to find the cluster directory and if successful, it will
383 383 have done:
384 384 * Sets ``self.cluster_dir_obj`` to the :class:`ClusterDir` object for
385 385 the application.
386 386 * Sets ``self.cluster_dir`` attribute of the application and config
387 387 objects.
388 388
389 389 The algorithm used for this is as follows:
390 390 1. Try ``Global.cluster_dir``.
391 391 2. Try using ``Global.profile``.
392 392 3. If both of these fail and ``self.auto_create_cluster_dir`` is
393 393 ``True``, then create the new cluster dir in the IPython directory.
394 394 4. If all fails, then raise :class:`ClusterDirError`.
395 395 """
396 396 try:
397 397 self.cluster_dir = ClusterDir(auto_create=self.auto_create_cluster_dir, config=self.config)
398 398 except ClusterDirError as e:
399 399 self.log.fatal("Error initializing cluster dir: %s"%e)
400 400 self.log.fatal("A cluster dir must be created before running this command.")
401 401 self.log.fatal("Do 'ipcluster create -h' or 'ipcluster list -h' for more "
402 402 "information about creating and listing cluster dirs."
403 403 )
404 404 self.exit(1)
405 405
406 406 if self.cluster_dir._new_dir:
407 407 self.log.info('Creating new cluster dir: %s' % \
408 408 self.cluster_dir.location)
409 409 else:
410 410 self.log.info('Using existing cluster dir: %s' % \
411 411 self.cluster_dir.location)
412 412
413 413 def initialize(self, argv=None):
414 414 """initialize the app"""
415 415 self.init_crash_handler()
416 416 self.parse_command_line(argv)
417 417 cl_config = self.config
418 418 self.init_clusterdir()
419 419 if self.config_file:
420 420 self.load_config_file(self.config_file)
421 421 else:
422 422 self.load_config_file(self.default_config_file_name, path=self.cluster_dir.location)
423 423 # command-line should *override* config file, but command-line is necessary
424 424 # to determine clusterdir, etc.
425 425 self.update_config(cl_config)
426 426 self.reinit_logging()
427 427
428 428 self.to_work_dir()
429 429
430 430 def to_work_dir(self):
431 431 wd = self.work_dir
432 432 if unicode(wd) != os.getcwdu():
433 433 os.chdir(wd)
434 434 self.log.info("Changing to working dir: %s" % wd)
435 435
436 436 def load_config_file(self, filename, path=None):
437 437 """Load a .py based config file by filename and path."""
438 438 # use config.application.Application.load_config
439 439 # instead of inflexible core.newapplication.BaseIPythonApplication.load_config
440 440 return Application.load_config_file(self, filename, path=path)
441 441 #
442 442 # def load_default_config_file(self):
443 443 # """Load a .py based config file by filename and path."""
444 444 # return BaseIPythonApplication.load_config_file(self)
445 445
446 446 # disable URL-logging
447 447 def reinit_logging(self):
448 448 # Remove old log files
449 449 log_dir = self.cluster_dir.log_dir
450 450 if self.clean_logs:
451 451 for f in os.listdir(log_dir):
452 452 if re.match(r'%s-\d+\.(log|err|out)'%self.name,f):
453 453 os.remove(os.path.join(log_dir, f))
454 454 if self.log_to_file:
455 455 # Start logging to the new log file
456 456 log_filename = self.name + u'-' + str(os.getpid()) + u'.log'
457 457 logfile = os.path.join(log_dir, log_filename)
458 458 open_log_file = open(logfile, 'w')
459 459 else:
460 460 open_log_file = None
461 461 if open_log_file is not None:
462 462 self.log.removeHandler(self._log_handler)
463 463 self._log_handler = logging.StreamHandler(open_log_file)
464 464 self._log_formatter = logging.Formatter("[%(name)s] %(message)s")
465 465 self._log_handler.setFormatter(self._log_formatter)
466 466 self.log.addHandler(self._log_handler)
467 467
468 468 def write_pid_file(self, overwrite=False):
469 469 """Create a .pid file in the pid_dir with my pid.
470 470
471 471 This must be called after pre_construct, which sets `self.pid_dir`.
472 472 This raises :exc:`PIDFileError` if the pid file exists already.
473 473 """
474 474 pid_file = os.path.join(self.cluster_dir.pid_dir, self.name + u'.pid')
475 475 if os.path.isfile(pid_file):
476 476 pid = self.get_pid_from_file()
477 477 if not overwrite:
478 478 raise PIDFileError(
479 479 'The pid file [%s] already exists. \nThis could mean that this '
480 480 'server is already running with [pid=%s].' % (pid_file, pid)
481 481 )
482 482 with open(pid_file, 'w') as f:
483 483 self.log.info("Creating pid file: %s" % pid_file)
484 484 f.write(repr(os.getpid())+'\n')
485 485
486 486 def remove_pid_file(self):
487 487 """Remove the pid file.
488 488
489 489 This should be called at shutdown by registering a callback with
490 490 :func:`reactor.addSystemEventTrigger`. This needs to return
491 491 ``None``.
492 492 """
493 493 pid_file = os.path.join(self.cluster_dir.pid_dir, self.name + u'.pid')
494 494 if os.path.isfile(pid_file):
495 495 try:
496 496 self.log.info("Removing pid file: %s" % pid_file)
497 497 os.remove(pid_file)
498 498 except:
499 499 self.log.warn("Error removing the pid file: %s" % pid_file)
500 500
501 501 def get_pid_from_file(self):
502 502 """Get the pid from the pid file.
503 503
504 504 If the pid file doesn't exist a :exc:`PIDFileError` is raised.
505 505 """
506 506 pid_file = os.path.join(self.cluster_dir.pid_dir, self.name + u'.pid')
507 507 if os.path.isfile(pid_file):
508 508 with open(pid_file, 'r') as f:
509 509 pid = int(f.read().strip())
510 510 return pid
511 511 else:
512 512 raise PIDFileError('pid file not found: %s' % pid_file)
513 513
514 514 def check_pid(self, pid):
515 515 if os.name == 'nt':
516 516 try:
517 517 import ctypes
518 518 # returns 0 if no such process (of ours) exists
519 519 # positive int otherwise
520 520 p = ctypes.windll.kernel32.OpenProcess(1,0,pid)
521 521 except Exception:
522 522 self.log.warn(
523 523 "Could not determine whether pid %i is running via `OpenProcess`. "
524 524 " Making the likely assumption that it is."%pid
525 525 )
526 526 return True
527 527 return bool(p)
528 528 else:
529 529 try:
530 530 p = Popen(['ps','x'], stdout=PIPE, stderr=PIPE)
531 531 output,_ = p.communicate()
532 532 except OSError:
533 533 self.log.warn(
534 534 "Could not determine whether pid %i is running via `ps x`. "
535 535 " Making the likely assumption that it is."%pid
536 536 )
537 537 return True
538 538 pids = map(int, re.findall(r'^\W*\d+', output, re.MULTILINE))
539 539 return pid in pids
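For reference, a minimal sketch of the cluster-dir search order implemented by `find_cluster_dir_by_profile` above; the helper name `candidate_cluster_dirs` is illustrative, not part of this change:

    # Sketch: the order in which cluster_<profile> is searched for,
    # mirroring find_cluster_dir_by_profile above. This helper is
    # hypothetical, not part of IPython.
    import os

    def candidate_cluster_dirs(ipython_dir, profile='default'):
        """Yield the paths searched for cluster_<profile>, in order."""
        dirname = 'cluster_' + profile
        extra = os.environ.get('IPCLUSTER_DIR_PATH', '')
        extra = extra.split(':') if extra else []
        for p in [os.getcwd(), ipython_dir] + extra:
            yield os.path.join(p, dirname)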
diff --git a/IPython/parallel/apps/ipclusterapp.py b/IPython/parallel/apps/ipclusterapp.py
@@ -1,537 +1,537 @@
1 1 #!/usr/bin/env python
2 2 # encoding: utf-8
3 3 """
4 4 The ipcluster application.
5 5 """
6 6
7 7 #-----------------------------------------------------------------------------
8 8 # Copyright (C) 2008-2009 The IPython Development Team
9 9 #
10 10 # Distributed under the terms of the BSD License. The full license is in
11 11 # the file COPYING, distributed as part of this software.
12 12 #-----------------------------------------------------------------------------
13 13
14 14 #-----------------------------------------------------------------------------
15 15 # Imports
16 16 #-----------------------------------------------------------------------------
17 17
18 18 import errno
19 19 import logging
20 20 import os
21 21 import re
22 22 import signal
23 23
24 24 from subprocess import check_call, CalledProcessError, PIPE
25 25 import zmq
26 26 from zmq.eventloop import ioloop
27 27
28 28 from IPython.config.application import Application, boolean_flag
29 29 from IPython.config.loader import Config
30 30 from IPython.core.newapplication import BaseIPythonApplication
31 31 from IPython.utils.importstring import import_item
32 from IPython.utils.traitlets import Int, CStr, CUnicode, Str, Bool, CFloat, Dict, List
32 from IPython.utils.traitlets import Int, Unicode, Bool, CFloat, Dict, List
33 33
34 34 from IPython.parallel.apps.clusterdir import (
35 35 ClusterApplication, ClusterDirError, ClusterDir,
36 36 PIDFileError,
37 37 base_flags, base_aliases
38 38 )
39 39
40 40
41 41 #-----------------------------------------------------------------------------
42 42 # Module level variables
43 43 #-----------------------------------------------------------------------------
44 44
45 45
46 46 default_config_file_name = u'ipcluster_config.py'
47 47
48 48
49 49 _description = """\
50 50 Start an IPython cluster for parallel computing.\n\n
51 51
52 52 An IPython cluster consists of 1 controller and 1 or more engines.
53 53 This command automates the startup of these processes using a wide
54 54 range of startup methods (SSH, local processes, PBS, mpiexec,
55 55 Windows HPC Server 2008). To start a cluster with 4 engines on your
56 56 local host simply do 'ipcluster start n=4'. For more complex usage
57 57 you will typically do 'ipcluster create profile=mycluster', then edit
58 58 configuration files, followed by 'ipcluster start profile=mycluster n=4'.
59 59 """
60 60
61 61
62 62 # Exit codes for ipcluster
63 63
64 64 # This will be the exit code if the ipcluster appears to be running because
65 65 # a .pid file exists
66 66 ALREADY_STARTED = 10
67 67
68 68
69 69 # This will be the exit code if ipcluster stop is run, but there is no .pid
70 70 # file to be found.
71 71 ALREADY_STOPPED = 11
72 72
73 73 # This will be the exit code if ipcluster engines is run, but there is no .pid
74 74 # file to be found.
75 75 NO_CLUSTER = 12
76 76
77 77
78 78 #-----------------------------------------------------------------------------
79 79 # Main application
80 80 #-----------------------------------------------------------------------------
81 81 start_help = """
82 82 Start an ipython cluster by its profile name or cluster
83 83 directory. Cluster directories contain configuration, log and
84 84 security related files and are named using the convention
85 85 'cluster_<profile>' and should be created using the 'start'
86 86 subcommand of 'ipcluster'. If your cluster directory is in
87 87 the cwd or the ipython directory, you can simply refer to it
88 88 using its profile name, 'ipcluster start n=4 profile=<profile>',
89 89 otherwise use the 'cluster_dir' option.
90 90 """
91 91 stop_help = """
92 92 Stop a running ipython cluster by its profile name or cluster
93 93 directory. Cluster directories are named using the convention
94 94 'cluster_<profile>'. If your cluster directory is in
95 95 the cwd or the ipython directory, you can simply refer to it
96 96 using its profile name, 'ipcluster stop profile=<profile>', otherwise
97 97 use the 'cluster_dir' option.
98 98 """
99 99 engines_help = """
100 100 Start one or more engines to connect to an existing Cluster
101 101 by profile name or cluster directory.
102 102 Cluster directories contain configuration, log and
103 103 security related files and are named using the convention
104 104 'cluster_<profile>' and should be created using the 'start'
105 105 subcommand of 'ipcluster'. If your cluster directory is in
106 106 the cwd or the ipython directory, you can simply refer to it
107 107 using its profile name, 'ipcluster engines n=4 profile=<profile>',
108 108 otherwise use the 'cluster_dir' option.
109 109 """
110 110 create_help = """
111 111 Create an ipython cluster directory by its profile name or
112 112 cluster directory path. Cluster directories contain
113 113 configuration, log and security related files and are named
114 114 using the convention 'cluster_<profile>'. By default they are
115 115 located in your ipython directory. Once created, you will
116 116 probably need to edit the configuration files in the cluster
117 117 directory to configure your cluster. Most users will create a
118 118 cluster directory by profile name,
119 119 `ipcluster create profile=mycluster`, which will put the directory
120 120 in `<ipython_dir>/cluster_mycluster`.
121 121 """
122 122 list_help = """List all available clusters, by cluster directory, that can
123 123 be found in the current working directly or in the ipython
124 124 directory. Cluster directories are named using the convention
125 125 'cluster_<profile>'.
126 126 """
127 127
128 128
129 129 class IPClusterList(BaseIPythonApplication):
130 130 name = u'ipcluster-list'
131 131 description = list_help
132 132
133 133 # empty aliases
134 134 aliases=Dict()
135 135 flags = Dict(base_flags)
136 136
137 137 def _log_level_default(self):
138 138 return 20
139 139
140 140 def list_cluster_dirs(self):
141 141 # Find the search paths
142 142 cluster_dir_paths = os.environ.get('IPCLUSTER_DIR_PATH','')
143 143 if cluster_dir_paths:
144 144 cluster_dir_paths = cluster_dir_paths.split(':')
145 145 else:
146 146 cluster_dir_paths = []
147 147
148 148 ipython_dir = self.ipython_dir
149 149
150 150 paths = [os.getcwd(), ipython_dir] + cluster_dir_paths
151 151 paths = list(set(paths))
152 152
153 153 self.log.info('Searching for cluster dirs in paths: %r' % paths)
154 154 for path in paths:
155 155 files = os.listdir(path)
156 156 for f in files:
157 157 full_path = os.path.join(path, f)
158 158 if os.path.isdir(full_path) and f.startswith('cluster_'):
159 159 profile = full_path.split('_')[-1]
160 160 start_cmd = 'ipcluster start profile=%s n=4' % profile
161 161 print start_cmd + " ==> " + full_path
162 162
163 163 def start(self):
164 164 self.list_cluster_dirs()
165 165
166 166 create_flags = {}
167 167 create_flags.update(base_flags)
168 168 create_flags.update(boolean_flag('reset', 'IPClusterCreate.reset',
169 169 "reset config files to defaults", "leave existing config files"))
170 170
171 171 class IPClusterCreate(ClusterApplication):
172 172 name = u'ipcluster'
173 173 description = create_help
174 174 auto_create_cluster_dir = Bool(True,
175 175 help="whether to create the cluster_dir if it doesn't exist")
176 176 default_config_file_name = default_config_file_name
177 177
178 178 reset = Bool(False, config=True,
179 179 help="Whether to reset config files as part of 'create'."
180 180 )
181 181
182 182 flags = Dict(create_flags)
183 183
184 184 aliases = Dict(dict(profile='ClusterDir.profile'))
185 185
186 186 classes = [ClusterDir]
187 187
188 188 def init_clusterdir(self):
189 189 super(IPClusterCreate, self).init_clusterdir()
190 190 self.log.info('Copying default config files to cluster directory '
191 191 '[overwrite=%r]' % (self.reset,))
192 192 self.cluster_dir.copy_all_config_files(overwrite=self.reset)
193 193
194 194 def initialize(self, argv=None):
195 195 self.parse_command_line(argv)
196 196 self.init_clusterdir()
197 197
198 198 stop_aliases = dict(
199 199 signal='IPClusterStop.signal',
200 200 profile='ClusterDir.profile',
201 201 cluster_dir='ClusterDir.location',
202 202 )
203 203
204 204 class IPClusterStop(ClusterApplication):
205 205 name = u'ipcluster'
206 206 description = stop_help
207 207 auto_create_cluster_dir = Bool(False)
208 208 default_config_file_name = default_config_file_name
209 209
210 210 signal = Int(signal.SIGINT, config=True,
211 211 help="signal to use for stopping processes.")
212 212
213 213 aliases = Dict(stop_aliases)
214 214
215 215 def init_clusterdir(self):
216 216 try:
217 217 super(IPClusterStop, self).init_clusterdir()
218 218 except ClusterDirError as e:
219 219 self.log.fatal("Failed ClusterDir init: %s"%e)
220 220 self.exit(1)
221 221
222 222 def start(self):
223 223 """Start the app for the stop subcommand."""
224 224 try:
225 225 pid = self.get_pid_from_file()
226 226 except PIDFileError:
227 227 self.log.critical(
228 228 'Could not read pid file, cluster is probably not running.'
229 229 )
230 230 # Here I exit with an unusual exit status that other processes
231 231 # can watch for to learn how I exited.
232 232 self.remove_pid_file()
233 233 self.exit(ALREADY_STOPPED)
234 234
235 235 if not self.check_pid(pid):
236 236 self.log.critical(
237 237 'Cluster [pid=%r] is not running.' % pid
238 238 )
239 239 self.remove_pid_file()
240 240 # Here I exit with an unusual exit status that other processes
241 241 # can watch for to learn how I exited.
242 242 self.exit(ALREADY_STOPPED)
243 243
244 244 elif os.name=='posix':
245 245 sig = self.signal
246 246 self.log.info(
247 247 "Stopping cluster [pid=%r] with [signal=%r]" % (pid, sig)
248 248 )
249 249 try:
250 250 os.kill(pid, sig)
251 251 except OSError:
252 252 self.log.error("Stopping cluster failed, assuming already dead.",
253 253 exc_info=True)
254 254 self.remove_pid_file()
255 255 elif os.name=='nt':
256 256 try:
257 257 # kill the whole tree
258 258 p = check_call(['taskkill', '-pid', str(pid), '-t', '-f'], stdout=PIPE,stderr=PIPE)
259 259 except (CalledProcessError, OSError):
260 260 self.log.error("Stopping cluster failed, assuming already dead.",
261 261 exc_info=True)
262 262 self.remove_pid_file()
263 263
264 264 engine_aliases = {}
265 265 engine_aliases.update(base_aliases)
266 266 engine_aliases.update(dict(
267 267 n='IPClusterEngines.n',
268 268 elauncher = 'IPClusterEngines.engine_launcher_class',
269 269 ))
270 270 class IPClusterEngines(ClusterApplication):
271 271
272 272 name = u'ipcluster'
273 273 description = engines_help
274 274 usage = None
275 275 default_config_file_name = default_config_file_name
276 276 default_log_level = logging.INFO
277 277 auto_create_cluster_dir = Bool(False)
278 278 classes = List()
279 279 def _classes_default(self):
280 280 from IPython.parallel.apps import launcher
281 281 launchers = launcher.all_launchers
282 282 eslaunchers = [ l for l in launchers if 'EngineSet' in l.__name__]
283 283 return [ClusterDir]+eslaunchers
284 284
285 285 n = Int(2, config=True,
286 286 help="The number of engines to start.")
287 287
288 engine_launcher_class = Str('LocalEngineSetLauncher',
288 engine_launcher_class = Unicode('LocalEngineSetLauncher',
289 289 config=True,
290 290 help="The class for launching a set of Engines."
291 291 )
292 292 daemonize = Bool(False, config=True,
293 293 help='Daemonize the ipcluster program. This implies --log-to-file')
294 294
295 295 def _daemonize_changed(self, name, old, new):
296 296 if new:
297 297 self.log_to_file = True
298 298
299 299 aliases = Dict(engine_aliases)
300 300 # flags = Dict(flags)
301 301 _stopping = False
302 302
303 303 def initialize(self, argv=None):
304 304 super(IPClusterEngines, self).initialize(argv)
305 305 self.init_signal()
306 306 self.init_launchers()
307 307
308 308 def init_launchers(self):
309 309 self.engine_launcher = self.build_launcher(self.engine_launcher_class)
310 310 self.engine_launcher.on_stop(lambda r: self.loop.stop())
311 311
312 312 def init_signal(self):
313 313 # Setup signals
314 314 signal.signal(signal.SIGINT, self.sigint_handler)
315 315
316 316 def build_launcher(self, clsname):
317 317 """import and instantiate a Launcher based on importstring"""
318 318 if '.' not in clsname:
319 319 # not a module, presume it's the raw name in apps.launcher
320 320 clsname = 'IPython.parallel.apps.launcher.'+clsname
321 321 # print repr(clsname)
322 322 klass = import_item(clsname)
323 323
324 324 launcher = klass(
325 325 work_dir=self.cluster_dir.location, config=self.config, logname=self.log.name
326 326 )
327 327 return launcher
328 328
329 329 def start_engines(self):
330 330 self.log.info("Starting %i engines"%self.n)
331 331 self.engine_launcher.start(
332 332 self.n,
333 333 cluster_dir=self.cluster_dir.location
334 334 )
335 335
336 336 def stop_engines(self):
337 337 self.log.info("Stopping Engines...")
338 338 if self.engine_launcher.running:
339 339 d = self.engine_launcher.stop()
340 340 return d
341 341 else:
342 342 return None
343 343
344 344 def stop_launchers(self, r=None):
345 345 if not self._stopping:
346 346 self._stopping = True
347 347 self.log.error("IPython cluster: stopping")
348 348 self.stop_engines()
349 349 # Wait a few seconds to let things shut down.
350 350 dc = ioloop.DelayedCallback(self.loop.stop, 4000, self.loop)
351 351 dc.start()
352 352
353 353 def sigint_handler(self, signum, frame):
354 354 self.log.debug("SIGINT received, stopping launchers...")
355 355 self.stop_launchers()
356 356
357 357 def start_logging(self):
358 358 # Remove old log files of the controller and engine
359 359 if self.clean_logs:
360 360 log_dir = self.cluster_dir.log_dir
361 361 for f in os.listdir(log_dir):
362 362 if re.match(r'ip(engine|controller)z-\d+\.(log|err|out)',f):
363 363 os.remove(os.path.join(log_dir, f))
364 364 # This will remove old log files for ipcluster itself
365 365 # super(IPClusterApp, self).start_logging()
366 366
367 367 def start(self):
368 368 """Start the app for the engines subcommand."""
369 369 self.log.info("IPython cluster: started")
370 370 # First see if the cluster is already running
371 371
372 372 # Now log and daemonize
373 373 self.log.info(
374 374 'Starting engines with [daemon=%r]' % self.daemonize
375 375 )
376 376 # TODO: Get daemonize working on Windows or as a Windows Server.
377 377 if self.daemonize:
378 378 if os.name=='posix':
379 379 from twisted.scripts._twistd_unix import daemonize
380 380 daemonize()
381 381
382 382 dc = ioloop.DelayedCallback(self.start_engines, 0, self.loop)
383 383 dc.start()
384 384 # Now write the new pid file AFTER our new forked pid is active.
385 385 # self.write_pid_file()
386 386 try:
387 387 self.loop.start()
388 388 except KeyboardInterrupt:
389 389 pass
390 390 except zmq.ZMQError as e:
391 391 if e.errno == errno.EINTR:
392 392 pass
393 393 else:
394 394 raise
395 395
396 396 start_aliases = {}
397 397 start_aliases.update(engine_aliases)
398 398 start_aliases.update(dict(
399 399 delay='IPClusterStart.delay',
400 400 clean_logs='IPClusterStart.clean_logs',
401 401 ))
402 402
403 403 class IPClusterStart(IPClusterEngines):
404 404
405 405 name = u'ipcluster'
406 406 description = start_help
407 407 usage = None
408 408 default_config_file_name = default_config_file_name
409 409 default_log_level = logging.INFO
410 410 auto_create_cluster_dir = Bool(True, config=True,
411 411 help="whether to create the cluster_dir if it doesn't exist")
412 412 classes = List()
413 413 def _classes_default(self,):
414 414 from IPython.parallel.apps import launcher
415 415 return [ClusterDir]+launcher.all_launchers
416 416
417 417 clean_logs = Bool(True, config=True,
418 418 help="whether to cleanup old logs before starting")
419 419
420 420 delay = CFloat(1., config=True,
421 421 help="delay (in s) between starting the controller and the engines")
422 422
423 controller_launcher_class = Str('LocalControllerLauncher',
423 controller_launcher_class = Unicode('LocalControllerLauncher',
424 424 config=True,
425 425 help="The class for launching a Controller."
426 426 )
427 427 reset = Bool(False, config=True,
428 428 help="Whether to reset config files as part of '--create'."
429 429 )
430 430
431 431 # flags = Dict(flags)
432 432 aliases = Dict(start_aliases)
433 433
434 434 def init_launchers(self):
435 435 self.controller_launcher = self.build_launcher(self.controller_launcher_class)
436 436 self.engine_launcher = self.build_launcher(self.engine_launcher_class)
437 437 self.controller_launcher.on_stop(self.stop_launchers)
438 438
439 439 def start_controller(self):
440 440 self.controller_launcher.start(
441 441 cluster_dir=self.cluster_dir.location
442 442 )
443 443
444 444 def stop_controller(self):
445 445 # self.log.info("In stop_controller")
446 446 if self.controller_launcher and self.controller_launcher.running:
447 447 return self.controller_launcher.stop()
448 448
449 449 def stop_launchers(self, r=None):
450 450 if not self._stopping:
451 451 self.stop_controller()
452 452 super(IPClusterStart, self).stop_launchers()
453 453
454 454 def start(self):
455 455 """Start the app for the start subcommand."""
456 456 # First see if the cluster is already running
457 457 try:
458 458 pid = self.get_pid_from_file()
459 459 except PIDFileError:
460 460 pass
461 461 else:
462 462 if self.check_pid(pid):
463 463 self.log.critical(
464 464 'Cluster is already running with [pid=%s]. '
465 465 'use "ipcluster stop" to stop the cluster.' % pid
466 466 )
467 467 # Here I exit with an unusual exit status that other processes
468 468 # can watch for to learn how I exited.
469 469 self.exit(ALREADY_STARTED)
470 470 else:
471 471 self.remove_pid_file()
472 472
473 473
474 474 # Now log and daemonize
475 475 self.log.info(
476 476 'Starting ipcluster with [daemon=%r]' % self.daemonize
477 477 )
478 478 # TODO: Get daemonize working on Windows or as a Windows Server.
479 479 if self.daemonize:
480 480 if os.name=='posix':
481 481 from twisted.scripts._twistd_unix import daemonize
482 482 daemonize()
483 483
484 484 dc = ioloop.DelayedCallback(self.start_controller, 0, self.loop)
485 485 dc.start()
486 486 dc = ioloop.DelayedCallback(self.start_engines, 1000*self.delay, self.loop)
487 487 dc.start()
488 488 # Now write the new pid file AFTER our new forked pid is active.
489 489 self.write_pid_file()
490 490 try:
491 491 self.loop.start()
492 492 except KeyboardInterrupt:
493 493 pass
494 494 except zmq.ZMQError as e:
495 495 if e.errno == errno.EINTR:
496 496 pass
497 497 else:
498 498 raise
499 499 finally:
500 500 self.remove_pid_file()
501 501
502 502 base='IPython.parallel.apps.ipclusterapp.IPCluster'
503 503
504 504 class IPClusterApp(Application):
505 505 name = u'ipcluster'
506 506 description = _description
507 507
508 508 subcommands = {'create' : (base+'Create', create_help),
509 509 'list' : (base+'List', list_help),
510 510 'start' : (base+'Start', start_help),
511 511 'stop' : (base+'Stop', stop_help),
512 512 'engines' : (base+'Engines', engines_help),
513 513 }
514 514
515 515 # no aliases or flags for parent App
516 516 aliases = Dict()
517 517 flags = Dict()
518 518
519 519 def start(self):
520 520 if self.subapp is None:
521 521 print "No subcommand specified! Must specify one of: %s"%(self.subcommands.keys())
522 522 print
523 523 self.print_subcommands()
524 524 self.exit(1)
525 525 else:
526 526 return self.subapp.start()
527 527
528 528 def launch_new_instance():
529 529 """Create and run the IPython cluster."""
530 530 app = IPClusterApp()
531 531 app.initialize()
532 532 app.start()
533 533
534 534
535 535 if __name__ == '__main__':
536 536 launch_new_instance()
537 537
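A minimal sketch of the launcher-name resolution performed by `IPClusterEngines.build_launcher` above, assuming the `IPython.parallel` of this era is importable; bare class names are resolved against the stock launcher module:

    # Sketch of build_launcher's import-string handling; mirrors the
    # clsname logic above, returning the launcher class itself.
    from IPython.utils.importstring import import_item

    def resolve_launcher(clsname):
        if '.' not in clsname:
            # bare name: assume it lives in the stock launcher module
            clsname = 'IPython.parallel.apps.launcher.' + clsname
        return import_item(clsname)

    # e.g. the default used above:
    # launcher_cls = resolve_launcher('LocalEngineSetLauncher')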
diff --git a/IPython/parallel/apps/ipcontrollerapp.py b/IPython/parallel/apps/ipcontrollerapp.py
@@ -1,401 +1,401 @@
1 1 #!/usr/bin/env python
2 2 # encoding: utf-8
3 3 """
4 4 The IPython controller application.
5 5 """
6 6
7 7 #-----------------------------------------------------------------------------
8 8 # Copyright (C) 2008-2009 The IPython Development Team
9 9 #
10 10 # Distributed under the terms of the BSD License. The full license is in
11 11 # the file COPYING, distributed as part of this software.
12 12 #-----------------------------------------------------------------------------
13 13
14 14 #-----------------------------------------------------------------------------
15 15 # Imports
16 16 #-----------------------------------------------------------------------------
17 17
18 18 from __future__ import with_statement
19 19
20 20 import copy
21 21 import os
22 22 import logging
23 23 import socket
24 24 import stat
25 25 import sys
26 26 import uuid
27 27
28 28 from multiprocessing import Process
29 29
30 30 import zmq
31 31 from zmq.devices import ProcessMonitoredQueue
32 32 from zmq.log.handlers import PUBHandler
33 33 from zmq.utils import jsonapi as json
34 34
35 35 from IPython.config.loader import Config
36 36
37 37 from IPython.parallel import factory
38 38
39 39 from IPython.parallel.apps.clusterdir import (
40 40 ClusterDir,
41 41 ClusterApplication,
42 42 base_flags
43 43 # ClusterDirConfigLoader
44 44 )
45 45 from IPython.utils.importstring import import_item
46 from IPython.utils.traitlets import Instance, Unicode, Bool, List, CStr, Dict
46 from IPython.utils.traitlets import Instance, Unicode, Bool, List, Dict
47 47
48 48 # from IPython.parallel.controller.controller import ControllerFactory
49 49 from IPython.parallel.streamsession import StreamSession
50 50 from IPython.parallel.controller.heartmonitor import HeartMonitor
51 51 from IPython.parallel.controller.hub import Hub, HubFactory
52 52 from IPython.parallel.controller.scheduler import TaskScheduler,launch_scheduler
53 53 from IPython.parallel.controller.sqlitedb import SQLiteDB
54 54
55 55 from IPython.parallel.util import signal_children,disambiguate_ip_address, split_url
56 56
57 57 # conditional import of MongoDB backend class
58 58
59 59 try:
60 60 from IPython.parallel.controller.mongodb import MongoDB
61 61 except ImportError:
62 62 maybe_mongo = []
63 63 else:
64 64 maybe_mongo = [MongoDB]
65 65
66 66
67 67 #-----------------------------------------------------------------------------
68 68 # Module level variables
69 69 #-----------------------------------------------------------------------------
70 70
71 71
72 72 #: The default config file name for this application
73 73 default_config_file_name = u'ipcontroller_config.py'
74 74
75 75
76 76 _description = """Start the IPython controller for parallel computing.
77 77
78 78 The IPython controller provides a gateway between the IPython engines and
79 79 clients. The controller needs to be started before the engines and can be
80 80 configured using command line options or using a cluster directory. Cluster
81 81 directories contain config, log and security files and are usually located in
82 82 your ipython directory and named as "cluster_<profile>". See the --profile
83 83 and --cluster-dir options for details.
84 84 """
85 85
86 86
87 87
88 88
89 89 #-----------------------------------------------------------------------------
90 90 # The main application
91 91 #-----------------------------------------------------------------------------
92 92 flags = {}
93 93 flags.update(base_flags)
94 94 flags.update({
95 95 'usethreads' : ( {'IPControllerApp' : {'usethreads' : True}},
96 96 'Use threads instead of processes for the schedulers'),
97 97 'sqlitedb' : ({'HubFactory' : {'db_class' : 'IPython.parallel.controller.sqlitedb.SQLiteDB'}},
98 98 'use the SQLiteDB backend'),
99 99 'mongodb' : ({'HubFactory' : {'db_class' : 'IPython.parallel.controller.mongodb.MongoDB'}},
100 100 'use the MongoDB backend'),
101 101 'dictdb' : ({'HubFactory' : {'db_class' : 'IPython.parallel.controller.dictdb.DictDB'}},
102 102 'use the in-memory DictDB backend'),
103 103 })
104 104
105 105 flags.update()
106 106
107 107 class IPControllerApp(ClusterApplication):
108 108
109 109 name = u'ipcontroller'
110 110 description = _description
111 111 # command_line_loader = IPControllerAppConfigLoader
112 112 default_config_file_name = default_config_file_name
113 113 classes = [ClusterDir, StreamSession, HubFactory, TaskScheduler, HeartMonitor, SQLiteDB] + maybe_mongo
114 114
115 115 auto_create_cluster_dir = Bool(True, config=True,
116 116 help="Whether to create cluster_dir if it exists.")
117 117 reuse_files = Bool(False, config=True,
118 118 help='Whether to reuse existing json connection files [default: False]'
119 119 )
120 120 secure = Bool(True, config=True,
121 121 help='Whether to use exec_keys for extra authentication [default: True]'
122 122 )
123 123 ssh_server = Unicode(u'', config=True,
124 124 help="""ssh url for clients to use when connecting to the Controller
125 125 processes. It should be of the form: [user@]server[:port]. The
126 126 Controller\'s listening addresses must be accessible from the ssh server""",
127 127 )
128 128 location = Unicode(u'', config=True,
129 129 help="""The external IP or domain name of the Controller, used for disambiguating
130 130 engine and client connections.""",
131 131 )
132 132 import_statements = List([], config=True,
133 133 help="import statements to be run at startup. Necessary in some environments"
134 134 )
135 135
136 136 usethreads = Bool(False, config=True,
137 137 help='Use threads instead of processes for the schedulers',
138 138 )
139 139
140 140 # internal
141 141 children = List()
142 mq_class = CStr('zmq.devices.ProcessMonitoredQueue')
142 mq_class = Unicode('zmq.devices.ProcessMonitoredQueue')
143 143
144 144 def _usethreads_changed(self, name, old, new):
145 145 self.mq_class = 'zmq.devices.%sMonitoredQueue'%('Thread' if new else 'Process')
146 146
147 147 aliases = Dict(dict(
148 148 config = 'IPControllerApp.config_file',
149 149 # file = 'IPControllerApp.url_file',
150 150 log_level = 'IPControllerApp.log_level',
151 151 reuse_files = 'IPControllerApp.reuse_files',
152 152 secure = 'IPControllerApp.secure',
153 153 ssh = 'IPControllerApp.ssh_server',
154 154 usethreads = 'IPControllerApp.usethreads',
155 155 import_statements = 'IPControllerApp.import_statements',
156 156 location = 'IPControllerApp.location',
157 157
158 158 ident = 'StreamSession.session',
159 159 user = 'StreamSession.username',
160 160 exec_key = 'StreamSession.keyfile',
161 161
162 162 url = 'HubFactory.url',
163 163 ip = 'HubFactory.ip',
164 164 transport = 'HubFactory.transport',
165 165 port = 'HubFactory.regport',
166 166
167 167 ping = 'HeartMonitor.period',
168 168
169 169 scheme = 'TaskScheduler.scheme_name',
170 170 hwm = 'TaskScheduler.hwm',
171 171
172 172
173 173 profile = "ClusterDir.profile",
174 174 cluster_dir = 'ClusterDir.location',
175 175
176 176 ))
177 177 flags = Dict(flags)
178 178
179 179
180 180 def save_connection_dict(self, fname, cdict):
181 181 """save a connection dict to json file."""
182 182 c = self.config
183 183 url = cdict['url']
184 184 location = cdict['location']
185 185 if not location:
186 186 try:
187 187 proto,ip,port = split_url(url)
188 188 except AssertionError:
189 189 pass
190 190 else:
191 191 location = socket.gethostbyname_ex(socket.gethostname())[2][-1]
192 192 cdict['location'] = location
193 193 fname = os.path.join(self.cluster_dir.security_dir, fname)
194 194 with open(fname, 'w') as f:
195 195 f.write(json.dumps(cdict, indent=2))
196 196 os.chmod(fname, stat.S_IRUSR|stat.S_IWUSR)
197 197
198 198 def load_config_from_json(self):
199 199 """load config from existing json connector files."""
200 200 c = self.config
201 201 # load from engine config
202 202 with open(os.path.join(self.cluster_dir.security_dir, 'ipcontroller-engine.json')) as f:
203 203 cfg = json.loads(f.read())
204 204 key = c.StreamSession.key = cfg['exec_key']
205 205 xport,addr = cfg['url'].split('://')
206 206 c.HubFactory.engine_transport = xport
207 207 ip,ports = addr.split(':')
208 208 c.HubFactory.engine_ip = ip
209 209 c.HubFactory.regport = int(ports)
210 210 self.location = cfg['location']
211 211
212 212 # load client config
213 213 with open(os.path.join(self.cluster_dir.security_dir, 'ipcontroller-client.json')) as f:
214 214 cfg = json.loads(f.read())
215 215 assert key == cfg['exec_key'], "exec_key mismatch between engine and client keys"
216 216 xport,addr = cfg['url'].split('://')
217 217 c.HubFactory.client_transport = xport
218 218 ip,ports = addr.split(':')
219 219 c.HubFactory.client_ip = ip
220 220 self.ssh_server = cfg['ssh']
221 221 assert int(ports) == c.HubFactory.regport, "regport mismatch"
222 222
223 223 def init_hub(self):
224 224 # This is the working dir by now.
225 225 sys.path.insert(0, '')
226 226 c = self.config
227 227
228 228 self.do_import_statements()
229 229 reusing = self.reuse_files
230 230 if reusing:
231 231 try:
232 232 self.load_config_from_json()
233 233 except (AssertionError,IOError):
234 234 reusing=False
235 235 # check again, because reusing may have failed:
236 236 if reusing:
237 237 pass
238 238 elif self.secure:
239 239 key = str(uuid.uuid4())
240 240 # keyfile = os.path.join(self.cluster_dir.security_dir, self.exec_key)
241 241 # with open(keyfile, 'w') as f:
242 242 # f.write(key)
243 243 # os.chmod(keyfile, stat.S_IRUSR|stat.S_IWUSR)
244 244 c.StreamSession.key = key
245 245 else:
246 246 key = c.StreamSession.key = ''
247 247
248 248 try:
249 249 self.factory = HubFactory(config=c, log=self.log)
250 250 # self.start_logging()
251 251 self.factory.init_hub()
252 252 except:
253 253 self.log.error("Couldn't construct the Controller", exc_info=True)
254 254 self.exit(1)
255 255
256 256 if not reusing:
257 257 # save to new json config files
258 258 f = self.factory
259 259 cdict = {'exec_key' : key,
260 260 'ssh' : self.ssh_server,
261 261 'url' : "%s://%s:%s"%(f.client_transport, f.client_ip, f.regport),
262 262 'location' : self.location
263 263 }
264 264 self.save_connection_dict('ipcontroller-client.json', cdict)
265 265 edict = cdict
266 266 edict['url']="%s://%s:%s"%((f.client_transport, f.client_ip, f.regport))
267 267 self.save_connection_dict('ipcontroller-engine.json', edict)
268 268
269 269 #
270 270 def init_schedulers(self):
271 271 children = self.children
272 272 mq = import_item(self.mq_class)
273 273
274 274 hub = self.factory
275 275 # maybe_inproc = 'inproc://monitor' if self.usethreads else self.monitor_url
276 276 # IOPub relay (in a Process)
277 277 q = mq(zmq.PUB, zmq.SUB, zmq.PUB, 'N/A','iopub')
278 278 q.bind_in(hub.client_info['iopub'])
279 279 q.bind_out(hub.engine_info['iopub'])
280 280 q.setsockopt_out(zmq.SUBSCRIBE, '')
281 281 q.connect_mon(hub.monitor_url)
282 282 q.daemon=True
283 283 children.append(q)
284 284
285 285 # Multiplexer Queue (in a Process)
286 286 q = mq(zmq.XREP, zmq.XREP, zmq.PUB, 'in', 'out')
287 287 q.bind_in(hub.client_info['mux'])
288 288 q.setsockopt_in(zmq.IDENTITY, 'mux')
289 289 q.bind_out(hub.engine_info['mux'])
290 290 q.connect_mon(hub.monitor_url)
291 291 q.daemon=True
292 292 children.append(q)
293 293
294 294 # Control Queue (in a Process)
295 295 q = mq(zmq.XREP, zmq.XREP, zmq.PUB, 'incontrol', 'outcontrol')
296 296 q.bind_in(hub.client_info['control'])
297 297 q.setsockopt_in(zmq.IDENTITY, 'control')
298 298 q.bind_out(hub.engine_info['control'])
299 299 q.connect_mon(hub.monitor_url)
300 300 q.daemon=True
301 301 children.append(q)
302 302 try:
303 303 scheme = self.config.TaskScheduler.scheme_name
304 304 except AttributeError:
305 305 scheme = TaskScheduler.scheme_name.get_default_value()
306 306 # Task Queue (in a Process)
307 307 if scheme == 'pure':
308 308 self.log.warn("task::using pure XREQ Task scheduler")
309 309 q = mq(zmq.XREP, zmq.XREQ, zmq.PUB, 'intask', 'outtask')
310 310 # q.setsockopt_out(zmq.HWM, hub.hwm)
311 311 q.bind_in(hub.client_info['task'][1])
312 312 q.setsockopt_in(zmq.IDENTITY, 'task')
313 313 q.bind_out(hub.engine_info['task'])
314 314 q.connect_mon(hub.monitor_url)
315 315 q.daemon=True
316 316 children.append(q)
317 317 elif scheme == 'none':
318 318 self.log.warn("task::using no Task scheduler")
319 319
320 320 else:
321 321 self.log.info("task::using Python %s Task scheduler"%scheme)
322 322 sargs = (hub.client_info['task'][1], hub.engine_info['task'],
323 323 hub.monitor_url, hub.client_info['notification'])
324 324 kwargs = dict(logname=self.log.name, loglevel=self.log_level,
325 325 config=dict(self.config))
326 326 q = Process(target=launch_scheduler, args=sargs, kwargs=kwargs)
327 327 q.daemon=True
328 328 children.append(q)
329 329
330 330
331 331 def save_urls(self):
332 332 """save the registration urls to files."""
333 333 c = self.config
334 334
335 335 sec_dir = self.cluster_dir.security_dir
336 336 cf = self.factory
337 337
338 338 with open(os.path.join(sec_dir, 'ipcontroller-engine.url'), 'w') as f:
339 339 f.write("%s://%s:%s"%(cf.engine_transport, cf.engine_ip, cf.regport))
340 340
341 341 with open(os.path.join(sec_dir, 'ipcontroller-client.url'), 'w') as f:
342 342 f.write("%s://%s:%s"%(cf.client_transport, cf.client_ip, cf.regport))
343 343
344 344
345 345 def do_import_statements(self):
346 346 statements = self.import_statements
347 347 for s in statements:
348 348 try:
349 349 self.log.info("Executing statement: '%s'" % s)
350 350 exec s in globals(), locals()
351 351 except:
352 352 self.log.error("Error running statement: %s" % s)
353 353
354 354 # def start_logging(self):
355 355 # super(IPControllerApp, self).start_logging()
356 356 # if self.config.Global.log_url:
357 357 # context = self.factory.context
358 358 # lsock = context.socket(zmq.PUB)
359 359 # lsock.connect(self.config.Global.log_url)
360 360 # handler = PUBHandler(lsock)
361 361 # handler.root_topic = 'controller'
362 362 # handler.setLevel(self.log_level)
363 363 # self.log.addHandler(handler)
364 364 # #
365 365
366 366 def initialize(self, argv=None):
367 367 super(IPControllerApp, self).initialize(argv)
368 368 self.init_hub()
369 369 self.init_schedulers()
370 370
371 371 def start(self):
372 372 # Start the subprocesses:
373 373 self.factory.start()
374 374 child_procs = []
375 375 for child in self.children:
376 376 child.start()
377 377 if isinstance(child, ProcessMonitoredQueue):
378 378 child_procs.append(child.launcher)
379 379 elif isinstance(child, Process):
380 380 child_procs.append(child)
381 381 if child_procs:
382 382 signal_children(child_procs)
383 383
384 384 self.write_pid_file(overwrite=True)
385 385
386 386 try:
387 387 self.factory.loop.start()
388 388 except KeyboardInterrupt:
389 389 self.log.critical("Interrupted, Exiting...\n")
390 390
391 391
392 392
393 393 def launch_new_instance():
394 394 """Create and run the IPython controller"""
395 395 app = IPControllerApp()
396 396 app.initialize()
397 397 app.start()
398 398
399 399
400 400 if __name__ == '__main__':
401 401 launch_new_instance()
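For orientation, the JSON connection files written by `save_connection_dict` above carry the four keys built in `init_hub`; a representative payload as a Python dict, with every value a made-up placeholder:

    # Shape of ipcontroller-client.json / ipcontroller-engine.json; the
    # keys come from the cdict in init_hub above, the values are invented
    # for illustration only.
    example_cdict = {
        'exec_key': '6a31e7e6-0000-0000-0000-000000000000',  # uuid4 session key
        'ssh': '',                        # optional [user@]server[:port] tunnel
        'url': 'tcp://127.0.0.1:10101',   # registration endpoint
        'location': '10.0.0.5',           # external IP/hostname of the Controller
    }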
diff --git a/IPython/parallel/apps/ipengineapp.py b/IPython/parallel/apps/ipengineapp.py
@@ -1,289 +1,289 @@
1 1 #!/usr/bin/env python
2 2 # encoding: utf-8
3 3 """
4 4 The IPython engine application
5 5 """
6 6
7 7 #-----------------------------------------------------------------------------
8 8 # Copyright (C) 2008-2009 The IPython Development Team
9 9 #
10 10 # Distributed under the terms of the BSD License. The full license is in
11 11 # the file COPYING, distributed as part of this software.
12 12 #-----------------------------------------------------------------------------
13 13
14 14 #-----------------------------------------------------------------------------
15 15 # Imports
16 16 #-----------------------------------------------------------------------------
17 17
18 18 import json
19 19 import os
20 20 import sys
21 21
22 22 import zmq
23 23 from zmq.eventloop import ioloop
24 24
25 25 from IPython.parallel.apps.clusterdir import (
26 26 ClusterApplication,
27 27 ClusterDir,
28 28 base_aliases,
29 29 # ClusterDirConfigLoader
30 30 )
31 31 from IPython.zmq.log import EnginePUBHandler
32 32
33 33 from IPython.config.configurable import Configurable
34 34 from IPython.parallel.streamsession import StreamSession
35 35 from IPython.parallel.engine.engine import EngineFactory
36 36 from IPython.parallel.engine.streamkernel import Kernel
37 37 from IPython.parallel.util import disambiguate_url
38 38
39 39 from IPython.utils.importstring import import_item
40 from IPython.utils.traitlets import Str, Bool, Unicode, Dict, List, CStr
40 from IPython.utils.traitlets import Bool, Unicode, Dict, List, CStr
41 41
42 42
43 43 #-----------------------------------------------------------------------------
44 44 # Module level variables
45 45 #-----------------------------------------------------------------------------
46 46
47 47 #: The default config file name for this application
48 48 default_config_file_name = u'ipengine_config.py'
49 49
50 50 _description = """Start an IPython engine for parallel computing.\n\n
51 51
52 52 IPython engines run in parallel and perform computations on behalf of a client
53 53 and controller. A controller needs to be started before the engines. The
54 54 engine can be configured using command line options or using a cluster
55 55 directory. Cluster directories contain config, log and security files and are
56 56 usually located in your ipython directory and named as "cluster_<profile>".
57 57 See the `profile` and `cluster_dir` options for details.
58 58 """
59 59
60 60
61 61 #-----------------------------------------------------------------------------
62 62 # MPI configuration
63 63 #-----------------------------------------------------------------------------
64 64
65 65 mpi4py_init = """from mpi4py import MPI as mpi
66 66 mpi.size = mpi.COMM_WORLD.Get_size()
67 67 mpi.rank = mpi.COMM_WORLD.Get_rank()
68 68 """
69 69
70 70
71 71 pytrilinos_init = """from PyTrilinos import Epetra
72 72 class SimpleStruct:
73 73 pass
74 74 mpi = SimpleStruct()
75 75 mpi.rank = 0
76 76 mpi.size = 0
77 77 """
78 78
79 79 class MPI(Configurable):
80 80 """Configurable for MPI initialization"""
81 use = Str('', config=True,
81 use = Unicode('', config=True,
82 82 help='How to enable MPI (mpi4py, pytrilinos, or empty string to disable).'
83 83 )
84 84
85 85 def _on_use_changed(self, old, new):
86 86 # load default init script if it's not set
87 87 if not self.init_script:
88 88 self.init_script = self.default_inits.get(new, '')
89 89
90 init_script = Str('', config=True,
90 init_script = Unicode('', config=True,
91 91 help="Initialization code for MPI")
92 92
93 93 default_inits = Dict({'mpi4py' : mpi4py_init, 'pytrilinos':pytrilinos_init},
94 94 config=True)
95 95
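A config sketch for the MPI configurable above: setting `use` fires _on_use_changed, which copies the matching snippet from default_inits into init_script unless one is already set.

    # in ipengine_config.py (sketch)
    c = get_config()
    c.MPI.use = 'mpi4py'    # loads mpi4py_init via default_inits
    # or supply custom initialization code directly:
    # c.MPI.init_script = "from mpi4py import MPI as mpi"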
96 96
97 97 #-----------------------------------------------------------------------------
98 98 # Main application
99 99 #-----------------------------------------------------------------------------
100 100
101 101
102 102 class IPEngineApp(ClusterApplication):
103 103
104 104 app_name = Unicode(u'ipengine')
105 105 description = Unicode(_description)
106 106 default_config_file_name = default_config_file_name
107 107 classes = List([ClusterDir, StreamSession, EngineFactory, Kernel, MPI])
108 108
109 109 auto_create_cluster_dir = Bool(False,
110 110 help="whether to create the cluster_dir if it doesn't exist")
111 111
112 112 startup_script = Unicode(u'', config=True,
113 113 help='specify a script to be run at startup')
114 startup_command = Str('', config=True,
114 startup_command = Unicode('', config=True,
115 115 help='specify a command to be run at startup')
116 116
117 117 url_file = Unicode(u'', config=True,
118 118 help="""The full location of the file containing the connection information for
119 119 the controller. If this is not given, the file must be in the
120 120 security directory of the cluster directory. This location is
121 121 resolved using the `profile` or `cluster_dir` options.""",
122 122 )
123 123
124 124 url_file_name = Unicode(u'ipcontroller-engine.json')
125 125
126 126 aliases = Dict(dict(
127 127 config = 'IPEngineApp.config_file',
128 128 file = 'IPEngineApp.url_file',
129 129 c = 'IPEngineApp.startup_command',
130 130 s = 'IPEngineApp.startup_script',
131 131
132 132 ident = 'StreamSession.session',
133 133 user = 'StreamSession.username',
134 134 exec_key = 'StreamSession.keyfile',
135 135
136 136 url = 'EngineFactory.url',
137 137 ip = 'EngineFactory.ip',
138 138 transport = 'EngineFactory.transport',
139 139 port = 'EngineFactory.regport',
140 140 location = 'EngineFactory.location',
141 141
142 142 timeout = 'EngineFactory.timeout',
143 143
144 144 profile = "ClusterDir.profile",
145 145 cluster_dir = 'ClusterDir.location',
146 146
147 147 mpi = 'MPI.use',
148 148
149 149 log_level = 'IPEngineApp.log_level',
150 150 ))
151 151
152 152 # def find_key_file(self):
153 153 # """Set the key file.
154 154 #
155 155 #         Here we don't try to check whether it exists or is valid, as that
156 156 #         is handled by the connection logic.
157 157 # """
158 158 # config = self.master_config
159 159 # # Find the actual controller key file
160 160 # if not config.Global.key_file:
161 161 # try_this = os.path.join(
162 162 # config.Global.cluster_dir,
163 163 # config.Global.security_dir,
164 164 # config.Global.key_file_name
165 165 # )
166 166 # config.Global.key_file = try_this
167 167
168 168 def find_url_file(self):
169 169         """Set the url file.
170 170 
171 171         Here we don't try to check whether it exists or is valid, as that
172 172         is handled by the connection logic.
173 173 """
174 174 config = self.config
175 175         # Find the actual controller url file
176 176 if not self.url_file:
177 177 self.url_file = os.path.join(
178 178 self.cluster_dir.security_dir,
179 179 self.url_file_name
180 180 )
181 181 def init_engine(self):
182 182 # This is the working dir by now.
183 183 sys.path.insert(0, '')
184 184 config = self.config
185 185 # print config
186 186 self.find_url_file()
187 187
188 188 # if os.path.exists(config.Global.key_file) and config.Global.secure:
189 189 # config.SessionFactory.exec_key = config.Global.key_file
190 190 if os.path.exists(self.url_file):
191 191 with open(self.url_file) as f:
192 192 d = json.loads(f.read())
193 193 for k,v in d.iteritems():
194 194 if isinstance(v, unicode):
195 195 d[k] = v.encode()
196 196 if d['exec_key']:
197 197 config.StreamSession.key = d['exec_key']
198 198 d['url'] = disambiguate_url(d['url'], d['location'])
199 199 config.EngineFactory.url = d['url']
200 200 config.EngineFactory.location = d['location']
201 201
202 202 try:
203 203 exec_lines = config.Kernel.exec_lines
204 204 except AttributeError:
205 205 config.Kernel.exec_lines = []
206 206 exec_lines = config.Kernel.exec_lines
207 207
208 208 if self.startup_script:
209 209 enc = sys.getfilesystemencoding() or 'utf8'
210 210 cmd="execfile(%r)"%self.startup_script.encode(enc)
211 211 exec_lines.append(cmd)
212 212 if self.startup_command:
213 213 exec_lines.append(self.startup_command)
214 214
215 215 # Create the underlying shell class and Engine
216 216 # shell_class = import_item(self.master_config.Global.shell_class)
217 217 # print self.config
218 218 try:
219 219 self.engine = EngineFactory(config=config, log=self.log)
220 220 except:
221 221 self.log.error("Couldn't start the Engine", exc_info=True)
222 222 self.exit(1)
223 223
224 224 # self.start_logging()
225 225
226 226 # Create the service hierarchy
227 227 # self.main_service = service.MultiService()
228 228 # self.engine_service.setServiceParent(self.main_service)
229 229 # self.tub_service = Tub()
230 230 # self.tub_service.setServiceParent(self.main_service)
231 231 # # This needs to be called before the connection is initiated
232 232 # self.main_service.startService()
233 233
234 234 # This initiates the connection to the controller and calls
235 235 # register_engine to tell the controller we are ready to do work
236 236 # self.engine_connector = EngineConnector(self.tub_service)
237 237
238 238 # self.log.info("Using furl file: %s" % self.master_config.Global.furl_file)
239 239
240 240 # reactor.callWhenRunning(self.call_connect)
241 241
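For reference, the JSON connection file parsed in init_engine carries the registration url, the controller's location, and an optional exec key. A sketch of its contents as a Python dict (all values hypothetical):

    d = {
        'url': 'tcp://127.0.0.1:10101',  # registration url, passed through disambiguate_url
        'location': '10.0.0.5',          # controller host, used to disambiguate the url
        'exec_key': '',                  # a non-empty value enables StreamSession key auth
    }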
242 242 # def start_logging(self):
243 243 # super(IPEngineApp, self).start_logging()
244 244 # if self.master_config.Global.log_url:
245 245 # context = self.engine.context
246 246 # lsock = context.socket(zmq.PUB)
247 247 # lsock.connect(self.master_config.Global.log_url)
248 248 # handler = EnginePUBHandler(self.engine, lsock)
249 249 # handler.setLevel(self.log_level)
250 250 # self.log.addHandler(handler)
251 251 #
252 252 def init_mpi(self):
253 253 global mpi
254 254 self.mpi = MPI(config=self.config)
255 255
256 256 mpi_import_statement = self.mpi.init_script
257 257 if mpi_import_statement:
258 258 try:
259 259 self.log.info("Initializing MPI:")
260 260 self.log.info(mpi_import_statement)
261 261 exec mpi_import_statement in globals()
262 262 except:
263 263 mpi = None
264 264 else:
265 265 mpi = None
266 266
267 267 def initialize(self, argv=None):
268 268 super(IPEngineApp, self).initialize(argv)
269 269 self.init_mpi()
270 270 self.init_engine()
271 271
272 272 def start(self):
273 273 self.engine.start()
274 274 try:
275 275 self.engine.loop.start()
276 276 except KeyboardInterrupt:
277 277 self.log.critical("Engine Interrupted, shutting down...\n")
278 278
279 279
280 280 def launch_new_instance():
281 281 """Create and run the IPython engine"""
282 282 app = IPEngineApp()
283 283 app.initialize()
284 284 app.start()
285 285
286 286
287 287 if __name__ == '__main__':
288 288 launch_new_instance()
289 289
@@ -1,1070 +1,1070 b''
1 1 #!/usr/bin/env python
2 2 # encoding: utf-8
3 3 """
4 4 Facilities for launching IPython processes asynchronously.
5 5 """
6 6
7 7 #-----------------------------------------------------------------------------
8 8 # Copyright (C) 2008-2009 The IPython Development Team
9 9 #
10 10 # Distributed under the terms of the BSD License. The full license is in
11 11 # the file COPYING, distributed as part of this software.
12 12 #-----------------------------------------------------------------------------
13 13
14 14 #-----------------------------------------------------------------------------
15 15 # Imports
16 16 #-----------------------------------------------------------------------------
17 17
18 18 import copy
19 19 import logging
20 20 import os
21 21 import re
22 22 import stat
23 23
24 24 # signal imports, handling various platforms, versions
25 25
26 26 from signal import SIGINT, SIGTERM
27 27 try:
28 28 from signal import SIGKILL
29 29 except ImportError:
30 30 # Windows
31 31 SIGKILL=SIGTERM
32 32
33 33 try:
34 34 # Windows >= 2.7, 3.2
35 35 from signal import CTRL_C_EVENT as SIGINT
36 36 except ImportError:
37 37 pass
38 38
39 39 from subprocess import Popen, PIPE, STDOUT
40 40 try:
41 41 from subprocess import check_output
42 42 except ImportError:
43 43 # pre-2.7, define check_output with Popen
44 44 def check_output(*args, **kwargs):
45 45 kwargs.update(dict(stdout=PIPE))
46 46 p = Popen(*args, **kwargs)
47 47 out,err = p.communicate()
48 48 return out
49 49
50 50 from zmq.eventloop import ioloop
51 51
52 52 from IPython.external import Itpl
53 53 # from IPython.config.configurable import Configurable
54 from IPython.utils.traitlets import Any, Str, Int, List, Unicode, Dict, Instance, CUnicode
54 from IPython.utils.traitlets import Any, Int, List, Unicode, Dict, Instance
55 55 from IPython.utils.path import get_ipython_module_path
56 56 from IPython.utils.process import find_cmd, pycmd2argv, FindCmdError
57 57
58 58 from IPython.parallel.factory import LoggingFactory
59 59
60 60 from .win32support import forward_read_events
61 61
62 62 from .winhpcjob import IPControllerTask, IPEngineTask, IPControllerJob, IPEngineSetJob
63 63
64 64 WINDOWS = os.name == 'nt'
65 65
66 66 #-----------------------------------------------------------------------------
67 67 # Paths to the kernel apps
68 68 #-----------------------------------------------------------------------------
69 69
70 70
71 71 ipcluster_cmd_argv = pycmd2argv(get_ipython_module_path(
72 72 'IPython.parallel.apps.ipclusterapp'
73 73 ))
74 74
75 75 ipengine_cmd_argv = pycmd2argv(get_ipython_module_path(
76 76 'IPython.parallel.apps.ipengineapp'
77 77 ))
78 78
79 79 ipcontroller_cmd_argv = pycmd2argv(get_ipython_module_path(
80 80 'IPython.parallel.apps.ipcontrollerapp'
81 81 ))
82 82
83 83 #-----------------------------------------------------------------------------
84 84 # Base launchers and errors
85 85 #-----------------------------------------------------------------------------
86 86
87 87
88 88 class LauncherError(Exception):
89 89 pass
90 90
91 91
92 92 class ProcessStateError(LauncherError):
93 93 pass
94 94
95 95
96 96 class UnknownStatus(LauncherError):
97 97 pass
98 98
99 99
100 100 class BaseLauncher(LoggingFactory):
101 101     """An abstraction for starting, stopping and signaling a process."""
102 102
103 103 # In all of the launchers, the work_dir is where child processes will be
104 104     # run. This will usually be the cluster_dir, but may not be. Any work_dir
105 105 # passed into the __init__ method will override the config value.
106 106 # This should not be used to set the work_dir for the actual engine
107 107 # and controller. Instead, use their own config files or the
108 108 # controller_args, engine_args attributes of the launchers to add
109 109 # the work_dir option.
110 110 work_dir = Unicode(u'.')
111 111 loop = Instance('zmq.eventloop.ioloop.IOLoop')
112 112
113 113 start_data = Any()
114 114 stop_data = Any()
115 115
116 116 def _loop_default(self):
117 117 return ioloop.IOLoop.instance()
118 118
119 119 def __init__(self, work_dir=u'.', config=None, **kwargs):
120 120 super(BaseLauncher, self).__init__(work_dir=work_dir, config=config, **kwargs)
121 121 self.state = 'before' # can be before, running, after
122 122 self.stop_callbacks = []
123 123 self.start_data = None
124 124 self.stop_data = None
125 125
126 126 @property
127 127 def args(self):
128 128 """A list of cmd and args that will be used to start the process.
129 129
130 130 This is what is passed to :func:`spawnProcess` and the first element
131 131 will be the process name.
132 132 """
133 133 return self.find_args()
134 134
135 135 def find_args(self):
136 136 """The ``.args`` property calls this to find the args list.
137 137
138 138         Subclasses should implement this to construct the cmd and args.
139 139 """
140 140 raise NotImplementedError('find_args must be implemented in a subclass')
141 141
142 142 @property
143 143 def arg_str(self):
144 144 """The string form of the program arguments."""
145 145 return ' '.join(self.args)
146 146
147 147 @property
148 148 def running(self):
149 149 """Am I running."""
150 150 if self.state == 'running':
151 151 return True
152 152 else:
153 153 return False
154 154
155 155 def start(self):
156 156 """Start the process.
157 157
158 158 This must return a deferred that fires with information about the
159 159 process starting (like a pid, job id, etc.).
160 160 """
161 161 raise NotImplementedError('start must be implemented in a subclass')
162 162
163 163 def stop(self):
164 164 """Stop the process and notify observers of stopping.
165 165
166 166 This must return a deferred that fires with information about the
167 167 processing stopping, like errors that occur while the process is
168 168 attempting to be shut down. This deferred won't fire when the process
169 169 actually stops. To observe the actual process stopping, see
170 170         :func:`on_stop`.
171 171 """
172 172 raise NotImplementedError('stop must be implemented in a subclass')
173 173
174 174 def on_stop(self, f):
175 175 """Get a deferred that will fire when the process stops.
176 176
177 177 The deferred will fire with data that contains information about
178 178 the exit status of the process.
179 179 """
180 180 if self.state=='after':
181 181 return f(self.stop_data)
182 182 else:
183 183 self.stop_callbacks.append(f)
184 184
185 185 def notify_start(self, data):
186 186 """Call this to trigger startup actions.
187 187
188 188 This logs the process startup and sets the state to 'running'. It is
189 189 a pass-through so it can be used as a callback.
190 190 """
191 191
192 192 self.log.info('Process %r started: %r' % (self.args[0], data))
193 193 self.start_data = data
194 194 self.state = 'running'
195 195 return data
196 196
197 197 def notify_stop(self, data):
198 198 """Call this to trigger process stop actions.
199 199
200 200 This logs the process stopping and sets the state to 'after'. Call
201 201         this to trigger all the callbacks registered with :func:`on_stop`."""
202 202
203 203 self.log.info('Process %r stopped: %r' % (self.args[0], data))
204 204 self.stop_data = data
205 205 self.state = 'after'
206 206 for i in range(len(self.stop_callbacks)):
207 207 d = self.stop_callbacks.pop()
208 208 d(data)
209 209 return data
210 210
211 211 def signal(self, sig):
212 212 """Signal the process.
213 213
214 214 Return a semi-meaningless deferred after signaling the process.
215 215
216 216 Parameters
217 217 ----------
218 218 sig : str or int
219 219 'KILL', 'INT', etc., or any signal number
220 220 """
221 221 raise NotImplementedError('signal must be implemented in a subclass')
222 222
223 223
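A minimal subclass sketch (entirely hypothetical) of the contract described above: implement find_args/start/stop and report state transitions through notify_start/notify_stop so that on_stop callbacks fire.

    class EchoLauncher(BaseLauncher):
        """Toy launcher that walks the before/running/after states."""
        def find_args(self):
            return ['echo', 'hello']
        def start(self):
            # a real launcher would spawn self.args here
            return self.notify_start(dict(pid=0))
        def stop(self):
            return self.notify_stop(dict(exit_code=0, pid=0))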
224 224 #-----------------------------------------------------------------------------
225 225 # Local process launchers
226 226 #-----------------------------------------------------------------------------
227 227
228 228
229 229 class LocalProcessLauncher(BaseLauncher):
230 230 """Start and stop an external process in an asynchronous manner.
231 231
232 232 This will launch the external process with a working directory of
233 233 ``self.work_dir``.
234 234 """
235 235
236 236     # This is used to construct self.args, which is passed to
237 237 # spawnProcess.
238 238 cmd_and_args = List([])
239 239 poll_frequency = Int(100) # in ms
240 240
241 241 def __init__(self, work_dir=u'.', config=None, **kwargs):
242 242 super(LocalProcessLauncher, self).__init__(
243 243 work_dir=work_dir, config=config, **kwargs
244 244 )
245 245 self.process = None
246 246 self.start_deferred = None
247 247 self.poller = None
248 248
249 249 def find_args(self):
250 250 return self.cmd_and_args
251 251
252 252 def start(self):
253 253 if self.state == 'before':
254 254 self.process = Popen(self.args,
255 255 stdout=PIPE,stderr=PIPE,stdin=PIPE,
256 256 env=os.environ,
257 257 cwd=self.work_dir
258 258 )
259 259 if WINDOWS:
260 260 self.stdout = forward_read_events(self.process.stdout)
261 261 self.stderr = forward_read_events(self.process.stderr)
262 262 else:
263 263 self.stdout = self.process.stdout.fileno()
264 264 self.stderr = self.process.stderr.fileno()
265 265 self.loop.add_handler(self.stdout, self.handle_stdout, self.loop.READ)
266 266 self.loop.add_handler(self.stderr, self.handle_stderr, self.loop.READ)
267 267 self.poller = ioloop.PeriodicCallback(self.poll, self.poll_frequency, self.loop)
268 268 self.poller.start()
269 269 self.notify_start(self.process.pid)
270 270 else:
271 271 s = 'The process was already started and has state: %r' % self.state
272 272 raise ProcessStateError(s)
273 273
274 274 def stop(self):
275 275 return self.interrupt_then_kill()
276 276
277 277 def signal(self, sig):
278 278 if self.state == 'running':
279 279 if WINDOWS and sig != SIGINT:
280 280 # use Windows tree-kill for better child cleanup
281 281 check_output(['taskkill', '-pid', str(self.process.pid), '-t', '-f'])
282 282 else:
283 283 self.process.send_signal(sig)
284 284
285 285 def interrupt_then_kill(self, delay=2.0):
286 286 """Send INT, wait a delay and then send KILL."""
287 287 try:
288 288 self.signal(SIGINT)
289 289 except Exception:
290 290 self.log.debug("interrupt failed")
291 291 pass
292 292 self.killer = ioloop.DelayedCallback(lambda : self.signal(SIGKILL), delay*1000, self.loop)
293 293 self.killer.start()
294 294
295 295 # callbacks, etc:
296 296
297 297 def handle_stdout(self, fd, events):
298 298 if WINDOWS:
299 299 line = self.stdout.recv()
300 300 else:
301 301 line = self.process.stdout.readline()
302 302 # a stopped process will be readable but return empty strings
303 303 if line:
304 304 self.log.info(line[:-1])
305 305 else:
306 306 self.poll()
307 307
308 308 def handle_stderr(self, fd, events):
309 309 if WINDOWS:
310 310 line = self.stderr.recv()
311 311 else:
312 312 line = self.process.stderr.readline()
313 313 # a stopped process will be readable but return empty strings
314 314 if line:
315 315 self.log.error(line[:-1])
316 316 else:
317 317 self.poll()
318 318
319 319 def poll(self):
320 320 status = self.process.poll()
321 321 if status is not None:
322 322 self.poller.stop()
323 323 self.loop.remove_handler(self.stdout)
324 324 self.loop.remove_handler(self.stderr)
325 325 self.notify_stop(dict(exit_code=status, pid=self.process.pid))
326 326 return status
327 327
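A usage sketch, assuming the IOLoop is then run so the stdout/stderr handlers and the periodic poller actually fire:

    import sys
    launcher = LocalProcessLauncher(work_dir=u'.')
    launcher.cmd_and_args = ['python', '-c', "print 'hello'"]
    launcher.on_stop(lambda data: sys.stdout.write('stopped: %r\n' % data))
    launcher.start()        # spawn the process and register fd handlers
    launcher.loop.start()   # run the loop; output and exit are reported via log/on_stop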
328 328 class LocalControllerLauncher(LocalProcessLauncher):
329 329 """Launch a controller as a regular external process."""
330 330
331 331 controller_cmd = List(ipcontroller_cmd_argv, config=True,
332 332 help="""Popen command to launch ipcontroller.""")
333 333 # Command line arguments to ipcontroller.
334 334 controller_args = List(['--log-to-file','log_level=%i'%logging.INFO], config=True,
335 335 help="""command-line args to pass to ipcontroller""")
336 336
337 337 def find_args(self):
338 338 return self.controller_cmd + self.controller_args
339 339
340 340 def start(self, cluster_dir):
341 341 """Start the controller by cluster_dir."""
342 342 self.controller_args.extend(['cluster_dir=%s'%cluster_dir])
343 343 self.cluster_dir = unicode(cluster_dir)
344 344 self.log.info("Starting LocalControllerLauncher: %r" % self.args)
345 345 return super(LocalControllerLauncher, self).start()
346 346
347 347
348 348 class LocalEngineLauncher(LocalProcessLauncher):
349 349 """Launch a single engine as a regular externall process."""
350 350
351 351 engine_cmd = List(ipengine_cmd_argv, config=True,
352 352 help="""command to launch the Engine.""")
353 353 # Command line arguments for ipengine.
354 354 engine_args = List(['--log-to-file','log_level=%i'%logging.INFO], config=True,
355 355 help="command-line arguments to pass to ipengine"
356 356 )
357 357
358 358 def find_args(self):
359 359 return self.engine_cmd + self.engine_args
360 360
361 361 def start(self, cluster_dir):
362 362 """Start the engine by cluster_dir."""
363 363 self.engine_args.extend(['cluster_dir=%s'%cluster_dir])
364 364 self.cluster_dir = unicode(cluster_dir)
365 365 return super(LocalEngineLauncher, self).start()
366 366
367 367
368 368 class LocalEngineSetLauncher(BaseLauncher):
369 369 """Launch a set of engines as regular external processes."""
370 370
371 371 # Command line arguments for ipengine.
372 372 engine_args = List(
373 373 ['--log-to-file','log_level=%i'%logging.INFO], config=True,
374 374 help="command-line arguments to pass to ipengine"
375 375 )
376 376 # launcher class
377 377 launcher_class = LocalEngineLauncher
378 378
379 379 launchers = Dict()
380 380 stop_data = Dict()
381 381
382 382 def __init__(self, work_dir=u'.', config=None, **kwargs):
383 383 super(LocalEngineSetLauncher, self).__init__(
384 384 work_dir=work_dir, config=config, **kwargs
385 385 )
386 386 self.stop_data = {}
387 387
388 388 def start(self, n, cluster_dir):
389 389 """Start n engines by profile or cluster_dir."""
390 390 self.cluster_dir = unicode(cluster_dir)
391 391 dlist = []
392 392 for i in range(n):
393 393 el = self.launcher_class(work_dir=self.work_dir, config=self.config, logname=self.log.name)
394 394 # Copy the engine args over to each engine launcher.
395 395 el.engine_args = copy.deepcopy(self.engine_args)
396 396 el.on_stop(self._notice_engine_stopped)
397 397 d = el.start(cluster_dir)
398 398 if i==0:
399 399 self.log.info("Starting LocalEngineSetLauncher: %r" % el.args)
400 400 self.launchers[i] = el
401 401 dlist.append(d)
402 402 self.notify_start(dlist)
403 403 # The consumeErrors here could be dangerous
404 404 # dfinal = gatherBoth(dlist, consumeErrors=True)
405 405 # dfinal.addCallback(self.notify_start)
406 406 return dlist
407 407
408 408 def find_args(self):
409 409 return ['engine set']
410 410
411 411 def signal(self, sig):
412 412 dlist = []
413 413 for el in self.launchers.itervalues():
414 414 d = el.signal(sig)
415 415 dlist.append(d)
416 416 # dfinal = gatherBoth(dlist, consumeErrors=True)
417 417 return dlist
418 418
419 419 def interrupt_then_kill(self, delay=1.0):
420 420 dlist = []
421 421 for el in self.launchers.itervalues():
422 422 d = el.interrupt_then_kill(delay)
423 423 dlist.append(d)
424 424 # dfinal = gatherBoth(dlist, consumeErrors=True)
425 425 return dlist
426 426
427 427 def stop(self):
428 428 return self.interrupt_then_kill()
429 429
430 430 def _notice_engine_stopped(self, data):
431 431 pid = data['pid']
432 432 for idx,el in self.launchers.iteritems():
433 433 if el.process.pid == pid:
434 434 break
435 435 self.launchers.pop(idx)
436 436 self.stop_data[idx] = data
437 437 if not self.launchers:
438 438 self.notify_stop(self.stop_data)
439 439
440 440
441 441 #-----------------------------------------------------------------------------
442 442 # MPIExec launchers
443 443 #-----------------------------------------------------------------------------
444 444
445 445
446 446 class MPIExecLauncher(LocalProcessLauncher):
447 447 """Launch an external process using mpiexec."""
448 448
449 449 mpi_cmd = List(['mpiexec'], config=True,
450 450 help="The mpiexec command to use in starting the process."
451 451 )
452 452 mpi_args = List([], config=True,
453 453 help="The command line arguments to pass to mpiexec."
454 454 )
455 455 program = List(['date'], config=True,
456 456 help="The program to start via mpiexec.")
457 457 program_args = List([], config=True,
458 458         help="The command line arguments to the program."
459 459 )
460 460 n = Int(1)
461 461
462 462 def find_args(self):
463 463 """Build self.args using all the fields."""
464 464 return self.mpi_cmd + ['-n', str(self.n)] + self.mpi_args + \
465 465 self.program + self.program_args
466 466
467 467 def start(self, n):
468 468 """Start n instances of the program using mpiexec."""
469 469 self.n = n
470 470 return super(MPIExecLauncher, self).start()
471 471
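A quick sketch with the defaults above: start(4) makes find_args() yield ['mpiexec', '-n', '4', 'date'] and launches it.

    launcher = MPIExecLauncher(work_dir=u'.')
    # mpi_cmd/mpi_args/program are configurable; the default program is `date`
    launcher.start(4)   # runs: mpiexec -n 4 date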
472 472
473 473 class MPIExecControllerLauncher(MPIExecLauncher):
474 474 """Launch a controller using mpiexec."""
475 475
476 476 controller_cmd = List(ipcontroller_cmd_argv, config=True,
477 477         help="Popen command to launch the Controller"
478 478 )
479 479 controller_args = List(['--log-to-file','log_level=%i'%logging.INFO], config=True,
480 480 help="Command line arguments to pass to ipcontroller."
481 481 )
482 482 n = Int(1)
483 483
484 484 def start(self, cluster_dir):
485 485 """Start the controller by cluster_dir."""
486 486 self.controller_args.extend(['cluster_dir=%s'%cluster_dir])
487 487 self.cluster_dir = unicode(cluster_dir)
488 488 self.log.info("Starting MPIExecControllerLauncher: %r" % self.args)
489 489 return super(MPIExecControllerLauncher, self).start(1)
490 490
491 491 def find_args(self):
492 492         return self.mpi_cmd + ['-n', str(self.n)] + self.mpi_args + \
493 493 self.controller_cmd + self.controller_args
494 494
495 495
496 496 class MPIExecEngineSetLauncher(MPIExecLauncher):
497 497
498 498 program = List(ipengine_cmd_argv, config=True,
499 499 help="Popen command for ipengine"
500 500 )
501 501 program_args = List(
502 502 ['--log-to-file','log_level=%i'%logging.INFO], config=True,
503 503 help="Command line arguments for ipengine."
504 504 )
505 505 n = Int(1)
506 506
507 507 def start(self, n, cluster_dir):
508 508 """Start n engines by profile or cluster_dir."""
509 509 self.program_args.extend(['cluster_dir=%s'%cluster_dir])
510 510 self.cluster_dir = unicode(cluster_dir)
511 511 self.n = n
512 512 self.log.info('Starting MPIExecEngineSetLauncher: %r' % self.args)
513 513 return super(MPIExecEngineSetLauncher, self).start(n)
514 514
515 515 #-----------------------------------------------------------------------------
516 516 # SSH launchers
517 517 #-----------------------------------------------------------------------------
518 518
519 519 # TODO: Get SSH Launcher working again.
520 520
521 521 class SSHLauncher(LocalProcessLauncher):
522 522 """A minimal launcher for ssh.
523 523
524 524 To be useful this will probably have to be extended to use the ``sshx``
525 525 idea for environment variables. There could be other things this needs
526 526 as well.
527 527 """
528 528
529 529 ssh_cmd = List(['ssh'], config=True,
530 530 help="command for starting ssh")
531 531 ssh_args = List(['-tt'], config=True,
532 532 help="args to pass to ssh")
533 533 program = List(['date'], config=True,
534 534 help="Program to launch via ssh")
535 535 program_args = List([], config=True,
536 536 help="args to pass to remote program")
537 hostname = CUnicode('', config=True,
537 hostname = Unicode('', config=True,
538 538 help="hostname on which to launch the program")
539 user = CUnicode('', config=True,
539 user = Unicode('', config=True,
540 540 help="username for ssh")
541 location = CUnicode('', config=True,
541 location = Unicode('', config=True,
542 542 help="user@hostname location for ssh in one setting")
543 543
544 544 def _hostname_changed(self, name, old, new):
545 545 if self.user:
546 546 self.location = u'%s@%s' % (self.user, new)
547 547 else:
548 548 self.location = new
549 549
550 550 def _user_changed(self, name, old, new):
551 551 self.location = u'%s@%s' % (new, self.hostname)
552 552
553 553 def find_args(self):
554 554 return self.ssh_cmd + self.ssh_args + [self.location] + \
555 555 self.program + self.program_args
556 556
557 557 def start(self, cluster_dir, hostname=None, user=None):
558 558 self.cluster_dir = unicode(cluster_dir)
559 559 if hostname is not None:
560 560 self.hostname = hostname
561 561 if user is not None:
562 562 self.user = user
563 563
564 564 return super(SSHLauncher, self).start()
565 565
566 566 def signal(self, sig):
567 567 if self.state == 'running':
568 568 # send escaped ssh connection-closer
569 569 self.process.stdin.write('~.')
570 570 self.process.stdin.flush()
571 571
572 572
573 573
574 574 class SSHControllerLauncher(SSHLauncher):
575 575
576 576 program = List(ipcontroller_cmd_argv, config=True,
577 577 help="remote ipcontroller command.")
578 578 program_args = List(['--reuse-files', '--log-to-file','log_level=%i'%logging.INFO], config=True,
579 579 help="Command line arguments to ipcontroller.")
580 580
581 581
582 582 class SSHEngineLauncher(SSHLauncher):
583 583 program = List(ipengine_cmd_argv, config=True,
584 584 help="remote ipengine command.")
585 585 # Command line arguments for ipengine.
586 586 program_args = List(
587 587 ['--log-to-file','log_level=%i'%logging.INFO], config=True,
588 588 help="Command line arguments to ipengine."
589 589 )
590 590
591 591 class SSHEngineSetLauncher(LocalEngineSetLauncher):
592 592 launcher_class = SSHEngineLauncher
593 593 engines = Dict(config=True,
594 594 help="""dict of engines to launch. This is a dict by hostname of ints,
595 595 corresponding to the number of engines to start on that host.""")
596 596
597 597 def start(self, n, cluster_dir):
598 598 """Start engines by profile or cluster_dir.
599 599 `n` is ignored, and the `engines` config property is used instead.
600 600 """
601 601
602 602 self.cluster_dir = unicode(cluster_dir)
603 603 dlist = []
604 604 for host, n in self.engines.iteritems():
605 605 if isinstance(n, (tuple, list)):
606 606 n, args = n
607 607 else:
608 608 args = copy.deepcopy(self.engine_args)
609 609
610 610 if '@' in host:
611 611 user,host = host.split('@',1)
612 612 else:
613 613 user=None
614 614 for i in range(n):
615 615 el = self.launcher_class(work_dir=self.work_dir, config=self.config, logname=self.log.name)
616 616
617 617 # Copy the engine args over to each engine launcher.
619 619 el.program_args = args
620 620 el.on_stop(self._notice_engine_stopped)
621 621 d = el.start(cluster_dir, user=user, hostname=host)
622 622 if i==0:
623 623 self.log.info("Starting SSHEngineSetLauncher: %r" % el.args)
624 624 self.launchers[host+str(i)] = el
625 625 dlist.append(d)
626 626 self.notify_start(dlist)
627 627 return dlist
628 628
629 629
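A config sketch for the engines dict consumed above (hostnames hypothetical): a value may be a plain count, or a (count, args) tuple to override engine_args per host.

    c = get_config()
    c.SSHEngineSetLauncher.engines = {
        'node1.example.com' : 2,                            # two engines, default args
        'user@node2.example.com' : (4, ['--log-to-file']),  # count plus per-host args
    }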
630 630
631 631 #-----------------------------------------------------------------------------
632 632 # Windows HPC Server 2008 scheduler launchers
633 633 #-----------------------------------------------------------------------------
634 634
635 635
636 636 # This is only used on Windows.
637 637 def find_job_cmd():
638 638 if WINDOWS:
639 639 try:
640 640 return find_cmd('job')
641 641 except (FindCmdError, ImportError):
642 642 # ImportError will be raised if win32api is not installed
643 643 return 'job'
644 644 else:
645 645 return 'job'
646 646
647 647
648 648 class WindowsHPCLauncher(BaseLauncher):
649 649
650 job_id_regexp = Str(r'\d+', config=True,
650 job_id_regexp = Unicode(r'\d+', config=True,
651 651 help="""A regular expression used to get the job id from the output of the
652 652 submit_command. """
653 653 )
654 job_file_name = CUnicode(u'ipython_job.xml', config=True,
654 job_file_name = Unicode(u'ipython_job.xml', config=True,
655 655 help="The filename of the instantiated job script.")
656 656 # The full path to the instantiated job script. This gets made dynamically
657 657 # by combining the work_dir with the job_file_name.
658 job_file = CUnicode(u'')
659 scheduler = CUnicode('', config=True,
658 job_file = Unicode(u'')
659 scheduler = Unicode('', config=True,
660 660 help="The hostname of the scheduler to submit the job to.")
661 job_cmd = CUnicode(find_job_cmd(), config=True,
661 job_cmd = Unicode(find_job_cmd(), config=True,
662 662 help="The command for submitting jobs.")
663 663
664 664 def __init__(self, work_dir=u'.', config=None, **kwargs):
665 665 super(WindowsHPCLauncher, self).__init__(
666 666 work_dir=work_dir, config=config, **kwargs
667 667 )
668 668
669 669 @property
670 670 def job_file(self):
671 671 return os.path.join(self.work_dir, self.job_file_name)
672 672
673 673 def write_job_file(self, n):
674 674 raise NotImplementedError("Implement write_job_file in a subclass.")
675 675
676 676 def find_args(self):
677 677 return [u'job.exe']
678 678
679 679 def parse_job_id(self, output):
680 680 """Take the output of the submit command and return the job id."""
681 681 m = re.search(self.job_id_regexp, output)
682 682 if m is not None:
683 683 job_id = m.group()
684 684 else:
685 685 raise LauncherError("Job id couldn't be determined: %s" % output)
686 686 self.job_id = job_id
687 687 self.log.info('Job started with job id: %r' % job_id)
688 688 return job_id
689 689
690 690 def start(self, n):
691 691 """Start n copies of the process using the Win HPC job scheduler."""
692 692 self.write_job_file(n)
693 693 args = [
694 694 'submit',
695 695 '/jobfile:%s' % self.job_file,
696 696 '/scheduler:%s' % self.scheduler
697 697 ]
698 698 self.log.info("Starting Win HPC Job: %s" % (self.job_cmd + ' ' + ' '.join(args),))
699 699 # Twisted will raise DeprecationWarnings if we try to pass unicode to this
700 700 output = check_output([self.job_cmd]+args,
701 701 env=os.environ,
702 702 cwd=self.work_dir,
703 703 stderr=STDOUT
704 704 )
705 705 job_id = self.parse_job_id(output)
706 706 self.notify_start(job_id)
707 707 return job_id
708 708
709 709 def stop(self):
710 710 args = [
711 711 'cancel',
712 712 self.job_id,
713 713 '/scheduler:%s' % self.scheduler
714 714 ]
715 715 self.log.info("Stopping Win HPC Job: %s" % (self.job_cmd + ' ' + ' '.join(args),))
716 716 try:
717 717 output = check_output([self.job_cmd]+args,
718 718 env=os.environ,
719 719 cwd=self.work_dir,
720 720 stderr=STDOUT
721 721 )
722 722 except:
723 723             output = 'The job already appears to be stopped: %r' % self.job_id
724 724 self.notify_stop(dict(job_id=self.job_id, output=output)) # Pass the output of the kill cmd
725 725 return output
726 726
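A quick sketch of parse_job_id with the default job_id_regexp r'\d+' (the submit output text is hypothetical):

    import re
    output = 'Job has been submitted. ID: 4281'
    m = re.search(r'\d+', output)
    print m.group()   # -> 4281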
727 727
728 728 class WindowsHPCControllerLauncher(WindowsHPCLauncher):
729 729
730 job_file_name = CUnicode(u'ipcontroller_job.xml', config=True,
730 job_file_name = Unicode(u'ipcontroller_job.xml', config=True,
731 731 help="WinHPC xml job file.")
732 732 extra_args = List([], config=False,
733 733 help="extra args to pass to ipcontroller")
734 734
735 735 def write_job_file(self, n):
736 736 job = IPControllerJob(config=self.config)
737 737
738 738 t = IPControllerTask(config=self.config)
739 739         # The task's work directory is *not* the actual work directory of
740 740 # the controller. It is used as the base path for the stdout/stderr
741 741 # files that the scheduler redirects to.
742 742 t.work_directory = self.cluster_dir
743 743         # Add the cluster_dir argument from self.start().
744 744 t.controller_args.extend(self.extra_args)
745 745 job.add_task(t)
746 746
747 747 self.log.info("Writing job description file: %s" % self.job_file)
748 748 job.write(self.job_file)
749 749
750 750 @property
751 751 def job_file(self):
752 752 return os.path.join(self.cluster_dir, self.job_file_name)
753 753
754 754 def start(self, cluster_dir):
755 755 """Start the controller by cluster_dir."""
756 756 self.extra_args = ['cluster_dir=%s'%cluster_dir]
757 757 self.cluster_dir = unicode(cluster_dir)
758 758 return super(WindowsHPCControllerLauncher, self).start(1)
759 759
760 760
761 761 class WindowsHPCEngineSetLauncher(WindowsHPCLauncher):
762 762
763 job_file_name = CUnicode(u'ipengineset_job.xml', config=True,
763 job_file_name = Unicode(u'ipengineset_job.xml', config=True,
764 764 help="jobfile for ipengines job")
765 765 extra_args = List([], config=False,
766 766         help="extra args to pass to ipengine")
767 767
768 768 def write_job_file(self, n):
769 769 job = IPEngineSetJob(config=self.config)
770 770
771 771 for i in range(n):
772 772 t = IPEngineTask(config=self.config)
773 773             # The task's work directory is *not* the actual work directory of
774 774 # the engine. It is used as the base path for the stdout/stderr
775 775 # files that the scheduler redirects to.
776 776 t.work_directory = self.cluster_dir
777 777             # Add the cluster_dir argument from self.start().
778 778 t.engine_args.extend(self.extra_args)
779 779 job.add_task(t)
780 780
781 781 self.log.info("Writing job description file: %s" % self.job_file)
782 782 job.write(self.job_file)
783 783
784 784 @property
785 785 def job_file(self):
786 786 return os.path.join(self.cluster_dir, self.job_file_name)
787 787
788 788 def start(self, n, cluster_dir):
789 789         """Start the engine set by cluster_dir."""
790 790 self.extra_args = ['cluster_dir=%s'%cluster_dir]
791 791 self.cluster_dir = unicode(cluster_dir)
792 792 return super(WindowsHPCEngineSetLauncher, self).start(n)
793 793
794 794
795 795 #-----------------------------------------------------------------------------
796 796 # Batch (PBS) system launchers
797 797 #-----------------------------------------------------------------------------
798 798
799 799 class BatchSystemLauncher(BaseLauncher):
800 800 """Launch an external process using a batch system.
801 801
802 802 This class is designed to work with UNIX batch systems like PBS, LSF,
803 803 GridEngine, etc. The overall model is that there are different commands
804 804 like qsub, qdel, etc. that handle the starting and stopping of the process.
805 805
806 806 This class also has the notion of a batch script. The ``batch_template``
807 807 attribute can be set to a string that is a template for the batch script.
808 808 This template is instantiated using Itpl. Thus the template can use
809 809     ${n} for the number of instances. Subclasses can add additional variables
810 810 to the template dict.
811 811 """
812 812
813 813 # Subclasses must fill these in. See PBSEngineSet
814 814 submit_command = List([''], config=True,
815 815 help="The name of the command line program used to submit jobs.")
816 816 delete_command = List([''], config=True,
817 817 help="The name of the command line program used to delete jobs.")
818 job_id_regexp = CUnicode('', config=True,
818 job_id_regexp = Unicode('', config=True,
819 819 help="""A regular expression used to get the job id from the output of the
820 820 submit_command.""")
821 batch_template = CUnicode('', config=True,
821 batch_template = Unicode('', config=True,
822 822 help="The string that is the batch script template itself.")
823 batch_template_file = CUnicode(u'', config=True,
823 batch_template_file = Unicode(u'', config=True,
824 824 help="The file that contains the batch template.")
825 batch_file_name = CUnicode(u'batch_script', config=True,
825 batch_file_name = Unicode(u'batch_script', config=True,
826 826 help="The filename of the instantiated batch script.")
827 queue = CUnicode(u'', config=True,
827 queue = Unicode(u'', config=True,
828 828 help="The PBS Queue.")
829 829
830 830 # not configurable, override in subclasses
831 831 # PBS Job Array regex
832 job_array_regexp = CUnicode('')
833 job_array_template = CUnicode('')
832 job_array_regexp = Unicode('')
833 job_array_template = Unicode('')
834 834 # PBS Queue regex
835 queue_regexp = CUnicode('')
836 queue_template = CUnicode('')
835 queue_regexp = Unicode('')
836 queue_template = Unicode('')
837 837 # The default batch template, override in subclasses
838 default_template = CUnicode('')
838 default_template = Unicode('')
839 839 # The full path to the instantiated batch script.
840 batch_file = CUnicode(u'')
840 batch_file = Unicode(u'')
841 841 # the format dict used with batch_template:
842 842 context = Dict()
843 843
844 844
845 845 def find_args(self):
846 846 return self.submit_command + [self.batch_file]
847 847
848 848 def __init__(self, work_dir=u'.', config=None, **kwargs):
849 849 super(BatchSystemLauncher, self).__init__(
850 850 work_dir=work_dir, config=config, **kwargs
851 851 )
852 852 self.batch_file = os.path.join(self.work_dir, self.batch_file_name)
853 853
854 854 def parse_job_id(self, output):
855 855 """Take the output of the submit command and return the job id."""
856 856 m = re.search(self.job_id_regexp, output)
857 857 if m is not None:
858 858 job_id = m.group()
859 859 else:
860 860 raise LauncherError("Job id couldn't be determined: %s" % output)
861 861 self.job_id = job_id
862 862 self.log.info('Job submitted with job id: %r' % job_id)
863 863 return job_id
864 864
865 865 def write_batch_script(self, n):
866 866 """Instantiate and write the batch script to the work_dir."""
867 867 self.context['n'] = n
868 868 self.context['queue'] = self.queue
869 869         self.log.debug("Batch script context: %r" % self.context)
870 870 # first priority is batch_template if set
871 871 if self.batch_template_file and not self.batch_template:
872 872 # second priority is batch_template_file
873 873 with open(self.batch_template_file) as f:
874 874 self.batch_template = f.read()
875 875 if not self.batch_template:
876 876 # third (last) priority is default_template
877 877 self.batch_template = self.default_template
878 878
879 879 regex = re.compile(self.job_array_regexp)
880 880 # print regex.search(self.batch_template)
881 881 if not regex.search(self.batch_template):
882 882 self.log.info("adding job array settings to batch script")
883 883 firstline, rest = self.batch_template.split('\n',1)
884 884 self.batch_template = u'\n'.join([firstline, self.job_array_template, rest])
885 885
886 886 regex = re.compile(self.queue_regexp)
887 887 # print regex.search(self.batch_template)
888 888 if self.queue and not regex.search(self.batch_template):
889 889 self.log.info("adding PBS queue settings to batch script")
890 890 firstline, rest = self.batch_template.split('\n',1)
891 891 self.batch_template = u'\n'.join([firstline, self.queue_template, rest])
892 892
893 893 script_as_string = Itpl.itplns(self.batch_template, self.context)
894 894 self.log.info('Writing instantiated batch script: %s' % self.batch_file)
895 895
896 896 with open(self.batch_file, 'w') as f:
897 897 f.write(script_as_string)
898 898 os.chmod(self.batch_file, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
899 899
900 900 def start(self, n, cluster_dir):
901 901 """Start n copies of the process using a batch system."""
902 902 # Here we save profile and cluster_dir in the context so they
903 903 # can be used in the batch script template as ${profile} and
904 904 # ${cluster_dir}
905 905 self.context['cluster_dir'] = cluster_dir
906 906 self.cluster_dir = unicode(cluster_dir)
907 907 self.write_batch_script(n)
908 908 output = check_output(self.args, env=os.environ)
909 909
910 910 job_id = self.parse_job_id(output)
911 911 self.notify_start(job_id)
912 912 return job_id
913 913
914 914 def stop(self):
915 915 output = check_output(self.delete_command+[self.job_id], env=os.environ)
916 916 self.notify_stop(dict(job_id=self.job_id, output=output)) # Pass the output of the kill cmd
917 917 return output
918 918
919 919
920 920 class PBSLauncher(BatchSystemLauncher):
921 921 """A BatchSystemLauncher subclass for PBS."""
922 922
923 923 submit_command = List(['qsub'], config=True,
924 924 help="The PBS submit command ['qsub']")
925 925 delete_command = List(['qdel'], config=True,
926 926         help="The PBS delete command ['qdel']")
927 job_id_regexp = CUnicode(r'\d+', config=True,
927 job_id_regexp = Unicode(r'\d+', config=True,
928 928         help="Regular expression for identifying the job ID [r'\d+']")
929 929
930 batch_file = CUnicode(u'')
931 job_array_regexp = CUnicode('#PBS\W+-t\W+[\w\d\-\$]+')
932 job_array_template = CUnicode('#PBS -t 1-$n')
933 queue_regexp = CUnicode('#PBS\W+-q\W+\$?\w+')
934 queue_template = CUnicode('#PBS -q $queue')
930 batch_file = Unicode(u'')
931 job_array_regexp = Unicode('#PBS\W+-t\W+[\w\d\-\$]+')
932 job_array_template = Unicode('#PBS -t 1-$n')
933 queue_regexp = Unicode('#PBS\W+-q\W+\$?\w+')
934 queue_template = Unicode('#PBS -q $queue')
935 935
936 936
937 937 class PBSControllerLauncher(PBSLauncher):
938 938 """Launch a controller using PBS."""
939 939
940 batch_file_name = CUnicode(u'pbs_controller', config=True,
940 batch_file_name = Unicode(u'pbs_controller', config=True,
941 941 help="batch file name for the controller job.")
942 default_template= CUnicode("""#!/bin/sh
942 default_template= Unicode("""#!/bin/sh
943 943 #PBS -V
944 944 #PBS -N ipcontroller
945 945 %s --log-to-file cluster_dir=$cluster_dir
946 946 """%(' '.join(ipcontroller_cmd_argv)))
947 947
948 948 def start(self, cluster_dir):
949 949 """Start the controller by profile or cluster_dir."""
950 950 self.log.info("Starting PBSControllerLauncher: %r" % self.args)
951 951 return super(PBSControllerLauncher, self).start(1, cluster_dir)
952 952
953 953
954 954 class PBSEngineSetLauncher(PBSLauncher):
955 955 """Launch Engines using PBS"""
956 batch_file_name = CUnicode(u'pbs_engines', config=True,
956 batch_file_name = Unicode(u'pbs_engines', config=True,
957 957 help="batch file name for the engine(s) job.")
958 default_template= CUnicode(u"""#!/bin/sh
958 default_template= Unicode(u"""#!/bin/sh
959 959 #PBS -V
960 960 #PBS -N ipengine
961 961 %s cluster_dir=$cluster_dir
962 962 """%(' '.join(ipengine_cmd_argv)))
963 963
964 964 def start(self, n, cluster_dir):
965 965 """Start n engines by profile or cluster_dir."""
966 966 self.log.info('Starting %i engines with PBSEngineSetLauncher: %r' % (n, self.args))
967 967 return super(PBSEngineSetLauncher, self).start(n, cluster_dir)
968 968
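A template sketch for the machinery described in BatchSystemLauncher: ${n}, ${queue} and ${cluster_dir} come from the context dict and are substituted by Itpl.itplns (the job name and the bare `ipengine` command, assumed to be on PATH, are illustrative):

    c = get_config()
    c.PBSEngineSetLauncher.batch_template = """#!/bin/sh
    #PBS -V
    #PBS -N ipengine
    #PBS -t 1-${n}
    ipengine cluster_dir=${cluster_dir}
    """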
969 969 #SGE is very similar to PBS
970 970
971 971 class SGELauncher(PBSLauncher):
972 972 """Sun GridEngine is a PBS clone with slightly different syntax"""
973 job_array_regexp = CUnicode('#$$\W+-t\W+[\w\d\-\$]+')
974 job_array_template = CUnicode('#$$ -t 1-$n')
975 queue_regexp = CUnicode('#$$\W+-q\W+\$?\w+')
976 queue_template = CUnicode('#$$ -q $queue')
973 job_array_regexp = Unicode('#$$\W+-t\W+[\w\d\-\$]+')
974 job_array_template = Unicode('#$$ -t 1-$n')
975 queue_regexp = Unicode('#$$\W+-q\W+\$?\w+')
976 queue_template = Unicode('#$$ -q $queue')
977 977
978 978 class SGEControllerLauncher(SGELauncher):
979 979 """Launch a controller using SGE."""
980 980
981 batch_file_name = CUnicode(u'sge_controller', config=True,
981 batch_file_name = Unicode(u'sge_controller', config=True,
982 982         help="batch file name for the ipcontroller job.")
983 default_template= CUnicode(u"""#$$ -V
983 default_template= Unicode(u"""#$$ -V
984 984 #$$ -S /bin/sh
985 985 #$$ -N ipcontroller
986 986 %s --log-to-file cluster_dir=$cluster_dir
987 987 """%(' '.join(ipcontroller_cmd_argv)))
988 988
989 989 def start(self, cluster_dir):
990 990 """Start the controller by profile or cluster_dir."""
991 991         self.log.info("Starting SGEControllerLauncher: %r" % self.args)
992 992         return super(SGEControllerLauncher, self).start(1, cluster_dir)
993 993
994 994 class SGEEngineSetLauncher(SGELauncher):
995 995 """Launch Engines with SGE"""
996 batch_file_name = CUnicode(u'sge_engines', config=True,
996 batch_file_name = Unicode(u'sge_engines', config=True,
997 997 help="batch file name for the engine(s) job.")
998 default_template = CUnicode("""#$$ -V
998 default_template = Unicode("""#$$ -V
999 999 #$$ -S /bin/sh
1000 1000 #$$ -N ipengine
1001 1001 %s cluster_dir=$cluster_dir
1002 1002 """%(' '.join(ipengine_cmd_argv)))
1003 1003
1004 1004 def start(self, n, cluster_dir):
1005 1005 """Start n engines by profile or cluster_dir."""
1006 1006 self.log.info('Starting %i engines with SGEEngineSetLauncher: %r' % (n, self.args))
1007 1007 return super(SGEEngineSetLauncher, self).start(n, cluster_dir)
1008 1008
1009 1009
1010 1010 #-----------------------------------------------------------------------------
1011 1011 # A launcher for ipcluster itself!
1012 1012 #-----------------------------------------------------------------------------
1013 1013
1014 1014
1015 1015 class IPClusterLauncher(LocalProcessLauncher):
1016 1016 """Launch the ipcluster program in an external process."""
1017 1017
1018 1018 ipcluster_cmd = List(ipcluster_cmd_argv, config=True,
1019 1019 help="Popen command for ipcluster")
1020 1020 ipcluster_args = List(
1021 1021 ['--clean-logs', '--log-to-file', 'log_level=%i'%logging.INFO], config=True,
1022 1022 help="Command line arguments to pass to ipcluster.")
1023 ipcluster_subcommand = Str('start')
1023 ipcluster_subcommand = Unicode('start')
1024 1024 ipcluster_n = Int(2)
1025 1025
1026 1026 def find_args(self):
1027 1027 return self.ipcluster_cmd + ['--'+self.ipcluster_subcommand] + \
1028 1028 ['n=%i'%self.ipcluster_n] + self.ipcluster_args
1029 1029
1030 1030 def start(self):
1031 1031 self.log.info("Starting ipcluster: %r" % self.args)
1032 1032 return super(IPClusterLauncher, self).start()
1033 1033
1034 1034 #-----------------------------------------------------------------------------
1035 1035 # Collections of launchers
1036 1036 #-----------------------------------------------------------------------------
1037 1037
1038 1038 local_launchers = [
1039 1039 LocalControllerLauncher,
1040 1040 LocalEngineLauncher,
1041 1041 LocalEngineSetLauncher,
1042 1042 ]
1043 1043 mpi_launchers = [
1044 1044 MPIExecLauncher,
1045 1045 MPIExecControllerLauncher,
1046 1046 MPIExecEngineSetLauncher,
1047 1047 ]
1048 1048 ssh_launchers = [
1049 1049 SSHLauncher,
1050 1050 SSHControllerLauncher,
1051 1051 SSHEngineLauncher,
1052 1052 SSHEngineSetLauncher,
1053 1053 ]
1054 1054 winhpc_launchers = [
1055 1055 WindowsHPCLauncher,
1056 1056 WindowsHPCControllerLauncher,
1057 1057 WindowsHPCEngineSetLauncher,
1058 1058 ]
1059 1059 pbs_launchers = [
1060 1060 PBSLauncher,
1061 1061 PBSControllerLauncher,
1062 1062 PBSEngineSetLauncher,
1063 1063 ]
1064 1064 sge_launchers = [
1065 1065 SGELauncher,
1066 1066 SGEControllerLauncher,
1067 1067 SGEEngineSetLauncher,
1068 1068 ]
1069 1069 all_launchers = local_launchers + mpi_launchers + ssh_launchers + winhpc_launchers\
1070 1070 + pbs_launchers + sge_launchers No newline at end of file
@@ -1,98 +1,98 b''
1 1 #!/usr/bin/env python
2 2 """A simple logger object that consolidates messages incoming from ipcluster processes."""
3 3
4 4 #-----------------------------------------------------------------------------
5 5 # Copyright (C) 2011 The IPython Development Team
6 6 #
7 7 # Distributed under the terms of the BSD License. The full license is in
8 8 # the file COPYING, distributed as part of this software.
9 9 #-----------------------------------------------------------------------------
10 10
11 11 #-----------------------------------------------------------------------------
12 12 # Imports
13 13 #-----------------------------------------------------------------------------
14 14
15 15
16 16 import logging
17 17 import sys
18 18
19 19 import zmq
20 20 from zmq.eventloop import ioloop, zmqstream
21 21
22 from IPython.utils.traitlets import Int, Str, Instance, List
22 from IPython.utils.traitlets import Int, Unicode, Instance, List
23 23
24 24 from IPython.parallel.factory import LoggingFactory
25 25
26 26 #-----------------------------------------------------------------------------
27 27 # Classes
28 28 #-----------------------------------------------------------------------------
29 29
30 30
31 31 class LogWatcher(LoggingFactory):
32 32 """A simple class that receives messages on a SUB socket, as published
33 33 by subclasses of `zmq.log.handlers.PUBHandler`, and logs them itself.
34 34
35 35 This can subscribe to multiple topics, but defaults to all topics.
36 36 """
37 37 # configurables
38 38 topics = List([''], config=True)
39 url = Str('tcp://127.0.0.1:20202', config=True)
39 url = Unicode('tcp://127.0.0.1:20202', config=True)
40 40
41 41 # internals
42 42 context = Instance(zmq.Context, (), {})
43 43 stream = Instance('zmq.eventloop.zmqstream.ZMQStream')
44 44 loop = Instance('zmq.eventloop.ioloop.IOLoop')
45 45 def _loop_default(self):
46 46 return ioloop.IOLoop.instance()
47 47
48 48 def __init__(self, **kwargs):
49 49 super(LogWatcher, self).__init__(**kwargs)
50 50 s = self.context.socket(zmq.SUB)
51 51 s.bind(self.url)
52 52 self.stream = zmqstream.ZMQStream(s, self.loop)
53 53 self.subscribe()
54 54 self.on_trait_change(self.subscribe, 'topics')
55 55
56 56 def start(self):
57 57 self.stream.on_recv(self.log_message)
58 58
59 59 def stop(self):
60 60 self.stream.stop_on_recv()
61 61
62 62 def subscribe(self):
63 63 """Update our SUB socket's subscriptions."""
64 64 self.stream.setsockopt(zmq.UNSUBSCRIBE, '')
65 65 for topic in self.topics:
66 66 self.log.debug("Subscribing to: %r"%topic)
67 67 self.stream.setsockopt(zmq.SUBSCRIBE, topic)
68 68
69 69 def _extract_level(self, topic_str):
70 70 """Turn 'engine.0.INFO.extra' into (logging.INFO, 'engine.0.extra')"""
71 71 topics = topic_str.split('.')
72 72 for idx,t in enumerate(topics):
73 73 level = getattr(logging, t, None)
74 74 if level is not None:
75 75 break
76 76
77 77 if level is None:
78 78 level = logging.INFO
79 79 else:
80 80 topics.pop(idx)
81 81
82 82 return level, '.'.join(topics)
83 83
84 84
85 85 def log_message(self, raw):
86 86 """receive and parse a message, then log it."""
87 87 if len(raw) != 2 or '.' not in raw[0]:
88 88 self.log.error("Invalid log message: %s"%raw)
89 89 return
90 90 else:
91 91 topic, msg = raw
92 92 # don't newline, since log messages always newline:
93 93 topic,level_name = topic.rsplit('.',1)
94 94 level,topic = self._extract_level(topic)
95 95 if msg[-1] == '\n':
96 96 msg = msg[:-1]
97 97 logging.log(level, "[%s] %s" % (topic, msg))
98 98
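A usage sketch: run a LogWatcher in the foreground; the url must match what the PUBHandler sockets connect to (the value below is the class default).

    watcher = LogWatcher(url='tcp://127.0.0.1:20202', topics=['engine'])
    watcher.start()        # begin receiving log messages
    watcher.loop.start()   # block, dispatching each message to the root logger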
@@ -1,314 +1,314 b''
1 1 #!/usr/bin/env python
2 2 # encoding: utf-8
3 3 """
4 4 Job and task components for writing .xml files that the Windows HPC Server
5 5 2008 can use to start jobs.
6 6 """
7 7
8 8 #-----------------------------------------------------------------------------
9 9 # Copyright (C) 2008-2009 The IPython Development Team
10 10 #
11 11 # Distributed under the terms of the BSD License. The full license is in
12 12 # the file COPYING, distributed as part of this software.
13 13 #-----------------------------------------------------------------------------
14 14
15 15 #-----------------------------------------------------------------------------
16 16 # Imports
17 17 #-----------------------------------------------------------------------------
18 18
19 19 import os
20 20 import re
21 21 import uuid
22 22
23 23 from xml.etree import ElementTree as ET
24 24
25 25 from IPython.config.configurable import Configurable
26 26 from IPython.utils.traitlets import (
27 Str, Int, List, Instance,
28 Enum, Bool, CStr
27 Unicode, Int, List, Instance,
28 Enum, Bool
29 29 )
30 30
31 31 #-----------------------------------------------------------------------------
32 32 # Job and Task classes
33 33 #-----------------------------------------------------------------------------
34 34
35 35
36 36 def as_str(value):
37 37 if isinstance(value, str):
38 38 return value
39 39 elif isinstance(value, bool):
40 40 if value:
41 41 return 'true'
42 42 else:
43 43 return 'false'
44 44 elif isinstance(value, (int, float)):
45 45 return repr(value)
46 46 else:
47 47 return value
48 48
49 49
50 50 def indent(elem, level=0):
51 51 i = "\n" + level*" "
52 52 if len(elem):
53 53 if not elem.text or not elem.text.strip():
54 54 elem.text = i + " "
55 55 if not elem.tail or not elem.tail.strip():
56 56 elem.tail = i
57 57 for elem in elem:
58 58 indent(elem, level+1)
59 59 if not elem.tail or not elem.tail.strip():
60 60 elem.tail = i
61 61 else:
62 62 if level and (not elem.tail or not elem.tail.strip()):
63 63 elem.tail = i
64 64
65 65
66 66 def find_username():
67 67 domain = os.environ.get('USERDOMAIN')
68 68 username = os.environ.get('USERNAME','')
69 69 if domain is None:
70 70 return username
71 71 else:
72 72 return '%s\\%s' % (domain, username)
73 73
74 74
75 75 class WinHPCJob(Configurable):
76 76
77 job_id = Str('')
78 job_name = Str('MyJob', config=True)
77 job_id = Unicode('')
78 job_name = Unicode('MyJob', config=True)
79 79 min_cores = Int(1, config=True)
80 80 max_cores = Int(1, config=True)
81 81 min_sockets = Int(1, config=True)
82 82 max_sockets = Int(1, config=True)
83 83 min_nodes = Int(1, config=True)
84 84 max_nodes = Int(1, config=True)
85 unit_type = Str("Core", config=True)
85 unit_type = Unicode("Core", config=True)
86 86 auto_calculate_min = Bool(True, config=True)
87 87 auto_calculate_max = Bool(True, config=True)
88 88 run_until_canceled = Bool(False, config=True)
89 89 is_exclusive = Bool(False, config=True)
90 username = Str(find_username(), config=True)
91 job_type = Str('Batch', config=True)
90 username = Unicode(find_username(), config=True)
91 job_type = Unicode('Batch', config=True)
92 92 priority = Enum(('Lowest','BelowNormal','Normal','AboveNormal','Highest'),
93 93 default_value='Highest', config=True)
94 requested_nodes = Str('', config=True)
95 project = Str('IPython', config=True)
96 xmlns = Str('http://schemas.microsoft.com/HPCS2008/scheduler/')
97 version = Str("2.000")
94 requested_nodes = Unicode('', config=True)
95 project = Unicode('IPython', config=True)
96 xmlns = Unicode('http://schemas.microsoft.com/HPCS2008/scheduler/')
97 version = Unicode("2.000")
98 98 tasks = List([])
99 99
100 100 @property
101 101 def owner(self):
102 102 return self.username
103 103
104 104 def _write_attr(self, root, attr, key):
105 105 s = as_str(getattr(self, attr, ''))
106 106 if s:
107 107 root.set(key, s)
108 108
109 109 def as_element(self):
110 110 # We have to add _A_-style prefixes to get the attribute order
111 111 # that the MSFT XML parser expects.
112 112 root = ET.Element('Job')
113 113 self._write_attr(root, 'version', '_A_Version')
114 114 self._write_attr(root, 'job_name', '_B_Name')
115 115 self._write_attr(root, 'unit_type', '_C_UnitType')
116 116 self._write_attr(root, 'min_cores', '_D_MinCores')
117 117 self._write_attr(root, 'max_cores', '_E_MaxCores')
118 118 self._write_attr(root, 'min_sockets', '_F_MinSockets')
119 119 self._write_attr(root, 'max_sockets', '_G_MaxSockets')
120 120 self._write_attr(root, 'min_nodes', '_H_MinNodes')
121 121 self._write_attr(root, 'max_nodes', '_I_MaxNodes')
122 122 self._write_attr(root, 'run_until_canceled', '_J_RunUntilCanceled')
123 123 self._write_attr(root, 'is_exclusive', '_K_IsExclusive')
124 124 self._write_attr(root, 'username', '_L_UserName')
125 125 self._write_attr(root, 'job_type', '_M_JobType')
126 126 self._write_attr(root, 'priority', '_N_Priority')
127 127 self._write_attr(root, 'requested_nodes', '_O_RequestedNodes')
128 128 self._write_attr(root, 'auto_calculate_max', '_P_AutoCalculateMax')
129 129 self._write_attr(root, 'auto_calculate_min', '_Q_AutoCalculateMin')
130 130 self._write_attr(root, 'project', '_R_Project')
131 131 self._write_attr(root, 'owner', '_S_Owner')
132 132 self._write_attr(root, 'xmlns', '_T_xmlns')
133 133 dependencies = ET.SubElement(root, "Dependencies")
134 134 etasks = ET.SubElement(root, "Tasks")
135 135 for t in self.tasks:
136 136 etasks.append(t.as_element())
137 137 return root
138 138
139 139 def tostring(self):
140 140 """Return the string representation of the job description XML."""
141 141 root = self.as_element()
142 142 indent(root)
143 143 txt = ET.tostring(root, encoding="utf-8")
144 144 # Now remove the tokens used to order the attributes.
145 145 txt = re.sub(r'_[A-Z]_','',txt)
146 146 txt = '<?xml version="1.0" encoding="utf-8"?>\n' + txt
147 147 return txt
148 148
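# The _A_/_B_ tokens above are only a serialization trick to force
# attribute order; a hedged illustration of the strip step:
# re.sub(r'_[A-Z]_', '', '<Job _A_Version="2.000" _B_Name="MyJob" />')
# # -> '<Job Version="2.000" Name="MyJob" />'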
149 149 def write(self, filename):
150 150 """Write the XML job description to a file."""
151 151 txt = self.tostring()
152 152 with open(filename, 'w') as f:
153 153 f.write(txt)
154 154
155 155 def add_task(self, task):
156 156 """Add a task to the job.
157 157
158 158 Parameters
159 159 ----------
160 160 task : :class:`WinHPCTask`
161 161 The task object to add.
162 162 """
163 163 self.tasks.append(task)
164 164
165 165
166 166 class WinHPCTask(Configurable):
167 167
168 task_id = Str('')
169 task_name = Str('')
170 version = Str("2.000")
168 task_id = Unicode('')
169 task_name = Unicode('')
170 version = Unicode("2.000")
171 171 min_cores = Int(1, config=True)
172 172 max_cores = Int(1, config=True)
173 173 min_sockets = Int(1, config=True)
174 174 max_sockets = Int(1, config=True)
175 175 min_nodes = Int(1, config=True)
176 176 max_nodes = Int(1, config=True)
177 unit_type = Str("Core", config=True)
178 command_line = CStr('', config=True)
179 work_directory = CStr('', config=True)
177 unit_type = Unicode("Core", config=True)
178 command_line = Unicode('', config=True)
179 work_directory = Unicode('', config=True)
180 180 is_rerunnaable = Bool(True, config=True)
181 std_out_file_path = CStr('', config=True)
182 std_err_file_path = CStr('', config=True)
181 std_out_file_path = Unicode('', config=True)
182 std_err_file_path = Unicode('', config=True)
183 183 is_parametric = Bool(False, config=True)
184 184 environment_variables = Instance(dict, args=(), config=True)
185 185
186 186 def _write_attr(self, root, attr, key):
187 187 s = as_str(getattr(self, attr, ''))
188 188 if s:
189 189 root.set(key, s)
190 190
191 191 def as_element(self):
192 192 root = ET.Element('Task')
193 193 self._write_attr(root, 'version', '_A_Version')
194 194 self._write_attr(root, 'task_name', '_B_Name')
195 195 self._write_attr(root, 'min_cores', '_C_MinCores')
196 196 self._write_attr(root, 'max_cores', '_D_MaxCores')
197 197 self._write_attr(root, 'min_sockets', '_E_MinSockets')
198 198 self._write_attr(root, 'max_sockets', '_F_MaxSockets')
199 199 self._write_attr(root, 'min_nodes', '_G_MinNodes')
200 200 self._write_attr(root, 'max_nodes', '_H_MaxNodes')
201 201 self._write_attr(root, 'command_line', '_I_CommandLine')
202 202 self._write_attr(root, 'work_directory', '_J_WorkDirectory')
203 203 self._write_attr(root, 'is_rerunnaable', '_K_IsRerunnable')
204 204 self._write_attr(root, 'std_out_file_path', '_L_StdOutFilePath')
205 205 self._write_attr(root, 'std_err_file_path', '_M_StdErrFilePath')
206 206 self._write_attr(root, 'is_parametric', '_N_IsParametric')
207 207 self._write_attr(root, 'unit_type', '_O_UnitType')
208 208 root.append(self.get_env_vars())
209 209 return root
210 210
211 211 def get_env_vars(self):
212 212 env_vars = ET.Element('EnvironmentVariables')
213 213 for k, v in self.environment_variables.iteritems():
214 214 variable = ET.SubElement(env_vars, "Variable")
215 215 name = ET.SubElement(variable, "Name")
216 216 name.text = k
217 217 value = ET.SubElement(variable, "Value")
218 218 value.text = v
219 219 return env_vars
220 220
221 221
222 222
223 223 # By declaring these, we can configure the controller and engine separately!
224 224
225 225 class IPControllerJob(WinHPCJob):
226 job_name = Str('IPController', config=False)
226 job_name = Unicode('IPController', config=False)
227 227 is_exclusive = Bool(False, config=True)
228 username = Str(find_username(), config=True)
228 username = Unicode(find_username(), config=True)
229 229 priority = Enum(('Lowest','BelowNormal','Normal','AboveNormal','Highest'),
230 230 default_value='Highest', config=True)
231 requested_nodes = Str('', config=True)
232 project = Str('IPython', config=True)
231 requested_nodes = Unicode('', config=True)
232 project = Unicode('IPython', config=True)
233 233
234 234
235 235 class IPEngineSetJob(WinHPCJob):
236 job_name = Str('IPEngineSet', config=False)
236 job_name = Unicode('IPEngineSet', config=False)
237 237 is_exclusive = Bool(False, config=True)
238 username = Str(find_username(), config=True)
238 username = Unicode(find_username(), config=True)
239 239 priority = Enum(('Lowest','BelowNormal','Normal','AboveNormal','Highest'),
240 240 default_value='Highest', config=True)
241 requested_nodes = Str('', config=True)
242 project = Str('IPython', config=True)
241 requested_nodes = Unicode('', config=True)
242 project = Unicode('IPython', config=True)
243 243
244 244
245 245 class IPControllerTask(WinHPCTask):
246 246
247 task_name = Str('IPController', config=True)
247 task_name = Unicode('IPController', config=True)
248 248 controller_cmd = List(['ipcontroller.exe'], config=True)
249 249 controller_args = List(['--log-to-file', '--log-level', '40'], config=True)
250 250 # I don't want these to be configurable
251 std_out_file_path = CStr('', config=False)
252 std_err_file_path = CStr('', config=False)
251 std_out_file_path = Unicode('', config=False)
252 std_err_file_path = Unicode('', config=False)
253 253 min_cores = Int(1, config=False)
254 254 max_cores = Int(1, config=False)
255 255 min_sockets = Int(1, config=False)
256 256 max_sockets = Int(1, config=False)
257 257 min_nodes = Int(1, config=False)
258 258 max_nodes = Int(1, config=False)
259 unit_type = Str("Core", config=False)
260 work_directory = CStr('', config=False)
259 unit_type = Unicode("Core", config=False)
260 work_directory = Unicode('', config=False)
261 261
262 262 def __init__(self, config=None):
263 263 super(IPControllerTask, self).__init__(config=config)
264 264 the_uuid = uuid.uuid1()
265 265 self.std_out_file_path = os.path.join('log','ipcontroller-%s.out' % the_uuid)
266 266 self.std_err_file_path = os.path.join('log','ipcontroller-%s.err' % the_uuid)
267 267
268 268 @property
269 269 def command_line(self):
270 270 return ' '.join(self.controller_cmd + self.controller_args)
271 271
272 272
273 273 class IPEngineTask(WinHPCTask):
274 274
275 task_name = Str('IPEngine', config=True)
275 task_name = Unicode('IPEngine', config=True)
276 276 engine_cmd = List(['ipengine.exe'], config=True)
277 277 engine_args = List(['--log-to-file', '--log-level', '40'], config=True)
278 278 # I don't want these to be configurable
279 std_out_file_path = CStr('', config=False)
280 std_err_file_path = CStr('', config=False)
279 std_out_file_path = Unicode('', config=False)
280 std_err_file_path = Unicode('', config=False)
281 281 min_cores = Int(1, config=False)
282 282 max_cores = Int(1, config=False)
283 283 min_sockets = Int(1, config=False)
284 284 max_sockets = Int(1, config=False)
285 285 min_nodes = Int(1, config=False)
286 286 max_nodes = Int(1, config=False)
287 unit_type = Str("Core", config=False)
288 work_directory = CStr('', config=False)
287 unit_type = Unicode("Core", config=False)
288 work_directory = Unicode('', config=False)
289 289
290 290 def __init__(self, config=None):
291 291 super(IPEngineTask,self).__init__(config=config)
292 292 the_uuid = uuid.uuid1()
293 293 self.std_out_file_path = os.path.join('log','ipengine-%s.out' % the_uuid)
294 294 self.std_err_file_path = os.path.join('log','ipengine-%s.err' % the_uuid)
295 295
296 296 @property
297 297 def command_line(self):
298 298 return ' '.join(self.engine_cmd + self.engine_args)
299 299
300 300
301 301 # j = WinHPCJob(None)
302 302 # j.job_name = 'IPCluster'
303 303 # j.username = 'GNET\\bgranger'
304 304 # j.requested_nodes = 'GREEN'
305 305 #
306 306 # t = WinHPCTask(None)
307 307 # t.task_name = 'Controller'
308 308 # t.command_line = r"\\blue\domainusers$\bgranger\Python\Python25\Scripts\ipcontroller.exe --log-to-file -p default --log-level 10"
309 309 # t.work_directory = r"\\blue\domainusers$\bgranger\.ipython\cluster_default"
310 310 # t.std_out_file_path = 'controller-out.txt'
311 311 # t.std_err_file_path = 'controller-err.txt'
312 312 # t.environment_variables['PYTHONPATH'] = r"\\blue\domainusers$\bgranger\Python\Python25\Lib\site-packages"
313 313 # j.add_task(t)
314 314
@@ -1,1356 +1,1356 b''
1 1 """A semi-synchronous Client for the ZMQ cluster"""
2 2 #-----------------------------------------------------------------------------
3 3 # Copyright (C) 2010 The IPython Development Team
4 4 #
5 5 # Distributed under the terms of the BSD License. The full license is in
6 6 # the file COPYING, distributed as part of this software.
7 7 #-----------------------------------------------------------------------------
8 8
9 9 #-----------------------------------------------------------------------------
10 10 # Imports
11 11 #-----------------------------------------------------------------------------
12 12
13 13 import os
14 14 import json
15 15 import time
16 16 import warnings
17 17 from datetime import datetime
18 18 from getpass import getpass
19 19 from pprint import pprint
20 20
21 21 pjoin = os.path.join
22 22
23 23 import zmq
24 24 # from zmq.eventloop import ioloop, zmqstream
25 25
26 26 from IPython.utils.path import get_ipython_dir
27 from IPython.utils.traitlets import (HasTraits, Int, Instance, CUnicode,
28 Dict, List, Bool, Str, Set)
27 from IPython.utils.traitlets import (HasTraits, Int, Instance, Unicode,
28 Dict, List, Bool, Set)
29 29 from IPython.external.decorator import decorator
30 30 from IPython.external.ssh import tunnel
31 31
32 32 from IPython.parallel import error
33 33 from IPython.parallel import streamsession as ss
34 34 from IPython.parallel import util
35 35
36 36 from .asyncresult import AsyncResult, AsyncHubResult
37 37 from IPython.parallel.apps.clusterdir import ClusterDir, ClusterDirError
38 38 from .view import DirectView, LoadBalancedView
39 39
40 40 #--------------------------------------------------------------------------
41 41 # Decorators for Client methods
42 42 #--------------------------------------------------------------------------
43 43
44 44 @decorator
45 45 def spin_first(f, self, *args, **kwargs):
46 46 """Call spin() to sync state prior to calling the method."""
47 47 self.spin()
48 48 return f(self, *args, **kwargs)
49 49
50 50
51 51 #--------------------------------------------------------------------------
52 52 # Classes
53 53 #--------------------------------------------------------------------------
54 54
55 55 class Metadata(dict):
56 56 """Subclass of dict for initializing metadata values.
57 57
58 58 Attribute access works on keys.
59 59
60 60 These objects have a strict set of keys - errors will raise if you try
61 61 to add new keys.
62 62 """
63 63 def __init__(self, *args, **kwargs):
64 64 dict.__init__(self)
65 65 md = {'msg_id' : None,
66 66 'submitted' : None,
67 67 'started' : None,
68 68 'completed' : None,
69 69 'received' : None,
70 70 'engine_uuid' : None,
71 71 'engine_id' : None,
72 72 'follow' : None,
73 73 'after' : None,
74 74 'status' : None,
75 75
76 76 'pyin' : None,
77 77 'pyout' : None,
78 78 'pyerr' : None,
79 79 'stdout' : '',
80 80 'stderr' : '',
81 81 }
82 82 self.update(md)
83 83 self.update(dict(*args, **kwargs))
84 84
85 85 def __getattr__(self, key):
86 86 """getattr aliased to getitem"""
87 87 if key in self.iterkeys():
88 88 return self[key]
89 89 else:
90 90 raise AttributeError(key)
91 91
92 92 def __setattr__(self, key, value):
93 93 """setattr aliased to setitem, with strict"""
94 94 if key in self.iterkeys():
95 95 self[key] = value
96 96 else:
97 97 raise AttributeError(key)
98 98
99 99 def __setitem__(self, key, value):
100 100 """strict static key enforcement"""
101 101 if key in self.iterkeys():
102 102 dict.__setitem__(self, key, value)
103 103 else:
104 104 raise KeyError(key)
105 105
106 106
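# A hedged sketch of the strict-key behavior described above:
# md = Metadata(status='ok')
# md.status                # -> 'ok': attribute access aliases item access
# md['stdout'] += 'hi\n'   # existing keys may be updated freely
# md.nonsense = 1          # raises AttributeError: the key set is fixed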
107 107 class Client(HasTraits):
108 108 """A semi-synchronous client to the IPython ZMQ cluster
109 109
110 110 Parameters
111 111 ----------
112 112
113 113 url_or_file : bytes; zmq url or path to ipcontroller-client.json
114 114 Connection information for the Hub's registration. If a json connector
115 115 file is given, then likely no further configuration is necessary.
116 116 [Default: use profile]
117 117 profile : bytes
118 118 The name of the Cluster profile to be used to find connector information.
119 119 [Default: 'default']
120 120 context : zmq.Context
121 121 Pass an existing zmq.Context instance, otherwise the client will create its own.
122 122 username : bytes
123 123 set username to be passed to the Session object
124 124 debug : bool
125 125 flag for lots of message printing for debug purposes
126 126
127 127 #-------------- ssh related args ----------------
128 128 # These are args for configuring the ssh tunnel to be used
129 129 # credentials are used to forward connections over ssh to the Controller
130 130 # Note that the ip given in `addr` needs to be relative to sshserver
131 131 # The most basic case is to leave addr as pointing to localhost (127.0.0.1),
132 132 # and set sshserver as the same machine the Controller is on. However,
133 133 # the only requirement is that sshserver is able to see the Controller
134 134 # (i.e. is within the same trusted network).
135 135
136 136 sshserver : str
137 137 A string of the form passed to ssh, i.e. 'server.tld' or 'user@server.tld:port'
138 138 If keyfile or password is specified, and this is not, it will default to
139 139 the ip given in addr.
140 140 sshkey : str; path to public ssh key file
141 141 This specifies a key to be used in ssh login, default None.
142 142 Regular default ssh keys will be used without specifying this argument.
143 143 password : str
144 144 Your ssh password to sshserver. Note that if this is left None,
145 145 you will be prompted for it if passwordless key based login is unavailable.
146 146 paramiko : bool
147 147 flag for whether to use paramiko instead of shell ssh for tunneling.
148 148 [default: True on win32, False otherwise]
149 149
150 150 ------- exec authentication args -------
151 151 If even localhost is untrusted, you can have some protection against
152 152 unauthorized execution by using a key. Messages are still sent
153 153 as cleartext, so if someone can snoop your loopback traffic this will
154 154 not help against malicious attacks.
155 155
156 156 exec_key : str
157 157 an authentication key or file containing a key
158 158 default: None
159 159
160 160
161 161 Attributes
162 162 ----------
163 163
164 164 ids : list of int engine IDs
165 165 requesting the ids attribute always synchronizes
166 166 the registration state. To request ids without synchronization,
167 167 use semi-private _ids attributes.
168 168
169 169 history : list of msg_ids
170 170 a list of msg_ids, keeping track of all the execution
171 171 messages you have submitted in order.
172 172
173 173 outstanding : set of msg_ids
174 174 a set of msg_ids that have been submitted, but whose
175 175 results have not yet been received.
176 176
177 177 results : dict
178 178 a dict of all our results, keyed by msg_id
179 179
180 180 block : bool
181 181 determines default behavior when block not specified
182 182 in execution methods
183 183
184 184 Methods
185 185 -------
186 186
187 187 spin
188 188 flushes incoming results and registration state changes
189 189 control methods spin, and requesting `ids` also ensures up-to-date state
190 190
191 191 wait
192 192 wait on one or more msg_ids
193 193
194 194 execution methods
195 195 apply
196 196 legacy: execute, run
197 197
198 198 data movement
199 199 push, pull, scatter, gather
200 200
201 201 query methods
202 202 queue_status, get_result, purge_results, result_status
203 203
204 204 control methods
205 205 abort, shutdown
206 206
207 207 """
208 208
209 209
210 210 block = Bool(False)
211 211 outstanding = Set()
212 212 results = Instance('collections.defaultdict', (dict,))
213 213 metadata = Instance('collections.defaultdict', (Metadata,))
214 214 history = List()
215 215 debug = Bool(False)
216 profile=CUnicode('default')
216 profile=Unicode('default')
217 217
218 218 _outstanding_dict = Instance('collections.defaultdict', (set,))
219 219 _ids = List()
220 220 _connected=Bool(False)
221 221 _ssh=Bool(False)
222 222 _context = Instance('zmq.Context')
223 223 _config = Dict()
224 224 _engines=Instance(util.ReverseDict, (), {})
225 225 # _hub_socket=Instance('zmq.Socket')
226 226 _query_socket=Instance('zmq.Socket')
227 227 _control_socket=Instance('zmq.Socket')
228 228 _iopub_socket=Instance('zmq.Socket')
229 229 _notification_socket=Instance('zmq.Socket')
230 230 _mux_socket=Instance('zmq.Socket')
231 231 _task_socket=Instance('zmq.Socket')
232 _task_scheme=Str()
232 _task_scheme=Unicode()
233 233 _closed = False
234 234 _ignored_control_replies=Int(0)
235 235 _ignored_hub_replies=Int(0)
236 236
237 237 def __init__(self, url_or_file=None, profile='default', cluster_dir=None, ipython_dir=None,
238 238 context=None, username=None, debug=False, exec_key=None,
239 239 sshserver=None, sshkey=None, password=None, paramiko=None,
240 240 timeout=10
241 241 ):
242 242 super(Client, self).__init__(debug=debug, profile=profile)
243 243 if context is None:
244 244 context = zmq.Context.instance()
245 245 self._context = context
246 246
247 247
248 248 self._setup_cluster_dir(profile, cluster_dir, ipython_dir)
249 249 if self._cd is not None:
250 250 if url_or_file is None:
251 251 url_or_file = pjoin(self._cd.security_dir, 'ipcontroller-client.json')
252 252 assert url_or_file is not None, "I can't find enough information to connect to a hub!"\
253 253 " Please specify at least one of url_or_file or profile."
254 254
255 255 try:
256 256 util.validate_url(url_or_file)
257 257 except AssertionError:
258 258 if not os.path.exists(url_or_file):
259 259 if self._cd:
260 260 url_or_file = os.path.join(self._cd.security_dir, url_or_file)
261 261 assert os.path.exists(url_or_file), "Not a valid connection file or url: %r"%url_or_file
262 262 with open(url_or_file) as f:
263 263 cfg = json.loads(f.read())
264 264 else:
265 265 cfg = {'url':url_or_file}
266 266
267 267 # sync defaults from args, json:
268 268 if sshserver:
269 269 cfg['ssh'] = sshserver
270 270 if exec_key:
271 271 cfg['exec_key'] = exec_key
272 272 exec_key = cfg['exec_key']
273 273 sshserver=cfg['ssh']
274 274 url = cfg['url']
275 275 location = cfg.setdefault('location', None)
276 276 cfg['url'] = util.disambiguate_url(cfg['url'], location)
277 277 url = cfg['url']
278 278
279 279 self._config = cfg
280 280
281 281 self._ssh = bool(sshserver or sshkey or password)
282 282 if self._ssh and sshserver is None:
283 283 # default to ssh via localhost
284 284 sshserver = url.split('://')[1].split(':')[0]
285 285 if self._ssh and password is None:
286 286 if tunnel.try_passwordless_ssh(sshserver, sshkey, paramiko):
287 287 password=False
288 288 else:
289 289 password = getpass("SSH Password for %s: "%sshserver)
290 290 ssh_kwargs = dict(keyfile=sshkey, password=password, paramiko=paramiko)
291 291 if exec_key is not None and os.path.isfile(exec_key):
292 292 arg = 'keyfile'
293 293 else:
294 294 arg = 'key'
295 295 key_arg = {arg:exec_key}
296 296 if username is None:
297 297 self.session = ss.StreamSession(**key_arg)
298 298 else:
299 299 self.session = ss.StreamSession(username=username, **key_arg)
300 300 self._query_socket = self._context.socket(zmq.XREQ)
301 301 self._query_socket.setsockopt(zmq.IDENTITY, self.session.session)
302 302 if self._ssh:
303 303 tunnel.tunnel_connection(self._query_socket, url, sshserver, **ssh_kwargs)
304 304 else:
305 305 self._query_socket.connect(url)
306 306
307 307 self.session.debug = self.debug
308 308
309 309 self._notification_handlers = {'registration_notification' : self._register_engine,
310 310 'unregistration_notification' : self._unregister_engine,
311 311 'shutdown_notification' : lambda msg: self.close(),
312 312 }
313 313 self._queue_handlers = {'execute_reply' : self._handle_execute_reply,
314 314 'apply_reply' : self._handle_apply_reply}
315 315 self._connect(sshserver, ssh_kwargs, timeout)
316 316
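# A hedged connection sketch (profile names and paths are illustrative):
# rc = Client()                                  # default profile
# rc = Client(profile='mycluster')               # resolve via cluster dir
# rc = Client('/path/to/ipcontroller-client.json',
#             sshserver='me@login.example.com')  # tunnel over ssh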
317 317 def __del__(self):
318 318 """cleanup sockets, but _not_ context."""
319 319 self.close()
320 320
321 321 def _setup_cluster_dir(self, profile, cluster_dir, ipython_dir):
322 322 if ipython_dir is None:
323 323 ipython_dir = get_ipython_dir()
324 324 if cluster_dir is not None:
325 325 try:
326 326 self._cd = ClusterDir.find_cluster_dir(cluster_dir)
327 327 return
328 328 except ClusterDirError:
329 329 pass
330 330 elif profile is not None:
331 331 try:
332 332 self._cd = ClusterDir.find_cluster_dir_by_profile(
333 333 ipython_dir, profile)
334 334 return
335 335 except ClusterDirError:
336 336 pass
337 337 self._cd = None
338 338
339 339 def _update_engines(self, engines):
340 340 """Update our engines dict and _ids from a dict of the form: {id:uuid}."""
341 341 for k,v in engines.iteritems():
342 342 eid = int(k)
343 343 self._engines[eid] = bytes(v) # force not unicode
344 344 self._ids.append(eid)
345 345 self._ids = sorted(self._ids)
346 346 if sorted(self._engines.keys()) != range(len(self._engines)) and \
347 347 self._task_scheme == 'pure' and self._task_socket:
348 348 self._stop_scheduling_tasks()
349 349
350 350 def _stop_scheduling_tasks(self):
351 351 """Stop scheduling tasks because an engine has been unregistered
352 352 from a pure ZMQ scheduler.
353 353 """
354 354 self._task_socket.close()
355 355 self._task_socket = None
356 356 msg = "An engine has been unregistered, and we are using pure " +\
357 357 "ZMQ task scheduling. Task farming will be disabled."
358 358 if self.outstanding:
359 359 msg += " If you were running tasks when this happened, " +\
360 360 "some `outstanding` msg_ids may never resolve."
361 361 warnings.warn(msg, RuntimeWarning)
362 362
363 363 def _build_targets(self, targets):
364 364 """Turn valid target IDs or 'all' into two lists:
365 365 (int_ids, uuids).
366 366 """
367 367 if not self._ids:
368 368 # flush notification socket if no engines yet, just in case
369 369 if not self.ids:
370 370 raise error.NoEnginesRegistered("Can't build targets without any engines")
371 371
372 372 if targets is None:
373 373 targets = self._ids
374 374 elif isinstance(targets, str):
375 375 if targets.lower() == 'all':
376 376 targets = self._ids
377 377 else:
378 378 raise TypeError("%r not valid str target, must be 'all'"%(targets))
379 379 elif isinstance(targets, int):
380 380 if targets < 0:
381 381 targets = self.ids[targets]
382 382 if targets not in self._ids:
383 383 raise IndexError("No such engine: %i"%targets)
384 384 targets = [targets]
385 385
386 386 if isinstance(targets, slice):
387 387 indices = range(len(self._ids))[targets]
388 388 ids = self.ids
389 389 targets = [ ids[i] for i in indices ]
390 390
391 391 if not isinstance(targets, (tuple, list, xrange)):
392 392 raise TypeError("targets by int/slice/collection of ints only, not %s"%(type(targets)))
393 393
394 394 return [self._engines[t] for t in targets], list(targets)
395 395
396 396 def _connect(self, sshserver, ssh_kwargs, timeout):
397 397 """setup all our socket connections to the cluster. This is called from
398 398 __init__."""
399 399
400 400 # Maybe allow reconnecting?
401 401 if self._connected:
402 402 return
403 403 self._connected=True
404 404
405 405 def connect_socket(s, url):
406 406 url = util.disambiguate_url(url, self._config['location'])
407 407 if self._ssh:
408 408 return tunnel.tunnel_connection(s, url, sshserver, **ssh_kwargs)
409 409 else:
410 410 return s.connect(url)
411 411
412 412 self.session.send(self._query_socket, 'connection_request')
413 413 r,w,x = zmq.select([self._query_socket],[],[], timeout)
414 414 if not r:
415 415 raise error.TimeoutError("Hub connection request timed out")
416 416 idents,msg = self.session.recv(self._query_socket,mode=0)
417 417 if self.debug:
418 418 pprint(msg)
419 419 msg = ss.Message(msg)
420 420 content = msg.content
421 421 self._config['registration'] = dict(content)
422 422 if content.status == 'ok':
423 423 if content.mux:
424 424 self._mux_socket = self._context.socket(zmq.XREQ)
425 425 self._mux_socket.setsockopt(zmq.IDENTITY, self.session.session)
426 426 connect_socket(self._mux_socket, content.mux)
427 427 if content.task:
428 428 self._task_scheme, task_addr = content.task
429 429 self._task_socket = self._context.socket(zmq.XREQ)
430 430 self._task_socket.setsockopt(zmq.IDENTITY, self.session.session)
431 431 connect_socket(self._task_socket, task_addr)
432 432 if content.notification:
433 433 self._notification_socket = self._context.socket(zmq.SUB)
434 434 connect_socket(self._notification_socket, content.notification)
435 435 self._notification_socket.setsockopt(zmq.SUBSCRIBE, b'')
436 436 # if content.query:
437 437 # self._query_socket = self._context.socket(zmq.XREQ)
438 438 # self._query_socket.setsockopt(zmq.IDENTITY, self.session.session)
439 439 # connect_socket(self._query_socket, content.query)
440 440 if content.control:
441 441 self._control_socket = self._context.socket(zmq.XREQ)
442 442 self._control_socket.setsockopt(zmq.IDENTITY, self.session.session)
443 443 connect_socket(self._control_socket, content.control)
444 444 if content.iopub:
445 445 self._iopub_socket = self._context.socket(zmq.SUB)
446 446 self._iopub_socket.setsockopt(zmq.SUBSCRIBE, b'')
447 447 self._iopub_socket.setsockopt(zmq.IDENTITY, self.session.session)
448 448 connect_socket(self._iopub_socket, content.iopub)
449 449 self._update_engines(dict(content.engines))
450 450 else:
451 451 self._connected = False
452 452 raise Exception("Failed to connect!")
453 453
454 454 #--------------------------------------------------------------------------
455 455 # handlers and callbacks for incoming messages
456 456 #--------------------------------------------------------------------------
457 457
458 458 def _unwrap_exception(self, content):
459 459 """unwrap exception, and remap engine_id to int."""
460 460 e = error.unwrap_exception(content)
461 461 # print e.traceback
462 462 if e.engine_info:
463 463 e_uuid = e.engine_info['engine_uuid']
464 464 eid = self._engines[e_uuid]
465 465 e.engine_info['engine_id'] = eid
466 466 return e
467 467
468 468 def _extract_metadata(self, header, parent, content):
469 469 md = {'msg_id' : parent['msg_id'],
470 470 'received' : datetime.now(),
471 471 'engine_uuid' : header.get('engine', None),
472 472 'follow' : parent.get('follow', []),
473 473 'after' : parent.get('after', []),
474 474 'status' : content['status'],
475 475 }
476 476
477 477 if md['engine_uuid'] is not None:
478 478 md['engine_id'] = self._engines.get(md['engine_uuid'], None)
479 479
480 480 if 'date' in parent:
481 481 md['submitted'] = datetime.strptime(parent['date'], util.ISO8601)
482 482 if 'started' in header:
483 483 md['started'] = datetime.strptime(header['started'], util.ISO8601)
484 484 if 'date' in header:
485 485 md['completed'] = datetime.strptime(header['date'], util.ISO8601)
486 486 return md
487 487
488 488 def _register_engine(self, msg):
489 489 """Register a new engine, and update our connection info."""
490 490 content = msg['content']
491 491 eid = content['id']
492 492 d = {eid : content['queue']}
493 493 self._update_engines(d)
494 494
495 495 def _unregister_engine(self, msg):
496 496 """Unregister an engine that has died."""
497 497 content = msg['content']
498 498 eid = int(content['id'])
499 499 if eid in self._ids:
500 500 self._ids.remove(eid)
501 501 uuid = self._engines.pop(eid)
502 502
503 503 self._handle_stranded_msgs(eid, uuid)
504 504
505 505 if self._task_socket and self._task_scheme == 'pure':
506 506 self._stop_scheduling_tasks()
507 507
508 508 def _handle_stranded_msgs(self, eid, uuid):
509 509 """Handle messages known to be on an engine when the engine unregisters.
510 510
511 511 It is possible that this will fire prematurely - that is, an engine will
512 512 go down after completing a result, and the client will be notified
513 513 of the unregistration and later receive the successful result.
514 514 """
515 515
516 516 outstanding = self._outstanding_dict[uuid]
517 517
518 518 for msg_id in list(outstanding):
519 519 if msg_id in self.results:
520 520 # we already
521 521 continue
522 522 try:
523 523 raise error.EngineError("Engine %r died while running task %r"%(eid, msg_id))
524 524 except:
525 525 content = error.wrap_exception()
526 526 # build a fake message:
527 527 parent = {}
528 528 header = {}
529 529 parent['msg_id'] = msg_id
530 530 header['engine'] = uuid
531 531 header['date'] = datetime.now().strftime(util.ISO8601)
532 532 msg = dict(parent_header=parent, header=header, content=content)
533 533 self._handle_apply_reply(msg)
534 534
535 535 def _handle_execute_reply(self, msg):
536 536 """Save the reply to an execute_request into our results.
537 537
538 538 execute messages are never actually used. apply is used instead.
539 539 """
540 540
541 541 parent = msg['parent_header']
542 542 msg_id = parent['msg_id']
543 543 if msg_id not in self.outstanding:
544 544 if msg_id in self.history:
545 545 print ("got stale result: %s"%msg_id)
546 546 else:
547 547 print ("got unknown result: %s"%msg_id)
548 548 else:
549 549 self.outstanding.remove(msg_id)
550 550 self.results[msg_id] = self._unwrap_exception(msg['content'])
551 551
552 552 def _handle_apply_reply(self, msg):
553 553 """Save the reply to an apply_request into our results."""
554 554 parent = msg['parent_header']
555 555 msg_id = parent['msg_id']
556 556 if msg_id not in self.outstanding:
557 557 if msg_id in self.history:
558 558 print ("got stale result: %s"%msg_id)
559 559 print self.results[msg_id]
560 560 print msg
561 561 else:
562 562 print ("got unknown result: %s"%msg_id)
563 563 else:
564 564 self.outstanding.remove(msg_id)
565 565 content = msg['content']
566 566 header = msg['header']
567 567
568 568 # construct metadata:
569 569 md = self.metadata[msg_id]
570 570 md.update(self._extract_metadata(header, parent, content))
571 571 # is this redundant?
572 572 self.metadata[msg_id] = md
573 573
574 574 e_outstanding = self._outstanding_dict[md['engine_uuid']]
575 575 if msg_id in e_outstanding:
576 576 e_outstanding.remove(msg_id)
577 577
578 578 # construct result:
579 579 if content['status'] == 'ok':
580 580 self.results[msg_id] = util.unserialize_object(msg['buffers'])[0]
581 581 elif content['status'] == 'aborted':
582 582 self.results[msg_id] = error.TaskAborted(msg_id)
583 583 elif content['status'] == 'resubmitted':
584 584 # TODO: handle resubmission
585 585 pass
586 586 else:
587 587 self.results[msg_id] = self._unwrap_exception(content)
588 588
589 589 def _flush_notifications(self):
590 590 """Flush notifications of engine registrations waiting
591 591 in ZMQ queue."""
592 592 msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)
593 593 while msg is not None:
594 594 if self.debug:
595 595 pprint(msg)
596 596 msg = msg[-1]
597 597 msg_type = msg['msg_type']
598 598 handler = self._notification_handlers.get(msg_type, None)
599 599 if handler is None:
600 600 raise Exception("Unhandled message type: %s"%msg.msg_type)
601 601 else:
602 602 handler(msg)
603 603 msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)
604 604
605 605 def _flush_results(self, sock):
606 606 """Flush task or queue results waiting in ZMQ queue."""
607 607 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
608 608 while msg is not None:
609 609 if self.debug:
610 610 pprint(msg)
611 611 msg = msg[-1]
612 612 msg_type = msg['msg_type']
613 613 handler = self._queue_handlers.get(msg_type, None)
614 614 if handler is None:
615 615 raise Exception("Unhandled message type: %s"%msg.msg_type)
616 616 else:
617 617 handler(msg)
618 618 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
619 619
620 620 def _flush_control(self, sock):
621 621 """Flush replies from the control channel waiting
622 622 in the ZMQ queue.
623 623
624 624 Currently: ignore them."""
625 625 if self._ignored_control_replies <= 0:
626 626 return
627 627 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
628 628 while msg is not None:
629 629 self._ignored_control_replies -= 1
630 630 if self.debug:
631 631 pprint(msg)
632 632 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
633 633
634 634 def _flush_ignored_control(self):
635 635 """flush ignored control replies"""
636 636 while self._ignored_control_replies > 0:
637 637 self.session.recv(self._control_socket)
638 638 self._ignored_control_replies -= 1
639 639
640 640 def _flush_ignored_hub_replies(self):
641 641 msg = self.session.recv(self._query_socket, mode=zmq.NOBLOCK)
642 642 while msg is not None:
643 643 msg = self.session.recv(self._query_socket, mode=zmq.NOBLOCK)
644 644
645 645 def _flush_iopub(self, sock):
646 646 """Flush replies from the iopub channel waiting
647 647 in the ZMQ queue.
648 648 """
649 649 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
650 650 while msg is not None:
651 651 if self.debug:
652 652 pprint(msg)
653 653 msg = msg[-1]
654 654 parent = msg['parent_header']
655 655 msg_id = parent['msg_id']
656 656 content = msg['content']
657 657 header = msg['header']
658 658 msg_type = msg['msg_type']
659 659
660 660 # init metadata:
661 661 md = self.metadata[msg_id]
662 662
663 663 if msg_type == 'stream':
664 664 name = content['name']
665 665 s = md[name] or ''
666 666 md[name] = s + content['data']
667 667 elif msg_type == 'pyerr':
668 668 md.update({'pyerr' : self._unwrap_exception(content)})
669 669 elif msg_type == 'pyin':
670 670 md.update({'pyin' : content['code']})
671 671 else:
672 672 md.update({msg_type : content.get('data', '')})
673 673
674 674 # redundant?
675 675 self.metadata[msg_id] = md
676 676
677 677 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
678 678
679 679 #--------------------------------------------------------------------------
680 680 # len, getitem
681 681 #--------------------------------------------------------------------------
682 682
683 683 def __len__(self):
684 684 """len(client) returns # of engines."""
685 685 return len(self.ids)
686 686
687 687 def __getitem__(self, key):
688 688 """index access returns DirectView multiplexer objects
689 689
690 690 Must be int, slice, or list/tuple/xrange of ints"""
691 691 if not isinstance(key, (int, slice, tuple, list, xrange)):
692 692 raise TypeError("key by int/slice/iterable of ints only, not %s"%(type(key)))
693 693 else:
694 694 return self.direct_view(key)
695 695
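# Hedged indexing examples (engine ids illustrative):
# rc[0]        # DirectView of engine 0
# rc[::2]      # DirectView of every other engine
# rc[[1, 3]]   # DirectView of engines 1 and 3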
696 696 #--------------------------------------------------------------------------
697 697 # Begin public methods
698 698 #--------------------------------------------------------------------------
699 699
700 700 @property
701 701 def ids(self):
702 702 """Always up-to-date ids property."""
703 703 self._flush_notifications()
704 704 # always copy:
705 705 return list(self._ids)
706 706
707 707 def close(self):
708 708 if self._closed:
709 709 return
710 710 snames = filter(lambda n: n.endswith('socket'), dir(self))
711 711 for socket in map(lambda name: getattr(self, name), snames):
712 712 if isinstance(socket, zmq.Socket) and not socket.closed:
713 713 socket.close()
714 714 self._closed = True
715 715
716 716 def spin(self):
717 717 """Flush any registration notifications and execution results
718 718 waiting in the ZMQ queue.
719 719 """
720 720 if self._notification_socket:
721 721 self._flush_notifications()
722 722 if self._mux_socket:
723 723 self._flush_results(self._mux_socket)
724 724 if self._task_socket:
725 725 self._flush_results(self._task_socket)
726 726 if self._control_socket:
727 727 self._flush_control(self._control_socket)
728 728 if self._iopub_socket:
729 729 self._flush_iopub(self._iopub_socket)
730 730 if self._query_socket:
731 731 self._flush_ignored_hub_replies()
732 732
733 733 def wait(self, jobs=None, timeout=-1):
734 734 """waits on one or more `jobs`, for up to `timeout` seconds.
735 735
736 736 Parameters
737 737 ----------
738 738
739 739 jobs : int, str, or list of ints and/or strs, or one or more AsyncResult objects
740 740 ints are indices to self.history
741 741 strs are msg_ids
742 742 default: wait on all outstanding messages
743 743 timeout : float
744 744 a time in seconds, after which to give up.
745 745 default is -1, which means no timeout
746 746
747 747 Returns
748 748 -------
749 749
750 750 True : when all msg_ids are done
751 751 False : timeout reached, some msg_ids still outstanding
752 752 """
753 753 tic = time.time()
754 754 if jobs is None:
755 755 theids = self.outstanding
756 756 else:
757 757 if isinstance(jobs, (int, str, AsyncResult)):
758 758 jobs = [jobs]
759 759 theids = set()
760 760 for job in jobs:
761 761 if isinstance(job, int):
762 762 # index access
763 763 job = self.history[job]
764 764 elif isinstance(job, AsyncResult):
765 765 map(theids.add, job.msg_ids)
766 766 continue
767 767 theids.add(job)
768 768 if not theids.intersection(self.outstanding):
769 769 return True
770 770 self.spin()
771 771 while theids.intersection(self.outstanding):
772 772 if timeout >= 0 and ( time.time()-tic ) > timeout:
773 773 break
774 774 time.sleep(1e-3)
775 775 self.spin()
776 776 return len(theids.intersection(self.outstanding)) == 0
777 777
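# A hedged sketch, assuming `view` is any View and `rc` this Client:
# ar = view.apply_async(time.sleep, 1)
# done = rc.wait([ar], timeout=5)   # True if finished, False on timeout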
778 778 #--------------------------------------------------------------------------
779 779 # Control methods
780 780 #--------------------------------------------------------------------------
781 781
782 782 @spin_first
783 783 def clear(self, targets=None, block=None):
784 784 """Clear the namespace in target(s)."""
785 785 block = self.block if block is None else block
786 786 targets = self._build_targets(targets)[0]
787 787 for t in targets:
788 788 self.session.send(self._control_socket, 'clear_request', content={}, ident=t)
789 789 error = False
790 790 if block:
791 791 self._flush_ignored_control()
792 792 for i in range(len(targets)):
793 793 idents,msg = self.session.recv(self._control_socket,0)
794 794 if self.debug:
795 795 pprint(msg)
796 796 if msg['content']['status'] != 'ok':
797 797 error = self._unwrap_exception(msg['content'])
798 798 else:
799 799 self._ignored_control_replies += len(targets)
800 800 if error:
801 801 raise error
802 802
803 803
804 804 @spin_first
805 805 def abort(self, jobs=None, targets=None, block=None):
806 806 """Abort specific jobs from the execution queues of target(s).
807 807
808 808 This is a mechanism to prevent jobs that have already been submitted
809 809 from executing.
810 810
811 811 Parameters
812 812 ----------
813 813
814 814 jobs : msg_id, list of msg_ids, or AsyncResult
815 815 The jobs to be aborted
816 816
817 817
818 818 """
819 819 block = self.block if block is None else block
820 820 targets = self._build_targets(targets)[0]
821 821 msg_ids = []
822 822 if isinstance(jobs, (basestring,AsyncResult)):
823 823 jobs = [jobs]
824 824 bad_ids = filter(lambda obj: not isinstance(obj, (basestring, AsyncResult)), jobs)
825 825 if bad_ids:
826 826 raise TypeError("Invalid msg_id type %r, expected str or AsyncResult"%bad_ids[0])
827 827 for j in jobs:
828 828 if isinstance(j, AsyncResult):
829 829 msg_ids.extend(j.msg_ids)
830 830 else:
831 831 msg_ids.append(j)
832 832 content = dict(msg_ids=msg_ids)
833 833 for t in targets:
834 834 self.session.send(self._control_socket, 'abort_request',
835 835 content=content, ident=t)
836 836 error = False
837 837 if block:
838 838 self._flush_ignored_control()
839 839 for i in range(len(targets)):
840 840 idents,msg = self.session.recv(self._control_socket,0)
841 841 if self.debug:
842 842 pprint(msg)
843 843 if msg['content']['status'] != 'ok':
844 844 error = self._unwrap_exception(msg['content'])
845 845 else:
846 846 self._ignored_control_replies += len(targets)
847 847 if error:
848 848 raise error
849 849
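# Hedged usage, assuming `ar` is an AsyncResult from this client:
# rc.abort(jobs=ar, block=True)   # raises if any engine reports an error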
850 850 @spin_first
851 851 def shutdown(self, targets=None, restart=False, hub=False, block=None):
852 852 """Terminates one or more engine processes, optionally including the hub."""
853 853 block = self.block if block is None else block
854 854 if hub:
855 855 targets = 'all'
856 856 targets = self._build_targets(targets)[0]
857 857 for t in targets:
858 858 self.session.send(self._control_socket, 'shutdown_request',
859 859 content={'restart':restart},ident=t)
860 860 error = False
861 861 if block or hub:
862 862 self._flush_ignored_control()
863 863 for i in range(len(targets)):
864 864 idents,msg = self.session.recv(self._control_socket, 0)
865 865 if self.debug:
866 866 pprint(msg)
867 867 if msg['content']['status'] != 'ok':
868 868 error = self._unwrap_exception(msg['content'])
869 869 else:
870 870 self._ignored_control_replies += len(targets)
871 871
872 872 if hub:
873 873 time.sleep(0.25)
874 874 self.session.send(self._query_socket, 'shutdown_request')
875 875 idents,msg = self.session.recv(self._query_socket, 0)
876 876 if self.debug:
877 877 pprint(msg)
878 878 if msg['content']['status'] != 'ok':
879 879 error = self._unwrap_exception(msg['content'])
880 880
881 881 if error:
882 882 raise error
883 883
884 884 #--------------------------------------------------------------------------
885 885 # Execution related methods
886 886 #--------------------------------------------------------------------------
887 887
888 888 def _maybe_raise(self, result):
889 889 """wrapper for maybe raising an exception if apply failed."""
890 890 if isinstance(result, error.RemoteError):
891 891 raise result
892 892
893 893 return result
894 894
895 895 def send_apply_message(self, socket, f, args=None, kwargs=None, subheader=None, track=False,
896 896 ident=None):
897 897 """construct and send an apply message via a socket.
898 898
899 899 This is the principal method with which all engine execution is performed by views.
900 900 """
901 901
902 902 assert not self._closed, "cannot use me anymore, I'm closed!"
903 903 # defaults:
904 904 args = args if args is not None else []
905 905 kwargs = kwargs if kwargs is not None else {}
906 906 subheader = subheader if subheader is not None else {}
907 907
908 908 # validate arguments
909 909 if not callable(f):
910 910 raise TypeError("f must be callable, not %s"%type(f))
911 911 if not isinstance(args, (tuple, list)):
912 912 raise TypeError("args must be tuple or list, not %s"%type(args))
913 913 if not isinstance(kwargs, dict):
914 914 raise TypeError("kwargs must be dict, not %s"%type(kwargs))
915 915 if not isinstance(subheader, dict):
916 916 raise TypeError("subheader must be dict, not %s"%type(subheader))
917 917
918 918 bufs = util.pack_apply_message(f,args,kwargs)
919 919
920 920 msg = self.session.send(socket, "apply_request", buffers=bufs, ident=ident,
921 921 subheader=subheader, track=track)
922 922
923 923 msg_id = msg['msg_id']
924 924 self.outstanding.add(msg_id)
925 925 if ident:
926 926 # possibly routed to a specific engine
927 927 if isinstance(ident, list):
928 928 ident = ident[-1]
929 929 if ident in self._engines.values():
930 930 # save for later, in case of engine death
931 931 self._outstanding_dict[ident].add(msg_id)
932 932 self.history.append(msg_id)
933 933 self.metadata[msg_id]['submitted'] = datetime.now()
934 934
935 935 return msg
936 936
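# A hedged sketch of how a view drives this (socket choice depends on the
# view; `f` is any picklable callable):
# msg = client.send_apply_message(client._task_socket, f, args=(1,))
# msg_id = msg['msg_id']   # then track via client.outstanding / .results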
937 937 #--------------------------------------------------------------------------
938 938 # construct a View object
939 939 #--------------------------------------------------------------------------
940 940
941 941 def load_balanced_view(self, targets=None):
942 942 """construct a LoadBalancedView object.
943 943
944 944 If no arguments are specified, create a LoadBalancedView
945 945 using all engines.
946 946
947 947 Parameters
948 948 ----------
949 949
950 950 targets: list, slice, int, etc. [default: use all engines]
951 951 The subset of engines across which to load-balance
952 952 """
953 953 if targets is not None:
954 954 targets = self._build_targets(targets)[1]
955 955 return LoadBalancedView(client=self, socket=self._task_socket, targets=targets)
956 956
957 957 def direct_view(self, targets='all'):
958 958 """construct a DirectView object.
959 959
960 960 If no targets are specified, create a DirectView
961 961 using all engines.
962 962
963 963 Parameters
964 964 ----------
965 965
966 966 targets: list, slice, int, etc. [default: use all engines]
967 967 The engines to use for the View
968 968 """
969 969 single = isinstance(targets, int)
970 970 targets = self._build_targets(targets)[1]
971 971 if single:
972 972 targets = targets[0]
973 973 return DirectView(client=self, socket=self._mux_socket, targets=targets)
974 974
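# Hedged examples of the two view constructors:
# lview = rc.load_balanced_view()          # balance across all engines
# lview = rc.load_balanced_view([0, 1])    # restrict to engines 0 and 1
# dview = rc.direct_view('all')            # multiplex to every engine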
975 975 #--------------------------------------------------------------------------
976 976 # Query methods
977 977 #--------------------------------------------------------------------------
978 978
979 979 @spin_first
980 980 def get_result(self, indices_or_msg_ids=None, block=None):
981 981 """Retrieve a result by msg_id or history index, wrapped in an AsyncResult object.
982 982
983 983 If the client already has the results, no request to the Hub will be made.
984 984
985 985 This is a convenient way to construct AsyncResult objects, which are wrappers
986 986 that include metadata about execution, and allow for awaiting results that
987 987 were not submitted by this Client.
988 988
989 989 It can also be a convenient way to retrieve the metadata associated with
990 990 blocking execution, since it always retrieves the metadata as well.
991 991
992 992 Examples
993 993 --------
994 994 ::
995 995
996 996 In [10]: ar = client.get_result(msg_id)
997 997
998 998 Parameters
999 999 ----------
1000 1000
1001 1001 indices_or_msg_ids : integer history index, str msg_id, or list of either
1002 1002 The indices or msg_ids of indices to be retrieved
1003 1003
1004 1004 block : bool
1005 1005 Whether to wait for the result to be done
1006 1006
1007 1007 Returns
1008 1008 -------
1009 1009
1010 1010 AsyncResult
1011 1011 A single AsyncResult object will always be returned.
1012 1012
1013 1013 AsyncHubResult
1014 1014 A subclass of AsyncResult that retrieves results from the Hub
1015 1015
1016 1016 """
1017 1017 block = self.block if block is None else block
1018 1018 if indices_or_msg_ids is None:
1019 1019 indices_or_msg_ids = -1
1020 1020
1021 1021 if not isinstance(indices_or_msg_ids, (list,tuple)):
1022 1022 indices_or_msg_ids = [indices_or_msg_ids]
1023 1023
1024 1024 theids = []
1025 1025 for id in indices_or_msg_ids:
1026 1026 if isinstance(id, int):
1027 1027 id = self.history[id]
1028 1028 if not isinstance(id, str):
1029 1029 raise TypeError("indices must be str or int, not %r"%id)
1030 1030 theids.append(id)
1031 1031
1032 1032 local_ids = filter(lambda msg_id: msg_id in self.history or msg_id in self.results, theids)
1033 1033 remote_ids = filter(lambda msg_id: msg_id not in local_ids, theids)
1034 1034
1035 1035 if remote_ids:
1036 1036 ar = AsyncHubResult(self, msg_ids=theids)
1037 1037 else:
1038 1038 ar = AsyncResult(self, msg_ids=theids)
1039 1039
1040 1040 if block:
1041 1041 ar.wait()
1042 1042
1043 1043 return ar
1044 1044
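# Hedged examples (the msg_id string is illustrative):
# ar = rc.get_result(-1)                      # most recent submission
# ar = rc.get_result('abc123', block=True)    # by msg_id, waiting for it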
1045 1045 @spin_first
1046 1046 def resubmit(self, indices_or_msg_ids=None, subheader=None, block=None):
1047 1047 """Resubmit one or more tasks.
1048 1048
1049 1049 in-flight tasks may not be resubmitted.
1050 1050
1051 1051 Parameters
1052 1052 ----------
1053 1053
1054 1054 indices_or_msg_ids : integer history index, str msg_id, or list of either
1055 1055 The indices or msg_ids of indices to be retrieved
1056 1056
1057 1057 block : bool
1058 1058 Whether to wait for the result to be done
1059 1059
1060 1060 Returns
1061 1061 -------
1062 1062
1063 1063 AsyncHubResult
1064 1064 A subclass of AsyncResult that retrieves results from the Hub
1065 1065
1066 1066 """
1067 1067 block = self.block if block is None else block
1068 1068 if indices_or_msg_ids is None:
1069 1069 indices_or_msg_ids = -1
1070 1070
1071 1071 if not isinstance(indices_or_msg_ids, (list,tuple)):
1072 1072 indices_or_msg_ids = [indices_or_msg_ids]
1073 1073
1074 1074 theids = []
1075 1075 for id in indices_or_msg_ids:
1076 1076 if isinstance(id, int):
1077 1077 id = self.history[id]
1078 1078 if not isinstance(id, str):
1079 1079 raise TypeError("indices must be str or int, not %r"%id)
1080 1080 theids.append(id)
1081 1081
1082 1082 for msg_id in theids:
1083 1083 self.outstanding.discard(msg_id)
1084 1084 if msg_id in self.history:
1085 1085 self.history.remove(msg_id)
1086 1086 self.results.pop(msg_id, None)
1087 1087 self.metadata.pop(msg_id, None)
1088 1088 content = dict(msg_ids = theids)
1089 1089
1090 1090 self.session.send(self._query_socket, 'resubmit_request', content)
1091 1091
1092 1092 zmq.select([self._query_socket], [], [])
1093 1093 idents,msg = self.session.recv(self._query_socket, zmq.NOBLOCK)
1094 1094 if self.debug:
1095 1095 pprint(msg)
1096 1096 content = msg['content']
1097 1097 if content['status'] != 'ok':
1098 1098 raise self._unwrap_exception(content)
1099 1099
1100 1100 ar = AsyncHubResult(self, msg_ids=theids)
1101 1101
1102 1102 if block:
1103 1103 ar.wait()
1104 1104
1105 1105 return ar
1106 1106
1107 1107 @spin_first
1108 1108 def result_status(self, msg_ids, status_only=True):
1109 1109 """Check on the status of the result(s) of the apply request with `msg_ids`.
1110 1110
1111 1111 If status_only is False, then the actual results will be retrieved, else
1112 1112 only the status of the results will be checked.
1113 1113
1114 1114 Parameters
1115 1115 ----------
1116 1116
1117 1117 msg_ids : list of msg_ids
1118 1118 if int:
1119 1119 Passed as index to self.history for convenience.
1120 1120 status_only : bool (default: True)
1121 1121 if False:
1122 1122 Retrieve the actual results of completed tasks.
1123 1123
1124 1124 Returns
1125 1125 -------
1126 1126
1127 1127 results : dict
1128 1128 There will always be the keys 'pending' and 'completed', which will
1129 1129 be lists of msg_ids that are incomplete or complete. If `status_only`
1130 1130 is False, then completed results will be keyed by their `msg_id`.
1131 1131 """
1132 1132 if not isinstance(msg_ids, (list,tuple)):
1133 1133 msg_ids = [msg_ids]
1134 1134
1135 1135 theids = []
1136 1136 for msg_id in msg_ids:
1137 1137 if isinstance(msg_id, int):
1138 1138 msg_id = self.history[msg_id]
1139 1139 if not isinstance(msg_id, basestring):
1140 1140 raise TypeError("msg_ids must be str, not %r"%msg_id)
1141 1141 theids.append(msg_id)
1142 1142
1143 1143 completed = []
1144 1144 local_results = {}
1145 1145
1146 1146 # comment this block out to temporarily disable local shortcut:
1147 1147 for msg_id in theids:
1148 1148 if msg_id in self.results:
1149 1149 completed.append(msg_id)
1150 1150 local_results[msg_id] = self.results[msg_id]
1151 1151 theids.remove(msg_id)
1152 1152
1153 1153 if theids: # some not locally cached
1154 1154 content = dict(msg_ids=theids, status_only=status_only)
1155 1155 msg = self.session.send(self._query_socket, "result_request", content=content)
1156 1156 zmq.select([self._query_socket], [], [])
1157 1157 idents,msg = self.session.recv(self._query_socket, zmq.NOBLOCK)
1158 1158 if self.debug:
1159 1159 pprint(msg)
1160 1160 content = msg['content']
1161 1161 if content['status'] != 'ok':
1162 1162 raise self._unwrap_exception(content)
1163 1163 buffers = msg['buffers']
1164 1164 else:
1165 1165 content = dict(completed=[],pending=[])
1166 1166
1167 1167 content['completed'].extend(completed)
1168 1168
1169 1169 if status_only:
1170 1170 return content
1171 1171
1172 1172 failures = []
1173 1173 # load cached results into result:
1174 1174 content.update(local_results)
1175 1175 # update cache with results:
1176 1176 for msg_id in sorted(theids):
1177 1177 if msg_id in content['completed']:
1178 1178 rec = content[msg_id]
1179 1179 parent = rec['header']
1180 1180 header = rec['result_header']
1181 1181 rcontent = rec['result_content']
1182 1182 iodict = rec['io']
1183 1183 if isinstance(rcontent, str):
1184 1184 rcontent = self.session.unpack(rcontent)
1185 1185
1186 1186 md = self.metadata[msg_id]
1187 1187 md.update(self._extract_metadata(header, parent, rcontent))
1188 1188 md.update(iodict)
1189 1189
1190 1190 if rcontent['status'] == 'ok':
1191 1191 res,buffers = util.unserialize_object(buffers)
1192 1192 else:
1193 1193 print rcontent
1194 1194 res = self._unwrap_exception(rcontent)
1195 1195 failures.append(res)
1196 1196
1197 1197 self.results[msg_id] = res
1198 1198 content[msg_id] = res
1199 1199
1200 1200 if len(theids) == 1 and failures:
1201 1201 raise failures[0]
1202 1202
1203 1203 error.collect_exceptions(failures, "result_status")
1204 1204 return content
1205 1205
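A minimal usage sketch for result_status (names hypothetical: assumes a
running cluster and a connected IPython.parallel.Client instance `rc`):

    from IPython.parallel import Client
    rc = Client()
    ar = rc[:].apply_async(lambda : 42)
    # poll completion state without transferring results:
    status = rc.result_status(ar.msg_ids, status_only=True)
    print status['pending'], status['completed']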
1206 1206 @spin_first
1207 1207 def queue_status(self, targets='all', verbose=False):
1208 1208 """Fetch the status of engine queues.
1209 1209
1210 1210 Parameters
1211 1211 ----------
1212 1212
1213 1213 targets : int/str/list of ints/strs
1214 1214 the engines whose states are to be queried.
1215 1215 default : all
1216 1216 verbose : bool
1217 1217 Whether to return lengths only, or lists of ids for each element
1218 1218 """
1219 1219 engine_ids = self._build_targets(targets)[1]
1220 1220 content = dict(targets=engine_ids, verbose=verbose)
1221 1221 self.session.send(self._query_socket, "queue_request", content=content)
1222 1222 idents,msg = self.session.recv(self._query_socket, 0)
1223 1223 if self.debug:
1224 1224 pprint(msg)
1225 1225 content = msg['content']
1226 1226 status = content.pop('status')
1227 1227 if status != 'ok':
1228 1228 raise self._unwrap_exception(content)
1229 1229 content = util.rekey(content)
1230 1230 if isinstance(targets, int):
1231 1231 return content[targets]
1232 1232 else:
1233 1233 return content
1234 1234
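Usage sketch (same hypothetical connected Client `rc` as above):

    counts = rc.queue_status()      # dict keyed by engine id, plus 'unassigned'
    detail = rc.queue_status(targets=0, verbose=True)  # msg_id lists for engine 0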
1235 1235 @spin_first
1236 1236 def purge_results(self, jobs=[], targets=[]):
1237 1237 """Tell the Hub to forget results.
1238 1238
1239 1239 Individual results can be purged by msg_id, or the entire
1240 1240 history of specific targets can be purged.
1241 1241
1242 1242 Parameters
1243 1243 ----------
1244 1244
1245 1245 jobs : str or list of str or AsyncResult objects
1246 1246 the msg_ids whose results should be forgotten.
1247 1247 targets : int/str/list of ints/strs
1248 1248 The targets, by uuid or int_id, whose entire history is to be purged.
1249 1249 Use `targets='all'` to scrub everything from the Hub's memory.
1250 1250
1251 1251 default : None
1252 1252 """
1253 1253 if not targets and not jobs:
1254 1254 raise ValueError("Must specify at least one of `targets` and `jobs`")
1255 1255 if targets:
1256 1256 targets = self._build_targets(targets)[1]
1257 1257
1258 1258 # construct msg_ids from jobs
1259 1259 msg_ids = []
1260 1260 if isinstance(jobs, (basestring,AsyncResult)):
1261 1261 jobs = [jobs]
1262 1262 bad_ids = filter(lambda obj: not isinstance(obj, (basestring, AsyncResult)), jobs)
1263 1263 if bad_ids:
1264 1264 raise TypeError("Invalid msg_id type %r, expected str or AsyncResult"%bad_ids[0])
1265 1265 for j in jobs:
1266 1266 if isinstance(j, AsyncResult):
1267 1267 msg_ids.extend(j.msg_ids)
1268 1268 else:
1269 1269 msg_ids.append(j)
1270 1270
1271 1271 content = dict(targets=targets, msg_ids=msg_ids)
1272 1272 self.session.send(self._query_socket, "purge_request", content=content)
1273 1273 idents, msg = self.session.recv(self._query_socket, 0)
1274 1274 if self.debug:
1275 1275 pprint(msg)
1276 1276 content = msg['content']
1277 1277 if content['status'] != 'ok':
1278 1278 raise self._unwrap_exception(content)
1279 1279
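Usage sketch (hypothetical `rc`, and `ar` a completed AsyncResult):

    rc.purge_results(jobs=ar)          # forget just these msg_ids
    rc.purge_results(targets=[0,1])    # entire history of engines 0 and 1
    rc.purge_results(targets='all')    # scrub everything the Hub remembers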
1280 1280 @spin_first
1281 1281 def hub_history(self):
1282 1282 """Get the Hub's history
1283 1283
1284 1284 Just like the Client, the Hub has a history, which is a list of msg_ids.
1285 1285 This will contain the history of all clients, and, depending on configuration,
1286 1286 may contain history across multiple cluster sessions.
1287 1287
1288 1288 Any msg_id returned here is a valid argument to `get_result`.
1289 1289
1290 1290 Returns
1291 1291 -------
1292 1292
1293 1293 msg_ids : list of strs
1294 1294 list of all msg_ids, ordered by task submission time.
1295 1295 """
1296 1296
1297 1297 self.session.send(self._query_socket, "history_request", content={})
1298 1298 idents, msg = self.session.recv(self._query_socket, 0)
1299 1299
1300 1300 if self.debug:
1301 1301 pprint(msg)
1302 1302 content = msg['content']
1303 1303 if content['status'] != 'ok':
1304 1304 raise self._unwrap_exception(content)
1305 1305 else:
1306 1306 return content['history']
1307 1307
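Usage sketch (hypothetical `rc`):

    hist = rc.hub_history()
    latest = rc.get_result(hist[-3:])   # any hub msg_id is valid for get_result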
1308 1308 @spin_first
1309 1309 def db_query(self, query, keys=None):
1310 1310 """Query the Hub's TaskRecord database
1311 1311
1312 1312 This will return a list of task record dicts that match `query`
1313 1313
1314 1314 Parameters
1315 1315 ----------
1316 1316
1317 1317 query : mongodb query dict
1318 1318 The search dict. See mongodb query docs for details.
1319 1319 keys : list of strs [optional]
1320 1320 The subset of keys to be returned. The default is to fetch everything but buffers.
1321 1321 'msg_id' will *always* be included.
1322 1322 """
1323 1323 if isinstance(keys, basestring):
1324 1324 keys = [keys]
1325 1325 content = dict(query=query, keys=keys)
1326 1326 self.session.send(self._query_socket, "db_request", content=content)
1327 1327 idents, msg = self.session.recv(self._query_socket, 0)
1328 1328 if self.debug:
1329 1329 pprint(msg)
1330 1330 content = msg['content']
1331 1331 if content['status'] != 'ok':
1332 1332 raise self._unwrap_exception(content)
1333 1333
1334 1334 records = content['records']
1335 1335 buffer_lens = content['buffer_lens']
1336 1336 result_buffer_lens = content['result_buffer_lens']
1337 1337 buffers = msg['buffers']
1338 1338 has_bufs = buffer_lens is not None
1339 1339 has_rbufs = result_buffer_lens is not None
1340 1340 for i,rec in enumerate(records):
1341 1341 # relink buffers
1342 1342 if has_bufs:
1343 1343 blen = buffer_lens[i]
1344 1344 rec['buffers'], buffers = buffers[:blen],buffers[blen:]
1345 1345 if has_rbufs:
1346 1346 blen = result_buffer_lens[i]
1347 1347 rec['result_buffers'], buffers = buffers[:blen],buffers[blen:]
1348 1348 # turn timestamps back into times
1349 1349 for key in 'submitted started completed resubmitted'.split():
1350 1350 maybedate = rec.get(key, None)
1351 1351 if maybedate and util.ISO8601_RE.match(maybedate):
1352 1352 rec[key] = datetime.strptime(maybedate, util.ISO8601)
1353 1353
1354 1354 return records
1355 1355
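Usage sketch (hypothetical `rc`) using the supported mongodb-style operators:

    from datetime import datetime, timedelta
    yesterday = datetime.now() - timedelta(1)
    # completed tasks submitted within the last day, two keys per record:
    recs = rc.db_query({'submitted' : {'$gte' : yesterday},
                        'completed' : {'$ne' : None}},
                       keys=['msg_id', 'completed'])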
1356 1356 __all__ = [ 'Client' ]
@@ -1,180 +1,180 b''
1 1 """A Task logger that presents our DB interface,
2 2 but exists entirely in memory and is implemented with dicts.
3 3
4 4 TaskRecords are dicts of the form:
5 5 {
6 6 'msg_id' : str(uuid),
7 7 'client_uuid' : str(uuid),
8 8 'engine_uuid' : str(uuid) or None,
9 9 'header' : dict(header),
10 10 'content': dict(content),
11 11 'buffers': list(buffers),
12 12 'submitted': datetime,
13 13 'started': datetime or None,
14 14 'completed': datetime or None,
15 15 'resubmitted': datetime or None,
16 16 'result_header' : dict(header) or None,
17 17 'result_content' : dict(content) or None,
18 18 'result_buffers' : list(buffers) or None,
19 19 }
20 20 With this info, many of the special categories of tasks can be defined by query:
21 21
22 22 pending: completed is None
23 23 client's outstanding: client_uuid = uuid && completed is None
24 24 MIA: arrived is None (and completed is None)
25 25 etc.
26 26
27 27 EngineRecords are dicts of the form:
28 28 {
29 29 'eid' : int(id),
30 30 'uuid': str(uuid)
31 31 }
32 32 This may be extended, but this is all that is currently stored.
33 33
34 34 We support a subset of mongodb operators:
35 35 $lt,$gt,$lte,$gte,$ne,$in,$nin,$all,$mod,$exists
36 36 """
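For example (a sketch; `client_uuid` is a hypothetical uuid string), the
categories above translate directly into query dicts for find_records:

    pending = {'completed' : None}
    outstanding = {'client_uuid' : client_uuid, 'completed' : None}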
37 37 #-----------------------------------------------------------------------------
38 38 # Copyright (C) 2010 The IPython Development Team
39 39 #
40 40 # Distributed under the terms of the BSD License. The full license is in
41 41 # the file COPYING, distributed as part of this software.
42 42 #-----------------------------------------------------------------------------
43 43
44 44
45 45 from datetime import datetime
46 46
47 47 from IPython.config.configurable import Configurable
48 48
49 from IPython.utils.traitlets import Dict, CUnicode
49 from IPython.utils.traitlets import Dict, Unicode
50 50
51 51 filters = {
52 52 '$lt' : lambda a,b: a < b,
53 53 '$gt' : lambda a,b: a > b,
54 54 '$eq' : lambda a,b: a == b,
55 55 '$ne' : lambda a,b: a != b,
56 56 '$lte': lambda a,b: a <= b,
57 57 '$gte': lambda a,b: a >= b,
58 58 '$in' : lambda a,b: a in b,
59 59 '$nin': lambda a,b: a not in b,
60 60 '$all': lambda a,b: all([ bb in a for bb in b ]),
61 61 '$mod': lambda a,b: a%b[0] == b[1],
62 62 '$exists' : lambda a,b: (b and a is not None) or (a is None and not b)
63 63 }
64 64
65 65
66 66 class CompositeFilter(object):
67 67 """Composite filter for matching multiple properties."""
68 68
69 69 def __init__(self, dikt):
70 70 self.tests = []
71 71 self.values = []
72 72 for key, value in dikt.iteritems():
73 73 self.tests.append(filters[key])
74 74 self.values.append(value)
75 75
76 76 def __call__(self, value):
77 77 for test,check in zip(self.tests, self.values):
78 78 if not test(value, check):
79 79 return False
80 80 return True
81 81
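Sketch of how a CompositeFilter behaves once built from an operator dict:

    f = CompositeFilter({'$gt' : 1, '$lt' : 10})
    f(5)     # True: 5 > 1 and 5 < 10
    f(12)    # False: fails the '$lt' test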
82 82 class BaseDB(Configurable):
83 83 """Empty Parent class so traitlets work on DB."""
84 84 # base configurable traits:
85 session = CUnicode("")
85 session = Unicode("")
86 86
87 87 class DictDB(BaseDB):
88 88 """Basic in-memory dict-based object for saving Task Records.
89 89
90 90 This is the first object to present the DB interface
91 91 for logging tasks out of memory.
92 92
93 93 The interface is based on MongoDB, so adding a MongoDB
94 94 backend should be straightforward.
95 95 """
96 96
97 97 _records = Dict()
98 98
99 99 def _match_one(self, rec, tests):
100 100 """Check if a specific record matches tests."""
101 101 for key,test in tests.iteritems():
102 102 if not test(rec.get(key, None)):
103 103 return False
104 104 return True
105 105
106 106 def _match(self, check):
107 107 """Find all the matches for a check dict."""
108 108 matches = []
109 109 tests = {}
110 110 for k,v in check.iteritems():
111 111 if isinstance(v, dict):
112 112 tests[k] = CompositeFilter(v)
113 113 else:
114 114 tests[k] = lambda o,v=v: o==v # bind v now; a bare lambda would see only the last v
115 115
116 116 for rec in self._records.itervalues():
117 117 if self._match_one(rec, tests):
118 118 matches.append(rec)
119 119 return matches
120 120
121 121 def _extract_subdict(self, rec, keys):
122 122 """extract subdict of keys"""
123 123 d = {}
124 124 d['msg_id'] = rec['msg_id']
125 125 for key in keys:
126 126 d[key] = rec[key]
127 127 return d
128 128
129 129 def add_record(self, msg_id, rec):
130 130 """Add a new Task Record, by msg_id."""
131 131 if msg_id in self._records:
132 132 raise KeyError("Already have msg_id %r"%(msg_id))
133 133 self._records[msg_id] = rec
134 134
135 135 def get_record(self, msg_id):
136 136 """Get a specific Task Record, by msg_id."""
137 137 if msg_id not in self._records:
138 138 raise KeyError("No such msg_id %r"%(msg_id))
139 139 return self._records[msg_id]
140 140
141 141 def update_record(self, msg_id, rec):
142 142 """Update the data in an existing record."""
143 143 self._records[msg_id].update(rec)
144 144
145 145 def drop_matching_records(self, check):
146 146 """Remove a record from the DB."""
147 147 matches = self._match(check)
148 148 for m in matches:
149 149 del self._records[m['msg_id']]
150 150
151 151 def drop_record(self, msg_id):
152 152 """Remove a record from the DB."""
153 153 del self._records[msg_id]
154 154
155 155
156 156 def find_records(self, check, keys=None):
157 157 """Find records matching a query dict, optionally extracting subset of keys.
158 158
159 159 Returns a list of matching records (subdicts of the requested keys, if `keys` is given).
160 160
161 161 Parameters
162 162 ----------
163 163
164 164 check: dict
165 165 mongodb-style query argument
166 166 keys: list of strs [optional]
167 167 if specified, the subset of keys to extract. msg_id will *always* be
168 168 included.
169 169 """
170 170 matches = self._match(check)
171 171 if keys:
172 172 return [ self._extract_subdict(rec, keys) for rec in matches ]
173 173 else:
174 174 return matches
175 175
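Usage sketch (assuming `db` is a populated DictDB instance):

    done = db.find_records({'completed' : {'$ne' : None}},
                           keys=['completed', 'engine_uuid'])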
176 176
177 177 def get_history(self):
178 178 """get all msg_ids, ordered by time submitted."""
179 179 msg_ids = self._records.keys()
180 180 return sorted(msg_ids, key=lambda m: self._records[m]['submitted'])
@@ -1,165 +1,165 b''
1 1 #!/usr/bin/env python
2 2 """
3 3 A multi-heart Heartbeat system using PUB and XREP sockets. Pings are sent out on the PUB,
4 4 and hearts are tracked based on their XREQ identities.
5 5 """
6 6 #-----------------------------------------------------------------------------
7 7 # Copyright (C) 2010-2011 The IPython Development Team
8 8 #
9 9 # Distributed under the terms of the BSD License. The full license is in
10 10 # the file COPYING, distributed as part of this software.
11 11 #-----------------------------------------------------------------------------
12 12
13 13 from __future__ import print_function
14 14 import time
15 15 import uuid
16 16
17 17 import zmq
18 from zmq.devices import ProcessDevice, ThreadDevice
18 from zmq.devices import ThreadDevice
19 19 from zmq.eventloop import ioloop, zmqstream
20 20
21 from IPython.utils.traitlets import Set, Instance, CFloat, Bool, CStr
21 from IPython.utils.traitlets import Set, Instance, CFloat
22 22 from IPython.parallel.factory import LoggingFactory
23 23
24 24 class Heart(object):
25 25 """A basic heart object for responding to a HeartMonitor.
26 26 This is a simple wrapper with defaults for the most common
27 27 Device model for responding to heartbeats.
28 28
29 29 It simply builds a threadsafe zmq.FORWARDER Device, defaulting to using
30 30 SUB/XREQ for in/out.
31 31
32 32 You can specify the XREQ's IDENTITY via the optional heart_id argument."""
33 33 device=None
34 34 id=None
35 35 def __init__(self, in_addr, out_addr, in_type=zmq.SUB, out_type=zmq.XREQ, heart_id=None):
36 36 self.device = ThreadDevice(zmq.FORWARDER, in_type, out_type)
37 37 self.device.daemon=True
38 38 self.device.connect_in(in_addr)
39 39 self.device.connect_out(out_addr)
40 40 if in_type == zmq.SUB:
41 41 self.device.setsockopt_in(zmq.SUBSCRIBE, "")
42 42 if heart_id is None:
43 43 heart_id = str(uuid.uuid4())
44 44 self.device.setsockopt_out(zmq.IDENTITY, heart_id)
45 45 self.id = heart_id
46 46
47 47 def start(self):
48 48 return self.device.start()
49 49
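Sketch of an engine-side Heart (addresses hypothetical; in practice they
come from the Hub's engine_info['heartbeat'] pair):

    heart = Heart('tcp://127.0.0.1:5555', 'tcp://127.0.0.1:5556')
    heart.start()   # device thread echoes each PUB ping back over XREQ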
50 50 class HeartMonitor(LoggingFactory):
51 51 """A basic HeartMonitor class
52 52 pingstream: a PUB stream
53 53 pongstream: an XREP stream
54 54 period: the period of the heartbeat in milliseconds"""
55 55
56 56 period=CFloat(1000, config=True,
57 57 help='The period (in ms) at which the Hub pings the engines for '
58 58 'heartbeats [default: 1000]',
59 59 )
60 60
61 61 pingstream=Instance('zmq.eventloop.zmqstream.ZMQStream')
62 62 pongstream=Instance('zmq.eventloop.zmqstream.ZMQStream')
63 63 loop = Instance('zmq.eventloop.ioloop.IOLoop')
64 64 def _loop_default(self):
65 65 return ioloop.IOLoop.instance()
66 66
67 67 # not settable:
68 68 hearts=Set()
69 69 responses=Set()
70 70 on_probation=Set()
71 71 last_ping=CFloat(0)
72 72 _new_handlers = Set()
73 73 _failure_handlers = Set()
74 74 lifetime = CFloat(0)
75 75 tic = CFloat(0)
76 76
77 77 def __init__(self, **kwargs):
78 78 super(HeartMonitor, self).__init__(**kwargs)
79 79
80 80 self.pongstream.on_recv(self.handle_pong)
81 81
82 82 def start(self):
83 83 self.caller = ioloop.PeriodicCallback(self.beat, self.period, self.loop)
84 84 self.caller.start()
85 85
86 86 def add_new_heart_handler(self, handler):
87 87 """add a new handler for new hearts"""
88 88 self.log.debug("heartbeat::new_heart_handler: %s"%handler)
89 89 self._new_handlers.add(handler)
90 90
91 91 def add_heart_failure_handler(self, handler):
92 92 """add a new handler for heart failure"""
93 93 self.log.debug("heartbeat::new heart failure handler: %s"%handler)
94 94 self._failure_handlers.add(handler)
95 95
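Sketch of wiring handlers onto a monitor (this mirrors what the Hub does
in its constructor; `on_new` and `on_dead` are hypothetical callables):

    monitor.add_new_heart_handler(on_new)
    monitor.add_heart_failure_handler(on_dead)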
96 96 def beat(self):
97 97 self.pongstream.flush()
98 98 self.last_ping = self.lifetime
99 99
100 100 toc = time.time()
101 101 self.lifetime += toc-self.tic
102 102 self.tic = toc
103 103 # self.log.debug("heartbeat::%s"%self.lifetime)
104 104 goodhearts = self.hearts.intersection(self.responses)
105 105 missed_beats = self.hearts.difference(goodhearts)
106 106 heartfailures = self.on_probation.intersection(missed_beats)
107 107 newhearts = self.responses.difference(goodhearts)
108 108 map(self.handle_new_heart, newhearts)
109 109 map(self.handle_heart_failure, heartfailures)
110 110 self.on_probation = missed_beats.intersection(self.hearts)
111 111 self.responses = set()
112 112 # print self.on_probation, self.hearts
113 113 # self.log.debug("heartbeat::beat %.3f, %i beating hearts"%(self.lifetime, len(self.hearts)))
114 114 self.pingstream.send(str(self.lifetime))
115 115
116 116 def handle_new_heart(self, heart):
117 117 if self._new_handlers:
118 118 for handler in self._new_handlers:
119 119 handler(heart)
120 120 else:
121 121 self.log.info("heartbeat::yay, got new heart %s!"%heart)
122 122 self.hearts.add(heart)
123 123
124 124 def handle_heart_failure(self, heart):
125 125 if self._failure_handlers:
126 126 for handler in self._failure_handlers:
127 127 try:
128 128 handler(heart)
129 129 except Exception:
130 130 self.log.error("heartbeat::Bad Handler! %s"%handler, exc_info=True)
131 131 pass
132 132 else:
133 133 self.log.info("heartbeat::Heart %s failed :("%heart)
134 134 self.hearts.remove(heart)
135 135
136 136
137 137 def handle_pong(self, msg):
138 138 "a heart just beat"
139 139 if msg[1] == str(self.lifetime):
140 140 delta = time.time()-self.tic
141 141 # self.log.debug("heartbeat::heart %r took %.2f ms to respond"%(msg[0], 1000*delta))
142 142 self.responses.add(msg[0])
143 143 elif msg[1] == str(self.last_ping):
144 144 delta = time.time()-self.tic + (self.lifetime-self.last_ping)
145 145 self.log.warn("heartbeat::heart %r missed a beat, and took %.2f ms to respond"%(msg[0], 1000*delta))
146 146 self.responses.add(msg[0])
147 147 else:
148 148 self.log.warn("heartbeat::got bad heartbeat (possibly old?): %s (current=%.3f)"%
149 149 (msg[1],self.lifetime))
150 150
151 151
152 152 if __name__ == '__main__':
153 153 loop = ioloop.IOLoop.instance()
154 154 context = zmq.Context()
155 155 pub = context.socket(zmq.PUB)
156 156 pub.bind('tcp://127.0.0.1:5555')
157 157 xrep = context.socket(zmq.XREP)
158 158 xrep.bind('tcp://127.0.0.1:5556')
159 159
160 160 outstream = zmqstream.ZMQStream(pub, loop)
161 161 instream = zmqstream.ZMQStream(xrep, loop)
162 162
163 163 hb = HeartMonitor(loop, outstream, instream)
164 164
165 165 loop.start()
@@ -1,1293 +1,1293 b''
1 1 #!/usr/bin/env python
2 2 """The IPython Controller Hub with 0MQ
3 3 This is the master object that handles connections from engines and clients,
4 4 and monitors traffic through the various queues.
5 5 """
6 6 #-----------------------------------------------------------------------------
7 7 # Copyright (C) 2010 The IPython Development Team
8 8 #
9 9 # Distributed under the terms of the BSD License. The full license is in
10 10 # the file COPYING, distributed as part of this software.
11 11 #-----------------------------------------------------------------------------
12 12
13 13 #-----------------------------------------------------------------------------
14 14 # Imports
15 15 #-----------------------------------------------------------------------------
16 16 from __future__ import print_function
17 17
18 18 import sys
19 19 import time
20 20 from datetime import datetime
21 21
22 22 import zmq
23 23 from zmq.eventloop import ioloop
24 24 from zmq.eventloop.zmqstream import ZMQStream
25 25
26 26 # internal:
27 27 from IPython.utils.importstring import import_item
28 28 from IPython.utils.traitlets import (
29 HasTraits, Instance, Int, CStr, Str, Dict, Set, List, Bool, Tuple
29 HasTraits, Instance, Int, Unicode, Dict, Set, Tuple, CStr
30 30 )
31 31
32 32 from IPython.parallel import error, util
33 33 from IPython.parallel.factory import RegistrationFactory, LoggingFactory
34 34
35 35 from .heartmonitor import HeartMonitor
36 36
37 37 #-----------------------------------------------------------------------------
38 38 # Code
39 39 #-----------------------------------------------------------------------------
40 40
41 41 def _passer(*args, **kwargs):
42 42 return
43 43
44 44 def _printer(*args, **kwargs):
45 45 print (args)
46 46 print (kwargs)
47 47
48 48 def empty_record():
49 49 """Return an empty dict with all record keys."""
50 50 return {
51 51 'msg_id' : None,
52 52 'header' : None,
53 53 'content': None,
54 54 'buffers': None,
55 55 'submitted': None,
56 56 'client_uuid' : None,
57 57 'engine_uuid' : None,
58 58 'started': None,
59 59 'completed': None,
60 60 'resubmitted': None,
61 61 'result_header' : None,
62 62 'result_content' : None,
63 63 'result_buffers' : None,
64 64 'queue' : None,
65 65 'pyin' : None,
66 66 'pyout': None,
67 67 'pyerr': None,
68 68 'stdout': '',
69 69 'stderr': '',
70 70 }
71 71
72 72 def init_record(msg):
73 73 """Initialize a TaskRecord based on a request."""
74 74 header = msg['header']
75 75 return {
76 76 'msg_id' : header['msg_id'],
77 77 'header' : header,
78 78 'content': msg['content'],
79 79 'buffers': msg['buffers'],
80 80 'submitted': datetime.strptime(header['date'], util.ISO8601),
81 81 'client_uuid' : None,
82 82 'engine_uuid' : None,
83 83 'started': None,
84 84 'completed': None,
85 85 'resubmitted': None,
86 86 'result_header' : None,
87 87 'result_content' : None,
88 88 'result_buffers' : None,
89 89 'queue' : None,
90 90 'pyin' : None,
91 91 'pyout': None,
92 92 'pyerr': None,
93 93 'stdout': '',
94 94 'stderr': '',
95 95 }
96 96
97 97
98 98 class EngineConnector(HasTraits):
99 99 """A simple object for accessing the various zmq connections of an object.
100 100 Attributes are:
101 101 id (int): engine ID
102 102 uuid (str): uuid (unused?)
103 103 queue (str): identity of queue's XREQ socket
104 104 registration (str): identity of registration XREQ socket
105 105 heartbeat (str): identity of heartbeat XREQ socket
106 106 """
107 107 id=Int(0)
108 queue=Str()
109 control=Str()
110 registration=Str()
111 heartbeat=Str()
108 queue=CStr()
109 control=CStr()
110 registration=CStr()
111 heartbeat=CStr()
112 112 pending=Set()
113 113
114 114 class HubFactory(RegistrationFactory):
115 115 """The Configurable for setting up a Hub."""
116 116
117 117 # port-pairs for monitoredqueues:
118 118 hb = Tuple(Int,Int,config=True,
119 119 help="""XREQ/SUB Port pair for Engine heartbeats""")
120 120 def _hb_default(self):
121 121 return tuple(util.select_random_ports(2))
122 122
123 123 mux = Tuple(Int,Int,config=True,
124 124 help="""Engine/Client Port pair for MUX queue""")
125 125
126 126 def _mux_default(self):
127 127 return tuple(util.select_random_ports(2))
128 128
129 129 task = Tuple(Int,Int,config=True,
130 130 help="""Engine/Client Port pair for Task queue""")
131 131 def _task_default(self):
132 132 return tuple(util.select_random_ports(2))
133 133
134 134 control = Tuple(Int,Int,config=True,
135 135 help="""Engine/Client Port pair for Control queue""")
136 136
137 137 def _control_default(self):
138 138 return tuple(util.select_random_ports(2))
139 139
140 140 iopub = Tuple(Int,Int,config=True,
141 141 help="""Engine/Client Port pair for IOPub relay""")
142 142
143 143 def _iopub_default(self):
144 144 return tuple(util.select_random_ports(2))
145 145
146 146 # single ports:
147 147 mon_port = Int(config=True,
148 148 help="""Monitor (SUB) port for queue traffic""")
149 149
150 150 def _mon_port_default(self):
151 151 return util.select_random_ports(1)[0]
152 152
153 153 notifier_port = Int(config=True,
154 154 help="""PUB port for sending engine status notifications""")
155 155
156 156 def _notifier_port_default(self):
157 157 return util.select_random_ports(1)[0]
158 158
159 engine_ip = CStr('127.0.0.1', config=True,
159 engine_ip = Unicode('127.0.0.1', config=True,
160 160 help="IP on which to listen for engine connections. [default: loopback]")
161 engine_transport = CStr('tcp', config=True,
161 engine_transport = Unicode('tcp', config=True,
162 162 help="0MQ transport for engine connections. [default: tcp]")
163 163
164 client_ip = CStr('127.0.0.1', config=True,
164 client_ip = Unicode('127.0.0.1', config=True,
165 165 help="IP on which to listen for client connections. [default: loopback]")
166 client_transport = CStr('tcp', config=True,
166 client_transport = Unicode('tcp', config=True,
167 167 help="0MQ transport for client connections. [default : tcp]")
168 168
169 monitor_ip = CStr('127.0.0.1', config=True,
169 monitor_ip = Unicode('127.0.0.1', config=True,
170 170 help="IP on which to listen for monitor messages. [default: loopback]")
171 monitor_transport = CStr('tcp', config=True,
171 monitor_transport = Unicode('tcp', config=True,
172 172 help="0MQ transport for monitor messages. [default : tcp]")
173 173
174 monitor_url = CStr('')
174 monitor_url = Unicode('')
175 175
176 db_class = CStr('IPython.parallel.controller.dictdb.DictDB', config=True,
176 db_class = Unicode('IPython.parallel.controller.dictdb.DictDB', config=True,
177 177 help="""The class to use for the DB backend""")
178 178
179 179 # not configurable
180 180 db = Instance('IPython.parallel.controller.dictdb.BaseDB')
181 181 heartmonitor = Instance('IPython.parallel.controller.heartmonitor.HeartMonitor')
182 182
183 183 def _ip_changed(self, name, old, new):
184 184 self.engine_ip = new
185 185 self.client_ip = new
186 186 self.monitor_ip = new
187 187 self._update_monitor_url()
188 188
189 189 def _update_monitor_url(self):
190 190 self.monitor_url = "%s://%s:%i"%(self.monitor_transport, self.monitor_ip, self.mon_port)
191 191
192 192 def _transport_changed(self, name, old, new):
193 193 self.engine_transport = new
194 194 self.client_transport = new
195 195 self.monitor_transport = new
196 196 self._update_monitor_url()
197 197
198 198 def __init__(self, **kwargs):
199 199 super(HubFactory, self).__init__(**kwargs)
200 200 self._update_monitor_url()
201 201 # self.on_trait_change(self._sync_ips, 'ip')
202 202 # self.on_trait_change(self._sync_transports, 'transport')
203 203 # self.subconstructors.append(self.construct_hub)
204 204
205 205
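Construction sketch (port numbers hypothetical; any ports omitted fall
back to the random defaults above, and construct() assumes the inherited
session/context defaults are usable):

    hf = HubFactory(engine_ip=u'0.0.0.0', hb=(5555,5556), mux=(5100,5101))
    hf.construct()   # builds self.hub and binds all sockets
    hf.start()       # starts the heartmonitor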
206 206 def construct(self):
207 207 self.init_hub()
208 208
209 209 def start(self):
210 210 self.heartmonitor.start()
211 211 self.log.info("Heartmonitor started")
212 212
213 213 def init_hub(self):
214 214 """construct"""
215 215 client_iface = "%s://%s:"%(self.client_transport, self.client_ip) + "%i"
216 216 engine_iface = "%s://%s:"%(self.engine_transport, self.engine_ip) + "%i"
217 217
218 218 ctx = self.context
219 219 loop = self.loop
220 220
221 221 # Registrar socket
222 222 q = ZMQStream(ctx.socket(zmq.XREP), loop)
223 223 q.bind(client_iface % self.regport)
224 224 self.log.info("Hub listening on %s for registration."%(client_iface%self.regport))
225 225 if self.client_ip != self.engine_ip:
226 226 q.bind(engine_iface % self.regport)
227 227 self.log.info("Hub listening on %s for registration."%(engine_iface%self.regport))
228 228
229 229 ### Engine connections ###
230 230
231 231 # heartbeat
232 232 hpub = ctx.socket(zmq.PUB)
233 233 hpub.bind(engine_iface % self.hb[0])
234 234 hrep = ctx.socket(zmq.XREP)
235 235 hrep.bind(engine_iface % self.hb[1])
236 236 self.heartmonitor = HeartMonitor(loop=loop, pingstream=ZMQStream(hpub,loop), pongstream=ZMQStream(hrep,loop),
237 237 config=self.config)
238 238
239 239 ### Client connections ###
240 240 # Notifier socket
241 241 n = ZMQStream(ctx.socket(zmq.PUB), loop)
242 242 n.bind(client_iface%self.notifier_port)
243 243
244 244 ### build and launch the queues ###
245 245
246 246 # monitor socket
247 247 sub = ctx.socket(zmq.SUB)
248 248 sub.setsockopt(zmq.SUBSCRIBE, "")
249 249 sub.bind(self.monitor_url)
250 250 sub.bind('inproc://monitor')
251 251 sub = ZMQStream(sub, loop)
252 252
253 253 # connect the db
254 254 self.log.info('Hub using DB backend: %r'%(self.db_class.split('.')[-1]))
255 255 # cdir = self.config.Global.cluster_dir
256 self.db = import_item(self.db_class)(session=self.session.session, config=self.config)
256 self.db = import_item(str(self.db_class))(session=self.session.session, config=self.config)
257 257 time.sleep(.25)
258 258 try:
259 259 scheme = self.config.TaskScheduler.scheme_name
260 260 except AttributeError:
261 261 from .scheduler import TaskScheduler
262 262 scheme = TaskScheduler.scheme_name.get_default_value()
263 263 # build connection dicts
264 264 self.engine_info = {
265 265 'control' : engine_iface%self.control[1],
266 266 'mux': engine_iface%self.mux[1],
267 267 'heartbeat': (engine_iface%self.hb[0], engine_iface%self.hb[1]),
268 268 'task' : engine_iface%self.task[1],
269 269 'iopub' : engine_iface%self.iopub[1],
270 270 # 'monitor' : engine_iface%self.mon_port,
271 271 }
272 272
273 273 self.client_info = {
274 274 'control' : client_iface%self.control[0],
275 275 'mux': client_iface%self.mux[0],
276 276 'task' : (scheme, client_iface%self.task[0]),
277 277 'iopub' : client_iface%self.iopub[0],
278 278 'notification': client_iface%self.notifier_port
279 279 }
280 280 self.log.debug("Hub engine addrs: %s"%self.engine_info)
281 281 self.log.debug("Hub client addrs: %s"%self.client_info)
282 282
283 283 # resubmit stream
284 284 r = ZMQStream(ctx.socket(zmq.XREQ), loop)
285 285 url = util.disambiguate_url(self.client_info['task'][-1])
286 286 r.setsockopt(zmq.IDENTITY, self.session.session)
287 287 r.connect(url)
288 288
289 289 self.hub = Hub(loop=loop, session=self.session, monitor=sub, heartmonitor=self.heartmonitor,
290 290 query=q, notifier=n, resubmit=r, db=self.db,
291 291 engine_info=self.engine_info, client_info=self.client_info,
292 292 logname=self.log.name)
293 293
294 294
295 295 class Hub(LoggingFactory):
296 296 """The IPython Controller Hub with 0MQ connections
297 297
298 298 Parameters
299 299 ==========
300 300 loop: zmq IOLoop instance
301 301 session: StreamSession object
302 302 <removed> context: zmq context for creating new connections (?)
303 303 queue: ZMQStream for monitoring the command queue (SUB)
304 304 query: ZMQStream for engine registration and client queries requests (XREP)
305 305 heartbeat: HeartMonitor object checking the pulse of the engines
306 306 notifier: ZMQStream for broadcasting engine registration changes (PUB)
307 307 db: connection to db for out of memory logging of commands
308 308 NotImplemented
309 309 engine_info: dict of zmq connection information for engines to connect
310 310 to the queues.
311 311 client_info: dict of zmq connection information for engines to connect
312 312 to the queues.
313 313 """
314 314 # internal data structures:
315 315 ids=Set() # engine IDs
316 316 keytable=Dict()
317 317 by_ident=Dict()
318 318 engines=Dict()
319 319 clients=Dict()
320 320 hearts=Dict()
321 321 pending=Set()
322 322 queues=Dict() # pending msg_ids keyed by engine_id
323 323 tasks=Dict() # pending msg_ids submitted as tasks, keyed by client_id
324 324 completed=Dict() # completed msg_ids keyed by engine_id
325 325 all_completed=Set() # completed msg_ids, across all engines
326 326 dead_engines=Set() # uuids of engines that died or unregistered
327 327 unassigned=Set() # set of task msg_ids not yet assigned a destination
328 328 incoming_registrations=Dict()
329 329 registration_timeout=Int()
330 330 _idcounter=Int(0)
331 331
332 332 # objects from constructor:
333 333 loop=Instance(ioloop.IOLoop)
334 334 query=Instance(ZMQStream)
335 335 monitor=Instance(ZMQStream)
336 336 notifier=Instance(ZMQStream)
337 337 resubmit=Instance(ZMQStream)
338 338 heartmonitor=Instance(HeartMonitor)
339 339 db=Instance(object)
340 340 client_info=Dict()
341 341 engine_info=Dict()
342 342
343 343
344 344 def __init__(self, **kwargs):
345 345 """
346 346 # universal:
347 347 loop: IOLoop for creating future connections
348 348 session: streamsession for sending serialized data
349 349 # engine:
350 350 queue: ZMQStream for monitoring queue messages
351 351 query: ZMQStream for engine+client registration and client requests
352 352 heartbeat: HeartMonitor object for tracking engines
353 353 # extra:
354 354 db: ZMQStream for db connection (NotImplemented)
355 355 engine_info: zmq address/protocol dict for engine connections
356 356 client_info: zmq address/protocol dict for client connections
357 357 """
358 358
359 359 super(Hub, self).__init__(**kwargs)
360 360 self.registration_timeout = max(5000, 2*self.heartmonitor.period)
361 361
362 362 # validate connection dicts:
363 363 for k,v in self.client_info.iteritems():
364 364 if k == 'task':
365 365 util.validate_url_container(v[1])
366 366 else:
367 367 util.validate_url_container(v)
368 368 # util.validate_url_container(self.client_info)
369 369 util.validate_url_container(self.engine_info)
370 370
371 371 # register our callbacks
372 372 self.query.on_recv(self.dispatch_query)
373 373 self.monitor.on_recv(self.dispatch_monitor_traffic)
374 374
375 375 self.heartmonitor.add_heart_failure_handler(self.handle_heart_failure)
376 376 self.heartmonitor.add_new_heart_handler(self.handle_new_heart)
377 377
378 378 self.monitor_handlers = { 'in' : self.save_queue_request,
379 379 'out': self.save_queue_result,
380 380 'intask': self.save_task_request,
381 381 'outtask': self.save_task_result,
382 382 'tracktask': self.save_task_destination,
383 383 'incontrol': _passer,
384 384 'outcontrol': _passer,
385 385 'iopub': self.save_iopub_message,
386 386 }
387 387
388 388 self.query_handlers = {'queue_request': self.queue_status,
389 389 'result_request': self.get_results,
390 390 'history_request': self.get_history,
391 391 'db_request': self.db_query,
392 392 'purge_request': self.purge_results,
393 393 'load_request': self.check_load,
394 394 'resubmit_request': self.resubmit_task,
395 395 'shutdown_request': self.shutdown_request,
396 396 'registration_request' : self.register_engine,
397 397 'unregistration_request' : self.unregister_engine,
398 398 'connection_request': self.connection_request,
399 399 }
400 400
401 401 # ignore resubmit replies
402 402 self.resubmit.on_recv(lambda msg: None, copy=False)
403 403
404 404 self.log.info("hub::created hub")
405 405
406 406 @property
407 407 def _next_id(self):
408 408 """gemerate a new ID.
409 409
410 410 No longer reuse old ids, just count from 0."""
411 411 newid = self._idcounter
412 412 self._idcounter += 1
413 413 return newid
414 414 # newid = 0
415 415 # incoming = [id[0] for id in self.incoming_registrations.itervalues()]
416 416 # # print newid, self.ids, self.incoming_registrations
417 417 # while newid in self.ids or newid in incoming:
418 418 # newid += 1
419 419 # return newid
420 420
421 421 #-----------------------------------------------------------------------------
422 422 # message validation
423 423 #-----------------------------------------------------------------------------
424 424
425 425 def _validate_targets(self, targets):
426 426 """turn any valid targets argument into a list of integer ids"""
427 427 if targets is None:
428 428 # default to all
429 429 targets = self.ids
430 430
431 431 if isinstance(targets, (int,str,unicode)):
432 432 # only one target specified
433 433 targets = [targets]
434 434 _targets = []
435 435 for t in targets:
436 436 # map raw identities to ids
437 437 if isinstance(t, (str,unicode)):
438 438 t = self.by_ident.get(t, t)
439 439 _targets.append(t)
440 440 targets = _targets
441 441 bad_targets = [ t for t in targets if t not in self.ids ]
442 442 if bad_targets:
443 443 raise IndexError("No Such Engine: %r"%bad_targets)
444 444 if not targets:
445 445 raise IndexError("No Engines Registered")
446 446 return targets
447 447
448 448 #-----------------------------------------------------------------------------
449 449 # dispatch methods (1 per stream)
450 450 #-----------------------------------------------------------------------------
451 451
452 452 # def dispatch_registration_request(self, msg):
453 453 # """"""
454 454 # self.log.debug("registration::dispatch_register_request(%s)"%msg)
455 455 # idents,msg = self.session.feed_identities(msg)
456 456 # if not idents:
457 457 # self.log.error("Bad Query Message: %s"%msg, exc_info=True)
458 458 # return
459 459 # try:
460 460 # msg = self.session.unpack_message(msg,content=True)
461 461 # except:
462 462 # self.log.error("registration::got bad registration message: %s"%msg, exc_info=True)
463 463 # return
464 464 #
465 465 # msg_type = msg['msg_type']
466 466 # content = msg['content']
467 467 #
468 468 # handler = self.query_handlers.get(msg_type, None)
469 469 # if handler is None:
470 470 # self.log.error("registration::got bad registration message: %s"%msg)
471 471 # else:
472 472 # handler(idents, msg)
473 473
474 474 def dispatch_monitor_traffic(self, msg):
475 475 """all ME and Task queue messages come through here, as well as
476 476 IOPub traffic."""
477 477 self.log.debug("monitor traffic: %r"%msg[:2])
478 478 switch = msg[0]
479 479 idents, msg = self.session.feed_identities(msg[1:])
480 480 if not idents:
481 481 self.log.error("Bad Monitor Message: %r"%msg)
482 482 return
483 483 handler = self.monitor_handlers.get(switch, None)
484 484 if handler is not None:
485 485 handler(idents, msg)
486 486 else:
487 487 self.log.error("Invalid monitor topic: %r"%switch)
488 488
489 489
490 490 def dispatch_query(self, msg):
491 491 """Route registration requests and queries from clients."""
492 492 idents, msg = self.session.feed_identities(msg)
493 493 if not idents:
494 494 self.log.error("Bad Query Message: %r"%msg)
495 495 return
496 496 client_id = idents[0]
497 497 try:
498 498 msg = self.session.unpack_message(msg, content=True)
499 499 except:
500 500 content = error.wrap_exception()
501 501 self.log.error("Bad Query Message: %r"%msg, exc_info=True)
502 502 self.session.send(self.query, "hub_error", ident=client_id,
503 503 content=content)
504 504 return
505 505
506 506 # print client_id, header, parent, content
507 507 #switch on message type:
508 508 msg_type = msg['msg_type']
509 509 self.log.info("client::client %r requested %r"%(client_id, msg_type))
510 510 handler = self.query_handlers.get(msg_type, None)
511 511 try:
512 512 assert handler is not None, "Bad Message Type: %r"%msg_type
513 513 except:
514 514 content = error.wrap_exception()
515 515 self.log.error("Bad Message Type: %r"%msg_type, exc_info=True)
516 516 self.session.send(self.query, "hub_error", ident=client_id,
517 517 content=content)
518 518 return
519 519
520 520 else:
521 521 handler(idents, msg)
522 522
523 523 def dispatch_db(self, msg):
524 524 """"""
525 525 raise NotImplementedError
526 526
527 527 #---------------------------------------------------------------------------
528 528 # handler methods (1 per event)
529 529 #---------------------------------------------------------------------------
530 530
531 531 #----------------------- Heartbeat --------------------------------------
532 532
533 533 def handle_new_heart(self, heart):
534 534 """handler to attach to heartbeater.
535 535 Called when a new heart starts to beat.
536 536 Triggers completion of registration."""
537 537 self.log.debug("heartbeat::handle_new_heart(%r)"%heart)
538 538 if heart not in self.incoming_registrations:
539 539 self.log.info("heartbeat::ignoring new heart: %r"%heart)
540 540 else:
541 541 self.finish_registration(heart)
542 542
543 543
544 544 def handle_heart_failure(self, heart):
545 545 """handler to attach to heartbeater.
546 546 called when a previously registered heart fails to respond to beat request.
547 547 triggers unregistration"""
548 548 self.log.debug("heartbeat::handle_heart_failure(%r)"%heart)
549 549 eid = self.hearts.get(heart, None)
550 550 if eid is None:
551 551 self.log.info("heartbeat::ignoring heart failure %r"%heart)
552 552 else:
553 553 queue = self.engines[eid].queue
554 554 self.unregister_engine(heart, dict(content=dict(id=eid, queue=queue)))
555 555
556 556 #----------------------- MUX Queue Traffic ------------------------------
557 557
558 558 def save_queue_request(self, idents, msg):
559 559 if len(idents) < 2:
560 560 self.log.error("invalid identity prefix: %s"%idents)
561 561 return
562 562 queue_id, client_id = idents[:2]
563 563 try:
564 564 msg = self.session.unpack_message(msg, content=False)
565 565 except:
566 566 self.log.error("queue::client %r sent invalid message to %r: %s"%(client_id, queue_id, msg), exc_info=True)
567 567 return
568 568
569 569 eid = self.by_ident.get(queue_id, None)
570 570 if eid is None:
571 571 self.log.error("queue::target %r not registered"%queue_id)
572 572 self.log.debug("queue:: valid are: %s"%(self.by_ident.keys()))
573 573 return
574 574
575 575 header = msg['header']
576 576 msg_id = header['msg_id']
577 577 record = init_record(msg)
578 578 record['engine_uuid'] = queue_id
579 579 record['client_uuid'] = client_id
580 580 record['queue'] = 'mux'
581 581
582 582 try:
583 583 # it's possible iopub arrived first:
584 584 existing = self.db.get_record(msg_id)
585 585 for key,evalue in existing.iteritems():
586 586 rvalue = record.get(key, None)
587 587 if evalue and rvalue and evalue != rvalue:
588 588 self.log.warn("conflicting initial state for record: %r:%r <%r> %r"%(msg_id, rvalue, key, evalue))
589 589 elif evalue and not rvalue:
590 590 record[key] = evalue
591 591 self.db.update_record(msg_id, record)
592 592 except KeyError:
593 593 self.db.add_record(msg_id, record)
594 594
595 595 self.pending.add(msg_id)
596 596 self.queues[eid].append(msg_id)
597 597
598 598 def save_queue_result(self, idents, msg):
599 599 if len(idents) < 2:
600 600 self.log.error("invalid identity prefix: %s"%idents)
601 601 return
602 602
603 603 client_id, queue_id = idents[:2]
604 604 try:
605 605 msg = self.session.unpack_message(msg, content=False)
606 606 except:
607 607 self.log.error("queue::engine %r sent invalid message to %r: %s"%(
608 608 queue_id,client_id, msg), exc_info=True)
609 609 return
610 610
611 611 eid = self.by_ident.get(queue_id, None)
612 612 if eid is None:
613 613 self.log.error("queue::unknown engine %r is sending a reply: "%queue_id)
614 614 # self.log.debug("queue:: %s"%msg[2:])
615 615 return
616 616
617 617 parent = msg['parent_header']
618 618 if not parent:
619 619 return
620 620 msg_id = parent['msg_id']
621 621 if msg_id in self.pending:
622 622 self.pending.remove(msg_id)
623 623 self.all_completed.add(msg_id)
624 624 self.queues[eid].remove(msg_id)
625 625 self.completed[eid].append(msg_id)
626 626 elif msg_id not in self.all_completed:
627 627 # it could be a result from an engine that died before delivering
628 628 # the result
629 629 self.log.warn("queue:: unknown msg finished %s"%msg_id)
630 630 return
631 631 # update record anyway, because the unregistration could have been premature
632 632 rheader = msg['header']
633 633 completed = datetime.strptime(rheader['date'], util.ISO8601)
634 634 started = rheader.get('started', None)
635 635 if started is not None:
636 636 started = datetime.strptime(started, util.ISO8601)
637 637 result = {
638 638 'result_header' : rheader,
639 639 'result_content': msg['content'],
640 640 'started' : started,
641 641 'completed' : completed
642 642 }
643 643
644 644 result['result_buffers'] = msg['buffers']
645 645 try:
646 646 self.db.update_record(msg_id, result)
647 647 except Exception:
648 648 self.log.error("DB Error updating record %r"%msg_id, exc_info=True)
649 649
650 650
651 651 #--------------------- Task Queue Traffic ------------------------------
652 652
653 653 def save_task_request(self, idents, msg):
654 654 """Save the submission of a task."""
655 655 client_id = idents[0]
656 656
657 657 try:
658 658 msg = self.session.unpack_message(msg, content=False)
659 659 except:
660 660 self.log.error("task::client %r sent invalid task message: %s"%(
661 661 client_id, msg), exc_info=True)
662 662 return
663 663 record = init_record(msg)
664 664
665 665 record['client_uuid'] = client_id
666 666 record['queue'] = 'task'
667 667 header = msg['header']
668 668 msg_id = header['msg_id']
669 669 self.pending.add(msg_id)
670 670 self.unassigned.add(msg_id)
671 671 try:
672 672 # it's possible iopub arrived first:
673 673 existing = self.db.get_record(msg_id)
674 674 if existing['resubmitted']:
675 675 for key in ('submitted', 'client_uuid', 'buffers'):
676 676 # don't clobber these keys on resubmit
677 677 # submitted and client_uuid should be different
678 678 # and buffers might be big, and shouldn't have changed
679 679 record.pop(key)
680 680 # still check content,header which should not change
682 682 # but are not as expensive to compare as buffers
682 682
683 683 for key,evalue in existing.iteritems():
684 684 if key.endswith('buffers'):
685 685 # don't compare buffers
686 686 continue
687 687 rvalue = record.get(key, None)
688 688 if evalue and rvalue and evalue != rvalue:
689 689 self.log.warn("conflicting initial state for record: %r:%r <%r> %r"%(msg_id, rvalue, key, evalue))
690 690 elif evalue and not rvalue:
691 691 record[key] = evalue
692 692 self.db.update_record(msg_id, record)
693 693 except KeyError:
694 694 self.db.add_record(msg_id, record)
695 695 except Exception:
696 696 self.log.error("DB Error saving task request %r"%msg_id, exc_info=True)
697 697
698 698 def save_task_result(self, idents, msg):
699 699 """save the result of a completed task."""
700 700 client_id = idents[0]
701 701 try:
702 702 msg = self.session.unpack_message(msg, content=False)
703 703 except:
704 704 self.log.error("task::invalid task result message send to %r: %s"%(
705 705 client_id, msg), exc_info=True)
706 706 raise
707 707 return
708 708
709 709 parent = msg['parent_header']
710 710 if not parent:
711 711 # print msg
712 712 self.log.warn("Task %r had no parent!"%msg)
713 713 return
714 714 msg_id = parent['msg_id']
715 715 if msg_id in self.unassigned:
716 716 self.unassigned.remove(msg_id)
717 717
718 718 header = msg['header']
719 719 engine_uuid = header.get('engine', None)
720 720 eid = self.by_ident.get(engine_uuid, None)
721 721
722 722 if msg_id in self.pending:
723 723 self.pending.remove(msg_id)
724 724 self.all_completed.add(msg_id)
725 725 if eid is not None:
726 726 self.completed[eid].append(msg_id)
727 727 if msg_id in self.tasks[eid]:
728 728 self.tasks[eid].remove(msg_id)
729 729 completed = datetime.strptime(header['date'], util.ISO8601)
730 730 started = header.get('started', None)
731 731 if started is not None:
732 732 started = datetime.strptime(started, util.ISO8601)
733 733 result = {
734 734 'result_header' : header,
735 735 'result_content': msg['content'],
736 736 'started' : started,
737 737 'completed' : completed,
738 738 'engine_uuid': engine_uuid
739 739 }
740 740
741 741 result['result_buffers'] = msg['buffers']
742 742 try:
743 743 self.db.update_record(msg_id, result)
744 744 except Exception:
745 745 self.log.error("DB Error saving task request %r"%msg_id, exc_info=True)
746 746
747 747 else:
748 748 self.log.debug("task::unknown task %s finished"%msg_id)
749 749
750 750 def save_task_destination(self, idents, msg):
751 751 try:
752 752 msg = self.session.unpack_message(msg, content=True)
753 753 except:
754 754 self.log.error("task::invalid task tracking message", exc_info=True)
755 755 return
756 756 content = msg['content']
757 757 # print (content)
758 758 msg_id = content['msg_id']
759 759 engine_uuid = content['engine_id']
760 760 eid = self.by_ident[engine_uuid]
761 761
762 762 self.log.info("task::task %s arrived on %s"%(msg_id, eid))
763 763 if msg_id in self.unassigned:
764 764 self.unassigned.remove(msg_id)
765 765 # else:
766 766 # self.log.debug("task::task %s not listed as MIA?!"%(msg_id))
767 767
768 768 self.tasks[eid].append(msg_id)
769 769 # self.pending[msg_id][1].update(received=datetime.now(),engine=(eid,engine_uuid))
770 770 try:
771 771 self.db.update_record(msg_id, dict(engine_uuid=engine_uuid))
772 772 except Exception:
773 773 self.log.error("DB Error saving task destination %r"%msg_id, exc_info=True)
774 774
775 775
776 776 def mia_task_request(self, idents, msg):
777 777 raise NotImplementedError
778 778 client_id = idents[0]
779 779 # content = dict(mia=self.mia,status='ok')
780 780 # self.session.send('mia_reply', content=content, idents=client_id)
781 781
782 782
783 783 #--------------------- IOPub Traffic ------------------------------
784 784
785 785 def save_iopub_message(self, topics, msg):
786 786 """save an iopub message into the db"""
787 787 # print (topics)
788 788 try:
789 789 msg = self.session.unpack_message(msg, content=True)
790 790 except:
791 791 self.log.error("iopub::invalid IOPub message", exc_info=True)
792 792 return
793 793
794 794 parent = msg['parent_header']
795 795 if not parent:
796 796 self.log.error("iopub::invalid IOPub message: %s"%msg)
797 797 return
798 798 msg_id = parent['msg_id']
799 799 msg_type = msg['msg_type']
800 800 content = msg['content']
801 801
802 802 # ensure msg_id is in db
803 803 try:
804 804 rec = self.db.get_record(msg_id)
805 805 except KeyError:
806 806 rec = empty_record()
807 807 rec['msg_id'] = msg_id
808 808 self.db.add_record(msg_id, rec)
809 809 # stream
810 810 d = {}
811 811 if msg_type == 'stream':
812 812 name = content['name']
813 813 s = rec[name] or ''
814 814 d[name] = s + content['data']
815 815
816 816 elif msg_type == 'pyerr':
817 817 d['pyerr'] = content
818 818 elif msg_type == 'pyin':
819 819 d['pyin'] = content['code']
820 820 else:
821 821 d[msg_type] = content.get('data', '')
822 822
823 823 try:
824 824 self.db.update_record(msg_id, d)
825 825 except Exception:
826 826 self.log.error("DB Error saving iopub message %r"%msg_id, exc_info=True)
827 827
828 828
829 829
830 830 #-------------------------------------------------------------------------
831 831 # Registration requests
832 832 #-------------------------------------------------------------------------
833 833
834 834 def connection_request(self, client_id, msg):
835 835 """Reply with connection addresses for clients."""
836 836 self.log.info("client::client %s connected"%client_id)
837 837 content = dict(status='ok')
838 838 content.update(self.client_info)
839 839 jsonable = {}
840 840 for k,v in self.keytable.iteritems():
841 841 if v not in self.dead_engines:
842 842 jsonable[str(k)] = v
843 843 content['engines'] = jsonable
844 844 self.session.send(self.query, 'connection_reply', content, parent=msg, ident=client_id)
845 845
846 846 def register_engine(self, reg, msg):
847 847 """Register a new engine."""
848 848 content = msg['content']
849 849 try:
850 850 queue = content['queue']
851 851 except KeyError:
852 852 self.log.error("registration::queue not specified", exc_info=True)
853 853 return
854 854 heart = content.get('heartbeat', None)
855 855 """register a new engine, and create the socket(s) necessary"""
856 856 eid = self._next_id
857 857 # print (eid, queue, reg, heart)
858 858
859 859 self.log.debug("registration::register_engine(%i, %r, %r, %r)"%(eid, queue, reg, heart))
860 860
861 861 content = dict(id=eid,status='ok')
862 862 content.update(self.engine_info)
863 863 # check if requesting available IDs:
864 864 if queue in self.by_ident:
865 865 try:
866 866 raise KeyError("queue_id %r in use"%queue)
867 867 except:
868 868 content = error.wrap_exception()
869 869 self.log.error("queue_id %r in use"%queue, exc_info=True)
870 870 elif heart in self.hearts: # need to check unique hearts?
871 871 try:
872 872 raise KeyError("heart_id %r in use"%heart)
873 873 except:
874 874 self.log.error("heart_id %r in use"%heart, exc_info=True)
875 875 content = error.wrap_exception()
876 876 else:
877 877 for h, pack in self.incoming_registrations.iteritems():
878 878 if heart == h:
879 879 try:
880 880 raise KeyError("heart_id %r in use"%heart)
881 881 except:
882 882 self.log.error("heart_id %r in use"%heart, exc_info=True)
883 883 content = error.wrap_exception()
884 884 break
885 885 elif queue == pack[1]:
886 886 try:
887 887 raise KeyError("queue_id %r in use"%queue)
888 888 except:
889 889 self.log.error("queue_id %r in use"%queue, exc_info=True)
890 890 content = error.wrap_exception()
891 891 break
892 892
893 893 msg = self.session.send(self.query, "registration_reply",
894 894 content=content,
895 895 ident=reg)
896 896
897 897 if content['status'] == 'ok':
898 898 if heart in self.heartmonitor.hearts:
899 899 # already beating
900 900 self.incoming_registrations[heart] = (eid,queue,reg[0],None)
901 901 self.finish_registration(heart)
902 902 else:
903 903 purge = lambda : self._purge_stalled_registration(heart)
904 904 dc = ioloop.DelayedCallback(purge, self.registration_timeout, self.loop)
905 905 dc.start()
906 906 self.incoming_registrations[heart] = (eid,queue,reg[0],dc)
907 907 else:
908 908 self.log.error("registration::registration %i failed: %s"%(eid, content['evalue']))
909 909 return eid
910 910
911 911 def unregister_engine(self, ident, msg):
912 912 """Unregister an engine that explicitly requested to leave."""
913 913 try:
914 914 eid = msg['content']['id']
915 915 except:
916 916 self.log.error("registration::bad engine id for unregistration: %s"%ident, exc_info=True)
917 917 return
918 918 self.log.info("registration::unregister_engine(%s)"%eid)
919 919 # print (eid)
920 920 uuid = self.keytable[eid]
921 921 content=dict(id=eid, queue=uuid)
922 922 self.dead_engines.add(uuid)
923 923 # self.ids.remove(eid)
924 924 # uuid = self.keytable.pop(eid)
925 925 #
926 926 # ec = self.engines.pop(eid)
927 927 # self.hearts.pop(ec.heartbeat)
928 928 # self.by_ident.pop(ec.queue)
929 929 # self.completed.pop(eid)
930 930 handleit = lambda : self._handle_stranded_msgs(eid, uuid)
931 931 dc = ioloop.DelayedCallback(handleit, self.registration_timeout, self.loop)
932 932 dc.start()
933 933 ############## TODO: HANDLE IT ################
934 934
935 935 if self.notifier:
936 936 self.session.send(self.notifier, "unregistration_notification", content=content)
937 937
938 938 def _handle_stranded_msgs(self, eid, uuid):
939 939 """Handle messages known to be on an engine when the engine unregisters.
940 940
941 941 It is possible that this will fire prematurely - that is, an engine will
942 942 go down after completing a result, and the client will be notified
943 943 that the result failed and later receive the actual result.
944 944 """
945 945
946 946 outstanding = self.queues[eid]
947 947
948 948 for msg_id in outstanding:
949 949 self.pending.remove(msg_id)
950 950 self.all_completed.add(msg_id)
951 951 try:
952 952 raise error.EngineError("Engine %r died while running task %r"%(eid, msg_id))
953 953 except:
954 954 content = error.wrap_exception()
955 955 # build a fake header:
956 956 header = {}
957 957 header['engine'] = uuid
958 958 header['date'] = datetime.now()
959 959 rec = dict(result_content=content, result_header=header, result_buffers=[])
960 960 rec['completed'] = header['date']
961 961 rec['engine_uuid'] = uuid
962 962 try:
963 963 self.db.update_record(msg_id, rec)
964 964 except Exception:
965 965 self.log.error("DB Error handling stranded msg %r"%msg_id, exc_info=True)
966 966
967 967
968 968 def finish_registration(self, heart):
969 969 """Second half of engine registration, called after our HeartMonitor
970 970 has received a beat from the Engine's Heart."""
971 971 try:
972 972 (eid,queue,reg,purge) = self.incoming_registrations.pop(heart)
973 973 except KeyError:
974 974 self.log.error("registration::tried to finish nonexistant registration", exc_info=True)
975 975 return
976 976 self.log.info("registration::finished registering engine %i:%r"%(eid,queue))
977 977 if purge is not None:
978 978 purge.stop()
979 979 control = queue
980 980 self.ids.add(eid)
981 981 self.keytable[eid] = queue
982 982 self.engines[eid] = EngineConnector(id=eid, queue=queue, registration=reg,
983 983 control=control, heartbeat=heart)
984 984 self.by_ident[queue] = eid
985 985 self.queues[eid] = list()
986 986 self.tasks[eid] = list()
987 987 self.completed[eid] = list()
988 988 self.hearts[heart] = eid
989 989 content = dict(id=eid, queue=self.engines[eid].queue)
990 990 if self.notifier:
991 991 self.session.send(self.notifier, "registration_notification", content=content)
992 992 self.log.info("engine::Engine Connected: %i"%eid)
993 993
994 994 def _purge_stalled_registration(self, heart):
995 995 if heart in self.incoming_registrations:
996 996 eid = self.incoming_registrations.pop(heart)[0]
997 997 self.log.info("registration::purging stalled registration: %i"%eid)
998 998 else:
999 999 pass
1000 1000
1001 1001 #-------------------------------------------------------------------------
1002 1002 # Client Requests
1003 1003 #-------------------------------------------------------------------------
1004 1004
1005 1005 def shutdown_request(self, client_id, msg):
1006 1006 """handle shutdown request."""
1007 1007 self.session.send(self.query, 'shutdown_reply', content={'status': 'ok'}, ident=client_id)
1008 1008 # also notify other clients of shutdown
1009 1009 self.session.send(self.notifier, 'shutdown_notice', content={'status': 'ok'})
1010 1010 dc = ioloop.DelayedCallback(lambda : self._shutdown(), 1000, self.loop)
1011 1011 dc.start()
1012 1012
1013 1013 def _shutdown(self):
1014 1014 self.log.info("hub::hub shutting down.")
1015 1015 time.sleep(0.1)
1016 1016 sys.exit(0)
1017 1017
1018 1018
1019 1019 def check_load(self, client_id, msg):
1020 1020 content = msg['content']
1021 1021 try:
1022 1022 targets = content['targets']
1023 1023 targets = self._validate_targets(targets)
1024 1024 except:
1025 1025 content = error.wrap_exception()
1026 1026 self.session.send(self.query, "hub_error",
1027 1027 content=content, ident=client_id)
1028 1028 return
1029 1029
1030 1030 content = dict(status='ok')
1031 1031 # loads = {}
1032 1032 for t in targets:
1033 1033 content[bytes(t)] = len(self.queues[t])+len(self.tasks[t])
1034 1034 self.session.send(self.query, "load_reply", content=content, ident=client_id)
1035 1035
1036 1036
1037 1037 def queue_status(self, client_id, msg):
1038 1038 """Return the Queue status of one or more targets.
1039 1039 if verbose: return the msg_ids
1040 1040 else: return the count of each type.
1041 1041 keys: queue (pending MUX jobs)
1042 1042 tasks (pending Task jobs)
1043 1043 completed (finished jobs from both queues)"""
1044 1044 content = msg['content']
1045 1045 targets = content['targets']
1046 1046 try:
1047 1047 targets = self._validate_targets(targets)
1048 1048 except:
1049 1049 content = error.wrap_exception()
1050 1050 self.session.send(self.query, "hub_error",
1051 1051 content=content, ident=client_id)
1052 1052 return
1053 1053 verbose = content.get('verbose', False)
1054 1054 content = dict(status='ok')
1055 1055 for t in targets:
1056 1056 queue = self.queues[t]
1057 1057 completed = self.completed[t]
1058 1058 tasks = self.tasks[t]
1059 1059 if not verbose:
1060 1060 queue = len(queue)
1061 1061 completed = len(completed)
1062 1062 tasks = len(tasks)
1063 1063 content[bytes(t)] = {'queue': queue, 'completed': completed , 'tasks': tasks}
1064 1064 content['unassigned'] = list(self.unassigned) if verbose else len(self.unassigned)
1065 1065
1066 1066 self.session.send(self.query, "queue_reply", content=content, ident=client_id)
1067 1067
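For reference, the non-verbose `queue_reply` content assembled by the loop above is a flat dict keyed by engine id; a sketch (engine ids and counts below are illustrative):

    reply = {
        'status': 'ok',
        '0': {'queue': 2, 'completed': 10, 'tasks': 1},   # per-engine counts
        '1': {'queue': 0, 'completed': 12, 'tasks': 3},
        'unassigned': 4,    # len(self.unassigned) when verbose is False
    }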
1068 1068 def purge_results(self, client_id, msg):
1069 1069 """Purge results from memory. This method is most useful before we move
1070 1070 to a DB-based message storage mechanism."""
1071 1071 content = msg['content']
1072 1072 msg_ids = content.get('msg_ids', [])
1073 1073 reply = dict(status='ok')
1074 1074 if msg_ids == 'all':
1075 1075 try:
1076 1076 self.db.drop_matching_records(dict(completed={'$ne':None}))
1077 1077 except Exception:
1078 1078 reply = error.wrap_exception()
1079 1079 else:
1080 1080 pending = filter(lambda m: m in self.pending, msg_ids)
1081 1081 if pending:
1082 1082 try:
1083 1083 raise IndexError("msg pending: %r"%pending[0])
1084 1084 except:
1085 1085 reply = error.wrap_exception()
1086 1086 else:
1087 1087 try:
1088 1088 self.db.drop_matching_records(dict(msg_id={'$in':msg_ids}))
1089 1089 except Exception:
1090 1090 reply = error.wrap_exception()
1091 1091
1092 1092 if reply['status'] == 'ok':
1093 1093 eids = content.get('engine_ids', [])
1094 1094 for eid in eids:
1095 1095 if eid not in self.engines:
1096 1096 try:
1097 1097 raise IndexError("No such engine: %i"%eid)
1098 1098 except:
1099 1099 reply = error.wrap_exception()
1100 1100 break
1101 1101 msg_ids = self.completed.pop(eid)
1102 1102 uid = self.engines[eid].queue
1103 1103 try:
1104 1104 self.db.drop_matching_records(dict(engine_uuid=uid, completed={'$ne':None}))
1105 1105 except Exception:
1106 1106 reply = error.wrap_exception()
1107 1107 break
1108 1108
1109 1109 self.session.send(self.query, 'purge_reply', content=reply, ident=client_id)
1110 1110
1111 1111 def resubmit_task(self, client_id, msg):
1112 1112 """Resubmit one or more tasks."""
1113 1113 def finish(reply):
1114 1114 self.session.send(self.query, 'resubmit_reply', content=reply, ident=client_id)
1115 1115
1116 1116 content = msg['content']
1117 1117 msg_ids = content['msg_ids']
1118 1118 reply = dict(status='ok')
1119 1119 try:
1120 1120 records = self.db.find_records({'msg_id' : {'$in' : msg_ids}}, keys=[
1121 1121 'header', 'content', 'buffers'])
1122 1122 except Exception:
1123 1123 self.log.error('db::db error finding tasks to resubmit', exc_info=True)
1124 1124 return finish(error.wrap_exception())
1125 1125
1126 1126 # validate msg_ids
1127 1127 found_ids = [ rec['msg_id'] for rec in records ]
1128 1128 invalid_ids = filter(lambda m: m in self.pending, found_ids)
1129 1129 if len(records) > len(msg_ids):
1130 1130 try:
1131 1131 raise RuntimeError("DB appears to be in an inconsistent state. "
1132 1132 "More matching records were found than should exist")
1133 1133 except Exception:
1134 1134 return finish(error.wrap_exception())
1135 1135 elif len(records) < len(msg_ids):
1136 1136 missing = [ m for m in msg_ids if m not in found_ids ]
1137 1137 try:
1138 1138 raise KeyError("No such msg(s): %s"%missing)
1139 1139 except KeyError:
1140 1140 return finish(error.wrap_exception())
1141 1141 elif invalid_ids:
1142 1142 msg_id = invalid_ids[0]
1143 1143 try:
1144 1144 raise ValueError("Task %r appears to be in flight"%(msg_id))
1145 1145 except Exception:
1146 1146 return finish(error.wrap_exception())
1147 1147
1148 1148 # clear the existing records
1149 1149 rec = empty_record()
1150 1150 map(rec.pop, ['msg_id', 'header', 'content', 'buffers', 'submitted'])
1151 1151 rec['resubmitted'] = datetime.now()
1152 1152 rec['queue'] = 'task'
1153 1153 rec['client_uuid'] = client_id[0]
1154 1154 try:
1155 1155 for msg_id in msg_ids:
1156 1156 self.all_completed.discard(msg_id)
1157 1157 self.db.update_record(msg_id, rec)
1158 1158 except Exception:
1159 1159 self.log.error('db::db error updating record', exc_info=True)
1160 1160 reply = error.wrap_exception()
1161 1161 else:
1162 1162 # send the messages
1163 1163 for rec in records:
1164 1164 header = rec['header']
1165 1165 msg = self.session.msg(header['msg_type'])
1166 1166 msg['content'] = rec['content']
1167 1167 msg['header'] = header
1168 1168 msg['msg_id'] = rec['msg_id']
1169 1169 self.session.send(self.resubmit, msg, buffers=rec['buffers'])
1170 1170
1171 1171 finish(dict(status='ok'))
1172 1172
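From the client side, the resubmit request is just a list of completed msg_ids; a sketch of the content (ids illustrative, session/send plumbing elided):

    content = {'msg_ids': ['a1b2c3', 'd4e5f6']}   # must be completed, not pending
    # the Hub answers on the query channel with 'resubmit_reply':
    # {'status': 'ok'} on success, or a wrapped exception dict on failure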
1173 1173
1174 1174 def _extract_record(self, rec):
1175 1175 """decompose a TaskRecord dict into the subsections of a get_results reply"""
1176 1176 io_dict = {}
1177 1177 for key in 'pyin pyout pyerr stdout stderr'.split():
1178 1178 io_dict[key] = rec[key]
1179 1179 content = { 'result_content': rec['result_content'],
1180 1180 'header': rec['header'],
1181 1181 'result_header' : rec['result_header'],
1182 1182 'io' : io_dict,
1183 1183 }
1184 1184 if rec['result_buffers']:
1185 1185 buffers = map(str, rec['result_buffers'])
1186 1186 else:
1187 1187 buffers = []
1188 1188
1189 1189 return content, buffers
1190 1190
1191 1191 def get_results(self, client_id, msg):
1192 1192 """Get the result of 1 or more messages."""
1193 1193 content = msg['content']
1194 1194 msg_ids = sorted(set(content['msg_ids']))
1195 1195 statusonly = content.get('status_only', False)
1196 1196 pending = []
1197 1197 completed = []
1198 1198 content = dict(status='ok')
1199 1199 content['pending'] = pending
1200 1200 content['completed'] = completed
1201 1201 buffers = []
1202 1202 if not statusonly:
1203 1203 try:
1204 1204 matches = self.db.find_records(dict(msg_id={'$in':msg_ids}))
1205 1205 # turn match list into dict, for faster lookup
1206 1206 records = {}
1207 1207 for rec in matches:
1208 1208 records[rec['msg_id']] = rec
1209 1209 except Exception:
1210 1210 content = error.wrap_exception()
1211 1211 self.session.send(self.query, "result_reply", content=content,
1212 1212 parent=msg, ident=client_id)
1213 1213 return
1214 1214 else:
1215 1215 records = {}
1216 1216 for msg_id in msg_ids:
1217 1217 if msg_id in self.pending:
1218 1218 pending.append(msg_id)
1219 1219 elif msg_id in self.all_completed:
1220 1220 completed.append(msg_id)
1221 1221 if not statusonly:
1222 1222 c,bufs = self._extract_record(records[msg_id])
1223 1223 content[msg_id] = c
1224 1224 buffers.extend(bufs)
1225 1225 elif msg_id in records:
1226 1226 if records[msg_id]['completed']:
1227 1227 completed.append(msg_id)
1228 1228 c,bufs = self._extract_record(records[msg_id])
1229 1229 content[msg_id] = c
1230 1230 buffers.extend(bufs)
1231 1231 else:
1232 1232 pending.append(msg_id)
1233 1233 else:
1234 1234 try:
1235 1235 raise KeyError('No such message: '+msg_id)
1236 1236 except:
1237 1237 content = error.wrap_exception()
1238 1238 break
1239 1239 self.session.send(self.query, "result_reply", content=content,
1240 1240 parent=msg, ident=client_id,
1241 1241 buffers=buffers)
1242 1242
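A sketch of the corresponding client-side request and reply shapes for `get_results` (field names from the handler above; values illustrative):

    request = {'msg_ids': ['a1b2c3'], 'status_only': True}
    # reply content: {'status': 'ok', 'pending': [...], 'completed': [...]}
    # with status_only=False, each completed msg_id additionally maps to
    # {'result_content': ..., 'header': ..., 'result_header': ..., 'io': {...}}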
1243 1243 def get_history(self, client_id, msg):
1244 1244 """Get a list of all msg_ids in our DB records"""
1245 1245 try:
1246 1246 msg_ids = self.db.get_history()
1247 1247 except Exception as e:
1248 1248 content = error.wrap_exception()
1249 1249 else:
1250 1250 content = dict(status='ok', history=msg_ids)
1251 1251
1252 1252 self.session.send(self.query, "history_reply", content=content,
1253 1253 parent=msg, ident=client_id)
1254 1254
1255 1255 def db_query(self, client_id, msg):
1256 1256 """Perform a raw query on the task record database."""
1257 1257 content = msg['content']
1258 1258 query = content.get('query', {})
1259 1259 keys = content.get('keys', None)
1260 1260 query = util.extract_dates(query)
1261 1261 buffers = []
1262 1262 empty = list()
1263 1263
1264 1264 try:
1265 1265 records = self.db.find_records(query, keys)
1266 1266 except Exception as e:
1267 1267 content = error.wrap_exception()
1268 1268 else:
1269 1269 # extract buffers from reply content:
1270 1270 if keys is not None:
1271 1271 buffer_lens = [] if 'buffers' in keys else None
1272 1272 result_buffer_lens = [] if 'result_buffers' in keys else None
1273 1273 else:
1274 1274 buffer_lens = []
1275 1275 result_buffer_lens = []
1276 1276
1277 1277 for rec in records:
1278 1278 # buffers may be None, so double check
1279 1279 if buffer_lens is not None:
1280 1280 b = rec.pop('buffers', empty) or empty
1281 1281 buffer_lens.append(len(b))
1282 1282 buffers.extend(b)
1283 1283 if result_buffer_lens is not None:
1284 1284 rb = rec.pop('result_buffers', empty) or empty
1285 1285 result_buffer_lens.append(len(rb))
1286 1286 buffers.extend(rb)
1287 1287 content = dict(status='ok', records=records, buffer_lens=buffer_lens,
1288 1288 result_buffer_lens=result_buffer_lens)
1289 1289
1290 1290 self.session.send(self.query, "db_reply", content=content,
1291 1291 parent=msg, ident=client_id,
1292 1292 buffers=buffers)
1293 1293
@@ -1,112 +1,112 b''
1 1 """A TaskRecord backend using mongodb"""
2 2 #-----------------------------------------------------------------------------
3 3 # Copyright (C) 2010 The IPython Development Team
4 4 #
5 5 # Distributed under the terms of the BSD License. The full license is in
6 6 # the file COPYING, distributed as part of this software.
7 7 #-----------------------------------------------------------------------------
8 8
9 9 from pymongo import Connection
10 10 from pymongo.binary import Binary
11 11
12 from IPython.utils.traitlets import Dict, List, CUnicode, CStr, Instance
12 from IPython.utils.traitlets import Dict, List, Unicode, Instance
13 13
14 14 from .dictdb import BaseDB
15 15
16 16 #-----------------------------------------------------------------------------
17 17 # MongoDB class
18 18 #-----------------------------------------------------------------------------
19 19
20 20 class MongoDB(BaseDB):
21 21 """MongoDB TaskRecord backend."""
22 22
23 23 connection_args = List(config=True,
24 24 help="""Positional arguments to be passed to pymongo.Connection. Only
25 25 necessary if the default mongodb configuration does not point to your
26 26 mongod instance.""")
27 27 connection_kwargs = Dict(config=True,
28 28 help="""Keyword arguments to be passed to pymongo.Connection. Only
29 29 necessary if the default mongodb configuration does not point to your
30 30 mongod instance."""
31 31 )
32 database = CUnicode(config=True,
32 database = Unicode(config=True,
33 33 help="""The MongoDB database name to use for storing tasks for this session. If unspecified,
34 34 a new database will be created with the Hub's IDENT. Specifying the database will result
35 35 in tasks from previous sessions being available via Clients' db_query and
36 36 get_result methods.""")
37 37
38 38 _connection = Instance(Connection) # pymongo connection
39 39
40 40 def __init__(self, **kwargs):
41 41 super(MongoDB, self).__init__(**kwargs)
42 42 if self._connection is None:
43 43 self._connection = Connection(*self.connection_args, **self.connection_kwargs)
44 44 if not self.database:
45 45 self.database = self.session
46 46 self._db = self._connection[self.database]
47 47 self._records = self._db['task_records']
48 48 self._records.ensure_index('msg_id', unique=True)
49 49 self._records.ensure_index('submitted') # for sorting history
50 50 # for rec in self._records.find
51 51
52 52 def _binary_buffers(self, rec):
53 53 for key in ('buffers', 'result_buffers'):
54 54 if rec.get(key, None):
55 55 rec[key] = map(Binary, rec[key])
56 56 return rec
57 57
58 58 def add_record(self, msg_id, rec):
59 59 """Add a new Task Record, by msg_id."""
60 60 # print rec
61 61 rec = self._binary_buffers(rec)
62 62 self._records.insert(rec)
63 63
64 64 def get_record(self, msg_id):
65 65 """Get a specific Task Record, by msg_id."""
66 66 r = self._records.find_one({'msg_id': msg_id})
67 67 if not r:
68 68 # r will be None if nothing is found
69 69 raise KeyError(msg_id)
70 70 return r
71 71
72 72 def update_record(self, msg_id, rec):
73 73 """Update the data in an existing record."""
74 74 rec = self._binary_buffers(rec)
75 75
76 76 self._records.update({'msg_id':msg_id}, {'$set': rec})
77 77
78 78 def drop_matching_records(self, check):
79 79 """Remove all records matching a query dict from the DB."""
80 80 self._records.remove(check)
81 81
82 82 def drop_record(self, msg_id):
83 83 """Remove a record from the DB."""
84 84 self._records.remove({'msg_id':msg_id})
85 85
86 86 def find_records(self, check, keys=None):
87 87 """Find records matching a query dict, optionally extracting subset of keys.
88 88
89 89 Returns list of matching records.
90 90
91 91 Parameters
92 92 ----------
93 93
94 94 check: dict
95 95 mongodb-style query argument
96 96 keys: list of strs [optional]
97 97 if specified, the subset of keys to extract. msg_id will *always* be
98 98 included.
99 99 """
100 100 if keys and 'msg_id' not in keys:
101 101 keys.append('msg_id')
102 102 matches = list(self._records.find(check,keys))
103 103 for rec in matches:
104 104 rec.pop('_id')
105 105 return matches
106 106
107 107 def get_history(self):
108 108 """get all msg_ids, ordered by time submitted."""
109 109 cursor = self._records.find({},{'msg_id':1}).sort('submitted')
110 110 return [ rec['msg_id'] for rec in cursor ]
111 111
112 112
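A minimal sketch of pointing the Hub at a non-default mongod via the traits above (hostname, port, and database name are illustrative):

    from IPython.config.loader import Config

    c = Config()
    c.MongoDB.connection_args = ['mongo.example.com', 27017]  # -> pymongo.Connection(*args)
    c.MongoDB.database = u'ipython_tasks'   # reuse task records across sessions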
@@ -1,333 +1,333 b''
1 1 """A TaskRecord backend using sqlite3"""
2 2 #-----------------------------------------------------------------------------
3 3 # Copyright (C) 2011 The IPython Development Team
4 4 #
5 5 # Distributed under the terms of the BSD License. The full license is in
6 6 # the file COPYING, distributed as part of this software.
7 7 #-----------------------------------------------------------------------------
8 8
9 9 import json
10 10 import os
11 11 import cPickle as pickle
12 12 from datetime import datetime
13 13
14 14 import sqlite3
15 15
16 16 from zmq.eventloop import ioloop
17 17
18 from IPython.utils.traitlets import CUnicode, CStr, Instance, List
18 from IPython.utils.traitlets import Unicode, Instance, List
19 19 from .dictdb import BaseDB
20 20 from IPython.parallel.util import ISO8601
21 21
22 22 #-----------------------------------------------------------------------------
23 23 # SQLite operators, adapters, and converters
24 24 #-----------------------------------------------------------------------------
25 25
26 26 operators = {
27 27 '$lt' : "<",
28 28 '$gt' : ">",
29 29 # null is handled weird with ==,!=
30 30 '$eq' : "=",
31 31 '$ne' : "!=",
32 32 '$lte': "<=",
33 33 '$gte': ">=",
34 34 '$in' : ('=', ' OR '),
35 35 '$nin': ('!=', ' AND '),
36 36 # '$all': None,
37 37 # '$mod': None,
38 38 # '$exists' : None
39 39 }
40 40 null_operators = {
41 41 '=' : "IS NULL",
42 42 '!=' : "IS NOT NULL",
43 43 }
44 44
45 45 def _adapt_datetime(dt):
46 46 return dt.strftime(ISO8601)
47 47
48 48 def _convert_datetime(ds):
49 49 if ds is None:
50 50 return ds
51 51 else:
52 52 return datetime.strptime(ds, ISO8601)
53 53
54 54 def _adapt_dict(d):
55 55 return json.dumps(d)
56 56
57 57 def _convert_dict(ds):
58 58 if ds is None:
59 59 return ds
60 60 else:
61 61 return json.loads(ds)
62 62
63 63 def _adapt_bufs(bufs):
64 64 # this is *horrible*
65 65 # copy buffers into single list and pickle it:
66 66 if bufs and isinstance(bufs[0], (bytes, buffer)):
67 67 return sqlite3.Binary(pickle.dumps(map(bytes, bufs),-1))
68 68 elif bufs:
69 69 return bufs
70 70 else:
71 71 return None
72 72
73 73 def _convert_bufs(bs):
74 74 if bs is None:
75 75 return []
76 76 else:
77 77 return pickle.loads(bytes(bs))
78 78
79 79 #-----------------------------------------------------------------------------
80 80 # SQLiteDB class
81 81 #-----------------------------------------------------------------------------
82 82
83 83 class SQLiteDB(BaseDB):
84 84 """SQLite3 TaskRecord backend."""
85 85
86 filename = CUnicode('tasks.db', config=True,
86 filename = Unicode('tasks.db', config=True,
87 87 help="""The filename of the sqlite task database. [default: 'tasks.db']""")
88 location = CUnicode('', config=True,
88 location = Unicode('', config=True,
89 89 help="""The directory containing the sqlite task database. The default
90 90 is to use the cluster_dir location.""")
91 table = CUnicode("", config=True,
91 table = Unicode("", config=True,
92 92 help="""The SQLite Table to use for storing tasks for this session. If unspecified,
93 93 a new table will be created with the Hub's IDENT. Specifying the table will result
94 94 in tasks from previous sessions being available via Clients' db_query and
95 95 get_result methods.""")
96 96
97 97 _db = Instance('sqlite3.Connection')
98 98 _keys = List(['msg_id' ,
99 99 'header' ,
100 100 'content',
101 101 'buffers',
102 102 'submitted',
103 103 'client_uuid' ,
104 104 'engine_uuid' ,
105 105 'started',
106 106 'completed',
107 107 'resubmitted',
108 108 'result_header' ,
109 109 'result_content' ,
110 110 'result_buffers' ,
111 111 'queue' ,
112 112 'pyin' ,
113 113 'pyout',
114 114 'pyerr',
115 115 'stdout',
116 116 'stderr',
117 117 ])
118 118
119 119 def __init__(self, **kwargs):
120 120 super(SQLiteDB, self).__init__(**kwargs)
121 121 if not self.table:
122 122 # use session, and prefix with _, since a table name cannot start with a digit
123 123 self.table = '_'+self.session.replace('-','_')
124 124 if not self.location:
125 125 if hasattr(self.config.Global, 'cluster_dir'):
126 126 self.location = self.config.Global.cluster_dir
127 127 else:
128 128 self.location = '.'
129 129 self._init_db()
130 130
131 131 # register db commit as 2s periodic callback
132 132 # to prevent clogging pipes
133 133 # assumes we are being run in a zmq ioloop app
134 134 loop = ioloop.IOLoop.instance()
135 135 pc = ioloop.PeriodicCallback(self._db.commit, 2000, loop)
136 136 pc.start()
137 137
138 138 def _defaults(self, keys=None):
139 139 """create an empty record"""
140 140 d = {}
141 141 keys = self._keys if keys is None else keys
142 142 for key in keys:
143 143 d[key] = None
144 144 return d
145 145
146 146 def _init_db(self):
147 147 """Connect to the database and get new session number."""
148 148 # register adapters
149 149 sqlite3.register_adapter(datetime, _adapt_datetime)
150 150 sqlite3.register_converter('datetime', _convert_datetime)
151 151 sqlite3.register_adapter(dict, _adapt_dict)
152 152 sqlite3.register_converter('dict', _convert_dict)
153 153 sqlite3.register_adapter(list, _adapt_bufs)
154 154 sqlite3.register_converter('bufs', _convert_bufs)
155 155 # connect to the db
156 156 dbfile = os.path.join(self.location, self.filename)
157 157 self._db = sqlite3.connect(dbfile, detect_types=sqlite3.PARSE_DECLTYPES,
158 158 # isolation_level = None)#,
159 159 cached_statements=64)
160 160 # print dir(self._db)
161 161
162 162 self._db.execute("""CREATE TABLE IF NOT EXISTS %s
163 163 (msg_id text PRIMARY KEY,
164 164 header dict text,
165 165 content dict text,
166 166 buffers bufs blob,
167 167 submitted datetime text,
168 168 client_uuid text,
169 169 engine_uuid text,
170 170 started datetime text,
171 171 completed datetime text,
172 172 resubmitted datetime text,
173 173 result_header dict text,
174 174 result_content dict text,
175 175 result_buffers bufs blob,
176 176 queue text,
177 177 pyin text,
178 178 pyout text,
179 179 pyerr text,
180 180 stdout text,
181 181 stderr text)
182 182 """%self.table)
183 183 self._db.commit()
184 184
185 185 def _dict_to_list(self, d):
186 186 """turn a mongodb-style record dict into a list."""
187 187
188 188 return [ d[key] for key in self._keys ]
189 189
190 190 def _list_to_dict(self, line, keys=None):
191 191 """Inverse of dict_to_list"""
192 192 keys = self._keys if keys is None else keys
193 193 d = self._defaults(keys)
194 194 for key,value in zip(keys, line):
195 195 d[key] = value
196 196
197 197 return d
198 198
199 199 def _render_expression(self, check):
200 200 """Turn a mongodb-style search dict into an SQL query."""
201 201 expressions = []
202 202 args = []
203 203
204 204 skeys = set(check.keys())
205 205 skeys.difference_update(set(self._keys))
206 206 skeys.difference_update(set(['buffers', 'result_buffers']))
207 207 if skeys:
208 208 raise KeyError("Illegal testing key(s): %s"%skeys)
209 209
210 210 for name,sub_check in check.iteritems():
211 211 if isinstance(sub_check, dict):
212 212 for test,value in sub_check.iteritems():
213 213 try:
214 214 op = operators[test]
215 215 except KeyError:
216 216 raise KeyError("Unsupported operator: %r"%test)
217 217 if isinstance(op, tuple):
218 218 op, join = op
219 219
220 220 if value is None and op in null_operators:
221 221 expr = "%s %s"%(name, null_operators[op])
222 222 else:
223 223 expr = "%s %s ?"%(name, op)
224 224 if isinstance(value, (tuple,list)):
225 225 if op in null_operators and any([v is None for v in value]):
226 226 # equality tests don't work with NULL
227 227 raise ValueError("Cannot use %r test with NULL values on SQLite backend"%test)
228 228 expr = '( %s )'%( join.join([expr]*len(value)) )
229 229 args.extend(value)
230 230 else:
231 231 args.append(value)
232 232 expressions.append(expr)
233 233 else:
234 234 # it's an equality check
235 235 if sub_check is None:
236 236 expressions.append("%s IS NULL"%name)
237 237 else:
238 238 expressions.append("%s = ?"%name)
239 239 args.append(sub_check)
240 240
241 241 expr = " AND ".join(expressions)
242 242 return expr, args
243 243
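To make the translation concrete, here is roughly what `_render_expression` yields for a few mongodb-style checks (expression order follows dict iteration, so it may vary):

    # {'completed': {'$ne': None}}      ->  ('completed IS NOT NULL', [])
    # {'engine_uuid': 'abc'}            ->  ('engine_uuid = ?', ['abc'])
    # {'msg_id': {'$in': ['a', 'b']}}   ->  ('( msg_id = ? OR msg_id = ? )', ['a', 'b'])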
244 244 def add_record(self, msg_id, rec):
245 245 """Add a new Task Record, by msg_id."""
246 246 d = self._defaults()
247 247 d.update(rec)
248 248 d['msg_id'] = msg_id
249 249 line = self._dict_to_list(d)
250 250 tups = '(%s)'%(','.join(['?']*len(line)))
251 251 self._db.execute("INSERT INTO %s VALUES %s"%(self.table, tups), line)
252 252 # self._db.commit()
253 253
254 254 def get_record(self, msg_id):
255 255 """Get a specific Task Record, by msg_id."""
256 256 cursor = self._db.execute("""SELECT * FROM %s WHERE msg_id==?"""%self.table, (msg_id,))
257 257 line = cursor.fetchone()
258 258 if line is None:
259 259 raise KeyError("No such msg: %r"%msg_id)
260 260 return self._list_to_dict(line)
261 261
262 262 def update_record(self, msg_id, rec):
263 263 """Update the data in an existing record."""
264 264 query = "UPDATE %s SET "%self.table
265 265 sets = []
266 266 keys = sorted(rec.keys())
267 267 values = []
268 268 for key in keys:
269 269 sets.append('%s = ?'%key)
270 270 values.append(rec[key])
271 271 query += ', '.join(sets)
272 272 query += ' WHERE msg_id == ?'
273 273 values.append(msg_id)
274 274 self._db.execute(query, values)
275 275 # self._db.commit()
276 276
277 277 def drop_record(self, msg_id):
278 278 """Remove a record from the DB."""
279 279 self._db.execute("""DELETE FROM %s WHERE msg_id==?"""%self.table, (msg_id,))
280 280 # self._db.commit()
281 281
282 282 def drop_matching_records(self, check):
283 283 """Remove all records matching a query dict from the DB."""
284 284 expr,args = self._render_expression(check)
285 285 query = "DELETE FROM %s WHERE %s"%(self.table, expr)
286 286 self._db.execute(query,args)
287 287 # self._db.commit()
288 288
289 289 def find_records(self, check, keys=None):
290 290 """Find records matching a query dict, optionally extracting subset of keys.
291 291
292 292 Returns list of matching records.
293 293
294 294 Parameters
295 295 ----------
296 296
297 297 check: dict
298 298 mongodb-style query argument
299 299 keys: list of strs [optional]
300 300 if specified, the subset of keys to extract. msg_id will *always* be
301 301 included.
302 302 """
303 303 if keys:
304 304 bad_keys = [ key for key in keys if key not in self._keys ]
305 305 if bad_keys:
306 306 raise KeyError("Bad record key(s): %s"%bad_keys)
307 307
308 308 if keys:
309 309 # ensure msg_id is present and first:
310 310 if 'msg_id' in keys:
311 311 keys.remove('msg_id')
312 312 keys.insert(0, 'msg_id')
313 313 req = ', '.join(keys)
314 314 else:
315 315 req = '*'
316 316 expr,args = self._render_expression(check)
317 317 query = """SELECT %s FROM %s WHERE %s"""%(req, self.table, expr)
318 318 cursor = self._db.execute(query, args)
319 319 matches = cursor.fetchall()
320 320 records = []
321 321 for line in matches:
322 322 rec = self._list_to_dict(line, keys)
323 323 records.append(rec)
324 324 return records
325 325
326 326 def get_history(self):
327 327 """get all msg_ids, ordered by time submitted."""
328 328 query = """SELECT msg_id FROM %s ORDER by submitted ASC"""%self.table
329 329 cursor = self._db.execute(query)
330 330 # will be a list of length 1 tuples
331 331 return [ tup[0] for tup in cursor.fetchall()]
332 332
333 333 __all__ = ['SQLiteDB'] No newline at end of file
@@ -1,166 +1,166 b''
1 1 #!/usr/bin/env python
2 2 """A simple engine that talks to a controller over 0MQ.
3 3 it handles registration, etc. and launches a kernel
4 4 connected to the Controller's Schedulers.
5 5 """
6 6 #-----------------------------------------------------------------------------
7 7 # Copyright (C) 2010-2011 The IPython Development Team
8 8 #
9 9 # Distributed under the terms of the BSD License. The full license is in
10 10 # the file COPYING, distributed as part of this software.
11 11 #-----------------------------------------------------------------------------
12 12
13 13 from __future__ import print_function
14 14
15 15 import sys
16 16 import time
17 17
18 18 import zmq
19 19 from zmq.eventloop import ioloop, zmqstream
20 20
21 21 # internal
22 from IPython.utils.traitlets import Instance, Str, Dict, Int, Type, CFloat
22 from IPython.utils.traitlets import Instance, Dict, Int, Type, CFloat, Unicode
23 23 # from IPython.utils.localinterfaces import LOCALHOST
24 24
25 25 from IPython.parallel.controller.heartmonitor import Heart
26 26 from IPython.parallel.factory import RegistrationFactory
27 27 from IPython.parallel.streamsession import Message
28 28 from IPython.parallel.util import disambiguate_url
29 29
30 30 from .streamkernel import Kernel
31 31
32 32 class EngineFactory(RegistrationFactory):
33 33 """IPython engine"""
34 34
35 35 # configurables:
36 36 out_stream_factory=Type('IPython.zmq.iostream.OutStream', config=True,
37 37 help="""The OutStream for handling stdout/err.
38 38 Typically 'IPython.zmq.iostream.OutStream'""")
39 39 display_hook_factory=Type('IPython.zmq.displayhook.DisplayHook', config=True,
40 40 help="""The class for handling displayhook.
41 41 Typically 'IPython.zmq.displayhook.DisplayHook'""")
42 location=Str(config=True,
42 location=Unicode(config=True,
43 43 help="""The location (an IP address) of the controller. This is
44 44 used for disambiguating URLs, to determine whether loopback
45 45 or the public address should be used to connect.""")
46 46 timeout=CFloat(2,config=True,
47 47 help="""The time (in seconds) to wait for the Controller to respond
48 48 to registration requests before giving up.""")
49 49
50 50 # not configurable:
51 51 user_ns=Dict()
52 52 id=Int(allow_none=True)
53 53 registrar=Instance('zmq.eventloop.zmqstream.ZMQStream')
54 54 kernel=Instance(Kernel)
55 55
56 56
57 57 def __init__(self, **kwargs):
58 58 super(EngineFactory, self).__init__(**kwargs)
59 59 self.ident = self.session.session
60 60 ctx = self.context
61 61
62 62 reg = ctx.socket(zmq.XREQ)
63 63 reg.setsockopt(zmq.IDENTITY, self.ident)
64 64 reg.connect(self.url)
65 65 self.registrar = zmqstream.ZMQStream(reg, self.loop)
66 66
67 67 def register(self):
68 68 """send the registration_request"""
69 69
70 70 self.log.info("registering")
71 71 content = dict(queue=self.ident, heartbeat=self.ident, control=self.ident)
72 72 self.registrar.on_recv(self.complete_registration)
73 73 # print (self.session.key)
74 74 self.session.send(self.registrar, "registration_request",content=content)
75 75
76 76 def complete_registration(self, msg):
77 77 # print msg
78 78 self._abort_dc.stop()
79 79 ctx = self.context
80 80 loop = self.loop
81 81 identity = self.ident
82 82
83 83 idents,msg = self.session.feed_identities(msg)
84 84 msg = Message(self.session.unpack_message(msg))
85 85
86 86 if msg.content.status == 'ok':
87 87 self.id = int(msg.content.id)
88 88
89 89 # create Shell Streams (MUX, Task, etc.):
90 90 queue_addr = msg.content.mux
91 91 shell_addrs = [ str(queue_addr) ]
92 92 task_addr = msg.content.task
93 93 if task_addr:
94 94 shell_addrs.append(str(task_addr))
95 95
96 96 # Uncomment this to go back to two-socket model
97 97 # shell_streams = []
98 98 # for addr in shell_addrs:
99 99 # stream = zmqstream.ZMQStream(ctx.socket(zmq.XREP), loop)
100 100 # stream.setsockopt(zmq.IDENTITY, identity)
101 101 # stream.connect(disambiguate_url(addr, self.location))
102 102 # shell_streams.append(stream)
103 103
104 104 # Now use only one shell stream for mux and tasks
105 105 stream = zmqstream.ZMQStream(ctx.socket(zmq.XREP), loop)
106 106 stream.setsockopt(zmq.IDENTITY, identity)
107 107 shell_streams = [stream]
108 108 for addr in shell_addrs:
109 109 stream.connect(disambiguate_url(addr, self.location))
110 110 # end single stream-socket
111 111
112 112 # control stream:
113 113 control_addr = str(msg.content.control)
114 114 control_stream = zmqstream.ZMQStream(ctx.socket(zmq.XREP), loop)
115 115 control_stream.setsockopt(zmq.IDENTITY, identity)
116 116 control_stream.connect(disambiguate_url(control_addr, self.location))
117 117
118 118 # create iopub stream:
119 119 iopub_addr = msg.content.iopub
120 120 iopub_stream = zmqstream.ZMQStream(ctx.socket(zmq.PUB), loop)
121 121 iopub_stream.setsockopt(zmq.IDENTITY, identity)
122 122 iopub_stream.connect(disambiguate_url(iopub_addr, self.location))
123 123
124 124 # launch heartbeat
125 125 hb_addrs = msg.content.heartbeat
126 126 # print (hb_addrs)
127 127
128 128 # # Redirect input streams and set a display hook.
129 129 if self.out_stream_factory:
130 130 sys.stdout = self.out_stream_factory(self.session, iopub_stream, u'stdout')
131 131 sys.stdout.topic = 'engine.%i.stdout'%self.id
132 132 sys.stderr = self.out_stream_factory(self.session, iopub_stream, u'stderr')
133 133 sys.stderr.topic = 'engine.%i.stderr'%self.id
134 134 if self.display_hook_factory:
135 135 sys.displayhook = self.display_hook_factory(self.session, iopub_stream)
136 136 sys.displayhook.topic = 'engine.%i.pyout'%self.id
137 137
138 138 self.kernel = Kernel(config=self.config, int_id=self.id, ident=self.ident, session=self.session,
139 139 control_stream=control_stream, shell_streams=shell_streams, iopub_stream=iopub_stream,
140 140 loop=loop, user_ns = self.user_ns, log=self.log)
141 141 self.kernel.start()
142 142 hb_addrs = [ disambiguate_url(addr, self.location) for addr in hb_addrs ]
143 143 heart = Heart(*map(str, hb_addrs), heart_id=identity)
144 144 # ioloop.DelayedCallback(heart.start, 1000, self.loop).start()
145 145 heart.start()
146 146
147 147
148 148 else:
149 149 self.log.fatal("Registration Failed: %s"%msg)
150 150 raise Exception("Registration Failed: %s"%msg)
151 151
152 152 self.log.info("Completed registration with id %i"%self.id)
153 153
154 154
155 155 def abort(self):
156 156 self.log.fatal("Registration timed out after %.1f seconds"%self.timeout)
157 157 self.session.send(self.registrar, "unregistration_request", content=dict(id=self.id))
158 158 time.sleep(1)
159 159 sys.exit(255)
160 160
161 161 def start(self):
162 162 dc = ioloop.DelayedCallback(self.register, 0, self.loop)
163 163 dc.start()
164 164 self._abort_dc = ioloop.DelayedCallback(self.abort, self.timeout*1000, self.loop)
165 165 self._abort_dc.start()
166 166
@@ -1,433 +1,433 b''
1 1 #!/usr/bin/env python
2 2 """
3 3 Kernel adapted from kernel.py to use ZMQ Streams
4 4 """
5 5 #-----------------------------------------------------------------------------
6 6 # Copyright (C) 2010-2011 The IPython Development Team
7 7 #
8 8 # Distributed under the terms of the BSD License. The full license is in
9 9 # the file COPYING, distributed as part of this software.
10 10 #-----------------------------------------------------------------------------
11 11
12 12 #-----------------------------------------------------------------------------
13 13 # Imports
14 14 #-----------------------------------------------------------------------------
15 15
16 16 # Standard library imports.
17 17 from __future__ import print_function
18 18
19 19 import sys
20 20 import time
21 21
22 22 from code import CommandCompiler
23 23 from datetime import datetime
24 24 from pprint import pprint
25 25
26 26 # System library imports.
27 27 import zmq
28 28 from zmq.eventloop import ioloop, zmqstream
29 29
30 30 # Local imports.
31 from IPython.utils.traitlets import Instance, List, Int, Dict, Set, Str, CStr
31 from IPython.utils.traitlets import Instance, List, Int, Dict, Set, Unicode
32 32 from IPython.zmq.completer import KernelCompleter
33 33
34 34 from IPython.parallel.error import wrap_exception
35 35 from IPython.parallel.factory import SessionFactory
36 36 from IPython.parallel.util import serialize_object, unpack_apply_message, ISO8601
37 37
38 38 def printer(*args):
39 39 pprint(args, stream=sys.__stdout__)
40 40
41 41
42 42 class _Passer(zmqstream.ZMQStream):
43 43 """Empty class implementing a `send()` that does nothing.
44 44
45 45 Subclass ZMQStream for StreamSession typechecking
46 46
47 47 """
48 48 def __init__(self, *args, **kwargs):
49 49 pass
50 50
51 51 def send(self, *args, **kwargs):
52 52 pass
53 53 send_multipart = send
54 54
55 55
56 56 #-----------------------------------------------------------------------------
57 57 # Main kernel class
58 58 #-----------------------------------------------------------------------------
59 59
60 60 class Kernel(SessionFactory):
61 61
62 62 #---------------------------------------------------------------------------
63 63 # Kernel interface
64 64 #---------------------------------------------------------------------------
65 65
66 66 # kwargs:
67 exec_lines = List(CStr, config=True,
67 exec_lines = List(Unicode, config=True,
68 68 help="List of lines to execute")
69 69
70 70 int_id = Int(-1)
71 71 user_ns = Dict(config=True, help="""Set the user's namespace of the Kernel""")
72 72
73 73 control_stream = Instance(zmqstream.ZMQStream)
74 74 task_stream = Instance(zmqstream.ZMQStream)
75 75 iopub_stream = Instance(zmqstream.ZMQStream)
76 76 client = Instance('IPython.parallel.Client')
77 77
78 78 # internals
79 79 shell_streams = List()
80 80 compiler = Instance(CommandCompiler, (), {})
81 81 completer = Instance(KernelCompleter)
82 82
83 83 aborted = Set()
84 84 shell_handlers = Dict()
85 85 control_handlers = Dict()
86 86
87 87 def _set_prefix(self):
88 88 self.prefix = "engine.%s"%self.int_id
89 89
90 90 def _connect_completer(self):
91 91 self.completer = KernelCompleter(self.user_ns)
92 92
93 93 def __init__(self, **kwargs):
94 94 super(Kernel, self).__init__(**kwargs)
95 95 self._set_prefix()
96 96 self._connect_completer()
97 97
98 98 self.on_trait_change(self._set_prefix, 'id')
99 99 self.on_trait_change(self._connect_completer, 'user_ns')
100 100
101 101 # Build dict of handlers for message types
102 102 for msg_type in ['execute_request', 'complete_request', 'apply_request',
103 103 'clear_request']:
104 104 self.shell_handlers[msg_type] = getattr(self, msg_type)
105 105
106 106 for msg_type in ['shutdown_request', 'abort_request']+self.shell_handlers.keys():
107 107 self.control_handlers[msg_type] = getattr(self, msg_type)
108 108
109 109 self._initial_exec_lines()
110 110
111 111 def _wrap_exception(self, method=None):
112 112 e_info = dict(engine_uuid=self.ident, engine_id=self.int_id, method=method)
113 113 content=wrap_exception(e_info)
114 114 return content
115 115
116 116 def _initial_exec_lines(self):
117 117 s = _Passer()
118 118 content = dict(silent=True, user_variables=[], user_expressions=[])
119 119 for line in self.exec_lines:
120 120 self.log.debug("executing initialization: %s"%line)
121 121 content.update({'code':line})
122 122 msg = self.session.msg('execute_request', content)
123 123 self.execute_request(s, [], msg)
124 124
125 125
126 126 #-------------------- control handlers -----------------------------
127 127 def abort_queues(self):
128 128 for stream in self.shell_streams:
129 129 if stream:
130 130 self.abort_queue(stream)
131 131
132 132 def abort_queue(self, stream):
133 133 while True:
134 134 try:
135 135 msg = self.session.recv(stream, zmq.NOBLOCK,content=True)
136 136 except zmq.ZMQError as e:
137 137 if e.errno == zmq.EAGAIN:
138 138 break
139 139 else:
140 140 return
141 141 else:
142 142 if msg is None:
143 143 return
144 144 else:
145 145 idents,msg = msg
146 146
147 147 # assert self.reply_socket.rcvmore(), "Unexpected missing message part."
148 148 # msg = self.reply_socket.recv_json()
149 149 self.log.info("Aborting:")
150 150 self.log.info(str(msg))
151 151 msg_type = msg['msg_type']
152 152 reply_type = msg_type.split('_')[0] + '_reply'
153 153 # reply_msg = self.session.msg(reply_type, {'status' : 'aborted'}, msg)
154 154 # self.reply_socket.send(ident,zmq.SNDMORE)
155 155 # self.reply_socket.send_json(reply_msg)
156 156 reply_msg = self.session.send(stream, reply_type,
157 157 content={'status' : 'aborted'}, parent=msg, ident=idents)[0]
158 158 self.log.debug(str(reply_msg))
159 159 # We need to wait a bit for requests to come in. This can probably
160 160 # be set shorter for true asynchronous clients.
161 161 time.sleep(0.05)
162 162
163 163 def abort_request(self, stream, ident, parent):
164 164 """abort a specific msg by id"""
165 165 msg_ids = parent['content'].get('msg_ids', None)
166 166 if isinstance(msg_ids, basestring):
167 167 msg_ids = [msg_ids]
168 168 if not msg_ids:
169 169 self.abort_queues()
170 170 for mid in msg_ids:
171 171 self.aborted.add(str(mid))
172 172
173 173 content = dict(status='ok')
174 174 reply_msg = self.session.send(stream, 'abort_reply', content=content,
175 175 parent=parent, ident=ident)
176 176 self.log.debug(str(reply_msg))
177 177
178 178 def shutdown_request(self, stream, ident, parent):
179 179 """Kill ourselves. This should really be handled in an external process."""
180 180 try:
181 181 self.abort_queues()
182 182 except:
183 183 content = self._wrap_exception('shutdown')
184 184 else:
185 185 content = dict(parent['content'])
186 186 content['status'] = 'ok'
187 187 msg = self.session.send(stream, 'shutdown_reply',
188 188 content=content, parent=parent, ident=ident)
189 189 self.log.debug(str(msg))
190 190 dc = ioloop.DelayedCallback(lambda : sys.exit(0), 1000, self.loop)
191 191 dc.start()
192 192
193 193 def dispatch_control(self, msg):
194 194 idents,msg = self.session.feed_identities(msg, copy=False)
195 195 try:
196 196 msg = self.session.unpack_message(msg, content=True, copy=False)
197 197 except:
198 198 self.log.error("Invalid Message", exc_info=True)
199 199 return
200 200
201 201 header = msg['header']
202 202 msg_id = header['msg_id']
203 203
204 204 handler = self.control_handlers.get(msg['msg_type'], None)
205 205 if handler is None:
206 206 self.log.error("UNKNOWN CONTROL MESSAGE TYPE: %r"%msg['msg_type'])
207 207 else:
208 208 handler(self.control_stream, idents, msg)
209 209
210 210
211 211 #-------------------- queue helpers ------------------------------
212 212
213 213 def check_dependencies(self, dependencies):
214 214 if not dependencies:
215 215 return True
216 216 if len(dependencies) == 2 and dependencies[0] in 'any all'.split():
217 217 anyorall = dependencies[0]
218 218 dependencies = dependencies[1]
219 219 else:
220 220 anyorall = 'all'
221 221 results = self.client.get_results(dependencies,status_only=True)
222 222 if results['status'] != 'ok':
223 223 return False
224 224
225 225 if anyorall == 'any':
226 226 if not results['completed']:
227 227 return False
228 228 else:
229 229 if results['pending']:
230 230 return False
231 231
232 232 return True
233 233
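The dependency spec accepted above is either a flat list of msg_ids (treated as 'all') or a two-element ['any'|'all', msg_ids] pair, for example:

    deps_all = ['id1', 'id2']            # met only when every id has completed
    deps_any = ['any', ['id1', 'id2']]   # met as soon as any one id has completed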
234 234 def check_aborted(self, msg_id):
235 235 return msg_id in self.aborted
236 236
237 237 #-------------------- queue handlers -----------------------------
238 238
239 239 def clear_request(self, stream, idents, parent):
240 240 """Clear our namespace."""
241 241 self.user_ns = {}
242 242 msg = self.session.send(stream, 'clear_reply', ident=idents, parent=parent,
243 243 content = dict(status='ok'))
244 244 self._initial_exec_lines()
245 245
246 246 def execute_request(self, stream, ident, parent):
247 247 self.log.debug('execute request %s'%parent)
248 248 try:
249 249 code = parent[u'content'][u'code']
250 250 except:
251 251 self.log.error("Got bad msg: %s"%parent, exc_info=True)
252 252 return
253 253 self.session.send(self.iopub_stream, u'pyin', {u'code':code},parent=parent,
254 254 ident='%s.pyin'%self.prefix)
255 255 started = datetime.now().strftime(ISO8601)
256 256 try:
257 257 comp_code = self.compiler(code, '<zmq-kernel>')
258 258 # allow for not overriding displayhook
259 259 if hasattr(sys.displayhook, 'set_parent'):
260 260 sys.displayhook.set_parent(parent)
261 261 sys.stdout.set_parent(parent)
262 262 sys.stderr.set_parent(parent)
263 263 exec comp_code in self.user_ns, self.user_ns
264 264 except:
265 265 exc_content = self._wrap_exception('execute')
266 266 # exc_msg = self.session.msg(u'pyerr', exc_content, parent)
267 267 self.session.send(self.iopub_stream, u'pyerr', exc_content, parent=parent,
268 268 ident='%s.pyerr'%self.prefix)
269 269 reply_content = exc_content
270 270 else:
271 271 reply_content = {'status' : 'ok'}
272 272
273 273 reply_msg = self.session.send(stream, u'execute_reply', reply_content, parent=parent,
274 274 ident=ident, subheader = dict(started=started))
275 275 self.log.debug(str(reply_msg))
276 276 if reply_msg['content']['status'] == u'error':
277 277 self.abort_queues()
278 278
279 279 def complete_request(self, stream, ident, parent):
280 280 matches = {'matches' : self.complete(parent),
281 281 'status' : 'ok'}
282 282 completion_msg = self.session.send(stream, 'complete_reply',
283 283 matches, parent, ident)
284 284 # print >> sys.__stdout__, completion_msg
285 285
286 286 def complete(self, msg):
287 287 return self.completer.complete(msg.content.line, msg.content.text)
288 288
289 289 def apply_request(self, stream, ident, parent):
290 290 # flush previous reply, so this request won't block it
291 291 stream.flush(zmq.POLLOUT)
292 292
293 293 try:
294 294 content = parent[u'content']
295 295 bufs = parent[u'buffers']
296 296 msg_id = parent['header']['msg_id']
297 297 # bound = parent['header'].get('bound', False)
298 298 except:
299 299 self.log.error("Got bad msg: %s"%parent, exc_info=True)
300 300 return
301 301 # pyin_msg = self.session.msg(u'pyin',{u'code':code}, parent=parent)
302 302 # self.iopub_stream.send(pyin_msg)
303 303 # self.session.send(self.iopub_stream, u'pyin', {u'code':code},parent=parent)
304 304 sub = {'dependencies_met' : True, 'engine' : self.ident,
305 305 'started': datetime.now().strftime(ISO8601)}
306 306 try:
307 307 # allow for not overriding displayhook
308 308 if hasattr(sys.displayhook, 'set_parent'):
309 309 sys.displayhook.set_parent(parent)
310 310 sys.stdout.set_parent(parent)
311 311 sys.stderr.set_parent(parent)
312 312 # exec "f(*args,**kwargs)" in self.user_ns, self.user_ns
313 313 working = self.user_ns
314 314 # suffix =
315 315 prefix = "_"+str(msg_id).replace("-","")+"_"
316 316
317 317 f,args,kwargs = unpack_apply_message(bufs, working, copy=False)
318 318 # if bound:
319 319 # bound_ns = Namespace(working)
320 320 # args = [bound_ns]+list(args)
321 321
322 322 fname = getattr(f, '__name__', 'f')
323 323
324 324 fname = prefix+"f"
325 325 argname = prefix+"args"
326 326 kwargname = prefix+"kwargs"
327 327 resultname = prefix+"result"
328 328
329 329 ns = { fname : f, argname : args, kwargname : kwargs , resultname : None }
330 330 # print ns
331 331 working.update(ns)
332 332 code = "%s=%s(*%s,**%s)"%(resultname, fname, argname, kwargname)
333 333 try:
334 334 exec code in working,working
335 335 result = working.get(resultname)
336 336 finally:
337 337 for key in ns.iterkeys():
338 338 working.pop(key)
339 339 # if bound:
340 340 # working.update(bound_ns)
341 341
342 342 packed_result,buf = serialize_object(result)
343 343 result_buf = [packed_result]+buf
344 344 except:
345 345 exc_content = self._wrap_exception('apply')
346 346 # exc_msg = self.session.msg(u'pyerr', exc_content, parent)
347 347 self.session.send(self.iopub_stream, u'pyerr', exc_content, parent=parent,
348 348 ident='%s.pyerr'%self.prefix)
349 349 reply_content = exc_content
350 350 result_buf = []
351 351
352 352 if exc_content['ename'] == 'UnmetDependency':
353 353 sub['dependencies_met'] = False
354 354 else:
355 355 reply_content = {'status' : 'ok'}
356 356
357 357 # put 'ok'/'error' status in header, for scheduler introspection:
358 358 sub['status'] = reply_content['status']
359 359
360 360 reply_msg = self.session.send(stream, u'apply_reply', reply_content,
361 361 parent=parent, ident=ident,buffers=result_buf, subheader=sub)
362 362
363 363 # flush i/o
364 364 # should this be before reply_msg is sent, like in the single-kernel code,
365 365 # or should nothing get in the way of real results?
366 366 sys.stdout.flush()
367 367 sys.stderr.flush()
368 368
369 369 def dispatch_queue(self, stream, msg):
370 370 self.control_stream.flush()
371 371 idents,msg = self.session.feed_identities(msg, copy=False)
372 372 try:
373 373 msg = self.session.unpack_message(msg, content=True, copy=False)
374 374 except:
375 375 self.log.error("Invalid Message", exc_info=True)
376 376 return
377 377
378 378
379 379 header = msg['header']
380 380 msg_id = header['msg_id']
381 381 if self.check_aborted(msg_id):
382 382 self.aborted.remove(msg_id)
383 383 # is it safe to assume a msg_id will not be resubmitted?
384 384 reply_type = msg['msg_type'].split('_')[0] + '_reply'
385 385 status = {'status' : 'aborted'}
386 386 reply_msg = self.session.send(stream, reply_type, subheader=status,
387 387 content=status, parent=msg, ident=idents)
388 388 return
389 389 handler = self.shell_handlers.get(msg['msg_type'], None)
390 390 if handler is None:
391 391 self.log.error("UNKNOWN MESSAGE TYPE: %r"%msg['msg_type'])
392 392 else:
393 393 handler(stream, idents, msg)
394 394
395 395 def start(self):
396 396 #### stream mode:
397 397 if self.control_stream:
398 398 self.control_stream.on_recv(self.dispatch_control, copy=False)
399 399 self.control_stream.on_err(printer)
400 400
401 401 def make_dispatcher(stream):
402 402 def dispatcher(msg):
403 403 return self.dispatch_queue(stream, msg)
404 404 return dispatcher
405 405
406 406 for s in self.shell_streams:
407 407 s.on_recv(make_dispatcher(s), copy=False)
408 408 s.on_err(printer)
409 409
410 410 if self.iopub_stream:
411 411 self.iopub_stream.on_err(printer)
412 412
413 413 #### while True mode:
414 414 # while True:
415 415 # idle = True
416 416 # try:
417 417 # msg = self.shell_stream.socket.recv_multipart(
418 418 # zmq.NOBLOCK, copy=False)
419 419 # except zmq.ZMQError, e:
420 420 # if e.errno != zmq.EAGAIN:
421 421 # raise e
422 422 # else:
423 423 # idle=False
424 424 # self.dispatch_queue(self.shell_stream, msg)
425 425 #
426 426 # if not self.task_stream.empty():
427 427 # idle=False
428 428 # msg = self.task_stream.recv_multipart()
429 429 # self.dispatch_queue(self.task_stream, msg)
430 430 # if idle:
431 431 # # don't busywait
432 432 # time.sleep(1e-3)
433 433
@@ -1,95 +1,95 b''
1 1 """Base config factories."""
2 2
3 3 #-----------------------------------------------------------------------------
4 4 # Copyright (C) 2008-2009 The IPython Development Team
5 5 #
6 6 # Distributed under the terms of the BSD License. The full license is in
7 7 # the file COPYING, distributed as part of this software.
8 8 #-----------------------------------------------------------------------------
9 9
10 10 #-----------------------------------------------------------------------------
11 11 # Imports
12 12 #-----------------------------------------------------------------------------
13 13
14 14
15 15 import logging
16 16 import os
17 17
18 18 from zmq.eventloop.ioloop import IOLoop
19 19
20 20 from IPython.config.configurable import Configurable
21 from IPython.utils.traitlets import Str,Int,Instance, CUnicode, CStr
21 from IPython.utils.traitlets import Int, Instance, Unicode
22 22
23 23 import IPython.parallel.streamsession as ss
24 24 from IPython.parallel.util import select_random_ports
25 25
26 26 #-----------------------------------------------------------------------------
27 27 # Classes
28 28 #-----------------------------------------------------------------------------
29 29 class LoggingFactory(Configurable):
30 30 """A most basic class that has a `log` (type: `Logger`) attribute, set via a `logname` Trait."""
31 31 log = Instance('logging.Logger', ('ZMQ', logging.WARN))
32 logname = CUnicode('ZMQ')
32 logname = Unicode('ZMQ')
33 33 def _logname_changed(self, name, old, new):
34 34 self.log = logging.getLogger(new)
35 35
36 36
37 37 class SessionFactory(LoggingFactory):
38 38 """The Base factory from which every factory in IPython.parallel inherits"""
39 39
40 40 # not configurable:
41 41 context = Instance('zmq.Context', (), {})
42 42 session = Instance('IPython.parallel.streamsession.StreamSession')
43 43 loop = Instance('zmq.eventloop.ioloop.IOLoop', allow_none=False)
44 44 def _loop_default(self):
45 45 return IOLoop.instance()
46 46
47 47
48 48 def __init__(self, **kwargs):
49 49 super(SessionFactory, self).__init__(**kwargs)
50 50
51 51 # construct the session
52 52 self.session = ss.StreamSession(**kwargs)
53 53
54 54
55 55 class RegistrationFactory(SessionFactory):
56 56 """The Base Configurable for objects that involve registration."""
57 57
58 url = Str('', config=True,
58 url = Unicode('', config=True,
59 59 help="""The 0MQ url used for registration. This sets transport, ip, and port
60 60 in one variable. For example: url='tcp://127.0.0.1:12345' or
61 61 url='epgm://*:90210'""") # url takes precedence over ip,regport,transport
62 transport = Str('tcp', config=True,
62 transport = Unicode('tcp', config=True,
63 63 help="""The 0MQ transport for communications. This will likely be
64 64 the default of 'tcp', but other values include 'ipc', 'epgm', 'inproc'.""")
65 ip = Str('127.0.0.1', config=True,
65 ip = Unicode('127.0.0.1', config=True,
66 66 help="""The IP address for registration. This is generally either
67 67 '127.0.0.1' for loopback only or '*' for all interfaces.
68 68 [default: '127.0.0.1']""")
69 69 regport = Int(config=True,
70 70 help="""The port on which the Hub listens for registration.""")
71 71 def _regport_default(self):
72 72 return select_random_ports(1)[0]
73 73
74 74 def __init__(self, **kwargs):
75 75 super(RegistrationFactory, self).__init__(**kwargs)
76 76 self._propagate_url()
77 77 self._rebuild_url()
78 78 self.on_trait_change(self._propagate_url, 'url')
79 79 self.on_trait_change(self._rebuild_url, 'ip')
80 80 self.on_trait_change(self._rebuild_url, 'transport')
81 81 self.on_trait_change(self._rebuild_url, 'regport')
82 82
83 83 def _rebuild_url(self):
84 84 self.url = "%s://%s:%i"%(self.transport, self.ip, self.regport)
85 85
86 86 def _propagate_url(self):
87 87 """Ensure self.url contains full transport://interface:port"""
88 88 if self.url:
89 89 iface = self.url.split('://',1)
90 90 if len(iface) == 2:
91 91 self.transport,iface = iface
92 92 iface = iface.split(':')
93 93 self.ip = iface[0]
94 94 if iface[1]:
95 95 self.regport = int(iface[1])
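The two-way sync above means setting either the composite `url` or any of its parts updates the others; a sketch of the round trip (values illustrative):

    f = RegistrationFactory(url='tcp://10.0.0.5:12345')
    # _propagate_url splits it: transport='tcp', ip='10.0.0.5', regport=12345
    f.ip = '127.0.0.1'
    # _rebuild_url regenerates: f.url == 'tcp://127.0.0.1:12345'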
@@ -1,446 +1,446 b''
1 1 #!/usr/bin/env python
2 2 """edited session.py to work with streams, and move msg_type to the header
3 3 """
4 4 #-----------------------------------------------------------------------------
5 5 # Copyright (C) 2010-2011 The IPython Development Team
6 6 #
7 7 # Distributed under the terms of the BSD License. The full license is in
8 8 # the file COPYING, distributed as part of this software.
9 9 #-----------------------------------------------------------------------------
10 10
11 11
12 12 import os
13 13 import pprint
14 14 import uuid
15 15 from datetime import datetime
16 16
17 17 try:
18 18 import cPickle
19 19 pickle = cPickle
20 20 except:
21 21 cPickle = None
22 22 import pickle
23 23
24 24 import zmq
25 25 from zmq.utils import jsonapi
26 26 from zmq.eventloop.zmqstream import ZMQStream
27 27
28 28 from IPython.config.configurable import Configurable
29 29 from IPython.utils.importstring import import_item
30 from IPython.utils.traitlets import Str, CStr, CUnicode, Bool, Any
30 from IPython.utils.traitlets import CStr, Unicode, Bool, Any
31 31
32 32 from .util import ISO8601
33 33
34 34
35 35 def squash_unicode(obj):
36 36 """coerce unicode back to bytestrings."""
37 37 if isinstance(obj,dict):
38 38 for key in obj.keys():
39 39 obj[key] = squash_unicode(obj[key])
40 40 if isinstance(key, unicode):
41 41 obj[squash_unicode(key)] = obj.pop(key)
42 42 elif isinstance(obj, list):
43 43 for i,v in enumerate(obj):
44 44 obj[i] = squash_unicode(v)
45 45 elif isinstance(obj, unicode):
46 46 obj = obj.encode('utf8')
47 47 return obj
48 48
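A quick illustration of the coercion under Python 2 semantics:

    squash_unicode({u'a': [u'b', 1]})   # -> {'a': ['b', 1]}, keys and values as str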
49 49 def _date_default(obj):
50 50 if isinstance(obj, datetime):
51 51 return obj.strftime(ISO8601)
52 52 else:
53 53 raise TypeError("%r is not JSON serializable"%obj)
54 54
55 55 _default_key = 'on_unknown' if jsonapi.jsonmod.__name__ == 'jsonlib' else 'default'
56 56 json_packer = lambda obj: jsonapi.dumps(obj, **{_default_key:_date_default})
57 57 json_unpacker = lambda s: squash_unicode(jsonapi.loads(s))
58 58
59 59 pickle_packer = lambda o: pickle.dumps(o,-1)
60 60 pickle_unpacker = pickle.loads
61 61
62 62 default_packer = json_packer
63 63 default_unpacker = json_unpacker
64 64
65 65
66 66 DELIM="<IDS|MSG>"
67 67
68 68 class Message(object):
69 69 """A simple message object that maps dict keys to attributes.
70 70
71 71 A Message can be created from a dict and a dict from a Message instance
72 72 simply by calling dict(msg_obj)."""
73 73
74 74 def __init__(self, msg_dict):
75 75 dct = self.__dict__
76 76 for k, v in dict(msg_dict).iteritems():
77 77 if isinstance(v, dict):
78 78 v = Message(v)
79 79 dct[k] = v
80 80
81 81 # Having this iterator lets dict(msg_obj) work out of the box.
82 82 def __iter__(self):
83 83 return iter(self.__dict__.iteritems())
84 84
85 85 def __repr__(self):
86 86 return repr(self.__dict__)
87 87
88 88 def __str__(self):
89 89 return pprint.pformat(self.__dict__)
90 90
91 91 def __contains__(self, k):
92 92 return k in self.__dict__
93 93
94 94 def __getitem__(self, k):
95 95 return self.__dict__[k]
96 96
97 97
98 98 def msg_header(msg_id, msg_type, username, session):
99 99 date=datetime.now().strftime(ISO8601)
100 100 return locals()
101 101
102 102 def extract_header(msg_or_header):
103 103 """Given a message or header, return the header."""
104 104 if not msg_or_header:
105 105 return {}
106 106 try:
107 107 # See if msg_or_header is the entire message.
108 108 h = msg_or_header['header']
109 109 except KeyError:
110 110 try:
111 111 # See if msg_or_header is just the header
112 112 h = msg_or_header['msg_id']
113 113 except KeyError:
114 114 raise
115 115 else:
116 116 h = msg_or_header
117 117 if not isinstance(h, dict):
118 118 h = dict(h)
119 119 return h
120 120
121 121 class StreamSession(Configurable):
122 122 """tweaked version of IPython.zmq.session.Session, for development in Parallel"""
123 123 debug=Bool(False, config=True, help="""Debug output in the StreamSession""")
124 packer = Str('json',config=True,
124 packer = Unicode('json',config=True,
125 125 help="""The name of the packer for serializing messages.
126 126 Should be one of 'json', 'pickle', or an import name
127 127 for a custom serializer.""")
128 128 def _packer_changed(self, name, old, new):
129 129 if new.lower() == 'json':
130 130 self.pack = json_packer
131 131 self.unpack = json_unpacker
132 132 elif new.lower() == 'pickle':
133 133 self.pack = pickle_packer
134 134 self.unpack = pickle_unpacker
135 135 else:
136 136 self.pack = import_item(new)
137 137
138 unpacker = Str('json',config=True,
138 unpacker = Unicode('json',config=True,
139 139 help="""The name of the unpacker for deserializing messages.
140 140 Only used with custom functions for `packer`.""")
141 141 def _unpacker_changed(self, name, old, new):
142 142 if new.lower() == 'json':
143 143 self.pack = json_packer
144 144 self.unpack = json_unpacker
145 145 elif new.lower() == 'pickle':
146 146 self.pack = pickle_packer
147 147 self.unpack = pickle_unpacker
148 148 else:
149 149 self.unpack = import_item(new)
150 150
151 151 session = CStr('',config=True,
152 152 help="""The UUID identifying this session.""")
153 153 def _session_default(self):
154 return str(uuid.uuid4())
155 username = CUnicode(os.environ.get('USER','username'),config=True,
154 return bytes(uuid.uuid4())
155 username = Unicode(os.environ.get('USER','username'),config=True,
156 156 help="""Username for the Session. Default is your system username.""")
157 157 key = CStr('', config=True,
158 158 help="""execution key, for extra authentication.""")
159 159
160 keyfile = CUnicode('', config=True,
160 keyfile = Unicode('', config=True,
161 161 help="""path to file containing execution key.""")
162 162 def _keyfile_changed(self, name, old, new):
163 163 with open(new, 'rb') as f:
164 164 self.key = f.read().strip()
165 165
166 166 pack = Any(default_packer) # the actual packer function
167 167 def _pack_changed(self, name, old, new):
168 168 if not callable(new):
169 169 raise TypeError("packer must be callable, not %s"%type(new))
170 170
171 171 unpack = Any(default_unpacker) # the actual unpacker function
172 172 def _unpack_changed(self, name, old, new):
173 173 if not callable(new):
174 174 raise TypeError("packer must be callable, not %s"%type(new))
175 175
176 176 def __init__(self, **kwargs):
177 177 super(StreamSession, self).__init__(**kwargs)
178 178 self.none = self.pack({})
179 179
180 180 @property
181 181 def msg_id(self):
182 182 """always return new uuid"""
183 183 return str(uuid.uuid4())
184 184
185 185 def msg_header(self, msg_type):
186 186 return msg_header(self.msg_id, msg_type, self.username, self.session)
187 187
188 188 def msg(self, msg_type, content=None, parent=None, subheader=None):
189 189 msg = {}
190 190 msg['header'] = self.msg_header(msg_type)
191 191 msg['msg_id'] = msg['header']['msg_id']
192 192 msg['parent_header'] = {} if parent is None else extract_header(parent)
193 193 msg['msg_type'] = msg_type
194 194 msg['content'] = {} if content is None else content
195 195 sub = {} if subheader is None else subheader
196 196 msg['header'].update(sub)
197 197 return msg
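# The result is a plain dict, roughly (generated ids elided):
#
#   {'header': {'msg_id': ..., 'msg_type': 'execute_request',
#               'username': ..., 'session': ..., 'date': ...},
#    'msg_id': ..., 'msg_type': 'execute_request',
#    'parent_header': {}, 'content': {'code': 'a=1'}}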
198 198
199 199 def check_key(self, msg_or_header):
200 200 """Check that a message's header has the right key"""
201 201 if not self.key:
202 202 return True
203 203 header = extract_header(msg_or_header)
204 204 return header.get('key', '') == self.key
205 205
206 206
207 207 def serialize(self, msg, ident=None):
208 208 content = msg.get('content', {})
209 209 if content is None:
210 210 content = self.none
211 211 elif isinstance(content, dict):
212 212 content = self.pack(content)
213 213 elif isinstance(content, bytes):
214 214 # content is already packed, as in a relayed message
215 215 pass
216 216 elif isinstance(content, unicode):
217 217 # should be bytes, but JSON often spits out unicode
218 218 content = content.encode('utf8')
219 219 else:
220 220 raise TypeError("Content incorrect type: %s"%type(content))
221 221
222 222 to_send = []
223 223
224 224 if isinstance(ident, list):
225 225 # accept list of idents
226 226 to_send.extend(ident)
227 227 elif ident is not None:
228 228 to_send.append(ident)
229 229 to_send.append(DELIM)
230 230 if self.key:
231 231 to_send.append(self.key)
232 232 to_send.append(self.pack(msg['header']))
233 233 to_send.append(self.pack(msg['parent_header']))
234 234 to_send.append(content)
235 235
236 236 return to_send
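# The resulting multipart wire format, in frame order (the key frame is
# present only when self.key is set):
#
#   [ident, ..., DELIM, key, pack(header), pack(parent_header), packed_content]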
237 237
238 238 def send(self, stream, msg_or_type, content=None, buffers=None, parent=None, subheader=None, ident=None, track=False):
239 239 """Build and send a message via stream or socket.
240 240
241 241 Parameters
242 242 ----------
243 243
244 244 stream : zmq.Socket or ZMQStream
245 245 the socket-like object used to send the data
246 246 msg_or_type : str or Message/dict
247 247 Normally, msg_or_type will be a msg_type string, unless an already-built
248 248 message is being re-sent.
249 249
250 250 content : dict or None
251 251 the content of the message (ignored if msg_or_type is a message)
252 252 buffers : list or None
253 253 the already-serialized buffers to be appended to the message
254 254 parent : Message or dict or None
255 255 the parent or parent header describing the parent of this message
256 256 subheader : dict or None
257 257 extra header keys for this message's header
258 258 ident : bytes or list of bytes
259 259 the zmq.IDENTITY routing path
260 260 track : bool
261 261 whether to track. Only for use with Sockets, because ZMQStream objects cannot track messages.
262 262
263 263 Returns
264 264 -------
265 265 msg : message dict
266 266 the constructed message
267 267 (msg,tracker) : (message dict, MessageTracker)
268 268 if track=True, then a 2-tuple will be returned, the first element being the constructed
269 269 message, and the second being the MessageTracker
270 270
271 271 """
272 272
273 273 if not isinstance(stream, (zmq.Socket, ZMQStream)):
274 274 raise TypeError("stream must be Socket or ZMQStream, not %r"%type(stream))
275 275 elif track and isinstance(stream, ZMQStream):
276 276 raise TypeError("ZMQStream cannot track messages")
277 277
278 278 if isinstance(msg_or_type, (Message, dict)):
279 279 # we got a Message, not a msg_type
280 280 # don't build a new Message
281 281 msg = msg_or_type
282 282 else:
283 283 msg = self.msg(msg_or_type, content, parent, subheader)
284 284
285 285 buffers = [] if buffers is None else buffers
286 286 to_send = self.serialize(msg, ident)
287 287 flag = 0
288 288 if buffers:
289 289 flag = zmq.SNDMORE
290 290 _track = False
291 291 else:
292 292 _track=track
293 293 if track:
294 294 tracker = stream.send_multipart(to_send, flag, copy=False, track=_track)
295 295 else:
296 296 tracker = stream.send_multipart(to_send, flag, copy=False)
297 297 for b in buffers[:-1]:
298 298 stream.send(b, flag, copy=False)
299 299 if buffers:
300 300 if track:
301 301 tracker = stream.send(buffers[-1], copy=False, track=track)
302 302 else:
303 303 tracker = stream.send(buffers[-1], copy=False)
304 304
305 305 # omsg = Message(msg)
306 306 if self.debug:
307 307 pprint.pprint(msg)
308 308 pprint.pprint(to_send)
309 309 pprint.pprint(buffers)
310 310
311 311 msg['tracker'] = tracker
312 312
313 313 return msg
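# Minimal sending sketch (the endpoint and socket type here are
# illustrative, not part of this module):
#
#   ctx = zmq.Context.instance()
#   sock = ctx.socket(zmq.XREQ)
#   sock.connect('tcp://127.0.0.1:10101')
#   session = StreamSession(username=u'case')
#   msg = session.send(sock, 'execute_request', content=dict(code='a=1'))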
314 314
315 315 def send_raw(self, stream, msg, flags=0, copy=True, ident=None):
316 316 """Send a raw message via ident path.
317 317
318 318 Parameters
319 319 ----------
320 320 msg : list of sendable buffers"""
321 321 to_send = []
322 322 if isinstance(ident, bytes):
323 323 ident = [ident]
324 324 if ident is not None:
325 325 to_send.extend(ident)
326 326 to_send.append(DELIM)
327 327 if self.key:
328 328 to_send.append(self.key)
329 329 to_send.extend(msg)
330 330 stream.send_multipart(to_send, flags, copy=copy)
331 331
332 332 def recv(self, socket, mode=zmq.NOBLOCK, content=True, copy=True):
333 333 """receives and unpacks a message
334 334 returns [idents], msg"""
335 335 if isinstance(socket, ZMQStream):
336 336 socket = socket.socket
337 337 try:
338 338 msg = socket.recv_multipart(mode, copy=copy)
339 339 except zmq.ZMQError as e:
340 340 if e.errno == zmq.EAGAIN:
341 341 # We can convert EAGAIN to None as we know in this case
342 342 # recv_multipart won't return None.
343 343 return None
344 344 else:
345 345 raise
346 346 # return an actual Message object
347 347 # determine the number of idents by trying to unpack them.
348 348 # this is terrible:
349 349 idents, msg = self.feed_identities(msg, copy)
350 350 try:
351 351 return idents, self.unpack_message(msg, content=content, copy=copy)
352 352 except Exception as e:
353 353 print (idents, msg)
354 354 # TODO: handle it
355 355 raise e
356 356
357 357 def feed_identities(self, msg, copy=True):
358 358 """feed until DELIM is reached, then return the prefix as idents and remainder as
359 359 msg. This is easily broken by setting an IDENT to DELIM, but that would be silly.
360 360
361 361 Parameters
362 362 ----------
363 363 msg : a list of Message or bytes objects
364 364 the message to be split
365 365 copy : bool
366 366 flag determining whether the arguments are bytes or Messages
367 367
368 368 Returns
369 369 -------
370 370 (idents,msg) : two lists
371 371 idents will always be a list of bytes - the identity prefix
372 372 msg will be a list of bytes or Messages, unchanged from input
373 373 msg should be unpackable via self.unpack_message at this point.
374 374 """
375 375 ikey = int(self.key != '')
376 376 minlen = 3 + ikey
377 377 msg = list(msg)
378 378 idents = []
379 379 while len(msg) > minlen:
380 380 if copy:
381 381 s = msg[0]
382 382 else:
383 383 s = msg[0].bytes
384 384 if s == DELIM:
385 385 msg.pop(0)
386 386 break
387 387 else:
388 388 idents.append(s)
389 389 msg.pop(0)
390 390
391 391 return idents, msg
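# Sketch: with no key set, every frame before DELIM is a routing identity:
#
#   >>> s = StreamSession()
#   >>> s.feed_identities(['engine-0', DELIM, 'hdr', 'parent', 'content'])
#   (['engine-0'], ['hdr', 'parent', 'content'])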
392 392
393 393 def unpack_message(self, msg, content=True, copy=True):
394 394 """Return a message object from the format
395 395 sent by self.send.
396 396
397 397 Parameters
398 398 ----------
399 399
400 400 content : bool (True)
401 401 whether to unpack the content dict (True),
402 402 or leave it serialized (False)
403 403
404 404 copy : bool (True)
405 405 whether to return the bytes (True),
406 406 or the non-copying zmq.Message object in each place (False)
407 407
408 408 """
409 409 ikey = int(self.key != '')
410 410 minlen = 3 + ikey
411 411 message = {}
412 412 if not copy:
413 413 for i in range(minlen):
414 414 msg[i] = msg[i].bytes
415 415 if ikey:
416 416 if self.key != msg[0]:
417 417 raise KeyError("Invalid Session Key: %s"%msg[0])
418 418 if len(msg) < minlen:
419 419 raise TypeError("malformed message, must have at least %i elements"%minlen)
420 420 message['header'] = self.unpack(msg[ikey+0])
421 421 message['msg_type'] = message['header']['msg_type']
422 422 message['parent_header'] = self.unpack(msg[ikey+1])
423 423 if content:
424 424 message['content'] = self.unpack(msg[ikey+2])
425 425 else:
426 426 message['content'] = msg[ikey+2]
427 427
428 428 message['buffers'] = msg[ikey+3:] # [ m.buffer for m in msg[3:] ]
429 429 return message
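# Receiving sketch (mirrors the sending sketch above): recv() chains
# feed_identities and unpack_message, so a blocking read is just:
#
#   idents, msg = session.recv(sock, mode=0)  # mode=0 blocks; default is NOBLOCK
#   print msg['msg_type'], msg['content']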
430 430
431 431
432 432 def test_msg2obj():
433 433 am = dict(x=1)
434 434 ao = Message(am)
435 435 assert ao.x == am['x']
436 436
437 437 am['y'] = dict(z=1)
438 438 ao = Message(am)
439 439 assert ao.y.z == am['y']['z']
440 440
441 441 k1, k2 = 'y', 'z'
442 442 assert ao[k1][k2] == am[k1][k2]
443 443
444 444 am2 = dict(ao)
445 445 assert am['x'] == am2['x']
446 446 assert am['y']['z'] == am2['y']['z']