launcher.py
1342 lines
| 46.0 KiB
| text/x-python
|
PythonLexer
MinRK
|
r3605 | # encoding: utf-8 | ||
""" | ||||
Facilities for launching IPython processes asynchronously. | ||||
MinRK
|
r4018 | |||
Authors: | ||||
* Brian Granger | ||||
* MinRK | ||||
MinRK
|
r3605 | """ | ||
#----------------------------------------------------------------------------- | ||||
MinRK
|
r4018 | # Copyright (C) 2008-2011 The IPython Development Team | ||
MinRK
|
r3605 | # | ||
# Distributed under the terms of the BSD License. The full license is in | ||||
# the file COPYING, distributed as part of this software. | ||||
#----------------------------------------------------------------------------- | ||||
#----------------------------------------------------------------------------- | ||||
# Imports | ||||
#----------------------------------------------------------------------------- | ||||
MinRK
|
r3647 | import copy | ||
MinRK
|
r3631 | import logging | ||
MinRK
|
r3605 | import os | ||
MinRK
|
r7995 | import pipes | ||
MinRK
|
r3659 | import stat | ||
MinRK
|
r6894 | import sys | ||
Ben Edwards
|
r4667 | import time | ||
MinRK
|
r3605 | |||
MinRK
|
r3778 | # signal imports, handling various platforms, versions | ||
MinRK
|
r3613 | from signal import SIGINT, SIGTERM | ||
MinRK
|
r3605 | try: | ||
from signal import SIGKILL | ||||
except ImportError: | ||||
MinRK
|
r3778 | # Windows | ||
MinRK
|
r3605 | SIGKILL=SIGTERM | ||
MinRK
|
r3778 | try: | ||
# Windows >= 2.7, 3.2 | ||||
from signal import CTRL_C_EVENT as SIGINT | ||||
except ImportError: | ||||
pass | ||||
MinRK
|
r3613 | from subprocess import Popen, PIPE, STDOUT | ||
try:
    from subprocess import check_output
except ImportError:
    # pre-2.7, define check_output with Popen
    def check_output(*args, **kwargs):
        """Run a command and return what it wrote to stdout.

        Fallback for interpreters whose :mod:`subprocess` has no
        ``check_output``.  Arguments are forwarded to ``Popen``; any
        ``stdout`` keyword is overridden so the output can be captured.

        Raises
        ------
        subprocess.CalledProcessError
            If the command exits with a non-zero status, matching the
            behavior of the standard-library function this stands in for.
        """
        from subprocess import CalledProcessError

        kwargs['stdout'] = PIPE
        p = Popen(*args, **kwargs)
        out, _ = p.communicate()
        retcode = p.poll()
        if retcode:
            # report the command the same way the stdlib version does
            command = kwargs.get('args')
            if command is None:
                command = args[0]
            raise CalledProcessError(retcode, command)
        return out
MinRK
|
r3605 | |||
from zmq.eventloop import ioloop | ||||
MinRK
|
r4012 | from IPython.config.application import Application | ||
MinRK
|
r4016 | from IPython.config.configurable import LoggingConfigurable | ||
MinRK
|
r4004 | from IPython.utils.text import EvalFormatter | ||
MinRK
|
r4848 | from IPython.utils.traitlets import ( | ||
Bradley M. Froehle
|
r6748 | Any, Integer, CFloat, List, Unicode, Dict, Instance, HasTraits, CRegExp | ||
MinRK
|
r4848 | ) | ||
MinRK
|
r6894 | from IPython.utils.path import get_home_dir | ||
from IPython.utils.process import find_cmd, FindCmdError | ||||
MinRK
|
r3605 | |||
MinRK
|
r3771 | from .win32support import forward_read_events | ||
MinRK
|
r3778 | from .winhpcjob import IPControllerTask, IPEngineTask, IPControllerJob, IPEngineSetJob | ||
MinRK
|
r3605 | |||
MinRK
|
# True when os.name reports the 'nt' (Windows) platform.
WINDOWS = (os.name == 'nt')
MinRK
|
r3772 | |||
MinRK
|
r3605 | #----------------------------------------------------------------------------- | ||
# Paths to the kernel apps | ||||
#----------------------------------------------------------------------------- | ||||
MinRK
|
# Template for the ``python -c`` one-liner that imports one of the
# IPython.parallel apps and runs its launch_new_instance().
cmd = "from IPython.parallel.apps.%s import launch_new_instance; launch_new_instance()"

# argv lists that run each app with the interpreter executing this module.
ipcluster_cmd_argv, ipengine_cmd_argv, ipcontroller_cmd_argv = (
    [sys.executable, "-c", cmd % app_module]
    for app_module in ("ipclusterapp", "ipengineapp", "ipcontrollerapp")
)
MinRK
|
r3605 | |||
#----------------------------------------------------------------------------- | ||||
# Base launchers and errors | ||||
#----------------------------------------------------------------------------- | ||||
class LauncherError(Exception):
    """Base class for the errors raised by the launchers in this module."""
class ProcessStateError(LauncherError):
    """Raised when a launcher is asked to act in the wrong state,
    e.g. to start a process that has already been started.
    """
class UnknownStatus(LauncherError):
    """Launcher error for a status that cannot be determined.

    NOTE(review): not raised anywhere in the visible part of this file;
    the description is inferred from the name -- confirm against users.
    """
MinRK
|
class BaseLauncher(LoggingConfigurable):
    """An abstraction for starting, stopping and signaling a process.

    This base class only tracks the lifecycle state (``'before'``,
    ``'running'``, ``'after'``), the start/stop data and the stop
    callbacks.  Subclasses provide :meth:`find_args`, :meth:`start`,
    :meth:`stop` and :meth:`signal`.
    """

    # In all of the launchers, the work_dir is where child processes will be
    # run. This will usually be the profile_dir, but may not be. Any work_dir
    # passed into the __init__ method will override the config value.
    # This should not be used to set the work_dir for the actual engine
    # and controller. Instead, use their own config files or the
    # controller_args, engine_args attributes of the launchers to add
    # the work_dir option.
    work_dir = Unicode(u'.')
    loop = Instance('zmq.eventloop.ioloop.IOLoop')

    # Values passed to notify_start() / notify_stop().
    start_data = Any()
    stop_data = Any()

    def _loop_default(self):
        """Default for ``loop``: the global IOLoop instance."""
        return ioloop.IOLoop.instance()

    def __init__(self, work_dir=u'.', config=None, **kwargs):
        super(BaseLauncher, self).__init__(work_dir=work_dir, config=config, **kwargs)
        # lifecycle state: one of 'before', 'running', 'after'
        self.state = 'before'
        self.stop_callbacks = []
        self.start_data = None
        self.stop_data = None

    @property
    def args(self):
        """A list of cmd and args that will be used to start the process.

        This is what is passed to :func:`spawnProcess` and the first element
        will be the process name.
        """
        return self.find_args()

    def find_args(self):
        """Build the args list returned by the ``.args`` property.

        Subclasses should implement this to construct the cmd and args.
        """
        raise NotImplementedError('find_args must be implemented in a subclass')

    @property
    def arg_str(self):
        """The program arguments joined into a single space-separated string."""
        return ' '.join(self.args)

    @property
    def running(self):
        """True while the launcher is in the ``'running'`` state."""
        return self.state == 'running'

    def start(self):
        """Start the process."""
        raise NotImplementedError('start must be implemented in a subclass')

    def stop(self):
        """Stop the process and notify observers of stopping.

        This method will return None immediately.
        To observe the actual process stopping, see :meth:`on_stop`.
        """
        raise NotImplementedError('stop must be implemented in a subclass')

    def on_stop(self, f):
        """Register a callback to be called with this Launcher's stop_data
        when the process actually finishes.

        If the process has already finished, ``f`` is called right away and
        its return value is returned.
        """
        if self.state != 'after':
            self.stop_callbacks.append(f)
            return None
        return f(self.stop_data)

    def notify_start(self, data):
        """Call this to trigger startup actions.

        This logs the process startup and sets the state to 'running'.  It is
        a pass-through so it can be used as a callback.
        """
        self.log.debug('Process %r started: %r', self.args[0], data)
        self.start_data = data
        self.state = 'running'
        return data

    def notify_stop(self, data):
        """Call this to trigger process stop actions.

        This logs the process stopping and sets the state to 'after'. Call
        this to trigger callbacks registered via :meth:`on_stop`.
        """
        self.log.debug('Process %r stopped: %r', self.args[0], data)
        self.stop_data = data
        self.state = 'after'
        # Run the callbacks registered so far, most recently added first,
        # removing each one from the list as it is called.
        remaining = len(self.stop_callbacks)
        while remaining:
            remaining -= 1
            callback = self.stop_callbacks.pop()
            callback(data)
        return data

    def signal(self, sig):
        """Signal the process.

        Parameters
        ----------
        sig : str or int
            'KILL', 'INT', etc., or any signal number
        """
        raise NotImplementedError('signal must be implemented in a subclass')
MinRK
|
class ClusterAppMixin(HasTraits):
    """MixIn for cluster args as traits"""

    profile_dir = Unicode('')
    cluster_id = Unicode('')

    @property
    def cluster_args(self):
        """Command-line arguments selecting the profile dir and cluster id."""
        argv = ['--profile-dir', self.profile_dir]
        argv.extend(['--cluster-id', self.cluster_id])
        return argv
MinRK
|
r4848 | |||
class ControllerMixin(ClusterAppMixin):
    """Configurable command and arguments used to launch ipcontroller."""

    controller_cmd = List(
        ipcontroller_cmd_argv,
        config=True,
        help="""Popen command to launch ipcontroller.""",
    )
    # Command line arguments to ipcontroller.
    controller_args = List(
        ['--log-to-file', '--log-level=%i' % logging.INFO],
        config=True,
        help="""command-line args to pass to ipcontroller""",
    )
class EngineMixin(ClusterAppMixin):
    """Configurable command and arguments used to launch an engine."""

    engine_cmd = List(
        ipengine_cmd_argv,
        config=True,
        help="""command to launch the Engine.""",
    )
    # Command line arguments for ipengine.
    engine_args = List(
        ['--log-to-file', '--log-level=%i' % logging.INFO],
        config=True,
        help="command-line arguments to pass to ipengine",
    )
MinRK
|
r3605 | |||
MinRK
|
r6418 | |||
MinRK
|
r3605 | #----------------------------------------------------------------------------- | ||
# Local process launchers | ||||
#----------------------------------------------------------------------------- | ||||
class LocalProcessLauncher(BaseLauncher):
    """Start and stop an external process in an asynchronous manner.

    This will launch the external process with a working directory of
    ``self.work_dir``.  The child's stdout and stderr are registered with
    the event loop and logged at debug level; a periodic callback polls
    for the process exit.
    """

    # This is used to construct self.args, which is passed to
    # spawnProcess.
    cmd_and_args = List([])
    poll_frequency = Integer(100)  # in ms

    def __init__(self, work_dir=u'.', config=None, **kwargs):
        super(LocalProcessLauncher, self).__init__(
            work_dir=work_dir, config=config, **kwargs
        )
        self.process = None
        self.poller = None

    def find_args(self):
        """Return the configured ``cmd_and_args`` list."""
        return self.cmd_and_args

    def start(self):
        """Spawn the process and hook its output and exit into the event loop.

        Raises
        ------
        ProcessStateError
            If the launcher is not in the ``'before'`` state.
        """
        self.log.debug("Starting %s: %r", self.__class__.__name__, self.args)
        if self.state != 'before':
            s = 'The process was already started and has state: %r' % self.state
            raise ProcessStateError(s)

        self.process = Popen(self.args,
            stdout=PIPE, stderr=PIPE, stdin=PIPE,
            env=os.environ,
            cwd=self.work_dir
        )
        if WINDOWS:
            # the pipes are wrapped by forward_read_events so that the
            # event loop can watch them on Windows
            self.stdout = forward_read_events(self.process.stdout)
            self.stderr = forward_read_events(self.process.stderr)
        else:
            self.stdout = self.process.stdout.fileno()
            self.stderr = self.process.stderr.fileno()
        self.loop.add_handler(self.stdout, self.handle_stdout, self.loop.READ)
        self.loop.add_handler(self.stderr, self.handle_stderr, self.loop.READ)
        self.poller = ioloop.PeriodicCallback(self.poll, self.poll_frequency, self.loop)
        self.poller.start()
        self.notify_start(self.process.pid)

    def stop(self):
        """Stop the process: interrupt it, then kill it after a delay."""
        return self.interrupt_then_kill()

    def signal(self, sig):
        """Send ``sig`` to the process; does nothing unless it is running."""
        if self.state == 'running':
            if WINDOWS and sig != SIGINT:
                # use Windows tree-kill for better child cleanup
                check_output(['taskkill', '-pid', str(self.process.pid), '-t', '-f'])
            else:
                self.process.send_signal(sig)

    def interrupt_then_kill(self, delay=2.0):
        """Send INT, wait a delay and then send KILL.

        ``delay`` is in seconds.  A failure to deliver the interrupt is
        only logged, so the delayed kill is always scheduled.
        """
        try:
            self.signal(SIGINT)
        except Exception:
            self.log.debug("interrupt failed")
        self.killer = ioloop.DelayedCallback(lambda : self.signal(SIGKILL), delay*1000, self.loop)
        self.killer.start()

    # callbacks, etc:

    def _handle_output(self, forwarded, pipe):
        """Read one line of child output and log it, or poll on end of stream.

        ``forwarded`` is the object registered with the loop (used on
        Windows); ``pipe`` is the child's own pipe (used elsewhere).
        """
        if WINDOWS:
            line = forwarded.recv()
        else:
            line = pipe.readline()
        # a stopped process will be readable but return empty strings
        if line:
            # Strip only a real line terminator: the last line of output may
            # not end in a newline, and its final character must be kept.
            if line[-1:] in ('\n', b'\n'):
                line = line[:-1]
            self.log.debug(line)
        else:
            self.poll()

    def handle_stdout(self, fd, events):
        """Event-loop handler for readable child stdout."""
        self._handle_output(self.stdout, self.process.stdout)

    def handle_stderr(self, fd, events):
        """Event-loop handler for readable child stderr."""
        self._handle_output(self.stderr, self.process.stderr)

    def poll(self):
        """Check whether the process has exited.

        On exit the poller and both output handlers are removed and
        :meth:`notify_stop` is called with a dict holding ``exit_code`` and
        ``pid``.  Returns the exit status, or None while still running.
        """
        status = self.process.poll()
        if status is not None:
            self.poller.stop()
            self.loop.remove_handler(self.stdout)
            self.loop.remove_handler(self.stderr)
            self.notify_stop(dict(exit_code=status, pid=self.process.pid))
        return status
MinRK
|
class LocalControllerLauncher(LocalProcessLauncher, ControllerMixin):
    """Launch a controller as a regular external process."""

    def find_args(self):
        """Controller command, then the cluster args, then the controller args."""
        argv = list(self.controller_cmd)
        argv.extend(self.cluster_args)
        argv.extend(self.controller_args)
        return argv

    def start(self):
        """Start the controller by profile_dir."""
        return super(LocalControllerLauncher, self).start()
MinRK
|
class LocalEngineLauncher(LocalProcessLauncher, EngineMixin):
    """Launch a single engine as a regular external process."""

    def find_args(self):
        """Engine command, then the cluster args, then the engine args."""
        argv = list(self.engine_cmd)
        argv.extend(self.cluster_args)
        argv.extend(self.engine_args)
        return argv
MinRK
|
r3605 | |||
MinRK
|
class LocalEngineSetLauncher(LocalEngineLauncher):
    """Launch a set of engines as regular external processes."""

    delay = CFloat(0.1, config=True,
        help="""delay (in seconds) between starting each engine after the first.
        This can help force the engines to get their ids in order, or limit
        process flood when starting many engines."""
    )

    # launcher class
    launcher_class = LocalEngineLauncher

    # launchers: index -> still-running engine launcher
    # stop_data: index -> stop data of each engine that has exited
    launchers = Dict()
    stop_data = Dict()

    def __init__(self, work_dir=u'.', config=None, **kwargs):
        super(LocalEngineSetLauncher, self).__init__(
            work_dir=work_dir, config=config, **kwargs
        )
        self.stop_data = {}

    def start(self, n):
        """Start n engines by profile or profile_dir.

        Sleeps ``self.delay`` seconds before each engine after the first.
        Returns the list of values returned by each engine launcher's
        ``start()``.
        """
        dlist = []
        for i in range(n):
            if i > 0:
                time.sleep(self.delay)
            el = self.launcher_class(work_dir=self.work_dir, config=self.config, log=self.log,
                                    profile_dir=self.profile_dir, cluster_id=self.cluster_id,
            )

            # Copy the engine args over to each engine launcher.
            el.engine_cmd = copy.deepcopy(self.engine_cmd)
            el.engine_args = copy.deepcopy(self.engine_args)
            el.on_stop(self._notice_engine_stopped)
            d = el.start()
            self.launchers[i] = el
            dlist.append(d)
        self.notify_start(dlist)
        return dlist

    def find_args(self):
        """Placeholder args; only the first element is used, for log messages."""
        return ['engine set']

    def signal(self, sig):
        """Send ``sig`` to every running engine; return the list of results."""
        return [el.signal(sig) for el in self.launchers.values()]

    def interrupt_then_kill(self, delay=1.0):
        """Interrupt, then after ``delay`` seconds kill, every running engine."""
        return [el.interrupt_then_kill(delay) for el in self.launchers.values()]

    def stop(self):
        """Stop all engines via :meth:`interrupt_then_kill`."""
        return self.interrupt_then_kill()

    def _notice_engine_stopped(self, data):
        """Stop callback of each engine: record its exit and, once no
        engine is left running, report the whole set as stopped.
        """
        pid = data['pid']
        for idx, el in self.launchers.items():
            if el.process.pid == pid:
                break
        else:
            # No running launcher owns this pid.  Falling through would
            # remove an unrelated engine (or fail when none are left).
            self.log.warning("Ignoring stop notification for unknown engine pid %r", pid)
            return
        self.launchers.pop(idx)
        self.stop_data[idx] = data
        if not self.launchers:
            self.notify_stop(self.stop_data)
#----------------------------------------------------------------------------- | ||||
MinRK
|
r5696 | # MPI launchers | ||
MinRK
|
r3605 | #----------------------------------------------------------------------------- | ||
MinRK
|
class MPILauncher(LocalProcessLauncher):
    """Launch an external process using mpiexec."""

    mpi_cmd = List(['mpiexec'], config=True,
        help="The mpiexec command to use in starting the process."
    )
    mpi_args = List([], config=True,
        help="The command line arguments to pass to mpiexec."
    )
    program = List(['date'],
        help="The program to start via mpiexec.")
    program_args = List([],
        help="The command line argument to the program."
    )
    n = Integer(1)

    def __init__(self, *args, **kwargs):
        """Apply config given under the old ``MPIExec*`` names, then initialize.

        Any non-empty config section named after a deprecated ``MPIExec*``
        class is merged into the section of its ``MPI*`` replacement and a
        warning is logged.
        """
        # deprecation for old MPIExec names:
        # ``config`` may be passed explicitly as None, so normalize it here.
        config = kwargs.get('config') or {}
        for oldname in ('MPIExecLauncher', 'MPIExecControllerLauncher', 'MPIExecEngineSetLauncher'):
            deprecated = config.get(oldname)
            if deprecated:
                newname = oldname.replace('MPIExec', 'MPI')
                config[newname].update(deprecated)
                self.log.warning("WARNING: %s name has been deprecated, use %s", oldname, newname)

        super(MPILauncher, self).__init__(*args, **kwargs)

    def find_args(self):
        """Build self.args using all the fields."""
        return self.mpi_cmd + ['-n', str(self.n)] + self.mpi_args + \
               self.program + self.program_args

    def start(self, n):
        """Start n instances of the program using mpiexec."""
        self.n = n
        return super(MPILauncher, self).start()
MinRK
|
r3605 | |||
MinRK
|
class MPIControllerLauncher(MPILauncher, ControllerMixin):
    """Launch a controller using mpiexec."""

    # alias back to *non-configurable* program[_args] for use in find_args()
    # this way all Controller/EngineSetLaunchers have the same form, rather
    # than *some* having `program_args` and others `controller_args`
    @property
    def program(self):
        """The controller command."""
        return self.controller_cmd

    @property
    def program_args(self):
        """The cluster args followed by the controller args."""
        argv = self.cluster_args
        return argv + self.controller_args

    def start(self):
        """Start the controller by profile_dir."""
        # always exactly one instance
        return super(MPIControllerLauncher, self).start(1)
MinRK
|
r3605 | |||
MinRK
|
class MPIEngineSetLauncher(MPILauncher, EngineMixin):
    """Launch engines using mpiexec"""

    # alias back to *non-configurable* program[_args] for use in find_args()
    # this way all Controller/EngineSetLaunchers have the same form, rather
    # than *some* having `program_args` and others `controller_args`
    @property
    def program(self):
        """The engine command."""
        return self.engine_cmd

    @property
    def program_args(self):
        """The cluster args followed by the engine args."""
        argv = self.cluster_args
        return argv + self.engine_args

    def start(self, n):
        """Start n engines by profile or profile_dir."""
        self.n = n
        return super(MPIEngineSetLauncher, self).start(n)
# deprecated MPIExec names | ||||
class DeprecatedMPILauncher(object):
    """Mixin that logs a deprecation warning for the old ``MPIExec*`` names."""

    def warn(self):
        """Log that this class name is deprecated and name its replacement.

        The replacement name is the instance's class name with ``MPIExec``
        replaced by ``MPI``.  Requires ``self.log`` to be a logger; this
        mixin does not define it itself.
        """
        oldname = self.__class__.__name__
        newname = oldname.replace('MPIExec', 'MPI')
        # Logger.warning is the documented method; Logger.warn is only a
        # deprecated alias for it.
        self.log.warning("WARNING: %s name is deprecated, use %s", oldname, newname)
class MPIExecLauncher(MPILauncher, DeprecatedMPILauncher):
    """Deprecated, use MPILauncher"""

    def __init__(self, *args, **kwargs):
        # initialize normally, then log the deprecation warning
        super(MPIExecLauncher, self).__init__(*args, **kwargs)
        self.warn()
class MPIExecControllerLauncher(MPIControllerLauncher, DeprecatedMPILauncher):
    """Deprecated, use MPIControllerLauncher"""

    def __init__(self, *args, **kwargs):
        # initialize normally, then log the deprecation warning
        super(MPIExecControllerLauncher, self).__init__(*args, **kwargs)
        self.warn()
class MPIExecEngineSetLauncher(MPIEngineSetLauncher, DeprecatedMPILauncher):
    """Deprecated, use MPIEngineSetLauncher"""

    def __init__(self, *args, **kwargs):
        # initialize normally, then log the deprecation warning
        super(MPIExecEngineSetLauncher, self).__init__(*args, **kwargs)
        self.warn()
MinRK
|
r3605 | |||
#----------------------------------------------------------------------------- | ||||
# SSH launchers | ||||
#----------------------------------------------------------------------------- | ||||
MinRK
|
r4019 | # TODO: Get SSH Launcher back to level of sshx in 0.10.2 | ||
MinRK
|
r3605 | |||
class SSHLauncher(LocalProcessLauncher):
    """A minimal launcher for ssh.

    To be useful this will probably have to be extended to use the ``sshx``
    idea for environment variables.  There could be other things this needs
    as well.
    """

    ssh_cmd = List(['ssh'], config=True,
        help="command for starting ssh")
    ssh_args = List(['-tt'], config=True,
        help="args to pass to ssh")
    scp_cmd = List(['scp'], config=True,
        help="command for sending files")
    program = List(['date'],
        help="Program to launch via ssh")
    program_args = List([],
        help="args to pass to remote program")
    hostname = Unicode('', config=True,
        help="hostname on which to launch the program")
    user = Unicode('', config=True,
        help="username for ssh")
    location = Unicode('', config=True,
        help="user@hostname location for ssh in one setting")
    to_fetch = List([], config=True,
        help="List of (remote, local) files to fetch after starting")
    to_send = List([], config=True,
        help="List of (local, remote) files to send before starting")

    def _hostname_changed(self, name, old, new):
        """Keep ``location`` in sync when ``hostname`` changes."""
        self.location = (u'%s@%s' % (self.user, new)) if self.user else new

    def _user_changed(self, name, old, new):
        """Keep ``location`` in sync when ``user`` changes."""
        self.location = u'%s@%s' % (new, self.hostname)

    def find_args(self):
        """ssh command and args, the location, then the remote program and its args."""
        local_part = self.ssh_cmd + self.ssh_args + [self.location]
        remote_part = self.program + self.program_args
        return local_part + remote_part

    def _send_file(self, local, remote):
        """send a single file"""
        remote = "%s:%s" % (self.location, remote)
        # wait up to ten one-second rounds for the local file to show up
        attempts = 0
        while attempts < 10 and not os.path.exists(local):
            self.log.debug("waiting for %s" % local)
            time.sleep(1)
            attempts += 1
        self.log.info("sending %s to %s", local, remote)
        check_output(self.scp_cmd + [local, remote])

    def send_files(self):
        """send our files (called before start)"""
        if self.to_send:
            for local_file, remote_file in self.to_send:
                self._send_file(local_file, remote_file)

    def _fetch_file(self, remote, local):
        """fetch a single file"""
        full_remote = "%s:%s" % (self.location, remote)
        self.log.info("fetching %s from %s", local, full_remote)
        # remote command that reports whether the file exists yet
        probe = self.ssh_cmd + self.ssh_args + \
            [self.location, 'test -e', remote, "&& echo 'yes' || echo 'no'"]
        for i in range(10):
            # wait up to 10s for remote file to exist
            answer = check_output(probe).strip()
            if answer == 'yes':
                break
            if answer == 'no':
                time.sleep(1)
        check_output(self.scp_cmd + [full_remote, local])

    def fetch_files(self):
        """fetch remote files (called after start)"""
        if self.to_fetch:
            for remote_file, local_file in self.to_fetch:
                self._fetch_file(remote_file, local_file)

    def start(self, hostname=None, user=None):
        """Send files, start the ssh process, then fetch files.

        ``hostname`` and ``user``, when given, replace the configured values
        first.
        """
        if hostname is not None:
            self.hostname = hostname
        if user is not None:
            self.user = user

        self.send_files()
        super(SSHLauncher, self).start()
        self.fetch_files()

    def signal(self, sig):
        """Close the ssh connection; ``sig`` itself is not forwarded."""
        if self.state != 'running':
            return
        # send escaped ssh connection-closer
        self.process.stdin.write('~.')
        self.process.stdin.flush()
Bernardo B. Marques
|
r4872 | |||
MinRK
|
class SSHClusterLauncher(SSHLauncher):
    """SSH launcher base for cluster apps, addressed by a remote profile dir."""

    remote_profile_dir = Unicode('', config=True,
        help="""The remote profile_dir to use.
        If not specified, use calling profile, stripping out possible leading homedir.
        """)

    def _remote_profile_dir_default(self):
        """turns /home/you/.ipython/profile_foo into .ipython/profile_foo
        """
        prefix = get_home_dir()
        if prefix[-1:] != '/':
            prefix += '/'
        local_dir = self.profile_dir
        if not local_dir.startswith(prefix):
            return local_dir
        return local_dir[len(prefix):]

    def _cluster_id_changed(self, name, old, new):
        """Reject any non-empty cluster id."""
        if new:
            raise ValueError("cluster id not supported by SSH launchers")

    @property
    def cluster_args(self):
        """Only the remote profile dir is passed; there is no cluster id."""
        return ['--profile-dir', self.remote_profile_dir]
MinRK
|
r3605 | |||
MinRK
|
class SSHControllerLauncher(SSHClusterLauncher, ControllerMixin):
    """Launch a controller on a remote host over ssh."""

    # alias back to *non-configurable* program[_args] for use in find_args()
    # this way all Controller/EngineSetLaunchers have the same form, rather
    # than *some* having `program_args` and others `controller_args`

    def _controller_cmd_default(self):
        """Default to the ``ipcontroller`` command by name."""
        return ['ipcontroller']

    @property
    def program(self):
        """The controller command."""
        return self.controller_cmd

    @property
    def program_args(self):
        """The cluster args followed by the controller args."""
        argv = self.cluster_args
        return argv + self.controller_args

    def _to_fetch_default(self):
        """Default ``to_fetch``: (remote, local) pairs for the two
        connection files in each profile's ``security`` directory.
        """
        pairs = []
        for cf in ('ipcontroller-client.json', 'ipcontroller-engine.json'):
            remote_path = os.path.join(self.remote_profile_dir, 'security', cf)
            local_path = os.path.join(self.profile_dir, 'security', cf)
            pairs.append((remote_path, local_path))
        return pairs
MinRK
|
r3605 | |||
MinRK
|
class SSHEngineLauncher(SSHClusterLauncher, EngineMixin):
    """Launch a single engine on a remote host over ssh."""

    # alias back to *non-configurable* program[_args] for use in find_args()
    # this way all Controller/EngineSetLaunchers have the same form, rather
    # than *some* having `program_args` and others `controller_args`

    def _engine_cmd_default(self):
        """Default to the ``ipengine`` command by name."""
        return ['ipengine']

    @property
    def program(self):
        """The engine command."""
        return self.engine_cmd

    @property
    def program_args(self):
        """The cluster args followed by the engine args."""
        argv = self.cluster_args
        return argv + self.engine_args

    def _to_send_default(self):
        """Default ``to_send``: (local, remote) pairs for the two
        connection files in each profile's ``security`` directory.
        """
        pairs = []
        for cf in ('ipcontroller-client.json', 'ipcontroller-engine.json'):
            local_path = os.path.join(self.profile_dir, 'security', cf)
            remote_path = os.path.join(self.remote_profile_dir, 'security', cf)
            pairs.append((local_path, remote_path))
        return pairs
Bernardo B. Marques
|
r4872 | |||
MinRK
|
class SSHEngineSetLauncher(LocalEngineSetLauncher):
    """Launch sets of engines on remote hosts over ssh."""

    launcher_class = SSHEngineLauncher
    engines = Dict(config=True,
        help="""dict of engines to launch. This is a dict by hostname of ints,
        corresponding to the number of engines to start on that host.""")

    def _engine_cmd_default(self):
        """Default to the ``ipengine`` command by name.

        ``start`` copies this launcher's ``engine_cmd`` onto every engine
        launcher, so the default here has to agree with the one defined by
        ``SSHEngineLauncher``.  The inherited default is an argv built from
        the local ``sys.executable``.
        """
        return ['ipengine']

    @property
    def engine_count(self):
        """determine engine count from `engines` dict"""
        count = 0
        for spec in self.engines.values():
            # a value is either a count or a (count, args) pair
            if isinstance(spec, (tuple, list)):
                spec, _ = spec
            count += spec
        return count

    def start(self, n):
        """Start engines by profile or profile_dir.

        `n` is ignored, and the `engines` config property is used instead.
        Each key of `engines` is ``host`` or ``user@host``; each value is a
        count, or a ``(count, args)`` pair overriding ``engine_args`` for
        that host.  Returns the list of values returned by each engine
        launcher's ``start()``.
        """
        dlist = []
        for host, spec in self.engines.items():
            if isinstance(spec, (tuple, list)):
                count, args = spec
            else:
                count = spec
                args = copy.deepcopy(self.engine_args)

            if '@' in host:
                user, host = host.split('@', 1)
            else:
                user = None
            for i in range(count):
                if i > 0:
                    time.sleep(self.delay)
                el = self.launcher_class(work_dir=self.work_dir, config=self.config, log=self.log,
                                        profile_dir=self.profile_dir, cluster_id=self.cluster_id,
                )
                if i > 0:
                    # only send files for the first engine on each host
                    el.to_send = []

                # Copy the engine args over to each engine launcher.
                el.engine_cmd = self.engine_cmd
                el.engine_args = args
                el.on_stop(self._notice_engine_stopped)
                d = el.start(user=user, hostname=host)
                self.launchers[ "%s/%i" % (host,i) ] = el
                dlist.append(d)
        self.notify_start(dlist)
        return dlist
Bernardo B. Marques
|
r4872 | |||
MinRK
|
r3605 | |||
MinRK
|
class SSHProxyEngineSetLauncher(SSHClusterLauncher):
    """Launcher for calling
    `ipcluster engines` on a remote machine.

    Requires that remote profile is already configured.
    """

    n = Integer()
    ipcluster_cmd = List(['ipcluster'], config=True)

    @property
    def program(self):
        """The ipcluster command followed by its ``engines`` subcommand."""
        argv = list(self.ipcluster_cmd)
        argv.append('engines')
        return argv

    @property
    def program_args(self):
        """The engine count and the remote profile dir."""
        engine_count = str(self.n)
        return ['-n', engine_count, '--profile-dir', self.remote_profile_dir]

    def _to_send_default(self):
        """Default ``to_send``: (local, remote) pairs for the two
        connection files in each profile's ``security`` directory.
        """
        pairs = []
        for cf in ('ipcontroller-client.json', 'ipcontroller-engine.json'):
            local_path = os.path.join(self.profile_dir, 'security', cf)
            remote_path = os.path.join(self.remote_profile_dir, 'security', cf)
            pairs.append((local_path, remote_path))
        return pairs

    def start(self, n):
        """Record the engine count ``n`` and start the remote ipcluster."""
        self.n = n
        super(SSHProxyEngineSetLauncher, self).start()
MinRK
|
r3605 | |||
#----------------------------------------------------------------------------- | ||||
# Windows HPC Server 2008 scheduler launchers | ||||
#----------------------------------------------------------------------------- | ||||
MinRK
|
r3613 | # This is only used on Windows. | ||
def find_job_cmd():
    """Return the command used to submit jobs.

    On Windows the ``job`` command is looked up with ``find_cmd``.  If that
    lookup fails, or on any other platform, the bare name ``'job'`` is
    returned.
    """
    if not WINDOWS:
        return 'job'
    try:
        return find_cmd('job')
    except (FindCmdError, ImportError):
        # ImportError will be raised if win32api is not installed
        return 'job'
class WindowsHPCLauncher(BaseLauncher):
    """Base launcher that submits and cancels jobs with the Windows HPC ``job`` command.

    Subclasses must implement :meth:`write_job_file` to produce the job
    description file that :meth:`start` submits.
    """

    job_id_regexp = CRegExp(r'\d+', config=True,
        help="""A regular expression used to get the job id from the output of the
        submit_command. """
        )
    job_file_name = Unicode(u'ipython_job.xml', config=True,
        help="The filename of the instantiated job script.")
    # The full path to the instantiated job script. This gets made dynamically
    # by combining the work_dir with the job_file_name.
    # NOTE: the ``job_file`` property defined further down rebinds this name
    # in the class body, so the property is what instances actually see.
    job_file = Unicode(u'')
    scheduler = Unicode('', config=True,
        help="The hostname of the scheduler to submit the job to.")
    job_cmd = Unicode(find_job_cmd(), config=True,
        help="The command for submitting jobs.")

    def __init__(self, work_dir=u'.', config=None, **kwargs):
        super(WindowsHPCLauncher, self).__init__(
            work_dir=work_dir, config=config, **kwargs
        )

    @property
    def job_file(self):
        """Full path of the job file: ``work_dir`` joined with ``job_file_name``."""
        return os.path.join(self.work_dir, self.job_file_name)

    def write_job_file(self, n):
        """Write the job description file for ``n`` processes (subclass hook)."""
        raise NotImplementedError("Implement write_job_file in a subclass.")

    def find_args(self):
        return [u'job.exe']

    def parse_job_id(self, output):
        """Take the output of the submit command and return the job id.

        The first match of ``job_id_regexp`` in ``output`` is stored on
        ``self.job_id`` and returned.  Raises ``LauncherError`` when the
        pattern does not match.
        """
        m = self.job_id_regexp.search(output)
        if m is None:
            raise LauncherError("Job id couldn't be determined: %s" % output)
        job_id = m.group()
        self.job_id = job_id
        self.log.info('Job started with id: %r', job_id)
        return job_id

    def start(self, n):
        """Start n copies of the process using the Win HPC job scheduler.

        Writes the job file, submits it with ``job submit`` and returns the
        job id parsed from the command's output.
        """
        self.write_job_file(n)
        args = [
            'submit',
            '/jobfile:%s' % self.job_file,
            '/scheduler:%s' % self.scheduler
        ]
        self.log.debug("Starting Win HPC Job: %s",
                       self.job_cmd + ' ' + ' '.join(args))

        output = check_output([self.job_cmd] + args,
            env=os.environ,
            cwd=self.work_dir,
            stderr=STDOUT
        )
        job_id = self.parse_job_id(output)
        self.notify_start(job_id)
        return job_id

    def stop(self):
        """Cancel the job with ``job cancel`` and return the command output.

        Cancelling is best-effort: if the cancel command fails, a message
        saying the job already appears to be stopped is used as the output.
        """
        args = [
            'cancel',
            self.job_id,
            '/scheduler:%s' % self.scheduler
        ]
        self.log.info("Stopping Win HPC Job: %s",
                      self.job_cmd + ' ' + ' '.join(args))
        try:
            output = check_output([self.job_cmd] + args,
                env=os.environ,
                cwd=self.work_dir,
                stderr=STDOUT
            )
        except Exception:
            # Still best-effort, but no longer swallows KeyboardInterrupt or
            # SystemExit the way the previous bare ``except:`` did.
            output = 'The job already appears to be stoppped: %r' % self.job_id
        # Pass the output of the kill cmd
        self.notify_stop(dict(job_id=self.job_id, output=output))
        return output
MinRK
|
class WindowsHPCControllerLauncher(WindowsHPCLauncher, ClusterAppMixin):
    """Launch a single controller as a Windows HPC job."""

    job_file_name = Unicode(u'ipcontroller_job.xml', config=True,
        help="WinHPC xml job file.")
    controller_args = List([], config=False,
        help="extra args to pass to ipcontroller")

    def write_job_file(self, n):
        """Build a one-task controller job and write it to ``self.job_file``.

        ``n`` is accepted for interface compatibility but is not used here.
        """
        controller_job = IPControllerJob(config=self.config)
        task = IPControllerTask(config=self.config)

        # The task's work directory is *not* the actual work directory of
        # the controller. It is used as the base path for the stdout/stderr
        # files that the scheduler redirects to.
        task.work_directory = self.profile_dir

        # Cluster-wide arguments first, then the controller-specific extras.
        for extra_args in (self.cluster_args, self.controller_args):
            task.controller_args.extend(extra_args)

        controller_job.add_task(task)

        self.log.debug("Writing job description file: %s", self.job_file)
        controller_job.write(self.job_file)

    @property
    def job_file(self):
        """Full path of the job file inside the profile directory."""
        return os.path.join(self.profile_dir, self.job_file_name)

    def start(self):
        """Start the controller by profile_dir (always a single process)."""
        return super(WindowsHPCControllerLauncher, self).start(1)
MinRK
|
class WindowsHPCEngineSetLauncher(WindowsHPCLauncher, ClusterAppMixin):
    """Launch a set of engines as one Windows HPC job with one task per engine."""

    job_file_name = Unicode(u'ipengineset_job.xml', config=True,
        help="jobfile for ipengines job")
    engine_args = List([], config=False,
        help="extra args to pas to ipengine")

    def write_job_file(self, n):
        """Build a job containing ``n`` engine tasks and write it to ``self.job_file``."""
        engine_job = IPEngineSetJob(config=self.config)

        for _ in range(n):
            task = IPEngineTask(config=self.config)
            # The task's work directory is *not* the actual work directory of
            # the engine. It is used as the base path for the stdout/stderr
            # files that the scheduler redirects to.
            task.work_directory = self.profile_dir
            # Cluster-wide arguments first, then the engine-specific extras.
            for extra_args in (self.cluster_args, self.engine_args):
                task.engine_args.extend(extra_args)
            engine_job.add_task(task)

        self.log.debug("Writing job description file: %s", self.job_file)
        engine_job.write(self.job_file)

    @property
    def job_file(self):
        """Full path of the job file inside the profile directory."""
        return os.path.join(self.profile_dir, self.job_file_name)

    def start(self, n):
        """Start n engines by profile_dir."""
        return super(WindowsHPCEngineSetLauncher, self).start(n)
#----------------------------------------------------------------------------- | ||||
# Batch (PBS) system launchers | ||||
#----------------------------------------------------------------------------- | ||||
MinRK
|
class BatchClusterAppMixin(ClusterAppMixin):
    """ClusterApp mixin that updates the self.context dict, rather than cl-args."""

    def _profile_dir_changed(self, name, old, new):
        """Mirror the new trait value into the template context under its name."""
        self.context[name] = new

    # cluster_id is mirrored into the context in exactly the same way
    _cluster_id_changed = _profile_dir_changed

    def _profile_dir_default(self):
        """Default profile_dir is empty; the context entry is seeded to match."""
        default = ''
        self.context['profile_dir'] = default
        return default

    def _cluster_id_default(self):
        """Default cluster_id is empty; the context entry is seeded to match."""
        default = ''
        self.context['cluster_id'] = default
        return default
MinRK
|
class BatchSystemLauncher(BaseLauncher):
    """Launch an external process using a batch system.

    This class is designed to work with UNIX batch systems like PBS, LSF,
    GridEngine, etc.  The overall model is that there are different commands
    like qsub, qdel, etc. that handle the starting and stopping of the process.

    This class also has the notion of a batch script. The ``batch_template``
    attribute can be set to a string that is a template for the batch script.
    This template is instantiated using string formatting. Thus the template can
    use {n} for the number of instances. Subclasses can add additional variables
    to the template dict.
    """

    # Subclasses must fill these in.  See PBSEngineSet
    submit_command = List([''], config=True,
        help="The name of the command line program used to submit jobs.")
    delete_command = List([''], config=True,
        help="The name of the command line program used to delete jobs.")
    job_id_regexp = CRegExp('', config=True,
        help="""A regular expression used to get the job id from the output of the
        submit_command.""")
    batch_template = Unicode('', config=True,
        help="The string that is the batch script template itself.")
    batch_template_file = Unicode(u'', config=True,
        help="The file that contains the batch template.")
    batch_file_name = Unicode(u'batch_script', config=True,
        help="The filename of the instantiated batch script.")
    queue = Unicode(u'', config=True,
        help="The PBS Queue.")

    def _queue_changed(self, name, old, new):
        """Mirror the new trait value into the template context under its name."""
        self.context[name] = new

    n = Integer(1)
    # n is mirrored into the context the same way as queue
    _n_changed = _queue_changed

    # not configurable, override in subclasses
    # PBS Job Array regex
    job_array_regexp = CRegExp('')
    job_array_template = Unicode('')
    # PBS Queue regex
    queue_regexp = CRegExp('')
    queue_template = Unicode('')
    # The default batch template, override in subclasses
    default_template = Unicode('')
    # The full path to the instantiated batch script.
    batch_file = Unicode(u'')
    # the format dict used with batch_template:
    context = Dict()

    def _context_default(self):
        """load the default context with the default values for the basic keys

        because the _trait_changed methods only load the context if they
        are set to something other than the default value.
        """
        return dict(n=1, queue=u'', profile_dir=u'', cluster_id=u'')

    # the Formatter instance for rendering the templates:
    formatter = Instance(EvalFormatter, (), {})

    def find_args(self):
        """Return the submit command followed by the batch script path."""
        return self.submit_command + [self.batch_file]

    def __init__(self, work_dir=u'.', config=None, **kwargs):
        super(BatchSystemLauncher, self).__init__(
            work_dir=work_dir, config=config, **kwargs
        )
        self.batch_file = os.path.join(self.work_dir, self.batch_file_name)

    def parse_job_id(self, output):
        """Take the output of the submit command and return the job id.

        The first match of ``job_id_regexp`` in ``output`` is stored on
        ``self.job_id`` and returned.  Raises ``LauncherError`` when the
        pattern does not match.
        """
        m = self.job_id_regexp.search(output)
        if m is None:
            raise LauncherError("Job id couldn't be determined: %s" % output)
        job_id = m.group()
        self.job_id = job_id
        self.log.info('Job submitted with job id: %r', job_id)
        return job_id

    def _insert_after_first_line(self, directive):
        """Insert ``directive`` as the second line of ``self.batch_template``."""
        # partition() never fails to unpack, so a template without any
        # newline (a single line, or an empty string) is handled as well.
        firstline, _, rest = self.batch_template.partition('\n')
        self.batch_template = u'\n'.join([firstline, directive, rest])

    def write_batch_script(self, n):
        """Instantiate and write the batch script to the work_dir.

        The template is chosen in priority order: ``batch_template``, then the
        contents of ``batch_template_file``, then ``default_template``.  Job
        array and queue directives are added when the template lacks them,
        the template is rendered with ``self.context`` and the result is
        written to ``self.batch_file`` with owner read/write/execute mode.
        """
        self.n = n
        # first priority is batch_template if set
        if self.batch_template_file and not self.batch_template:
            # second priority is batch_template_file
            with open(self.batch_template_file) as f:
                self.batch_template = f.read()
        if not self.batch_template:
            # third (last) priority is default_template
            self.batch_template = self.default_template

        # Add job array and queue lines whenever the chosen template does not
        # already contain a matching directive.
        if not self.job_array_regexp.search(self.batch_template):
            self.log.debug("adding job array settings to batch script")
            self._insert_after_first_line(self.job_array_template)

        if self.queue and not self.queue_regexp.search(self.batch_template):
            self.log.debug("adding PBS queue settings to batch script")
            self._insert_after_first_line(self.queue_template)

        script_as_string = self.formatter.format(self.batch_template, **self.context)
        self.log.debug('Writing batch script: %s', self.batch_file)

        with open(self.batch_file, 'w') as f:
            f.write(script_as_string)
        os.chmod(self.batch_file, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)

    def start(self, n):
        """Start n copies of the process using a batch system.

        Writes the batch script, runs the submit command and returns the job
        id parsed from its output.
        """
        self.log.debug("Starting %s: %r", self.__class__.__name__, self.args)
        # Here we save profile_dir in the context so they
        # can be used in the batch script template as {profile_dir}
        self.write_batch_script(n)
        output = check_output(self.args, env=os.environ)

        job_id = self.parse_job_id(output)
        self.notify_start(job_id)
        return job_id

    def stop(self):
        """Run the delete command on the stored job id and return its output."""
        output = check_output(self.delete_command + [self.job_id], env=os.environ)
        # Pass the output of the kill cmd
        self.notify_stop(dict(job_id=self.job_id, output=output))
        return output
class PBSLauncher(BatchSystemLauncher):
    """A BatchSystemLauncher subclass for PBS."""

    submit_command = List(['qsub'], config=True,
        help="The PBS submit command ['qsub']")
    # The help text previously advertised ['qsub'] here, contradicting the
    # actual default of ['qdel'].
    delete_command = List(['qdel'], config=True,
        help="The PBS delete command ['qdel']")
    job_id_regexp = CRegExp(r'\d+', config=True,
        help=r"Regular expresion for identifying the job ID [r'\d+']")

    batch_file = Unicode(u'')
    # Raw literals: same pattern text as before, without relying on Python
    # leaving unknown escapes such as \W untouched.
    job_array_regexp = CRegExp(r'#PBS\W+-t\W+[\w\d\-\$]+')
    job_array_template = Unicode('#PBS -t 1-{n}')
    queue_regexp = CRegExp(r'#PBS\W+-q\W+\$?\w+')
    queue_template = Unicode('#PBS -q {queue}')
MinRK
|
r3613 | |||
MinRK
|
class PBSControllerLauncher(PBSLauncher, BatchClusterAppMixin):
    """Launch a controller using PBS."""

    batch_file_name = Unicode(u'pbs_controller', config=True,
        help="batch file name for the controller job.")
    # The controller command is shell-quoted once, at class definition time;
    # {profile_dir} and {cluster_id} are filled in when the script is rendered.
    default_template = Unicode("""#!/bin/sh
#PBS -V
#PBS -N ipcontroller
%s --log-to-file --profile-dir="{profile_dir}" --cluster-id="{cluster_id}"
""" % ' '.join(pipes.quote(arg) for arg in ipcontroller_cmd_argv))

    def start(self):
        """Start the controller by profile or profile_dir (a single process)."""
        return super(PBSControllerLauncher, self).start(1)
MinRK
|
r3613 | |||
MinRK
|
class PBSEngineSetLauncher(PBSLauncher, BatchClusterAppMixin):
    """Launch Engines using PBS"""

    batch_file_name = Unicode(u'pbs_engines', config=True,
        help="batch file name for the engine(s) job.")
    # The engine command is shell-quoted once, at class definition time;
    # {profile_dir} and {cluster_id} are filled in when the script is rendered.
    default_template = Unicode(u"""#!/bin/sh
#PBS -V
#PBS -N ipengine
%s --profile-dir="{profile_dir}" --cluster-id="{cluster_id}"
""" % ' '.join(pipes.quote(arg) for arg in ipengine_cmd_argv))

    def start(self, n):
        """Start n engines by profile or profile_dir."""
        return super(PBSEngineSetLauncher, self).start(n)
MinRK
|
r3605 | |||
MinRK
|
r3659 | #SGE is very similar to PBS | ||
class SGELauncher(PBSLauncher):
    """Sun GridEngine is a PBS clone with slightly different syntax"""

    # SGE directives start with ``#$`` instead of ``#PBS``.
    job_array_regexp = CRegExp(r'#\$\W+\-t')
    job_array_template = Unicode('#$ -t 1-{n}')
    queue_regexp = CRegExp(r'#\$\W+-q\W+\$?\w+')
    queue_template = Unicode('#$ -q {queue}')
MinRK
|
r3659 | |||
MinRK
|
class SGEControllerLauncher(SGELauncher, BatchClusterAppMixin):
    """Launch a controller using SGE."""

    batch_file_name = Unicode(u'sge_controller', config=True,
        help="batch file name for the ipontroller job.")
    # The controller command is shell-quoted once, at class definition time;
    # {profile_dir} and {cluster_id} are filled in when the script is rendered.
    default_template = Unicode(u"""#$ -V
#$ -S /bin/sh
#$ -N ipcontroller
%s --log-to-file --profile-dir="{profile_dir}" --cluster-id="{cluster_id}"
""" % ' '.join(pipes.quote(arg) for arg in ipcontroller_cmd_argv))

    def start(self):
        """Start the controller by profile or profile_dir (a single process)."""
        return super(SGEControllerLauncher, self).start(1)
MinRK
|
r3659 | |||
MinRK
|
class SGEEngineSetLauncher(SGELauncher, BatchClusterAppMixin):
    """Launch Engines with SGE"""

    batch_file_name = Unicode(u'sge_engines', config=True,
        help="batch file name for the engine(s) job.")
    # The engine command is shell-quoted once, at class definition time;
    # {profile_dir} and {cluster_id} are filled in when the script is rendered.
    default_template = Unicode("""#$ -V
#$ -S /bin/sh
#$ -N ipengine
%s --profile-dir="{profile_dir}" --cluster-id="{cluster_id}"
""" % ' '.join(pipes.quote(arg) for arg in ipengine_cmd_argv))

    def start(self, n):
        """Start n engines by profile or profile_dir."""
        return super(SGEEngineSetLauncher, self).start(n)
MinRK
|
r3659 | |||
MinRK
|
r3605 | |||
Johann Cohen-Tanugi
|
r4229 | # LSF launchers | ||
class LSFLauncher(BatchSystemLauncher):
    """A BatchSystemLauncher subclass for LSF."""

    submit_command = List(['bsub'], config=True,
        help="The LSF submit command ['bsub']")
    delete_command = List(['bkill'], config=True,
        help="The LSF delete command ['bkill']")
    job_id_regexp = CRegExp(r'\d+', config=True,
        help=r"Regular expresion for identifying the job ID [r'\d+']")

    batch_file = Unicode(u'')
    # Accepts everything the previous pattern accepted, and additionally
    # allows whitespace between ``-J`` and the job name and a non-numeric
    # range such as ``[1-{n}]``.  The previous pattern could not match
    # ``job_array_template`` itself, so the directive was inserted again on
    # every write_batch_script() call, even into templates that already had it.
    job_array_regexp = CRegExp(r'#BSUB[ \t]+-J[ \t]*\w+\[[^\]]+\]')
    job_array_template = Unicode('#BSUB -J ipengine[1-{n}]')
    queue_regexp = CRegExp(r'#BSUB[ \t]+-q[ \t]+\w+')
    queue_template = Unicode('#BSUB -q {queue}')

    def start(self, n):
        """Start n copies of the process using LSF batch system.

        This can't reuse the base class implementation because bsub expects
        to be piped a shell script in order to honor the #BSUB directives :
        bsub < script

        Returns the job id parsed from the submit command's output.
        """
        # Here we save profile_dir in the context so they
        # can be used in the batch script template as {profile_dir}
        self.write_batch_script(n)
        # self.args is expected to be the submit command followed by the batch
        # file (see find_args in the base class).  Everything but the last
        # element is the command, so a multi-element submit_command such as
        # ['bsub', '-q', 'x'] no longer has its second token mistaken for the
        # script path.  With the default one-element command the resulting
        # string is identical to the one built before.
        submit_cmd = ' '.join(self.args[:-1])
        piped_cmd = submit_cmd + '<"' + self.args[-1] + '"'
        self.log.debug("Starting %s: %s", self.__class__.__name__, piped_cmd)
        # NOTE(review): runs through the shell; both parts come from launcher
        # configuration, not from untrusted input.
        p = Popen(piped_cmd, shell=True, env=os.environ, stdout=PIPE)
        output, err = p.communicate()
        job_id = self.parse_job_id(output)
        self.notify_start(job_id)
        return job_id
MinRK
|
class LSFControllerLauncher(LSFLauncher, BatchClusterAppMixin):
    """Launch a controller using LSF."""

    batch_file_name = Unicode(u'lsf_controller', config=True,
        help="batch file name for the controller job.")
    # ``%%J`` survives the %-interpolation below as a literal ``%J``.
    # {profile_dir} and {cluster_id} are filled in when the script is rendered.
    default_template = Unicode("""#!/bin/sh
#BSUB -J ipcontroller
#BSUB -oo ipcontroller.o.%%J
#BSUB -eo ipcontroller.e.%%J
%s --log-to-file --profile-dir="{profile_dir}" --cluster-id="{cluster_id}"
""" % ' '.join(pipes.quote(arg) for arg in ipcontroller_cmd_argv))

    def start(self):
        """Start the controller by profile or profile_dir (a single process)."""
        return super(LSFControllerLauncher, self).start(1)
Johann Cohen-Tanugi
|
r4229 | |||
MinRK
|
class LSFEngineSetLauncher(LSFLauncher, BatchClusterAppMixin):
    """Launch Engines using LSF"""

    batch_file_name = Unicode(u'lsf_engines', config=True,
        help="batch file name for the engine(s) job.")
    # ``%%J`` survives the %-interpolation below as a literal ``%J``.
    # {profile_dir} and {cluster_id} are filled in when the script is rendered.
    default_template = Unicode(u"""#!/bin/sh
#BSUB -oo ipengine.o.%%J
#BSUB -eo ipengine.e.%%J
%s --profile-dir="{profile_dir}" --cluster-id="{cluster_id}"
""" % ' '.join(pipes.quote(arg) for arg in ipengine_cmd_argv))

    def start(self, n):
        """Start n engines by profile or profile_dir."""
        return super(LSFEngineSetLauncher, self).start(n)
Johann Cohen-Tanugi
|
r4229 | |||
MinRK
|
r3605 | #----------------------------------------------------------------------------- | ||
# A launcher for ipcluster itself! | ||||
#----------------------------------------------------------------------------- | ||||
class IPClusterLauncher(LocalProcessLauncher):
    """Launch the ipcluster program in an external process."""

    ipcluster_cmd = List(ipcluster_cmd_argv, config=True,
        help="Popen command for ipcluster")
    ipcluster_args = List(
        ['--clean-logs=True', '--log-to-file', '--log-level=%i' % logging.INFO],
        config=True,
        help="Command line arguments to pass to ipcluster.")
    ipcluster_subcommand = Unicode('start')
    profile = Unicode('default')
    n = Integer(2)

    def find_args(self):
        """Assemble the full ipcluster command line.

        Order: the ipcluster command, the subcommand, the ``--n`` and
        ``--profile`` options, then the extra ``ipcluster_args``.
        """
        subcommand = [self.ipcluster_subcommand]
        options = ['--n=%i' % self.n, '--profile=%s' % self.profile]
        return self.ipcluster_cmd + subcommand + options + self.ipcluster_args

    def start(self):
        """Start ipcluster; simply delegates to the parent launcher."""
        return super(IPClusterLauncher, self).start()
MinRK
|
r3985 | #----------------------------------------------------------------------------- | ||
# Collections of launchers | ||||
#----------------------------------------------------------------------------- | ||||
# One list per launcher family, each holding the launcher classes for that
# backend.
local_launchers = [
    LocalControllerLauncher,
    LocalEngineLauncher,
    LocalEngineSetLauncher,
]
mpi_launchers = [
    MPILauncher,
    MPIControllerLauncher,
    MPIEngineSetLauncher,
]
ssh_launchers = [
    SSHLauncher,
    SSHControllerLauncher,
    SSHEngineLauncher,
    SSHEngineSetLauncher,
]
winhpc_launchers = [
    WindowsHPCLauncher,
    WindowsHPCControllerLauncher,
    WindowsHPCEngineSetLauncher,
]
pbs_launchers = [
    PBSLauncher,
    PBSControllerLauncher,
    PBSEngineSetLauncher,
]
sge_launchers = [
    SGELauncher,
    SGEControllerLauncher,
    SGEEngineSetLauncher,
]
lsf_launchers = [
    LSFLauncher,
    LSFControllerLauncher,
    LSFEngineSetLauncher,
]
# Every launcher class, in family order.
all_launchers = (
    local_launchers
    + mpi_launchers
    + ssh_launchers
    + winhpc_launchers
    + pbs_launchers
    + sge_launchers
    + lsf_launchers
)
MinRK
|
r4019 | |||