##// END OF EJS Templates
Miscellaneous docs fixes
Miscellaneous docs fixes

File last commit:

r13361:52dae015
r13597:db0ba1df
Show More
launcher.py
1450 lines | 50.7 KiB | text/x-python | PythonLexer
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605 # encoding: utf-8
"""
Facilities for launching IPython processes asynchronously.
MinRK
update recently changed modules with Authors in docstring
r4018
Authors:
* Brian Granger
* MinRK
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605 """
#-----------------------------------------------------------------------------
MinRK
update recently changed modules with Authors in docstring
r4018 # Copyright (C) 2008-2011 The IPython Development Team
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605 #
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------
MinRK
adjustments to PBS/SGE, SSH Launchers + docs update
r3647 import copy
MinRK
resort imports in a cleaner order
r3631 import logging
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605 import os
Guy Haskin Fernald
Added pipes.quote for batch system lauchers. Fixes bug where LSF would not launch engines or controller.
r7845 import pipes
MinRK
Update PBS/SGE launchers with 0.10.1 options and defaults
r3659 import stat
MinRK
use [sys.exe, "-c", "…launch_new_instance()"] in launchers...
r6894 import sys
Ben Edwards
Added import time to IPython/parallel/apps/launcher.py...
r4667 import time
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605
MinRK
improve process cleanup on Windows...
r3778 # signal imports, handling various platforms, versions
MinRK
untwist PBS, WinHPC Launchers in newparallel
r3613 from signal import SIGINT, SIGTERM
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605 try:
from signal import SIGKILL
except ImportError:
MinRK
improve process cleanup on Windows...
r3778 # Windows
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605 SIGKILL=SIGTERM
MinRK
improve process cleanup on Windows...
r3778 try:
# Windows >= 2.7, 3.2
from signal import CTRL_C_EVENT as SIGINT
except ImportError:
pass
MinRK
untwist PBS, WinHPC Launchers in newparallel
r3613 from subprocess import Popen, PIPE, STDOUT
try:
    from subprocess import check_output
except ImportError:
    # pre-2.7, define check_output with Popen
    def check_output(*args, **kwargs):
        """Run a command via ``Popen`` and return what it wrote to stdout.

        All arguments are forwarded to ``Popen``; ``stdout`` is always
        replaced by a pipe so the output can be captured.

        Note that, unlike the standard-library function this stands in for,
        this fallback returns the output without looking at the process
        return code.
        """
        kwargs['stdout'] = PIPE
        p = Popen(*args, **kwargs)
        out, _ = p.communicate()
        return out
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605
from zmq.eventloop import ioloop
MinRK
get default logger from Application.instance()
r4012 from IPython.config.application import Application
MinRK
add LoggingConfigurable base class
r4016 from IPython.config.configurable import LoggingConfigurable
MinRK
add EvalFormatter for batch system (PBS) launcher templates...
r4004 from IPython.utils.text import EvalFormatter
MinRK
add cluster_id support to ipcluster/launchers...
r4848 from IPython.utils.traitlets import (
Bradley M. Froehle
Use CRegExp trait for regular expressions.
r6748 Any, Integer, CFloat, List, Unicode, Dict, Instance, HasTraits, CRegExp
MinRK
add cluster_id support to ipcluster/launchers...
r4848 )
MinRK
decode subprocess output in launchers...
r10658 from IPython.utils.encoding import DEFAULT_ENCODING
MinRK
use [sys.exe, "-c", "…launch_new_instance()"] in launchers...
r6894 from IPython.utils.path import get_home_dir
from IPython.utils.process import find_cmd, FindCmdError
Thomas Kluyver
Fix references to dict.iteritems and dict.itervalues
r13361 from IPython.utils.py3compat import iteritems, itervalues
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605
MinRK
forward subprocess IO over zmq on Windows...
r3771 from .win32support import forward_read_events
MinRK
improve process cleanup on Windows...
r3778 from .winhpcjob import IPControllerTask, IPEngineTask, IPControllerJob, IPEngineSetJob
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605
MinRK
forward subprocess IO over zmq on Windows...
r3771 WINDOWS = os.name == 'nt'
MinRK
interrupt windows subprocesses with CTRL-C instead of SIGINT...
r3772
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605 #-----------------------------------------------------------------------------
# Paths to the kernel apps
#-----------------------------------------------------------------------------
MinRK
use [sys.exe, "-c", "…launch_new_instance()"] in launchers...
# Template for the ``python -c`` payload; ``%s`` is filled with the name of
# the app module inside ``IPython.parallel.apps``.
cmd = "from IPython.parallel.apps.%s import launch_new_instance; launch_new_instance()"

# argv lists that run each app with the current interpreter.
ipcluster_cmd_argv = [sys.executable, "-c", cmd % "ipclusterapp"]

ipengine_cmd_argv = [sys.executable, "-c", cmd % "ipengineapp"]

ipcontroller_cmd_argv = [sys.executable, "-c", cmd % "ipcontrollerapp"]
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605
#-----------------------------------------------------------------------------
# Base launchers and errors
#-----------------------------------------------------------------------------
class LauncherError(Exception):
    """Base class for the launcher exceptions defined in this module."""


class ProcessStateError(LauncherError):
    """Raised when an operation is not valid for the launcher's current state."""


class UnknownStatus(LauncherError):
    """Launcher error for an unknown status."""
MinRK
add LoggingConfigurable base class
class BaseLauncher(LoggingConfigurable):
    """An abstraction for starting, stopping and signaling a process.

    Subclasses implement :meth:`find_args`, :meth:`start`, :meth:`stop` and
    :meth:`signal`.  The ``state`` attribute starts as ``'before'``, is set to
    ``'running'`` by :meth:`notify_start` and to ``'after'`` by
    :meth:`notify_stop`.
    """

    # In all of the launchers, the work_dir is where child processes will be
    # run. This will usually be the profile_dir, but may not be. any work_dir
    # passed into the __init__ method will override the config value.
    # This should not be used to set the work_dir for the actual engine
    # and controller. Instead, use their own config files or the
    # controller_args, engine_args attributes of the launchers to add
    # the work_dir option.
    work_dir = Unicode(u'.')
    loop = Instance('zmq.eventloop.ioloop.IOLoop')

    start_data = Any()
    stop_data = Any()

    def _loop_default(self):
        """Default for the ``loop`` trait: the global IOLoop instance."""
        return ioloop.IOLoop.instance()

    def __init__(self, work_dir=u'.', config=None, **kwargs):
        super(BaseLauncher, self).__init__(work_dir=work_dir, config=config, **kwargs)
        self.state = 'before' # can be before, running, after
        self.stop_callbacks = []
        self.start_data = None
        self.stop_data = None

    @property
    def args(self):
        """A list of cmd and args that will be used to start the process.

        This is what is passed to :func:`spawnProcess` and the first element
        will be the process name.
        """
        return self.find_args()

    def find_args(self):
        """The ``.args`` property calls this to find the args list.

        Subcommand should implement this to construct the cmd and args.
        """
        raise NotImplementedError('find_args must be implemented in a subclass')

    @property
    def arg_str(self):
        """The string form of the program arguments."""
        return ' '.join(self.args)

    @property
    def running(self):
        """Am I running."""
        return self.state == 'running'

    def start(self):
        """Start the process."""
        raise NotImplementedError('start must be implemented in a subclass')

    def stop(self):
        """Stop the process and notify observers of stopping.

        This method will return None immediately.
        To observe the actual process stopping, see :meth:`on_stop`.
        """
        raise NotImplementedError('stop must be implemented in a subclass')

    def on_stop(self, f):
        """Register a callback to be called with this Launcher's stop_data
        when the process actually finishes.

        If the launcher is already in the ``'after'`` state, ``f`` is called
        right away and its result is returned.
        """
        if self.state == 'after':
            return f(self.stop_data)
        self.stop_callbacks.append(f)

    def notify_start(self, data):
        """Call this to trigger startup actions.

        This logs the process startup and sets the state to 'running'.  It is
        a pass-through so it can be used as a callback.
        """
        self.log.debug('Process %r started: %r', self.args[0], data)
        self.start_data = data
        self.state = 'running'
        return data

    def notify_stop(self, data):
        """Call this to trigger process stop actions.

        This logs the process stopping and sets the state to 'after'. Call
        this to trigger callbacks registered via :meth:`on_stop`.
        """
        self.log.debug('Process %r stopped: %r', self.args[0], data)
        self.stop_data = data
        self.state = 'after'
        # Callbacks are consumed from the end of the list, i.e. the most
        # recently registered one runs first, and each runs at most once.
        while self.stop_callbacks:
            callback = self.stop_callbacks.pop()
            callback(data)
        return data

    def signal(self, sig):
        """Signal the process.

        Parameters
        ----------
        sig : str or int
            'KILL', 'INT', etc., or any signal number
        """
        raise NotImplementedError('signal must be implemented in a subclass')
MinRK
add cluster_id support to ipcluster/launchers...
class ClusterAppMixin(HasTraits):
    """MixIn for cluster args as traits"""
    profile_dir = Unicode('')
    cluster_id = Unicode('')

    @property
    def cluster_args(self):
        """Command-line arguments that carry ``profile_dir`` and ``cluster_id``."""
        return ['--profile-dir', self.profile_dir, '--cluster-id', self.cluster_id]
MinRK
add cluster_id support to ipcluster/launchers...
r4848
class ControllerMixin(ClusterAppMixin):
    """MixIn holding the configurable ipcontroller command and arguments."""
    controller_cmd = List(ipcontroller_cmd_argv, config=True,
        help="""Popen command to launch ipcontroller.""")
    # Command line arguments to ipcontroller.
    controller_args = List(['--log-to-file','--log-level=%i' % logging.INFO], config=True,
        help="""command-line args to pass to ipcontroller""")
class EngineMixin(ClusterAppMixin):
    """MixIn holding the configurable ipengine command and arguments."""
    engine_cmd = List(ipengine_cmd_argv, config=True,
        help="""command to launch the Engine.""")
    # Command line arguments for ipengine.
    engine_args = List(['--log-to-file','--log-level=%i' % logging.INFO], config=True,
        help="command-line arguments to pass to ipengine"
    )
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605
MinRK
incremental improvements to SSH launchers...
r6418
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605 #-----------------------------------------------------------------------------
# Local process launchers
#-----------------------------------------------------------------------------
class LocalProcessLauncher(BaseLauncher):
    """Start and stop an external process in an asynchronous manner.

    This will launch the external process with a working directory of
    ``self.work_dir``.
    """

    # This is used to construct self.args, which is passed to
    # spawnProcess.
    cmd_and_args = List([])
    poll_frequency = Integer(100) # in ms

    def __init__(self, work_dir=u'.', config=None, **kwargs):
        super(LocalProcessLauncher, self).__init__(
            work_dir=work_dir, config=config, **kwargs
        )
        self.process = None
        self.poller = None

    def find_args(self):
        """Return ``cmd_and_args`` unchanged."""
        return self.cmd_and_args

    def start(self):
        """Spawn the process and hook it into the event loop.

        The child's stdout/stderr are registered as readable handlers on
        ``self.loop`` and a periodic callback polls for process exit.

        Raises
        ------
        ProcessStateError
            If the launcher is not in the ``'before'`` state.
        """
        self.log.debug("Starting %s: %r", self.__class__.__name__, self.args)
        if self.state != 'before':
            s = 'The process was already started and has state: %r' % self.state
            raise ProcessStateError(s)

        self.process = Popen(self.args,
            stdout=PIPE,stderr=PIPE,stdin=PIPE,
            env=os.environ,
            cwd=self.work_dir
        )
        if WINDOWS:
            self.stdout = forward_read_events(self.process.stdout)
            self.stderr = forward_read_events(self.process.stderr)
        else:
            self.stdout = self.process.stdout.fileno()
            self.stderr = self.process.stderr.fileno()
        self.loop.add_handler(self.stdout, self.handle_stdout, self.loop.READ)
        self.loop.add_handler(self.stderr, self.handle_stderr, self.loop.READ)
        self.poller = ioloop.PeriodicCallback(self.poll, self.poll_frequency, self.loop)
        self.poller.start()
        self.notify_start(self.process.pid)

    def stop(self):
        """Stop the process via :meth:`interrupt_then_kill`."""
        return self.interrupt_then_kill()

    def signal(self, sig):
        """Send ``sig`` to the process; does nothing unless it is running."""
        if self.state == 'running':
            if WINDOWS and sig != SIGINT:
                # use Windows tree-kill for better child cleanup
                check_output(['taskkill', '-pid', str(self.process.pid), '-t', '-f'])
            else:
                self.process.send_signal(sig)

    def interrupt_then_kill(self, delay=2.0):
        """Send INT, wait a delay and then send KILL.

        ``delay`` is multiplied by 1000 before being handed to the loop's
        delayed callback.
        """
        try:
            self.signal(SIGINT)
        except Exception:
            # best effort: the KILL scheduled below still goes ahead
            self.log.debug("interrupt failed")
        self.killer = ioloop.DelayedCallback(lambda : self.signal(SIGKILL), delay*1000, self.loop)
        self.killer.start()

    # callbacks, etc:

    def _handle_line(self, line):
        """Log one line of child output, or poll the process on empty input."""
        # a stopped process will be readable but return empty strings
        if line:
            # drop the final character of the line before logging
            self.log.debug(line[:-1])
        else:
            self.poll()

    def handle_stdout(self, fd, events):
        """Event-loop handler for the child's stdout."""
        if WINDOWS:
            line = self.stdout.recv()
        else:
            line = self.process.stdout.readline()
        self._handle_line(line)

    def handle_stderr(self, fd, events):
        """Event-loop handler for the child's stderr."""
        if WINDOWS:
            line = self.stderr.recv()
        else:
            line = self.process.stderr.readline()
        self._handle_line(line)

    def poll(self):
        """Check whether the process has exited.

        When it has, stop the poller, remove the output handlers and call
        :meth:`notify_stop` with ``exit_code`` and ``pid``.  Returns the
        value of ``self.process.poll()``.
        """
        status = self.process.poll()
        if status is not None:
            self.poller.stop()
            self.loop.remove_handler(self.stdout)
            self.loop.remove_handler(self.stderr)
            self.notify_stop(dict(exit_code=status, pid=self.process.pid))
        return status
MinRK
add cluster_id support to ipcluster/launchers...
class LocalControllerLauncher(LocalProcessLauncher, ControllerMixin):
    """Launch a controller as a regular external process."""

    def find_args(self):
        """Concatenate the controller command, cluster args and controller args."""
        return self.controller_cmd + self.cluster_args + self.controller_args

    def start(self):
        """Start the controller by profile_dir."""
        return super(LocalControllerLauncher, self).start()
MinRK
add cluster_id support to ipcluster/launchers...
class LocalEngineLauncher(LocalProcessLauncher, EngineMixin):
    """Launch a single engine as a regular external process."""

    def find_args(self):
        """Concatenate the engine command, cluster args and engine args."""
        return self.engine_cmd + self.cluster_args + self.engine_args
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605
MinRK
add cluster_id support to ipcluster/launchers...
class LocalEngineSetLauncher(LocalEngineLauncher):
    """Launch a set of engines as regular external processes."""

    delay = CFloat(0.1, config=True,
        help="""delay (in seconds) between starting each engine after the first.
        This can help force the engines to get their ids in order, or limit
        process flood when starting many engines."""
    )

    # launcher class
    launcher_class = LocalEngineLauncher

    launchers = Dict()
    stop_data = Dict()

    def __init__(self, work_dir=u'.', config=None, **kwargs):
        super(LocalEngineSetLauncher, self).__init__(
            work_dir=work_dir, config=config, **kwargs
        )
        self.stop_data = {}

    def start(self, n):
        """Start n engines by profile or profile_dir.

        Returns the list of values returned by each child launcher's
        ``start()``.
        """
        dlist = []
        for i in range(n):
            if i > 0:
                time.sleep(self.delay)
            el = self.launcher_class(work_dir=self.work_dir, parent=self, log=self.log,
                profile_dir=self.profile_dir, cluster_id=self.cluster_id,
            )

            # Copy the engine args over to each engine launcher.
            el.engine_cmd = copy.deepcopy(self.engine_cmd)
            el.engine_args = copy.deepcopy(self.engine_args)
            el.on_stop(self._notice_engine_stopped)
            d = el.start()
            self.launchers[i] = el
            dlist.append(d)
        self.notify_start(dlist)
        return dlist

    def find_args(self):
        """Return a placeholder args list; the set itself is not one process."""
        return ['engine set']

    def signal(self, sig):
        """Forward ``sig`` to every child launcher and collect the results."""
        return [el.signal(sig) for el in itervalues(self.launchers)]

    def interrupt_then_kill(self, delay=1.0):
        """Call ``interrupt_then_kill(delay)`` on every child launcher."""
        return [el.interrupt_then_kill(delay) for el in itervalues(self.launchers)]

    def stop(self):
        """Stop all engines via :meth:`interrupt_then_kill`."""
        return self.interrupt_then_kill()

    def _notice_engine_stopped(self, data):
        """Record one child's stop data; notify once every child has stopped.

        ``data`` is the child's stop data and must contain a ``'pid'`` key.
        """
        pid = data['pid']
        for idx, el in iteritems(self.launchers):
            if el.process.pid == pid:
                break
        else:
            # No launcher owns this pid.  Without this guard the code below
            # would pop whichever launcher the loop visited last (or fail
            # with NameError when there are no launchers at all).
            self.log.debug("no engine launcher found for stopped pid %r", pid)
            return
        self.launchers.pop(idx)
        self.stop_data[idx] = data
        if not self.launchers:
            self.notify_stop(self.stop_data)
#-----------------------------------------------------------------------------
MinRK
rename MPIExecLaunchers to MPILaunchers...
r5696 # MPI launchers
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605 #-----------------------------------------------------------------------------
MinRK
rename MPIExecLaunchers to MPILaunchers...
class MPILauncher(LocalProcessLauncher):
    """Launch an external process using mpiexec."""

    mpi_cmd = List(['mpiexec'], config=True,
        help="The mpiexec command to use in starting the process."
    )
    mpi_args = List([], config=True,
        help="The command line arguments to pass to mpiexec."
    )
    program = List(['date'],
        help="The program to start via mpiexec.")
    program_args = List([],
        help="The command line argument to the program."
    )
    n = Integer(1)

    def __init__(self, *args, **kwargs):
        # deprecation for old MPIExec names:
        # ``config`` may be absent or passed explicitly as None; treat both
        # as "nothing to migrate" instead of failing on ``None.get``.
        config = kwargs.get('config') or {}
        for oldname in ('MPIExecLauncher', 'MPIExecControllerLauncher', 'MPIExecEngineSetLauncher'):
            deprecated = config.get(oldname)
            if deprecated:
                newname = oldname.replace('MPIExec', 'MPI')
                # carry settings made under the old section name over to the new one
                config[newname].update(deprecated)
                self.log.warning("WARNING: %s name has been deprecated, use %s", oldname, newname)

        super(MPILauncher, self).__init__(*args, **kwargs)

    def find_args(self):
        """Build self.args using all the fields."""
        return (
            self.mpi_cmd + ['-n', str(self.n)] + self.mpi_args
            + self.program + self.program_args
        )

    def start(self, n):
        """Start n instances of the program using mpiexec."""
        self.n = n
        return super(MPILauncher, self).start()
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605
MinRK
rename MPIExecLaunchers to MPILaunchers...
class MPIControllerLauncher(MPILauncher, ControllerMixin):
    """Launch a controller using mpiexec."""

    # alias back to *non-configurable* program[_args] for use in find_args()
    # this way all Controller/EngineSetLaunchers have the same form, rather
    # than *some* having `program_args` and others `controller_args`
    @property
    def program(self):
        """The controller command, exposed under the generic ``program`` name."""
        return self.controller_cmd

    @property
    def program_args(self):
        """Cluster args followed by the controller args."""
        return self.cluster_args + self.controller_args

    def start(self):
        """Start the controller by profile_dir."""
        return super(MPIControllerLauncher, self).start(1)
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605
MinRK
rename MPIExecLaunchers to MPILaunchers...
class MPIEngineSetLauncher(MPILauncher, EngineMixin):
    """Launch engines using mpiexec"""

    # alias back to *non-configurable* program[_args] for use in find_args()
    # this way all Controller/EngineSetLaunchers have the same form, rather
    # than *some* having `program_args` and others `controller_args`
    @property
    def program(self):
        """The engine command, exposed under the generic ``program`` name."""
        return self.engine_cmd

    @property
    def program_args(self):
        """Cluster args followed by the engine args."""
        return self.cluster_args + self.engine_args

    def start(self, n):
        """Start n engines by profile or profile_dir."""
        self.n = n
        return super(MPIEngineSetLauncher, self).start(n)
# deprecated MPIExec names
class DeprecatedMPILauncher(object):
    """MixIn that logs a deprecation warning for the old ``MPIExec*`` names."""

    def warn(self):
        """Log that this class name is deprecated and name its replacement.

        The replacement is the class name with ``MPIExec`` replaced by
        ``MPI``.  Reads ``self.log``, which this class does not define
        itself.
        """
        oldname = self.__class__.__name__
        newname = oldname.replace('MPIExec', 'MPI')
        self.log.warning("WARNING: %s name is deprecated, use %s", oldname, newname)
class MPIExecLauncher(MPILauncher, DeprecatedMPILauncher):
    """Deprecated, use MPILauncher"""

    def __init__(self, *args, **kwargs):
        super(MPIExecLauncher, self).__init__(*args, **kwargs)
        # emit the deprecation message once construction has succeeded
        self.warn()
class MPIExecControllerLauncher(MPIControllerLauncher, DeprecatedMPILauncher):
    """Deprecated, use MPIControllerLauncher"""

    def __init__(self, *args, **kwargs):
        super(MPIExecControllerLauncher, self).__init__(*args, **kwargs)
        # emit the deprecation message once construction has succeeded
        self.warn()
class MPIExecEngineSetLauncher(MPIEngineSetLauncher, DeprecatedMPILauncher):
    """Deprecated, use MPIEngineSetLauncher"""

    def __init__(self, *args, **kwargs):
        super(MPIExecEngineSetLauncher, self).__init__(*args, **kwargs)
        # emit the deprecation message once construction has succeeded
        self.warn()
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605
#-----------------------------------------------------------------------------
# SSH launchers
#-----------------------------------------------------------------------------
MinRK
scrub twisted/deferred references from launchers...
r4019 # TODO: Get SSH Launcher back to level of sshx in 0.10.2
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605
class SSHLauncher(LocalProcessLauncher):
"""A minimal launcher for ssh.
To be useful this will probably have to be extended to use the ``sshx``
idea for environment variables. There could be other things this needs
as well.
"""
MinRK
all ipcluster scripts in some degree of working order with new config
r3985 ssh_cmd = List(['ssh'], config=True,
help="command for starting ssh")
ssh_args = List(['-tt'], config=True,
help="args to pass to ssh")
MinRK
incremental improvements to SSH launchers...
r6418 scp_cmd = List(['scp'], config=True,
help="command for sending files")
MinRK
add cluster_id support to ipcluster/launchers...
r4848 program = List(['date'],
MinRK
all ipcluster scripts in some degree of working order with new config
r3985 help="Program to launch via ssh")
MinRK
add cluster_id support to ipcluster/launchers...
r4848 program_args = List([],
MinRK
all ipcluster scripts in some degree of working order with new config
r3985 help="args to pass to remote program")
MinRK
cleanup parallel traits...
r3988 hostname = Unicode('', config=True,
MinRK
all ipcluster scripts in some degree of working order with new config
r3985 help="hostname on which to launch the program")
MinRK
cleanup parallel traits...
r3988 user = Unicode('', config=True,
MinRK
all ipcluster scripts in some degree of working order with new config
r3985 help="username for ssh")
MinRK
cleanup parallel traits...
r3988 location = Unicode('', config=True,
MinRK
all ipcluster scripts in some degree of working order with new config
r3985 help="user@hostname location for ssh in one setting")
MinRK
incremental improvements to SSH launchers...
r6418 to_fetch = List([], config=True,
help="List of (remote, local) files to fetch after starting")
to_send = List([], config=True,
help="List of (local, remote) files to send before starting")
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605
def _hostname_changed(self, name, old, new):
MinRK
adjustments to PBS/SGE, SSH Launchers + docs update
r3647 if self.user:
MinRK
Update PBS/SGE launchers with 0.10.1 options and defaults
r3659 self.location = u'%s@%s' % (self.user, new)
MinRK
adjustments to PBS/SGE, SSH Launchers + docs update
r3647 else:
self.location = new
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605
def _user_changed(self, name, old, new):
MinRK
Update PBS/SGE launchers with 0.10.1 options and defaults
r3659 self.location = u'%s@%s' % (new, self.hostname)
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605
def find_args(self):
return self.ssh_cmd + self.ssh_args + [self.location] + \
MinRK
quote args passed through SSH in SSHLaunchers...
r8159 list(map(pipes.quote, self.program + self.program_args))
MinRK
incremental improvements to SSH launchers...
r6418
def _send_file(self, local, remote):
"""send a single file"""
remote = "%s:%s" % (self.location, remote)
for i in range(10):
if not os.path.exists(local):
self.log.debug("waiting for %s" % local)
time.sleep(1)
else:
break
self.log.info("sending %s to %s", local, remote)
check_output(self.scp_cmd + [local, remote])
def send_files(self):
MinRK
SSHProxyLauncher edits per review...
r6619 """send our files (called before start)"""
if not self.to_send:
MinRK
incremental improvements to SSH launchers...
r6418 return
for local_file, remote_file in self.to_send:
self._send_file(local_file, remote_file)
def _fetch_file(self, remote, local):
MinRK
SSHProxyLauncher edits per review...
r6619 """fetch a single file"""
MinRK
incremental improvements to SSH launchers...
r6418 full_remote = "%s:%s" % (self.location, remote)
self.log.info("fetching %s from %s", local, full_remote)
for i in range(10):
# wait up to 10s for remote file to exist
check = check_output(self.ssh_cmd + self.ssh_args + \
[self.location, 'test -e', remote, "&& echo 'yes' || echo 'no'"])
MinRK
decode subprocess output in launchers...
r10658 check = check.decode(DEFAULT_ENCODING, 'replace').strip()
if check == u'no':
MinRK
incremental improvements to SSH launchers...
r6418 time.sleep(1)
MinRK
decode subprocess output in launchers...
r10658 elif check == u'yes':
MinRK
incremental improvements to SSH launchers...
r6418 break
check_output(self.scp_cmd + [full_remote, local])
def fetch_files(self):
    """Fetch every ``(remote, local)`` pair in ``to_fetch`` (called after start)."""
    if self.to_fetch:
        for source, destination in self.to_fetch:
            self._fetch_file(source, destination)
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605
MinRK
add cluster_id support to ipcluster/launchers...
def start(self, hostname=None, user=None):
    """Send files, start the ssh process, then fetch files.

    ``hostname`` and ``user`` override the corresponding attributes when
    they are not None (hostname is assigned first).
    """
    for attr, value in (('hostname', hostname), ('user', user)):
        if value is not None:
            setattr(self, attr, value)

    self.send_files()
    super(SSHLauncher, self).start()
    self.fetch_files()
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
add ipcluster engines; fix ssh process shutdown
def signal(self, sig):
    """Close the ssh connection instead of delivering ``sig``.

    ``sig`` is ignored. While the launcher is in the 'running' state, the
    ssh escape sequence ``~.`` is written to the process's stdin.
    Nothing happens in any other state.
    """
    if self.state == 'running':
        # send escaped ssh connection-closer
        # Written as bytes: a subprocess pipe is binary unless text mode was
        # requested, and on Python 3 a binary pipe rejects str. On Python 2
        # b'~.' is the same object type as '~.'.
        # NOTE(review): assumes the process was started with a default
        # (binary) stdin pipe -- confirm in the base launcher's start().
        self.process.stdin.write(b'~.')
        self.process.stdin.flush()
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
add missing ClusterApp mixin to SSHClusterLauncher...
class SSHClusterLauncher(SSHLauncher, ClusterAppMixin):
    """SSH launcher that hands a remote ``--profile-dir`` to the launched program.

    ``remote_profile_dir`` defaults to the local ``profile_dir`` with a
    leading home directory stripped. A non-empty cluster id is rejected.
    """

    remote_profile_dir = Unicode('', config=True,
        help="The remote profile_dir to use.\n"
             "If not specified, use calling profile, stripping out possible leading homedir.\n"
        )

    def _profile_dir_changed(self, name, old, new):
        """Derive ``remote_profile_dir`` from ``new`` while it is still empty."""
        if self.remote_profile_dir:
            return
        # trigger remote_profile_dir_default logic again,
        # in case it was already triggered before profile_dir was set
        self.remote_profile_dir = self._strip_home(new)

    @staticmethod
    def _strip_home(path):
        """turns /home/you/.ipython/profile_foo into .ipython/profile_foo

        Paths that do not start with the home directory are returned unchanged.
        """
        prefix = get_home_dir()
        if not prefix.endswith('/'):
            prefix = prefix + '/'
        return path[len(prefix):] if path.startswith(prefix) else path

    def _remote_profile_dir_default(self):
        """Default: the local ``profile_dir`` with the home directory stripped."""
        local_dir = self.profile_dir
        return self._strip_home(local_dir)

    def _cluster_id_changed(self, name, old, new):
        """Reject any non-empty cluster id with ValueError."""
        if not new:
            return
        raise ValueError("cluster id not supported by SSH launchers")

    @property
    def cluster_args(self):
        """Command-line arguments selecting the remote profile directory."""
        return ['--profile-dir', self.remote_profile_dir]
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605
MinRK
incremental improvements to SSH launchers...
class SSHControllerLauncher(SSHClusterLauncher, ControllerMixin):
    """Launch ``ipcontroller`` over ssh and fetch its connection files."""

    # alias back to *non-configurable* program[_args] for use in find_args()
    # this way all Controller/EngineSetLaunchers have the same form, rather
    # than *some* having `program_args` and others `controller_args`

    def _controller_cmd_default(self):
        """Default controller command: plain ``ipcontroller``."""
        return ['ipcontroller']

    @property
    def program(self):
        """The controller command."""
        return self.controller_cmd

    @property
    def program_args(self):
        """Cluster arguments followed by the controller arguments."""
        return self.cluster_args + self.controller_args

    def _to_fetch_default(self):
        """Default ``to_fetch``: (remote, local) paths of both connection files.

        Each file is looked up in the ``security`` directory of the remote
        profile and copied to the ``security`` directory of the local one.
        """
        pairs = []
        for cf in ('ipcontroller-client.json', 'ipcontroller-engine.json'):
            remote_path = os.path.join(self.remote_profile_dir, 'security', cf)
            local_path = os.path.join(self.profile_dir, 'security', cf)
            pairs.append((remote_path, local_path))
        return pairs
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605
MinRK
incremental improvements to SSH launchers...
class SSHEngineLauncher(SSHClusterLauncher, EngineMixin):
    """Launch ``ipengine`` over ssh after sending it the connection files."""

    # alias back to *non-configurable* program[_args] for use in find_args()
    # this way all Controller/EngineSetLaunchers have the same form, rather
    # than *some* having `program_args` and others `controller_args`

    def _engine_cmd_default(self):
        """Default engine command: plain ``ipengine``."""
        return ['ipengine']

    @property
    def program(self):
        """The engine command."""
        return self.engine_cmd

    @property
    def program_args(self):
        """Cluster arguments followed by the engine arguments."""
        return self.cluster_args + self.engine_args

    def _to_send_default(self):
        """Default ``to_send``: (local, remote) paths of both connection files.

        Each file is taken from the ``security`` directory of the local
        profile and copied to the ``security`` directory of the remote one.
        """
        pairs = []
        for cf in ('ipcontroller-client.json', 'ipcontroller-engine.json'):
            local_path = os.path.join(self.profile_dir, 'security', cf)
            remote_path = os.path.join(self.remote_profile_dir, 'security', cf)
            pairs.append((local_path, remote_path))
        return pairs
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
adapt kernel's ipcluster and Launchers to newparallel
class SSHEngineSetLauncher(LocalEngineSetLauncher):
    """Start sets of engines over ssh, one ``SSHEngineLauncher`` per engine."""

    launcher_class = SSHEngineLauncher
    engines = Dict(config=True,
        help="dict of engines to launch. This is a dict by hostname of ints,\n"
             "corresponding to the number of engines to start on that host.")

    def _engine_cmd_default(self):
        """Default engine command: plain ``ipengine``."""
        return ['ipengine']

    @property
    def engine_count(self):
        """determine engine count from `engines` dict

        A value may be a bare count or a ``(count, args)`` pair.
        """
        total = 0
        for spec in itervalues(self.engines):
            if isinstance(spec, (tuple, list)):
                spec, _args = spec
            total += spec
        return total

    def start(self, n):
        """Start engines on every host listed in ``engines``.

        `n` is ignored, and the `engines` config property is used instead.
        Keys are ``host`` or ``user@host``; values are a count or a
        ``(count, args)`` pair. With a bare count, a deep copy of
        ``engine_args`` is used. Returns the list of values returned by each
        engine launcher's ``start``.
        """
        results = []
        for target, spec in iteritems(self.engines):
            if isinstance(spec, (tuple, list)):
                count, args = spec
            else:
                count = spec
                args = copy.deepcopy(self.engine_args)

            if '@' in target:
                user, _sep, host = target.partition('@')
            else:
                user, host = None, target

            for index in range(count):
                first_on_host = index == 0
                if not first_on_host:
                    time.sleep(self.delay)
                el = self.launcher_class(
                    work_dir=self.work_dir, parent=self, log=self.log,
                    profile_dir=self.profile_dir, cluster_id=self.cluster_id,
                )
                if not first_on_host:
                    # only send files for the first engine on each host
                    el.to_send = []

                # Copy the engine args over to each engine launcher.
                el.engine_cmd = self.engine_cmd
                el.engine_args = args
                el.on_stop(self._notice_engine_stopped)
                results.append(el.start(user=user, hostname=host))
                self.launchers["%s/%i" % (host, index)] = el
        self.notify_start(results)
        return results
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605
MinRK
incremental improvements to SSH launchers...
class SSHProxyEngineSetLauncher(SSHClusterLauncher):
    """Launcher for calling
    `ipcluster engines` on a remote machine.

    Requires that remote profile is already configured.
    """

    n = Integer()
    ipcluster_cmd = List(['ipcluster'], config=True)

    @property
    def program(self):
        """The ipcluster command followed by the ``engines`` subcommand."""
        command = self.ipcluster_cmd + ['engines']
        return command

    @property
    def program_args(self):
        """Arguments giving the engine count and the remote profile directory."""
        count = str(self.n)
        return ['-n', count, '--profile-dir', self.remote_profile_dir]

    def _to_send_default(self):
        """Default ``to_send``: (local, remote) paths of both connection files."""
        pairs = []
        for cf in ('ipcontroller-client.json', 'ipcontroller-engine.json'):
            local_path = os.path.join(self.profile_dir, 'security', cf)
            remote_path = os.path.join(self.remote_profile_dir, 'security', cf)
            pairs.append((local_path, remote_path))
        return pairs

    def start(self, n):
        """Record ``n`` as the engine count, then start the ssh launcher."""
        self.n = n
        super(SSHProxyEngineSetLauncher, self).start()
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605
#-----------------------------------------------------------------------------
# Windows HPC Server 2008 scheduler launchers
#-----------------------------------------------------------------------------
MinRK
untwist PBS, WinHPC Launchers in newparallel
r3613 # This is only used on Windows.
def find_job_cmd():
    """Return the command used to run the Windows HPC ``job`` tool.

    On Windows the result of ``find_cmd('job')`` is returned when the
    lookup succeeds. In every other case the bare name ``'job'`` is returned.
    """
    if not WINDOWS:
        return 'job'
    try:
        return find_cmd('job')
    except (FindCmdError, ImportError):
        # ImportError will be raised if win32api is not installed
        return 'job'
class WindowsHPCLauncher(BaseLauncher):
    """Base launcher that submits and cancels jobs with the Windows HPC ``job`` command.

    Subclasses must implement :meth:`write_job_file`.
    """

    job_id_regexp = CRegExp(r'\d+', config=True,
        help="A regular expression used to get the job id from the output of the\n"
             "submit_command. "
        )
    job_file_name = Unicode(u'ipython_job.xml', config=True,
        help="The filename of the instantiated job script.")
    scheduler = Unicode('', config=True,
        help="The hostname of the scheduler to submit the job to.")
    job_cmd = Unicode(find_job_cmd(), config=True,
        help="The command for submitting jobs.")

    def __init__(self, work_dir=u'.', config=None, **kwargs):
        """Pass ``work_dir``, ``config`` and any extra keywords to the base class."""
        super(WindowsHPCLauncher, self).__init__(
            work_dir=work_dir, config=config, **kwargs
        )

    # The full path to the instantiated job script. This gets made dynamically
    # by combining the work_dir with the job_file_name.
    # (The class body used to bind a `job_file = Unicode(u'')` trait as well;
    # this property rebinds the same name, so that trait never took effect
    # and has been dropped.)
    @property
    def job_file(self):
        """Full path of the job script: ``work_dir`` joined with ``job_file_name``."""
        return os.path.join(self.work_dir, self.job_file_name)

    def write_job_file(self, n):
        """Write the job description for ``n`` tasks; must be overridden."""
        raise NotImplementedError("Implement write_job_file in a subclass.")

    def find_args(self):
        """Return the fixed argv ``[u'job.exe']``."""
        return [u'job.exe']

    def parse_job_id(self, output):
        """Take the output of the submit command and return the job id.

        The first match of ``job_id_regexp`` is stored on ``self.job_id``
        and returned. Raises LauncherError when nothing matches.
        """
        m = self.job_id_regexp.search(output)
        if m is None:
            raise LauncherError("Job id couldn't be determined: %s" % output)
        job_id = m.group()
        self.job_id = job_id
        self.log.info('Job started with id: %r', job_id)
        return job_id

    def start(self, n):
        """Start n copies of the process using the Win HPC job scheduler.

        Writes the job file, submits it with ``job_cmd`` and returns the
        job id parsed from the command's output.
        """
        self.write_job_file(n)
        args = [
            'submit',
            '/jobfile:%s' % self.job_file,
            '/scheduler:%s' % self.scheduler
        ]
        self.log.debug("Starting Win HPC Job: %s" % (self.job_cmd + ' ' + ' '.join(args),))

        output = check_output([self.job_cmd]+args,
            env=os.environ,
            cwd=self.work_dir,
            stderr=STDOUT
        )
        output = output.decode(DEFAULT_ENCODING, 'replace')
        job_id = self.parse_job_id(output)
        self.notify_start(job_id)
        return job_id

    def stop(self):
        """Cancel the submitted job and return the cancel command's output.

        If the cancel command cannot be run or fails, the job is assumed to
        be stopped already and a message saying so is used as the output.
        The output is also passed to ``notify_stop``.
        """
        args = [
            'cancel',
            self.job_id,
            '/scheduler:%s' % self.scheduler
        ]
        self.log.info("Stopping Win HPC Job: %s" % (self.job_cmd + ' ' + ' '.join(args),))
        try:
            output = check_output([self.job_cmd]+args,
                env=os.environ,
                cwd=self.work_dir,
                stderr=STDOUT
            )
            output = output.decode(DEFAULT_ENCODING, 'replace')
        except Exception:
            # Best effort, but KeyboardInterrupt/SystemExit are no longer
            # swallowed as they were by the previous bare `except:`.
            output = u'The job already appears to be stopped: %r' % self.job_id
        self.notify_stop(dict(job_id=self.job_id, output=output))  # Pass the output of the kill cmd
        return output
MinRK
add cluster_id support to ipcluster/launchers...
class WindowsHPCControllerLauncher(WindowsHPCLauncher, ClusterAppMixin):
    """Submit a single-task Windows HPC job that runs the controller."""

    job_file_name = Unicode(u'ipcontroller_job.xml', config=True,
        help="WinHPC xml job file.")
    controller_args = List([], config=False,
        help="extra args to pass to ipcontroller")

    def write_job_file(self, n):
        """Write a job description holding one controller task.

        ``n`` is not used: exactly one task is added.
        """
        job = IPControllerJob(parent=self)
        task = IPControllerTask(parent=self)
        # The tasks work directory is *not* the actual work directory of
        # the controller. It is used as the base path for the stdout/stderr
        # files that the scheduler redirects to.
        task.work_directory = self.profile_dir
        # Cluster args first, then any extra controller args.
        for extra in (self.cluster_args, self.controller_args):
            task.controller_args.extend(extra)
        job.add_task(task)

        target = self.job_file
        self.log.debug("Writing job description file: %s", target)
        job.write(self.job_file)

    @property
    def job_file(self):
        """Full path of the job file: ``profile_dir`` joined with ``job_file_name``."""
        return os.path.join(self.profile_dir, self.job_file_name)

    def start(self):
        """Start the controller by profile_dir (always a single task)."""
        task_count = 1
        return super(WindowsHPCControllerLauncher, self).start(task_count)
MinRK
add cluster_id support to ipcluster/launchers...
class WindowsHPCEngineSetLauncher(WindowsHPCLauncher, ClusterAppMixin):
    """Submit a Windows HPC job that runs one engine task per requested engine."""

    job_file_name = Unicode(u'ipengineset_job.xml', config=True,
        help="jobfile for ipengines job")
    engine_args = List([], config=False,
        help="extra args to pas to ipengine")

    def write_job_file(self, n):
        """Write a job description holding ``n`` engine tasks."""
        job = IPEngineSetJob(parent=self)

        for _index in range(n):
            task = IPEngineTask(parent=self)
            # The tasks work directory is *not* the actual work directory of
            # the engine. It is used as the base path for the stdout/stderr
            # files that the scheduler redirects to.
            task.work_directory = self.profile_dir
            # Cluster args first, then any extra engine args.
            for extra in (self.cluster_args, self.engine_args):
                task.engine_args.extend(extra)
            job.add_task(task)

        target = self.job_file
        self.log.debug("Writing job description file: %s", target)
        job.write(self.job_file)

    @property
    def job_file(self):
        """Full path of the job file: ``profile_dir`` joined with ``job_file_name``."""
        return os.path.join(self.profile_dir, self.job_file_name)

    def start(self, n):
        """Start ``n`` engines by profile_dir."""
        engine_count = n
        return super(WindowsHPCEngineSetLauncher, self).start(engine_count)
#-----------------------------------------------------------------------------
# Batch (PBS) system launchers
#-----------------------------------------------------------------------------
MinRK
add cluster_id support to ipcluster/launchers...
class BatchClusterAppMixin(ClusterAppMixin):
    """ClusterApp mixin that updates the self.context dict, rather than cl-args."""

    def _profile_dir_changed(self, name, old, new):
        """Store ``new`` in ``self.context`` under the key ``name``."""
        key, value = name, new
        self.context[key] = value

    # cluster_id changes are recorded in the context in exactly the same way
    _cluster_id_changed = _profile_dir_changed

    def _profile_dir_default(self):
        """Default to an empty profile_dir and record it in the context."""
        empty = ''
        self.context['profile_dir'] = empty
        return empty

    def _cluster_id_default(self):
        """Default to an empty cluster_id and record it in the context."""
        empty = ''
        self.context['cluster_id'] = empty
        return empty
MinRK
untwist PBS, WinHPC Launchers in newparallel
class BatchSystemLauncher(BaseLauncher):
    """Launch an external process using a batch system.

    This class is designed to work with UNIX batch systems like PBS, LSF,
    GridEngine, etc. The overall model is that there are different commands
    like qsub, qdel, etc. that handle the starting and stopping of the process.

    This class also has the notion of a batch script. The ``batch_template``
    attribute can be set to a string that is a template for the batch script.
    This template is instantiated using string formatting. Thus the template can
    use {n} for the number of instances. Subclasses can add additional variables
    to the template dict.
    """

    # Subclasses must fill these in. See PBSEngineSet
    submit_command = List([''], config=True,
        help="The name of the command line program used to submit jobs.")
    delete_command = List([''], config=True,
        help="The name of the command line program used to delete jobs.")
    job_id_regexp = CRegExp('', config=True,
        help="A regular expression used to get the job id from the output of the\n"
             "submit_command.")
    job_id_regexp_group = Integer(0, config=True,
        help="""The group we wish to match in job_id_regexp (0 to match all)""")
    batch_template = Unicode('', config=True,
        help="The string that is the batch script template itself.")
    batch_template_file = Unicode(u'', config=True,
        help="The file that contains the batch template.")
    batch_file_name = Unicode(u'batch_script', config=True,
        help="The filename of the instantiated batch script.")
    queue = Unicode(u'', config=True,
        help="The PBS Queue.")

    def _queue_changed(self, name, old, new):
        """Store ``new`` in the template context under the key ``name``."""
        self.context[name] = new

    n = Integer(1)
    # changes to `n` are recorded in the context the same way
    _n_changed = _queue_changed

    # not configurable, override in subclasses
    # PBS Job Array regex
    job_array_regexp = CRegExp('')
    job_array_template = Unicode('')
    # PBS Queue regex
    queue_regexp = CRegExp('')
    queue_template = Unicode('')
    # The default batch template, override in subclasses
    default_template = Unicode('')
    # The full path to the instantiated batch script.
    batch_file = Unicode(u'')
    # the format dict used with batch_template:
    context = Dict()

    def _context_default(self):
        """load the default context with the default values for the basic keys

        because the _trait_changed methods only load the context if they
        are set to something other than the default value.
        """
        return dict(n=1, queue=u'', profile_dir=u'', cluster_id=u'')

    # the Formatter instance for rendering the templates:
    formatter = Instance(EvalFormatter, (), {})

    def find_args(self):
        """Return the submit command followed by the batch file path."""
        return self.submit_command + [self.batch_file]

    def __init__(self, work_dir=u'.', config=None, **kwargs):
        """Initialise the base launcher and set ``batch_file`` inside ``work_dir``."""
        super(BatchSystemLauncher, self).__init__(
            work_dir=work_dir, config=config, **kwargs
        )
        self.batch_file = os.path.join(self.work_dir, self.batch_file_name)

    def parse_job_id(self, output):
        """Take the output of the submit command and return the job id.

        Group ``job_id_regexp_group`` of the first ``job_id_regexp`` match is
        stored on ``self.job_id`` and returned. Raises LauncherError when
        nothing matches.
        """
        m = self.job_id_regexp.search(output)
        if m is None:
            raise LauncherError("Job id couldn't be determined: %s" % output)
        job_id = m.group(self.job_id_regexp_group)
        self.job_id = job_id
        self.log.info('Job submitted with job id: %r', job_id)
        return job_id

    def write_batch_script(self, n):
        """Instantiate and write the batch script to the work_dir.

        The template is taken from ``batch_template``, else read from
        ``batch_template_file``, else ``default_template``. The rendered
        script is written to ``batch_file`` and made readable, writable and
        executable by the owner only.
        """
        self.n = n
        # first priority is batch_template if set
        if self.batch_template_file and not self.batch_template:
            # second priority is batch_template_file
            with open(self.batch_template_file) as f:
                self.batch_template = f.read()
        if not self.batch_template:
            # third (last) priority is default_template
            self.batch_template = self.default_template
            # add jobarray or queue lines to the template
            # note that this is *only* when user did not specify a template.
            self._insert_queue_in_script()
            self._insert_job_array_in_script()
        script_as_string = self.formatter.format(self.batch_template, **self.context)
        self.log.debug('Writing batch script: %s', self.batch_file)
        with open(self.batch_file, 'w') as f:
            f.write(script_as_string)
        os.chmod(self.batch_file, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)

    def _insert_after_first_line(self, line):
        """Insert ``line`` as the second line of ``batch_template``.

        ``partition`` is used so that a template without any newline is
        handled too (the inserted line then simply follows it).
        """
        firstline, _sep, rest = self.batch_template.partition('\n')
        self.batch_template = u'\n'.join([firstline, line, rest])

    def _insert_queue_in_script(self):
        """Inserts a queue if required into the batch script.

        Done only when ``queue`` is set and ``queue_regexp`` does not
        already match the template.
        """
        if self.queue and not self.queue_regexp.search(self.batch_template):
            self.log.debug("adding PBS queue settings to batch script")
            self._insert_after_first_line(self.queue_template)

    def _insert_job_array_in_script(self):
        """Inserts a job array if required into the batch script.

        Done only when ``job_array_regexp`` does not already match the template.
        """
        if not self.job_array_regexp.search(self.batch_template):
            self.log.debug("adding job array settings to batch script")
            self._insert_after_first_line(self.job_array_template)

    def start(self, n):
        """Start n copies of the process using a batch system.

        Writes the batch script, runs the submit command and returns the
        job id parsed from its output.
        """
        self.log.debug("Starting %s: %r", self.__class__.__name__, self.args)
        # Here we save profile_dir in the context so they
        # can be used in the batch script template as {profile_dir}
        self.write_batch_script(n)
        output = check_output(self.args, env=os.environ)
        output = output.decode(DEFAULT_ENCODING, 'replace')

        job_id = self.parse_job_id(output)
        self.notify_start(job_id)
        return job_id

    def stop(self):
        """Run the delete command for the job and return its decoded output.

        stdout and stderr of the command are concatenated. If the command
        cannot be run, the problem is logged and the output is empty. The
        output is also passed to ``notify_stop``.
        """
        command = self.delete_command + [self.job_id]
        try:
            p = Popen(command, env=os.environ, stdout=PIPE, stderr=PIPE)
            out, err = p.communicate()
            output = out + err
        except Exception:
            self.log.exception("Problem stopping cluster with command: %s", command)
            # bytes, so the decode below also works on Python 3
            # (str has no .decode there)
            output = b""
        output = output.decode(DEFAULT_ENCODING, 'replace')
        self.notify_stop(dict(job_id=self.job_id, output=output))  # Pass the output of the kill cmd
        return output
class PBSLauncher(BatchSystemLauncher):
    """A BatchSystemLauncher subclass for PBS."""

    submit_command = List(['qsub'], config=True,
        help="The PBS submit command ['qsub']")
    delete_command = List(['qdel'], config=True,
        help="The PBS delete command ['qdel']")
    # Raw string: the literal contains `\d`, which is an invalid escape
    # sequence in a normal string. The resulting text is unchanged.
    job_id_regexp = CRegExp(r'\d+', config=True,
        help=r"Regular expresion for identifying the job ID [r'\d+']")

    batch_file = Unicode(u'')
    # The patterns below are raw strings for the same reason; their values
    # are identical to the previous non-raw literals.
    # Matches an existing `#PBS -t ...` job array line in a template.
    job_array_regexp = CRegExp(r'#PBS\W+-t\W+[\w\d\-\$]+')
    job_array_template = Unicode('#PBS -t 1-{n}')
    # Matches an existing `#PBS -q ...` queue line in a template.
    queue_regexp = CRegExp(r'#PBS\W+-q\W+\$?\w+')
    queue_template = Unicode('#PBS -q {queue}')
MinRK
untwist PBS, WinHPC Launchers in newparallel
r3613
MinRK
parallel.apps cleanup per review...
class PBSControllerLauncher(PBSLauncher, BatchClusterAppMixin):
    """Launch a controller using PBS."""

    batch_file_name = Unicode(u'pbs_controller', config=True,
        help="batch file name for the controller job.")
    # Default script: the shell-quoted controller command line, logging to
    # file, with profile_dir and cluster_id left as template fields.
    default_template = Unicode("""#!/bin/sh
#PBS -V
#PBS -N ipcontroller
%s --log-to-file --profile-dir="{profile_dir}" --cluster-id="{cluster_id}"
""" % (' '.join(pipes.quote(word) for word in ipcontroller_cmd_argv),))

    def start(self):
        """Start the controller by profile or profile_dir (a single instance)."""
        instances = 1
        return super(PBSControllerLauncher, self).start(instances)
MinRK
untwist PBS, WinHPC Launchers in newparallel
r3613
MinRK
parallel.apps cleanup per review...
class PBSEngineSetLauncher(PBSLauncher, BatchClusterAppMixin):
    """Launch Engines using PBS"""

    batch_file_name = Unicode(u'pbs_engines', config=True,
        help="batch file name for the engine(s) job.")
    # Default script: the shell-quoted engine command line, with
    # profile_dir and cluster_id left as template fields.
    default_template = Unicode(u"""#!/bin/sh
#PBS -V
#PBS -N ipengine
%s --profile-dir="{profile_dir}" --cluster-id="{cluster_id}"
""" % (' '.join(pipes.quote(word) for word in ipengine_cmd_argv),))
MinRK
untwist PBS, WinHPC Launchers in newparallel
r3613
James Booth
Add Condor bindings for IPython.parallel
r10982
MinRK
Update PBS/SGE launchers with 0.10.1 options and defaults
r3659 #SGE is very similar to PBS
class SGELauncher(PBSLauncher):
    """Sun GridEngine is a PBS clone with slightly different syntax.

    Only the directive patterns/templates differ from the PBS launcher:
    SGE directives are introduced by ``#$`` and job arrays use ``-t``.
    """

    # Regular expressions are written as raw strings so that sequences such
    # as ``\$`` and ``\W`` reach the regex engine untouched instead of being
    # treated as (invalid) Python string escapes.
    job_array_regexp = CRegExp(r'#\$\W+\-t')
    job_array_template = Unicode('#$ -t 1-{n}')
    queue_regexp = CRegExp(r'#\$\W+-q\W+\$?\w+')
    queue_template = Unicode('#$ -q {queue}')
MinRK
Update PBS/SGE launchers with 0.10.1 options and defaults
r3659
James Booth
Add Condor bindings for IPython.parallel
r10982
MinRK
parallel.apps cleanup per review...
class SGEControllerLauncher(SGELauncher, BatchClusterAppMixin):
    """Launch a controller using SGE."""

    # File name used for the controller batch script.
    batch_file_name = Unicode(u'sge_controller', config=True,
        help="batch file name for the ipcontroller job.")

    # The shell-quoted controller command is substituted for ``%s`` at
    # class-definition time; ``{profile_dir}`` and ``{cluster_id}`` remain
    # as placeholders in the resulting text.
    default_template = Unicode(u"""#$ -V
#$ -S /bin/sh
#$ -N ipcontroller
%s --log-to-file --profile-dir="{profile_dir}" --cluster-id="{cluster_id}"
""" % (' '.join(map(pipes.quote, ipcontroller_cmd_argv))))

    def start(self):
        """Start the controller by profile or profile_dir.

        Delegates to the inherited ``start`` with an argument of 1
        (presumably the number of processes -- there is a single controller).
        Returns whatever the inherited ``start`` returns.
        """
        return super(SGEControllerLauncher, self).start(1)
MinRK
Update PBS/SGE launchers with 0.10.1 options and defaults
r3659
James Booth
Add Condor bindings for IPython.parallel
r10982
MinRK
parallel.apps cleanup per review...
class SGEEngineSetLauncher(SGELauncher, BatchClusterAppMixin):
    """Launch a set of IPython engines as an SGE job.

    Purely declarative: it provides the batch file name and the default
    batch script, and defines no methods of its own.
    """

    # File name used for the engine batch script.
    batch_file_name = Unicode(
        u'sge_engines',
        config=True,
        help="batch file name for the engine(s) job.",
    )

    # ``%s`` receives the shell-quoted engine command at class-definition
    # time; the ``{profile_dir}`` / ``{cluster_id}`` fields are left as
    # placeholders in the text.
    default_template = Unicode("""#$ -V
#$ -S /bin/sh
#$ -N ipengine
%s --profile-dir="{profile_dir}" --cluster-id="{cluster_id}"
""" % ' '.join(pipes.quote(arg) for arg in ipengine_cmd_argv))
MinRK
Update PBS/SGE launchers with 0.10.1 options and defaults
r3659
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605
Johann Cohen-Tanugi
add LSF launcher for ipcluster
r4229 # LSF launchers
class LSFLauncher(BatchSystemLauncher):
    """A BatchSystemLauncher subclass for LSF."""

    submit_command = List(['bsub'], config=True,
        help="The LSF submit command ['bsub']")
    delete_command = List(['bkill'], config=True,
        help="The LSF delete command ['bkill']")
    job_id_regexp = CRegExp(r'\d+', config=True,
        help=r"Regular expression for identifying the job ID [r'\d+']")

    batch_file = Unicode(u'')
    # Must recognise the directive produced by ``job_array_template`` below,
    # i.e. allow whitespace between ``-J`` and the job name and accept the
    # unformatted ``{n}`` placeholder as well as a literal upper bound.
    # Everything the previous pattern matched (``-Jname[1-4]``) still matches.
    job_array_regexp = CRegExp(r'#BSUB[ \t]+-J[ \t]*\w+\[\d+-(?:\d+|\{n\})\]')
    job_array_template = Unicode('#BSUB -J ipengine[1-{n}]')
    queue_regexp = CRegExp(r'#BSUB[ \t]+-q[ \t]+\w+')
    queue_template = Unicode('#BSUB -q {queue}')

    def start(self, n):
        """Start n copies of the process using LSF batch system.

        This can't inherit from the base class because bsub expects
        to be piped a shell script in order to honor the #BSUB directives :
        bsub < script

        Parameters
        ----------
        n : passed through to ``write_batch_script``.

        Returns
        -------
        The job id parsed from the submit command's standard output.
        """
        # The batch script is written first; presumably this is also where
        # the template fields such as {profile_dir} get filled in -- confirm
        # against write_batch_script.
        self.write_batch_script(n)
        # Build ``<args[0]> <"<args[1]>"`` so that the shell redirects the
        # script (args[1]) into the submit command (args[0]).
        piped_cmd = self.args[0] + '<"' + self.args[1] + '"'
        self.log.debug("Starting %s: %s", self.__class__.__name__, piped_cmd)
        # shell=True is required for the ``<`` redirection to be interpreted.
        p = Popen(piped_cmd, shell=True, env=os.environ, stdout=PIPE)
        # Only stdout is captured; stderr is not piped, so the second item
        # of the communicate() result carries nothing useful.
        output, _ = p.communicate()
        output = output.decode(DEFAULT_ENCODING, 'replace')
        job_id = self.parse_job_id(output)
        self.notify_start(job_id)
        return job_id
MinRK
parallel.apps cleanup per review...
class LSFControllerLauncher(LSFLauncher, BatchClusterAppMixin):
    """Launch an IPython controller as an LSF job."""

    # File name used for the controller batch script.
    batch_file_name = Unicode(
        u'lsf_controller',
        config=True,
        help="batch file name for the controller job.",
    )

    # ``%s`` is replaced with the shell-quoted controller command when the
    # class is defined.  Because of that ``%`` substitution, ``%%J`` is an
    # escaped percent sign and yields a literal ``%J`` in the script text.
    # ``{profile_dir}`` and ``{cluster_id}`` stay as placeholders.
    default_template = Unicode("""#!/bin/sh
#BSUB -J ipcontroller
#BSUB -oo ipcontroller.o.%%J
#BSUB -eo ipcontroller.e.%%J
%s --log-to-file --profile-dir="{profile_dir}" --cluster-id="{cluster_id}"
""" % ' '.join(pipes.quote(arg) for arg in ipcontroller_cmd_argv))

    def start(self):
        """Submit the controller job.

        Calls the inherited ``start`` with an argument of 1 and returns its
        result.
        """
        parent = super(LSFControllerLauncher, self)
        return parent.start(1)
Johann Cohen-Tanugi
add LSF launcher for ipcluster
r4229
MinRK
parallel.apps cleanup per review...
class LSFEngineSetLauncher(LSFLauncher, BatchClusterAppMixin):
    """Launch a set of IPython engines as an LSF job.

    Purely declarative: it provides the batch file name and the default
    batch script, and defines no methods of its own.
    """

    # File name used for the engine batch script.
    batch_file_name = Unicode(
        u'lsf_engines',
        config=True,
        help="batch file name for the engine(s) job.",
    )

    # ``%s`` is replaced with the shell-quoted engine command when the class
    # is defined; ``%%J`` is an escaped percent sign yielding ``%J``.  The
    # ``{profile_dir}`` / ``{cluster_id}`` fields stay as placeholders.
    default_template = Unicode(u"""#!/bin/sh
#BSUB -oo ipengine.o.%%J
#BSUB -eo ipengine.e.%%J
%s --profile-dir="{profile_dir}" --cluster-id="{cluster_id}"
""" % ' '.join(pipes.quote(arg) for arg in ipengine_cmd_argv))
Bernardo B. Marques
remove all trailling spaces
r4872
Johann Cohen-Tanugi
add LSF launcher for ipcluster
r4229
James Booth
Add Condor bindings for IPython.parallel
r10982
James Booth
rename Condor -> HTCondor in all instances
class HTCondorLauncher(BatchSystemLauncher):
    """A BatchSystemLauncher subclass for HTCondor.

    HTCondor requires that we launch the ipengine/ipcontroller scripts rather
    than the python instance but otherwise is very similar to PBS.  This is
    because HTCondor destroys sys.executable when launching remote processes -
    a launched python process depends on sys.executable to effectively
    evaluate its module search paths.  Without it, regardless of which python
    interpreter you launch, you will only get the built-in module search
    paths.

    We use the ip{cluster, engine, controller} scripts as our executable to
    circumvent this - the mechanism of shebanged scripts means that the
    python binary will be launched with argv[0] set to the *location of the
    ip{cluster, engine, controller} scripts on the remote node*.  This means
    you need to take care that:

    a. Your remote nodes have their paths configured correctly, with the
       ipengine and ipcontroller of the python environment you wish to
       execute code in having top precedence.
    b. This functionality is untested on Windows.

    If you need different behavior, consider making your own template.
    """

    submit_command = List(['condor_submit'], config=True,
        help="The HTCondor submit command ['condor_submit']")
    delete_command = List(['condor_rm'], config=True,
        help="The HTCondor delete command ['condor_rm']")
    # Raw strings keep ``\d`` and ``\.`` from being read as (invalid) Python
    # string escapes; the resulting text is unchanged.
    job_id_regexp = CRegExp(r'(\d+)\.$', config=True,
        help=r"Regular expression for identifying the job ID [r'(\d+)\.$']")
    job_id_regexp_group = Integer(1, config=True,
        help="""The group we wish to match in job_id_regexp [1]""")

    # NOTE(review): this pattern requires a literal ``$`` after ``queue``, so
    # it does not match the ``queue {n}`` line produced by
    # job_array_template -- confirm whether that is intended.
    job_array_regexp = CRegExp(r'queue\W+\$')
    job_array_template = Unicode('queue {n}')

    def _insert_job_array_in_script(self):
        """Inserts a job array if required into the batch script.

        Appends ``job_array_template`` on a new line at the end of
        ``batch_template`` unless ``job_array_regexp`` already matches it.
        """
        if self.job_array_regexp.search(self.batch_template):
            return
        self.log.debug("adding job array settings to batch script")
        # HTCondor requires that the job array goes at the bottom of the script
        self.batch_template = '\n'.join([self.batch_template,
                                         self.job_array_template])

    def _insert_queue_in_script(self):
        """AFAIK, HTCondor doesn't have a concept of multiple queues that can be
        specified in the script.
        """
        pass
James Booth
Add Condor bindings for IPython.parallel
r10982
James Booth
rename Condor -> HTCondor in all instances
class HTCondorControllerLauncher(HTCondorLauncher, BatchClusterAppMixin):
    """Launch a controller using HTCondor."""

    # File name used for the controller submit description.
    batch_file_name = Unicode(u'htcondor_controller', config=True,
        help="batch file name for the controller job.")

    # The whole ``arguments`` value is wrapped in double quotes so that the
    # single quotes act as argument grouping rather than being handed to
    # ipcontroller as literal characters.  This matches the quoting used by
    # the engine-set template.
    default_template = Unicode(r"""
universe = vanilla
executable = ipcontroller
# by default we expect a shared file system
transfer_executable = False
arguments = "--log-to-file '--profile-dir={profile_dir}' '--cluster-id={cluster_id}'"
""")

    def start(self):
        """Start the controller by profile or profile_dir.

        Delegates to the inherited ``start`` with an argument of 1 and
        returns its result.
        """
        return super(HTCondorControllerLauncher, self).start(1)
James Booth
Add Condor bindings for IPython.parallel
r10982
James Booth
rename Condor -> HTCondor in all instances
class HTCondorEngineSetLauncher(HTCondorLauncher, BatchClusterAppMixin):
    """Launch a set of IPython engines through HTCondor.

    Purely declarative: it provides the batch file name and the default
    submit description, and defines no methods of its own.
    """

    # File name used for the engine submit description.
    batch_file_name = Unicode(
        u'htcondor_engines',
        config=True,
        help="batch file name for the engine(s) job.",
    )

    # ``{profile_dir}`` and ``{cluster_id}`` are placeholders left in the
    # text; no queue statement is included here.
    default_template = Unicode("""
universe = vanilla
executable = ipengine
# by default we expect a shared file system
transfer_executable = False
arguments = "--log-to-file '--profile-dir={profile_dir}' '--cluster-id={cluster_id}'"
""")
James Booth
Add Condor bindings for IPython.parallel
r10982
MinRK
adapt kernel's ipcluster and Launchers to newparallel
r3605 #-----------------------------------------------------------------------------
# A launcher for ipcluster itself!
#-----------------------------------------------------------------------------
class IPClusterLauncher(LocalProcessLauncher):
    """Run the ``ipcluster`` program itself as an external process."""

    # Base command used to invoke ipcluster.
    ipcluster_cmd = List(
        ipcluster_cmd_argv,
        config=True,
        help="Popen command for ipcluster",
    )
    # Extra options appended after the generated --n/--profile options.
    ipcluster_args = List(
        ['--clean-logs=True', '--log-to-file', '--log-level=%i' % logging.INFO],
        config=True,
        help="Command line arguments to pass to ipcluster.",
    )
    ipcluster_subcommand = Unicode('start')
    profile = Unicode('default')
    n = Integer(2)

    def find_args(self):
        """Return the full argument list for the ipcluster process.

        The order is: the ipcluster command, the subcommand, the ``--n`` and
        ``--profile`` options built from the current trait values, then
        ``ipcluster_args``.
        """
        argv = list(self.ipcluster_cmd)
        argv.append(self.ipcluster_subcommand)
        argv.extend(['--n=%i' % self.n, '--profile=%s' % self.profile])
        argv.extend(self.ipcluster_args)
        return argv

    def start(self):
        """Start the process by delegating to the inherited ``start``."""
        parent = super(IPClusterLauncher, self)
        return parent.start()
MinRK
all ipcluster scripts in some degree of working order with new config
r3985 #-----------------------------------------------------------------------------
# Collections of launchers
#-----------------------------------------------------------------------------
# Launcher classes grouped by the backend they target.
local_launchers = [
    LocalControllerLauncher,
    LocalEngineLauncher,
    LocalEngineSetLauncher,
]

mpi_launchers = [
    MPILauncher,
    MPIControllerLauncher,
    MPIEngineSetLauncher,
]

ssh_launchers = [
    SSHLauncher,
    SSHControllerLauncher,
    SSHEngineLauncher,
    SSHEngineSetLauncher,
    SSHProxyEngineSetLauncher,
]

winhpc_launchers = [
    WindowsHPCLauncher,
    WindowsHPCControllerLauncher,
    WindowsHPCEngineSetLauncher,
]

pbs_launchers = [
    PBSLauncher,
    PBSControllerLauncher,
    PBSEngineSetLauncher,
]

sge_launchers = [
    SGELauncher,
    SGEControllerLauncher,
    SGEEngineSetLauncher,
]

lsf_launchers = [
    LSFLauncher,
    LSFControllerLauncher,
    LSFEngineSetLauncher,
]

htcondor_launchers = [
    HTCondorLauncher,
    HTCondorControllerLauncher,
    HTCondorEngineSetLauncher,
]

# Every launcher class, concatenated in the group order listed below.
all_launchers = (
    local_launchers
    + mpi_launchers
    + ssh_launchers
    + winhpc_launchers
    + pbs_launchers
    + sge_launchers
    + lsf_launchers
    + htcondor_launchers
)