##// END OF EJS Templates
Adding files for the refactored kernel scripts.
Adding files for the refactored kernel scripts.

File last commit:

r2304:6dfefc6a
r2304:6dfefc6a
Show More
launcher.py
585 lines | 18.8 KiB | text/x-python | PythonLexer
#!/usr/bin/env python
# encoding: utf-8
"""
Facilities for launching processes asynchronously.
"""
#-----------------------------------------------------------------------------
# Copyright (C) 2008-2009 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------
import os
import re
import sys
from IPython.core.component import Component
from IPython.external import Itpl
from IPython.utils.traitlets import Str, Int, List, Unicode
from IPython.kernel.twistedutil import gatherBoth, make_deferred, sleep_deferred
from twisted.internet import reactor, defer
from twisted.internet.defer import inlineCallbacks
from twisted.internet.protocol import ProcessProtocol
from twisted.internet.utils import getProcessOutput
from twisted.internet.error import ProcessDone, ProcessTerminated
from twisted.python import log
from twisted.python.failure import Failure
#-----------------------------------------------------------------------------
# Generic launchers
#-----------------------------------------------------------------------------
class LauncherError(Exception):
    """Base class for the errors raised by the launchers in this module."""
class ProcessStateError(LauncherError):
    """Raised when an operation is not valid for the launcher's current state."""
class UnknownStatus(LauncherError):
    """Raised when a process ends with a status that is not recognized."""
class BaseLauncher(Component):
    """An abstraction for starting, stopping and signaling a process.

    Subclasses implement :meth:`find_args`, :meth:`start`, :meth:`stop` and
    :meth:`signal`.  This base class tracks the life-cycle ``state``
    (``'before'``, ``'running'`` or ``'after'``), remembers the data passed
    to :meth:`notify_start` / :meth:`notify_stop`, and manages the deferreds
    handed out by :meth:`observe_stop`.
    """

    working_dir = Unicode(u'')

    def __init__(self, working_dir, parent=None, name=None, config=None):
        """Create the launcher in the ``'before'`` state.

        Parameters
        ----------
        working_dir : unicode
            Stored on the ``working_dir`` trait.
        parent, name, config
            Passed through unchanged to ``Component.__init__``.
        """
        super(BaseLauncher, self).__init__(parent, name, config)
        self.working_dir = working_dir
        self.state = 'before'  # can be before, running, after
        self.stop_deferreds = []
        self.start_data = None
        self.stop_data = None

    @property
    def args(self):
        """A list of cmd and args that will be used to start the process."""
        return self.find_args()

    def find_args(self):
        """The ``.args`` property calls this to find the args list."""
        raise NotImplementedError('find_args must be implemented in a subclass')

    @property
    def arg_str(self):
        """The string form of the program arguments."""
        # Subclasses may place non-string items (e.g. an integer process
        # count) in the args list, so format each item before joining;
        # a bare ``' '.join(self.args)`` would raise TypeError on them.
        return ' '.join(['%s' % (a,) for a in self.args])

    @property
    def running(self):
        """True while the launcher is in the ``'running'`` state."""
        return self.state == 'running'

    def start(self):
        """Start the process.

        This must return a deferred that fires with information about the
        process starting (like a pid, job id, etc.)

        The base implementation returns a deferred that has already failed
        with ``NotImplementedError``.
        """
        return defer.fail(
            Failure(NotImplementedError(
                'start must be implemented in a subclass')
            )
        )

    def stop(self):
        """Stop the process and notify observers of ProcessStopped.

        This must return a deferred that fires with any errors that occur
        while the process is attempting to be shut down.  This deferred
        won't fire when the process actually stops.  These events are
        handled by calling :func:`observe_stop`.

        The base implementation returns a deferred that has already failed
        with ``NotImplementedError``.
        """
        return defer.fail(
            Failure(NotImplementedError(
                'stop must be implemented in a subclass')
            )
        )

    def observe_stop(self):
        """Get a deferred that will fire when the process stops.

        The deferred will fire with data that contains information about
        the exit status of the process.  If the process has already stopped,
        an already-fired deferred carrying the saved stop data is returned.
        """
        if self.state == 'after':
            return defer.succeed(self.stop_data)
        d = defer.Deferred()
        self.stop_deferreds.append(d)
        return d

    def notify_start(self, data):
        """Call this to trigger startup actions.

        This logs the process startup and sets the state to running.  It is
        a pass-through so it can be used as a callback.
        """
        log.msg('Process %r started: %r' % (self.args[0], data))
        self.start_data = data
        self.state = 'running'
        return data

    def notify_stop(self, data):
        """Call this to trigger all the deferreds from :func:`observe_stop`.

        Logs the stop, records ``data`` as the stop data, moves the state to
        ``'after'`` and returns ``data`` so it can be used as a callback.
        """
        log.msg('Process %r stopped: %r' % (self.args[0], data))
        self.stop_data = data
        self.state = 'after'
        # Fire the waiting deferreds most-recently-registered first, removing
        # each one from the list before it is fired.
        while self.stop_deferreds:
            self.stop_deferreds.pop().callback(data)
        return data

    def signal(self, sig):
        """Signal the process.

        Return a semi-meaningless deferred after signaling the process.

        Parameters
        ----------
        sig : str or int
            'KILL', 'INT', etc., or any signal number

        The base implementation returns a deferred that has already failed
        with ``NotImplementedError``.
        """
        return defer.fail(
            Failure(NotImplementedError(
                'signal must be implemented in a subclass')
            )
        )
class LocalProcessLauncherProtocol(ProcessProtocol):
    """ProcessProtocol that relays process events to a LocalProcessLauncher."""

    def __init__(self, process_launcher):
        """Remember the launcher to notify; the pid is unknown until connect."""
        self.pid = None
        self.process_launcher = process_launcher

    def connectionMade(self):
        """Record the transport's pid and report the start to the launcher."""
        self.pid = self.transport.pid
        self.process_launcher.notify_start(self.transport.pid)

    def processEnded(self, status):
        """Translate the exit reason into a dict and report the stop.

        The dict has the keys ``exit_code``, ``signal``, ``status`` and
        ``pid``.  ``UnknownStatus`` is raised when the reason is neither a
        ``ProcessDone`` nor a ``ProcessTerminated``.
        """
        reason = status.value
        if isinstance(reason, ProcessDone):
            # Clean exit: report a zero exit code and no signal/status.
            exit_info = {
                'exit_code': 0,
                'signal': None,
                'status': None,
                'pid': self.pid,
            }
        elif isinstance(reason, ProcessTerminated):
            exit_info = {
                'exit_code': reason.exitCode,
                'signal': reason.signal,
                'status': reason.status,
                'pid': self.pid,
            }
        else:
            raise UnknownStatus("Unknown exit status, this is probably a "
                                "bug in Twisted")
        self.process_launcher.notify_stop(exit_info)

    def outReceived(self, data):
        """Forward data from the child's stdout to the Twisted log."""
        log.msg(data)

    def errReceived(self, data):
        """Forward data from the child's stderr to the Twisted error log."""
        log.err(data)
class LocalProcessLauncher(BaseLauncher):
    """Start and stop an external process in an asynchronous manner."""

    cmd_and_args = List([])

    def __init__(self, working_dir, parent=None, name=None, config=None):
        """Initialise the launcher; no process is spawned until ``start``."""
        super(LocalProcessLauncher, self).__init__(
            working_dir, parent, name, config
        )
        self.process_protocol = None
        # Set by start(); initialised here so the attribute always exists.
        self.process_transport = None
        self.start_deferred = None

    def find_args(self):
        """Return the ``cmd_and_args`` list as the command line."""
        return self.cmd_and_args

    def start(self):
        """Spawn the process with the Twisted reactor.

        Returns a deferred that is fired with the start data by
        ``notify_start``.  If the launcher is not in the ``'before'`` state
        the returned deferred has already failed with ``ProcessStateError``.
        """
        if self.state != 'before':
            s = 'The process was already started and has state: %r' % self.state
            return defer.fail(ProcessStateError(s))
        self.process_protocol = LocalProcessLauncherProtocol(self)
        # Created before spawning so that notify_start has a deferred to fire.
        self.start_deferred = defer.Deferred()
        self.process_transport = reactor.spawnProcess(
            self.process_protocol,
            str(self.args[0]),
            [str(a) for a in self.args],
            env=os.environ
        )
        return self.start_deferred

    def notify_start(self, data):
        """Record the start, fire the start deferred and pass ``data`` through."""
        super(LocalProcessLauncher, self).notify_start(data)
        self.start_deferred.callback(data)
        # Keep the base class's pass-through contract so this can be chained
        # as a callback.
        return data

    def stop(self):
        """Stop the process by calling ``interrupt_then_kill``."""
        return self.interrupt_then_kill()

    # NOTE(review): make_deferred comes from IPython.kernel.twistedutil;
    # presumably it wraps the call's result in a deferred -- confirm.
    @make_deferred
    def signal(self, sig):
        """Send ``sig`` to the process; does nothing unless it is running."""
        if self.state == 'running':
            self.process_transport.signalProcess(sig)

    @inlineCallbacks
    def interrupt_then_kill(self, delay=1.0):
        """Signal 'INT', wait on ``sleep_deferred(delay)``, then signal 'KILL'."""
        yield self.signal('INT')
        yield sleep_deferred(delay)
        yield self.signal('KILL')
class MPIExecLauncher(LocalProcessLauncher):
    """Launch a program through an MPI launcher command (``mpiexec`` by default)."""

    mpi_cmd = List(['mpiexec'], config=True)
    mpi_args = List([], config=True)
    program = List(['date'], config=True)
    program_args = List([], config=True)
    n = Int(1, config=True)

    def find_args(self):
        """Build ``<mpi_cmd> -n <n> <mpi_args> <program> <program_args>``."""
        launcher_part = self.mpi_cmd + ['-n', self.n] + self.mpi_args
        program_part = self.program + self.program_args
        return launcher_part + program_part

    def start(self, n):
        """Store ``n`` on the trait and start the process."""
        self.n = n
        return super(MPIExecLauncher, self).start()
class SSHLauncher(LocalProcessLauncher):
    """A minimal launcher for ssh.

    To be useful this will probably have to be extended to use the ``sshx``
    idea for environment variables.  There could be other things this needs
    as well.

    The ssh command itself is run as a local process, which is why this
    derives from :class:`LocalProcessLauncher`: ``start`` delegates to the
    parent class to spawn the command built by ``find_args``.
    """

    ssh_cmd = List(['ssh'], config=True)
    ssh_args = List([], config=True)
    program = List(['date'], config=True)
    program_args = List([], config=True)
    hostname = Str('', config=True)
    # Looked up with .get() so that importing this module does not raise
    # KeyError on platforms where USER is not set (e.g. Windows, which
    # uses USERNAME instead).
    user = Str(os.environ.get('USER', os.environ.get('USERNAME', '')),
               config=True)
    location = Str('')

    def _build_location(self, user, hostname):
        """Return ``user@hostname``, or just ``hostname`` when user is empty."""
        if user:
            return '%s@%s' % (user, hostname)
        return hostname

    def _hostname_changed(self, name, old, new):
        """Recompute ``location`` when ``hostname`` changes."""
        self.location = self._build_location(self.user, new)

    def _user_changed(self, name, old, new):
        """Recompute ``location`` when ``user`` changes."""
        self.location = self._build_location(new, self.hostname)

    def find_args(self):
        """Build ``<ssh_cmd> <ssh_args> <location> <program> <program_args>``."""
        return self.ssh_cmd + self.ssh_args + [self.location] + \
               self.program + self.program_args

    def start(self, n, hostname=None, user=None):
        """Start the ssh process.

        Parameters
        ----------
        n : int
            Accepted for signature compatibility; not used by this method.
        hostname : str, optional
            If given, replaces the ``hostname`` trait before starting.
        user : str, optional
            If given, replaces the ``user`` trait before starting.
        """
        if hostname is not None:
            self.hostname = hostname
        if user is not None:
            self.user = user
        return super(SSHLauncher, self).start()
class WindowsHPCLauncher(BaseLauncher):
    """Placeholder launcher; currently adds nothing to BaseLauncher."""
class BatchSystemLauncher(BaseLauncher):
    """Launch jobs by writing a batch script and submitting it.

    ``start`` instantiates ``batch_template`` into ``batch_file``, runs
    ``submit_command`` on that file and extracts the job id from the output
    with ``job_id_regexp``.  ``stop`` runs ``delete_command`` on the job id.
    """

    # Subclasses must fill these in.  See PBSLauncher for an example.
    submit_command = Str('', config=True)
    delete_command = Str('', config=True)
    job_id_regexp = Str('', config=True)
    batch_template = Str('', config=True)
    batch_file_name = Unicode(u'batch_script', config=True)
    batch_file = Unicode(u'')

    def __init__(self, working_dir, parent=None, name=None, config=None):
        """Compute the batch file path and create an empty template context."""
        super(BatchSystemLauncher, self).__init__(
            working_dir, parent, name, config
        )
        self.batch_file = os.path.join(self.working_dir, self.batch_file_name)
        # Namespace used when instantiating the batch template.
        self.context = {}
        # Filled in by parse_job_id once a job has been submitted.
        self.job_id = None

    def find_args(self):
        """Return the submit command line: ``[submit_command, batch_file]``.

        Without this, ``notify_start`` and ``notify_stop`` (which log
        ``self.args[0]``) would raise ``NotImplementedError``.
        """
        return [self.submit_command, self.batch_file]

    def parse_job_id(self, output):
        """Extract, store and return the job id found in ``output``.

        Raises
        ------
        LauncherError
            If ``job_id_regexp`` does not match anywhere in ``output``.
        """
        # search() rather than match(): the id need not be at the very start
        # of the submit command's output.
        m = re.search(self.job_id_regexp, output)
        if m is None:
            raise LauncherError("Job id couldn't be determined: %s" % output)
        job_id = m.group()
        self.job_id = job_id
        log.msg('Job started with job id: %r' % job_id)
        return job_id

    def write_batch_script(self, n):
        """Instantiate the batch template with ``n`` and write it to disk."""
        self.context['n'] = n
        script_as_string = Itpl.itplns(self.batch_template, self.context)
        log.msg('Writing instantiated batch script: %s' % self.batch_file)
        f = open(self.batch_file, 'w')
        try:
            f.write(script_as_string)
        finally:
            # Close the file even if the write fails.
            f.close()

    @inlineCallbacks
    def start(self, n):
        """Start n copies of the process using a batch system.

        The returned deferred fires with the job id.
        """
        self.write_batch_script(n)
        output = yield getProcessOutput(self.submit_command,
            [self.batch_file], env=os.environ)
        job_id = self.parse_job_id(output)
        self.notify_start(job_id)
        defer.returnValue(job_id)

    @inlineCallbacks
    def stop(self):
        """Delete the submitted job.

        The returned deferred fires with the output of the delete command,
        or fails with ``ProcessStateError`` if no job has been submitted.
        """
        if self.job_id is None:
            raise ProcessStateError(
                'The job cannot be stopped because it has no job id')
        output = yield getProcessOutput(self.delete_command,
            [self.job_id], env=os.environ
        )
        self.notify_stop(output)  # Pass the output of the kill cmd
        defer.returnValue(output)
class PBSLauncher(BatchSystemLauncher):
    """Batch launcher configured with ``qsub`` / ``qdel`` as its commands."""

    submit_command = Str('qsub', config=True)
    delete_command = Str('qdel', config=True)
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    job_id_regexp = Str(r'\d+', config=True)
    batch_template = Str('', config=True)
    batch_file_name = Unicode(u'pbs_batch_script', config=True)
    batch_file = Unicode(u'')
#-----------------------------------------------------------------------------
# Controller launchers
#-----------------------------------------------------------------------------
def find_controller_cmd():
    """Return the command (as a list) used to run the ipcontroller script."""
    if sys.platform != 'win32':
        # ipcontroller has to be on the PATH in this case.
        return ['ipcontroller']
    # On win32 the ipcontroller script doesn't always get installed in the
    # same way or in the same location, so run the module's source file
    # directly instead.
    from IPython.kernel import ipcontrollerapp
    script_location = ipcontrollerapp.__file__.replace('.pyc', '.py')
    # The -u option turns on unbuffered output, which is required on Win32
    # to prevent weird conflicts and problems with Twisted.  sys.executable
    # makes sure we are picking up the right python exe.
    return [sys.executable, '-u', script_location]
class LocalControllerLauncher(LocalProcessLauncher):
    """Run the controller command as a local process."""

    controller_cmd = List(find_controller_cmd())
    controller_args = List(['--log-to-file', '--log-level', '40'], config=True)

    def find_args(self):
        """Return the controller command followed by its arguments."""
        return self.controller_cmd + self.controller_args

    def start(self, profile=None, cluster_dir=None):
        """Append the cluster-dir/profile options, log, and start the process."""
        extra_args = []
        if cluster_dir is not None:
            extra_args += ['--cluster-dir', cluster_dir]
        if profile is not None:
            extra_args += ['--profile', profile]
        if extra_args:
            self.controller_args.extend(extra_args)
        log.msg("Starting LocalControllerLauncher: %r" % self.args)
        return super(LocalControllerLauncher, self).start()
class WindowsHPCControllerLauncher(WindowsHPCLauncher):
    """Placeholder; currently adds nothing to WindowsHPCLauncher."""
class MPIExecControllerLauncher(MPIExecLauncher):
    """Run the controller command under the MPI launcher with ``-n 1``."""

    controller_cmd = List(find_controller_cmd(), config=False)
    controller_args = List(['--log-to-file', '--log-level', '40'], config=True)
    n = Int(1, config=False)

    def start(self, profile=None, cluster_dir=None):
        """Append the cluster-dir/profile options, log, and start one process."""
        extra_args = []
        if cluster_dir is not None:
            extra_args += ['--cluster-dir', cluster_dir]
        if profile is not None:
            extra_args += ['--profile', profile]
        if extra_args:
            self.controller_args.extend(extra_args)
        log.msg("Starting MPIExecControllerLauncher: %r" % self.args)
        return super(MPIExecControllerLauncher, self).start(1)

    def find_args(self):
        """Build ``<mpi_cmd> -n <n> <mpi_args> <controller_cmd> <controller_args>``."""
        launcher_part = self.mpi_cmd + ['-n', self.n] + self.mpi_args
        controller_part = self.controller_cmd + self.controller_args
        return launcher_part + controller_part
class PBSControllerLauncher(PBSLauncher):
    """Submit a batch job with ``n`` fixed to 1 for the controller."""

    def start(self, profile=None, cluster_dir=None):
        """Save the options in the template context and submit the job.

        Parameters
        ----------
        profile : optional
            Stored as ``context['profile']`` when not None.
        cluster_dir : optional
            Stored as ``context['cluster_dir']`` when not None.
        """
        # Here we save profile and cluster_dir in the context so they
        # can be used in the batch script template as ${profile} and
        # ${cluster_dir}
        if cluster_dir is not None:
            self.context['cluster_dir'] = cluster_dir
        if profile is not None:
            self.context['profile'] = profile
        # Log the submit command line explicitly instead of via self.args,
        # which is not implemented for the batch launchers and would raise.
        log.msg("Starting PBSControllerLauncher: %r" %
                ([self.submit_command, self.batch_file],))
        return super(PBSControllerLauncher, self).start(1)
class SSHControllerLauncher(SSHLauncher):
    """Placeholder; currently adds nothing to SSHLauncher."""
#-----------------------------------------------------------------------------
# Engine launchers
#-----------------------------------------------------------------------------
def find_engine_cmd():
    """Return the command (as a list) used to run the ipengine script."""
    if sys.platform != 'win32':
        # ipengine has to be on the PATH in this case.
        return ['ipengine']
    # On win32 the ipengine script doesn't always get installed in the same
    # way or in the same location, so run the module's source file directly
    # instead.
    from IPython.kernel import ipengineapp
    script_location = ipengineapp.__file__.replace('.pyc', '.py')
    # The -u option turns on unbuffered output, which is required on Win32
    # to prevent weird conflicts and problems with Twisted.  sys.executable
    # makes sure we are picking up the right python exe.
    return [sys.executable, '-u', script_location]
class LocalEngineLauncher(LocalProcessLauncher):
    """Run the engine command as a local process."""

    engine_cmd = List(find_engine_cmd())
    engine_args = List(['--log-to-file', '--log-level', '40'], config=True)

    def find_args(self):
        """Return the engine command followed by its arguments."""
        return self.engine_cmd + self.engine_args

    def start(self, profile=None, cluster_dir=None):
        """Append the cluster-dir/profile options and start the process."""
        extra_args = []
        if cluster_dir is not None:
            extra_args += ['--cluster-dir', cluster_dir]
        if profile is not None:
            extra_args += ['--profile', profile]
        if extra_args:
            self.engine_args.extend(extra_args)
        return super(LocalEngineLauncher, self).start()
class LocalEngineSetLauncher(BaseLauncher):
    """Manage a group of LocalEngineLauncher instances as one launcher."""

    engine_args = List(['--log-to-file', '--log-level', '40'], config=True)

    def __init__(self, working_dir, parent=None, name=None, config=None):
        """Initialise with an empty list of per-engine launchers."""
        super(LocalEngineSetLauncher, self).__init__(
            working_dir, parent, name, config
        )
        self.launchers = []

    def start(self, n, profile=None, cluster_dir=None):
        """Create and start ``n`` engine launchers.

        Returns the gathered deferred of all the individual starts, with
        ``notify_start`` added as a callback.
        """
        import copy
        start_deferreds = []
        for index in range(n):
            engine = LocalEngineLauncher(self.working_dir, self)
            # Copy the engine args over to each engine launcher.
            engine.engine_args = copy.deepcopy(self.engine_args)
            started = engine.start(profile, cluster_dir)
            if index == 0:
                log.msg("Starting LocalEngineSetLauncher: %r" % engine.args)
            self.launchers.append(engine)
            start_deferreds.append(started)
        # The consumeErrors here could be dangerous
        all_started = gatherBoth(start_deferreds, consumeErrors=True)
        all_started.addCallback(self.notify_start)
        return all_started

    def find_args(self):
        """Return a fixed label; the set itself has no command line."""
        return ['engine set']

    def signal(self, sig):
        """Send ``sig`` through every engine launcher and gather the results."""
        signalled = [engine.signal(sig) for engine in self.launchers]
        return gatherBoth(signalled, consumeErrors=True)

    def interrupt_then_kill(self, delay=1.0):
        """Call ``interrupt_then_kill(delay)`` on every engine launcher."""
        killed = [engine.interrupt_then_kill(delay) for engine in self.launchers]
        return gatherBoth(killed, consumeErrors=True)

    def stop(self):
        """Stop all engines by calling ``interrupt_then_kill``."""
        return self.interrupt_then_kill()

    def observe_stop(self):
        """Gather the engines' stop deferreds and chain ``notify_stop`` to them."""
        stopped = [engine.observe_stop() for engine in self.launchers]
        all_stopped = gatherBoth(stopped, consumeErrors=False)
        all_stopped.addCallback(self.notify_stop)
        return all_stopped
class MPIExecEngineSetLauncher(MPIExecLauncher):
    """Run ``n`` copies of the engine command under the MPI launcher."""

    engine_cmd = List(find_engine_cmd(), config=False)
    engine_args = List(['--log-to-file', '--log-level', '40'], config=True)
    n = Int(1, config=True)

    def start(self, n, profile=None, cluster_dir=None):
        """Append the cluster-dir/profile options, log, and start ``n`` engines."""
        extra_args = []
        if cluster_dir is not None:
            extra_args += ['--cluster-dir', cluster_dir]
        if profile is not None:
            extra_args += ['--profile', profile]
        if extra_args:
            self.engine_args.extend(extra_args)
        log.msg('Starting MPIExecEngineSetLauncher: %r' % self.args)
        return super(MPIExecEngineSetLauncher, self).start(n)

    def find_args(self):
        """Build ``<mpi_cmd> -n <n> <mpi_args> <engine_cmd> <engine_args>``."""
        launcher_part = self.mpi_cmd + ['-n', self.n] + self.mpi_args
        engine_part = self.engine_cmd + self.engine_args
        return launcher_part + engine_part
class WindowsHPCEngineSetLauncher(WindowsHPCLauncher):
    """Placeholder; currently adds nothing to WindowsHPCLauncher."""
class PBSEngineSetLauncher(PBSLauncher):
    """Submit a batch job for ``n`` engines."""

    def start(self, n, profile=None, cluster_dir=None):
        """Save the options in the template context and submit the job.

        Parameters
        ----------
        n : int
            Passed on to the parent ``start``.
        profile : optional
            Stored as ``context['profile']`` when not None.
        cluster_dir : optional
            Stored as ``context['cluster_dir']`` when not None.
        """
        # The batch launchers have no ``program_args`` trait; like
        # PBSControllerLauncher, save the values in the context so the batch
        # script template can refer to them as ${profile} and ${cluster_dir}.
        if cluster_dir is not None:
            self.context['cluster_dir'] = cluster_dir
        if profile is not None:
            self.context['profile'] = profile
        # Log the submit command line explicitly instead of via self.args,
        # which is not implemented for the batch launchers and would raise.
        log.msg('Starting PBSEngineSetLauncher: %r' %
                ([self.submit_command, self.batch_file],))
        return super(PBSEngineSetLauncher, self).start(n)
class SSHEngineSetLauncher(BaseLauncher):
    """Placeholder; currently adds nothing to BaseLauncher."""