##// END OF EJS Templates
Add %guiref to give a quick reference to the GUI console.
Add %guiref to give a quick reference to the GUI console.

File last commit:

r2740:a7c52804
r3008:18358c8e
Show More
launcher.py
834 lines | 28.0 KiB | text/x-python | PythonLexer
Brian Granger
Adding files for the refactored kernel scripts.
r2304 #!/usr/bin/env python
# encoding: utf-8
"""
Brian Granger
More work on the launchers and Win HPC support.
r2333 Facilities for launching IPython processes asynchronously.
Brian Granger
Adding files for the refactored kernel scripts.
r2304 """
#-----------------------------------------------------------------------------
# Copyright (C) 2008-2009 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------
import os
import re
import sys
Brian Granger
First draft of refactored Component->Configurable.
r2731 from IPython.config.configurable import Configurable
Brian Granger
Adding files for the refactored kernel scripts.
r2304 from IPython.external import Itpl
Brian Granger
Work to address the review comments on Fernando's branch....
r2498 from IPython.utils.traitlets import Str, Int, List, Unicode
from IPython.utils.path import get_ipython_module_path
Brian Granger
Fixing two small bugs in :mod:`IPython.kernel`....
r2520 from IPython.utils.process import find_cmd, pycmd2argv, FindCmdError
Brian Granger
Work to address the review comments on Fernando's branch....
r2498 from IPython.kernel.twistedutil import (
gatherBoth,
make_deferred,
sleep_deferred
)
bgranger
Initial version of Win HPC job scehduler support.
r2327 from IPython.kernel.winhpcjob import (
Brian Granger
More work on the launchers and Win HPC support.
r2333 IPControllerTask, IPEngineTask,
IPControllerJob, IPEngineSetJob
bgranger
Initial version of Win HPC job scehduler support.
r2327 )
Brian Granger
Adding files for the refactored kernel scripts.
r2304
from twisted.internet import reactor, defer
from twisted.internet.defer import inlineCallbacks
from twisted.internet.protocol import ProcessProtocol
from twisted.internet.utils import getProcessOutput
from twisted.internet.error import ProcessDone, ProcessTerminated
from twisted.python import log
from twisted.python.failure import Failure
Brian Granger
Work to address the review comments on Fernando's branch....
r2498
Brian Granger
Adding files for the refactored kernel scripts.
r2304 #-----------------------------------------------------------------------------
Brian Granger
Work to address the review comments on Fernando's branch....
r2498 # Paths to the kernel apps
Brian Granger
More work on the launchers and Win HPC support.
r2333 #-----------------------------------------------------------------------------
Brian Granger
Work to address the review comments on Fernando's branch....
# Command lists for the three kernel applications.  Each one is built by
# resolving the application's module path and converting it with
# ``pycmd2argv``; the launchers below use them as the leading part of the
# argument list of the process they start.
ipcluster_cmd_argv = pycmd2argv(
    get_ipython_module_path('IPython.kernel.ipclusterapp')
)

ipengine_cmd_argv = pycmd2argv(
    get_ipython_module_path('IPython.kernel.ipengineapp')
)

ipcontroller_cmd_argv = pycmd2argv(
    get_ipython_module_path('IPython.kernel.ipcontrollerapp')
)
Brian Granger
More work on the launchers and Win HPC support.
r2333
#-----------------------------------------------------------------------------
# Base launchers and errors
Brian Granger
Adding files for the refactored kernel scripts.
r2304 #-----------------------------------------------------------------------------
class LauncherError(Exception):
    """Base class for the errors raised by the launchers in this module."""


class ProcessStateError(LauncherError):
    """Raised when an operation is not valid for the launcher's current state."""


class UnknownStatus(LauncherError):
    """Raised when a process ends with a status that cannot be interpreted."""
Brian Granger
First draft of refactored Component->Configurable.
class BaseLauncher(Configurable):
    """An abstraction for starting, stopping and signaling a process.

    Subclasses are expected to implement :meth:`find_args`, :meth:`start`,
    :meth:`stop` and :meth:`signal`.  The versions defined here either raise
    ``NotImplementedError`` (:meth:`find_args`) or return an already-failed
    deferred wrapping ``NotImplementedError``.

    The ``state`` attribute starts as ``'before'``, becomes ``'running'`` in
    :meth:`notify_start` and ``'after'`` in :meth:`notify_stop`.
    """

    # In all of the launchers, the work_dir is where child processes will be
    # run. This will usually be the cluster_dir, but may not be. Any work_dir
    # passed into the __init__ method will override the config value.
    # This should not be used to set the work_dir for the actual engine
    # and controller. Instead, use their own config files or the
    # controller_args, engine_args attributes of the launchers to add
    # the --work-dir option.
    work_dir = Unicode(u'')

    def __init__(self, work_dir=u'', config=None):
        super(BaseLauncher, self).__init__(work_dir=work_dir, config=config)
        self.state = 'before'  # can be before, running, after
        # Deferreds handed out by observe_stop() that have not fired yet.
        self.stop_deferreds = []
        self.start_data = None
        self.stop_data = None

    @property
    def args(self):
        """A list of cmd and args that will be used to start the process.

        This is what is passed to :func:`spawnProcess` and the first element
        will be the process name.
        """
        return self.find_args()

    def find_args(self):
        """The ``.args`` property calls this to find the args list.

        Subclasses should implement this to construct the cmd and args.
        """
        raise NotImplementedError('find_args must be implemented in a subclass')

    @property
    def arg_str(self):
        """The string form of the program arguments."""
        # Format every element before joining: some launchers put
        # non-string values (e.g. an integer process count) into ``args``,
        # which a bare ``' '.join`` would reject with a TypeError.
        return ' '.join(['%s' % (arg,) for arg in self.args])

    @property
    def running(self):
        """Am I running."""
        return self.state == 'running'

    def start(self):
        """Start the process.

        This must return a deferred that fires with information about the
        process starting (like a pid, job id, etc.).
        """
        return defer.fail(
            Failure(NotImplementedError(
                'start must be implemented in a subclass')
            )
        )

    def stop(self):
        """Stop the process and notify observers of stopping.

        This must return a deferred that fires with information about the
        processing stopping, like errors that occur while the process is
        attempting to be shut down. This deferred won't fire when the process
        actually stops. To observe the actual process stopping, see
        :func:`observe_stop`.
        """
        return defer.fail(
            Failure(NotImplementedError(
                'stop must be implemented in a subclass')
            )
        )

    def observe_stop(self):
        """Get a deferred that will fire when the process stops.

        The deferred will fire with data that contains information about
        the exit status of the process.
        """
        if self.state == 'after':
            # Already stopped: hand back the recorded stop data right away.
            return defer.succeed(self.stop_data)
        d = defer.Deferred()
        self.stop_deferreds.append(d)
        return d

    def notify_start(self, data):
        """Call this to trigger startup actions.

        This logs the process startup and sets the state to 'running'.  It is
        a pass-through so it can be used as a callback.
        """
        log.msg('Process %r started: %r' % (self.args[0], data))
        self.start_data = data
        self.state = 'running'
        return data

    def notify_stop(self, data):
        """Call this to trigger process stop actions.

        This logs the process stopping and sets the state to 'after'. Call
        this to trigger all the deferreds from :func:`observe_stop`.
        """
        log.msg('Process %r stopped: %r' % (self.args[0], data))
        self.stop_data = data
        self.state = 'after'
        # Fire the pending observers, most recently registered first, and
        # leave the list empty.
        while self.stop_deferreds:
            self.stop_deferreds.pop().callback(data)
        return data

    def signal(self, sig):
        """Signal the process.

        Return a semi-meaningless deferred after signaling the process.

        Parameters
        ----------
        sig : str or int
            'KILL', 'INT', etc., or any signal number
        """
        return defer.fail(
            Failure(NotImplementedError(
                'signal must be implemented in a subclass')
            )
        )
Brian Granger
More work on the launchers and Win HPC support.
r2333 #-----------------------------------------------------------------------------
# Local process launchers
#-----------------------------------------------------------------------------
Brian Granger
Adding files for the refactored kernel scripts.
class LocalProcessLauncherProtocol(ProcessProtocol):
    """A ProcessProtocol to go with the LocalProcessLauncher.

    It forwards process start and end events to the launcher's
    ``notify_start`` / ``notify_stop`` and sends the child's stdout and
    stderr to the Twisted log.
    """

    def __init__(self, process_launcher):
        self.pid = None
        self.process_launcher = process_launcher

    def connectionMade(self):
        # Remember the pid so it can be reported again when the process ends.
        pid = self.transport.pid
        self.pid = pid
        self.process_launcher.notify_start(pid)

    def processEnded(self, status):
        reason = status.value
        if isinstance(reason, ProcessDone):
            # ProcessDone is reported as exit code 0 with no signal or status.
            exit_info = {
                'exit_code': 0,
                'signal': None,
                'status': None,
                'pid': self.pid,
            }
        elif isinstance(reason, ProcessTerminated):
            exit_info = {
                'exit_code': reason.exitCode,
                'signal': reason.signal,
                'status': reason.status,
                'pid': self.pid,
            }
        else:
            raise UnknownStatus("Unknown exit status, this is probably a "
                                "bug in Twisted")
        self.process_launcher.notify_stop(exit_info)

    def outReceived(self, data):
        log.msg(data)

    def errReceived(self, data):
        log.err(data)
class LocalProcessLauncher(BaseLauncher):
    """Start and stop an external process in an asynchronous manner.

    This will launch the external process with a working directory of
    ``self.work_dir``.
    """

    # This is used to construct self.args, which is passed to
    # spawnProcess.
    cmd_and_args = List([])

    def __init__(self, work_dir=u'', config=None):
        super(LocalProcessLauncher, self).__init__(
            work_dir=work_dir, config=config
        )
        self.process_protocol = None
        # Set by start(); initialised here so the attribute always exists.
        self.process_transport = None
        self.start_deferred = None

    def find_args(self):
        return self.cmd_and_args

    def start(self):
        """Spawn the process described by ``self.args``.

        Returns a deferred that is fired from :meth:`notify_start`.  If the
        launcher is not in the ``'before'`` state, a failed deferred wrapping
        :class:`ProcessStateError` is returned instead.
        """
        if self.state == 'before':
            self.process_protocol = LocalProcessLauncherProtocol(self)
            self.start_deferred = defer.Deferred()
            self.process_transport = reactor.spawnProcess(
                self.process_protocol,
                str(self.args[0]),  # twisted expects these to be str, not unicode
                [str(a) for a in self.args],  # str expected, not unicode
                env=os.environ,
                path=self.work_dir  # start in the work_dir
            )
            return self.start_deferred
        else:
            s = 'The process was already started and has state: %r' % self.state
            return defer.fail(ProcessStateError(s))

    def notify_start(self, data):
        """Record the start and fire the deferred returned by :meth:`start`."""
        super(LocalProcessLauncher, self).notify_start(data)
        self.start_deferred.callback(data)
        # Keep the pass-through behaviour of BaseLauncher.notify_start.
        return data

    def stop(self):
        return self.interrupt_then_kill()

    @make_deferred
    def signal(self, sig):
        """Send ``sig`` to the process; does nothing unless it is running."""
        if self.state == 'running':
            self.process_transport.signalProcess(sig)

    @inlineCallbacks
    def interrupt_then_kill(self, delay=2.0):
        """Send INT, wait a delay and then send KILL."""
        yield self.signal('INT')
        yield sleep_deferred(delay)
        yield self.signal('KILL')
Brian Granger
More work on the launchers and Win HPC support.
class LocalControllerLauncher(LocalProcessLauncher):
    """Launch a controller as a regular external process."""

    controller_cmd = List(ipcontroller_cmd_argv, config=True)
    # Command line arguments to ipcontroller.
    controller_args = List(['--log-to-file','--log-level', '40'], config=True)

    def find_args(self):
        return self.controller_cmd + self.controller_args

    def start(self, cluster_dir):
        """Start the controller by cluster_dir."""
        # Assign a new list rather than extending in place, so a list object
        # that may be shared (e.g. one supplied through config) is never
        # mutated.
        self.controller_args = list(self.controller_args) + [
            '--cluster-dir', cluster_dir
        ]
        self.cluster_dir = unicode(cluster_dir)
        log.msg("Starting LocalControllerLauncher: %r" % self.args)
        return super(LocalControllerLauncher, self).start()
class LocalEngineLauncher(LocalProcessLauncher):
    """Launch a single engine as a regular external process."""

    engine_cmd = List(ipengine_cmd_argv, config=True)
    # Command line arguments for ipengine.
    engine_args = List(
        ['--log-to-file','--log-level', '40'], config=True
    )

    def find_args(self):
        return self.engine_cmd + self.engine_args

    def start(self, cluster_dir):
        """Start the engine by cluster_dir."""
        # Assign a new list rather than extending in place, so a list object
        # that may be shared (e.g. one supplied through config) is never
        # mutated.
        self.engine_args = list(self.engine_args) + [
            '--cluster-dir', cluster_dir
        ]
        self.cluster_dir = unicode(cluster_dir)
        return super(LocalEngineLauncher, self).start()
class LocalEngineSetLauncher(BaseLauncher):
    """Launch a set of engines as regular external processes."""

    # Command line arguments for ipengine.
    engine_args = List(
        ['--log-to-file','--log-level', '40'], config=True
    )

    def __init__(self, work_dir=u'', config=None):
        super(LocalEngineSetLauncher, self).__init__(
            work_dir=work_dir, config=config
        )
        # One LocalEngineLauncher per started engine.
        self.launchers = []

    def start(self, n, cluster_dir):
        """Start n engines by profile or cluster_dir.

        Returns the gathered deferred of the individual engine starts, with
        :meth:`notify_start` attached as a callback.
        """
        import copy  # imported once here instead of on every loop iteration

        self.cluster_dir = unicode(cluster_dir)
        dlist = []
        for i in range(n):
            el = LocalEngineLauncher(work_dir=self.work_dir, config=self.config)
            # Copy the engine args over to each engine launcher.
            el.engine_args = copy.deepcopy(self.engine_args)
            d = el.start(cluster_dir)
            if i == 0:
                # Log the command line once, using the first engine's args.
                log.msg("Starting LocalEngineSetLauncher: %r" % el.args)
            self.launchers.append(el)
            dlist.append(d)
        # The consumeErrors here could be dangerous
        dfinal = gatherBoth(dlist, consumeErrors=True)
        dfinal.addCallback(self.notify_start)
        return dfinal

    def find_args(self):
        return ['engine set']

    def signal(self, sig):
        """Send ``sig`` to every engine launcher and gather the results."""
        dlist = [el.signal(sig) for el in self.launchers]
        return gatherBoth(dlist, consumeErrors=True)

    def interrupt_then_kill(self, delay=1.0):
        """Run ``interrupt_then_kill(delay)`` on every engine launcher."""
        dlist = [el.interrupt_then_kill(delay) for el in self.launchers]
        return gatherBoth(dlist, consumeErrors=True)

    def stop(self):
        return self.interrupt_then_kill()

    def observe_stop(self):
        """Gather the ``observe_stop`` deferreds of all engine launchers.

        :meth:`notify_stop` is attached as a callback on the gathered
        deferred.
        """
        dlist = [el.observe_stop() for el in self.launchers]
        dfinal = gatherBoth(dlist, consumeErrors=False)
        dfinal.addCallback(self.notify_stop)
        return dfinal
#-----------------------------------------------------------------------------
# MPIExec launchers
#-----------------------------------------------------------------------------
Brian Granger
Adding files for the refactored kernel scripts.
class MPIExecLauncher(LocalProcessLauncher):
    """Launch an external process using mpiexec."""

    # The mpiexec command to use in starting the process.
    mpi_cmd = List(['mpiexec'], config=True)
    # The command line arguments to pass to mpiexec.
    mpi_args = List([], config=True)
    # The program to start using mpiexec.
    program = List(['date'], config=True)
    # The command line argument to the program.
    program_args = List([], config=True)
    # The number of instances of the program to start.
    n = Int(1, config=True)

    def find_args(self):
        """Build self.args using all the fields."""
        # The count is converted to a string so that every element of the
        # argument list is a string (``arg_str`` joins them with spaces).
        return self.mpi_cmd + ['-n', str(self.n)] + self.mpi_args + \
               self.program + self.program_args

    def start(self, n):
        """Start n instances of the program using mpiexec."""
        self.n = n
        return super(MPIExecLauncher, self).start()
Brian Granger
More work on the launchers and Win HPC support.
class MPIExecControllerLauncher(MPIExecLauncher):
    """Launch a controller using mpiexec."""

    controller_cmd = List(ipcontroller_cmd_argv, config=True)
    # Command line arguments to ipcontroller.
    controller_args = List(['--log-to-file','--log-level', '40'], config=True)
    # Not configurable here; start() always launches a single instance.
    n = Int(1, config=False)

    def start(self, cluster_dir):
        """Start the controller by cluster_dir."""
        # Assign a new list rather than extending in place, so a list object
        # that may be shared (e.g. one supplied through config) is never
        # mutated.
        self.controller_args = list(self.controller_args) + [
            '--cluster-dir', cluster_dir
        ]
        self.cluster_dir = unicode(cluster_dir)
        log.msg("Starting MPIExecControllerLauncher: %r" % self.args)
        return super(MPIExecControllerLauncher, self).start(1)

    def find_args(self):
        """Build the mpiexec command line that runs the controller."""
        # The count is converted to a string so that every element of the
        # argument list is a string.
        return self.mpi_cmd + ['-n', str(self.n)] + self.mpi_args + \
               self.controller_cmd + self.controller_args
Brian Granger
More work on the launchers and Win HPC support.
r2333
class MPIExecEngineSetLauncher(MPIExecLauncher):
    """Launch a set of engines using mpiexec."""

    engine_cmd = List(ipengine_cmd_argv, config=True)
    # Command line arguments for ipengine.
    engine_args = List(
        ['--log-to-file','--log-level', '40'], config=True
    )
    n = Int(1, config=True)

    def start(self, n, cluster_dir):
        """Start n engines by profile or cluster_dir."""
        # Assign a new list rather than extending in place, so a list object
        # that may be shared (e.g. one supplied through config) is never
        # mutated.
        self.engine_args = list(self.engine_args) + [
            '--cluster-dir', cluster_dir
        ]
        self.cluster_dir = unicode(cluster_dir)
        # Set n before logging so the logged command line shows the real count.
        self.n = n
        log.msg('Starting MPIExecEngineSetLauncher: %r' % self.args)
        return super(MPIExecEngineSetLauncher, self).start(n)

    def find_args(self):
        """Build the mpiexec command line that runs the engines."""
        # The count is converted to a string so that every element of the
        # argument list is a string.
        return self.mpi_cmd + ['-n', str(self.n)] + self.mpi_args + \
               self.engine_cmd + self.engine_args
Brian Granger
More work on the launchers and Win HPC support.
r2333
#-----------------------------------------------------------------------------
# SSH launchers
#-----------------------------------------------------------------------------
Brian Granger
Fixing minor bugs in IPython.kernel....
r2342 # TODO: Get SSH Launcher working again.
Brian Granger
More work on the launchers and Win HPC support.
r2333
Brian Granger
Adding files for the refactored kernel scripts.
class SSHLauncher(LocalProcessLauncher):
    """A minimal launcher for ssh.

    To be useful this will probably have to be extended to use the ``sshx``
    idea for environment variables. There could be other things this needs
    as well.

    This derives from :class:`LocalProcessLauncher` so that :meth:`start`
    actually spawns the ssh command built by :meth:`find_args`; deriving
    directly from :class:`BaseLauncher` made :meth:`start` always return a
    failed deferred wrapping ``NotImplementedError``.
    """

    ssh_cmd = List(['ssh'], config=True)
    ssh_args = List([], config=True)
    program = List(['date'], config=True)
    program_args = List([], config=True)
    hostname = Str('', config=True)
    user = Str('', config=True)
    # The ssh destination, kept in sync with ``user`` and ``hostname``.
    location = Str('')

    def _make_location(self, user, hostname):
        """Return ``user@hostname``, or just ``hostname`` when user is empty."""
        # A destination with a leading '@' is not a valid ssh target.
        if user:
            return '%s@%s' % (user, hostname)
        return hostname

    # NOTE(review): presumably called by the traits machinery when the
    # ``hostname`` / ``user`` traits change -- confirm against traitlets.
    def _hostname_changed(self, name, old, new):
        self.location = self._make_location(self.user, new)

    def _user_changed(self, name, old, new):
        self.location = self._make_location(new, self.hostname)

    def find_args(self):
        return self.ssh_cmd + self.ssh_args + [self.location] + \
               self.program + self.program_args

    def start(self, n, hostname=None, user=None):
        """Start the ssh process, optionally overriding hostname and user.

        ``n`` is accepted for signature compatibility with the other
        launchers but is not used here.
        """
        if hostname is not None:
            self.hostname = hostname
        if user is not None:
            self.user = user
        return super(SSHLauncher, self).start()
Brian Granger
More work on the launchers and Win HPC support.
class SSHControllerLauncher(SSHLauncher):
    """Placeholder controller launcher; adds nothing to SSHLauncher yet."""
class SSHEngineSetLauncher(BaseLauncher):
    """Placeholder engine-set launcher; adds nothing to BaseLauncher yet."""
#-----------------------------------------------------------------------------
# Windows HPC Server 2008 scheduler launchers
#-----------------------------------------------------------------------------
Brian Granger
Fixing a few bugs in the unicode path changes.
r2329 # This is only used on Windows.
Brian Granger
More work on the launchers and Win HPC support.
def find_job_cmd():
    """Return the command used to run the Windows HPC ``job`` program.

    On Windows (``os.name == 'nt'``) the result of ``find_cmd('job')`` is
    returned when the lookup succeeds.  On every other platform, or when the
    lookup raises ``FindCmdError``, the plain name ``'job'`` is returned.
    """
    fallback = 'job'
    if os.name != 'nt':
        return fallback
    try:
        return find_cmd('job')
    except FindCmdError:
        return fallback
Brian Granger
Fixing a few bugs in the unicode path changes.
r2329
Brian Granger
Adding files for the refactored kernel scripts.
class WindowsHPCLauncher(BaseLauncher):
    """Base launcher that drives the Windows HPC ``job`` command.

    :meth:`start` writes a job description file (via :meth:`write_job_file`,
    which subclasses must implement), submits it with ``job submit`` and
    parses the job id from the command output.  :meth:`stop` runs
    ``job cancel`` for that job id.
    """

    # A regular expression used to get the job id from the output of the
    # submit_command.
    job_id_regexp = Str(r'\d+', config=True)
    # The filename of the instantiated job script.
    job_file_name = Unicode(u'ipython_job.xml', config=True)
    # The hostname of the scheduler to submit the job to
    scheduler = Str('', config=True)
    job_cmd = Str(find_job_cmd(), config=True)

    def __init__(self, work_dir=u'', config=None):
        super(WindowsHPCLauncher, self).__init__(
            work_dir=work_dir, config=config
        )

    # NOTE: an earlier ``job_file = Unicode(u'')`` declaration was removed.
    # It was replaced by this property later in the same class body, so it
    # never took effect.
    @property
    def job_file(self):
        """The full path to the instantiated job script.

        This is made dynamically by combining the work_dir with the
        job_file_name.
        """
        return os.path.join(self.work_dir, self.job_file_name)

    def write_job_file(self, n):
        """Write the job description file; must be provided by subclasses."""
        raise NotImplementedError("Implement write_job_file in a subclass.")

    def find_args(self):
        return ['job.exe']

    def parse_job_id(self, output):
        """Take the output of the submit command and return the job id.

        The first match of ``job_id_regexp`` in ``output`` is stored on
        ``self.job_id`` and returned.  Raises :class:`LauncherError` when
        there is no match.
        """
        m = re.search(self.job_id_regexp, output)
        if m is None:
            raise LauncherError("Job id couldn't be determined: %s" % output)
        job_id = m.group()
        self.job_id = job_id
        log.msg('Job started with job id: %r' % job_id)
        return job_id

    @inlineCallbacks
    def start(self, n):
        """Start n copies of the process using the Win HPC job scheduler."""
        self.write_job_file(n)
        args = [
            'submit',
            '/jobfile:%s' % self.job_file,
            '/scheduler:%s' % self.scheduler
        ]
        log.msg("Starting Win HPC Job: %s" % (self.job_cmd + ' ' + ' '.join(args),))
        # Twisted will raise DeprecationWarnings if we try to pass unicode to this
        output = yield getProcessOutput(str(self.job_cmd),
            [str(a) for a in args],
            env=dict((str(k),str(v)) for k,v in os.environ.items()),
            path=self.work_dir
        )
        job_id = self.parse_job_id(output)
        self.notify_start(job_id)
        defer.returnValue(job_id)

    @inlineCallbacks
    def stop(self):
        """Cancel the job and notify stop observers with the command output.

        If the cancel command fails, the job is assumed to have stopped
        already and a message saying so is used as the output instead.
        """
        args = [
            'cancel',
            self.job_id,
            '/scheduler:%s' % self.scheduler
        ]
        log.msg("Stopping Win HPC Job: %s" % (self.job_cmd + ' ' + ' '.join(args),))
        try:
            # Twisted will raise DeprecationWarnings if we try to pass unicode to this
            output = yield getProcessOutput(str(self.job_cmd),
                [str(a) for a in args],
                env=dict((str(k),str(v)) for k,v in os.environ.items()),
                path=self.work_dir
            )
        except Exception:
            # Narrower than a bare ``except:`` so that GeneratorExit and
            # KeyboardInterrupt are not swallowed inside this generator.
            output = 'The job already appears to be stoppped: %r' % self.job_id
        self.notify_stop(output)  # Pass the output of the kill cmd
        defer.returnValue(output)
Brian Granger
More work on the launchers and Win HPC support.
r2333
class WindowsHPCControllerLauncher(WindowsHPCLauncher):
    """Launch a controller as a Windows HPC job with a single task."""

    job_file_name = Unicode(u'ipcontroller_job.xml', config=True)
    extra_args = List([], config=False)

    @property
    def job_file(self):
        """Path of the job description file inside the cluster directory."""
        return os.path.join(self.cluster_dir, self.job_file_name)

    def write_job_file(self, n):
        """Write the controller job description to ``self.job_file``.

        ``n`` is not used; the job always contains one controller task.
        """
        controller_job = IPControllerJob(config=self.config)

        task = IPControllerTask(config=self.config)
        # The tasks work directory is *not* the actual work directory of
        # the controller. It is used as the base path for the stdout/stderr
        # files that the scheduler redirects to.
        task.work_directory = self.cluster_dir
        # Add the --cluster-dir option set up in self.start().
        task.controller_args.extend(self.extra_args)
        controller_job.add_task(task)

        log.msg("Writing job description file: %s" % self.job_file)
        controller_job.write(self.job_file)

    def start(self, cluster_dir):
        """Start the controller by cluster_dir."""
        self.extra_args = ['--cluster-dir', cluster_dir]
        self.cluster_dir = unicode(cluster_dir)
        return super(WindowsHPCControllerLauncher, self).start(1)
class WindowsHPCEngineSetLauncher(WindowsHPCLauncher):
    """Launch a set of engines as one Windows HPC job, one task per engine."""

    job_file_name = Unicode(u'ipengineset_job.xml', config=True)
    extra_args = List([], config=False)

    @property
    def job_file(self):
        """Path of the job description file inside the cluster directory."""
        return os.path.join(self.cluster_dir, self.job_file_name)

    def write_job_file(self, n):
        """Write a job description with ``n`` engine tasks to ``self.job_file``."""
        engine_job = IPEngineSetJob(config=self.config)

        for _ in range(n):
            task = IPEngineTask(config=self.config)
            # The tasks work directory is *not* the actual work directory of
            # the engine. It is used as the base path for the stdout/stderr
            # files that the scheduler redirects to.
            task.work_directory = self.cluster_dir
            # Add the --cluster-dir option set up in self.start().
            task.engine_args.extend(self.extra_args)
            engine_job.add_task(task)

        log.msg("Writing job description file: %s" % self.job_file)
        engine_job.write(self.job_file)

    def start(self, n, cluster_dir):
        """Start n engines by cluster_dir."""
        self.extra_args = ['--cluster-dir', cluster_dir]
        self.cluster_dir = unicode(cluster_dir)
        return super(WindowsHPCEngineSetLauncher, self).start(n)
Brian Granger
More work on the launchers and Win HPC support.
r2333
#-----------------------------------------------------------------------------
# Batch (PBS) system launchers
#-----------------------------------------------------------------------------
Brian Granger
Fixing minor bugs in IPython.kernel....
r2342 # TODO: Get PBS launcher working again.
Brian Granger
Adding files for the refactored kernel scripts.
r2304
class BatchSystemLauncher(BaseLauncher):
    """Launch an external process using a batch system.

    This class is designed to work with UNIX batch systems like PBS, LSF,
    GridEngine, etc. The overall model is that there are different commands
    like qsub, qdel, etc. that handle the starting and stopping of the process.

    This class also has the notion of a batch script. The ``batch_template``
    attribute can be set to a string that is a template for the batch script.
    This template is instantiated using Itpl. Thus the template can use
    ${n} for the number of instances. Subclasses can add additional variables
    to the template dict.
    """

    # Subclasses must fill these in.  See PBSLauncher for an example.
    # The name of the command line program used to submit jobs.
    submit_command = Str('', config=True)
    # The name of the command line program used to delete jobs.
    delete_command = Str('', config=True)
    # A regular expression used to get the job id from the output of the
    # submit_command.
    job_id_regexp = Str('', config=True)
    # The string that is the batch script template itself.
    batch_template = Str('', config=True)
    # The filename of the instantiated batch script.
    batch_file_name = Unicode(u'batch_script', config=True)
    # The full path to the instantiated batch script.
    batch_file = Unicode(u'')

    def __init__(self, work_dir=u'', config=None):
        """Set up the batch script path and an empty template context.

        ``batch_file`` is computed here from ``work_dir`` and
        ``batch_file_name``; ``context`` is the namespace handed to Itpl
        when the batch template is instantiated.
        """
        super(BatchSystemLauncher, self).__init__(
            work_dir=work_dir, config=config
        )
        self.batch_file = os.path.join(self.work_dir, self.batch_file_name)
        self.context = {}

    def parse_job_id(self, output):
        """Take the output of the submit command and return the job id.

        The first occurrence of ``job_id_regexp`` anywhere in ``output`` is
        taken as the job id, stored on ``self.job_id`` and returned.

        Raises ``LauncherError`` if the pattern does not occur in ``output``.
        """
        # Use search rather than match so that leading text (a banner line,
        # whitespace, ...) in front of the id does not hide it.
        m = re.search(self.job_id_regexp, output)
        if m is None:
            raise LauncherError("Job id couldn't be determined: %s" % output)
        job_id = m.group()
        self.job_id = job_id
        log.msg('Job started with job id: %r' % job_id)
        return job_id

    def write_batch_script(self, n):
        """Instantiate and write the batch script to the work_dir."""
        self.context['n'] = n
        script_as_string = Itpl.itplns(self.batch_template, self.context)
        log.msg('Writing instantiated batch script: %s' % self.batch_file)
        f = open(self.batch_file, 'w')
        try:
            f.write(script_as_string)
        finally:
            # Always release the file handle, even if the write fails.
            f.close()

    @inlineCallbacks
    def start(self, n):
        """Start n copies of the process using a batch system.

        Writes the batch script, runs ``submit_command`` on it and fires
        the resulting deferred with the job id parsed from its output.
        """
        self.write_batch_script(n)
        output = yield getProcessOutput(self.submit_command,
            [self.batch_file], env=os.environ)
        job_id = self.parse_job_id(output)
        self.notify_start(job_id)
        defer.returnValue(job_id)

    @inlineCallbacks
    def stop(self):
        """Delete the job by running ``delete_command`` on ``self.job_id``.

        The resulting deferred fires with the output of the delete command.
        """
        output = yield getProcessOutput(self.delete_command,
            [self.job_id], env=os.environ
        )
        self.notify_stop(output)  # Pass the output of the kill cmd
        defer.returnValue(output)
class PBSLauncher(BatchSystemLauncher):
    """A BatchSystemLauncher subclass for PBS."""

    # Command line program used to submit the batch script.
    submit_command = Str('qsub', config=True)
    # Command line program used to delete a submitted job.
    delete_command = Str('qdel', config=True)
    # Regular expression matching the numeric job id in the output of the
    # submit command.
    job_id_regexp = Str(r'\d+', config=True)
    # Template for the batch script; empty by default.
    batch_template = Str('', config=True)
    # File name of the instantiated batch script.
    batch_file_name = Unicode(u'pbs_batch_script', config=True)
    # Full path to the instantiated batch script.
    batch_file = Unicode(u'')
class PBSControllerLauncher(PBSLauncher):
    """Launch a controller using PBS."""

    # File name of the instantiated batch script for the controller job.
    batch_file_name = Unicode(u'pbs_batch_script_controller', config=True)

    def start(self, cluster_dir):
        """Start the controller for the given cluster_dir.

        Delegates to the parent class's ``start`` with an instance count
        of 1 and returns whatever it returns.
        """
        # Save cluster_dir in the context so the batch script template can
        # refer to it as ${cluster_dir}.
        self.context['cluster_dir'] = cluster_dir
        self.cluster_dir = unicode(cluster_dir)
        log.msg("Starting PBSControllerLauncher: %r" % self.args)
        parent = super(PBSControllerLauncher, self)
        return parent.start(1)
class PBSEngineSetLauncher(PBSLauncher):
    """Launch a set of engines using PBS."""

    # File name of the instantiated batch script for the engine job.
    batch_file_name = Unicode(u'pbs_batch_script_engines', config=True)

    def start(self, n, cluster_dir):
        """Start n engines for the given cluster_dir.

        Appends ``--cluster-dir <cluster_dir>`` to ``program_args``,
        records ``cluster_dir`` on the instance and delegates to the
        parent class's ``start`` with ``n``.
        """
        cluster_dir_args = ['--cluster-dir', cluster_dir]
        self.program_args.extend(cluster_dir_args)
        self.cluster_dir = unicode(cluster_dir)
        log.msg('Starting PBSEngineSetLauncher: %r' % self.args)
        parent = super(PBSEngineSetLauncher, self)
        return parent.start(n)
Brian Granger
Most of the new ipcluster is now working, including a nice client.
r2306 #-----------------------------------------------------------------------------
# A launcher for ipcluster itself!
#-----------------------------------------------------------------------------
class IPClusterLauncher(LocalProcessLauncher):
    """Launch the ipcluster program in an external process."""

    # The command (as an argv list) used to invoke ipcluster.
    ipcluster_cmd = List(ipcluster_cmd_argv, config=True)
    # Command line arguments to pass to ipcluster.
    ipcluster_args = List(
        ['--clean-logs', '--log-to-file', '--log-level', '40'], config=True)
    # The ipcluster subcommand placed right after the command itself.
    ipcluster_subcommand = Str('start')
    # The value passed to ipcluster's ``-n`` option.
    ipcluster_n = Int(2)

    def find_args(self):
        """Build the full argv list: command, subcommand, ``-n`` and args."""
        count_args = ['-n', repr(self.ipcluster_n)]
        subcommand = [self.ipcluster_subcommand]
        return (self.ipcluster_cmd + subcommand
                + count_args + self.ipcluster_args)

    def start(self):
        """Log the command line and start ipcluster via the parent class."""
        log.msg("Starting ipcluster: %r" % self.args)
        parent = super(IPClusterLauncher, self)
        return parent.start()
Brian Granger
Adding files for the refactored kernel scripts.
r2304