diff --git a/IPython/config/default/ipcluster_config.py b/IPython/config/default/ipcluster_config.py
index 988c0de..ff40ca4 100644
--- a/IPython/config/default/ipcluster_config.py
+++ b/IPython/config/default/ipcluster_config.py
@@ -2,66 +2,154 @@ import os
 
 c = get_config()
 
-# Options are:
-# * LocalControllerLauncher
-# * PBSControllerLauncher
+#-----------------------------------------------------------------------------
+# Select which launchers to use
+#-----------------------------------------------------------------------------
+
+# This allows you to control what method is used to start the controller
+# and engines. The following methods are currently supported:
+# * Start as a regular process on localhost.
+# * Start using mpiexec.
+# * Start using PBS.
+# * Start using SSH (currently broken).
+
+# The selected launchers can be configured below.
+
+# Options are (LocalControllerLauncher, MPIExecControllerLauncher,
+# PBSControllerLauncher)
 # c.Global.controller_launcher = 'IPython.kernel.launcher.LocalControllerLauncher'
 
-# Options are:
-# * LocalEngineSetLauncher
-# * MPIExecEngineSetLauncher
-# * PBSEngineSetLauncher
+# Options are (LocalEngineSetLauncher, MPIExecEngineSetLauncher,
+# PBSEngineSetLauncher)
 # c.Global.engine_launcher = 'IPython.kernel.launcher.LocalEngineSetLauncher'
 
-# c.Global.log_to_file = False
+#-----------------------------------------------------------------------------
+# Global configuration
+#-----------------------------------------------------------------------------
+
+# The default number of engines that will be started. This is overridden by
+# the -n command line option: "ipcluster start -n 4"
 # c.Global.n = 2
-# c.Global.reset_config = False
-# c.Global.clean_logs = True
 
-# c.MPIExecLauncher.mpi_cmd = ['mpiexec']
-# c.MPIExecLauncher.mpi_args = []
-# c.MPIExecLauncher.program = []
-# c.MPIExecLauncher.program_args = []
-# c.MPIExecLauncher.n = 1
+# Log to a file in cluster_dir/log, otherwise just log to sys.stdout.
 # c.Global.log_to_file = False
 
-# c.SSHLauncher.ssh_cmd = ['ssh']
-# c.SSHLauncher.ssh_args = []
-# c.SSHLauncher.program = []
-# s.SSHLauncher.program_args = []
-# c.SSHLauncher.hostname = ''
-# c.SSHLauncher.user = os.environ['USER']
+# Remove old logs from cluster_dir/log before starting.
 # c.Global.clean_logs = True
 
-# c.PBSLauncher.submit_command = 'qsub'
-# c.PBSLauncher.delete_command = 'qdel'
-# c.PBSLauncher.job_id_regexp = '\d+'
-# c.PBSLauncher.batch_template = """"""
-# c.PBSLauncher.batch_file_name = u'pbs_batch_script'
+#-----------------------------------------------------------------------------
+# Controller launcher configuration
+#-----------------------------------------------------------------------------
 
-# c.LocalControllerLauncher.controller_args = []
+# Configure how the controller is started. The configuration of the controller
+# can also be set up by editing the controller config file:
+# ipcontroller_config.py
+
+# The command line arguments to call the controller with.
+# c.LocalControllerLauncher.controller_args = \
+# ['--log-to-file','--log-level', '40']
+
+# The mpiexec/mpirun command to use in starting the controller.
 # c.MPIExecControllerLauncher.mpi_cmd = ['mpiexec']
+
+# Additional arguments to pass to the actual mpiexec command.
 # c.MPIExecControllerLauncher.mpi_args = []
-# c.MPIExecControllerLauncher.controller_args = []
-# c.MPIExecControllerLauncher.n = 1
+# The command line arguments to call the controller with.
+# c.MPIExecControllerLauncher.controller_args = \
+# ['--log-to-file','--log-level', '40']
+
+# The command line program to use to submit a PBS job.
 # c.PBSControllerLauncher.submit_command = 'qsub'
+
+# The command line program to use to delete a PBS job.
 # c.PBSControllerLauncher.delete_command = 'qdel'
+
+# A regular expression that takes the output of qsub and finds the job id.
 # c.PBSControllerLauncher.job_id_regexp = '\d+'
+
+# The batch submission script used to start the controller. This is where
+# environment variables would be set up, etc. This string is interpolated using
+# the Itpl module in IPython.external. Basically, you can use ${profile} for
+# the controller profile or ${cluster_dir} for the cluster_dir.
 # c.PBSControllerLauncher.batch_template = """"""
-# c.PBSLauncher.batch_file_name = u'pbs_batch_script'
-# c.LocalEngineLauncher.engine_args = []
+# The name of the instantiated batch script that will actually be used to
+# submit the job. This will be written to the cluster directory.
+# c.PBSControllerLauncher.batch_file_name = u'pbs_batch_script_controller'
+
+#-----------------------------------------------------------------------------
+# Engine launcher configuration
+#-----------------------------------------------------------------------------
-# c.LocalEngineSetLauncher.engine_args = []
+# Command line arguments passed to the engines.
+# c.LocalEngineSetLauncher.engine_args = ['--log-to-file','--log-level', '40']
+# The mpiexec/mpirun command to use in starting the engines.
 # c.MPIExecEngineSetLauncher.mpi_cmd = ['mpiexec']
+
+# Additional arguments to pass to the actual mpiexec command.
 # c.MPIExecEngineSetLauncher.mpi_args = []
-# c.MPIExecEngineSetLauncher.controller_args = []
+
+# Command line arguments passed to the engines.
+# c.MPIExecEngineSetLauncher.engine_args = ['--log-to-file','--log-level', '40']
+
+# The default number of engines to start if not given elsewhere.
 # c.MPIExecEngineSetLauncher.n = 1
+# The command line program to use to submit a PBS job.
 # c.PBSEngineSetLauncher.submit_command = 'qsub'
+
+# The command line program to use to delete a PBS job.
 # c.PBSEngineSetLauncher.delete_command = 'qdel'
+
+# A regular expression that takes the output of qsub and finds the job id.
 # c.PBSEngineSetLauncher.job_id_regexp = '\d+'
+
+# The batch submission script used to start the engines. This is where
+# environment variables would be set up, etc. This string is interpolated using
+# the Itpl module in IPython.external. Basically, you can use ${n} for the
+# number of engines, ${profile} for the engine profile and ${cluster_dir}
+# for the cluster_dir.
 # c.PBSEngineSetLauncher.batch_template = """"""
-# c.PBSEngineSetLauncher.batch_file_name = u'pbs_batch_script'
+
+# The name of the instantiated batch script that will actually be used to
+# submit the job. This will be written to the cluster directory.
+# c.PBSEngineSetLauncher.batch_file_name = u'pbs_batch_script_engines'
+
+#-----------------------------------------------------------------------------
+# Base launcher configuration
+#-----------------------------------------------------------------------------
+
+# The various launchers are organized into an inheritance hierarchy.
+# The configurations can also be inherited and the following attributes
+# allow you to configure the base classes.
+
+# c.MPIExecLauncher.mpi_cmd = ['mpiexec']
+# c.MPIExecLauncher.mpi_args = []
+# c.MPIExecLauncher.program = []
+# c.MPIExecLauncher.program_args = []
+# c.MPIExecLauncher.n = 1
+
+# c.SSHLauncher.ssh_cmd = ['ssh']
+# c.SSHLauncher.ssh_args = []
+# c.SSHLauncher.program = []
+# c.SSHLauncher.program_args = []
+# c.SSHLauncher.hostname = ''
+# c.SSHLauncher.user = os.environ['USER']
+
+# c.BatchSystemLauncher.submit_command
+# c.BatchSystemLauncher.delete_command
+# c.BatchSystemLauncher.job_id_regexp
+# c.BatchSystemLauncher.batch_template
+# c.BatchSystemLauncher.batch_file_name
+
+# c.PBSLauncher.submit_command = 'qsub'
+# c.PBSLauncher.delete_command = 'qdel'
+# c.PBSLauncher.job_id_regexp = '\d+'
+# c.PBSLauncher.batch_template = """"""
+# c.PBSLauncher.batch_file_name = u'pbs_batch_script'
+
+
+
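For reference, a filled-in engine batch template for the PBS launcher configured above could look like the sketch below. Everything PBS-specific in it (the directives, and the assumption that mpiexec is available on the batch nodes) is illustrative only; the pieces taken from this file are the Itpl variables ${n}, ${profile} and ${cluster_dir} and the ipengine logging flags.

# Hypothetical ipcluster_config.py entry; the PBS directives and resource
# names are placeholders, not defaults shipped with this file.
c.PBSEngineSetLauncher.batch_template = """#!/bin/sh
#PBS -N ipengines
#PBS -l nodes=${n}
# ${n}, ${profile} and ${cluster_dir} are interpolated by Itpl before the
# script is written to the cluster directory and submitted with qsub.
mpiexec -n ${n} ipengine --cluster-dir ${cluster_dir} --log-to-file --log-level 40
"""
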
diff --git a/IPython/config/default/ipcontroller_config.py b/IPython/config/default/ipcontroller_config.py
index cf41bca..abacdc5 100644
--- a/IPython/config/default/ipcontroller_config.py
+++ b/IPython/config/default/ipcontroller_config.py
@@ -7,10 +7,32 @@ c = get_config()
 #-----------------------------------------------------------------------------
 
 # Basic Global config attributes
+
+# Start up messages are logged to stdout using the logging module.
+# These all happen before the twisted reactor is started and are
+# useful for debugging purposes. Can be (10=DEBUG, 20=INFO, 30=WARN, 40=ERROR)
+# and smaller is more verbose.
+# c.Global.log_level = 20
+
+# Log to a file in cluster_dir/log, otherwise just log to sys.stdout.
 # c.Global.log_to_file = False
+
+# Remove old logs from cluster_dir/log before starting.
 # c.Global.clean_logs = True
+
+# A list of Python statements that will be run before starting the
+# controller. This is provided because occasionally certain things need to
+# be imported in the controller for pickling to work.
 # c.Global.import_statements = ['import math']
+
+# Reuse the controller's FURL files. If False, FURL files are regenerated
+# each time the controller is run. If True, they will be reused, *but* you
+# must also set the network ports by hand. If set, this will override the
+# values set for the client and engine connections below.
 # c.Global.reuse_furls = True
+
+# Enable SSL encryption on all connections to the controller. If set, this
+# will override the values set for the client and engine connections below.
 # c.Global.secure = True
 
 #-----------------------------------------------------------------------------
@@ -18,13 +40,67 @@ c = get_config()
 #-----------------------------------------------------------------------------
 
 # Basic client service config attributes
+
+# The network interface the controller will listen on for client connections.
+# This should be an IP address or hostname of the controller's host. The empty
+# string means listen on all interfaces.
 # c.FCClientServiceFactory.ip = ''
+
+# The TCP/IP port the controller will listen on for client connections. If 0
+# a random port will be used. If the controller's host has a firewall running
+# it must allow incoming traffic on this port.
 # c.FCClientServiceFactory.port = 0
+
+# The client learns how to connect to the controller by looking at the
+# location field embedded in the FURL. If this field is empty, all network
+# interfaces that the controller is listening on will be listed. To have the
+# client connect on a particular interface, list it here.
 # c.FCClientServiceFactory.location = ''
+
+# Use SSL encryption for the client connection.
 # c.FCClientServiceFactory.secure = True
+
+# Reuse the client FURL each time the controller is started. If set, you must
+# also pick a specific network port above (FCClientServiceFactory.port).
 # c.FCClientServiceFactory.reuse_furls = False
 
-# You shouldn't have to modify the rest of this section
+#-----------------------------------------------------------------------------
+# Configure the engine services
+#-----------------------------------------------------------------------------
+
+# Basic config attributes for the engine services.
+
+# The network interface the controller will listen on for engine connections.
+# This should be an IP address or hostname of the controller's host. The empty
+# string means listen on all interfaces.
+# c.FCEngineServiceFactory.ip = ''
+
+# The TCP/IP port the controller will listen on for engine connections. If 0
+# a random port will be used. If the controller's host has a firewall running
+# it must allow incoming traffic on this port.
+# c.FCEngineServiceFactory.port = 0
+
+# The engine learns how to connect to the controller by looking at the
+# location field embedded in the FURL. If this field is empty, all network
+# interfaces that the controller is listening on will be listed. To have the
+# engine connect on a particular interface, list it here.
+# c.FCEngineServiceFactory.location = ''
+
+# Use SSL encryption for the engine connection.
+# c.FCEngineServiceFactory.secure = True
+
+# Reuse the engine FURL each time the controller is started. If set, you must
+# also pick a specific network port above (FCEngineServiceFactory.port).
+# c.FCEngineServiceFactory.reuse_furls = False
+
+#-----------------------------------------------------------------------------
+# Developer level configuration attributes
+#-----------------------------------------------------------------------------
+
+# You shouldn't have to modify anything in this section. These attributes
+# are more for developers who want to change the behavior of the controller
+# at a fundamental level.
+
 # c.FCClientServiceFactory.cert_file = 'ipcontroller-client.pem'
 
 # default_client_interfaces = Config()
@@ -44,18 +120,6 @@ c = get_config()
 #
 # c.FCEngineServiceFactory.interfaces = default_client_interfaces
 
-#-----------------------------------------------------------------------------
-# Configure the engine services
-#-----------------------------------------------------------------------------
-
-# Basic config attributes for the engine services
-# c.FCEngineServiceFactory.ip = ''
-# c.FCEngineServiceFactory.port = 0
-# c.FCEngineServiceFactory.location = ''
-# c.FCEngineServiceFactory.secure = True
-# c.FCEngineServiceFactory.reuse_furls = False
-
-# You shouldn't have to modify the rest of this section
 # c.FCEngineServiceFactory.cert_file = 'ipcontroller-engine.pem'
 
 # default_engine_interfaces = Config()
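The reuse_furls comments above carry a caveat that is easy to miss: reusing FURL files only works when the ports are pinned. A minimal sketch of that pairing for ipcontroller_config.py, with arbitrary example port numbers:

# Sketch only: reuse the FURL files across controller restarts by fixing the
# client and engine ports. 10101 and 10102 are arbitrary example values.
c.Global.reuse_furls = True
c.FCClientServiceFactory.port = 10101
c.FCEngineServiceFactory.port = 10102
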
diff --git a/IPython/config/default/ipengine_config.py b/IPython/config/default/ipengine_config.py
index 9595e98..7c7aa06 100644
--- a/IPython/config/default/ipengine_config.py
+++ b/IPython/config/default/ipengine_config.py
@@ -1,23 +1,61 @@
 c = get_config()
 
+#-----------------------------------------------------------------------------
+# Global configuration
+#-----------------------------------------------------------------------------
+
+# Start up messages are logged to stdout using the logging module.
+# These all happen before the twisted reactor is started and are
+# useful for debugging purposes. Can be (10=DEBUG, 20=INFO, 30=WARN, 40=ERROR)
+# and smaller is more verbose.
+# c.Global.log_level = 20
+
+# Log to a file in cluster_dir/log, otherwise just log to sys.stdout.
 # c.Global.log_to_file = False
-# c.Global.clean_logs = False
+
+# Remove old logs from cluster_dir/log before starting.
+# c.Global.clean_logs = True
+
+# A list of strings that will be executed in the user's namespace on the engine
+# before it connects to the controller.
 # c.Global.exec_lines = ['import numpy']
-# c.Global.log_level = 10
-# c.Global.shell_class = 'IPython.kernel.core.interpreter.Interpreter'
-# c.Global.furl_file_name = 'ipcontroller-engine.furl'
-# c.Global.furl_file = ''
-# The max number of connection attemps and the initial delay between
+
+# The engine will try to connect to the controller multiple times, to allow
+# the controller time to start up and write its FURL file. These parameters
+# control the number of retries (connect_max_tries) and the initial delay
+# (connect_delay) between attempts. The actual delay between attempts gets
+# longer each time by a factor of 1.5 (delay[i] = 1.5*delay[i-1]).
 # those attemps.
 # c.Global.connect_delay = 0.1
 # c.Global.connect_max_tries = 15
+# By default, the engine will look for the controller's FURL file in its own
+# cluster directory. Sometimes, the FURL file will be elsewhere and this
+# attribute can be set to the full path of the FURL file.
+# c.Global.furl_file = ''
+
+#-----------------------------------------------------------------------------
+# MPI configuration
+#-----------------------------------------------------------------------------
+# Upon starting, the engine can be configured to call MPI_Init. This section
+# configures that.
+
+# Select which MPI section to execute to set up MPI. The value of this
+# attribute must match the name of another attribute in the MPI config
+# section (mpi4py, pytrilinos, etc.). This can also be set by the --mpi
+# command line option.
 # c.MPI.use = ''
+
+# Initialize MPI using mpi4py. To use this, set c.MPI.use = 'mpi4py' or use
+# --mpi=mpi4py at the command line.
 # c.MPI.mpi4py = """from mpi4py import MPI as mpi
 # mpi.size = mpi.COMM_WORLD.Get_size()
 # mpi.rank = mpi.COMM_WORLD.Get_rank()
 # """
+
+# Initialize MPI using pytrilinos. To use this, set c.MPI.use = 'pytrilinos'
+# or use --mpi=pytrilinos at the command line.
 # c.MPI.pytrilinos = """from PyTrilinos import Epetra
 # class SimpleStruct:
 # pass
@@ -26,3 +64,23 @@ c = get_config()
 # mpi = SimpleStruct()
 # mpi.rank = 0
 # mpi.size = 0
 # """
 
+#-----------------------------------------------------------------------------
+# Developer level configuration attributes
+#-----------------------------------------------------------------------------
+
+# You shouldn't have to modify anything in this section. These attributes
+# are more for developers who want to change the behavior of the engine
+# at a fundamental level.
+
+# You should not have to change these attributes.
+
+# c.Global.shell_class = 'IPython.kernel.core.interpreter.Interpreter'
+
+# c.Global.furl_file_name = 'ipcontroller-engine.furl'
+
+
+
+
+
+
+
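As a concrete example of the MPI section above, enabling the bundled mpi4py snippet in ipengine_config.py is a one-line setting; the exec_lines value here is just an illustration.

# Sketch only: run the statements in c.MPI.mpi4py at engine startup so each
# engine calls MPI_Init via mpi4py before connecting to the controller.
c.MPI.use = 'mpi4py'
# Optionally pre-import modules into the user namespace as well.
c.Global.exec_lines = ['import numpy']
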
diff --git a/IPython/kernel/clientconnector.py b/IPython/kernel/clientconnector.py
index c440017..3152db3 100644
--- a/IPython/kernel/clientconnector.py
+++ b/IPython/kernel/clientconnector.py
@@ -156,6 +156,11 @@ class AsyncClientConnector(object):
         ipythondir : str
             The location of the ipythondir if different from the default.
             This is used if the cluster directory is being found by profile.
+        delay : float
+            The initial delay between re-connection attempts. Subsequent delays
+            get longer according to ``delay[i] = 1.5*delay[i-1]``.
+        max_tries : int
+            The max number of re-connection attempts.
 
         Returns
         -------
@@ -193,7 +198,12 @@ class AsyncClientConnector(object):
         ipythondir : str
             The location of the ipythondir if different from the default.
             This is used if the cluster directory is being found by profile.
-
+        delay : float
+            The initial delay between re-connection attempts. Subsequent delays
+            get longer according to ``delay[i] = 1.5*delay[i-1]``.
+        max_tries : int
+            The max number of re-connection attempts.
+
         Returns
         -------
         A deferred to the actual client class.
@@ -233,6 +243,11 @@ class AsyncClientConnector(object):
         ipythondir : str
             The location of the ipythondir if different from the default.
             This is used if the cluster directory is being found by profile.
+        delay : float
+            The initial delay between re-connection attempts. Subsequent delays
+            get longer according to ``delay[i] = 1.5*delay[i-1]``.
+        max_tries : int
+            The max number of re-connection attempts.
 
         Returns
         -------
@@ -332,6 +347,11 @@ class ClientConnector(object):
         ipythondir : str
             The location of the ipythondir if different from the default.
             This is used if the cluster directory is being found by profile.
+        delay : float
+            The initial delay between re-connection attempts. Subsequent delays
+            get longer according to ``delay[i] = 1.5*delay[i-1]``.
+        max_tries : int
+            The max number of re-connection attempts.
 
         Returns
         -------
@@ -368,6 +388,11 @@ class ClientConnector(object):
         ipythondir : str
             The location of the ipythondir if different from the default.
             This is used if the cluster directory is being found by profile.
+        delay : float
+            The initial delay between re-connection attempts. Subsequent delays
+            get longer according to ``delay[i] = 1.5*delay[i-1]``.
+        max_tries : int
+            The max number of re-connection attempts.
 
         Returns
         -------
@@ -640,8 +665,22 @@ class Cluster(object):
     def get_multiengine_client(self, delay=DELAY, max_tries=MAX_TRIES):
         """Get the multiengine client for the running cluster.
 
-        If this fails, it means that the cluster has not finished starting.
-        Usually waiting a few seconds are re-trying will solve this.
+        This will attempt to connect to the controller multiple times. If this
+        fails altogether, try looking at the following:
+        * Make sure the controller is starting properly by looking at its
+          log files.
+        * Make sure the controller is writing its FURL file in the location
+          expected by the client.
+        * Make sure a firewall on the controller's host is not blocking the
+          client from connecting.
+
+        Parameters
+        ----------
+        delay : float
+            The initial delay between re-connection attempts. Subsequent delays
+            get longer according to ``delay[i] = 1.5*delay[i-1]``.
+        max_tries : int
+            The max number of re-connection attempts.
         """
         if self.client_connector is None:
             self.client_connector = ClientConnector()
@@ -653,8 +692,22 @@ class Cluster(object):
     def get_task_client(self, delay=DELAY, max_tries=MAX_TRIES):
         """Get the task client for the running cluster.
 
-        If this fails, it means that the cluster has not finished starting.
-        Usually waiting a few seconds are re-trying will solve this.
+        This will attempt to connect to the controller multiple times. If this
+        fails altogether, try looking at the following:
+        * Make sure the controller is starting properly by looking at its
+          log files.
+        * Make sure the controller is writing its FURL file in the location
+          expected by the client.
+        * Make sure a firewall on the controller's host is not blocking the
+          client from connecting.
+
+        Parameters
+        ----------
+        delay : float
+            The initial delay between re-connection attempts. Subsequent delays
+            get longer according to ``delay[i] = 1.5*delay[i-1]``.
+        max_tries : int
+            The max number of re-connection attempts.
         """
         if self.client_connector is None:
             self.client_connector = ClientConnector()
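To show how the delay/max_tries parameters documented above are meant to be used, here is a usage sketch. It assumes a Cluster instance called cluster has already been created and started elsewhere; that setup is not part of this hunk.

# Hypothetical usage of the retry parameters; `cluster` is assumed to be an
# already-started Cluster instance.
mec = cluster.get_multiengine_client(delay=0.5, max_tries=30)
tc = cluster.get_task_client()
# With delay=0.5 and the 1.5x back-off, the waits are 0.5, 0.75, 1.125, ... seconds.
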
diff --git a/IPython/kernel/launcher.py b/IPython/kernel/launcher.py
index 0e7d536..8855795 100644
--- a/IPython/kernel/launcher.py
+++ b/IPython/kernel/launcher.py
@@ -52,6 +52,8 @@ class UnknownStatus(LauncherError):
 
 class BaseLauncher(Component):
     """An asbtraction for starting, stopping and signaling a process."""
+    # A directory for files related to the process. But we don't cd to
+    # this directory.
     working_dir = Unicode(u'')
 
     def __init__(self, working_dir, parent=None, name=None, config=None):
@@ -64,11 +66,18 @@ class BaseLauncher(Component):
 
     @property
     def args(self):
-        """A list of cmd and args that will be used to start the process."""
+        """A list of cmd and args that will be used to start the process.
+
+        This is what is passed to :func:`spawnProcess` and the first element
+        will be the process name.
+        """
         return self.find_args()
 
     def find_args(self):
-        """The ``.args`` property calls this to find the args list."""
+        """The ``.args`` property calls this to find the args list.
+
+        Subclasses should implement this to construct the cmd and args.
+        """
         raise NotImplementedError('find_args must be implemented in a subclass')
 
     @property
@@ -78,6 +87,7 @@ class BaseLauncher(Component):
 
     @property
     def running(self):
+        """Am I running?"""
         if self.state == 'running':
             return True
         else:
@@ -87,7 +97,7 @@ class BaseLauncher(Component):
         """Start the process.
 
         This must return a deferred that fires with information about the
-        process starting (like a pid, job id, etc.)
+        process starting (like a pid, job id, etc.).
         """
         return defer.fail(
             Failure(NotImplementedError(
@@ -96,12 +106,13 @@ class BaseLauncher(Component):
         )
 
     def stop(self):
-        """Stop the process and notify observers of ProcessStopped.
+        """Stop the process and notify observers of stopping.
 
-        This must return a deferred that fires with any errors that occur
-        while the process is attempting to be shut down. This deferred
-        won't fire when the process actually stops. These events are
-        handled by calling :func:`observe_stop`.
+        This must return a deferred that fires with information about the
+        process stopping, like errors that occur while the process is
+        attempting to be shut down. This deferred won't fire when the process
+        actually stops. To observe the actual process stopping, see
+        :func:`observe_stop`.
         """
         return defer.fail(
             Failure(NotImplementedError(
@@ -123,9 +134,9 @@ class BaseLauncher(Component):
         return d
 
     def notify_start(self, data):
-        """Call this to tigger startup actions.
+        """Call this to trigger startup actions.
 
-        This logs the process startup and sets the state to running. It is
+        This logs the process startup and sets the state to 'running'. It is
         a pass-through so it can be used as a callback.
         """
 
@@ -135,7 +146,10 @@ class BaseLauncher(Component):
         return data
 
     def notify_stop(self, data):
-        """Call this to trigger all the deferreds from :func:`observe_stop`."""
+        """Call this to trigger process stop actions.
+
+        This logs the process stopping and sets the state to 'after'. Call
+        this to trigger all the deferreds from :func:`observe_stop`."""
         log.msg('Process %r stopped: %r' % (self.args[0], data))
 
         self.stop_data = data
@@ -205,6 +219,8 @@ class LocalProcessLauncherProtocol(ProcessProtocol):
 
 class LocalProcessLauncher(BaseLauncher):
     """Start and stop an external process in an asynchronous manner."""
+    # This is used to construct self.args, which is passed to
+    # spawnProcess.
     cmd_and_args = List([])
 
     def __init__(self, working_dir, parent=None, name=None, config=None):
@@ -246,24 +262,33 @@ class LocalProcessLauncher(BaseLauncher):
 
     @inlineCallbacks
     def interrupt_then_kill(self, delay=2.0):
+        """Send INT, wait a delay and then send KILL."""
         yield self.signal('INT')
         yield sleep_deferred(delay)
         yield self.signal('KILL')
 
 
 class MPIExecLauncher(LocalProcessLauncher):
+    """Launch an external process using mpiexec."""
 
+    # The mpiexec command to use in starting the process.
     mpi_cmd = List(['mpiexec'], config=True)
+    # The command line arguments to pass to mpiexec.
     mpi_args = List([], config=True)
+    # The program to start using mpiexec.
     program = List(['date'], config=True)
+    # The command line arguments to the program.
     program_args = List([], config=True)
+    # The number of instances of the program to start.
     n = Int(1, config=True)
 
     def find_args(self):
+        """Build self.args using all the fields."""
         return self.mpi_cmd + ['-n', self.n] + self.mpi_args + \
                self.program + self.program_args
 
     def start(self, n):
+        """Start n instances of the program using mpiexec."""
        self.n = n
         return super(MPIExecLauncher, self).start()
 
@@ -307,13 +332,32 @@ class WindowsHPCLauncher(BaseLauncher):
 
 class BatchSystemLauncher(BaseLauncher):
+    """Launch an external process using a batch system.
+
+    This class is designed to work with UNIX batch systems like PBS, LSF,
+    GridEngine, etc. The overall model is that there are different commands
+    like qsub, qdel, etc. that handle the starting and stopping of the process.
+
+    This class also has the notion of a batch script. The ``batch_template``
+    attribute can be set to a string that is a template for the batch script.
+    This template is instantiated using Itpl. Thus the template can use
+    ${n} for the number of instances. Subclasses can add additional variables
+    to the template dict.
+    """
 
     # Subclasses must fill these in. See PBSEngineSet
+    # The name of the command line program used to submit jobs.
     submit_command = Str('', config=True)
+    # The name of the command line program used to delete jobs.
     delete_command = Str('', config=True)
+    # A regular expression used to get the job id from the output of the
+    # submit_command.
     job_id_regexp = Str('', config=True)
+    # The string that is the batch script template itself.
     batch_template = Str('', config=True)
+    # The filename of the instantiated batch script.
     batch_file_name = Unicode(u'batch_script', config=True)
+    # The full path to the instantiated batch script.
     batch_file = Unicode(u'')
 
     def __init__(self, working_dir, parent=None, name=None, config=None):
@@ -324,6 +368,7 @@ class BatchSystemLauncher(BaseLauncher):
         self.context = {}
 
     def parse_job_id(self, output):
+        """Take the output of the submit command and return the job id."""
         m = re.match(self.job_id_regexp, output)
         if m is not None:
             job_id = m.group()
@@ -334,6 +379,7 @@ class BatchSystemLauncher(BaseLauncher):
         return job_id
 
     def write_batch_script(self, n):
+        """Instantiate and write the batch script to the working_dir."""
         self.context['n'] = n
         script_as_string = Itpl.itplns(self.batch_template, self.context)
         log.msg('Writing instantiated batch script: %s' % self.batch_file)
@@ -361,6 +407,7 @@ class BatchSystemLauncher(BaseLauncher):
 
 
 class PBSLauncher(BatchSystemLauncher):
+    """A BatchSystemLauncher subclass for PBS."""
 
     submit_command = Str('qsub', config=True)
     delete_command = Str('qdel', config=True)
@@ -375,6 +422,7 @@
 #-----------------------------------------------------------------------------
 
 def find_controller_cmd():
+    """Find the command line ipcontroller program in a cross platform way."""
     if sys.platform == 'win32':
         # This logic is needed because the ipcontroller script doesn't
         # always get installed in the same way or in the same location.
@@ -392,14 +440,17 @@ def find_controller_cmd():
 
 
 class LocalControllerLauncher(LocalProcessLauncher):
+    """Launch a controller as a regular external process."""
 
     controller_cmd = List(find_controller_cmd())
+    # Command line arguments to ipcontroller.
     controller_args = List(['--log-to-file','--log-level', '40'], config=True)
 
     def find_args(self):
         return self.controller_cmd + self.controller_args
 
     def start(self, profile=None, cluster_dir=None):
+        """Start the controller by profile or cluster_dir."""
         if cluster_dir is not None:
             self.controller_args.extend(['--cluster-dir', cluster_dir])
         if profile is not None:
@@ -413,12 +464,15 @@ class WindowsHPCControllerLauncher(WindowsHPCLauncher):
 
 class MPIExecControllerLauncher(MPIExecLauncher):
+    """Launch a controller using mpiexec."""
 
     controller_cmd = List(find_controller_cmd(), config=False)
+    # Command line arguments to ipcontroller.
     controller_args = List(['--log-to-file','--log-level', '40'], config=True)
     n = Int(1, config=False)
 
     def start(self, profile=None, cluster_dir=None):
+        """Start the controller by profile or cluster_dir."""
         if cluster_dir is not None:
             self.controller_args.extend(['--cluster-dir', cluster_dir])
         if profile is not None:
@@ -426,15 +480,18 @@ class MPIExecControllerLauncher(MPIExecLauncher):
         log.msg("Starting MPIExecControllerLauncher: %r" % self.args)
         return super(MPIExecControllerLauncher, self).start(1)
-
 
     def find_args(self):
         return self.mpi_cmd + ['-n', self.n] + self.mpi_args + \
                self.controller_cmd + self.controller_args
 
 
 class PBSControllerLauncher(PBSLauncher):
+    """Launch a controller using PBS."""
+
+    batch_file_name = Unicode(u'pbs_batch_script_controller', config=True)
 
     def start(self, profile=None, cluster_dir=None):
+        """Start the controller by profile or cluster_dir."""
         # Here we save profile and cluster_dir in the context so they
         # can be used in the batch script template as ${profile} and
         # ${cluster_dir}
@@ -456,6 +513,7 @@ class SSHControllerLauncher(SSHLauncher):
 
 
 def find_engine_cmd():
+    """Find the command line ipengine program in a cross platform way."""
     if sys.platform == 'win32':
         # This logic is needed because the ipengine script doesn't
         # always get installed in the same way or in the same location.
@@ -473,8 +531,10 @@ def find_engine_cmd():
 
 
 class LocalEngineLauncher(LocalProcessLauncher):
+    """Launch a single engine as a regular external process."""
 
     engine_cmd = List(find_engine_cmd())
+    # Command line arguments for ipengine.
     engine_args = List(
         ['--log-to-file','--log-level', '40'], config=True
     )
@@ -483,6 +543,7 @@ class LocalEngineLauncher(LocalProcessLauncher):
         return self.engine_cmd + self.engine_args
 
     def start(self, profile=None, cluster_dir=None):
+        """Start the engine by profile or cluster_dir."""
         if cluster_dir is not None:
             self.engine_args.extend(['--cluster-dir', cluster_dir])
         if profile is not None:
@@ -491,7 +552,9 @@ class LocalEngineLauncher(LocalProcessLauncher):
 
 class LocalEngineSetLauncher(BaseLauncher):
+    """Launch a set of engines as regular external processes."""
 
+    # Command line arguments for ipengine.
     engine_args = List(
         ['--log-to-file','--log-level', '40'], config=True
     )
@@ -503,6 +566,7 @@ class LocalEngineSetLauncher(BaseLauncher):
         self.launchers = []
 
     def start(self, n, profile=None, cluster_dir=None):
+        """Start n engines by profile or cluster_dir."""
         dlist = []
         for i in range(n):
             el = LocalEngineLauncher(self.working_dir, self)
@@ -551,12 +615,14 @@ class LocalEngineSetLauncher(BaseLauncher):
 
 class MPIExecEngineSetLauncher(MPIExecLauncher):
 
     engine_cmd = List(find_engine_cmd(), config=False)
+    # Command line arguments for ipengine.
     engine_args = List(
         ['--log-to-file','--log-level', '40'], config=True
     )
     n = Int(1, config=True)
 
     def start(self, n, profile=None, cluster_dir=None):
+        """Start n engines by profile or cluster_dir."""
         if cluster_dir is not None:
             self.engine_args.extend(['--cluster-dir', cluster_dir])
         if profile is not None:
@@ -575,7 +641,10 @@ class WindowsHPCEngineSetLauncher(WindowsHPCLauncher):
 
 class PBSEngineSetLauncher(PBSLauncher):
+    batch_file_name = Unicode(u'pbs_batch_script_engines', config=True)
+
     def start(self, n, profile=None, cluster_dir=None):
+        """Start n engines by profile or cluster_dir."""
         if cluster_dir is not None:
             self.program_args.extend(['--cluster-dir', cluster_dir])
         if profile is not None:
@@ -594,6 +663,7 @@ class SSHEngineSetLauncher(BaseLauncher):
 
 
 def find_ipcluster_cmd():
+    """Find the command line ipcluster program in a cross platform way."""
     if sys.platform == 'win32':
         # This logic is needed because the ipcluster script doesn't
         # always get installed in the same way or in the same location.
@@ -611,8 +681,10 @@
 
 
 class IPClusterLauncher(LocalProcessLauncher):
+    """Launch the ipcluster program in an external process."""
 
     ipcluster_cmd = List(find_ipcluster_cmd())
+    # Command line arguments to pass to ipcluster.
     ipcluster_args = List(
         ['--clean-logs', '--log-to-file', '--log-level', '40'], config=True)
     ipcluster_subcommand = Str('start')
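The BatchSystemLauncher docstring above describes the pattern that PBSLauncher follows: a subclass only fills in the submit and delete commands, the job id regexp and a batch template. A sketch of what a launcher for another scheduler could look like under that pattern; the SGE command names and the regexp are assumptions, not part of this patch, and the class would sit next to PBSLauncher in IPython/kernel/launcher.py, where Str and Unicode are already imported.

class SGELauncher(BatchSystemLauncher):
    """A hypothetical Sun Grid Engine launcher, following the PBSLauncher pattern."""

    # Command names are assumptions; match them to the scheduler actually in use.
    submit_command = Str('qsub', config=True)
    delete_command = Str('qdel', config=True)
    # parse_job_id() applies re.match with this pattern to the qsub output, so
    # the pattern must match from the start of that output.
    job_id_regexp = Str(r'\d+', config=True)
    batch_template = Str('', config=True)
    batch_file_name = Unicode(u'sge_batch_script', config=True)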