##// END OF EJS Templates
Work on default config files and docstrings....
Brian Granger -
Show More
@@ -2,66 +2,154 b' import os'
2 2
3 3 c = get_config()
4 4
5 # Options are:
6 # * LocalControllerLauncher
7 # * PBSControllerLauncher
5 #-----------------------------------------------------------------------------
6 # Select which launchers to use
7 #-----------------------------------------------------------------------------
8
9 # This allows you to control what method is used to start the controller
10 # and engines. The following methods are currently supported:
11 # * Start as a regular process on localhost.
12 # * Start using mpiexec.
13 # * Start using PBS
14 # * Start using SSH (currently broken)
15
16 # The selected launchers can be configured below.
17
18 # Options are (LocalControllerLauncher, MPIExecControllerLauncher,
19 # PBSControllerLauncher)
8 20 # c.Global.controller_launcher = 'IPython.kernel.launcher.LocalControllerLauncher'
9 21
10 # Options are:
11 # * LocalEngineSetLauncher
12 # * MPIExecEngineSetLauncher
13 # * PBSEngineSetLauncher
22 # Options are (LocalEngineSetLauncher, MPIExecEngineSetLauncher,
23 # PBSEngineSetLauncher)
14 24 # c.Global.engine_launcher = 'IPython.kernel.launcher.LocalEngineSetLauncher'
15 25
16 # c.Global.log_to_file = False
26 #-----------------------------------------------------------------------------
27 # Global configuration
28 #-----------------------------------------------------------------------------
29
30 # The default number of engines that will be started. This is overridden by
31 # the -n command line option: "ipcluster start -n 4"
17 32 # c.Global.n = 2
18 # c.Global.reset_config = False
19 # c.Global.clean_logs = True
20 33
21 # c.MPIExecLauncher.mpi_cmd = ['mpiexec']
22 # c.MPIExecLauncher.mpi_args = []
23 # c.MPIExecLauncher.program = []
24 # c.MPIExecLauncher.program_args = []
25 # c.MPIExecLauncher.n = 1
34 # Log to a file in cluster_dir/log, otherwise just log to sys.stdout.
35 # c.Global.log_to_file = False
26 36
27 # c.SSHLauncher.ssh_cmd = ['ssh']
28 # c.SSHLauncher.ssh_args = []
29 # c.SSHLauncher.program = []
30 # s.SSHLauncher.program_args = []
31 # c.SSHLauncher.hostname = ''
32 # c.SSHLauncher.user = os.environ['USER']
37 # Remove old logs from cluster_dir/log before starting.
38 # c.Global.clean_logs = True
33 39
34 # c.PBSLauncher.submit_command = 'qsub'
35 # c.PBSLauncher.delete_command = 'qdel'
36 # c.PBSLauncher.job_id_regexp = '\d+'
37 # c.PBSLauncher.batch_template = """"""
38 # c.PBSLauncher.batch_file_name = u'pbs_batch_script'
40 #-----------------------------------------------------------------------------
41 # Controller launcher configuration
42 #-----------------------------------------------------------------------------
39 43
40 # c.LocalControllerLauncher.controller_args = []
44 # Configure how the controller is started. The configuration of the controller
45 # can also be set up by editing the controller config file:
46 # ipcontroller_config.py
41 47
48 # The command line arguments to call the controller with.
49 # c.LocalControllerLauncher.controller_args = \
50 # ['--log-to-file','--log-level', '40']
51
52 # The mpiexec/mpirun command to use in starting the controller.
42 53 # c.MPIExecControllerLauncher.mpi_cmd = ['mpiexec']
54
55 # Additional arguments to pass to the actual mpiexec command.
43 56 # c.MPIExecControllerLauncher.mpi_args = []
44 # c.MPIExecControllerLauncher.controller_args = []
45 # c.MPIExecControllerLauncher.n = 1
46 57
58 # The command line argument to call the controller with.
59 # c.MPIExecControllerLauncher.controller_args = \
60 # ['--log-to-file','--log-level', '40']
61
62 # The command line program to use to submit a PBS job.
47 63 # c.PBSControllerLauncher.submit_command = 'qsub'
64
65 # The command line program to use to delete a PBS job.
48 66 # c.PBSControllerLauncher.delete_command = 'qdel'
67
68 # A regular expression that takes the output of qsub and finds the job id.
49 69 # c.PBSControllerLauncher.job_id_regexp = '\d+'
70
71 # The batch submission script used to start the controller. This is where
72 # environment variables would be setup, etc. This string is interpolated using
73 # the Itpl module in IPython.external. Basically, you can use ${profile} for
74 # the controller profile or ${cluster_dir} for the cluster_dir.
50 75 # c.PBSControllerLauncher.batch_template = """"""
51 # c.PBSLauncher.batch_file_name = u'pbs_batch_script'
52 76
53 # c.LocalEngineLauncher.engine_args = []
77 # The name of the instantiated batch script that will actually be used to
78 # submit the job. This will be written to the cluster directory.
79 # c.PBSControllerLauncher.batch_file_name = u'pbs_batch_script_controller'
80
81 #-----------------------------------------------------------------------------
82 # Engine launcher configuration
83 #-----------------------------------------------------------------------------
54 84
55 # c.LocalEngineSetLauncher.engine_args = []
85 # Command line argument passed to the engines.
86 # c.LocalEngineSetLauncher.engine_args = ['--log-to-file','--log-level', '40']
56 87
88 # The mpiexec/mpirun command to use in starting the engines.
57 89 # c.MPIExecEngineSetLauncher.mpi_cmd = ['mpiexec']
90
91 # Additional arguments to pass to the actual mpiexec command.
58 92 # c.MPIExecEngineSetLauncher.mpi_args = []
59 # c.MPIExecEngineSetLauncher.controller_args = []
93
94 # Command line argument passed to the engines.
95 # c.MPIExecEngineSetLauncher.engine_args = ['--log-to-file','--log-level', '40']
96
97 # The default number of engines to start if not given elsewhere.
60 98 # c.MPIExecEngineSetLauncher.n = 1
61 99
100 # The command line program to use to submit a PBS job.
62 101 # c.PBSEngineSetLauncher.submit_command = 'qsub'
102
103 # The command line program to use to delete a PBS job.
63 104 # c.PBSEngineSetLauncher.delete_command = 'qdel'
105
106 # A regular expression that takes the output of qsub and finds the job id.
64 107 # c.PBSEngineSetLauncher.job_id_regexp = '\d+'
108
109 # The batch submission script used to start the engines. This is where
110 # environment variables would be setup, etc. This string is interpolated using
111 # the Itpl module in IPython.external. Basically, you can use ${n} for the
112 # number of engines, ${profile} for the engine profile and ${cluster_dir}
113 # for the cluster_dir.
65 114 # c.PBSEngineSetLauncher.batch_template = """"""
66 # c.PBSEngineSetLauncher.batch_file_name = u'pbs_batch_script'
115
116 # The name of the instantiated batch script that will actually be used to
117 # submit the job. This will be written to the cluster directory.
118 # c.PBSEngineSetLauncher.batch_file_name = u'pbs_batch_script_engines'
119
120 #-----------------------------------------------------------------------------
121 # Base launcher configuration
122 #-----------------------------------------------------------------------------
123
124 # The various launchers are organized into an inheritance hierarchy.
125 # The configurations can also be inherited and the following attributes
126 # allow you to configure the base classes.
127
128 # c.MPIExecLauncher.mpi_cmd = ['mpiexec']
129 # c.MPIExecLauncher.mpi_args = []
130 # c.MPIExecLauncher.program = []
131 # c.MPIExecLauncher.program_args = []
132 # c.MPIExecLauncher.n = 1
133
134 # c.SSHLauncher.ssh_cmd = ['ssh']
135 # c.SSHLauncher.ssh_args = []
136 # c.SSHLauncher.program = []
137 # c.SSHLauncher.program_args = []
138 # c.SSHLauncher.hostname = ''
139 # c.SSHLauncher.user = os.environ['USER']
140
141 # c.BatchSystemLauncher.submit_command
142 # c.BatchSystemLauncher.delete_command
143 # c.BatchSystemLauncher.job_id_regexp
144 # c.BatchSystemLauncher.batch_template
145 # c.BatchSystemLauncher.batch_file_name
146
147 # c.PBSLauncher.submit_command = 'qsub'
148 # c.PBSLauncher.delete_command = 'qdel'
149 # c.PBSLauncher.job_id_regexp = '\d+'
150 # c.PBSLauncher.batch_template = """"""
151 # c.PBSLauncher.batch_file_name = u'pbs_batch_script'
152
153
154
67 155
@@ -7,10 +7,32 b' c = get_config()'
7 7 #-----------------------------------------------------------------------------
8 8
9 9 # Basic Global config attributes
10
11 # Start up messages are logged to stdout using the logging module.
12 # These all happen before the twisted reactor is started and are
13 # useful for debugging purposes. Can be (10=DEBUG,20=INFO,30=WARN,40=ERROR,50=CRITICAL)
14 # and smaller is more verbose.
15 # c.Global.log_level = 20
16
17 # Log to a file in cluster_dir/log, otherwise just log to sys.stdout.
10 18 # c.Global.log_to_file = False
19
20 # Remove old logs from cluster_dir/log before starting.
11 21 # c.Global.clean_logs = True
22
23 # A list of Python statements that will be run before starting the
24 # controller. This is provided because occasionally certain things need to
25 # be imported in the controller for pickling to work.
12 26 # c.Global.import_statements = ['import math']
27
28 # Reuse the controller's FURL files. If False, FURL files are regenerated
29 # each time the controller is run. If True, they will be reused, *but*, you
30 # also must set the network ports by hand. If set, this will override the
31 # values set for the client and engine connections below.
13 32 # c.Global.reuse_furls = True
33
34 # Enable SSL encryption on all connections to the controller. If set, this
35 # will override the values set for the client and engine connections below.
14 36 # c.Global.secure = True
15 37
16 38 #-----------------------------------------------------------------------------
@@ -18,13 +40,67 b' c = get_config()'
18 40 #-----------------------------------------------------------------------------
19 41
20 42 # Basic client service config attributes
43
44 # The network interface the controller will listen on for client connections.
45 # This should be an IP address or hostname of the controller's host. The empty
46 # string means listen on all interfaces.
21 47 # c.FCClientServiceFactory.ip = ''
48
49 # The TCP/IP port the controller will listen on for client connections. If 0
50 # a random port will be used. If the controller's host has a firewall running
51 # it must allow incoming traffic on this port.
22 52 # c.FCClientServiceFactory.port = 0
53
54 # The client learns how to connect to the controller by looking at the
55 # location field embedded in the FURL. If this field is empty, all network
56 # interfaces that the controller is listening on will be listed. To have the
57 # client connect on a particular interface, list it here.
23 58 # c.FCClientServiceFactory.location = ''
59
60 # Use SSL encryption for the client connection.
24 61 # c.FCClientServiceFactory.secure = True
62
63 # Reuse the client FURL each time the controller is started. If set, you must
64 # also pick a specific network port above (FCClientServiceFactory.port).
25 65 # c.FCClientServiceFactory.reuse_furls = False
26 66
27 # You shouldn't have to modify the rest of this section
67 #-----------------------------------------------------------------------------
68 # Configure the engine services
69 #-----------------------------------------------------------------------------
70
71 # Basic config attributes for the engine services.
72
73 # The network interface the controller will listen on for engine connections.
74 # This should be an IP address or hostname of the controller's host. The empty
75 # string means listen on all interfaces.
76 # c.FCEngineServiceFactory.ip = ''
77
78 # The TCP/IP port the controller will listen on for engine connections. If 0
79 # a random port will be used. If the controller's host has a firewall running
80 # it must allow incoming traffic on this port.
81 # c.FCEngineServiceFactory.port = 0
82
83 # The engine learns how to connect to the controller by looking at the
84 # location field embedded in the FURL. If this field is empty, all network
85 # interfaces that the controller is listening on will be listed. To have the
86 # engine connect on a particular interface, list it here.
87 # c.FCEngineServiceFactory.location = ''
88
89 # Use SSL encryption for the engine connection.
90 # c.FCEngineServiceFactory.secure = True
91
92 # Reuse the engine FURL each time the controller is started. If set, you must
93 # also pick a specific network port above (FCEngineServiceFactory.port).
94 # c.FCEngineServiceFactory.reuse_furls = False
95
96 #-----------------------------------------------------------------------------
97 # Developer level configuration attributes
98 #-----------------------------------------------------------------------------
99
100 # You shouldn't have to modify anything in this section. These attributes
101 # are more for developers who want to change the behavior of the controller
102 # at a fundamental level.
103
28 104 # c.FCClientServiceFactory.cert_file = 'ipcontroller-client.pem'
29 105
30 106 # default_client_interfaces = Config()
@@ -44,18 +120,6 b' c = get_config()'
44 120 #
45 121 # c.FCEngineServiceFactory.interfaces = default_client_interfaces
46 122
47 #-----------------------------------------------------------------------------
48 # Configure the engine services
49 #-----------------------------------------------------------------------------
50
51 # Basic config attributes for the engine services
52 # c.FCEngineServiceFactory.ip = ''
53 # c.FCEngineServiceFactory.port = 0
54 # c.FCEngineServiceFactory.location = ''
55 # c.FCEngineServiceFactory.secure = True
56 # c.FCEngineServiceFactory.reuse_furls = False
57
58 # You shouldn't have to modify the rest of this section
59 123 # c.FCEngineServiceFactory.cert_file = 'ipcontroller-engine.pem'
60 124
61 125 # default_engine_interfaces = Config()
@@ -1,23 +1,61 b''
1 1 c = get_config()
2 2
3 #-----------------------------------------------------------------------------
4 # Global configuration
5 #-----------------------------------------------------------------------------
6
7 # Start up messages are logged to stdout using the logging module.
8 # These all happen before the twisted reactor is started and are
9 # useful for debugging purposes. Can be (10=DEBUG,20=INFO,30=WARN,40=ERROR,50=CRITICAL)
10 # and smaller is more verbose.
11 # c.Global.log_level = 20
12
13 # Log to a file in cluster_dir/log, otherwise just log to sys.stdout.
3 14 # c.Global.log_to_file = False
4 # c.Global.clean_logs = False
15
16 # Remove old logs from cluster_dir/log before starting.
17 # c.Global.clean_logs = True
18
19 # A list of strings that will be executed in the user's namespace on the engine
20 # before it connects to the controller.
5 21 # c.Global.exec_lines = ['import numpy']
6 # c.Global.log_level = 10
7 # c.Global.shell_class = 'IPython.kernel.core.interpreter.Interpreter'
8 # c.Global.furl_file_name = 'ipcontroller-engine.furl'
9 # c.Global.furl_file = ''
10 # The max number of connection attemps and the initial delay between
22
23 # The engine will try to connect to the controller multiple times, to allow
24 # the controller time to startup and write its FURL file. These parameters
25 # control the number of retries (connect_max_tries) and the initial delay
26 # (connect_delay) between attempts. The actual delay between attempts gets
27 # longer each time by a factor of 1.5 (delay[i] = 1.5*delay[i-1])
11 28 # those attempts.
12 29 # c.Global.connect_delay = 0.1
13 30 # c.Global.connect_max_tries = 15
14 31
32 # By default, the engine will look for the controller's FURL file in its own
33 # cluster directory. Sometimes, the FURL file will be elsewhere and this
34 # attribute can be set to the full path of the FURL file.
35 # c.Global.furl_file = ''
36
37 #-----------------------------------------------------------------------------
38 # MPI configuration
39 #-----------------------------------------------------------------------------
15 40
41 # Upon starting the engine can be configured to call MPI_Init. This section
42 # configures that.
43
44 # Select which MPI section to execute to setup MPI. The value of this
45 # attribute must match the name of another attribute in the MPI config
46 # section (mpi4py, pytrilinos, etc.). This can also be set by the --mpi
47 # command line option.
16 48 # c.MPI.use = ''
49
50 # Initialize MPI using mpi4py. To use this, set c.MPI.use = 'mpi4py' or use
51 # --mpi=mpi4py at the command line.
17 52 # c.MPI.mpi4py = """from mpi4py import MPI as mpi
18 53 # mpi.size = mpi.COMM_WORLD.Get_size()
19 54 # mpi.rank = mpi.COMM_WORLD.Get_rank()
20 55 # """
56
57 # Initialize MPI using pytrilinos. To use this, set c.MPI.use = 'pytrilinos'
58 # or use --mpi=pytrilinos at the command line.
21 59 # c.MPI.pytrilinos = """from PyTrilinos import Epetra
22 60 # class SimpleStruct:
23 61 # pass
@@ -26,3 +64,23 b' c = get_config()'
26 64 # mpi.size = 0
27 65 # """
28 66
67 #-----------------------------------------------------------------------------
68 # Developer level configuration attributes
69 #-----------------------------------------------------------------------------
70
71 # You shouldn't have to modify anything in this section. These attributes
72 # are more for developers who want to change the behavior of the engine
73 # at a fundamental level.
74
75 # You should not have to change these attributes.
76
77 # c.Global.shell_class = 'IPython.kernel.core.interpreter.Interpreter'
78
79 # c.Global.furl_file_name = 'ipcontroller-engine.furl'
80
81
82
83
84
85
86
@@ -156,6 +156,11 b' class AsyncClientConnector(object):'
156 156 ipythondir : str
157 157 The location of the ipythondir if different from the default.
158 158 This is used if the cluster directory is being found by profile.
159 delay : float
160 The initial delay between re-connection attempts. Subsequent delays
161 get longer according to ``delay[i] = 1.5*delay[i-1]``.
162 max_tries : int
163 The max number of re-connection attempts.
159 164
160 165 Returns
161 166 -------
@@ -193,7 +198,12 b' class AsyncClientConnector(object):'
193 198 ipythondir : str
194 199 The location of the ipythondir if different from the default.
195 200 This is used if the cluster directory is being found by profile.
196
201 delay : float
202 The initial delay between re-connection attempts. Subsequent delays
203 get longer according to ``delay[i] = 1.5*delay[i-1]``.
204 max_tries : int
205 The max number of re-connection attempts.
206
197 207 Returns
198 208 -------
199 209 A deferred to the actual client class.
@@ -233,6 +243,11 b' class AsyncClientConnector(object):'
233 243 ipythondir : str
234 244 The location of the ipythondir if different from the default.
235 245 This is used if the cluster directory is being found by profile.
246 delay : float
247 The initial delay between re-connection attempts. Subsequent delays
248 get longer according to ``delay[i] = 1.5*delay[i-1]``.
249 max_tries : int
250 The max number of re-connection attempts.
236 251
237 252 Returns
238 253 -------
@@ -332,6 +347,11 b' class ClientConnector(object):'
332 347 ipythondir : str
333 348 The location of the ipythondir if different from the default.
334 349 This is used if the cluster directory is being found by profile.
350 delay : float
351 The initial delay between re-connection attempts. Subsequent delays
352 get longer according to ``delay[i] = 1.5*delay[i-1]``.
353 max_tries : int
354 The max number of re-connection attempts.
335 355
336 356 Returns
337 357 -------
@@ -368,6 +388,11 b' class ClientConnector(object):'
368 388 ipythondir : str
369 389 The location of the ipythondir if different from the default.
370 390 This is used if the cluster directory is being found by profile.
391 delay : float
392 The initial delay between re-connection attempts. Subsequent delays
393 get longer according to ``delay[i] = 1.5*delay[i-1]``.
394 max_tries : int
395 The max number of re-connection attempts.
371 396
372 397 Returns
373 398 -------
@@ -640,8 +665,22 b' class Cluster(object):'
640 665 def get_multiengine_client(self, delay=DELAY, max_tries=MAX_TRIES):
641 666 """Get the multiengine client for the running cluster.
642 667
643 If this fails, it means that the cluster has not finished starting.
644 Usually waiting a few seconds are re-trying will solve this.
668 This will attempt to connect to the controller multiple times. If this
669 fails altogether, try looking at the following:
670 * Make sure the controller is starting properly by looking at its
671 log files.
672 * Make sure the controller is writing its FURL file in the location
673 expected by the client.
674 * Make sure a firewall on the controller's host is not blocking the
675 client from connecting.
676
677 Parameters
678 ----------
679 delay : float
680 The initial delay between re-connection attempts. Subsequent delays
681 get longer according to ``delay[i] = 1.5*delay[i-1]``.
682 max_tries : int
683 The max number of re-connection attempts.
645 684 """
646 685 if self.client_connector is None:
647 686 self.client_connector = ClientConnector()
@@ -653,8 +692,22 b' class Cluster(object):'
653 692 def get_task_client(self, delay=DELAY, max_tries=MAX_TRIES):
654 693 """Get the task client for the running cluster.
655 694
656 If this fails, it means that the cluster has not finished starting.
657 Usually waiting a few seconds are re-trying will solve this.
695 This will attempt to connect to the controller multiple times. If this
696 fails altogether, try looking at the following:
697 * Make sure the controller is starting properly by looking at its
698 log files.
699 * Make sure the controller is writing its FURL file in the location
700 expected by the client.
701 * Make sure a firewall on the controller's host is not blocking the
702 client from connecting.
703
704 Parameters
705 ----------
706 delay : float
707 The initial delay between re-connection attempts. Subsequent delays
708 get longer according to ``delay[i] = 1.5*delay[i-1]``.
709 max_tries : int
710 The max number of re-connection attempts.
658 711 """
659 712 if self.client_connector is None:
660 713 self.client_connector = ClientConnector()
@@ -52,6 +52,8 b' class UnknownStatus(LauncherError):'
52 52 class BaseLauncher(Component):
53 53 """An abstraction for starting, stopping and signaling a process."""
54 54
55 # A directory for files related to the process. But, we don't cd to
56 # this directory.
55 57 working_dir = Unicode(u'')
56 58
57 59 def __init__(self, working_dir, parent=None, name=None, config=None):
@@ -64,11 +66,18 b' class BaseLauncher(Component):'
64 66
65 67 @property
66 68 def args(self):
67 """A list of cmd and args that will be used to start the process."""
69 """A list of cmd and args that will be used to start the process.
70
71 This is what is passed to :func:`spawnProcess` and the first element
72 will be the process name.
73 """
68 74 return self.find_args()
69 75
70 76 def find_args(self):
71 """The ``.args`` property calls this to find the args list."""
77 """The ``.args`` property calls this to find the args list.
78
79 Subclasses should implement this to construct the cmd and args.
80 """
72 81 raise NotImplementedError('find_args must be implemented in a subclass')
73 82
74 83 @property
@@ -78,6 +87,7 b' class BaseLauncher(Component):'
78 87
79 88 @property
80 89 def running(self):
90 """Am I running."""
81 91 if self.state == 'running':
82 92 return True
83 93 else:
@@ -87,7 +97,7 b' class BaseLauncher(Component):'
87 97 """Start the process.
88 98
89 99 This must return a deferred that fires with information about the
90 process starting (like a pid, job id, etc.)
100 process starting (like a pid, job id, etc.).
91 101 """
92 102 return defer.fail(
93 103 Failure(NotImplementedError(
@@ -96,12 +106,13 b' class BaseLauncher(Component):'
96 106 )
97 107
98 108 def stop(self):
99 """Stop the process and notify observers of ProcessStopped.
109 """Stop the process and notify observers of stopping.
100 110
101 This must return a deferred that fires with any errors that occur
102 while the process is attempting to be shut down. This deferred
103 won't fire when the process actually stops. These events are
104 handled by calling :func:`observe_stop`.
111 This must return a deferred that fires with information about the
112 process stopping, like errors that occur while the process is
113 attempting to be shut down. This deferred won't fire when the process
114 actually stops. To observe the actual process stopping, see
115 :func:`observe_stop`.
105 116 """
106 117 return defer.fail(
107 118 Failure(NotImplementedError(
@@ -123,9 +134,9 b' class BaseLauncher(Component):'
123 134 return d
124 135
125 136 def notify_start(self, data):
126 """Call this to tigger startup actions.
137 """Call this to trigger startup actions.
127 138
128 This logs the process startup and sets the state to running. It is
139 This logs the process startup and sets the state to 'running'. It is
129 140 a pass-through so it can be used as a callback.
130 141 """
131 142
@@ -135,7 +146,10 b' class BaseLauncher(Component):'
135 146 return data
136 147
137 148 def notify_stop(self, data):
138 """Call this to trigger all the deferreds from :func:`observe_stop`."""
149 """Call this to trigger process stop actions.
150
151 This logs the process stopping and sets the state to 'after'. Call
152 this to trigger all the deferreds from :func:`observe_stop`."""
139 153
140 154 log.msg('Process %r stopped: %r' % (self.args[0], data))
141 155 self.stop_data = data
@@ -205,6 +219,8 b' class LocalProcessLauncherProtocol(ProcessProtocol):'
205 219 class LocalProcessLauncher(BaseLauncher):
206 220 """Start and stop an external process in an asynchronous manner."""
207 221
222 # This is used to construct self.args, which is passed to
223 # spawnProcess.
208 224 cmd_and_args = List([])
209 225
210 226 def __init__(self, working_dir, parent=None, name=None, config=None):
@@ -246,24 +262,33 b' class LocalProcessLauncher(BaseLauncher):'
246 262
247 263 @inlineCallbacks
248 264 def interrupt_then_kill(self, delay=2.0):
265 """Send INT, wait a delay and then send KILL."""
249 266 yield self.signal('INT')
250 267 yield sleep_deferred(delay)
251 268 yield self.signal('KILL')
252 269
253 270
254 271 class MPIExecLauncher(LocalProcessLauncher):
272 """Launch an external process using mpiexec."""
255 273
274 # The mpiexec command to use in starting the process.
256 275 mpi_cmd = List(['mpiexec'], config=True)
276 # The command line arguments to pass to mpiexec.
257 277 mpi_args = List([], config=True)
278 # The program to start using mpiexec.
258 279 program = List(['date'], config=True)
280 # The command line argument to the program.
259 281 program_args = List([], config=True)
282 # The number of instances of the program to start.
260 283 n = Int(1, config=True)
261 284
262 285 def find_args(self):
286 """Build self.args using all the fields."""
263 287 return self.mpi_cmd + ['-n', self.n] + self.mpi_args + \
264 288 self.program + self.program_args
265 289
266 290 def start(self, n):
291 """Start n instances of the program using mpiexec."""
267 292 self.n = n
268 293 return super(MPIExecLauncher, self).start()
269 294
@@ -307,13 +332,32 b' class WindowsHPCLauncher(BaseLauncher):'
307 332
308 333
309 334 class BatchSystemLauncher(BaseLauncher):
335 """Launch an external process using a batch system.
336
337 This class is designed to work with UNIX batch systems like PBS, LSF,
338 GridEngine, etc. The overall model is that there are different commands
339 like qsub, qdel, etc. that handle the starting and stopping of the process.
340
341 This class also has the notion of a batch script. The ``batch_template``
342 attribute can be set to a string that is a template for the batch script.
343 This template is instantiated using Itpl. Thus the template can use
344 ${n} for the number of instances. Subclasses can add additional variables
345 to the template dict.
346 """
310 347
311 348 # Subclasses must fill these in. See PBSEngineSet
349 # The name of the command line program used to submit jobs.
312 350 submit_command = Str('', config=True)
351 # The name of the command line program used to delete jobs.
313 352 delete_command = Str('', config=True)
353 # A regular expression used to get the job id from the output of the
354 # submit_command.
314 355 job_id_regexp = Str('', config=True)
356 # The string that is the batch script template itself.
315 357 batch_template = Str('', config=True)
358 # The filename of the instantiated batch script.
316 359 batch_file_name = Unicode(u'batch_script', config=True)
360 # The full path to the instantiated batch script.
317 361 batch_file = Unicode(u'')
318 362
319 363 def __init__(self, working_dir, parent=None, name=None, config=None):
@@ -324,6 +368,7 b' class BatchSystemLauncher(BaseLauncher):'
324 368 self.context = {}
325 369
326 370 def parse_job_id(self, output):
371 """Take the output of the submit command and return the job id."""
327 372 m = re.match(self.job_id_regexp, output)
328 373 if m is not None:
329 374 job_id = m.group()
@@ -334,6 +379,7 b' class BatchSystemLauncher(BaseLauncher):'
334 379 return job_id
335 380
336 381 def write_batch_script(self, n):
382 """Instantiate and write the batch script to the working_dir."""
337 383 self.context['n'] = n
338 384 script_as_string = Itpl.itplns(self.batch_template, self.context)
339 385 log.msg('Writing instantiated batch script: %s' % self.batch_file)
@@ -361,6 +407,7 b' class BatchSystemLauncher(BaseLauncher):'
361 407
362 408
363 409 class PBSLauncher(BatchSystemLauncher):
410 """A BatchSystemLauncher subclass for PBS."""
364 411
365 412 submit_command = Str('qsub', config=True)
366 413 delete_command = Str('qdel', config=True)
@@ -375,6 +422,7 b' class PBSLauncher(BatchSystemLauncher):'
375 422 #-----------------------------------------------------------------------------
376 423
377 424 def find_controller_cmd():
425 """Find the command line ipcontroller program in a cross platform way."""
378 426 if sys.platform == 'win32':
379 427 # This logic is needed because the ipcontroller script doesn't
380 428 # always get installed in the same way or in the same location.
@@ -392,14 +440,17 b' def find_controller_cmd():'
392 440
393 441
394 442 class LocalControllerLauncher(LocalProcessLauncher):
443 """Launch a controller as a regular external process."""
395 444
396 445 controller_cmd = List(find_controller_cmd())
446 # Command line arguments to ipcontroller.
397 447 controller_args = List(['--log-to-file','--log-level', '40'], config=True)
398 448
399 449 def find_args(self):
400 450 return self.controller_cmd + self.controller_args
401 451
402 452 def start(self, profile=None, cluster_dir=None):
453 """Start the controller by profile or cluster_dir."""
403 454 if cluster_dir is not None:
404 455 self.controller_args.extend(['--cluster-dir', cluster_dir])
405 456 if profile is not None:
@@ -413,12 +464,15 b' class WindowsHPCControllerLauncher(WindowsHPCLauncher):'
413 464
414 465
415 466 class MPIExecControllerLauncher(MPIExecLauncher):
467 """Launch a controller using mpiexec."""
416 468
417 469 controller_cmd = List(find_controller_cmd(), config=False)
470 # Command line arguments to ipcontroller.
418 471 controller_args = List(['--log-to-file','--log-level', '40'], config=True)
419 472 n = Int(1, config=False)
420 473
421 474 def start(self, profile=None, cluster_dir=None):
475 """Start the controller by profile or cluster_dir."""
422 476 if cluster_dir is not None:
423 477 self.controller_args.extend(['--cluster-dir', cluster_dir])
424 478 if profile is not None:
@@ -426,15 +480,18 b' class MPIExecControllerLauncher(MPIExecLauncher):'
426 480 log.msg("Starting MPIExecControllerLauncher: %r" % self.args)
427 481 return super(MPIExecControllerLauncher, self).start(1)
428 482
429
430 483 def find_args(self):
431 484 return self.mpi_cmd + ['-n', self.n] + self.mpi_args + \
432 485 self.controller_cmd + self.controller_args
433 486
434 487
435 488 class PBSControllerLauncher(PBSLauncher):
489 """Launch a controller using PBS."""
490
491 batch_file_name = Unicode(u'pbs_batch_script_controller', config=True)
436 492
437 493 def start(self, profile=None, cluster_dir=None):
494 """Start the controller by profile or cluster_dir."""
438 495 # Here we save profile and cluster_dir in the context so they
439 496 # can be used in the batch script template as ${profile} and
440 497 # ${cluster_dir}
@@ -456,6 +513,7 b' class SSHControllerLauncher(SSHLauncher):'
456 513
457 514
458 515 def find_engine_cmd():
516 """Find the command line ipengine program in a cross platform way."""
459 517 if sys.platform == 'win32':
460 518 # This logic is needed because the ipengine script doesn't
461 519 # always get installed in the same way or in the same location.
@@ -473,8 +531,10 b' def find_engine_cmd():'
473 531
474 532
475 533 class LocalEngineLauncher(LocalProcessLauncher):
534 """Launch a single engine as a regular external process."""
476 535
477 536 engine_cmd = List(find_engine_cmd())
537 # Command line arguments for ipengine.
478 538 engine_args = List(
479 539 ['--log-to-file','--log-level', '40'], config=True
480 540 )
@@ -483,6 +543,7 b' class LocalEngineLauncher(LocalProcessLauncher):'
483 543 return self.engine_cmd + self.engine_args
484 544
485 545 def start(self, profile=None, cluster_dir=None):
546 """Start the engine by profile or cluster_dir."""
486 547 if cluster_dir is not None:
487 548 self.engine_args.extend(['--cluster-dir', cluster_dir])
488 549 if profile is not None:
@@ -491,7 +552,9 b' class LocalEngineLauncher(LocalProcessLauncher):'
491 552
492 553
493 554 class LocalEngineSetLauncher(BaseLauncher):
555 """Launch a set of engines as regular external processes."""
494 556
557 # Command line arguments for ipengine.
495 558 engine_args = List(
496 559 ['--log-to-file','--log-level', '40'], config=True
497 560 )
@@ -503,6 +566,7 b' class LocalEngineSetLauncher(BaseLauncher):'
503 566 self.launchers = []
504 567
505 568 def start(self, n, profile=None, cluster_dir=None):
569 """Start n engines by profile or cluster_dir."""
506 570 dlist = []
507 571 for i in range(n):
508 572 el = LocalEngineLauncher(self.working_dir, self)
@@ -551,12 +615,14 b' class LocalEngineSetLauncher(BaseLauncher):'
551 615 class MPIExecEngineSetLauncher(MPIExecLauncher):
552 616
553 617 engine_cmd = List(find_engine_cmd(), config=False)
618 # Command line arguments for ipengine.
554 619 engine_args = List(
555 620 ['--log-to-file','--log-level', '40'], config=True
556 621 )
557 622 n = Int(1, config=True)
558 623
559 624 def start(self, n, profile=None, cluster_dir=None):
625 """Start n engines by profile or cluster_dir."""
560 626 if cluster_dir is not None:
561 627 self.engine_args.extend(['--cluster-dir', cluster_dir])
562 628 if profile is not None:
@@ -575,7 +641,10 b' class WindowsHPCEngineSetLauncher(WindowsHPCLauncher):'
575 641
576 642 class PBSEngineSetLauncher(PBSLauncher):
577 643
644 batch_file_name = Unicode(u'pbs_batch_script_engines', config=True)
645
578 646 def start(self, n, profile=None, cluster_dir=None):
647 """Start n engines by profile or cluster_dir."""
579 648 if cluster_dir is not None:
580 649 self.program_args.extend(['--cluster-dir', cluster_dir])
581 650 if profile is not None:
@@ -594,6 +663,7 b' class SSHEngineSetLauncher(BaseLauncher):'
594 663
595 664
596 665 def find_ipcluster_cmd():
666 """Find the command line ipcluster program in a cross platform way."""
597 667 if sys.platform == 'win32':
598 668 # This logic is needed because the ipcluster script doesn't
599 669 # always get installed in the same way or in the same location.
@@ -611,8 +681,10 b' def find_ipcluster_cmd():'
611 681
612 682
613 683 class IPClusterLauncher(LocalProcessLauncher):
684 """Launch the ipcluster program in an external process."""
614 685
615 686 ipcluster_cmd = List(find_ipcluster_cmd())
687 # Command line arguments to pass to ipcluster.
616 688 ipcluster_args = List(
617 689 ['--clean-logs', '--log-to-file', '--log-level', '40'], config=True)
618 690 ipcluster_subcommand = Str('start')
General Comments 0
You need to be logged in to leave comments. Login now