##// END OF EJS Templates
Work on default config files and docstrings....
Brian Granger -
Show More
@@ -2,66 +2,154 b' import os'
2
2
3 c = get_config()
3 c = get_config()
4
4
5 # Options are:
5 #-----------------------------------------------------------------------------
6 # * LocalControllerLauncher
6 # Select which launchers to use
7 # * PBSControllerLauncher
7 #-----------------------------------------------------------------------------
8
9 # This allows you to control what method is used to start the controller
10 # and engines. The following methods are currently supported:
11 # * Start as a regular process on localhost.
12 # * Start using mpiexec.
13 # * Start using PBS
14 # * Start using SSH (currently broken)
15
16 # The selected launchers can be configured below.
17
18 # Options are (LocalControllerLauncher, MPIExecControllerLauncher,
19 # PBSControllerLauncher)
8 # c.Global.controller_launcher = 'IPython.kernel.launcher.LocalControllerLauncher'
20 # c.Global.controller_launcher = 'IPython.kernel.launcher.LocalControllerLauncher'
9
21
10 # Options are:
22 # Options are (LocalEngineSetLauncher, MPIExecEngineSetLauncher,
11 # * LocalEngineSetLauncher
23 # PBSEngineSetLauncher)
12 # * MPIExecEngineSetLauncher
13 # * PBSEngineSetLauncher
14 # c.Global.engine_launcher = 'IPython.kernel.launcher.LocalEngineSetLauncher'
24 # c.Global.engine_launcher = 'IPython.kernel.launcher.LocalEngineSetLauncher'
15
25
16 # c.Global.log_to_file = False
26 #-----------------------------------------------------------------------------
27 # Global configuration
28 #-----------------------------------------------------------------------------
29
30 # The default number of engines that will be started. This is overridden by
31 # the -n command line option: "ipcluster start -n 4"
17 # c.Global.n = 2
32 # c.Global.n = 2
18 # c.Global.reset_config = False
19 # c.Global.clean_logs = True
20
33
21 # c.MPIExecLauncher.mpi_cmd = ['mpiexec']
34 # Log to a file in cluster_dir/log, otherwise just log to sys.stdout.
22 # c.MPIExecLauncher.mpi_args = []
35 # c.Global.log_to_file = False
23 # c.MPIExecLauncher.program = []
24 # c.MPIExecLauncher.program_args = []
25 # c.MPIExecLauncher.n = 1
26
36
27 # c.SSHLauncher.ssh_cmd = ['ssh']
37 # Remove old logs from cluster_dir/log before starting.
28 # c.SSHLauncher.ssh_args = []
38 # c.Global.clean_logs = True
29 # c.SSHLauncher.program = []
30 # s.SSHLauncher.program_args = []
31 # c.SSHLauncher.hostname = ''
32 # c.SSHLauncher.user = os.environ['USER']
33
39
34 # c.PBSLauncher.submit_command = 'qsub'
40 #-----------------------------------------------------------------------------
35 # c.PBSLauncher.delete_command = 'qdel'
41 # Controller launcher configuration
36 # c.PBSLauncher.job_id_regexp = '\d+'
42 #-----------------------------------------------------------------------------
37 # c.PBSLauncher.batch_template = """"""
38 # c.PBSLauncher.batch_file_name = u'pbs_batch_script'
39
43
40 # c.LocalControllerLauncher.controller_args = []
44 # Configure how the controller is started. The configuration of the controller
45 # can also be set up by editing the controller config file:
46 # ipcontroller_config.py
41
47
48 # The command line arguments to call the controller with.
49 # c.LocalControllerLauncher.controller_args = \
50 # ['--log-to-file','--log-level', '40']
51
52 # The mpiexec/mpirun command to use in starting the controller.
42 # c.MPIExecControllerLauncher.mpi_cmd = ['mpiexec']
53 # c.MPIExecControllerLauncher.mpi_cmd = ['mpiexec']
54
55 # Additional arguments to pass to the actual mpiexec command.
43 # c.MPIExecControllerLauncher.mpi_args = []
56 # c.MPIExecControllerLauncher.mpi_args = []
44 # c.MPIExecControllerLauncher.controller_args = []
45 # c.MPIExecControllerLauncher.n = 1
46
57
58 # The command line argument to call the controller with.
59 # c.MPIExecControllerLauncher.controller_args = \
60 # ['--log-to-file','--log-level', '40']
61
62 # The command line program to use to submit a PBS job.
47 # c.PBSControllerLauncher.submit_command = 'qsub'
63 # c.PBSControllerLauncher.submit_command = 'qsub'
64
65 # The command line program to use to delete a PBS job.
48 # c.PBSControllerLauncher.delete_command = 'qdel'
66 # c.PBSControllerLauncher.delete_command = 'qdel'
67
68 # A regular expression that takes the output of qsub and finds the job id.
49 # c.PBSControllerLauncher.job_id_regexp = '\d+'
69 # c.PBSControllerLauncher.job_id_regexp = '\d+'
70
71 # The batch submission script used to start the controller. This is where
72 # environment variables would be setup, etc. This string is interpolated using
73 # the Itpl module in IPython.external. Basically, you can use ${profile} for
74 # the controller profile or ${cluster_dir} for the cluster_dir.
50 # c.PBSControllerLauncher.batch_template = """"""
75 # c.PBSControllerLauncher.batch_template = """"""
51 # c.PBSLauncher.batch_file_name = u'pbs_batch_script'
52
76
53 # c.LocalEngineLauncher.engine_args = []
77 # The name of the instantiated batch script that will actually be used to
78 # submit the job. This will be written to the cluster directory.
79 # c.PBSControllerLauncher.batch_file_name = u'pbs_batch_script_controller'
80
81 #-----------------------------------------------------------------------------
82 # Engine launcher configuration
83 #-----------------------------------------------------------------------------
54
84
55 # c.LocalEngineSetLauncher.engine_args = []
85 # Command line argument passed to the engines.
86 # c.LocalEngineSetLauncher.engine_args = ['--log-to-file','--log-level', '40']
56
87
88 # The mpiexec/mpirun command to use in starting the engines.
57 # c.MPIExecEngineSetLauncher.mpi_cmd = ['mpiexec']
89 # c.MPIExecEngineSetLauncher.mpi_cmd = ['mpiexec']
90
91 # Additional arguments to pass to the actual mpiexec command.
58 # c.MPIExecEngineSetLauncher.mpi_args = []
92 # c.MPIExecEngineSetLauncher.mpi_args = []
59 # c.MPIExecEngineSetLauncher.controller_args = []
93
94 # Command line argument passed to the engines.
95 # c.MPIExecEngineSetLauncher.engine_args = ['--log-to-file','--log-level', '40']
96
97 # The default number of engines to start if not given elsewhere.
60 # c.MPIExecEngineSetLauncher.n = 1
98 # c.MPIExecEngineSetLauncher.n = 1
61
99
100 # The command line program to use to submit a PBS job.
62 # c.PBSEngineSetLauncher.submit_command = 'qsub'
101 # c.PBSEngineSetLauncher.submit_command = 'qsub'
102
103 # The command line program to use to delete a PBS job.
63 # c.PBSEngineSetLauncher.delete_command = 'qdel'
104 # c.PBSEngineSetLauncher.delete_command = 'qdel'
105
106 # A regular expression that takes the output of qsub and finds the job id.
64 # c.PBSEngineSetLauncher.job_id_regexp = '\d+'
107 # c.PBSEngineSetLauncher.job_id_regexp = '\d+'
108
109 # The batch submission script used to start the engines. This is where
110 # environment variables would be setup, etc. This string is interpolated using
111 # the Itpl module in IPython.external. Basically, you can use ${n} for the
112 # number of engines, ${profile} for the engine profile and ${cluster_dir}
113 # for the cluster_dir.
65 # c.PBSEngineSetLauncher.batch_template = """"""
114 # c.PBSEngineSetLauncher.batch_template = """"""
66 # c.PBSEngineSetLauncher.batch_file_name = u'pbs_batch_script'
115
116 # The name of the instantiated batch script that will actually be used to
117 # submit the job. This will be written to the cluster directory.
118 # c.PBSEngineSetLauncher.batch_file_name = u'pbs_batch_script_engines'
119
120 #-----------------------------------------------------------------------------
121 # Base launcher configuration
122 #-----------------------------------------------------------------------------
123
124 # The various launchers are organized into an inheritance hierarchy.
125 # The configurations can also be inherited and the following attributes
126 # allow you to configure the base classes.
127
128 # c.MPIExecLauncher.mpi_cmd = ['mpiexec']
129 # c.MPIExecLauncher.mpi_args = []
130 # c.MPIExecLauncher.program = []
131 # c.MPIExecLauncher.program_args = []
132 # c.MPIExecLauncher.n = 1
133
134 # c.SSHLauncher.ssh_cmd = ['ssh']
135 # c.SSHLauncher.ssh_args = []
136 # c.SSHLauncher.program = []
137 # c.SSHLauncher.program_args = []
138 # c.SSHLauncher.hostname = ''
139 # c.SSHLauncher.user = os.environ['USER']
140
141 # c.BatchSystemLauncher.submit_command
142 # c.BatchSystemLauncher.delete_command
143 # c.BatchSystemLauncher.job_id_regexp
144 # c.BatchSystemLauncher.batch_template
145 # c.BatchSystemLauncher.batch_file_name
146
147 # c.PBSLauncher.submit_command = 'qsub'
148 # c.PBSLauncher.delete_command = 'qdel'
149 # c.PBSLauncher.job_id_regexp = '\d+'
150 # c.PBSLauncher.batch_template = """"""
151 # c.PBSLauncher.batch_file_name = u'pbs_batch_script'
152
153
154
67
155
@@ -7,10 +7,32 b' c = get_config()'
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8
8
9 # Basic Global config attributes
9 # Basic Global config attributes
10
11 # Start up messages are logged to stdout using the logging module.
12 # These all happen before the twisted reactor is started and are
13 # useful for debugging purposes. Can be (10=DEBUG,20=INFO,30=WARN,40=ERROR,50=CRITICAL)
14 # and smaller is more verbose.
15 # c.Global.log_level = 20
16
17 # Log to a file in cluster_dir/log, otherwise just log to sys.stdout.
10 # c.Global.log_to_file = False
18 # c.Global.log_to_file = False
19
20 # Remove old logs from cluster_dir/log before starting.
11 # c.Global.clean_logs = True
21 # c.Global.clean_logs = True
22
23 # A list of Python statements that will be run before starting the
24 # controller. This is provided because occasionally certain things need to
25 # be imported in the controller for pickling to work.
12 # c.Global.import_statements = ['import math']
26 # c.Global.import_statements = ['import math']
27
28 # Reuse the controller's FURL files. If False, FURL files are regenerated
29 # each time the controller is run. If True, they will be reused, *but*, you
30 # also must set the network ports by hand. If set, this will override the
31 # values set for the client and engine connections below.
13 # c.Global.reuse_furls = True
32 # c.Global.reuse_furls = True
33
34 # Enable SSL encryption on all connections to the controller. If set, this
35 # will override the values set for the client and engine connections below.
14 # c.Global.secure = True
36 # c.Global.secure = True
15
37
16 #-----------------------------------------------------------------------------
38 #-----------------------------------------------------------------------------
@@ -18,13 +40,67 b' c = get_config()'
18 #-----------------------------------------------------------------------------
40 #-----------------------------------------------------------------------------
19
41
20 # Basic client service config attributes
42 # Basic client service config attributes
43
44 # The network interface the controller will listen on for client connections.
45 # This should be an IP address or hostname of the controller's host. The empty
46 # string means listen on all interfaces.
21 # c.FCClientServiceFactory.ip = ''
47 # c.FCClientServiceFactory.ip = ''
48
49 # The TCP/IP port the controller will listen on for client connections. If 0
50 # a random port will be used. If the controller's host has a firewall running
51 # it must allow incoming traffic on this port.
22 # c.FCClientServiceFactory.port = 0
52 # c.FCClientServiceFactory.port = 0
53
54 # The client learns how to connect to the controller by looking at the
55 # location field embedded in the FURL. If this field is empty, all network
56 # interfaces that the controller is listening on will be listed. To have the
57 # client connect on a particular interface, list it here.
23 # c.FCClientServiceFactory.location = ''
58 # c.FCClientServiceFactory.location = ''
59
60 # Use SSL encryption for the client connection.
24 # c.FCClientServiceFactory.secure = True
61 # c.FCClientServiceFactory.secure = True
62
63 # Reuse the client FURL each time the controller is started. If set, you must
64 # also pick a specific network port above (FCClientServiceFactory.port).
25 # c.FCClientServiceFactory.reuse_furls = False
65 # c.FCClientServiceFactory.reuse_furls = False
26
66
27 # You shouldn't have to modify the rest of this section
67 #-----------------------------------------------------------------------------
68 # Configure the engine services
69 #-----------------------------------------------------------------------------
70
71 # Basic config attributes for the engine services.
72
73 # The network interface the controller will listen on for engine connections.
74 # This should be an IP address or hostname of the controller's host. The empty
75 # string means listen on all interfaces.
76 # c.FCEngineServiceFactory.ip = ''
77
78 # The TCP/IP port the controller will listen on for engine connections. If 0
79 # a random port will be used. If the controller's host has a firewall running
80 # it must allow incoming traffic on this port.
81 # c.FCEngineServiceFactory.port = 0
82
83 # The engine learns how to connect to the controller by looking at the
84 # location field embedded in the FURL. If this field is empty, all network
85 # interfaces that the controller is listening on will be listed. To have the
86 # engine connect on a particular interface, list it here.
87 # c.FCEngineServiceFactory.location = ''
88
89 # Use SSL encryption for the engine connection.
90 # c.FCEngineServiceFactory.secure = True
91
92 # Reuse the engine FURL each time the controller is started. If set, you must
93 # also pick a specific network port above (FCEngineServiceFactory.port).
94 # c.FCEngineServiceFactory.reuse_furls = False
95
96 #-----------------------------------------------------------------------------
97 # Developer level configuration attributes
98 #-----------------------------------------------------------------------------
99
100 # You shouldn't have to modify anything in this section. These attributes
101 # are more for developers who want to change the behavior of the controller
102 # at a fundamental level.
103
28 # c.FCClientServiceFactory.cert_file = 'ipcontroller-client.pem'
104 # c.FCClientServiceFactory.cert_file = 'ipcontroller-client.pem'
29
105
30 # default_client_interfaces = Config()
106 # default_client_interfaces = Config()
@@ -44,18 +120,6 b' c = get_config()'
44 #
120 #
45 # c.FCEngineServiceFactory.interfaces = default_client_interfaces
121 # c.FCEngineServiceFactory.interfaces = default_client_interfaces
46
122
47 #-----------------------------------------------------------------------------
48 # Configure the engine services
49 #-----------------------------------------------------------------------------
50
51 # Basic config attributes for the engine services
52 # c.FCEngineServiceFactory.ip = ''
53 # c.FCEngineServiceFactory.port = 0
54 # c.FCEngineServiceFactory.location = ''
55 # c.FCEngineServiceFactory.secure = True
56 # c.FCEngineServiceFactory.reuse_furls = False
57
58 # You shouldn't have to modify the rest of this section
59 # c.FCEngineServiceFactory.cert_file = 'ipcontroller-engine.pem'
123 # c.FCEngineServiceFactory.cert_file = 'ipcontroller-engine.pem'
60
124
61 # default_engine_interfaces = Config()
125 # default_engine_interfaces = Config()
@@ -1,23 +1,61 b''
1 c = get_config()
1 c = get_config()
2
2
3 #-----------------------------------------------------------------------------
4 # Global configuration
5 #-----------------------------------------------------------------------------
6
7 # Start up messages are logged to stdout using the logging module.
8 # These all happen before the twisted reactor is started and are
9 # useful for debugging purposes. Can be (10=DEBUG,20=INFO,30=WARN,40=ERROR,50=CRITICAL)
10 # and smaller is more verbose.
11 # c.Global.log_level = 20
12
13 # Log to a file in cluster_dir/log, otherwise just log to sys.stdout.
3 # c.Global.log_to_file = False
14 # c.Global.log_to_file = False
4 # c.Global.clean_logs = False
15
16 # Remove old logs from cluster_dir/log before starting.
17 # c.Global.clean_logs = True
18
19 # A list of strings that will be executed in the users namespace on the engine
20 # before it connects to the controller.
5 # c.Global.exec_lines = ['import numpy']
21 # c.Global.exec_lines = ['import numpy']
6 # c.Global.log_level = 10
22
7 # c.Global.shell_class = 'IPython.kernel.core.interpreter.Interpreter'
23 # The engine will try to connect to the controller multiple times, to allow
8 # c.Global.furl_file_name = 'ipcontroller-engine.furl'
24 # the controller time to startup and write its FURL file. These parameters
9 # c.Global.furl_file = ''
25 # control the number of retries (connect_max_tries) and the initial delay
10 # The max number of connection attempts and the initial delay between
26 # (connect_delay) between attempts. The actual delay between attempts gets
27 # longer each time by a factor of 1.5 (delay[i] = 1.5*delay[i-1])
11 # those attempts.
28 # those attempts.
12 # c.Global.connect_delay = 0.1
29 # c.Global.connect_delay = 0.1
13 # c.Global.connect_max_tries = 15
30 # c.Global.connect_max_tries = 15
14
31
32 # By default, the engine will look for the controller's FURL file in its own
33 # cluster directory. Sometimes, the FURL file will be elsewhere and this
34 # attribute can be set to the full path of the FURL file.
35 # c.Global.furl_file = ''
36
37 #-----------------------------------------------------------------------------
38 # MPI configuration
39 #-----------------------------------------------------------------------------
15
40
41 # Upon starting the engine can be configured to call MPI_Init. This section
42 # configures that.
43
44 # Select which MPI section to execute to setup MPI. The value of this
45 # attribute must match the name of another attribute in the MPI config
46 # section (mpi4py, pytrilinos, etc.). This can also be set by the --mpi
47 # command line option.
16 # c.MPI.use = ''
48 # c.MPI.use = ''
49
50 # Initialize MPI using mpi4py. To use this, set c.MPI.use = 'mpi4py' or use
51 # --mpi=mpi4py at the command line.
17 # c.MPI.mpi4py = """from mpi4py import MPI as mpi
52 # c.MPI.mpi4py = """from mpi4py import MPI as mpi
18 # mpi.size = mpi.COMM_WORLD.Get_size()
53 # mpi.size = mpi.COMM_WORLD.Get_size()
19 # mpi.rank = mpi.COMM_WORLD.Get_rank()
54 # mpi.rank = mpi.COMM_WORLD.Get_rank()
20 # """
55 # """
56
57 # Initialize MPI using pytrilinos. To use this, set c.MPI.use = 'pytrilinos'
58 # or use --mpi=pytrilinos at the command line.
21 # c.MPI.pytrilinos = """from PyTrilinos import Epetra
59 # c.MPI.pytrilinos = """from PyTrilinos import Epetra
22 # class SimpleStruct:
60 # class SimpleStruct:
23 # pass
61 # pass
@@ -26,3 +64,23 b' c = get_config()'
26 # mpi.size = 0
64 # mpi.size = 0
27 # """
65 # """
28
66
67 #-----------------------------------------------------------------------------
68 # Developer level configuration attributes
69 #-----------------------------------------------------------------------------
70
71 # You shouldn't have to modify anything in this section. These attributes
72 # are more for developers who want to change the behavior of the engine
73 # at a fundamental level.
74
75 # You should not have to change these attributes.
76
77 # c.Global.shell_class = 'IPython.kernel.core.interpreter.Interpreter'
78
79 # c.Global.furl_file_name = 'ipcontroller-engine.furl'
80
81
82
83
84
85
86
@@ -156,6 +156,11 b' class AsyncClientConnector(object):'
156 ipythondir : str
156 ipythondir : str
157 The location of the ipythondir if different from the default.
157 The location of the ipythondir if different from the default.
158 This is used if the cluster directory is being found by profile.
158 This is used if the cluster directory is being found by profile.
159 delay : float
160 The initial delay between re-connection attempts. Subsequent delays
161 get longer according to ``delay[i] = 1.5*delay[i-1]``.
162 max_tries : int
163 The max number of re-connection attempts.
159
164
160 Returns
165 Returns
161 -------
166 -------
@@ -193,7 +198,12 b' class AsyncClientConnector(object):'
193 ipythondir : str
198 ipythondir : str
194 The location of the ipythondir if different from the default.
199 The location of the ipythondir if different from the default.
195 This is used if the cluster directory is being found by profile.
200 This is used if the cluster directory is being found by profile.
196
201 delay : float
202 The initial delay between re-connection attempts. Subsequent delays
203 get longer according to ``delay[i] = 1.5*delay[i-1]``.
204 max_tries : int
205 The max number of re-connection attempts.
206
197 Returns
207 Returns
198 -------
208 -------
199 A deferred to the actual client class.
209 A deferred to the actual client class.
@@ -233,6 +243,11 b' class AsyncClientConnector(object):'
233 ipythondir : str
243 ipythondir : str
234 The location of the ipythondir if different from the default.
244 The location of the ipythondir if different from the default.
235 This is used if the cluster directory is being found by profile.
245 This is used if the cluster directory is being found by profile.
246 delay : float
247 The initial delay between re-connection attempts. Subsequent delays
248 get longer according to ``delay[i] = 1.5*delay[i-1]``.
249 max_tries : int
250 The max number of re-connection attempts.
236
251
237 Returns
252 Returns
238 -------
253 -------
@@ -332,6 +347,11 b' class ClientConnector(object):'
332 ipythondir : str
347 ipythondir : str
333 The location of the ipythondir if different from the default.
348 The location of the ipythondir if different from the default.
334 This is used if the cluster directory is being found by profile.
349 This is used if the cluster directory is being found by profile.
350 delay : float
351 The initial delay between re-connection attempts. Subsequent delays
352 get longer according to ``delay[i] = 1.5*delay[i-1]``.
353 max_tries : int
354 The max number of re-connection attempts.
335
355
336 Returns
356 Returns
337 -------
357 -------
@@ -368,6 +388,11 b' class ClientConnector(object):'
368 ipythondir : str
388 ipythondir : str
369 The location of the ipythondir if different from the default.
389 The location of the ipythondir if different from the default.
370 This is used if the cluster directory is being found by profile.
390 This is used if the cluster directory is being found by profile.
391 delay : float
392 The initial delay between re-connection attempts. Subsequent delays
393 get longer according to ``delay[i] = 1.5*delay[i-1]``.
394 max_tries : int
395 The max number of re-connection attempts.
371
396
372 Returns
397 Returns
373 -------
398 -------
@@ -640,8 +665,22 b' class Cluster(object):'
640 def get_multiengine_client(self, delay=DELAY, max_tries=MAX_TRIES):
665 def get_multiengine_client(self, delay=DELAY, max_tries=MAX_TRIES):
641 """Get the multiengine client for the running cluster.
666 """Get the multiengine client for the running cluster.
642
667
643 If this fails, it means that the cluster has not finished starting.
668 This will attempt to connect to the controller multiple times. If this
644 Usually waiting a few seconds are re-trying will solve this.
669 fails altogether, try looking at the following:
670 * Make sure the controller is starting properly by looking at its
671 log files.
672 * Make sure the controller is writing its FURL file in the location
673 expected by the client.
674 * Make sure a firewall on the controller's host is not blocking the
675 client from connecting.
676
677 Parameters
678 ----------
679 delay : float
680 The initial delay between re-connection attempts. Subsequent delays
681 get longer according to ``delay[i] = 1.5*delay[i-1]``.
682 max_tries : int
683 The max number of re-connection attempts.
645 """
684 """
646 if self.client_connector is None:
685 if self.client_connector is None:
647 self.client_connector = ClientConnector()
686 self.client_connector = ClientConnector()
@@ -653,8 +692,22 b' class Cluster(object):'
653 def get_task_client(self, delay=DELAY, max_tries=MAX_TRIES):
692 def get_task_client(self, delay=DELAY, max_tries=MAX_TRIES):
654 """Get the task client for the running cluster.
693 """Get the task client for the running cluster.
655
694
656 If this fails, it means that the cluster has not finished starting.
695 This will attempt to connect to the controller multiple times. If this
657 Usually waiting a few seconds are re-trying will solve this.
696 fails altogether, try looking at the following:
697 * Make sure the controller is starting properly by looking at its
698 log files.
699 * Make sure the controller is writing its FURL file in the location
700 expected by the client.
701 * Make sure a firewall on the controller's host is not blocking the
702 client from connecting.
703
704 Parameters
705 ----------
706 delay : float
707 The initial delay between re-connection attempts. Subsequent delays
708 get longer according to ``delay[i] = 1.5*delay[i-1]``.
709 max_tries : int
710 The max number of re-connection attempts.
658 """
711 """
659 if self.client_connector is None:
712 if self.client_connector is None:
660 self.client_connector = ClientConnector()
713 self.client_connector = ClientConnector()
@@ -52,6 +52,8 b' class UnknownStatus(LauncherError):'
52 class BaseLauncher(Component):
52 class BaseLauncher(Component):
53 """An abstraction for starting, stopping and signaling a process."""
53 """An abstraction for starting, stopping and signaling a process."""
54
54
55 # A directory for files related to the process. But, we don't cd to
56 # this directory.
55 working_dir = Unicode(u'')
57 working_dir = Unicode(u'')
56
58
57 def __init__(self, working_dir, parent=None, name=None, config=None):
59 def __init__(self, working_dir, parent=None, name=None, config=None):
@@ -64,11 +66,18 b' class BaseLauncher(Component):'
64
66
65 @property
67 @property
66 def args(self):
68 def args(self):
67 """A list of cmd and args that will be used to start the process."""
69 """A list of cmd and args that will be used to start the process.
70
71 This is what is passed to :func:`spawnProcess` and the first element
72 will be the process name.
73 """
68 return self.find_args()
74 return self.find_args()
69
75
70 def find_args(self):
76 def find_args(self):
71 """The ``.args`` property calls this to find the args list."""
77 """The ``.args`` property calls this to find the args list.
78
79 Subclasses should implement this to construct the cmd and args.
80 """
72 raise NotImplementedError('find_args must be implemented in a subclass')
81 raise NotImplementedError('find_args must be implemented in a subclass')
73
82
74 @property
83 @property
@@ -78,6 +87,7 b' class BaseLauncher(Component):'
78
87
79 @property
88 @property
80 def running(self):
89 def running(self):
90 """Am I running."""
81 if self.state == 'running':
91 if self.state == 'running':
82 return True
92 return True
83 else:
93 else:
@@ -87,7 +97,7 b' class BaseLauncher(Component):'
87 """Start the process.
97 """Start the process.
88
98
89 This must return a deferred that fires with information about the
99 This must return a deferred that fires with information about the
90 process starting (like a pid, job id, etc.)
100 process starting (like a pid, job id, etc.).
91 """
101 """
92 return defer.fail(
102 return defer.fail(
93 Failure(NotImplementedError(
103 Failure(NotImplementedError(
@@ -96,12 +106,13 b' class BaseLauncher(Component):'
96 )
106 )
97
107
98 def stop(self):
108 def stop(self):
99 """Stop the process and notify observers of ProcessStopped.
109 """Stop the process and notify observers of stopping.
100
110
101 This must return a deferred that fires with any errors that occur
111 This must return a deferred that fires with information about the
102 while the process is attempting to be shut down. This deferred
112 process stopping, like errors that occur while the process is
103 won't fire when the process actually stops. These events are
113 attempting to be shut down. This deferred won't fire when the process
104 handled by calling :func:`observe_stop`.
114 actually stops. To observe the actual process stopping, see
115 :func:`observe_stop`.
105 """
116 """
106 return defer.fail(
117 return defer.fail(
107 Failure(NotImplementedError(
118 Failure(NotImplementedError(
@@ -123,9 +134,9 b' class BaseLauncher(Component):'
123 return d
134 return d
124
135
125 def notify_start(self, data):
136 def notify_start(self, data):
126 """Call this to tigger startup actions.
137 """Call this to trigger startup actions.
127
138
128 This logs the process startup and sets the state to running. It is
139 This logs the process startup and sets the state to 'running'. It is
129 a pass-through so it can be used as a callback.
140 a pass-through so it can be used as a callback.
130 """
141 """
131
142
@@ -135,7 +146,10 b' class BaseLauncher(Component):'
135 return data
146 return data
136
147
137 def notify_stop(self, data):
148 def notify_stop(self, data):
138 """Call this to trigger all the deferreds from :func:`observe_stop`."""
149 """Call this to trigger process stop actions.
150
151 This logs the process stopping and sets the state to 'after'. Call
152 this to trigger all the deferreds from :func:`observe_stop`."""
139
153
140 log.msg('Process %r stopped: %r' % (self.args[0], data))
154 log.msg('Process %r stopped: %r' % (self.args[0], data))
141 self.stop_data = data
155 self.stop_data = data
@@ -205,6 +219,8 b' class LocalProcessLauncherProtocol(ProcessProtocol):'
205 class LocalProcessLauncher(BaseLauncher):
219 class LocalProcessLauncher(BaseLauncher):
206 """Start and stop an external process in an asynchronous manner."""
220 """Start and stop an external process in an asynchronous manner."""
207
221
222 # This is used to construct self.args, which is passed to
223 # spawnProcess.
208 cmd_and_args = List([])
224 cmd_and_args = List([])
209
225
210 def __init__(self, working_dir, parent=None, name=None, config=None):
226 def __init__(self, working_dir, parent=None, name=None, config=None):
@@ -246,24 +262,33 b' class LocalProcessLauncher(BaseLauncher):'
246
262
247 @inlineCallbacks
263 @inlineCallbacks
248 def interrupt_then_kill(self, delay=2.0):
264 def interrupt_then_kill(self, delay=2.0):
265 """Send INT, wait a delay and then send KILL."""
249 yield self.signal('INT')
266 yield self.signal('INT')
250 yield sleep_deferred(delay)
267 yield sleep_deferred(delay)
251 yield self.signal('KILL')
268 yield self.signal('KILL')
252
269
253
270
254 class MPIExecLauncher(LocalProcessLauncher):
271 class MPIExecLauncher(LocalProcessLauncher):
272 """Launch an external process using mpiexec."""
255
273
274 # The mpiexec command to use in starting the process.
256 mpi_cmd = List(['mpiexec'], config=True)
275 mpi_cmd = List(['mpiexec'], config=True)
276 # The command line arguments to pass to mpiexec.
257 mpi_args = List([], config=True)
277 mpi_args = List([], config=True)
278 # The program to start using mpiexec.
258 program = List(['date'], config=True)
279 program = List(['date'], config=True)
280 # The command line argument to the program.
259 program_args = List([], config=True)
281 program_args = List([], config=True)
282 # The number of instances of the program to start.
260 n = Int(1, config=True)
283 n = Int(1, config=True)
261
284
262 def find_args(self):
285 def find_args(self):
286 """Build self.args using all the fields."""
263 return self.mpi_cmd + ['-n', self.n] + self.mpi_args + \
287 return self.mpi_cmd + ['-n', self.n] + self.mpi_args + \
264 self.program + self.program_args
288 self.program + self.program_args
265
289
266 def start(self, n):
290 def start(self, n):
291 """Start n instances of the program using mpiexec."""
267 self.n = n
292 self.n = n
268 return super(MPIExecLauncher, self).start()
293 return super(MPIExecLauncher, self).start()
269
294
@@ -307,13 +332,32 b' class WindowsHPCLauncher(BaseLauncher):'
307
332
308
333
309 class BatchSystemLauncher(BaseLauncher):
334 class BatchSystemLauncher(BaseLauncher):
335 """Launch an external process using a batch system.
336
337 This class is designed to work with UNIX batch systems like PBS, LSF,
338 GridEngine, etc. The overall model is that there are different commands
339 like qsub, qdel, etc. that handle the starting and stopping of the process.
340
341 This class also has the notion of a batch script. The ``batch_template``
342 attribute can be set to a string that is a template for the batch script.
343 This template is instantiated using Itpl. Thus the template can use
344 ${n} for the number of instances. Subclasses can add additional variables
345 to the template dict.
346 """
310
347
311 # Subclasses must fill these in. See PBSEngineSet
348 # Subclasses must fill these in. See PBSEngineSet
349 # The name of the command line program used to submit jobs.
312 submit_command = Str('', config=True)
350 submit_command = Str('', config=True)
351 # The name of the command line program used to delete jobs.
313 delete_command = Str('', config=True)
352 delete_command = Str('', config=True)
353 # A regular expression used to get the job id from the output of the
354 # submit_command.
314 job_id_regexp = Str('', config=True)
355 job_id_regexp = Str('', config=True)
356 # The string that is the batch script template itself.
315 batch_template = Str('', config=True)
357 batch_template = Str('', config=True)
358 # The filename of the instantiated batch script.
316 batch_file_name = Unicode(u'batch_script', config=True)
359 batch_file_name = Unicode(u'batch_script', config=True)
360 # The full path to the instantiated batch script.
317 batch_file = Unicode(u'')
361 batch_file = Unicode(u'')
318
362
319 def __init__(self, working_dir, parent=None, name=None, config=None):
363 def __init__(self, working_dir, parent=None, name=None, config=None):
@@ -324,6 +368,7 b' class BatchSystemLauncher(BaseLauncher):'
324 self.context = {}
368 self.context = {}
325
369
326 def parse_job_id(self, output):
370 def parse_job_id(self, output):
371 """Take the output of the submit command and return the job id."""
327 m = re.match(self.job_id_regexp, output)
372 m = re.match(self.job_id_regexp, output)
328 if m is not None:
373 if m is not None:
329 job_id = m.group()
374 job_id = m.group()
@@ -334,6 +379,7 b' class BatchSystemLauncher(BaseLauncher):'
334 return job_id
379 return job_id
335
380
336 def write_batch_script(self, n):
381 def write_batch_script(self, n):
382 """Instantiate and write the batch script to the working_dir."""
337 self.context['n'] = n
383 self.context['n'] = n
338 script_as_string = Itpl.itplns(self.batch_template, self.context)
384 script_as_string = Itpl.itplns(self.batch_template, self.context)
339 log.msg('Writing instantiated batch script: %s' % self.batch_file)
385 log.msg('Writing instantiated batch script: %s' % self.batch_file)
@@ -361,6 +407,7 b' class BatchSystemLauncher(BaseLauncher):'
361
407
362
408
363 class PBSLauncher(BatchSystemLauncher):
409 class PBSLauncher(BatchSystemLauncher):
410 """A BatchSystemLauncher subclass for PBS."""
364
411
365 submit_command = Str('qsub', config=True)
412 submit_command = Str('qsub', config=True)
366 delete_command = Str('qdel', config=True)
413 delete_command = Str('qdel', config=True)
@@ -375,6 +422,7 b' class PBSLauncher(BatchSystemLauncher):'
375 #-----------------------------------------------------------------------------
422 #-----------------------------------------------------------------------------
376
423
377 def find_controller_cmd():
424 def find_controller_cmd():
425 """Find the command line ipcontroller program in a cross platform way."""
378 if sys.platform == 'win32':
426 if sys.platform == 'win32':
379 # This logic is needed because the ipcontroller script doesn't
427 # This logic is needed because the ipcontroller script doesn't
380 # always get installed in the same way or in the same location.
428 # always get installed in the same way or in the same location.
@@ -392,14 +440,17 b' def find_controller_cmd():'
392
440
393
441
394 class LocalControllerLauncher(LocalProcessLauncher):
442 class LocalControllerLauncher(LocalProcessLauncher):
443 """Launch a controller as a regular external process."""
395
444
396 controller_cmd = List(find_controller_cmd())
445 controller_cmd = List(find_controller_cmd())
446 # Command line arguments to ipcontroller.
397 controller_args = List(['--log-to-file','--log-level', '40'], config=True)
447 controller_args = List(['--log-to-file','--log-level', '40'], config=True)
398
448
399 def find_args(self):
449 def find_args(self):
400 return self.controller_cmd + self.controller_args
450 return self.controller_cmd + self.controller_args
401
451
402 def start(self, profile=None, cluster_dir=None):
452 def start(self, profile=None, cluster_dir=None):
453 """Start the controller by profile or cluster_dir."""
403 if cluster_dir is not None:
454 if cluster_dir is not None:
404 self.controller_args.extend(['--cluster-dir', cluster_dir])
455 self.controller_args.extend(['--cluster-dir', cluster_dir])
405 if profile is not None:
456 if profile is not None:
@@ -413,12 +464,15 b' class WindowsHPCControllerLauncher(WindowsHPCLauncher):'
413
464
414
465
415 class MPIExecControllerLauncher(MPIExecLauncher):
466 class MPIExecControllerLauncher(MPIExecLauncher):
467 """Launch a controller using mpiexec."""
416
468
417 controller_cmd = List(find_controller_cmd(), config=False)
469 controller_cmd = List(find_controller_cmd(), config=False)
470 # Command line arguments to ipcontroller.
418 controller_args = List(['--log-to-file','--log-level', '40'], config=True)
471 controller_args = List(['--log-to-file','--log-level', '40'], config=True)
419 n = Int(1, config=False)
472 n = Int(1, config=False)
420
473
421 def start(self, profile=None, cluster_dir=None):
474 def start(self, profile=None, cluster_dir=None):
475 """Start the controller by profile or cluster_dir."""
422 if cluster_dir is not None:
476 if cluster_dir is not None:
423 self.controller_args.extend(['--cluster-dir', cluster_dir])
477 self.controller_args.extend(['--cluster-dir', cluster_dir])
424 if profile is not None:
478 if profile is not None:
@@ -426,15 +480,18 b' class MPIExecControllerLauncher(MPIExecLauncher):'
426 log.msg("Starting MPIExecControllerLauncher: %r" % self.args)
480 log.msg("Starting MPIExecControllerLauncher: %r" % self.args)
427 return super(MPIExecControllerLauncher, self).start(1)
481 return super(MPIExecControllerLauncher, self).start(1)
428
482
429
430 def find_args(self):
483 def find_args(self):
431 return self.mpi_cmd + ['-n', self.n] + self.mpi_args + \
484 return self.mpi_cmd + ['-n', self.n] + self.mpi_args + \
432 self.controller_cmd + self.controller_args
485 self.controller_cmd + self.controller_args
433
486
434
487
435 class PBSControllerLauncher(PBSLauncher):
488 class PBSControllerLauncher(PBSLauncher):
489 """Launch a controller using PBS."""
490
491 batch_file_name = Unicode(u'pbs_batch_script_controller', config=True)
436
492
437 def start(self, profile=None, cluster_dir=None):
493 def start(self, profile=None, cluster_dir=None):
494 """Start the controller by profile or cluster_dir."""
438 # Here we save profile and cluster_dir in the context so they
495 # Here we save profile and cluster_dir in the context so they
439 # can be used in the batch script template as ${profile} and
496 # can be used in the batch script template as ${profile} and
440 # ${cluster_dir}
497 # ${cluster_dir}
@@ -456,6 +513,7 b' class SSHControllerLauncher(SSHLauncher):'
456
513
457
514
458 def find_engine_cmd():
515 def find_engine_cmd():
516 """Find the command line ipengine program in a cross platform way."""
459 if sys.platform == 'win32':
517 if sys.platform == 'win32':
460 # This logic is needed because the ipengine script doesn't
518 # This logic is needed because the ipengine script doesn't
461 # always get installed in the same way or in the same location.
519 # always get installed in the same way or in the same location.
@@ -473,8 +531,10 b' def find_engine_cmd():'
473
531
474
532
475 class LocalEngineLauncher(LocalProcessLauncher):
533 class LocalEngineLauncher(LocalProcessLauncher):
534 """Launch a single engine as a regular external process."""
476
535
477 engine_cmd = List(find_engine_cmd())
536 engine_cmd = List(find_engine_cmd())
537 # Command line arguments for ipengine.
478 engine_args = List(
538 engine_args = List(
479 ['--log-to-file','--log-level', '40'], config=True
539 ['--log-to-file','--log-level', '40'], config=True
480 )
540 )
@@ -483,6 +543,7 b' class LocalEngineLauncher(LocalProcessLauncher):'
483 return self.engine_cmd + self.engine_args
543 return self.engine_cmd + self.engine_args
484
544
485 def start(self, profile=None, cluster_dir=None):
545 def start(self, profile=None, cluster_dir=None):
546 """Start the engine by profile or cluster_dir."""
486 if cluster_dir is not None:
547 if cluster_dir is not None:
487 self.engine_args.extend(['--cluster-dir', cluster_dir])
548 self.engine_args.extend(['--cluster-dir', cluster_dir])
488 if profile is not None:
549 if profile is not None:
@@ -491,7 +552,9 b' class LocalEngineLauncher(LocalProcessLauncher):'
491
552
492
553
493 class LocalEngineSetLauncher(BaseLauncher):
554 class LocalEngineSetLauncher(BaseLauncher):
555 """Launch a set of engines as regular external processes."""
494
556
557 # Command line arguments for ipengine.
495 engine_args = List(
558 engine_args = List(
496 ['--log-to-file','--log-level', '40'], config=True
559 ['--log-to-file','--log-level', '40'], config=True
497 )
560 )
@@ -503,6 +566,7 b' class LocalEngineSetLauncher(BaseLauncher):'
503 self.launchers = []
566 self.launchers = []
504
567
505 def start(self, n, profile=None, cluster_dir=None):
568 def start(self, n, profile=None, cluster_dir=None):
569 """Start n engines by profile or cluster_dir."""
506 dlist = []
570 dlist = []
507 for i in range(n):
571 for i in range(n):
508 el = LocalEngineLauncher(self.working_dir, self)
572 el = LocalEngineLauncher(self.working_dir, self)
@@ -551,12 +615,14 b' class LocalEngineSetLauncher(BaseLauncher):'
551 class MPIExecEngineSetLauncher(MPIExecLauncher):
615 class MPIExecEngineSetLauncher(MPIExecLauncher):
552
616
553 engine_cmd = List(find_engine_cmd(), config=False)
617 engine_cmd = List(find_engine_cmd(), config=False)
618 # Command line arguments for ipengine.
554 engine_args = List(
619 engine_args = List(
555 ['--log-to-file','--log-level', '40'], config=True
620 ['--log-to-file','--log-level', '40'], config=True
556 )
621 )
557 n = Int(1, config=True)
622 n = Int(1, config=True)
558
623
559 def start(self, n, profile=None, cluster_dir=None):
624 def start(self, n, profile=None, cluster_dir=None):
625 """Start n engines by profile or cluster_dir."""
560 if cluster_dir is not None:
626 if cluster_dir is not None:
561 self.engine_args.extend(['--cluster-dir', cluster_dir])
627 self.engine_args.extend(['--cluster-dir', cluster_dir])
562 if profile is not None:
628 if profile is not None:
@@ -575,7 +641,10 b' class WindowsHPCEngineSetLauncher(WindowsHPCLauncher):'
575
641
576 class PBSEngineSetLauncher(PBSLauncher):
642 class PBSEngineSetLauncher(PBSLauncher):
577
643
644 batch_file_name = Unicode(u'pbs_batch_script_engines', config=True)
645
578 def start(self, n, profile=None, cluster_dir=None):
646 def start(self, n, profile=None, cluster_dir=None):
647 """Start n engines by profile or cluster_dir."""
579 if cluster_dir is not None:
648 if cluster_dir is not None:
580 self.program_args.extend(['--cluster-dir', cluster_dir])
649 self.program_args.extend(['--cluster-dir', cluster_dir])
581 if profile is not None:
650 if profile is not None:
@@ -594,6 +663,7 b' class SSHEngineSetLauncher(BaseLauncher):'
594
663
595
664
596 def find_ipcluster_cmd():
665 def find_ipcluster_cmd():
666 """Find the command line ipcluster program in a cross platform way."""
597 if sys.platform == 'win32':
667 if sys.platform == 'win32':
598 # This logic is needed because the ipcluster script doesn't
668 # This logic is needed because the ipcluster script doesn't
599 # always get installed in the same way or in the same location.
669 # always get installed in the same way or in the same location.
@@ -611,8 +681,10 b' def find_ipcluster_cmd():'
611
681
612
682
613 class IPClusterLauncher(LocalProcessLauncher):
683 class IPClusterLauncher(LocalProcessLauncher):
684 """Launch the ipcluster program in an external process."""
614
685
615 ipcluster_cmd = List(find_ipcluster_cmd())
686 ipcluster_cmd = List(find_ipcluster_cmd())
687 # Command line arguments to pass to ipcluster.
616 ipcluster_args = List(
688 ipcluster_args = List(
617 ['--clean-logs', '--log-to-file', '--log-level', '40'], config=True)
689 ['--clean-logs', '--log-to-file', '--log-level', '40'], config=True)
618 ipcluster_subcommand = Str('start')
690 ipcluster_subcommand = Str('start')
General Comments 0
You need to be logged in to leave comments. Login now