expand engine/controller_launcher_class helpstring and docs...
MinRK
@@ -1,535 +1,557 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # encoding: utf-8
2 # encoding: utf-8
3 """
3 """
4 The ipcluster application.
4 The ipcluster application.
5
5
6 Authors:
6 Authors:
7
7
8 * Brian Granger
8 * Brian Granger
9 * MinRK
9 * MinRK
10
10
11 """
11 """
12
12
13 #-----------------------------------------------------------------------------
13 #-----------------------------------------------------------------------------
14 # Copyright (C) 2008-2011 The IPython Development Team
14 # Copyright (C) 2008-2011 The IPython Development Team
15 #
15 #
16 # Distributed under the terms of the BSD License. The full license is in
16 # Distributed under the terms of the BSD License. The full license is in
17 # the file COPYING, distributed as part of this software.
17 # the file COPYING, distributed as part of this software.
18 #-----------------------------------------------------------------------------
18 #-----------------------------------------------------------------------------
19
19
20 #-----------------------------------------------------------------------------
20 #-----------------------------------------------------------------------------
21 # Imports
21 # Imports
22 #-----------------------------------------------------------------------------
22 #-----------------------------------------------------------------------------
23
23
24 import errno
24 import errno
25 import logging
25 import logging
26 import os
26 import os
27 import re
27 import re
28 import signal
28 import signal
29
29
30 from subprocess import check_call, CalledProcessError, PIPE
30 from subprocess import check_call, CalledProcessError, PIPE
31 import zmq
31 import zmq
32 from zmq.eventloop import ioloop
32 from zmq.eventloop import ioloop
33
33
34 from IPython.config.application import Application, boolean_flag
34 from IPython.config.application import Application, boolean_flag
35 from IPython.config.loader import Config
35 from IPython.config.loader import Config
36 from IPython.core.application import BaseIPythonApplication
36 from IPython.core.application import BaseIPythonApplication
37 from IPython.core.profiledir import ProfileDir
37 from IPython.core.profiledir import ProfileDir
38 from IPython.utils.daemonize import daemonize
38 from IPython.utils.daemonize import daemonize
39 from IPython.utils.importstring import import_item
39 from IPython.utils.importstring import import_item
40 from IPython.utils.sysinfo import num_cpus
40 from IPython.utils.sysinfo import num_cpus
41 from IPython.utils.traitlets import (Int, Unicode, Bool, CFloat, Dict, List, Any,
41 from IPython.utils.traitlets import (Int, Unicode, Bool, CFloat, Dict, List, Any,
42 DottedObjectName)
42 DottedObjectName)
43
43
44 from IPython.parallel.apps.baseapp import (
44 from IPython.parallel.apps.baseapp import (
45 BaseParallelApplication,
45 BaseParallelApplication,
46 PIDFileError,
46 PIDFileError,
47 base_flags, base_aliases
47 base_flags, base_aliases
48 )
48 )
49
49
50
50
51 #-----------------------------------------------------------------------------
51 #-----------------------------------------------------------------------------
52 # Module level variables
52 # Module level variables
53 #-----------------------------------------------------------------------------
53 #-----------------------------------------------------------------------------
54
54
55
55
56 default_config_file_name = u'ipcluster_config.py'
56 default_config_file_name = u'ipcluster_config.py'
57
57
58
58
59 _description = """Start an IPython cluster for parallel computing.
59 _description = """Start an IPython cluster for parallel computing.
60
60
61 An IPython cluster consists of 1 controller and 1 or more engines.
61 An IPython cluster consists of 1 controller and 1 or more engines.
62 This command automates the startup of these processes using a wide
62 This command automates the startup of these processes using a wide
63 range of startup methods (SSH, local processes, PBS, mpiexec,
63 range of startup methods (SSH, local processes, PBS, mpiexec,
64 Windows HPC Server 2008). To start a cluster with 4 engines on your
64 Windows HPC Server 2008). To start a cluster with 4 engines on your
65 local host simply do 'ipcluster start --n=4'. For more complex usage
65 local host simply do 'ipcluster start --n=4'. For more complex usage
66 you will typically do 'ipython profile create mycluster --parallel', then edit
66 you will typically do 'ipython profile create mycluster --parallel', then edit
67 configuration files, followed by 'ipcluster start --profile=mycluster --n=4'.
67 configuration files, followed by 'ipcluster start --profile=mycluster --n=4'.
68 """
68 """
69
69
70 _main_examples = """
70 _main_examples = """
71 ipcluster start --n=4 # start a 4 node cluster on localhost
71 ipcluster start --n=4 # start a 4 node cluster on localhost
72 ipcluster start -h # show the help string for the start subcmd
72 ipcluster start -h # show the help string for the start subcmd
73
73
74 ipcluster stop -h # show the help string for the stop subcmd
74 ipcluster stop -h # show the help string for the stop subcmd
75 ipcluster engines -h # show the help string for the engines subcmd
75 ipcluster engines -h # show the help string for the engines subcmd
76 """
76 """
77
77
78 _start_examples = """
78 _start_examples = """
79 ipython profile create mycluster --parallel # create mycluster profile
79 ipython profile create mycluster --parallel # create mycluster profile
80 ipcluster start --profile=mycluster --n=4 # start mycluster with 4 nodes
80 ipcluster start --profile=mycluster --n=4 # start mycluster with 4 nodes
81 """
81 """
82
82
83 _stop_examples = """
83 _stop_examples = """
84 ipcluster stop --profile=mycluster # stop a running cluster by profile name
84 ipcluster stop --profile=mycluster # stop a running cluster by profile name
85 """
85 """
86
86
87 _engines_examples = """
87 _engines_examples = """
88 ipcluster engines --profile=mycluster --n=4 # start 4 engines only
88 ipcluster engines --profile=mycluster --n=4 # start 4 engines only
89 """
89 """
90
90
91
91
92 # Exit codes for ipcluster
92 # Exit codes for ipcluster
93
93
94 # This will be the exit code if the ipcluster appears to be running because
94 # This will be the exit code if the ipcluster appears to be running because
95 # a .pid file exists
95 # a .pid file exists
96 ALREADY_STARTED = 10
96 ALREADY_STARTED = 10
97
97
98
98
99 # This will be the exit code if ipcluster stop is run, but there is no .pid
99 # This will be the exit code if ipcluster stop is run, but there is no .pid
100 # file to be found.
100 # file to be found.
101 ALREADY_STOPPED = 11
101 ALREADY_STOPPED = 11
102
102
103 # This will be the exit code if ipcluster engines is run, but there is no .pid
103 # This will be the exit code if ipcluster engines is run, but there is no .pid
104 # file to be found.
104 # file to be found.
105 NO_CLUSTER = 12
105 NO_CLUSTER = 12
106
106
107
107
108 #-----------------------------------------------------------------------------
108 #-----------------------------------------------------------------------------
109 # Main application
109 # Main application
110 #-----------------------------------------------------------------------------
110 #-----------------------------------------------------------------------------
111 start_help = """Start an IPython cluster for parallel computing
111 start_help = """Start an IPython cluster for parallel computing
112
112
113 Start an ipython cluster by its profile name or cluster
113 Start an ipython cluster by its profile name or cluster
114 directory. Cluster directories contain configuration, log and
114 directory. Cluster directories contain configuration, log and
115 security related files and are named using the convention
115 security related files and are named using the convention
116 'profile_<name>' and should be created using the 'start'
116 'profile_<name>' and should be created using the 'start'
117 subcommand of 'ipcluster'. If your cluster directory is in
117 subcommand of 'ipcluster'. If your cluster directory is in
118 the cwd or the ipython directory, you can simply refer to it
118 the cwd or the ipython directory, you can simply refer to it
119 using its profile name, 'ipcluster start --n=4 --profile=<profile>',
119 using its profile name, 'ipcluster start --n=4 --profile=<profile>',
120 otherwise use the 'profile-dir' option.
120 otherwise use the 'profile-dir' option.
121 """
121 """
122 stop_help = """Stop a running IPython cluster
122 stop_help = """Stop a running IPython cluster
123
123
124 Stop a running ipython cluster by its profile name or cluster
124 Stop a running ipython cluster by its profile name or cluster
125 directory. Cluster directories are named using the convention
125 directory. Cluster directories are named using the convention
126 'profile_<name>'. If your cluster directory is in
126 'profile_<name>'. If your cluster directory is in
127 the cwd or the ipython directory, you can simply refer to it
127 the cwd or the ipython directory, you can simply refer to it
128 using its profile name, 'ipcluster stop --profile=<profile>', otherwise
128 using its profile name, 'ipcluster stop --profile=<profile>', otherwise
129 use the '--profile-dir' option.
129 use the '--profile-dir' option.
130 """
130 """
131 engines_help = """Start engines connected to an existing IPython cluster
131 engines_help = """Start engines connected to an existing IPython cluster
132
132
133 Start one or more engines to connect to an existing Cluster
133 Start one or more engines to connect to an existing Cluster
134 by profile name or cluster directory.
134 by profile name or cluster directory.
135 Cluster directories contain configuration, log and
135 Cluster directories contain configuration, log and
136 security related files and are named using the convention
136 security related files and are named using the convention
137 'profile_<name>' and should be created using the 'start'
137 'profile_<name>' and should be created using the 'start'
138 subcommand of 'ipcluster'. If your cluster directory is in
138 subcommand of 'ipcluster'. If your cluster directory is in
139 the cwd or the ipython directory, you can simply refer to it
139 the cwd or the ipython directory, you can simply refer to it
140 using its profile name, 'ipcluster engines --n=4 --profile=<profile>',
140 using its profile name, 'ipcluster engines --n=4 --profile=<profile>',
141 otherwise use the 'profile-dir' option.
141 otherwise use the 'profile-dir' option.
142 """
142 """
143 stop_aliases = dict(
143 stop_aliases = dict(
144 signal='IPClusterStop.signal',
144 signal='IPClusterStop.signal',
145 )
145 )
146 stop_aliases.update(base_aliases)
146 stop_aliases.update(base_aliases)
147
147
148 class IPClusterStop(BaseParallelApplication):
148 class IPClusterStop(BaseParallelApplication):
149 name = u'ipcluster'
149 name = u'ipcluster'
150 description = stop_help
150 description = stop_help
151 examples = _stop_examples
151 examples = _stop_examples
152 config_file_name = Unicode(default_config_file_name)
152 config_file_name = Unicode(default_config_file_name)
153
153
154 signal = Int(signal.SIGINT, config=True,
154 signal = Int(signal.SIGINT, config=True,
155 help="signal to use for stopping processes.")
155 help="signal to use for stopping processes.")
156
156
157 aliases = Dict(stop_aliases)
157 aliases = Dict(stop_aliases)
158
158
159 def start(self):
159 def start(self):
160 """Start the app for the stop subcommand."""
160 """Start the app for the stop subcommand."""
161 try:
161 try:
162 pid = self.get_pid_from_file()
162 pid = self.get_pid_from_file()
163 except PIDFileError:
163 except PIDFileError:
164 self.log.critical(
164 self.log.critical(
165 'Could not read pid file, cluster is probably not running.'
165 'Could not read pid file, cluster is probably not running.'
166 )
166 )
167 # Here I exit with an unusual exit status that other processes
167 # Here I exit with an unusual exit status that other processes
168 # can watch for to learn how I exited.
168 # can watch for to learn how I exited.
169 self.remove_pid_file()
169 self.remove_pid_file()
170 self.exit(ALREADY_STOPPED)
170 self.exit(ALREADY_STOPPED)
171
171
172 if not self.check_pid(pid):
172 if not self.check_pid(pid):
173 self.log.critical(
173 self.log.critical(
174 'Cluster [pid=%r] is not running.' % pid
174 'Cluster [pid=%r] is not running.' % pid
175 )
175 )
176 self.remove_pid_file()
176 self.remove_pid_file()
177 # Here I exit with an unusual exit status that other processes
177 # Here I exit with an unusual exit status that other processes
178 # can watch for to learn how I exited.
178 # can watch for to learn how I exited.
179 self.exit(ALREADY_STOPPED)
179 self.exit(ALREADY_STOPPED)
180
180
181 elif os.name=='posix':
181 elif os.name=='posix':
182 sig = self.signal
182 sig = self.signal
183 self.log.info(
183 self.log.info(
184 "Stopping cluster [pid=%r] with [signal=%r]" % (pid, sig)
184 "Stopping cluster [pid=%r] with [signal=%r]" % (pid, sig)
185 )
185 )
186 try:
186 try:
187 os.kill(pid, sig)
187 os.kill(pid, sig)
188 except OSError:
188 except OSError:
189 self.log.error("Stopping cluster failed, assuming already dead.",
189 self.log.error("Stopping cluster failed, assuming already dead.",
190 exc_info=True)
190 exc_info=True)
191 self.remove_pid_file()
191 self.remove_pid_file()
192 elif os.name=='nt':
192 elif os.name=='nt':
193 try:
193 try:
194 # kill the whole tree
194 # kill the whole tree
195 p = check_call(['taskkill', '-pid', str(pid), '-t', '-f'], stdout=PIPE,stderr=PIPE)
195 p = check_call(['taskkill', '-pid', str(pid), '-t', '-f'], stdout=PIPE,stderr=PIPE)
196 except (CalledProcessError, OSError):
196 except (CalledProcessError, OSError):
197 self.log.error("Stopping cluster failed, assuming already dead.",
197 self.log.error("Stopping cluster failed, assuming already dead.",
198 exc_info=True)
198 exc_info=True)
199 self.remove_pid_file()
199 self.remove_pid_file()
200
200
201 engine_aliases = {}
201 engine_aliases = {}
202 engine_aliases.update(base_aliases)
202 engine_aliases.update(base_aliases)
203 engine_aliases.update(dict(
203 engine_aliases.update(dict(
204 n='IPClusterEngines.n',
204 n='IPClusterEngines.n',
205 engines = 'IPClusterEngines.engine_launcher_class',
205 engines = 'IPClusterEngines.engine_launcher_class',
206 daemonize = 'IPClusterEngines.daemonize',
206 daemonize = 'IPClusterEngines.daemonize',
207 ))
207 ))
208 engine_flags = {}
208 engine_flags = {}
209 engine_flags.update(base_flags)
209 engine_flags.update(base_flags)
210
210
211 engine_flags.update(dict(
211 engine_flags.update(dict(
212 daemonize=(
212 daemonize=(
213 {'IPClusterEngines' : {'daemonize' : True}},
213 {'IPClusterEngines' : {'daemonize' : True}},
214 """run the cluster into the background (not available on Windows)""",
214 """run the cluster into the background (not available on Windows)""",
215 )
215 )
216 ))
216 ))
217 class IPClusterEngines(BaseParallelApplication):
217 class IPClusterEngines(BaseParallelApplication):
218
218
219 name = u'ipcluster'
219 name = u'ipcluster'
220 description = engines_help
220 description = engines_help
221 examples = _engines_examples
221 examples = _engines_examples
222 usage = None
222 usage = None
223 config_file_name = Unicode(default_config_file_name)
223 config_file_name = Unicode(default_config_file_name)
224 default_log_level = logging.INFO
224 default_log_level = logging.INFO
225 classes = List()
225 classes = List()
226 def _classes_default(self):
226 def _classes_default(self):
227 from IPython.parallel.apps import launcher
227 from IPython.parallel.apps import launcher
228 launchers = launcher.all_launchers
228 launchers = launcher.all_launchers
229 eslaunchers = [ l for l in launchers if 'EngineSet' in l.__name__]
229 eslaunchers = [ l for l in launchers if 'EngineSet' in l.__name__]
230 return [ProfileDir]+eslaunchers
230 return [ProfileDir]+eslaunchers
231
231
232 n = Int(num_cpus(), config=True,
232 n = Int(num_cpus(), config=True,
233 help="""The number of engines to start. The default is to use one for each
233 help="""The number of engines to start. The default is to use one for each
234 CPU on your machine""")
234 CPU on your machine""")
235
235
236 engine_launcher = Any(config=True, help="Deprecated, use engine_launcher_class")
236 engine_launcher = Any(config=True, help="Deprecated, use engine_launcher_class")
237 def _engine_launcher_changed(self, name, old, new):
237 def _engine_launcher_changed(self, name, old, new):
238 if isinstance(new, basestring):
238 if isinstance(new, basestring):
239 self.log.warn("WARNING: %s.engine_launcher is deprecated as of 0.12,"
239 self.log.warn("WARNING: %s.engine_launcher is deprecated as of 0.12,"
240 " use engine_launcher_class" % self.__class__.__name__)
240 " use engine_launcher_class" % self.__class__.__name__)
241 self.engine_launcher_class = new
241 self.engine_launcher_class = new
242 engine_launcher_class = DottedObjectName('LocalEngineSetLauncher',
242 engine_launcher_class = DottedObjectName('LocalEngineSetLauncher',
243 config=True,
243 config=True,
244 help="""The class for launching a set of Engines. Change this value
244 help="""The class for launching a set of Engines. Change this value
245 to use various batch systems to launch your engines, such as PBS, SGE, MPIExec, etc.
245 to use various batch systems to launch your engines, such as PBS, SGE, MPIExec, etc.
246 Each launcher class has its own set of configuration options, for making sure
246 Each launcher class has its own set of configuration options, for making sure
247 it will work in your environment.
247 it will work in your environment.
248
248
249 You can also write your own launcher, and specify its absolute import path,
249 You can also write your own launcher, and specify its absolute import path,
250 as in 'mymodule.launcher.FTLEnginesLauncher'.
250 as in 'mymodule.launcher.FTLEnginesLauncher'.
251
251
252 Examples include:
252 Examples include:
253
253
254 LocalEngineSetLauncher : start engines locally as subprocesses [default]
254 LocalEngineSetLauncher : start engines locally as subprocesses [default]
255 MPIExecEngineSetLauncher : use mpiexec to launch in an MPI environment
255 MPIExecEngineSetLauncher : use mpiexec to launch in an MPI environment
256 PBSEngineSetLauncher : use PBS (qsub) to submit engines to a batch queue
256 PBSEngineSetLauncher : use PBS (qsub) to submit engines to a batch queue
257 SGEEngineSetLauncher : use SGE (qsub) to submit engines to a batch queue
257 SGEEngineSetLauncher : use SGE (qsub) to submit engines to a batch queue
258 LSFEngineSetLauncher : use LSF (bsub) to submit engines to a batch queue
258 SSHEngineSetLauncher : use SSH to start the engines
259 SSHEngineSetLauncher : use SSH to start the engines
259 Note that SSH does *not* move the connection files
260 Note that SSH does *not* move the connection files
260 around, so you will likely have to do this manually
261 around, so you will likely have to do this manually
261 unless the machines are on a shared file system.
262 unless the machines are on a shared file system.
262 WindowsHPCEngineSetLauncher : use Windows HPC
263 WindowsHPCEngineSetLauncher : use Windows HPC
264
265 If you are using one of IPython's builtin launchers, you can specify just the
266 prefix, e.g:
267
268 c.IPClusterEngines.engine_launcher_class = 'SSH'
269
270 or:
271
272 ipcluster start --engines 'MPIExec'
273
263 """
274 """
264 )
275 )
265 daemonize = Bool(False, config=True,
276 daemonize = Bool(False, config=True,
266 help="""Daemonize the ipcluster program. This implies --log-to-file.
277 help="""Daemonize the ipcluster program. This implies --log-to-file.
267 Not available on Windows.
278 Not available on Windows.
268 """)
279 """)
269
280
270 def _daemonize_changed(self, name, old, new):
281 def _daemonize_changed(self, name, old, new):
271 if new:
282 if new:
272 self.log_to_file = True
283 self.log_to_file = True
273
284
274 aliases = Dict(engine_aliases)
285 aliases = Dict(engine_aliases)
275 flags = Dict(engine_flags)
286 flags = Dict(engine_flags)
276 _stopping = False
287 _stopping = False
277
288
278 def initialize(self, argv=None):
289 def initialize(self, argv=None):
279 super(IPClusterEngines, self).initialize(argv)
290 super(IPClusterEngines, self).initialize(argv)
280 self.init_signal()
291 self.init_signal()
281 self.init_launchers()
292 self.init_launchers()
282
293
283 def init_launchers(self):
294 def init_launchers(self):
284 self.engine_launcher = self.build_launcher(self.engine_launcher_class, 'EngineSet')
295 self.engine_launcher = self.build_launcher(self.engine_launcher_class, 'EngineSet')
285 self.engine_launcher.on_stop(lambda r: self.loop.stop())
296 self.engine_launcher.on_stop(lambda r: self.loop.stop())
286
297
287 def init_signal(self):
298 def init_signal(self):
288 # Setup signals
299 # Setup signals
289 signal.signal(signal.SIGINT, self.sigint_handler)
300 signal.signal(signal.SIGINT, self.sigint_handler)
290
301
291 def build_launcher(self, clsname, kind=None):
302 def build_launcher(self, clsname, kind=None):
292 """import and instantiate a Launcher based on importstring"""
303 """import and instantiate a Launcher based on importstring"""
293 if '.' not in clsname:
304 if '.' not in clsname:
294 # not a module, presume it's the raw name in apps.launcher
305 # not a module, presume it's the raw name in apps.launcher
295 if kind and kind not in clsname:
306 if kind and kind not in clsname:
296 # doesn't match necessary full class name, assume it's
307 # doesn't match necessary full class name, assume it's
297 # just 'PBS' or 'MPIExec' prefix:
308 # just 'PBS' or 'MPIExec' prefix:
298 clsname = clsname + kind + 'Launcher'
309 clsname = clsname + kind + 'Launcher'
299 clsname = 'IPython.parallel.apps.launcher.'+clsname
310 clsname = 'IPython.parallel.apps.launcher.'+clsname
300 try:
311 try:
301 klass = import_item(clsname)
312 klass = import_item(clsname)
302 except (ImportError, KeyError):
313 except (ImportError, KeyError):
303 self.log.fatal("Could not import launcher class: %r"%clsname)
314 self.log.fatal("Could not import launcher class: %r"%clsname)
304 self.exit(1)
315 self.exit(1)
305
316
306 launcher = klass(
317 launcher = klass(
307 work_dir=u'.', config=self.config, log=self.log,
318 work_dir=u'.', config=self.config, log=self.log,
308 profile_dir=self.profile_dir.location, cluster_id=self.cluster_id,
319 profile_dir=self.profile_dir.location, cluster_id=self.cluster_id,
309 )
320 )
310 return launcher
321 return launcher
311
322
312 def start_engines(self):
323 def start_engines(self):
313 self.log.info("Starting %i engines"%self.n)
324 self.log.info("Starting %i engines"%self.n)
314 self.engine_launcher.start(self.n)
325 self.engine_launcher.start(self.n)
315
326
316 def stop_engines(self):
327 def stop_engines(self):
317 self.log.info("Stopping Engines...")
328 self.log.info("Stopping Engines...")
318 if self.engine_launcher.running:
329 if self.engine_launcher.running:
319 d = self.engine_launcher.stop()
330 d = self.engine_launcher.stop()
320 return d
331 return d
321 else:
332 else:
322 return None
333 return None
323
334
324 def stop_launchers(self, r=None):
335 def stop_launchers(self, r=None):
325 if not self._stopping:
336 if not self._stopping:
326 self._stopping = True
337 self._stopping = True
327 self.log.error("IPython cluster: stopping")
338 self.log.error("IPython cluster: stopping")
328 self.stop_engines()
339 self.stop_engines()
329 # Wait a few seconds to let things shut down.
340 # Wait a few seconds to let things shut down.
330 dc = ioloop.DelayedCallback(self.loop.stop, 4000, self.loop)
341 dc = ioloop.DelayedCallback(self.loop.stop, 4000, self.loop)
331 dc.start()
342 dc.start()
332
343
333 def sigint_handler(self, signum, frame):
344 def sigint_handler(self, signum, frame):
334 self.log.debug("SIGINT received, stopping launchers...")
345 self.log.debug("SIGINT received, stopping launchers...")
335 self.stop_launchers()
346 self.stop_launchers()
336
347
337 def start_logging(self):
348 def start_logging(self):
338 # Remove old log files of the controller and engine
349 # Remove old log files of the controller and engine
339 if self.clean_logs:
350 if self.clean_logs:
340 log_dir = self.profile_dir.log_dir
351 log_dir = self.profile_dir.log_dir
341 for f in os.listdir(log_dir):
352 for f in os.listdir(log_dir):
342 if re.match(r'ip(engine|controller)z-\d+\.(log|err|out)',f):
353 if re.match(r'ip(engine|controller)z-\d+\.(log|err|out)',f):
343 os.remove(os.path.join(log_dir, f))
354 os.remove(os.path.join(log_dir, f))
344 # This will remove old log files for ipcluster itself
355 # This will remove old log files for ipcluster itself
345 # super(IPBaseParallelApplication, self).start_logging()
356 # super(IPBaseParallelApplication, self).start_logging()
346
357
347 def start(self):
358 def start(self):
348 """Start the app for the engines subcommand."""
359 """Start the app for the engines subcommand."""
349 self.log.info("IPython cluster: started")
360 self.log.info("IPython cluster: started")
350 # First see if the cluster is already running
361 # First see if the cluster is already running
351
362
352 # Now log and daemonize
363 # Now log and daemonize
353 self.log.info(
364 self.log.info(
354 'Starting engines with [daemon=%r]' % self.daemonize
365 'Starting engines with [daemon=%r]' % self.daemonize
355 )
366 )
356 # TODO: Get daemonize working on Windows or as a Windows Server.
367 # TODO: Get daemonize working on Windows or as a Windows Server.
357 if self.daemonize:
368 if self.daemonize:
358 if os.name=='posix':
369 if os.name=='posix':
359 daemonize()
370 daemonize()
360
371
361 dc = ioloop.DelayedCallback(self.start_engines, 0, self.loop)
372 dc = ioloop.DelayedCallback(self.start_engines, 0, self.loop)
362 dc.start()
373 dc.start()
363 # Now write the new pid file AFTER our new forked pid is active.
374 # Now write the new pid file AFTER our new forked pid is active.
364 # self.write_pid_file()
375 # self.write_pid_file()
365 try:
376 try:
366 self.loop.start()
377 self.loop.start()
367 except KeyboardInterrupt:
378 except KeyboardInterrupt:
368 pass
379 pass
369 except zmq.ZMQError as e:
380 except zmq.ZMQError as e:
370 if e.errno == errno.EINTR:
381 if e.errno == errno.EINTR:
371 pass
382 pass
372 else:
383 else:
373 raise
384 raise
374
385
375 start_aliases = {}
386 start_aliases = {}
376 start_aliases.update(engine_aliases)
387 start_aliases.update(engine_aliases)
377 start_aliases.update(dict(
388 start_aliases.update(dict(
378 delay='IPClusterStart.delay',
389 delay='IPClusterStart.delay',
379 controller = 'IPClusterStart.controller_launcher_class',
390 controller = 'IPClusterStart.controller_launcher_class',
380 ))
391 ))
381 start_aliases['clean-logs'] = 'IPClusterStart.clean_logs'
392 start_aliases['clean-logs'] = 'IPClusterStart.clean_logs'
382
393
383 class IPClusterStart(IPClusterEngines):
394 class IPClusterStart(IPClusterEngines):
384
395
385 name = u'ipcluster'
396 name = u'ipcluster'
386 description = start_help
397 description = start_help
387 examples = _start_examples
398 examples = _start_examples
388 default_log_level = logging.INFO
399 default_log_level = logging.INFO
389 auto_create = Bool(True, config=True,
400 auto_create = Bool(True, config=True,
390 help="whether to create the profile_dir if it doesn't exist")
401 help="whether to create the profile_dir if it doesn't exist")
391 classes = List()
402 classes = List()
392 def _classes_default(self,):
403 def _classes_default(self,):
393 from IPython.parallel.apps import launcher
404 from IPython.parallel.apps import launcher
394 return [ProfileDir] + [IPClusterEngines] + launcher.all_launchers
405 return [ProfileDir] + [IPClusterEngines] + launcher.all_launchers
395
406
396 clean_logs = Bool(True, config=True,
407 clean_logs = Bool(True, config=True,
397 help="whether to cleanup old logs before starting")
408 help="whether to cleanup old logs before starting")
398
409
399 delay = CFloat(1., config=True,
410 delay = CFloat(1., config=True,
400 help="delay (in s) between starting the controller and the engines")
411 help="delay (in s) between starting the controller and the engines")
401
412
402 controller_launcher = Any(config=True, help="Deprecated, use controller_launcher_class")
413 controller_launcher = Any(config=True, help="Deprecated, use controller_launcher_class")
403 def _controller_launcher_changed(self, name, old, new):
414 def _controller_launcher_changed(self, name, old, new):
404 if isinstance(new, basestring):
415 if isinstance(new, basestring):
405 # old 0.11-style config
416 # old 0.11-style config
406 self.log.warn("WARNING: %s.controller_launcher is deprecated as of 0.12,"
417 self.log.warn("WARNING: %s.controller_launcher is deprecated as of 0.12,"
407 " use controller_launcher_class" % self.__class__.__name__)
418 " use controller_launcher_class" % self.__class__.__name__)
408 self.controller_launcher_class = new
419 self.controller_launcher_class = new
409 controller_launcher_class = DottedObjectName('LocalControllerLauncher',
420 controller_launcher_class = DottedObjectName('LocalControllerLauncher',
410 config=True,
421 config=True,
411 help="""The class for launching a Controller. Change this value if you want
422 help="""The class for launching a Controller. Change this value if you want
412 your controller to also be launched by a batch system, such as PBS, SGE, MPIExec, etc.
423 your controller to also be launched by a batch system, such as PBS, SGE, MPIExec, etc.
413
424
414 Each launcher class has its own set of configuration options, for making sure
425 Each launcher class has its own set of configuration options, for making sure
415 it will work in your environment.
426 it will work in your environment.
416
427
417 Examples include:
428 Examples include:
418
429
419 LocalControllerLauncher : start the controller locally as a subprocess
430 LocalControllerLauncher : start the controller locally as a subprocess
420 MPIExecControllerLauncher : use mpiexec to launch the controller in an MPI universe
431 MPIExecControllerLauncher : use mpiexec to launch the controller in an MPI universe
421 PBSControllerLauncher : use PBS (qsub) to submit the controller to a batch queue
432 PBSControllerLauncher : use PBS (qsub) to submit the controller to a batch queue
422 SGEControllerLauncher : use SGE (qsub) to submit the controller to a batch queue
433 SGEControllerLauncher : use SGE (qsub) to submit the controller to a batch queue
434 LSFControllerLauncher : use LSF (bsub) to submit the controller to a batch queue
423 SSHControllerLauncher : use SSH to start the controller
435 SSHControllerLauncher : use SSH to start the controller
424 WindowsHPCControllerLauncher : use Windows HPC
436 WindowsHPCControllerLauncher : use Windows HPC
437
438 If you are using one of IPython's builtin launchers, you can specify just the
439 prefix, e.g:
440
441 c.IPClusterStart.controller_launcher_class = 'SSH'
442
443 or:
444
445 ipcluster start --controller 'MPIExec'
446
425 """
447 """
426 )
448 )
427 reset = Bool(False, config=True,
449 reset = Bool(False, config=True,
428 help="Whether to reset config files as part of '--create'."
450 help="Whether to reset config files as part of '--create'."
429 )
451 )
430
452
431 # flags = Dict(flags)
453 # flags = Dict(flags)
432 aliases = Dict(start_aliases)
454 aliases = Dict(start_aliases)
433
455
434 def init_launchers(self):
456 def init_launchers(self):
435 self.controller_launcher = self.build_launcher(self.controller_launcher_class, 'Controller')
457 self.controller_launcher = self.build_launcher(self.controller_launcher_class, 'Controller')
436 self.engine_launcher = self.build_launcher(self.engine_launcher_class, 'EngineSet')
458 self.engine_launcher = self.build_launcher(self.engine_launcher_class, 'EngineSet')
437 self.controller_launcher.on_stop(self.stop_launchers)
459 self.controller_launcher.on_stop(self.stop_launchers)
438
460
439 def start_controller(self):
461 def start_controller(self):
440 self.controller_launcher.start()
462 self.controller_launcher.start()
441
463
442 def stop_controller(self):
464 def stop_controller(self):
443 # self.log.info("In stop_controller")
465 # self.log.info("In stop_controller")
444 if self.controller_launcher and self.controller_launcher.running:
466 if self.controller_launcher and self.controller_launcher.running:
445 return self.controller_launcher.stop()
467 return self.controller_launcher.stop()
446
468
447 def stop_launchers(self, r=None):
469 def stop_launchers(self, r=None):
448 if not self._stopping:
470 if not self._stopping:
449 self.stop_controller()
471 self.stop_controller()
450 super(IPClusterStart, self).stop_launchers()
472 super(IPClusterStart, self).stop_launchers()
451
473
452 def start(self):
474 def start(self):
453 """Start the app for the start subcommand."""
475 """Start the app for the start subcommand."""
454 # First see if the cluster is already running
476 # First see if the cluster is already running
455 try:
477 try:
456 pid = self.get_pid_from_file()
478 pid = self.get_pid_from_file()
457 except PIDFileError:
479 except PIDFileError:
458 pass
480 pass
459 else:
481 else:
460 if self.check_pid(pid):
482 if self.check_pid(pid):
461 self.log.critical(
483 self.log.critical(
462 'Cluster is already running with [pid=%s]. '
484 'Cluster is already running with [pid=%s]. '
463 'use "ipcluster stop" to stop the cluster.' % pid
485 'use "ipcluster stop" to stop the cluster.' % pid
464 )
486 )
465 # Here I exit with an unusual exit status that other processes
487 # Here I exit with an unusual exit status that other processes
466 # can watch for to learn how I exited.
488 # can watch for to learn how I exited.
467 self.exit(ALREADY_STARTED)
489 self.exit(ALREADY_STARTED)
468 else:
490 else:
469 self.remove_pid_file()
491 self.remove_pid_file()
470
492
471
493
472 # Now log and daemonize
494 # Now log and daemonize
473 self.log.info(
495 self.log.info(
474 'Starting ipcluster with [daemon=%r]' % self.daemonize
496 'Starting ipcluster with [daemon=%r]' % self.daemonize
475 )
497 )
476 # TODO: Get daemonize working on Windows or as a Windows Server.
498 # TODO: Get daemonize working on Windows or as a Windows Server.
477 if self.daemonize:
499 if self.daemonize:
478 if os.name=='posix':
500 if os.name=='posix':
479 daemonize()
501 daemonize()
480
502
481 dc = ioloop.DelayedCallback(self.start_controller, 0, self.loop)
503 dc = ioloop.DelayedCallback(self.start_controller, 0, self.loop)
482 dc.start()
504 dc.start()
483 dc = ioloop.DelayedCallback(self.start_engines, 1000*self.delay, self.loop)
505 dc = ioloop.DelayedCallback(self.start_engines, 1000*self.delay, self.loop)
484 dc.start()
506 dc.start()
485 # Now write the new pid file AFTER our new forked pid is active.
507 # Now write the new pid file AFTER our new forked pid is active.
486 self.write_pid_file()
508 self.write_pid_file()
487 try:
509 try:
488 self.loop.start()
510 self.loop.start()
489 except KeyboardInterrupt:
511 except KeyboardInterrupt:
490 pass
512 pass
491 except zmq.ZMQError as e:
513 except zmq.ZMQError as e:
492 if e.errno == errno.EINTR:
514 if e.errno == errno.EINTR:
493 pass
515 pass
494 else:
516 else:
495 raise
517 raise
496 finally:
518 finally:
497 self.remove_pid_file()
519 self.remove_pid_file()
498
520
499 base='IPython.parallel.apps.ipclusterapp.IPCluster'
521 base='IPython.parallel.apps.ipclusterapp.IPCluster'
500
522
501 class IPClusterApp(Application):
523 class IPClusterApp(Application):
502 name = u'ipcluster'
524 name = u'ipcluster'
503 description = _description
525 description = _description
504 examples = _main_examples
526 examples = _main_examples
505
527
506 subcommands = {
528 subcommands = {
507 'start' : (base+'Start', start_help),
529 'start' : (base+'Start', start_help),
508 'stop' : (base+'Stop', stop_help),
530 'stop' : (base+'Stop', stop_help),
509 'engines' : (base+'Engines', engines_help),
531 'engines' : (base+'Engines', engines_help),
510 }
532 }
511
533
512 # no aliases or flags for parent App
534 # no aliases or flags for parent App
513 aliases = Dict()
535 aliases = Dict()
514 flags = Dict()
536 flags = Dict()
515
537
516 def start(self):
538 def start(self):
517 if self.subapp is None:
539 if self.subapp is None:
518 print "No subcommand specified. Must specify one of: %s"%(self.subcommands.keys())
540 print "No subcommand specified. Must specify one of: %s"%(self.subcommands.keys())
519 print
541 print
520 self.print_description()
542 self.print_description()
521 self.print_subcommands()
543 self.print_subcommands()
522 self.exit(1)
544 self.exit(1)
523 else:
545 else:
524 return self.subapp.start()
546 return self.subapp.start()
525
547
526 def launch_new_instance():
548 def launch_new_instance():
527 """Create and run the IPython cluster."""
549 """Create and run the IPython cluster."""
528 app = IPClusterApp.instance()
550 app = IPClusterApp.instance()
529 app.initialize()
551 app.initialize()
530 app.start()
552 app.start()
531
553
532
554
533 if __name__ == '__main__':
555 if __name__ == '__main__':
534 launch_new_instance()
556 launch_new_instance()
535
557
@@ -1,734 +1,761 b''
1 .. _parallel_process:
1 .. _parallel_process:
2
2
3 ===========================================
3 ===========================================
4 Starting the IPython controller and engines
4 Starting the IPython controller and engines
5 ===========================================
5 ===========================================
6
6
7 To use IPython for parallel computing, you need to start one instance of
7 To use IPython for parallel computing, you need to start one instance of
8 the controller and one or more instances of the engine. The controller
8 the controller and one or more instances of the engine. The controller
9 and each engine can run on different machines or on the same machine.
9 and each engine can run on different machines or on the same machine.
10 Because of this, there are many different possibilities.
10 Because of this, there are many different possibilities.
11
11
12 Broadly speaking, there are two ways of going about starting a controller and engines:
12 Broadly speaking, there are two ways of going about starting a controller and engines:
13
13
14 * In an automated manner using the :command:`ipcluster` command.
14 * In an automated manner using the :command:`ipcluster` command.
15 * In a more manual way using the :command:`ipcontroller` and
15 * In a more manual way using the :command:`ipcontroller` and
16 :command:`ipengine` commands.
16 :command:`ipengine` commands.
17
17
18 This document describes both of these methods. We recommend that new users
18 This document describes both of these methods. We recommend that new users
19 start with the :command:`ipcluster` command as it simplifies many common usage
19 start with the :command:`ipcluster` command as it simplifies many common usage
20 cases.
20 cases.
21
21
22 General considerations
22 General considerations
23 ======================
23 ======================
24
24
25 Before delving into the details about how you can start a controller and
25 Before delving into the details about how you can start a controller and
26 engines using the various methods, we outline some of the general issues that
26 engines using the various methods, we outline some of the general issues that
27 come up when starting the controller and engines. These things come up no
27 come up when starting the controller and engines. These things come up no
28 matter which method you use to start your IPython cluster.
28 matter which method you use to start your IPython cluster.
29
29
30 If you are running engines on multiple machines, you will likely need to instruct the
30 If you are running engines on multiple machines, you will likely need to instruct the
31 controller to listen for connections on an external interface. This can be done by specifying
31 controller to listen for connections on an external interface. This can be done by specifying
32 the ``ip`` argument on the command-line, or the ``HubFactory.ip`` configurable in
32 the ``ip`` argument on the command-line, or the ``HubFactory.ip`` configurable in
33 :file:`ipcontroller_config.py`.
33 :file:`ipcontroller_config.py`.
34
34
35 If your machines are on a trusted network, you can safely instruct the controller to listen
35 If your machines are on a trusted network, you can safely instruct the controller to listen
36 on all public interfaces with::
36 on all public interfaces with::
37
37
38 $> ipcontroller --ip=*
38 $> ipcontroller --ip=*
39
39
40 Or you can set the same behavior as the default by adding the following line to your :file:`ipcontroller_config.py`:
40 Or you can set the same behavior as the default by adding the following line to your :file:`ipcontroller_config.py`:
41
41
42 .. sourcecode:: python
42 .. sourcecode:: python
43
43
44 c.HubFactory.ip = '*'
44 c.HubFactory.ip = '*'
45
45
46 .. note::
46 .. note::
47
47
48 Due to the lack of security in ZeroMQ, the controller will only listen for connections on
48 Due to the lack of security in ZeroMQ, the controller will only listen for connections on
49 localhost by default. If you see Timeout errors on engines or clients, then the first
49 localhost by default. If you see Timeout errors on engines or clients, then the first
50 thing you should check is the IP address the controller is listening on, and make sure
50 thing you should check is the IP address the controller is listening on, and make sure
51 that it is visible from the machine that is timing out.
51 that it is visible from the machine that is timing out.
52
52
53 .. seealso::
53 .. seealso::
54
54
55 Our `notes <parallel_security>`_ on security in the new parallel computing code.
55 Our `notes <parallel_security>`_ on security in the new parallel computing code.
56
56
57 Let's say that you want to start the controller on ``host0`` and engines on
57 Let's say that you want to start the controller on ``host0`` and engines on
58 hosts ``host1``-``hostn``. The following steps are then required:
58 hosts ``host1``-``hostn``. The following steps are then required:
59
59
60 1. Start the controller on ``host0`` by running :command:`ipcontroller` on
60 1. Start the controller on ``host0`` by running :command:`ipcontroller` on
61 ``host0``. The controller must be instructed to listen on an interface visible
61 ``host0``. The controller must be instructed to listen on an interface visible
62 to the engine machines, via the ``ip`` command-line argument or ``HubFactory.ip``
62 to the engine machines, via the ``ip`` command-line argument or ``HubFactory.ip``
63 in :file:`ipcontroller_config.py`.
63 in :file:`ipcontroller_config.py`.
64 2. Move the JSON file (:file:`ipcontroller-engine.json`) created by the
64 2. Move the JSON file (:file:`ipcontroller-engine.json`) created by the
65 controller from ``host0`` to hosts ``host1``-``hostn``.
65 controller from ``host0`` to hosts ``host1``-``hostn``.
66 3. Start the engines on hosts ``host1``-``hostn`` by running
66 3. Start the engines on hosts ``host1``-``hostn`` by running
67 :command:`ipengine`. This command has to be told where the JSON file
67 :command:`ipengine`. This command has to be told where the JSON file
68 (:file:`ipcontroller-engine.json`) is located.
68 (:file:`ipcontroller-engine.json`) is located.
69
69
70 At this point, the controller and engines will be connected. By default, the JSON files
70 At this point, the controller and engines will be connected. By default, the JSON files
71 created by the controller are put into the :file:`~/.ipython/profile_default/security`
71 created by the controller are put into the :file:`~/.ipython/profile_default/security`
72 directory. If the engines share a filesystem with the controller, step 2 can be skipped as
72 directory. If the engines share a filesystem with the controller, step 2 can be skipped as
73 the engines will automatically look at that location.
73 the engines will automatically look at that location.
74
74
75 The final step required to actually use the running controller from a client is to move
75 The final step required to actually use the running controller from a client is to move
76 the JSON file :file:`ipcontroller-client.json` from ``host0`` to any host where clients
76 the JSON file :file:`ipcontroller-client.json` from ``host0`` to any host where clients
77 will be run. If this file is put into the :file:`~/.ipython/profile_default/security`
77 will be run. If this file is put into the :file:`~/.ipython/profile_default/security`
78 directory of the client's host, it will be found automatically. Otherwise, the full path
78 directory of the client's host, it will be found automatically. Otherwise, the full path
79 to it has to be passed to the client's constructor.
79 to it has to be passed to the client's constructor.
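
For example, a minimal client-side check might look like the following (a sketch; the
path is illustrative and should point at wherever you copied the file):

.. sourcecode:: python

    from IPython.parallel import Client

    # pass the full path to the ipcontroller-client.json copied from host0
    rc = Client('/path/to/ipcontroller-client.json')
    print rc.ids    # the ids of the engines that have connected so far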
80
80
81 Using :command:`ipcluster`
81 Using :command:`ipcluster`
82 ===========================
82 ===========================
83
83
84 The :command:`ipcluster` command provides a simple way of starting a
84 The :command:`ipcluster` command provides a simple way of starting a
85 controller and engines in the following situations:
85 controller and engines in the following situations:
86
86
87 1. When the controller and engines are all run on localhost. This is useful
87 1. When the controller and engines are all run on localhost. This is useful
88 for testing or running on a multicore computer.
88 for testing or running on a multicore computer.
89 2. When engines are started using the :command:`mpiexec` command that comes
89 2. When engines are started using the :command:`mpiexec` command that comes
90 with most MPI [MPI]_ implementations
90 with most MPI [MPI]_ implementations
91 3. When engines are started using the PBS [PBS]_ batch system
91 3. When engines are started using the PBS [PBS]_ batch system
92 (or other `qsub` systems, such as SGE).
92 (or other `qsub` systems, such as SGE).
93 4. When the controller is started on localhost and the engines are started on
93 4. When the controller is started on localhost and the engines are started on
94 remote nodes using :command:`ssh`.
94 remote nodes using :command:`ssh`.
95 5. When engines are started using the Windows HPC Server batch system.
95 5. When engines are started using the Windows HPC Server batch system.
96
96
97 .. note::
97 .. note::
98
98
99 Currently :command:`ipcluster` requires that the
99 Currently :command:`ipcluster` requires that the
100 :file:`~/.ipython/profile_<name>/security` directory live on a shared filesystem that is
100 :file:`~/.ipython/profile_<name>/security` directory live on a shared filesystem that is
101 seen by both the controller and engines. If you don't have a shared file
101 seen by both the controller and engines. If you don't have a shared file
102 system you will need to use :command:`ipcontroller` and
102 system you will need to use :command:`ipcontroller` and
103 :command:`ipengine` directly.
103 :command:`ipengine` directly.
104
104
105 Under the hood, :command:`ipcluster` just uses :command:`ipcontroller`
105 Under the hood, :command:`ipcluster` just uses :command:`ipcontroller`
106 and :command:`ipengine` to perform the steps described above.
106 and :command:`ipengine` to perform the steps described above.
107
107
108 The simplest way to use ipcluster requires no configuration, and will
108 The simplest way to use ipcluster requires no configuration, and will
109 launch a controller and a number of engines on the local machine. For instance,
109 launch a controller and a number of engines on the local machine. For instance,
110 to start one controller and 4 engines on localhost, just do::
110 to start one controller and 4 engines on localhost, just do::
111
111
112 $ ipcluster start -n 4
112 $ ipcluster start -n 4
113
113
114 To see other command line options, do::
114 To see other command line options, do::
115
115
116 $ ipcluster -h
116 $ ipcluster -h
117
117
118
118
119 Configuring an IPython cluster
119 Configuring an IPython cluster
120 ==============================
120 ==============================
121
121
122 Cluster configurations are stored as `profiles`. You can create a new profile with::
122 Cluster configurations are stored as `profiles`. You can create a new profile with::
123
123
124 $ ipython profile create --parallel --profile=myprofile
124 $ ipython profile create --parallel --profile=myprofile
125
125
126 This will create the directory :file:`IPYTHONDIR/profile_myprofile`, and populate it
126 This will create the directory :file:`IPYTHONDIR/profile_myprofile`, and populate it
127 with the default configuration files for the three IPython cluster commands. Once
127 with the default configuration files for the three IPython cluster commands. Once
128 you edit those files, you can continue to call ipcluster/ipcontroller/ipengine
128 you edit those files, you can continue to call ipcluster/ipcontroller/ipengine
129 with no arguments beyond ``profile=myprofile``, and any configuration will be maintained.
129 with no arguments beyond ``profile=myprofile``, and any configuration will be maintained.
130
130
131 There is no limit to the number of profiles you can have, so you can maintain a profile for each
131 There is no limit to the number of profiles you can have, so you can maintain a profile for each
132 of your common use cases. The default profile will be used whenever the
132 of your common use cases. The default profile will be used whenever the
133 profile argument is not specified, so edit :file:`IPYTHONDIR/profile_default/*_config.py` to
133 profile argument is not specified, so edit :file:`IPYTHONDIR/profile_default/*_config.py` to
134 represent your most common use case.
134 represent your most common use case.
135
135
136 The configuration files are loaded with commented-out settings and explanations,
136 The configuration files are loaded with commented-out settings and explanations,
137 which should cover most of the available possibilities.
137 which should cover most of the available possibilities.
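
For illustration, a profile's :file:`ipcluster_config.py` might pin down just a couple of
settings (a sketch with made-up values, not the generated defaults):

.. sourcecode:: python

    # IPYTHONDIR/profile_myprofile/ipcluster_config.py
    c = get_config()

    # always start 8 engines with this profile
    c.IPClusterEngines.n = 8

    # wait 2 seconds between starting the controller and the engines
    c.IPClusterStart.delay = 2.0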
138
138
139 Using various batch systems with :command:`ipcluster`
139 Using various batch systems with :command:`ipcluster`
140 -----------------------------------------------------
140 -----------------------------------------------------
141
141
142 :command:`ipcluster` has a notion of Launchers that can start controllers
142 :command:`ipcluster` has a notion of Launchers that can start controllers
143 and engines with various remote execution schemes. Currently supported
143 and engines with various remote execution schemes. Currently supported
144 models include :command:`ssh`, :command:`mpiexec`, PBS-style (Torque, SGE),
144 models include :command:`ssh`, :command:`mpiexec`, PBS-style (Torque, SGE, LSF),
145 and Windows HPC Server.
145 and Windows HPC Server.
146
146
147 In general, these are configured by the :attr:`IPClusterEngines.engine_launcher_class`
148 and :attr:`IPClusterStart.controller_launcher_class` configurables, which can be the
149 fully specified object name (e.g. ``'IPython.parallel.apps.launcher.LocalControllerLauncher'``),
150 but if you are using IPython's builtin launchers, you can specify just the class name,
151 or even just the prefix, e.g.:
152
153 .. sourcecode:: python
154
155 c.IPClusterEngines.engine_launcher_class = 'SSH'
156 # equivalent to
157 c.IPClusterEngines.engine_launcher_class = 'SSHEngineSetLauncher'
158 # both of which expand to
159 c.IPClusterEngines.engine_launcher_class = 'IPython.parallel.apps.launcher.SSHEngineSetLauncher'
160
161 The shortest form is particularly useful on the command line, where all you need to do to
162 get an IPython cluster running with MPI-launched engines is:
163
164 .. sourcecode:: bash
165
166 $> ipcluster start --engines=MPIExec
167
168 This assumes that the default MPI configuration is sufficient.
169
170 .. note::
171
172 Shortcuts for builtin launcher names were added in 0.12, as was the ``_class`` suffix
173 on the configurable names. If you use the old 0.11 names (e.g. ``engine_launcher``),
174 they will still work, but you will get a deprecation warning that the name has changed.
175
176
147 .. note::
177 .. note::
148
178
149 The Launchers and configuration are designed in such a way that advanced
179 The Launchers and configuration are designed in such a way that advanced
150 users can subclass and configure them to fit systems that we do not yet
180 users can subclass and configure them to fit systems that we do not yet
151 support (such as Condor); a rough sketch of such a launcher follows below.
181 support (such as Condor); a rough sketch of such a launcher follows below.
152
182
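
As a very rough sketch (the class below is hypothetical and untested; the attribute
values are assumptions about what a Condor port might override, not a supported
launcher), a custom launcher could start from the generic batch-system launcher:

.. sourcecode:: python

    # mylaunchers.py -- an illustrative, untested sketch of a custom launcher
    from IPython.utils.traitlets import List, Unicode
    from IPython.parallel.apps.launcher import BatchSystemLauncher

    class CondorEngineSetLauncher(BatchSystemLauncher):
        """Hypothetical Condor engine launcher.

        A real implementation also needs a Condor-specific batch_template and
        job_id_regexp, since condor_submit expects a submit description file
        rather than a shell script.
        """
        submit_command = List(['condor_submit'], config=True)
        delete_command = List(['condor_rm'], config=True)
        batch_file_name = Unicode(u'condor_engines.submit', config=True)

Such a class would then be selected by its full import path, e.g.
``c.IPClusterEngines.engine_launcher_class = 'mylaunchers.CondorEngineSetLauncher'``.
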
153 Using :command:`ipcluster` in mpiexec/mpirun mode
183 Using :command:`ipcluster` in mpiexec/mpirun mode
154 --------------------------------------------------
184 --------------------------------------------------
155
185
156
186
157 The mpiexec/mpirun mode is useful if you:
187 The mpiexec/mpirun mode is useful if you:
158
188
159 1. Have MPI installed.
189 1. Have MPI installed.
160 2. Your systems are configured to use the :command:`mpiexec` or
190 2. Your systems are configured to use the :command:`mpiexec` or
161 :command:`mpirun` commands to start MPI processes.
191 :command:`mpirun` commands to start MPI processes.
162
192
163 If these are satisfied, you can create a new profile::
193 If these are satisfied, you can create a new profile::
164
194
165 $ ipython profile create --parallel --profile=mpi
195 $ ipython profile create --parallel --profile=mpi
166
196
167 and edit the file :file:`IPYTHONDIR/profile_mpi/ipcluster_config.py`.
197 and edit the file :file:`IPYTHONDIR/profile_mpi/ipcluster_config.py`.
168
198
169 There, instruct ipcluster to use the MPIExec launchers by adding the lines:
199 There, instruct ipcluster to use the MPIExec launchers by adding the lines:
170
200
171 .. sourcecode:: python
201 .. sourcecode:: python
172
202
173 c.IPClusterEngines.engine_launcher = 'IPython.parallel.apps.launcher.MPIExecEngineSetLauncher'
203 c.IPClusterEngines.engine_launcher_class = 'MPIExecEngineSetLauncher'
174
204
175 If the default MPI configuration is correct, then you can now start your cluster, with::
205 If the default MPI configuration is correct, then you can now start your cluster, with::
176
206
177 $ ipcluster start -n 4 --profile=mpi
207 $ ipcluster start -n 4 --profile=mpi
178
208
179 This does the following:
209 This does the following:
180
210
181 1. Starts the IPython controller on the current host.
211 1. Starts the IPython controller on the current host.
182 2. Uses :command:`mpiexec` to start 4 engines.
212 2. Uses :command:`mpiexec` to start 4 engines.
183
213
184 If you have a reason to also start the Controller with MPI, you can specify:
214 If you have a reason to also start the Controller with MPI, you can specify:
185
215
186 .. sourcecode:: python
216 .. sourcecode:: python
187
217
188 c.IPClusterStart.controller_launcher = 'IPython.parallel.apps.launcher.MPIExecControllerLauncher'
218 c.IPClusterStart.controller_launcher_class = 'MPIExecControllerLauncher'
189
219
190 .. note::
220 .. note::
191
221
192 The Controller *will not* be in the same MPI universe as the engines, so there is not
222 The Controller *will not* be in the same MPI universe as the engines, so there is not
193 much reason to do this unless sysadmins demand it.
223 much reason to do this unless sysadmins demand it.
194
224
195 On newer MPI implementations (such as OpenMPI), this will work even if you
225 On newer MPI implementations (such as OpenMPI), this will work even if you
196 don't make any calls to MPI or call :func:`MPI_Init`. However, older MPI
226 don't make any calls to MPI or call :func:`MPI_Init`. However, older MPI
197 implementations actually require each process to call :func:`MPI_Init` upon
227 implementations actually require each process to call :func:`MPI_Init` upon
198 starting. The easiest way of having this done is to install the mpi4py
228 starting. The easiest way of having this done is to install the mpi4py
199 [mpi4py]_ package and then specify the ``c.MPI.use`` option in :file:`ipengine_config.py`:
229 [mpi4py]_ package and then specify the ``c.MPI.use`` option in :file:`ipengine_config.py`:
200
230
201 .. sourcecode:: python
231 .. sourcecode:: python
202
232
203 c.MPI.use = 'mpi4py'
233 c.MPI.use = 'mpi4py'
204
234
205 Unfortunately, even this won't work for some MPI implementations. If you are
235 Unfortunately, even this won't work for some MPI implementations. If you are
206 having problems with this, you will likely have to use a custom Python
236 having problems with this, you will likely have to use a custom Python
207 executable that itself calls :func:`MPI_Init` at the appropriate time.
237 executable that itself calls :func:`MPI_Init` at the appropriate time.
208 Fortunately, mpi4py comes with such a custom Python executable that is easy to
238 Fortunately, mpi4py comes with such a custom Python executable that is easy to
209 install and use. However, this custom Python executable approach will not work
239 install and use. However, this custom Python executable approach will not work
210 with :command:`ipcluster` currently.
240 with :command:`ipcluster` currently.
211
241
212 More details on using MPI with IPython can be found :ref:`here <parallelmpi>`.
242 More details on using MPI with IPython can be found :ref:`here <parallelmpi>`.
213
243
214
244
215 Using :command:`ipcluster` in PBS mode
245 Using :command:`ipcluster` in PBS mode
216 ---------------------------------------
246 ---------------------------------------
217
247
218 The PBS mode uses the Portable Batch System (PBS) to start the engines.
248 The PBS mode uses the Portable Batch System (PBS) to start the engines.
219
249
220 As usual, we will start by creating a fresh profile::
250 As usual, we will start by creating a fresh profile::
221
251
222 $ ipython profile create --parallel --profile=pbs
252 $ ipython profile create --parallel --profile=pbs
223
253
224 And in :file:`ipcluster_config.py`, we will select the PBS launchers for the controller
254 And in :file:`ipcluster_config.py`, we will select the PBS launchers for the controller
225 and engines:
255 and engines:
226
256
227 .. sourcecode:: python
257 .. sourcecode:: python
228
258
229 c.IPClusterStart.controller_launcher = \
259 c.IPClusterStart.controller_launcher_class = 'PBSControllerLauncher'
230 'IPython.parallel.apps.launcher.PBSControllerLauncher'
260 c.IPClusterEngines.engine_launcher_class = 'PBSEngineSetLauncher'
231 c.IPClusterEngines.engine_launcher = \
232 'IPython.parallel.apps.launcher.PBSEngineSetLauncher'
233
261
234 .. note::
262 .. note::
235
263
236 Note that the configurable is IPClusterEngines for the engine launcher, and
264 Note that the configurable is IPClusterEngines for the engine launcher, and
237 IPClusterStart for the controller launcher. This is because the start command is a
265 IPClusterStart for the controller launcher. This is because the start command is a
238 subclass of the engine command, adding a controller launcher. Since it is a subclass,
266 subclass of the engine command, adding a controller launcher. Since it is a subclass,
239 any configuration made in IPClusterEngines is inherited by IPClusterStart unless it is
267 any configuration made in IPClusterEngines is inherited by IPClusterStart unless it is
240 overridden.
268 overridden.
241
269
242 IPython does provide simple default batch templates for PBS and SGE, but you may need
270 IPython does provide simple default batch templates for PBS and SGE, but you may need
243 to specify your own. Here is a sample PBS script template:
271 to specify your own. Here is a sample PBS script template:
244
272
245 .. sourcecode:: bash
273 .. sourcecode:: bash
246
274
247 #PBS -N ipython
275 #PBS -N ipython
248 #PBS -j oe
276 #PBS -j oe
249 #PBS -l walltime=00:10:00
277 #PBS -l walltime=00:10:00
250 #PBS -l nodes={n/4}:ppn=4
278 #PBS -l nodes={n/4}:ppn=4
251 #PBS -q {queue}
279 #PBS -q {queue}
252
280
253 cd $PBS_O_WORKDIR
281 cd $PBS_O_WORKDIR
254 export PATH=$HOME/usr/local/bin
282 export PATH=$HOME/usr/local/bin
255 export PYTHONPATH=$HOME/usr/local/lib/python2.7/site-packages
283 export PYTHONPATH=$HOME/usr/local/lib/python2.7/site-packages
256 /usr/local/bin/mpiexec -n {n} ipengine --profile-dir={profile_dir}
284 /usr/local/bin/mpiexec -n {n} ipengine --profile-dir={profile_dir}
257
285
258 There are a few important points about this template:
286 There are a few important points about this template:
259
287
260 1. This template will be rendered at runtime using IPython's :class:`EvalFormatter`.
288 1. This template will be rendered at runtime using IPython's :class:`EvalFormatter`.
261 This is simply a subclass of :class:`string.Formatter` that allows simple expressions
289 This is simply a subclass of :class:`string.Formatter` that allows simple expressions
262 on keys (see the sketch after this list).
290 on keys (see the sketch after this list).
263
291
264 2. Instead of putting in the actual number of engines, use the notation
292 2. Instead of putting in the actual number of engines, use the notation
265 ``{n}`` to indicate the number of engines to be started. You can also use
293 ``{n}`` to indicate the number of engines to be started. You can also use
266 expressions like ``{n/4}`` in the template to indicate the number of nodes.
294 expressions like ``{n/4}`` in the template to indicate the number of nodes.
267 There will always be ``{n}`` and ``{profile_dir}`` variables passed to the formatter.
295 There will always be ``{n}`` and ``{profile_dir}`` variables passed to the formatter.
268 These allow the batch system to know how many engines, and where the configuration
296 These allow the batch system to know how many engines, and where the configuration
269 files reside. The same is true for the batch queue, with the template variable
297 files reside. The same is true for the batch queue, with the template variable
270 ``{queue}``.
298 ``{queue}``.
271
299
272 3. Any options to :command:`ipengine` can be given in the batch script
300 3. Any options to :command:`ipengine` can be given in the batch script
273 template, or in :file:`ipengine_config.py`.
301 template, or in :file:`ipengine_config.py`.
274
302
275 4. Depending on the configuration of your system, you may have to set
303 4. Depending on the configuration of your system, you may have to set
276 environment variables in the script template.
304 environment variables in the script template.
277
305
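To see what such a formatter does with expressions like ``{n/4}``, here is an illustrative stand-in (a sketch only, not IPython's actual :class:`EvalFormatter` implementation) rendering a fragment of the template above:

.. sourcecode:: python

    from string import Formatter

    class EvalLikeFormatter(Formatter):
        """Illustrative only: evaluate each field name as a Python
        expression against the supplied keyword arguments."""
        def get_field(self, name, args, kwargs):
            return eval(name, {}, kwargs), name

    tpl = "#PBS -l nodes={n/4}:ppn=4\nmpiexec -n {n} ipengine --profile-dir={profile_dir}"
    # under Python 2 (the era of these docs), n/4 is integer division, so n=8 gives nodes=2
    print(EvalLikeFormatter().format(tpl, n=8, profile_dir='/home/me/.ipython/profile_pbs'))
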
278 The controller template should be similar, but simpler:
306 The controller template should be similar, but simpler:
279
307
280 .. sourcecode:: bash
308 .. sourcecode:: bash
281
309
282 #PBS -N ipython
310 #PBS -N ipython
283 #PBS -j oe
311 #PBS -j oe
284 #PBS -l walltime=00:10:00
312 #PBS -l walltime=00:10:00
285 #PBS -l nodes=1:ppn=4
313 #PBS -l nodes=1:ppn=4
286 #PBS -q {queue}
314 #PBS -q {queue}
287
315
288 cd $PBS_O_WORKDIR
316 cd $PBS_O_WORKDIR
289 export PATH=$HOME/usr/local/bin
317 export PATH=$HOME/usr/local/bin
290 export PYTHONPATH=$HOME/usr/local/lib/python2.7/site-packages
318 export PYTHONPATH=$HOME/usr/local/lib/python2.7/site-packages
291 ipcontroller --profile-dir={profile_dir}
319 ipcontroller --profile-dir={profile_dir}
292
320
293
321
294 Once you have created these scripts, save them with names like
322 Once you have created these scripts, save them with names like
295 :file:`pbs.engine.template`. Now you can load them into the :file:`ipcluster_config` with:
323 :file:`pbs.engine.template`. Now you can load them into the :file:`ipcluster_config` with:
296
324
297 .. sourcecode:: python
325 .. sourcecode:: python
298
326
299 c.PBSEngineSetLauncher.batch_template_file = "pbs.engine.template"
327 c.PBSEngineSetLauncher.batch_template_file = "pbs.engine.template"
300
328
301 c.PBSControllerLauncher.batch_template_file = "pbs.controller.template"
329 c.PBSControllerLauncher.batch_template_file = "pbs.controller.template"
302
330
303
331
304 Alternately, you can just define the templates as strings inside :file:`ipcluster_config`.
332 Alternately, you can just define the templates as strings inside :file:`ipcluster_config`.
305
333
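For example, a short engine template can live directly in the config file; a sketch, assuming the ``batch_template`` trait that accompanies ``batch_template_file``:

.. sourcecode:: python

    c.PBSEngineSetLauncher.batch_template = """#PBS -N ipython
    #PBS -l nodes={n/4}:ppn=4
    cd $PBS_O_WORKDIR
    mpiexec -n {n} ipengine --profile-dir={profile_dir}
    """
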
306 Whether you are using your own templates or our defaults, the extra configurables available are
334 Whether you are using your own templates or our defaults, the extra configurables available are
307 the number of engines to launch (``{n}``), and the batch system queue to which the jobs are to be
335 the number of engines to launch (``{n}``), and the batch system queue to which the jobs are to be
308 submitted (``{queue}``). Both can be specified in
336 submitted (``{queue}``). Both can be specified in
309 :file:`ipcluster_config`:
337 :file:`ipcluster_config`:
310
338
311 .. sourcecode:: python
339 .. sourcecode:: python
312
340
313 c.PBSLauncher.queue = 'veryshort.q'
341 c.PBSLauncher.queue = 'veryshort.q'
314 c.IPClusterEngines.n = 64
342 c.IPClusterEngines.n = 64
315
343
316 Note that assuming you are running PBS on a multi-node cluster, the Controller's default behavior
344 Note that assuming you are running PBS on a multi-node cluster, the Controller's default behavior
317 of listening only on localhost is likely too restrictive. In this case, also assuming the
345 of listening only on localhost is likely too restrictive. In this case, also assuming the
318 nodes are safely behind a firewall, you can simply instruct the Controller to listen for
346 nodes are safely behind a firewall, you can simply instruct the Controller to listen for
319 connections on all its interfaces, by adding in :file:`ipcontroller_config`:
347 connections on all its interfaces, by adding in :file:`ipcontroller_config`:
320
348
321 .. sourcecode:: python
349 .. sourcecode:: python
322
350
323 c.HubFactory.ip = '*'
351 c.HubFactory.ip = '*'
324
352
325 You can now run the cluster with::
353 You can now run the cluster with::
326
354
327 $ ipcluster start --profile=pbs -n 128
355 $ ipcluster start --profile=pbs -n 128
328
356
329 Additional configuration options can be found in the PBS section of :file:`ipcluster_config`.
357 Additional configuration options can be found in the PBS section of :file:`ipcluster_config`.
330
358
331 .. note::
359 .. note::
332
360
333 Due to the flexibility of configuration, the PBS launchers work with simple changes
361 Due to the flexibility of configuration, the PBS launchers work with simple changes
334 to the template for other :command:`qsub`-using systems, such as Sun Grid Engine,
362 to the template for other :command:`qsub`-using systems, such as Sun Grid Engine,
335 and with further configuration in similar batch systems like Condor.
363 and with further configuration in similar batch systems like Condor.
336
364
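For Sun Grid Engine in particular, IPython ships SGE launchers that are selected the same way as the PBS ones; the class names below are assumed from :file:`IPython/parallel/apps/launcher.py`, so check what your version provides:

.. sourcecode:: python

    c.IPClusterStart.controller_launcher_class = 'SGEControllerLauncher'
    c.IPClusterEngines.engine_launcher_class = 'SGEEngineSetLauncher'
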
337
365
338 Using :command:`ipcluster` in SSH mode
366 Using :command:`ipcluster` in SSH mode
339 ---------------------------------------
367 ---------------------------------------
340
368
341
369
342 The SSH mode uses :command:`ssh` to execute :command:`ipengine` on remote
370 The SSH mode uses :command:`ssh` to execute :command:`ipengine` on remote
343 nodes; :command:`ipcontroller` can be run remotely as well, or on localhost.
371 nodes; :command:`ipcontroller` can be run remotely as well, or on localhost.
344
372
345 .. note::
373 .. note::
346
374
347 When using this mode, it is highly recommended that you have set up SSH keys
375 When using this mode, it is highly recommended that you have set up SSH keys
348 and are using ssh-agent [SSH]_ for password-less logins.
376 and are using ssh-agent [SSH]_ for password-less logins.
349
377
350 As usual, we start by creating a clean profile::
378 As usual, we start by creating a clean profile::
351
379
352 $ ipython profile create --parallel --profile=ssh
380 $ ipython profile create --parallel --profile=ssh
353
381
354 To use this mode, select the SSH launchers in :file:`ipcluster_config.py`:
382 To use this mode, select the SSH launchers in :file:`ipcluster_config.py`:
355
383
356 .. sourcecode:: python
384 .. sourcecode:: python
357
385
358 c.IPClusterEngines.engine_launcher = \
386 c.IPClusterEngines.engine_launcher_class = 'SSHEngineSetLauncher'
359 'IPython.parallel.apps.launcher.SSHEngineSetLauncher'
360 # and if the Controller is also to be remote:
387 # and if the Controller is also to be remote:
361 c.IPClusterStart.controller_launcher = \
388 c.IPClusterStart.controller_launcher_class = 'SSHControllerLauncher'
362 'IPython.parallel.apps.launcher.SSHControllerLauncher'
389
363
390
364
391
365 The controller's remote location and configuration can be specified:
392 The controller's remote location and configuration can be specified:
366
393
367 .. sourcecode:: python
394 .. sourcecode:: python
368
395
369 # Set the user and hostname for the controller
396 # Set the user and hostname for the controller
370 # c.SSHControllerLauncher.hostname = 'controller.example.com'
397 # c.SSHControllerLauncher.hostname = 'controller.example.com'
371 # c.SSHControllerLauncher.user = os.environ.get('USER','username')
398 # c.SSHControllerLauncher.user = os.environ.get('USER','username')
372
399
373 # Set the arguments to be passed to ipcontroller
400 # Set the arguments to be passed to ipcontroller
374 # note that remotely launched ipcontroller will not get the contents of
401 # note that remotely launched ipcontroller will not get the contents of
375 # the local ipcontroller_config.py unless it resides on the *remote host*
402 # the local ipcontroller_config.py unless it resides on the *remote host*
376 # in the location specified by the `profile-dir` argument.
403 # in the location specified by the `profile-dir` argument.
377 # c.SSHControllerLauncher.program_args = ['--reuse', '--ip=*', '--profile-dir=/path/to/cd']
404 # c.SSHControllerLauncher.program_args = ['--reuse', '--ip=*', '--profile-dir=/path/to/cd']
378
405
379 .. note::
406 .. note::
380
407
381 SSH mode does not do any file movement, so you will need to distribute configuration
408 SSH mode does not do any file movement, so you will need to distribute configuration
382 files manually. To aid in this, the `reuse_files` flag defaults to True for ssh-launched
409 files manually. To aid in this, the `reuse_files` flag defaults to True for ssh-launched
383 Controllers, so you will only need to do this once, unless you override this flag back
410 Controllers, so you will only need to do this once, unless you override this flag back
384 to False.
411 to False.
385
412
386 Engines are specified in a dictionary, by hostname and the number of engines to be run
413 Engines are specified in a dictionary, by hostname and the number of engines to be run
387 on that host.
414 on that host.
388
415
389 .. sourcecode:: python
416 .. sourcecode:: python
390
417
391 c.SSHEngineSetLauncher.engines = { 'host1.example.com' : 2,
418 c.SSHEngineSetLauncher.engines = { 'host1.example.com' : 2,
392 'host2.example.com' : 5,
419 'host2.example.com' : 5,
393 'host3.example.com' : (1, ['--profile-dir=/home/different/location']),
420 'host3.example.com' : (1, ['--profile-dir=/home/different/location']),
394 'host4.example.com' : 8 }
421 'host4.example.com' : 8 }
395
422
396 * The keys of the `engines` dict are the hosts we want to run engines on, and
423 * The keys of the `engines` dict are the hosts we want to run engines on, and
397 each value is the number of engines to run on that host.
424 each value is the number of engines to run on that host.
398 * On host3, the value is a tuple: the first element is the number of engines, and
425 * On host3, the value is a tuple: the first element is the number of engines, and
399 the second is the list of arguments to be passed to :command:`ipengine`.
426 the second is the list of arguments to be passed to :command:`ipengine`.
400
427
401 For engines without explicitly specified arguments, the default arguments are set in
428 For engines without explicitly specified arguments, the default arguments are set in
402 a single location:
429 a single location:
403
430
404 .. sourcecode:: python
431 .. sourcecode:: python
405
432
406 c.SSHEngineSetLauncher.engine_args = ['--profile-dir=/path/to/profile_ssh']
433 c.SSHEngineSetLauncher.engine_args = ['--profile-dir=/path/to/profile_ssh']
407
434
408 Current limitations of the SSH mode of :command:`ipcluster` are:
435 Current limitations of the SSH mode of :command:`ipcluster` are:
409
436
410 * Untested on Windows. Would require a working :command:`ssh` on Windows.
437 * Untested on Windows. Would require a working :command:`ssh` on Windows.
411 Also, we are using shell scripts to set up and execute commands on remote
438 Also, we are using shell scripts to set up and execute commands on remote
412 hosts.
439 hosts.
413 * No file movement - This is a regression from 0.10, which moved connection files
440 * No file movement - This is a regression from 0.10, which moved connection files
414 around with scp. This will be improved, but not before the 0.11 release.
441 around with scp. This will be improved, but not before the 0.11 release.
415
442
416 Using the :command:`ipcontroller` and :command:`ipengine` commands
443 Using the :command:`ipcontroller` and :command:`ipengine` commands
417 ====================================================================
444 ====================================================================
418
445
419 It is also possible to use the :command:`ipcontroller` and :command:`ipengine`
446 It is also possible to use the :command:`ipcontroller` and :command:`ipengine`
420 commands to start your controller and engines. This approach gives you full
447 commands to start your controller and engines. This approach gives you full
421 control over all aspects of the startup process.
448 control over all aspects of the startup process.
422
449
423 Starting the controller and engine on your local machine
450 Starting the controller and engine on your local machine
424 --------------------------------------------------------
451 --------------------------------------------------------
425
452
426 To use :command:`ipcontroller` and :command:`ipengine` to start things on your
453 To use :command:`ipcontroller` and :command:`ipengine` to start things on your
427 local machine, do the following.
454 local machine, do the following.
428
455
429 First start the controller::
456 First start the controller::
430
457
431 $ ipcontroller
458 $ ipcontroller
432
459
433 Next, start as many instances of the engine as you want by running (repeatedly)
460 Next, start as many instances of the engine as you want by running (repeatedly)
434 the command::
461 the command::
435
462
436 $ ipengine
463 $ ipengine
437
464
438 The engines should start and automatically connect to the controller using the
465 The engines should start and automatically connect to the controller using the
439 JSON files in :file:`~/.ipython/profile_default/security`. You are now ready to use the
466 JSON files in :file:`~/.ipython/profile_default/security`. You are now ready to use the
440 controller and engines from IPython.
467 controller and engines from IPython.
441
468
442 .. warning::
469 .. warning::
443
470
444 The order of the above operations may be important. You *must*
471 The order of the above operations may be important. You *must*
445 start the controller before the engines, unless you are reusing connection
472 start the controller before the engines, unless you are reusing connection
446 information (via ``--reuse``), in which case ordering is not important.
473 information (via ``--reuse``), in which case ordering is not important.
447
474
448 .. note::
475 .. note::
449
476
450 On some platforms (OS X), to put the controller and engine into the
477 On some platforms (OS X), to put the controller and engine into the
451 background you may need to give these commands in the form ``(ipcontroller
478 background you may need to give these commands in the form ``(ipcontroller
452 &)`` and ``(ipengine &)`` (with the parentheses) for them to work
479 &)`` and ``(ipengine &)`` (with the parentheses) for them to work
453 properly.
480 properly.
454
481
455 Starting the controller and engines on different hosts
482 Starting the controller and engines on different hosts
456 ------------------------------------------------------
483 ------------------------------------------------------
457
484
458 When the controller and engines are running on different hosts, things are
485 When the controller and engines are running on different hosts, things are
459 slightly more complicated, but the underlying ideas are the same:
486 slightly more complicated, but the underlying ideas are the same:
460
487
461 1. Start the controller on a host using :command:`ipcontroller`. The controller must be
488 1. Start the controller on a host using :command:`ipcontroller`. The controller must be
462 instructed to listen on an interface visible to the engine machines, via the ``ip``
489 instructed to listen on an interface visible to the engine machines, via the ``ip``
463 command-line argument or ``HubFactory.ip`` in :file:`ipcontroller_config.py`.
490 command-line argument or ``HubFactory.ip`` in :file:`ipcontroller_config.py`.
464 2. Copy :file:`ipcontroller-engine.json` from :file:`~/.ipython/profile_<name>/security` on
491 2. Copy :file:`ipcontroller-engine.json` from :file:`~/.ipython/profile_<name>/security` on
465 the controller's host to the host where the engines will run.
492 the controller's host to the host where the engines will run.
466 3. Use :command:`ipengine` on the engines' hosts to start the engines.
493 3. Use :command:`ipengine` on the engines' hosts to start the engines.
467
494
468 The only thing you have to be careful of is to tell :command:`ipengine` where
495 The only thing you have to be careful of is to tell :command:`ipengine` where
469 the :file:`ipcontroller-engine.json` file is located. There are two ways you
496 the :file:`ipcontroller-engine.json` file is located. There are two ways you
470 can do this:
497 can do this:
471
498
472 * Put :file:`ipcontroller-engine.json` in the :file:`~/.ipython/profile_<name>/security`
499 * Put :file:`ipcontroller-engine.json` in the :file:`~/.ipython/profile_<name>/security`
473 directory on the engine's host, where it will be found automatically.
500 directory on the engine's host, where it will be found automatically.
474 * Call :command:`ipengine` with the ``--file=full_path_to_the_file``
501 * Call :command:`ipengine` with the ``--file=full_path_to_the_file``
475 flag.
502 flag.
476
503
477 The ``file`` flag works like this::
504 The ``file`` flag works like this::
478
505
479 $ ipengine --file=/path/to/my/ipcontroller-engine.json
506 $ ipengine --file=/path/to/my/ipcontroller-engine.json
480
507
481 .. note::
508 .. note::
482
509
483 If the controller's and engines' hosts all have a shared file system
510 If the controller's and engines' hosts all have a shared file system
484 (:file:`~/.ipython/profile_<name>/security` is the same on all of them), then things
511 (:file:`~/.ipython/profile_<name>/security` is the same on all of them), then things
485 will just work!
512 will just work!
486
513
487 SSH Tunnels
514 SSH Tunnels
488 ***********
515 ***********
489
516
490 If your engines are not on the same LAN as the controller, or you are on a highly
517 If your engines are not on the same LAN as the controller, or you are on a highly
491 restricted network where your nodes cannot see each other's ports, then you can
518 restricted network where your nodes cannot see each other's ports, then you can
492 use SSH tunnels to connect engines to the controller.
519 use SSH tunnels to connect engines to the controller.
493
520
494 .. note::
521 .. note::
495
522
496 This does not work in all cases. Manual tunnels may be an option, but are
523 This does not work in all cases. Manual tunnels may be an option, but are
497 highly inconvenient. Support for manual tunnels will be improved.
524 highly inconvenient. Support for manual tunnels will be improved.
498
525
499 You can instruct all engines to use ssh, by specifying the ssh server in
526 You can instruct all engines to use ssh, by specifying the ssh server in
500 :file:`ipcontroller-engine.json`:
527 :file:`ipcontroller-engine.json`:
501
528
502 .. I know this is really JSON, but the example is a subset of Python:
529 .. I know this is really JSON, but the example is a subset of Python:
503 .. sourcecode:: python
530 .. sourcecode:: python
504
531
505 {
532 {
506 "url":"tcp://192.168.1.123:56951",
533 "url":"tcp://192.168.1.123:56951",
507 "exec_key":"26f4c040-587d-4a4e-b58b-030b96399584",
534 "exec_key":"26f4c040-587d-4a4e-b58b-030b96399584",
508 "ssh":"user@example.com",
535 "ssh":"user@example.com",
509 "location":"192.168.1.123"
536 "location":"192.168.1.123"
510 }
537 }
511
538
512 This field is set automatically if you give the ``--enginessh=user@example.com`` argument when
539 This field is set automatically if you give the ``--enginessh=user@example.com`` argument when
513 starting :command:`ipcontroller`.
540 starting :command:`ipcontroller`.
514
541
515 Or you can specify an ssh server on the command-line when starting an engine::
542 Or you can specify an ssh server on the command-line when starting an engine::
516
543
517 $> ipengine --profile=foo --ssh=my.login.node
544 $> ipengine --profile=foo --ssh=my.login.node
518
545
519 For example, if your system is totally restricted, then all connections will actually be
546 For example, if your system is totally restricted, then all connections will actually be
520 loopback, and ssh tunnels will be used to connect engines to the controller::
547 loopback, and ssh tunnels will be used to connect engines to the controller::
521
548
522 [node1] $> ipcontroller --enginessh=node1
549 [node1] $> ipcontroller --enginessh=node1
523 [node2] $> ipengine
550 [node2] $> ipengine
524 [node3] $> ipcluster engines --n=4
551 [node3] $> ipcluster engines --n=4
525
552
526 If you want to start many engines on each node, the command `ipcluster engines --n=4`
553 If you want to start many engines on each node, the command `ipcluster engines --n=4`
527 without any further configuration is equivalent to running :command:`ipengine` four times.
554 without any further configuration is equivalent to running :command:`ipengine` four times.
528
555
529
556
530 Make JSON files persistent
557 Make JSON files persistent
531 --------------------------
558 --------------------------
532
559
533 At first glance it may seem that managing the JSON files is a bit
560 At first glance it may seem that managing the JSON files is a bit
534 annoying. Going back to the house and key analogy, copying the JSON around
561 annoying. Going back to the house and key analogy, copying the JSON around
535 each time you start the controller is like having to make a new key every time
562 each time you start the controller is like having to make a new key every time
536 you want to unlock the door and enter your house. As with your house, you want
563 you want to unlock the door and enter your house. As with your house, you want
537 to be able to create the key (or JSON file) once, and then simply use it at
564 to be able to create the key (or JSON file) once, and then simply use it at
538 any point in the future.
565 any point in the future.
539
566
540 To do this, the only thing you have to do is specify the `--reuse` flag, so that
567 To do this, the only thing you have to do is specify the `--reuse` flag, so that
541 the connection information in the JSON files remains accurate::
568 the connection information in the JSON files remains accurate::
542
569
543 $ ipcontroller --reuse
570 $ ipcontroller --reuse
544
571
545 Then, just copy the JSON files over the first time and you are set. You can
572 Then, just copy the JSON files over the first time and you are set. You can
546 start and stop the controller and engines as many times as you want in the
573 start and stop the controller and engines as many times as you want in the
547 future; just make sure to tell the controller to reuse the file.
574 future; just make sure to tell the controller to reuse the file.
548
575
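If you prefer configuration files over command-line flags, the same behavior can be expressed in :file:`ipcontroller_config.py`; a sketch, assuming ``--reuse`` maps to the ``reuse_files`` trait mentioned earlier:

.. sourcecode:: python

    # equivalent to passing --reuse on the command line
    c.IPControllerApp.reuse_files = True
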
549 .. note::
576 .. note::
550
577
551 You may ask the question: what ports does the controller listen on if you
578 You may ask the question: what ports does the controller listen on if you
552 don't tell it to use specific ones? The default is to use high random port
579 don't tell it to use specific ones? The default is to use high random port
553 numbers. We do this for two reasons: i) to increase security through
580 numbers. We do this for two reasons: i) to increase security through
554 obscurity and ii) to allow multiple controllers on a given host to start and
581 obscurity and ii) to allow multiple controllers on a given host to start and
555 automatically use different ports.
582 automatically use different ports.
556
583
557 Log files
584 Log files
558 ---------
585 ---------
559
586
560 All of the components of IPython have log files associated with them.
587 All of the components of IPython have log files associated with them.
561 These log files can be extremely useful in debugging problems with
588 These log files can be extremely useful in debugging problems with
562 IPython and can be found in the directory :file:`~/.ipython/profile_<name>/log`.
589 IPython and can be found in the directory :file:`~/.ipython/profile_<name>/log`.
563 Sending the log files to us will often help us to debug any problems.
590 Sending the log files to us will often help us to debug any problems.
564
591
565
592
566 Configuring `ipcontroller`
593 Configuring `ipcontroller`
567 ---------------------------
594 ---------------------------
568
595
569 The IPython Controller takes its configuration from the file :file:`ipcontroller_config.py`
596 The IPython Controller takes its configuration from the file :file:`ipcontroller_config.py`
570 in the active profile directory.
597 in the active profile directory.
571
598
572 Ports and addresses
599 Ports and addresses
573 *******************
600 *******************
574
601
575 In many cases, you will want to configure the Controller's network identity. By default,
602 In many cases, you will want to configure the Controller's network identity. By default,
576 the Controller listens only on loopback, which is the most secure but often impractical.
603 the Controller listens only on loopback, which is the most secure but often impractical.
577 To instruct the controller to listen on a specific interface, you can set the
604 To instruct the controller to listen on a specific interface, you can set the
578 :attr:`HubFactory.ip` trait. To listen on all interfaces, simply specify:
605 :attr:`HubFactory.ip` trait. To listen on all interfaces, simply specify:
579
606
580 .. sourcecode:: python
607 .. sourcecode:: python
581
608
582 c.HubFactory.ip = '*'
609 c.HubFactory.ip = '*'
583
610
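Listening on every interface is not always what you want; to restrict the Controller to a single network, give that interface's address instead (the IP below is only an example):

.. sourcecode:: python

    # listen only on the cluster-internal network
    c.HubFactory.ip = '10.0.1.5'
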
584 When connecting to a Controller that is listening on loopback or behind a firewall, it may
611 When connecting to a Controller that is listening on loopback or behind a firewall, it may
585 be necessary to specify an SSH server to use for tunnels, and the external IP of the
612 be necessary to specify an SSH server to use for tunnels, and the external IP of the
586 Controller. If you specified that the HubFactory listen on loopback, or all interfaces,
613 Controller. If you specified that the HubFactory listen on loopback, or all interfaces,
587 then IPython will try to guess the external IP. If you are on a system with VM network
614 then IPython will try to guess the external IP. If you are on a system with VM network
588 devices, or many interfaces, this guess may be incorrect. In these cases, you will want
615 devices, or many interfaces, this guess may be incorrect. In these cases, you will want
589 to specify the 'location' of the Controller. This is the IP of the machine the Controller
616 to specify the 'location' of the Controller. This is the IP of the machine the Controller
590 is on, as seen by the clients, engines, or the SSH server used to tunnel connections.
617 is on, as seen by the clients, engines, or the SSH server used to tunnel connections.
591
618
592 For example, to set up a cluster with a Controller on a worker node, using ssh tunnels
619 For example, to set up a cluster with a Controller on a worker node, using ssh tunnels
593 through the login node, an example :file:`ipcontroller_config.py` might contain:
620 through the login node, an example :file:`ipcontroller_config.py` might contain:
594
621
595 .. sourcecode:: python
622 .. sourcecode:: python
596
623
597 # allow connections on all interfaces from engines
624 # allow connections on all interfaces from engines
598 # engines on the same node will use loopback, while engines
625 # engines on the same node will use loopback, while engines
599 # from other nodes will use an external IP
626 # from other nodes will use an external IP
600 c.HubFactory.ip = '*'
627 c.HubFactory.ip = '*'
601
628
602 # you typically only need to specify the location when there are extra
629 # you typically only need to specify the location when there are extra
603 # interfaces that may not be visible to peer nodes (e.g. VM interfaces)
630 # interfaces that may not be visible to peer nodes (e.g. VM interfaces)
604 c.HubFactory.location = '10.0.1.5'
631 c.HubFactory.location = '10.0.1.5'
605 # or to get an automatic value, try this:
632 # or to get an automatic value, try this:
606 import socket
633 import socket
607 ex_ip = socket.gethostbyname_ex(socket.gethostname())[-1][0]
634 ex_ip = socket.gethostbyname_ex(socket.gethostname())[-1][0]
608 c.HubFactory.location = ex_ip
635 c.HubFactory.location = ex_ip
609
636
610 # now instruct clients to use the login node for SSH tunnels:
637 # now instruct clients to use the login node for SSH tunnels:
611 c.HubFactory.ssh_server = 'login.mycluster.net'
638 c.HubFactory.ssh_server = 'login.mycluster.net'
612
639
613 After doing this, your :file:`ipcontroller-client.json` file will look something like this:
640 After doing this, your :file:`ipcontroller-client.json` file will look something like this:
614
641
615 .. this can be Python, despite the fact that it's actually JSON, because it's
642 .. this can be Python, despite the fact that it's actually JSON, because it's
616 .. still valid Python
643 .. still valid Python
617
644
618 .. sourcecode:: python
645 .. sourcecode:: python
619
646
620 {
647 {
621 "url":"tcp:\/\/*:43447",
648 "url":"tcp:\/\/*:43447",
622 "exec_key":"9c7779e4-d08a-4c3b-ba8e-db1f80b562c1",
649 "exec_key":"9c7779e4-d08a-4c3b-ba8e-db1f80b562c1",
623 "ssh":"login.mycluster.net",
650 "ssh":"login.mycluster.net",
624 "location":"10.0.1.5"
651 "location":"10.0.1.5"
625 }
652 }
626
653
627 Then this file will be all you need for a client to connect to the controller, tunneling
654 Then this file will be all you need for a client to connect to the controller, tunneling
628 SSH connections through login.mycluster.net.
655 SSH connections through login.mycluster.net.
629
656
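From the client side, connecting through that tunnel might then look like the following sketch; the connection-file path and key location are placeholders for your own:

.. sourcecode:: python

    from IPython.parallel import Client

    # the ssh and location fields in the JSON file tell the client how to tunnel
    rc = Client('/path/to/ipcontroller-client.json', sshkey='/path/to/ssh/key')
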
630 Database Backend
657 Database Backend
631 ****************
658 ****************
632
659
633 The Hub stores all messages and results passed between Clients and Engines.
660 The Hub stores all messages and results passed between Clients and Engines.
634 For large and/or long-running clusters, it would be unreasonable to keep all
661 For large and/or long-running clusters, it would be unreasonable to keep all
635 of this information in memory. For this reason, we have two database backends:
662 of this information in memory. For this reason, we have two database backends:
636 [MongoDB]_ via PyMongo_, and SQLite with the stdlib :py:mod:`sqlite3`.
663 [MongoDB]_ via PyMongo_, and SQLite with the stdlib :py:mod:`sqlite3`.
637
664
638 MongoDB is our design target, and the dict-like model it uses has driven our design. As far
665 MongoDB is our design target, and the dict-like model it uses has driven our design. As far
639 as we are concerned, BSON can be considered essentially the same as JSON, adding support
666 as we are concerned, BSON can be considered essentially the same as JSON, adding support
640 for binary data and datetime objects, and any new database backend must support the same
667 for binary data and datetime objects, and any new database backend must support the same
641 data types.
668 data types.
642
669
643 .. seealso::
670 .. seealso::
644
671
645 MongoDB `BSON doc <http://www.mongodb.org/display/DOCS/BSON>`_
672 MongoDB `BSON doc <http://www.mongodb.org/display/DOCS/BSON>`_
646
673
647 To use one of these backends, you must set the :attr:`HubFactory.db_class` trait:
674 To use one of these backends, you must set the :attr:`HubFactory.db_class` trait:
648
675
649 .. sourcecode:: python
676 .. sourcecode:: python
650
677
651 # for a simple dict-based in-memory implementation, use dictdb
678 # for a simple dict-based in-memory implementation, use dictdb
652 # This is the default and the fastest, since it doesn't involve the filesystem
679 # This is the default and the fastest, since it doesn't involve the filesystem
653 c.HubFactory.db_class = 'IPython.parallel.controller.dictdb.DictDB'
680 c.HubFactory.db_class = 'IPython.parallel.controller.dictdb.DictDB'
654
681
655 # To use MongoDB:
682 # To use MongoDB:
656 c.HubFactory.db_class = 'IPython.parallel.controller.mongodb.MongoDB'
683 c.HubFactory.db_class = 'IPython.parallel.controller.mongodb.MongoDB'
657
684
658 # and SQLite:
685 # and SQLite:
659 c.HubFactory.db_class = 'IPython.parallel.controller.sqlitedb.SQLiteDB'
686 c.HubFactory.db_class = 'IPython.parallel.controller.sqlitedb.SQLiteDB'
660
687
661 When using one of the proper database backends, you can allow tasks to persist from
688 When using one of the proper database backends, you can allow tasks to persist from
662 one session to the next by specifying the MongoDB database or SQLite table in
689 one session to the next by specifying the MongoDB database or SQLite table in
663 which tasks are to be stored. The default is to use a table named for the Hub's Session,
690 which tasks are to be stored. The default is to use a table named for the Hub's Session,
664 which is a UUID, and thus different every time.
691 which is a UUID, and thus different every time.
665
692
666 .. sourcecode:: python
693 .. sourcecode:: python
667
694
668 # To keep persistent task history in MongoDB:
695 # To keep persistent task history in MongoDB:
669 c.MongoDB.database = 'tasks'
696 c.MongoDB.database = 'tasks'
670
697
671 # and in SQLite:
698 # and in SQLite:
672 c.SQLiteDB.table = 'tasks'
699 c.SQLiteDB.table = 'tasks'
673
700
674
701
675 Since MongoDB servers can be running remotely or configured to listen on a particular port,
702 Since MongoDB servers can be running remotely or configured to listen on a particular port,
676 you can specify any arguments you may need to the PyMongo `Connection
703 you can specify any arguments you may need to the PyMongo `Connection
677 <http://api.mongodb.org/python/1.9/api/pymongo/connection.html#pymongo.connection.Connection>`_:
704 <http://api.mongodb.org/python/1.9/api/pymongo/connection.html#pymongo.connection.Connection>`_:
678
705
679 .. sourcecode:: python
706 .. sourcecode:: python
680
707
681 # positional args to pymongo.Connection
708 # positional args to pymongo.Connection
682 c.MongoDB.connection_args = []
709 c.MongoDB.connection_args = []
683
710
684 # keyword args to pymongo.Connection
711 # keyword args to pymongo.Connection
685 c.MongoDB.connection_kwargs = {}
712 c.MongoDB.connection_kwargs = {}
686
713
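For example, pointing the Hub at a MongoDB server on another machine might look like this (hostname and port are placeholders):

.. sourcecode:: python

    # forwarded as keyword arguments to pymongo.Connection
    c.MongoDB.connection_kwargs = {'host': 'mongodb.example.com', 'port': 27017}
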
687 .. _MongoDB: http://www.mongodb.org
714 .. _MongoDB: http://www.mongodb.org
688 .. _PyMongo: http://api.mongodb.org/python/1.9/
715 .. _PyMongo: http://api.mongodb.org/python/1.9/
689
716
690 Configuring `ipengine`
717 Configuring `ipengine`
691 -----------------------
718 -----------------------
692
719
693 The IPython Engine takes its configuration from the file :file:`ipengine_config.py`.
720 The IPython Engine takes its configuration from the file :file:`ipengine_config.py`.
694
721
695 The Engine itself also has some amount of configuration. Most of this
722 The Engine itself also has some amount of configuration. Most of this
696 has to do with initializing MPI or connecting to the controller.
723 has to do with initializing MPI or connecting to the controller.
697
724
698 To instruct the Engine to initialize with an MPI environment set up by
725 To instruct the Engine to initialize with an MPI environment set up by
699 mpi4py, add:
726 mpi4py, add:
700
727
701 .. sourcecode:: python
728 .. sourcecode:: python
702
729
703 c.MPI.use = 'mpi4py'
730 c.MPI.use = 'mpi4py'
704
731
705 In this case, the Engine will use our default mpi4py init script to set up
732 In this case, the Engine will use our default mpi4py init script to set up
706 the MPI environment prior to execution. We have default init scripts for
733 the MPI environment prior to execution. We have default init scripts for
707 mpi4py and pytrilinos. If you want to specify your own code to be run
734 mpi4py and pytrilinos. If you want to specify your own code to be run
708 at the beginning, specify `c.MPI.init_script`.
735 at the beginning, specify `c.MPI.init_script`.
709
736
710 You can also specify a file or python command to be run at startup of the
737 You can also specify a file or python command to be run at startup of the
711 Engine:
738 Engine:
712
739
713 .. sourcecode:: python
740 .. sourcecode:: python
714
741
715 c.IPEngineApp.startup_script = u'/path/to/my/startup.py'
742 c.IPEngineApp.startup_script = u'/path/to/my/startup.py'
716
743
717 c.IPEngineApp.startup_command = 'import numpy, scipy, mpi4py'
744 c.IPEngineApp.startup_command = 'import numpy, scipy, mpi4py'
718
745
719 These commands/files will be run again after each engine restart.
746 These commands/files will be run again after each engine restart.
720
747
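For instance, a hypothetical :file:`startup.py` (everything in it is illustrative) could pin per-engine environment settings and pre-import heavy modules before any user code arrives:

.. sourcecode:: python

    # startup.py -- executed by each engine at startup
    import os
    os.environ['OMP_NUM_THREADS'] = '1'   # avoid oversubscribing cores
    import numpy
    import scipy
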
721 It's also useful on systems with shared filesystems to run the engines
748 It's also useful on systems with shared filesystems to run the engines
722 in some scratch directory. This can be set with:
749 in some scratch directory. This can be set with:
723
750
724 .. sourcecode:: python
751 .. sourcecode:: python
725
752
726 c.IPEngineApp.work_dir = u'/path/to/scratch/'
753 c.IPEngineApp.work_dir = u'/path/to/scratch/'
727
754
728
755
729
756
730 .. [MongoDB] MongoDB database http://www.mongodb.org
757 .. [MongoDB] MongoDB database http://www.mongodb.org
731
758
732 .. [PBS] Portable Batch System http://www.openpbs.org
759 .. [PBS] Portable Batch System http://www.openpbs.org
733
760
734 .. [SSH] SSH-Agent http://en.wikipedia.org/wiki/ssh-agent
761 .. [SSH] SSH-Agent http://en.wikipedia.org/wiki/ssh-agent
@@ -1,334 +1,332 b''
1 ============================================
1 ============================================
2 Getting started with Windows HPC Server 2008
2 Getting started with Windows HPC Server 2008
3 ============================================
3 ============================================
4
4
5 .. note::
5 .. note::
6
6
7 Not adapted to zmq yet
7 Not adapted to zmq yet
8
8
9 Introduction
9 Introduction
10 ============
10 ============
11
11
12 The Python programming language is an increasingly popular language for
12 The Python programming language is an increasingly popular language for
13 numerical computing. This is due to a unique combination of factors. First,
13 numerical computing. This is due to a unique combination of factors. First,
14 Python is a high-level and *interactive* language that is well matched to
14 Python is a high-level and *interactive* language that is well matched to
15 interactive numerical work. Second, it is easy (often trivial) to
15 interactive numerical work. Second, it is easy (often trivial) to
16 integrate legacy C/C++/Fortran code into Python. Third, a large number of
16 integrate legacy C/C++/Fortran code into Python. Third, a large number of
17 high-quality open source projects provide all the needed building blocks for
17 high-quality open source projects provide all the needed building blocks for
18 numerical computing: numerical arrays (NumPy), algorithms (SciPy), 2D/3D
18 numerical computing: numerical arrays (NumPy), algorithms (SciPy), 2D/3D
19 Visualization (Matplotlib, Mayavi, Chaco), Symbolic Mathematics (Sage, Sympy)
19 Visualization (Matplotlib, Mayavi, Chaco), Symbolic Mathematics (Sage, Sympy)
20 and others.
20 and others.
21
21
22 The IPython project is a core part of this open-source toolchain and is
22 The IPython project is a core part of this open-source toolchain and is
23 focused on creating a comprehensive environment for interactive and
23 focused on creating a comprehensive environment for interactive and
24 exploratory computing in the Python programming language. It enables all of
24 exploratory computing in the Python programming language. It enables all of
25 the above tools to be used interactively and consists of two main components:
25 the above tools to be used interactively and consists of two main components:
26
26
27 * An enhanced interactive Python shell with support for interactive plotting
27 * An enhanced interactive Python shell with support for interactive plotting
28 and visualization.
28 and visualization.
29 * An architecture for interactive parallel computing.
29 * An architecture for interactive parallel computing.
30
30
31 With these components, it is possible to perform all aspects of a parallel
31 With these components, it is possible to perform all aspects of a parallel
32 computation interactively. This type of workflow is particularly relevant in
32 computation interactively. This type of workflow is particularly relevant in
33 scientific and numerical computing where algorithms, code and data are
33 scientific and numerical computing where algorithms, code and data are
34 continually evolving as the user/developer explores a problem. The broad
34 continually evolving as the user/developer explores a problem. The broad
35 trends in computing (commodity clusters, multicore, cloud computing, etc.)
35 trends in computing (commodity clusters, multicore, cloud computing, etc.)
36 make these capabilities of IPython particularly relevant.
36 make these capabilities of IPython particularly relevant.
37
37
38 While IPython is a cross platform tool, it has particularly strong support for
38 While IPython is a cross platform tool, it has particularly strong support for
39 Windows based compute clusters running Windows HPC Server 2008. This document
39 Windows based compute clusters running Windows HPC Server 2008. This document
40 describes how to get started with IPython on Windows HPC Server 2008. The
40 describes how to get started with IPython on Windows HPC Server 2008. The
41 content and emphasis here is practical: installing IPython, configuring
41 content and emphasis here is practical: installing IPython, configuring
42 IPython to use the Windows job scheduler and running example parallel programs
42 IPython to use the Windows job scheduler and running example parallel programs
43 interactively. A more complete description of IPython's parallel computing
43 interactively. A more complete description of IPython's parallel computing
44 capabilities can be found in IPython's online documentation
44 capabilities can be found in IPython's online documentation
45 (http://ipython.org/documentation.html).
45 (http://ipython.org/documentation.html).
46
46
47 Setting up your Windows cluster
47 Setting up your Windows cluster
48 ===============================
48 ===============================
49
49
50 This document assumes that you already have a cluster running Windows
50 This document assumes that you already have a cluster running Windows
51 HPC Server 2008. Here is a broad overview of what is involved with setting up
51 HPC Server 2008. Here is a broad overview of what is involved with setting up
52 such a cluster:
52 such a cluster:
53
53
54 1. Install Windows Server 2008 on the head and compute nodes in the cluster.
54 1. Install Windows Server 2008 on the head and compute nodes in the cluster.
55 2. Set up the network configuration on each host. Each host should have a
55 2. Set up the network configuration on each host. Each host should have a
56 static IP address.
56 static IP address.
57 3. On the head node, activate the "Active Directory Domain Services" role
57 3. On the head node, activate the "Active Directory Domain Services" role
58 and make the head node the domain controller.
58 and make the head node the domain controller.
59 4. Join the compute nodes to the newly created Active Directory (AD) domain.
59 4. Join the compute nodes to the newly created Active Directory (AD) domain.
60 5. Set up user accounts in the domain with shared home directories.
60 5. Set up user accounts in the domain with shared home directories.
61 6. Install the HPC Pack 2008 on the head node to create a cluster.
61 6. Install the HPC Pack 2008 on the head node to create a cluster.
62 7. Install the HPC Pack 2008 on the compute nodes.
62 7. Install the HPC Pack 2008 on the compute nodes.
63
63
64 More details about installing and configuring Windows HPC Server 2008 can be
64 More details about installing and configuring Windows HPC Server 2008 can be
65 found on the Windows HPC Home Page (http://www.microsoft.com/hpc). Regardless
65 found on the Windows HPC Home Page (http://www.microsoft.com/hpc). Regardless
66 of what steps you follow to set up your cluster, the remainder of this
66 of what steps you follow to set up your cluster, the remainder of this
67 document will assume that:
67 document will assume that:
68
68
69 * There are domain users that can log on to the AD domain and submit jobs
69 * There are domain users that can log on to the AD domain and submit jobs
70 to the cluster scheduler.
70 to the cluster scheduler.
71 * These domain users have shared home directories. While shared home
71 * These domain users have shared home directories. While shared home
72 directories are not required to use IPython, they make it much easier to
72 directories are not required to use IPython, they make it much easier to
73 use IPython.
73 use IPython.
74
74
75 Installation of IPython and its dependencies
75 Installation of IPython and its dependencies
76 ============================================
76 ============================================
77
77
78 IPython and all of its dependencies are freely available and open source.
78 IPython and all of its dependencies are freely available and open source.
79 These packages provide a powerful and cost-effective approach to numerical and
79 These packages provide a powerful and cost-effective approach to numerical and
80 scientific computing on Windows. The following dependencies are needed to run
80 scientific computing on Windows. The following dependencies are needed to run
81 IPython on Windows:
81 IPython on Windows:
82
82
83 * Python 2.6 or 2.7 (http://www.python.org)
83 * Python 2.6 or 2.7 (http://www.python.org)
84 * pywin32 (http://sourceforge.net/projects/pywin32/)
84 * pywin32 (http://sourceforge.net/projects/pywin32/)
85 * PyReadline (https://launchpad.net/pyreadline)
85 * PyReadline (https://launchpad.net/pyreadline)
86 * pyzmq (http://github.com/zeromq/pyzmq/downloads)
86 * pyzmq (http://github.com/zeromq/pyzmq/downloads)
87 * IPython (http://ipython.org)
87 * IPython (http://ipython.org)
88
88
89 In addition, the following dependencies are needed to run the demos described
89 In addition, the following dependencies are needed to run the demos described
90 in this document.
90 in this document.
91
91
92 * NumPy and SciPy (http://www.scipy.org)
92 * NumPy and SciPy (http://www.scipy.org)
93 * Matplotlib (http://matplotlib.sourceforge.net/)
93 * Matplotlib (http://matplotlib.sourceforge.net/)
94
94
95 The easiest way of obtaining these dependencies is through the Enthought
95 The easiest way of obtaining these dependencies is through the Enthought
96 Python Distribution (EPD) (http://www.enthought.com/products/epd.php). EPD is
96 Python Distribution (EPD) (http://www.enthought.com/products/epd.php). EPD is
97 produced by Enthought, Inc. and contains all of these packages and others in a
97 produced by Enthought, Inc. and contains all of these packages and others in a
98 single installer and is available free for academic users. While it is also
98 single installer and is available free for academic users. While it is also
99 possible to download and install each package individually, this is a tedious
99 possible to download and install each package individually, this is a tedious
100 process. Thus, we highly recommend using EPD to install these packages on
100 process. Thus, we highly recommend using EPD to install these packages on
101 Windows.
101 Windows.
102
102
103 Regardless of how you install the dependencies, here are the steps you will
103 Regardless of how you install the dependencies, here are the steps you will
104 need to follow:
104 need to follow:
105
105
106 1. Install all of the packages listed above, either individually or using EPD
106 1. Install all of the packages listed above, either individually or using EPD
107 on the head node, compute nodes and user workstations.
107 on the head node, compute nodes and user workstations.
108
108
109 2. Make sure that :file:`C:\\Python27` and :file:`C:\\Python27\\Scripts` are
109 2. Make sure that :file:`C:\\Python27` and :file:`C:\\Python27\\Scripts` are
110 in the system :envvar:`%PATH%` variable on each node.
110 in the system :envvar:`%PATH%` variable on each node.
111
111
112 3. Install the latest development version of IPython. This can be done by
112 3. Install the latest development version of IPython. This can be done by
113 downloading the development version from the IPython website
113 downloading the development version from the IPython website
114 (http://ipython.org) and following the installation instructions.
114 (http://ipython.org) and following the installation instructions.
115
115
116 Further details about installing IPython or its dependencies can be found in
116 Further details about installing IPython or its dependencies can be found in
117 the online IPython documentation (http://ipython.org/documentation.html).
117 the online IPython documentation (http://ipython.org/documentation.html).
118 Once you are finished with the installation, you can try IPython out by
118 Once you are finished with the installation, you can try IPython out by
119 opening a Windows Command Prompt and typing ``ipython``. This will
119 opening a Windows Command Prompt and typing ``ipython``. This will
120 start IPython's interactive shell and you should see something like the
120 start IPython's interactive shell and you should see something like the
121 following screenshot:
121 following screenshot:
122
122
123 .. image:: figs/ipython_shell.*
123 .. image:: figs/ipython_shell.*
124
124
125 Starting an IPython cluster
125 Starting an IPython cluster
126 ===========================
126 ===========================
127
127
128 To use IPython's parallel computing capabilities, you will need to start an
128 To use IPython's parallel computing capabilities, you will need to start an
129 IPython cluster. An IPython cluster consists of one controller and multiple
129 IPython cluster. An IPython cluster consists of one controller and multiple
130 engines:
130 engines:
131
131
132 IPython controller
132 IPython controller
133 The IPython controller manages the engines and acts as a gateway between
133 The IPython controller manages the engines and acts as a gateway between
134 the engines and the client, which runs in the user's interactive IPython
134 the engines and the client, which runs in the user's interactive IPython
135 session. The controller is started using the :command:`ipcontroller`
135 session. The controller is started using the :command:`ipcontroller`
136 command.
136 command.
137
137
138 IPython engine
138 IPython engine
139 IPython engines run a user's Python code in parallel on the compute nodes.
139 IPython engines run a user's Python code in parallel on the compute nodes.
140 Engines are started using the :command:`ipengine` command.
140 Engines are started using the :command:`ipengine` command.
141
141
142 Once these processes are started, a user can run Python code interactively and
142 Once these processes are started, a user can run Python code interactively and
143 in parallel on the engines from within the IPython shell using an appropriate
143 in parallel on the engines from within the IPython shell using an appropriate
144 client. This includes the ability to interact with, plot and visualize data
144 client. This includes the ability to interact with, plot and visualize data
145 from the engines.
145 from the engines.

IPython has a command line program called :command:`ipcluster` that automates
all aspects of starting the controller and engines on the compute nodes.
:command:`ipcluster` has full support for the Windows HPC job scheduler,
meaning that :command:`ipcluster` can use this job scheduler to start the
controller and engines. In our experience, the Windows HPC job scheduler is
particularly well suited for interactive applications, such as IPython. Once
:command:`ipcluster` is configured properly, a user can start an IPython
cluster from their local workstation almost instantly, without having to log
on to the head node (as is typically required by Unix based job schedulers).
This enables a user to move seamlessly between serial and parallel
computations.

In this section we show how to use :command:`ipcluster` to start an IPython
cluster using the Windows HPC Server 2008 job scheduler. To make sure that
:command:`ipcluster` is installed and working properly, you should first try
to start an IPython cluster on your local host. To do this, open a Windows
Command Prompt and type the following command::

    ipcluster start -n 2

You should see a number of messages printed to the screen, ending with
"IPython cluster: started". The result should look something like the
following screenshot:

.. image:: figs/ipcluster_start.*

At this point, the controller and two engines are running on your local host.
This configuration is useful for testing and for situations where you want to
take advantage of multiple cores on your local computer.

Now that we have confirmed that :command:`ipcluster` is working properly, we
describe how to configure and run an IPython cluster on an actual compute
cluster running Windows HPC Server 2008. Here is an outline of the needed
steps:

1. Create a cluster profile using: ``ipython profile create --parallel --profile=mycluster``

2. Edit configuration files in the directory :file:`.ipython\\profile_mycluster`

3. Start the cluster using: ``ipcluster start --profile=mycluster -n 32``

Creating a cluster profile
--------------------------

In most cases, you will have to create a cluster profile to use IPython on a
cluster. A cluster profile is a name (like "mycluster") that is associated
with a particular cluster configuration. The profile name is used by
:command:`ipcluster` when working with the cluster.

Associated with each cluster profile is a cluster directory. This cluster
directory is a specially named directory (typically located in the
:file:`.ipython` subdirectory of your home directory) that contains the
configuration files for a particular cluster profile, as well as log files and
security keys. The naming convention for cluster directories is
:file:`profile_<profile name>`. Thus, the cluster directory for a profile named
"foo" would be :file:`.ipython\\profile_foo`.
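
For orientation, a cluster directory for the "mycluster" profile looks
roughly like the sketch below (the exact contents vary with the IPython
version, and some files are only created once the cluster has been started)::

    .ipython\
        profile_mycluster\
            ipcluster_config.py      # configuration files you will edit
            ipcontroller_config.py
            ipengine_config.py
            log\                     # log files from the controller and engines
            security\                # connection files and security keys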

To create a new cluster profile (named "mycluster") and the associated cluster
directory, type the following command at the Windows Command Prompt::

    ipython profile create --parallel --profile=mycluster

The output of this command is shown in the screenshot below. Notice how the
command prints out the location of the newly created cluster directory.

.. image:: figs/ipcluster_create.*

Configuring a cluster profile
-----------------------------

Next, you will need to configure the newly created cluster profile by editing
the following configuration files in the cluster directory:

* :file:`ipcluster_config.py`
* :file:`ipcontroller_config.py`
* :file:`ipengine_config.py`

When :command:`ipcluster` is run, these configuration files are used to
determine how the engines and controller will be started. In most cases,
you will only have to set a few of the attributes in these files.
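
The configuration files are ordinary Python files. Each generated file obtains
the configuration object with ``get_config()`` at the top, and you then assign
to attributes on it. As a minimal sketch (the attribute value here is just an
illustration), an :file:`ipcluster_config.py` that only changes the default
number of engines could look like this::

    c = get_config()

    # Number of engines to start when no -n option is given on the
    # command line (illustrative value).
    c.IPClusterEngines.n = 8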

To configure :command:`ipcluster` to use the Windows HPC job scheduler, you
will need to edit the following attributes in the file
:file:`ipcluster_config.py`::

    # Set these at the top of the file to tell ipcluster to use the
    # Windows HPC job scheduler.
    c.IPClusterStart.controller_launcher_class = 'WindowsHPCControllerLauncher'
    c.IPClusterEngines.engine_launcher_class = 'WindowsHPCEngineSetLauncher'

    # Set these to the host name of the scheduler (head node) of your cluster.
    c.WindowsHPCControllerLauncher.scheduler = 'HEADNODE'
    c.WindowsHPCEngineSetLauncher.scheduler = 'HEADNODE'

There are a number of other configuration attributes that can be set, but
in most cases these will be sufficient to get you started.

.. warning::
    If any of your configuration attributes involve specifying the location
    of shared directories or files, you must make sure that you use UNC paths
    like :file:`\\\\host\\share`. It is also important that you specify
    these paths using raw Python strings: ``r'\\host\share'``, so that the
    backslashes are not interpreted as escape characters.
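
For example, pointing a working directory at a network share could look like
the line below. The attribute name is only illustrative (check the help
strings in the generated configuration files for the attributes you actually
need to set); the point is the raw-string UNC path::

    # Hypothetical attribute; note the raw string and the UNC path.
    c.IPEngineApp.work_dir = r'\\HEADNODE\ipython\work'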

Starting the cluster profile
----------------------------

Once a cluster profile has been configured, starting an IPython cluster using
the profile is simple::

    ipcluster start --profile=mycluster -n 32

The ``-n`` option tells :command:`ipcluster` how many engines to start (in
this case 32). Stopping the cluster is as simple as typing Control-C.

Using the HPC Job Manager
-------------------------

When ``ipcluster start`` is run the first time, :command:`ipcluster` creates
two XML job description files in the cluster directory:

* :file:`ipcontroller_job.xml`
* :file:`ipengineset_job.xml`

Once these files have been created, they can be imported into the HPC Job
Manager application. Then, the controller and engines for that profile can be
started using the HPC Job Manager directly, without using :command:`ipcluster`.
However, anytime the cluster profile is re-configured, ``ipcluster start``
must be run again to regenerate the XML job description files. The
following screenshot shows what the HPC Job Manager interface looks like
with a running IPython cluster.

.. image:: figs/hpc_job_manager.*

Performing a simple interactive parallel computation
====================================================

Once you have started your IPython cluster, you can start to use it. To do
this, open up a new Windows Command Prompt and start up IPython's interactive
shell by typing::

    ipython

Then you can create a :class:`MultiEngineClient` instance for your profile and
use the resulting instance to do a simple interactive parallel computation. In
the code and screenshot that follow, we take a simple Python function and
apply it to each element of an array of integers in parallel using the
:meth:`MultiEngineClient.map` method:

.. sourcecode:: ipython

    In [1]: from IPython.parallel import *

    In [2]: mec = MultiEngineClient(profile='mycluster')

    In [3]: mec.get_ids()
    Out[3]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]

    In [4]: def f(x):
       ...:     return x**10

    In [5]: mec.map(f, range(15))  # f is applied in parallel
    Out[5]:
    [0,
     1,
     1024,
     59049,
     1048576,
     9765625,
     60466176,
     282475249,
     1073741824,
     3486784401L,
     10000000000L,
     25937424601L,
     61917364224L,
     137858491849L,
     289254654976L]

The :meth:`map` method has the same signature as Python's builtin :func:`map`
function, but runs the calculation in parallel. More involved examples of using
:class:`MultiEngineClient` are provided in the examples that follow.
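
As a quick way to see the correspondence, calling the builtin directly in the
same session computes the same fifteen values, only serially on the client
(a minimal comparison, shown purely for illustration):

.. sourcecode:: ipython

    In [6]: map(f, range(15))  # builtin map: same values, computed serially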

.. image:: figs/mec_simple.*