##// END OF EJS Templates
Merging vvatsa's ipcluster-dev branch....
Brian Granger -
r1833:e4b173fe merge
parent child Browse files
Show More
1 NO CONTENT: modified file
NO CONTENT: modified file
1 NO CONTENT: modified file
NO CONTENT: modified file
@@ -1,521 +1,723 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # encoding: utf-8
2 # encoding: utf-8
3
3
4 """Start an IPython cluster = (controller + engines)."""
4 """Start an IPython cluster = (controller + engines)."""
5
5
6 #-----------------------------------------------------------------------------
6 #-----------------------------------------------------------------------------
7 # Copyright (C) 2008 The IPython Development Team
7 # Copyright (C) 2008 The IPython Development Team
8 #
8 #
9 # Distributed under the terms of the BSD License. The full license is in
9 # Distributed under the terms of the BSD License. The full license is in
10 # the file COPYING, distributed as part of this software.
10 # the file COPYING, distributed as part of this software.
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12
12
13 #-----------------------------------------------------------------------------
13 #-----------------------------------------------------------------------------
14 # Imports
14 # Imports
15 #-----------------------------------------------------------------------------
15 #-----------------------------------------------------------------------------
16
16
17 import os
17 import os
18 import re
18 import re
19 import sys
19 import sys
20 import signal
20 import signal
21 import tempfile
21 pjoin = os.path.join
22 pjoin = os.path.join
22
23
23 from twisted.internet import reactor, defer
24 from twisted.internet import reactor, defer
24 from twisted.internet.protocol import ProcessProtocol
25 from twisted.internet.protocol import ProcessProtocol
25 from twisted.internet.error import ProcessDone, ProcessTerminated
26 from twisted.internet.error import ProcessDone, ProcessTerminated
26 from twisted.internet.utils import getProcessOutput
27 from twisted.internet.utils import getProcessOutput
27 from twisted.python import failure, log
28 from twisted.python import failure, log
28
29
29 from IPython.external import argparse
30 from IPython.external import argparse
30 from IPython.external import Itpl
31 from IPython.external import Itpl
31 from IPython.genutils import get_ipython_dir, num_cpus
32 from IPython.genutils import get_ipython_dir, num_cpus
32 from IPython.kernel.fcutil import have_crypto
33 from IPython.kernel.fcutil import have_crypto
33 from IPython.kernel.error import SecurityError
34 from IPython.kernel.error import SecurityError
34 from IPython.kernel.fcutil import have_crypto
35 from IPython.kernel.fcutil import have_crypto
35 from IPython.kernel.twistedutil import gatherBoth
36 from IPython.kernel.twistedutil import gatherBoth
36 from IPython.kernel.util import printer
37 from IPython.kernel.util import printer
37
38
38
39
39 #-----------------------------------------------------------------------------
40 #-----------------------------------------------------------------------------
40 # General process handling code
41 # General process handling code
41 #-----------------------------------------------------------------------------
42 #-----------------------------------------------------------------------------
42
43
def find_exe(cmd):
    """Return the full path of a command's executable on Windows.

    Searches the PATH for ``cmd + '.exe'`` first, then falls back to
    ``cmd + '.bat'``.  Requires pywin32.

    Raises ImportError if pywin32 (win32api) is not installed.
    """
    try:
        import win32api
    except ImportError:
        raise ImportError('you need to have pywin32 installed for this to work')
    else:
        try:
            # SearchPath returns (full_path, offset_of_filename).
            # Fixed: the result variable was misspelled 'offest'; also
            # narrowed the bare except to the pywin32 error type.
            (path, offset) = win32api.SearchPath(os.environ['PATH'], cmd + '.exe')
        except win32api.error:
            # No .exe on the PATH; fall back to a batch file of the same name.
            (path, offset) = win32api.SearchPath(os.environ['PATH'], cmd + '.bat')
        return path
54
55
class ProcessStateError(Exception):
    """Raised when a process operation is attempted in an invalid state."""
57
58
class UnknownStatus(Exception):
    """Raised when a child process reports an unrecognized exit status."""
60
61
class LauncherProcessProtocol(ProcessProtocol):
    """
    A ProcessProtocol to go with the ProcessLauncher.

    Relays process lifecycle events (start, exit, stdout/stderr data)
    from Twisted back to the owning ProcessLauncher.
    """
    def __init__(self, process_launcher):
        # The ProcessLauncher whose deferreds this protocol fires.
        self.process_launcher = process_launcher

    def connectionMade(self):
        # Called by Twisted once the child has been spawned; report the
        # new pid back to the launcher.
        self.process_launcher.fire_start_deferred(self.transport.pid)

    def processEnded(self, status):
        # status is a Failure wrapping either ProcessDone (clean exit)
        # or ProcessTerminated (non-zero exit or killed by a signal).
        value = status.value
        if isinstance(value, ProcessDone):
            self.process_launcher.fire_stop_deferred(0)
        elif isinstance(value, ProcessTerminated):
            self.process_launcher.fire_stop_deferred(
                {'exit_code':value.exitCode,
                 'signal':value.signal,
                 'status':value.status
                }
            )
        else:
            raise UnknownStatus("unknown exit status, this is probably a bug in Twisted")

    def outReceived(self, data):
        # Forward child stdout to the Twisted log.
        log.msg(data)

    def errReceived(self, data):
        # Forward child stderr to the Twisted error log.
        log.err(data)
90
91
class ProcessLauncher(object):
    """
    Start and stop an external process in an asynchronous manner.

    Currently this uses deferreds to notify other parties of process state
    changes. This is an awkward design and should be moved to using
    a formal NotificationCenter.
    """
    def __init__(self, cmd_and_args):
        # cmd_and_args[0] is the executable; the entire list (including
        # the executable itself) is passed to spawnProcess as argv.
        self.cmd = cmd_and_args[0]
        self.args = cmd_and_args
        self._reset()

    def _reset(self):
        # (Re)initialise all per-run state.
        self.process_protocol = None
        self.pid = None
        self.start_deferred = None
        self.stop_deferreds = []
        self.state = 'before' # before, running, or after

    @property
    def running(self):
        # True only while the child process is alive.
        if self.state == 'running':
            return True
        else:
            return False

    def fire_start_deferred(self, pid):
        # Called by LauncherProcessProtocol.connectionMade once the child
        # has been spawned: record the pid and fire the start deferred.
        self.pid = pid
        self.state = 'running'
        log.msg('Process %r has started with pid=%i' % (self.args, pid))
        self.start_deferred.callback(pid)

    def start(self):
        # Spawn the process; returns a deferred that fires with its pid.
        # May only be called once: in any state other than 'before' a
        # failed deferred carrying ProcessStateError is returned.
        if self.state == 'before':
            self.process_protocol = LauncherProcessProtocol(self)
            self.start_deferred = defer.Deferred()
            self.process_transport = reactor.spawnProcess(
                self.process_protocol,
                self.cmd,
                self.args,
                env=os.environ
            )
            return self.start_deferred
        else:
            s = 'the process has already been started and has state: %r' % \
                self.state
            return defer.fail(ProcessStateError(s))

    def get_stop_deferred(self):
        # Return a deferred that fires with the exit status when the
        # process ends; fails with ProcessStateError once it is complete.
        if self.state == 'running' or self.state == 'before':
            d = defer.Deferred()
            self.stop_deferreds.append(d)
            return d
        else:
            s = 'this process is already complete'
            return defer.fail(ProcessStateError(s))

    def fire_stop_deferred(self, exit_code):
        # Called by the protocol on process exit.  exit_code is 0 for a
        # clean exit or a dict with exit_code/signal/status otherwise.
        log.msg('Process %r has stopped with %r' % (self.args, exit_code))
        self.state = 'after'
        for d in self.stop_deferreds:
            d.callback(exit_code)

    def signal(self, sig):
        """
        Send a signal to the process.

        The argument sig can be ('KILL','INT', etc.) or any signal number.
        """
        # Silently ignored unless the process is currently running.
        if self.state == 'running':
            self.process_transport.signalProcess(sig)

    # def __del__(self):
    #     self.signal('KILL')

    def interrupt_then_kill(self, delay=1.0):
        # Ask nicely with SIGINT, then force a SIGKILL after `delay` seconds.
        self.signal('INT')
        reactor.callLater(delay, self.signal, 'KILL')
170
171
171
172
172 #-----------------------------------------------------------------------------
173 #-----------------------------------------------------------------------------
173 # Code for launching controller and engines
174 # Code for launching controller and engines
174 #-----------------------------------------------------------------------------
175 #-----------------------------------------------------------------------------
175
176
176
177
class ControllerLauncher(ProcessLauncher):
    """Launch an ipcontroller process, resolving the script per platform."""

    def __init__(self, extra_args=None):
        if sys.platform == 'win32':
            # This logic is needed because the ipcontroller script doesn't
            # always get installed in the same way or in the same location,
            # so resolve it from the package itself.
            from IPython.kernel.scripts import ipcontroller
            script = ipcontroller.__file__.replace('.pyc', '.py')
            # The -u option turns on unbuffered output, which is required
            # on Win32 to prevent wierd conflict and problems with Twisted
            cmd_and_args = [find_exe('python'), '-u', script]
        else:
            cmd_and_args = ['ipcontroller']
        self.extra_args = extra_args
        if extra_args is not None:
            cmd_and_args.extend(extra_args)

        ProcessLauncher.__init__(self, cmd_and_args)
195
196
196
197
class EngineLauncher(ProcessLauncher):
    """Launch an ipengine process, resolving the script per platform."""

    def __init__(self, extra_args=None):
        if sys.platform == 'win32':
            # This logic is needed because the ipengine script doesn't
            # always get installed in the same way or in the same location,
            # so resolve it from the package itself.
            from IPython.kernel.scripts import ipengine
            script = ipengine.__file__.replace('.pyc', '.py')
            # The -u option turns on unbuffered output, which is required
            # on Win32 to prevent wierd conflict and problems with Twisted
            cmd_and_args = [find_exe('python'), '-u', script]
        else:
            cmd_and_args = ['ipengine']
        self.extra_args = extra_args
        if extra_args is not None:
            cmd_and_args.extend(extra_args)

        ProcessLauncher.__init__(self, cmd_and_args)
215
216
216
217
class LocalEngineSet(object):
    """Manage a set of engine processes started on the local machine."""

    def __init__(self, extra_args=None):
        # Extra command-line arguments passed to every ipengine launched.
        self.extra_args = extra_args
        self.launchers = []

    def start(self, n):
        # Launch n local engines; returns a deferred that fires with
        # the list of their pids.
        dlist = []
        for i in range(n):
            el = EngineLauncher(extra_args=self.extra_args)
            d = el.start()
            self.launchers.append(el)
            dlist.append(d)
        dfinal = gatherBoth(dlist, consumeErrors=True)
        dfinal.addCallback(self._handle_start)
        return dfinal

    def _handle_start(self, r):
        # r is the list of pids gathered from the individual launchers.
        log.msg('Engines started with pids: %r' % r)
        return r

    def _handle_stop(self, r):
        # r is the list of exit statuses gathered from the launchers.
        log.msg('Engines received signal: %r' % r)
        return r

    def signal(self, sig):
        # Send signal sig to every engine; returns a deferred that fires
        # once all of them have stopped.
        dlist = []
        for el in self.launchers:
            d = el.get_stop_deferred()
            dlist.append(d)
            el.signal(sig)
        dfinal = gatherBoth(dlist, consumeErrors=True)
        dfinal.addCallback(self._handle_stop)
        return dfinal

    def interrupt_then_kill(self, delay=1.0):
        # SIGINT every engine, then SIGKILL each after `delay` seconds;
        # returns a deferred firing when all engines have stopped.
        dlist = []
        for el in self.launchers:
            d = el.get_stop_deferred()
            dlist.append(d)
            el.interrupt_then_kill(delay)
        dfinal = gatherBoth(dlist, consumeErrors=True)
        dfinal.addCallback(self._handle_stop)
        return dfinal
261
262
262
263
class BatchEngineSet(object):
    """Base class for starting engines through a batch queue system.

    Subclasses must fill in submit_command, delete_command and
    job_id_regexp.  See PBSEngineSet.
    """

    # Subclasses must fill these in. See PBSEngineSet
    submit_command = ''
    delete_command = ''
    job_id_regexp = ''

    def __init__(self, template_file, **kwargs):
        # template_file is an Itpl template for the batch script; kwargs
        # become substitution variables available to the template.
        self.template_file = template_file
        self.context = {}
        self.context.update(kwargs)
        self.batch_file = self.template_file+'-run'

    def parse_job_id(self, output):
        # Extract the job id from the submit command's output using
        # job_id_regexp; raises if it cannot be determined.
        m = re.match(self.job_id_regexp, output)
        if m is not None:
            job_id = m.group()
        else:
            raise Exception("job id couldn't be determined: %s" % output)
        self.job_id = job_id
        log.msg('Job started with job id: %r' % job_id)
        return job_id

    def write_batch_script(self, n):
        # Instantiate the batch script template for n engines and write
        # the result next to the template as <template>-run.
        self.context['n'] = n
        # Fixed: the original used open(...).read(), leaking the file
        # handle; read through a named handle and close it explicitly.
        f = open(self.template_file, 'r')
        template = f.read()
        f.close()
        log.msg('Using template for batch script: %s' % self.template_file)
        script_as_string = Itpl.itplns(template, self.context)
        log.msg('Writing instantiated batch script: %s' % self.batch_file)
        f = open(self.batch_file,'w')
        f.write(script_as_string)
        f.close()

    def handle_error(self, f):
        # Print and re-raise any failure from the submit command.
        f.printTraceback()
        f.raiseException()

    def start(self, n):
        # Submit a batch job that starts n engines; returns a deferred
        # that fires with the job id.
        self.write_batch_script(n)
        d = getProcessOutput(self.submit_command,
                             [self.batch_file],env=os.environ)
        d.addCallback(self.parse_job_id)
        d.addErrback(self.handle_error)
        return d

    def kill(self):
        # Delete the previously submitted job from the queue.
        d = getProcessOutput(self.delete_command,
                             [self.job_id],env=os.environ)
        return d
312
313
class PBSEngineSet(BatchEngineSet):
    """BatchEngineSet for the PBS batch system (qsub/qdel)."""

    submit_command = 'qsub'
    delete_command = 'qdel'
    # Raw string so \d is a regex digit class rather than a
    # (deprecated) string escape sequence.
    job_id_regexp = r'\d+'

    def __init__(self, template_file, **kwargs):
        BatchEngineSet.__init__(self, template_file, **kwargs)
321
322
322
323
# Shell wrapper copied to each remote host: runs its arguments detached
# from the ssh session (output discarded) and prints the new pid.
sshx_template="""#!/bin/sh
"$@" &> /dev/null &
echo $!
"""

# Shell script that TERM-kills every ipengine process owned by the
# invoking user on the remote host.
engine_killer_template="""#!/bin/sh
ps -fu `whoami` | grep '[i]pengine' | awk '{print $2}' | xargs kill -TERM
"""
332
class SSHEngineSet(object):
    # Class-level copies so subclasses can override the templates.
    sshx_template=sshx_template
    engine_killer_template=engine_killer_template

    def __init__(self, engine_hosts, sshx=None, ipengine="ipengine"):
        """Start a controller on localhost and engines using ssh.

        The engine_hosts argument is a dict with hostnames as keys and
        the number of engines (int) as values. sshx is the name of a local
        file that will be used to run remote commands. This file is used
        to setup the environment properly.
        """

        self.temp_dir = tempfile.gettempdir()
        if sshx is not None:
            self.sshx = sshx
        else:
            # Write the sshx.sh file locally from our template.
            self.sshx = os.path.join(
                self.temp_dir,
                '%s-main-sshx.sh' % os.environ['USER']
            )
            f = open(self.sshx, 'w')
            f.writelines(self.sshx_template)
            f.close()
        self.engine_command = ipengine
        self.engine_hosts = engine_hosts
        # Write the engine killer script file locally from our template.
        self.engine_killer = os.path.join(
            self.temp_dir,
            '%s-local-engine_killer.sh' % os.environ['USER']
        )
        f = open(self.engine_killer, 'w')
        f.writelines(self.engine_killer_template)
        f.close()

    def start(self, send_furl=False):
        # Start all engines on all hosts, optionally copying the furl
        # file to each host first.
        dlist = []
        for host in self.engine_hosts.keys():
            count = self.engine_hosts[host]
            d = self._start(host, count, send_furl)
            dlist.append(d)
        return gatherBoth(dlist, consumeErrors=True)

    def _start(self, hostname, count=1, send_furl=False):
        # Chain: (optional) furl scp -> sshx scp -> launch engines.
        if send_furl:
            d = self._scp_furl(hostname)
        else:
            d = defer.succeed(None)
        d.addCallback(lambda r: self._scp_sshx(hostname))
        d.addCallback(lambda r: self._ssh_engine(hostname, count))
        return d

    def _scp_furl(self, hostname):
        # Copy the controller's engine furl file to the remote host.
        scp_cmd = "scp ~/.ipython/security/ipcontroller-engine.furl %s:.ipython/security/" % (hostname)
        cmd_list = scp_cmd.split()
        cmd_list[1] = os.path.expanduser(cmd_list[1])
        log.msg('Copying furl file: %s' % scp_cmd)
        d = getProcessOutput(cmd_list[0], cmd_list[1:], env=os.environ)
        return d

    def _scp_sshx(self, hostname):
        # Copy the sshx wrapper script into the remote host's temp dir.
        scp_cmd = "scp %s %s:%s/%s-sshx.sh" % (
            self.sshx, hostname,
            self.temp_dir, os.environ['USER']
        )
        # Fixed: removed a stray debugging `print` statement that wrote
        # a blank line to stdout on every copy.
        log.msg("Copying sshx: %s" % scp_cmd)
        sshx_scp = scp_cmd.split()
        d = getProcessOutput(sshx_scp[0], sshx_scp[1:], env=os.environ)
        return d

    def _ssh_engine(self, hostname, count):
        # Launch `count` engines on hostname through the sshx wrapper.
        exec_engine = "ssh %s sh %s/%s-sshx.sh %s" % (
            hostname, self.temp_dir,
            os.environ['USER'], self.engine_command
        )
        cmds = exec_engine.split()
        dlist = []
        log.msg("about to start engines...")
        for i in range(count):
            log.msg('Starting engines: %s' % exec_engine)
            d = getProcessOutput(cmds[0], cmds[1:], env=os.environ)
            dlist.append(d)
        return gatherBoth(dlist, consumeErrors=True)

    def kill(self):
        # Kill all remote engines on every host.
        dlist = []
        for host in self.engine_hosts.keys():
            d = self._killall(host)
            dlist.append(d)
        return gatherBoth(dlist, consumeErrors=True)

    def _killall(self, hostname):
        # Copy the killer script over, then run it.
        d = self._scp_engine_killer(hostname)
        d.addCallback(lambda r: self._ssh_kill(hostname))
        # d.addErrback(self._exec_err)
        return d

    def _scp_engine_killer(self, hostname):
        # Copy the engine_killer script into the remote host's temp dir.
        scp_cmd = "scp %s %s:%s/%s-engine_killer.sh" % (
            self.engine_killer,
            hostname,
            self.temp_dir,
            os.environ['USER']
        )
        cmds = scp_cmd.split()
        log.msg('Copying engine_killer: %s' % scp_cmd)
        d = getProcessOutput(cmds[0], cmds[1:], env=os.environ)
        return d

    def _ssh_kill(self, hostname):
        # Run the previously copied engine_killer script on the host.
        kill_cmd = "ssh %s sh %s/%s-engine_killer.sh" % (
            hostname,
            self.temp_dir,
            os.environ['USER']
        )
        log.msg('Killing engine: %s' % kill_cmd)
        kill_cmd = kill_cmd.split()
        d = getProcessOutput(kill_cmd[0], kill_cmd[1:], env=os.environ)
        return d

    def _exec_err(self, r):
        # Simple errback that just logs the failure.
        log.msg(r)
457
323 #-----------------------------------------------------------------------------
458 #-----------------------------------------------------------------------------
324 # Main functions for the different types of clusters
459 # Main functions for the different types of clusters
325 #-----------------------------------------------------------------------------
460 #-----------------------------------------------------------------------------
326
461
327 # TODO:
462 # TODO:
328 # The logic in these codes should be moved into classes like LocalCluster
463 # The logic in these codes should be moved into classes like LocalCluster
329 # MpirunCluster, PBSCluster, etc. This would remove alot of the duplications.
464 # MpirunCluster, PBSCluster, etc. This would remove alot of the duplications.
330 # The main functions should then just parse the command line arguments, create
465 # The main functions should then just parse the command line arguments, create
331 # the appropriate class and call a 'start' method.
466 # the appropriate class and call a 'start' method.
332
467
def check_security(args, cont_args):
    """Validate the security flags and extend cont_args accordingly.

    Returns True when startup may proceed.  If secure mode is needed
    (either -x or -y not given) but pyOpenSSL is unavailable, an error
    is logged, the reactor is stopped and False is returned.
    """
    secure_mode_needed = not (args.x and args.y)
    if secure_mode_needed and not have_crypto:
        log.err("""
OpenSSL/pyOpenSSL is not available, so we can't run in secure mode.
Try running ipcluster with the -xy flags: ipcluster local -xy -n 4""")
        reactor.stop()
        return False
    for enabled, option in ((args.x, '-x'), (args.y, '-y')):
        if enabled:
            cont_args.append(option)
    return True
345
480
481
def main_local(args):
    # Start a cluster (one controller + args.n engines) entirely on
    # localhost, wiring SIGINT to a clean shutdown of everything.
    cont_args = []
    cont_args.append('--logfile=%s' % pjoin(args.logdir,'ipcontroller'))

    # Check security settings before proceeding
    if not check_security(args, cont_args):
        return

    cl = ControllerLauncher(extra_args=cont_args)
    dstart = cl.start()
    def start_engines(cont_pid):
        # Engine logfiles are tagged with the controller's pid.
        engine_args = []
        engine_args.append('--logfile=%s' % \
            pjoin(args.logdir,'ipengine%s-' % cont_pid))
        eset = LocalEngineSet(extra_args=engine_args)
        def shutdown(signum, frame):
            log.msg('Stopping local cluster')
            # We are still playing with the times here, but these seem
            # to be reliable in allowing everything to exit cleanly.
            eset.interrupt_then_kill(0.5)
            cl.interrupt_then_kill(0.5)
            reactor.callLater(1.0, reactor.stop)
        # Ctrl-C tears the whole cluster down cleanly.
        signal.signal(signal.SIGINT,shutdown)
        d = eset.start(args.n)
        return d
    def delay_start(cont_pid):
        # This is needed because the controller doesn't start listening
        # right when it starts and the controller needs to write
        # furl files for the engine to pick up
        reactor.callLater(1.0, start_engines, cont_pid)
    dstart.addCallback(delay_start)
    dstart.addErrback(lambda f: f.raiseException())
378
514
515
def main_mpirun(args):
    # Start a controller locally and launch all engines with a single
    # mpirun invocation.
    cont_args = []
    cont_args.append('--logfile=%s' % pjoin(args.logdir,'ipcontroller'))

    # Check security settings before proceeding
    if not check_security(args, cont_args):
        return

    cl = ControllerLauncher(extra_args=cont_args)
    dstart = cl.start()
    def start_engines(cont_pid):
        # Build: mpirun -n <n> ipengine -l <logfile> [--mpi=<mpi>]
        raw_args = ['mpirun']
        raw_args.extend(['-n',str(args.n)])
        raw_args.append('ipengine')
        raw_args.append('-l')
        raw_args.append(pjoin(args.logdir,'ipengine%s-' % cont_pid))
        if args.mpi:
            raw_args.append('--mpi=%s' % args.mpi)
        eset = ProcessLauncher(raw_args)
        def shutdown(signum, frame):
            log.msg('Stopping local cluster')
            # We are still playing with the times here, but these seem
            # to be reliable in allowing everything to exit cleanly.
            eset.interrupt_then_kill(1.0)
            cl.interrupt_then_kill(1.0)
            reactor.callLater(2.0, reactor.stop)
        # Ctrl-C tears the whole cluster down cleanly.
        signal.signal(signal.SIGINT,shutdown)
        d = eset.start()
        return d
    def delay_start(cont_pid):
        # This is needed because the controller doesn't start listening
        # right when it starts and the controller needs to write
        # furl files for the engine to pick up
        reactor.callLater(1.0, start_engines, cont_pid)
    dstart.addCallback(delay_start)
    dstart.addErrback(lambda f: f.raiseException())
415
552
553
def main_pbs(args):
    # Start a controller locally and submit the engines as a PBS batch
    # job built from args.pbsscript.
    cont_args = []
    cont_args.append('--logfile=%s' % pjoin(args.logdir,'ipcontroller'))

    # Check security settings before proceeding
    if not check_security(args, cont_args):
        return

    cl = ControllerLauncher(extra_args=cont_args)
    dstart = cl.start()
    def start_engines(r):
        pbs_set = PBSEngineSet(args.pbsscript)
        def shutdown(signum, frame):
            log.msg('Stopping pbs cluster')
            # Delete the batch job, then bring down the controller and
            # finally the reactor.
            d = pbs_set.kill()
            d.addBoth(lambda _: cl.interrupt_then_kill(1.0))
            d.addBoth(lambda _: reactor.callLater(2.0, reactor.stop))
        # Ctrl-C tears the whole cluster down cleanly.
        signal.signal(signal.SIGINT,shutdown)
        d = pbs_set.start(args.n)
        return d
    dstart.addCallback(start_engines)
    dstart.addErrback(lambda f: f.raiseException())
438
576
439
577
578 def main_ssh(args):
579 """Start a controller on localhost and engines using ssh.
580
581 Your clusterfile should look like::
582
583 send_furl = False # True, if you want
584 engines = {
585 'engine_host1' : engine_count,
586 'engine_host2' : engine_count2
587 }
588 """
589 clusterfile = {}
590 execfile(args.clusterfile, clusterfile)
591 if not clusterfile.has_key('send_furl'):
592 clusterfile['send_furl'] = False
593
594 cont_args = []
595 cont_args.append('--logfile=%s' % pjoin(args.logdir,'ipcontroller'))
596
597 # Check security settings before proceeding
598 if not check_security(args, cont_args):
599 return
600
601 cl = ControllerLauncher(extra_args=cont_args)
602 dstart = cl.start()
603 def start_engines(cont_pid):
604 ssh_set = SSHEngineSet(clusterfile['engines'], sshx=args.sshx)
605 def shutdown(signum, frame):
606 d = ssh_set.kill()
607 # d.addErrback(log.err)
608 cl.interrupt_then_kill(1.0)
609 reactor.callLater(2.0, reactor.stop)
610 signal.signal(signal.SIGINT,shutdown)
611 d = ssh_set.start(clusterfile['send_furl'])
612 return d
613
614 def delay_start(cont_pid):
615 reactor.callLater(1.0, start_engines, cont_pid)
616
617 dstart.addCallback(delay_start)
618 dstart.addErrback(lambda f: f.raiseException())
619
620
440 def get_args():
621 def get_args():
441 base_parser = argparse.ArgumentParser(add_help=False)
622 base_parser = argparse.ArgumentParser(add_help=False)
442 base_parser.add_argument(
623 base_parser.add_argument(
443 '-x',
624 '-x',
444 action='store_true',
625 action='store_true',
445 dest='x',
626 dest='x',
446 help='turn off client security'
627 help='turn off client security'
447 )
628 )
448 base_parser.add_argument(
629 base_parser.add_argument(
449 '-y',
630 '-y',
450 action='store_true',
631 action='store_true',
451 dest='y',
632 dest='y',
452 help='turn off engine security'
633 help='turn off engine security'
453 )
634 )
454 base_parser.add_argument(
635 base_parser.add_argument(
455 "--logdir",
636 "--logdir",
456 type=str,
637 type=str,
457 dest="logdir",
638 dest="logdir",
458 help="directory to put log files (default=$IPYTHONDIR/log)",
639 help="directory to put log files (default=$IPYTHONDIR/log)",
459 default=pjoin(get_ipython_dir(),'log')
640 default=pjoin(get_ipython_dir(),'log')
460 )
641 )
461 base_parser.add_argument(
642 base_parser.add_argument(
462 "-n",
643 "-n",
463 "--num",
644 "--num",
464 type=int,
645 type=int,
465 dest="n",
646 dest="n",
466 default=2,
647 default=2,
467 help="the number of engines to start"
648 help="the number of engines to start"
468 )
649 )
469
650
470 parser = argparse.ArgumentParser(
651 parser = argparse.ArgumentParser(
471 description='IPython cluster startup. This starts a controller and\
652 description='IPython cluster startup. This starts a controller and\
472 engines using various approaches. THIS IS A TECHNOLOGY PREVIEW AND\
653 engines using various approaches. THIS IS A TECHNOLOGY PREVIEW AND\
473 THE API WILL CHANGE SIGNIFICANTLY BEFORE THE FINAL RELEASE.'
654 THE API WILL CHANGE SIGNIFICANTLY BEFORE THE FINAL RELEASE.'
474 )
655 )
475 subparsers = parser.add_subparsers(
656 subparsers = parser.add_subparsers(
476 help='available cluster types. For help, do "ipcluster TYPE --help"')
657 help='available cluster types. For help, do "ipcluster TYPE --help"')
477
658
478 parser_local = subparsers.add_parser(
659 parser_local = subparsers.add_parser(
479 'local',
660 'local',
480 help='run a local cluster',
661 help='run a local cluster',
481 parents=[base_parser]
662 parents=[base_parser]
482 )
663 )
483 parser_local.set_defaults(func=main_local)
664 parser_local.set_defaults(func=main_local)
484
665
485 parser_mpirun = subparsers.add_parser(
666 parser_mpirun = subparsers.add_parser(
486 'mpirun',
667 'mpirun',
487 help='run a cluster using mpirun',
668 help='run a cluster using mpirun',
488 parents=[base_parser]
669 parents=[base_parser]
489 )
670 )
490 parser_mpirun.add_argument(
671 parser_mpirun.add_argument(
491 "--mpi",
672 "--mpi",
492 type=str,
673 type=str,
493 dest="mpi", # Don't put a default here to allow no MPI support
674 dest="mpi", # Don't put a default here to allow no MPI support
494 help="how to call MPI_Init (default=mpi4py)"
675 help="how to call MPI_Init (default=mpi4py)"
495 )
676 )
496 parser_mpirun.set_defaults(func=main_mpirun)
677 parser_mpirun.set_defaults(func=main_mpirun)
497
678
498 parser_pbs = subparsers.add_parser(
679 parser_pbs = subparsers.add_parser(
499 'pbs',
680 'pbs',
500 help='run a pbs cluster',
681 help='run a pbs cluster',
501 parents=[base_parser]
682 parents=[base_parser]
502 )
683 )
503 parser_pbs.add_argument(
684 parser_pbs.add_argument(
504 '--pbs-script',
685 '--pbs-script',
505 type=str,
686 type=str,
506 dest='pbsscript',
687 dest='pbsscript',
507 help='PBS script template',
688 help='PBS script template',
508 default='pbs.template'
689 default='pbs.template'
509 )
690 )
510 parser_pbs.set_defaults(func=main_pbs)
691 parser_pbs.set_defaults(func=main_pbs)
692
693 parser_ssh = subparsers.add_parser(
694 'ssh',
695 help='run a cluster using ssh, should have ssh-keys setup',
696 parents=[base_parser]
697 )
698 parser_ssh.add_argument(
699 '--clusterfile',
700 type=str,
701 dest='clusterfile',
702 help='python file describing the cluster',
703 default='clusterfile.py',
704 )
705 parser_ssh.add_argument(
706 '--sshx',
707 type=str,
708 dest='sshx',
709 help='sshx launcher helper'
710 )
711 parser_ssh.set_defaults(func=main_ssh)
712
511 args = parser.parse_args()
713 args = parser.parse_args()
512 return args
714 return args
513
715
514 def main():
716 def main():
515 args = get_args()
717 args = get_args()
516 reactor.callWhenRunning(args.func, args)
718 reactor.callWhenRunning(args.func, args)
517 log.startLogging(sys.stdout)
719 log.startLogging(sys.stdout)
518 reactor.run()
720 reactor.run()
519
721
520 if __name__ == '__main__':
722 if __name__ == '__main__':
521 main()
723 main()
@@ -1,393 +1,398 b''
1 .. _changes:
1 .. _changes:
2
2
3 ==========
3 ==========
4 What's new
4 What's new
5 ==========
5 ==========
6
6
7 .. contents::
7 .. contents::
8 ..
8 ..
9 1 Release 0.9.1
9 1 Release 0.9.1
10 2 Release 0.9
10 2 Release 0.9
11 2.1 New features
11 2.1 New features
12 2.2 Bug fixes
12 2.2 Bug fixes
13 2.3 Backwards incompatible changes
13 2.3 Backwards incompatible changes
14 2.4 Changes merged in from IPython1
14 2.4 Changes merged in from IPython1
15 2.4.1 New features
15 2.4.1 New features
16 2.4.2 Bug fixes
16 2.4.2 Bug fixes
17 2.4.3 Backwards incompatible changes
17 2.4.3 Backwards incompatible changes
18 3 Release 0.8.4
18 3 Release 0.8.4
19 4 Release 0.8.3
19 4 Release 0.8.3
20 5 Release 0.8.2
20 5 Release 0.8.2
21 6 Older releases
21 6 Older releases
22 ..
22 ..
23
23
24 Release dev
24 Release dev
25 ===========
25 ===========
26
26
27 New features
27 New features
28 ------------
28 ------------
29
29
30 * The new ipcluster now has a fully working ssh mode that should work on
31 Linux, Unix and OS X. Thanks to Vishal Vatsa for implementing this!
32
30 * The wonderful TextMate editor can now be used with %edit on OS X. Thanks
33 * The wonderful TextMate editor can now be used with %edit on OS X. Thanks
31 to Matt Foster for this patch.
34 to Matt Foster for this patch.
32
35
33 * Fully refactored :command:`ipcluster` command line program for starting
36 * Fully refactored :command:`ipcluster` command line program for starting
34 IPython clusters. This new version is a complete rewrite and 1) is fully
37 IPython clusters. This new version is a complete rewrite and 1) is fully
35 cross platform (we now use Twisted's process management), 2) has much
38 cross platform (we now use Twisted's process management), 2) has much
36 improved performance, 3) uses subcommands for different types of clusters,
39 improved performance, 3) uses subcommands for different types of clusters,
37 4) uses argparse for parsing command line options, 5) has better support
40 4) uses argparse for parsing command line options, 5) has better support
38 for starting clusters using :command:`mpirun`, 6) has experimental support
41 for starting clusters using :command:`mpirun`, 6) has experimental support
39 for starting engines using PBS. However, this new version of ipcluster
42 for starting engines using PBS. However, this new version of ipcluster
40 should be considered a technology preview. We plan on changing the API
43 should be considered a technology preview. We plan on changing the API
41 in significant ways before it is final.
44 in significant ways before it is final.
42
45
43 * The :mod:`argparse` module has been added to :mod:`IPython.external`.
46 * The :mod:`argparse` module has been added to :mod:`IPython.external`.
44
47
45 * Fully description of the security model added to the docs.
48 * Fully description of the security model added to the docs.
46
49
47 * cd completer: show bookmarks if no other completions are available.
50 * cd completer: show bookmarks if no other completions are available.
48
51
49 * sh profile: easy way to give 'title' to prompt: assign to variable
52 * sh profile: easy way to give 'title' to prompt: assign to variable
50 '_prompt_title'. It looks like this::
53 '_prompt_title'. It looks like this::
51
54
52 [~]|1> _prompt_title = 'sudo!'
55 [~]|1> _prompt_title = 'sudo!'
53 sudo![~]|2>
56 sudo![~]|2>
54
57
55 * %edit: If you do '%edit pasted_block', pasted_block
58 * %edit: If you do '%edit pasted_block', pasted_block
56 variable gets updated with new data (so repeated
59 variable gets updated with new data (so repeated
57 editing makes sense)
60 editing makes sense)
58
61
59 Bug fixes
62 Bug fixes
60 ---------
63 ---------
61
64
65 * Numerous bugs on Windows with the new ipcluster have been fixed.
66
62 * The ipengine and ipcontroller scripts now handle missing furl files
67 * The ipengine and ipcontroller scripts now handle missing furl files
63 more gracefully by giving better error messages.
68 more gracefully by giving better error messages.
64
69
65 * %rehashx: Aliases no longer contain dots. python3.0 binary
70 * %rehashx: Aliases no longer contain dots. python3.0 binary
66 will create alias python30. Fixes:
71 will create alias python30. Fixes:
67 #259716 "commands with dots in them don't work"
72 #259716 "commands with dots in them don't work"
68
73
69 * %cpaste: %cpaste -r repeats the last pasted block.
74 * %cpaste: %cpaste -r repeats the last pasted block.
70 The block is assigned to pasted_block even if code
75 The block is assigned to pasted_block even if code
71 raises exception.
76 raises exception.
72
77
73 Backwards incompatible changes
78 Backwards incompatible changes
74 ------------------------------
79 ------------------------------
75
80
76 * The controller now has a ``-r`` flag that needs to be used if you want to
81 * The controller now has a ``-r`` flag that needs to be used if you want to
77 reuse existing furl files. Otherwise they are deleted (the default).
82 reuse existing furl files. Otherwise they are deleted (the default).
78
83
79 * Remove ipy_leo.py. "easy_install ipython-extension" to get it.
84 * Remove ipy_leo.py. "easy_install ipython-extension" to get it.
80 (done to decouple it from ipython release cycle)
85 (done to decouple it from ipython release cycle)
81
86
82
87
83
88
84 Release 0.9.1
89 Release 0.9.1
85 =============
90 =============
86
91
87 This release was quickly made to restore compatibility with Python 2.4, which
92 This release was quickly made to restore compatibility with Python 2.4, which
88 version 0.9 accidentally broke. No new features were introduced, other than
93 version 0.9 accidentally broke. No new features were introduced, other than
89 some additional testing support for internal use.
94 some additional testing support for internal use.
90
95
91
96
92 Release 0.9
97 Release 0.9
93 ===========
98 ===========
94
99
95 New features
100 New features
96 ------------
101 ------------
97
102
98 * All furl files and security certificates are now put in a read-only
103 * All furl files and security certificates are now put in a read-only
99 directory named ~./ipython/security.
104 directory named ~./ipython/security.
100
105
101 * A single function :func:`get_ipython_dir`, in :mod:`IPython.genutils` that
106 * A single function :func:`get_ipython_dir`, in :mod:`IPython.genutils` that
102 determines the user's IPython directory in a robust manner.
107 determines the user's IPython directory in a robust manner.
103
108
104 * Laurent's WX application has been given a top-level script called
109 * Laurent's WX application has been given a top-level script called
105 ipython-wx, and it has received numerous fixes. We expect this code to be
110 ipython-wx, and it has received numerous fixes. We expect this code to be
106 architecturally better integrated with Gael's WX 'ipython widget' over the
111 architecturally better integrated with Gael's WX 'ipython widget' over the
107 next few releases.
112 next few releases.
108
113
109 * The Editor synchronization work by Vivian De Smedt has been merged in. This
114 * The Editor synchronization work by Vivian De Smedt has been merged in. This
110 code adds a number of new editor hooks to synchronize with editors under
115 code adds a number of new editor hooks to synchronize with editors under
111 Windows.
116 Windows.
112
117
113 * A new, still experimental but highly functional, WX shell by Gael Varoquaux.
118 * A new, still experimental but highly functional, WX shell by Gael Varoquaux.
114 This work was sponsored by Enthought, and while it's still very new, it is
119 This work was sponsored by Enthought, and while it's still very new, it is
115 based on a more cleanly organized arhictecture of the various IPython
120 based on a more cleanly organized arhictecture of the various IPython
116 components. We will continue to develop this over the next few releases as a
121 components. We will continue to develop this over the next few releases as a
117 model for GUI components that use IPython.
122 model for GUI components that use IPython.
118
123
119 * Another GUI frontend, Cocoa based (Cocoa is the OSX native GUI framework),
124 * Another GUI frontend, Cocoa based (Cocoa is the OSX native GUI framework),
120 authored by Barry Wark. Currently the WX and the Cocoa ones have slightly
125 authored by Barry Wark. Currently the WX and the Cocoa ones have slightly
121 different internal organizations, but the whole team is working on finding
126 different internal organizations, but the whole team is working on finding
122 what the right abstraction points are for a unified codebase.
127 what the right abstraction points are for a unified codebase.
123
128
124 * As part of the frontend work, Barry Wark also implemented an experimental
129 * As part of the frontend work, Barry Wark also implemented an experimental
125 event notification system that various ipython components can use. In the
130 event notification system that various ipython components can use. In the
126 next release the implications and use patterns of this system regarding the
131 next release the implications and use patterns of this system regarding the
127 various GUI options will be worked out.
132 various GUI options will be worked out.
128
133
129 * IPython finally has a full test system, that can test docstrings with
134 * IPython finally has a full test system, that can test docstrings with
130 IPython-specific functionality. There are still a few pieces missing for it
135 IPython-specific functionality. There are still a few pieces missing for it
131 to be widely accessible to all users (so they can run the test suite at any
136 to be widely accessible to all users (so they can run the test suite at any
132 time and report problems), but it now works for the developers. We are
137 time and report problems), but it now works for the developers. We are
133 working hard on continuing to improve it, as this was probably IPython's
138 working hard on continuing to improve it, as this was probably IPython's
134 major Achilles heel (the lack of proper test coverage made it effectively
139 major Achilles heel (the lack of proper test coverage made it effectively
135 impossible to do large-scale refactoring). The full test suite can now
140 impossible to do large-scale refactoring). The full test suite can now
136 be run using the :command:`iptest` command line program.
141 be run using the :command:`iptest` command line program.
137
142
138 * The notion of a task has been completely reworked. An `ITask` interface has
143 * The notion of a task has been completely reworked. An `ITask` interface has
139 been created. This interface defines the methods that tasks need to
144 been created. This interface defines the methods that tasks need to
140 implement. These methods are now responsible for things like submitting
145 implement. These methods are now responsible for things like submitting
141 tasks and processing results. There are two basic task types:
146 tasks and processing results. There are two basic task types:
142 :class:`IPython.kernel.task.StringTask` (this is the old `Task` object, but
147 :class:`IPython.kernel.task.StringTask` (this is the old `Task` object, but
143 renamed) and the new :class:`IPython.kernel.task.MapTask`, which is based on
148 renamed) and the new :class:`IPython.kernel.task.MapTask`, which is based on
144 a function.
149 a function.
145
150
146 * A new interface, :class:`IPython.kernel.mapper.IMapper` has been defined to
151 * A new interface, :class:`IPython.kernel.mapper.IMapper` has been defined to
147 standardize the idea of a `map` method. This interface has a single `map`
152 standardize the idea of a `map` method. This interface has a single `map`
148 method that has the same syntax as the built-in `map`. We have also defined
153 method that has the same syntax as the built-in `map`. We have also defined
149 a `mapper` factory interface that creates objects that implement
154 a `mapper` factory interface that creates objects that implement
150 :class:`IPython.kernel.mapper.IMapper` for different controllers. Both the
155 :class:`IPython.kernel.mapper.IMapper` for different controllers. Both the
151 multiengine and task controller now have mapping capabilties.
156 multiengine and task controller now have mapping capabilties.
152
157
153 * The parallel function capabilities have been reworks. The major changes are
158 * The parallel function capabilities have been reworks. The major changes are
154 that i) there is now an `@parallel` magic that creates parallel functions,
159 that i) there is now an `@parallel` magic that creates parallel functions,
155 ii) the syntax for mulitple variable follows that of `map`, iii) both the
160 ii) the syntax for mulitple variable follows that of `map`, iii) both the
156 multiengine and task controller now have a parallel function implementation.
161 multiengine and task controller now have a parallel function implementation.
157
162
158 * All of the parallel computing capabilities from `ipython1-dev` have been
163 * All of the parallel computing capabilities from `ipython1-dev` have been
159 merged into IPython proper. This resulted in the following new subpackages:
164 merged into IPython proper. This resulted in the following new subpackages:
160 :mod:`IPython.kernel`, :mod:`IPython.kernel.core`, :mod:`IPython.config`,
165 :mod:`IPython.kernel`, :mod:`IPython.kernel.core`, :mod:`IPython.config`,
161 :mod:`IPython.tools` and :mod:`IPython.testing`.
166 :mod:`IPython.tools` and :mod:`IPython.testing`.
162
167
163 * As part of merging in the `ipython1-dev` stuff, the `setup.py` script and
168 * As part of merging in the `ipython1-dev` stuff, the `setup.py` script and
164 friends have been completely refactored. Now we are checking for
169 friends have been completely refactored. Now we are checking for
165 dependencies using the approach that matplotlib uses.
170 dependencies using the approach that matplotlib uses.
166
171
167 * The documentation has been completely reorganized to accept the
172 * The documentation has been completely reorganized to accept the
168 documentation from `ipython1-dev`.
173 documentation from `ipython1-dev`.
169
174
170 * We have switched to using Foolscap for all of our network protocols in
175 * We have switched to using Foolscap for all of our network protocols in
171 :mod:`IPython.kernel`. This gives us secure connections that are both
176 :mod:`IPython.kernel`. This gives us secure connections that are both
172 encrypted and authenticated.
177 encrypted and authenticated.
173
178
174 * We have a brand new `COPYING.txt` files that describes the IPython license
179 * We have a brand new `COPYING.txt` files that describes the IPython license
175 and copyright. The biggest change is that we are putting "The IPython
180 and copyright. The biggest change is that we are putting "The IPython
176 Development Team" as the copyright holder. We give more details about
181 Development Team" as the copyright holder. We give more details about
177 exactly what this means in this file. All developer should read this and use
182 exactly what this means in this file. All developer should read this and use
178 the new banner in all IPython source code files.
183 the new banner in all IPython source code files.
179
184
180 * sh profile: ./foo runs foo as system command, no need to do !./foo anymore
185 * sh profile: ./foo runs foo as system command, no need to do !./foo anymore
181
186
182 * String lists now support ``sort(field, nums = True)`` method (to easily sort
187 * String lists now support ``sort(field, nums = True)`` method (to easily sort
183 system command output). Try it with ``a = !ls -l ; a.sort(1, nums=1)``.
188 system command output). Try it with ``a = !ls -l ; a.sort(1, nums=1)``.
184
189
185 * '%cpaste foo' now assigns the pasted block as string list, instead of string
190 * '%cpaste foo' now assigns the pasted block as string list, instead of string
186
191
187 * The ipcluster script now run by default with no security. This is done
192 * The ipcluster script now run by default with no security. This is done
188 because the main usage of the script is for starting things on localhost.
193 because the main usage of the script is for starting things on localhost.
189 Eventually when ipcluster is able to start things on other hosts, we will put
194 Eventually when ipcluster is able to start things on other hosts, we will put
190 security back.
195 security back.
191
196
192 * 'cd --foo' searches directory history for string foo, and jumps to that dir.
197 * 'cd --foo' searches directory history for string foo, and jumps to that dir.
193 Last part of dir name is checked first. If no matches for that are found,
198 Last part of dir name is checked first. If no matches for that are found,
194 look at the whole path.
199 look at the whole path.
195
200
196
201
197 Bug fixes
202 Bug fixes
198 ---------
203 ---------
199
204
200 * The Windows installer has been fixed. Now all IPython scripts have ``.bat``
205 * The Windows installer has been fixed. Now all IPython scripts have ``.bat``
201 versions created. Also, the Start Menu shortcuts have been updated.
206 versions created. Also, the Start Menu shortcuts have been updated.
202
207
203 * The colors escapes in the multiengine client are now turned off on win32 as
208 * The colors escapes in the multiengine client are now turned off on win32 as
204 they don't print correctly.
209 they don't print correctly.
205
210
206 * The :mod:`IPython.kernel.scripts.ipengine` script was exec'ing
211 * The :mod:`IPython.kernel.scripts.ipengine` script was exec'ing
207 mpi_import_statement incorrectly, which was leading the engine to crash when
212 mpi_import_statement incorrectly, which was leading the engine to crash when
208 mpi was enabled.
213 mpi was enabled.
209
214
210 * A few subpackages had missing ``__init__.py`` files.
215 * A few subpackages had missing ``__init__.py`` files.
211
216
212 * The documentation is only created if Sphinx is found. Previously, the
217 * The documentation is only created if Sphinx is found. Previously, the
213 ``setup.py`` script would fail if it was missing.
218 ``setup.py`` script would fail if it was missing.
214
219
215 * Greedy ``cd`` completion has been disabled again (it was enabled in 0.8.4) as
220 * Greedy ``cd`` completion has been disabled again (it was enabled in 0.8.4) as
216 it caused problems on certain platforms.
221 it caused problems on certain platforms.
217
222
218
223
219 Backwards incompatible changes
224 Backwards incompatible changes
220 ------------------------------
225 ------------------------------
221
226
222 * The ``clusterfile`` options of the :command:`ipcluster` command has been
227 * The ``clusterfile`` options of the :command:`ipcluster` command has been
223 removed as it was not working and it will be replaced soon by something much
228 removed as it was not working and it will be replaced soon by something much
224 more robust.
229 more robust.
225
230
226 * The :mod:`IPython.kernel` configuration now properly find the user's
231 * The :mod:`IPython.kernel` configuration now properly find the user's
227 IPython directory.
232 IPython directory.
228
233
229 * In ipapi, the :func:`make_user_ns` function has been replaced with
234 * In ipapi, the :func:`make_user_ns` function has been replaced with
230 :func:`make_user_namespaces`, to support dict subclasses in namespace
235 :func:`make_user_namespaces`, to support dict subclasses in namespace
231 creation.
236 creation.
232
237
233 * :class:`IPython.kernel.client.Task` has been renamed
238 * :class:`IPython.kernel.client.Task` has been renamed
234 :class:`IPython.kernel.client.StringTask` to make way for new task types.
239 :class:`IPython.kernel.client.StringTask` to make way for new task types.
235
240
236 * The keyword argument `style` has been renamed `dist` in `scatter`, `gather`
241 * The keyword argument `style` has been renamed `dist` in `scatter`, `gather`
237 and `map`.
242 and `map`.
238
243
239 * Renamed the values that the rename `dist` keyword argument can have from
244 * Renamed the values that the rename `dist` keyword argument can have from
240 `'basic'` to `'b'`.
245 `'basic'` to `'b'`.
241
246
242 * IPython has a larger set of dependencies if you want all of its capabilities.
247 * IPython has a larger set of dependencies if you want all of its capabilities.
243 See the `setup.py` script for details.
248 See the `setup.py` script for details.
244
249
245 * The constructors for :class:`IPython.kernel.client.MultiEngineClient` and
250 * The constructors for :class:`IPython.kernel.client.MultiEngineClient` and
246 :class:`IPython.kernel.client.TaskClient` no longer take the (ip,port) tuple.
251 :class:`IPython.kernel.client.TaskClient` no longer take the (ip,port) tuple.
247 Instead they take the filename of a file that contains the FURL for that
252 Instead they take the filename of a file that contains the FURL for that
248 client. If the FURL file is in your IPYTHONDIR, it will be found automatically
253 client. If the FURL file is in your IPYTHONDIR, it will be found automatically
249 and the constructor can be left empty.
254 and the constructor can be left empty.
250
255
251 * The asynchronous clients in :mod:`IPython.kernel.asyncclient` are now created
256 * The asynchronous clients in :mod:`IPython.kernel.asyncclient` are now created
252 using the factory functions :func:`get_multiengine_client` and
257 using the factory functions :func:`get_multiengine_client` and
253 :func:`get_task_client`. These return a `Deferred` to the actual client.
258 :func:`get_task_client`. These return a `Deferred` to the actual client.
254
259
255 * The command line options to `ipcontroller` and `ipengine` have changed to
260 * The command line options to `ipcontroller` and `ipengine` have changed to
256 reflect the new Foolscap network protocol and the FURL files. Please see the
261 reflect the new Foolscap network protocol and the FURL files. Please see the
257 help for these scripts for details.
262 help for these scripts for details.
258
263
259 * The configuration files for the kernel have changed because of the Foolscap
264 * The configuration files for the kernel have changed because of the Foolscap
260 stuff. If you were using custom config files before, you should delete them
265 stuff. If you were using custom config files before, you should delete them
261 and regenerate new ones.
266 and regenerate new ones.
262
267
263 Changes merged in from IPython1
268 Changes merged in from IPython1
264 -------------------------------
269 -------------------------------
265
270
266 New features
271 New features
267 ............
272 ............
268
273
269 * Much improved ``setup.py`` and ``setupegg.py`` scripts. Because Twisted and
274 * Much improved ``setup.py`` and ``setupegg.py`` scripts. Because Twisted and
270 zope.interface are now easy installable, we can declare them as dependencies
275 zope.interface are now easy installable, we can declare them as dependencies
271 in our setupegg.py script.
276 in our setupegg.py script.
272
277
273 * IPython is now compatible with Twisted 2.5.0 and 8.x.
278 * IPython is now compatible with Twisted 2.5.0 and 8.x.
274
279
275 * Added a new example of how to use :mod:`ipython1.kernel.asynclient`.
280 * Added a new example of how to use :mod:`ipython1.kernel.asynclient`.
276
281
277 * Initial draft of a process daemon in :mod:`ipython1.daemon`. This has not
282 * Initial draft of a process daemon in :mod:`ipython1.daemon`. This has not
278 been merged into IPython and is still in `ipython1-dev`.
283 been merged into IPython and is still in `ipython1-dev`.
279
284
280 * The ``TaskController`` now has methods for getting the queue status.
285 * The ``TaskController`` now has methods for getting the queue status.
281
286
282 * The ``TaskResult`` objects not have information about how long the task
287 * The ``TaskResult`` objects not have information about how long the task
283 took to run.
288 took to run.
284
289
285 * We are attaching additional attributes to exceptions ``(_ipython_*)`` that
290 * We are attaching additional attributes to exceptions ``(_ipython_*)`` that
286 we use to carry additional info around.
291 we use to carry additional info around.
287
292
288 * New top-level module :mod:`asyncclient` that has asynchronous versions (that
293 * New top-level module :mod:`asyncclient` that has asynchronous versions (that
289 return deferreds) of the client classes. This is designed to users who want
294 return deferreds) of the client classes. This is designed to users who want
290 to run their own Twisted reactor.
295 to run their own Twisted reactor.
291
296
292 * All the clients in :mod:`client` are now based on Twisted. This is done by
297 * All the clients in :mod:`client` are now based on Twisted. This is done by
293 running the Twisted reactor in a separate thread and using the
298 running the Twisted reactor in a separate thread and using the
294 :func:`blockingCallFromThread` function that is in recent versions of Twisted.
299 :func:`blockingCallFromThread` function that is in recent versions of Twisted.
295
300
296 * Functions can now be pushed/pulled to/from engines using
301 * Functions can now be pushed/pulled to/from engines using
297 :meth:`MultiEngineClient.push_function` and
302 :meth:`MultiEngineClient.push_function` and
298 :meth:`MultiEngineClient.pull_function`.
303 :meth:`MultiEngineClient.pull_function`.
299
304
300 * Gather/scatter are now implemented in the client to reduce the work load
305 * Gather/scatter are now implemented in the client to reduce the work load
301 of the controller and improve performance.
306 of the controller and improve performance.
302
307
303 * Complete rewrite of the IPython documentation. All of the documentation
308 * Complete rewrite of the IPython documentation. All of the documentation
304 from the IPython website has been moved into docs/source as restructured
309 from the IPython website has been moved into docs/source as restructured
305 text documents. PDF and HTML documentation are being generated using
310 text documents. PDF and HTML documentation are being generated using
306 Sphinx.
311 Sphinx.
307
312
308 * New developer oriented documentation: development guidelines and roadmap.
313 * New developer oriented documentation: development guidelines and roadmap.
309
314
310 * Traditional ``ChangeLog`` has been changed to a more useful ``changes.txt``
315 * Traditional ``ChangeLog`` has been changed to a more useful ``changes.txt``
311 file that is organized by release and is meant to provide something more
316 file that is organized by release and is meant to provide something more
312 relevant for users.
317 relevant for users.
313
318
314 Bug fixes
319 Bug fixes
315 .........
320 .........
316
321
317 * Created a proper ``MANIFEST.in`` file to create source distributions.
322 * Created a proper ``MANIFEST.in`` file to create source distributions.
318
323
319 * Fixed a bug in the ``MultiEngine`` interface. Previously, multi-engine
324 * Fixed a bug in the ``MultiEngine`` interface. Previously, multi-engine
320 actions were being collected with a :class:`DeferredList` with
325 actions were being collected with a :class:`DeferredList` with
321 ``fireononeerrback=1``. This meant that methods were returning
326 ``fireononeerrback=1``. This meant that methods were returning
322 before all engines had given their results. This was causing extremely odd
327 before all engines had given their results. This was causing extremely odd
323 bugs in certain cases. To fix this problem, we have 1) set
328 bugs in certain cases. To fix this problem, we have 1) set
324 ``fireononeerrback=0`` to make sure all results (or exceptions) are in
329 ``fireononeerrback=0`` to make sure all results (or exceptions) are in
325 before returning and 2) introduced a :exc:`CompositeError` exception
330 before returning and 2) introduced a :exc:`CompositeError` exception
326 that wraps all of the engine exceptions. This is a huge change as it means
331 that wraps all of the engine exceptions. This is a huge change as it means
327 that users will have to catch :exc:`CompositeError` rather than the actual
332 that users will have to catch :exc:`CompositeError` rather than the actual
328 exception.
333 exception.
329
334
330 Backwards incompatible changes
335 Backwards incompatible changes
331 ..............................
336 ..............................
332
337
333 * All names have been renamed to conform to the lowercase_with_underscore
338 * All names have been renamed to conform to the lowercase_with_underscore
334 convention. This will require users to change references to all names like
339 convention. This will require users to change references to all names like
335 ``queueStatus`` to ``queue_status``.
340 ``queueStatus`` to ``queue_status``.
336
341
337 * Previously, methods like :meth:`MultiEngineClient.push` and
342 * Previously, methods like :meth:`MultiEngineClient.push` and
338 :meth:`MultiEngineClient.push` used ``*args`` and ``**kwargs``. This was
343 :meth:`MultiEngineClient.push` used ``*args`` and ``**kwargs``. This was
339 becoming a problem as we weren't able to introduce new keyword arguments into
344 becoming a problem as we weren't able to introduce new keyword arguments into
340 the API. Now these methods simply take a dict or sequence. This has also
345 the API. Now these methods simply take a dict or sequence. This has also
341 allowed us to get rid of the ``*All`` methods like :meth:`pushAll` and
346 allowed us to get rid of the ``*All`` methods like :meth:`pushAll` and
342 :meth:`pullAll`. These things are now handled with the ``targets`` keyword
347 :meth:`pullAll`. These things are now handled with the ``targets`` keyword
343 argument that defaults to ``'all'``.
348 argument that defaults to ``'all'``.
344
349
345 * The :attr:`MultiEngineClient.magicTargets` has been renamed to
350 * The :attr:`MultiEngineClient.magicTargets` has been renamed to
346 :attr:`MultiEngineClient.targets`.
351 :attr:`MultiEngineClient.targets`.
347
352
348 * All methods in the MultiEngine interface now accept the optional keyword
353 * All methods in the MultiEngine interface now accept the optional keyword
349 argument ``block``.
354 argument ``block``.
350
355
351 * Renamed :class:`RemoteController` to :class:`MultiEngineClient` and
356 * Renamed :class:`RemoteController` to :class:`MultiEngineClient` and
352 :class:`TaskController` to :class:`TaskClient`.
357 :class:`TaskController` to :class:`TaskClient`.
353
358
354 * Renamed the top-level module from :mod:`api` to :mod:`client`.
359 * Renamed the top-level module from :mod:`api` to :mod:`client`.
355
360
356 * Most methods in the multiengine interface now raise a :exc:`CompositeError`
361 * Most methods in the multiengine interface now raise a :exc:`CompositeError`
357 exception that wraps the user's exceptions, rather than just raising the raw
362 exception that wraps the user's exceptions, rather than just raising the raw
358 user's exception.
363 user's exception.
359
364
360 * Changed the ``setupNS`` and ``resultNames`` in the ``Task`` class to ``push``
365 * Changed the ``setupNS`` and ``resultNames`` in the ``Task`` class to ``push``
361 and ``pull``.
366 and ``pull``.
362
367
363
368
364 Release 0.8.4
369 Release 0.8.4
365 =============
370 =============
366
371
367 This was a quick release to fix an unfortunate bug that slipped into the 0.8.3
372 This was a quick release to fix an unfortunate bug that slipped into the 0.8.3
368 release. The ``--twisted`` option was disabled, as it turned out to be broken
373 release. The ``--twisted`` option was disabled, as it turned out to be broken
369 across several platforms.
374 across several platforms.
370
375
371
376
372 Release 0.8.3
377 Release 0.8.3
373 =============
378 =============
374
379
375 * pydb is now disabled by default (due to %run -d problems). You can enable
380 * pydb is now disabled by default (due to %run -d problems). You can enable
376 it by passing -pydb command line argument to IPython. Note that setting
381 it by passing -pydb command line argument to IPython. Note that setting
377 it in config file won't work.
382 it in config file won't work.
378
383
379
384
380 Release 0.8.2
385 Release 0.8.2
381 =============
386 =============
382
387
383 * %pushd/%popd behave differently; now "pushd /foo" pushes CURRENT directory
388 * %pushd/%popd behave differently; now "pushd /foo" pushes CURRENT directory
384 and jumps to /foo. The current behaviour is closer to the documented
389 and jumps to /foo. The current behaviour is closer to the documented
385 behaviour, and should not trip anyone.
390 behaviour, and should not trip anyone.
386
391
387
392
388 Older releases
393 Older releases
389 ==============
394 ==============
390
395
391 Changes in earlier releases of IPython are described in the older file
396 Changes in earlier releases of IPython are described in the older file
392 ``ChangeLog``. Please refer to this document for details.
397 ``ChangeLog``. Please refer to this document for details.
393
398
@@ -1,251 +1,324 b''
1 .. _parallel_process:
1 .. _parallel_process:
2
2
3 ===========================================
3 ===========================================
4 Starting the IPython controller and engines
4 Starting the IPython controller and engines
5 ===========================================
5 ===========================================
6
6
7 To use IPython for parallel computing, you need to start one instance of
7 To use IPython for parallel computing, you need to start one instance of
8 the controller and one or more instances of the engine. The controller
8 the controller and one or more instances of the engine. The controller
9 and each engine can run on different machines or on the same machine.
9 and each engine can run on different machines or on the same machine.
10 Because of this, there are many different possibilities.
10 Because of this, there are many different possibilities.
11
11
12 Broadly speaking, there are two ways of going about starting a controller and engines:
12 Broadly speaking, there are two ways of going about starting a controller and engines:
13
13
14 * In an automated manner using the :command:`ipcluster` command.
14 * In an automated manner using the :command:`ipcluster` command.
15 * In a more manual way using the :command:`ipcontroller` and
15 * In a more manual way using the :command:`ipcontroller` and
16 :command:`ipengine` commands.
16 :command:`ipengine` commands.
17
17
18 This document describes both of these methods. We recommend that new users start with the :command:`ipcluster` command as it simplifies many common usage cases.
18 This document describes both of these methods. We recommend that new users start with the :command:`ipcluster` command as it simplifies many common usage cases.
19
19
20 General considerations
20 General considerations
21 ======================
21 ======================
22
22
23 Before delving into the details about how you can start a controller and engines using the various methods, we outline some of the general issues that come up when starting the controller and engines. These things come up no matter which method you use to start your IPython cluster.
23 Before delving into the details about how you can start a controller and engines using the various methods, we outline some of the general issues that come up when starting the controller and engines. These things come up no matter which method you use to start your IPython cluster.
24
24
25 Let's say that you want to start the controller on ``host0`` and engines on hosts ``host1``-``hostn``. The following steps are then required:
25 Let's say that you want to start the controller on ``host0`` and engines on hosts ``host1``-``hostn``. The following steps are then required:
26
26
27 1. Start the controller on ``host0`` by running :command:`ipcontroller` on
27 1. Start the controller on ``host0`` by running :command:`ipcontroller` on
28 ``host0``.
28 ``host0``.
29 2. Move the FURL file (:file:`ipcontroller-engine.furl`) created by the
29 2. Move the FURL file (:file:`ipcontroller-engine.furl`) created by the
30 controller from ``host0`` to hosts ``host1``-``hostn``.
30 controller from ``host0`` to hosts ``host1``-``hostn``.
31 3. Start the engines on hosts ``host1``-``hostn`` by running
31 3. Start the engines on hosts ``host1``-``hostn`` by running
32 :command:`ipengine`. This command has to be told where the FURL file
32 :command:`ipengine`. This command has to be told where the FURL file
33 (:file:`ipcontroller-engine.furl`) is located.
33 (:file:`ipcontroller-engine.furl`) is located.
34
34
35 At this point, the controller and engines will be connected. By default, the
35 At this point, the controller and engines will be connected. By default, the
36 FURL files created by the controller are put into the
36 FURL files created by the controller are put into the
37 :file:`~/.ipython/security` directory. If the engines share a filesystem with
37 :file:`~/.ipython/security` directory. If the engines share a filesystem with
38 the controller, step 2 can be skipped as the engines will automatically look
38 the controller, step 2 can be skipped as the engines will automatically look
39 at that location.
39 at that location.
40
40
41 The final step required to actually use the running controller from a
41 The final step required to actually use the running controller from a
42 client is to move the FURL files :file:`ipcontroller-mec.furl` and
42 client is to move the FURL files :file:`ipcontroller-mec.furl` and
43 :file:`ipcontroller-tc.furl` from ``host0`` to the host where the clients will
43 :file:`ipcontroller-tc.furl` from ``host0`` to the host where the clients will
44 be run. If these files are put into the :file:`~/.ipython/security` directory of the client's host, they will be found automatically. Otherwise, the full path to them has to be passed to the client's constructor.
44 be run. If these files are put into the :file:`~/.ipython/security` directory of the client's host, they will be found automatically. Otherwise, the full path to them has to be passed to the client's constructor.
45
45
46 Using :command:`ipcluster`
46 Using :command:`ipcluster`
47 ==========================
47 ==========================
48
48
49 The :command:`ipcluster` command provides a simple way of starting a controller and engines in the following situations:
49 The :command:`ipcluster` command provides a simple way of starting a controller and engines in the following situations:
50
50
51 1. When the controller and engines are all run on localhost. This is useful
51 1. When the controller and engines are all run on localhost. This is useful
52 for testing or running on a multicore computer.
52 for testing or running on a multicore computer.
53 2. When engines are started using the :command:`mpirun` command that comes
53 2. When engines are started using the :command:`mpirun` command that comes
54 with most MPI [MPI]_ implementations
54 with most MPI [MPI]_ implementations
55 3. When engines are started using the PBS [PBS]_ batch system.
55 3. When engines are started using the PBS [PBS]_ batch system.
56 4. When the controller is started on localhost and the engines are started on
57 remote nodes using :command:`ssh`.
56
58
57 .. note::
59 .. note::
58
60
59 It is also possible for advanced users to add support to
61 It is also possible for advanced users to add support to
60 :command:`ipcluster` for starting controllers and engines using other
62 :command:`ipcluster` for starting controllers and engines using other
61 methods (like Sun's Grid Engine for example).
63 methods (like Sun's Grid Engine for example).
62
64
63 .. note::
65 .. note::
64
66
65 Currently :command:`ipcluster` requires that the
67 Currently :command:`ipcluster` requires that the
66 :file:`~/.ipython/security` directory live on a shared filesystem that is
68 :file:`~/.ipython/security` directory live on a shared filesystem that is
67 seen by both the controller and engines. If you don't have a shared file
69 seen by both the controller and engines. If you don't have a shared file
68 system you will need to use :command:`ipcontroller` and
70 system you will need to use :command:`ipcontroller` and
69 :command:`ipengine` directly.
71 :command:`ipengine` directly. This constraint can be relaxed if you are
72 using the :command:`ssh` method to start the cluster.
70
73
71 Underneath the hood, :command:`ipcluster` just uses :command:`ipcontroller`
74 Underneath the hood, :command:`ipcluster` just uses :command:`ipcontroller`
72 and :command:`ipengine` to perform the steps described above.
75 and :command:`ipengine` to perform the steps described above.
73
76
74 Using :command:`ipcluster` in local mode
77 Using :command:`ipcluster` in local mode
75 ----------------------------------------
78 ----------------------------------------
76
79
77 To start one controller and 4 engines on localhost, just do::
80 To start one controller and 4 engines on localhost, just do::
78
81
79 $ ipcluster local -n 4
82 $ ipcluster local -n 4
80
83
81 To see other command line options for the local mode, do::
84 To see other command line options for the local mode, do::
82
85
83 $ ipcluster local -h
86 $ ipcluster local -h
84
87
85 Using :command:`ipcluster` in mpirun mode
88 Using :command:`ipcluster` in mpirun mode
86 -----------------------------------------
89 -----------------------------------------
87
90
88 The mpirun mode is useful if you:
91 The mpirun mode is useful if you:
89
92
90 1. Have MPI installed.
93 1. Have MPI installed.
91 2. Your systems are configured to use the :command:`mpirun` command to start
94 2. Your systems are configured to use the :command:`mpirun` command to start
92 processes.
95 processes.
93
96
94 If these are satisfied, you can start an IPython cluster using::
97 If these are satisfied, you can start an IPython cluster using::
95
98
96 $ ipcluster mpirun -n 4
99 $ ipcluster mpirun -n 4
97
100
98 This does the following:
101 This does the following:
99
102
100 1. Starts the IPython controller on current host.
103 1. Starts the IPython controller on current host.
101 2. Uses :command:`mpirun` to start 4 engines.
104 2. Uses :command:`mpirun` to start 4 engines.
102
105
103 On newer MPI implementations (such as OpenMPI), this will work even if you don't make any calls to MPI or call :func:`MPI_Init`. However, older MPI implementations actually require each process to call :func:`MPI_Init` upon starting. The easiest way of having this done is to install the mpi4py [mpi4py]_ package and then call ipcluster with the ``--mpi`` option::
106 On newer MPI implementations (such as OpenMPI), this will work even if you don't make any calls to MPI or call :func:`MPI_Init`. However, older MPI implementations actually require each process to call :func:`MPI_Init` upon starting. The easiest way of having this done is to install the mpi4py [mpi4py]_ package and then call ipcluster with the ``--mpi`` option::
104
107
105 $ ipcluster mpirun -n 4 --mpi=mpi4py
108 $ ipcluster mpirun -n 4 --mpi=mpi4py
106
109
107 Unfortunately, even this won't work for some MPI implementations. If you are having problems with this, you will likely have to use a custom Python executable that itself calls :func:`MPI_Init` at the appropriate time. Fortunately, mpi4py comes with such a custom Python executable that is easy to install and use. However, this custom Python executable approach will not work with :command:`ipcluster` currently.
110 Unfortunately, even this won't work for some MPI implementations. If you are having problems with this, you will likely have to use a custom Python executable that itself calls :func:`MPI_Init` at the appropriate time. Fortunately, mpi4py comes with such a custom Python executable that is easy to install and use. However, this custom Python executable approach will not work with :command:`ipcluster` currently.
108
111
109 Additional command line options for this mode can be found by doing::
112 Additional command line options for this mode can be found by doing::
110
113
111 $ ipcluster mpirun -h
114 $ ipcluster mpirun -h
112
115
113 More details on using MPI with IPython can be found :ref:`here <parallelmpi>`.
116 More details on using MPI with IPython can be found :ref:`here <parallelmpi>`.
114
117
115
118
116 Using :command:`ipcluster` in PBS mode
119 Using :command:`ipcluster` in PBS mode
117 --------------------------------------
120 --------------------------------------
118
121
119 The PBS mode uses the Portable Batch System [PBS]_ to start the engines. To use this mode, you first need to create a PBS script template that will be used to start the engines. Here is a sample PBS script template:
122 The PBS mode uses the Portable Batch System [PBS]_ to start the engines. To use this mode, you first need to create a PBS script template that will be used to start the engines. Here is a sample PBS script template:
120
123
121 .. sourcecode:: bash
124 .. sourcecode:: bash
122
125
123 #PBS -N ipython
126 #PBS -N ipython
124 #PBS -j oe
127 #PBS -j oe
125 #PBS -l walltime=00:10:00
128 #PBS -l walltime=00:10:00
126 #PBS -l nodes=${n/4}:ppn=4
129 #PBS -l nodes=${n/4}:ppn=4
127 #PBS -q parallel
130 #PBS -q parallel
128
131
129 cd $$PBS_O_WORKDIR
132 cd $$PBS_O_WORKDIR
130 export PATH=$$HOME/usr/local/bin
133 export PATH=$$HOME/usr/local/bin
131 export PYTHONPATH=$$HOME/usr/local/lib/python2.4/site-packages
134 export PYTHONPATH=$$HOME/usr/local/lib/python2.4/site-packages
132 /usr/local/bin/mpiexec -n ${n} ipengine --logfile=$$PBS_O_WORKDIR/ipengine
135 /usr/local/bin/mpiexec -n ${n} ipengine --logfile=$$PBS_O_WORKDIR/ipengine
133
136
134 There are a few important points about this template:
137 There are a few important points about this template:
135
138
136 1. This template will be rendered at runtime using IPython's :mod:`Itpl`
139 1. This template will be rendered at runtime using IPython's :mod:`Itpl`
137 template engine.
140 template engine.
138
141
139 2. Instead of putting in the actual number of engines, use the notation
142 2. Instead of putting in the actual number of engines, use the notation
140 ``${n}`` to indicate the number of engines to be started. You can also use
143 ``${n}`` to indicate the number of engines to be started. You can also use
141 expressions like ``${n/4}`` in the template to indicate the number of
144 expressions like ``${n/4}`` in the template to indicate the number of
142 nodes.
145 nodes.
143
146
144 3. Because ``$`` is a special character used by the template engine, you must
147 3. Because ``$`` is a special character used by the template engine, you must
145 escape any ``$`` by using ``$$``. This is important when referring to
148 escape any ``$`` by using ``$$``. This is important when referring to
146 environment variables in the template.
149 environment variables in the template.
147
150
148 4. Any options to :command:`ipengine` should be given in the batch script
151 4. Any options to :command:`ipengine` should be given in the batch script
149 template.
152 template.
150
153
151 5. Depending on the configuration of your system, you may have to set
154 5. Depending on the configuration of your system, you may have to set
152 environment variables in the script template.
155 environment variables in the script template.
153
156
154 Once you have created such a script, save it with a name like :file:`pbs.template`. Now you are ready to start your job::
157 Once you have created such a script, save it with a name like :file:`pbs.template`. Now you are ready to start your job::
155
158
156 $ ipcluster pbs -n 128 --pbs-script=pbs.template
159 $ ipcluster pbs -n 128 --pbs-script=pbs.template
157
160
158 Additional command line options for this mode can be found by doing::
161 Additional command line options for this mode can be found by doing::
159
162
160 $ ipcluster pbs -h
163 $ ipcluster pbs -h
161
164
165 Using :command:`ipcluster` in SSH mode
166 --------------------------------------
167
168 The SSH mode uses :command:`ssh` to execute :command:`ipengine` on remote
169 nodes and the :command:`ipcontroller` on localhost.
170
171 When using this mode it is highly recommended that you have set up SSH keys and are using ssh-agent [SSH]_ for password-less logins.
172
173 To use this mode you need a python file describing the cluster, here is an example of such a "clusterfile":
174
175 .. sourcecode:: python
176
177 send_furl = True
178 engines = { 'host1.example.com' : 2,
179 'host2.example.com' : 5,
180 'host3.example.com' : 1,
181 'host4.example.com' : 8 }
182
183 Since this is a regular python file usual python syntax applies. Things to note:
184
185 * The `engines` dict, where the key is the host we want to run engines on and
186 the value is the number of engines to run on that host.
187 * send_furl can either be `True` or `False`, if `True` it will copy over the
188 furl needed for :command:`ipengine` to each host.
189
190 The ``--clusterfile`` command line option lets you specify the file to use for
191 the cluster definition. Once you have your cluster file and you can
192 :command:`ssh` into the remote hosts without a password you are ready to
193 start your cluster like so:
194
195 .. sourcecode:: bash
196
197 $ ipcluster ssh --clusterfile /path/to/my/clusterfile.py
198
199
200 Two helper shell scripts are used to start and stop :command:`ipengine` on remote hosts:
201
202 * sshx.sh
203 * engine_killer.sh
204
205 Defaults for both of these are contained in the source code for :command:`ipcluster`. The default scripts are written to a local file in a temp directory and then copied to a temp directory on the remote host and executed from there. On most Unix, Linux and OS X systems this is /tmp.
206
207 The default sshx.sh is the following:
208
209 .. sourcecode:: bash
210
211 #!/bin/sh
212 "$@" &> /dev/null &
213 echo $!
214
215 If you want to use a custom sshx.sh script you need to use the ``--sshx``
216 option and specify the file to use. Using a custom sshx.sh file could be
217 helpful when you need to setup the environment on the remote host before
218 executing :command:`ipengine`.
219
220 For a detailed options list:
221
222 .. sourcecode:: bash
223
224 $ ipcluster ssh -h
225
226 Current limitations of the SSH mode of :command:`ipcluster` are:
227
228 * Untested on Windows. Would require a working :command:`ssh` on Windows.
229 Also, we are using shell scripts to setup and execute commands on remote
230 hosts.
231 * :command:`ipcontroller` is started on localhost, with no option to start it
232 on a remote node.
233
162 Using the :command:`ipcontroller` and :command:`ipengine` commands
234 Using the :command:`ipcontroller` and :command:`ipengine` commands
163 ==================================================================
235 ==================================================================
164
236
165 It is also possible to use the :command:`ipcontroller` and :command:`ipengine` commands to start your controller and engines. This approach gives you full control over all aspects of the startup process.
237 It is also possible to use the :command:`ipcontroller` and :command:`ipengine` commands to start your controller and engines. This approach gives you full control over all aspects of the startup process.
166
238
167 Starting the controller and engine on your local machine
239 Starting the controller and engine on your local machine
168 --------------------------------------------------------
240 --------------------------------------------------------
169
241
170 To use :command:`ipcontroller` and :command:`ipengine` to start things on your
242 To use :command:`ipcontroller` and :command:`ipengine` to start things on your
171 local machine, do the following.
243 local machine, do the following.
172
244
173 First start the controller::
245 First start the controller::
174
246
175 $ ipcontroller
247 $ ipcontroller
176
248
177 Next, start however many instances of the engine you want using (repeatedly) the command::
249 Next, start however many instances of the engine you want using (repeatedly) the command::
178
250
179 $ ipengine
251 $ ipengine
180
252
181 The engines should start and automatically connect to the controller using the FURL files in :file:`~./ipython/security`. You are now ready to use the controller and engines from IPython.
253 The engines should start and automatically connect to the controller using the FURL files in :file:`~./ipython/security`. You are now ready to use the controller and engines from IPython.
182
254
183 .. warning::
255 .. warning::
184
256
185 The order of the above operations is very important. You *must*
257 The order of the above operations is very important. You *must*
186 start the controller before the engines, since the engines connect
258 start the controller before the engines, since the engines connect
187 to the controller as they get started.
259 to the controller as they get started.
188
260
189 .. note::
261 .. note::
190
262
191 On some platforms (OS X), to put the controller and engine into the
263 On some platforms (OS X), to put the controller and engine into the
192 background you may need to give these commands in the form ``(ipcontroller
264 background you may need to give these commands in the form ``(ipcontroller
193 &)`` and ``(ipengine &)`` (with the parentheses) for them to work
265 &)`` and ``(ipengine &)`` (with the parentheses) for them to work
194 properly.
266 properly.
195
267
196 Starting the controller and engines on different hosts
268 Starting the controller and engines on different hosts
197 ------------------------------------------------------
269 ------------------------------------------------------
198
270
199 When the controller and engines are running on different hosts, things are
271 When the controller and engines are running on different hosts, things are
200 slightly more complicated, but the underlying ideas are the same:
272 slightly more complicated, but the underlying ideas are the same:
201
273
202 1. Start the controller on a host using :command:`ipcontroller`.
274 1. Start the controller on a host using :command:`ipcontroller`.
203 2. Copy :file:`ipcontroller-engine.furl` from :file:`~./ipython/security` on the controller's host to the host where the engines will run.
275 2. Copy :file:`ipcontroller-engine.furl` from :file:`~./ipython/security` on the controller's host to the host where the engines will run.
204 3. Use :command:`ipengine` on the engine's hosts to start the engines.
276 3. Use :command:`ipengine` on the engine's hosts to start the engines.
205
277
206 The only thing you have to be careful of is to tell :command:`ipengine` where the :file:`ipcontroller-engine.furl` file is located. There are two ways you can do this:
278 The only thing you have to be careful of is to tell :command:`ipengine` where the :file:`ipcontroller-engine.furl` file is located. There are two ways you can do this:
207
279
208 * Put :file:`ipcontroller-engine.furl` in the :file:`~./ipython/security`
280 * Put :file:`ipcontroller-engine.furl` in the :file:`~./ipython/security`
209 directory on the engine's host, where it will be found automatically.
281 directory on the engine's host, where it will be found automatically.
210 * Call :command:`ipengine` with the ``--furl-file=full_path_to_the_file``
282 * Call :command:`ipengine` with the ``--furl-file=full_path_to_the_file``
211 flag.
283 flag.
212
284
213 The ``--furl-file`` flag works like this::
285 The ``--furl-file`` flag works like this::
214
286
215 $ ipengine --furl-file=/path/to/my/ipcontroller-engine.furl
287 $ ipengine --furl-file=/path/to/my/ipcontroller-engine.furl
216
288
217 .. note::
289 .. note::
218
290
219 If the controller's and engine's hosts all have a shared file system
291 If the controller's and engine's hosts all have a shared file system
220 (:file:`~./ipython/security` is the same on all of them), then things
292 (:file:`~./ipython/security` is the same on all of them), then things
221 will just work!
293 will just work!
222
294
223 Make FURL files persistent
295 Make FURL files persistent
224 ---------------------------
296 ---------------------------
225
297
226 At first glance it may seem that managing the FURL files is a bit annoying. Going back to the house and key analogy, copying the FURL around each time you start the controller is like having to make a new key every time you want to unlock the door and enter your house. As with your house, you want to be able to create the key (or FURL file) once, and then simply use it at any point in the future.
298 At first glance it may seem that managing the FURL files is a bit annoying. Going back to the house and key analogy, copying the FURL around each time you start the controller is like having to make a new key every time you want to unlock the door and enter your house. As with your house, you want to be able to create the key (or FURL file) once, and then simply use it at any point in the future.
227
299
228 This is possible. The only thing you have to do is decide what ports the controller will listen on for the engines and clients. This is done as follows::
300 This is possible. The only thing you have to do is decide what ports the controller will listen on for the engines and clients. This is done as follows::
229
301
230 $ ipcontroller -r --client-port=10101 --engine-port=10102
302 $ ipcontroller -r --client-port=10101 --engine-port=10102
231
303
232 Then, just copy the furl files over the first time and you are set. You can start and stop the controller and engines as many times as you want in the future, just make sure to tell the controller to use the *same* ports.
304 Then, just copy the furl files over the first time and you are set. You can start and stop the controller and engines as many times as you want in the future, just make sure to tell the controller to use the *same* ports.
233
305
234 .. note::
306 .. note::
235
307
236 You may ask the question: what ports does the controller listen on if you
308 You may ask the question: what ports does the controller listen on if you
237 don't tell it to use specific ones? The default is to use high random port
309 don't tell it to use specific ones? The default is to use high random port
238 numbers. We do this for two reasons: i) to increase security through
310 numbers. We do this for two reasons: i) to increase security through
239 obscurity and ii) to allow multiple controllers on a given host to start and
311 obscurity and ii) to allow multiple controllers on a given host to start and
240 automatically use different ports.
312 automatically use different ports.
241
313
242 Log files
314 Log files
243 ---------
315 ---------
244
316
245 All of the components of IPython have log files associated with them.
317 All of the components of IPython have log files associated with them.
246 These log files can be extremely useful in debugging problems with
318 These log files can be extremely useful in debugging problems with
247 IPython and can be found in the directory :file:`~/.ipython/log`. Sending
319 IPython and can be found in the directory :file:`~/.ipython/log`. Sending
248 the log files to us will often help us to debug any problems.
320 the log files to us will often help us to debug any problems.
249
321
250
322
251 .. [PBS] Portable Batch System. http://www.openpbs.org/
323 .. [PBS] Portable Batch System. http://www.openpbs.org/
324 .. [SSH] SSH-Agent http://en.wikipedia.org/wiki/Ssh-agent
General Comments 0
You need to be logged in to leave comments. Login now