Show More
@@ -693,7 +693,7 b' class QueuedEngine(object):' | |||||
693 | @queue |
|
693 | @queue | |
694 | def execute(self, lines): |
|
694 | def execute(self, lines): | |
695 | pass |
|
695 | pass | |
696 |
|
696 | |||
697 | @queue |
|
697 | @queue | |
698 | def push(self, namespace): |
|
698 | def push(self, namespace): | |
699 | pass |
|
699 | pass |
@@ -335,7 +335,7 b' class MultiEngine(ControllerAdapterBase):' | |||||
335 | #--------------------------------------------------------------------------- |
|
335 | #--------------------------------------------------------------------------- | |
336 | # IEngineMultiplexer methods |
|
336 | # IEngineMultiplexer methods | |
337 | #--------------------------------------------------------------------------- |
|
337 | #--------------------------------------------------------------------------- | |
338 |
|
|
338 | ||
339 | def execute(self, lines, targets='all'): |
|
339 | def execute(self, lines, targets='all'): | |
340 | return self._performOnEnginesAndGatherBoth('execute', lines, targets=targets) |
|
340 | return self._performOnEnginesAndGatherBoth('execute', lines, targets=targets) | |
341 |
|
341 |
@@ -131,7 +131,7 b' class FCSynchronousMultiEngineFromMultiEngine(Referenceable):' | |||||
131 | def _addDeferredIDCallback(self, did, callback, *args, **kwargs): |
|
131 | def _addDeferredIDCallback(self, did, callback, *args, **kwargs): | |
132 | self._deferredIDCallbacks[did] = (callback, args, kwargs) |
|
132 | self._deferredIDCallbacks[did] = (callback, args, kwargs) | |
133 | return did |
|
133 | return did | |
134 |
|
|
134 | ||
135 | #--------------------------------------------------------------------------- |
|
135 | #--------------------------------------------------------------------------- | |
136 | # IEngineMultiplexer related methods |
|
136 | # IEngineMultiplexer related methods | |
137 | #--------------------------------------------------------------------------- |
|
137 | #--------------------------------------------------------------------------- | |
@@ -346,7 +346,7 b' class FCFullSynchronousMultiEngineClient(object):' | |||||
346 | #--------------------------------------------------------------------------- |
|
346 | #--------------------------------------------------------------------------- | |
347 | # IEngineMultiplexer related methods |
|
347 | # IEngineMultiplexer related methods | |
348 | #--------------------------------------------------------------------------- |
|
348 | #--------------------------------------------------------------------------- | |
349 |
|
|
349 | ||
350 | def execute(self, lines, targets='all', block=True): |
|
350 | def execute(self, lines, targets='all', block=True): | |
351 | d = self.remote_reference.callRemote('execute', lines, targets, block) |
|
351 | d = self.remote_reference.callRemote('execute', lines, targets, block) | |
352 | d.addCallback(self.unpackage) |
|
352 | d.addCallback(self.unpackage) |
@@ -18,19 +18,24 b' import os' | |||||
18 | import re |
|
18 | import re | |
19 | import sys |
|
19 | import sys | |
20 | import signal |
|
20 | import signal | |
|
21 | import tempfile | |||
21 | pjoin = os.path.join |
|
22 | pjoin = os.path.join | |
22 |
|
23 | |||
23 | from twisted.internet import reactor, defer |
|
24 | from twisted.internet import reactor, defer | |
24 | from twisted.internet.protocol import ProcessProtocol |
|
25 | from twisted.internet.protocol import ProcessProtocol | |
25 | from twisted.python import failure, log |
|
|||
26 | from twisted.internet.error import ProcessDone, ProcessTerminated |
|
26 | from twisted.internet.error import ProcessDone, ProcessTerminated | |
27 | from twisted.internet.utils import getProcessOutput |
|
27 | from twisted.internet.utils import getProcessOutput | |
|
28 | from twisted.python import failure, log | |||
28 |
|
29 | |||
29 | from IPython.external import argparse |
|
30 | from IPython.external import argparse | |
30 | from IPython.external import Itpl |
|
31 | from IPython.external import Itpl | |
|
32 | from IPython.genutils import get_ipython_dir, num_cpus | |||
|
33 | from IPython.kernel.fcutil import have_crypto | |||
|
34 | from IPython.kernel.error import SecurityError | |||
|
35 | from IPython.kernel.fcutil import have_crypto | |||
31 | from IPython.kernel.twistedutil import gatherBoth |
|
36 | from IPython.kernel.twistedutil import gatherBoth | |
32 | from IPython.kernel.util import printer |
|
37 | from IPython.kernel.util import printer | |
33 | from IPython.genutils import get_ipython_dir, num_cpus |
|
38 | ||
34 |
|
39 | |||
35 | #----------------------------------------------------------------------------- |
|
40 | #----------------------------------------------------------------------------- | |
36 | # General process handling code |
|
41 | # General process handling code | |
@@ -42,8 +47,11 b' def find_exe(cmd):' | |||||
42 | except ImportError: |
|
47 | except ImportError: | |
43 | raise ImportError('you need to have pywin32 installed for this to work') |
|
48 | raise ImportError('you need to have pywin32 installed for this to work') | |
44 | else: |
|
49 | else: | |
45 | (path, offest) = win32api.SearchPath(os.environ['PATH'],cmd) |
|
50 | try: | |
46 | return path |
|
51 | (path, offest) = win32api.SearchPath(os.environ['PATH'],cmd + '.exe') | |
|
52 | except: | |||
|
53 | (path, offset) = win32api.SearchPath(os.environ['PATH'],cmd + '.bat') | |||
|
54 | return path | |||
47 |
|
55 | |||
48 | class ProcessStateError(Exception): |
|
56 | class ProcessStateError(Exception): | |
49 | pass |
|
57 | pass | |
@@ -74,10 +82,10 b' class LauncherProcessProtocol(ProcessProtocol):' | |||||
74 | ) |
|
82 | ) | |
75 | else: |
|
83 | else: | |
76 | raise UnknownStatus("unknown exit status, this is probably a bug in Twisted") |
|
84 | raise UnknownStatus("unknown exit status, this is probably a bug in Twisted") | |
77 |
|
85 | |||
78 | def outReceived(self, data): |
|
86 | def outReceived(self, data): | |
79 | log.msg(data) |
|
87 | log.msg(data) | |
80 |
|
88 | |||
81 | def errReceived(self, data): |
|
89 | def errReceived(self, data): | |
82 | log.err(data) |
|
90 | log.err(data) | |
83 |
|
91 | |||
@@ -171,7 +179,13 b' class ControllerLauncher(ProcessLauncher):' | |||||
171 |
|
179 | |||
172 | def __init__(self, extra_args=None): |
|
180 | def __init__(self, extra_args=None): | |
173 | if sys.platform == 'win32': |
|
181 | if sys.platform == 'win32': | |
174 | args = [find_exe('ipcontroller.bat')] |
|
182 | # This logic is needed because the ipcontroller script doesn't | |
|
183 | # always get installed in the same way or in the same location. | |||
|
184 | from IPython.kernel.scripts import ipcontroller | |||
|
185 | script_location = ipcontroller.__file__.replace('.pyc', '.py') | |||
|
186 | # The -u option here turns on unbuffered output, which is required | |||
|
187 | # on Win32 to prevent wierd conflict and problems with Twisted | |||
|
188 | args = [find_exe('python'), '-u', script_location] | |||
175 | else: |
|
189 | else: | |
176 | args = ['ipcontroller'] |
|
190 | args = ['ipcontroller'] | |
177 | self.extra_args = extra_args |
|
191 | self.extra_args = extra_args | |
@@ -185,7 +199,13 b' class EngineLauncher(ProcessLauncher):' | |||||
185 |
|
199 | |||
186 | def __init__(self, extra_args=None): |
|
200 | def __init__(self, extra_args=None): | |
187 | if sys.platform == 'win32': |
|
201 | if sys.platform == 'win32': | |
188 | args = [find_exe('ipengine.bat')] |
|
202 | # This logic is needed because the ipcontroller script doesn't | |
|
203 | # always get installed in the same way or in the same location. | |||
|
204 | from IPython.kernel.scripts import ipengine | |||
|
205 | script_location = ipengine.__file__.replace('.pyc', '.py') | |||
|
206 | # The -u option here turns on unbuffered output, which is required | |||
|
207 | # on Win32 to prevent wierd conflict and problems with Twisted | |||
|
208 | args = [find_exe('python'), '-u', script_location] | |||
189 | else: |
|
209 | else: | |
190 | args = ['ipengine'] |
|
210 | args = ['ipengine'] | |
191 | self.extra_args = extra_args |
|
211 | self.extra_args = extra_args | |
@@ -253,7 +273,7 b' class BatchEngineSet(object):' | |||||
253 | self.context = {} |
|
273 | self.context = {} | |
254 | self.context.update(kwargs) |
|
274 | self.context.update(kwargs) | |
255 | self.batch_file = self.template_file+'-run' |
|
275 | self.batch_file = self.template_file+'-run' | |
256 |
|
276 | |||
257 | def parse_job_id(self, output): |
|
277 | def parse_job_id(self, output): | |
258 | m = re.match(self.job_id_regexp, output) |
|
278 | m = re.match(self.job_id_regexp, output) | |
259 | if m is not None: |
|
279 | if m is not None: | |
@@ -273,7 +293,7 b' class BatchEngineSet(object):' | |||||
273 | f = open(self.batch_file,'w') |
|
293 | f = open(self.batch_file,'w') | |
274 | f.write(script_as_string) |
|
294 | f.write(script_as_string) | |
275 | f.close() |
|
295 | f.close() | |
276 |
|
296 | |||
277 | def handle_error(self, f): |
|
297 | def handle_error(self, f): | |
278 | f.printTraceback() |
|
298 | f.printTraceback() | |
279 | f.raiseException() |
|
299 | f.raiseException() | |
@@ -285,7 +305,7 b' class BatchEngineSet(object):' | |||||
285 | d.addCallback(self.parse_job_id) |
|
305 | d.addCallback(self.parse_job_id) | |
286 | d.addErrback(self.handle_error) |
|
306 | d.addErrback(self.handle_error) | |
287 | return d |
|
307 | return d | |
288 |
|
|
308 | ||
289 | def kill(self): |
|
309 | def kill(self): | |
290 | d = getProcessOutput(self.delete_command, |
|
310 | d = getProcessOutput(self.delete_command, | |
291 | [self.job_id],env=os.environ) |
|
311 | [self.job_id],env=os.environ) | |
@@ -301,6 +321,140 b' class PBSEngineSet(BatchEngineSet):' | |||||
301 | BatchEngineSet.__init__(self, template_file, **kwargs) |
|
321 | BatchEngineSet.__init__(self, template_file, **kwargs) | |
302 |
|
322 | |||
303 |
|
323 | |||
|
324 | sshx_template="""#!/bin/sh | |||
|
325 | "$@" &> /dev/null & | |||
|
326 | echo $! | |||
|
327 | """ | |||
|
328 | ||||
|
329 | engine_killer_template="""#!/bin/sh | |||
|
330 | ps -fu `whoami` | grep '[i]pengine' | awk '{print $2}' | xargs kill -TERM | |||
|
331 | """ | |||
|
332 | ||||
|
333 | class SSHEngineSet(object): | |||
|
334 | sshx_template=sshx_template | |||
|
335 | engine_killer_template=engine_killer_template | |||
|
336 | ||||
|
337 | def __init__(self, engine_hosts, sshx=None, ipengine="ipengine"): | |||
|
338 | """Start a controller on localhost and engines using ssh. | |||
|
339 | ||||
|
340 | The engine_hosts argument is a dict with hostnames as keys and | |||
|
341 | the number of engine (int) as values. sshx is the name of a local | |||
|
342 | file that will be used to run remote commands. This file is used | |||
|
343 | to setup the environment properly. | |||
|
344 | """ | |||
|
345 | ||||
|
346 | self.temp_dir = tempfile.gettempdir() | |||
|
347 | if sshx is not None: | |||
|
348 | self.sshx = sshx | |||
|
349 | else: | |||
|
350 | # Write the sshx.sh file locally from our template. | |||
|
351 | self.sshx = os.path.join( | |||
|
352 | self.temp_dir, | |||
|
353 | '%s-main-sshx.sh' % os.environ['USER'] | |||
|
354 | ) | |||
|
355 | f = open(self.sshx, 'w') | |||
|
356 | f.writelines(self.sshx_template) | |||
|
357 | f.close() | |||
|
358 | self.engine_command = ipengine | |||
|
359 | self.engine_hosts = engine_hosts | |||
|
360 | # Write the engine killer script file locally from our template. | |||
|
361 | self.engine_killer = os.path.join( | |||
|
362 | self.temp_dir, | |||
|
363 | '%s-local-engine_killer.sh' % os.environ['USER'] | |||
|
364 | ) | |||
|
365 | f = open(self.engine_killer, 'w') | |||
|
366 | f.writelines(self.engine_killer_template) | |||
|
367 | f.close() | |||
|
368 | ||||
|
369 | def start(self, send_furl=False): | |||
|
370 | dlist = [] | |||
|
371 | for host in self.engine_hosts.keys(): | |||
|
372 | count = self.engine_hosts[host] | |||
|
373 | d = self._start(host, count, send_furl) | |||
|
374 | dlist.append(d) | |||
|
375 | return gatherBoth(dlist, consumeErrors=True) | |||
|
376 | ||||
|
377 | def _start(self, hostname, count=1, send_furl=False): | |||
|
378 | if send_furl: | |||
|
379 | d = self._scp_furl(hostname) | |||
|
380 | else: | |||
|
381 | d = defer.succeed(None) | |||
|
382 | d.addCallback(lambda r: self._scp_sshx(hostname)) | |||
|
383 | d.addCallback(lambda r: self._ssh_engine(hostname, count)) | |||
|
384 | return d | |||
|
385 | ||||
|
386 | def _scp_furl(self, hostname): | |||
|
387 | scp_cmd = "scp ~/.ipython/security/ipcontroller-engine.furl %s:.ipython/security/" % (hostname) | |||
|
388 | cmd_list = scp_cmd.split() | |||
|
389 | cmd_list[1] = os.path.expanduser(cmd_list[1]) | |||
|
390 | log.msg('Copying furl file: %s' % scp_cmd) | |||
|
391 | d = getProcessOutput(cmd_list[0], cmd_list[1:], env=os.environ) | |||
|
392 | return d | |||
|
393 | ||||
|
394 | def _scp_sshx(self, hostname): | |||
|
395 | scp_cmd = "scp %s %s:%s/%s-sshx.sh" % ( | |||
|
396 | self.sshx, hostname, | |||
|
397 | self.temp_dir, os.environ['USER'] | |||
|
398 | ) | |||
|
399 | ||||
|
400 | log.msg("Copying sshx: %s" % scp_cmd) | |||
|
401 | sshx_scp = scp_cmd.split() | |||
|
402 | d = getProcessOutput(sshx_scp[0], sshx_scp[1:], env=os.environ) | |||
|
403 | return d | |||
|
404 | ||||
|
405 | def _ssh_engine(self, hostname, count): | |||
|
406 | exec_engine = "ssh %s sh %s/%s-sshx.sh %s" % ( | |||
|
407 | hostname, self.temp_dir, | |||
|
408 | os.environ['USER'], self.engine_command | |||
|
409 | ) | |||
|
410 | cmds = exec_engine.split() | |||
|
411 | dlist = [] | |||
|
412 | log.msg("about to start engines...") | |||
|
413 | for i in range(count): | |||
|
414 | log.msg('Starting engines: %s' % exec_engine) | |||
|
415 | d = getProcessOutput(cmds[0], cmds[1:], env=os.environ) | |||
|
416 | dlist.append(d) | |||
|
417 | return gatherBoth(dlist, consumeErrors=True) | |||
|
418 | ||||
|
419 | def kill(self): | |||
|
420 | dlist = [] | |||
|
421 | for host in self.engine_hosts.keys(): | |||
|
422 | d = self._killall(host) | |||
|
423 | dlist.append(d) | |||
|
424 | return gatherBoth(dlist, consumeErrors=True) | |||
|
425 | ||||
|
426 | def _killall(self, hostname): | |||
|
427 | d = self._scp_engine_killer(hostname) | |||
|
428 | d.addCallback(lambda r: self._ssh_kill(hostname)) | |||
|
429 | # d.addErrback(self._exec_err) | |||
|
430 | return d | |||
|
431 | ||||
|
432 | def _scp_engine_killer(self, hostname): | |||
|
433 | scp_cmd = "scp %s %s:%s/%s-engine_killer.sh" % ( | |||
|
434 | self.engine_killer, | |||
|
435 | hostname, | |||
|
436 | self.temp_dir, | |||
|
437 | os.environ['USER'] | |||
|
438 | ) | |||
|
439 | cmds = scp_cmd.split() | |||
|
440 | log.msg('Copying engine_killer: %s' % scp_cmd) | |||
|
441 | d = getProcessOutput(cmds[0], cmds[1:], env=os.environ) | |||
|
442 | return d | |||
|
443 | ||||
|
444 | def _ssh_kill(self, hostname): | |||
|
445 | kill_cmd = "ssh %s sh %s/%s-engine_killer.sh" % ( | |||
|
446 | hostname, | |||
|
447 | self.temp_dir, | |||
|
448 | os.environ['USER'] | |||
|
449 | ) | |||
|
450 | log.msg('Killing engine: %s' % kill_cmd) | |||
|
451 | kill_cmd = kill_cmd.split() | |||
|
452 | d = getProcessOutput(kill_cmd[0], kill_cmd[1:], env=os.environ) | |||
|
453 | return d | |||
|
454 | ||||
|
455 | def _exec_err(self, r): | |||
|
456 | log.msg(r) | |||
|
457 | ||||
304 | #----------------------------------------------------------------------------- |
|
458 | #----------------------------------------------------------------------------- | |
305 | # Main functions for the different types of clusters |
|
459 | # Main functions for the different types of clusters | |
306 | #----------------------------------------------------------------------------- |
|
460 | #----------------------------------------------------------------------------- | |
@@ -311,13 +465,28 b' class PBSEngineSet(BatchEngineSet):' | |||||
311 | # The main functions should then just parse the command line arguments, create |
|
465 | # The main functions should then just parse the command line arguments, create | |
312 | # the appropriate class and call a 'start' method. |
|
466 | # the appropriate class and call a 'start' method. | |
313 |
|
467 | |||
314 | def main_local(args): |
|
468 | def check_security(args, cont_args): | |
315 | cont_args = [] |
|
469 | if (not args.x or not args.y) and not have_crypto: | |
316 | cont_args.append('--logfile=%s' % pjoin(args.logdir,'ipcontroller')) |
|
470 | log.err(""" | |
|
471 | OpenSSL/pyOpenSSL is not available, so we can't run in secure mode. | |||
|
472 | Try running ipcluster with the -xy flags: ipcluster local -xy -n 4""") | |||
|
473 | reactor.stop() | |||
|
474 | return False | |||
317 | if args.x: |
|
475 | if args.x: | |
318 | cont_args.append('-x') |
|
476 | cont_args.append('-x') | |
319 | if args.y: |
|
477 | if args.y: | |
320 | cont_args.append('-y') |
|
478 | cont_args.append('-y') | |
|
479 | return True | |||
|
480 | ||||
|
481 | ||||
|
482 | def main_local(args): | |||
|
483 | cont_args = [] | |||
|
484 | cont_args.append('--logfile=%s' % pjoin(args.logdir,'ipcontroller')) | |||
|
485 | ||||
|
486 | # Check security settings before proceeding | |||
|
487 | if not check_security(args, cont_args): | |||
|
488 | return | |||
|
489 | ||||
321 | cl = ControllerLauncher(extra_args=cont_args) |
|
490 | cl = ControllerLauncher(extra_args=cont_args) | |
322 | dstart = cl.start() |
|
491 | dstart = cl.start() | |
323 | def start_engines(cont_pid): |
|
492 | def start_engines(cont_pid): | |
@@ -343,13 +512,15 b' def main_local(args):' | |||||
343 | dstart.addCallback(delay_start) |
|
512 | dstart.addCallback(delay_start) | |
344 | dstart.addErrback(lambda f: f.raiseException()) |
|
513 | dstart.addErrback(lambda f: f.raiseException()) | |
345 |
|
514 | |||
|
515 | ||||
346 | def main_mpirun(args): |
|
516 | def main_mpirun(args): | |
347 | cont_args = [] |
|
517 | cont_args = [] | |
348 | cont_args.append('--logfile=%s' % pjoin(args.logdir,'ipcontroller')) |
|
518 | cont_args.append('--logfile=%s' % pjoin(args.logdir,'ipcontroller')) | |
349 | if args.x: |
|
519 | ||
350 | cont_args.append('-x') |
|
520 | # Check security settings before proceeding | |
351 | if args.y: |
|
521 | if not check_security(args, cont_args): | |
352 | cont_args.append('-y') |
|
522 | return | |
|
523 | ||||
353 | cl = ControllerLauncher(extra_args=cont_args) |
|
524 | cl = ControllerLauncher(extra_args=cont_args) | |
354 | dstart = cl.start() |
|
525 | dstart = cl.start() | |
355 | def start_engines(cont_pid): |
|
526 | def start_engines(cont_pid): | |
@@ -379,13 +550,15 b' def main_mpirun(args):' | |||||
379 | dstart.addCallback(delay_start) |
|
550 | dstart.addCallback(delay_start) | |
380 | dstart.addErrback(lambda f: f.raiseException()) |
|
551 | dstart.addErrback(lambda f: f.raiseException()) | |
381 |
|
552 | |||
|
553 | ||||
382 | def main_pbs(args): |
|
554 | def main_pbs(args): | |
383 | cont_args = [] |
|
555 | cont_args = [] | |
384 | cont_args.append('--logfile=%s' % pjoin(args.logdir,'ipcontroller')) |
|
556 | cont_args.append('--logfile=%s' % pjoin(args.logdir,'ipcontroller')) | |
385 | if args.x: |
|
557 | ||
386 | cont_args.append('-x') |
|
558 | # Check security settings before proceeding | |
387 | if args.y: |
|
559 | if not check_security(args, cont_args): | |
388 | cont_args.append('-y') |
|
560 | return | |
|
561 | ||||
389 | cl = ControllerLauncher(extra_args=cont_args) |
|
562 | cl = ControllerLauncher(extra_args=cont_args) | |
390 | dstart = cl.start() |
|
563 | dstart = cl.start() | |
391 | def start_engines(r): |
|
564 | def start_engines(r): | |
@@ -402,6 +575,49 b' def main_pbs(args):' | |||||
402 | dstart.addErrback(lambda f: f.raiseException()) |
|
575 | dstart.addErrback(lambda f: f.raiseException()) | |
403 |
|
576 | |||
404 |
|
577 | |||
|
578 | def main_ssh(args): | |||
|
579 | """Start a controller on localhost and engines using ssh. | |||
|
580 | ||||
|
581 | Your clusterfile should look like:: | |||
|
582 | ||||
|
583 | send_furl = False # True, if you want | |||
|
584 | engines = { | |||
|
585 | 'engine_host1' : engine_count, | |||
|
586 | 'engine_host2' : engine_count2 | |||
|
587 | } | |||
|
588 | """ | |||
|
589 | clusterfile = {} | |||
|
590 | execfile(args.clusterfile, clusterfile) | |||
|
591 | if not clusterfile.has_key('send_furl'): | |||
|
592 | clusterfile['send_furl'] = False | |||
|
593 | ||||
|
594 | cont_args = [] | |||
|
595 | cont_args.append('--logfile=%s' % pjoin(args.logdir,'ipcontroller')) | |||
|
596 | ||||
|
597 | # Check security settings before proceeding | |||
|
598 | if not check_security(args, cont_args): | |||
|
599 | return | |||
|
600 | ||||
|
601 | cl = ControllerLauncher(extra_args=cont_args) | |||
|
602 | dstart = cl.start() | |||
|
603 | def start_engines(cont_pid): | |||
|
604 | ssh_set = SSHEngineSet(clusterfile['engines'], sshx=args.sshx) | |||
|
605 | def shutdown(signum, frame): | |||
|
606 | d = ssh_set.kill() | |||
|
607 | # d.addErrback(log.err) | |||
|
608 | cl.interrupt_then_kill(1.0) | |||
|
609 | reactor.callLater(2.0, reactor.stop) | |||
|
610 | signal.signal(signal.SIGINT,shutdown) | |||
|
611 | d = ssh_set.start(clusterfile['send_furl']) | |||
|
612 | return d | |||
|
613 | ||||
|
614 | def delay_start(cont_pid): | |||
|
615 | reactor.callLater(1.0, start_engines, cont_pid) | |||
|
616 | ||||
|
617 | dstart.addCallback(delay_start) | |||
|
618 | dstart.addErrback(lambda f: f.raiseException()) | |||
|
619 | ||||
|
620 | ||||
405 | def get_args(): |
|
621 | def get_args(): | |
406 | base_parser = argparse.ArgumentParser(add_help=False) |
|
622 | base_parser = argparse.ArgumentParser(add_help=False) | |
407 | base_parser.add_argument( |
|
623 | base_parser.add_argument( | |
@@ -473,6 +689,27 b' def get_args():' | |||||
473 | default='pbs.template' |
|
689 | default='pbs.template' | |
474 | ) |
|
690 | ) | |
475 | parser_pbs.set_defaults(func=main_pbs) |
|
691 | parser_pbs.set_defaults(func=main_pbs) | |
|
692 | ||||
|
693 | parser_ssh = subparsers.add_parser( | |||
|
694 | 'ssh', | |||
|
695 | help='run a cluster using ssh, should have ssh-keys setup', | |||
|
696 | parents=[base_parser] | |||
|
697 | ) | |||
|
698 | parser_ssh.add_argument( | |||
|
699 | '--clusterfile', | |||
|
700 | type=str, | |||
|
701 | dest='clusterfile', | |||
|
702 | help='python file describing the cluster', | |||
|
703 | default='clusterfile.py', | |||
|
704 | ) | |||
|
705 | parser_ssh.add_argument( | |||
|
706 | '--sshx', | |||
|
707 | type=str, | |||
|
708 | dest='sshx', | |||
|
709 | help='sshx launcher helper' | |||
|
710 | ) | |||
|
711 | parser_ssh.set_defaults(func=main_ssh) | |||
|
712 | ||||
476 | args = parser.parse_args() |
|
713 | args = parser.parse_args() | |
477 | return args |
|
714 | return args | |
478 |
|
715 |
@@ -27,6 +27,9 b' Release dev' | |||||
27 | New features |
|
27 | New features | |
28 | ------------ |
|
28 | ------------ | |
29 |
|
29 | |||
|
30 | * The new ipcluster now has a fully working ssh mode that should work on | |||
|
31 | Linux, Unix and OS X. Thanks to Vishal Vatsa for implementing this! | |||
|
32 | ||||
30 | * The wonderful TextMate editor can now be used with %edit on OS X. Thanks |
|
33 | * The wonderful TextMate editor can now be used with %edit on OS X. Thanks | |
31 | to Matt Foster for this patch. |
|
34 | to Matt Foster for this patch. | |
32 |
|
35 | |||
@@ -59,6 +62,8 b' New features' | |||||
59 | Bug fixes |
|
62 | Bug fixes | |
60 | --------- |
|
63 | --------- | |
61 |
|
64 | |||
|
65 | * Numerous bugs on Windows with the new ipcluster have been fixed. | |||
|
66 | ||||
62 | * The ipengine and ipcontroller scripts now handle missing furl files |
|
67 | * The ipengine and ipcontroller scripts now handle missing furl files | |
63 | more gracefully by giving better error messages. |
|
68 | more gracefully by giving better error messages. | |
64 |
|
69 |
@@ -53,6 +53,8 b' The :command:`ipcluster` command provides a simple way of starting a controller ' | |||||
53 | 2. When engines are started using the :command:`mpirun` command that comes |
|
53 | 2. When engines are started using the :command:`mpirun` command that comes | |
54 | with most MPI [MPI]_ implementations |
|
54 | with most MPI [MPI]_ implementations | |
55 | 3. When engines are started using the PBS [PBS]_ batch system. |
|
55 | 3. When engines are started using the PBS [PBS]_ batch system. | |
|
56 | 4. When the controller is started on localhost and the engines are started on | |||
|
57 | remote nodes using :command:`ssh`. | |||
56 |
|
58 | |||
57 | .. note:: |
|
59 | .. note:: | |
58 |
|
60 | |||
@@ -66,7 +68,8 b' The :command:`ipcluster` command provides a simple way of starting a controller ' | |||||
66 | :file:`~/.ipython/security` directory live on a shared filesystem that is |
|
68 | :file:`~/.ipython/security` directory live on a shared filesystem that is | |
67 | seen by both the controller and engines. If you don't have a shared file |
|
69 | seen by both the controller and engines. If you don't have a shared file | |
68 | system you will need to use :command:`ipcontroller` and |
|
70 | system you will need to use :command:`ipcontroller` and | |
69 | :command:`ipengine` directly. |
|
71 | :command:`ipengine` directly. This constraint can be relaxed if you are | |
|
72 | using the :command:`ssh` method to start the cluster. | |||
70 |
|
73 | |||
71 | Underneath the hood, :command:`ipcluster` just uses :command:`ipcontroller` |
|
74 | Underneath the hood, :command:`ipcluster` just uses :command:`ipcontroller` | |
72 | and :command:`ipengine` to perform the steps described above. |
|
75 | and :command:`ipengine` to perform the steps described above. | |
@@ -159,6 +162,75 b' Additional command line options for this mode can be found by doing::' | |||||
159 |
|
162 | |||
160 | $ ipcluster pbs -h |
|
163 | $ ipcluster pbs -h | |
161 |
|
164 | |||
|
165 | Using :command:`ipcluster` in SSH mode | |||
|
166 | -------------------------------------- | |||
|
167 | ||||
|
168 | The SSH mode uses :command:`ssh` to execute :command:`ipengine` on remote | |||
|
169 | nodes and the :command:`ipcontroller` on localhost. | |||
|
170 | ||||
|
171 | When using using this mode it highly recommended that you have set up SSH keys and are using ssh-agent [SSH]_ for password-less logins. | |||
|
172 | ||||
|
173 | To use this mode you need a python file describing the cluster, here is an example of such a "clusterfile": | |||
|
174 | ||||
|
175 | .. sourcecode:: python | |||
|
176 | ||||
|
177 | send_furl = True | |||
|
178 | engines = { 'host1.example.com' : 2, | |||
|
179 | 'host2.example.com' : 5, | |||
|
180 | 'host3.example.com' : 1, | |||
|
181 | 'host4.example.com' : 8 } | |||
|
182 | ||||
|
183 | Since this is a regular python file usual python syntax applies. Things to note: | |||
|
184 | ||||
|
185 | * The `engines` dict, where the keys is the host we want to run engines on and | |||
|
186 | the value is the number of engines to run on that host. | |||
|
187 | * send_furl can either be `True` or `False`, if `True` it will copy over the | |||
|
188 | furl needed for :command:`ipengine` to each host. | |||
|
189 | ||||
|
190 | The ``--clusterfile`` command line option lets you specify the file to use for | |||
|
191 | the cluster definition. Once you have your cluster file and you can | |||
|
192 | :command:`ssh` into the remote hosts with out an password you are ready to | |||
|
193 | start your cluster like so: | |||
|
194 | ||||
|
195 | .. sourcecode:: bash | |||
|
196 | ||||
|
197 | $ ipcluster ssh --clusterfile /path/to/my/clusterfile.py | |||
|
198 | ||||
|
199 | ||||
|
200 | Two helper shell scripts are used to start and stop :command:`ipengine` on remote hosts: | |||
|
201 | ||||
|
202 | * sshx.sh | |||
|
203 | * engine_killer.sh | |||
|
204 | ||||
|
205 | Defaults for both of these are contained in the source code for :command:`ipcluster`. The default scripts are written to a local file in a tmep directory and then copied to a temp directory on the remote host and executed from there. On most Unix, Linux and OS X systems this is /tmp. | |||
|
206 | ||||
|
207 | The default sshx.sh is the following: | |||
|
208 | ||||
|
209 | .. sourcecode:: bash | |||
|
210 | ||||
|
211 | #!/bin/sh | |||
|
212 | "$@" &> /dev/null & | |||
|
213 | echo $! | |||
|
214 | ||||
|
215 | If you want to use a custom sshx.sh script you need to use the ``--sshx`` | |||
|
216 | option and specify the file to use. Using a custom sshx.sh file could be | |||
|
217 | helpful when you need to setup the environment on the remote host before | |||
|
218 | executing :command:`ipengine`. | |||
|
219 | ||||
|
220 | For a detailed options list: | |||
|
221 | ||||
|
222 | .. sourcecode:: bash | |||
|
223 | ||||
|
224 | $ ipcluster ssh -h | |||
|
225 | ||||
|
226 | Current limitations of the SSH mode of :command:`ipcluster` are: | |||
|
227 | ||||
|
228 | * Untested on Windows. Would require a working :command:`ssh` on Windows. | |||
|
229 | Also, we are using shell scripts to setup and execute commands on remote | |||
|
230 | hosts. | |||
|
231 | * :command:`ipcontroller` is started on localhost, with no option to start it | |||
|
232 | on a remote node. | |||
|
233 | ||||
162 | Using the :command:`ipcontroller` and :command:`ipengine` commands |
|
234 | Using the :command:`ipcontroller` and :command:`ipengine` commands | |
163 | ================================================================== |
|
235 | ================================================================== | |
164 |
|
236 | |||
@@ -249,3 +321,4 b' the log files to us will often help us to debug any problems.' | |||||
249 |
|
321 | |||
250 |
|
322 | |||
251 | .. [PBS] Portable Batch System. http://www.openpbs.org/ |
|
323 | .. [PBS] Portable Batch System. http://www.openpbs.org/ | |
|
324 | .. [SSH] SSH-Agent http://en.wikipedia.org/wiki/Ssh-agent |
General Comments 0
You need to be logged in to leave comments.
Login now