##// END OF EJS Templates
initial sge compatibility attempt
Satrajit Ghosh -
Show More
@@ -330,6 +330,15 b' class PBSEngineSet(BatchEngineSet):'
330 def __init__(self, template_file, **kwargs):
330 def __init__(self, template_file, **kwargs):
331 BatchEngineSet.__init__(self, template_file, **kwargs)
331 BatchEngineSet.__init__(self, template_file, **kwargs)
332
332
class SGEEngineSet(BatchEngineSet):
    """Engine set for the Sun Grid Engine (SGE) batch system.

    All behavior is inherited from BatchEngineSet; this subclass only
    supplies the SGE-specific command names and job-id pattern.
    NOTE(review): how BatchEngineSet consumes these attributes is not
    visible here -- confirm against the base class.
    """

    # SGE command names for submitting and deleting a job.
    submit_command = 'qsub'
    delete_command = 'qdel'
    # Raw string so that ``\d`` is a regex escape rather than an invalid
    # string escape (the resulting pattern is unchanged).
    job_id_regexp = r'\d+'

    def __init__(self, template_file, **kwargs):
        """Initialize the engine set from an SGE script template file.

        template_file and any keyword arguments are forwarded unchanged
        to BatchEngineSet.__init__.
        """
        BatchEngineSet.__init__(self, template_file, **kwargs)
333
342
334 sshx_template="""#!/bin/sh
343 sshx_template="""#!/bin/sh
335 "$@" &> /dev/null &
344 "$@" &> /dev/null &
@@ -627,6 +636,35 b' def main_pbs(args):'
627 dstart.addCallback(_delay_start, start_engines, furl_file, args.r)
636 dstart.addCallback(_delay_start, start_engines, furl_file, args.r)
628 dstart.addErrback(_err_and_stop)
637 dstart.addErrback(_err_and_stop)
629
638
def main_sge(args):
    """Start a controller and a set of engines using the SGE batch system.

    Returns early, without launching anything, when either
    check_security or check_reuse returns a false value. Otherwise the
    controller is launched and engine startup is chained onto the
    deferred returned by the launcher.
    """
    controller_args = ['--logfile=%s' % pjoin(args.logdir, 'ipcontroller')]

    # Check security settings first, then whether FURL files are reused;
    # bail out as soon as one of the checks fails.
    if not check_security(args, controller_args):
        return
    if not check_reuse(args, controller_args):
        return

    launcher = ControllerLauncher(extra_args=controller_args)
    controller_started = launcher.start()

    def start_engines(r):
        engine_set = SGEEngineSet(args.sgescript)

        def shutdown(signum, frame):
            # SIGINT handler: kill the engine set, then the controller,
            # then schedule the reactor to stop.
            log.msg('Stopping sge cluster')
            stopping = engine_set.kill()
            stopping.addBoth(lambda _: launcher.interrupt_then_kill(1.0))
            stopping.addBoth(lambda _: reactor.callLater(2.0, reactor.stop))

        signal.signal(signal.SIGINT, shutdown)
        return engine_set.start(args.n)

    controller_config = kernel_config_manager.get_config_obj()['controller']
    furl_file = controller_config['engine_furl_file']
    controller_started.addCallback(_delay_start, start_engines, furl_file, args.r)
    controller_started.addErrback(_err_and_stop)
667
630
668
631 def main_ssh(args):
669 def main_ssh(args):
632 """Start a controller on localhost and engines using ssh.
670 """Start a controller on localhost and engines using ssh.
@@ -780,6 +818,20 b' def get_args():'
780 )
818 )
781 parser_pbs.set_defaults(func=main_pbs)
819 parser_pbs.set_defaults(func=main_pbs)
782
820
821 parser_sge = subparsers.add_parser(
822 'sge',
823 help='run a sge cluster',
824 parents=[base_parser]
825 )
826 parser_sge.add_argument(
827 '--sge-script',
828 type=str,
829 dest='sgescript',
830 help='SGE script template',
831 default='sge.template'
832 )
833 parser_sge.set_defaults(func=main_sge)
834
783 parser_ssh = subparsers.add_parser(
835 parser_ssh = subparsers.add_parser(
784 'ssh',
836 'ssh',
785 help='run a cluster using ssh, should have ssh-keys setup',
837 help='run a cluster using ssh, should have ssh-keys setup',
@@ -53,7 +53,8 b' The :command:`ipcluster` command provides a simple way of starting a controller '
53 2. When engines are started using the :command:`mpirun` command that comes
53 2. When engines are started using the :command:`mpirun` command that comes
54 with most MPI [MPI]_ implementations
54 with most MPI [MPI]_ implementations
55 3. When engines are started using the PBS [PBS]_ batch system.
55 3. When engines are started using the PBS [PBS]_ batch system.
56 4. When the controller is started on localhost and the engines are started on
56 4. When engines are started using the SGE [SGE]_ batch system.
57 5. When the controller is started on localhost and the engines are started on
57 remote nodes using :command:`ssh`.
58 remote nodes using :command:`ssh`.
58
59
59 .. note::
60 .. note::
@@ -169,6 +170,53 b' Additional command line options for this mode can be found by doing::'
169
170
170 $ ipcluster pbs -h
171 $ ipcluster pbs -h
171
172
173 Using :command:`ipcluster` in SGE mode
174 --------------------------------------
175
176 The SGE mode uses the Sun Grid Engine [SGE]_ to start the engines. To use this mode, you first need to create an SGE script template that will be used to start the engines. Here is a sample SGE script template:
177
178 .. sourcecode:: bash
179
180 #!/bin/bash
181 #$ -V
182 #$ -cwd
183 #$ -M joe@gmail.com
184 #$ -m beas
185 #$ -N ipython
186 #$ -r y
187 #$ -S /bin/bash
188
189 cd $$HOME/sge
190 /usr/local/bin/mpiexec -n ${n} ipengine --logfile=$$HOME/sge/ipengine
191
192 There are a few important points about this template:
193
194 1. This template will be rendered at runtime using IPython's :mod:`Itpl`
195 template engine.
196
197 2. Instead of putting in the actual number of engines, use the notation
198 ``${n}`` to indicate the number of engines to be started. You can also use
199 expressions like ``${n/4}`` in the template to indicate the number of
200 nodes.
201
202 3. Because ``$`` is a special character used by the template engine, you must
203 escape any ``$`` by using ``$$``. This is important when referring to
204 environment variables in the template.
205
206 4. Any options to :command:`ipengine` should be given in the batch script
207 template.
208
209 5. Depending on the configuration of your system, you may have to set
210 environment variables in the script template.
211
212 Once you have created such a script, save it with a name like :file:`sge.template`. Now you are ready to start your job::
213
214 $ ipcluster sge -n 128 --sge-script=sge.template
215
216 Additional command line options for this mode can be found by doing::
217
218 $ ipcluster sge -h
219
172 Using :command:`ipcluster` in SSH mode
220 Using :command:`ipcluster` in SSH mode
173 --------------------------------------
221 --------------------------------------
174
222
@@ -348,4 +396,5 b' the log files to us will often help us to debug any problems.'
348
396
349
397
350 .. [PBS] Portable Batch System. http://www.openpbs.org/
398 .. [PBS] Portable Batch System. http://www.openpbs.org/
399 .. [SGE] Sun Grid Engine. http://www.sun.com/software/sge/
351 .. [SSH] SSH-Agent http://en.wikipedia.org/wiki/Ssh-agent
400 .. [SSH] SSH-Agent http://en.wikipedia.org/wiki/Ssh-agent
General Comments 0
You need to be logged in to leave comments. Login now