##// END OF EJS Templates
Updated SGE to launch multiple engines - they start cleanly but do not shut down cleanly
Satrajit Ghosh -
Show More
@@ -234,6 +234,7 b' class LocalEngineSet(object):'
234 234 def start(self, n):
235 235 dlist = []
236 236 for i in range(n):
237 print "starting engine:", i
237 238 el = EngineLauncher(extra_args=self.extra_args)
238 239 d = el.start()
239 240 self.launchers.append(el)
@@ -338,6 +339,7 b' class SGEEngineSet(BatchEngineSet):'
338 339
def __init__(self, template_file, **kwargs):
    """Initialise the SGE engine set.

    ``template_file`` and any keyword arguments are forwarded unchanged to
    ``BatchEngineSet.__init__``.
    """
    BatchEngineSet.__init__(self, template_file, **kwargs)
    # No engine count is known at construction time.
    self.num_engines = None
341 343
342 344 def parse_job_id(self, output):
343 345 m = re.search(self.job_id_regexp, output)
@@ -345,10 +347,48 b' class SGEEngineSet(BatchEngineSet):'
345 347 job_id = m.group()
346 348 else:
347 349 raise Exception("job id couldn't be determined: %s" % output)
348 self.job_id = job_id
350 self.job_id.append(job_id)
349 351 log.msg('Job started with job id: %r' % job_id)
350 352 return job_id
351
353
def kill_job(self, output):
    """Log ``output`` and hand it back unchanged.

    Returning the value lets this be used as a pass-through callback.
    """
    log.msg(output)
    return output
357
def write_batch_script(self, i):
    """Render the batch template for engine ``i`` and write it to disk.

    The text of ``self.template_file`` is instantiated through
    ``Itpl.itplns`` with ``eid`` bound to ``i``; the result is written to
    the path ``self.batch_file`` with ``str(i)`` appended.
    """
    context = {'eid': i}
    batch_file = self.batch_file + str(i)

    # try/finally (rather than relying on garbage collection) guarantees
    # the template handle is closed even if the read fails.
    template_fh = open(self.template_file, 'r')
    try:
        template = template_fh.read()
    finally:
        template_fh.close()
    log.msg('Using template for batch script: %s' % self.template_file)

    script_as_string = Itpl.itplns(template, context)

    log.msg('Writing instantiated batch script: %s' % batch_file)
    script_fh = open(batch_file, 'w')
    try:
        script_fh.write(script_as_string)
    finally:
        # Close the script even when the write raises, so no handle leaks.
        script_fh.close()
367
def start(self, n):
    """Submit ``n`` batch jobs, one freshly written batch script per job.

    For each index ``i`` in ``range(n)`` a batch script is written and
    handed to ``self.submit_command``.  Job ids are collected in
    ``self.job_id`` by ``parse_job_id``; ``self.num_engines`` counts the
    submissions for which a job id was obtained.

    Returns the result of ``gatherBoth`` over the per-submission Deferreds.
    """
    dlist = []
    self.num_engines = 0
    self.job_id = []

    def count_engine(job_id):
        # Chained directly after parse_job_id, so this only runs when a
        # job id was actually parsed; failed submissions are not counted.
        self.num_engines += 1
        return job_id

    for i in range(n):
        log.msg("starting engine: %d" % i)
        self.write_batch_script(i)
        d = getProcessOutput(self.submit_command,
                             [self.batch_file + str(i)], env=os.environ)
        d.addCallback(self.parse_job_id)
        d.addCallback(count_engine)
        # The errback stays last so it sees failures from either callback.
        d.addErrback(self.handle_error)
        dlist.append(d)
    return gatherBoth(dlist, consumeErrors=True)

def kill(self):
    """Run ``self.delete_command`` once for every recorded job id.

    Returns the result of ``gatherBoth`` over the per-deletion Deferreds
    (an empty list of Deferreds when no job id has been recorded).
    """
    dlist = []
    # Walk the recorded ids themselves instead of indexing by a separate
    # counter: the counter can disagree with the list (e.g. when some
    # submissions failed), which previously meant no job was ever deleted.
    job_ids = getattr(self, 'job_id', None) or []
    for job_id in list(job_ids):
        # Job ids come from a regexp match and are strings, so %s (not %d).
        log.msg("killing job id: %s" % job_id)
        d = getProcessOutput(self.delete_command,
                             [job_id], env=os.environ)
        d.addCallback(self.kill_job)
        dlist.append(d)
    return gatherBoth(dlist, consumeErrors=True)
391
352 392 sshx_template="""#!/bin/sh
353 393 "$@" &> /dev/null &
354 394 echo $!
@@ -179,25 +179,22 b' The SGE mode uses the Sun Grid Engine [SGE]_ to start the engines. To use this '
179 179
180 180 #!/bin/bash
181 181 #$ -V
182 #$ -cwd
183 182 #$ -m n
184 #$ -N satra-ipython
183 #$ -N ipengine-${eid}
185 184 #$ -r y
186 185 #$ -q sub
187 186 #$ -S /bin/bash
188 187
189 188 cd $$HOME/sge
190 ipengine --logfile=ipengine
189 ipengine --logfile=ipengine${eid}
191 190
192 191 There are a few important points about this template:
193 192
194 193 1. This template will be rendered at runtime using IPython's :mod:`Itpl`
195 194 template engine.
196 195
197 2. Instead of putting in the actual number of engines, use the notation
198 ``${n}`` to indicate the number of engines to be started. You can also use
199 expressions like ``${n/4}`` in the template to indicate the number of
200 nodes.
196 2. Instead of hard-coding an engine id, use the notation
197 ``${eid}`` to indicate where each engine's id should be inserted.
201 198
202 199 3. Because ``$`` is a special character used by the template engine, you must
203 200 escape any ``$`` by using ``$$``. This is important when referring to
@@ -211,7 +208,7 b' There are a few important points about this template:'
211 208
212 209 Once you have created such a script, save it with a name like :file:`sge.template`. Now you are ready to start your job::
213 210
214 $ ipcluster sge -n 128 --sge-script=sge.template
211 $ ipcluster sge -n 12 --sge-script=sge.template
215 212
216 213 Additional command line options for this mode can be found by doing::
217 214
General Comments 0
You need to be logged in to leave comments. Login now