##// END OF EJS Templates
Update PBS/SGE launchers with 0.10.1 options and defaults
MinRK -
Show More
@@ -11,7 +11,7 b' c = get_config()'
11 11 # - Start as a regular process on localhost.
12 12 # - Start using mpiexec.
13 13 # - Start using the Windows HPC Server 2008 scheduler
14 # - Start using PBS
14 # - Start using PBS/SGE
15 15 # - Start using SSH
16 16
17 17
@@ -21,13 +21,16 b' c = get_config()'
21 21 # - LocalControllerLauncher
22 22 # - MPIExecControllerLauncher
23 23 # - PBSControllerLauncher
24 # - SGEControllerLauncher
24 25 # - WindowsHPCControllerLauncher
25 26 # c.Global.controller_launcher = 'IPython.zmq.parallel.launcher.LocalControllerLauncher'
27 c.Global.controller_launcher = 'IPython.zmq.parallel.launcher.PBSControllerLauncher'
26 28
27 29 # Options are:
28 30 # - LocalEngineSetLauncher
29 31 # - MPIExecEngineSetLauncher
30 32 # - PBSEngineSetLauncher
33 # - SGEEngineSetLauncher
31 34 # - WindowsHPCEngineSetLauncher
32 35 # c.Global.engine_launcher = 'IPython.zmq.parallel.launcher.LocalEngineSetLauncher'
33 36
@@ -136,14 +139,23 b' c = get_config()'
136 139 # Unix batch (PBS) schedulers launchers
137 140 #-----------------------------------------------------------------------------
138 141
142 # SGE and PBS are very similar. All configurables in this section called 'PBS*'
143 # also exist as 'SGE*'.
144
139 145 # The command line program to use to submit a PBS job.
140 # c.PBSControllerLauncher.submit_command = ['qsub']
146 # c.PBSLauncher.submit_command = ['qsub']
141 147
142 148 # The command line program to use to delete a PBS job.
143 # c.PBSControllerLauncher.delete_command = ['qdel']
149 # c.PBSLauncher.delete_command = ['qdel']
150
151 # The PBS queue in which the job should run
152 # c.PBSLauncher.queue = 'myqueue'
144 153
145 154 # A regular expression that takes the output of qsub and finds the job id.
146 # c.PBSControllerLauncher.job_id_regexp = r'\d+'
155 # c.PBSLauncher.job_id_regexp = r'\d+'
156
157 # If for some reason the Controller and Engines have different options above, they
158 # can be set as c.PBSControllerLauncher.<option> etc.
147 159
148 160 # The batch submission script used to start the controller. This is where
149 161 # environment variables would be setup, etc. This string is interpreted using
@@ -151,23 +163,17 b' c = get_config()'
151 163 # number of engines and ${cluster_dir} for the cluster_dir.
152 164 # c.PBSControllerLauncher.batch_template = """
153 165 # #PBS -N ipcontroller
166 # #PBS -q $queue
154 167 #
155 168 # ipcontrollerz --cluster-dir $cluster_dir
156 169 # """
157 170
171 # You can also load this template from a file
172 # c.PBSControllerLauncher.batch_template_file = u"/path/to/my/template.sh"
173
158 174 # The name of the instantiated batch script that will actually be used to
159 175 # submit the job. This will be written to the cluster directory.
160 # c.PBSControllerLauncher.batch_file_name = u'pbs_batch_script_controller'
161
162
163 # The command line program to use to submit a PBS job.
164 # c.PBSEngineSetLauncher.submit_command = 'qsub'
165
166 # The command line program to use to delete a PBS job.
167 # c.PBSEngineSetLauncher.delete_command = 'qdel'
168
169 # A regular expression that takes the output of qsub and find the job id.
170 # c.PBSEngineSetLauncher.job_id_regexp = r'\d+'
176 # c.PBSControllerLauncher.batch_file_name = u'pbs_controller'
171 177
172 178 # The batch submission script used to start the engines. This is where
173 179 # environment variables would be setup, etc. This string is interpreted using
@@ -180,9 +186,14 b' c = get_config()'
180 186 # ipenginez --cluster-dir $cluster_dir$s
181 187 # """
182 188
189 # You can also load this template from a file
190 # c.PBSEngineSetLauncher.batch_template_file = u"/path/to/my/template.sh"
191
183 192 # The name of the instantiated batch script that will actually be used to
184 193 # submit the job. This will be written to the cluster directory.
185 # c.PBSEngineSetLauncher.batch_file_name = u'pbs_batch_script_engines'
194 # c.PBSEngineSetLauncher.batch_file_name = u'pbs_engines'
195
196
186 197
187 198 #-----------------------------------------------------------------------------
188 199 # Windows HPC Server 2008 launcher configuration
@@ -19,6 +19,7 b' import copy'
19 19 import logging
20 20 import os
21 21 import re
22 import stat
22 23
23 24 from signal import SIGINT, SIGTERM
24 25 try:
@@ -41,7 +42,7 b' from zmq.eventloop import ioloop'
41 42
42 43 from IPython.external import Itpl
43 44 # from IPython.config.configurable import Configurable
44 from IPython.utils.traitlets import Any, Str, Int, List, Unicode, Dict, Instance
45 from IPython.utils.traitlets import Any, Str, Int, List, Unicode, Dict, Instance, CUnicode
45 46 from IPython.utils.path import get_ipython_module_path
46 47 from IPython.utils.process import find_cmd, pycmd2argv, FindCmdError
47 48
@@ -62,15 +63,15 b' except ImportError:'
62 63 #-----------------------------------------------------------------------------
63 64
64 65
65 ipcluster_cmd_argv = pycmd2argv(get_ipython_module_path(
66 ipclusterz_cmd_argv = pycmd2argv(get_ipython_module_path(
66 67 'IPython.zmq.parallel.ipclusterapp'
67 68 ))
68 69
69 ipengine_cmd_argv = pycmd2argv(get_ipython_module_path(
70 ipenginez_cmd_argv = pycmd2argv(get_ipython_module_path(
70 71 'IPython.zmq.parallel.ipengineapp'
71 72 ))
72 73
73 ipcontroller_cmd_argv = pycmd2argv(get_ipython_module_path(
74 ipcontrollerz_cmd_argv = pycmd2argv(get_ipython_module_path(
74 75 'IPython.zmq.parallel.ipcontrollerapp'
75 76 ))
76 77
@@ -303,7 +304,7 b' class LocalProcessLauncher(BaseLauncher):'
303 304 class LocalControllerLauncher(LocalProcessLauncher):
304 305 """Launch a controller as a regular external process."""
305 306
306 controller_cmd = List(ipcontroller_cmd_argv, config=True)
307 controller_cmd = List(ipcontrollerz_cmd_argv, config=True)
307 308 # Command line arguments to ipcontroller.
308 309 controller_args = List(['--log-to-file','--log-level', str(logging.INFO)], config=True)
309 310
@@ -321,7 +322,7 b' class LocalControllerLauncher(LocalProcessLauncher):'
321 322 class LocalEngineLauncher(LocalProcessLauncher):
322 323 """Launch a single engine as a regular external process."""
323 324
324 engine_cmd = List(ipengine_cmd_argv, config=True)
325 engine_cmd = List(ipenginez_cmd_argv, config=True)
325 326 # Command line arguments for ipengine.
326 327 engine_args = List(
327 328 ['--log-to-file','--log-level', str(logging.INFO)], config=True
@@ -442,7 +443,7 b' class MPIExecLauncher(LocalProcessLauncher):'
442 443 class MPIExecControllerLauncher(MPIExecLauncher):
443 444 """Launch a controller using mpiexec."""
444 445
445 controller_cmd = List(ipcontroller_cmd_argv, config=True)
446 controller_cmd = List(ipcontrollerz_cmd_argv, config=True)
446 447 # Command line arguments to ipcontroller.
447 448 controller_args = List(['--log-to-file','--log-level', str(logging.INFO)], config=True)
448 449 n = Int(1, config=False)
@@ -461,7 +462,7 b' class MPIExecControllerLauncher(MPIExecLauncher):'
461 462
462 463 class MPIExecEngineSetLauncher(MPIExecLauncher):
463 464
464 program = List(ipengine_cmd_argv, config=True)
465 program = List(ipenginez_cmd_argv, config=True)
465 466 # Command line arguments for ipengine.
466 467 program_args = List(
467 468 ['--log-to-file','--log-level', str(logging.INFO)], config=True
@@ -494,18 +495,18 b' class SSHLauncher(LocalProcessLauncher):'
494 495 ssh_args = List(['-tt'], config=True)
495 496 program = List(['date'], config=True)
496 497 program_args = List([], config=True)
497 hostname = Str('', config=True)
498 user = Str('', config=True)
499 location = Str('')
498 hostname = CUnicode('', config=True)
499 user = CUnicode('', config=True)
500 location = CUnicode('')
500 501
501 502 def _hostname_changed(self, name, old, new):
502 503 if self.user:
503 self.location = '%s@%s' % (self.user, new)
504 self.location = u'%s@%s' % (self.user, new)
504 505 else:
505 506 self.location = new
506 507
507 508 def _user_changed(self, name, old, new):
508 self.location = '%s@%s' % (new, self.hostname)
509 self.location = u'%s@%s' % (new, self.hostname)
509 510
510 511 def find_args(self):
511 512 return self.ssh_cmd + self.ssh_args + [self.location] + \
@@ -530,13 +531,13 b' class SSHLauncher(LocalProcessLauncher):'
530 531
531 532 class SSHControllerLauncher(SSHLauncher):
532 533
533 program = List(ipcontroller_cmd_argv, config=True)
534 program = List(ipcontrollerz_cmd_argv, config=True)
534 535 # Command line arguments to ipcontroller.
535 536 program_args = List(['-r', '--log-to-file','--log-level', str(logging.INFO)], config=True)
536 537
537 538
538 539 class SSHEngineLauncher(SSHLauncher):
539 program = List(ipengine_cmd_argv, config=True)
540 program = List(ipenginez_cmd_argv, config=True)
540 541 # Command line arguments for ipengine.
541 542 program_args = List(
542 543 ['--log-to-file','--log-level', str(logging.INFO)], config=True
@@ -602,13 +603,13 b' class WindowsHPCLauncher(BaseLauncher):'
602 603 # submit_command.
603 604 job_id_regexp = Str(r'\d+', config=True)
604 605 # The filename of the instantiated job script.
605 job_file_name = Unicode(u'ipython_job.xml', config=True)
606 job_file_name = CUnicode(u'ipython_job.xml', config=True)
606 607 # The full path to the instantiated job script. This gets made dynamically
607 608 # by combining the work_dir with the job_file_name.
608 job_file = Unicode(u'')
609 job_file = CUnicode(u'')
609 610 # The hostname of the scheduler to submit the job to
610 scheduler = Str('', config=True)
611 job_cmd = Str(find_job_cmd(), config=True)
611 scheduler = CUnicode('', config=True)
612 job_cmd = CUnicode(find_job_cmd(), config=True)
612 613
613 614 def __init__(self, work_dir=u'.', config=None, **kwargs):
614 615 super(WindowsHPCLauncher, self).__init__(
@@ -623,7 +624,7 b' class WindowsHPCLauncher(BaseLauncher):'
623 624 raise NotImplementedError("Implement write_job_file in a subclass.")
624 625
625 626 def find_args(self):
626 return ['job.exe']
627 return [u'job.exe']
627 628
628 629 def parse_job_id(self, output):
629 630 """Take the output of the submit command and return the job id."""
@@ -676,7 +677,7 b' class WindowsHPCLauncher(BaseLauncher):'
676 677
677 678 class WindowsHPCControllerLauncher(WindowsHPCLauncher):
678 679
679 job_file_name = Unicode(u'ipcontroller_job.xml', config=True)
680 job_file_name = CUnicode(u'ipcontroller_job.xml', config=True)
680 681 extra_args = List([], config=False)
681 682
682 683 def write_job_file(self, n):
@@ -707,7 +708,7 b' class WindowsHPCControllerLauncher(WindowsHPCLauncher):'
707 708
708 709 class WindowsHPCEngineSetLauncher(WindowsHPCLauncher):
709 710
710 job_file_name = Unicode(u'ipengineset_job.xml', config=True)
711 job_file_name = CUnicode(u'ipengineset_job.xml', config=True)
711 712 extra_args = List([], config=False)
712 713
713 714 def write_job_file(self, n):
@@ -757,24 +758,38 b' class BatchSystemLauncher(BaseLauncher):'
757 758
758 759 # Subclasses must fill these in. See PBSEngineSet
759 760 # The name of the command line program used to submit jobs.
760 submit_command = Str('', config=True)
761 submit_command = List([''], config=True)
761 762 # The name of the command line program used to delete jobs.
762 delete_command = Str('', config=True)
763 delete_command = List([''], config=True)
763 764 # A regular expression used to get the job id from the output of the
764 765 # submit_command.
765 job_id_regexp = Str('', config=True)
766 job_id_regexp = CUnicode('', config=True)
766 767 # The string that is the batch script template itself.
767 batch_template = Str('', config=True)
768 batch_template = CUnicode('', config=True)
769 # The file that contains the batch template
770 batch_template_file = CUnicode(u'', config=True)
768 771 # The filename of the instantiated batch script.
769 batch_file_name = Unicode(u'batch_script', config=True)
772 batch_file_name = CUnicode(u'batch_script', config=True)
773 # The PBS Queue
774 queue = CUnicode(u'', config=True)
775
776 # not configurable, override in subclasses
777 # PBS Job Array regex
778 job_array_regexp = CUnicode('')
779 job_array_template = CUnicode('')
780 # PBS Queue regex
781 queue_regexp = CUnicode('')
782 queue_template = CUnicode('')
783 # The default batch template, override in subclasses
784 default_template = CUnicode('')
770 785 # The full path to the instantiated batch script.
771 batch_file = Unicode(u'')
786 batch_file = CUnicode(u'')
772 787 # the format dict used with batch_template:
773 788 context = Dict()
774 789
775 790
776 791 def find_args(self):
777 return [self.submit_command, self.batch_file]
792 return self.submit_command + [self.batch_file]
778 793
779 794 def __init__(self, work_dir=u'.', config=None, **kwargs):
780 795 super(BatchSystemLauncher, self).__init__(
@@ -796,11 +811,37 b' class BatchSystemLauncher(BaseLauncher):'
796 811 def write_batch_script(self, n):
797 812 """Instantiate and write the batch script to the work_dir."""
798 813 self.context['n'] = n
814 self.context['queue'] = self.queue
815 print self.context
816 # first priority is batch_template if set
817 if self.batch_template_file and not self.batch_template:
818 # second priority is batch_template_file
819 with open(self.batch_template_file) as f:
820 self.batch_template = f.read()
821 if not self.batch_template:
822 # third (last) priority is default_template
823 self.batch_template = self.default_template
824
825 regex = re.compile(self.job_array_regexp)
826 # print regex.search(self.batch_template)
827 if not regex.search(self.batch_template):
828 self.log.info("adding job array settings to batch script")
829 firstline, rest = self.batch_template.split('\n',1)
830 self.batch_template = u'\n'.join([firstline, self.job_array_template, rest])
831
832 regex = re.compile(self.queue_regexp)
833 # print regex.search(self.batch_template)
834 if self.queue and not regex.search(self.batch_template):
835 self.log.info("adding PBS queue settings to batch script")
836 firstline, rest = self.batch_template.split('\n',1)
837 self.batch_template = u'\n'.join([firstline, self.queue_template, rest])
838
799 839 script_as_string = Itpl.itplns(self.batch_template, self.context)
800 840 self.log.info('Writing instantiated batch script: %s' % self.batch_file)
801 f = open(self.batch_file, 'w')
802 f.write(script_as_string)
803 f.close()
841
842 with open(self.batch_file, 'w') as f:
843 f.write(script_as_string)
844 os.chmod(self.batch_file, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
804 845
805 846 def start(self, n, cluster_dir):
806 847 """Start n copies of the process using a batch system."""
@@ -817,7 +858,7 b' class BatchSystemLauncher(BaseLauncher):'
817 858 return job_id
818 859
819 860 def stop(self):
820 output = check_output([self.delete_command, self.job_id], env=os.environ)
861 output = check_output(self.delete_command+[self.job_id], env=os.environ)
821 862 self.notify_stop(dict(job_id=self.job_id, output=output)) # Pass the output of the kill cmd
822 863 return output
823 864
@@ -825,18 +866,26 b' class BatchSystemLauncher(BaseLauncher):'
825 866 class PBSLauncher(BatchSystemLauncher):
826 867 """A BatchSystemLauncher subclass for PBS."""
827 868
828 submit_command = Str('qsub', config=True)
829 delete_command = Str('qdel', config=True)
830 job_id_regexp = Str(r'\d+', config=True)
831 batch_template = Str('', config=True)
832 batch_file_name = Unicode(u'pbs_batch_script', config=True)
833 batch_file = Unicode(u'')
869 submit_command = List(['qsub'], config=True)
870 delete_command = List(['qdel'], config=True)
871 job_id_regexp = CUnicode(r'\d+', config=True)
872
873 batch_file = CUnicode(u'')
874 job_array_regexp = CUnicode('#PBS\W+-t\W+[\w\d\-\$]+')
875 job_array_template = CUnicode('#PBS -t 1-$n')
876 queue_regexp = CUnicode('#PBS\W+-q\W+\$?\w+')
877 queue_template = CUnicode('#PBS -q $queue')
834 878
835 879
836 880 class PBSControllerLauncher(PBSLauncher):
837 881 """Launch a controller using PBS."""
838 882
839 batch_file_name = Unicode(u'pbs_batch_script_controller', config=True)
883 batch_file_name = CUnicode(u'pbs_controller', config=True)
884 default_template= CUnicode("""#!/bin/sh
885 #PBS -V
886 #PBS -N ipcontrollerz
887 %s --log-to-file --cluster-dir $cluster_dir
888 """%(' '.join(ipcontrollerz_cmd_argv)))
840 889
841 890 def start(self, cluster_dir):
842 891 """Start the controller by profile or cluster_dir."""
@@ -845,14 +894,57 b' class PBSControllerLauncher(PBSLauncher):'
845 894
846 895
847 896 class PBSEngineSetLauncher(PBSLauncher):
848
849 batch_file_name = Unicode(u'pbs_batch_script_engines', config=True)
897 """Launch Engines using PBS"""
898 batch_file_name = CUnicode(u'pbs_engines', config=True)
899 default_template= CUnicode(u"""#!/bin/sh
900 #PBS -V
901 #PBS -N ipenginez
902 %s --cluster-dir $cluster_dir
903 """%(' '.join(ipenginez_cmd_argv)))
850 904
851 905 def start(self, n, cluster_dir):
852 906 """Start n engines by profile or cluster_dir."""
853 self.log.info('Starting PBSEngineSetLauncher: %r' % self.args)
907 self.log.info('Starting %i engines with PBSEngineSetLauncher: %r' % (n, self.args))
854 908 return super(PBSEngineSetLauncher, self).start(n, cluster_dir)
855 909
910 #SGE is very similar to PBS
911
912 class SGELauncher(PBSLauncher):
913 """Sun GridEngine is a PBS clone with slightly different syntax"""
914 job_array_regexp = CUnicode('#\$\W+-t\W+[\w\d\-\$]+')
915 job_array_template = CUnicode('#$$ -t 1-$n')
916 queue_regexp = CUnicode('#\$\W+-q\W+\$?\w+')
917 queue_template = CUnicode('#$$ -q $queue')
918
919 class SGEControllerLauncher(SGELauncher):
920 """Launch a controller using SGE."""
921
922 batch_file_name = CUnicode(u'sge_controller', config=True)
923 default_template= CUnicode(u"""#$$ -V
924 #$$ -S /bin/sh
925 #$$ -N ipcontrollerz
926 %s --log-to-file --cluster-dir $cluster_dir
927 """%(' '.join(ipcontrollerz_cmd_argv)))
928
929 def start(self, cluster_dir):
930 """Start the controller by profile or cluster_dir."""
931 self.log.info("Starting SGEControllerLauncher: %r" % self.args)
932 return super(SGEControllerLauncher, self).start(1, cluster_dir)
933
934 class SGEEngineSetLauncher(SGELauncher):
935 """Launch Engines with SGE"""
936 batch_file_name = CUnicode(u'sge_engines', config=True)
937 default_template = CUnicode("""#$$ -V
938 #$$ -S /bin/sh
939 #$$ -N ipenginez
940 %s --cluster-dir $cluster_dir
941 """%(' '.join(ipenginez_cmd_argv)))
942
943 def start(self, n, cluster_dir):
944 """Start n engines by profile or cluster_dir."""
945 self.log.info('Starting %i engines with SGEEngineSetLauncher: %r' % (n, self.args))
946 return super(SGEEngineSetLauncher, self).start(n, cluster_dir)
947
856 948
857 949 #-----------------------------------------------------------------------------
858 950 # A launcher for ipcluster itself!
@@ -862,7 +954,7 b' class PBSEngineSetLauncher(PBSLauncher):'
862 954 class IPClusterLauncher(LocalProcessLauncher):
863 955 """Launch the ipcluster program in an external process."""
864 956
865 ipcluster_cmd = List(ipcluster_cmd_argv, config=True)
957 ipcluster_cmd = List(ipclusterz_cmd_argv, config=True)
866 958 # Command line arguments to pass to ipcluster.
867 959 ipcluster_args = List(
868 960 ['--clean-logs', '--log-to-file', '--log-level', str(logging.INFO)], config=True)
General Comments 0
You need to be logged in to leave comments. Login now