Show More
@@ -488,6 +488,7 b' class IPClusterStart(IPClusterEngines):' | |||||
488 | PBS : use PBS (qsub) to submit the controller to a batch queue |
|
488 | PBS : use PBS (qsub) to submit the controller to a batch queue | |
489 | SGE : use SGE (qsub) to submit the controller to a batch queue |
|
489 | SGE : use SGE (qsub) to submit the controller to a batch queue | |
490 | LSF : use LSF (bsub) to submit the controller to a batch queue |
|
490 | LSF : use LSF (bsub) to submit the controller to a batch queue | |
|
491 | Condor: use HTCondor to submit the controller to a batch queue | |||
491 | SSH : use SSH to start the controller |
|
492 | SSH : use SSH to start the controller | |
492 | WindowsHPC : use Windows HPC |
|
493 | WindowsHPC : use Windows HPC | |
493 |
|
494 |
@@ -83,6 +83,24 b' ipengine_cmd_argv = [sys.executable, "-c", cmd % "ipengineapp"]' | |||||
83 |
|
83 | |||
84 | ipcontroller_cmd_argv = [sys.executable, "-c", cmd % "ipcontrollerapp"] |
|
84 | ipcontroller_cmd_argv = [sys.executable, "-c", cmd % "ipcontrollerapp"] | |
85 |
|
85 | |||
|
86 | # HTCondor frustratingly destroys sys.executable when launching remote processes | |||
|
87 | # thus Python will default back to the system module search paths which is | |||
|
88 | # ridiculously fragile (not to mention destructive for virtualenvs). | |||
|
89 | # however, if we use the ip{cluster, engine, controller} scripts as our | |||
|
90 | # executable we circumvent this - the mechanism of shebanged scripts means that | |||
|
91 | # the python binary will be launched with argv[0] set correctly. | |||
|
92 | # This does mean that for HTCondor we require that: | |||
|
93 | # a. The python interpreter you are using is in a folder next to the ipengine, | |||
|
94 | # ipcluster and ipcontroller scripts | |||
|
95 | # b. I have no idea what the consequences are for Windows. | |||
|
96 | bin_dir = os.path.dirname(sys.executable) | |||
|
97 | ||||
|
98 | condor_ipcluster_cmd_argv = os.path.join(bin_dir, 'ipcluster') | |||
|
99 | ||||
|
100 | condor_ipengine_cmd_argv = os.path.join(bin_dir, 'ipengine') | |||
|
101 | ||||
|
102 | condor_ipcontroller_cmd_argv = os.path.join(bin_dir, 'ipcontroller') | |||
|
103 | ||||
86 | #----------------------------------------------------------------------------- |
|
104 | #----------------------------------------------------------------------------- | |
87 | # Base launchers and errors |
|
105 | # Base launchers and errors | |
88 | #----------------------------------------------------------------------------- |
|
106 | #----------------------------------------------------------------------------- | |
@@ -1047,6 +1065,7 b' class BatchSystemLauncher(BaseLauncher):' | |||||
1047 | batch_file = Unicode(u'') |
|
1065 | batch_file = Unicode(u'') | |
1048 | # the format dict used with batch_template: |
|
1066 | # the format dict used with batch_template: | |
1049 | context = Dict() |
|
1067 | context = Dict() | |
|
1068 | ||||
1050 | def _context_default(self): |
|
1069 | def _context_default(self): | |
1051 | """load the default context with the default values for the basic keys |
|
1070 | """load the default context with the default values for the basic keys | |
1052 |
|
1071 | |||
@@ -1058,7 +1077,6 b' class BatchSystemLauncher(BaseLauncher):' | |||||
1058 | # the Formatter instance for rendering the templates: |
|
1077 | # the Formatter instance for rendering the templates: | |
1059 | formatter = Instance(EvalFormatter, (), {}) |
|
1078 | formatter = Instance(EvalFormatter, (), {}) | |
1060 |
|
1079 | |||
1061 |
|
||||
1062 | def find_args(self): |
|
1080 | def find_args(self): | |
1063 | return self.submit_command + [self.batch_file] |
|
1081 | return self.submit_command + [self.batch_file] | |
1064 |
|
1082 | |||
@@ -1090,28 +1108,34 b' class BatchSystemLauncher(BaseLauncher):' | |||||
1090 | if not self.batch_template: |
|
1108 | if not self.batch_template: | |
1091 | # third (last) priority is default_template |
|
1109 | # third (last) priority is default_template | |
1092 | self.batch_template = self.default_template |
|
1110 | self.batch_template = self.default_template | |
1093 |
|
||||
1094 | # add jobarray or queue lines to user-specified template |
|
1111 | # add jobarray or queue lines to user-specified template | |
1095 | # note that this is *only* when user did not specify a template. |
|
1112 | # note that this is *only* when user did not specify a template. | |
1096 | # print self.job_array_regexp.search(self.batch_template) |
|
1113 | self._insert_queue_in_script() | |
1097 | if not self.job_array_regexp.search(self.batch_template): |
|
1114 | self._insert_job_array_in_script() | |
1098 | self.log.debug("adding job array settings to batch script") |
|
|||
1099 | firstline, rest = self.batch_template.split('\n',1) |
|
|||
1100 | self.batch_template = u'\n'.join([firstline, self.job_array_template, rest]) |
|
|||
1101 |
|
||||
1102 | # print self.queue_regexp.search(self.batch_template) |
|
|||
1103 | if self.queue and not self.queue_regexp.search(self.batch_template): |
|
|||
1104 | self.log.debug("adding PBS queue settings to batch script") |
|
|||
1105 | firstline, rest = self.batch_template.split('\n',1) |
|
|||
1106 | self.batch_template = u'\n'.join([firstline, self.queue_template, rest]) |
|
|||
1107 |
|
||||
1108 | script_as_string = self.formatter.format(self.batch_template, **self.context) |
|
1115 | script_as_string = self.formatter.format(self.batch_template, **self.context) | |
1109 | self.log.debug('Writing batch script: %s', self.batch_file) |
|
1116 | self.log.debug('Writing batch script: %s', self.batch_file) | |
1110 |
|
||||
1111 | with open(self.batch_file, 'w') as f: |
|
1117 | with open(self.batch_file, 'w') as f: | |
1112 | f.write(script_as_string) |
|
1118 | f.write(script_as_string) | |
1113 | os.chmod(self.batch_file, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR) |
|
1119 | os.chmod(self.batch_file, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR) | |
1114 |
|
1120 | |||
|
1121 | def _insert_queue_in_script(self): | |||
|
1122 | """Inserts a queue if required into the batch script. | |||
|
1123 | """ | |||
|
1124 | print self.queue_regexp.search(self.batch_template) | |||
|
1125 | if self.queue and not self.queue_regexp.search(self.batch_template): | |||
|
1126 | self.log.debug("adding PBS queue settings to batch script") | |||
|
1127 | firstline, rest = self.batch_template.split('\n',1) | |||
|
1128 | self.batch_template = u'\n'.join([firstline, self.queue_template, rest]) | |||
|
1129 | ||||
|
1130 | def _insert_job_array_in_script(self): | |||
|
1131 | """Inserts a job array if required into the batch script. | |||
|
1132 | """ | |||
|
1133 | print self.job_array_regexp.search(self.batch_template) | |||
|
1134 | if not self.job_array_regexp.search(self.batch_template): | |||
|
1135 | self.log.debug("adding job array settings to batch script") | |||
|
1136 | firstline, rest = self.batch_template.split('\n',1) | |||
|
1137 | self.batch_template = u'\n'.join([firstline, self.job_array_template, rest]) | |||
|
1138 | ||||
1115 | def start(self, n): |
|
1139 | def start(self, n): | |
1116 | """Start n copies of the process using a batch system.""" |
|
1140 | """Start n copies of the process using a batch system.""" | |
1117 | self.log.debug("Starting %s: %r", self.__class__.__name__, self.args) |
|
1141 | self.log.debug("Starting %s: %r", self.__class__.__name__, self.args) | |
@@ -1180,6 +1204,7 b' class PBSEngineSetLauncher(PBSLauncher, BatchClusterAppMixin):' | |||||
1180 | """Start n engines by profile or profile_dir.""" |
|
1204 | """Start n engines by profile or profile_dir.""" | |
1181 | return super(PBSEngineSetLauncher, self).start(n) |
|
1205 | return super(PBSEngineSetLauncher, self).start(n) | |
1182 |
|
1206 | |||
|
1207 | ||||
1183 | #SGE is very similar to PBS |
|
1208 | #SGE is very similar to PBS | |
1184 |
|
1209 | |||
1185 | class SGELauncher(PBSLauncher): |
|
1210 | class SGELauncher(PBSLauncher): | |
@@ -1189,6 +1214,7 b' class SGELauncher(PBSLauncher):' | |||||
1189 | queue_regexp = CRegExp('#\$\W+-q\W+\$?\w+') |
|
1214 | queue_regexp = CRegExp('#\$\W+-q\W+\$?\w+') | |
1190 | queue_template = Unicode('#$ -q {queue}') |
|
1215 | queue_template = Unicode('#$ -q {queue}') | |
1191 |
|
1216 | |||
|
1217 | ||||
1192 | class SGEControllerLauncher(SGELauncher, BatchClusterAppMixin): |
|
1218 | class SGEControllerLauncher(SGELauncher, BatchClusterAppMixin): | |
1193 | """Launch a controller using SGE.""" |
|
1219 | """Launch a controller using SGE.""" | |
1194 |
|
1220 | |||
@@ -1204,6 +1230,7 b' class SGEControllerLauncher(SGELauncher, BatchClusterAppMixin):' | |||||
1204 | """Start the controller by profile or profile_dir.""" |
|
1230 | """Start the controller by profile or profile_dir.""" | |
1205 | return super(SGEControllerLauncher, self).start(1) |
|
1231 | return super(SGEControllerLauncher, self).start(1) | |
1206 |
|
1232 | |||
|
1233 | ||||
1207 | class SGEEngineSetLauncher(SGELauncher, BatchClusterAppMixin): |
|
1234 | class SGEEngineSetLauncher(SGELauncher, BatchClusterAppMixin): | |
1208 | """Launch Engines with SGE""" |
|
1235 | """Launch Engines with SGE""" | |
1209 | batch_file_name = Unicode(u'sge_engines', config=True, |
|
1236 | batch_file_name = Unicode(u'sge_engines', config=True, | |
@@ -1288,6 +1315,76 b' class LSFEngineSetLauncher(LSFLauncher, BatchClusterAppMixin):' | |||||
1288 | return super(LSFEngineSetLauncher, self).start(n) |
|
1315 | return super(LSFEngineSetLauncher, self).start(n) | |
1289 |
|
1316 | |||
1290 |
|
1317 | |||
|
1318 | # Condor requires that we launch the ipengine/ipcontroller scripts rather | |||

1319 | # than the python instance but otherwise is very similar to PBS | |||
|
1320 | ||||
|
1321 | class CondorLauncher(BatchSystemLauncher): | |||
|
1322 | """A BatchSystemLauncher subclass for Condor.""" | |||
|
1323 | ||||
|
1324 | submit_command = List(['condor_submit', '-verbose'], config=True, | |||
|
1325 | help="The Condor submit command ['condor_submit']") | |||
|
1326 | delete_command = List(['condor_rm'], config=True, | |||
|
1327 | help="The Condor delete command ['condor_rm']") | |||
|
1328 | job_id_regexp = CRegExp(r'\d+', config=True, | |||
|
1329 | help="Regular expression for identifying the job ID [r'\d+']") | |||
|
1330 | ||||
|
1331 | job_array_regexp = CRegExp('queue\W+\$') | |||
|
1332 | job_array_template = Unicode('queue {n}') | |||
|
1333 | # template for the submission of multiple jobs | |||
|
1334 | queue_regexp = CRegExp('#PBS\W+-q\W+\$?\w+') | |||
|
1335 | # regex to find a queue if the user has specified a template | |||
|
1336 | queue_template = Unicode('#PBS -q {queue}') | |||
|
1337 | # the queue we wish to submit to. Need to know the Condor equivalent (e.g. ibug cluster | |||
|
1338 | # or general?) | |||
|
1339 | ||||
|
1340 | def _insert_job_array_in_script(self): | |||
|
1341 | """Inserts a job array if required into the batch script. | |||
|
1342 | """ | |||
|
1343 | print self.job_array_regexp.search(self.batch_template) | |||
|
1344 | #Condor requires that the job array goes at the bottom of the | |||
|
1345 | #script | |||
|
1346 | if not self.job_array_regexp.search(self.batch_template): | |||
|
1347 | self.log.debug("adding job array settings to batch script") | |||
|
1348 | self.batch_template = '\n'.join([self.batch_template, | |||
|
1349 | self.job_array_template]) | |||
|
1350 | ||||
|
1351 | ||||
|
1352 | class CondorControllerLauncher(CondorLauncher, BatchClusterAppMixin): | |||
|
1353 | """Launch a controller using Condor.""" | |||
|
1354 | ||||
|
1355 | batch_file_name = Unicode(u'condor_controller', config=True, | |||
|
1356 | help="batch file name for the controller job.") | |||
|
1357 | default_template = Unicode(r""" | |||
|
1358 | universe = vanilla | |||
|
1359 | executable = %s | |||
|
1360 | # by default we expect a shared file system | |||
|
1361 | transfer_executable = False | |||
|
1362 | arguments = --log-to-file '--profile-dir={profile_dir}' --cluster-id='{cluster_id}' | |||
|
1363 | """ % condor_ipcontroller_cmd_argv) | |||
|
1364 | ||||
|
1365 | def start(self): | |||
|
1366 | """Start the controller by profile or profile_dir.""" | |||
|
1367 | return super(CondorControllerLauncher, self).start(1) | |||
|
1368 | ||||
|
1369 | ||||
|
1370 | class CondorEngineSetLauncher(CondorLauncher, BatchClusterAppMixin): | |||
|
1371 | """Launch Engines using Condor""" | |||
|
1372 | batch_file_name = Unicode(u'condor_engines', config=True, | |||
|
1373 | help="batch file name for the engine(s) job.") | |||
|
1374 | default_template = Unicode(""" | |||
|
1375 | universe = vanilla | |||
|
1376 | executable = %s | |||
|
1377 | # by default we expect a shared file system | |||
|
1378 | transfer_executable = False | |||
|
1379 | arguments = "--log-to-file '--profile-dir={profile_dir}' '--cluster-id={cluster_id}'" | |||
|
1380 | ||||
|
1381 | """ % condor_ipengine_cmd_argv) | |||
|
1382 | ||||
|
1383 | def start(self, n): | |||
|
1384 | """Start n engines by profile or profile_dir.""" | |||
|
1385 | return super(CondorEngineSetLauncher, self).start(n) | |||
|
1386 | ||||
|
1387 | ||||
1291 | #----------------------------------------------------------------------------- |
|
1388 | #----------------------------------------------------------------------------- | |
1292 | # A launcher for ipcluster itself! |
|
1389 | # A launcher for ipcluster itself! | |
1293 | #----------------------------------------------------------------------------- |
|
1390 | #----------------------------------------------------------------------------- | |
@@ -1354,6 +1451,10 b' lsf_launchers = [' | |||||
1354 | LSFControllerLauncher, |
|
1451 | LSFControllerLauncher, | |
1355 | LSFEngineSetLauncher, |
|
1452 | LSFEngineSetLauncher, | |
1356 | ] |
|
1453 | ] | |
|
1454 | condor_launchers = [ | |||
|
1455 | CondorLauncher, | |||
|
1456 | CondorControllerLauncher, | |||
|
1457 | CondorEngineSetLauncher, | |||
|
1458 | ] | |||
1357 | all_launchers = local_launchers + mpi_launchers + ssh_launchers + winhpc_launchers\ |
|
1459 | all_launchers = local_launchers + mpi_launchers + ssh_launchers + winhpc_launchers\ | |
1358 | + pbs_launchers + sge_launchers + lsf_launchers |
|
1460 | + pbs_launchers + sge_launchers + lsf_launchers + condor_launchers | |
1359 |
|
General Comments 0
You need to be logged in to leave comments.
Login now