##// END OF EJS Templates
Add Condor bindings for IPython.parallel
James Booth -
Show More
@@ -488,6 +488,7 b' class IPClusterStart(IPClusterEngines):'
488 PBS : use PBS (qsub) to submit the controller to a batch queue
488 PBS : use PBS (qsub) to submit the controller to a batch queue
489 SGE : use SGE (qsub) to submit the controller to a batch queue
489 SGE : use SGE (qsub) to submit the controller to a batch queue
490 LSF : use LSF (bsub) to submit the controller to a batch queue
490 LSF : use LSF (bsub) to submit the controller to a batch queue
491 Condor: use HTCondor to submit the controller to a batch queue
491 SSH : use SSH to start the controller
492 SSH : use SSH to start the controller
492 WindowsHPC : use Windows HPC
493 WindowsHPC : use Windows HPC
493
494
@@ -83,6 +83,24 b' ipengine_cmd_argv = [sys.executable, "-c", cmd % "ipengineapp"]'
83
83
84 ipcontroller_cmd_argv = [sys.executable, "-c", cmd % "ipcontrollerapp"]
84 ipcontroller_cmd_argv = [sys.executable, "-c", cmd % "ipcontrollerapp"]
85
85
86 # HTCondor frustratingly destroys sys.executable when launching remote processes,
87 # so Python falls back to the system module search paths, which is
88 # ridiculously fragile (not to mention destructive for virtualenvs).
89 # However, if we use the ip{cluster, engine, controller} scripts as our
90 # executable we circumvent this - the mechanism of shebanged scripts means that
91 # the python binary will be launched with argv[0] set correctly.
92 # This does mean that for HTCondor we require that:
93 # a. The python interpreter you are using is in the same folder as the ipengine,
94 # ipcluster and ipcontroller scripts (they are looked up next to sys.executable).
95 # b. Windows is untested - the consequences there are unknown.
96 bin_dir = os.path.dirname(sys.executable)
97
98 condor_ipcluster_cmd_argv = os.path.join(bin_dir, 'ipcluster')
99
100 condor_ipengine_cmd_argv = os.path.join(bin_dir, 'ipengine')
101
102 condor_ipcontroller_cmd_argv = os.path.join(bin_dir, 'ipcontroller')
103
86 #-----------------------------------------------------------------------------
104 #-----------------------------------------------------------------------------
87 # Base launchers and errors
105 # Base launchers and errors
88 #-----------------------------------------------------------------------------
106 #-----------------------------------------------------------------------------
@@ -1047,6 +1065,7 b' class BatchSystemLauncher(BaseLauncher):'
1047 batch_file = Unicode(u'')
1065 batch_file = Unicode(u'')
1048 # the format dict used with batch_template:
1066 # the format dict used with batch_template:
1049 context = Dict()
1067 context = Dict()
1068
1050 def _context_default(self):
1069 def _context_default(self):
1051 """load the default context with the default values for the basic keys
1070 """load the default context with the default values for the basic keys
1052
1071
@@ -1058,7 +1077,6 b' class BatchSystemLauncher(BaseLauncher):'
1058 # the Formatter instance for rendering the templates:
1077 # the Formatter instance for rendering the templates:
1059 formatter = Instance(EvalFormatter, (), {})
1078 formatter = Instance(EvalFormatter, (), {})
1060
1079
1061
1062 def find_args(self):
1080 def find_args(self):
1063 return self.submit_command + [self.batch_file]
1081 return self.submit_command + [self.batch_file]
1064
1082
@@ -1090,28 +1108,34 b' class BatchSystemLauncher(BaseLauncher):'
1090 if not self.batch_template:
1108 if not self.batch_template:
1091 # third (last) priority is default_template
1109 # third (last) priority is default_template
1092 self.batch_template = self.default_template
1110 self.batch_template = self.default_template
1093
1094 # add jobarray or queue lines to user-specified template
1111 # add jobarray or queue lines to user-specified template
1095 # note that this is *only* when user did not specify a template.
1112 # note that this is *only* when user did not specify a template.
1096 # print self.job_array_regexp.search(self.batch_template)
1113 self._insert_queue_in_script()
1097 if not self.job_array_regexp.search(self.batch_template):
1114 self._insert_job_array_in_script()
1098 self.log.debug("adding job array settings to batch script")
1099 firstline, rest = self.batch_template.split('\n',1)
1100 self.batch_template = u'\n'.join([firstline, self.job_array_template, rest])
1101
1102 # print self.queue_regexp.search(self.batch_template)
1103 if self.queue and not self.queue_regexp.search(self.batch_template):
1104 self.log.debug("adding PBS queue settings to batch script")
1105 firstline, rest = self.batch_template.split('\n',1)
1106 self.batch_template = u'\n'.join([firstline, self.queue_template, rest])
1107
1108 script_as_string = self.formatter.format(self.batch_template, **self.context)
1115 script_as_string = self.formatter.format(self.batch_template, **self.context)
1109 self.log.debug('Writing batch script: %s', self.batch_file)
1116 self.log.debug('Writing batch script: %s', self.batch_file)
1110
1111 with open(self.batch_file, 'w') as f:
1117 with open(self.batch_file, 'w') as f:
1112 f.write(script_as_string)
1118 f.write(script_as_string)
1113 os.chmod(self.batch_file, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
1119 os.chmod(self.batch_file, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
1114
1120
1121 def _insert_queue_in_script(self):
1122 """Inserts a queue if required into the batch script.
1123 """
1124 print self.queue_regexp.search(self.batch_template)
1125 if self.queue and not self.queue_regexp.search(self.batch_template):
1126 self.log.debug("adding PBS queue settings to batch script")
1127 firstline, rest = self.batch_template.split('\n',1)
1128 self.batch_template = u'\n'.join([firstline, self.queue_template, rest])
1129
1130 def _insert_job_array_in_script(self):
1131 """Inserts a job array if required into the batch script.
1132 """
1133 print self.job_array_regexp.search(self.batch_template)
1134 if not self.job_array_regexp.search(self.batch_template):
1135 self.log.debug("adding job array settings to batch script")
1136 firstline, rest = self.batch_template.split('\n',1)
1137 self.batch_template = u'\n'.join([firstline, self.job_array_template, rest])
1138
1115 def start(self, n):
1139 def start(self, n):
1116 """Start n copies of the process using a batch system."""
1140 """Start n copies of the process using a batch system."""
1117 self.log.debug("Starting %s: %r", self.__class__.__name__, self.args)
1141 self.log.debug("Starting %s: %r", self.__class__.__name__, self.args)
@@ -1180,6 +1204,7 b' class PBSEngineSetLauncher(PBSLauncher, BatchClusterAppMixin):'
1180 """Start n engines by profile or profile_dir."""
1204 """Start n engines by profile or profile_dir."""
1181 return super(PBSEngineSetLauncher, self).start(n)
1205 return super(PBSEngineSetLauncher, self).start(n)
1182
1206
1207
1183 #SGE is very similar to PBS
1208 #SGE is very similar to PBS
1184
1209
1185 class SGELauncher(PBSLauncher):
1210 class SGELauncher(PBSLauncher):
@@ -1189,6 +1214,7 b' class SGELauncher(PBSLauncher):'
1189 queue_regexp = CRegExp('#\$\W+-q\W+\$?\w+')
1214 queue_regexp = CRegExp('#\$\W+-q\W+\$?\w+')
1190 queue_template = Unicode('#$ -q {queue}')
1215 queue_template = Unicode('#$ -q {queue}')
1191
1216
1217
1192 class SGEControllerLauncher(SGELauncher, BatchClusterAppMixin):
1218 class SGEControllerLauncher(SGELauncher, BatchClusterAppMixin):
1193 """Launch a controller using SGE."""
1219 """Launch a controller using SGE."""
1194
1220
@@ -1204,6 +1230,7 b' class SGEControllerLauncher(SGELauncher, BatchClusterAppMixin):'
1204 """Start the controller by profile or profile_dir."""
1230 """Start the controller by profile or profile_dir."""
1205 return super(SGEControllerLauncher, self).start(1)
1231 return super(SGEControllerLauncher, self).start(1)
1206
1232
1233
1207 class SGEEngineSetLauncher(SGELauncher, BatchClusterAppMixin):
1234 class SGEEngineSetLauncher(SGELauncher, BatchClusterAppMixin):
1208 """Launch Engines with SGE"""
1235 """Launch Engines with SGE"""
1209 batch_file_name = Unicode(u'sge_engines', config=True,
1236 batch_file_name = Unicode(u'sge_engines', config=True,
@@ -1288,6 +1315,76 b' class LSFEngineSetLauncher(LSFLauncher, BatchClusterAppMixin):'
1288 return super(LSFEngineSetLauncher, self).start(n)
1315 return super(LSFEngineSetLauncher, self).start(n)
1289
1316
1290
1317
1318 # Condor requires that we launch the ipengine/ipcontroller scripts rather
1319 # than the python interpreter, but otherwise is very similar to PBS
1320
1321 class CondorLauncher(BatchSystemLauncher):
1322 """A BatchSystemLauncher subclass for Condor."""
1323
1324 submit_command = List(['condor_submit', '-verbose'], config=True,
1325 help="The Condor submit command ['condor_submit']")
1326 delete_command = List(['condor_rm'], config=True,
1327 help="The Condor delete command ['condor_rm']")
1328 job_id_regexp = CRegExp(r'\d+', config=True,
1329 help="Regular expression for identifying the job ID [r'\d+']")
1330
1331 job_array_regexp = CRegExp('queue\W+\$')
1332 job_array_template = Unicode('queue {n}')
1333 # template for the submission of multiple jobs
1334 queue_regexp = CRegExp('#PBS\W+-q\W+\$?\w+')
1335 # regex to find a queue if the user has specified a template
1336 queue_template = Unicode('#PBS -q {queue}')
1337 # the queue we wish to submit to. Need to know the Condor equivalent (e.g. ibug cluster
1338 # or general?)
1339
1340 def _insert_job_array_in_script(self):
1341 """Inserts a job array if required into the batch script.
1342 """
1343 print self.job_array_regexp.search(self.batch_template)
1344 #Condor requires that the job array goes at the bottom of the
1345 #script
1346 if not self.job_array_regexp.search(self.batch_template):
1347 self.log.debug("adding job array settings to batch script")
1348 self.batch_template = '\n'.join([self.batch_template,
1349 self.job_array_template])
1350
1351
1352 class CondorControllerLauncher(CondorLauncher, BatchClusterAppMixin):
1353 """Launch a controller using Condor."""
1354
1355 batch_file_name = Unicode(u'condor_controller', config=True,
1356 help="batch file name for the controller job.")
1357 default_template = Unicode(r"""
1358 universe = vanilla
1359 executable = %s
1360 # by default we expect a shared file system
1361 transfer_executable = False
1362 arguments = --log-to-file '--profile-dir={profile_dir}' --cluster-id='{cluster_id}'
1363 """ % condor_ipcontroller_cmd_argv)
1364
1365 def start(self):
1366 """Start the controller by profile or profile_dir."""
1367 return super(CondorControllerLauncher, self).start(1)
1368
1369
1370 class CondorEngineSetLauncher(CondorLauncher, BatchClusterAppMixin):
1371 """Launch Engines using Condor"""
1372 batch_file_name = Unicode(u'condor_engines', config=True,
1373 help="batch file name for the engine(s) job.")
1374 default_template = Unicode("""
1375 universe = vanilla
1376 executable = %s
1377 # by default we expect a shared file system
1378 transfer_executable = False
1379 arguments = "--log-to-file '--profile-dir={profile_dir}' '--cluster-id={cluster_id}'"
1380
1381 """ % condor_ipengine_cmd_argv)
1382
1383 def start(self, n):
1384 """Start n engines by profile or profile_dir."""
1385 return super(CondorEngineSetLauncher, self).start(n)
1386
1387
1291 #-----------------------------------------------------------------------------
1388 #-----------------------------------------------------------------------------
1292 # A launcher for ipcluster itself!
1389 # A launcher for ipcluster itself!
1293 #-----------------------------------------------------------------------------
1390 #-----------------------------------------------------------------------------
@@ -1354,6 +1451,10 b' lsf_launchers = ['
1354 LSFControllerLauncher,
1451 LSFControllerLauncher,
1355 LSFEngineSetLauncher,
1452 LSFEngineSetLauncher,
1356 ]
1453 ]
1454 condor_launchers = [
1455 CondorLauncher,
1456 CondorControllerLauncher,
1457 CondorEngineSetLauncher,
1458 ]
1357 all_launchers = local_launchers + mpi_launchers + ssh_launchers + winhpc_launchers\
1459 all_launchers = local_launchers + mpi_launchers + ssh_launchers + winhpc_launchers\
1358 + pbs_launchers + sge_launchers + lsf_launchers
1460 + pbs_launchers + sge_launchers + lsf_launchers + condor_launchers
1359
General Comments 0
You need to be logged in to leave comments. Login now