reorganize Factory classes to follow relocation of Session object
MinRK
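This commit replaces the LoggingFactory base class with plain Configurable plus an explicit `log` trait: launchers and watchers now receive their parent's Logger object directly (`log=self.log`) instead of inheriting one by name (`logname=self.log.name`). A minimal sketch of the new pattern, assuming an IPython 0.11-era environment (`SomeLauncher` is a hypothetical stand-in for the launcher classes below):

import logging

from IPython.config.configurable import Configurable
from IPython.utils.traitlets import Instance

class SomeLauncher(Configurable):
    # defaults to the root logger; parents override by passing log=self.log
    log = Instance('logging.Logger', ('root',))

# before this commit: klass(..., logname=self.log.name)
# after this commit:
launcher = SomeLauncher(log=logging.getLogger('ipcluster'))
launcher.log.info("launcher shares its parent application's Logger instance")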
IPython/parallel/apps/ipclusterapp.py
@@ -1,521 +1,521 @@
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # encoding: utf-8
2 # encoding: utf-8
3 """
3 """
4 The ipcluster application.
4 The ipcluster application.
5 """
5 """
6
6
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2008-2009 The IPython Development Team
8 # Copyright (C) 2008-2009 The IPython Development Team
9 #
9 #
10 # Distributed under the terms of the BSD License. The full license is in
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13
13
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15 # Imports
15 # Imports
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 import errno
18 import errno
19 import logging
19 import logging
20 import os
20 import os
21 import re
21 import re
22 import signal
22 import signal
23
23
24 from subprocess import check_call, CalledProcessError, PIPE
24 from subprocess import check_call, CalledProcessError, PIPE
25 import zmq
25 import zmq
26 from zmq.eventloop import ioloop
26 from zmq.eventloop import ioloop
27
27
28 from IPython.config.application import Application, boolean_flag
28 from IPython.config.application import Application, boolean_flag
29 from IPython.config.loader import Config
29 from IPython.config.loader import Config
30 from IPython.core.newapplication import BaseIPythonApplication, ProfileDir
30 from IPython.core.newapplication import BaseIPythonApplication, ProfileDir
31 from IPython.utils.importstring import import_item
31 from IPython.utils.importstring import import_item
32 from IPython.utils.traitlets import Int, Unicode, Bool, CFloat, Dict, List
32 from IPython.utils.traitlets import Int, Unicode, Bool, CFloat, Dict, List
33
33
34 from IPython.parallel.apps.baseapp import (
34 from IPython.parallel.apps.baseapp import (
35 BaseParallelApplication,
35 BaseParallelApplication,
36 PIDFileError,
36 PIDFileError,
37 base_flags, base_aliases
37 base_flags, base_aliases
38 )
38 )
39
39
40
40
41 #-----------------------------------------------------------------------------
41 #-----------------------------------------------------------------------------
42 # Module level variables
42 # Module level variables
43 #-----------------------------------------------------------------------------
43 #-----------------------------------------------------------------------------
44
44
45
45
46 default_config_file_name = u'ipcluster_config.py'
46 default_config_file_name = u'ipcluster_config.py'
47
47
48
48
49 _description = """Start an IPython cluster for parallel computing.
49 _description = """Start an IPython cluster for parallel computing.
50
50
51 An IPython cluster consists of 1 controller and 1 or more engines.
51 An IPython cluster consists of 1 controller and 1 or more engines.
52 This command automates the startup of these processes using a wide
52 This command automates the startup of these processes using a wide
53 range of startup methods (SSH, local processes, PBS, mpiexec,
53 range of startup methods (SSH, local processes, PBS, mpiexec,
54 Windows HPC Server 2008). To start a cluster with 4 engines on your
54 Windows HPC Server 2008). To start a cluster with 4 engines on your
55 local host simply do 'ipcluster start n=4'. For more complex usage
55 local host simply do 'ipcluster start n=4'. For more complex usage
56 you will typically do 'ipcluster create profile=mycluster', then edit
56 you will typically do 'ipcluster create profile=mycluster', then edit
57 configuration files, followed by 'ipcluster start profile=mycluster n=4'.
57 configuration files, followed by 'ipcluster start profile=mycluster n=4'.
58 """
58 """
59
59
60
60
61 # Exit codes for ipcluster
61 # Exit codes for ipcluster
62
62
63 # This will be the exit code if the ipcluster appears to be running because
63 # This will be the exit code if the ipcluster appears to be running because
64 # a .pid file exists
64 # a .pid file exists
65 ALREADY_STARTED = 10
65 ALREADY_STARTED = 10
66
66
67
67
68 # This will be the exit code if ipcluster stop is run, but there is no .pid
68 # This will be the exit code if ipcluster stop is run, but there is no .pid
69 # file to be found.
69 # file to be found.
70 ALREADY_STOPPED = 11
70 ALREADY_STOPPED = 11
71
71
72 # This will be the exit code if ipcluster engines is run, but there is no .pid
72 # This will be the exit code if ipcluster engines is run, but there is no .pid
73 # file to be found.
73 # file to be found.
74 NO_CLUSTER = 12
74 NO_CLUSTER = 12
75
75
76
76
77 #-----------------------------------------------------------------------------
77 #-----------------------------------------------------------------------------
78 # Main application
78 # Main application
79 #-----------------------------------------------------------------------------
79 #-----------------------------------------------------------------------------
80 start_help = """Start an IPython cluster for parallel computing
80 start_help = """Start an IPython cluster for parallel computing
81
81
82 Start an ipython cluster by its profile name or cluster
82 Start an ipython cluster by its profile name or cluster
83 directory. Cluster directories contain configuration, log and
83 directory. Cluster directories contain configuration, log and
84 security related files and are named using the convention
84 security related files and are named using the convention
85 'cluster_<profile>' and should be created using the 'start'
85 'cluster_<profile>' and should be created using the 'start'
86 subcommand of 'ipcluster'. If your cluster directory is in
86 subcommand of 'ipcluster'. If your cluster directory is in
87 the cwd or the ipython directory, you can simply refer to it
87 the cwd or the ipython directory, you can simply refer to it
88 using its profile name, 'ipcluster start n=4 profile=<profile>',
88 using its profile name, 'ipcluster start n=4 profile=<profile>',
89 otherwise use the 'profile_dir' option.
89 otherwise use the 'profile_dir' option.
90 """
90 """
91 stop_help = """Stop a running IPython cluster
91 stop_help = """Stop a running IPython cluster
92
92
93 Stop a running ipython cluster by its profile name or cluster
93 Stop a running ipython cluster by its profile name or cluster
94 directory. Cluster directories are named using the convention
94 directory. Cluster directories are named using the convention
95 'cluster_<profile>'. If your cluster directory is in
95 'cluster_<profile>'. If your cluster directory is in
96 the cwd or the ipython directory, you can simply refer to it
96 the cwd or the ipython directory, you can simply refer to it
97 using its profile name, 'ipcluster stop profile=<profile>', otherwise
97 using its profile name, 'ipcluster stop profile=<profile>', otherwise
98 use the 'profile_dir' option.
98 use the 'profile_dir' option.
99 """
99 """
100 engines_help = """Start engines connected to an existing IPython cluster
100 engines_help = """Start engines connected to an existing IPython cluster
101
101
102 Start one or more engines to connect to an existing Cluster
102 Start one or more engines to connect to an existing Cluster
103 by profile name or cluster directory.
103 by profile name or cluster directory.
104 Cluster directories contain configuration, log and
104 Cluster directories contain configuration, log and
105 security related files and are named using the convention
105 security related files and are named using the convention
106 'cluster_<profile>' and should be created using the 'start'
106 'cluster_<profile>' and should be created using the 'start'
107 subcommand of 'ipcluster'. If your cluster directory is in
107 subcommand of 'ipcluster'. If your cluster directory is in
108 the cwd or the ipython directory, you can simply refer to it
108 the cwd or the ipython directory, you can simply refer to it
109 using its profile name, 'ipcluster engines n=4 profile=<profile>',
109 using its profile name, 'ipcluster engines n=4 profile=<profile>',
110 otherwise use the 'profile_dir' option.
110 otherwise use the 'profile_dir' option.
111 """
111 """
112 create_help = """Create an ipcluster profile by name
112 create_help = """Create an ipcluster profile by name
113
113
114 Create an ipython cluster directory by its profile name or
114 Create an ipython cluster directory by its profile name or
115 cluster directory path. Cluster directories contain
115 cluster directory path. Cluster directories contain
116 configuration, log and security related files and are named
116 configuration, log and security related files and are named
117 using the convention 'cluster_<profile>'. By default they are
117 using the convention 'cluster_<profile>'. By default they are
118 located in your ipython directory. Once created, you will
118 located in your ipython directory. Once created, you will
119 probably need to edit the configuration files in the cluster
119 probably need to edit the configuration files in the cluster
120 directory to configure your cluster. Most users will create a
120 directory to configure your cluster. Most users will create a
121 cluster directory by profile name,
121 cluster directory by profile name,
122 `ipcluster create profile=mycluster`, which will put the directory
122 `ipcluster create profile=mycluster`, which will put the directory
123 in `<ipython_dir>/cluster_mycluster`.
123 in `<ipython_dir>/cluster_mycluster`.
124 """
124 """
125 list_help = """List available cluster profiles
125 list_help = """List available cluster profiles
126
126
127 List all available clusters, by cluster directory, that can
127 List all available clusters, by cluster directory, that can
128 be found in the current working directory or in the ipython
128 be found in the current working directory or in the ipython
129 directory. Cluster directories are named using the convention
129 directory. Cluster directories are named using the convention
130 'cluster_<profile>'.
130 'cluster_<profile>'.
131 """
131 """
132
132
133
133
134 class IPClusterList(BaseIPythonApplication):
134 class IPClusterList(BaseIPythonApplication):
135 name = u'ipcluster-list'
135 name = u'ipcluster-list'
136 description = list_help
136 description = list_help
137
137
138 # empty aliases
138 # empty aliases
139 aliases=Dict()
139 aliases=Dict()
140 flags = Dict(base_flags)
140 flags = Dict(base_flags)
141
141
142 def _log_level_default(self):
142 def _log_level_default(self):
143 return 20
143 return 20
144
144
145 def list_profile_dirs(self):
145 def list_profile_dirs(self):
146 # Find the search paths
146 # Find the search paths
147 profile_dir_paths = os.environ.get('IPYTHON_PROFILE_PATH','')
147 profile_dir_paths = os.environ.get('IPYTHON_PROFILE_PATH','')
148 if profile_dir_paths:
148 if profile_dir_paths:
149 profile_dir_paths = profile_dir_paths.split(':')
149 profile_dir_paths = profile_dir_paths.split(':')
150 else:
150 else:
151 profile_dir_paths = []
151 profile_dir_paths = []
152
152
153 ipython_dir = self.ipython_dir
153 ipython_dir = self.ipython_dir
154
154
155 paths = [os.getcwd(), ipython_dir] + profile_dir_paths
155 paths = [os.getcwd(), ipython_dir] + profile_dir_paths
156 paths = list(set(paths))
156 paths = list(set(paths))
157
157
158 self.log.info('Searching for cluster profiles in paths: %r' % paths)
158 self.log.info('Searching for cluster profiles in paths: %r' % paths)
159 for path in paths:
159 for path in paths:
160 files = os.listdir(path)
160 files = os.listdir(path)
161 for f in files:
161 for f in files:
162 full_path = os.path.join(path, f)
162 full_path = os.path.join(path, f)
163 if os.path.isdir(full_path) and f.startswith('profile_') and \
163 if os.path.isdir(full_path) and f.startswith('profile_') and \
164 os.path.isfile(os.path.join(full_path, 'ipcontroller_config.py')):
164 os.path.isfile(os.path.join(full_path, 'ipcontroller_config.py')):
165 profile = f.split('_')[-1]
165 profile = f.split('_')[-1]
166 start_cmd = 'ipcluster start profile=%s n=4' % profile
166 start_cmd = 'ipcluster start profile=%s n=4' % profile
167 print start_cmd + " ==> " + full_path
167 print start_cmd + " ==> " + full_path
168
168
169 def start(self):
169 def start(self):
170 self.list_profile_dirs()
170 self.list_profile_dirs()
171
171
172
172
173 # `ipcluster create` will be deprecated when `ipython profile create` or equivalent exists
173 # `ipcluster create` will be deprecated when `ipython profile create` or equivalent exists
174
174
175 create_flags = {}
175 create_flags = {}
176 create_flags.update(base_flags)
176 create_flags.update(base_flags)
177 create_flags.update(boolean_flag('reset', 'IPClusterCreate.overwrite',
177 create_flags.update(boolean_flag('reset', 'IPClusterCreate.overwrite',
178 "reset config files to defaults", "leave existing config files"))
178 "reset config files to defaults", "leave existing config files"))
179
179
180 class IPClusterCreate(BaseParallelApplication):
180 class IPClusterCreate(BaseParallelApplication):
181 name = u'ipcluster-create'
181 name = u'ipcluster-create'
182 description = create_help
182 description = create_help
183 auto_create = Bool(True)
183 auto_create = Bool(True)
184 config_file_name = Unicode(default_config_file_name)
184 config_file_name = Unicode(default_config_file_name)
185
185
186 flags = Dict(create_flags)
186 flags = Dict(create_flags)
187
187
188 aliases = Dict(dict(profile='BaseIPythonApplication.profile'))
188 aliases = Dict(dict(profile='BaseIPythonApplication.profile'))
189
189
190 classes = [ProfileDir]
190 classes = [ProfileDir]
191
191
192
192
193 stop_aliases = dict(
193 stop_aliases = dict(
194 signal='IPClusterStop.signal',
194 signal='IPClusterStop.signal',
195 profile='BaseIPythonApplication.profile',
195 profile='BaseIPythonApplication.profile',
196 profile_dir='ProfileDir.location',
196 profile_dir='ProfileDir.location',
197 )
197 )
198
198
199 class IPClusterStop(BaseParallelApplication):
199 class IPClusterStop(BaseParallelApplication):
200 name = u'ipcluster'
200 name = u'ipcluster'
201 description = stop_help
201 description = stop_help
202 config_file_name = Unicode(default_config_file_name)
202 config_file_name = Unicode(default_config_file_name)
203
203
204 signal = Int(signal.SIGINT, config=True,
204 signal = Int(signal.SIGINT, config=True,
205 help="signal to use for stopping processes.")
205 help="signal to use for stopping processes.")
206
206
207 aliases = Dict(stop_aliases)
207 aliases = Dict(stop_aliases)
208
208
209 def start(self):
209 def start(self):
210 """Start the app for the stop subcommand."""
210 """Start the app for the stop subcommand."""
211 try:
211 try:
212 pid = self.get_pid_from_file()
212 pid = self.get_pid_from_file()
213 except PIDFileError:
213 except PIDFileError:
214 self.log.critical(
214 self.log.critical(
215 'Could not read pid file, cluster is probably not running.'
215 'Could not read pid file, cluster is probably not running.'
216 )
216 )
217 # Here I exit with an unusual exit status that other processes
217 # Here I exit with an unusual exit status that other processes
218 # can watch for to learn how I exited.
218 # can watch for to learn how I exited.
219 self.remove_pid_file()
219 self.remove_pid_file()
220 self.exit(ALREADY_STOPPED)
220 self.exit(ALREADY_STOPPED)
221
221
222 if not self.check_pid(pid):
222 if not self.check_pid(pid):
223 self.log.critical(
223 self.log.critical(
224 'Cluster [pid=%r] is not running.' % pid
224 'Cluster [pid=%r] is not running.' % pid
225 )
225 )
226 self.remove_pid_file()
226 self.remove_pid_file()
227 # Here I exit with an unusual exit status that other processes
227 # Here I exit with an unusual exit status that other processes
228 # can watch for to learn how I exited.
228 # can watch for to learn how I exited.
229 self.exit(ALREADY_STOPPED)
229 self.exit(ALREADY_STOPPED)
230
230
231 elif os.name=='posix':
231 elif os.name=='posix':
232 sig = self.signal
232 sig = self.signal
233 self.log.info(
233 self.log.info(
234 "Stopping cluster [pid=%r] with [signal=%r]" % (pid, sig)
234 "Stopping cluster [pid=%r] with [signal=%r]" % (pid, sig)
235 )
235 )
236 try:
236 try:
237 os.kill(pid, sig)
237 os.kill(pid, sig)
238 except OSError:
238 except OSError:
239 self.log.error("Stopping cluster failed, assuming already dead.",
239 self.log.error("Stopping cluster failed, assuming already dead.",
240 exc_info=True)
240 exc_info=True)
241 self.remove_pid_file()
241 self.remove_pid_file()
242 elif os.name=='nt':
242 elif os.name=='nt':
243 try:
243 try:
244 # kill the whole tree
244 # kill the whole tree
245 p = check_call(['taskkill', '-pid', str(pid), '-t', '-f'], stdout=PIPE,stderr=PIPE)
245 p = check_call(['taskkill', '-pid', str(pid), '-t', '-f'], stdout=PIPE,stderr=PIPE)
246 except (CalledProcessError, OSError):
246 except (CalledProcessError, OSError):
247 self.log.error("Stopping cluster failed, assuming already dead.",
247 self.log.error("Stopping cluster failed, assuming already dead.",
248 exc_info=True)
248 exc_info=True)
249 self.remove_pid_file()
249 self.remove_pid_file()
250
250
251 engine_aliases = {}
251 engine_aliases = {}
252 engine_aliases.update(base_aliases)
252 engine_aliases.update(base_aliases)
253 engine_aliases.update(dict(
253 engine_aliases.update(dict(
254 n='IPClusterEngines.n',
254 n='IPClusterEngines.n',
255 elauncher = 'IPClusterEngines.engine_launcher_class',
255 elauncher = 'IPClusterEngines.engine_launcher_class',
256 ))
256 ))
257 class IPClusterEngines(BaseParallelApplication):
257 class IPClusterEngines(BaseParallelApplication):
258
258
259 name = u'ipcluster'
259 name = u'ipcluster'
260 description = engines_help
260 description = engines_help
261 usage = None
261 usage = None
262 config_file_name = Unicode(default_config_file_name)
262 config_file_name = Unicode(default_config_file_name)
263 default_log_level = logging.INFO
263 default_log_level = logging.INFO
264 classes = List()
264 classes = List()
265 def _classes_default(self):
265 def _classes_default(self):
266 from IPython.parallel.apps import launcher
266 from IPython.parallel.apps import launcher
267 launchers = launcher.all_launchers
267 launchers = launcher.all_launchers
268 eslaunchers = [ l for l in launchers if 'EngineSet' in l.__name__]
268 eslaunchers = [ l for l in launchers if 'EngineSet' in l.__name__]
269 return [ProfileDir]+eslaunchers
269 return [ProfileDir]+eslaunchers
270
270
271 n = Int(2, config=True,
271 n = Int(2, config=True,
272 help="The number of engines to start.")
272 help="The number of engines to start.")
273
273
274 engine_launcher_class = Unicode('LocalEngineSetLauncher',
274 engine_launcher_class = Unicode('LocalEngineSetLauncher',
275 config=True,
275 config=True,
276 help="The class for launching a set of Engines."
276 help="The class for launching a set of Engines."
277 )
277 )
278 daemonize = Bool(False, config=True,
278 daemonize = Bool(False, config=True,
279 help='Daemonize the ipcluster program. This implies --log-to-file')
279 help='Daemonize the ipcluster program. This implies --log-to-file')
280
280
281 def _daemonize_changed(self, name, old, new):
281 def _daemonize_changed(self, name, old, new):
282 if new:
282 if new:
283 self.log_to_file = True
283 self.log_to_file = True
284
284
285 aliases = Dict(engine_aliases)
285 aliases = Dict(engine_aliases)
286 # flags = Dict(flags)
286 # flags = Dict(flags)
287 _stopping = False
287 _stopping = False
288
288
289 def initialize(self, argv=None):
289 def initialize(self, argv=None):
290 super(IPClusterEngines, self).initialize(argv)
290 super(IPClusterEngines, self).initialize(argv)
291 self.init_signal()
291 self.init_signal()
292 self.init_launchers()
292 self.init_launchers()
293
293
294 def init_launchers(self):
294 def init_launchers(self):
295 self.engine_launcher = self.build_launcher(self.engine_launcher_class)
295 self.engine_launcher = self.build_launcher(self.engine_launcher_class)
296 self.engine_launcher.on_stop(lambda r: self.loop.stop())
296 self.engine_launcher.on_stop(lambda r: self.loop.stop())
297
297
298 def init_signal(self):
298 def init_signal(self):
299 # Setup signals
299 # Setup signals
300 signal.signal(signal.SIGINT, self.sigint_handler)
300 signal.signal(signal.SIGINT, self.sigint_handler)
301
301
302 def build_launcher(self, clsname):
302 def build_launcher(self, clsname):
303 """import and instantiate a Launcher based on importstring"""
303 """import and instantiate a Launcher based on importstring"""
304 if '.' not in clsname:
304 if '.' not in clsname:
305 # not a module, presume it's the raw name in apps.launcher
305 # not a module, presume it's the raw name in apps.launcher
306 clsname = 'IPython.parallel.apps.launcher.'+clsname
306 clsname = 'IPython.parallel.apps.launcher.'+clsname
307 # print repr(clsname)
307 # print repr(clsname)
308 klass = import_item(clsname)
308 klass = import_item(clsname)
309
309
310 launcher = klass(
310 launcher = klass(
311 work_dir=self.profile_dir.location, config=self.config, logname=self.log.name
311 work_dir=self.profile_dir.location, config=self.config, log=self.log
312 )
312 )
313 return launcher
313 return launcher
314
314
315 def start_engines(self):
315 def start_engines(self):
316 self.log.info("Starting %i engines"%self.n)
316 self.log.info("Starting %i engines"%self.n)
317 self.engine_launcher.start(
317 self.engine_launcher.start(
318 self.n,
318 self.n,
319 self.profile_dir.location
319 self.profile_dir.location
320 )
320 )
321
321
322 def stop_engines(self):
322 def stop_engines(self):
323 self.log.info("Stopping Engines...")
323 self.log.info("Stopping Engines...")
324 if self.engine_launcher.running:
324 if self.engine_launcher.running:
325 d = self.engine_launcher.stop()
325 d = self.engine_launcher.stop()
326 return d
326 return d
327 else:
327 else:
328 return None
328 return None
329
329
330 def stop_launchers(self, r=None):
330 def stop_launchers(self, r=None):
331 if not self._stopping:
331 if not self._stopping:
332 self._stopping = True
332 self._stopping = True
333 self.log.error("IPython cluster: stopping")
333 self.log.error("IPython cluster: stopping")
334 self.stop_engines()
334 self.stop_engines()
335 # Wait a few seconds to let things shut down.
335 # Wait a few seconds to let things shut down.
336 dc = ioloop.DelayedCallback(self.loop.stop, 4000, self.loop)
336 dc = ioloop.DelayedCallback(self.loop.stop, 4000, self.loop)
337 dc.start()
337 dc.start()
338
338
339 def sigint_handler(self, signum, frame):
339 def sigint_handler(self, signum, frame):
340 self.log.debug("SIGINT received, stopping launchers...")
340 self.log.debug("SIGINT received, stopping launchers...")
341 self.stop_launchers()
341 self.stop_launchers()
342
342
343 def start_logging(self):
343 def start_logging(self):
344 # Remove old log files of the controller and engine
344 # Remove old log files of the controller and engine
345 if self.clean_logs:
345 if self.clean_logs:
346 log_dir = self.profile_dir.log_dir
346 log_dir = self.profile_dir.log_dir
347 for f in os.listdir(log_dir):
347 for f in os.listdir(log_dir):
348 if re.match(r'ip(engine|controller)z-\d+\.(log|err|out)',f):
348 if re.match(r'ip(engine|controller)z-\d+\.(log|err|out)',f):
349 os.remove(os.path.join(log_dir, f))
349 os.remove(os.path.join(log_dir, f))
350 # This will remove old log files for ipcluster itself
350 # This will remove old log files for ipcluster itself
351 # super(IPBaseParallelApplication, self).start_logging()
351 # super(IPBaseParallelApplication, self).start_logging()
352
352
353 def start(self):
353 def start(self):
354 """Start the app for the engines subcommand."""
354 """Start the app for the engines subcommand."""
355 self.log.info("IPython cluster: started")
355 self.log.info("IPython cluster: started")
356 # First see if the cluster is already running
356 # First see if the cluster is already running
357
357
358 # Now log and daemonize
358 # Now log and daemonize
359 self.log.info(
359 self.log.info(
360 'Starting engines with [daemon=%r]' % self.daemonize
360 'Starting engines with [daemon=%r]' % self.daemonize
361 )
361 )
362 # TODO: Get daemonize working on Windows or as a Windows Server.
362 # TODO: Get daemonize working on Windows or as a Windows Server.
363 if self.daemonize:
363 if self.daemonize:
364 if os.name=='posix':
364 if os.name=='posix':
365 from twisted.scripts._twistd_unix import daemonize
365 from twisted.scripts._twistd_unix import daemonize
366 daemonize()
366 daemonize()
367
367
368 dc = ioloop.DelayedCallback(self.start_engines, 0, self.loop)
368 dc = ioloop.DelayedCallback(self.start_engines, 0, self.loop)
369 dc.start()
369 dc.start()
370 # Now write the new pid file AFTER our new forked pid is active.
370 # Now write the new pid file AFTER our new forked pid is active.
371 # self.write_pid_file()
371 # self.write_pid_file()
372 try:
372 try:
373 self.loop.start()
373 self.loop.start()
374 except KeyboardInterrupt:
374 except KeyboardInterrupt:
375 pass
375 pass
376 except zmq.ZMQError as e:
376 except zmq.ZMQError as e:
377 if e.errno == errno.EINTR:
377 if e.errno == errno.EINTR:
378 pass
378 pass
379 else:
379 else:
380 raise
380 raise
381
381
382 start_aliases = {}
382 start_aliases = {}
383 start_aliases.update(engine_aliases)
383 start_aliases.update(engine_aliases)
384 start_aliases.update(dict(
384 start_aliases.update(dict(
385 delay='IPClusterStart.delay',
385 delay='IPClusterStart.delay',
386 clean_logs='IPClusterStart.clean_logs',
386 clean_logs='IPClusterStart.clean_logs',
387 ))
387 ))
388
388
389 class IPClusterStart(IPClusterEngines):
389 class IPClusterStart(IPClusterEngines):
390
390
391 name = u'ipcluster'
391 name = u'ipcluster'
392 description = start_help
392 description = start_help
393 default_log_level = logging.INFO
393 default_log_level = logging.INFO
394 auto_create = Bool(True, config=True,
394 auto_create = Bool(True, config=True,
395 help="whether to create the profile_dir if it doesn't exist")
395 help="whether to create the profile_dir if it doesn't exist")
396 classes = List()
396 classes = List()
397 def _classes_default(self,):
397 def _classes_default(self,):
398 from IPython.parallel.apps import launcher
398 from IPython.parallel.apps import launcher
399 return [ProfileDir]+launcher.all_launchers
399 return [ProfileDir]+launcher.all_launchers
400
400
401 clean_logs = Bool(True, config=True,
401 clean_logs = Bool(True, config=True,
402 help="whether to cleanup old logs before starting")
402 help="whether to cleanup old logs before starting")
403
403
404 delay = CFloat(1., config=True,
404 delay = CFloat(1., config=True,
405 help="delay (in s) between starting the controller and the engines")
405 help="delay (in s) between starting the controller and the engines")
406
406
407 controller_launcher_class = Unicode('LocalControllerLauncher',
407 controller_launcher_class = Unicode('LocalControllerLauncher',
408 config=True,
408 config=True,
409 help="The class for launching a Controller."
409 help="The class for launching a Controller."
410 )
410 )
411 reset = Bool(False, config=True,
411 reset = Bool(False, config=True,
412 help="Whether to reset config files as part of '--create'."
412 help="Whether to reset config files as part of '--create'."
413 )
413 )
414
414
415 # flags = Dict(flags)
415 # flags = Dict(flags)
416 aliases = Dict(start_aliases)
416 aliases = Dict(start_aliases)
417
417
418 def init_launchers(self):
418 def init_launchers(self):
419 self.controller_launcher = self.build_launcher(self.controller_launcher_class)
419 self.controller_launcher = self.build_launcher(self.controller_launcher_class)
420 self.engine_launcher = self.build_launcher(self.engine_launcher_class)
420 self.engine_launcher = self.build_launcher(self.engine_launcher_class)
421 self.controller_launcher.on_stop(self.stop_launchers)
421 self.controller_launcher.on_stop(self.stop_launchers)
422
422
423 def start_controller(self):
423 def start_controller(self):
424 self.controller_launcher.start(
424 self.controller_launcher.start(
425 self.profile_dir.location
425 self.profile_dir.location
426 )
426 )
427
427
428 def stop_controller(self):
428 def stop_controller(self):
429 # self.log.info("In stop_controller")
429 # self.log.info("In stop_controller")
430 if self.controller_launcher and self.controller_launcher.running:
430 if self.controller_launcher and self.controller_launcher.running:
431 return self.controller_launcher.stop()
431 return self.controller_launcher.stop()
432
432
433 def stop_launchers(self, r=None):
433 def stop_launchers(self, r=None):
434 if not self._stopping:
434 if not self._stopping:
435 self.stop_controller()
435 self.stop_controller()
436 super(IPClusterStart, self).stop_launchers()
436 super(IPClusterStart, self).stop_launchers()
437
437
438 def start(self):
438 def start(self):
439 """Start the app for the start subcommand."""
439 """Start the app for the start subcommand."""
440 # First see if the cluster is already running
440 # First see if the cluster is already running
441 try:
441 try:
442 pid = self.get_pid_from_file()
442 pid = self.get_pid_from_file()
443 except PIDFileError:
443 except PIDFileError:
444 pass
444 pass
445 else:
445 else:
446 if self.check_pid(pid):
446 if self.check_pid(pid):
447 self.log.critical(
447 self.log.critical(
448 'Cluster is already running with [pid=%s]. '
448 'Cluster is already running with [pid=%s]. '
449 'use "ipcluster stop" to stop the cluster.' % pid
449 'use "ipcluster stop" to stop the cluster.' % pid
450 )
450 )
451 # Here I exit with an unusual exit status that other processes
451 # Here I exit with an unusual exit status that other processes
452 # can watch for to learn how I exited.
452 # can watch for to learn how I exited.
453 self.exit(ALREADY_STARTED)
453 self.exit(ALREADY_STARTED)
454 else:
454 else:
455 self.remove_pid_file()
455 self.remove_pid_file()
456
456
457
457
458 # Now log and daemonize
458 # Now log and daemonize
459 self.log.info(
459 self.log.info(
460 'Starting ipcluster with [daemon=%r]' % self.daemonize
460 'Starting ipcluster with [daemon=%r]' % self.daemonize
461 )
461 )
462 # TODO: Get daemonize working on Windows or as a Windows Server.
462 # TODO: Get daemonize working on Windows or as a Windows Server.
463 if self.daemonize:
463 if self.daemonize:
464 if os.name=='posix':
464 if os.name=='posix':
465 from twisted.scripts._twistd_unix import daemonize
465 from twisted.scripts._twistd_unix import daemonize
466 daemonize()
466 daemonize()
467
467
468 dc = ioloop.DelayedCallback(self.start_controller, 0, self.loop)
468 dc = ioloop.DelayedCallback(self.start_controller, 0, self.loop)
469 dc.start()
469 dc.start()
470 dc = ioloop.DelayedCallback(self.start_engines, 1000*self.delay, self.loop)
470 dc = ioloop.DelayedCallback(self.start_engines, 1000*self.delay, self.loop)
471 dc.start()
471 dc.start()
472 # Now write the new pid file AFTER our new forked pid is active.
472 # Now write the new pid file AFTER our new forked pid is active.
473 self.write_pid_file()
473 self.write_pid_file()
474 try:
474 try:
475 self.loop.start()
475 self.loop.start()
476 except KeyboardInterrupt:
476 except KeyboardInterrupt:
477 pass
477 pass
478 except zmq.ZMQError as e:
478 except zmq.ZMQError as e:
479 if e.errno == errno.EINTR:
479 if e.errno == errno.EINTR:
480 pass
480 pass
481 else:
481 else:
482 raise
482 raise
483 finally:
483 finally:
484 self.remove_pid_file()
484 self.remove_pid_file()
485
485
486 base='IPython.parallel.apps.ipclusterapp.IPCluster'
486 base='IPython.parallel.apps.ipclusterapp.IPCluster'
487
487
488 class IPBaseParallelApplication(Application):
488 class IPBaseParallelApplication(Application):
489 name = u'ipcluster'
489 name = u'ipcluster'
490 description = _description
490 description = _description
491
491
492 subcommands = {'create' : (base+'Create', create_help),
492 subcommands = {'create' : (base+'Create', create_help),
493 'list' : (base+'List', list_help),
493 'list' : (base+'List', list_help),
494 'start' : (base+'Start', start_help),
494 'start' : (base+'Start', start_help),
495 'stop' : (base+'Stop', stop_help),
495 'stop' : (base+'Stop', stop_help),
496 'engines' : (base+'Engines', engines_help),
496 'engines' : (base+'Engines', engines_help),
497 }
497 }
498
498
499 # no aliases or flags for parent App
499 # no aliases or flags for parent App
500 aliases = Dict()
500 aliases = Dict()
501 flags = Dict()
501 flags = Dict()
502
502
503 def start(self):
503 def start(self):
504 if self.subapp is None:
504 if self.subapp is None:
505 print "No subcommand specified! Must specify one of: %s"%(self.subcommands.keys())
505 print "No subcommand specified! Must specify one of: %s"%(self.subcommands.keys())
506 print
506 print
507 self.print_subcommands()
507 self.print_subcommands()
508 self.exit(1)
508 self.exit(1)
509 else:
509 else:
510 return self.subapp.start()
510 return self.subapp.start()
511
511
512 def launch_new_instance():
512 def launch_new_instance():
513 """Create and run the IPython cluster."""
513 """Create and run the IPython cluster."""
514 app = IPBaseParallelApplication.instance()
514 app = IPBaseParallelApplication.instance()
515 app.initialize()
515 app.initialize()
516 app.start()
516 app.start()
517
517
518
518
519 if __name__ == '__main__':
519 if __name__ == '__main__':
520 launch_new_instance()
520 launch_new_instance()
521
521
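The ALREADY_STARTED / ALREADY_STOPPED / NO_CLUSTER constants above form a small exit-code protocol that wrapper scripts can watch for. A hedged sketch of such a caller (Python 2, matching this codebase; the profile name is illustrative):

import subprocess

ALREADY_STOPPED = 11  # mirrors the constant defined in ipclusterapp

ret = subprocess.call(['ipcluster', 'stop', 'profile=mycluster'])
if ret == ALREADY_STOPPED:
    print "no .pid file found; the cluster was not running"
elif ret != 0:
    print "ipcluster stop failed with status %i" % ret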
IPython/parallel/apps/iploggerapp.py
@@ -1,96 +1,96 @@
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # encoding: utf-8
2 # encoding: utf-8
3 """
3 """
4 A simple IPython logger application
4 A simple IPython logger application
5 """
5 """
6
6
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2011 The IPython Development Team
8 # Copyright (C) 2011 The IPython Development Team
9 #
9 #
10 # Distributed under the terms of the BSD License. The full license is in
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13
13
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15 # Imports
15 # Imports
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 import os
18 import os
19 import sys
19 import sys
20
20
21 import zmq
21 import zmq
22
22
23 from IPython.core.newapplication import ProfileDir
23 from IPython.core.newapplication import ProfileDir
24 from IPython.utils.traitlets import Bool, Dict, Unicode
24 from IPython.utils.traitlets import Bool, Dict, Unicode
25
25
26 from IPython.parallel.apps.baseapp import (
26 from IPython.parallel.apps.baseapp import (
27 BaseParallelApplication,
27 BaseParallelApplication,
28 base_aliases
28 base_aliases
29 )
29 )
30 from IPython.parallel.apps.logwatcher import LogWatcher
30 from IPython.parallel.apps.logwatcher import LogWatcher
31
31
32 #-----------------------------------------------------------------------------
32 #-----------------------------------------------------------------------------
33 # Module level variables
33 # Module level variables
34 #-----------------------------------------------------------------------------
34 #-----------------------------------------------------------------------------
35
35
36 #: The default config file name for this application
36 #: The default config file name for this application
37 default_config_file_name = u'iplogger_config.py'
37 default_config_file_name = u'iplogger_config.py'
38
38
39 _description = """Start an IPython logger for parallel computing.
39 _description = """Start an IPython logger for parallel computing.
40
40
41 IPython controllers and engines (and your own processes) can broadcast log messages
41 IPython controllers and engines (and your own processes) can broadcast log messages
42 by registering a `zmq.log.handlers.PUBHandler` with the `logging` module. The
42 by registering a `zmq.log.handlers.PUBHandler` with the `logging` module. The
43 logger can be configured using command line options or using a cluster
43 logger can be configured using command line options or using a cluster
44 directory. Cluster directories contain config, log and security files and are
44 directory. Cluster directories contain config, log and security files and are
45 usually located in your ipython directory and named as "cluster_<profile>".
45 usually located in your ipython directory and named as "cluster_<profile>".
46 See the `profile` and `profile_dir` options for details.
46 See the `profile` and `profile_dir` options for details.
47 """
47 """
48
48
49
49
50 #-----------------------------------------------------------------------------
50 #-----------------------------------------------------------------------------
51 # Main application
51 # Main application
52 #-----------------------------------------------------------------------------
52 #-----------------------------------------------------------------------------
53 aliases = {}
53 aliases = {}
54 aliases.update(base_aliases)
54 aliases.update(base_aliases)
55 aliases.update(dict(url='LogWatcher.url', topics='LogWatcher.topics'))
55 aliases.update(dict(url='LogWatcher.url', topics='LogWatcher.topics'))
56
56
57 class IPLoggerApp(BaseParallelApplication):
57 class IPLoggerApp(BaseParallelApplication):
58
58
59 name = u'iploggerz'
59 name = u'iploggerz'
60 description = _description
60 description = _description
61 config_file_name = Unicode(default_config_file_name)
61 config_file_name = Unicode(default_config_file_name)
62
62
63 classes = [LogWatcher, ProfileDir]
63 classes = [LogWatcher, ProfileDir]
64 aliases = Dict(aliases)
64 aliases = Dict(aliases)
65
65
66 def initialize(self, argv=None):
66 def initialize(self, argv=None):
67 super(IPLoggerApp, self).initialize(argv)
67 super(IPLoggerApp, self).initialize(argv)
68 self.init_watcher()
68 self.init_watcher()
69
69
70 def init_watcher(self):
70 def init_watcher(self):
71 try:
71 try:
72 self.watcher = LogWatcher(config=self.config, logname=self.log.name)
72 self.watcher = LogWatcher(config=self.config, log=self.log)
73 except:
73 except:
74 self.log.error("Couldn't start the LogWatcher", exc_info=True)
74 self.log.error("Couldn't start the LogWatcher", exc_info=True)
75 self.exit(1)
75 self.exit(1)
76 self.log.info("Listening for log messages on %r"%self.watcher.url)
76 self.log.info("Listening for log messages on %r"%self.watcher.url)
77
77
78
78
79 def start(self):
79 def start(self):
80 self.watcher.start()
80 self.watcher.start()
81 try:
81 try:
82 self.watcher.loop.start()
82 self.watcher.loop.start()
83 except KeyboardInterrupt:
83 except KeyboardInterrupt:
84 self.log.critical("Logging Interrupted, shutting down...\n")
84 self.log.critical("Logging Interrupted, shutting down...\n")
85
85
86
86
87 def launch_new_instance():
87 def launch_new_instance():
88 """Create and run the IPython LogWatcher"""
88 """Create and run the IPython LogWatcher"""
89 app = IPLoggerApp.instance()
89 app = IPLoggerApp.instance()
90 app.initialize()
90 app.initialize()
91 app.start()
91 app.start()
92
92
93
93
94 if __name__ == '__main__':
94 if __name__ == '__main__':
95 launch_new_instance()
95 launch_new_instance()
96
96
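As the description above notes, controllers, engines, and user processes broadcast log records by registering a `zmq.log.handlers.PUBHandler` with the stdlib `logging` module. A hedged sketch of such a publisher (the URL is an assumption; point it at your configured LogWatcher.url):

import logging

import zmq
from zmq.log.handlers import PUBHandler

ctx = zmq.Context()
pub = ctx.socket(zmq.PUB)
pub.connect('tcp://127.0.0.1:20202')  # assumed LogWatcher.url

handler = PUBHandler(pub)
handler.root_topic = 'myprocess'  # zmq topic prefix the watcher subscribes to
logging.getLogger().addHandler(handler)
logging.getLogger().setLevel(logging.INFO)

logging.info("this record is published over zmq to the log watcher")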
IPython/parallel/apps/launcher.py
@@ -1,1070 +1,1069 @@
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # encoding: utf-8
2 # encoding: utf-8
3 """
3 """
4 Facilities for launching IPython processes asynchronously.
4 Facilities for launching IPython processes asynchronously.
5 """
5 """
6
6
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2008-2009 The IPython Development Team
8 # Copyright (C) 2008-2009 The IPython Development Team
9 #
9 #
10 # Distributed under the terms of the BSD License. The full license is in
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13
13
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15 # Imports
15 # Imports
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 import copy
18 import copy
19 import logging
19 import logging
20 import os
20 import os
21 import re
21 import re
22 import stat
22 import stat
23
23
24 # signal imports, handling various platforms, versions
24 # signal imports, handling various platforms, versions
25
25
26 from signal import SIGINT, SIGTERM
26 from signal import SIGINT, SIGTERM
27 try:
27 try:
28 from signal import SIGKILL
28 from signal import SIGKILL
29 except ImportError:
29 except ImportError:
30 # Windows
30 # Windows
31 SIGKILL=SIGTERM
31 SIGKILL=SIGTERM
32
32
33 try:
33 try:
34 # Python >= 2.7, 3.2 on Windows
34 # Python >= 2.7, 3.2 on Windows
35 from signal import CTRL_C_EVENT as SIGINT
35 from signal import CTRL_C_EVENT as SIGINT
36 except ImportError:
36 except ImportError:
37 pass
37 pass
38
38
39 from subprocess import Popen, PIPE, STDOUT
39 from subprocess import Popen, PIPE, STDOUT
40 try:
40 try:
41 from subprocess import check_output
41 from subprocess import check_output
42 except ImportError:
42 except ImportError:
43 # pre-2.7, define check_output with Popen
43 # pre-2.7, define check_output with Popen
44 def check_output(*args, **kwargs):
44 def check_output(*args, **kwargs):
45 kwargs.update(dict(stdout=PIPE))
45 kwargs.update(dict(stdout=PIPE))
46 p = Popen(*args, **kwargs)
46 p = Popen(*args, **kwargs)
47 out,err = p.communicate()
47 out,err = p.communicate()
48 return out
48 return out
49
49
50 from zmq.eventloop import ioloop
50 from zmq.eventloop import ioloop
51
51
52 # from IPython.config.configurable import Configurable
52 from IPython.config.configurable import Configurable
53 from IPython.utils.text import EvalFormatter
53 from IPython.utils.text import EvalFormatter
54 from IPython.utils.traitlets import Any, Int, List, Unicode, Dict, Instance
54 from IPython.utils.traitlets import Any, Int, List, Unicode, Dict, Instance
55 from IPython.utils.path import get_ipython_module_path
55 from IPython.utils.path import get_ipython_module_path
56 from IPython.utils.process import find_cmd, pycmd2argv, FindCmdError
56 from IPython.utils.process import find_cmd, pycmd2argv, FindCmdError
57
57
58 from IPython.parallel.factory import LoggingFactory
59
60 from .win32support import forward_read_events
58 from .win32support import forward_read_events
61
59
62 from .winhpcjob import IPControllerTask, IPEngineTask, IPControllerJob, IPEngineSetJob
60 from .winhpcjob import IPControllerTask, IPEngineTask, IPControllerJob, IPEngineSetJob
63
61
64 WINDOWS = os.name == 'nt'
62 WINDOWS = os.name == 'nt'
65
63
66 #-----------------------------------------------------------------------------
64 #-----------------------------------------------------------------------------
67 # Paths to the kernel apps
65 # Paths to the kernel apps
68 #-----------------------------------------------------------------------------
66 #-----------------------------------------------------------------------------
69
67
70
68
71 ipcluster_cmd_argv = pycmd2argv(get_ipython_module_path(
69 ipcluster_cmd_argv = pycmd2argv(get_ipython_module_path(
72 'IPython.parallel.apps.ipclusterapp'
70 'IPython.parallel.apps.ipclusterapp'
73 ))
71 ))
74
72
75 ipengine_cmd_argv = pycmd2argv(get_ipython_module_path(
73 ipengine_cmd_argv = pycmd2argv(get_ipython_module_path(
76 'IPython.parallel.apps.ipengineapp'
74 'IPython.parallel.apps.ipengineapp'
77 ))
75 ))
78
76
79 ipcontroller_cmd_argv = pycmd2argv(get_ipython_module_path(
77 ipcontroller_cmd_argv = pycmd2argv(get_ipython_module_path(
80 'IPython.parallel.apps.ipcontrollerapp'
78 'IPython.parallel.apps.ipcontrollerapp'
81 ))
79 ))
82
80
83 #-----------------------------------------------------------------------------
81 #-----------------------------------------------------------------------------
84 # Base launchers and errors
82 # Base launchers and errors
85 #-----------------------------------------------------------------------------
83 #-----------------------------------------------------------------------------
86
84
87
85
88 class LauncherError(Exception):
86 class LauncherError(Exception):
89 pass
87 pass
90
88
91
89
92 class ProcessStateError(LauncherError):
90 class ProcessStateError(LauncherError):
93 pass
91 pass
94
92
95
93
96 class UnknownStatus(LauncherError):
94 class UnknownStatus(LauncherError):
97 pass
95 pass
98
96
99
97
100 class BaseLauncher(LoggingFactory):
98 class BaseLauncher(Configurable):
101 """An asbtraction for starting, stopping and signaling a process."""
99 """An asbtraction for starting, stopping and signaling a process."""
102
100
103 # In all of the launchers, the work_dir is where child processes will be
101 # In all of the launchers, the work_dir is where child processes will be
104 # run. This will usually be the profile_dir, but may not be. Any work_dir
102 # run. This will usually be the profile_dir, but may not be. Any work_dir
105 # passed into the __init__ method will override the config value.
103 # passed into the __init__ method will override the config value.
106 # This should not be used to set the work_dir for the actual engine
104 # This should not be used to set the work_dir for the actual engine
107 # and controller. Instead, use their own config files or the
105 # and controller. Instead, use their own config files or the
108 # controller_args, engine_args attributes of the launchers to add
106 # controller_args, engine_args attributes of the launchers to add
109 # the work_dir option.
107 # the work_dir option.
110 work_dir = Unicode(u'.')
108 work_dir = Unicode(u'.')
111 loop = Instance('zmq.eventloop.ioloop.IOLoop')
109 loop = Instance('zmq.eventloop.ioloop.IOLoop')
110 log = Instance('logging.Logger', ('root',))
112
111
113 start_data = Any()
112 start_data = Any()
114 stop_data = Any()
113 stop_data = Any()
115
114
116 def _loop_default(self):
115 def _loop_default(self):
117 return ioloop.IOLoop.instance()
116 return ioloop.IOLoop.instance()
118
117
119 def __init__(self, work_dir=u'.', config=None, **kwargs):
118 def __init__(self, work_dir=u'.', config=None, **kwargs):
120 super(BaseLauncher, self).__init__(work_dir=work_dir, config=config, **kwargs)
119 super(BaseLauncher, self).__init__(work_dir=work_dir, config=config, **kwargs)
121 self.state = 'before' # can be before, running, after
120 self.state = 'before' # can be before, running, after
122 self.stop_callbacks = []
121 self.stop_callbacks = []
123 self.start_data = None
122 self.start_data = None
124 self.stop_data = None
123 self.stop_data = None
125
124
126 @property
125 @property
127 def args(self):
126 def args(self):
128 """A list of cmd and args that will be used to start the process.
127 """A list of cmd and args that will be used to start the process.
129
128
130 This is what is passed to :func:`spawnProcess` and the first element
129 This is what is passed to :func:`spawnProcess` and the first element
131 will be the process name.
130 will be the process name.
132 """
131 """
133 return self.find_args()
132 return self.find_args()
134
133
135 def find_args(self):
134 def find_args(self):
136 """The ``.args`` property calls this to find the args list.
135 """The ``.args`` property calls this to find the args list.
137
136
138 Subclasses should implement this to construct the cmd and args.
137 Subclasses should implement this to construct the cmd and args.
139 """
138 """
140 raise NotImplementedError('find_args must be implemented in a subclass')
139 raise NotImplementedError('find_args must be implemented in a subclass')
141
140
142 @property
141 @property
143 def arg_str(self):
142 def arg_str(self):
144 """The string form of the program arguments."""
143 """The string form of the program arguments."""
145 return ' '.join(self.args)
144 return ' '.join(self.args)
146
145
147 @property
146 @property
148 def running(self):
147 def running(self):
149 """Am I running."""
148 """Am I running."""
150 if self.state == 'running':
149 if self.state == 'running':
151 return True
150 return True
152 else:
151 else:
153 return False
152 return False
154
153
155 def start(self):
154 def start(self):
156 """Start the process.
155 """Start the process.
157
156
158 This must return a deferred that fires with information about the
157 This must return a deferred that fires with information about the
159 process starting (like a pid, job id, etc.).
158 process starting (like a pid, job id, etc.).
160 """
159 """
161 raise NotImplementedError('start must be implemented in a subclass')
160 raise NotImplementedError('start must be implemented in a subclass')
162
161
163 def stop(self):
162 def stop(self):
164 """Stop the process and notify observers of stopping.
163 """Stop the process and notify observers of stopping.
165
164
166 This must return a deferred that fires with information about the
165 This must return a deferred that fires with information about the
167 process stopping, like errors that occur while the process is
166 process stopping, like errors that occur while the process is
168 attempting to be shut down. This deferred won't fire when the process
167 attempting to be shut down. This deferred won't fire when the process
169 actually stops. To observe the actual process stopping, see
168 actually stops. To observe the actual process stopping, see
170 :func:`observe_stop`.
169 :func:`observe_stop`.
171 """
170 """
172 raise NotImplementedError('stop must be implemented in a subclass')
171 raise NotImplementedError('stop must be implemented in a subclass')
173
172
174 def on_stop(self, f):
173 def on_stop(self, f):
175 """Get a deferred that will fire when the process stops.
174 """Get a deferred that will fire when the process stops.
176
175
177 The deferred will fire with data that contains information about
176 The deferred will fire with data that contains information about
178 the exit status of the process.
177 the exit status of the process.
179 """
178 """
180 if self.state=='after':
179 if self.state=='after':
181 return f(self.stop_data)
180 return f(self.stop_data)
182 else:
181 else:
183 self.stop_callbacks.append(f)
182 self.stop_callbacks.append(f)
184
183
185 def notify_start(self, data):
184 def notify_start(self, data):
186 """Call this to trigger startup actions.
185 """Call this to trigger startup actions.
187
186
188 This logs the process startup and sets the state to 'running'. It is
187 This logs the process startup and sets the state to 'running'. It is
189 a pass-through so it can be used as a callback.
188 a pass-through so it can be used as a callback.
190 """
189 """
191
190
192 self.log.info('Process %r started: %r' % (self.args[0], data))
191 self.log.info('Process %r started: %r' % (self.args[0], data))
193 self.start_data = data
192 self.start_data = data
194 self.state = 'running'
193 self.state = 'running'
195 return data
194 return data
196
195
197 def notify_stop(self, data):
196 def notify_stop(self, data):
198 """Call this to trigger process stop actions.
197 """Call this to trigger process stop actions.
199
198
200 This logs the process stopping and sets the state to 'after'. Call
199 This logs the process stopping and sets the state to 'after'. Call
201 this to trigger all the deferreds from :func:`observe_stop`."""
200 this to trigger all the deferreds from :func:`observe_stop`."""
202
201
203 self.log.info('Process %r stopped: %r' % (self.args[0], data))
202 self.log.info('Process %r stopped: %r' % (self.args[0], data))
204 self.stop_data = data
203 self.stop_data = data
205 self.state = 'after'
204 self.state = 'after'
206 for i in range(len(self.stop_callbacks)):
205 for i in range(len(self.stop_callbacks)):
207 d = self.stop_callbacks.pop()
206 d = self.stop_callbacks.pop()
208 d(data)
207 d(data)
209 return data
208 return data
210
209
211 def signal(self, sig):
210 def signal(self, sig):
212 """Signal the process.
211 """Signal the process.
213
212
214 Return a semi-meaningless deferred after signaling the process.
213 Return a semi-meaningless deferred after signaling the process.
215
214
216 Parameters
215 Parameters
217 ----------
216 ----------
218 sig : str or int
217 sig : str or int
219 'KILL', 'INT', etc., or any signal number
218 'KILL', 'INT', etc., or any signal number
220 """
219 """
221 raise NotImplementedError('signal must be implemented in a subclass')
220 raise NotImplementedError('signal must be implemented in a subclass')
222
221
223
222
224 #-----------------------------------------------------------------------------
223 #-----------------------------------------------------------------------------
225 # Local process launchers
224 # Local process launchers
226 #-----------------------------------------------------------------------------
225 #-----------------------------------------------------------------------------
227
226
228
227
229 class LocalProcessLauncher(BaseLauncher):
228 class LocalProcessLauncher(BaseLauncher):
230 """Start and stop an external process in an asynchronous manner.
229 """Start and stop an external process in an asynchronous manner.
231
230
232 This will launch the external process with a working directory of
231 This will launch the external process with a working directory of
233 ``self.work_dir``.
232 ``self.work_dir``.
234 """
233 """
235
234
236 # This is used to construct self.args, which is passed to
235 # This is used to construct self.args, which is passed to
237 # spawnProcess.
236 # spawnProcess.
238 cmd_and_args = List([])
237 cmd_and_args = List([])
239 poll_frequency = Int(100) # in ms
238 poll_frequency = Int(100) # in ms
240
239
241 def __init__(self, work_dir=u'.', config=None, **kwargs):
240 def __init__(self, work_dir=u'.', config=None, **kwargs):
242 super(LocalProcessLauncher, self).__init__(
241 super(LocalProcessLauncher, self).__init__(
243 work_dir=work_dir, config=config, **kwargs
242 work_dir=work_dir, config=config, **kwargs
244 )
243 )
245 self.process = None
244 self.process = None
246 self.start_deferred = None
245 self.start_deferred = None
247 self.poller = None
246 self.poller = None
248
247
249 def find_args(self):
248 def find_args(self):
250 return self.cmd_and_args
249 return self.cmd_and_args
251
250
252 def start(self):
251 def start(self):
253 if self.state == 'before':
252 if self.state == 'before':
254 self.process = Popen(self.args,
253 self.process = Popen(self.args,
255 stdout=PIPE,stderr=PIPE,stdin=PIPE,
254 stdout=PIPE,stderr=PIPE,stdin=PIPE,
256 env=os.environ,
255 env=os.environ,
257 cwd=self.work_dir
256 cwd=self.work_dir
258 )
257 )
259 if WINDOWS:
258 if WINDOWS:
260 self.stdout = forward_read_events(self.process.stdout)
259 self.stdout = forward_read_events(self.process.stdout)
261 self.stderr = forward_read_events(self.process.stderr)
260 self.stderr = forward_read_events(self.process.stderr)
262 else:
261 else:
263 self.stdout = self.process.stdout.fileno()
262 self.stdout = self.process.stdout.fileno()
264 self.stderr = self.process.stderr.fileno()
263 self.stderr = self.process.stderr.fileno()
265 self.loop.add_handler(self.stdout, self.handle_stdout, self.loop.READ)
264 self.loop.add_handler(self.stdout, self.handle_stdout, self.loop.READ)
266 self.loop.add_handler(self.stderr, self.handle_stderr, self.loop.READ)
265 self.loop.add_handler(self.stderr, self.handle_stderr, self.loop.READ)
267 self.poller = ioloop.PeriodicCallback(self.poll, self.poll_frequency, self.loop)
266 self.poller = ioloop.PeriodicCallback(self.poll, self.poll_frequency, self.loop)
268 self.poller.start()
267 self.poller.start()
269 self.notify_start(self.process.pid)
268 self.notify_start(self.process.pid)
270 else:
269 else:
271 s = 'The process was already started and has state: %r' % self.state
270 s = 'The process was already started and has state: %r' % self.state
272 raise ProcessStateError(s)
271 raise ProcessStateError(s)
273
272
274 def stop(self):
273 def stop(self):
275 return self.interrupt_then_kill()
274 return self.interrupt_then_kill()
276
275
277 def signal(self, sig):
276 def signal(self, sig):
278 if self.state == 'running':
277 if self.state == 'running':
279 if WINDOWS and sig != SIGINT:
278 if WINDOWS and sig != SIGINT:
280 # use Windows tree-kill for better child cleanup
279 # use Windows tree-kill for better child cleanup
281 check_output(['taskkill', '-pid', str(self.process.pid), '-t', '-f'])
280 check_output(['taskkill', '-pid', str(self.process.pid), '-t', '-f'])
282 else:
281 else:
283 self.process.send_signal(sig)
282 self.process.send_signal(sig)
284
283
285 def interrupt_then_kill(self, delay=2.0):
284 def interrupt_then_kill(self, delay=2.0):
286 """Send INT, wait a delay and then send KILL."""
285 """Send INT, wait a delay and then send KILL."""
287 try:
286 try:
288 self.signal(SIGINT)
287 self.signal(SIGINT)
289 except Exception:
288 except Exception:
290 self.log.debug("interrupt failed")
289 self.log.debug("interrupt failed")
291 pass
290 pass
292 self.killer = ioloop.DelayedCallback(lambda : self.signal(SIGKILL), delay*1000, self.loop)
291 self.killer = ioloop.DelayedCallback(lambda : self.signal(SIGKILL), delay*1000, self.loop)
293 self.killer.start()
292 self.killer.start()
294
293
295 # callbacks, etc:
294 # callbacks, etc:
296
295
297 def handle_stdout(self, fd, events):
296 def handle_stdout(self, fd, events):
298 if WINDOWS:
297 if WINDOWS:
299 line = self.stdout.recv()
298 line = self.stdout.recv()
300 else:
299 else:
301 line = self.process.stdout.readline()
300 line = self.process.stdout.readline()
302 # a stopped process will be readable but return empty strings
301 # a stopped process will be readable but return empty strings
303 if line:
302 if line:
304 self.log.info(line[:-1])
303 self.log.info(line[:-1])
305 else:
304 else:
306 self.poll()
305 self.poll()
307
306
308 def handle_stderr(self, fd, events):
307 def handle_stderr(self, fd, events):
309 if WINDOWS:
308 if WINDOWS:
310 line = self.stderr.recv()
309 line = self.stderr.recv()
311 else:
310 else:
312 line = self.process.stderr.readline()
311 line = self.process.stderr.readline()
313 # a stopped process will be readable but return empty strings
312 # a stopped process will be readable but return empty strings
314 if line:
313 if line:
315 self.log.error(line[:-1])
314 self.log.error(line[:-1])
316 else:
315 else:
317 self.poll()
316 self.poll()
318
317
319 def poll(self):
318 def poll(self):
320 status = self.process.poll()
319 status = self.process.poll()
321 if status is not None:
320 if status is not None:
322 self.poller.stop()
321 self.poller.stop()
323 self.loop.remove_handler(self.stdout)
322 self.loop.remove_handler(self.stdout)
324 self.loop.remove_handler(self.stderr)
323 self.loop.remove_handler(self.stderr)
325 self.notify_stop(dict(exit_code=status, pid=self.process.pid))
324 self.notify_stop(dict(exit_code=status, pid=self.process.pid))
326 return status
325 return status
327
326
class LocalControllerLauncher(LocalProcessLauncher):
    """Launch a controller as a regular external process."""

    controller_cmd = List(ipcontroller_cmd_argv, config=True,
        help="""Popen command to launch ipcontroller.""")
    # Command line arguments to ipcontroller.
    controller_args = List(['--log-to-file','log_level=%i'%logging.INFO], config=True,
        help="""command-line args to pass to ipcontroller""")

    def find_args(self):
        return self.controller_cmd + self.controller_args

    def start(self, profile_dir):
        """Start the controller by profile_dir."""
        self.controller_args.extend(['profile_dir=%s'%profile_dir])
        self.profile_dir = unicode(profile_dir)
        self.log.info("Starting LocalControllerLauncher: %r" % self.args)
        return super(LocalControllerLauncher, self).start()


class LocalEngineLauncher(LocalProcessLauncher):
    """Launch a single engine as a regular external process."""

    engine_cmd = List(ipengine_cmd_argv, config=True,
        help="""command to launch the Engine.""")
    # Command line arguments for ipengine.
    engine_args = List(['--log-to-file','log_level=%i'%logging.INFO], config=True,
        help="command-line arguments to pass to ipengine"
    )

    def find_args(self):
        return self.engine_cmd + self.engine_args

    def start(self, profile_dir):
        """Start the engine by profile_dir."""
        self.engine_args.extend(['profile_dir=%s'%profile_dir])
        self.profile_dir = unicode(profile_dir)
        return super(LocalEngineLauncher, self).start()


class LocalEngineSetLauncher(BaseLauncher):
    """Launch a set of engines as regular external processes."""

    # Command line arguments for ipengine.
    engine_args = List(
        ['--log-to-file','log_level=%i'%logging.INFO], config=True,
        help="command-line arguments to pass to ipengine"
    )
    # launcher class
    launcher_class = LocalEngineLauncher

    launchers = Dict()
    stop_data = Dict()

    def __init__(self, work_dir=u'.', config=None, **kwargs):
        super(LocalEngineSetLauncher, self).__init__(
            work_dir=work_dir, config=config, **kwargs
        )
        self.stop_data = {}

    def start(self, n, profile_dir):
        """Start n engines by profile or profile_dir."""
        self.profile_dir = unicode(profile_dir)
        dlist = []
        for i in range(n):
            el = self.launcher_class(work_dir=self.work_dir, config=self.config, log=self.log)
            # Copy the engine args over to each engine launcher.
            el.engine_args = copy.deepcopy(self.engine_args)
            el.on_stop(self._notice_engine_stopped)
            d = el.start(profile_dir)
            if i==0:
                self.log.info("Starting LocalEngineSetLauncher: %r" % el.args)
            self.launchers[i] = el
            dlist.append(d)
        self.notify_start(dlist)
        # The consumeErrors here could be dangerous
        # dfinal = gatherBoth(dlist, consumeErrors=True)
        # dfinal.addCallback(self.notify_start)
        return dlist

    def find_args(self):
        return ['engine set']

    def signal(self, sig):
        dlist = []
        for el in self.launchers.itervalues():
            d = el.signal(sig)
            dlist.append(d)
        # dfinal = gatherBoth(dlist, consumeErrors=True)
        return dlist

    def interrupt_then_kill(self, delay=1.0):
        dlist = []
        for el in self.launchers.itervalues():
            d = el.interrupt_then_kill(delay)
            dlist.append(d)
        # dfinal = gatherBoth(dlist, consumeErrors=True)
        return dlist

    def stop(self):
        return self.interrupt_then_kill()

    def _notice_engine_stopped(self, data):
        pid = data['pid']
        for idx,el in self.launchers.iteritems():
            if el.process.pid == pid:
                break
        self.launchers.pop(idx)
        self.stop_data[idx] = data
        if not self.launchers:
            self.notify_stop(self.stop_data)


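# Editor's usage sketch (hypothetical, not part of the original module):
# driving a LocalEngineSetLauncher from the IOLoop. The profile path is a
# placeholder; a real one comes from a ProfileDir.

def _example_local_engine_set(profile_dir=u'/tmp/profile_default', n=2):
    """Sketch: start n local engines and stop the loop when all have exited."""
    launcher = LocalEngineSetLauncher(work_dir=u'.')
    launcher.on_stop(lambda data: ioloop.IOLoop.instance().stop())
    launcher.start(n, profile_dir)
    ioloop.IOLoop.instance().start()

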
#-----------------------------------------------------------------------------
# MPIExec launchers
#-----------------------------------------------------------------------------


class MPIExecLauncher(LocalProcessLauncher):
    """Launch an external process using mpiexec."""

    mpi_cmd = List(['mpiexec'], config=True,
        help="The mpiexec command to use in starting the process."
    )
    mpi_args = List([], config=True,
        help="The command line arguments to pass to mpiexec."
    )
    program = List(['date'], config=True,
        help="The program to start via mpiexec.")
    program_args = List([], config=True,
        help="The command line arguments to the program."
    )
    n = Int(1)

    def find_args(self):
        """Build self.args using all the fields."""
        return self.mpi_cmd + ['-n', str(self.n)] + self.mpi_args + \
               self.program + self.program_args

    def start(self, n):
        """Start n instances of the program using mpiexec."""
        self.n = n
        return super(MPIExecLauncher, self).start()


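# Editor's usage sketch (hypothetical, not part of the original module):
# how the traits above compose into a single argv in find_args().

def _example_mpiexec_args():
    """Sketch: inspect the argv an MPIExecLauncher would execute."""
    ml = MPIExecLauncher(work_dir=u'.')
    ml.n = 4
    # With the defaults above this returns ['mpiexec', '-n', '4', 'date'],
    # i.e. `mpiexec -n 4 date`.
    return ml.find_args()

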
class MPIExecControllerLauncher(MPIExecLauncher):
    """Launch a controller using mpiexec."""

    controller_cmd = List(ipcontroller_cmd_argv, config=True,
        help="Popen command to launch the Controller"
    )
    controller_args = List(['--log-to-file','log_level=%i'%logging.INFO], config=True,
        help="Command line arguments to pass to ipcontroller."
    )
    n = Int(1)

    def start(self, profile_dir):
        """Start the controller by profile_dir."""
        self.controller_args.extend(['profile_dir=%s'%profile_dir])
        self.profile_dir = unicode(profile_dir)
        self.log.info("Starting MPIExecControllerLauncher: %r" % self.args)
        return super(MPIExecControllerLauncher, self).start(1)

    def find_args(self):
        return self.mpi_cmd + ['-n', str(self.n)] + self.mpi_args + \
               self.controller_cmd + self.controller_args


class MPIExecEngineSetLauncher(MPIExecLauncher):

    program = List(ipengine_cmd_argv, config=True,
        help="Popen command for ipengine"
    )
    program_args = List(
        ['--log-to-file','log_level=%i'%logging.INFO], config=True,
        help="Command line arguments for ipengine."
    )
    n = Int(1)

    def start(self, n, profile_dir):
        """Start n engines by profile or profile_dir."""
        self.program_args.extend(['profile_dir=%s'%profile_dir])
        self.profile_dir = unicode(profile_dir)
        self.n = n
        self.log.info('Starting MPIExecEngineSetLauncher: %r' % self.args)
        return super(MPIExecEngineSetLauncher, self).start(n)

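# Editor's configuration sketch (hypothetical, not part of the original
# module): the MPI launchers are normally tuned via ipcluster_config.py;
# this is the programmatic equivalent. The hostfile name is a placeholder.

def _example_mpi_config():
    """Sketch: build an MPI engine-set launcher from a Config object."""
    from IPython.config.loader import Config
    c = Config()
    c.MPIExecLauncher.mpi_args = ['--hostfile', 'hosts']  # placeholder hostfile
    c.MPIExecEngineSetLauncher.n = 8
    return MPIExecEngineSetLauncher(work_dir=u'.', config=c)

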
#-----------------------------------------------------------------------------
# SSH launchers
#-----------------------------------------------------------------------------

# TODO: Get SSH Launcher working again.

class SSHLauncher(LocalProcessLauncher):
    """A minimal launcher for ssh.

    To be useful this will probably have to be extended to use the ``sshx``
    idea for environment variables.  There could be other things this needs
    as well.
    """

    ssh_cmd = List(['ssh'], config=True,
        help="command for starting ssh")
    ssh_args = List(['-tt'], config=True,
        help="args to pass to ssh")
    program = List(['date'], config=True,
        help="Program to launch via ssh")
    program_args = List([], config=True,
        help="args to pass to remote program")
    hostname = Unicode('', config=True,
        help="hostname on which to launch the program")
    user = Unicode('', config=True,
        help="username for ssh")
    location = Unicode('', config=True,
        help="user@hostname location for ssh in one setting")

    def _hostname_changed(self, name, old, new):
        if self.user:
            self.location = u'%s@%s' % (self.user, new)
        else:
            self.location = new

    def _user_changed(self, name, old, new):
        self.location = u'%s@%s' % (new, self.hostname)

    def find_args(self):
        return self.ssh_cmd + self.ssh_args + [self.location] + \
               self.program + self.program_args

    def start(self, profile_dir, hostname=None, user=None):
        self.profile_dir = unicode(profile_dir)
        if hostname is not None:
            self.hostname = hostname
        if user is not None:
            self.user = user

        return super(SSHLauncher, self).start()

    def signal(self, sig):
        if self.state == 'running':
            # send escaped ssh connection-closer
            self.process.stdin.write('~.')
            self.process.stdin.flush()


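# Editor's usage sketch (hypothetical, not part of the original module):
# the _hostname_changed/_user_changed handlers keep `location` in sync, so
# setting the traits is enough to form the ssh target. The user and host
# names below are placeholders.

def _example_ssh_location():
    """Sketch: show how the user/hostname traits combine into `location`."""
    sl = SSHLauncher(work_dir=u'.')
    sl.user = 'alice'          # triggers _user_changed
    sl.hostname = 'node1'      # triggers _hostname_changed
    # location is now u'alice@node1', so find_args() would begin with
    # ['ssh', '-tt', u'alice@node1', ...]
    return sl.location

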
class SSHControllerLauncher(SSHLauncher):

    program = List(ipcontroller_cmd_argv, config=True,
        help="remote ipcontroller command.")
    program_args = List(['--reuse-files', '--log-to-file','log_level=%i'%logging.INFO], config=True,
        help="Command line arguments to ipcontroller.")


class SSHEngineLauncher(SSHLauncher):
    program = List(ipengine_cmd_argv, config=True,
        help="remote ipengine command.")
    # Command line arguments for ipengine.
    program_args = List(
        ['--log-to-file','log_level=%i'%logging.INFO], config=True,
        help="Command line arguments to ipengine."
    )

class SSHEngineSetLauncher(LocalEngineSetLauncher):
    launcher_class = SSHEngineLauncher
    engines = Dict(config=True,
        help="""dict of engines to launch. This is a dict by hostname of ints,
        corresponding to the number of engines to start on that host.""")

    def start(self, n, profile_dir):
        """Start engines by profile or profile_dir.
        `n` is ignored, and the `engines` config property is used instead.
        """

        self.profile_dir = unicode(profile_dir)
        dlist = []
        for host, n in self.engines.iteritems():
            if isinstance(n, (tuple, list)):
                n, args = n
            else:
                args = copy.deepcopy(self.engine_args)

            if '@' in host:
                user,host = host.split('@',1)
            else:
                user=None
            for i in range(n):
                el = self.launcher_class(work_dir=self.work_dir, config=self.config, log=self.log)

                # Copy the engine args over to each engine launcher.
                el.program_args = args
                el.on_stop(self._notice_engine_stopped)
                d = el.start(profile_dir, user=user, hostname=host)
                if i==0:
                    self.log.info("Starting SSHEngineSetLauncher: %r" % el.args)
                self.launchers[host+str(i)] = el
                dlist.append(d)
        self.notify_start(dlist)
        return dlist


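# Editor's configuration sketch (hypothetical, not part of the original
# module): the `engines` dict maps hosts to engine counts, optionally with
# per-host arguments as an (n, args) tuple. Hostnames are placeholders.

def _example_ssh_engines_config():
    """Sketch: programmatic equivalent of c.SSHEngineSetLauncher.engines."""
    from IPython.config.loader import Config
    c = Config()
    c.SSHEngineSetLauncher.engines = {
        'node1': 2,                            # two engines on node1
        'alice@node2': (4, ['--log-to-file']), # four engines, custom args
    }
    return SSHEngineSetLauncher(work_dir=u'.', config=c)

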
#-----------------------------------------------------------------------------
# Windows HPC Server 2008 scheduler launchers
#-----------------------------------------------------------------------------


# This is only used on Windows.
def find_job_cmd():
    if WINDOWS:
        try:
            return find_cmd('job')
        except (FindCmdError, ImportError):
            # ImportError will be raised if win32api is not installed
            return 'job'
    else:
        return 'job'


class WindowsHPCLauncher(BaseLauncher):

    job_id_regexp = Unicode(r'\d+', config=True,
        help="""A regular expression used to get the job id from the output of the
        submit_command."""
    )
    job_file_name = Unicode(u'ipython_job.xml', config=True,
        help="The filename of the instantiated job script.")
    # The full path to the instantiated job script. This gets made dynamically
    # by combining the work_dir with the job_file_name.
    job_file = Unicode(u'')
    scheduler = Unicode('', config=True,
        help="The hostname of the scheduler to submit the job to.")
    job_cmd = Unicode(find_job_cmd(), config=True,
        help="The command for submitting jobs.")

    def __init__(self, work_dir=u'.', config=None, **kwargs):
        super(WindowsHPCLauncher, self).__init__(
            work_dir=work_dir, config=config, **kwargs
        )

    @property
    def job_file(self):
        return os.path.join(self.work_dir, self.job_file_name)

    def write_job_file(self, n):
        raise NotImplementedError("Implement write_job_file in a subclass.")

    def find_args(self):
        return [u'job.exe']

    def parse_job_id(self, output):
        """Take the output of the submit command and return the job id."""
        m = re.search(self.job_id_regexp, output)
        if m is not None:
            job_id = m.group()
        else:
            raise LauncherError("Job id couldn't be determined: %s" % output)
        self.job_id = job_id
        self.log.info('Job started with job id: %r' % job_id)
        return job_id

    def start(self, n):
        """Start n copies of the process using the Win HPC job scheduler."""
        self.write_job_file(n)
        args = [
            'submit',
            '/jobfile:%s' % self.job_file,
            '/scheduler:%s' % self.scheduler
        ]
        self.log.info("Starting Win HPC Job: %s" % (self.job_cmd + ' ' + ' '.join(args),))
        # Twisted will raise DeprecationWarnings if we try to pass unicode to this
        output = check_output([self.job_cmd]+args,
            env=os.environ,
            cwd=self.work_dir,
            stderr=STDOUT
        )
        job_id = self.parse_job_id(output)
        self.notify_start(job_id)
        return job_id

    def stop(self):
        args = [
            'cancel',
            self.job_id,
            '/scheduler:%s' % self.scheduler
        ]
        self.log.info("Stopping Win HPC Job: %s" % (self.job_cmd + ' ' + ' '.join(args),))
        try:
            output = check_output([self.job_cmd]+args,
                env=os.environ,
                cwd=self.work_dir,
                stderr=STDOUT
            )
        except:
            output = 'The job already appears to be stopped: %r' % self.job_id
        self.notify_stop(dict(job_id=self.job_id, output=output)) # Pass the output of the kill cmd
        return output


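# Editor's usage sketch (hypothetical, not part of the original module):
# parse_job_id only needs the submit command's stdout, so the regex handling
# can be exercised without a real scheduler. The output text is made up.

def _example_parse_job_id():
    """Sketch: extract a job id from typical `job submit` output."""
    launcher = WindowsHPCLauncher(work_dir=u'.')
    # The default job_id_regexp r'\d+' grabs the first run of digits:
    return launcher.parse_job_id('Job has been submitted. ID: 1234')  # '1234'

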
class WindowsHPCControllerLauncher(WindowsHPCLauncher):

    job_file_name = Unicode(u'ipcontroller_job.xml', config=True,
        help="WinHPC xml job file.")
    extra_args = List([], config=False,
        help="extra args to pass to ipcontroller")

    def write_job_file(self, n):
        job = IPControllerJob(config=self.config)

        t = IPControllerTask(config=self.config)
        # The task's work directory is *not* the actual work directory of
        # the controller. It is used as the base path for the stdout/stderr
        # files that the scheduler redirects to.
        t.work_directory = self.profile_dir
        # Add the profile_dir argument from self.start().
        t.controller_args.extend(self.extra_args)
        job.add_task(t)

        self.log.info("Writing job description file: %s" % self.job_file)
        job.write(self.job_file)

    @property
    def job_file(self):
        return os.path.join(self.profile_dir, self.job_file_name)

    def start(self, profile_dir):
        """Start the controller by profile_dir."""
        self.extra_args = ['profile_dir=%s'%profile_dir]
        self.profile_dir = unicode(profile_dir)
        return super(WindowsHPCControllerLauncher, self).start(1)


class WindowsHPCEngineSetLauncher(WindowsHPCLauncher):

    job_file_name = Unicode(u'ipengineset_job.xml', config=True,
        help="jobfile for ipengines job")
    extra_args = List([], config=False,
        help="extra args to pass to ipengine")

    def write_job_file(self, n):
        job = IPEngineSetJob(config=self.config)

        for i in range(n):
            t = IPEngineTask(config=self.config)
            # The task's work directory is *not* the actual work directory of
            # the engine. It is used as the base path for the stdout/stderr
            # files that the scheduler redirects to.
            t.work_directory = self.profile_dir
            # Add the profile_dir argument from self.start().
            t.engine_args.extend(self.extra_args)
            job.add_task(t)

        self.log.info("Writing job description file: %s" % self.job_file)
        job.write(self.job_file)

    @property
    def job_file(self):
        return os.path.join(self.profile_dir, self.job_file_name)

    def start(self, n, profile_dir):
        """Start n engines by profile_dir."""
        self.extra_args = ['profile_dir=%s'%profile_dir]
        self.profile_dir = unicode(profile_dir)
        return super(WindowsHPCEngineSetLauncher, self).start(n)


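# Editor's configuration sketch (hypothetical, not part of the original
# module): the WinHPC launchers mainly need a scheduler host; the scheduler
# name and job file name below are placeholders.

def _example_winhpc_config():
    """Sketch: configure the WinHPC engine-set launcher programmatically."""
    from IPython.config.loader import Config
    c = Config()
    c.WindowsHPCLauncher.scheduler = 'HEADNODE'
    c.WindowsHPCEngineSetLauncher.job_file_name = u'my_engines_job.xml'
    return WindowsHPCEngineSetLauncher(work_dir=u'.', config=c)

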
#-----------------------------------------------------------------------------
# Batch (PBS) system launchers
#-----------------------------------------------------------------------------

class BatchSystemLauncher(BaseLauncher):
    """Launch an external process using a batch system.

    This class is designed to work with UNIX batch systems like PBS, LSF,
    GridEngine, etc. The overall model is that there are different commands
    like qsub, qdel, etc. that handle the starting and stopping of the process.

    This class also has the notion of a batch script. The ``batch_template``
    attribute can be set to a string that is a template for the batch script.
    This template is instantiated using string formatting. Thus the template can
    use {n} for the number of instances. Subclasses can add additional variables
    to the template dict.
    """

    # Subclasses must fill these in. See PBSEngineSet
    submit_command = List([''], config=True,
        help="The name of the command line program used to submit jobs.")
    delete_command = List([''], config=True,
        help="The name of the command line program used to delete jobs.")
    job_id_regexp = Unicode('', config=True,
        help="""A regular expression used to get the job id from the output of the
        submit_command.""")
    batch_template = Unicode('', config=True,
        help="The string that is the batch script template itself.")
    batch_template_file = Unicode(u'', config=True,
        help="The file that contains the batch template.")
    batch_file_name = Unicode(u'batch_script', config=True,
        help="The filename of the instantiated batch script.")
    queue = Unicode(u'', config=True,
        help="The PBS Queue.")

    # not configurable, override in subclasses
    # PBS Job Array regex
    job_array_regexp = Unicode('')
    job_array_template = Unicode('')
    # PBS Queue regex
    queue_regexp = Unicode('')
    queue_template = Unicode('')
    # The default batch template, override in subclasses
    default_template = Unicode('')
    # The full path to the instantiated batch script.
    batch_file = Unicode(u'')
    # the format dict used with batch_template:
    context = Dict()
    # the Formatter instance for rendering the templates:
    formatter = Instance(EvalFormatter, (), {})


    def find_args(self):
        return self.submit_command + [self.batch_file]

    def __init__(self, work_dir=u'.', config=None, **kwargs):
        super(BatchSystemLauncher, self).__init__(
            work_dir=work_dir, config=config, **kwargs
        )
        self.batch_file = os.path.join(self.work_dir, self.batch_file_name)

    def parse_job_id(self, output):
        """Take the output of the submit command and return the job id."""
        m = re.search(self.job_id_regexp, output)
        if m is not None:
            job_id = m.group()
        else:
            raise LauncherError("Job id couldn't be determined: %s" % output)
        self.job_id = job_id
        self.log.info('Job submitted with job id: %r' % job_id)
        return job_id

    def write_batch_script(self, n):
        """Instantiate and write the batch script to the work_dir."""
        self.context['n'] = n
        self.context['queue'] = self.queue
        # first priority is batch_template if set
        if self.batch_template_file and not self.batch_template:
            # second priority is batch_template_file
            with open(self.batch_template_file) as f:
                self.batch_template = f.read()
        if not self.batch_template:
            # third (last) priority is default_template
            self.batch_template = self.default_template

        regex = re.compile(self.job_array_regexp)
        # print regex.search(self.batch_template)
        if not regex.search(self.batch_template):
            self.log.info("adding job array settings to batch script")
            firstline, rest = self.batch_template.split('\n',1)
            self.batch_template = u'\n'.join([firstline, self.job_array_template, rest])

        regex = re.compile(self.queue_regexp)
        # print regex.search(self.batch_template)
        if self.queue and not regex.search(self.batch_template):
            self.log.info("adding PBS queue settings to batch script")
            firstline, rest = self.batch_template.split('\n',1)
            self.batch_template = u'\n'.join([firstline, self.queue_template, rest])

        script_as_string = self.formatter.format(self.batch_template, **self.context)
        self.log.info('Writing instantiated batch script: %s' % self.batch_file)

        with open(self.batch_file, 'w') as f:
            f.write(script_as_string)
        os.chmod(self.batch_file, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)

    def start(self, n, profile_dir):
        """Start n copies of the process using a batch system."""
        # Here we save profile_dir in the context so it
        # can be used in the batch script template as {profile_dir}
        self.context['profile_dir'] = profile_dir
        self.profile_dir = unicode(profile_dir)
        self.write_batch_script(n)
        output = check_output(self.args, env=os.environ)

        job_id = self.parse_job_id(output)
        self.notify_start(job_id)
        return job_id

    def stop(self):
        output = check_output(self.delete_command+[self.job_id], env=os.environ)
        self.notify_stop(dict(job_id=self.job_id, output=output)) # Pass the output of the kill cmd
        return output


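# Editor's usage sketch (hypothetical, not part of the original module):
# how write_batch_script renders a template. EvalFormatter fills {}-style
# fields from the context dict; the template text below is made up.

def _example_render_batch_template():
    """Sketch: render a toy batch template the way write_batch_script does."""
    formatter = EvalFormatter()
    template = u"#!/bin/sh\n#PBS -t 1-{n}\necho {profile_dir}\n"
    context = dict(n=4, profile_dir=u'/tmp/profile_default')
    return formatter.format(template, **context)

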
class PBSLauncher(BatchSystemLauncher):
    """A BatchSystemLauncher subclass for PBS."""

    submit_command = List(['qsub'], config=True,
        help="The PBS submit command ['qsub']")
    delete_command = List(['qdel'], config=True,
        help="The PBS delete command ['qdel']")
    job_id_regexp = Unicode(r'\d+', config=True,
        help="Regular expression for identifying the job ID [r'\d+']")

    batch_file = Unicode(u'')
    job_array_regexp = Unicode('#PBS\W+-t\W+[\w\d\-\$]+')
    job_array_template = Unicode('#PBS -t 1-{n}')
    queue_regexp = Unicode('#PBS\W+-q\W+\$?\w+')
    queue_template = Unicode('#PBS -q {queue}')


class PBSControllerLauncher(PBSLauncher):
    """Launch a controller using PBS."""

    batch_file_name = Unicode(u'pbs_controller', config=True,
        help="batch file name for the controller job.")
    default_template = Unicode("""#!/bin/sh
#PBS -V
#PBS -N ipcontroller
%s --log-to-file profile_dir={profile_dir}
"""%(' '.join(ipcontroller_cmd_argv)))

    def start(self, profile_dir):
        """Start the controller by profile or profile_dir."""
        self.log.info("Starting PBSControllerLauncher: %r" % self.args)
        return super(PBSControllerLauncher, self).start(1, profile_dir)


class PBSEngineSetLauncher(PBSLauncher):
    """Launch Engines using PBS"""
    batch_file_name = Unicode(u'pbs_engines', config=True,
        help="batch file name for the engine(s) job.")
    default_template = Unicode(u"""#!/bin/sh
#PBS -V
#PBS -N ipengine
%s profile_dir={profile_dir}
"""%(' '.join(ipengine_cmd_argv)))

    def start(self, n, profile_dir):
        """Start n engines by profile or profile_dir."""
        self.log.info('Starting %i engines with PBSEngineSetLauncher: %r' % (n, self.args))
        return super(PBSEngineSetLauncher, self).start(n, profile_dir)

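# Editor's configuration sketch (hypothetical, not part of the original
# module): a custom PBS batch template supplied via config. The queue name
# and walltime value are placeholders.

def _example_pbs_config():
    """Sketch: configure PBSEngineSetLauncher with a custom template."""
    from IPython.config.loader import Config
    c = Config()
    c.BatchSystemLauncher.queue = 'longjobs'
    c.PBSEngineSetLauncher.batch_template = u"\n".join([
        u"#!/bin/sh",
        u"#PBS -V",
        u"#PBS -l walltime=01:00:00",
        u"ipengine profile_dir={profile_dir}",
    ])
    return PBSEngineSetLauncher(work_dir=u'.', config=c)

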
# SGE is very similar to PBS

class SGELauncher(PBSLauncher):
    """Sun GridEngine is a PBS clone with slightly different syntax"""
    job_array_regexp = Unicode('#\$\W+\-t')
    job_array_template = Unicode('#$ -t 1-{n}')
    queue_regexp = Unicode('#\$\W+-q\W+\$?\w+')
    queue_template = Unicode('#$ -q {queue}')

class SGEControllerLauncher(SGELauncher):
    """Launch a controller using SGE."""

    batch_file_name = Unicode(u'sge_controller', config=True,
        help="batch file name for the ipcontroller job.")
    default_template = Unicode(u"""#$ -V
#$ -S /bin/sh
#$ -N ipcontroller
%s --log-to-file profile_dir={profile_dir}
"""%(' '.join(ipcontroller_cmd_argv)))

    def start(self, profile_dir):
        """Start the controller by profile or profile_dir."""
        self.log.info("Starting SGEControllerLauncher: %r" % self.args)
        return super(SGEControllerLauncher, self).start(1, profile_dir)

class SGEEngineSetLauncher(SGELauncher):
    """Launch Engines with SGE"""
    batch_file_name = Unicode(u'sge_engines', config=True,
        help="batch file name for the engine(s) job.")
    default_template = Unicode("""#$ -V
#$ -S /bin/sh
#$ -N ipengine
%s profile_dir={profile_dir}
"""%(' '.join(ipengine_cmd_argv)))

    def start(self, n, profile_dir):
        """Start n engines by profile or profile_dir."""
        self.log.info('Starting %i engines with SGEEngineSetLauncher: %r' % (n, self.args))
        return super(SGEEngineSetLauncher, self).start(n, profile_dir)


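# Editor's usage sketch (hypothetical, not part of the original module):
# the queue_template above is a {}-style field, so write_batch_script can
# substitute the configured queue name. The queue name is a placeholder.

def _example_sge_queue_line():
    """Sketch: render the SGE queue directive the way the formatter does."""
    return EvalFormatter().format('#$ -q {queue}', queue='all.q')  # '#$ -q all.q'

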
#-----------------------------------------------------------------------------
# A launcher for ipcluster itself!
#-----------------------------------------------------------------------------


class IPClusterLauncher(LocalProcessLauncher):
    """Launch the ipcluster program in an external process."""

    ipcluster_cmd = List(ipcluster_cmd_argv, config=True,
        help="Popen command for ipcluster")
    ipcluster_args = List(
        ['--clean-logs', '--log-to-file', 'log_level=%i'%logging.INFO], config=True,
        help="Command line arguments to pass to ipcluster.")
    ipcluster_subcommand = Unicode('start')
    ipcluster_n = Int(2)

    def find_args(self):
        return self.ipcluster_cmd + ['--'+self.ipcluster_subcommand] + \
               ['n=%i'%self.ipcluster_n] + self.ipcluster_args

    def start(self):
        self.log.info("Starting ipcluster: %r" % self.args)
        return super(IPClusterLauncher, self).start()

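# Editor's usage sketch (hypothetical, not part of the original module):
# with the defaults above, find_args() assembles
# ipcluster_cmd_argv + ['--start', 'n=2', '--clean-logs', '--log-to-file',
# 'log_level=20'].

def _example_ipcluster_args():
    """Sketch: inspect the ipcluster argv without launching anything."""
    return IPClusterLauncher(work_dir=u'.').find_args()

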
#-----------------------------------------------------------------------------
# Collections of launchers
#-----------------------------------------------------------------------------

local_launchers = [
    LocalControllerLauncher,
    LocalEngineLauncher,
    LocalEngineSetLauncher,
]
mpi_launchers = [
    MPIExecLauncher,
    MPIExecControllerLauncher,
    MPIExecEngineSetLauncher,
]
ssh_launchers = [
    SSHLauncher,
    SSHControllerLauncher,
    SSHEngineLauncher,
    SSHEngineSetLauncher,
]
winhpc_launchers = [
    WindowsHPCLauncher,
    WindowsHPCControllerLauncher,
    WindowsHPCEngineSetLauncher,
]
pbs_launchers = [
    PBSLauncher,
    PBSControllerLauncher,
    PBSEngineSetLauncher,
]
sge_launchers = [
    SGELauncher,
    SGEControllerLauncher,
    SGEEngineSetLauncher,
]
all_launchers = local_launchers + mpi_launchers + ssh_launchers + winhpc_launchers \
              + pbs_launchers + sge_launchers
@@ -1,108 +1,110 @@
#!/usr/bin/env python
"""A simple logger object that consolidates messages incoming from ipcluster processes."""

#-----------------------------------------------------------------------------
# Copyright (C) 2011 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
#-----------------------------------------------------------------------------

#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------


import logging
import sys

import zmq
from zmq.eventloop import ioloop, zmqstream

from IPython.config.configurable import Configurable
from IPython.utils.traitlets import Int, Unicode, Instance, List

#-----------------------------------------------------------------------------
# Classes
#-----------------------------------------------------------------------------


31 class LogWatcher(LoggingFactory):
30 class LogWatcher(Configurable):
32 """A simple class that receives messages on a SUB socket, as published
31 """A simple class that receives messages on a SUB socket, as published
33 by subclasses of `zmq.log.handlers.PUBHandler`, and logs them itself.
32 by subclasses of `zmq.log.handlers.PUBHandler`, and logs them itself.
34
33
35 This can subscribe to multiple topics, but defaults to all topics.
34 This can subscribe to multiple topics, but defaults to all topics.
36 """
35 """
36
37 log = Instance('logging.Logger', ('root',))
38
37 # configurables
39 # configurables
38 topics = List([''], config=True,
40 topics = List([''], config=True,
39 help="The ZMQ topics to subscribe to. Default is to subscribe to all messages")
41 help="The ZMQ topics to subscribe to. Default is to subscribe to all messages")
40 url = Unicode('tcp://127.0.0.1:20202', config=True,
42 url = Unicode('tcp://127.0.0.1:20202', config=True,
41 help="ZMQ url on which to listen for log messages")
43 help="ZMQ url on which to listen for log messages")
42
44
43 # internals
45 # internals
44 stream = Instance('zmq.eventloop.zmqstream.ZMQStream')
46 stream = Instance('zmq.eventloop.zmqstream.ZMQStream')
45
47
46 context = Instance(zmq.Context)
48 context = Instance(zmq.Context)
47 def _context_default(self):
49 def _context_default(self):
48 return zmq.Context.instance()
50 return zmq.Context.instance()
49
51
50 loop = Instance(zmq.eventloop.ioloop.IOLoop)
52 loop = Instance(zmq.eventloop.ioloop.IOLoop)
51 def _loop_default(self):
53 def _loop_default(self):
52 return ioloop.IOLoop.instance()
54 return ioloop.IOLoop.instance()
53
55
54 def __init__(self, **kwargs):
56 def __init__(self, **kwargs):
55 super(LogWatcher, self).__init__(**kwargs)
57 super(LogWatcher, self).__init__(**kwargs)
56 s = self.context.socket(zmq.SUB)
58 s = self.context.socket(zmq.SUB)
57 s.bind(self.url)
59 s.bind(self.url)
58 self.stream = zmqstream.ZMQStream(s, self.loop)
60 self.stream = zmqstream.ZMQStream(s, self.loop)
59 self.subscribe()
61 self.subscribe()
60 self.on_trait_change(self.subscribe, 'topics')
62 self.on_trait_change(self.subscribe, 'topics')
61
63
62 def start(self):
64 def start(self):
63 self.stream.on_recv(self.log_message)
65 self.stream.on_recv(self.log_message)
64
66
65 def stop(self):
67 def stop(self):
66 self.stream.stop_on_recv()
68 self.stream.stop_on_recv()
67
69
68 def subscribe(self):
70 def subscribe(self):
69 """Update our SUB socket's subscriptions."""
71 """Update our SUB socket's subscriptions."""
70 self.stream.setsockopt(zmq.UNSUBSCRIBE, '')
72 self.stream.setsockopt(zmq.UNSUBSCRIBE, '')
71 if '' in self.topics:
73 if '' in self.topics:
72 self.log.debug("Subscribing to: everything")
74 self.log.debug("Subscribing to: everything")
73 self.stream.setsockopt(zmq.SUBSCRIBE, '')
75 self.stream.setsockopt(zmq.SUBSCRIBE, '')
74 else:
76 else:
75 for topic in self.topics:
77 for topic in self.topics:
76 self.log.debug("Subscribing to: %r"%(topic))
78 self.log.debug("Subscribing to: %r"%(topic))
77 self.stream.setsockopt(zmq.SUBSCRIBE, topic)
79 self.stream.setsockopt(zmq.SUBSCRIBE, topic)
78
80
79 def _extract_level(self, topic_str):
81 def _extract_level(self, topic_str):
80 """Turn 'engine.0.INFO.extra' into (logging.INFO, 'engine.0.extra')"""
82 """Turn 'engine.0.INFO.extra' into (logging.INFO, 'engine.0.extra')"""
81 topics = topic_str.split('.')
83 topics = topic_str.split('.')
82 for idx,t in enumerate(topics):
84 for idx,t in enumerate(topics):
83 level = getattr(logging, t, None)
85 level = getattr(logging, t, None)
84 if level is not None:
86 if level is not None:
85 break
87 break
86
88
87 if level is None:
89 if level is None:
88 level = logging.INFO
90 level = logging.INFO
89 else:
91 else:
90 topics.pop(idx)
92 topics.pop(idx)
91
93
92 return level, '.'.join(topics)
94 return level, '.'.join(topics)
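# For example (illustrative):
#   self._extract_level('engine.0.INFO.extra') -> (logging.INFO, 'engine.0.extra')
#   self._extract_level('engine.0.extra')      -> (logging.INFO, 'engine.0.extra')  # INFO is the fallback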
93
95
94
96
95 def log_message(self, raw):
97 def log_message(self, raw):
96 """receive and parse a message, then log it."""
98 """receive and parse a message, then log it."""
97 if len(raw) != 2 or '.' not in raw[0]:
99 if len(raw) != 2 or '.' not in raw[0]:
98 self.log.error("Invalid log message: %s"%raw)
100 self.log.error("Invalid log message: %s"%raw)
99 return
101 return
100 else:
102 else:
101 topic, msg = raw
103 topic, msg = raw
102 # don't add a newline, since log messages already end with one:
104 # don't add a newline, since log messages already end with one:
103 topic,level_name = topic.rsplit('.',1)
105 topic,level_name = topic.rsplit('.',1)
104 level,topic = self._extract_level(topic)
106 level,topic = self._extract_level(topic)
105 if msg[-1] == '\n':
107 if msg[-1] == '\n':
106 msg = msg[:-1]
108 msg = msg[:-1]
107 self.log.log(level, "[%s] %s" % (topic, msg))
109 self.log.log(level, "[%s] %s" % (topic, msg))
108
110
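# A minimal sketch of the publishing side this watcher consumes, assuming
# pyzmq's zmq.log.handlers.PUBHandler and the default url bound above:
#
#   import logging
#   import zmq
#   from zmq.log.handlers import PUBHandler
#
#   ctx = zmq.Context.instance()
#   pub = ctx.socket(zmq.PUB)
#   pub.connect('tcp://127.0.0.1:20202')  # the LogWatcher's default url
#   handler = PUBHandler(pub)
#   handler.root_topic = 'engine.0'       # topics arrive as 'engine.0.INFO', etc.
#   logger = logging.getLogger('demo')
#   logger.addHandler(handler)
#   logger.setLevel(logging.INFO)
#   logger.info("hello")                  # the watcher logs: [engine.0] hello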
@@ -1,165 +1,166 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 """
2 """
3 A multi-heart Heartbeat system using PUB and XREP sockets. Pings are sent out on the PUB,
3 A multi-heart Heartbeat system using PUB and XREP sockets. Pings are sent out on the PUB,
4 and hearts are tracked based on their XREQ identities.
4 and hearts are tracked based on their XREQ identities.
5 """
5 """
6 #-----------------------------------------------------------------------------
6 #-----------------------------------------------------------------------------
7 # Copyright (C) 2010-2011 The IPython Development Team
7 # Copyright (C) 2010-2011 The IPython Development Team
8 #
8 #
9 # Distributed under the terms of the BSD License. The full license is in
9 # Distributed under the terms of the BSD License. The full license is in
10 # the file COPYING, distributed as part of this software.
10 # the file COPYING, distributed as part of this software.
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12
12
13 from __future__ import print_function
13 from __future__ import print_function
14 import time
14 import time
15 import uuid
15 import uuid
16
16
17 import zmq
17 import zmq
18 from zmq.devices import ThreadDevice
18 from zmq.devices import ThreadDevice
19 from zmq.eventloop import ioloop, zmqstream
19 from zmq.eventloop import ioloop, zmqstream
20
20
21 from IPython.config.configurable import Configurable
21 from IPython.utils.traitlets import Set, Instance, CFloat
22 from IPython.utils.traitlets import Set, Instance, CFloat
22 from IPython.parallel.factory import LoggingFactory
23
23
24 class Heart(object):
24 class Heart(object):
25 """A basic heart object for responding to a HeartMonitor.
25 """A basic heart object for responding to a HeartMonitor.
26 This is a simple wrapper with defaults for the most common
26 This is a simple wrapper with defaults for the most common
27 Device model for responding to heartbeats.
27 Device model for responding to heartbeats.
28
28
29 It simply builds a threadsafe zmq.FORWARDER Device, defaulting to using
29 It simply builds a threadsafe zmq.FORWARDER Device, defaulting to using
30 SUB/XREQ for in/out.
30 SUB/XREQ for in/out.
31
31
32 You can specify the XREQ's IDENTITY via the optional heart_id argument."""
32 You can specify the XREQ's IDENTITY via the optional heart_id argument."""
33 device=None
33 device=None
34 id=None
34 id=None
35 def __init__(self, in_addr, out_addr, in_type=zmq.SUB, out_type=zmq.XREQ, heart_id=None):
35 def __init__(self, in_addr, out_addr, in_type=zmq.SUB, out_type=zmq.XREQ, heart_id=None):
36 self.device = ThreadDevice(zmq.FORWARDER, in_type, out_type)
36 self.device = ThreadDevice(zmq.FORWARDER, in_type, out_type)
37 self.device.daemon=True
37 self.device.daemon=True
38 self.device.connect_in(in_addr)
38 self.device.connect_in(in_addr)
39 self.device.connect_out(out_addr)
39 self.device.connect_out(out_addr)
40 if in_type == zmq.SUB:
40 if in_type == zmq.SUB:
41 self.device.setsockopt_in(zmq.SUBSCRIBE, "")
41 self.device.setsockopt_in(zmq.SUBSCRIBE, "")
42 if heart_id is None:
42 if heart_id is None:
43 heart_id = str(uuid.uuid4())
43 heart_id = str(uuid.uuid4())
44 self.device.setsockopt_out(zmq.IDENTITY, heart_id)
44 self.device.setsockopt_out(zmq.IDENTITY, heart_id)
45 self.id = heart_id
45 self.id = heart_id
46
46
47 def start(self):
47 def start(self):
48 return self.device.start()
48 return self.device.start()
49
49
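# A minimal usage sketch (assumes a HeartMonitor bound on these ports, as in
# the __main__ block at the bottom of this file):
#   heart = Heart('tcp://127.0.0.1:5555', 'tcp://127.0.0.1:5556', heart_id='engine-0')
#   heart.start()  # echoes each broadcast ping back, tagged with its IDENTITY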
50 class HeartMonitor(LoggingFactory):
50 class HeartMonitor(Configurable):
51 """A basic HeartMonitor class
51 """A basic HeartMonitor class
52 pingstream: a PUB stream
52 pingstream: a PUB stream
53 pongstream: an XREP stream
53 pongstream: an XREP stream
54 period: the period of the heartbeat in milliseconds"""
54 period: the period of the heartbeat in milliseconds"""
55
55
56 period=CFloat(1000, config=True,
56 period=CFloat(1000, config=True,
57 help='The period (in ms) at which the Hub pings the engines for heartbeats '
57 help='The period (in ms) at which the Hub pings the engines for heartbeats '
58 '[default: 1000]',
58 '[default: 1000]',
59 )
59 )
60
60
61 log = Instance('logging.Logger', ('root',))
61 pingstream=Instance('zmq.eventloop.zmqstream.ZMQStream')
62 pingstream=Instance('zmq.eventloop.zmqstream.ZMQStream')
62 pongstream=Instance('zmq.eventloop.zmqstream.ZMQStream')
63 pongstream=Instance('zmq.eventloop.zmqstream.ZMQStream')
63 loop = Instance('zmq.eventloop.ioloop.IOLoop')
64 loop = Instance('zmq.eventloop.ioloop.IOLoop')
64 def _loop_default(self):
65 def _loop_default(self):
65 return ioloop.IOLoop.instance()
66 return ioloop.IOLoop.instance()
66
67
67 # not settable:
68 # not settable:
68 hearts=Set()
69 hearts=Set()
69 responses=Set()
70 responses=Set()
70 on_probation=Set()
71 on_probation=Set()
71 last_ping=CFloat(0)
72 last_ping=CFloat(0)
72 _new_handlers = Set()
73 _new_handlers = Set()
73 _failure_handlers = Set()
74 _failure_handlers = Set()
74 lifetime = CFloat(0)
75 lifetime = CFloat(0)
75 tic = CFloat(0)
76 tic = CFloat(0)
76
77
77 def __init__(self, **kwargs):
78 def __init__(self, **kwargs):
78 super(HeartMonitor, self).__init__(**kwargs)
79 super(HeartMonitor, self).__init__(**kwargs)
79
80
80 self.pongstream.on_recv(self.handle_pong)
81 self.pongstream.on_recv(self.handle_pong)
81
82
82 def start(self):
83 def start(self):
83 self.caller = ioloop.PeriodicCallback(self.beat, self.period, self.loop)
84 self.caller = ioloop.PeriodicCallback(self.beat, self.period, self.loop)
84 self.caller.start()
85 self.caller.start()
85
86
86 def add_new_heart_handler(self, handler):
87 def add_new_heart_handler(self, handler):
87 """add a new handler for new hearts"""
88 """add a new handler for new hearts"""
88 self.log.debug("heartbeat::new_heart_handler: %s"%handler)
89 self.log.debug("heartbeat::new_heart_handler: %s"%handler)
89 self._new_handlers.add(handler)
90 self._new_handlers.add(handler)
90
91
91 def add_heart_failure_handler(self, handler):
92 def add_heart_failure_handler(self, handler):
92 """add a new handler for heart failure"""
93 """add a new handler for heart failure"""
93 self.log.debug("heartbeat::new heart failure handler: %s"%handler)
94 self.log.debug("heartbeat::new heart failure handler: %s"%handler)
94 self._failure_handlers.add(handler)
95 self._failure_handlers.add(handler)
95
96
96 def beat(self):
97 def beat(self):
97 self.pongstream.flush()
98 self.pongstream.flush()
98 self.last_ping = self.lifetime
99 self.last_ping = self.lifetime
99
100
100 toc = time.time()
101 toc = time.time()
101 self.lifetime += toc-self.tic
102 self.lifetime += toc-self.tic
102 self.tic = toc
103 self.tic = toc
103 # self.log.debug("heartbeat::%s"%self.lifetime)
104 # self.log.debug("heartbeat::%s"%self.lifetime)
104 goodhearts = self.hearts.intersection(self.responses)
105 goodhearts = self.hearts.intersection(self.responses)
105 missed_beats = self.hearts.difference(goodhearts)
106 missed_beats = self.hearts.difference(goodhearts)
106 heartfailures = self.on_probation.intersection(missed_beats)
107 heartfailures = self.on_probation.intersection(missed_beats)
107 newhearts = self.responses.difference(goodhearts)
108 newhearts = self.responses.difference(goodhearts)
108 map(self.handle_new_heart, newhearts)
109 map(self.handle_new_heart, newhearts)
109 map(self.handle_heart_failure, heartfailures)
110 map(self.handle_heart_failure, heartfailures)
110 self.on_probation = missed_beats.intersection(self.hearts)
111 self.on_probation = missed_beats.intersection(self.hearts)
111 self.responses = set()
112 self.responses = set()
112 # print self.on_probation, self.hearts
113 # print self.on_probation, self.hearts
113 # self.log.debug("heartbeat::beat %.3f, %i beating hearts"%(self.lifetime, len(self.hearts)))
114 # self.log.debug("heartbeat::beat %.3f, %i beating hearts"%(self.lifetime, len(self.hearts)))
114 self.pingstream.send(str(self.lifetime))
115 self.pingstream.send(str(self.lifetime))
115
116
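# Worked example of one beat, with illustrative sets:
#   hearts={A,B}, responses={B,C}, on_probation={A}
#   goodhearts    = {B}   # responded and already registered
#   missed_beats  = {A}   # registered but silent this round
#   heartfailures = {A}   # silent two rounds in a row -> failure handlers fire
#   newhearts     = {C}   # responded but not yet registered -> new-heart handlers fire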
116 def handle_new_heart(self, heart):
117 def handle_new_heart(self, heart):
117 if self._new_handlers:
118 if self._new_handlers:
118 for handler in self._new_handlers:
119 for handler in self._new_handlers:
119 handler(heart)
120 handler(heart)
120 else:
121 else:
121 self.log.info("heartbeat::yay, got new heart %s!"%heart)
122 self.log.info("heartbeat::yay, got new heart %s!"%heart)
122 self.hearts.add(heart)
123 self.hearts.add(heart)
123
124
124 def handle_heart_failure(self, heart):
125 def handle_heart_failure(self, heart):
125 if self._failure_handlers:
126 if self._failure_handlers:
126 for handler in self._failure_handlers:
127 for handler in self._failure_handlers:
127 try:
128 try:
128 handler(heart)
129 handler(heart)
129 except Exception as e:
130 except Exception as e:
130 self.log.error("heartbeat::Bad Handler! %s"%handler, exc_info=True)
131 self.log.error("heartbeat::Bad Handler! %s"%handler, exc_info=True)
131 pass
132 pass
132 else:
133 else:
133 self.log.info("heartbeat::Heart %s failed :("%heart)
134 self.log.info("heartbeat::Heart %s failed :("%heart)
134 self.hearts.remove(heart)
135 self.hearts.remove(heart)
135
136
136
137
137 def handle_pong(self, msg):
138 def handle_pong(self, msg):
138 "a heart just beat"
139 "a heart just beat"
139 if msg[1] == str(self.lifetime):
140 if msg[1] == str(self.lifetime):
140 delta = time.time()-self.tic
141 delta = time.time()-self.tic
141 # self.log.debug("heartbeat::heart %r took %.2f ms to respond"%(msg[0], 1000*delta))
142 # self.log.debug("heartbeat::heart %r took %.2f ms to respond"%(msg[0], 1000*delta))
142 self.responses.add(msg[0])
143 self.responses.add(msg[0])
143 elif msg[1] == str(self.last_ping):
144 elif msg[1] == str(self.last_ping):
144 delta = time.time()-self.tic + (self.lifetime-self.last_ping)
145 delta = time.time()-self.tic + (self.lifetime-self.last_ping)
145 self.log.warn("heartbeat::heart %r missed a beat, and took %.2f ms to respond"%(msg[0], 1000*delta))
146 self.log.warn("heartbeat::heart %r missed a beat, and took %.2f ms to respond"%(msg[0], 1000*delta))
146 self.responses.add(msg[0])
147 self.responses.add(msg[0])
147 else:
148 else:
148 self.log.warn("heartbeat::got bad heartbeat (possibly old?): %s (current=%.3f)"%
149 self.log.warn("heartbeat::got bad heartbeat (possibly old?): %s (current=%.3f)"%
149 (msg[1],self.lifetime))
150 (msg[1],self.lifetime))
150
151
151
152
152 if __name__ == '__main__':
153 if __name__ == '__main__':
153 loop = ioloop.IOLoop.instance()
154 loop = ioloop.IOLoop.instance()
154 context = zmq.Context()
155 context = zmq.Context()
155 pub = context.socket(zmq.PUB)
156 pub = context.socket(zmq.PUB)
156 pub.bind('tcp://127.0.0.1:5555')
157 pub.bind('tcp://127.0.0.1:5555')
157 xrep = context.socket(zmq.XREP)
158 xrep = context.socket(zmq.XREP)
158 xrep.bind('tcp://127.0.0.1:5556')
159 xrep.bind('tcp://127.0.0.1:5556')
159
160
160 outstream = zmqstream.ZMQStream(pub, loop)
161 outstream = zmqstream.ZMQStream(pub, loop)
161 instream = zmqstream.ZMQStream(xrep, loop)
162 instream = zmqstream.ZMQStream(xrep, loop)
162
163
163 hb = HeartMonitor(loop=loop, pingstream=outstream, pongstream=instream)
164 hb = HeartMonitor(loop=loop, pingstream=outstream, pongstream=instream)
164 hb.start()
165 hb.start()
165
166
166 loop.start()
167 loop.start()
@@ -1,1274 +1,1277 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 """The IPython Controller Hub with 0MQ
2 """The IPython Controller Hub with 0MQ
3 This is the master object that handles connections from engines and clients,
3 This is the master object that handles connections from engines and clients,
4 and monitors traffic through the various queues.
4 and monitors traffic through the various queues.
5 """
5 """
6 #-----------------------------------------------------------------------------
6 #-----------------------------------------------------------------------------
7 # Copyright (C) 2010 The IPython Development Team
7 # Copyright (C) 2010 The IPython Development Team
8 #
8 #
9 # Distributed under the terms of the BSD License. The full license is in
9 # Distributed under the terms of the BSD License. The full license is in
10 # the file COPYING, distributed as part of this software.
10 # the file COPYING, distributed as part of this software.
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12
12
13 #-----------------------------------------------------------------------------
13 #-----------------------------------------------------------------------------
14 # Imports
14 # Imports
15 #-----------------------------------------------------------------------------
15 #-----------------------------------------------------------------------------
16 from __future__ import print_function
16 from __future__ import print_function
17
17
18 import sys
18 import sys
19 import time
19 import time
20 from datetime import datetime
20 from datetime import datetime
21
21
22 import zmq
22 import zmq
23 from zmq.eventloop import ioloop
23 from zmq.eventloop import ioloop
24 from zmq.eventloop.zmqstream import ZMQStream
24 from zmq.eventloop.zmqstream import ZMQStream
25
25
26 # internal:
26 # internal:
27 from IPython.utils.importstring import import_item
27 from IPython.utils.importstring import import_item
28 from IPython.utils.traitlets import (
28 from IPython.utils.traitlets import (
29 HasTraits, Instance, Int, Unicode, Dict, Set, Tuple, CStr
29 HasTraits, Instance, Int, Unicode, Dict, Set, Tuple, CStr
30 )
30 )
31 from IPython.utils.jsonutil import ISO8601, extract_dates
31 from IPython.utils.jsonutil import ISO8601, extract_dates
32
32
33 from IPython.parallel import error, util
33 from IPython.parallel import error, util
34 from IPython.parallel.factory import RegistrationFactory, LoggingFactory
34 from IPython.parallel.factory import RegistrationFactory
35
36 from IPython.zmq.session import SessionFactory
35
37
36 from .heartmonitor import HeartMonitor
38 from .heartmonitor import HeartMonitor
37
39
38 #-----------------------------------------------------------------------------
40 #-----------------------------------------------------------------------------
39 # Code
41 # Code
40 #-----------------------------------------------------------------------------
42 #-----------------------------------------------------------------------------
41
43
42 def _passer(*args, **kwargs):
44 def _passer(*args, **kwargs):
43 return
45 return
44
46
45 def _printer(*args, **kwargs):
47 def _printer(*args, **kwargs):
46 print (args)
48 print (args)
47 print (kwargs)
49 print (kwargs)
48
50
49 def empty_record():
51 def empty_record():
50 """Return an empty dict with all record keys."""
52 """Return an empty dict with all record keys."""
51 return {
53 return {
52 'msg_id' : None,
54 'msg_id' : None,
53 'header' : None,
55 'header' : None,
54 'content': None,
56 'content': None,
55 'buffers': None,
57 'buffers': None,
56 'submitted': None,
58 'submitted': None,
57 'client_uuid' : None,
59 'client_uuid' : None,
58 'engine_uuid' : None,
60 'engine_uuid' : None,
59 'started': None,
61 'started': None,
60 'completed': None,
62 'completed': None,
61 'resubmitted': None,
63 'resubmitted': None,
62 'result_header' : None,
64 'result_header' : None,
63 'result_content' : None,
65 'result_content' : None,
64 'result_buffers' : None,
66 'result_buffers' : None,
65 'queue' : None,
67 'queue' : None,
66 'pyin' : None,
68 'pyin' : None,
67 'pyout': None,
69 'pyout': None,
68 'pyerr': None,
70 'pyerr': None,
69 'stdout': '',
71 'stdout': '',
70 'stderr': '',
72 'stderr': '',
71 }
73 }
72
74
73 def init_record(msg):
75 def init_record(msg):
74 """Initialize a TaskRecord based on a request."""
76 """Initialize a TaskRecord based on a request."""
75 header = extract_dates(msg['header'])
77 header = extract_dates(msg['header'])
76 return {
78 return {
77 'msg_id' : header['msg_id'],
79 'msg_id' : header['msg_id'],
78 'header' : header,
80 'header' : header,
79 'content': msg['content'],
81 'content': msg['content'],
80 'buffers': msg['buffers'],
82 'buffers': msg['buffers'],
81 'submitted': header['date'],
83 'submitted': header['date'],
82 'client_uuid' : None,
84 'client_uuid' : None,
83 'engine_uuid' : None,
85 'engine_uuid' : None,
84 'started': None,
86 'started': None,
85 'completed': None,
87 'completed': None,
86 'resubmitted': None,
88 'resubmitted': None,
87 'result_header' : None,
89 'result_header' : None,
88 'result_content' : None,
90 'result_content' : None,
89 'result_buffers' : None,
91 'result_buffers' : None,
90 'queue' : None,
92 'queue' : None,
91 'pyin' : None,
93 'pyin' : None,
92 'pyout': None,
94 'pyout': None,
93 'pyerr': None,
95 'pyerr': None,
94 'stdout': '',
96 'stdout': '',
95 'stderr': '',
97 'stderr': '',
96 }
98 }
97
99
98
100
99 class EngineConnector(HasTraits):
101 class EngineConnector(HasTraits):
100 """A simple object for accessing the various zmq connections of an object.
102 """A simple object for accessing the various zmq connections of an object.
101 Attributes are:
103 Attributes are:
102 id (int): engine ID
104 id (int): engine ID
103 control (str): identity of control queue's XREQ socket
105 control (str): identity of control queue's XREQ socket
104 queue (str): identity of queue's XREQ socket
106 queue (str): identity of queue's XREQ socket
105 registration (str): identity of registration XREQ socket
107 registration (str): identity of registration XREQ socket
106 heartbeat (str): identity of heartbeat XREQ socket
108 heartbeat (str): identity of heartbeat XREQ socket
107 """
109 """
108 id=Int(0)
110 id=Int(0)
109 queue=CStr()
111 queue=CStr()
110 control=CStr()
112 control=CStr()
111 registration=CStr()
113 registration=CStr()
112 heartbeat=CStr()
114 heartbeat=CStr()
113 pending=Set()
115 pending=Set()
114
116
115 class HubFactory(RegistrationFactory):
117 class HubFactory(RegistrationFactory):
116 """The Configurable for setting up a Hub."""
118 """The Configurable for setting up a Hub."""
117
119
118 # port-pairs for monitoredqueues:
120 # port-pairs for monitoredqueues:
119 hb = Tuple(Int,Int,config=True,
121 hb = Tuple(Int,Int,config=True,
120 help="""XREQ/SUB Port pair for Engine heartbeats""")
122 help="""XREQ/SUB Port pair for Engine heartbeats""")
121 def _hb_default(self):
123 def _hb_default(self):
122 return tuple(util.select_random_ports(2))
124 return tuple(util.select_random_ports(2))
123
125
124 mux = Tuple(Int,Int,config=True,
126 mux = Tuple(Int,Int,config=True,
125 help="""Engine/Client Port pair for MUX queue""")
127 help="""Engine/Client Port pair for MUX queue""")
126
128
127 def _mux_default(self):
129 def _mux_default(self):
128 return tuple(util.select_random_ports(2))
130 return tuple(util.select_random_ports(2))
129
131
130 task = Tuple(Int,Int,config=True,
132 task = Tuple(Int,Int,config=True,
131 help="""Engine/Client Port pair for Task queue""")
133 help="""Engine/Client Port pair for Task queue""")
132 def _task_default(self):
134 def _task_default(self):
133 return tuple(util.select_random_ports(2))
135 return tuple(util.select_random_ports(2))
134
136
135 control = Tuple(Int,Int,config=True,
137 control = Tuple(Int,Int,config=True,
136 help="""Engine/Client Port pair for Control queue""")
138 help="""Engine/Client Port pair for Control queue""")
137
139
138 def _control_default(self):
140 def _control_default(self):
139 return tuple(util.select_random_ports(2))
141 return tuple(util.select_random_ports(2))
140
142
141 iopub = Tuple(Int,Int,config=True,
143 iopub = Tuple(Int,Int,config=True,
142 help="""Engine/Client Port pair for IOPub relay""")
144 help="""Engine/Client Port pair for IOPub relay""")
143
145
144 def _iopub_default(self):
146 def _iopub_default(self):
145 return tuple(util.select_random_ports(2))
147 return tuple(util.select_random_ports(2))
146
148
147 # single ports:
149 # single ports:
148 mon_port = Int(config=True,
150 mon_port = Int(config=True,
149 help="""Monitor (SUB) port for queue traffic""")
151 help="""Monitor (SUB) port for queue traffic""")
150
152
151 def _mon_port_default(self):
153 def _mon_port_default(self):
152 return util.select_random_ports(1)[0]
154 return util.select_random_ports(1)[0]
153
155
154 notifier_port = Int(config=True,
156 notifier_port = Int(config=True,
155 help="""PUB port for sending engine status notifications""")
157 help="""PUB port for sending engine status notifications""")
156
158
157 def _notifier_port_default(self):
159 def _notifier_port_default(self):
158 return util.select_random_ports(1)[0]
160 return util.select_random_ports(1)[0]
159
161
160 engine_ip = Unicode('127.0.0.1', config=True,
162 engine_ip = Unicode('127.0.0.1', config=True,
161 help="IP on which to listen for engine connections. [default: loopback]")
163 help="IP on which to listen for engine connections. [default: loopback]")
162 engine_transport = Unicode('tcp', config=True,
164 engine_transport = Unicode('tcp', config=True,
163 help="0MQ transport for engine connections. [default: tcp]")
165 help="0MQ transport for engine connections. [default: tcp]")
164
166
165 client_ip = Unicode('127.0.0.1', config=True,
167 client_ip = Unicode('127.0.0.1', config=True,
166 help="IP on which to listen for client connections. [default: loopback]")
168 help="IP on which to listen for client connections. [default: loopback]")
167 client_transport = Unicode('tcp', config=True,
169 client_transport = Unicode('tcp', config=True,
168 help="0MQ transport for client connections. [default : tcp]")
170 help="0MQ transport for client connections. [default : tcp]")
169
171
170 monitor_ip = Unicode('127.0.0.1', config=True,
172 monitor_ip = Unicode('127.0.0.1', config=True,
171 help="IP on which to listen for monitor messages. [default: loopback]")
173 help="IP on which to listen for monitor messages. [default: loopback]")
172 monitor_transport = Unicode('tcp', config=True,
174 monitor_transport = Unicode('tcp', config=True,
173 help="0MQ transport for monitor messages. [default : tcp]")
175 help="0MQ transport for monitor messages. [default : tcp]")
174
176
175 monitor_url = Unicode('')
177 monitor_url = Unicode('')
176
178
177 db_class = Unicode('IPython.parallel.controller.dictdb.DictDB', config=True,
179 db_class = Unicode('IPython.parallel.controller.dictdb.DictDB', config=True,
178 help="""The class to use for the DB backend""")
180 help="""The class to use for the DB backend""")
179
181
180 # not configurable
182 # not configurable
181 db = Instance('IPython.parallel.controller.dictdb.BaseDB')
183 db = Instance('IPython.parallel.controller.dictdb.BaseDB')
182 heartmonitor = Instance('IPython.parallel.controller.heartmonitor.HeartMonitor')
184 heartmonitor = Instance('IPython.parallel.controller.heartmonitor.HeartMonitor')
183
185
184 def _ip_changed(self, name, old, new):
186 def _ip_changed(self, name, old, new):
185 self.engine_ip = new
187 self.engine_ip = new
186 self.client_ip = new
188 self.client_ip = new
187 self.monitor_ip = new
189 self.monitor_ip = new
188 self._update_monitor_url()
190 self._update_monitor_url()
189
191
190 def _update_monitor_url(self):
192 def _update_monitor_url(self):
191 self.monitor_url = "%s://%s:%i"%(self.monitor_transport, self.monitor_ip, self.mon_port)
193 self.monitor_url = "%s://%s:%i"%(self.monitor_transport, self.monitor_ip, self.mon_port)
192
194
193 def _transport_changed(self, name, old, new):
195 def _transport_changed(self, name, old, new):
194 self.engine_transport = new
196 self.engine_transport = new
195 self.client_transport = new
197 self.client_transport = new
196 self.monitor_transport = new
198 self.monitor_transport = new
197 self._update_monitor_url()
199 self._update_monitor_url()
198
200
199 def __init__(self, **kwargs):
201 def __init__(self, **kwargs):
200 super(HubFactory, self).__init__(**kwargs)
202 super(HubFactory, self).__init__(**kwargs)
201 self._update_monitor_url()
203 self._update_monitor_url()
202
204
203
205
204 def construct(self):
206 def construct(self):
205 self.init_hub()
207 self.init_hub()
206
208
207 def start(self):
209 def start(self):
208 self.heartmonitor.start()
210 self.heartmonitor.start()
209 self.log.info("Heartmonitor started")
211 self.log.info("Heartmonitor started")
210
212
211 def init_hub(self):
213 def init_hub(self):
212 """construct"""
214 """construct"""
213 client_iface = "%s://%s:"%(self.client_transport, self.client_ip) + "%i"
215 client_iface = "%s://%s:"%(self.client_transport, self.client_ip) + "%i"
214 engine_iface = "%s://%s:"%(self.engine_transport, self.engine_ip) + "%i"
216 engine_iface = "%s://%s:"%(self.engine_transport, self.engine_ip) + "%i"
215
217
216 ctx = self.context
218 ctx = self.context
217 loop = self.loop
219 loop = self.loop
218
220
219 # Registrar socket
221 # Registrar socket
220 q = ZMQStream(ctx.socket(zmq.XREP), loop)
222 q = ZMQStream(ctx.socket(zmq.XREP), loop)
221 q.bind(client_iface % self.regport)
223 q.bind(client_iface % self.regport)
222 self.log.info("Hub listening on %s for registration."%(client_iface%self.regport))
224 self.log.info("Hub listening on %s for registration."%(client_iface%self.regport))
223 if self.client_ip != self.engine_ip:
225 if self.client_ip != self.engine_ip:
224 q.bind(engine_iface % self.regport)
226 q.bind(engine_iface % self.regport)
225 self.log.info("Hub listening on %s for registration."%(engine_iface%self.regport))
227 self.log.info("Hub listening on %s for registration."%(engine_iface%self.regport))
226
228
227 ### Engine connections ###
229 ### Engine connections ###
228
230
229 # heartbeat
231 # heartbeat
230 hpub = ctx.socket(zmq.PUB)
232 hpub = ctx.socket(zmq.PUB)
231 hpub.bind(engine_iface % self.hb[0])
233 hpub.bind(engine_iface % self.hb[0])
232 hrep = ctx.socket(zmq.XREP)
234 hrep = ctx.socket(zmq.XREP)
233 hrep.bind(engine_iface % self.hb[1])
235 hrep.bind(engine_iface % self.hb[1])
234 self.heartmonitor = HeartMonitor(loop=loop, pingstream=ZMQStream(hpub,loop), pongstream=ZMQStream(hrep,loop),
236 self.heartmonitor = HeartMonitor(loop=loop, config=self.config, log=self.log,
235 config=self.config)
237 pingstream=ZMQStream(hpub,loop),
238 pongstream=ZMQStream(hrep,loop)
239 )
236
240
237 ### Client connections ###
241 ### Client connections ###
238 # Notifier socket
242 # Notifier socket
239 n = ZMQStream(ctx.socket(zmq.PUB), loop)
243 n = ZMQStream(ctx.socket(zmq.PUB), loop)
240 n.bind(client_iface%self.notifier_port)
244 n.bind(client_iface%self.notifier_port)
241
245
242 ### build and launch the queues ###
246 ### build and launch the queues ###
243
247
244 # monitor socket
248 # monitor socket
245 sub = ctx.socket(zmq.SUB)
249 sub = ctx.socket(zmq.SUB)
246 sub.setsockopt(zmq.SUBSCRIBE, "")
250 sub.setsockopt(zmq.SUBSCRIBE, "")
247 sub.bind(self.monitor_url)
251 sub.bind(self.monitor_url)
248 sub.bind('inproc://monitor')
252 sub.bind('inproc://monitor')
249 sub = ZMQStream(sub, loop)
253 sub = ZMQStream(sub, loop)
250
254
251 # connect the db
255 # connect the db
252 self.log.info('Hub using DB backend: %r'%(self.db_class.split()[-1]))
256 self.log.info('Hub using DB backend: %r'%(self.db_class.split()[-1]))
253 # cdir = self.config.Global.cluster_dir
257 # cdir = self.config.Global.cluster_dir
254 self.db = import_item(str(self.db_class))(session=self.session.session, config=self.config)
258 self.db = import_item(str(self.db_class))(session=self.session.session, config=self.config)
255 time.sleep(.25)
259 time.sleep(.25)
256 try:
260 try:
257 scheme = self.config.TaskScheduler.scheme_name
261 scheme = self.config.TaskScheduler.scheme_name
258 except AttributeError:
262 except AttributeError:
259 from .scheduler import TaskScheduler
263 from .scheduler import TaskScheduler
260 scheme = TaskScheduler.scheme_name.get_default_value()
264 scheme = TaskScheduler.scheme_name.get_default_value()
261 # build connection dicts
265 # build connection dicts
262 self.engine_info = {
266 self.engine_info = {
263 'control' : engine_iface%self.control[1],
267 'control' : engine_iface%self.control[1],
264 'mux': engine_iface%self.mux[1],
268 'mux': engine_iface%self.mux[1],
265 'heartbeat': (engine_iface%self.hb[0], engine_iface%self.hb[1]),
269 'heartbeat': (engine_iface%self.hb[0], engine_iface%self.hb[1]),
266 'task' : engine_iface%self.task[1],
270 'task' : engine_iface%self.task[1],
267 'iopub' : engine_iface%self.iopub[1],
271 'iopub' : engine_iface%self.iopub[1],
268 # 'monitor' : engine_iface%self.mon_port,
272 # 'monitor' : engine_iface%self.mon_port,
269 }
273 }
270
274
271 self.client_info = {
275 self.client_info = {
272 'control' : client_iface%self.control[0],
276 'control' : client_iface%self.control[0],
273 'mux': client_iface%self.mux[0],
277 'mux': client_iface%self.mux[0],
274 'task' : (scheme, client_iface%self.task[0]),
278 'task' : (scheme, client_iface%self.task[0]),
275 'iopub' : client_iface%self.iopub[0],
279 'iopub' : client_iface%self.iopub[0],
276 'notification': client_iface%self.notifier_port
280 'notification': client_iface%self.notifier_port
277 }
281 }
278 self.log.debug("Hub engine addrs: %s"%self.engine_info)
282 self.log.debug("Hub engine addrs: %s"%self.engine_info)
279 self.log.debug("Hub client addrs: %s"%self.client_info)
283 self.log.debug("Hub client addrs: %s"%self.client_info)
280
284
281 # resubmit stream
285 # resubmit stream
282 r = ZMQStream(ctx.socket(zmq.XREQ), loop)
286 r = ZMQStream(ctx.socket(zmq.XREQ), loop)
283 url = util.disambiguate_url(self.client_info['task'][-1])
287 url = util.disambiguate_url(self.client_info['task'][-1])
284 r.setsockopt(zmq.IDENTITY, self.session.session)
288 r.setsockopt(zmq.IDENTITY, self.session.session)
285 r.connect(url)
289 r.connect(url)
286
290
287 self.hub = Hub(loop=loop, session=self.session, monitor=sub, heartmonitor=self.heartmonitor,
291 self.hub = Hub(loop=loop, session=self.session, monitor=sub, heartmonitor=self.heartmonitor,
288 query=q, notifier=n, resubmit=r, db=self.db,
292 query=q, notifier=n, resubmit=r, db=self.db,
289 engine_info=self.engine_info, client_info=self.client_info,
293 engine_info=self.engine_info, client_info=self.client_info,
290 logname=self.log.name)
294 log=self.log)
291
295
292
296
293 class Hub(LoggingFactory):
297 class Hub(SessionFactory):
294 """The IPython Controller Hub with 0MQ connections
298 """The IPython Controller Hub with 0MQ connections
295
299
296 Parameters
300 Parameters
297 ==========
301 ==========
298 loop: zmq IOLoop instance
302 loop: zmq IOLoop instance
299 session: Session object
303 session: Session object
300 <removed> context: zmq context for creating new connections (?)
304 <removed> context: zmq context for creating new connections (?)
301 queue: ZMQStream for monitoring the command queue (SUB)
305 queue: ZMQStream for monitoring the command queue (SUB)
302 query: ZMQStream for engine registration and client query requests (XREP)
306 query: ZMQStream for engine registration and client query requests (XREP)
303 heartbeat: HeartMonitor object checking the pulse of the engines
307 heartbeat: HeartMonitor object checking the pulse of the engines
304 notifier: ZMQStream for broadcasting engine registration changes (PUB)
308 notifier: ZMQStream for broadcasting engine registration changes (PUB)
305 db: connection to db for out of memory logging of commands
309 db: connection to db for out of memory logging of commands
306 NotImplemented
310 NotImplemented
307 engine_info: dict of zmq connection information for engines to connect
311 engine_info: dict of zmq connection information for engines to connect
308 to the queues.
312 to the queues.
309 client_info: dict of zmq connection information for clients to connect
313 client_info: dict of zmq connection information for clients to connect
310 to the queues.
314 to the queues.
311 """
315 """
312 # internal data structures:
316 # internal data structures:
313 ids=Set() # engine IDs
317 ids=Set() # engine IDs
314 keytable=Dict()
318 keytable=Dict()
315 by_ident=Dict()
319 by_ident=Dict()
316 engines=Dict()
320 engines=Dict()
317 clients=Dict()
321 clients=Dict()
318 hearts=Dict()
322 hearts=Dict()
319 pending=Set()
323 pending=Set()
320 queues=Dict() # pending msg_ids keyed by engine_id
324 queues=Dict() # pending msg_ids keyed by engine_id
321 tasks=Dict() # pending msg_ids submitted as tasks, keyed by client_id
325 tasks=Dict() # pending msg_ids submitted as tasks, keyed by client_id
322 completed=Dict() # completed msg_ids keyed by engine_id
326 completed=Dict() # completed msg_ids keyed by engine_id
323 all_completed=Set() # completed msg_ids keyed by engine_id
327 all_completed=Set() # completed msg_ids keyed by engine_id
324 dead_engines=Set() # completed msg_ids keyed by engine_id
328 dead_engines=Set() # completed msg_ids keyed by engine_id
325 unassigned=Set() # set of task msg_ids not yet assigned a destination
329 unassigned=Set() # set of task msg_ids not yet assigned a destination
326 incoming_registrations=Dict()
330 incoming_registrations=Dict()
327 registration_timeout=Int()
331 registration_timeout=Int()
328 _idcounter=Int(0)
332 _idcounter=Int(0)
329
333
330 # objects from constructor:
334 # objects from constructor:
331 loop=Instance(ioloop.IOLoop)
332 query=Instance(ZMQStream)
335 query=Instance(ZMQStream)
333 monitor=Instance(ZMQStream)
336 monitor=Instance(ZMQStream)
334 notifier=Instance(ZMQStream)
337 notifier=Instance(ZMQStream)
335 resubmit=Instance(ZMQStream)
338 resubmit=Instance(ZMQStream)
336 heartmonitor=Instance(HeartMonitor)
339 heartmonitor=Instance(HeartMonitor)
337 db=Instance(object)
340 db=Instance(object)
338 client_info=Dict()
341 client_info=Dict()
339 engine_info=Dict()
342 engine_info=Dict()
340
343
341
344
342 def __init__(self, **kwargs):
345 def __init__(self, **kwargs):
343 """
346 """
344 # universal:
347 # universal:
345 loop: IOLoop for creating future connections
348 loop: IOLoop for creating future connections
346 session: streamsession for sending serialized data
349 session: streamsession for sending serialized data
347 # engine:
350 # engine:
348 queue: ZMQStream for monitoring queue messages
351 queue: ZMQStream for monitoring queue messages
349 query: ZMQStream for engine+client registration and client requests
352 query: ZMQStream for engine+client registration and client requests
350 heartbeat: HeartMonitor object for tracking engines
353 heartbeat: HeartMonitor object for tracking engines
351 # extra:
354 # extra:
352 db: ZMQStream for db connection (NotImplemented)
355 db: ZMQStream for db connection (NotImplemented)
353 engine_info: zmq address/protocol dict for engine connections
356 engine_info: zmq address/protocol dict for engine connections
354 client_info: zmq address/protocol dict for client connections
357 client_info: zmq address/protocol dict for client connections
355 """
358 """
356
359
357 super(Hub, self).__init__(**kwargs)
360 super(Hub, self).__init__(**kwargs)
358 self.registration_timeout = max(5000, 2*self.heartmonitor.period)
361 self.registration_timeout = max(5000, 2*self.heartmonitor.period)
359
362
360 # validate connection dicts:
363 # validate connection dicts:
361 for k,v in self.client_info.iteritems():
364 for k,v in self.client_info.iteritems():
362 if k == 'task':
365 if k == 'task':
363 util.validate_url_container(v[1])
366 util.validate_url_container(v[1])
364 else:
367 else:
365 util.validate_url_container(v)
368 util.validate_url_container(v)
366 # util.validate_url_container(self.client_info)
369 # util.validate_url_container(self.client_info)
367 util.validate_url_container(self.engine_info)
370 util.validate_url_container(self.engine_info)
368
371
369 # register our callbacks
372 # register our callbacks
370 self.query.on_recv(self.dispatch_query)
373 self.query.on_recv(self.dispatch_query)
371 self.monitor.on_recv(self.dispatch_monitor_traffic)
374 self.monitor.on_recv(self.dispatch_monitor_traffic)
372
375
373 self.heartmonitor.add_heart_failure_handler(self.handle_heart_failure)
376 self.heartmonitor.add_heart_failure_handler(self.handle_heart_failure)
374 self.heartmonitor.add_new_heart_handler(self.handle_new_heart)
377 self.heartmonitor.add_new_heart_handler(self.handle_new_heart)
375
378
376 self.monitor_handlers = { 'in' : self.save_queue_request,
379 self.monitor_handlers = { 'in' : self.save_queue_request,
377 'out': self.save_queue_result,
380 'out': self.save_queue_result,
378 'intask': self.save_task_request,
381 'intask': self.save_task_request,
379 'outtask': self.save_task_result,
382 'outtask': self.save_task_result,
380 'tracktask': self.save_task_destination,
383 'tracktask': self.save_task_destination,
381 'incontrol': _passer,
384 'incontrol': _passer,
382 'outcontrol': _passer,
385 'outcontrol': _passer,
383 'iopub': self.save_iopub_message,
386 'iopub': self.save_iopub_message,
384 }
387 }
385
388
386 self.query_handlers = {'queue_request': self.queue_status,
389 self.query_handlers = {'queue_request': self.queue_status,
387 'result_request': self.get_results,
390 'result_request': self.get_results,
388 'history_request': self.get_history,
391 'history_request': self.get_history,
389 'db_request': self.db_query,
392 'db_request': self.db_query,
390 'purge_request': self.purge_results,
393 'purge_request': self.purge_results,
391 'load_request': self.check_load,
394 'load_request': self.check_load,
392 'resubmit_request': self.resubmit_task,
395 'resubmit_request': self.resubmit_task,
393 'shutdown_request': self.shutdown_request,
396 'shutdown_request': self.shutdown_request,
394 'registration_request' : self.register_engine,
397 'registration_request' : self.register_engine,
395 'unregistration_request' : self.unregister_engine,
398 'unregistration_request' : self.unregister_engine,
396 'connection_request': self.connection_request,
399 'connection_request': self.connection_request,
397 }
400 }
398
401
399 # ignore resubmit replies
402 # ignore resubmit replies
400 self.resubmit.on_recv(lambda msg: None, copy=False)
403 self.resubmit.on_recv(lambda msg: None, copy=False)
401
404
402 self.log.info("hub::created hub")
405 self.log.info("hub::created hub")
403
406
404 @property
407 @property
405 def _next_id(self):
408 def _next_id(self):
406 """gemerate a new ID.
409 """gemerate a new ID.
407
410
408 No longer reuse old ids, just count from 0."""
411 No longer reuse old ids, just count from 0."""
409 newid = self._idcounter
412 newid = self._idcounter
410 self._idcounter += 1
413 self._idcounter += 1
411 return newid
414 return newid
412 # newid = 0
415 # newid = 0
413 # incoming = [id[0] for id in self.incoming_registrations.itervalues()]
416 # incoming = [id[0] for id in self.incoming_registrations.itervalues()]
414 # # print newid, self.ids, self.incoming_registrations
417 # # print newid, self.ids, self.incoming_registrations
415 # while newid in self.ids or newid in incoming:
418 # while newid in self.ids or newid in incoming:
416 # newid += 1
419 # newid += 1
417 # return newid
420 # return newid
418
421
419 #-----------------------------------------------------------------------------
422 #-----------------------------------------------------------------------------
420 # message validation
423 # message validation
421 #-----------------------------------------------------------------------------
424 #-----------------------------------------------------------------------------
422
425
423 def _validate_targets(self, targets):
426 def _validate_targets(self, targets):
424 """turn any valid targets argument into a list of integer ids"""
427 """turn any valid targets argument into a list of integer ids"""
425 if targets is None:
428 if targets is None:
426 # default to all
429 # default to all
427 targets = self.ids
430 targets = self.ids
428
431
429 if isinstance(targets, (int,str,unicode)):
432 if isinstance(targets, (int,str,unicode)):
430 # only one target specified
433 # only one target specified
431 targets = [targets]
434 targets = [targets]
432 _targets = []
435 _targets = []
433 for t in targets:
436 for t in targets:
434 # map raw identities to ids
437 # map raw identities to ids
435 if isinstance(t, (str,unicode)):
438 if isinstance(t, (str,unicode)):
436 t = self.by_ident.get(t, t)
439 t = self.by_ident.get(t, t)
437 _targets.append(t)
440 _targets.append(t)
438 targets = _targets
441 targets = _targets
439 bad_targets = [ t for t in targets if t not in self.ids ]
442 bad_targets = [ t for t in targets if t not in self.ids ]
440 if bad_targets:
443 if bad_targets:
441 raise IndexError("No Such Engine: %r"%bad_targets)
444 raise IndexError("No Such Engine: %r"%bad_targets)
442 if not targets:
445 if not targets:
443 raise IndexError("No Engines Registered")
446 raise IndexError("No Engines Registered")
444 return targets
447 return targets
445
448
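# For example (illustrative):
#   self._validate_targets(None)          -> list of all registered engine ids
#   self._validate_targets(0)             -> [0]
#   self._validate_targets('queue-ident') -> [eid]  # raw identity mapped via by_ident
#   self._validate_targets([99])          -> raises IndexError("No Such Engine: [99]")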
446 #-----------------------------------------------------------------------------
449 #-----------------------------------------------------------------------------
447 # dispatch methods (1 per stream)
450 # dispatch methods (1 per stream)
448 #-----------------------------------------------------------------------------
451 #-----------------------------------------------------------------------------
449
452
450
453
451 def dispatch_monitor_traffic(self, msg):
454 def dispatch_monitor_traffic(self, msg):
452 """all ME and Task queue messages come through here, as well as
455 """all ME and Task queue messages come through here, as well as
453 IOPub traffic."""
456 IOPub traffic."""
454 self.log.debug("monitor traffic: %r"%msg[:2])
457 self.log.debug("monitor traffic: %r"%msg[:2])
455 switch = msg[0]
458 switch = msg[0]
456 try:
459 try:
457 idents, msg = self.session.feed_identities(msg[1:])
460 idents, msg = self.session.feed_identities(msg[1:])
458 except ValueError:
461 except ValueError:
459 idents=[]
462 idents=[]
460 if not idents:
463 if not idents:
461 self.log.error("Bad Monitor Message: %r"%msg)
464 self.log.error("Bad Monitor Message: %r"%msg)
462 return
465 return
463 handler = self.monitor_handlers.get(switch, None)
466 handler = self.monitor_handlers.get(switch, None)
464 if handler is not None:
467 if handler is not None:
465 handler(idents, msg)
468 handler(idents, msg)
466 else:
469 else:
467 self.log.error("Invalid monitor topic: %r"%switch)
470 self.log.error("Invalid monitor topic: %r"%switch)
468
471
469
472
470 def dispatch_query(self, msg):
473 def dispatch_query(self, msg):
471 """Route registration requests and queries from clients."""
474 """Route registration requests and queries from clients."""
472 try:
475 try:
473 idents, msg = self.session.feed_identities(msg)
476 idents, msg = self.session.feed_identities(msg)
474 except ValueError:
477 except ValueError:
475 idents = []
478 idents = []
476 if not idents:
479 if not idents:
477 self.log.error("Bad Query Message: %r"%msg)
480 self.log.error("Bad Query Message: %r"%msg)
478 return
481 return
479 client_id = idents[0]
482 client_id = idents[0]
480 try:
483 try:
481 msg = self.session.unpack_message(msg, content=True)
484 msg = self.session.unpack_message(msg, content=True)
482 except Exception:
485 except Exception:
483 content = error.wrap_exception()
486 content = error.wrap_exception()
484 self.log.error("Bad Query Message: %r"%msg, exc_info=True)
487 self.log.error("Bad Query Message: %r"%msg, exc_info=True)
485 self.session.send(self.query, "hub_error", ident=client_id,
488 self.session.send(self.query, "hub_error", ident=client_id,
486 content=content)
489 content=content)
487 return
490 return
488 # print(idents, msg)
491 # print(idents, msg)
489 # print client_id, header, parent, content
492 # print client_id, header, parent, content
490 #switch on message type:
493 #switch on message type:
491 msg_type = msg['msg_type']
494 msg_type = msg['msg_type']
492 self.log.info("client::client %r requested %r"%(client_id, msg_type))
495 self.log.info("client::client %r requested %r"%(client_id, msg_type))
493 handler = self.query_handlers.get(msg_type, None)
496 handler = self.query_handlers.get(msg_type, None)
494 try:
497 try:
495 assert handler is not None, "Bad Message Type: %r"%msg_type
498 assert handler is not None, "Bad Message Type: %r"%msg_type
496 except:
499 except:
497 content = error.wrap_exception()
500 content = error.wrap_exception()
498 self.log.error("Bad Message Type: %r"%msg_type, exc_info=True)
501 self.log.error("Bad Message Type: %r"%msg_type, exc_info=True)
499 self.session.send(self.query, "hub_error", ident=client_id,
502 self.session.send(self.query, "hub_error", ident=client_id,
500 content=content)
503 content=content)
501 return
504 return
502
505
503 else:
506 else:
504 handler(idents, msg)
507 handler(idents, msg)
505
508
506 def dispatch_db(self, msg):
509 def dispatch_db(self, msg):
507 """"""
510 """"""
508 raise NotImplementedError
511 raise NotImplementedError
509
512
510 #---------------------------------------------------------------------------
513 #---------------------------------------------------------------------------
511 # handler methods (1 per event)
514 # handler methods (1 per event)
512 #---------------------------------------------------------------------------
515 #---------------------------------------------------------------------------
513
516
514 #----------------------- Heartbeat --------------------------------------
517 #----------------------- Heartbeat --------------------------------------
515
518
516 def handle_new_heart(self, heart):
519 def handle_new_heart(self, heart):
517 """handler to attach to heartbeater.
520 """handler to attach to heartbeater.
518 Called when a new heart starts to beat.
521 Called when a new heart starts to beat.
519 Triggers completion of registration."""
522 Triggers completion of registration."""
520 self.log.debug("heartbeat::handle_new_heart(%r)"%heart)
523 self.log.debug("heartbeat::handle_new_heart(%r)"%heart)
521 if heart not in self.incoming_registrations:
524 if heart not in self.incoming_registrations:
522 self.log.info("heartbeat::ignoring new heart: %r"%heart)
525 self.log.info("heartbeat::ignoring new heart: %r"%heart)
523 else:
526 else:
524 self.finish_registration(heart)
527 self.finish_registration(heart)
525
528
526
529
527 def handle_heart_failure(self, heart):
530 def handle_heart_failure(self, heart):
528 """handler to attach to heartbeater.
531 """handler to attach to heartbeater.
529 called when a previously registered heart fails to respond to beat request.
532 called when a previously registered heart fails to respond to beat request.
530 triggers unregistration"""
533 triggers unregistration"""
531 self.log.debug("heartbeat::handle_heart_failure(%r)"%heart)
534 self.log.debug("heartbeat::handle_heart_failure(%r)"%heart)
532 eid = self.hearts.get(heart, None)
535 eid = self.hearts.get(heart, None)
533 if eid is None:
536 if eid is None:
534 self.log.info("heartbeat::ignoring heart failure %r"%heart)
537 self.log.info("heartbeat::ignoring heart failure %r"%heart)
535 else:
538 else:
536 queue = self.engines[eid].queue
539 queue = self.engines[eid].queue
537 self.unregister_engine(heart, dict(content=dict(id=eid, queue=queue)))
540 self.unregister_engine(heart, dict(content=dict(id=eid, queue=queue)))
538
541
539 #----------------------- MUX Queue Traffic ------------------------------
542 #----------------------- MUX Queue Traffic ------------------------------
540
543
541 def save_queue_request(self, idents, msg):
544 def save_queue_request(self, idents, msg):
542 if len(idents) < 2:
545 if len(idents) < 2:
543 self.log.error("invalid identity prefix: %r"%idents)
546 self.log.error("invalid identity prefix: %r"%idents)
544 return
547 return
545 queue_id, client_id = idents[:2]
548 queue_id, client_id = idents[:2]
546 try:
549 try:
547 msg = self.session.unpack_message(msg, content=False)
550 msg = self.session.unpack_message(msg, content=False)
548 except Exception:
551 except Exception:
549 self.log.error("queue::client %r sent invalid message to %r: %r"%(client_id, queue_id, msg), exc_info=True)
552 self.log.error("queue::client %r sent invalid message to %r: %r"%(client_id, queue_id, msg), exc_info=True)
550 return
553 return
551
554
552 eid = self.by_ident.get(queue_id, None)
555 eid = self.by_ident.get(queue_id, None)
553 if eid is None:
556 if eid is None:
554 self.log.error("queue::target %r not registered"%queue_id)
557 self.log.error("queue::target %r not registered"%queue_id)
555 self.log.debug("queue:: valid are: %r"%(self.by_ident.keys()))
558 self.log.debug("queue:: valid are: %r"%(self.by_ident.keys()))
556 return
559 return
557
560
558 header = msg['header']
561 header = msg['header']
559 msg_id = header['msg_id']
562 msg_id = header['msg_id']
560 record = init_record(msg)
563 record = init_record(msg)
561 record['engine_uuid'] = queue_id
564 record['engine_uuid'] = queue_id
562 record['client_uuid'] = client_id
565 record['client_uuid'] = client_id
563 record['queue'] = 'mux'
566 record['queue'] = 'mux'
564
567
565 try:
568 try:
566 # it's possible iopub arrived first:
569 # it's possible iopub arrived first:
567 existing = self.db.get_record(msg_id)
570 existing = self.db.get_record(msg_id)
568 for key,evalue in existing.iteritems():
571 for key,evalue in existing.iteritems():
569 rvalue = record.get(key, None)
572 rvalue = record.get(key, None)
570 if evalue and rvalue and evalue != rvalue:
573 if evalue and rvalue and evalue != rvalue:
571 self.log.warn("conflicting initial state for record: %r:%r <%r> %r"%(msg_id, rvalue, key, evalue))
574 self.log.warn("conflicting initial state for record: %r:%r <%r> %r"%(msg_id, rvalue, key, evalue))
572 elif evalue and not rvalue:
575 elif evalue and not rvalue:
573 record[key] = evalue
576 record[key] = evalue
574 self.db.update_record(msg_id, record)
577 self.db.update_record(msg_id, record)
575 except KeyError:
578 except KeyError:
576 self.db.add_record(msg_id, record)
579 self.db.add_record(msg_id, record)
577
580
578 self.pending.add(msg_id)
581 self.pending.add(msg_id)
579 self.queues[eid].append(msg_id)
582 self.queues[eid].append(msg_id)
580
583
    def save_queue_result(self, idents, msg):
        if len(idents) < 2:
            self.log.error("invalid identity prefix: %r"%idents)
            return

        client_id, queue_id = idents[:2]
        try:
            msg = self.session.unpack_message(msg, content=False)
        except Exception:
            self.log.error("queue::engine %r sent invalid message to %r: %r"%(
                    queue_id, client_id, msg), exc_info=True)
            return

        eid = self.by_ident.get(queue_id, None)
        if eid is None:
            self.log.error("queue::unknown engine %r is sending a reply"%queue_id)
            return

        parent = msg['parent_header']
        if not parent:
            return
        msg_id = parent['msg_id']
        if msg_id in self.pending:
            self.pending.remove(msg_id)
            self.all_completed.add(msg_id)
            self.queues[eid].remove(msg_id)
            self.completed[eid].append(msg_id)
        elif msg_id not in self.all_completed:
            # it could be a result from a dead engine that died before
            # delivering the result
            self.log.warn("queue:: unknown msg finished %r"%msg_id)
            return
        # update record anyway, because the unregistration could have been premature
        rheader = extract_dates(msg['header'])
        completed = rheader['date']
        started = rheader.get('started', None)
        result = {
            'result_header' : rheader,
            'result_content': msg['content'],
            'started' : started,
            'completed' : completed
        }

        result['result_buffers'] = msg['buffers']
        try:
            self.db.update_record(msg_id, result)
        except Exception:
            self.log.error("DB Error updating record %r"%msg_id, exc_info=True)

    #--------------------- Task Queue Traffic ------------------------------

    def save_task_request(self, idents, msg):
        """Save the submission of a task."""
        client_id = idents[0]

        try:
            msg = self.session.unpack_message(msg, content=False)
        except Exception:
            self.log.error("task::client %r sent invalid task message: %r"%(
                    client_id, msg), exc_info=True)
            return
        record = init_record(msg)

        record['client_uuid'] = client_id
        record['queue'] = 'task'
        header = msg['header']
        msg_id = header['msg_id']
        self.pending.add(msg_id)
        self.unassigned.add(msg_id)
        try:
            # it's possible iopub arrived first:
            existing = self.db.get_record(msg_id)
            if existing['resubmitted']:
                for key in ('submitted', 'client_uuid', 'buffers'):
                    # don't clobber these keys on resubmit
                    # submitted and client_uuid should be different
                    # and buffers might be big, and shouldn't have changed
                    record.pop(key)
                # still check content,header which should not change
                # but are not as expensive to compare as buffers

            for key,evalue in existing.iteritems():
                if key.endswith('buffers'):
                    # don't compare buffers
                    continue
                rvalue = record.get(key, None)
                if evalue and rvalue and evalue != rvalue:
                    self.log.warn("conflicting initial state for record: %r:%r <%r> %r"%(msg_id, rvalue, key, evalue))
                elif evalue and not rvalue:
                    record[key] = evalue
            self.db.update_record(msg_id, record)
        except KeyError:
            self.db.add_record(msg_id, record)
        except Exception:
            self.log.error("DB Error saving task request %r"%msg_id, exc_info=True)

    def save_task_result(self, idents, msg):
        """Save the result of a completed task."""
        client_id = idents[0]
        try:
            msg = self.session.unpack_message(msg, content=False)
        except Exception:
            self.log.error("task::invalid task result message sent to %r: %r"%(
                    client_id, msg), exc_info=True)
            return

        parent = msg['parent_header']
        if not parent:
            # print msg
            self.log.warn("Task %r had no parent!"%msg)
            return
        msg_id = parent['msg_id']
        if msg_id in self.unassigned:
            self.unassigned.remove(msg_id)

        header = extract_dates(msg['header'])
        engine_uuid = header.get('engine', None)
        eid = self.by_ident.get(engine_uuid, None)

        if msg_id in self.pending:
            self.pending.remove(msg_id)
            self.all_completed.add(msg_id)
            if eid is not None:
                self.completed[eid].append(msg_id)
                if msg_id in self.tasks[eid]:
                    self.tasks[eid].remove(msg_id)
            completed = header['date']
            started = header.get('started', None)
            result = {
                'result_header' : header,
                'result_content': msg['content'],
                'started' : started,
                'completed' : completed,
                'engine_uuid': engine_uuid
            }

            result['result_buffers'] = msg['buffers']
            try:
                self.db.update_record(msg_id, result)
            except Exception:
                self.log.error("DB Error saving task result %r"%msg_id, exc_info=True)

        else:
            self.log.debug("task::unknown task %r finished"%msg_id)

    def save_task_destination(self, idents, msg):
        try:
            msg = self.session.unpack_message(msg, content=True)
        except Exception:
            self.log.error("task::invalid task tracking message", exc_info=True)
            return
        content = msg['content']
        # print (content)
        msg_id = content['msg_id']
        engine_uuid = content['engine_id']
        eid = self.by_ident[engine_uuid]

        self.log.info("task::task %r arrived on %r"%(msg_id, eid))
        if msg_id in self.unassigned:
            self.unassigned.remove(msg_id)
        # else:
        #     self.log.debug("task::task %r not listed as MIA?!"%(msg_id))

        self.tasks[eid].append(msg_id)
        # self.pending[msg_id][1].update(received=datetime.now(),engine=(eid,engine_uuid))
        try:
            self.db.update_record(msg_id, dict(engine_uuid=engine_uuid))
        except Exception:
            self.log.error("DB Error saving task destination %r"%msg_id, exc_info=True)

    def mia_task_request(self, idents, msg):
        raise NotImplementedError
        client_id = idents[0]
        # content = dict(mia=self.mia,status='ok')
        # self.session.send('mia_reply', content=content, idents=client_id)


    #--------------------- IOPub Traffic ------------------------------

    def save_iopub_message(self, topics, msg):
        """save an iopub message into the db"""
        # print (topics)
        try:
            msg = self.session.unpack_message(msg, content=True)
        except Exception:
            self.log.error("iopub::invalid IOPub message", exc_info=True)
            return

        parent = msg['parent_header']
        if not parent:
            self.log.error("iopub::invalid IOPub message: %r"%msg)
            return
        msg_id = parent['msg_id']
        msg_type = msg['msg_type']
        content = msg['content']

        # ensure msg_id is in db
        try:
            rec = self.db.get_record(msg_id)
        except KeyError:
            rec = empty_record()
            rec['msg_id'] = msg_id
            self.db.add_record(msg_id, rec)
        # stream
        d = {}
        if msg_type == 'stream':
            name = content['name']
            s = rec[name] or ''
            d[name] = s + content['data']

        elif msg_type == 'pyerr':
            d['pyerr'] = content
        elif msg_type == 'pyin':
            d['pyin'] = content['code']
        else:
            d[msg_type] = content.get('data', '')

        try:
            self.db.update_record(msg_id, d)
        except Exception:
            self.log.error("DB Error saving iopub message %r"%msg_id, exc_info=True)

    #-------------------------------------------------------------------------
    # Registration requests
    #-------------------------------------------------------------------------

    def connection_request(self, client_id, msg):
        """Reply with connection addresses for clients."""
        self.log.info("client::client %r connected"%client_id)
        content = dict(status='ok')
        content.update(self.client_info)
        jsonable = {}
        for k,v in self.keytable.iteritems():
            if v not in self.dead_engines:
                jsonable[str(k)] = v
        content['engines'] = jsonable
        self.session.send(self.query, 'connection_reply', content, parent=msg, ident=client_id)

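    # Illustrative shape of the connection_reply content assembled above
    # (a sketch; the client_info fields and uuids are runtime values):
    #
    #   {'status': 'ok', ...self.client_info..., 'engines': {'0': u'<queue-uuid>'}}
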
    def register_engine(self, reg, msg):
        """Register a new engine."""
        content = msg['content']
        try:
            queue = content['queue']
        except KeyError:
            self.log.error("registration::queue not specified", exc_info=True)
            return
        heart = content.get('heartbeat', None)
        # register a new engine, and create the socket(s) necessary
        eid = self._next_id
        # print (eid, queue, reg, heart)

        self.log.debug("registration::register_engine(%i, %r, %r, %r)"%(eid, queue, reg, heart))

        content = dict(id=eid,status='ok')
        content.update(self.engine_info)
        # check if requesting available IDs:
        if queue in self.by_ident:
            try:
                raise KeyError("queue_id %r in use"%queue)
            except:
                content = error.wrap_exception()
                self.log.error("queue_id %r in use"%queue, exc_info=True)
        elif heart in self.hearts: # need to check unique hearts?
            try:
                raise KeyError("heart_id %r in use"%heart)
            except:
                self.log.error("heart_id %r in use"%heart, exc_info=True)
                content = error.wrap_exception()
        else:
            for h, pack in self.incoming_registrations.iteritems():
                if heart == h:
                    try:
                        raise KeyError("heart_id %r in use"%heart)
                    except:
                        self.log.error("heart_id %r in use"%heart, exc_info=True)
                        content = error.wrap_exception()
                    break
                elif queue == pack[1]:
                    try:
                        raise KeyError("queue_id %r in use"%queue)
                    except:
                        self.log.error("queue_id %r in use"%queue, exc_info=True)
                        content = error.wrap_exception()
                    break

        msg = self.session.send(self.query, "registration_reply",
                content=content,
                ident=reg)

        if content['status'] == 'ok':
            if heart in self.heartmonitor.hearts:
                # already beating
                self.incoming_registrations[heart] = (eid,queue,reg[0],None)
                self.finish_registration(heart)
            else:
                purge = lambda : self._purge_stalled_registration(heart)
                dc = ioloop.DelayedCallback(purge, self.registration_timeout, self.loop)
                dc.start()
                self.incoming_registrations[heart] = (eid,queue,reg[0],dc)
        else:
            self.log.error("registration::registration %i failed: %r"%(eid, content['evalue']))
        return eid

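    # Hedged sketch of the two registration_reply payloads sent above
    # (field names as built in this method; values are runtime data):
    #
    #   success: {'id': <eid>, 'status': 'ok', ...self.engine_info...}
    #   failure: the dict from error.wrap_exception(), carrying 'status',
    #            'ename', 'evalue' and a traceback
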
    def unregister_engine(self, ident, msg):
        """Unregister an engine that explicitly requested to leave."""
        try:
            eid = msg['content']['id']
        except:
            self.log.error("registration::bad engine id for unregistration: %r"%ident, exc_info=True)
            return
        self.log.info("registration::unregister_engine(%r)"%eid)
        # print (eid)
        uuid = self.keytable[eid]
        content=dict(id=eid, queue=uuid)
        self.dead_engines.add(uuid)
        # self.ids.remove(eid)
        # uuid = self.keytable.pop(eid)
        #
        # ec = self.engines.pop(eid)
        # self.hearts.pop(ec.heartbeat)
        # self.by_ident.pop(ec.queue)
        # self.completed.pop(eid)
        handleit = lambda : self._handle_stranded_msgs(eid, uuid)
        dc = ioloop.DelayedCallback(handleit, self.registration_timeout, self.loop)
        dc.start()
        ############## TODO: HANDLE IT ################

        if self.notifier:
            self.session.send(self.notifier, "unregistration_notification", content=content)

    def _handle_stranded_msgs(self, eid, uuid):
        """Handle messages known to be on an engine when the engine unregisters.

        It is possible that this will fire prematurely - that is, an engine will
        go down after completing a result, and the client will be notified
        that the result failed and later receive the actual result.
        """

        outstanding = self.queues[eid]

        for msg_id in outstanding:
            self.pending.remove(msg_id)
            self.all_completed.add(msg_id)
            try:
                raise error.EngineError("Engine %r died while running task %r"%(eid, msg_id))
            except:
                content = error.wrap_exception()
            # build a fake header:
            header = {}
            header['engine'] = uuid
            header['date'] = datetime.now()
            rec = dict(result_content=content, result_header=header, result_buffers=[])
            rec['completed'] = header['date']
            rec['engine_uuid'] = uuid
            try:
                self.db.update_record(msg_id, rec)
            except Exception:
                self.log.error("DB Error handling stranded msg %r"%msg_id, exc_info=True)

    def finish_registration(self, heart):
        """Second half of engine registration, called after our HeartMonitor
        has received a beat from the Engine's Heart."""
        try:
            (eid,queue,reg,purge) = self.incoming_registrations.pop(heart)
        except KeyError:
            self.log.error("registration::tried to finish nonexistent registration", exc_info=True)
            return
        self.log.info("registration::finished registering engine %i:%r"%(eid,queue))
        if purge is not None:
            purge.stop()
        control = queue
        self.ids.add(eid)
        self.keytable[eid] = queue
        self.engines[eid] = EngineConnector(id=eid, queue=queue, registration=reg,
                control=control, heartbeat=heart)
        self.by_ident[queue] = eid
        self.queues[eid] = list()
        self.tasks[eid] = list()
        self.completed[eid] = list()
        self.hearts[heart] = eid
        content = dict(id=eid, queue=self.engines[eid].queue)
        if self.notifier:
            self.session.send(self.notifier, "registration_notification", content=content)
        self.log.info("engine::Engine Connected: %i"%eid)

    def _purge_stalled_registration(self, heart):
        if heart in self.incoming_registrations:
            eid = self.incoming_registrations.pop(heart)[0]
            self.log.info("registration::purging stalled registration: %i"%eid)

    #-------------------------------------------------------------------------
    # Client Requests
    #-------------------------------------------------------------------------

    def shutdown_request(self, client_id, msg):
        """handle shutdown request."""
        self.session.send(self.query, 'shutdown_reply', content={'status': 'ok'}, ident=client_id)
        # also notify other clients of shutdown
        self.session.send(self.notifier, 'shutdown_notice', content={'status': 'ok'})
        dc = ioloop.DelayedCallback(lambda : self._shutdown(), 1000, self.loop)
        dc.start()

    def _shutdown(self):
        self.log.info("hub::hub shutting down.")
        time.sleep(0.1)
        sys.exit(0)

    def check_load(self, client_id, msg):
        content = msg['content']
        try:
            targets = content['targets']
            targets = self._validate_targets(targets)
        except:
            content = error.wrap_exception()
            self.session.send(self.query, "hub_error",
                    content=content, ident=client_id)
            return

        content = dict(status='ok')
        # loads = {}
        for t in targets:
            content[bytes(t)] = len(self.queues[t])+len(self.tasks[t])
        self.session.send(self.query, "load_reply", content=content, ident=client_id)

    def queue_status(self, client_id, msg):
        """Return the Queue status of one or more targets.

        If verbose, return the msg_ids; else return the count of each type.

        Keys:
            queue (pending MUX jobs)
            tasks (pending Task jobs)
            completed (finished jobs from both queues)
        """
        content = msg['content']
        targets = content['targets']
        try:
            targets = self._validate_targets(targets)
        except:
            content = error.wrap_exception()
            self.session.send(self.query, "hub_error",
                    content=content, ident=client_id)
            return
        verbose = content.get('verbose', False)
        content = dict(status='ok')
        for t in targets:
            queue = self.queues[t]
            completed = self.completed[t]
            tasks = self.tasks[t]
            if not verbose:
                queue = len(queue)
                completed = len(completed)
                tasks = len(tasks)
            content[bytes(t)] = {'queue': queue, 'completed': completed, 'tasks': tasks}
        content['unassigned'] = list(self.unassigned) if verbose else len(self.unassigned)

        self.session.send(self.query, "queue_reply", content=content, ident=client_id)

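    # Illustrative queue_reply content for two engines with verbose=False
    # (engine keys are bytes(eid); the counts are runtime values):
    #
    #   {'status': 'ok',
    #    '0': {'queue': 2, 'completed': 10, 'tasks': 1},
    #    '1': {'queue': 0, 'completed': 12, 'tasks': 3},
    #    'unassigned': 4}
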
    def purge_results(self, client_id, msg):
        """Purge results from memory. This method is more valuable before we move
        to a DB based message storage mechanism."""
        content = msg['content']
        msg_ids = content.get('msg_ids', [])
        reply = dict(status='ok')
        if msg_ids == 'all':
            try:
                self.db.drop_matching_records(dict(completed={'$ne':None}))
            except Exception:
                reply = error.wrap_exception()
        else:
            pending = filter(lambda m: m in self.pending, msg_ids)
            if pending:
                try:
                    raise IndexError("msg pending: %r"%pending[0])
                except:
                    reply = error.wrap_exception()
            else:
                try:
                    self.db.drop_matching_records(dict(msg_id={'$in':msg_ids}))
                except Exception:
                    reply = error.wrap_exception()

        if reply['status'] == 'ok':
            eids = content.get('engine_ids', [])
            for eid in eids:
                if eid not in self.engines:
                    try:
                        raise IndexError("No such engine: %i"%eid)
                    except:
                        reply = error.wrap_exception()
                    break
                msg_ids = self.completed.pop(eid)
                uid = self.engines[eid].queue
                try:
                    self.db.drop_matching_records(dict(engine_uuid=uid, completed={'$ne':None}))
                except Exception:
                    reply = error.wrap_exception()
                    break

        self.session.send(self.query, 'purge_reply', content=reply, ident=client_id)

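    # Sketch of the request contents handled above (client-side view, not a
    # formal spec): {'msg_ids': 'all'} drops every completed record, while
    # {'msg_ids': [...ids...], 'engine_ids': [0, 1]} drops specific results
    # plus each listed engine's completed history.
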
    def resubmit_task(self, client_id, msg):
        """Resubmit one or more tasks."""
        def finish(reply):
            self.session.send(self.query, 'resubmit_reply', content=reply, ident=client_id)

        content = msg['content']
        msg_ids = content['msg_ids']
        reply = dict(status='ok')
        try:
            records = self.db.find_records({'msg_id' : {'$in' : msg_ids}}, keys=[
                'header', 'content', 'buffers'])
        except Exception:
            self.log.error('db::db error finding tasks to resubmit', exc_info=True)
            return finish(error.wrap_exception())

        # validate msg_ids
        found_ids = [ rec['msg_id'] for rec in records ]
        invalid_ids = filter(lambda m: m in self.pending, found_ids)
        if len(records) > len(msg_ids):
            try:
                raise RuntimeError("DB appears to be in an inconsistent state. "
                    "More matching records were found than should exist")
            except Exception:
                return finish(error.wrap_exception())
        elif len(records) < len(msg_ids):
            missing = [ m for m in msg_ids if m not in found_ids ]
            try:
                raise KeyError("No such msg(s): %r"%missing)
            except KeyError:
                return finish(error.wrap_exception())
        elif invalid_ids:
            msg_id = invalid_ids[0]
            try:
                raise ValueError("Task %r appears to be inflight"%(msg_id))
            except Exception:
                return finish(error.wrap_exception())

        # clear the existing records
        now = datetime.now()
        rec = empty_record()
        map(rec.pop, ['msg_id', 'header', 'content', 'buffers', 'submitted'])
        rec['resubmitted'] = now
        rec['queue'] = 'task'
        rec['client_uuid'] = client_id[0]
        try:
            for msg_id in msg_ids:
                self.all_completed.discard(msg_id)
                self.db.update_record(msg_id, rec)
        except Exception:
            self.log.error('db::db error updating record', exc_info=True)
            reply = error.wrap_exception()
        else:
            # send the messages
            now_s = now.strftime(ISO8601)
            for rec in records:
                header = rec['header']
                # include resubmitted in header to prevent digest collision
                header['resubmitted'] = now_s
                msg = self.session.msg(header['msg_type'])
                msg['content'] = rec['content']
                msg['header'] = header
                msg['msg_id'] = rec['msg_id']
                self.session.send(self.resubmit, msg, buffers=rec['buffers'])

        finish(dict(status='ok'))

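    # A hedged sketch of why 'resubmitted' is stamped into the header above:
    # the Session signs each message over its header and content, and a
    # receiver that tracks digests it has already seen could drop a
    # byte-identical resend as a duplicate. Changing any one header field
    # yields a fresh digest, e.g.:
    #
    #   header['resubmitted'] = '2011-06-01T12:00:00'  # any new value works
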
    def _extract_record(self, rec):
        """decompose a TaskRecord dict into subsection of reply for get_result"""
        io_dict = {}
        for key in 'pyin pyout pyerr stdout stderr'.split():
            io_dict[key] = rec[key]
        content = { 'result_content': rec['result_content'],
                    'header': rec['header'],
                    'result_header' : rec['result_header'],
                    'io' : io_dict,
                  }
        if rec['result_buffers']:
            buffers = map(str, rec['result_buffers'])
        else:
            buffers = []

        return content, buffers

    def get_results(self, client_id, msg):
        """Get the result of 1 or more messages."""
        content = msg['content']
        msg_ids = sorted(set(content['msg_ids']))
        statusonly = content.get('status_only', False)
        pending = []
        completed = []
        content = dict(status='ok')
        content['pending'] = pending
        content['completed'] = completed
        buffers = []
        if not statusonly:
            try:
                matches = self.db.find_records(dict(msg_id={'$in':msg_ids}))
                # turn match list into dict, for faster lookup
                records = {}
                for rec in matches:
                    records[rec['msg_id']] = rec
            except Exception:
                content = error.wrap_exception()
                self.session.send(self.query, "result_reply", content=content,
                        parent=msg, ident=client_id)
                return
        else:
            records = {}
        for msg_id in msg_ids:
            if msg_id in self.pending:
                pending.append(msg_id)
            elif msg_id in self.all_completed:
                completed.append(msg_id)
                if not statusonly:
                    c,bufs = self._extract_record(records[msg_id])
                    content[msg_id] = c
                    buffers.extend(bufs)
            elif msg_id in records:
                # use the matching record; a bare 'rec' here would be a
                # stale leftover from the loop above
                if records[msg_id]['completed']:
                    completed.append(msg_id)
                    c,bufs = self._extract_record(records[msg_id])
                    content[msg_id] = c
                    buffers.extend(bufs)
                else:
                    pending.append(msg_id)
            else:
                try:
                    raise KeyError('No such message: '+msg_id)
                except:
                    content = error.wrap_exception()
                break
        self.session.send(self.query, "result_reply", content=content,
                parent=msg, ident=client_id,
                buffers=buffers)

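    # Illustrative result_reply content for msg_ids=['a','b'] where 'a' is
    # done and 'b' is still pending (values are runtime data):
    #
    #   {'status': 'ok', 'pending': ['b'], 'completed': ['a'],
    #    'a': {'result_content': ..., 'header': ..., 'result_header': ...,
    #          'io': {'pyin': ..., 'pyout': ..., 'pyerr': ...,
    #                 'stdout': ..., 'stderr': ...}}}
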
    def get_history(self, client_id, msg):
        """Get a list of all msg_ids in our DB records"""
        try:
            msg_ids = self.db.get_history()
        except Exception:
            content = error.wrap_exception()
        else:
            content = dict(status='ok', history=msg_ids)

        self.session.send(self.query, "history_reply", content=content,
                parent=msg, ident=client_id)

    def db_query(self, client_id, msg):
        """Perform a raw query on the task record database."""
        content = msg['content']
        query = content.get('query', {})
        keys = content.get('keys', None)
        query = util.extract_dates(query)
        buffers = []
        empty = list()

        try:
            records = self.db.find_records(query, keys)
        except Exception:
            content = error.wrap_exception()
        else:
            # extract buffers from reply content:
            if keys is not None:
                buffer_lens = [] if 'buffers' in keys else None
                result_buffer_lens = [] if 'result_buffers' in keys else None
            else:
                buffer_lens = []
                result_buffer_lens = []

            for rec in records:
                # buffers may be None, so double check
                if buffer_lens is not None:
                    b = rec.pop('buffers', empty) or empty
                    buffer_lens.append(len(b))
                    buffers.extend(b)
                if result_buffer_lens is not None:
                    rb = rec.pop('result_buffers', empty) or empty
                    result_buffer_lens.append(len(rb))
                    buffers.extend(rb)
            content = dict(status='ok', records=records, buffer_lens=buffer_lens,
                    result_buffer_lens=result_buffer_lens)

        self.session.send(self.query, "db_reply", content=content,
                parent=msg, ident=client_id,
                buffers=buffers)

@@ -1,687 +1,688 @@
1 """The Python scheduler for rich scheduling.
1 """The Python scheduler for rich scheduling.
2
2
3 The Pure ZMQ scheduler does not allow routing schemes other than LRU,
3 The Pure ZMQ scheduler does not allow routing schemes other than LRU,
4 nor does it check msg_id DAG dependencies. For those, a slightly slower
4 nor does it check msg_id DAG dependencies. For those, a slightly slower
5 Python Scheduler exists.
5 Python Scheduler exists.
6 """
6 """
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2010-2011 The IPython Development Team
8 # Copyright (C) 2010-2011 The IPython Development Team
9 #
9 #
10 # Distributed under the terms of the BSD License. The full license is in
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13
13
14 #----------------------------------------------------------------------
14 #----------------------------------------------------------------------
15 # Imports
15 # Imports
16 #----------------------------------------------------------------------
16 #----------------------------------------------------------------------
17
17
18 from __future__ import print_function
18 from __future__ import print_function
19
19
20 import logging
20 import logging
21 import sys
21 import sys
22
22
23 from datetime import datetime, timedelta
23 from datetime import datetime, timedelta
24 from random import randint, random
24 from random import randint, random
25 from types import FunctionType
25 from types import FunctionType
26
26
27 try:
27 try:
28 import numpy
28 import numpy
29 except ImportError:
29 except ImportError:
30 numpy = None
30 numpy = None
31
31
32 import zmq
32 import zmq
33 from zmq.eventloop import ioloop, zmqstream
33 from zmq.eventloop import ioloop, zmqstream
34
34
35 # local imports
35 # local imports
36 from IPython.external.decorator import decorator
36 from IPython.external.decorator import decorator
37 from IPython.config.loader import Config
37 from IPython.config.loader import Config
38 from IPython.utils.traitlets import Instance, Dict, List, Set, Int, Str, Enum
38 from IPython.utils.traitlets import Instance, Dict, List, Set, Int, Str, Enum
39
39
40 from IPython.parallel import error
40 from IPython.parallel import error
41 from IPython.parallel.factory import SessionFactory
41 from IPython.parallel.factory import SessionFactory
42 from IPython.parallel.util import connect_logger, local_logger
42 from IPython.parallel.util import connect_logger, local_logger
43
43
44 from .dependency import Dependency
44 from .dependency import Dependency
45
45
@decorator
def logged(f,self,*args,**kwargs):
    # print ("#--------------------")
    self.log.debug("scheduler::%s(*%s,**%s)"%(f.func_name, args, kwargs))
    # print ("#--")
    return f(self,*args, **kwargs)

#----------------------------------------------------------------------
# Chooser functions
#----------------------------------------------------------------------

def plainrandom(loads):
    """Plain random pick."""
    n = len(loads)
    return randint(0,n-1)

def lru(loads):
    """Always pick the front of the line.

    The content of `loads` is ignored.

    Assumes LRU ordering of loads, with oldest first.
    """
    return 0

def twobin(loads):
    """Pick two at random, use the LRU of the two.

    The content of loads is ignored.

    Assumes LRU ordering of loads, with oldest first.
    """
    n = len(loads)
    a = randint(0,n-1)
    b = randint(0,n-1)
    return min(a,b)

def weighted(loads):
    """Pick two at random using inverse load as weight.

    Return the less loaded of the two.
    """
    # weight 0 a million times more than 1:
    weights = 1./(1e-6+numpy.array(loads))
    sums = weights.cumsum()
    t = sums[-1]
    x = random()*t
    y = random()*t
    idx = 0
    idy = 0
    while sums[idx] < x:
        idx += 1
    while sums[idy] < y:
        idy += 1
    if weights[idy] > weights[idx]:
        return idy
    else:
        return idx

def leastload(loads):
    """Always choose the lowest load.

    If the lowest load occurs more than once, the first
    occurrence will be used. If loads has LRU ordering, this means
    the LRU of those with the lowest load is chosen.
    """
    return loads.index(min(loads))

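# Illustrative behavior of the choosers above (a sketch; twobin, weighted
# and plainrandom are randomized, so those lines show example outcomes):
#
#   loads = [3, 1, 0, 5]
#   leastload(loads)    # -> 2, index of the minimum load
#   lru(loads)          # -> 0, always the front (oldest) entry
#   twobin(loads)       # -> min of two random indices, e.g. min(1, 3) == 1
#   weighted(loads)     # -> usually 2: weight 1/(1e-6+0) dwarfs the others
#   plainrandom(loads)  # -> any index in range(len(loads))
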
#---------------------------------------------------------------------
# Classes
#---------------------------------------------------------------------
# store empty default dependency:
MET = Dependency([])

class TaskScheduler(SessionFactory):
    """Python TaskScheduler object.

    This is the simplest object that supports msg_id based
    DAG dependencies. *Only* task msg_ids are checked, not
    msg_ids of jobs submitted via the MUX queue.

    """

    hwm = Int(0, config=True, shortname='hwm',
        help="""specify the High Water Mark (HWM) for the downstream
        socket in the Task scheduler. This is the maximum number
        of allowed outstanding tasks on each engine."""
    )
    scheme_name = Enum(('leastload', 'pure', 'lru', 'plainrandom', 'weighted', 'twobin'),
        'leastload', config=True, shortname='scheme', allow_none=False,
        help="""select the task scheduler scheme [default: Python LRU]
        Options are: 'pure', 'lru', 'plainrandom', 'weighted', 'twobin', 'leastload'"""
    )
    def _scheme_name_changed(self, old, new):
        self.log.debug("Using scheme %r"%new)
        self.scheme = globals()[new]

    # input arguments:
    scheme = Instance(FunctionType) # function for determining the destination
    def _scheme_default(self):
        return leastload
    client_stream = Instance(zmqstream.ZMQStream) # client-facing stream
    engine_stream = Instance(zmqstream.ZMQStream) # engine-facing stream
    notifier_stream = Instance(zmqstream.ZMQStream) # hub-facing sub stream
    mon_stream = Instance(zmqstream.ZMQStream) # hub-facing pub stream

    # internals:
    graph = Dict() # dict by msg_id of [ msg_ids that depend on key ]
    retries = Dict() # dict by msg_id of retries remaining (non-neg ints)
    # waiting = List() # list of msg_ids ready to run, but haven't due to HWM
    depending = Dict() # dict by msg_id of (msg_id, raw_msg, after, follow)
    pending = Dict() # dict by engine_uuid of submitted tasks
    completed = Dict() # dict by engine_uuid of completed tasks
    failed = Dict() # dict by engine_uuid of failed tasks
    destinations = Dict() # dict by msg_id of engine_uuids where jobs ran (reverse of completed+failed)
    clients = Dict() # dict by msg_id for who submitted the task
    targets = List() # list of target IDENTs
    loads = List() # list of engine loads
    # full = Set() # set of IDENTs that have HWM outstanding tasks
    all_completed = Set() # set of all completed tasks
    all_failed = Set() # set of all failed tasks
    all_done = Set() # set of all finished tasks=union(completed,failed)
    all_ids = Set() # set of all submitted task IDs
    blacklist = Dict() # dict by msg_id of locations where a job has encountered UnmetDependency
    auditor = Instance('zmq.eventloop.ioloop.PeriodicCallback')

173 def start(self):
173 def start(self):
174 self.engine_stream.on_recv(self.dispatch_result, copy=False)
174 self.engine_stream.on_recv(self.dispatch_result, copy=False)
175 self._notification_handlers = dict(
175 self._notification_handlers = dict(
176 registration_notification = self._register_engine,
176 registration_notification = self._register_engine,
177 unregistration_notification = self._unregister_engine
177 unregistration_notification = self._unregister_engine
178 )
178 )
179 self.notifier_stream.on_recv(self.dispatch_notification)
179 self.notifier_stream.on_recv(self.dispatch_notification)
180 self.auditor = ioloop.PeriodicCallback(self.audit_timeouts, 2e3, self.loop) # every 2 seconds
180 self.auditor = ioloop.PeriodicCallback(self.audit_timeouts, 2e3, self.loop) # every 2 seconds
181 self.auditor.start()
181 self.auditor.start()
182 self.log.info("Scheduler started...%r"%self)
182 self.log.info("Scheduler started...%r"%self)
183
183
184 def resume_receiving(self):
184 def resume_receiving(self):
185 """Resume accepting jobs."""
185 """Resume accepting jobs."""
186 self.client_stream.on_recv(self.dispatch_submission, copy=False)
186 self.client_stream.on_recv(self.dispatch_submission, copy=False)
187
187
188 def stop_receiving(self):
188 def stop_receiving(self):
189 """Stop accepting jobs while there are no engines.
189 """Stop accepting jobs while there are no engines.
190 Leave them in the ZMQ queue."""
190 Leave them in the ZMQ queue."""
191 self.client_stream.on_recv(None)
191 self.client_stream.on_recv(None)
192
192
193 #-----------------------------------------------------------------------
193 #-----------------------------------------------------------------------
194 # [Un]Registration Handling
194 # [Un]Registration Handling
195 #-----------------------------------------------------------------------
195 #-----------------------------------------------------------------------
196
196
197 def dispatch_notification(self, msg):
197 def dispatch_notification(self, msg):
198 """dispatch register/unregister events."""
198 """dispatch register/unregister events."""
199 try:
199 try:
200 idents,msg = self.session.feed_identities(msg)
200 idents,msg = self.session.feed_identities(msg)
201 except ValueError:
201 except ValueError:
202 self.log.warn("task::Invalid Message: %r"%msg)
202 self.log.warn("task::Invalid Message: %r"%msg)
203 return
203 return
204 try:
204 try:
205 msg = self.session.unpack_message(msg)
205 msg = self.session.unpack_message(msg)
206 except ValueError:
206 except ValueError:
207 self.log.warn("task::Unauthorized message from: %r"%idents)
207 self.log.warn("task::Unauthorized message from: %r"%idents)
208 return
208 return
209
209
210 msg_type = msg['msg_type']
210 msg_type = msg['msg_type']
211
211
212 handler = self._notification_handlers.get(msg_type, None)
212 handler = self._notification_handlers.get(msg_type, None)
213 if handler is None:
213 if handler is None:
214 self.log.error("Unhandled message type: %r"%msg_type)
214 self.log.error("Unhandled message type: %r"%msg_type)
215 else:
215 else:
216 try:
216 try:
217 handler(str(msg['content']['queue']))
217 handler(str(msg['content']['queue']))
218 except KeyError:
218 except KeyError:
219 self.log.error("task::Invalid notification msg: %r"%msg)
219 self.log.error("task::Invalid notification msg: %r"%msg)
220
220
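# Illustrative sketch (not part of the original source; the uuid value is
# hypothetical): after unpacking, a notification handled by this dispatcher
# looks like
#
#     {'msg_type': 'registration_notification',
#      'content': {'queue': 'engine-uuid-1234'}}
#
# which is routed to self._register_engine('engine-uuid-1234').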
221 @logged
221 @logged
222 def _register_engine(self, uid):
222 def _register_engine(self, uid):
223 """New engine with ident `uid` became available."""
223 """New engine with ident `uid` became available."""
224 # head of the line:
224 # head of the line:
225 self.targets.insert(0,uid)
225 self.targets.insert(0,uid)
226 self.loads.insert(0,0)
226 self.loads.insert(0,0)
227 # initialize sets
227 # initialize sets
228 self.completed[uid] = set()
228 self.completed[uid] = set()
229 self.failed[uid] = set()
229 self.failed[uid] = set()
230 self.pending[uid] = {}
230 self.pending[uid] = {}
231 if len(self.targets) == 1:
231 if len(self.targets) == 1:
232 self.resume_receiving()
232 self.resume_receiving()
233 # rescan the graph:
233 # rescan the graph:
234 self.update_graph(None)
234 self.update_graph(None)
235
235
236 def _unregister_engine(self, uid):
236 def _unregister_engine(self, uid):
237 """Existing engine with ident `uid` became unavailable."""
237 """Existing engine with ident `uid` became unavailable."""
238 if len(self.targets) == 1:
238 if len(self.targets) == 1:
239 # this was our only engine
239 # this was our only engine
240 self.stop_receiving()
240 self.stop_receiving()
241
241
242 # handle any potentially finished tasks:
242 # handle any potentially finished tasks:
243 self.engine_stream.flush()
243 self.engine_stream.flush()
244
244
245 # don't pop destinations, because they might be used later
245 # don't pop destinations, because they might be used later
246 # map(self.destinations.pop, self.completed.pop(uid))
246 # map(self.destinations.pop, self.completed.pop(uid))
247 # map(self.destinations.pop, self.failed.pop(uid))
247 # map(self.destinations.pop, self.failed.pop(uid))
248
248
249 # prevent this engine from receiving work
249 # prevent this engine from receiving work
250 idx = self.targets.index(uid)
250 idx = self.targets.index(uid)
251 self.targets.pop(idx)
251 self.targets.pop(idx)
252 self.loads.pop(idx)
252 self.loads.pop(idx)
253
253
254 # wait 5 seconds before cleaning up pending jobs, since the results might
254 # wait 5 seconds before cleaning up pending jobs, since the results might
255 # still be incoming
255 # still be incoming
256 if self.pending[uid]:
256 if self.pending[uid]:
257 dc = ioloop.DelayedCallback(lambda : self.handle_stranded_tasks(uid), 5000, self.loop)
257 dc = ioloop.DelayedCallback(lambda : self.handle_stranded_tasks(uid), 5000, self.loop)
258 dc.start()
258 dc.start()
259 else:
259 else:
260 self.completed.pop(uid)
260 self.completed.pop(uid)
261 self.failed.pop(uid)
261 self.failed.pop(uid)
262
262
263
263
264 @logged
264 @logged
265 def handle_stranded_tasks(self, engine):
265 def handle_stranded_tasks(self, engine):
266 """Deal with jobs resident in an engine that died."""
266 """Deal with jobs resident in an engine that died."""
267 lost = self.pending[engine]
267 lost = self.pending[engine]
268 for msg_id in lost.keys():
268 for msg_id in lost.keys():
269 if msg_id not in self.pending[engine]:
269 if msg_id not in self.pending[engine]:
270 # prevent double-handling of messages
270 # prevent double-handling of messages
271 continue
271 continue
272
272
273 raw_msg = lost[msg_id][0]
273 raw_msg = lost[msg_id][0]
274 idents,msg = self.session.feed_identities(raw_msg, copy=False)
274 idents,msg = self.session.feed_identities(raw_msg, copy=False)
275 parent = self.session.unpack(msg[1].bytes)
275 parent = self.session.unpack(msg[1].bytes)
276 idents = [engine, idents[0]]
276 idents = [engine, idents[0]]
277
277
278 # build fake error reply
278 # build fake error reply
279 try:
279 try:
280 raise error.EngineError("Engine %r died while running task %r"%(engine, msg_id))
280 raise error.EngineError("Engine %r died while running task %r"%(engine, msg_id))
281 except:
281 except:
282 content = error.wrap_exception()
282 content = error.wrap_exception()
283 msg = self.session.msg('apply_reply', content, parent=parent, subheader={'status':'error'})
283 msg = self.session.msg('apply_reply', content, parent=parent, subheader={'status':'error'})
284 raw_reply = map(zmq.Message, self.session.serialize(msg, ident=idents))
284 raw_reply = map(zmq.Message, self.session.serialize(msg, ident=idents))
285 # and dispatch it
285 # and dispatch it
286 self.dispatch_result(raw_reply)
286 self.dispatch_result(raw_reply)
287
287
288 # finally scrub completed/failed lists
288 # finally scrub completed/failed lists
289 self.completed.pop(engine)
289 self.completed.pop(engine)
290 self.failed.pop(engine)
290 self.failed.pop(engine)
291
291
292
292
293 #-----------------------------------------------------------------------
293 #-----------------------------------------------------------------------
294 # Job Submission
294 # Job Submission
295 #-----------------------------------------------------------------------
295 #-----------------------------------------------------------------------
296 @logged
296 @logged
297 def dispatch_submission(self, raw_msg):
297 def dispatch_submission(self, raw_msg):
298 """Dispatch job submission to appropriate handlers."""
298 """Dispatch job submission to appropriate handlers."""
299 # ensure targets up to date:
299 # ensure targets up to date:
300 self.notifier_stream.flush()
300 self.notifier_stream.flush()
301 try:
301 try:
302 idents, msg = self.session.feed_identities(raw_msg, copy=False)
302 idents, msg = self.session.feed_identities(raw_msg, copy=False)
303 msg = self.session.unpack_message(msg, content=False, copy=False)
303 msg = self.session.unpack_message(msg, content=False, copy=False)
304 except Exception:
304 except Exception:
305 self.log.error("task::Invalid task msg: %r"%raw_msg, exc_info=True)
305 self.log.error("task::Invalid task msg: %r"%raw_msg, exc_info=True)
306 return
306 return
307
307
308
308
309 # send to monitor
309 # send to monitor
310 self.mon_stream.send_multipart(['intask']+raw_msg, copy=False)
310 self.mon_stream.send_multipart(['intask']+raw_msg, copy=False)
311
311
312 header = msg['header']
312 header = msg['header']
313 msg_id = header['msg_id']
313 msg_id = header['msg_id']
314 self.all_ids.add(msg_id)
314 self.all_ids.add(msg_id)
315
315
316 # targets
316 # targets
317 targets = set(header.get('targets', []))
317 targets = set(header.get('targets', []))
318 retries = header.get('retries', 0)
318 retries = header.get('retries', 0)
319 self.retries[msg_id] = retries
319 self.retries[msg_id] = retries
320
320
321 # time dependencies
321 # time dependencies
322 after = Dependency(header.get('after', []))
322 after = Dependency(header.get('after', []))
323 if after.all:
323 if after.all:
324 if after.success:
324 if after.success:
325 after.difference_update(self.all_completed)
325 after.difference_update(self.all_completed)
326 if after.failure:
326 if after.failure:
327 after.difference_update(self.all_failed)
327 after.difference_update(self.all_failed)
328 if after.check(self.all_completed, self.all_failed):
328 if after.check(self.all_completed, self.all_failed):
329 # recast as empty set, if `after` already met,
329 # recast as empty set, if `after` already met,
330 # to prevent unnecessary set comparisons
330 # to prevent unnecessary set comparisons
331 after = MET
331 after = MET
332
332
333 # location dependencies
333 # location dependencies
334 follow = Dependency(header.get('follow', []))
334 follow = Dependency(header.get('follow', []))
335
335
336 # turn timeouts into datetime objects:
336 # turn timeouts into datetime objects:
337 timeout = header.get('timeout', None)
337 timeout = header.get('timeout', None)
338 if timeout:
338 if timeout:
339 timeout = datetime.now() + timedelta(0,timeout,0)
339 timeout = datetime.now() + timedelta(0,timeout,0)
340
340
341 args = [raw_msg, targets, after, follow, timeout]
341 args = [raw_msg, targets, after, follow, timeout]
342
342
343 # validate and reduce dependencies:
343 # validate and reduce dependencies:
344 for dep in after,follow:
344 for dep in after,follow:
345 # check valid:
345 # check valid:
346 if msg_id in dep or dep.difference(self.all_ids):
346 if msg_id in dep or dep.difference(self.all_ids):
347 self.depending[msg_id] = args
347 self.depending[msg_id] = args
348 return self.fail_unreachable(msg_id, error.InvalidDependency)
348 return self.fail_unreachable(msg_id, error.InvalidDependency)
349 # check if unreachable:
349 # check if unreachable:
350 if dep.unreachable(self.all_completed, self.all_failed):
350 if dep.unreachable(self.all_completed, self.all_failed):
351 self.depending[msg_id] = args
351 self.depending[msg_id] = args
352 return self.fail_unreachable(msg_id)
352 return self.fail_unreachable(msg_id)
353
353
354 if after.check(self.all_completed, self.all_failed):
354 if after.check(self.all_completed, self.all_failed):
355 # time deps already met, try to run
355 # time deps already met, try to run
356 if not self.maybe_run(msg_id, *args):
356 if not self.maybe_run(msg_id, *args):
357 # can't run yet
357 # can't run yet
358 if msg_id not in self.all_failed:
358 if msg_id not in self.all_failed:
359 # could have failed as unreachable
359 # could have failed as unreachable
360 self.save_unmet(msg_id, *args)
360 self.save_unmet(msg_id, *args)
361 else:
361 else:
362 self.save_unmet(msg_id, *args)
362 self.save_unmet(msg_id, *args)
363
363
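# Illustrative sketch (not part of the original source; all values are
# hypothetical): the header fields that dispatch_submission consults:
#
#     header = {'msg_id' : 'abc123',
#               'targets': ['engine-a'],  # only these engines may run it
#               'retries': 2,             # resubmissions allowed on failure
#               'after'  : ['task-1'],    # time dependency: run after task-1
#               'follow' : ['task-2'],    # location dependency: run where task-2 ran
#               'timeout': 30}            # seconds before fail_unreachable fires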
364 # @logged
364 # @logged
365 def audit_timeouts(self):
365 def audit_timeouts(self):
366 """Audit all waiting tasks for expired timeouts."""
366 """Audit all waiting tasks for expired timeouts."""
367 now = datetime.now()
367 now = datetime.now()
368 for msg_id in self.depending.keys():
368 for msg_id in self.depending.keys():
369 # must recheck, in case one failure cascaded to another:
369 # must recheck, in case one failure cascaded to another:
370 if msg_id in self.depending:
370 if msg_id in self.depending:
371 raw,targets,after,follow,timeout = self.depending[msg_id]
371 raw,targets,after,follow,timeout = self.depending[msg_id]
372 if timeout and timeout < now:
372 if timeout and timeout < now:
373 self.fail_unreachable(msg_id, error.TaskTimeout)
373 self.fail_unreachable(msg_id, error.TaskTimeout)
374
374
375 @logged
375 @logged
376 def fail_unreachable(self, msg_id, why=error.ImpossibleDependency):
376 def fail_unreachable(self, msg_id, why=error.ImpossibleDependency):
377 """a task has become unreachable, send a reply with an ImpossibleDependency
377 """a task has become unreachable, send a reply with an ImpossibleDependency
378 error."""
378 error."""
379 if msg_id not in self.depending:
379 if msg_id not in self.depending:
380 self.log.error("msg %r already failed!"%msg_id)
380 self.log.error("msg %r already failed!"%msg_id)
381 return
381 return
382 raw_msg,targets,after,follow,timeout = self.depending.pop(msg_id)
382 raw_msg,targets,after,follow,timeout = self.depending.pop(msg_id)
383 for mid in follow.union(after):
383 for mid in follow.union(after):
384 if mid in self.graph:
384 if mid in self.graph:
385 self.graph[mid].remove(msg_id)
385 self.graph[mid].remove(msg_id)
386
386
387 # FIXME: unpacking a message I've already unpacked, but didn't save:
387 # FIXME: unpacking a message I've already unpacked, but didn't save:
388 idents,msg = self.session.feed_identities(raw_msg, copy=False)
388 idents,msg = self.session.feed_identities(raw_msg, copy=False)
389 header = self.session.unpack(msg[1].bytes)
389 header = self.session.unpack(msg[1].bytes)
390
390
391 try:
391 try:
392 raise why()
392 raise why()
393 except:
393 except:
394 content = error.wrap_exception()
394 content = error.wrap_exception()
395
395
396 self.all_done.add(msg_id)
396 self.all_done.add(msg_id)
397 self.all_failed.add(msg_id)
397 self.all_failed.add(msg_id)
398
398
399 msg = self.session.send(self.client_stream, 'apply_reply', content,
399 msg = self.session.send(self.client_stream, 'apply_reply', content,
400 parent=header, ident=idents)
400 parent=header, ident=idents)
401 self.session.send(self.mon_stream, msg, ident=['outtask']+idents)
401 self.session.send(self.mon_stream, msg, ident=['outtask']+idents)
402
402
403 self.update_graph(msg_id, success=False)
403 self.update_graph(msg_id, success=False)
404
404
405 @logged
405 @logged
406 def maybe_run(self, msg_id, raw_msg, targets, after, follow, timeout):
406 def maybe_run(self, msg_id, raw_msg, targets, after, follow, timeout):
407 """check location dependencies, and run if they are met."""
407 """check location dependencies, and run if they are met."""
408 blacklist = self.blacklist.setdefault(msg_id, set())
408 blacklist = self.blacklist.setdefault(msg_id, set())
409 if follow or targets or blacklist or self.hwm:
409 if follow or targets or blacklist or self.hwm:
410 # we need a can_run filter
410 # we need a can_run filter
411 def can_run(idx):
411 def can_run(idx):
412 # check hwm
412 # check hwm
413 if self.hwm and self.loads[idx] == self.hwm:
413 if self.hwm and self.loads[idx] == self.hwm:
414 return False
414 return False
415 target = self.targets[idx]
415 target = self.targets[idx]
416 # check blacklist
416 # check blacklist
417 if target in blacklist:
417 if target in blacklist:
418 return False
418 return False
419 # check targets
419 # check targets
420 if targets and target not in targets:
420 if targets and target not in targets:
421 return False
421 return False
422 # check follow
422 # check follow
423 return follow.check(self.completed[target], self.failed[target])
423 return follow.check(self.completed[target], self.failed[target])
424
424
425 indices = filter(can_run, range(len(self.targets)))
425 indices = filter(can_run, range(len(self.targets)))
426
426
427 if not indices:
427 if not indices:
428 # couldn't run
428 # couldn't run
429 if follow.all:
429 if follow.all:
430 # check follow for impossibility
430 # check follow for impossibility
431 dests = set()
431 dests = set()
432 relevant = set()
432 relevant = set()
433 if follow.success:
433 if follow.success:
434 relevant = self.all_completed
434 relevant = self.all_completed
435 if follow.failure:
435 if follow.failure:
436 relevant = relevant.union(self.all_failed)
436 relevant = relevant.union(self.all_failed)
437 for m in follow.intersection(relevant):
437 for m in follow.intersection(relevant):
438 dests.add(self.destinations[m])
438 dests.add(self.destinations[m])
439 if len(dests) > 1:
439 if len(dests) > 1:
440 self.depending[msg_id] = (raw_msg, targets, after, follow, timeout)
440 self.depending[msg_id] = (raw_msg, targets, after, follow, timeout)
441 self.fail_unreachable(msg_id)
441 self.fail_unreachable(msg_id)
442 return False
442 return False
443 if targets:
443 if targets:
444 # check blacklist+targets for impossibility
444 # check blacklist+targets for impossibility
445 targets.difference_update(blacklist)
445 targets.difference_update(blacklist)
446 if not targets or not targets.intersection(self.targets):
446 if not targets or not targets.intersection(self.targets):
447 self.depending[msg_id] = (raw_msg, targets, after, follow, timeout)
447 self.depending[msg_id] = (raw_msg, targets, after, follow, timeout)
448 self.fail_unreachable(msg_id)
448 self.fail_unreachable(msg_id)
449 return False
449 return False
450 return False
450 return False
451 else:
451 else:
452 indices = None
452 indices = None
453
453
454 self.submit_task(msg_id, raw_msg, targets, follow, timeout, indices)
454 self.submit_task(msg_id, raw_msg, targets, follow, timeout, indices)
455 return True
455 return True
456
456
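# Simplified, self-contained sketch (not part of the original source; names
# hypothetical) of the can_run filter above, with plain sets in place of
# Dependency objects:
#
#     targets, loads, hwm = ['e0', 'e1', 'e2'], [2, 0, 1], 2
#     blacklist, wanted = set(['e2']), set(['e0', 'e1', 'e2'])
#     def can_run(idx):
#         if hwm and loads[idx] == hwm:
#             return False                    # engine already at its HWM
#         t = targets[idx]
#         return t not in blacklist and t in wanted
#     filter(can_run, range(len(targets)))    # -> [1]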
457 @logged
457 @logged
458 def save_unmet(self, msg_id, raw_msg, targets, after, follow, timeout):
458 def save_unmet(self, msg_id, raw_msg, targets, after, follow, timeout):
459 """Save a message for later submission when its dependencies are met."""
459 """Save a message for later submission when its dependencies are met."""
460 self.depending[msg_id] = [raw_msg,targets,after,follow,timeout]
460 self.depending[msg_id] = [raw_msg,targets,after,follow,timeout]
461 # track the ids in follow or after, but not those already finished
461 # track the ids in follow or after, but not those already finished
462 for dep_id in after.union(follow).difference(self.all_done):
462 for dep_id in after.union(follow).difference(self.all_done):
463 if dep_id not in self.graph:
463 if dep_id not in self.graph:
464 self.graph[dep_id] = set()
464 self.graph[dep_id] = set()
465 self.graph[dep_id].add(msg_id)
465 self.graph[dep_id].add(msg_id)
466
466
467 @logged
467 @logged
468 def submit_task(self, msg_id, raw_msg, targets, follow, timeout, indices=None):
468 def submit_task(self, msg_id, raw_msg, targets, follow, timeout, indices=None):
469 """Submit a task to any of a subset of our targets."""
469 """Submit a task to any of a subset of our targets."""
470 if indices:
470 if indices:
471 loads = [self.loads[i] for i in indices]
471 loads = [self.loads[i] for i in indices]
472 else:
472 else:
473 loads = self.loads
473 loads = self.loads
474 idx = self.scheme(loads)
474 idx = self.scheme(loads)
475 if indices:
475 if indices:
476 idx = indices[idx]
476 idx = indices[idx]
477 target = self.targets[idx]
477 target = self.targets[idx]
478 # print (target, map(str, msg[:3]))
478 # print (target, map(str, msg[:3]))
479 # send job to the engine
479 # send job to the engine
480 self.engine_stream.send(target, flags=zmq.SNDMORE, copy=False)
480 self.engine_stream.send(target, flags=zmq.SNDMORE, copy=False)
481 self.engine_stream.send_multipart(raw_msg, copy=False)
481 self.engine_stream.send_multipart(raw_msg, copy=False)
482 # update load
482 # update load
483 self.add_job(idx)
483 self.add_job(idx)
484 self.pending[target][msg_id] = (raw_msg, targets, MET, follow, timeout)
484 self.pending[target][msg_id] = (raw_msg, targets, MET, follow, timeout)
485 # notify Hub
485 # notify Hub
486 content = dict(msg_id=msg_id, engine_id=target)
486 content = dict(msg_id=msg_id, engine_id=target)
487 self.session.send(self.mon_stream, 'task_destination', content=content,
487 self.session.send(self.mon_stream, 'task_destination', content=content,
488 ident=['tracktask',self.session.session])
488 ident=['tracktask',self.session.session])
489
489
490
490
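# Illustrative note (not part of the original source): a scheme is just a
# function from the list of loads to the index of the chosen engine; the
# default leastload picks the least-loaded one, e.g.
#
#     leastload([3, 0, 2])    # -> 1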
491 #-----------------------------------------------------------------------
491 #-----------------------------------------------------------------------
492 # Result Handling
492 # Result Handling
493 #-----------------------------------------------------------------------
493 #-----------------------------------------------------------------------
494 @logged
494 @logged
495 def dispatch_result(self, raw_msg):
495 def dispatch_result(self, raw_msg):
496 """dispatch method for result replies"""
496 """dispatch method for result replies"""
497 try:
497 try:
498 idents,msg = self.session.feed_identities(raw_msg, copy=False)
498 idents,msg = self.session.feed_identities(raw_msg, copy=False)
499 msg = self.session.unpack_message(msg, content=False, copy=False)
499 msg = self.session.unpack_message(msg, content=False, copy=False)
500 engine = idents[0]
500 engine = idents[0]
501 try:
501 try:
502 idx = self.targets.index(engine)
502 idx = self.targets.index(engine)
503 except ValueError:
503 except ValueError:
504 pass # skip load-update for dead engines
504 pass # skip load-update for dead engines
505 else:
505 else:
506 self.finish_job(idx)
506 self.finish_job(idx)
507 except Exception:
507 except Exception:
508 self.log.error("task::Invalid result: %r"%raw_msg, exc_info=True)
508 self.log.error("task::Invalid result: %r"%raw_msg, exc_info=True)
509 return
509 return
510
510
511 header = msg['header']
511 header = msg['header']
512 parent = msg['parent_header']
512 parent = msg['parent_header']
513 if header.get('dependencies_met', True):
513 if header.get('dependencies_met', True):
514 success = (header['status'] == 'ok')
514 success = (header['status'] == 'ok')
515 msg_id = parent['msg_id']
515 msg_id = parent['msg_id']
516 retries = self.retries[msg_id]
516 retries = self.retries[msg_id]
517 if not success and retries > 0:
517 if not success and retries > 0:
518 # failed
518 # failed
519 self.retries[msg_id] = retries - 1
519 self.retries[msg_id] = retries - 1
520 self.handle_unmet_dependency(idents, parent)
520 self.handle_unmet_dependency(idents, parent)
521 else:
521 else:
522 del self.retries[msg_id]
522 del self.retries[msg_id]
523 # relay to client and update graph
523 # relay to client and update graph
524 self.handle_result(idents, parent, raw_msg, success)
524 self.handle_result(idents, parent, raw_msg, success)
525 # send to Hub monitor
525 # send to Hub monitor
526 self.mon_stream.send_multipart(['outtask']+raw_msg, copy=False)
526 self.mon_stream.send_multipart(['outtask']+raw_msg, copy=False)
527 else:
527 else:
528 self.handle_unmet_dependency(idents, parent)
528 self.handle_unmet_dependency(idents, parent)
529
529
530 @logged
530 @logged
531 def handle_result(self, idents, parent, raw_msg, success=True):
531 def handle_result(self, idents, parent, raw_msg, success=True):
532 """handle a real task result, either success or failure"""
532 """handle a real task result, either success or failure"""
533 # first, relay result to client
533 # first, relay result to client
534 engine = idents[0]
534 engine = idents[0]
535 client = idents[1]
535 client = idents[1]
536 # swap_ids for XREP-XREP mirror
536 # swap_ids for XREP-XREP mirror
537 raw_msg[:2] = [client,engine]
537 raw_msg[:2] = [client,engine]
538 # print (map(str, raw_msg[:4]))
538 # print (map(str, raw_msg[:4]))
539 self.client_stream.send_multipart(raw_msg, copy=False)
539 self.client_stream.send_multipart(raw_msg, copy=False)
540 # now, update our data structures
540 # now, update our data structures
541 msg_id = parent['msg_id']
541 msg_id = parent['msg_id']
542 self.blacklist.pop(msg_id, None)
542 self.blacklist.pop(msg_id, None)
543 self.pending[engine].pop(msg_id)
543 self.pending[engine].pop(msg_id)
544 if success:
544 if success:
545 self.completed[engine].add(msg_id)
545 self.completed[engine].add(msg_id)
546 self.all_completed.add(msg_id)
546 self.all_completed.add(msg_id)
547 else:
547 else:
548 self.failed[engine].add(msg_id)
548 self.failed[engine].add(msg_id)
549 self.all_failed.add(msg_id)
549 self.all_failed.add(msg_id)
550 self.all_done.add(msg_id)
550 self.all_done.add(msg_id)
551 self.destinations[msg_id] = engine
551 self.destinations[msg_id] = engine
552
552
553 self.update_graph(msg_id, success)
553 self.update_graph(msg_id, success)
554
554
555 @logged
555 @logged
556 def handle_unmet_dependency(self, idents, parent):
556 def handle_unmet_dependency(self, idents, parent):
557 """handle an unmet dependency"""
557 """handle an unmet dependency"""
558 engine = idents[0]
558 engine = idents[0]
559 msg_id = parent['msg_id']
559 msg_id = parent['msg_id']
560
560
561 if msg_id not in self.blacklist:
561 if msg_id not in self.blacklist:
562 self.blacklist[msg_id] = set()
562 self.blacklist[msg_id] = set()
563 self.blacklist[msg_id].add(engine)
563 self.blacklist[msg_id].add(engine)
564
564
565 args = self.pending[engine].pop(msg_id)
565 args = self.pending[engine].pop(msg_id)
566 raw,targets,after,follow,timeout = args
566 raw,targets,after,follow,timeout = args
567
567
568 if self.blacklist[msg_id] == targets:
568 if self.blacklist[msg_id] == targets:
569 self.depending[msg_id] = args
569 self.depending[msg_id] = args
570 self.fail_unreachable(msg_id)
570 self.fail_unreachable(msg_id)
571 elif not self.maybe_run(msg_id, *args):
571 elif not self.maybe_run(msg_id, *args):
572 # resubmit failed
572 # resubmit failed
573 if msg_id not in self.all_failed:
573 if msg_id not in self.all_failed:
574 # put it back in our dependency tree
574 # put it back in our dependency tree
575 self.save_unmet(msg_id, *args)
575 self.save_unmet(msg_id, *args)
576
576
577 if self.hwm:
577 if self.hwm:
578 try:
578 try:
579 idx = self.targets.index(engine)
579 idx = self.targets.index(engine)
580 except ValueError:
580 except ValueError:
581 pass # skip load-update for dead engines
581 pass # skip load-update for dead engines
582 else:
582 else:
583 if self.loads[idx] == self.hwm-1:
583 if self.loads[idx] == self.hwm-1:
584 self.update_graph(None)
584 self.update_graph(None)
585
585
586
586
587
587
588 @logged
588 @logged
589 def update_graph(self, dep_id=None, success=True):
589 def update_graph(self, dep_id=None, success=True):
590 """dep_id just finished. Update our dependency
590 """dep_id just finished. Update our dependency
591 graph and submit any jobs that just became runnable.
591 graph and submit any jobs that just became runnable.
592
592
593 Called with dep_id=None to update entire graph for hwm, but without finishing
593 Called with dep_id=None to update entire graph for hwm, but without finishing
594 a task.
594 a task.
595 """
595 """
596 # print ("\n\n***********")
596 # print ("\n\n***********")
597 # pprint (dep_id)
597 # pprint (dep_id)
598 # pprint (self.graph)
598 # pprint (self.graph)
599 # pprint (self.depending)
599 # pprint (self.depending)
600 # pprint (self.all_completed)
600 # pprint (self.all_completed)
601 # pprint (self.all_failed)
601 # pprint (self.all_failed)
602 # print ("\n\n***********\n\n")
602 # print ("\n\n***********\n\n")
603 # update any jobs that depended on the dependency
603 # update any jobs that depended on the dependency
604 jobs = self.graph.pop(dep_id, [])
604 jobs = self.graph.pop(dep_id, [])
605
605
606 # recheck *all* jobs if
606 # recheck *all* jobs if
607 # a) we have HWM and an engine just became no longer full
607 # a) we have HWM and an engine just became no longer full
608 # or b) dep_id was given as None
608 # or b) dep_id was given as None
609 if dep_id is None or self.hwm and any( [ load==self.hwm-1 for load in self.loads ]):
609 if dep_id is None or self.hwm and any( [ load==self.hwm-1 for load in self.loads ]):
610 jobs = self.depending.keys()
610 jobs = self.depending.keys()
611
611
612 for msg_id in jobs:
612 for msg_id in jobs:
613 raw_msg, targets, after, follow, timeout = self.depending[msg_id]
613 raw_msg, targets, after, follow, timeout = self.depending[msg_id]
614
614
615 if after.unreachable(self.all_completed, self.all_failed) or follow.unreachable(self.all_completed, self.all_failed):
615 if after.unreachable(self.all_completed, self.all_failed)\
616 or follow.unreachable(self.all_completed, self.all_failed):
616 self.fail_unreachable(msg_id)
617 self.fail_unreachable(msg_id)
617
618
618 elif after.check(self.all_completed, self.all_failed): # time deps met, maybe run
619 elif after.check(self.all_completed, self.all_failed): # time deps met, maybe run
619 if self.maybe_run(msg_id, raw_msg, targets, MET, follow, timeout):
620 if self.maybe_run(msg_id, raw_msg, targets, MET, follow, timeout):
620
621
621 self.depending.pop(msg_id)
622 self.depending.pop(msg_id)
622 for mid in follow.union(after):
623 for mid in follow.union(after):
623 if mid in self.graph:
624 if mid in self.graph:
624 self.graph[mid].remove(msg_id)
625 self.graph[mid].remove(msg_id)
625
626
626 #----------------------------------------------------------------------
627 #----------------------------------------------------------------------
627 # methods to be overridden by subclasses
628 # methods to be overridden by subclasses
628 #----------------------------------------------------------------------
629 #----------------------------------------------------------------------
629
630
630 def add_job(self, idx):
631 def add_job(self, idx):
631 """Called after self.targets[idx] just got the job with header.
632 """Called after self.targets[idx] just got the job with header.
632 Override with subclasses. The default ordering is simple LRU.
633 Override with subclasses. The default ordering is simple LRU.
633 The default loads are the number of outstanding jobs."""
634 The default loads are the number of outstanding jobs."""
634 self.loads[idx] += 1
635 self.loads[idx] += 1
635 for lis in (self.targets, self.loads):
636 for lis in (self.targets, self.loads):
636 lis.append(lis.pop(idx))
637 lis.append(lis.pop(idx))
637
638
638
639
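# Worked example (not part of the original source) of the LRU rotation
# performed by add_job: the engine that just received work moves to the back
# of both lists, so index 0 is always the least-recently-used engine.
#
#     targets = ['e0', 'e1', 'e2']; loads = [0, 0, 0]
#     # after add_job(0): targets == ['e1', 'e2', 'e0'], loads == [0, 0, 1]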
639 def finish_job(self, idx):
640 def finish_job(self, idx):
640 """Called after self.targets[idx] just finished a job.
641 """Called after self.targets[idx] just finished a job.
641 Override with subclasses."""
642 Override with subclasses."""
642 self.loads[idx] -= 1
643 self.loads[idx] -= 1
643
644
644
645
645
646
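# Minimal subclass sketch (not part of the original source; the class name is
# hypothetical): overriding add_job/finish_job swaps the ordering policy
# while the dependency machinery above is inherited unchanged.
#
#     class PlainScheduler(TaskScheduler):
#         """Track loads, but skip the LRU rotation."""
#         def add_job(self, idx):
#             self.loads[idx] += 1
#         def finish_job(self, idx):
#             self.loads[idx] -= 1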
646 def launch_scheduler(in_addr, out_addr, mon_addr, not_addr, config=None,logname='ZMQ',
647 def launch_scheduler(in_addr, out_addr, mon_addr, not_addr, config=None,
647 log_url=None, loglevel=logging.DEBUG,
648 logname='root', log_url=None, loglevel=logging.DEBUG,
648 identity=b'task'):
649 identity=b'task'):
649 from zmq.eventloop import ioloop
650 from zmq.eventloop import ioloop
650 from zmq.eventloop.zmqstream import ZMQStream
651 from zmq.eventloop.zmqstream import ZMQStream
651
652
652 if config:
653 if config:
653 # unwrap dict back into Config
654 # unwrap dict back into Config
654 config = Config(config)
655 config = Config(config)
655
656
656 ctx = zmq.Context()
657 ctx = zmq.Context()
657 loop = ioloop.IOLoop()
658 loop = ioloop.IOLoop()
658 ins = ZMQStream(ctx.socket(zmq.XREP),loop)
659 ins = ZMQStream(ctx.socket(zmq.XREP),loop)
659 ins.setsockopt(zmq.IDENTITY, identity)
660 ins.setsockopt(zmq.IDENTITY, identity)
660 ins.bind(in_addr)
661 ins.bind(in_addr)
661
662
662 outs = ZMQStream(ctx.socket(zmq.XREP),loop)
663 outs = ZMQStream(ctx.socket(zmq.XREP),loop)
663 outs.setsockopt(zmq.IDENTITY, identity)
664 outs.setsockopt(zmq.IDENTITY, identity)
664 outs.bind(out_addr)
665 outs.bind(out_addr)
665 mons = ZMQStream(ctx.socket(zmq.PUB),loop)
666 mons = ZMQStream(ctx.socket(zmq.PUB),loop)
666 mons.connect(mon_addr)
667 mons.connect(mon_addr)
667 nots = ZMQStream(ctx.socket(zmq.SUB),loop)
668 nots = ZMQStream(ctx.socket(zmq.SUB),loop)
668 nots.setsockopt(zmq.SUBSCRIBE, '')
669 nots.setsockopt(zmq.SUBSCRIBE, '')
669 nots.connect(not_addr)
670 nots.connect(not_addr)
670
671
671 # setup logging. Note that these will not work in-process, because they clobber
672 # setup logging. Note that these will not work in-process, because they clobber
672 # existing loggers.
673 # existing loggers.
673 if log_url:
674 if log_url:
674 connect_logger(logname, ctx, log_url, root="scheduler", loglevel=loglevel)
675 log = connect_logger(logname, ctx, log_url, root="scheduler", loglevel=loglevel)
675 else:
676 else:
676 local_logger(logname, loglevel)
677 log = local_logger(logname, loglevel)
677
678
678 scheduler = TaskScheduler(client_stream=ins, engine_stream=outs,
679 scheduler = TaskScheduler(client_stream=ins, engine_stream=outs,
679 mon_stream=mons, notifier_stream=nots,
680 mon_stream=mons, notifier_stream=nots,
680 loop=loop, logname=logname,
681 loop=loop, log=log,
681 config=config)
682 config=config)
682 scheduler.start()
683 scheduler.start()
683 try:
684 try:
684 loop.start()
685 loop.start()
685 except KeyboardInterrupt:
686 except KeyboardInterrupt:
686 print ("interrupted, exiting...", file=sys.__stderr__)
687 print ("interrupted, exiting...", file=sys.__stderr__)
687
688
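# Illustrative usage (not part of the original source; all addresses are
# hypothetical). launch_scheduler blocks on the loop, and its logging setup
# clobbers existing loggers as noted above, so it normally runs in a
# dedicated process:
#
#     launch_scheduler('tcp://127.0.0.1:10101',   # in_addr: client-facing XREP
#                      'tcp://127.0.0.1:10102',   # out_addr: engine-facing XREP
#                      'tcp://127.0.0.1:10103',   # mon_addr: monitor PUB
#                      'tcp://127.0.0.1:10104',   # not_addr: notification SUB
#                      identity=b'task')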
@@ -1,99 +1,72 b''
1 """Base config factories."""
1 """Base config factories."""
2
2
3 #-----------------------------------------------------------------------------
3 #-----------------------------------------------------------------------------
4 # Copyright (C) 2008-2009 The IPython Development Team
4 # Copyright (C) 2010-2011 The IPython Development Team
5 #
5 #
6 # Distributed under the terms of the BSD License. The full license is in
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING, distributed as part of this software.
7 # the file COPYING, distributed as part of this software.
8 #-----------------------------------------------------------------------------
8 #-----------------------------------------------------------------------------
9
9
10 #-----------------------------------------------------------------------------
10 #-----------------------------------------------------------------------------
11 # Imports
11 # Imports
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13
13
14
14
15 import logging
15 import logging
16 import os
16 import os
17
17
18 import zmq
18 import zmq
19 from zmq.eventloop.ioloop import IOLoop
19 from zmq.eventloop.ioloop import IOLoop
20
20
21 from IPython.config.configurable import Configurable
21 from IPython.config.configurable import Configurable
22 from IPython.utils.traitlets import Int, Instance, Unicode
22 from IPython.utils.traitlets import Int, Instance, Unicode
23
23
24 from IPython.parallel.util import select_random_ports
24 from IPython.parallel.util import select_random_ports
25 from IPython.zmq.session import Session
25 from IPython.zmq.session import Session, SessionFactory
26
26
27 #-----------------------------------------------------------------------------
27 #-----------------------------------------------------------------------------
28 # Classes
28 # Classes
29 #-----------------------------------------------------------------------------
29 #-----------------------------------------------------------------------------
30 class LoggingFactory(Configurable):
31 """A most basic class, that has a `log` (type:`Logger`) attribute, set via a `logname` Trait."""
32 log = Instance('logging.Logger', ('ZMQ', logging.WARN))
33 logname = Unicode('ZMQ')
34 def _logname_changed(self, name, old, new):
35 self.log = logging.getLogger(new)
36
37
30
38 class SessionFactory(LoggingFactory):
39 """The Base factory from which every factory in IPython.parallel inherits"""
40
41 # not configurable:
42 context = Instance('zmq.Context')
43 def _context_default(self):
44 return zmq.Context.instance()
45
46 session = Instance('IPython.zmq.session.Session')
47 loop = Instance('zmq.eventloop.ioloop.IOLoop', allow_none=False)
48 def _loop_default(self):
49 return IOLoop.instance()
50
51
52 def __init__(self, **kwargs):
53 super(SessionFactory, self).__init__(**kwargs)
54
55 # construct the session
56 self.session = Session(**kwargs)
57
58
31
59 class RegistrationFactory(SessionFactory):
32 class RegistrationFactory(SessionFactory):
60 """The Base Configurable for objects that involve registration."""
33 """The Base Configurable for objects that involve registration."""
61
34
62 url = Unicode('', config=True,
35 url = Unicode('', config=True,
63 help="""The 0MQ url used for registration. This sets transport, ip, and port
36 help="""The 0MQ url used for registration. This sets transport, ip, and port
64 in one variable. For example: url='tcp://127.0.0.1:12345' or
37 in one variable. For example: url='tcp://127.0.0.1:12345' or
65 url='epgm://*:90210'""") # url takes precedence over ip,regport,transport
38 url='epgm://*:90210'""") # url takes precedence over ip,regport,transport
66 transport = Unicode('tcp', config=True,
39 transport = Unicode('tcp', config=True,
67 help="""The 0MQ transport for communications. This will likely be
40 help="""The 0MQ transport for communications. This will likely be
68 the default of 'tcp', but other values include 'ipc', 'epgm', 'inproc'.""")
41 the default of 'tcp', but other values include 'ipc', 'epgm', 'inproc'.""")
69 ip = Unicode('127.0.0.1', config=True,
42 ip = Unicode('127.0.0.1', config=True,
70 help="""The IP address for registration. This is generally either
43 help="""The IP address for registration. This is generally either
71 '127.0.0.1' for loopback only or '*' for all interfaces.
44 '127.0.0.1' for loopback only or '*' for all interfaces.
72 [default: '127.0.0.1']""")
45 [default: '127.0.0.1']""")
73 regport = Int(config=True,
46 regport = Int(config=True,
74 help="""The port on which the Hub listens for registration.""")
47 help="""The port on which the Hub listens for registration.""")
75 def _regport_default(self):
48 def _regport_default(self):
76 return select_random_ports(1)[0]
49 return select_random_ports(1)[0]
77
50
78 def __init__(self, **kwargs):
51 def __init__(self, **kwargs):
79 super(RegistrationFactory, self).__init__(**kwargs)
52 super(RegistrationFactory, self).__init__(**kwargs)
80 self._propagate_url()
53 self._propagate_url()
81 self._rebuild_url()
54 self._rebuild_url()
82 self.on_trait_change(self._propagate_url, 'url')
55 self.on_trait_change(self._propagate_url, 'url')
83 self.on_trait_change(self._rebuild_url, 'ip')
56 self.on_trait_change(self._rebuild_url, 'ip')
84 self.on_trait_change(self._rebuild_url, 'transport')
57 self.on_trait_change(self._rebuild_url, 'transport')
85 self.on_trait_change(self._rebuild_url, 'regport')
58 self.on_trait_change(self._rebuild_url, 'regport')
86
59
87 def _rebuild_url(self):
60 def _rebuild_url(self):
88 self.url = "%s://%s:%i"%(self.transport, self.ip, self.regport)
61 self.url = "%s://%s:%i"%(self.transport, self.ip, self.regport)
89
62
90 def _propagate_url(self):
63 def _propagate_url(self):
91 """Ensure self.url contains full transport://interface:port"""
64 """Ensure self.url contains full transport://interface:port"""
92 if self.url:
65 if self.url:
93 iface = self.url.split('://',1)
66 iface = self.url.split('://',1)
94 if len(iface) == 2:
67 if len(iface) == 2:
95 self.transport,iface = iface
68 self.transport,iface = iface
96 iface = iface.split(':')
69 iface = iface.split(':')
97 self.ip = iface[0]
70 self.ip = iface[0]
98 if iface[1]:
71 if iface[1]:
99 self.regport = int(iface[1])
72 self.regport = int(iface[1])
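# Illustrative example (not part of the original source; the address is
# hypothetical): since url takes precedence, assigning it repopulates
# transport, ip, and regport via _propagate_url:
#
#     f = RegistrationFactory(url='tcp://10.0.0.1:12345')
#     # f.transport == 'tcp'; f.ip == '10.0.0.1'; f.regport == 12345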
@@ -1,466 +1,468 b''
1 """some generic utilities for dealing with classes, urls, and serialization"""
1 """some generic utilities for dealing with classes, urls, and serialization"""
2 #-----------------------------------------------------------------------------
2 #-----------------------------------------------------------------------------
3 # Copyright (C) 2010-2011 The IPython Development Team
3 # Copyright (C) 2010-2011 The IPython Development Team
4 #
4 #
5 # Distributed under the terms of the BSD License. The full license is in
5 # Distributed under the terms of the BSD License. The full license is in
6 # the file COPYING, distributed as part of this software.
6 # the file COPYING, distributed as part of this software.
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8
8
9 #-----------------------------------------------------------------------------
9 #-----------------------------------------------------------------------------
10 # Imports
10 # Imports
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12
12
13 # Standard library imports.
13 # Standard library imports.
14 import logging
14 import logging
15 import os
15 import os
16 import re
16 import re
17 import stat
17 import stat
18 import socket
18 import socket
19 import sys
19 import sys
20 from signal import signal, SIGINT, SIGABRT, SIGTERM
20 from signal import signal, SIGINT, SIGABRT, SIGTERM
21 try:
21 try:
22 from signal import SIGKILL
22 from signal import SIGKILL
23 except ImportError:
23 except ImportError:
24 SIGKILL=None
24 SIGKILL=None
25
25
26 try:
26 try:
27 import cPickle
27 import cPickle
28 pickle = cPickle
28 pickle = cPickle
29 except:
29 except:
30 cPickle = None
30 cPickle = None
31 import pickle
31 import pickle
32
32
33 # System library imports
33 # System library imports
34 import zmq
34 import zmq
35 from zmq.log import handlers
35 from zmq.log import handlers
36
36
37 # IPython imports
37 # IPython imports
38 from IPython.utils.pickleutil import can, uncan, canSequence, uncanSequence
38 from IPython.utils.pickleutil import can, uncan, canSequence, uncanSequence
39 from IPython.utils.newserialized import serialize, unserialize
39 from IPython.utils.newserialized import serialize, unserialize
40 from IPython.zmq.log import EnginePUBHandler
40 from IPython.zmq.log import EnginePUBHandler
41
41
42 #-----------------------------------------------------------------------------
42 #-----------------------------------------------------------------------------
43 # Classes
43 # Classes
44 #-----------------------------------------------------------------------------
44 #-----------------------------------------------------------------------------
45
45
46 class Namespace(dict):
46 class Namespace(dict):
47 """Subclass of dict for attribute access to keys."""
47 """Subclass of dict for attribute access to keys."""
48
48
49 def __getattr__(self, key):
49 def __getattr__(self, key):
50 """getattr aliased to getitem"""
50 """getattr aliased to getitem"""
51 if key in self.iterkeys():
51 if key in self.iterkeys():
52 return self[key]
52 return self[key]
53 else:
53 else:
54 raise NameError(key)
54 raise NameError(key)
55
55
56 def __setattr__(self, key, value):
56 def __setattr__(self, key, value):
57 """setattr aliased to setitem, with strict"""
57 """setattr aliased to setitem, with strict"""
58 if hasattr(dict, key):
58 if hasattr(dict, key):
59 raise KeyError("Cannot override dict key %r"%key)
59 raise KeyError("Cannot override dict key %r"%key)
60 self[key] = value
60 self[key] = value
61
61
62
62
63 class ReverseDict(dict):
63 class ReverseDict(dict):
64 """simple double-keyed subset of dict methods."""
64 """simple double-keyed subset of dict methods."""
65
65
66 def __init__(self, *args, **kwargs):
66 def __init__(self, *args, **kwargs):
67 dict.__init__(self, *args, **kwargs)
67 dict.__init__(self, *args, **kwargs)
68 self._reverse = dict()
68 self._reverse = dict()
69 for key, value in self.iteritems():
69 for key, value in self.iteritems():
70 self._reverse[value] = key
70 self._reverse[value] = key
71
71
72 def __getitem__(self, key):
72 def __getitem__(self, key):
73 try:
73 try:
74 return dict.__getitem__(self, key)
74 return dict.__getitem__(self, key)
75 except KeyError:
75 except KeyError:
76 return self._reverse[key]
76 return self._reverse[key]
77
77
78 def __setitem__(self, key, value):
78 def __setitem__(self, key, value):
79 if key in self._reverse:
79 if key in self._reverse:
80 raise KeyError("Can't have key %r on both sides!"%key)
80 raise KeyError("Can't have key %r on both sides!"%key)
81 dict.__setitem__(self, key, value)
81 dict.__setitem__(self, key, value)
82 self._reverse[value] = key
82 self._reverse[value] = key
83
83
84 def pop(self, key):
84 def pop(self, key):
85 value = dict.pop(self, key)
85 value = dict.pop(self, key)
86 self._reverse.pop(value)
86 self._reverse.pop(value)
87 return value
87 return value
88
88
89 def get(self, key, default=None):
89 def get(self, key, default=None):
90 try:
90 try:
91 return self[key]
91 return self[key]
92 except KeyError:
92 except KeyError:
93 return default
93 return default
94
94
95 #-----------------------------------------------------------------------------
95 #-----------------------------------------------------------------------------
96 # Functions
96 # Functions
97 #-----------------------------------------------------------------------------
97 #-----------------------------------------------------------------------------
98
98
99 def validate_url(url):
99 def validate_url(url):
100 """validate a url for zeromq"""
100 """validate a url for zeromq"""
101 if not isinstance(url, basestring):
101 if not isinstance(url, basestring):
102 raise TypeError("url must be a string, not %r"%type(url))
102 raise TypeError("url must be a string, not %r"%type(url))
103 url = url.lower()
103 url = url.lower()
104
104
105 proto_addr = url.split('://')
105 proto_addr = url.split('://')
106 assert len(proto_addr) == 2, 'Invalid url: %r'%url
106 assert len(proto_addr) == 2, 'Invalid url: %r'%url
107 proto, addr = proto_addr
107 proto, addr = proto_addr
108 assert proto in ['tcp','pgm','epgm','ipc','inproc'], "Invalid protocol: %r"%proto
108 assert proto in ['tcp','pgm','epgm','ipc','inproc'], "Invalid protocol: %r"%proto
109
109
110 # domain pattern adapted from http://www.regexlib.com/REDetails.aspx?regexp_id=391
110 # domain pattern adapted from http://www.regexlib.com/REDetails.aspx?regexp_id=391
111 # author: Remi Sabourin
111 # author: Remi Sabourin
112 pat = re.compile(r'^([\w\d]([\w\d\-]{0,61}[\w\d])?\.)*[\w\d]([\w\d\-]{0,61}[\w\d])?$')
112 pat = re.compile(r'^([\w\d]([\w\d\-]{0,61}[\w\d])?\.)*[\w\d]([\w\d\-]{0,61}[\w\d])?$')
113
113
114 if proto == 'tcp':
114 if proto == 'tcp':
115 lis = addr.split(':')
115 lis = addr.split(':')
116 assert len(lis) == 2, 'Invalid url: %r'%url
116 assert len(lis) == 2, 'Invalid url: %r'%url
117 addr,s_port = lis
117 addr,s_port = lis
118 try:
118 try:
119 port = int(s_port)
119 port = int(s_port)
120 except ValueError:
120 except ValueError:
121 raise AssertionError("Invalid port %r in url: %r"%(s_port, url))
121 raise AssertionError("Invalid port %r in url: %r"%(s_port, url))
122
122
123 assert addr == '*' or pat.match(addr) is not None, 'Invalid url: %r'%url
123 assert addr == '*' or pat.match(addr) is not None, 'Invalid url: %r'%url
124
124
125 else:
125 else:
126 # only validate tcp urls currently
126 # only validate tcp urls currently
127 pass
127 pass
128
128
129 return True
129 return True
130
130
131
131
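# Illustrative examples (not part of the original source):
#
#     validate_url('tcp://127.0.0.1:12345')    # -> True
#     validate_url('tcp://127.0.0.1')          # AssertionError (missing port)
#     validate_url(10101)                      # TypeError (not a string)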
132 def validate_url_container(container):
132 def validate_url_container(container):
133 """validate a potentially nested collection of urls."""
133 """validate a potentially nested collection of urls."""
134 if isinstance(container, basestring):
134 if isinstance(container, basestring):
135 url = container
135 url = container
136 return validate_url(url)
136 return validate_url(url)
137 elif isinstance(container, dict):
137 elif isinstance(container, dict):
138 container = container.itervalues()
138 container = container.itervalues()
139
139
140 for element in container:
140 for element in container:
141 validate_url_container(element)
141 validate_url_container(element)
142
142
143
143
144 def split_url(url):
144 def split_url(url):
145 """split a zmq url (tcp://ip:port) into ('tcp','ip','port')."""
145 """split a zmq url (tcp://ip:port) into ('tcp','ip','port')."""
146 proto_addr = url.split('://')
146 proto_addr = url.split('://')
147 assert len(proto_addr) == 2, 'Invalid url: %r'%url
147 assert len(proto_addr) == 2, 'Invalid url: %r'%url
148 proto, addr = proto_addr
148 proto, addr = proto_addr
149 lis = addr.split(':')
149 lis = addr.split(':')
150 assert len(lis) == 2, 'Invalid url: %r'%url
150 assert len(lis) == 2, 'Invalid url: %r'%url
151 addr,s_port = lis
151 addr,s_port = lis
152 return proto,addr,s_port
152 return proto,addr,s_port
153
153
154 def disambiguate_ip_address(ip, location=None):
154 def disambiguate_ip_address(ip, location=None):
155 """turn multi-ip interfaces '0.0.0.0' and '*' into connectable
155 """turn multi-ip interfaces '0.0.0.0' and '*' into connectable
156 ones, based on the location (default interpretation of location is localhost)."""
156 ones, based on the location (default interpretation of location is localhost)."""
157 if ip in ('0.0.0.0', '*'):
157 if ip in ('0.0.0.0', '*'):
158 external_ips = socket.gethostbyname_ex(socket.gethostname())[2]
158 external_ips = socket.gethostbyname_ex(socket.gethostname())[2]
159 if location is None or location in external_ips:
159 if location is None or location in external_ips:
160 ip='127.0.0.1'
160 ip='127.0.0.1'
161 elif location:
161 elif location:
162 return location
162 return location
163 return ip
163 return ip
164
164
165 def disambiguate_url(url, location=None):
165 def disambiguate_url(url, location=None):
166 """turn multi-ip interfaces '0.0.0.0' and '*' into connectable
166 """turn multi-ip interfaces '0.0.0.0' and '*' into connectable
167 ones, based on the location (default interpretation is localhost).
167 ones, based on the location (default interpretation is localhost).
168
168
169 This is for zeromq urls, such as tcp://*:10101."""
169 This is for zeromq urls, such as tcp://*:10101."""
170 try:
170 try:
171 proto,ip,port = split_url(url)
171 proto,ip,port = split_url(url)
172 except AssertionError:
172 except AssertionError:
173 # probably not tcp url; could be ipc, etc.
173 # probably not tcp url; could be ipc, etc.
174 return url
174 return url
175
175
176 ip = disambiguate_ip_address(ip,location)
176 ip = disambiguate_ip_address(ip,location)
177
177
178 return "%s://%s:%s"%(proto,ip,port)
178 return "%s://%s:%s"%(proto,ip,port)
179
179
180
180
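# Illustrative examples (not part of the original source; the ipc path is
# hypothetical): wildcard tcp binds are rewritten to a connectable address,
# while non-tcp urls pass through untouched.
#
#     disambiguate_url('tcp://*:10101')         # -> 'tcp://127.0.0.1:10101'
#     disambiguate_url('ipc:///tmp/task.ipc')   # -> 'ipc:///tmp/task.ipc'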
181 def rekey(dikt):
181 def rekey(dikt):
182 """Rekey a dict that has been forced to use str keys where there should be
182 """Rekey a dict that has been forced to use str keys where there should be
183 ints by json. This belongs in the jsonutil added by fperez."""
183 ints by json. This belongs in the jsonutil added by fperez."""
184 for k in dikt.iterkeys():
184 for k in dikt.iterkeys():
185 if isinstance(k, str):
185 if isinstance(k, str):
186 ik=fk=None
186 ik=fk=None
187 try:
187 try:
188 ik = int(k)
188 ik = int(k)
189 except ValueError:
189 except ValueError:
190 try:
190 try:
191 fk = float(k)
191 fk = float(k)
192 except ValueError:
192 except ValueError:
193 continue
193 continue
194 if ik is not None:
194 if ik is not None:
195 nk = ik
195 nk = ik
196 else:
196 else:
197 nk = fk
197 nk = fk
198 if nk in dikt:
198 if nk in dikt:
199 raise KeyError("already have key %r"%nk)
199 raise KeyError("already have key %r"%nk)
200 dikt[nk] = dikt.pop(k)
200 dikt[nk] = dikt.pop(k)
201 return dikt
201 return dikt
202
202
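# Illustrative example (not part of the original source): json turns numeric
# keys into strings; rekey restores ints and floats, leaving other keys alone.
#
#     rekey({'0': 'a', '1.5': 'b', 'name': 'c'})
#     # -> {0: 'a', 1.5: 'b', 'name': 'c'}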
203 def serialize_object(obj, threshold=64e-6):
203 def serialize_object(obj, threshold=64e-6):
204 """Serialize an object into a list of sendable buffers.
204 """Serialize an object into a list of sendable buffers.
205
205
206 Parameters
206 Parameters
207 ----------
207 ----------
208
208
209 obj : object
209 obj : object
210 The object to be serialized
210 The object to be serialized
211 threshold : float
211 threshold : float
212 The threshold for not double-pickling the content.
212 The threshold for not double-pickling the content.
213
213
214
214
215 Returns
215 Returns
216 -------
216 -------
217 ('pmd', [bufs]) :
217 ('pmd', [bufs]) :
218 where pmd is the pickled metadata wrapper,
218 where pmd is the pickled metadata wrapper,
219 bufs is a list of data buffers
219 bufs is a list of data buffers
220 """
220 """
221 databuffers = []
221 databuffers = []
222 if isinstance(obj, (list, tuple)):
222 if isinstance(obj, (list, tuple)):
223 clist = canSequence(obj)
223 clist = canSequence(obj)
224 slist = map(serialize, clist)
224 slist = map(serialize, clist)
225 for s in slist:
225 for s in slist:
226 if s.typeDescriptor in ('buffer', 'ndarray') or s.getDataSize() > threshold:
226 if s.typeDescriptor in ('buffer', 'ndarray') or s.getDataSize() > threshold:
227 databuffers.append(s.getData())
227 databuffers.append(s.getData())
228 s.data = None
228 s.data = None
229 return pickle.dumps(slist,-1), databuffers
229 return pickle.dumps(slist,-1), databuffers
230 elif isinstance(obj, dict):
230 elif isinstance(obj, dict):
231 sobj = {}
231 sobj = {}
232 for k in sorted(obj.iterkeys()):
232 for k in sorted(obj.iterkeys()):
233 s = serialize(can(obj[k]))
233 s = serialize(can(obj[k]))
234 if s.typeDescriptor in ('buffer', 'ndarray') or s.getDataSize() > threshold:
234 if s.typeDescriptor in ('buffer', 'ndarray') or s.getDataSize() > threshold:
235 databuffers.append(s.getData())
235 databuffers.append(s.getData())
236 s.data = None
236 s.data = None
237 sobj[k] = s
237 sobj[k] = s
238 return pickle.dumps(sobj,-1),databuffers
238 return pickle.dumps(sobj,-1),databuffers
239 else:
239 else:
240 s = serialize(can(obj))
240 s = serialize(can(obj))
241 if s.typeDescriptor in ('buffer', 'ndarray') or s.getDataSize() > threshold:
241 if s.typeDescriptor in ('buffer', 'ndarray') or s.getDataSize() > threshold:
242 databuffers.append(s.getData())
242 databuffers.append(s.getData())
243 s.data = None
243 s.data = None
244 return pickle.dumps(s,-1),databuffers
244 return pickle.dumps(s,-1),databuffers
245
245
246
246
247 def unserialize_object(bufs):
247 def unserialize_object(bufs):
248 """reconstruct an object serialized by serialize_object from data buffers."""
248 """reconstruct an object serialized by serialize_object from data buffers."""
249 bufs = list(bufs)
249 bufs = list(bufs)
250 sobj = pickle.loads(bufs.pop(0))
250 sobj = pickle.loads(bufs.pop(0))
251 if isinstance(sobj, (list, tuple)):
251 if isinstance(sobj, (list, tuple)):
252 for s in sobj:
252 for s in sobj:
253 if s.data is None:
253 if s.data is None:
254 s.data = bufs.pop(0)
254 s.data = bufs.pop(0)
255 return uncanSequence(map(unserialize, sobj)), bufs
255 return uncanSequence(map(unserialize, sobj)), bufs
256 elif isinstance(sobj, dict):
256 elif isinstance(sobj, dict):
257 newobj = {}
257 newobj = {}
258 for k in sorted(sobj.iterkeys()):
258 for k in sorted(sobj.iterkeys()):
259 s = sobj[k]
259 s = sobj[k]
260 if s.data is None:
260 if s.data is None:
261 s.data = bufs.pop(0)
261 s.data = bufs.pop(0)
262 newobj[k] = uncan(unserialize(s))
262 newobj[k] = uncan(unserialize(s))
263 return newobj, bufs
263 return newobj, bufs
264 else:
264 else:
265 if sobj.data is None:
265 if sobj.data is None:
266 sobj.data = bufs.pop(0)
266 sobj.data = bufs.pop(0)
267 return uncan(unserialize(sobj)), bufs
267 return uncan(unserialize(sobj)), bufs
268
268
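To make the intended round-trip concrete, here is a minimal sketch (illustrative only; it assumes this module's serialize_object/unserialize_object plus numpy, so the ndarray takes the separate-buffer path):

    import numpy
    pmd, bufs = serialize_object({'a': 1, 'arr': numpy.arange(1000)})
    # pmd is a single pickle of the metadata; the array's bytes ride in bufs
    obj, leftover = unserialize_object([pmd] + bufs)
    assert obj['a'] == 1 and len(leftover) == 0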
269 def pack_apply_message(f, args, kwargs, threshold=64e-6):
269 def pack_apply_message(f, args, kwargs, threshold=64e-6):
270 """pack up a function, args, and kwargs to be sent over the wire
270 """pack up a function, args, and kwargs to be sent over the wire
271 as a series of buffers. Any object whose data is larger than `threshold`
271 as a series of buffers. Any object whose data is larger than `threshold`
272 will not have its data copied (currently only numpy arrays support zero-copy)"""
272 will not have its data copied (currently only numpy arrays support zero-copy)"""
273 msg = [pickle.dumps(can(f),-1)]
273 msg = [pickle.dumps(can(f),-1)]
274 databuffers = [] # for large objects
274 databuffers = [] # for large objects
275 sargs, bufs = serialize_object(args,threshold)
275 sargs, bufs = serialize_object(args,threshold)
276 msg.append(sargs)
276 msg.append(sargs)
277 databuffers.extend(bufs)
277 databuffers.extend(bufs)
278 skwargs, bufs = serialize_object(kwargs,threshold)
278 skwargs, bufs = serialize_object(kwargs,threshold)
279 msg.append(skwargs)
279 msg.append(skwargs)
280 databuffers.extend(bufs)
280 databuffers.extend(bufs)
281 msg.extend(databuffers)
281 msg.extend(databuffers)
282 return msg
282 return msg
283
283
284 def unpack_apply_message(bufs, g=None, copy=True):
284 def unpack_apply_message(bufs, g=None, copy=True):
285 """unpack f,args,kwargs from buffers packed by pack_apply_message()
285 """unpack f,args,kwargs from buffers packed by pack_apply_message()
286 Returns: original f,args,kwargs"""
286 Returns: original f,args,kwargs"""
287 bufs = list(bufs) # allow us to pop
287 bufs = list(bufs) # allow us to pop
288 assert len(bufs) >= 3, "not enough buffers!"
288 assert len(bufs) >= 3, "not enough buffers!"
289 if not copy:
289 if not copy:
290 for i in range(3):
290 for i in range(3):
291 bufs[i] = bufs[i].bytes
291 bufs[i] = bufs[i].bytes
292 cf = pickle.loads(bufs.pop(0))
292 cf = pickle.loads(bufs.pop(0))
293 sargs = list(pickle.loads(bufs.pop(0)))
293 sargs = list(pickle.loads(bufs.pop(0)))
294 skwargs = dict(pickle.loads(bufs.pop(0)))
294 skwargs = dict(pickle.loads(bufs.pop(0)))
295 # print sargs, skwargs
295 # print sargs, skwargs
296 f = uncan(cf, g)
296 f = uncan(cf, g)
297 for sa in sargs:
297 for sa in sargs:
298 if sa.data is None:
298 if sa.data is None:
299 m = bufs.pop(0)
299 m = bufs.pop(0)
300 if sa.getTypeDescriptor() in ('buffer', 'ndarray'):
300 if sa.getTypeDescriptor() in ('buffer', 'ndarray'):
301 # always use a buffer, until memoryviews get sorted out
301 # always use a buffer, until memoryviews get sorted out
302 sa.data = buffer(m)
302 sa.data = buffer(m)
303 # disable memoryview support
303 # disable memoryview support
304 # if copy:
304 # if copy:
305 # sa.data = buffer(m)
305 # sa.data = buffer(m)
306 # else:
306 # else:
307 # sa.data = m.buffer
307 # sa.data = m.buffer
308 else:
308 else:
309 if copy:
309 if copy:
310 sa.data = m
310 sa.data = m
311 else:
311 else:
312 sa.data = m.bytes
312 sa.data = m.bytes
313
313
314 args = uncanSequence(map(unserialize, sargs), g)
314 args = uncanSequence(map(unserialize, sargs), g)
315 kwargs = {}
315 kwargs = {}
316 for k in sorted(skwargs.iterkeys()):
316 for k in sorted(skwargs.iterkeys()):
317 sa = skwargs[k]
317 sa = skwargs[k]
318 if sa.data is None:
318 if sa.data is None:
319 m = bufs.pop(0)
319 m = bufs.pop(0)
320 if sa.getTypeDescriptor() in ('buffer', 'ndarray'):
320 if sa.getTypeDescriptor() in ('buffer', 'ndarray'):
321 # always use a buffer, until memoryviews get sorted out
321 # always use a buffer, until memoryviews get sorted out
322 sa.data = buffer(m)
322 sa.data = buffer(m)
323 # disable memoryview support
323 # disable memoryview support
324 # if copy:
324 # if copy:
325 # sa.data = buffer(m)
325 # sa.data = buffer(m)
326 # else:
326 # else:
327 # sa.data = m.buffer
327 # sa.data = m.buffer
328 else:
328 else:
329 if copy:
329 if copy:
330 sa.data = m
330 sa.data = m
331 else:
331 else:
332 sa.data = m.bytes
332 sa.data = m.bytes
333
333
334 kwargs[k] = uncan(unserialize(sa), g)
334 kwargs[k] = uncan(unserialize(sa), g)
335
335
336 return f,args,kwargs
336 return f,args,kwargs
337
337
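A quick sketch of the apply round-trip these two functions implement (hypothetical f; assumes numpy and this module's functions are importable):

    import numpy
    def f(x, scale=1.0):
        return x.sum() * scale
    msg = pack_apply_message(f, (numpy.arange(10),), {'scale': 2.0})
    f2, args, kwargs = unpack_apply_message(msg)
    assert f2(*args, **kwargs) == 90.0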
338 #--------------------------------------------------------------------------
338 #--------------------------------------------------------------------------
339 # helpers for implementing old MEC API via view.apply
339 # helpers for implementing old MEC API via view.apply
340 #--------------------------------------------------------------------------
340 #--------------------------------------------------------------------------
341
341
342 def interactive(f):
342 def interactive(f):
343 """decorator for making functions appear as interactively defined.
343 """decorator for making functions appear as interactively defined.
344 This results in the function's globals() being the engine's user_ns
344 This results in the function's globals() being the engine's user_ns
345 instead of this module's globals.
345 instead of this module's globals.
346 """
346 """
347 f.__module__ = '__main__'
347 f.__module__ = '__main__'
348 return f
348 return f
349
349
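For example (sketch; `view` stands in for a hypothetical parallel view object):

    @interactive
    def mean_of(name):
        # looked up in the engine's user_ns, not in this module
        return globals()[name].mean()

    # view.apply(mean_of, 'data')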
350 @interactive
350 @interactive
351 def _push(ns):
351 def _push(ns):
352 """helper method for implementing `client.push` via `client.apply`"""
352 """helper method for implementing `client.push` via `client.apply`"""
353 globals().update(ns)
353 globals().update(ns)
354
354
355 @interactive
355 @interactive
356 def _pull(keys):
356 def _pull(keys):
357 """helper method for implementing `client.pull` via `client.apply`"""
357 """helper method for implementing `client.pull` via `client.apply`"""
358 user_ns = globals()
358 user_ns = globals()
359 if isinstance(keys, (list,tuple, set)):
359 if isinstance(keys, (list,tuple, set)):
360 for key in keys:
360 for key in keys:
361 if key not in user_ns:
361 if key not in user_ns:
362 raise NameError("name '%s' is not defined"%key)
362 raise NameError("name '%s' is not defined"%key)
363 return map(user_ns.get, keys)
363 return map(user_ns.get, keys)
364 else:
364 else:
365 if keys not in user_ns:
365 if keys not in user_ns:
366 raise NameError("name '%s' is not defined"%keys)
366 raise NameError("name '%s' is not defined"%keys)
367 return user_ns.get(keys)
367 return user_ns.get(keys)
368
368
369 @interactive
369 @interactive
370 def _execute(code):
370 def _execute(code):
371 """helper method for implementing `client.execute` via `client.apply`"""
371 """helper method for implementing `client.execute` via `client.apply`"""
372 exec code in globals()
372 exec code in globals()
373
373
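Taken together, these three helpers let a view emulate the old MultiEngineClient API with nothing but apply (sketch; `view` is hypothetical):

    # view.apply(_push, dict(a=1))        # client.push({'a': 1})
    # view.apply(_pull, ('a', 'b'))       # client.pull(('a', 'b'))
    # view.apply(_execute, 'c = a + 1')   # client.execute('c = a + 1')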
374 #--------------------------------------------------------------------------
374 #--------------------------------------------------------------------------
375 # extra process management utilities
375 # extra process management utilities
376 #--------------------------------------------------------------------------
376 #--------------------------------------------------------------------------
377
377
378 _random_ports = set()
378 _random_ports = set()
379
379
380 def select_random_ports(n):
380 def select_random_ports(n):
381 """Selects and return n random ports that are available."""
381 """Selects and return n random ports that are available."""
382 ports = []
382 ports = []
383 for i in xrange(n):
383 for i in xrange(n):
384 sock = socket.socket()
384 sock = socket.socket()
385 sock.bind(('', 0))
385 sock.bind(('', 0))
386 while sock.getsockname()[1] in _random_ports:
386 while sock.getsockname()[1] in _random_ports:
387 sock.close()
387 sock.close()
388 sock = socket.socket()
388 sock = socket.socket()
389 sock.bind(('', 0))
389 sock.bind(('', 0))
390 ports.append(sock)
390 ports.append(sock)
391 for i, sock in enumerate(ports):
391 for i, sock in enumerate(ports):
392 port = sock.getsockname()[1]
392 port = sock.getsockname()[1]
393 sock.close()
393 sock.close()
394 ports[i] = port
394 ports[i] = port
395 _random_ports.add(port)
395 _random_ports.add(port)
396 return ports
396 return ports
397
397
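Usage sketch (relies on the socket/zmq imports at the top of this file; note the probe sockets are closed before any bind, so another process could grab a port in the gap):

    reg_port, hb_port, mux_port = select_random_ports(3)
    sock = zmq.Context.instance().socket(zmq.ROUTER)
    sock.bind('tcp://127.0.0.1:%i' % reg_port)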
398 def signal_children(children):
398 def signal_children(children):
399 """Relay interupt/term signals to children, for more solid process cleanup."""
399 """Relay interupt/term signals to children, for more solid process cleanup."""
400 def terminate_children(sig, frame):
400 def terminate_children(sig, frame):
401 logging.critical("Got signal %i, terminating children..."%sig)
401 logging.critical("Got signal %i, terminating children..."%sig)
402 for child in children:
402 for child in children:
403 child.terminate()
403 child.terminate()
404
404
405 sys.exit(sig != SIGINT)
405 sys.exit(sig != SIGINT)
406 # sys.exit(sig)
406 # sys.exit(sig)
407 for sig in (SIGINT, SIGABRT, SIGTERM):
407 for sig in (SIGINT, SIGABRT, SIGTERM):
408 signal(sig, terminate_children)
408 signal(sig, terminate_children)
409
409
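Usage sketch (hypothetical launch; assumes the `from signal import signal, SIGINT, SIGABRT, SIGTERM` that the bare signal()/SIGINT references above imply):

    from subprocess import Popen
    children = [Popen(['ipengine']) for i in range(4)]
    signal_children(children)  # SIGINT/SIGABRT/SIGTERM now terminate the engines too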
410 def generate_exec_key(keyfile):
410 def generate_exec_key(keyfile):
411 import uuid
411 import uuid
412 newkey = str(uuid.uuid4())
412 newkey = str(uuid.uuid4())
413 with open(keyfile, 'w') as f:
413 with open(keyfile, 'w') as f:
414 # f.write('ipython-key ')
414 # f.write('ipython-key ')
415 f.write(newkey+'\n')
415 f.write(newkey+'\n')
416 # set user-only RW permissions (0600)
416 # set user-only RW permissions (0600)
417 # this will have no effect on Windows
417 # this will have no effect on Windows
418 os.chmod(keyfile, stat.S_IRUSR|stat.S_IWUSR)
418 os.chmod(keyfile, stat.S_IRUSR|stat.S_IWUSR)
419
419
420
420
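Sketch of producing and consuming a key file (the path is illustrative):

    keyfile = os.path.join(os.path.expanduser('~/.ipython'), 'exec_key')
    generate_exec_key(keyfile)
    with open(keyfile, 'rb') as f:
        key = f.read().strip()   # e.g. passed to Session(key=key) for HMAC signing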
421 def integer_loglevel(loglevel):
421 def integer_loglevel(loglevel):
422 try:
422 try:
423 loglevel = int(loglevel)
423 loglevel = int(loglevel)
424 except ValueError:
424 except ValueError:
425 if isinstance(loglevel, str):
425 if isinstance(loglevel, str):
426 loglevel = getattr(logging, loglevel)
426 loglevel = getattr(logging, loglevel)
427 return loglevel
427 return loglevel
428
428
429 def connect_logger(logname, context, iface, root="ip", loglevel=logging.DEBUG):
429 def connect_logger(logname, context, iface, root="ip", loglevel=logging.DEBUG):
430 logger = logging.getLogger(logname)
430 logger = logging.getLogger(logname)
431 if any([isinstance(h, handlers.PUBHandler) for h in logger.handlers]):
431 if any([isinstance(h, handlers.PUBHandler) for h in logger.handlers]):
432 # don't add a second PUBHandler
432 # don't add a second PUBHandler
433 return
433 return
434 loglevel = integer_loglevel(loglevel)
434 loglevel = integer_loglevel(loglevel)
435 lsock = context.socket(zmq.PUB)
435 lsock = context.socket(zmq.PUB)
436 lsock.connect(iface)
436 lsock.connect(iface)
437 handler = handlers.PUBHandler(lsock)
437 handler = handlers.PUBHandler(lsock)
438 handler.setLevel(loglevel)
438 handler.setLevel(loglevel)
439 handler.root_topic = root
439 handler.root_topic = root
440 logger.addHandler(handler)
440 logger.addHandler(handler)
441 logger.setLevel(loglevel)
441 logger.setLevel(loglevel)
442
442
443 def connect_engine_logger(context, iface, engine, loglevel=logging.DEBUG):
443 def connect_engine_logger(context, iface, engine, loglevel=logging.DEBUG):
444 logger = logging.getLogger()
444 logger = logging.getLogger()
445 if any([isinstance(h, handlers.PUBHandler) for h in logger.handlers]):
445 if any([isinstance(h, handlers.PUBHandler) for h in logger.handlers]):
446 # don't add a second PUBHandler
446 # don't add a second PUBHandler
447 return logger
447 return logger
448 loglevel = integer_loglevel(loglevel)
448 loglevel = integer_loglevel(loglevel)
449 lsock = context.socket(zmq.PUB)
449 lsock = context.socket(zmq.PUB)
450 lsock.connect(iface)
450 lsock.connect(iface)
451 handler = EnginePUBHandler(engine, lsock)
451 handler = EnginePUBHandler(engine, lsock)
452 handler.setLevel(loglevel)
452 handler.setLevel(loglevel)
453 logger.addHandler(handler)
453 logger.addHandler(handler)
454 logger.setLevel(loglevel)
454 logger.setLevel(loglevel)
455 return logger
455
456
456 def local_logger(logname, loglevel=logging.DEBUG):
457 def local_logger(logname, loglevel=logging.DEBUG):
457 loglevel = integer_loglevel(loglevel)
458 loglevel = integer_loglevel(loglevel)
458 logger = logging.getLogger(logname)
459 logger = logging.getLogger(logname)
459 if any([isinstance(h, logging.StreamHandler) for h in logger.handlers]):
460 if any([isinstance(h, logging.StreamHandler) for h in logger.handlers]):
460 # don't add a second StreamHandler
461 # don't add a second StreamHandler
461 return logger
462 return logger
462 handler = logging.StreamHandler()
463 handler = logging.StreamHandler()
463 handler.setLevel(loglevel)
464 handler.setLevel(loglevel)
464 logger.addHandler(handler)
465 logger.addHandler(handler)
465 logger.setLevel(loglevel)
466 logger.setLevel(loglevel)
467 return logger
466
468
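End-to-end sketch of the logging helpers (the endpoint and names are illustrative):

    ctx = zmq.Context.instance()
    # engines connect a PUBHandler to a controller-side subscriber:
    connect_logger('ipengine', ctx, 'tcp://127.0.0.1:20202', root='engine', loglevel='DEBUG')
    # plus a plain console logger for local diagnostics:
    log = local_logger('ipengine', logging.INFO)
    log.info('engine ready')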
@@ -1,479 +1,511 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 """edited session.py to work with streams, and move msg_type to the header
2 """edited session.py to work with streams, and move msg_type to the header
3 """
3 """
4 #-----------------------------------------------------------------------------
4 #-----------------------------------------------------------------------------
5 # Copyright (C) 2010-2011 The IPython Development Team
5 # Copyright (C) 2010-2011 The IPython Development Team
6 #
6 #
7 # Distributed under the terms of the BSD License. The full license is in
7 # Distributed under the terms of the BSD License. The full license is in
8 # the file COPYING, distributed as part of this software.
8 # the file COPYING, distributed as part of this software.
9 #-----------------------------------------------------------------------------
9 #-----------------------------------------------------------------------------
10
10
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12 # Imports
12 # Imports
13 #-----------------------------------------------------------------------------
13 #-----------------------------------------------------------------------------
14
14
15 import hmac
15 import hmac
16 import logging
16 import os
17 import os
17 import pprint
18 import pprint
18 import uuid
19 import uuid
19 from datetime import datetime
20 from datetime import datetime
20
21
21 try:
22 try:
22 import cPickle
23 import cPickle
23 pickle = cPickle
24 pickle = cPickle
24 except ImportError:
25 except ImportError:
25 cPickle = None
26 cPickle = None
26 import pickle
27 import pickle
27
28
28 import zmq
29 import zmq
29 from zmq.utils import jsonapi
30 from zmq.utils import jsonapi
31 from zmq.eventloop.ioloop import IOLoop
30 from zmq.eventloop.zmqstream import ZMQStream
32 from zmq.eventloop.zmqstream import ZMQStream
31
33
32 from IPython.config.configurable import Configurable
34 from IPython.config.configurable import Configurable
33 from IPython.utils.importstring import import_item
35 from IPython.utils.importstring import import_item
34 from IPython.utils.jsonutil import date_default
36 from IPython.utils.jsonutil import date_default
35 from IPython.utils.traitlets import CStr, Unicode, Bool, Any, Instance, Set
37 from IPython.utils.traitlets import CStr, Unicode, Bool, Any, Instance, Set
36
38
37 #-----------------------------------------------------------------------------
39 #-----------------------------------------------------------------------------
38 # utility functions
40 # utility functions
39 #-----------------------------------------------------------------------------
41 #-----------------------------------------------------------------------------
40
42
41 def squash_unicode(obj):
43 def squash_unicode(obj):
42 """coerce unicode back to bytestrings."""
44 """coerce unicode back to bytestrings."""
43 if isinstance(obj,dict):
45 if isinstance(obj,dict):
44 for key in obj.keys():
46 for key in obj.keys():
45 obj[key] = squash_unicode(obj[key])
47 obj[key] = squash_unicode(obj[key])
46 if isinstance(key, unicode):
48 if isinstance(key, unicode):
47 obj[squash_unicode(key)] = obj.pop(key)
49 obj[squash_unicode(key)] = obj.pop(key)
48 elif isinstance(obj, list):
50 elif isinstance(obj, list):
49 for i,v in enumerate(obj):
51 for i,v in enumerate(obj):
50 obj[i] = squash_unicode(v)
52 obj[i] = squash_unicode(v)
51 elif isinstance(obj, unicode):
53 elif isinstance(obj, unicode):
52 obj = obj.encode('utf8')
54 obj = obj.encode('utf8')
53 return obj
55 return obj
54
56
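Illustration (Python 2 semantics, where str is a bytestring):

    >>> squash_unicode({u'a': [u'b', 1]})
    {'a': ['b', 1]}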
55 #-----------------------------------------------------------------------------
57 #-----------------------------------------------------------------------------
56 # globals and defaults
58 # globals and defaults
57 #-----------------------------------------------------------------------------
59 #-----------------------------------------------------------------------------
58
60
59 _default_key = 'on_unknown' if jsonapi.jsonmod.__name__ == 'jsonlib' else 'default'
61 _default_key = 'on_unknown' if jsonapi.jsonmod.__name__ == 'jsonlib' else 'default'
60 json_packer = lambda obj: jsonapi.dumps(obj, **{_default_key:date_default})
62 json_packer = lambda obj: jsonapi.dumps(obj, **{_default_key:date_default})
61 json_unpacker = lambda s: squash_unicode(jsonapi.loads(s))
63 json_unpacker = lambda s: squash_unicode(jsonapi.loads(s))
62
64
63 pickle_packer = lambda o: pickle.dumps(o,-1)
65 pickle_packer = lambda o: pickle.dumps(o,-1)
64 pickle_unpacker = pickle.loads
66 pickle_unpacker = pickle.loads
65
67
66 default_packer = json_packer
68 default_packer = json_packer
67 default_unpacker = json_unpacker
69 default_unpacker = json_unpacker
68
70
69
71
70 DELIM="<IDS|MSG>"
72 DELIM="<IDS|MSG>"
71
73
72 #-----------------------------------------------------------------------------
74 #-----------------------------------------------------------------------------
73 # Classes
75 # Classes
74 #-----------------------------------------------------------------------------
76 #-----------------------------------------------------------------------------
75
77
78 class SessionFactory(Configurable):
79 """The Base class for configurables that have a Session, Context, logger,
80 and IOLoop.
81 """
82
83 log = Instance('logging.Logger', ('', logging.WARN))
84
85 logname = Unicode('')
86 def _logname_changed(self, name, old, new):
87 self.log = logging.getLogger(new)
88
89 # not configurable:
90 context = Instance('zmq.Context')
91 def _context_default(self):
92 return zmq.Context.instance()
93
94 session = Instance('IPython.zmq.session.Session')
95
96 loop = Instance('zmq.eventloop.ioloop.IOLoop', allow_none=False)
97 def _loop_default(self):
98 return IOLoop.instance()
99
100 def __init__(self, **kwargs):
101 super(SessionFactory, self).__init__(**kwargs)
102
103 if self.session is None:
104 # construct the session
105 self.session = Session(**kwargs)
106
107
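A minimal sketch of the intended use (hypothetical subclass): anything built on SessionFactory inherits a shared zmq Context, an IOLoop, a named logger, and a lazily constructed Session without wiring them itself:

    class EchoFactory(SessionFactory):
        def start(self):
            self.log.warn("starting on loop %r with session %r", self.loop, self.session)

    f = EchoFactory(logname='echo')
    f.start()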
76 class Message(object):
108 class Message(object):
77 """A simple message object that maps dict keys to attributes.
109 """A simple message object that maps dict keys to attributes.
78
110
79 A Message can be created from a dict and a dict from a Message instance
111 A Message can be created from a dict and a dict from a Message instance
80 simply by calling dict(msg_obj)."""
112 simply by calling dict(msg_obj)."""
81
113
82 def __init__(self, msg_dict):
114 def __init__(self, msg_dict):
83 dct = self.__dict__
115 dct = self.__dict__
84 for k, v in dict(msg_dict).iteritems():
116 for k, v in dict(msg_dict).iteritems():
85 if isinstance(v, dict):
117 if isinstance(v, dict):
86 v = Message(v)
118 v = Message(v)
87 dct[k] = v
119 dct[k] = v
88
120
89 # Having this iterator lets dict(msg_obj) work out of the box.
121 # Having this iterator lets dict(msg_obj) work out of the box.
90 def __iter__(self):
122 def __iter__(self):
91 return iter(self.__dict__.iteritems())
123 return iter(self.__dict__.iteritems())
92
124
93 def __repr__(self):
125 def __repr__(self):
94 return repr(self.__dict__)
126 return repr(self.__dict__)
95
127
96 def __str__(self):
128 def __str__(self):
97 return pprint.pformat(self.__dict__)
129 return pprint.pformat(self.__dict__)
98
130
99 def __contains__(self, k):
131 def __contains__(self, k):
100 return k in self.__dict__
132 return k in self.__dict__
101
133
102 def __getitem__(self, k):
134 def __getitem__(self, k):
103 return self.__dict__[k]
135 return self.__dict__[k]
104
136
105
137
106 def msg_header(msg_id, msg_type, username, session):
138 def msg_header(msg_id, msg_type, username, session):
107 date=datetime.now()
139 date=datetime.now()
108 return locals()
140 return locals()
109
141
110 def extract_header(msg_or_header):
142 def extract_header(msg_or_header):
111 """Given a message or header, return the header."""
143 """Given a message or header, return the header."""
112 if not msg_or_header:
144 if not msg_or_header:
113 return {}
145 return {}
114 try:
146 try:
115 # See if msg_or_header is the entire message.
147 # See if msg_or_header is the entire message.
116 h = msg_or_header['header']
148 h = msg_or_header['header']
117 except KeyError:
149 except KeyError:
118 try:
150 try:
119 # See if msg_or_header is just the header
151 # See if msg_or_header is just the header
120 h = msg_or_header['msg_id']
152 h = msg_or_header['msg_id']
121 except KeyError:
153 except KeyError:
122 raise
154 raise
123 else:
155 else:
124 h = msg_or_header
156 h = msg_or_header
125 if not isinstance(h, dict):
157 if not isinstance(h, dict):
126 h = dict(h)
158 h = dict(h)
127 return h
159 return h
128
160
129 class Session(Configurable):
161 class Session(Configurable):
130 """tweaked version of IPython.zmq.session.Session, for development in Parallel"""
162 """tweaked version of IPython.zmq.session.Session, for development in Parallel"""
131 debug=Bool(False, config=True, help="""Debug output in the Session""")
163 debug=Bool(False, config=True, help="""Debug output in the Session""")
132 packer = Unicode('json',config=True,
164 packer = Unicode('json',config=True,
133 help="""The name of the packer for serializing messages.
165 help="""The name of the packer for serializing messages.
134 Should be one of 'json', 'pickle', or an import name
166 Should be one of 'json', 'pickle', or an import name
135 for a custom serializer.""")
167 for a custom serializer.""")
136 def _packer_changed(self, name, old, new):
168 def _packer_changed(self, name, old, new):
137 if new.lower() == 'json':
169 if new.lower() == 'json':
138 self.pack = json_packer
170 self.pack = json_packer
139 self.unpack = json_unpacker
171 self.unpack = json_unpacker
140 elif new.lower() == 'pickle':
172 elif new.lower() == 'pickle':
141 self.pack = pickle_packer
173 self.pack = pickle_packer
142 self.unpack = pickle_unpacker
174 self.unpack = pickle_unpacker
143 else:
175 else:
144 self.pack = import_item(new)
176 self.pack = import_item(new)
145
177
146 unpacker = Unicode('json',config=True,
178 unpacker = Unicode('json', config=True,
147 help="""The name of the unpacker for unserializing messages.
179 help="""The name of the unpacker for unserializing messages.
148 Only used with custom functions for `packer`.""")
180 Only used with custom functions for `packer`.""")
149 def _unpacker_changed(self, name, old, new):
181 def _unpacker_changed(self, name, old, new):
150 if new.lower() == 'json':
182 if new.lower() == 'json':
151 self.pack = json_packer
183 self.pack = json_packer
152 self.unpack = json_unpacker
184 self.unpack = json_unpacker
153 elif new.lower() == 'pickle':
185 elif new.lower() == 'pickle':
154 self.pack = pickle_packer
186 self.pack = pickle_packer
155 self.unpack = pickle_unpacker
187 self.unpack = pickle_unpacker
156 else:
188 else:
157 self.unpack = import_item(new)
189 self.unpack = import_item(new)
158
190
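For example, a config file could select the serializer pair (sketch; `mymod` is hypothetical):

    # c.Session.packer = 'pickle'
    # or a custom pair, given as import names:
    # c.Session.packer = 'mymod.dumps'
    # c.Session.unpacker = 'mymod.loads'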
159 session = CStr('',config=True,
191 session = CStr('', config=True,
160 help="""The UUID identifying this session.""")
192 help="""The UUID identifying this session.""")
161 def _session_default(self):
193 def _session_default(self):
162 return bytes(uuid.uuid4())
194 return bytes(uuid.uuid4())
163 username = Unicode(os.environ.get('USER','username'), config=True,
195 username = Unicode(os.environ.get('USER','username'), config=True,
164 help="""Username for the Session. Default is your system username.""")
196 help="""Username for the Session. Default is your system username.""")
165
197
166 # message signature related traits:
198 # message signature related traits:
167 key = CStr('', config=True,
199 key = CStr('', config=True,
168 help="""execution key, for extra authentication.""")
200 help="""execution key, for extra authentication.""")
169 def _key_changed(self, name, old, new):
201 def _key_changed(self, name, old, new):
170 if new:
202 if new:
171 self.auth = hmac.HMAC(new)
203 self.auth = hmac.HMAC(new)
172 else:
204 else:
173 self.auth = None
205 self.auth = None
174 auth = Instance(hmac.HMAC)
206 auth = Instance(hmac.HMAC)
175 counters = Instance('collections.defaultdict', (int,))
207 counters = Instance('collections.defaultdict', (int,))
176 digest_history = Set()
208 digest_history = Set()
177
209
178 keyfile = Unicode('', config=True,
210 keyfile = Unicode('', config=True,
179 help="""path to file containing execution key.""")
211 help="""path to file containing execution key.""")
180 def _keyfile_changed(self, name, old, new):
212 def _keyfile_changed(self, name, old, new):
181 with open(new, 'rb') as f:
213 with open(new, 'rb') as f:
182 self.key = f.read().strip()
214 self.key = f.read().strip()
183
215
184 pack = Any(default_packer) # the actual packer function
216 pack = Any(default_packer) # the actual packer function
185 def _pack_changed(self, name, old, new):
217 def _pack_changed(self, name, old, new):
186 if not callable(new):
218 if not callable(new):
187 raise TypeError("packer must be callable, not %s"%type(new))
219 raise TypeError("packer must be callable, not %s"%type(new))
188
220
189 unpack = Any(default_unpacker) # the actual unpacker function
221 unpack = Any(default_unpacker) # the actual unpacker function
190 def _unpack_changed(self, name, old, new):
222 def _unpack_changed(self, name, old, new):
191 if not callable(new):
223 if not callable(new):
192 raise TypeError("unpacker must be callable, not %s"%type(new))
224 raise TypeError("unpacker must be callable, not %s"%type(new))
193
225
194 def __init__(self, **kwargs):
226 def __init__(self, **kwargs):
195 super(Session, self).__init__(**kwargs)
227 super(Session, self).__init__(**kwargs)
196 self.none = self.pack({})
228 self.none = self.pack({})
197
229
198 @property
230 @property
199 def msg_id(self):
231 def msg_id(self):
200 """always return new uuid"""
232 """always return new uuid"""
201 return str(uuid.uuid4())
233 return str(uuid.uuid4())
202
234
203 def msg_header(self, msg_type):
235 def msg_header(self, msg_type):
204 return msg_header(self.msg_id, msg_type, self.username, self.session)
236 return msg_header(self.msg_id, msg_type, self.username, self.session)
205
237
206 def msg(self, msg_type, content=None, parent=None, subheader=None):
238 def msg(self, msg_type, content=None, parent=None, subheader=None):
207 msg = {}
239 msg = {}
208 msg['header'] = self.msg_header(msg_type)
240 msg['header'] = self.msg_header(msg_type)
209 msg['msg_id'] = msg['header']['msg_id']
241 msg['msg_id'] = msg['header']['msg_id']
210 msg['parent_header'] = {} if parent is None else extract_header(parent)
242 msg['parent_header'] = {} if parent is None else extract_header(parent)
211 msg['msg_type'] = msg_type
243 msg['msg_type'] = msg_type
212 msg['content'] = {} if content is None else content
244 msg['content'] = {} if content is None else content
213 sub = {} if subheader is None else subheader
245 sub = {} if subheader is None else subheader
214 msg['header'].update(sub)
246 msg['header'].update(sub)
215 return msg
247 return msg
216
248
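The resulting dict has this shape (values illustrative):

    # {'header': {'msg_id': '84d8...', 'msg_type': 'execute_request',
    #             'username': 'user', 'session': '9c0c...', 'date': datetime(...)},
    #  'msg_id': '84d8...',
    #  'parent_header': {},
    #  'msg_type': 'execute_request',
    #  'content': {'code': 'a=1'}}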
217 def check_key(self, msg_or_header):
249 def check_key(self, msg_or_header):
218 """Check that a message's header has the right key"""
250 """Check that a message's header has the right key"""
219 if not self.key:
251 if not self.key:
220 return True
252 return True
221 header = extract_header(msg_or_header)
253 header = extract_header(msg_or_header)
222 return header.get('key', '') == self.key
254 return header.get('key', '') == self.key
223
255
224 def sign(self, msg):
256 def sign(self, msg):
225 """Sign a message with HMAC digest. If no auth, return b''."""
257 """Sign a message with HMAC digest. If no auth, return b''."""
226 if self.auth is None:
258 if self.auth is None:
227 return b''
259 return b''
228 h = self.auth.copy()
260 h = self.auth.copy()
229 for m in msg:
261 for m in msg:
230 h.update(m)
262 h.update(m)
231 return h.hexdigest()
263 return h.hexdigest()
232
264
233 def serialize(self, msg, ident=None):
265 def serialize(self, msg, ident=None):
234 content = msg.get('content', {})
266 content = msg.get('content', {})
235 if content is None:
267 if content is None:
236 content = self.none
268 content = self.none
237 elif isinstance(content, dict):
269 elif isinstance(content, dict):
238 content = self.pack(content)
270 content = self.pack(content)
239 elif isinstance(content, bytes):
271 elif isinstance(content, bytes):
240 # content is already packed, as in a relayed message
272 # content is already packed, as in a relayed message
241 pass
273 pass
242 elif isinstance(content, unicode):
274 elif isinstance(content, unicode):
243 # should be bytes, but JSON often spits out unicode
275 # should be bytes, but JSON often spits out unicode
244 content = content.encode('utf8')
276 content = content.encode('utf8')
245 else:
277 else:
246 raise TypeError("Content incorrect type: %s"%type(content))
278 raise TypeError("Content incorrect type: %s"%type(content))
247
279
248 real_message = [self.pack(msg['header']),
280 real_message = [self.pack(msg['header']),
249 self.pack(msg['parent_header']),
281 self.pack(msg['parent_header']),
250 content
282 content
251 ]
283 ]
252
284
253 to_send = []
285 to_send = []
254
286
255 if isinstance(ident, list):
287 if isinstance(ident, list):
256 # accept list of idents
288 # accept list of idents
257 to_send.extend(ident)
289 to_send.extend(ident)
258 elif ident is not None:
290 elif ident is not None:
259 to_send.append(ident)
291 to_send.append(ident)
260 to_send.append(DELIM)
292 to_send.append(DELIM)
261
293
262 signature = self.sign(real_message)
294 signature = self.sign(real_message)
263 to_send.append(signature)
295 to_send.append(signature)
264
296
265 to_send.extend(real_message)
297 to_send.extend(real_message)
266
298
267 return to_send
299 return to_send
268
300
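On the wire, the list returned by serialize() becomes one multipart message in this frame order (buffers, if any, are appended later by send()):

    # [ident_0, ..., ident_n,   # zmq routing prefix (optional)
    #  DELIM,                   # '<IDS|MSG>'
    #  signature,               # HMAC hexdigest of the next three frames, or b''
    #  pack(header),
    #  pack(parent_header),
    #  content]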
269 def send(self, stream, msg_or_type, content=None, parent=None, ident=None,
301 def send(self, stream, msg_or_type, content=None, parent=None, ident=None,
270 buffers=None, subheader=None, track=False):
302 buffers=None, subheader=None, track=False):
271 """Build and send a message via stream or socket.
303 """Build and send a message via stream or socket.
272
304
273 Parameters
305 Parameters
274 ----------
306 ----------
275
307
276 stream : zmq.Socket or ZMQStream
308 stream : zmq.Socket or ZMQStream
277 the socket-like object used to send the data
309 the socket-like object used to send the data
278 msg_or_type : str or Message/dict
310 msg_or_type : str or Message/dict
279 Normally msg_or_type is a msg_type string; a full Message/dict is passed
311 Normally msg_or_type is a msg_type string; a full Message/dict is passed
280 only when a message is being re-sent.
312 only when a message is being re-sent.
281
313
282 content : dict or None
314 content : dict or None
283 the content of the message (ignored if msg_or_type is a message)
315 the content of the message (ignored if msg_or_type is a message)
284 parent : Message or dict or None
316 parent : Message or dict or None
285 the parent or parent header describing the parent of this message
317 the parent or parent header describing the parent of this message
286 ident : bytes or list of bytes
318 ident : bytes or list of bytes
287 the zmq.IDENTITY routing path
319 the zmq.IDENTITY routing path
288 subheader : dict or None
320 subheader : dict or None
289 extra header keys for this message's header
321 extra header keys for this message's header
290 buffers : list or None
322 buffers : list or None
291 the already-serialized buffers to be appended to the message
323 the already-serialized buffers to be appended to the message
292 track : bool
324 track : bool
293 whether to track. Only for use with Sockets,
325 whether to track. Only for use with Sockets,
294 because ZMQStream objects cannot track messages.
326 because ZMQStream objects cannot track messages.
295
327
296 Returns
328 Returns
297 -------
329 -------
298 msg : message dict
330 msg : message dict
299 the constructed message
331 the constructed message
300 (msg,tracker) : (message dict, MessageTracker)
332 (msg,tracker) : (message dict, MessageTracker)
301 if track=True, then a 2-tuple will be returned,
333 if track=True, then a 2-tuple will be returned,
302 the first element being the constructed
334 the first element being the constructed
303 message, and the second being the MessageTracker
335 message, and the second being the MessageTracker
304
336
305 """
337 """
306
338
307 if not isinstance(stream, (zmq.Socket, ZMQStream)):
339 if not isinstance(stream, (zmq.Socket, ZMQStream)):
308 raise TypeError("stream must be Socket or ZMQStream, not %r"%type(stream))
340 raise TypeError("stream must be Socket or ZMQStream, not %r"%type(stream))
309 elif track and isinstance(stream, ZMQStream):
341 elif track and isinstance(stream, ZMQStream):
310 raise TypeError("ZMQStream cannot track messages")
342 raise TypeError("ZMQStream cannot track messages")
311
343
312 if isinstance(msg_or_type, (Message, dict)):
344 if isinstance(msg_or_type, (Message, dict)):
313 # we got a Message, not a msg_type
345 # we got a Message, not a msg_type
314 # don't build a new Message
346 # don't build a new Message
315 msg = msg_or_type
347 msg = msg_or_type
316 else:
348 else:
317 msg = self.msg(msg_or_type, content, parent, subheader)
349 msg = self.msg(msg_or_type, content, parent, subheader)
318
350
319 buffers = [] if buffers is None else buffers
351 buffers = [] if buffers is None else buffers
320 to_send = self.serialize(msg, ident)
352 to_send = self.serialize(msg, ident)
321 flag = 0
353 flag = 0
322 if buffers:
354 if buffers:
323 flag = zmq.SNDMORE
355 flag = zmq.SNDMORE
324 _track = False
356 _track = False
325 else:
357 else:
326 _track=track
358 _track=track
327 if track:
359 if track:
328 tracker = stream.send_multipart(to_send, flag, copy=False, track=_track)
360 tracker = stream.send_multipart(to_send, flag, copy=False, track=_track)
329 else:
361 else:
330 tracker = stream.send_multipart(to_send, flag, copy=False)
362 tracker = stream.send_multipart(to_send, flag, copy=False)
331 for b in buffers[:-1]:
363 for b in buffers[:-1]:
332 stream.send(b, flag, copy=False)
364 stream.send(b, flag, copy=False)
333 if buffers:
365 if buffers:
334 if track:
366 if track:
335 tracker = stream.send(buffers[-1], copy=False, track=track)
367 tracker = stream.send(buffers[-1], copy=False, track=track)
336 else:
368 else:
337 tracker = stream.send(buffers[-1], copy=False)
369 tracker = stream.send(buffers[-1], copy=False)
338
370
339 # omsg = Message(msg)
371 # omsg = Message(msg)
340 if self.debug:
372 if self.debug:
341 pprint.pprint(msg)
373 pprint.pprint(msg)
342 pprint.pprint(to_send)
374 pprint.pprint(to_send)
343 pprint.pprint(buffers)
375 pprint.pprint(buffers)
344
376
345 msg['tracker'] = tracker
377 msg['tracker'] = tracker
346
378
347 return msg
379 return msg
348
380
349 def send_raw(self, stream, msg, flags=0, copy=True, ident=None):
381 def send_raw(self, stream, msg, flags=0, copy=True, ident=None):
350 """Send a raw message via ident path.
382 """Send a raw message via ident path.
351
383
352 Parameters
384 Parameters
353 ----------
385 ----------
354 msg : list of sendable buffers"""
386 msg : list of sendable buffers"""
355 to_send = []
387 to_send = []
356 if isinstance(ident, bytes):
388 if isinstance(ident, bytes):
357 ident = [ident]
389 ident = [ident]
358 if ident is not None:
390 if ident is not None:
359 to_send.extend(ident)
391 to_send.extend(ident)
360
392
361 to_send.append(DELIM)
393 to_send.append(DELIM)
362 to_send.append(self.sign(msg))
394 to_send.append(self.sign(msg))
363 to_send.extend(msg)
395 to_send.extend(msg)
364 stream.send_multipart(msg, flags, copy=copy)
364 stream.send_multipart(to_send, flags, copy=copy)
396 stream.send_multipart(to_send, flags, copy=copy)
397
366 def recv(self, socket, mode=zmq.NOBLOCK, content=True, copy=True):
398 def recv(self, socket, mode=zmq.NOBLOCK, content=True, copy=True):
367 """receives and unpacks a message
399 """receives and unpacks a message
368 returns [idents], msg"""
400 returns [idents], msg"""
369 if isinstance(socket, ZMQStream):
401 if isinstance(socket, ZMQStream):
370 socket = socket.socket
402 socket = socket.socket
371 try:
403 try:
372 msg = socket.recv_multipart(mode)
404 msg = socket.recv_multipart(mode)
373 except zmq.ZMQError as e:
405 except zmq.ZMQError as e:
374 if e.errno == zmq.EAGAIN:
406 if e.errno == zmq.EAGAIN:
375 # We can convert EAGAIN to None as we know in this case
407 # We can convert EAGAIN to None as we know in this case
376 # recv_multipart won't return None.
408 # recv_multipart won't return None.
377 return None,None
409 return None,None
378 else:
410 else:
379 raise
411 raise
380 # return an actual Message object
412 # return an actual Message object
381 # determine the number of idents by trying to unpack them.
413 # determine the number of idents by trying to unpack them.
382 # this is terrible:
414 # this is terrible:
383 idents, msg = self.feed_identities(msg, copy)
415 idents, msg = self.feed_identities(msg, copy)
384 try:
416 try:
385 return idents, self.unpack_message(msg, content=content, copy=copy)
417 return idents, self.unpack_message(msg, content=content, copy=copy)
386 except Exception as e:
418 except Exception as e:
387 print (idents, msg)
419 print (idents, msg)
388 # TODO: handle it
420 # TODO: handle it
389 raise e
421 raise e
390
422
391 def feed_identities(self, msg, copy=True):
423 def feed_identities(self, msg, copy=True):
392 """feed until DELIM is reached, then return the prefix as idents and remainder as
424 """feed until DELIM is reached, then return the prefix as idents and remainder as
393 msg. This is easily broken by setting an IDENT to DELIM, but that would be silly.
425 msg. This is easily broken by setting an IDENT to DELIM, but that would be silly.
394
426
395 Parameters
427 Parameters
396 ----------
428 ----------
397 msg : a list of Message or bytes objects
429 msg : a list of Message or bytes objects
398 the message to be split
430 the message to be split
399 copy : bool
431 copy : bool
400 flag determining whether the arguments are bytes or Messages
432 flag determining whether the arguments are bytes or Messages
401
433
402 Returns
434 Returns
403 -------
435 -------
404 (idents,msg) : two lists
436 (idents,msg) : two lists
405 idents will always be a list of bytes - the identity prefix
437 idents will always be a list of bytes - the identity prefix
406 msg will be a list of bytes or Messages, unchanged from input
438 msg will be a list of bytes or Messages, unchanged from input
407 msg should be unpackable via self.unpack_message at this point.
439 msg should be unpackable via self.unpack_message at this point.
408 """
440 """
409 if copy:
441 if copy:
410 idx = msg.index(DELIM)
442 idx = msg.index(DELIM)
411 return msg[:idx], msg[idx+1:]
443 return msg[:idx], msg[idx+1:]
412 else:
444 else:
413 failed = True
445 failed = True
414 for idx,m in enumerate(msg):
446 for idx,m in enumerate(msg):
415 if m.bytes == DELIM:
447 if m.bytes == DELIM:
416 failed = False
448 failed = False
417 break
449 break
418 if failed:
450 if failed:
419 raise ValueError("DELIM not in msg")
451 raise ValueError("DELIM not in msg")
420 idents, msg = msg[:idx], msg[idx+1:]
452 idents, msg = msg[:idx], msg[idx+1:]
421 return [m.bytes for m in idents], msg
453 return [m.bytes for m in idents], msg
422
454
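Receive-side sketch tying the pieces together (`sock` and `session` are assumed to exist):

    raw = sock.recv_multipart()                # e.g. from a zmq.ROUTER socket
    idents, parts = session.feed_identities(raw)
    msg = session.unpack_message(parts)        # verifies the signature, unpacks the dicts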
423 def unpack_message(self, msg, content=True, copy=True):
455 def unpack_message(self, msg, content=True, copy=True):
424 """Return a message object from the format
456 """Return a message object from the format
425 sent by self.send.
457 sent by self.send.
426
458
427 Parameters
459 Parameters
428 ----------
460 ----------
429
461
430 content : bool (True)
462 content : bool (True)
431 whether to unpack the content dict (True),
463 whether to unpack the content dict (True),
432 or leave it serialized (False)
464 or leave it serialized (False)
433
465
434 copy : bool (True)
466 copy : bool (True)
435 whether to return the bytes (True),
467 whether to return the bytes (True),
436 or the non-copying Message object in each place (False)
468 or the non-copying Message object in each place (False)
437
469
438 """
470 """
439 minlen = 4
471 minlen = 4
440 message = {}
472 message = {}
441 if not copy:
473 if not copy:
442 for i in range(minlen):
474 for i in range(minlen):
443 msg[i] = msg[i].bytes
475 msg[i] = msg[i].bytes
444 if self.auth is not None:
476 if self.auth is not None:
445 signature = msg[0]
477 signature = msg[0]
446 if signature in self.digest_history:
478 if signature in self.digest_history:
447 raise ValueError("Duplicate Signature: %r"%signature)
479 raise ValueError("Duplicate Signature: %r"%signature)
448 self.digest_history.add(signature)
480 self.digest_history.add(signature)
449 check = self.sign(msg[1:4])
481 check = self.sign(msg[1:4])
450 if signature != check:
482 if signature != check:
451 raise ValueError("Invalid Signature: %r"%signature)
483 raise ValueError("Invalid Signature: %r"%signature)
452 if len(msg) < minlen:
484 if len(msg) < minlen:
453 raise TypeError("malformed message, must have at least %i elements"%minlen)
485 raise TypeError("malformed message, must have at least %i elements"%minlen)
454 message['header'] = self.unpack(msg[1])
486 message['header'] = self.unpack(msg[1])
455 message['msg_type'] = message['header']['msg_type']
487 message['msg_type'] = message['header']['msg_type']
456 message['parent_header'] = self.unpack(msg[2])
488 message['parent_header'] = self.unpack(msg[2])
457 if content:
489 if content:
458 message['content'] = self.unpack(msg[3])
490 message['content'] = self.unpack(msg[3])
459 else:
491 else:
460 message['content'] = msg[3]
492 message['content'] = msg[3]
461
493
462 message['buffers'] = msg[4:]
494 message['buffers'] = msg[4:]
463 return message
495 return message
464
496
465 def test_msg2obj():
497 def test_msg2obj():
466 am = dict(x=1)
498 am = dict(x=1)
467 ao = Message(am)
499 ao = Message(am)
468 assert ao.x == am['x']
500 assert ao.x == am['x']
469
501
470 am['y'] = dict(z=1)
502 am['y'] = dict(z=1)
471 ao = Message(am)
503 ao = Message(am)
472 assert ao.y.z == am['y']['z']
504 assert ao.y.z == am['y']['z']
473
505
474 k1, k2 = 'y', 'z'
506 k1, k2 = 'y', 'z'
475 assert ao[k1][k2] == am[k1][k2]
507 assert ao[k1][k2] == am[k1][k2]
476
508
477 am2 = dict(ao)
509 am2 = dict(ao)
478 assert am['x'] == am2['x']
510 assert am['x'] == am2['x']
479 assert am['y']['z'] == am2['y']['z']
511 assert am['y']['z'] == am2['y']['z']