discard parallel.util.asbytes in favor of py3compat.cast_bytes
MinRK
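The two helpers perform the same coercion; the gain is one shared implementation in IPython.utils.py3compat rather than a private copy in parallel.util. A minimal sketch of the behavior being relied on below, assuming the conventional utf8 default (illustrative, not copied from the source):

    def cast_bytes(s, encoding='utf8'):
        """Coerce a text string to bytes; pass bytes through unchanged."""
        if isinstance(s, bytes):
            return s
        return s.encode(encoding)

    # JSON decoding always yields text, but zmq identities and HMAC keys
    # must be bytes -- hence the casts at each call site in the diff below.
    key = cast_bytes(u'ab1587e8')
    assert isinstance(key, bytes)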
@@ -1,330 +1,331 @@
 #!/usr/bin/env python
 # encoding: utf-8
 """
 The IPython engine application
 
 Authors:
 
 * Brian Granger
 * MinRK
 
 """
 
 #-----------------------------------------------------------------------------
 # Copyright (C) 2008-2011 The IPython Development Team
 #
 # Distributed under the terms of the BSD License. The full license is in
 # the file COPYING, distributed as part of this software.
 #-----------------------------------------------------------------------------
 
 #-----------------------------------------------------------------------------
 # Imports
 #-----------------------------------------------------------------------------
 
 import json
 import os
 import sys
 import time
 
 import zmq
 from zmq.eventloop import ioloop
 
 from IPython.core.profiledir import ProfileDir
 from IPython.parallel.apps.baseapp import (
     BaseParallelApplication,
     base_aliases,
     base_flags,
     catch_config_error,
 )
 from IPython.zmq.log import EnginePUBHandler
 from IPython.zmq.ipkernel import Kernel
 from IPython.zmq.session import (
     Session, session_aliases, session_flags
 )
 
 from IPython.config.configurable import Configurable
 
 from IPython.parallel.engine.engine import EngineFactory
-from IPython.parallel.util import disambiguate_url, asbytes
+from IPython.parallel.util import disambiguate_url
 
 from IPython.utils.importstring import import_item
+from IPython.utils.py3compat import cast_bytes
 from IPython.utils.traitlets import Bool, Unicode, Dict, List, Float
 
 
 #-----------------------------------------------------------------------------
 # Module level variables
 #-----------------------------------------------------------------------------
 
 #: The default config file name for this application
 default_config_file_name = u'ipengine_config.py'
 
 _description = """Start an IPython engine for parallel computing.
 
 IPython engines run in parallel and perform computations on behalf of a client
 and controller. A controller needs to be started before the engines. The
 engine can be configured using command line options or using a cluster
 directory. Cluster directories contain config, log and security files and are
 usually located in your ipython directory and named as "profile_name".
 See the `profile` and `profile-dir` options for details.
 """
 
 _examples = """
 ipengine --ip=192.168.0.1 --port=1000    # connect to hub at ip and port
 ipengine --log-to-file --log-level=DEBUG # log to a file with DEBUG verbosity
 """
 
 #-----------------------------------------------------------------------------
 # MPI configuration
 #-----------------------------------------------------------------------------
 
 mpi4py_init = """from mpi4py import MPI as mpi
 mpi.size = mpi.COMM_WORLD.Get_size()
 mpi.rank = mpi.COMM_WORLD.Get_rank()
 """
 
 
 pytrilinos_init = """from PyTrilinos import Epetra
 class SimpleStruct:
     pass
 mpi = SimpleStruct()
 mpi.rank = 0
 mpi.size = 0
 """
 
 class MPI(Configurable):
     """Configurable for MPI initialization"""
     use = Unicode('', config=True,
         help='How to enable MPI (mpi4py, pytrilinos, or empty string to disable).'
         )
 
     def _use_changed(self, name, old, new):
         # load default init script if it's not set
         if not self.init_script:
             self.init_script = self.default_inits.get(new, '')
 
     init_script = Unicode('', config=True,
         help="Initialization code for MPI")
 
     default_inits = Dict({'mpi4py' : mpi4py_init, 'pytrilinos':pytrilinos_init},
         config=True)
 
 
 #-----------------------------------------------------------------------------
 # Main application
 #-----------------------------------------------------------------------------
 aliases = dict(
     file = 'IPEngineApp.url_file',
     c = 'IPEngineApp.startup_command',
     s = 'IPEngineApp.startup_script',
 
     url = 'EngineFactory.url',
     ssh = 'EngineFactory.sshserver',
     sshkey = 'EngineFactory.sshkey',
     ip = 'EngineFactory.ip',
     transport = 'EngineFactory.transport',
     port = 'EngineFactory.regport',
     location = 'EngineFactory.location',
 
     timeout = 'EngineFactory.timeout',
 
     mpi = 'MPI.use',
 
 )
 aliases.update(base_aliases)
 aliases.update(session_aliases)
 flags = {}
 flags.update(base_flags)
 flags.update(session_flags)
 
 class IPEngineApp(BaseParallelApplication):
 
     name = 'ipengine'
     description = _description
     examples = _examples
     config_file_name = Unicode(default_config_file_name)
     classes = List([ProfileDir, Session, EngineFactory, Kernel, MPI])
 
     startup_script = Unicode(u'', config=True,
         help='specify a script to be run at startup')
     startup_command = Unicode('', config=True,
         help='specify a command to be run at startup')
 
     url_file = Unicode(u'', config=True,
         help="""The full location of the file containing the connection information for
         the controller. If this is not given, the file must be in the
         security directory of the cluster directory. This location is
         resolved using the `profile` or `profile_dir` options.""",
         )
     wait_for_url_file = Float(5, config=True,
         help="""The maximum number of seconds to wait for url_file to exist.
         This is useful for batch-systems and shared-filesystems where the
         controller and engine are started at the same time and it
         may take a moment for the controller to write the connector files.""")
 
     url_file_name = Unicode(u'ipcontroller-engine.json', config=True)
 
     def _cluster_id_changed(self, name, old, new):
         if new:
             base = 'ipcontroller-%s' % new
         else:
             base = 'ipcontroller'
         self.url_file_name = "%s-engine.json" % base
 
     log_url = Unicode('', config=True,
         help="""The URL for the iploggerapp instance, for forwarding
         logging to a central location.""")
 
     aliases = Dict(aliases)
     flags = Dict(flags)
 
     def find_url_file(self):
         """Set the url file.
 
         Here we don't try to actually see if it exists or is valid, as that
         is handled by the connection logic.
185 """
186 """
186 config = self.config
187 config = self.config
187 # Find the actual controller key file
188 # Find the actual controller key file
188 if not self.url_file:
189 if not self.url_file:
189 self.url_file = os.path.join(
190 self.url_file = os.path.join(
190 self.profile_dir.security_dir,
191 self.profile_dir.security_dir,
191 self.url_file_name
192 self.url_file_name
192 )
193 )
193
194
194 def load_connector_file(self):
195 def load_connector_file(self):
195 """load config from a JSON connector file,
196 """load config from a JSON connector file,
196 at a *lower* priority than command-line/config files.
197 at a *lower* priority than command-line/config files.
197 """
198 """
198
199
199 self.log.info("Loading url_file %r", self.url_file)
200 self.log.info("Loading url_file %r", self.url_file)
200 config = self.config
201 config = self.config
201
202
202 with open(self.url_file) as f:
203 with open(self.url_file) as f:
203 d = json.loads(f.read())
204 d = json.loads(f.read())
204
205
205 if 'exec_key' in d:
206 if 'exec_key' in d:
206 config.Session.key = asbytes(d['exec_key'])
207 config.Session.key = cast_bytes(d['exec_key'])
207
208
208 try:
209 try:
209 config.EngineFactory.location
210 config.EngineFactory.location
210 except AttributeError:
211 except AttributeError:
211 config.EngineFactory.location = d['location']
212 config.EngineFactory.location = d['location']
212
213
213 d['url'] = disambiguate_url(d['url'], config.EngineFactory.location)
214 d['url'] = disambiguate_url(d['url'], config.EngineFactory.location)
214 try:
215 try:
215 config.EngineFactory.url
216 config.EngineFactory.url
216 except AttributeError:
217 except AttributeError:
217 config.EngineFactory.url = d['url']
218 config.EngineFactory.url = d['url']
218
219
219 try:
220 try:
220 config.EngineFactory.sshserver
221 config.EngineFactory.sshserver
221 except AttributeError:
222 except AttributeError:
222 config.EngineFactory.sshserver = d['ssh']
223 config.EngineFactory.sshserver = d['ssh']
223
224
224 def init_engine(self):
225 def init_engine(self):
225 # This is the working dir by now.
226 # This is the working dir by now.
226 sys.path.insert(0, '')
227 sys.path.insert(0, '')
227 config = self.config
228 config = self.config
228 # print config
229 # print config
229 self.find_url_file()
230 self.find_url_file()
230
231
231 # was the url manually specified?
232 # was the url manually specified?
232 keys = set(self.config.EngineFactory.keys())
233 keys = set(self.config.EngineFactory.keys())
233 keys = keys.union(set(self.config.RegistrationFactory.keys()))
234 keys = keys.union(set(self.config.RegistrationFactory.keys()))
234
235
235 if keys.intersection(set(['ip', 'url', 'port'])):
236 if keys.intersection(set(['ip', 'url', 'port'])):
236 # Connection info was specified, don't wait for the file
237 # Connection info was specified, don't wait for the file
237 url_specified = True
238 url_specified = True
238 self.wait_for_url_file = 0
239 self.wait_for_url_file = 0
239 else:
240 else:
240 url_specified = False
241 url_specified = False
241
242
242 if self.wait_for_url_file and not os.path.exists(self.url_file):
243 if self.wait_for_url_file and not os.path.exists(self.url_file):
243 self.log.warn("url_file %r not found", self.url_file)
244 self.log.warn("url_file %r not found", self.url_file)
244 self.log.warn("Waiting up to %.1f seconds for it to arrive.", self.wait_for_url_file)
245 self.log.warn("Waiting up to %.1f seconds for it to arrive.", self.wait_for_url_file)
245 tic = time.time()
246 tic = time.time()
246 while not os.path.exists(self.url_file) and (time.time()-tic < self.wait_for_url_file):
247 while not os.path.exists(self.url_file) and (time.time()-tic < self.wait_for_url_file):
247 # wait for url_file to exist, or until time limit
248 # wait for url_file to exist, or until time limit
248 time.sleep(0.1)
249 time.sleep(0.1)
249
250
250 if os.path.exists(self.url_file):
251 if os.path.exists(self.url_file):
251 self.load_connector_file()
252 self.load_connector_file()
252 elif not url_specified:
253 elif not url_specified:
253 self.log.fatal("Fatal: url file never arrived: %s", self.url_file)
254 self.log.fatal("Fatal: url file never arrived: %s", self.url_file)
254 self.exit(1)
255 self.exit(1)
255
256
256
257
257 try:
258 try:
258 exec_lines = config.Kernel.exec_lines
259 exec_lines = config.Kernel.exec_lines
259 except AttributeError:
260 except AttributeError:
260 config.Kernel.exec_lines = []
261 config.Kernel.exec_lines = []
261 exec_lines = config.Kernel.exec_lines
262 exec_lines = config.Kernel.exec_lines
262
263
263 if self.startup_script:
264 if self.startup_script:
264 enc = sys.getfilesystemencoding() or 'utf8'
265 enc = sys.getfilesystemencoding() or 'utf8'
265 cmd="execfile(%r)" % self.startup_script.encode(enc)
266 cmd="execfile(%r)" % self.startup_script.encode(enc)
266 exec_lines.append(cmd)
267 exec_lines.append(cmd)
267 if self.startup_command:
268 if self.startup_command:
268 exec_lines.append(self.startup_command)
269 exec_lines.append(self.startup_command)
269
270
270 # Create the underlying shell class and Engine
271 # Create the underlying shell class and Engine
271 # shell_class = import_item(self.master_config.Global.shell_class)
272 # shell_class = import_item(self.master_config.Global.shell_class)
272 # print self.config
273 # print self.config
273 try:
274 try:
274 self.engine = EngineFactory(config=config, log=self.log)
275 self.engine = EngineFactory(config=config, log=self.log)
275 except:
276 except:
276 self.log.error("Couldn't start the Engine", exc_info=True)
277 self.log.error("Couldn't start the Engine", exc_info=True)
277 self.exit(1)
278 self.exit(1)
278
279
279 def forward_logging(self):
280 def forward_logging(self):
280 if self.log_url:
281 if self.log_url:
281 self.log.info("Forwarding logging to %s", self.log_url)
282 self.log.info("Forwarding logging to %s", self.log_url)
282 context = self.engine.context
283 context = self.engine.context
283 lsock = context.socket(zmq.PUB)
284 lsock = context.socket(zmq.PUB)
284 lsock.connect(self.log_url)
285 lsock.connect(self.log_url)
285 self.log.removeHandler(self._log_handler)
286 self.log.removeHandler(self._log_handler)
286 handler = EnginePUBHandler(self.engine, lsock)
287 handler = EnginePUBHandler(self.engine, lsock)
287 handler.setLevel(self.log_level)
288 handler.setLevel(self.log_level)
288 self.log.addHandler(handler)
289 self.log.addHandler(handler)
289 self._log_handler = handler
290 self._log_handler = handler
290
291
291 def init_mpi(self):
292 def init_mpi(self):
292 global mpi
293 global mpi
293 self.mpi = MPI(config=self.config)
294 self.mpi = MPI(config=self.config)
294
295
295 mpi_import_statement = self.mpi.init_script
296 mpi_import_statement = self.mpi.init_script
296 if mpi_import_statement:
297 if mpi_import_statement:
297 try:
298 try:
298 self.log.info("Initializing MPI:")
299 self.log.info("Initializing MPI:")
299 self.log.info(mpi_import_statement)
300 self.log.info(mpi_import_statement)
300 exec mpi_import_statement in globals()
301 exec mpi_import_statement in globals()
301 except:
302 except:
302 mpi = None
303 mpi = None
303 else:
304 else:
304 mpi = None
305 mpi = None
305
306
306 @catch_config_error
307 @catch_config_error
307 def initialize(self, argv=None):
308 def initialize(self, argv=None):
308 super(IPEngineApp, self).initialize(argv)
309 super(IPEngineApp, self).initialize(argv)
309 self.init_mpi()
310 self.init_mpi()
310 self.init_engine()
311 self.init_engine()
311 self.forward_logging()
312 self.forward_logging()
312
313
313 def start(self):
314 def start(self):
314 self.engine.start()
315 self.engine.start()
315 try:
316 try:
316 self.engine.loop.start()
317 self.engine.loop.start()
317 except KeyboardInterrupt:
318 except KeyboardInterrupt:
318 self.log.critical("Engine Interrupted, shutting down...\n")
319 self.log.critical("Engine Interrupted, shutting down...\n")
319
320
320
321
321 def launch_new_instance():
322 def launch_new_instance():
322 """Create and run the IPython engine"""
323 """Create and run the IPython engine"""
323 app = IPEngineApp.instance()
324 app = IPEngineApp.instance()
324 app.initialize()
325 app.initialize()
325 app.start()
326 app.start()
326
327
327
328
328 if __name__ == '__main__':
329 if __name__ == '__main__':
329 launch_new_instance()
330 launch_new_instance()
330
331
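The one functional change in this file is the exec_key handling in load_connector_file: json.loads returns the key as a text string, while Session.key is used as an HMAC signing key, which must be bytes (hmac.new raises TypeError for a text key on Python 3). A short sketch of the constraint, using only standard-library behavior:

    import hmac, hashlib, json

    d = json.loads('{"exec_key": "a3c51f2e"}')   # hypothetical key value
    key = d['exec_key']          # text (unicode on py2, str on py3)

    # the coercion cast_bytes(d['exec_key']) performs before the key is
    # handed to HMAC signing:
    key = key.encode('utf8')
    digest = hmac.new(key, b'message', hashlib.sha256).hexdigest()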
@@ -1,1569 +1,1570 b''
1 """A semi-synchronous Client for the ZMQ cluster
1 """A semi-synchronous Client for the ZMQ cluster
2
2
3 Authors:
3 Authors:
4
4
5 * MinRK
5 * MinRK
6 """
6 """
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2010-2011 The IPython Development Team
8 # Copyright (C) 2010-2011 The IPython Development Team
9 #
9 #
10 # Distributed under the terms of the BSD License. The full license is in
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13
13
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15 # Imports
15 # Imports
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 import os
18 import os
19 import json
19 import json
20 import sys
20 import sys
21 from threading import Thread, Event
21 from threading import Thread, Event
22 import time
22 import time
23 import warnings
23 import warnings
24 from datetime import datetime
24 from datetime import datetime
25 from getpass import getpass
25 from getpass import getpass
26 from pprint import pprint
26 from pprint import pprint
27
27
28 pjoin = os.path.join
28 pjoin = os.path.join
29
29
30 import zmq
30 import zmq
31 # from zmq.eventloop import ioloop, zmqstream
31 # from zmq.eventloop import ioloop, zmqstream
32
32
33 from IPython.config.configurable import MultipleInstanceError
33 from IPython.config.configurable import MultipleInstanceError
34 from IPython.core.application import BaseIPythonApplication
34 from IPython.core.application import BaseIPythonApplication
35
35
36 from IPython.utils.jsonutil import rekey
36 from IPython.utils.jsonutil import rekey
37 from IPython.utils.localinterfaces import LOCAL_IPS
37 from IPython.utils.localinterfaces import LOCAL_IPS
38 from IPython.utils.path import get_ipython_dir
38 from IPython.utils.path import get_ipython_dir
39 from IPython.utils.py3compat import cast_bytes
39 from IPython.utils.traitlets import (HasTraits, Integer, Instance, Unicode,
40 from IPython.utils.traitlets import (HasTraits, Integer, Instance, Unicode,
40 Dict, List, Bool, Set, Any)
41 Dict, List, Bool, Set, Any)
41 from IPython.external.decorator import decorator
42 from IPython.external.decorator import decorator
42 from IPython.external.ssh import tunnel
43 from IPython.external.ssh import tunnel
43
44
44 from IPython.parallel import Reference
45 from IPython.parallel import Reference
45 from IPython.parallel import error
46 from IPython.parallel import error
46 from IPython.parallel import util
47 from IPython.parallel import util
47
48
48 from IPython.zmq.session import Session, Message
49 from IPython.zmq.session import Session, Message
49
50
50 from .asyncresult import AsyncResult, AsyncHubResult
51 from .asyncresult import AsyncResult, AsyncHubResult
51 from IPython.core.profiledir import ProfileDir, ProfileDirError
52 from IPython.core.profiledir import ProfileDir, ProfileDirError
52 from .view import DirectView, LoadBalancedView
53 from .view import DirectView, LoadBalancedView
53
54
54 if sys.version_info[0] >= 3:
55 if sys.version_info[0] >= 3:
55 # xrange is used in a couple 'isinstance' tests in py2
56 # xrange is used in a couple 'isinstance' tests in py2
56 # should be just 'range' in 3k
57 # should be just 'range' in 3k
57 xrange = range
58 xrange = range
58
59
59 #--------------------------------------------------------------------------
60 #--------------------------------------------------------------------------
60 # Decorators for Client methods
61 # Decorators for Client methods
61 #--------------------------------------------------------------------------
62 #--------------------------------------------------------------------------
62
63
63 @decorator
64 @decorator
64 def spin_first(f, self, *args, **kwargs):
65 def spin_first(f, self, *args, **kwargs):
65 """Call spin() to sync state prior to calling the method."""
66 """Call spin() to sync state prior to calling the method."""
66 self.spin()
67 self.spin()
67 return f(self, *args, **kwargs)
68 return f(self, *args, **kwargs)
68
69
69
70
70 #--------------------------------------------------------------------------
71 #--------------------------------------------------------------------------
71 # Classes
72 # Classes
72 #--------------------------------------------------------------------------
73 #--------------------------------------------------------------------------
73
74
74 class Metadata(dict):
75 class Metadata(dict):
75 """Subclass of dict for initializing metadata values.
76 """Subclass of dict for initializing metadata values.
76
77
77 Attribute access works on keys.
78 Attribute access works on keys.
78
79
79 These objects have a strict set of keys - errors will raise if you try
80 These objects have a strict set of keys - errors will raise if you try
80 to add new keys.
81 to add new keys.
81 """
82 """
82 def __init__(self, *args, **kwargs):
83 def __init__(self, *args, **kwargs):
83 dict.__init__(self)
84 dict.__init__(self)
84 md = {'msg_id' : None,
85 md = {'msg_id' : None,
85 'submitted' : None,
86 'submitted' : None,
86 'started' : None,
87 'started' : None,
87 'completed' : None,
88 'completed' : None,
88 'received' : None,
89 'received' : None,
89 'engine_uuid' : None,
90 'engine_uuid' : None,
90 'engine_id' : None,
91 'engine_id' : None,
91 'follow' : None,
92 'follow' : None,
92 'after' : None,
93 'after' : None,
93 'status' : None,
94 'status' : None,
94
95
95 'pyin' : None,
96 'pyin' : None,
96 'pyout' : None,
97 'pyout' : None,
97 'pyerr' : None,
98 'pyerr' : None,
98 'stdout' : '',
99 'stdout' : '',
99 'stderr' : '',
100 'stderr' : '',
100 'outputs' : [],
101 'outputs' : [],
101 }
102 }
102 self.update(md)
103 self.update(md)
103 self.update(dict(*args, **kwargs))
104 self.update(dict(*args, **kwargs))
104
105
105 def __getattr__(self, key):
106 def __getattr__(self, key):
106 """getattr aliased to getitem"""
107 """getattr aliased to getitem"""
107 if key in self.iterkeys():
108 if key in self.iterkeys():
108 return self[key]
109 return self[key]
109 else:
110 else:
110 raise AttributeError(key)
111 raise AttributeError(key)
111
112
112 def __setattr__(self, key, value):
113 def __setattr__(self, key, value):
113 """setattr aliased to setitem, with strict"""
114 """setattr aliased to setitem, with strict"""
114 if key in self.iterkeys():
115 if key in self.iterkeys():
115 self[key] = value
116 self[key] = value
116 else:
117 else:
117 raise AttributeError(key)
118 raise AttributeError(key)
118
119
119 def __setitem__(self, key, value):
120 def __setitem__(self, key, value):
120 """strict static key enforcement"""
121 """strict static key enforcement"""
121 if key in self.iterkeys():
122 if key in self.iterkeys():
122 dict.__setitem__(self, key, value)
123 dict.__setitem__(self, key, value)
123 else:
124 else:
124 raise KeyError(key)
125 raise KeyError(key)
125
126
126
127
127 class Client(HasTraits):
128 class Client(HasTraits):
128 """A semi-synchronous client to the IPython ZMQ cluster
129 """A semi-synchronous client to the IPython ZMQ cluster
129
130
130 Parameters
131 Parameters
131 ----------
132 ----------
132
133
133 url_or_file : bytes or unicode; zmq url or path to ipcontroller-client.json
134 url_or_file : bytes or unicode; zmq url or path to ipcontroller-client.json
134 Connection information for the Hub's registration. If a json connector
135 Connection information for the Hub's registration. If a json connector
135 file is given, then likely no further configuration is necessary.
136 file is given, then likely no further configuration is necessary.
136 [Default: use profile]
137 [Default: use profile]
137 profile : bytes
138 profile : bytes
138 The name of the Cluster profile to be used to find connector information.
139 The name of the Cluster profile to be used to find connector information.
139 If run from an IPython application, the default profile will be the same
140 If run from an IPython application, the default profile will be the same
140 as the running application, otherwise it will be 'default'.
141 as the running application, otherwise it will be 'default'.
141 context : zmq.Context
142 context : zmq.Context
142 Pass an existing zmq.Context instance, otherwise the client will create its own.
143 Pass an existing zmq.Context instance, otherwise the client will create its own.
143 debug : bool
144 debug : bool
144 flag for lots of message printing for debug purposes
145 flag for lots of message printing for debug purposes
145 timeout : int/float
146 timeout : int/float
146 time (in seconds) to wait for connection replies from the Hub
147 time (in seconds) to wait for connection replies from the Hub
147 [Default: 10]
148 [Default: 10]
148
149
149 #-------------- session related args ----------------
150 #-------------- session related args ----------------
150
151
151 config : Config object
152 config : Config object
152 If specified, this will be relayed to the Session for configuration
153 If specified, this will be relayed to the Session for configuration
153 username : str
154 username : str
154 set username for the session object
155 set username for the session object
155 packer : str (import_string) or callable
156 packer : str (import_string) or callable
156 Can be either the simple keyword 'json' or 'pickle', or an import_string to a
157 Can be either the simple keyword 'json' or 'pickle', or an import_string to a
157 function to serialize messages. Must support same input as
158 function to serialize messages. Must support same input as
158 JSON, and output must be bytes.
159 JSON, and output must be bytes.
159 You can pass a callable directly as `pack`
160 You can pass a callable directly as `pack`
160 unpacker : str (import_string) or callable
161 unpacker : str (import_string) or callable
161 The inverse of packer. Only necessary if packer is specified as *not* one
162 The inverse of packer. Only necessary if packer is specified as *not* one
162 of 'json' or 'pickle'.
163 of 'json' or 'pickle'.
163
164
164 #-------------- ssh related args ----------------
165 #-------------- ssh related args ----------------
165 # These are args for configuring the ssh tunnel to be used
166 # These are args for configuring the ssh tunnel to be used
166 # credentials are used to forward connections over ssh to the Controller
167 # credentials are used to forward connections over ssh to the Controller
167 # Note that the ip given in `addr` needs to be relative to sshserver
168 # Note that the ip given in `addr` needs to be relative to sshserver
168 # The most basic case is to leave addr as pointing to localhost (127.0.0.1),
169 # The most basic case is to leave addr as pointing to localhost (127.0.0.1),
169 # and set sshserver as the same machine the Controller is on. However,
170 # and set sshserver as the same machine the Controller is on. However,
170 # the only requirement is that sshserver is able to see the Controller
171 # the only requirement is that sshserver is able to see the Controller
171 # (i.e. is within the same trusted network).
172 # (i.e. is within the same trusted network).
172
173
173 sshserver : str
174 sshserver : str
174 A string of the form passed to ssh, i.e. 'server.tld' or 'user@server.tld:port'
175 A string of the form passed to ssh, i.e. 'server.tld' or 'user@server.tld:port'
175 If keyfile or password is specified, and this is not, it will default to
176 If keyfile or password is specified, and this is not, it will default to
176 the ip given in addr.
177 the ip given in addr.
177 sshkey : str; path to ssh private key file
178 sshkey : str; path to ssh private key file
178 This specifies a key to be used in ssh login, default None.
179 This specifies a key to be used in ssh login, default None.
179 Regular default ssh keys will be used without specifying this argument.
180 Regular default ssh keys will be used without specifying this argument.
180 password : str
181 password : str
181 Your ssh password to sshserver. Note that if this is left None,
182 Your ssh password to sshserver. Note that if this is left None,
182 you will be prompted for it if passwordless key based login is unavailable.
183 you will be prompted for it if passwordless key based login is unavailable.
183 paramiko : bool
184 paramiko : bool
184 flag for whether to use paramiko instead of shell ssh for tunneling.
185 flag for whether to use paramiko instead of shell ssh for tunneling.
185 [default: True on win32, False else]
186 [default: True on win32, False else]
186
187
187 ------- exec authentication args -------
188 ------- exec authentication args -------
188 If even localhost is untrusted, you can have some protection against
189 If even localhost is untrusted, you can have some protection against
189 unauthorized execution by signing messages with HMAC digests.
190 unauthorized execution by signing messages with HMAC digests.
190 Messages are still sent as cleartext, so if someone can snoop your
191 Messages are still sent as cleartext, so if someone can snoop your
191 loopback traffic this will not protect your privacy, but will prevent
192 loopback traffic this will not protect your privacy, but will prevent
192 unauthorized execution.
193 unauthorized execution.
193
194
194 exec_key : str
195 exec_key : str
195 an authentication key or file containing a key
196 an authentication key or file containing a key
196 default: None
197 default: None
197
198
198
199
199 Attributes
200 Attributes
200 ----------
201 ----------
201
202
202 ids : list of int engine IDs
203 ids : list of int engine IDs
203 requesting the ids attribute always synchronizes
204 requesting the ids attribute always synchronizes
204 the registration state. To request ids without synchronization,
205 the registration state. To request ids without synchronization,
205 use semi-private _ids attributes.
206 use semi-private _ids attributes.
206
207
207 history : list of msg_ids
208 history : list of msg_ids
208 a list of msg_ids, keeping track of all the execution
209 a list of msg_ids, keeping track of all the execution
209 messages you have submitted in order.
210 messages you have submitted in order.
210
211
211 outstanding : set of msg_ids
212 outstanding : set of msg_ids
212 a set of msg_ids that have been submitted, but whose
213 a set of msg_ids that have been submitted, but whose
213 results have not yet been received.
214 results have not yet been received.
214
215
215 results : dict
216 results : dict
216 a dict of all our results, keyed by msg_id
217 a dict of all our results, keyed by msg_id
217
218
218 block : bool
219 block : bool
219 determines default behavior when block not specified
220 determines default behavior when block not specified
220 in execution methods
221 in execution methods
221
222
222 Methods
223 Methods
223 -------
224 -------
224
225
225 spin
226 spin
226 flushes incoming results and registration state changes
227 flushes incoming results and registration state changes
227 control methods spin, and requesting `ids` also ensures up to date
228 control methods spin, and requesting `ids` also ensures up to date
228
229
229 wait
230 wait
230 wait on one or more msg_ids
231 wait on one or more msg_ids
231
232
232 execution methods
233 execution methods
233 apply
234 apply
234 legacy: execute, run
235 legacy: execute, run
235
236
236 data movement
237 data movement
237 push, pull, scatter, gather
238 push, pull, scatter, gather
238
239
239 query methods
240 query methods
240 queue_status, get_result, purge, result_status
241 queue_status, get_result, purge, result_status
241
242
242 control methods
243 control methods
243 abort, shutdown
244 abort, shutdown
244
245
245 """
246 """
246
247
247
248
248 block = Bool(False)
249 block = Bool(False)
249 outstanding = Set()
250 outstanding = Set()
250 results = Instance('collections.defaultdict', (dict,))
251 results = Instance('collections.defaultdict', (dict,))
251 metadata = Instance('collections.defaultdict', (Metadata,))
252 metadata = Instance('collections.defaultdict', (Metadata,))
252 history = List()
253 history = List()
253 debug = Bool(False)
254 debug = Bool(False)
254 _spin_thread = Any()
255 _spin_thread = Any()
255 _stop_spinning = Any()
256 _stop_spinning = Any()
256
257
257 profile=Unicode()
258 profile=Unicode()
258 def _profile_default(self):
259 def _profile_default(self):
259 if BaseIPythonApplication.initialized():
260 if BaseIPythonApplication.initialized():
260 # an IPython app *might* be running, try to get its profile
261 # an IPython app *might* be running, try to get its profile
261 try:
262 try:
262 return BaseIPythonApplication.instance().profile
263 return BaseIPythonApplication.instance().profile
263 except (AttributeError, MultipleInstanceError):
264 except (AttributeError, MultipleInstanceError):
264 # could be a *different* subclass of config.Application,
265 # could be a *different* subclass of config.Application,
265 # which would raise one of these two errors.
266 # which would raise one of these two errors.
266 return u'default'
267 return u'default'
267 else:
268 else:
268 return u'default'
269 return u'default'
269
270
270
271
271 _outstanding_dict = Instance('collections.defaultdict', (set,))
272 _outstanding_dict = Instance('collections.defaultdict', (set,))
272 _ids = List()
273 _ids = List()
273 _connected=Bool(False)
274 _connected=Bool(False)
274 _ssh=Bool(False)
275 _ssh=Bool(False)
275 _context = Instance('zmq.Context')
276 _context = Instance('zmq.Context')
276 _config = Dict()
277 _config = Dict()
277 _engines=Instance(util.ReverseDict, (), {})
278 _engines=Instance(util.ReverseDict, (), {})
278 # _hub_socket=Instance('zmq.Socket')
279 # _hub_socket=Instance('zmq.Socket')
279 _query_socket=Instance('zmq.Socket')
280 _query_socket=Instance('zmq.Socket')
280 _control_socket=Instance('zmq.Socket')
281 _control_socket=Instance('zmq.Socket')
281 _iopub_socket=Instance('zmq.Socket')
282 _iopub_socket=Instance('zmq.Socket')
282 _notification_socket=Instance('zmq.Socket')
283 _notification_socket=Instance('zmq.Socket')
283 _mux_socket=Instance('zmq.Socket')
284 _mux_socket=Instance('zmq.Socket')
284 _task_socket=Instance('zmq.Socket')
285 _task_socket=Instance('zmq.Socket')
285 _task_scheme=Unicode()
286 _task_scheme=Unicode()
286 _closed = False
287 _closed = False
287 _ignored_control_replies=Integer(0)
288 _ignored_control_replies=Integer(0)
288 _ignored_hub_replies=Integer(0)
289 _ignored_hub_replies=Integer(0)
289
290
290 def __new__(self, *args, **kw):
291 def __new__(self, *args, **kw):
291 # don't raise on positional args
292 # don't raise on positional args
292 return HasTraits.__new__(self, **kw)
293 return HasTraits.__new__(self, **kw)
293
294
294 def __init__(self, url_or_file=None, profile=None, profile_dir=None, ipython_dir=None,
295 def __init__(self, url_or_file=None, profile=None, profile_dir=None, ipython_dir=None,
295 context=None, debug=False, exec_key=None,
296 context=None, debug=False, exec_key=None,
296 sshserver=None, sshkey=None, password=None, paramiko=None,
297 sshserver=None, sshkey=None, password=None, paramiko=None,
297 timeout=10, **extra_args
298 timeout=10, **extra_args
298 ):
299 ):
299 if profile:
300 if profile:
300 super(Client, self).__init__(debug=debug, profile=profile)
301 super(Client, self).__init__(debug=debug, profile=profile)
301 else:
302 else:
302 super(Client, self).__init__(debug=debug)
303 super(Client, self).__init__(debug=debug)
303 if context is None:
304 if context is None:
304 context = zmq.Context.instance()
305 context = zmq.Context.instance()
305 self._context = context
306 self._context = context
306 self._stop_spinning = Event()
307 self._stop_spinning = Event()
307
308
308 self._setup_profile_dir(self.profile, profile_dir, ipython_dir)
309 self._setup_profile_dir(self.profile, profile_dir, ipython_dir)
309 if self._cd is not None:
310 if self._cd is not None:
310 if url_or_file is None:
311 if url_or_file is None:
311 url_or_file = pjoin(self._cd.security_dir, 'ipcontroller-client.json')
312 url_or_file = pjoin(self._cd.security_dir, 'ipcontroller-client.json')
312 assert url_or_file is not None, "I can't find enough information to connect to a hub!"\
313 assert url_or_file is not None, "I can't find enough information to connect to a hub!"\
313 " Please specify at least one of url_or_file or profile."
314 " Please specify at least one of url_or_file or profile."
314
315
315 if not util.is_url(url_or_file):
316 if not util.is_url(url_or_file):
316 # it's not a url, try for a file
317 # it's not a url, try for a file
317 if not os.path.exists(url_or_file):
318 if not os.path.exists(url_or_file):
318 if self._cd:
319 if self._cd:
319 url_or_file = os.path.join(self._cd.security_dir, url_or_file)
320 url_or_file = os.path.join(self._cd.security_dir, url_or_file)
320 assert os.path.exists(url_or_file), "Not a valid connection file or url: %r"%url_or_file
321 assert os.path.exists(url_or_file), "Not a valid connection file or url: %r"%url_or_file
321 with open(url_or_file) as f:
322 with open(url_or_file) as f:
322 cfg = json.loads(f.read())
323 cfg = json.loads(f.read())
323 else:
324 else:
324 cfg = {'url':url_or_file}
325 cfg = {'url':url_or_file}
325
326
326 # sync defaults from args, json:
327 # sync defaults from args, json:
327 if sshserver:
328 if sshserver:
328 cfg['ssh'] = sshserver
329 cfg['ssh'] = sshserver
329 if exec_key:
330 if exec_key:
330 cfg['exec_key'] = exec_key
331 cfg['exec_key'] = exec_key
331 exec_key = cfg['exec_key']
332 exec_key = cfg['exec_key']
332 location = cfg.setdefault('location', None)
333 location = cfg.setdefault('location', None)
333 cfg['url'] = util.disambiguate_url(cfg['url'], location)
334 cfg['url'] = util.disambiguate_url(cfg['url'], location)
334 url = cfg['url']
335 url = cfg['url']
335 proto,addr,port = util.split_url(url)
336 proto,addr,port = util.split_url(url)
336 if location is not None and addr == '127.0.0.1':
337 if location is not None and addr == '127.0.0.1':
337 # location specified, and connection is expected to be local
338 # location specified, and connection is expected to be local
338 if location not in LOCAL_IPS and not sshserver:
339 if location not in LOCAL_IPS and not sshserver:
339 # load ssh from JSON *only* if the controller is not on
340 # load ssh from JSON *only* if the controller is not on
340 # this machine
341 # this machine
341 sshserver=cfg['ssh']
342 sshserver=cfg['ssh']
342 if location not in LOCAL_IPS and not sshserver:
343 if location not in LOCAL_IPS and not sshserver:
343 # warn if no ssh specified, but SSH is probably needed
344 # warn if no ssh specified, but SSH is probably needed
344 # This is only a warning, because the most likely cause
345 # This is only a warning, because the most likely cause
345 # is a local Controller on a laptop whose IP is dynamic
346 # is a local Controller on a laptop whose IP is dynamic
346 warnings.warn("""
347 warnings.warn("""
347 Controller appears to be listening on localhost, but not on this machine.
348 Controller appears to be listening on localhost, but not on this machine.
348 If this is true, you should specify Client(...,sshserver='you@%s')
349 If this is true, you should specify Client(...,sshserver='you@%s')
349 or instruct your controller to listen on an external IP."""%location,
350 or instruct your controller to listen on an external IP."""%location,
350 RuntimeWarning)
351 RuntimeWarning)
351 elif not sshserver:
352 elif not sshserver:
352 # otherwise sync with cfg
353 # otherwise sync with cfg
353 sshserver = cfg['ssh']
354 sshserver = cfg['ssh']
354
355
355 self._config = cfg
356 self._config = cfg
356
357
357 self._ssh = bool(sshserver or sshkey or password)
358 self._ssh = bool(sshserver or sshkey or password)
358 if self._ssh and sshserver is None:
359 if self._ssh and sshserver is None:
359 # default to ssh via localhost
360 # default to ssh via localhost
360 sshserver = url.split('://')[1].split(':')[0]
361 sshserver = url.split('://')[1].split(':')[0]
361 if self._ssh and password is None:
362 if self._ssh and password is None:
362 if tunnel.try_passwordless_ssh(sshserver, sshkey, paramiko):
363 if tunnel.try_passwordless_ssh(sshserver, sshkey, paramiko):
363 password=False
364 password=False
364 else:
365 else:
365 password = getpass("SSH Password for %s: "%sshserver)
366 password = getpass("SSH Password for %s: "%sshserver)
366 ssh_kwargs = dict(keyfile=sshkey, password=password, paramiko=paramiko)
367 ssh_kwargs = dict(keyfile=sshkey, password=password, paramiko=paramiko)
367
368
368 # configure and construct the session
369 # configure and construct the session
369 if exec_key is not None:
370 if exec_key is not None:
370 if os.path.isfile(exec_key):
371 if os.path.isfile(exec_key):
371 extra_args['keyfile'] = exec_key
372 extra_args['keyfile'] = exec_key
372 else:
373 else:
373 exec_key = util.asbytes(exec_key)
374 exec_key = cast_bytes(exec_key)
374 extra_args['key'] = exec_key
375 extra_args['key'] = exec_key
375 self.session = Session(**extra_args)
376 self.session = Session(**extra_args)
376
377
377 self._query_socket = self._context.socket(zmq.DEALER)
378 self._query_socket = self._context.socket(zmq.DEALER)
378 self._query_socket.setsockopt(zmq.IDENTITY, self.session.bsession)
379 self._query_socket.setsockopt(zmq.IDENTITY, self.session.bsession)
379 if self._ssh:
380 if self._ssh:
380 tunnel.tunnel_connection(self._query_socket, url, sshserver, **ssh_kwargs)
381 tunnel.tunnel_connection(self._query_socket, url, sshserver, **ssh_kwargs)
381 else:
382 else:
382 self._query_socket.connect(url)
383 self._query_socket.connect(url)
383
384
384 self.session.debug = self.debug
385 self.session.debug = self.debug
385
386
386 self._notification_handlers = {'registration_notification' : self._register_engine,
387 self._notification_handlers = {'registration_notification' : self._register_engine,
387 'unregistration_notification' : self._unregister_engine,
388 'unregistration_notification' : self._unregister_engine,
388 'shutdown_notification' : lambda msg: self.close(),
389 'shutdown_notification' : lambda msg: self.close(),
389 }
390 }
390 self._queue_handlers = {'execute_reply' : self._handle_execute_reply,
391 self._queue_handlers = {'execute_reply' : self._handle_execute_reply,
391 'apply_reply' : self._handle_apply_reply}
392 'apply_reply' : self._handle_apply_reply}
392 self._connect(sshserver, ssh_kwargs, timeout)
393 self._connect(sshserver, ssh_kwargs, timeout)
393
394
394 def __del__(self):
395 def __del__(self):
395 """cleanup sockets, but _not_ context."""
396 """cleanup sockets, but _not_ context."""
396 self.close()
397 self.close()
397
398
398 def _setup_profile_dir(self, profile, profile_dir, ipython_dir):
399 def _setup_profile_dir(self, profile, profile_dir, ipython_dir):
399 if ipython_dir is None:
400 if ipython_dir is None:
400 ipython_dir = get_ipython_dir()
401 ipython_dir = get_ipython_dir()
401 if profile_dir is not None:
402 if profile_dir is not None:
402 try:
403 try:
403 self._cd = ProfileDir.find_profile_dir(profile_dir)
404 self._cd = ProfileDir.find_profile_dir(profile_dir)
404 return
405 return
405 except ProfileDirError:
406 except ProfileDirError:
406 pass
407 pass
407 elif profile is not None:
408 elif profile is not None:
408 try:
409 try:
409 self._cd = ProfileDir.find_profile_dir_by_name(
410 self._cd = ProfileDir.find_profile_dir_by_name(
410 ipython_dir, profile)
411 ipython_dir, profile)
411 return
412 return
412 except ProfileDirError:
413 except ProfileDirError:
413 pass
414 pass
414 self._cd = None
415 self._cd = None
415
416
416 def _update_engines(self, engines):
417 def _update_engines(self, engines):
417 """Update our engines dict and _ids from a dict of the form: {id:uuid}."""
418 """Update our engines dict and _ids from a dict of the form: {id:uuid}."""
418 for k,v in engines.iteritems():
419 for k,v in engines.iteritems():
419 eid = int(k)
420 eid = int(k)
420 self._engines[eid] = v
421 self._engines[eid] = v
421 self._ids.append(eid)
422 self._ids.append(eid)
422 self._ids = sorted(self._ids)
423 self._ids = sorted(self._ids)
423 if sorted(self._engines.keys()) != range(len(self._engines)) and \
424 if sorted(self._engines.keys()) != range(len(self._engines)) and \
424 self._task_scheme == 'pure' and self._task_socket:
425 self._task_scheme == 'pure' and self._task_socket:
425 self._stop_scheduling_tasks()
426 self._stop_scheduling_tasks()
426
427
427 def _stop_scheduling_tasks(self):
428 def _stop_scheduling_tasks(self):
428 """Stop scheduling tasks because an engine has been unregistered
429 """Stop scheduling tasks because an engine has been unregistered
429 from a pure ZMQ scheduler.
430 from a pure ZMQ scheduler.
430 """
431 """
431 self._task_socket.close()
432 self._task_socket.close()
432 self._task_socket = None
433 self._task_socket = None
433 msg = "An engine has been unregistered, and we are using pure " +\
434 msg = "An engine has been unregistered, and we are using pure " +\
434 "ZMQ task scheduling. Task farming will be disabled."
435 "ZMQ task scheduling. Task farming will be disabled."
435 if self.outstanding:
436 if self.outstanding:
436 msg += " If you were running tasks when this happened, " +\
437 msg += " If you were running tasks when this happened, " +\
437 "some `outstanding` msg_ids may never resolve."
438 "some `outstanding` msg_ids may never resolve."
438 warnings.warn(msg, RuntimeWarning)
439 warnings.warn(msg, RuntimeWarning)
439
440
440 def _build_targets(self, targets):
441 def _build_targets(self, targets):
441 """Turn valid target IDs or 'all' into two lists:
442 """Turn valid target IDs or 'all' into two lists:
442 (int_ids, uuids).
443 (int_ids, uuids).
443 """
444 """
444 if not self._ids:
445 if not self._ids:
445 # flush notification socket if no engines yet, just in case
446 # flush notification socket if no engines yet, just in case
446 if not self.ids:
447 if not self.ids:
447 raise error.NoEnginesRegistered("Can't build targets without any engines")
448 raise error.NoEnginesRegistered("Can't build targets without any engines")
448
449
449 if targets is None:
450 if targets is None:
450 targets = self._ids
451 targets = self._ids
451 elif isinstance(targets, basestring):
452 elif isinstance(targets, basestring):
452 if targets.lower() == 'all':
453 if targets.lower() == 'all':
453 targets = self._ids
454 targets = self._ids
454 else:
455 else:
455 raise TypeError("%r not valid str target, must be 'all'"%(targets))
456 raise TypeError("%r not valid str target, must be 'all'"%(targets))
456 elif isinstance(targets, int):
457 elif isinstance(targets, int):
457 if targets < 0:
458 if targets < 0:
458 targets = self.ids[targets]
459 targets = self.ids[targets]
459 if targets not in self._ids:
460 if targets not in self._ids:
460 raise IndexError("No such engine: %i"%targets)
461 raise IndexError("No such engine: %i"%targets)
461 targets = [targets]
462 targets = [targets]
462
463
463 if isinstance(targets, slice):
464 if isinstance(targets, slice):
464 indices = range(len(self._ids))[targets]
465 indices = range(len(self._ids))[targets]
465 ids = self.ids
466 ids = self.ids
466 targets = [ ids[i] for i in indices ]
467 targets = [ ids[i] for i in indices ]
467
468
468 if not isinstance(targets, (tuple, list, xrange)):
469 if not isinstance(targets, (tuple, list, xrange)):
469 raise TypeError("targets by int/slice/collection of ints only, not %s"%(type(targets)))
470 raise TypeError("targets by int/slice/collection of ints only, not %s"%(type(targets)))
470
471
471 return [util.asbytes(self._engines[t]) for t in targets], list(targets)
472 return [cast_bytes(self._engines[t]) for t in targets], list(targets)
472
473
473 def _connect(self, sshserver, ssh_kwargs, timeout):
474 def _connect(self, sshserver, ssh_kwargs, timeout):
474 """setup all our socket connections to the cluster. This is called from
475 """setup all our socket connections to the cluster. This is called from
475 __init__."""
        __init__."""
        # Maybe allow reconnecting?
        if self._connected:
            return
        self._connected = True

        def connect_socket(s, url):
            url = util.disambiguate_url(url, self._config['location'])
            if self._ssh:
                return tunnel.tunnel_connection(s, url, sshserver, **ssh_kwargs)
            else:
                return s.connect(url)

        self.session.send(self._query_socket, 'connection_request')
        # use Poller because zmq.select has wrong units in pyzmq 2.1.7
        poller = zmq.Poller()
        poller.register(self._query_socket, zmq.POLLIN)
        # poll expects milliseconds, timeout is seconds
        evts = poller.poll(timeout*1000)
        if not evts:
            raise error.TimeoutError("Hub connection request timed out")
        idents, msg = self.session.recv(self._query_socket, mode=0)
        if self.debug:
            pprint(msg)
        msg = Message(msg)
        content = msg.content
        self._config['registration'] = dict(content)
        if content.status == 'ok':
            ident = self.session.bsession
            if content.mux:
                self._mux_socket = self._context.socket(zmq.DEALER)
                self._mux_socket.setsockopt(zmq.IDENTITY, ident)
                connect_socket(self._mux_socket, content.mux)
            if content.task:
                self._task_scheme, task_addr = content.task
                self._task_socket = self._context.socket(zmq.DEALER)
                self._task_socket.setsockopt(zmq.IDENTITY, ident)
                connect_socket(self._task_socket, task_addr)
            if content.notification:
                self._notification_socket = self._context.socket(zmq.SUB)
                connect_socket(self._notification_socket, content.notification)
                self._notification_socket.setsockopt(zmq.SUBSCRIBE, b'')
            # if content.query:
            #     self._query_socket = self._context.socket(zmq.DEALER)
            #     self._query_socket.setsockopt(zmq.IDENTITY, self.session.bsession)
            #     connect_socket(self._query_socket, content.query)
            if content.control:
                self._control_socket = self._context.socket(zmq.DEALER)
                self._control_socket.setsockopt(zmq.IDENTITY, ident)
                connect_socket(self._control_socket, content.control)
            if content.iopub:
                self._iopub_socket = self._context.socket(zmq.SUB)
                self._iopub_socket.setsockopt(zmq.SUBSCRIBE, b'')
                self._iopub_socket.setsockopt(zmq.IDENTITY, ident)
                connect_socket(self._iopub_socket, content.iopub)
            self._update_engines(dict(content.engines))
        else:
            self._connected = False
            raise Exception("Failed to connect!")

    #--------------------------------------------------------------------------
    # handlers and callbacks for incoming messages
    #--------------------------------------------------------------------------

    def _unwrap_exception(self, content):
        """unwrap exception, and remap engine_id to int."""
        e = error.unwrap_exception(content)
        # print e.traceback
        if e.engine_info:
            e_uuid = e.engine_info['engine_uuid']
            eid = self._engines[e_uuid]
            e.engine_info['engine_id'] = eid
        return e

    def _extract_metadata(self, header, parent, content):
        md = {'msg_id' : parent['msg_id'],
              'received' : datetime.now(),
              'engine_uuid' : header.get('engine', None),
              'follow' : parent.get('follow', []),
              'after' : parent.get('after', []),
              'status' : content['status'],
            }

        if md['engine_uuid'] is not None:
            md['engine_id'] = self._engines.get(md['engine_uuid'], None)

        if 'date' in parent:
            md['submitted'] = parent['date']
        if 'started' in header:
            md['started'] = header['started']
        if 'date' in header:
            md['completed'] = header['date']
        return md
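
    # Illustrative shape of the dict built above (values are hypothetical;
    # the real values come from the message headers):
    #
    #     {'msg_id': '...', 'received': datetime(...), 'engine_uuid': '...',
    #      'follow': [], 'after': [], 'status': 'ok',
    #      'engine_id': 0, 'submitted': datetime(...),
    #      'started': datetime(...), 'completed': datetime(...)}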

    def _register_engine(self, msg):
        """Register a new engine, and update our connection info."""
        content = msg['content']
        eid = content['id']
        d = {eid : content['queue']}
        self._update_engines(d)

    def _unregister_engine(self, msg):
        """Unregister an engine that has died."""
        content = msg['content']
        eid = int(content['id'])
        if eid in self._ids:
            self._ids.remove(eid)
            uuid = self._engines.pop(eid)

            self._handle_stranded_msgs(eid, uuid)

        if self._task_socket and self._task_scheme == 'pure':
            self._stop_scheduling_tasks()

    def _handle_stranded_msgs(self, eid, uuid):
        """Handle messages known to be on an engine when the engine unregisters.

        It is possible that this will fire prematurely - that is, an engine will
        go down after completing a result, and the client will be notified
        of the unregistration and later receive the successful result.
        """

        outstanding = self._outstanding_dict[uuid]

        for msg_id in list(outstanding):
            if msg_id in self.results:
                # we already have the result
                continue
            try:
                raise error.EngineError("Engine %r died while running task %r"%(eid, msg_id))
            except:
                content = error.wrap_exception()
            # build a fake message:
            parent = {}
            header = {}
            parent['msg_id'] = msg_id
            header['engine'] = uuid
            header['date'] = datetime.now()
            msg = dict(parent_header=parent, header=header, content=content)
            self._handle_apply_reply(msg)

    def _handle_execute_reply(self, msg):
        """Save the reply to an execute_request into our results.

        execute messages are never actually used. apply is used instead.
        """

        parent = msg['parent_header']
        msg_id = parent['msg_id']
        if msg_id not in self.outstanding:
            if msg_id in self.history:
                print ("got stale result: %s"%msg_id)
            else:
                print ("got unknown result: %s"%msg_id)
        else:
            self.outstanding.remove(msg_id)

        content = msg['content']
        header = msg['header']

        # construct metadata:
        md = self.metadata[msg_id]
        md.update(self._extract_metadata(header, parent, content))
        # is this redundant?
        self.metadata[msg_id] = md

        e_outstanding = self._outstanding_dict[md['engine_uuid']]
        if msg_id in e_outstanding:
            e_outstanding.remove(msg_id)

        # construct result:
        if content['status'] == 'ok':
            self.results[msg_id] = content
        elif content['status'] == 'aborted':
            self.results[msg_id] = error.TaskAborted(msg_id)
        elif content['status'] == 'resubmitted':
            # TODO: handle resubmission
            pass
        else:
            self.results[msg_id] = self._unwrap_exception(content)

    def _handle_apply_reply(self, msg):
        """Save the reply to an apply_request into our results."""
        parent = msg['parent_header']
        msg_id = parent['msg_id']
        if msg_id not in self.outstanding:
            if msg_id in self.history:
                print ("got stale result: %s"%msg_id)
                print self.results[msg_id]
                print msg
            else:
                print ("got unknown result: %s"%msg_id)
        else:
            self.outstanding.remove(msg_id)
        content = msg['content']
        header = msg['header']

        # construct metadata:
        md = self.metadata[msg_id]
        md.update(self._extract_metadata(header, parent, content))
        # is this redundant?
        self.metadata[msg_id] = md

        e_outstanding = self._outstanding_dict[md['engine_uuid']]
        if msg_id in e_outstanding:
            e_outstanding.remove(msg_id)

        # construct result:
        if content['status'] == 'ok':
            self.results[msg_id] = util.unserialize_object(msg['buffers'])[0]
        elif content['status'] == 'aborted':
            self.results[msg_id] = error.TaskAborted(msg_id)
        elif content['status'] == 'resubmitted':
            # TODO: handle resubmission
            pass
        else:
            self.results[msg_id] = self._unwrap_exception(content)

    def _flush_notifications(self):
        """Flush notifications of engine registrations waiting
        in ZMQ queue."""
        idents, msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)
        while msg is not None:
            if self.debug:
                pprint(msg)
            msg_type = msg['header']['msg_type']
            handler = self._notification_handlers.get(msg_type, None)
            if handler is None:
                # msg is a dict, so use the extracted msg_type, not msg.msg_type
                raise Exception("Unhandled message type: %s" % msg_type)
            else:
                handler(msg)
            idents, msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)

    def _flush_results(self, sock):
        """Flush task or queue results waiting in ZMQ queue."""
        idents, msg = self.session.recv(sock, mode=zmq.NOBLOCK)
        while msg is not None:
            if self.debug:
                pprint(msg)
            msg_type = msg['header']['msg_type']
            handler = self._queue_handlers.get(msg_type, None)
            if handler is None:
                # msg is a dict, so use the extracted msg_type, not msg.msg_type
                raise Exception("Unhandled message type: %s" % msg_type)
            else:
                handler(msg)
            idents, msg = self.session.recv(sock, mode=zmq.NOBLOCK)

    def _flush_control(self, sock):
        """Flush replies from the control channel waiting
        in the ZMQ queue.

        Currently: ignore them."""
        if self._ignored_control_replies <= 0:
            return
        idents, msg = self.session.recv(sock, mode=zmq.NOBLOCK)
        while msg is not None:
            self._ignored_control_replies -= 1
            if self.debug:
                pprint(msg)
            idents, msg = self.session.recv(sock, mode=zmq.NOBLOCK)

    def _flush_ignored_control(self):
        """flush ignored control replies"""
        while self._ignored_control_replies > 0:
            self.session.recv(self._control_socket)
            self._ignored_control_replies -= 1

    def _flush_ignored_hub_replies(self):
        ident, msg = self.session.recv(self._query_socket, mode=zmq.NOBLOCK)
        while msg is not None:
            ident, msg = self.session.recv(self._query_socket, mode=zmq.NOBLOCK)

    def _flush_iopub(self, sock):
        """Flush replies from the iopub channel waiting
        in the ZMQ queue.
        """
        idents, msg = self.session.recv(sock, mode=zmq.NOBLOCK)
        while msg is not None:
            if self.debug:
                pprint(msg)
            parent = msg['parent_header']
            # ignore IOPub messages with no parent.
            # Caused by print statements or warnings from before the first execution.
            if not parent:
                # fetch the next message before skipping,
                # otherwise `continue` would bypass the recv at the bottom
                # of the loop and spin forever on the same message
                idents, msg = self.session.recv(sock, mode=zmq.NOBLOCK)
                continue
            msg_id = parent['msg_id']
            content = msg['content']
            header = msg['header']
            msg_type = msg['header']['msg_type']

            # init metadata:
            md = self.metadata[msg_id]

            if msg_type == 'stream':
                name = content['name']
                s = md[name] or ''
                md[name] = s + content['data']
            elif msg_type == 'pyerr':
                md.update({'pyerr' : self._unwrap_exception(content)})
            elif msg_type == 'pyin':
                md.update({'pyin' : content['code']})
            elif msg_type == 'display_data':
                md['outputs'].append(content.get('data'))
            elif msg_type == 'pyout':
                md['pyout'] = content.get('data')
            else:
                # unhandled msg_type (status, etc.)
                pass

            # redundant?
            self.metadata[msg_id] = md

            idents, msg = self.session.recv(sock, mode=zmq.NOBLOCK)

    #--------------------------------------------------------------------------
    # len, getitem
    #--------------------------------------------------------------------------

    def __len__(self):
        """len(client) returns # of engines."""
        return len(self.ids)

    def __getitem__(self, key):
        """index access returns DirectView multiplexer objects

        Must be int, slice, or list/tuple/xrange of ints"""
        if not isinstance(key, (int, slice, tuple, list, xrange)):
            raise TypeError("key by int/slice/iterable of ints only, not %s"%(type(key)))
        else:
            return self.direct_view(key)
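
    # Illustrative indexing (assumes a connected Client ``rc``):
    #
    #     rc[0]      # DirectView on engine 0
    #     rc[::2]    # DirectView on every other engine
    #     rc[:]      # DirectView on all engines registered right now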

    #--------------------------------------------------------------------------
    # Begin public methods
    #--------------------------------------------------------------------------

    @property
    def ids(self):
        """Always up-to-date ids property."""
        self._flush_notifications()
        # always copy:
        return list(self._ids)

    def close(self):
        if self._closed:
            return
        self.stop_spin_thread()
        snames = filter(lambda n: n.endswith('socket'), dir(self))
        for socket in map(lambda name: getattr(self, name), snames):
            if isinstance(socket, zmq.Socket) and not socket.closed:
                socket.close()
        self._closed = True

    def _spin_every(self, interval=1):
        """target func for use in spin_thread"""
        while True:
            if self._stop_spinning.is_set():
                return
            time.sleep(interval)
            self.spin()

    def spin_thread(self, interval=1):
        """call Client.spin() in a background thread on some regular interval

        This helps ensure that messages don't pile up too much in the zmq queue
        while you are working on other things, or just leaving an idle terminal.

        It also helps limit potential padding of the `received` timestamp
        on AsyncResult objects, used for timings.

        Parameters
        ----------

        interval : float, optional
            The interval on which to spin the client in the background thread
            (simply passed to time.sleep).

        Notes
        -----

        For precision timing, you may want to use this method to put a bound
        on the jitter (in seconds) in `received` timestamps used
        in AsyncResult.wall_time.

        """
        if self._spin_thread is not None:
            self.stop_spin_thread()
        self._stop_spinning.clear()
        self._spin_thread = Thread(target=self._spin_every, args=(interval,))
        self._spin_thread.daemon = True
        self._spin_thread.start()

    def stop_spin_thread(self):
        """stop background spin_thread, if any"""
        if self._spin_thread is not None:
            self._stop_spinning.set()
            self._spin_thread.join()
            self._spin_thread = None
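
    # Usage sketch (assumes a connected Client ``rc``): spin in the
    # background every 50ms to keep `received` timestamps tight,
    # then stop when done timing.
    #
    #     rc.spin_thread(interval=0.05)
    #     ...  # submit and wait on work
    #     rc.stop_spin_thread()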

    def spin(self):
        """Flush any registration notifications and execution results
        waiting in the ZMQ queue.
        """
        if self._notification_socket:
            self._flush_notifications()
        if self._mux_socket:
            self._flush_results(self._mux_socket)
        if self._task_socket:
            self._flush_results(self._task_socket)
        if self._control_socket:
            self._flush_control(self._control_socket)
        if self._iopub_socket:
            self._flush_iopub(self._iopub_socket)
        if self._query_socket:
            self._flush_ignored_hub_replies()

    def wait(self, jobs=None, timeout=-1):
        """waits on one or more `jobs`, for up to `timeout` seconds.

        Parameters
        ----------

        jobs : int, str, or list of ints and/or strs, or one or more AsyncResult objects
            ints are indices to self.history
            strs are msg_ids
            default: wait on all outstanding messages
        timeout : float
            a time in seconds, after which to give up.
            default is -1, which means no timeout

        Returns
        -------

        True : when all msg_ids are done
        False : timeout reached, some msg_ids still outstanding
        """
        tic = time.time()
        if jobs is None:
            theids = self.outstanding
        else:
            if isinstance(jobs, (int, basestring, AsyncResult)):
                jobs = [jobs]
            theids = set()
            for job in jobs:
                if isinstance(job, int):
                    # index access
                    job = self.history[job]
                elif isinstance(job, AsyncResult):
                    map(theids.add, job.msg_ids)
                    continue
                theids.add(job)
        if not theids.intersection(self.outstanding):
            return True
        self.spin()
        while theids.intersection(self.outstanding):
            if timeout >= 0 and ( time.time()-tic ) > timeout:
                break
            time.sleep(1e-3)
            self.spin()
        return len(theids.intersection(self.outstanding)) == 0
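
    # Usage sketch (assumes a connected Client ``rc`` and an in-flight
    # AsyncResult ``ar``):
    #
    #     rc.wait(ar, timeout=5)   # True if done within 5s, else False
    #     rc.wait()                # block until everything outstanding is done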

    #--------------------------------------------------------------------------
    # Control methods
    #--------------------------------------------------------------------------

    @spin_first
    def clear(self, targets=None, block=None):
        """Clear the namespace in target(s)."""
        block = self.block if block is None else block
        targets = self._build_targets(targets)[0]
        for t in targets:
            self.session.send(self._control_socket, 'clear_request', content={}, ident=t)
        error = False
        if block:
            self._flush_ignored_control()
            for i in range(len(targets)):
                idents, msg = self.session.recv(self._control_socket, 0)
                if self.debug:
                    pprint(msg)
                if msg['content']['status'] != 'ok':
                    error = self._unwrap_exception(msg['content'])
        else:
            self._ignored_control_replies += len(targets)
        if error:
            raise error

    @spin_first
    def abort(self, jobs=None, targets=None, block=None):
        """Abort specific jobs from the execution queues of target(s).

        This is a mechanism to prevent jobs that have already been submitted
        from executing.

        Parameters
        ----------

        jobs : msg_id, list of msg_ids, or AsyncResult
            The jobs to be aborted

            If unspecified/None: abort all outstanding jobs.

        """
        block = self.block if block is None else block
        jobs = jobs if jobs is not None else list(self.outstanding)
        targets = self._build_targets(targets)[0]

        msg_ids = []
        if isinstance(jobs, (basestring, AsyncResult)):
            jobs = [jobs]
        bad_ids = filter(lambda obj: not isinstance(obj, (basestring, AsyncResult)), jobs)
        if bad_ids:
            raise TypeError("Invalid msg_id type %r, expected str or AsyncResult" % bad_ids[0])
        for j in jobs:
            if isinstance(j, AsyncResult):
                msg_ids.extend(j.msg_ids)
            else:
                msg_ids.append(j)
        content = dict(msg_ids=msg_ids)
        for t in targets:
            self.session.send(self._control_socket, 'abort_request',
                    content=content, ident=t)
        error = False
        if block:
            self._flush_ignored_control()
            for i in range(len(targets)):
                idents, msg = self.session.recv(self._control_socket, 0)
                if self.debug:
                    pprint(msg)
                if msg['content']['status'] != 'ok':
                    error = self._unwrap_exception(msg['content'])
        else:
            self._ignored_control_replies += len(targets)
        if error:
            raise error
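
    # Usage sketch (assumes a connected Client ``rc`` and a queued
    # AsyncResult ``ar`` that has not started executing yet):
    #
    #     rc.abort(ar)    # abort the jobs behind one AsyncResult
    #     rc.abort()      # abort everything still outstanding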

    @spin_first
    def shutdown(self, targets=None, restart=False, hub=False, block=None):
        """Terminates one or more engine processes, optionally including the hub."""
        block = self.block if block is None else block
        if hub:
            targets = 'all'
        targets = self._build_targets(targets)[0]
        for t in targets:
            self.session.send(self._control_socket, 'shutdown_request',
                    content={'restart':restart}, ident=t)
        error = False
        if block or hub:
            self._flush_ignored_control()
            for i in range(len(targets)):
                idents, msg = self.session.recv(self._control_socket, 0)
                if self.debug:
                    pprint(msg)
                if msg['content']['status'] != 'ok':
                    error = self._unwrap_exception(msg['content'])
        else:
            self._ignored_control_replies += len(targets)

        if hub:
            time.sleep(0.25)
            self.session.send(self._query_socket, 'shutdown_request')
            idents, msg = self.session.recv(self._query_socket, 0)
            if self.debug:
                pprint(msg)
            if msg['content']['status'] != 'ok':
                error = self._unwrap_exception(msg['content'])

        if error:
            raise error
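
    # Usage sketch (assumes a connected Client ``rc``):
    #
    #     rc.shutdown(targets=[0, 1])   # stop two engines
    #     rc.shutdown(hub=True)         # stop all engines and the hub itself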

    #--------------------------------------------------------------------------
    # Execution related methods
    #--------------------------------------------------------------------------

    def _maybe_raise(self, result):
        """wrapper for maybe raising an exception if apply failed."""
        if isinstance(result, error.RemoteError):
            raise result

        return result

    def send_apply_request(self, socket, f, args=None, kwargs=None, subheader=None, track=False,
                            ident=None):
        """construct and send an apply message via a socket.

        This is the principal method with which all engine execution is performed by views.
        """

        assert not self._closed, "cannot use me anymore, I'm closed!"
        # defaults:
        args = args if args is not None else []
        kwargs = kwargs if kwargs is not None else {}
        subheader = subheader if subheader is not None else {}

        # validate arguments
        if not callable(f) and not isinstance(f, Reference):
            raise TypeError("f must be callable, not %s" % type(f))
        if not isinstance(args, (tuple, list)):
            raise TypeError("args must be tuple or list, not %s" % type(args))
        if not isinstance(kwargs, dict):
            raise TypeError("kwargs must be dict, not %s" % type(kwargs))
        if not isinstance(subheader, dict):
            raise TypeError("subheader must be dict, not %s" % type(subheader))

        bufs = util.pack_apply_message(f, args, kwargs)

        msg = self.session.send(socket, "apply_request", buffers=bufs, ident=ident,
                            subheader=subheader, track=track)

        msg_id = msg['header']['msg_id']
        self.outstanding.add(msg_id)
        if ident:
            # possibly routed to a specific engine
            if isinstance(ident, list):
                ident = ident[-1]
            if ident in self._engines.values():
                # save for later, in case of engine death
                self._outstanding_dict[ident].add(msg_id)
        self.history.append(msg_id)
        self.metadata[msg_id]['submitted'] = datetime.now()

        return msg
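
    # Sketch of how a view drives this method (illustrative only; views
    # normally call it for you via apply/apply_async):
    #
    #     msg = rc.send_apply_request(rc._task_socket, lambda x: x*2, args=(21,))
    #     msg_id = msg['header']['msg_id']
    #     rc.wait([msg_id])
    #     rc.results[msg_id]    # -> 42 once the reply arrives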

    def send_execute_request(self, socket, code, silent=True, subheader=None, ident=None):
        """construct and send an execute request via a socket.

        """

        assert not self._closed, "cannot use me anymore, I'm closed!"
        # defaults:
        subheader = subheader if subheader is not None else {}

        # validate arguments
        if not isinstance(code, basestring):
            raise TypeError("code must be text, not %s" % type(code))
        if not isinstance(subheader, dict):
            raise TypeError("subheader must be dict, not %s" % type(subheader))

        content = dict(code=code, silent=bool(silent), user_variables=[], user_expressions={})

        msg = self.session.send(socket, "execute_request", content=content, ident=ident,
                            subheader=subheader)

        msg_id = msg['header']['msg_id']
        self.outstanding.add(msg_id)
        if ident:
            # possibly routed to a specific engine
            if isinstance(ident, list):
                ident = ident[-1]
            if ident in self._engines.values():
                # save for later, in case of engine death
                self._outstanding_dict[ident].add(msg_id)
        self.history.append(msg_id)
        self.metadata[msg_id]['submitted'] = datetime.now()

        return msg
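
    # The execute_request content dict built above, for code "a = 1"
    # (illustrative values):
    #
    #     {'code': 'a = 1', 'silent': True,
    #      'user_variables': [], 'user_expressions': {}}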

    #--------------------------------------------------------------------------
    # construct a View object
    #--------------------------------------------------------------------------

    def load_balanced_view(self, targets=None):
        """construct a LoadBalancedView object.

        If no arguments are specified, create a LoadBalancedView
        using all engines.

        Parameters
        ----------

        targets: list,slice,int,etc. [default: use all engines]
            The subset of engines across which to load-balance
        """
        if targets == 'all':
            targets = None
        if targets is not None:
            targets = self._build_targets(targets)[1]
        return LoadBalancedView(client=self, socket=self._task_socket, targets=targets)
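
    # Usage sketch (assumes a connected Client ``rc``):
    #
    #     lview = rc.load_balanced_view()       # balance across all engines
    #     ar = lview.apply_async(pow, 2, 10)    # the scheduler picks the engine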

    def direct_view(self, targets='all'):
        """construct a DirectView object.

        If no targets are specified, create a DirectView using all engines.

        rc.direct_view('all') is distinguished from rc[:] in that 'all' will
        evaluate the target engines at each execution, whereas rc[:] will connect to
        all *current* engines, and that list will not change.

        That is, 'all' will always use all engines, whereas rc[:] will not use
        engines added after the DirectView is constructed.

        Parameters
        ----------

        targets: list,slice,int,etc. [default: use all engines]
            The engines to use for the View
        """
        single = isinstance(targets, int)
        # allow 'all' to be lazily evaluated at each execution
        if targets != 'all':
            targets = self._build_targets(targets)[1]
        if single:
            targets = targets[0]
        return DirectView(client=self, socket=self._mux_socket, targets=targets)
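
    # Usage sketch (assumes a connected Client ``rc``): the lazy 'all' view
    # picks up engines added later, the sliced view does not.
    #
    #     dv_lazy = rc.direct_view('all')   # re-resolves engines on each call
    #     dv_now = rc[:]                    # fixed to the engines present now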

    #--------------------------------------------------------------------------
    # Query methods
    #--------------------------------------------------------------------------

    @spin_first
    def get_result(self, indices_or_msg_ids=None, block=None):
        """Retrieve a result by msg_id or history index, wrapped in an AsyncResult object.

        If the client already has the results, no request to the Hub will be made.

        This is a convenient way to construct AsyncResult objects, which are wrappers
        that include metadata about execution, and allow for awaiting results that
        were not submitted by this Client.

        It can also be a convenient way to retrieve the metadata associated with
        blocking execution, since it always retrieves the metadata as well.

        Examples
        --------
        ::

            In [10]: r = client.apply()

        Parameters
        ----------

        indices_or_msg_ids : integer history index, str msg_id, or list of either
            The indices or msg_ids of indices to be retrieved

        block : bool
            Whether to wait for the result to be done

        Returns
        -------

        AsyncResult
            A single AsyncResult object will always be returned.

        AsyncHubResult
            A subclass of AsyncResult that retrieves results from the Hub

        """
        block = self.block if block is None else block
        if indices_or_msg_ids is None:
            indices_or_msg_ids = -1

        if not isinstance(indices_or_msg_ids, (list, tuple)):
            indices_or_msg_ids = [indices_or_msg_ids]

        theids = []
        for id in indices_or_msg_ids:
            if isinstance(id, int):
                id = self.history[id]
            if not isinstance(id, basestring):
                raise TypeError("indices must be str or int, not %r" % id)
            theids.append(id)

        local_ids = filter(lambda msg_id: msg_id in self.history or msg_id in self.results, theids)
        remote_ids = filter(lambda msg_id: msg_id not in local_ids, theids)

        if remote_ids:
            ar = AsyncHubResult(self, msg_ids=theids)
        else:
            ar = AsyncResult(self, msg_ids=theids)

        if block:
            ar.wait()

        return ar
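
    # Usage sketch (assumes a connected Client ``rc`` that has run tasks):
    #
    #     ar = rc.get_result(-1)   # AsyncResult for the most recent task
    #     ar.get()                 # the result itself
    #     ar.metadata              # timing and engine info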

    @spin_first
    def resubmit(self, indices_or_msg_ids=None, subheader=None, block=None):
        """Resubmit one or more tasks.

        in-flight tasks may not be resubmitted.

        Parameters
        ----------

        indices_or_msg_ids : integer history index, str msg_id, or list of either
            The indices or msg_ids of indices to be retrieved

        block : bool
            Whether to wait for the result to be done

        Returns
        -------

        AsyncHubResult
            A subclass of AsyncResult that retrieves results from the Hub

        """
        block = self.block if block is None else block
        if indices_or_msg_ids is None:
            indices_or_msg_ids = -1

        if not isinstance(indices_or_msg_ids, (list, tuple)):
            indices_or_msg_ids = [indices_or_msg_ids]

        theids = []
        for id in indices_or_msg_ids:
            if isinstance(id, int):
                id = self.history[id]
            if not isinstance(id, basestring):
                raise TypeError("indices must be str or int, not %r" % id)
            theids.append(id)

        for msg_id in theids:
            self.outstanding.discard(msg_id)
            if msg_id in self.history:
                self.history.remove(msg_id)
            self.results.pop(msg_id, None)
            self.metadata.pop(msg_id, None)
        content = dict(msg_ids = theids)

        self.session.send(self._query_socket, 'resubmit_request', content)

        zmq.select([self._query_socket], [], [])
        idents, msg = self.session.recv(self._query_socket, zmq.NOBLOCK)
        if self.debug:
            pprint(msg)
        content = msg['content']
        if content['status'] != 'ok':
            raise self._unwrap_exception(content)

        ar = AsyncHubResult(self, msg_ids=theids)

        if block:
            ar.wait()

        return ar
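
    # Usage sketch (assumes a connected Client ``rc`` and a finished task):
    #
    #     ar = rc.resubmit(-1)   # re-run the most recent task from history
    #     ar.get()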

    @spin_first
    def result_status(self, msg_ids, status_only=True):
        """Check on the status of the result(s) of the apply request with `msg_ids`.

        If status_only is False, then the actual results will be retrieved, else
        only the status of the results will be checked.

        Parameters
        ----------

        msg_ids : list of msg_ids
            if int:
                Passed as index to self.history for convenience.
        status_only : bool (default: True)
            if False:
                Retrieve the actual results of completed tasks.

        Returns
        -------

        results : dict
            There will always be the keys 'pending' and 'completed', which will
            be lists of msg_ids that are incomplete or complete. If `status_only`
            is False, then completed results will be keyed by their `msg_id`.
        """
        if not isinstance(msg_ids, (list, tuple)):
            msg_ids = [msg_ids]

        theids = []
        for msg_id in msg_ids:
            if isinstance(msg_id, int):
                msg_id = self.history[msg_id]
            if not isinstance(msg_id, basestring):
                raise TypeError("msg_ids must be str, not %r" % msg_id)
            theids.append(msg_id)

        completed = []
        local_results = {}

        # comment this block out to temporarily disable local shortcut:
        # iterate over a copy, since completed ids are removed from theids
        for msg_id in list(theids):
            if msg_id in self.results:
                completed.append(msg_id)
                local_results[msg_id] = self.results[msg_id]
                theids.remove(msg_id)

        if theids: # some not locally cached
            content = dict(msg_ids=theids, status_only=status_only)
            msg = self.session.send(self._query_socket, "result_request", content=content)
            zmq.select([self._query_socket], [], [])
            idents, msg = self.session.recv(self._query_socket, zmq.NOBLOCK)
            if self.debug:
                pprint(msg)
            content = msg['content']
            if content['status'] != 'ok':
                raise self._unwrap_exception(content)
            buffers = msg['buffers']
        else:
            content = dict(completed=[], pending=[])

        content['completed'].extend(completed)

        if status_only:
            return content

        failures = []
        # load cached results into result:
        content.update(local_results)

        # update cache with results:
        for msg_id in sorted(theids):
            if msg_id in content['completed']:
                rec = content[msg_id]
                parent = rec['header']
                header = rec['result_header']
                rcontent = rec['result_content']
                iodict = rec['io']
                if isinstance(rcontent, str):
                    rcontent = self.session.unpack(rcontent)

                md = self.metadata[msg_id]
                md.update(self._extract_metadata(header, parent, rcontent))
                if rec.get('received'):
                    md['received'] = rec['received']
                md.update(iodict)

                if rcontent['status'] == 'ok':
                    res, buffers = util.unserialize_object(buffers)
                else:
                    print rcontent
                    res = self._unwrap_exception(rcontent)
                    failures.append(res)

                self.results[msg_id] = res
                content[msg_id] = res

        if len(theids) == 1 and failures:
            raise failures[0]

        error.collect_exceptions(failures, "result_status")
        return content
1414
1415
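The method above (result_status, per its collect_exceptions tag) backs most result polling. A minimal sketch of calling it directly, assuming a cluster started with ipcluster start; the os.getpid task is purely illustrative:

    import os
    from IPython.parallel import Client

    rc = Client()
    ar = rc[:].apply_async(os.getpid)
    # status_only=True asks the Hub which msg_ids are done without
    # transferring result buffers; locally cached results short-circuit.
    status = rc.result_status(ar.msg_ids, status_only=True)
    print(status['completed'], status['pending'])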
1415 @spin_first
1416 @spin_first
1416 def queue_status(self, targets='all', verbose=False):
1417 def queue_status(self, targets='all', verbose=False):
1417 """Fetch the status of engine queues.
1418 """Fetch the status of engine queues.
1418
1419
1419 Parameters
1420 Parameters
1420 ----------
1421 ----------
1421
1422
1422 targets : int/str/list of ints/strs
1423 targets : int/str/list of ints/strs
1423 the engines whose states are to be queried.
1424 the engines whose states are to be queried.
1424 default : all
1425 default : all
1425 verbose : bool
1426 verbose : bool
1426 Whether to return lists of msg_ids for each element, rather than just counts [default: False]
1427 Whether to return lists of msg_ids for each element, rather than just counts [default: False]
1427 """
1428 """
1428 if targets == 'all':
1429 if targets == 'all':
1429 # allow 'all' to be evaluated on the engine
1430 # allow 'all' to be evaluated on the engine
1430 engine_ids = None
1431 engine_ids = None
1431 else:
1432 else:
1432 engine_ids = self._build_targets(targets)[1]
1433 engine_ids = self._build_targets(targets)[1]
1433 content = dict(targets=engine_ids, verbose=verbose)
1434 content = dict(targets=engine_ids, verbose=verbose)
1434 self.session.send(self._query_socket, "queue_request", content=content)
1435 self.session.send(self._query_socket, "queue_request", content=content)
1435 idents,msg = self.session.recv(self._query_socket, 0)
1436 idents,msg = self.session.recv(self._query_socket, 0)
1436 if self.debug:
1437 if self.debug:
1437 pprint(msg)
1438 pprint(msg)
1438 content = msg['content']
1439 content = msg['content']
1439 status = content.pop('status')
1440 status = content.pop('status')
1440 if status != 'ok':
1441 if status != 'ok':
1441 raise self._unwrap_exception(content)
1442 raise self._unwrap_exception(content)
1442 content = rekey(content)
1443 content = rekey(content)
1443 if isinstance(targets, int):
1444 if isinstance(targets, int):
1444 return content[targets]
1445 return content[targets]
1445 else:
1446 else:
1446 return content
1447 return content
1447
1448
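A sketch of both modes, reusing a connected Client (rc) as in the example above; the exact shape of the reply dict is indicative:

    rc.queue_status()              # counts per engine, e.g. {0: {'queue': 0, 'completed': 3, 'tasks': 0}, ...}
    rc.queue_status(targets=0, verbose=True)   # lists of msg_ids for engine 0 instead of counts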
1448 @spin_first
1449 @spin_first
1449 def purge_results(self, jobs=[], targets=[]):
1450 def purge_results(self, jobs=[], targets=[]):
1450 """Tell the Hub to forget results.
1451 """Tell the Hub to forget results.
1451
1452
1452 Individual results can be purged by msg_id, or the entire
1453 Individual results can be purged by msg_id, or the entire
1453 history of specific targets can be purged.
1454 history of specific targets can be purged.
1454
1455
1455 Use `purge_results('all')` to scrub everything from the Hub's db.
1456 Use `purge_results('all')` to scrub everything from the Hub's db.
1456
1457
1457 Parameters
1458 Parameters
1458 ----------
1459 ----------
1459
1460
1460 jobs : str or list of str or AsyncResult objects
1461 jobs : str or list of str or AsyncResult objects
1461 the msg_ids whose results should be forgotten.
1462 the msg_ids whose results should be forgotten.
1462 targets : int/str/list of ints/strs
1463 targets : int/str/list of ints/strs
1463 The targets, by int_id, whose entire history is to be purged.
1464 The targets, by int_id, whose entire history is to be purged.
1464
1465
1465 default : None
1466 default : None
1466 """
1467 """
1467 if not targets and not jobs:
1468 if not targets and not jobs:
1468 raise ValueError("Must specify at least one of `targets` and `jobs`")
1469 raise ValueError("Must specify at least one of `targets` and `jobs`")
1469 if targets:
1470 if targets:
1470 targets = self._build_targets(targets)[1]
1471 targets = self._build_targets(targets)[1]
1471
1472
1472 # construct msg_ids from jobs
1473 # construct msg_ids from jobs
1473 if jobs == 'all':
1474 if jobs == 'all':
1474 msg_ids = jobs
1475 msg_ids = jobs
1475 else:
1476 else:
1476 msg_ids = []
1477 msg_ids = []
1477 if isinstance(jobs, (basestring,AsyncResult)):
1478 if isinstance(jobs, (basestring,AsyncResult)):
1478 jobs = [jobs]
1479 jobs = [jobs]
1479 bad_ids = filter(lambda obj: not isinstance(obj, (basestring, AsyncResult)), jobs)
1480 bad_ids = filter(lambda obj: not isinstance(obj, (basestring, AsyncResult)), jobs)
1480 if bad_ids:
1481 if bad_ids:
1481 raise TypeError("Invalid msg_id type %r, expected str or AsyncResult"%bad_ids[0])
1482 raise TypeError("Invalid msg_id type %r, expected str or AsyncResult"%bad_ids[0])
1482 for j in jobs:
1483 for j in jobs:
1483 if isinstance(j, AsyncResult):
1484 if isinstance(j, AsyncResult):
1484 msg_ids.extend(j.msg_ids)
1485 msg_ids.extend(j.msg_ids)
1485 else:
1486 else:
1486 msg_ids.append(j)
1487 msg_ids.append(j)
1487
1488
1488 content = dict(engine_ids=targets, msg_ids=msg_ids)
1489 content = dict(engine_ids=targets, msg_ids=msg_ids)
1489 self.session.send(self._query_socket, "purge_request", content=content)
1490 self.session.send(self._query_socket, "purge_request", content=content)
1490 idents, msg = self.session.recv(self._query_socket, 0)
1491 idents, msg = self.session.recv(self._query_socket, 0)
1491 if self.debug:
1492 if self.debug:
1492 pprint(msg)
1493 pprint(msg)
1493 content = msg['content']
1494 content = msg['content']
1494 if content['status'] != 'ok':
1495 if content['status'] != 'ok':
1495 raise self._unwrap_exception(content)
1496 raise self._unwrap_exception(content)
1496
1497
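The three purge modes described in the docstring, sketched with the same rc (ar is any AsyncResult):

    rc.purge_results(jobs=ar)           # forget one job's results by msg_id
    rc.purge_results(targets=[0, 1])    # drop the entire history of engines 0 and 1
    rc.purge_results('all')             # scrub everything from the Hub's db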
1497 @spin_first
1498 @spin_first
1498 def hub_history(self):
1499 def hub_history(self):
1499 """Get the Hub's history
1500 """Get the Hub's history
1500
1501
1501 Just like the Client, the Hub has a history, which is a list of msg_ids.
1502 Just like the Client, the Hub has a history, which is a list of msg_ids.
1502 This will contain the history of all clients, and, depending on configuration,
1503 This will contain the history of all clients, and, depending on configuration,
1503 may contain history across multiple cluster sessions.
1504 may contain history across multiple cluster sessions.
1504
1505
1505 Any msg_id returned here is a valid argument to `get_result`.
1506 Any msg_id returned here is a valid argument to `get_result`.
1506
1507
1507 Returns
1508 Returns
1508 -------
1509 -------
1509
1510
1510 msg_ids : list of strs
1511 msg_ids : list of strs
1511 list of all msg_ids, ordered by task submission time.
1512 list of all msg_ids, ordered by task submission time.
1512 """
1513 """
1513
1514
1514 self.session.send(self._query_socket, "history_request", content={})
1515 self.session.send(self._query_socket, "history_request", content={})
1515 idents, msg = self.session.recv(self._query_socket, 0)
1516 idents, msg = self.session.recv(self._query_socket, 0)
1516
1517
1517 if self.debug:
1518 if self.debug:
1518 pprint(msg)
1519 pprint(msg)
1519 content = msg['content']
1520 content = msg['content']
1520 if content['status'] != 'ok':
1521 if content['status'] != 'ok':
1521 raise self._unwrap_exception(content)
1522 raise self._unwrap_exception(content)
1522 else:
1523 else:
1523 return content['history']
1524 return content['history']
1524
1525
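For instance, pairing hub_history with get_result to replay the most recently recorded task (sketch, same rc as above):

    msg_ids = rc.hub_history()          # ordered by task submission time
    if msg_ids:
        ar = rc.get_result(msg_ids[-1]) # any returned msg_id is valid here
        print(ar.get())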
1525 @spin_first
1526 @spin_first
1526 def db_query(self, query, keys=None):
1527 def db_query(self, query, keys=None):
1527 """Query the Hub's TaskRecord database
1528 """Query the Hub's TaskRecord database
1528
1529
1529 This will return a list of task record dicts that match `query`
1530 This will return a list of task record dicts that match `query`
1530
1531
1531 Parameters
1532 Parameters
1532 ----------
1533 ----------
1533
1534
1534 query : mongodb query dict
1535 query : mongodb query dict
1535 The search dict. See mongodb query docs for details.
1536 The search dict. See mongodb query docs for details.
1536 keys : list of strs [optional]
1537 keys : list of strs [optional]
1537 The subset of keys to be returned. The default is to fetch everything but buffers.
1538 The subset of keys to be returned. The default is to fetch everything but buffers.
1538 'msg_id' will *always* be included.
1539 'msg_id' will *always* be included.
1539 """
1540 """
1540 if isinstance(keys, basestring):
1541 if isinstance(keys, basestring):
1541 keys = [keys]
1542 keys = [keys]
1542 content = dict(query=query, keys=keys)
1543 content = dict(query=query, keys=keys)
1543 self.session.send(self._query_socket, "db_request", content=content)
1544 self.session.send(self._query_socket, "db_request", content=content)
1544 idents, msg = self.session.recv(self._query_socket, 0)
1545 idents, msg = self.session.recv(self._query_socket, 0)
1545 if self.debug:
1546 if self.debug:
1546 pprint(msg)
1547 pprint(msg)
1547 content = msg['content']
1548 content = msg['content']
1548 if content['status'] != 'ok':
1549 if content['status'] != 'ok':
1549 raise self._unwrap_exception(content)
1550 raise self._unwrap_exception(content)
1550
1551
1551 records = content['records']
1552 records = content['records']
1552
1553
1553 buffer_lens = content['buffer_lens']
1554 buffer_lens = content['buffer_lens']
1554 result_buffer_lens = content['result_buffer_lens']
1555 result_buffer_lens = content['result_buffer_lens']
1555 buffers = msg['buffers']
1556 buffers = msg['buffers']
1556 has_bufs = buffer_lens is not None
1557 has_bufs = buffer_lens is not None
1557 has_rbufs = result_buffer_lens is not None
1558 has_rbufs = result_buffer_lens is not None
1558 for i,rec in enumerate(records):
1559 for i,rec in enumerate(records):
1559 # relink buffers
1560 # relink buffers
1560 if has_bufs:
1561 if has_bufs:
1561 blen = buffer_lens[i]
1562 blen = buffer_lens[i]
1562 rec['buffers'], buffers = buffers[:blen],buffers[blen:]
1563 rec['buffers'], buffers = buffers[:blen],buffers[blen:]
1563 if has_rbufs:
1564 if has_rbufs:
1564 blen = result_buffer_lens[i]
1565 blen = result_buffer_lens[i]
1565 rec['result_buffers'], buffers = buffers[:blen],buffers[blen:]
1566 rec['result_buffers'], buffers = buffers[:blen],buffers[blen:]
1566
1567
1567 return records
1568 return records
1568
1569
1569 __all__ = [ 'Client' ]
1570 __all__ = [ 'Client' ]
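Rounding out the query API, a sketch of db_query; the $gte operator assumes a backend supporting the mongodb-style subset (DictDB implements the common comparison operators):

    from datetime import datetime, timedelta

    # timing fields for every task submitted in the last hour
    since = datetime.now() - timedelta(hours=1)
    records = rc.db_query({'submitted': {'$gte': since}},
                          keys=['started', 'completed'])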
@@ -1,181 +1,182 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 """
2 """
3 A multi-heart Heartbeat system using PUB and XREP sockets. pings are sent out on the PUB,
3 A multi-heart Heartbeat system using PUB and XREP sockets. Pings are sent out on the PUB,
3 A multi-heart Heartbeat system using PUB and XREP sockets. Pings are sent out on the PUB,
4 and hearts are tracked based on their XREQ identities.
5
5
6 Authors:
6 Authors:
7
7
8 * Min RK
8 * Min RK
9 """
9 """
10 #-----------------------------------------------------------------------------
10 #-----------------------------------------------------------------------------
11 # Copyright (C) 2010-2011 The IPython Development Team
11 # Copyright (C) 2010-2011 The IPython Development Team
12 #
12 #
13 # Distributed under the terms of the BSD License. The full license is in
13 # Distributed under the terms of the BSD License. The full license is in
14 # the file COPYING, distributed as part of this software.
14 # the file COPYING, distributed as part of this software.
15 #-----------------------------------------------------------------------------
15 #-----------------------------------------------------------------------------
16
16
17 from __future__ import print_function
17 from __future__ import print_function
18 import time
18 import time
19 import uuid
19 import uuid
20
20
21 import zmq
21 import zmq
22 from zmq.devices import ThreadDevice
22 from zmq.devices import ThreadDevice
23 from zmq.eventloop import ioloop, zmqstream
23 from zmq.eventloop import ioloop, zmqstream
24
24
25 from IPython.config.configurable import LoggingConfigurable
25 from IPython.config.configurable import LoggingConfigurable
26 from IPython.utils.py3compat import str_to_bytes
26 from IPython.utils.traitlets import Set, Instance, CFloat, Integer
27 from IPython.utils.traitlets import Set, Instance, CFloat, Integer
27
28
28 from IPython.parallel.util import asbytes, log_errors
29 from IPython.parallel.util import log_errors
29
30
30 class Heart(object):
31 class Heart(object):
31 """A basic heart object for responding to a HeartMonitor.
32 """A basic heart object for responding to a HeartMonitor.
32 This is a simple wrapper with defaults for the most common
33 This is a simple wrapper with defaults for the most common
33 Device model for responding to heartbeats.
34 Device model for responding to heartbeats.
34
35
35 It simply builds a threadsafe zmq.FORWARDER Device, defaulting to
36 It simply builds a threadsafe zmq.FORWARDER Device, defaulting to
36 SUB/XREQ (DEALER) for in/out.
37 SUB/XREQ (DEALER) for in/out.
37
38
38 You can specify the XREQ's IDENTITY via the optional heart_id argument."""
39 You can specify the XREQ's IDENTITY via the optional heart_id argument."""
39 device=None
40 device=None
40 id=None
41 id=None
41 def __init__(self, in_addr, out_addr, in_type=zmq.SUB, out_type=zmq.DEALER, heart_id=None):
42 def __init__(self, in_addr, out_addr, in_type=zmq.SUB, out_type=zmq.DEALER, heart_id=None):
42 self.device = ThreadDevice(zmq.FORWARDER, in_type, out_type)
43 self.device = ThreadDevice(zmq.FORWARDER, in_type, out_type)
43 # do not allow the device to share global Context.instance,
44 # do not allow the device to share global Context.instance,
44 # which is the default behavior in pyzmq > 2.1.10
45 # which is the default behavior in pyzmq > 2.1.10
45 self.device.context_factory = zmq.Context
46 self.device.context_factory = zmq.Context
46
47
47 self.device.daemon=True
48 self.device.daemon=True
48 self.device.connect_in(in_addr)
49 self.device.connect_in(in_addr)
49 self.device.connect_out(out_addr)
50 self.device.connect_out(out_addr)
50 if in_type == zmq.SUB:
51 if in_type == zmq.SUB:
51 self.device.setsockopt_in(zmq.SUBSCRIBE, b"")
52 self.device.setsockopt_in(zmq.SUBSCRIBE, b"")
52 if heart_id is None:
53 if heart_id is None:
53 heart_id = uuid.uuid4().bytes
54 heart_id = uuid.uuid4().bytes
54 self.device.setsockopt_out(zmq.IDENTITY, heart_id)
55 self.device.setsockopt_out(zmq.IDENTITY, heart_id)
55 self.id = heart_id
56 self.id = heart_id
56
57
57 def start(self):
58 def start(self):
58 return self.device.start()
59 return self.device.start()
59
60
60
61
61 class HeartMonitor(LoggingConfigurable):
62 class HeartMonitor(LoggingConfigurable):
62 """A basic HeartMonitor class
63 """A basic HeartMonitor class
63 pingstream: a PUB stream
64 pingstream: a PUB stream
64 pongstream: an XREP stream
65 pongstream: an XREP stream
65 period: the period of the heartbeat in milliseconds"""
66 period: the period of the heartbeat in milliseconds"""
66
67
67 period = Integer(3000, config=True,
68 period = Integer(3000, config=True,
68 help='The period (in ms) at which the Hub pings '
69 help='The period (in ms) at which the Hub pings '
69 'the engines for heartbeats',
70 'the engines for heartbeats',
70 )
71 )
71
72
72 pingstream=Instance('zmq.eventloop.zmqstream.ZMQStream')
73 pingstream=Instance('zmq.eventloop.zmqstream.ZMQStream')
73 pongstream=Instance('zmq.eventloop.zmqstream.ZMQStream')
74 pongstream=Instance('zmq.eventloop.zmqstream.ZMQStream')
74 loop = Instance('zmq.eventloop.ioloop.IOLoop')
75 loop = Instance('zmq.eventloop.ioloop.IOLoop')
75 def _loop_default(self):
76 def _loop_default(self):
76 return ioloop.IOLoop.instance()
77 return ioloop.IOLoop.instance()
77
78
78 # not settable:
79 # not settable:
79 hearts=Set()
80 hearts=Set()
80 responses=Set()
81 responses=Set()
81 on_probation=Set()
82 on_probation=Set()
82 last_ping=CFloat(0)
83 last_ping=CFloat(0)
83 _new_handlers = Set()
84 _new_handlers = Set()
84 _failure_handlers = Set()
85 _failure_handlers = Set()
85 lifetime = CFloat(0)
86 lifetime = CFloat(0)
86 tic = CFloat(0)
87 tic = CFloat(0)
87
88
88 def __init__(self, **kwargs):
89 def __init__(self, **kwargs):
89 super(HeartMonitor, self).__init__(**kwargs)
90 super(HeartMonitor, self).__init__(**kwargs)
90
91
91 self.pongstream.on_recv(self.handle_pong)
92 self.pongstream.on_recv(self.handle_pong)
92
93
93 def start(self):
94 def start(self):
94 self.tic = time.time()
95 self.tic = time.time()
95 self.caller = ioloop.PeriodicCallback(self.beat, self.period, self.loop)
96 self.caller = ioloop.PeriodicCallback(self.beat, self.period, self.loop)
96 self.caller.start()
97 self.caller.start()
97
98
98 def add_new_heart_handler(self, handler):
99 def add_new_heart_handler(self, handler):
99 """add a new handler for new hearts"""
100 """add a new handler for new hearts"""
100 self.log.debug("heartbeat::new_heart_handler: %s", handler)
101 self.log.debug("heartbeat::new_heart_handler: %s", handler)
101 self._new_handlers.add(handler)
102 self._new_handlers.add(handler)
102
103
103 def add_heart_failure_handler(self, handler):
104 def add_heart_failure_handler(self, handler):
104 """add a new handler for heart failure"""
105 """add a new handler for heart failure"""
105 self.log.debug("heartbeat::new heart failure handler: %s", handler)
106 self.log.debug("heartbeat::new heart failure handler: %s", handler)
106 self._failure_handlers.add(handler)
107 self._failure_handlers.add(handler)
107
108
108 def beat(self):
109 def beat(self):
109 self.pongstream.flush()
110 self.pongstream.flush()
110 self.last_ping = self.lifetime
111 self.last_ping = self.lifetime
111
112
112 toc = time.time()
113 toc = time.time()
113 self.lifetime += toc-self.tic
114 self.lifetime += toc-self.tic
114 self.tic = toc
115 self.tic = toc
115 self.log.debug("heartbeat::sending %s", self.lifetime)
116 self.log.debug("heartbeat::sending %s", self.lifetime)
116 goodhearts = self.hearts.intersection(self.responses)
117 goodhearts = self.hearts.intersection(self.responses)
117 missed_beats = self.hearts.difference(goodhearts)
118 missed_beats = self.hearts.difference(goodhearts)
118 heartfailures = self.on_probation.intersection(missed_beats)
119 heartfailures = self.on_probation.intersection(missed_beats)
119 newhearts = self.responses.difference(goodhearts)
120 newhearts = self.responses.difference(goodhearts)
120 map(self.handle_new_heart, newhearts)
121 map(self.handle_new_heart, newhearts)
121 map(self.handle_heart_failure, heartfailures)
122 map(self.handle_heart_failure, heartfailures)
122 self.on_probation = missed_beats.intersection(self.hearts)
123 self.on_probation = missed_beats.intersection(self.hearts)
123 self.responses = set()
124 self.responses = set()
124 # print self.on_probation, self.hearts
125 # print self.on_probation, self.hearts
125 # self.log.debug("heartbeat::beat %.3f, %i beating hearts", self.lifetime, len(self.hearts))
126 # self.log.debug("heartbeat::beat %.3f, %i beating hearts", self.lifetime, len(self.hearts))
126 self.pingstream.send(asbytes(str(self.lifetime)))
127 self.pingstream.send(str_to_bytes(str(self.lifetime)))
127 # flush stream to force immediate socket send
128 # flush stream to force immediate socket send
128 self.pingstream.flush()
129 self.pingstream.flush()
129
130
130 def handle_new_heart(self, heart):
131 def handle_new_heart(self, heart):
131 if self._new_handlers:
132 if self._new_handlers:
132 for handler in self._new_handlers:
133 for handler in self._new_handlers:
133 handler(heart)
134 handler(heart)
134 else:
135 else:
135 self.log.info("heartbeat::yay, got new heart %s!", heart)
136 self.log.info("heartbeat::yay, got new heart %s!", heart)
136 self.hearts.add(heart)
137 self.hearts.add(heart)
137
138
138 def handle_heart_failure(self, heart):
139 def handle_heart_failure(self, heart):
139 if self._failure_handlers:
140 if self._failure_handlers:
140 for handler in self._failure_handlers:
141 for handler in self._failure_handlers:
141 try:
142 try:
142 handler(heart)
143 handler(heart)
143 except Exception:
144 except Exception:
144 self.log.error("heartbeat::Bad Handler! %s", handler, exc_info=True)
145 self.log.error("heartbeat::Bad Handler! %s", handler, exc_info=True)
146 else:
147 else:
147 self.log.info("heartbeat::Heart %s failed :(", heart)
148 self.log.info("heartbeat::Heart %s failed :(", heart)
148 self.hearts.remove(heart)
149 self.hearts.remove(heart)
149
150
150
151
151 @log_errors
152 @log_errors
152 def handle_pong(self, msg):
153 def handle_pong(self, msg):
153 "a heart just beat"
154 "a heart just beat"
154 current = asbytes(str(self.lifetime))
155 current = str_to_bytes(str(self.lifetime))
155 last = asbytes(str(self.last_ping))
156 last = str_to_bytes(str(self.last_ping))
156 if msg[1] == current:
157 if msg[1] == current:
157 delta = time.time()-self.tic
158 delta = time.time()-self.tic
158 # self.log.debug("heartbeat::heart %r took %.2f ms to respond"%(msg[0], 1000*delta))
159 # self.log.debug("heartbeat::heart %r took %.2f ms to respond"%(msg[0], 1000*delta))
159 self.responses.add(msg[0])
160 self.responses.add(msg[0])
160 elif msg[1] == last:
161 elif msg[1] == last:
161 delta = time.time()-self.tic + (self.lifetime-self.last_ping)
162 delta = time.time()-self.tic + (self.lifetime-self.last_ping)
162 self.log.warn("heartbeat::heart %r missed a beat, and took %.2f ms to respond", msg[0], 1000*delta)
163 self.log.warn("heartbeat::heart %r missed a beat, and took %.2f ms to respond", msg[0], 1000*delta)
163 self.responses.add(msg[0])
164 self.responses.add(msg[0])
164 else:
165 else:
165 self.log.warn("heartbeat::got bad heartbeat (possibly old?): %s (current=%.3f)", msg[1], self.lifetime)
166 self.log.warn("heartbeat::got bad heartbeat (possibly old?): %s (current=%.3f)", msg[1], self.lifetime)
166
167
167
168
168 if __name__ == '__main__':
169 if __name__ == '__main__':
169 loop = ioloop.IOLoop.instance()
170 loop = ioloop.IOLoop.instance()
170 context = zmq.Context()
171 context = zmq.Context()
171 pub = context.socket(zmq.PUB)
172 pub = context.socket(zmq.PUB)
172 pub.bind('tcp://127.0.0.1:5555')
173 pub.bind('tcp://127.0.0.1:5555')
173 xrep = context.socket(zmq.ROUTER)
174 xrep = context.socket(zmq.ROUTER)
174 xrep.bind('tcp://127.0.0.1:5556')
175 xrep.bind('tcp://127.0.0.1:5556')
175
176
176 outstream = zmqstream.ZMQStream(pub, loop)
177 outstream = zmqstream.ZMQStream(pub, loop)
177 instream = zmqstream.ZMQStream(xrep, loop)
178 instream = zmqstream.ZMQStream(xrep, loop)
178
179
179 hb = HeartMonitor(loop=loop, pingstream=outstream, pongstream=instream)
180 hb = HeartMonitor(loop=loop, pingstream=outstream, pongstream=instream)
180 hb.start()
181 hb.start()
181
182
182 loop.start()
183 loop.start()
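The __main__ demo above runs only the monitor side; a companion process can attach a beating Heart to the same ports. A minimal sketch (the module path follows the heartmonitor import used elsewhere in this changeset; the heart_id is illustrative):

    import time
    from IPython.parallel.controller.heartmonitor import Heart

    heart = Heart('tcp://127.0.0.1:5555', 'tcp://127.0.0.1:5556',
                  heart_id=b'demo-heart')
    heart.start()       # the ThreadDevice echoes each PUB ping back over DEALER
    while True:         # keep the main thread (and daemon device) alive
        time.sleep(1)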
@@ -1,1303 +1,1304 b''
1 """The IPython Controller Hub with 0MQ
1 """The IPython Controller Hub with 0MQ
2 This is the master object that handles connections from engines and clients,
2 This is the master object that handles connections from engines and clients,
3 and monitors traffic through the various queues.
3 and monitors traffic through the various queues.
4
4
5 Authors:
5 Authors:
6
6
7 * Min RK
7 * Min RK
8 """
8 """
9 #-----------------------------------------------------------------------------
9 #-----------------------------------------------------------------------------
10 # Copyright (C) 2010-2011 The IPython Development Team
10 # Copyright (C) 2010-2011 The IPython Development Team
11 #
11 #
12 # Distributed under the terms of the BSD License. The full license is in
12 # Distributed under the terms of the BSD License. The full license is in
13 # the file COPYING, distributed as part of this software.
13 # the file COPYING, distributed as part of this software.
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15
15
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17 # Imports
17 # Imports
18 #-----------------------------------------------------------------------------
18 #-----------------------------------------------------------------------------
19 from __future__ import print_function
19 from __future__ import print_function
20
20
21 import sys
21 import sys
22 import time
22 import time
23 from datetime import datetime
23 from datetime import datetime
24
24
25 import zmq
25 import zmq
26 from zmq.eventloop import ioloop
26 from zmq.eventloop import ioloop
27 from zmq.eventloop.zmqstream import ZMQStream
27 from zmq.eventloop.zmqstream import ZMQStream
28
28
29 # internal:
29 # internal:
30 from IPython.utils.importstring import import_item
30 from IPython.utils.importstring import import_item
31 from IPython.utils.py3compat import cast_bytes
31 from IPython.utils.traitlets import (
32 from IPython.utils.traitlets import (
32 HasTraits, Instance, Integer, Unicode, Dict, Set, Tuple, CBytes, DottedObjectName
33 HasTraits, Instance, Integer, Unicode, Dict, Set, Tuple, CBytes, DottedObjectName
33 )
34 )
34
35
35 from IPython.parallel import error, util
36 from IPython.parallel import error, util
36 from IPython.parallel.factory import RegistrationFactory
37 from IPython.parallel.factory import RegistrationFactory
37
38
38 from IPython.zmq.session import SessionFactory
39 from IPython.zmq.session import SessionFactory
39
40
40 from .heartmonitor import HeartMonitor
41 from .heartmonitor import HeartMonitor
41
42
42 #-----------------------------------------------------------------------------
43 #-----------------------------------------------------------------------------
43 # Code
44 # Code
44 #-----------------------------------------------------------------------------
45 #-----------------------------------------------------------------------------
45
46
46 def _passer(*args, **kwargs):
47 def _passer(*args, **kwargs):
47 return
48 return
48
49
49 def _printer(*args, **kwargs):
50 def _printer(*args, **kwargs):
50 print (args)
51 print (args)
51 print (kwargs)
52 print (kwargs)
52
53
53 def empty_record():
54 def empty_record():
54 """Return an empty dict with all record keys."""
55 """Return an empty dict with all record keys."""
55 return {
56 return {
56 'msg_id' : None,
57 'msg_id' : None,
57 'header' : None,
58 'header' : None,
58 'content': None,
59 'content': None,
59 'buffers': None,
60 'buffers': None,
60 'submitted': None,
61 'submitted': None,
61 'client_uuid' : None,
62 'client_uuid' : None,
62 'engine_uuid' : None,
63 'engine_uuid' : None,
63 'started': None,
64 'started': None,
64 'completed': None,
65 'completed': None,
65 'resubmitted': None,
66 'resubmitted': None,
66 'received': None,
67 'received': None,
67 'result_header' : None,
68 'result_header' : None,
68 'result_content' : None,
69 'result_content' : None,
69 'result_buffers' : None,
70 'result_buffers' : None,
70 'queue' : None,
71 'queue' : None,
71 'pyin' : None,
72 'pyin' : None,
72 'pyout': None,
73 'pyout': None,
73 'pyerr': None,
74 'pyerr': None,
74 'stdout': '',
75 'stdout': '',
75 'stderr': '',
76 'stderr': '',
76 }
77 }
77
78
78 def init_record(msg):
79 def init_record(msg):
79 """Initialize a TaskRecord based on a request."""
80 """Initialize a TaskRecord based on a request."""
80 header = msg['header']
81 header = msg['header']
81 return {
82 return {
82 'msg_id' : header['msg_id'],
83 'msg_id' : header['msg_id'],
83 'header' : header,
84 'header' : header,
84 'content': msg['content'],
85 'content': msg['content'],
85 'buffers': msg['buffers'],
86 'buffers': msg['buffers'],
86 'submitted': header['date'],
87 'submitted': header['date'],
87 'client_uuid' : None,
88 'client_uuid' : None,
88 'engine_uuid' : None,
89 'engine_uuid' : None,
89 'started': None,
90 'started': None,
90 'completed': None,
91 'completed': None,
91 'resubmitted': None,
92 'resubmitted': None,
92 'received': None,
93 'received': None,
93 'result_header' : None,
94 'result_header' : None,
94 'result_content' : None,
95 'result_content' : None,
95 'result_buffers' : None,
96 'result_buffers' : None,
96 'queue' : None,
97 'queue' : None,
97 'pyin' : None,
98 'pyin' : None,
98 'pyout': None,
99 'pyout': None,
99 'pyerr': None,
100 'pyerr': None,
100 'stdout': '',
101 'stdout': '',
101 'stderr': '',
102 'stderr': '',
102 }
103 }
103
104
104
105
105 class EngineConnector(HasTraits):
106 class EngineConnector(HasTraits):
106 """A simple object for accessing the various zmq connections of an object.
107 """A simple object for accessing the various zmq connections of an object.
107 Attributes are:
108 Attributes are:
108 id (int): engine ID
109 id (int): engine ID
109 uuid (str): uuid (unused?)
110 uuid (str): uuid (unused?)
110 queue (str): identity of queue's XREQ socket
111 queue (str): identity of queue's XREQ socket
111 registration (str): identity of registration XREQ socket
112 registration (str): identity of registration XREQ socket
112 heartbeat (str): identity of heartbeat XREQ socket
113 heartbeat (str): identity of heartbeat XREQ socket
113 """
114 """
114 id=Integer(0)
115 id=Integer(0)
115 queue=CBytes()
116 queue=CBytes()
116 control=CBytes()
117 control=CBytes()
117 registration=CBytes()
118 registration=CBytes()
118 heartbeat=CBytes()
119 heartbeat=CBytes()
119 pending=Set()
120 pending=Set()
120
121
121 class HubFactory(RegistrationFactory):
122 class HubFactory(RegistrationFactory):
122 """The Configurable for setting up a Hub."""
123 """The Configurable for setting up a Hub."""
123
124
124 # port-pairs for monitoredqueues:
125 # port-pairs for monitoredqueues:
125 hb = Tuple(Integer,Integer,config=True,
126 hb = Tuple(Integer,Integer,config=True,
126 help="""XREQ/SUB Port pair for Engine heartbeats""")
127 help="""XREQ/SUB Port pair for Engine heartbeats""")
127 def _hb_default(self):
128 def _hb_default(self):
128 return tuple(util.select_random_ports(2))
129 return tuple(util.select_random_ports(2))
129
130
130 mux = Tuple(Integer,Integer,config=True,
131 mux = Tuple(Integer,Integer,config=True,
131 help="""Engine/Client Port pair for MUX queue""")
132 help="""Engine/Client Port pair for MUX queue""")
132
133
133 def _mux_default(self):
134 def _mux_default(self):
134 return tuple(util.select_random_ports(2))
135 return tuple(util.select_random_ports(2))
135
136
136 task = Tuple(Integer,Integer,config=True,
137 task = Tuple(Integer,Integer,config=True,
137 help="""Engine/Client Port pair for Task queue""")
138 help="""Engine/Client Port pair for Task queue""")
138 def _task_default(self):
139 def _task_default(self):
139 return tuple(util.select_random_ports(2))
140 return tuple(util.select_random_ports(2))
140
141
141 control = Tuple(Integer,Integer,config=True,
142 control = Tuple(Integer,Integer,config=True,
142 help="""Engine/Client Port pair for Control queue""")
143 help="""Engine/Client Port pair for Control queue""")
143
144
144 def _control_default(self):
145 def _control_default(self):
145 return tuple(util.select_random_ports(2))
146 return tuple(util.select_random_ports(2))
146
147
147 iopub = Tuple(Integer,Integer,config=True,
148 iopub = Tuple(Integer,Integer,config=True,
148 help="""Engine/Client Port pair for IOPub relay""")
149 help="""Engine/Client Port pair for IOPub relay""")
149
150
150 def _iopub_default(self):
151 def _iopub_default(self):
151 return tuple(util.select_random_ports(2))
152 return tuple(util.select_random_ports(2))
152
153
153 # single ports:
154 # single ports:
154 mon_port = Integer(config=True,
155 mon_port = Integer(config=True,
155 help="""Monitor (SUB) port for queue traffic""")
156 help="""Monitor (SUB) port for queue traffic""")
156
157
157 def _mon_port_default(self):
158 def _mon_port_default(self):
158 return util.select_random_ports(1)[0]
159 return util.select_random_ports(1)[0]
159
160
160 notifier_port = Integer(config=True,
161 notifier_port = Integer(config=True,
161 help="""PUB port for sending engine status notifications""")
162 help="""PUB port for sending engine status notifications""")
162
163
163 def _notifier_port_default(self):
164 def _notifier_port_default(self):
164 return util.select_random_ports(1)[0]
165 return util.select_random_ports(1)[0]
165
166
166 engine_ip = Unicode('127.0.0.1', config=True,
167 engine_ip = Unicode('127.0.0.1', config=True,
167 help="IP on which to listen for engine connections. [default: loopback]")
168 help="IP on which to listen for engine connections. [default: loopback]")
168 engine_transport = Unicode('tcp', config=True,
169 engine_transport = Unicode('tcp', config=True,
169 help="0MQ transport for engine connections. [default: tcp]")
170 help="0MQ transport for engine connections. [default: tcp]")
170
171
171 client_ip = Unicode('127.0.0.1', config=True,
172 client_ip = Unicode('127.0.0.1', config=True,
172 help="IP on which to listen for client connections. [default: loopback]")
173 help="IP on which to listen for client connections. [default: loopback]")
173 client_transport = Unicode('tcp', config=True,
174 client_transport = Unicode('tcp', config=True,
174 help="0MQ transport for client connections. [default : tcp]")
175 help="0MQ transport for client connections. [default : tcp]")
175
176
176 monitor_ip = Unicode('127.0.0.1', config=True,
177 monitor_ip = Unicode('127.0.0.1', config=True,
177 help="IP on which to listen for monitor messages. [default: loopback]")
178 help="IP on which to listen for monitor messages. [default: loopback]")
178 monitor_transport = Unicode('tcp', config=True,
179 monitor_transport = Unicode('tcp', config=True,
179 help="0MQ transport for monitor messages. [default : tcp]")
180 help="0MQ transport for monitor messages. [default : tcp]")
180
181
181 monitor_url = Unicode('')
182 monitor_url = Unicode('')
182
183
183 db_class = DottedObjectName('IPython.parallel.controller.dictdb.DictDB',
184 db_class = DottedObjectName('IPython.parallel.controller.dictdb.DictDB',
184 config=True, help="""The class to use for the DB backend""")
185 config=True, help="""The class to use for the DB backend""")
185
186
186 # not configurable
187 # not configurable
187 db = Instance('IPython.parallel.controller.dictdb.BaseDB')
188 db = Instance('IPython.parallel.controller.dictdb.BaseDB')
188 heartmonitor = Instance('IPython.parallel.controller.heartmonitor.HeartMonitor')
189 heartmonitor = Instance('IPython.parallel.controller.heartmonitor.HeartMonitor')
189
190
190 def _ip_changed(self, name, old, new):
191 def _ip_changed(self, name, old, new):
191 self.engine_ip = new
192 self.engine_ip = new
192 self.client_ip = new
193 self.client_ip = new
193 self.monitor_ip = new
194 self.monitor_ip = new
194 self._update_monitor_url()
195 self._update_monitor_url()
195
196
196 def _update_monitor_url(self):
197 def _update_monitor_url(self):
197 self.monitor_url = "%s://%s:%i" % (self.monitor_transport, self.monitor_ip, self.mon_port)
198 self.monitor_url = "%s://%s:%i" % (self.monitor_transport, self.monitor_ip, self.mon_port)
198
199
199 def _transport_changed(self, name, old, new):
200 def _transport_changed(self, name, old, new):
200 self.engine_transport = new
201 self.engine_transport = new
201 self.client_transport = new
202 self.client_transport = new
202 self.monitor_transport = new
203 self.monitor_transport = new
203 self._update_monitor_url()
204 self._update_monitor_url()
204
205
205 def __init__(self, **kwargs):
206 def __init__(self, **kwargs):
206 super(HubFactory, self).__init__(**kwargs)
207 super(HubFactory, self).__init__(**kwargs)
207 self._update_monitor_url()
208 self._update_monitor_url()
208
209
209
210
210 def construct(self):
211 def construct(self):
211 self.init_hub()
212 self.init_hub()
212
213
213 def start(self):
214 def start(self):
214 self.heartmonitor.start()
215 self.heartmonitor.start()
215 self.log.info("Heartmonitor started")
216 self.log.info("Heartmonitor started")
216
217
217 def init_hub(self):
218 def init_hub(self):
218 """construct"""
219 """construct"""
219 client_iface = "%s://%s:" % (self.client_transport, self.client_ip) + "%i"
220 client_iface = "%s://%s:" % (self.client_transport, self.client_ip) + "%i"
220 engine_iface = "%s://%s:" % (self.engine_transport, self.engine_ip) + "%i"
221 engine_iface = "%s://%s:" % (self.engine_transport, self.engine_ip) + "%i"
221
222
222 ctx = self.context
223 ctx = self.context
223 loop = self.loop
224 loop = self.loop
224
225
225 # Registrar socket
226 # Registrar socket
226 q = ZMQStream(ctx.socket(zmq.ROUTER), loop)
227 q = ZMQStream(ctx.socket(zmq.ROUTER), loop)
227 q.bind(client_iface % self.regport)
228 q.bind(client_iface % self.regport)
228 self.log.info("Hub listening on %s for registration.", client_iface % self.regport)
229 self.log.info("Hub listening on %s for registration.", client_iface % self.regport)
229 if self.client_ip != self.engine_ip:
230 if self.client_ip != self.engine_ip:
230 q.bind(engine_iface % self.regport)
231 q.bind(engine_iface % self.regport)
231 self.log.info("Hub listening on %s for registration.", engine_iface % self.regport)
232 self.log.info("Hub listening on %s for registration.", engine_iface % self.regport)
232
233
233 ### Engine connections ###
234 ### Engine connections ###
234
235
235 # heartbeat
236 # heartbeat
236 hpub = ctx.socket(zmq.PUB)
237 hpub = ctx.socket(zmq.PUB)
237 hpub.bind(engine_iface % self.hb[0])
238 hpub.bind(engine_iface % self.hb[0])
238 hrep = ctx.socket(zmq.ROUTER)
239 hrep = ctx.socket(zmq.ROUTER)
239 hrep.bind(engine_iface % self.hb[1])
240 hrep.bind(engine_iface % self.hb[1])
240 self.heartmonitor = HeartMonitor(loop=loop, config=self.config, log=self.log,
241 self.heartmonitor = HeartMonitor(loop=loop, config=self.config, log=self.log,
241 pingstream=ZMQStream(hpub,loop),
242 pingstream=ZMQStream(hpub,loop),
242 pongstream=ZMQStream(hrep,loop)
243 pongstream=ZMQStream(hrep,loop)
243 )
244 )
244
245
245 ### Client connections ###
246 ### Client connections ###
246 # Notifier socket
247 # Notifier socket
247 n = ZMQStream(ctx.socket(zmq.PUB), loop)
248 n = ZMQStream(ctx.socket(zmq.PUB), loop)
248 n.bind(client_iface%self.notifier_port)
249 n.bind(client_iface%self.notifier_port)
249
250
250 ### build and launch the queues ###
251 ### build and launch the queues ###
251
252
252 # monitor socket
253 # monitor socket
253 sub = ctx.socket(zmq.SUB)
254 sub = ctx.socket(zmq.SUB)
254 sub.setsockopt(zmq.SUBSCRIBE, b"")
255 sub.setsockopt(zmq.SUBSCRIBE, b"")
255 sub.bind(self.monitor_url)
256 sub.bind(self.monitor_url)
256 sub.bind('inproc://monitor')
257 sub.bind('inproc://monitor')
257 sub = ZMQStream(sub, loop)
258 sub = ZMQStream(sub, loop)
258
259
259 # connect the db
260 # connect the db
260 self.log.info('Hub using DB backend: %r'%(self.db_class.split('.')[-1]))
261 self.log.info('Hub using DB backend: %r'%(self.db_class.split('.')[-1]))
261 # cdir = self.config.Global.cluster_dir
262 # cdir = self.config.Global.cluster_dir
262 self.db = import_item(str(self.db_class))(session=self.session.session,
263 self.db = import_item(str(self.db_class))(session=self.session.session,
263 config=self.config, log=self.log)
264 config=self.config, log=self.log)
264 time.sleep(.25)
265 time.sleep(.25)
265 try:
266 try:
266 scheme = self.config.TaskScheduler.scheme_name
267 scheme = self.config.TaskScheduler.scheme_name
267 except AttributeError:
268 except AttributeError:
268 from .scheduler import TaskScheduler
269 from .scheduler import TaskScheduler
269 scheme = TaskScheduler.scheme_name.get_default_value()
270 scheme = TaskScheduler.scheme_name.get_default_value()
270 # build connection dicts
271 # build connection dicts
271 self.engine_info = {
272 self.engine_info = {
272 'control' : engine_iface%self.control[1],
273 'control' : engine_iface%self.control[1],
273 'mux': engine_iface%self.mux[1],
274 'mux': engine_iface%self.mux[1],
274 'heartbeat': (engine_iface%self.hb[0], engine_iface%self.hb[1]),
275 'heartbeat': (engine_iface%self.hb[0], engine_iface%self.hb[1]),
275 'task' : engine_iface%self.task[1],
276 'task' : engine_iface%self.task[1],
276 'iopub' : engine_iface%self.iopub[1],
277 'iopub' : engine_iface%self.iopub[1],
277 # 'monitor' : engine_iface%self.mon_port,
278 # 'monitor' : engine_iface%self.mon_port,
278 }
279 }
279
280
280 self.client_info = {
281 self.client_info = {
281 'control' : client_iface%self.control[0],
282 'control' : client_iface%self.control[0],
282 'mux': client_iface%self.mux[0],
283 'mux': client_iface%self.mux[0],
283 'task' : (scheme, client_iface%self.task[0]),
284 'task' : (scheme, client_iface%self.task[0]),
284 'iopub' : client_iface%self.iopub[0],
285 'iopub' : client_iface%self.iopub[0],
285 'notification': client_iface%self.notifier_port
286 'notification': client_iface%self.notifier_port
286 }
287 }
287 self.log.debug("Hub engine addrs: %s", self.engine_info)
288 self.log.debug("Hub engine addrs: %s", self.engine_info)
288 self.log.debug("Hub client addrs: %s", self.client_info)
289 self.log.debug("Hub client addrs: %s", self.client_info)
289
290
290 # resubmit stream
291 # resubmit stream
291 r = ZMQStream(ctx.socket(zmq.DEALER), loop)
292 r = ZMQStream(ctx.socket(zmq.DEALER), loop)
292 url = util.disambiguate_url(self.client_info['task'][-1])
293 url = util.disambiguate_url(self.client_info['task'][-1])
293 r.setsockopt(zmq.IDENTITY, self.session.bsession)
294 r.setsockopt(zmq.IDENTITY, self.session.bsession)
294 r.connect(url)
295 r.connect(url)
295
296
296 self.hub = Hub(loop=loop, session=self.session, monitor=sub, heartmonitor=self.heartmonitor,
297 self.hub = Hub(loop=loop, session=self.session, monitor=sub, heartmonitor=self.heartmonitor,
297 query=q, notifier=n, resubmit=r, db=self.db,
298 query=q, notifier=n, resubmit=r, db=self.db,
298 engine_info=self.engine_info, client_info=self.client_info,
299 engine_info=self.engine_info, client_info=self.client_info,
299 log=self.log)
300 log=self.log)
300
301
301
302
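init_hub above is normally driven by ipcontroller, but a hand-rolled sketch shows the lifecycle; the port numbers are illustrative and the module path is assumed:

    from zmq.eventloop import ioloop
    from IPython.parallel.controller.hub import HubFactory

    factory = HubFactory(regport=10101, hb=(10102, 10103))
    factory.construct()   # init_hub(): bind sockets, build HeartMonitor and Hub
    factory.start()       # start the heartbeat
    ioloop.IOLoop.instance().start()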
302 class Hub(SessionFactory):
303 class Hub(SessionFactory):
303 """The IPython Controller Hub with 0MQ connections
304 """The IPython Controller Hub with 0MQ connections
304
305
305 Parameters
306 Parameters
306 ==========
307 ==========
307 loop: zmq IOLoop instance
308 loop: zmq IOLoop instance
308 session: Session object
309 session: Session object
309 <removed> context: zmq context for creating new connections (?)
310 <removed> context: zmq context for creating new connections (?)
310 queue: ZMQStream for monitoring the command queue (SUB)
311 queue: ZMQStream for monitoring the command queue (SUB)
311 query: ZMQStream for engine registration and client query requests (XREP)
312 query: ZMQStream for engine registration and client query requests (XREP)
312 heartbeat: HeartMonitor object checking the pulse of the engines
313 heartbeat: HeartMonitor object checking the pulse of the engines
313 notifier: ZMQStream for broadcasting engine registration changes (PUB)
314 notifier: ZMQStream for broadcasting engine registration changes (PUB)
314 db: connection to db for out-of-memory logging of commands
315 db: connection to db for out-of-memory logging of commands
315 NotImplemented
316 NotImplemented
316 engine_info: dict of zmq connection information for engines to connect
317 engine_info: dict of zmq connection information for engines to connect
317 to the queues.
318 to the queues.
318 client_info: dict of zmq connection information for clients to connect
319 client_info: dict of zmq connection information for clients to connect
319 to the queues.
320 to the queues.
320 """
321 """
321 # internal data structures:
322 # internal data structures:
322 ids=Set() # engine IDs
323 ids=Set() # engine IDs
323 keytable=Dict()
324 keytable=Dict()
324 by_ident=Dict()
325 by_ident=Dict()
325 engines=Dict()
326 engines=Dict()
326 clients=Dict()
327 clients=Dict()
327 hearts=Dict()
328 hearts=Dict()
328 pending=Set()
329 pending=Set()
329 queues=Dict() # pending msg_ids keyed by engine_id
330 queues=Dict() # pending msg_ids keyed by engine_id
330 tasks=Dict() # pending msg_ids submitted as tasks, keyed by client_id
331 tasks=Dict() # pending msg_ids submitted as tasks, keyed by client_id
331 completed=Dict() # completed msg_ids keyed by engine_id
332 completed=Dict() # completed msg_ids keyed by engine_id
332 all_completed=Set() # set of all completed msg_ids
333 all_completed=Set() # set of all completed msg_ids
333 dead_engines=Set() # queue identities of engines that have died
334 dead_engines=Set() # queue identities of engines that have died
334 unassigned=Set() # set of task msg_ids not yet assigned a destination
335 unassigned=Set() # set of task msg_ids not yet assigned a destination
335 incoming_registrations=Dict()
336 incoming_registrations=Dict()
336 registration_timeout=Integer()
337 registration_timeout=Integer()
337 _idcounter=Integer(0)
338 _idcounter=Integer(0)
338
339
339 # objects from constructor:
340 # objects from constructor:
340 query=Instance(ZMQStream)
341 query=Instance(ZMQStream)
341 monitor=Instance(ZMQStream)
342 monitor=Instance(ZMQStream)
342 notifier=Instance(ZMQStream)
343 notifier=Instance(ZMQStream)
343 resubmit=Instance(ZMQStream)
344 resubmit=Instance(ZMQStream)
344 heartmonitor=Instance(HeartMonitor)
345 heartmonitor=Instance(HeartMonitor)
345 db=Instance(object)
346 db=Instance(object)
346 client_info=Dict()
347 client_info=Dict()
347 engine_info=Dict()
348 engine_info=Dict()
348
349
349
350
350 def __init__(self, **kwargs):
351 def __init__(self, **kwargs):
351 """
352 """
352 # universal:
353 # universal:
353 loop: IOLoop for creating future connections
354 loop: IOLoop for creating future connections
354 session: streamsession for sending serialized data
355 session: streamsession for sending serialized data
355 # engine:
356 # engine:
356 queue: ZMQStream for monitoring queue messages
357 queue: ZMQStream for monitoring queue messages
357 query: ZMQStream for engine+client registration and client requests
358 query: ZMQStream for engine+client registration and client requests
358 heartbeat: HeartMonitor object for tracking engines
359 heartbeat: HeartMonitor object for tracking engines
359 # extra:
360 # extra:
360 db: ZMQStream for db connection (NotImplemented)
361 db: ZMQStream for db connection (NotImplemented)
361 engine_info: zmq address/protocol dict for engine connections
362 engine_info: zmq address/protocol dict for engine connections
362 client_info: zmq address/protocol dict for client connections
363 client_info: zmq address/protocol dict for client connections
363 """
364 """
364
365
365 super(Hub, self).__init__(**kwargs)
366 super(Hub, self).__init__(**kwargs)
366 self.registration_timeout = max(5000, 2*self.heartmonitor.period)
367 self.registration_timeout = max(5000, 2*self.heartmonitor.period)
367
368
368 # validate connection dicts:
369 # validate connection dicts:
369 for k,v in self.client_info.iteritems():
370 for k,v in self.client_info.iteritems():
370 if k == 'task':
371 if k == 'task':
371 util.validate_url_container(v[1])
372 util.validate_url_container(v[1])
372 else:
373 else:
373 util.validate_url_container(v)
374 util.validate_url_container(v)
374 # util.validate_url_container(self.client_info)
375 # util.validate_url_container(self.client_info)
375 util.validate_url_container(self.engine_info)
376 util.validate_url_container(self.engine_info)
376
377
377 # register our callbacks
378 # register our callbacks
378 self.query.on_recv(self.dispatch_query)
379 self.query.on_recv(self.dispatch_query)
379 self.monitor.on_recv(self.dispatch_monitor_traffic)
380 self.monitor.on_recv(self.dispatch_monitor_traffic)
380
381
381 self.heartmonitor.add_heart_failure_handler(self.handle_heart_failure)
382 self.heartmonitor.add_heart_failure_handler(self.handle_heart_failure)
382 self.heartmonitor.add_new_heart_handler(self.handle_new_heart)
383 self.heartmonitor.add_new_heart_handler(self.handle_new_heart)
383
384
384 self.monitor_handlers = {b'in' : self.save_queue_request,
385 self.monitor_handlers = {b'in' : self.save_queue_request,
385 b'out': self.save_queue_result,
386 b'out': self.save_queue_result,
386 b'intask': self.save_task_request,
387 b'intask': self.save_task_request,
387 b'outtask': self.save_task_result,
388 b'outtask': self.save_task_result,
388 b'tracktask': self.save_task_destination,
389 b'tracktask': self.save_task_destination,
389 b'incontrol': _passer,
390 b'incontrol': _passer,
390 b'outcontrol': _passer,
391 b'outcontrol': _passer,
391 b'iopub': self.save_iopub_message,
392 b'iopub': self.save_iopub_message,
392 }
393 }
393
394
394 self.query_handlers = {'queue_request': self.queue_status,
395 self.query_handlers = {'queue_request': self.queue_status,
395 'result_request': self.get_results,
396 'result_request': self.get_results,
396 'history_request': self.get_history,
397 'history_request': self.get_history,
397 'db_request': self.db_query,
398 'db_request': self.db_query,
398 'purge_request': self.purge_results,
399 'purge_request': self.purge_results,
399 'load_request': self.check_load,
400 'load_request': self.check_load,
400 'resubmit_request': self.resubmit_task,
401 'resubmit_request': self.resubmit_task,
401 'shutdown_request': self.shutdown_request,
402 'shutdown_request': self.shutdown_request,
402 'registration_request' : self.register_engine,
403 'registration_request' : self.register_engine,
403 'unregistration_request' : self.unregister_engine,
404 'unregistration_request' : self.unregister_engine,
404 'connection_request': self.connection_request,
405 'connection_request': self.connection_request,
405 }
406 }
406
407
407 # ignore resubmit replies
408 # ignore resubmit replies
408 self.resubmit.on_recv(lambda msg: None, copy=False)
409 self.resubmit.on_recv(lambda msg: None, copy=False)
409
410
410 self.log.info("hub::created hub")
411 self.log.info("hub::created hub")
411
412
412 @property
413 @property
413 def _next_id(self):
414 def _next_id(self):
414 """gemerate a new ID.
415 """gemerate a new ID.
415
416
416 No longer reuse old ids, just count from 0."""
417 No longer reuse old ids, just count from 0."""
417 newid = self._idcounter
418 newid = self._idcounter
418 self._idcounter += 1
419 self._idcounter += 1
419 return newid
420 return newid
420 # newid = 0
421 # newid = 0
421 # incoming = [id[0] for id in self.incoming_registrations.itervalues()]
422 # incoming = [id[0] for id in self.incoming_registrations.itervalues()]
422 # # print newid, self.ids, self.incoming_registrations
423 # # print newid, self.ids, self.incoming_registrations
423 # while newid in self.ids or newid in incoming:
424 # while newid in self.ids or newid in incoming:
424 # newid += 1
425 # newid += 1
425 # return newid
426 # return newid
426
427
427 #-----------------------------------------------------------------------------
428 #-----------------------------------------------------------------------------
428 # message validation
429 # message validation
429 #-----------------------------------------------------------------------------
430 #-----------------------------------------------------------------------------
430
431
431 def _validate_targets(self, targets):
432 def _validate_targets(self, targets):
432 """turn any valid targets argument into a list of integer ids"""
433 """turn any valid targets argument into a list of integer ids"""
433 if targets is None:
434 if targets is None:
434 # default to all
435 # default to all
435 return self.ids
436 return self.ids
436
437
437 if isinstance(targets, (int,str,unicode)):
438 if isinstance(targets, (int,str,unicode)):
438 # only one target specified
439 # only one target specified
439 targets = [targets]
440 targets = [targets]
440 _targets = []
441 _targets = []
441 for t in targets:
442 for t in targets:
442 # map raw identities to ids
443 # map raw identities to ids
443 if isinstance(t, (str,unicode)):
444 if isinstance(t, (str,unicode)):
444 t = self.by_ident.get(t, t)
445 t = self.by_ident.get(cast_bytes(t), t)
445 _targets.append(t)
446 _targets.append(t)
446 targets = _targets
447 targets = _targets
447 bad_targets = [ t for t in targets if t not in self.ids ]
448 bad_targets = [ t for t in targets if t not in self.ids ]
448 if bad_targets:
449 if bad_targets:
449 raise IndexError("No Such Engine: %r" % bad_targets)
450 raise IndexError("No Such Engine: %r" % bad_targets)
450 if not targets:
451 if not targets:
451 raise IndexError("No Engines Registered")
452 raise IndexError("No Engines Registered")
452 return targets
453 return targets
453
454
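In effect (sketch; the string identity shown is hypothetical):

    hub._validate_targets(None)          # -> self.ids: all registered engines
    hub._validate_targets([0, u'a1b2'])  # str/unicode identities mapped via by_ident (cast_bytes)
    hub._validate_targets([99])          # -> IndexError: No Such Engine: [99]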
454 #-----------------------------------------------------------------------------
455 #-----------------------------------------------------------------------------
455 # dispatch methods (1 per stream)
456 # dispatch methods (1 per stream)
456 #-----------------------------------------------------------------------------
457 #-----------------------------------------------------------------------------
457
458
458
459
459 @util.log_errors
460 @util.log_errors
460 def dispatch_monitor_traffic(self, msg):
461 def dispatch_monitor_traffic(self, msg):
461 """all ME and Task queue messages come through here, as well as
462 """all ME and Task queue messages come through here, as well as
462 IOPub traffic."""
463 IOPub traffic."""
463 self.log.debug("monitor traffic: %r", msg[0])
464 self.log.debug("monitor traffic: %r", msg[0])
464 switch = msg[0]
465 switch = msg[0]
465 try:
466 try:
466 idents, msg = self.session.feed_identities(msg[1:])
467 idents, msg = self.session.feed_identities(msg[1:])
467 except ValueError:
468 except ValueError:
468 idents=[]
469 idents=[]
469 if not idents:
470 if not idents:
470 self.log.error("Bad Monitor Message: %r", msg)
471 self.log.error("Bad Monitor Message: %r", msg)
471 return
472 return
472 handler = self.monitor_handlers.get(switch, None)
473 handler = self.monitor_handlers.get(switch, None)
473 if handler is not None:
474 if handler is not None:
474 handler(idents, msg)
475 handler(idents, msg)
475 else:
476 else:
476 self.log.error("Invalid monitor topic: %r", switch)
477 self.log.error("Invalid monitor topic: %r", switch)
477
478
478
479
479 @util.log_errors
480 @util.log_errors
480 def dispatch_query(self, msg):
481 def dispatch_query(self, msg):
481 """Route registration requests and queries from clients."""
482 """Route registration requests and queries from clients."""
482 try:
483 try:
483 idents, msg = self.session.feed_identities(msg)
484 idents, msg = self.session.feed_identities(msg)
484 except ValueError:
485 except ValueError:
485 idents = []
486 idents = []
486 if not idents:
487 if not idents:
487 self.log.error("Bad Query Message: %r", msg)
488 self.log.error("Bad Query Message: %r", msg)
488 return
489 return
489 client_id = idents[0]
490 client_id = idents[0]
490 try:
491 try:
491 msg = self.session.unserialize(msg, content=True)
492 msg = self.session.unserialize(msg, content=True)
492 except Exception:
493 except Exception:
493 content = error.wrap_exception()
494 content = error.wrap_exception()
494 self.log.error("Bad Query Message: %r", msg, exc_info=True)
495 self.log.error("Bad Query Message: %r", msg, exc_info=True)
495 self.session.send(self.query, "hub_error", ident=client_id,
496 self.session.send(self.query, "hub_error", ident=client_id,
496 content=content)
497 content=content)
497 return
498 return
498 # print client_id, header, parent, content
499 # print client_id, header, parent, content
499 #switch on message type:
500 #switch on message type:
500 msg_type = msg['header']['msg_type']
501 msg_type = msg['header']['msg_type']
501 self.log.info("client::client %r requested %r", client_id, msg_type)
502 self.log.info("client::client %r requested %r", client_id, msg_type)
502 handler = self.query_handlers.get(msg_type, None)
503 handler = self.query_handlers.get(msg_type, None)
503 try:
504 try:
504 assert handler is not None, "Bad Message Type: %r" % msg_type
505 assert handler is not None, "Bad Message Type: %r" % msg_type
505 except:
506 except:
506 content = error.wrap_exception()
507 content = error.wrap_exception()
507 self.log.error("Bad Message Type: %r", msg_type, exc_info=True)
508 self.log.error("Bad Message Type: %r", msg_type, exc_info=True)
508 self.session.send(self.query, "hub_error", ident=client_id,
509 self.session.send(self.query, "hub_error", ident=client_id,
509 content=content)
510 content=content)
510 return
511 return
511
512
512 else:
513 else:
513 handler(idents, msg)
514 handler(idents, msg)
514
515
515 def dispatch_db(self, msg):
516 def dispatch_db(self, msg):
516 """"""
517 """"""
517 raise NotImplementedError
518 raise NotImplementedError
518
519
519 #---------------------------------------------------------------------------
520 #---------------------------------------------------------------------------
520 # handler methods (1 per event)
521 # handler methods (1 per event)
521 #---------------------------------------------------------------------------
522 #---------------------------------------------------------------------------
522
523
523 #----------------------- Heartbeat --------------------------------------
524 #----------------------- Heartbeat --------------------------------------
524
525
525 def handle_new_heart(self, heart):
526 def handle_new_heart(self, heart):
526 """handler to attach to heartbeater.
527 """handler to attach to heartbeater.
527 Called when a new heart starts to beat.
528 Called when a new heart starts to beat.
528 Triggers completion of registration."""
529 Triggers completion of registration."""
529 self.log.debug("heartbeat::handle_new_heart(%r)", heart)
530 self.log.debug("heartbeat::handle_new_heart(%r)", heart)
530 if heart not in self.incoming_registrations:
531 if heart not in self.incoming_registrations:
531 self.log.info("heartbeat::ignoring new heart: %r", heart)
532 self.log.info("heartbeat::ignoring new heart: %r", heart)
532 else:
533 else:
533 self.finish_registration(heart)
534 self.finish_registration(heart)
534
535
535
536
536 def handle_heart_failure(self, heart):
537 def handle_heart_failure(self, heart):
537 """handler to attach to heartbeater.
538 """handler to attach to heartbeater.
538 Called when a previously registered heart fails to respond to a beat request.
539 Called when a previously registered heart fails to respond to a beat request.
539 Triggers unregistration."""
540 Triggers unregistration."""
540 self.log.debug("heartbeat::handle_heart_failure(%r)", heart)
541 self.log.debug("heartbeat::handle_heart_failure(%r)", heart)
541 eid = self.hearts.get(heart, None)
542 eid = self.hearts.get(heart, None)
542 if eid is None or self.keytable[eid] in self.dead_engines:
543 if eid is None or self.keytable[eid] in self.dead_engines:
543 self.log.info("heartbeat::ignoring heart failure %r (not an engine or already dead)", heart)
544 self.log.info("heartbeat::ignoring heart failure %r (not an engine or already dead)", heart)
544 else:
545 else:
545 queue = self.engines[eid].queue
546 queue = self.engines[eid].queue
546 self.unregister_engine(heart, dict(content=dict(id=eid, queue=queue)))
547 self.unregister_engine(heart, dict(content=dict(id=eid, queue=queue)))
547
548
548 #----------------------- MUX Queue Traffic ------------------------------
549 #----------------------- MUX Queue Traffic ------------------------------
549
550
550 def save_queue_request(self, idents, msg):
551 def save_queue_request(self, idents, msg):
551 if len(idents) < 2:
552 if len(idents) < 2:
552 self.log.error("invalid identity prefix: %r", idents)
553 self.log.error("invalid identity prefix: %r", idents)
553 return
554 return
554 queue_id, client_id = idents[:2]
555 queue_id, client_id = idents[:2]
555 try:
556 try:
556 msg = self.session.unserialize(msg)
557 msg = self.session.unserialize(msg)
557 except Exception:
558 except Exception:
558 self.log.error("queue::client %r sent invalid message to %r: %r", client_id, queue_id, msg, exc_info=True)
559 self.log.error("queue::client %r sent invalid message to %r: %r", client_id, queue_id, msg, exc_info=True)
559 return
560 return
560
561
561 eid = self.by_ident.get(queue_id, None)
562 eid = self.by_ident.get(queue_id, None)
562 if eid is None:
563 if eid is None:
563 self.log.error("queue::target %r not registered", queue_id)
564 self.log.error("queue::target %r not registered", queue_id)
564 self.log.debug("queue:: valid are: %r", self.by_ident.keys())
565 self.log.debug("queue:: valid are: %r", self.by_ident.keys())
565 return
566 return
566 record = init_record(msg)
567 record = init_record(msg)
567 msg_id = record['msg_id']
568 msg_id = record['msg_id']
568 self.log.info("queue::client %r submitted request %r to %s", client_id, msg_id, eid)
569 self.log.info("queue::client %r submitted request %r to %s", client_id, msg_id, eid)
569 # Unicode in records
570 # Unicode in records
570 record['engine_uuid'] = queue_id.decode('ascii')
571 record['engine_uuid'] = queue_id.decode('ascii')
571 record['client_uuid'] = client_id.decode('ascii')
572 record['client_uuid'] = client_id.decode('ascii')
572 record['queue'] = 'mux'
573 record['queue'] = 'mux'
573
574
574 try:
575 try:
575 # it's possible iopub arrived first:
576 # it's possible iopub arrived first:
576 existing = self.db.get_record(msg_id)
577 existing = self.db.get_record(msg_id)
577 for key,evalue in existing.iteritems():
578 for key,evalue in existing.iteritems():
578 rvalue = record.get(key, None)
579 rvalue = record.get(key, None)
579 if evalue and rvalue and evalue != rvalue:
580 if evalue and rvalue and evalue != rvalue:
580 self.log.warn("conflicting initial state for record: %r:%r <%r> %r", msg_id, rvalue, key, evalue)
581 self.log.warn("conflicting initial state for record: %r:%r <%r> %r", msg_id, rvalue, key, evalue)
581 elif evalue and not rvalue:
582 elif evalue and not rvalue:
582 record[key] = evalue
583 record[key] = evalue
583 try:
584 try:
584 self.db.update_record(msg_id, record)
585 self.db.update_record(msg_id, record)
585 except Exception:
586 except Exception:
586 self.log.error("DB Error updating record %r", msg_id, exc_info=True)
587 self.log.error("DB Error updating record %r", msg_id, exc_info=True)
587 except KeyError:
588 except KeyError:
588 try:
589 try:
589 self.db.add_record(msg_id, record)
590 self.db.add_record(msg_id, record)
590 except Exception:
591 except Exception:
591 self.log.error("DB Error adding record %r", msg_id, exc_info=True)
592 self.log.error("DB Error adding record %r", msg_id, exc_info=True)
592
593
593
594
594 self.pending.add(msg_id)
595 self.pending.add(msg_id)
595 self.queues[eid].append(msg_id)
596 self.queues[eid].append(msg_id)
596
597
597 def save_queue_result(self, idents, msg):
598 def save_queue_result(self, idents, msg):
598 if len(idents) < 2:
599 if len(idents) < 2:
599 self.log.error("invalid identity prefix: %r", idents)
600 self.log.error("invalid identity prefix: %r", idents)
600 return
601 return
601
602
602 client_id, queue_id = idents[:2]
603 client_id, queue_id = idents[:2]
603 try:
604 try:
604 msg = self.session.unserialize(msg)
605 msg = self.session.unserialize(msg)
605 except Exception:
606 except Exception:
606 self.log.error("queue::engine %r sent invalid message to %r: %r",
607 self.log.error("queue::engine %r sent invalid message to %r: %r",
607 queue_id, client_id, msg, exc_info=True)
608 queue_id, client_id, msg, exc_info=True)
608 return
609 return
609
610
610 eid = self.by_ident.get(queue_id, None)
611 eid = self.by_ident.get(queue_id, None)
611 if eid is None:
612 if eid is None:
612 self.log.error("queue::unknown engine %r is sending a reply: ", queue_id)
613 self.log.error("queue::unknown engine %r is sending a reply: ", queue_id)
613 return
614 return
614
615
615 parent = msg['parent_header']
616 parent = msg['parent_header']
616 if not parent:
617 if not parent:
617 return
618 return
618 msg_id = parent['msg_id']
619 msg_id = parent['msg_id']
619 if msg_id in self.pending:
620 if msg_id in self.pending:
620 self.pending.remove(msg_id)
621 self.pending.remove(msg_id)
621 self.all_completed.add(msg_id)
622 self.all_completed.add(msg_id)
622 self.queues[eid].remove(msg_id)
623 self.queues[eid].remove(msg_id)
623 self.completed[eid].append(msg_id)
624 self.completed[eid].append(msg_id)
624 self.log.info("queue::request %r completed on %s", msg_id, eid)
625 self.log.info("queue::request %r completed on %s", msg_id, eid)
625 elif msg_id not in self.all_completed:
626 elif msg_id not in self.all_completed:
626 # it could be a result from a dead engine that died before delivering the
627 # it could be a result from a dead engine that died before delivering the
627 # result
628 # result
628 self.log.warn("queue:: unknown msg finished %r", msg_id)
629 self.log.warn("queue:: unknown msg finished %r", msg_id)
629 return
630 return
630 # update record anyway, because the unregistration could have been premature
631 # update record anyway, because the unregistration could have been premature
631 rheader = msg['header']
632 rheader = msg['header']
632 completed = rheader['date']
633 completed = rheader['date']
633 started = rheader.get('started', None)
634 started = rheader.get('started', None)
634 result = {
635 result = {
635 'result_header' : rheader,
636 'result_header' : rheader,
636 'result_content': msg['content'],
637 'result_content': msg['content'],
637 'received': datetime.now(),
638 'received': datetime.now(),
638 'started' : started,
639 'started' : started,
639 'completed' : completed
640 'completed' : completed
640 }
641 }
641
642
642 result['result_buffers'] = msg['buffers']
643 result['result_buffers'] = msg['buffers']
643 try:
644 try:
644 self.db.update_record(msg_id, result)
645 self.db.update_record(msg_id, result)
645 except Exception:
646 except Exception:
646 self.log.error("DB Error updating record %r", msg_id, exc_info=True)
647 self.log.error("DB Error updating record %r", msg_id, exc_info=True)
647
648
648
649
649 #--------------------- Task Queue Traffic ------------------------------
650 #--------------------- Task Queue Traffic ------------------------------
650
651
651 def save_task_request(self, idents, msg):
652 def save_task_request(self, idents, msg):
652 """Save the submission of a task."""
653 """Save the submission of a task."""
653 client_id = idents[0]
654 client_id = idents[0]
654
655
655 try:
656 try:
656 msg = self.session.unserialize(msg)
657 msg = self.session.unserialize(msg)
657 except Exception:
658 except Exception:
658 self.log.error("task::client %r sent invalid task message: %r",
659 self.log.error("task::client %r sent invalid task message: %r",
659 client_id, msg, exc_info=True)
660 client_id, msg, exc_info=True)
660 return
661 return
661 record = init_record(msg)
662 record = init_record(msg)
662
663
663 record['client_uuid'] = client_id.decode('ascii')
664 record['client_uuid'] = client_id.decode('ascii')
664 record['queue'] = 'task'
665 record['queue'] = 'task'
665 header = msg['header']
666 header = msg['header']
666 msg_id = header['msg_id']
667 msg_id = header['msg_id']
667 self.pending.add(msg_id)
668 self.pending.add(msg_id)
668 self.unassigned.add(msg_id)
669 self.unassigned.add(msg_id)
669 try:
670 try:
670 # it's possible iopub arrived first:
671 # it's possible iopub arrived first:
671 existing = self.db.get_record(msg_id)
672 existing = self.db.get_record(msg_id)
672 if existing['resubmitted']:
673 if existing['resubmitted']:
673 for key in ('submitted', 'client_uuid', 'buffers'):
674 for key in ('submitted', 'client_uuid', 'buffers'):
674 # don't clobber these keys on resubmit
675 # don't clobber these keys on resubmit
675 # submitted and client_uuid should be different
676 # submitted and client_uuid should be different
676 # and buffers might be big, and shouldn't have changed
677 # and buffers might be big, and shouldn't have changed
677 record.pop(key)
678 record.pop(key)
678 # still check content,header which should not change
679 # still check content,header which should not change
679 # but are not expensive to compare as buffers
680 # but are not expensive to compare as buffers
680
681
681 for key,evalue in existing.iteritems():
682 for key,evalue in existing.iteritems():
682 if key.endswith('buffers'):
683 if key.endswith('buffers'):
683 # don't compare buffers
684 # don't compare buffers
684 continue
685 continue
685 rvalue = record.get(key, None)
686 rvalue = record.get(key, None)
686 if evalue and rvalue and evalue != rvalue:
687 if evalue and rvalue and evalue != rvalue:
687 self.log.warn("conflicting initial state for record: %r:%r <%r> %r", msg_id, rvalue, key, evalue)
688 self.log.warn("conflicting initial state for record: %r:%r <%r> %r", msg_id, rvalue, key, evalue)
688 elif evalue and not rvalue:
689 elif evalue and not rvalue:
689 record[key] = evalue
690 record[key] = evalue
690 try:
691 try:
691 self.db.update_record(msg_id, record)
692 self.db.update_record(msg_id, record)
692 except Exception:
693 except Exception:
693 self.log.error("DB Error updating record %r", msg_id, exc_info=True)
694 self.log.error("DB Error updating record %r", msg_id, exc_info=True)
694 except KeyError:
695 except KeyError:
695 try:
696 try:
696 self.db.add_record(msg_id, record)
697 self.db.add_record(msg_id, record)
697 except Exception:
698 except Exception:
698 self.log.error("DB Error adding record %r", msg_id, exc_info=True)
699 self.log.error("DB Error adding record %r", msg_id, exc_info=True)
699 except Exception:
700 except Exception:
700 self.log.error("DB Error saving task request %r", msg_id, exc_info=True)
701 self.log.error("DB Error saving task request %r", msg_id, exc_info=True)
701
702
702 def save_task_result(self, idents, msg):
703 def save_task_result(self, idents, msg):
703 """save the result of a completed task."""
704 """save the result of a completed task."""
704 client_id = idents[0]
705 client_id = idents[0]
705 try:
706 try:
706 msg = self.session.unserialize(msg)
707 msg = self.session.unserialize(msg)
707 except Exception:
708 except Exception:
708 self.log.error("task::invalid task result message send to %r: %r",
709 self.log.error("task::invalid task result message send to %r: %r",
709 client_id, msg, exc_info=True)
710 client_id, msg, exc_info=True)
710 return
711 return
711
712
712 parent = msg['parent_header']
713 parent = msg['parent_header']
713 if not parent:
714 if not parent:
714 # print msg
715 # print msg
715 self.log.warn("Task %r had no parent!", msg)
716 self.log.warn("Task %r had no parent!", msg)
716 return
717 return
717 msg_id = parent['msg_id']
718 msg_id = parent['msg_id']
718 if msg_id in self.unassigned:
719 if msg_id in self.unassigned:
719 self.unassigned.remove(msg_id)
720 self.unassigned.remove(msg_id)
720
721
721 header = msg['header']
722 header = msg['header']
722 engine_uuid = header.get('engine', None)
723 engine_uuid = header.get('engine', u'')
723 eid = self.by_ident.get(engine_uuid, None)
724 eid = self.by_ident.get(cast_bytes(engine_uuid), None)
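# Why cast_bytes here, sketched: self.by_ident is keyed by bytes, while a
# header may carry the engine uuid as unicode. cast_bytes (assuming the
# py3compat default utf8 encoding) encodes unicode and passes bytes through
# unchanged, so either form of the uuid hits the same key:
#
#     cast_bytes(u'abc') == b'abc'
#     cast_bytes(b'abc') == b'abc'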
724
725
725 status = header.get('status', None)
726 status = header.get('status', None)
726
727
727 if msg_id in self.pending:
728 if msg_id in self.pending:
728 self.log.info("task::task %r finished on %s", msg_id, eid)
729 self.log.info("task::task %r finished on %s", msg_id, eid)
729 self.pending.remove(msg_id)
730 self.pending.remove(msg_id)
730 self.all_completed.add(msg_id)
731 self.all_completed.add(msg_id)
731 if eid is not None:
732 if eid is not None:
732 if status != 'aborted':
733 if status != 'aborted':
733 self.completed[eid].append(msg_id)
734 self.completed[eid].append(msg_id)
734 if msg_id in self.tasks[eid]:
735 if msg_id in self.tasks[eid]:
735 self.tasks[eid].remove(msg_id)
736 self.tasks[eid].remove(msg_id)
736 completed = header['date']
737 completed = header['date']
737 started = header.get('started', None)
738 started = header.get('started', None)
738 result = {
739 result = {
739 'result_header' : header,
740 'result_header' : header,
740 'result_content': msg['content'],
741 'result_content': msg['content'],
741 'started' : started,
742 'started' : started,
742 'completed' : completed,
743 'completed' : completed,
743 'received' : datetime.now(),
744 'received' : datetime.now(),
744 'engine_uuid': engine_uuid,
745 'engine_uuid': engine_uuid,
745 }
746 }
746
747
747 result['result_buffers'] = msg['buffers']
748 result['result_buffers'] = msg['buffers']
748 try:
749 try:
749 self.db.update_record(msg_id, result)
750 self.db.update_record(msg_id, result)
750 except Exception:
751 except Exception:
751 self.log.error("DB Error saving task request %r", msg_id, exc_info=True)
752 self.log.error("DB Error saving task request %r", msg_id, exc_info=True)
752
753
753 else:
754 else:
754 self.log.debug("task::unknown task %r finished", msg_id)
755 self.log.debug("task::unknown task %r finished", msg_id)
755
756
756 def save_task_destination(self, idents, msg):
757 def save_task_destination(self, idents, msg):
757 try:
758 try:
758 msg = self.session.unserialize(msg, content=True)
759 msg = self.session.unserialize(msg, content=True)
759 except Exception:
760 except Exception:
760 self.log.error("task::invalid task tracking message", exc_info=True)
761 self.log.error("task::invalid task tracking message", exc_info=True)
761 return
762 return
762 content = msg['content']
763 content = msg['content']
763 # print (content)
764 # print (content)
764 msg_id = content['msg_id']
765 msg_id = content['msg_id']
765 engine_uuid = content['engine_id']
766 engine_uuid = content['engine_id']
766 eid = self.by_ident[util.asbytes(engine_uuid)]
767 eid = self.by_ident[cast_bytes(engine_uuid)]
767
768
768 self.log.info("task::task %r arrived on %r", msg_id, eid)
769 self.log.info("task::task %r arrived on %r", msg_id, eid)
769 if msg_id in self.unassigned:
770 if msg_id in self.unassigned:
770 self.unassigned.remove(msg_id)
771 self.unassigned.remove(msg_id)
771 # else:
772 # else:
772 # self.log.debug("task::task %r not listed as MIA?!"%(msg_id))
773 # self.log.debug("task::task %r not listed as MIA?!"%(msg_id))
773
774
774 self.tasks[eid].append(msg_id)
775 self.tasks[eid].append(msg_id)
775 # self.pending[msg_id][1].update(received=datetime.now(),engine=(eid,engine_uuid))
776 # self.pending[msg_id][1].update(received=datetime.now(),engine=(eid,engine_uuid))
776 try:
777 try:
777 self.db.update_record(msg_id, dict(engine_uuid=engine_uuid))
778 self.db.update_record(msg_id, dict(engine_uuid=engine_uuid))
778 except Exception:
779 except Exception:
779 self.log.error("DB Error saving task destination %r", msg_id, exc_info=True)
780 self.log.error("DB Error saving task destination %r", msg_id, exc_info=True)
780
781
781
782
782 def mia_task_request(self, idents, msg):
783 def mia_task_request(self, idents, msg):
783 raise NotImplementedError
784 raise NotImplementedError
784 # client_id = idents[0]
785 # client_id = idents[0]
785 # content = dict(mia=self.mia,status='ok')
786 # content = dict(mia=self.mia,status='ok')
786 # self.session.send('mia_reply', content=content, idents=client_id)
787 # self.session.send('mia_reply', content=content, idents=client_id)
787
788
788
789
789 #--------------------- IOPub Traffic ------------------------------
790 #--------------------- IOPub Traffic ------------------------------
790
791
791 def save_iopub_message(self, topics, msg):
792 def save_iopub_message(self, topics, msg):
792 """save an iopub message into the db"""
793 """save an iopub message into the db"""
793 # print (topics)
794 # print (topics)
794 try:
795 try:
795 msg = self.session.unserialize(msg, content=True)
796 msg = self.session.unserialize(msg, content=True)
796 except Exception:
797 except Exception:
797 self.log.error("iopub::invalid IOPub message", exc_info=True)
798 self.log.error("iopub::invalid IOPub message", exc_info=True)
798 return
799 return
799
800
800 parent = msg['parent_header']
801 parent = msg['parent_header']
801 if not parent:
802 if not parent:
802 self.log.error("iopub::invalid IOPub message: %r", msg)
803 self.log.error("iopub::invalid IOPub message: %r", msg)
803 return
804 return
804 msg_id = parent['msg_id']
805 msg_id = parent['msg_id']
805 msg_type = msg['header']['msg_type']
806 msg_type = msg['header']['msg_type']
806 content = msg['content']
807 content = msg['content']
807
808
808 # ensure msg_id is in db
809 # ensure msg_id is in db
809 try:
810 try:
810 rec = self.db.get_record(msg_id)
811 rec = self.db.get_record(msg_id)
811 except KeyError:
812 except KeyError:
812 rec = empty_record()
813 rec = empty_record()
813 rec['msg_id'] = msg_id
814 rec['msg_id'] = msg_id
814 self.db.add_record(msg_id, rec)
815 self.db.add_record(msg_id, rec)
815 # stream
816 # stream
816 d = {}
817 d = {}
817 if msg_type == 'stream':
818 if msg_type == 'stream':
818 name = content['name']
819 name = content['name']
819 s = rec[name] or ''
820 s = rec[name] or ''
820 d[name] = s + content['data']
821 d[name] = s + content['data']
821
822
822 elif msg_type == 'pyerr':
823 elif msg_type == 'pyerr':
823 d['pyerr'] = content
824 d['pyerr'] = content
824 elif msg_type == 'pyin':
825 elif msg_type == 'pyin':
825 d['pyin'] = content['code']
826 d['pyin'] = content['code']
826 else:
827 else:
827 d[msg_type] = content.get('data', '')
828 d[msg_type] = content.get('data', '')
828
829
829 try:
830 try:
830 self.db.update_record(msg_id, d)
831 self.db.update_record(msg_id, d)
831 except Exception:
832 except Exception:
832 self.log.error("DB Error saving iopub message %r", msg_id, exc_info=True)
833 self.log.error("DB Error saving iopub message %r", msg_id, exc_info=True)
833
834
834
835
835
836
836 #-------------------------------------------------------------------------
837 #-------------------------------------------------------------------------
837 # Registration requests
838 # Registration requests
838 #-------------------------------------------------------------------------
839 #-------------------------------------------------------------------------
839
840
840 def connection_request(self, client_id, msg):
841 def connection_request(self, client_id, msg):
841 """Reply with connection addresses for clients."""
842 """Reply with connection addresses for clients."""
842 self.log.info("client::client %r connected", client_id)
843 self.log.info("client::client %r connected", client_id)
843 content = dict(status='ok')
844 content = dict(status='ok')
844 content.update(self.client_info)
845 content.update(self.client_info)
845 jsonable = {}
846 jsonable = {}
846 for k,v in self.keytable.iteritems():
847 for k,v in self.keytable.iteritems():
847 if v not in self.dead_engines:
848 if v not in self.dead_engines:
848 jsonable[str(k)] = v.decode('ascii')
849 jsonable[str(k)] = v.decode('ascii')
849 content['engines'] = jsonable
850 content['engines'] = jsonable
850 self.session.send(self.query, 'connection_reply', content, parent=msg, ident=client_id)
851 self.session.send(self.query, 'connection_reply', content, parent=msg, ident=client_id)
851
852
852 def register_engine(self, reg, msg):
853 def register_engine(self, reg, msg):
853 """Register a new engine."""
854 """Register a new engine."""
854 content = msg['content']
855 content = msg['content']
855 try:
856 try:
856 queue = util.asbytes(content['queue'])
857 queue = cast_bytes(content['queue'])
857 except KeyError:
858 except KeyError:
858 self.log.error("registration::queue not specified", exc_info=True)
859 self.log.error("registration::queue not specified", exc_info=True)
859 return
860 return
860 heart = content.get('heartbeat', None)
861 heart = content.get('heartbeat', None)
861 if heart:
862 if heart:
862 heart = util.asbytes(heart)
863 heart = cast_bytes(heart)
863 """register a new engine, and create the socket(s) necessary"""
864 """register a new engine, and create the socket(s) necessary"""
864 eid = self._next_id
865 eid = self._next_id
865 # print (eid, queue, reg, heart)
866 # print (eid, queue, reg, heart)
866
867
867 self.log.debug("registration::register_engine(%i, %r, %r, %r)", eid, queue, reg, heart)
868 self.log.debug("registration::register_engine(%i, %r, %r, %r)", eid, queue, reg, heart)
868
869
869 content = dict(id=eid,status='ok')
870 content = dict(id=eid,status='ok')
870 content.update(self.engine_info)
871 content.update(self.engine_info)
871 # check if requesting available IDs:
872 # check if requesting available IDs:
872 if queue in self.by_ident:
873 if queue in self.by_ident:
873 try:
874 try:
874 raise KeyError("queue_id %r in use" % queue)
875 raise KeyError("queue_id %r in use" % queue)
875 except:
876 except:
876 content = error.wrap_exception()
877 content = error.wrap_exception()
877 self.log.error("queue_id %r in use", queue, exc_info=True)
878 self.log.error("queue_id %r in use", queue, exc_info=True)
878 elif heart in self.hearts: # need to check unique hearts?
879 elif heart in self.hearts: # need to check unique hearts?
879 try:
880 try:
880 raise KeyError("heart_id %r in use" % heart)
881 raise KeyError("heart_id %r in use" % heart)
881 except:
882 except:
882 self.log.error("heart_id %r in use", heart, exc_info=True)
883 self.log.error("heart_id %r in use", heart, exc_info=True)
883 content = error.wrap_exception()
884 content = error.wrap_exception()
884 else:
885 else:
885 for h, pack in self.incoming_registrations.iteritems():
886 for h, pack in self.incoming_registrations.iteritems():
886 if heart == h:
887 if heart == h:
887 try:
888 try:
888 raise KeyError("heart_id %r in use" % heart)
889 raise KeyError("heart_id %r in use" % heart)
889 except:
890 except:
890 self.log.error("heart_id %r in use", heart, exc_info=True)
891 self.log.error("heart_id %r in use", heart, exc_info=True)
891 content = error.wrap_exception()
892 content = error.wrap_exception()
892 break
893 break
893 elif queue == pack[1]:
894 elif queue == pack[1]:
894 try:
895 try:
895 raise KeyError("queue_id %r in use" % queue)
896 raise KeyError("queue_id %r in use" % queue)
896 except:
897 except:
897 self.log.error("queue_id %r in use", queue, exc_info=True)
898 self.log.error("queue_id %r in use", queue, exc_info=True)
898 content = error.wrap_exception()
899 content = error.wrap_exception()
899 break
900 break
900
901
901 msg = self.session.send(self.query, "registration_reply",
902 msg = self.session.send(self.query, "registration_reply",
902 content=content,
903 content=content,
903 ident=reg)
904 ident=reg)
904
905
905 if content['status'] == 'ok':
906 if content['status'] == 'ok':
906 if heart in self.heartmonitor.hearts:
907 if heart in self.heartmonitor.hearts:
907 # already beating
908 # already beating
908 self.incoming_registrations[heart] = (eid,queue,reg[0],None)
909 self.incoming_registrations[heart] = (eid,queue,reg[0],None)
909 self.finish_registration(heart)
910 self.finish_registration(heart)
910 else:
911 else:
911 purge = lambda : self._purge_stalled_registration(heart)
912 purge = lambda : self._purge_stalled_registration(heart)
912 dc = ioloop.DelayedCallback(purge, self.registration_timeout, self.loop)
913 dc = ioloop.DelayedCallback(purge, self.registration_timeout, self.loop)
913 dc.start()
914 dc.start()
914 self.incoming_registrations[heart] = (eid,queue,reg[0],dc)
915 self.incoming_registrations[heart] = (eid,queue,reg[0],dc)
915 else:
916 else:
916 self.log.error("registration::registration %i failed: %r", eid, content['evalue'])
917 self.log.error("registration::registration %i failed: %r", eid, content['evalue'])
917 return eid
918 return eid
918
919
919 def unregister_engine(self, ident, msg):
920 def unregister_engine(self, ident, msg):
920 """Unregister an engine that explicitly requested to leave."""
921 """Unregister an engine that explicitly requested to leave."""
921 try:
922 try:
922 eid = msg['content']['id']
923 eid = msg['content']['id']
923 except:
924 except:
924 self.log.error("registration::bad engine id for unregistration: %r", ident, exc_info=True)
925 self.log.error("registration::bad engine id for unregistration: %r", ident, exc_info=True)
925 return
926 return
926 self.log.info("registration::unregister_engine(%r)", eid)
927 self.log.info("registration::unregister_engine(%r)", eid)
927 # print (eid)
928 # print (eid)
928 uuid = self.keytable[eid]
929 uuid = self.keytable[eid]
929 content=dict(id=eid, queue=uuid.decode('ascii'))
930 content=dict(id=eid, queue=uuid.decode('ascii'))
930 self.dead_engines.add(uuid)
931 self.dead_engines.add(uuid)
931 # self.ids.remove(eid)
932 # self.ids.remove(eid)
932 # uuid = self.keytable.pop(eid)
933 # uuid = self.keytable.pop(eid)
933 #
934 #
934 # ec = self.engines.pop(eid)
935 # ec = self.engines.pop(eid)
935 # self.hearts.pop(ec.heartbeat)
936 # self.hearts.pop(ec.heartbeat)
936 # self.by_ident.pop(ec.queue)
937 # self.by_ident.pop(ec.queue)
937 # self.completed.pop(eid)
938 # self.completed.pop(eid)
938 handleit = lambda : self._handle_stranded_msgs(eid, uuid)
939 handleit = lambda : self._handle_stranded_msgs(eid, uuid)
939 dc = ioloop.DelayedCallback(handleit, self.registration_timeout, self.loop)
940 dc = ioloop.DelayedCallback(handleit, self.registration_timeout, self.loop)
940 dc.start()
941 dc.start()
941 ############## TODO: HANDLE IT ################
942 ############## TODO: HANDLE IT ################
942
943
943 if self.notifier:
944 if self.notifier:
944 self.session.send(self.notifier, "unregistration_notification", content=content)
945 self.session.send(self.notifier, "unregistration_notification", content=content)
945
946
946 def _handle_stranded_msgs(self, eid, uuid):
947 def _handle_stranded_msgs(self, eid, uuid):
947 """Handle messages known to be on an engine when the engine unregisters.
948 """Handle messages known to be on an engine when the engine unregisters.
948
949
949 It is possible that this will fire prematurely - that is, an engine will
950 It is possible that this will fire prematurely - that is, an engine will
950 go down after completing a result, and the client will be notified
951 go down after completing a result, and the client will be notified
951 that the result failed and later receive the actual result.
952 that the result failed and later receive the actual result.
952 """
953 """
953
954
954 outstanding = self.queues[eid]
955 outstanding = self.queues[eid]
955
956
956 for msg_id in outstanding:
957 for msg_id in outstanding:
957 self.pending.remove(msg_id)
958 self.pending.remove(msg_id)
958 self.all_completed.add(msg_id)
959 self.all_completed.add(msg_id)
959 try:
960 try:
960 raise error.EngineError("Engine %r died while running task %r" % (eid, msg_id))
961 raise error.EngineError("Engine %r died while running task %r" % (eid, msg_id))
961 except:
962 except:
962 content = error.wrap_exception()
963 content = error.wrap_exception()
963 # build a fake header:
964 # build a fake header:
964 header = {}
965 header = {}
965 header['engine'] = uuid
966 header['engine'] = uuid
966 header['date'] = datetime.now()
967 header['date'] = datetime.now()
967 rec = dict(result_content=content, result_header=header, result_buffers=[])
968 rec = dict(result_content=content, result_header=header, result_buffers=[])
968 rec['completed'] = header['date']
969 rec['completed'] = header['date']
969 rec['engine_uuid'] = uuid
970 rec['engine_uuid'] = uuid
970 try:
971 try:
971 self.db.update_record(msg_id, rec)
972 self.db.update_record(msg_id, rec)
972 except Exception:
973 except Exception:
973 self.log.error("DB Error handling stranded msg %r", msg_id, exc_info=True)
974 self.log.error("DB Error handling stranded msg %r", msg_id, exc_info=True)
974
975
975
976
976 def finish_registration(self, heart):
977 def finish_registration(self, heart):
977 """Second half of engine registration, called after our HeartMonitor
978 """Second half of engine registration, called after our HeartMonitor
978 has received a beat from the Engine's Heart."""
979 has received a beat from the Engine's Heart."""
979 try:
980 try:
980 (eid,queue,reg,purge) = self.incoming_registrations.pop(heart)
981 (eid,queue,reg,purge) = self.incoming_registrations.pop(heart)
981 except KeyError:
982 except KeyError:
982 self.log.error("registration::tried to finish nonexistant registration", exc_info=True)
983 self.log.error("registration::tried to finish nonexistant registration", exc_info=True)
983 return
984 return
984 self.log.info("registration::finished registering engine %i:%r", eid, queue)
985 self.log.info("registration::finished registering engine %i:%r", eid, queue)
985 if purge is not None:
986 if purge is not None:
986 purge.stop()
987 purge.stop()
987 control = queue
988 control = queue
988 self.ids.add(eid)
989 self.ids.add(eid)
989 self.keytable[eid] = queue
990 self.keytable[eid] = queue
990 self.engines[eid] = EngineConnector(id=eid, queue=queue, registration=reg,
991 self.engines[eid] = EngineConnector(id=eid, queue=queue, registration=reg,
991 control=control, heartbeat=heart)
992 control=control, heartbeat=heart)
992 self.by_ident[queue] = eid
993 self.by_ident[queue] = eid
993 self.queues[eid] = list()
994 self.queues[eid] = list()
994 self.tasks[eid] = list()
995 self.tasks[eid] = list()
995 self.completed[eid] = list()
996 self.completed[eid] = list()
996 self.hearts[heart] = eid
997 self.hearts[heart] = eid
997 content = dict(id=eid, queue=self.engines[eid].queue.decode('ascii'))
998 content = dict(id=eid, queue=self.engines[eid].queue.decode('ascii'))
998 if self.notifier:
999 if self.notifier:
999 self.session.send(self.notifier, "registration_notification", content=content)
1000 self.session.send(self.notifier, "registration_notification", content=content)
1000 self.log.info("engine::Engine Connected: %i", eid)
1001 self.log.info("engine::Engine Connected: %i", eid)
1001
1002
1002 def _purge_stalled_registration(self, heart):
1003 def _purge_stalled_registration(self, heart):
1003 if heart in self.incoming_registrations:
1004 if heart in self.incoming_registrations:
1004 eid = self.incoming_registrations.pop(heart)[0]
1005 eid = self.incoming_registrations.pop(heart)[0]
1005 self.log.info("registration::purging stalled registration: %i", eid)
1006 self.log.info("registration::purging stalled registration: %i", eid)
1006 else:
1007 else:
1007 pass
1008 pass
1008
1009
1009 #-------------------------------------------------------------------------
1010 #-------------------------------------------------------------------------
1010 # Client Requests
1011 # Client Requests
1011 #-------------------------------------------------------------------------
1012 #-------------------------------------------------------------------------
1012
1013
1013 def shutdown_request(self, client_id, msg):
1014 def shutdown_request(self, client_id, msg):
1014 """handle shutdown request."""
1015 """handle shutdown request."""
1015 self.session.send(self.query, 'shutdown_reply', content={'status': 'ok'}, ident=client_id)
1016 self.session.send(self.query, 'shutdown_reply', content={'status': 'ok'}, ident=client_id)
1016 # also notify other clients of shutdown
1017 # also notify other clients of shutdown
1017 self.session.send(self.notifier, 'shutdown_notice', content={'status': 'ok'})
1018 self.session.send(self.notifier, 'shutdown_notice', content={'status': 'ok'})
1018 dc = ioloop.DelayedCallback(lambda : self._shutdown(), 1000, self.loop)
1019 dc = ioloop.DelayedCallback(lambda : self._shutdown(), 1000, self.loop)
1019 dc.start()
1020 dc.start()
1020
1021
1021 def _shutdown(self):
1022 def _shutdown(self):
1022 self.log.info("hub::hub shutting down.")
1023 self.log.info("hub::hub shutting down.")
1023 time.sleep(0.1)
1024 time.sleep(0.1)
1024 sys.exit(0)
1025 sys.exit(0)
1025
1026
1026
1027
1027 def check_load(self, client_id, msg):
1028 def check_load(self, client_id, msg):
1028 content = msg['content']
1029 content = msg['content']
1029 try:
1030 try:
1030 targets = content['targets']
1031 targets = content['targets']
1031 targets = self._validate_targets(targets)
1032 targets = self._validate_targets(targets)
1032 except:
1033 except:
1033 content = error.wrap_exception()
1034 content = error.wrap_exception()
1034 self.session.send(self.query, "hub_error",
1035 self.session.send(self.query, "hub_error",
1035 content=content, ident=client_id)
1036 content=content, ident=client_id)
1036 return
1037 return
1037
1038
1038 content = dict(status='ok')
1039 content = dict(status='ok')
1039 # loads = {}
1040 # loads = {}
1040 for t in targets:
1041 for t in targets:
1041 content[bytes(t)] = len(self.queues[t])+len(self.tasks[t])
1042 content[bytes(t)] = len(self.queues[t])+len(self.tasks[t])
1042 self.session.send(self.query, "load_reply", content=content, ident=client_id)
1043 self.session.send(self.query, "load_reply", content=content, ident=client_id)
1043
1044
1044
1045
1045 def queue_status(self, client_id, msg):
1046 def queue_status(self, client_id, msg):
1046 """Return the Queue status of one or more targets.
1047 """Return the Queue status of one or more targets.
1047 If verbose: return the msg_ids;
1048 If verbose: return the msg_ids;
1048 else: return the count of each type.
1049 else: return the count of each type.
1049 Keys: queue (pending MUX jobs)
1050 Keys: queue (pending MUX jobs)
1050 tasks (pending Task jobs)
1051 tasks (pending Task jobs)
1051 completed (finished jobs from both queues)"""
1052 completed (finished jobs from both queues)"""
1052 content = msg['content']
1053 content = msg['content']
1053 targets = content['targets']
1054 targets = content['targets']
1054 try:
1055 try:
1055 targets = self._validate_targets(targets)
1056 targets = self._validate_targets(targets)
1056 except:
1057 except:
1057 content = error.wrap_exception()
1058 content = error.wrap_exception()
1058 self.session.send(self.query, "hub_error",
1059 self.session.send(self.query, "hub_error",
1059 content=content, ident=client_id)
1060 content=content, ident=client_id)
1060 return
1061 return
1061 verbose = content.get('verbose', False)
1062 verbose = content.get('verbose', False)
1062 content = dict(status='ok')
1063 content = dict(status='ok')
1063 for t in targets:
1064 for t in targets:
1064 queue = self.queues[t]
1065 queue = self.queues[t]
1065 completed = self.completed[t]
1066 completed = self.completed[t]
1066 tasks = self.tasks[t]
1067 tasks = self.tasks[t]
1067 if not verbose:
1068 if not verbose:
1068 queue = len(queue)
1069 queue = len(queue)
1069 completed = len(completed)
1070 completed = len(completed)
1070 tasks = len(tasks)
1071 tasks = len(tasks)
1071 content[str(t)] = {'queue': queue, 'completed': completed , 'tasks': tasks}
1072 content[str(t)] = {'queue': queue, 'completed': completed , 'tasks': tasks}
1072 content['unassigned'] = list(self.unassigned) if verbose else len(self.unassigned)
1073 content['unassigned'] = list(self.unassigned) if verbose else len(self.unassigned)
1073 # print (content)
1074 # print (content)
1074 self.session.send(self.query, "queue_reply", content=content, ident=client_id)
1075 self.session.send(self.query, "queue_reply", content=content, ident=client_id)
1075
1076
1076 def purge_results(self, client_id, msg):
1077 def purge_results(self, client_id, msg):
1077 """Purge results from memory. This method is more valuable before we move
1078 """Purge results from memory. This method is more valuable before we move
1078 to a DB based message storage mechanism."""
1079 to a DB based message storage mechanism."""
1079 content = msg['content']
1080 content = msg['content']
1080 self.log.info("Dropping records with %s", content)
1081 self.log.info("Dropping records with %s", content)
1081 msg_ids = content.get('msg_ids', [])
1082 msg_ids = content.get('msg_ids', [])
1082 reply = dict(status='ok')
1083 reply = dict(status='ok')
1083 if msg_ids == 'all':
1084 if msg_ids == 'all':
1084 try:
1085 try:
1085 self.db.drop_matching_records(dict(completed={'$ne':None}))
1086 self.db.drop_matching_records(dict(completed={'$ne':None}))
1086 except Exception:
1087 except Exception:
1087 reply = error.wrap_exception()
1088 reply = error.wrap_exception()
1088 else:
1089 else:
1089 pending = filter(lambda m: m in self.pending, msg_ids)
1090 pending = filter(lambda m: m in self.pending, msg_ids)
1090 if pending:
1091 if pending:
1091 try:
1092 try:
1092 raise IndexError("msg pending: %r" % pending[0])
1093 raise IndexError("msg pending: %r" % pending[0])
1093 except:
1094 except:
1094 reply = error.wrap_exception()
1095 reply = error.wrap_exception()
1095 else:
1096 else:
1096 try:
1097 try:
1097 self.db.drop_matching_records(dict(msg_id={'$in':msg_ids}))
1098 self.db.drop_matching_records(dict(msg_id={'$in':msg_ids}))
1098 except Exception:
1099 except Exception:
1099 reply = error.wrap_exception()
1100 reply = error.wrap_exception()
1100
1101
1101 if reply['status'] == 'ok':
1102 if reply['status'] == 'ok':
1102 eids = content.get('engine_ids', [])
1103 eids = content.get('engine_ids', [])
1103 for eid in eids:
1104 for eid in eids:
1104 if eid not in self.engines:
1105 if eid not in self.engines:
1105 try:
1106 try:
1106 raise IndexError("No such engine: %i" % eid)
1107 raise IndexError("No such engine: %i" % eid)
1107 except:
1108 except:
1108 reply = error.wrap_exception()
1109 reply = error.wrap_exception()
1109 break
1110 break
1110 uid = self.engines[eid].queue
1111 uid = self.engines[eid].queue
1111 try:
1112 try:
1112 self.db.drop_matching_records(dict(engine_uuid=uid, completed={'$ne':None}))
1113 self.db.drop_matching_records(dict(engine_uuid=uid, completed={'$ne':None}))
1113 except Exception:
1114 except Exception:
1114 reply = error.wrap_exception()
1115 reply = error.wrap_exception()
1115 break
1116 break
1116
1117
1117 self.session.send(self.query, 'purge_reply', content=reply, ident=client_id)
1118 self.session.send(self.query, 'purge_reply', content=reply, ident=client_id)
1118
1119
1119 def resubmit_task(self, client_id, msg):
1120 def resubmit_task(self, client_id, msg):
1120 """Resubmit one or more tasks."""
1121 """Resubmit one or more tasks."""
1121 def finish(reply):
1122 def finish(reply):
1122 self.session.send(self.query, 'resubmit_reply', content=reply, ident=client_id)
1123 self.session.send(self.query, 'resubmit_reply', content=reply, ident=client_id)
1123
1124
1124 content = msg['content']
1125 content = msg['content']
1125 msg_ids = content['msg_ids']
1126 msg_ids = content['msg_ids']
1126 reply = dict(status='ok')
1127 reply = dict(status='ok')
1127 try:
1128 try:
1128 records = self.db.find_records({'msg_id' : {'$in' : msg_ids}}, keys=[
1129 records = self.db.find_records({'msg_id' : {'$in' : msg_ids}}, keys=[
1129 'header', 'content', 'buffers'])
1130 'header', 'content', 'buffers'])
1130 except Exception:
1131 except Exception:
1131 self.log.error('db::db error finding tasks to resubmit', exc_info=True)
1132 self.log.error('db::db error finding tasks to resubmit', exc_info=True)
1132 return finish(error.wrap_exception())
1133 return finish(error.wrap_exception())
1133
1134
1134 # validate msg_ids
1135 # validate msg_ids
1135 found_ids = [ rec['msg_id'] for rec in records ]
1136 found_ids = [ rec['msg_id'] for rec in records ]
1136 invalid_ids = filter(lambda m: m in self.pending, found_ids)
1137 invalid_ids = filter(lambda m: m in self.pending, found_ids)
1137 if len(records) > len(msg_ids):
1138 if len(records) > len(msg_ids):
1138 try:
1139 try:
1139 raise RuntimeError("DB appears to be in an inconsistent state."
1140 raise RuntimeError("DB appears to be in an inconsistent state."
1140 "More matching records were found than should exist")
1141 "More matching records were found than should exist")
1141 except Exception:
1142 except Exception:
1142 return finish(error.wrap_exception())
1143 return finish(error.wrap_exception())
1143 elif len(records) < len(msg_ids):
1144 elif len(records) < len(msg_ids):
1144 missing = [ m for m in msg_ids if m not in found_ids ]
1145 missing = [ m for m in msg_ids if m not in found_ids ]
1145 try:
1146 try:
1146 raise KeyError("No such msg(s): %r" % missing)
1147 raise KeyError("No such msg(s): %r" % missing)
1147 except KeyError:
1148 except KeyError:
1148 return finish(error.wrap_exception())
1149 return finish(error.wrap_exception())
1149 elif invalid_ids:
1150 elif invalid_ids:
1150 msg_id = invalid_ids[0]
1151 msg_id = invalid_ids[0]
1151 try:
1152 try:
1152 raise ValueError("Task %r appears to be inflight" % msg_id)
1153 raise ValueError("Task %r appears to be inflight" % msg_id)
1153 except Exception:
1154 except Exception:
1154 return finish(error.wrap_exception())
1155 return finish(error.wrap_exception())
1155
1156
1156 # clear the existing records
1157 # clear the existing records
1157 now = datetime.now()
1158 now = datetime.now()
1158 rec = empty_record()
1159 rec = empty_record()
1159 map(rec.pop, ['msg_id', 'header', 'content', 'buffers', 'submitted'])
1160 map(rec.pop, ['msg_id', 'header', 'content', 'buffers', 'submitted'])
1160 rec['resubmitted'] = now
1161 rec['resubmitted'] = now
1161 rec['queue'] = 'task'
1162 rec['queue'] = 'task'
1162 rec['client_uuid'] = client_id.decode('ascii')
1163 rec['client_uuid'] = client_id.decode('ascii')
1163 try:
1164 try:
1164 for msg_id in msg_ids:
1165 for msg_id in msg_ids:
1165 self.all_completed.discard(msg_id)
1166 self.all_completed.discard(msg_id)
1166 self.db.update_record(msg_id, rec)
1167 self.db.update_record(msg_id, rec)
1167 except Exception:
1168 except Exception:
1168 self.log.error('db::db error updating record', exc_info=True)
1169 self.log.error('db::db error updating record', exc_info=True)
1169 reply = error.wrap_exception()
1170 reply = error.wrap_exception()
1170 else:
1171 else:
1171 # send the messages
1172 # send the messages
1172 for rec in records:
1173 for rec in records:
1173 header = rec['header']
1174 header = rec['header']
1174 # include resubmitted in header to prevent digest collision
1175 # include resubmitted in header to prevent digest collision
1175 header['resubmitted'] = now
1176 header['resubmitted'] = now
1176 msg = self.session.msg(header['msg_type'])
1177 msg = self.session.msg(header['msg_type'])
1177 msg['content'] = rec['content']
1178 msg['content'] = rec['content']
1178 msg['header'] = header
1179 msg['header'] = header
1179 msg['header']['msg_id'] = rec['msg_id']
1180 msg['header']['msg_id'] = rec['msg_id']
1180 self.session.send(self.resubmit, msg, buffers=rec['buffers'])
1181 self.session.send(self.resubmit, msg, buffers=rec['buffers'])
1181
1182
1182 finish(dict(status='ok'))
1183 finish(dict(status='ok'))
1183
1184
1184
1185
1185 def _extract_record(self, rec):
1186 def _extract_record(self, rec):
1186 """decompose a TaskRecord dict into subsection of reply for get_result"""
1187 """decompose a TaskRecord dict into subsection of reply for get_result"""
1187 io_dict = {}
1188 io_dict = {}
1188 for key in ('pyin', 'pyout', 'pyerr', 'stdout', 'stderr'):
1189 for key in ('pyin', 'pyout', 'pyerr', 'stdout', 'stderr'):
1189 io_dict[key] = rec[key]
1190 io_dict[key] = rec[key]
1190 content = { 'result_content': rec['result_content'],
1191 content = { 'result_content': rec['result_content'],
1191 'header': rec['header'],
1192 'header': rec['header'],
1192 'result_header' : rec['result_header'],
1193 'result_header' : rec['result_header'],
1193 'received' : rec['received'],
1194 'received' : rec['received'],
1194 'io' : io_dict,
1195 'io' : io_dict,
1195 }
1196 }
1196 if rec['result_buffers']:
1197 if rec['result_buffers']:
1197 buffers = map(bytes, rec['result_buffers'])
1198 buffers = map(bytes, rec['result_buffers'])
1198 else:
1199 else:
1199 buffers = []
1200 buffers = []
1200
1201
1201 return content, buffers
1202 return content, buffers
1202
1203
1203 def get_results(self, client_id, msg):
1204 def get_results(self, client_id, msg):
1204 """Get the result of 1 or more messages."""
1205 """Get the result of 1 or more messages."""
1205 content = msg['content']
1206 content = msg['content']
1206 msg_ids = sorted(set(content['msg_ids']))
1207 msg_ids = sorted(set(content['msg_ids']))
1207 statusonly = content.get('status_only', False)
1208 statusonly = content.get('status_only', False)
1208 pending = []
1209 pending = []
1209 completed = []
1210 completed = []
1210 content = dict(status='ok')
1211 content = dict(status='ok')
1211 content['pending'] = pending
1212 content['pending'] = pending
1212 content['completed'] = completed
1213 content['completed'] = completed
1213 buffers = []
1214 buffers = []
1214 if not statusonly:
1215 if not statusonly:
1215 try:
1216 try:
1216 matches = self.db.find_records(dict(msg_id={'$in':msg_ids}))
1217 matches = self.db.find_records(dict(msg_id={'$in':msg_ids}))
1217 # turn match list into dict, for faster lookup
1218 # turn match list into dict, for faster lookup
1218 records = {}
1219 records = {}
1219 for rec in matches:
1220 for rec in matches:
1220 records[rec['msg_id']] = rec
1221 records[rec['msg_id']] = rec
1221 except Exception:
1222 except Exception:
1222 content = error.wrap_exception()
1223 content = error.wrap_exception()
1223 self.session.send(self.query, "result_reply", content=content,
1224 self.session.send(self.query, "result_reply", content=content,
1224 parent=msg, ident=client_id)
1225 parent=msg, ident=client_id)
1225 return
1226 return
1226 else:
1227 else:
1227 records = {}
1228 records = {}
1228 for msg_id in msg_ids:
1229 for msg_id in msg_ids:
1229 if msg_id in self.pending:
1230 if msg_id in self.pending:
1230 pending.append(msg_id)
1231 pending.append(msg_id)
1231 elif msg_id in self.all_completed:
1232 elif msg_id in self.all_completed:
1232 completed.append(msg_id)
1233 completed.append(msg_id)
1233 if not statusonly:
1234 if not statusonly:
1234 c,bufs = self._extract_record(records[msg_id])
1235 c,bufs = self._extract_record(records[msg_id])
1235 content[msg_id] = c
1236 content[msg_id] = c
1236 buffers.extend(bufs)
1237 buffers.extend(bufs)
1237 elif msg_id in records:
1238 elif msg_id in records:
1238 if records[msg_id]['completed']:
1239 if records[msg_id]['completed']:
1239 completed.append(msg_id)
1240 completed.append(msg_id)
1240 c,bufs = self._extract_record(records[msg_id])
1241 c,bufs = self._extract_record(records[msg_id])
1241 content[msg_id] = c
1242 content[msg_id] = c
1242 buffers.extend(bufs)
1243 buffers.extend(bufs)
1243 else:
1244 else:
1244 pending.append(msg_id)
1245 pending.append(msg_id)
1245 else:
1246 else:
1246 try:
1247 try:
1247 raise KeyError('No such message: '+msg_id)
1248 raise KeyError('No such message: '+msg_id)
1248 except:
1249 except:
1249 content = error.wrap_exception()
1250 content = error.wrap_exception()
1250 break
1251 break
1251 self.session.send(self.query, "result_reply", content=content,
1252 self.session.send(self.query, "result_reply", content=content,
1252 parent=msg, ident=client_id,
1253 parent=msg, ident=client_id,
1253 buffers=buffers)
1254 buffers=buffers)
1254
1255
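# Shape of a status-only result_reply, per the content assembled above
# (msg ids illustrative):
#
#     content = {
#         'status': 'ok',
#         'pending': ['msg-a'],     # still in self.pending
#         'completed': ['msg-b'],   # finished; full records are omitted
#     }                             # when status_only is requested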
1255 def get_history(self, client_id, msg):
1256 def get_history(self, client_id, msg):
1256 """Get a list of all msg_ids in our DB records"""
1257 """Get a list of all msg_ids in our DB records"""
1257 try:
1258 try:
1258 msg_ids = self.db.get_history()
1259 msg_ids = self.db.get_history()
1259 except Exception as e:
1260 except Exception as e:
1260 content = error.wrap_exception()
1261 content = error.wrap_exception()
1261 else:
1262 else:
1262 content = dict(status='ok', history=msg_ids)
1263 content = dict(status='ok', history=msg_ids)
1263
1264
1264 self.session.send(self.query, "history_reply", content=content,
1265 self.session.send(self.query, "history_reply", content=content,
1265 parent=msg, ident=client_id)
1266 parent=msg, ident=client_id)
1266
1267
1267 def db_query(self, client_id, msg):
1268 def db_query(self, client_id, msg):
1268 """Perform a raw query on the task record database."""
1269 """Perform a raw query on the task record database."""
1269 content = msg['content']
1270 content = msg['content']
1270 query = content.get('query', {})
1271 query = content.get('query', {})
1271 keys = content.get('keys', None)
1272 keys = content.get('keys', None)
1272 buffers = []
1273 buffers = []
1273 empty = list()
1274 empty = list()
1274 try:
1275 try:
1275 records = self.db.find_records(query, keys)
1276 records = self.db.find_records(query, keys)
1276 except Exception as e:
1277 except Exception as e:
1277 content = error.wrap_exception()
1278 content = error.wrap_exception()
1278 else:
1279 else:
1279 # extract buffers from reply content:
1280 # extract buffers from reply content:
1280 if keys is not None:
1281 if keys is not None:
1281 buffer_lens = [] if 'buffers' in keys else None
1282 buffer_lens = [] if 'buffers' in keys else None
1282 result_buffer_lens = [] if 'result_buffers' in keys else None
1283 result_buffer_lens = [] if 'result_buffers' in keys else None
1283 else:
1284 else:
1284 buffer_lens = None
1285 buffer_lens = None
1285 result_buffer_lens = None
1286 result_buffer_lens = None
1286
1287
1287 for rec in records:
1288 for rec in records:
1288 # buffers may be None, so double check
1289 # buffers may be None, so double check
1289 b = rec.pop('buffers', empty) or empty
1290 b = rec.pop('buffers', empty) or empty
1290 if buffer_lens is not None:
1291 if buffer_lens is not None:
1291 buffer_lens.append(len(b))
1292 buffer_lens.append(len(b))
1292 buffers.extend(b)
1293 buffers.extend(b)
1293 rb = rec.pop('result_buffers', empty) or empty
1294 rb = rec.pop('result_buffers', empty) or empty
1294 if result_buffer_lens is not None:
1295 if result_buffer_lens is not None:
1295 result_buffer_lens.append(len(rb))
1296 result_buffer_lens.append(len(rb))
1296 buffers.extend(rb)
1297 buffers.extend(rb)
1297 content = dict(status='ok', records=records, buffer_lens=buffer_lens,
1298 content = dict(status='ok', records=records, buffer_lens=buffer_lens,
1298 result_buffer_lens=result_buffer_lens)
1299 result_buffer_lens=result_buffer_lens)
1299 # self.log.debug (content)
1300 # self.log.debug (content)
1300 self.session.send(self.query, "db_reply", content=content,
1301 self.session.send(self.query, "db_reply", content=content,
1301 parent=msg, ident=client_id,
1302 parent=msg, ident=client_id,
1302 buffers=buffers)
1303 buffers=buffers)
1303
1304
@@ -1,767 +1,768 b''
1 """The Python scheduler for rich scheduling.
1 """The Python scheduler for rich scheduling.
2
2
3 The Pure ZMQ scheduler does not allow routing schemes other than LRU,
3 The Pure ZMQ scheduler does not allow routing schemes other than LRU,
4 nor does it check msg_id DAG dependencies. For those, a slightly slower
4 nor does it check msg_id DAG dependencies. For those, a slightly slower
5 Python Scheduler exists.
5 Python Scheduler exists.
6
6
7 Authors:
7 Authors:
8
8
9 * Min RK
9 * Min RK
10 """
10 """
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12 # Copyright (C) 2010-2011 The IPython Development Team
12 # Copyright (C) 2010-2011 The IPython Development Team
13 #
13 #
14 # Distributed under the terms of the BSD License. The full license is in
14 # Distributed under the terms of the BSD License. The full license is in
15 # the file COPYING, distributed as part of this software.
15 # the file COPYING, distributed as part of this software.
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 #----------------------------------------------------------------------
18 #----------------------------------------------------------------------
19 # Imports
19 # Imports
20 #----------------------------------------------------------------------
20 #----------------------------------------------------------------------
21
21
22 from __future__ import print_function
22 from __future__ import print_function
23
23
24 import logging
24 import logging
25 import sys
25 import sys
26 import time
26 import time
27
27
28 from datetime import datetime, timedelta
28 from datetime import datetime, timedelta
29 from random import randint, random
29 from random import randint, random
30 from types import FunctionType
30 from types import FunctionType
31
31
32 try:
32 try:
33 import numpy
33 import numpy
34 except ImportError:
34 except ImportError:
35 numpy = None
35 numpy = None
36
36
37 import zmq
37 import zmq
38 from zmq.eventloop import ioloop, zmqstream
38 from zmq.eventloop import ioloop, zmqstream
39
39
40 # local imports
40 # local imports
41 from IPython.external.decorator import decorator
41 from IPython.external.decorator import decorator
42 from IPython.config.application import Application
42 from IPython.config.application import Application
43 from IPython.config.loader import Config
43 from IPython.config.loader import Config
44 from IPython.utils.traitlets import Instance, Dict, List, Set, Integer, Enum, CBytes
44 from IPython.utils.traitlets import Instance, Dict, List, Set, Integer, Enum, CBytes
45 from IPython.utils.py3compat import cast_bytes
45
46
46 from IPython.parallel import error, util
47 from IPython.parallel import error, util
47 from IPython.parallel.factory import SessionFactory
48 from IPython.parallel.factory import SessionFactory
48 from IPython.parallel.util import connect_logger, local_logger, asbytes
49 from IPython.parallel.util import connect_logger, local_logger
49
50
50 from .dependency import Dependency
51 from .dependency import Dependency
51
52
52 @decorator
53 @decorator
53 def logged(f,self,*args,**kwargs):
54 def logged(f,self,*args,**kwargs):
54 # print ("#--------------------")
55 # print ("#--------------------")
55 self.log.debug("scheduler::%s(*%s,**%s)", f.func_name, args, kwargs)
56 self.log.debug("scheduler::%s(*%s,**%s)", f.func_name, args, kwargs)
56 # print ("#--")
57 # print ("#--")
57 return f(self,*args, **kwargs)
58 return f(self,*args, **kwargs)
58
59
59 #----------------------------------------------------------------------
60 #----------------------------------------------------------------------
60 # Chooser functions
61 # Chooser functions
61 #----------------------------------------------------------------------
62 #----------------------------------------------------------------------
62
63
63 def plainrandom(loads):
64 def plainrandom(loads):
64 """Plain random pick."""
65 """Plain random pick."""
65 n = len(loads)
66 n = len(loads)
66 return randint(0,n-1)
67 return randint(0,n-1)
67
68
68 def lru(loads):
69 def lru(loads):
69 """Always pick the front of the line.
70 """Always pick the front of the line.
70
71
71 The content of `loads` is ignored.
72 The content of `loads` is ignored.
72
73
73 Assumes LRU ordering of loads, with oldest first.
74 Assumes LRU ordering of loads, with oldest first.
74 """
75 """
75 return 0
76 return 0
76
77
77 def twobin(loads):
78 def twobin(loads):
78 """Pick two at random, use the LRU of the two.
79 """Pick two at random, use the LRU of the two.
79
80
80 The content of loads is ignored.
81 The content of loads is ignored.
81
82
82 Assumes LRU ordering of loads, with oldest first.
83 Assumes LRU ordering of loads, with oldest first.
83 """
84 """
84 n = len(loads)
85 n = len(loads)
85 a = randint(0,n-1)
86 a = randint(0,n-1)
86 b = randint(0,n-1)
87 b = randint(0,n-1)
87 return min(a,b)
88 return min(a,b)
88
89
89 def weighted(loads):
90 def weighted(loads):
90 """Pick two at random using inverse load as weight.
91 """Pick two at random using inverse load as weight.
91
92
92 Return the less loaded of the two.
93 Return the less loaded of the two.
93 """
94 """
94 # weight 0 a million times more than 1:
95 # weight 0 a million times more than 1:
95 weights = 1./(1e-6+numpy.array(loads))
96 weights = 1./(1e-6+numpy.array(loads))
96 sums = weights.cumsum()
97 sums = weights.cumsum()
97 t = sums[-1]
98 t = sums[-1]
98 x = random()*t
99 x = random()*t
99 y = random()*t
100 y = random()*t
100 idx = 0
101 idx = 0
101 idy = 0
102 idy = 0
102 while sums[idx] < x:
103 while sums[idx] < x:
103 idx += 1
104 idx += 1
104 while sums[idy] < y:
105 while sums[idy] < y:
105 idy += 1
106 idy += 1
106 if weights[idy] > weights[idx]:
107 if weights[idy] > weights[idx]:
107 return idy
108 return idy
108 else:
109 else:
109 return idx
110 return idx
110
111
111 def leastload(loads):
112 def leastload(loads):
112 """Always choose the lowest load.
113 """Always choose the lowest load.
113
114
114 If the lowest load occurs more than once, the first
115 If the lowest load occurs more than once, the first
115 occurrence will be used. If loads has LRU ordering, this means
116 occurrence will be used. If loads has LRU ordering, this means
116 the LRU of those with the lowest load is chosen.
117 the LRU of those with the lowest load is chosen.
117 """
118 """
118 return loads.index(min(loads))
119 return loads.index(min(loads))
119
120
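A minimal sketch, with invented loads, of how the chooser functions above are
used: each takes a list of per-engine loads and returns the index of the
engine that should receive the next task (`weighted` needs numpy).

    sample_loads = [2, 0, 1, 0]
    assert leastload(sample_loads) == 1      # first occurrence of the minimum
    assert lru(sample_loads) == 0            # ignores loads; head of LRU line
    assert 0 <= plainrandom(sample_loads) <= 3
    assert 0 <= twobin(sample_loads) <= 3    # min of two picks favors the LRU
    if numpy is not None:
        idx = weighted(sample_loads)         # P(idx) ~ 1/(load + 1e-6)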
120 #---------------------------------------------------------------------
121 #---------------------------------------------------------------------
121 # Classes
122 # Classes
122 #---------------------------------------------------------------------
123 #---------------------------------------------------------------------
123
124
124
125
125 # store empty default dependency:
126 # store empty default dependency:
126 MET = Dependency([])
127 MET = Dependency([])
127
128
128
129
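MET above is an empty Dependency: it is falsy and its check() is trivially
satisfied, which is why dispatch_submission below recasts an already-met
'after' dependency to MET and skips further set comparisons. A short sketch,
assuming Dependency keeps ordinary set semantics:

    assert not MET
    assert MET.check(set(), set())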
129 class Job(object):
130 class Job(object):
130 """Simple container for a job"""
131 """Simple container for a job"""
131 def __init__(self, msg_id, raw_msg, idents, msg, header, targets, after, follow, timeout):
132 def __init__(self, msg_id, raw_msg, idents, msg, header, targets, after, follow, timeout):
132 self.msg_id = msg_id
133 self.msg_id = msg_id
133 self.raw_msg = raw_msg
134 self.raw_msg = raw_msg
134 self.idents = idents
135 self.idents = idents
135 self.msg = msg
136 self.msg = msg
136 self.header = header
137 self.header = header
137 self.targets = targets
138 self.targets = targets
138 self.after = after
139 self.after = after
139 self.follow = follow
140 self.follow = follow
140 self.timeout = timeout
141 self.timeout = timeout
141
142
142
143
143 self.timestamp = time.time()
144 self.timestamp = time.time()
144 self.blacklist = set()
145 self.blacklist = set()
145
146
146 @property
147 @property
147 def dependents(self):
148 def dependents(self):
148 return self.follow.union(self.after)
149 return self.follow.union(self.after)
149
150
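A hypothetical Job with made-up ids (all values below are illustrative),
showing that the container just records what dispatch_submission parses out
of a message, and that `dependents` is the union of the location (follow)
and time (after) dependencies:

    job = Job(msg_id='t1', raw_msg=[], idents=[b'client'], msg={}, header={},
              targets=set(), after=Dependency(['d1']),
              follow=Dependency(['d2']), timeout=None)
    assert job.dependents == set(['d1', 'd2'])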
150 class TaskScheduler(SessionFactory):
151 class TaskScheduler(SessionFactory):
151 """Python TaskScheduler object.
152 """Python TaskScheduler object.
152
153
153 This is the simplest object that supports msg_id based
154 This is the simplest object that supports msg_id based
154 DAG dependencies. *Only* task msg_ids are checked, not
155 DAG dependencies. *Only* task msg_ids are checked, not
155 msg_ids of jobs submitted via the MUX queue.
156 msg_ids of jobs submitted via the MUX queue.
156
157
157 """
158 """
158
159
159 hwm = Integer(1, config=True,
160 hwm = Integer(1, config=True,
160 help="""specify the High Water Mark (HWM) for the downstream
161 help="""specify the High Water Mark (HWM) for the downstream
161 socket in the Task scheduler. This is the maximum number
162 socket in the Task scheduler. This is the maximum number
162 of allowed outstanding tasks on each engine.
163 of allowed outstanding tasks on each engine.
163
164
164 The default (1) means that only one task can be outstanding on each
165 The default (1) means that only one task can be outstanding on each
165 engine. Setting TaskScheduler.hwm=0 means there is no limit, and the
166 engine. Setting TaskScheduler.hwm=0 means there is no limit, and the
166 engines continue to be assigned tasks while they are working,
167 engines continue to be assigned tasks while they are working,
167 effectively hiding network latency behind computation, but can result
168 effectively hiding network latency behind computation, but can result
168 in an imbalance of work when submitting many heterogeneous tasks all at
169 in an imbalance of work when submitting many heterogeneous tasks all at
169 once. Any positive value greater than one is a compromise between the
170 once. Any positive value greater than one is a compromise between the
170 two.
171 two.
171
172
172 """
173 """
173 )
174 )
174 scheme_name = Enum(('leastload', 'pure', 'lru', 'plainrandom', 'weighted', 'twobin'),
175 scheme_name = Enum(('leastload', 'pure', 'lru', 'plainrandom', 'weighted', 'twobin'),
175 'leastload', config=True, allow_none=False,
176 'leastload', config=True, allow_none=False,
176 help="""select the task scheduler scheme [default: Python LRU]
177 help="""select the task scheduler scheme [default: Python LRU]
177 Options are: 'pure', 'lru', 'plainrandom', 'weighted', 'twobin','leastload'"""
178 Options are: 'pure', 'lru', 'plainrandom', 'weighted', 'twobin','leastload'"""
178 )
179 )
179 def _scheme_name_changed(self, old, new):
180 def _scheme_name_changed(self, old, new):
180 self.log.debug("Using scheme %r"%new)
181 self.log.debug("Using scheme %r"%new)
181 self.scheme = globals()[new]
182 self.scheme = globals()[new]
182
183
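Both traits above are ordinary configurables, so the HWM and the scheme can
be set from a controller profile. A sketch of the relevant lines in an
ipcontroller_config.py (assumed standard traitlets config syntax):

    c = get_config()
    c.TaskScheduler.hwm = 0                   # no per-engine limit
    c.TaskScheduler.scheme_name = 'weighted'  # triggers _scheme_name_changed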
183 # input arguments:
184 # input arguments:
184 scheme = Instance(FunctionType) # function for determining the destination
185 scheme = Instance(FunctionType) # function for determining the destination
185 def _scheme_default(self):
186 def _scheme_default(self):
186 return leastload
187 return leastload
187 client_stream = Instance(zmqstream.ZMQStream) # client-facing stream
188 client_stream = Instance(zmqstream.ZMQStream) # client-facing stream
188 engine_stream = Instance(zmqstream.ZMQStream) # engine-facing stream
189 engine_stream = Instance(zmqstream.ZMQStream) # engine-facing stream
189 notifier_stream = Instance(zmqstream.ZMQStream) # hub-facing sub stream
190 notifier_stream = Instance(zmqstream.ZMQStream) # hub-facing sub stream
190 mon_stream = Instance(zmqstream.ZMQStream) # hub-facing pub stream
191 mon_stream = Instance(zmqstream.ZMQStream) # hub-facing pub stream
191
192
192 # internals:
193 # internals:
193 graph = Dict() # dict by msg_id of [ msg_ids that depend on key ]
194 graph = Dict() # dict by msg_id of [ msg_ids that depend on key ]
194 retries = Dict() # dict by msg_id of retries remaining (non-neg ints)
195 retries = Dict() # dict by msg_id of retries remaining (non-neg ints)
195 # waiting = List() # list of msg_ids ready to run, but haven't due to HWM
196 # waiting = List() # list of msg_ids ready to run, but haven't due to HWM
196 depending = Dict() # dict by msg_id of Jobs
197 depending = Dict() # dict by msg_id of Jobs
197 pending = Dict() # dict by engine_uuid of submitted tasks
198 pending = Dict() # dict by engine_uuid of submitted tasks
198 completed = Dict() # dict by engine_uuid of completed tasks
199 completed = Dict() # dict by engine_uuid of completed tasks
199 failed = Dict() # dict by engine_uuid of failed tasks
200 failed = Dict() # dict by engine_uuid of failed tasks
200 destinations = Dict() # dict by msg_id of engine_uuids where jobs ran (reverse of completed+failed)
201 destinations = Dict() # dict by msg_id of engine_uuids where jobs ran (reverse of completed+failed)
201 clients = Dict() # dict by msg_id for who submitted the task
202 clients = Dict() # dict by msg_id for who submitted the task
202 targets = List() # list of target IDENTs
203 targets = List() # list of target IDENTs
203 loads = List() # list of engine loads
204 loads = List() # list of engine loads
204 # full = Set() # set of IDENTs that have HWM outstanding tasks
205 # full = Set() # set of IDENTs that have HWM outstanding tasks
205 all_completed = Set() # set of all completed tasks
206 all_completed = Set() # set of all completed tasks
206 all_failed = Set() # set of all failed tasks
207 all_failed = Set() # set of all failed tasks
207 all_done = Set() # set of all finished tasks=union(completed,failed)
208 all_done = Set() # set of all finished tasks=union(completed,failed)
208 all_ids = Set() # set of all submitted task IDs
209 all_ids = Set() # set of all submitted task IDs
209
210
210 auditor = Instance('zmq.eventloop.ioloop.PeriodicCallback')
211 auditor = Instance('zmq.eventloop.ioloop.PeriodicCallback')
211
212
212 ident = CBytes() # ZMQ identity. This should just be self.session.session
213 ident = CBytes() # ZMQ identity. This should just be self.session.session
213 # but coerced to bytes
214 # but coerced to bytes
214 def _ident_default(self):
215 def _ident_default(self):
215 return self.session.bsession
216 return self.session.bsession
216
217
217 def start(self):
218 def start(self):
218 self.engine_stream.on_recv(self.dispatch_result, copy=False)
219 self.engine_stream.on_recv(self.dispatch_result, copy=False)
219 self.client_stream.on_recv(self.dispatch_submission, copy=False)
220 self.client_stream.on_recv(self.dispatch_submission, copy=False)
220
221
221 self._notification_handlers = dict(
222 self._notification_handlers = dict(
222 registration_notification = self._register_engine,
223 registration_notification = self._register_engine,
223 unregistration_notification = self._unregister_engine
224 unregistration_notification = self._unregister_engine
224 )
225 )
225 self.notifier_stream.on_recv(self.dispatch_notification)
226 self.notifier_stream.on_recv(self.dispatch_notification)
226 self.auditor = ioloop.PeriodicCallback(self.audit_timeouts, 2e3, self.loop) # every 2s
227 self.auditor = ioloop.PeriodicCallback(self.audit_timeouts, 2e3, self.loop) # every 2s
227 self.auditor.start()
228 self.auditor.start()
228 self.log.info("Scheduler started [%s]"%self.scheme_name)
229 self.log.info("Scheduler started [%s]"%self.scheme_name)
229
230
230 def resume_receiving(self):
231 def resume_receiving(self):
231 """Resume accepting jobs."""
232 """Resume accepting jobs."""
232 self.client_stream.on_recv(self.dispatch_submission, copy=False)
233 self.client_stream.on_recv(self.dispatch_submission, copy=False)
233
234
234 def stop_receiving(self):
235 def stop_receiving(self):
235 """Stop accepting jobs while there are no engines.
236 """Stop accepting jobs while there are no engines.
236 Leave them in the ZMQ queue."""
237 Leave them in the ZMQ queue."""
237 self.client_stream.on_recv(None)
238 self.client_stream.on_recv(None)
238
239
239 #-----------------------------------------------------------------------
240 #-----------------------------------------------------------------------
240 # [Un]Registration Handling
241 # [Un]Registration Handling
241 #-----------------------------------------------------------------------
242 #-----------------------------------------------------------------------
242
243
243
244
244 @util.log_errors
245 @util.log_errors
245 def dispatch_notification(self, msg):
246 def dispatch_notification(self, msg):
246 """dispatch register/unregister events."""
247 """dispatch register/unregister events."""
247 try:
248 try:
248 idents,msg = self.session.feed_identities(msg)
249 idents,msg = self.session.feed_identities(msg)
249 except ValueError:
250 except ValueError:
250 self.log.warn("task::Invalid Message: %r",msg)
251 self.log.warn("task::Invalid Message: %r",msg)
251 return
252 return
252 try:
253 try:
253 msg = self.session.unserialize(msg)
254 msg = self.session.unserialize(msg)
254 except ValueError:
255 except ValueError:
255 self.log.warn("task::Unauthorized message from: %r"%idents)
256 self.log.warn("task::Unauthorized message from: %r"%idents)
256 return
257 return
257
258
258 msg_type = msg['header']['msg_type']
259 msg_type = msg['header']['msg_type']
259
260
260 handler = self._notification_handlers.get(msg_type, None)
261 handler = self._notification_handlers.get(msg_type, None)
261 if handler is None:
262 if handler is None:
262 self.log.error("Unhandled message type: %r"%msg_type)
263 self.log.error("Unhandled message type: %r"%msg_type)
263 else:
264 else:
264 try:
265 try:
265 handler(asbytes(msg['content']['queue']))
266 handler(cast_bytes(msg['content']['queue']))
266 except Exception:
267 except Exception:
267 self.log.error("task::Invalid notification msg: %r", msg, exc_info=True)
268 self.log.error("task::Invalid notification msg: %r", msg, exc_info=True)
268
269
269 def _register_engine(self, uid):
270 def _register_engine(self, uid):
270 """New engine with ident `uid` became available."""
271 """New engine with ident `uid` became available."""
271 # head of the line:
272 # head of the line:
272 self.targets.insert(0,uid)
273 self.targets.insert(0,uid)
273 self.loads.insert(0,0)
274 self.loads.insert(0,0)
274
275
275 # initialize sets
276 # initialize sets
276 self.completed[uid] = set()
277 self.completed[uid] = set()
277 self.failed[uid] = set()
278 self.failed[uid] = set()
278 self.pending[uid] = {}
279 self.pending[uid] = {}
279
280
280 # rescan the graph:
281 # rescan the graph:
281 self.update_graph(None)
282 self.update_graph(None)
282
283
283 def _unregister_engine(self, uid):
284 def _unregister_engine(self, uid):
284 """Existing engine with ident `uid` became unavailable."""
285 """Existing engine with ident `uid` became unavailable."""
285 if len(self.targets) == 1:
286 if len(self.targets) == 1:
286 # this was our only engine
287 # this was our only engine
287 pass
288 pass
288
289
289 # handle any potentially finished tasks:
290 # handle any potentially finished tasks:
290 self.engine_stream.flush()
291 self.engine_stream.flush()
291
292
292 # don't pop destinations, because they might be used later
293 # don't pop destinations, because they might be used later
293 # map(self.destinations.pop, self.completed.pop(uid))
294 # map(self.destinations.pop, self.completed.pop(uid))
294 # map(self.destinations.pop, self.failed.pop(uid))
295 # map(self.destinations.pop, self.failed.pop(uid))
295
296
296 # prevent this engine from receiving work
297 # prevent this engine from receiving work
297 idx = self.targets.index(uid)
298 idx = self.targets.index(uid)
298 self.targets.pop(idx)
299 self.targets.pop(idx)
299 self.loads.pop(idx)
300 self.loads.pop(idx)
300
301
301 # wait 5 seconds before cleaning up pending jobs, since the results might
302 # wait 5 seconds before cleaning up pending jobs, since the results might
302 # still be incoming
303 # still be incoming
303 if self.pending[uid]:
304 if self.pending[uid]:
304 dc = ioloop.DelayedCallback(lambda : self.handle_stranded_tasks(uid), 5000, self.loop)
305 dc = ioloop.DelayedCallback(lambda : self.handle_stranded_tasks(uid), 5000, self.loop)
305 dc.start()
306 dc.start()
306 else:
307 else:
307 self.completed.pop(uid)
308 self.completed.pop(uid)
308 self.failed.pop(uid)
309 self.failed.pop(uid)
309
310
310
311
311 def handle_stranded_tasks(self, engine):
312 def handle_stranded_tasks(self, engine):
312 """Deal with jobs resident in an engine that died."""
313 """Deal with jobs resident in an engine that died."""
313 lost = self.pending[engine]
314 lost = self.pending[engine]
314 for msg_id in lost.keys():
315 for msg_id in lost.keys():
315 if msg_id not in self.pending[engine]:
316 if msg_id not in self.pending[engine]:
316 # prevent double-handling of messages
317 # prevent double-handling of messages
317 continue
318 continue
318
319
319 raw_msg = lost[msg_id][0]
320 raw_msg = lost[msg_id].raw_msg
320 idents,msg = self.session.feed_identities(raw_msg, copy=False)
321 idents,msg = self.session.feed_identities(raw_msg, copy=False)
321 parent = self.session.unpack(msg[1].bytes)
322 parent = self.session.unpack(msg[1].bytes)
322 idents = [engine, idents[0]]
323 idents = [engine, idents[0]]
323
324
324 # build fake error reply
325 # build fake error reply
325 try:
326 try:
326 raise error.EngineError("Engine %r died while running task %r"%(engine, msg_id))
327 raise error.EngineError("Engine %r died while running task %r"%(engine, msg_id))
327 except:
328 except:
328 content = error.wrap_exception()
329 content = error.wrap_exception()
329 # build fake header
330 # build fake header
330 header = dict(
331 header = dict(
331 status='error',
332 status='error',
332 engine=engine,
333 engine=engine,
333 date=datetime.now(),
334 date=datetime.now(),
334 )
335 )
335 msg = self.session.msg('apply_reply', content, parent=parent, subheader=header)
336 msg = self.session.msg('apply_reply', content, parent=parent, subheader=header)
336 raw_reply = map(zmq.Message, self.session.serialize(msg, ident=idents))
337 raw_reply = map(zmq.Message, self.session.serialize(msg, ident=idents))
337 # and dispatch it
338 # and dispatch it
338 self.dispatch_result(raw_reply)
339 self.dispatch_result(raw_reply)
339
340
340 # finally scrub completed/failed lists
341 # finally scrub completed/failed lists
341 self.completed.pop(engine)
342 self.completed.pop(engine)
342 self.failed.pop(engine)
343 self.failed.pop(engine)
343
344
344
345
345 #-----------------------------------------------------------------------
346 #-----------------------------------------------------------------------
346 # Job Submission
347 # Job Submission
347 #-----------------------------------------------------------------------
348 #-----------------------------------------------------------------------
348
349
349
350
350 @util.log_errors
351 @util.log_errors
351 def dispatch_submission(self, raw_msg):
352 def dispatch_submission(self, raw_msg):
352 """Dispatch job submission to appropriate handlers."""
353 """Dispatch job submission to appropriate handlers."""
353 # ensure targets up to date:
354 # ensure targets up to date:
354 self.notifier_stream.flush()
355 self.notifier_stream.flush()
355 try:
356 try:
356 idents, msg = self.session.feed_identities(raw_msg, copy=False)
357 idents, msg = self.session.feed_identities(raw_msg, copy=False)
357 msg = self.session.unserialize(msg, content=False, copy=False)
358 msg = self.session.unserialize(msg, content=False, copy=False)
358 except Exception:
359 except Exception:
359 self.log.error("task::Invalid task msg: %r"%raw_msg, exc_info=True)
360 self.log.error("task::Invalid task msg: %r"%raw_msg, exc_info=True)
360 return
361 return
361
362
362
363
363 # send to monitor
364 # send to monitor
364 self.mon_stream.send_multipart([b'intask']+raw_msg, copy=False)
365 self.mon_stream.send_multipart([b'intask']+raw_msg, copy=False)
365
366
366 header = msg['header']
367 header = msg['header']
367 msg_id = header['msg_id']
368 msg_id = header['msg_id']
368 self.all_ids.add(msg_id)
369 self.all_ids.add(msg_id)
369
370
370 # get targets as a set of bytes objects
371 # get targets as a set of bytes objects
371 # from a list of unicode objects
372 # from a list of unicode objects
372 targets = header.get('targets', [])
373 targets = header.get('targets', [])
373 targets = map(asbytes, targets)
374 targets = map(cast_bytes, targets)
374 targets = set(targets)
375 targets = set(targets)
375
376
376 retries = header.get('retries', 0)
377 retries = header.get('retries', 0)
377 self.retries[msg_id] = retries
378 self.retries[msg_id] = retries
378
379
379 # time dependencies
380 # time dependencies
380 after = header.get('after', None)
381 after = header.get('after', None)
381 if after:
382 if after:
382 after = Dependency(after)
383 after = Dependency(after)
383 if after.all:
384 if after.all:
384 if after.success:
385 if after.success:
385 after = Dependency(after.difference(self.all_completed),
386 after = Dependency(after.difference(self.all_completed),
386 success=after.success,
387 success=after.success,
387 failure=after.failure,
388 failure=after.failure,
388 all=after.all,
389 all=after.all,
389 )
390 )
390 if after.failure:
391 if after.failure:
391 after = Dependency(after.difference(self.all_failed),
392 after = Dependency(after.difference(self.all_failed),
392 success=after.success,
393 success=after.success,
393 failure=after.failure,
394 failure=after.failure,
394 all=after.all,
395 all=after.all,
395 )
396 )
396 if after.check(self.all_completed, self.all_failed):
397 if after.check(self.all_completed, self.all_failed):
397 # recast as empty set, if `after` already met,
398 # recast as empty set, if `after` already met,
398 # to prevent unnecessary set comparisons
399 # to prevent unnecessary set comparisons
399 after = MET
400 after = MET
400 else:
401 else:
401 after = MET
402 after = MET
402
403
403 # location dependencies
404 # location dependencies
404 follow = Dependency(header.get('follow', []))
405 follow = Dependency(header.get('follow', []))
405
406
406 # turn timeouts into datetime objects:
407 # turn timeouts into datetime objects:
407 timeout = header.get('timeout', None)
408 timeout = header.get('timeout', None)
408 if timeout:
409 if timeout:
409 # cast to float, because jsonlib returns floats as decimal.Decimal,
410 # cast to float, because jsonlib returns floats as decimal.Decimal,
410 # which timedelta does not accept
411 # which timedelta does not accept
411 timeout = datetime.now() + timedelta(0,float(timeout),0)
412 timeout = datetime.now() + timedelta(0,float(timeout),0)
412
413
413 job = Job(msg_id=msg_id, raw_msg=raw_msg, idents=idents, msg=msg,
414 job = Job(msg_id=msg_id, raw_msg=raw_msg, idents=idents, msg=msg,
414 header=header, targets=targets, after=after, follow=follow,
415 header=header, targets=targets, after=after, follow=follow,
415 timeout=timeout,
416 timeout=timeout,
416 )
417 )
417
418
418 # validate and reduce dependencies:
419 # validate and reduce dependencies:
419 for dep in after,follow:
420 for dep in after,follow:
420 if not dep: # empty dependency
421 if not dep: # empty dependency
421 continue
422 continue
422 # check valid:
423 # check valid:
423 if msg_id in dep or dep.difference(self.all_ids):
424 if msg_id in dep or dep.difference(self.all_ids):
424 self.depending[msg_id] = job
425 self.depending[msg_id] = job
425 return self.fail_unreachable(msg_id, error.InvalidDependency)
426 return self.fail_unreachable(msg_id, error.InvalidDependency)
426 # check if unreachable:
427 # check if unreachable:
427 if dep.unreachable(self.all_completed, self.all_failed):
428 if dep.unreachable(self.all_completed, self.all_failed):
428 self.depending[msg_id] = job
429 self.depending[msg_id] = job
429 return self.fail_unreachable(msg_id)
430 return self.fail_unreachable(msg_id)
430
431
431 if after.check(self.all_completed, self.all_failed):
432 if after.check(self.all_completed, self.all_failed):
432 # time deps already met, try to run
433 # time deps already met, try to run
433 if not self.maybe_run(job):
434 if not self.maybe_run(job):
434 # can't run yet
435 # can't run yet
435 if msg_id not in self.all_failed:
436 if msg_id not in self.all_failed:
436 # could have failed as unreachable
437 # could have failed as unreachable
437 self.save_unmet(job)
438 self.save_unmet(job)
438 else:
439 else:
439 self.save_unmet(job)
440 self.save_unmet(job)
440
441
441 def audit_timeouts(self):
442 def audit_timeouts(self):
442 """Audit all waiting tasks for expired timeouts."""
443 """Audit all waiting tasks for expired timeouts."""
443 now = datetime.now()
444 now = datetime.now()
444 for msg_id in self.depending.keys():
445 for msg_id in self.depending.keys():
445 # must recheck, in case one failure cascaded to another:
446 # must recheck, in case one failure cascaded to another:
446 if msg_id in self.depending:
447 if msg_id in self.depending:
447 job = self.depending[msg_id]
448 job = self.depending[msg_id]
448 if job.timeout and job.timeout < now:
449 if job.timeout and job.timeout < now:
449 self.fail_unreachable(msg_id, error.TaskTimeout)
450 self.fail_unreachable(msg_id, error.TaskTimeout)
450
451
451 def fail_unreachable(self, msg_id, why=error.ImpossibleDependency):
452 def fail_unreachable(self, msg_id, why=error.ImpossibleDependency):
452 """a task has become unreachable, send a reply with an ImpossibleDependency
453 """a task has become unreachable, send a reply with an ImpossibleDependency
453 error."""
454 error."""
454 if msg_id not in self.depending:
455 if msg_id not in self.depending:
455 self.log.error("msg %r already failed!", msg_id)
456 self.log.error("msg %r already failed!", msg_id)
456 return
457 return
457 job = self.depending.pop(msg_id)
458 job = self.depending.pop(msg_id)
458 for mid in job.dependents:
459 for mid in job.dependents:
459 if mid in self.graph:
460 if mid in self.graph:
460 self.graph[mid].remove(msg_id)
461 self.graph[mid].remove(msg_id)
461
462
462 try:
463 try:
463 raise why()
464 raise why()
464 except:
465 except:
465 content = error.wrap_exception()
466 content = error.wrap_exception()
466
467
467 self.all_done.add(msg_id)
468 self.all_done.add(msg_id)
468 self.all_failed.add(msg_id)
469 self.all_failed.add(msg_id)
469
470
470 msg = self.session.send(self.client_stream, 'apply_reply', content,
471 msg = self.session.send(self.client_stream, 'apply_reply', content,
471 parent=job.header, ident=job.idents)
472 parent=job.header, ident=job.idents)
472 self.session.send(self.mon_stream, msg, ident=[b'outtask']+job.idents)
473 self.session.send(self.mon_stream, msg, ident=[b'outtask']+job.idents)
473
474
474 self.update_graph(msg_id, success=False)
475 self.update_graph(msg_id, success=False)
475
476
476 def maybe_run(self, job):
477 def maybe_run(self, job):
477 """check location dependencies, and run if they are met."""
478 """check location dependencies, and run if they are met."""
478 msg_id = job.msg_id
479 msg_id = job.msg_id
479 self.log.debug("Attempting to assign task %s", msg_id)
480 self.log.debug("Attempting to assign task %s", msg_id)
480 if not self.targets:
481 if not self.targets:
481 # no engines, definitely can't run
482 # no engines, definitely can't run
482 return False
483 return False
483
484
484 if job.follow or job.targets or job.blacklist or self.hwm:
485 if job.follow or job.targets or job.blacklist or self.hwm:
485 # we need a can_run filter
486 # we need a can_run filter
486 def can_run(idx):
487 def can_run(idx):
487 # check hwm
488 # check hwm
488 if self.hwm and self.loads[idx] == self.hwm:
489 if self.hwm and self.loads[idx] == self.hwm:
489 return False
490 return False
490 target = self.targets[idx]
491 target = self.targets[idx]
491 # check blacklist
492 # check blacklist
492 if target in job.blacklist:
493 if target in job.blacklist:
493 return False
494 return False
494 # check targets
495 # check targets
495 if job.targets and target not in job.targets:
496 if job.targets and target not in job.targets:
496 return False
497 return False
497 # check follow
498 # check follow
498 return job.follow.check(self.completed[target], self.failed[target])
499 return job.follow.check(self.completed[target], self.failed[target])
499
500
500 indices = filter(can_run, range(len(self.targets)))
501 indices = filter(can_run, range(len(self.targets)))
501
502
502 if not indices:
503 if not indices:
503 # couldn't run
504 # couldn't run
504 if job.follow.all:
505 if job.follow.all:
505 # check follow for impossibility
506 # check follow for impossibility
506 dests = set()
507 dests = set()
507 relevant = set()
508 relevant = set()
508 if job.follow.success:
509 if job.follow.success:
509 relevant = self.all_completed
510 relevant = self.all_completed
510 if job.follow.failure:
511 if job.follow.failure:
511 relevant = relevant.union(self.all_failed)
512 relevant = relevant.union(self.all_failed)
512 for m in job.follow.intersection(relevant):
513 for m in job.follow.intersection(relevant):
513 dests.add(self.destinations[m])
514 dests.add(self.destinations[m])
514 if len(dests) > 1:
515 if len(dests) > 1:
515 self.depending[msg_id] = job
516 self.depending[msg_id] = job
516 self.fail_unreachable(msg_id)
517 self.fail_unreachable(msg_id)
517 return False
518 return False
518 if job.targets:
519 if job.targets:
519 # check blacklist+targets for impossibility
520 # check blacklist+targets for impossibility
520 job.targets.difference_update(job.blacklist)
521 job.targets.difference_update(job.blacklist)
521 if not job.targets or not job.targets.intersection(self.targets):
522 if not job.targets or not job.targets.intersection(self.targets):
522 self.depending[msg_id] = job
523 self.depending[msg_id] = job
523 self.fail_unreachable(msg_id)
524 self.fail_unreachable(msg_id)
524 return False
525 return False
525 return False
526 return False
526 else:
527 else:
527 indices = None
528 indices = None
528
529
529 self.submit_task(job, indices)
530 self.submit_task(job, indices)
530 return True
531 return True
531
532
532 def save_unmet(self, job):
533 def save_unmet(self, job):
533 """Save a message for later submission when its dependencies are met."""
534 """Save a message for later submission when its dependencies are met."""
534 msg_id = job.msg_id
535 msg_id = job.msg_id
535 self.depending[msg_id] = job
536 self.depending[msg_id] = job
536 # track the ids in follow or after, but not those already finished
537 # track the ids in follow or after, but not those already finished
537 for dep_id in job.after.union(job.follow).difference(self.all_done):
538 for dep_id in job.after.union(job.follow).difference(self.all_done):
538 if dep_id not in self.graph:
539 if dep_id not in self.graph:
539 self.graph[dep_id] = set()
540 self.graph[dep_id] = set()
540 self.graph[dep_id].add(msg_id)
541 self.graph[dep_id].add(msg_id)
541
542
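An illustration, with invented msg_ids, of the reverse dependency map that
save_unmet maintains: each unfinished dependency points at the set of tasks
waiting on it, and update_graph pops an entry once that dependency finishes:

    # self.graph == {'dep1': set(['taskA', 'taskB']),
    #                'dep2': set(['taskA'])}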
542 def submit_task(self, job, indices=None):
543 def submit_task(self, job, indices=None):
543 """Submit a task to any of a subset of our targets."""
544 """Submit a task to any of a subset of our targets."""
544 if indices:
545 if indices:
545 loads = [self.loads[i] for i in indices]
546 loads = [self.loads[i] for i in indices]
546 else:
547 else:
547 loads = self.loads
548 loads = self.loads
548 idx = self.scheme(loads)
549 idx = self.scheme(loads)
549 if indices:
550 if indices:
550 idx = indices[idx]
551 idx = indices[idx]
551 target = self.targets[idx]
552 target = self.targets[idx]
552 # print (target, map(str, msg[:3]))
553 # print (target, map(str, msg[:3]))
553 # send job to the engine
554 # send job to the engine
554 self.engine_stream.send(target, flags=zmq.SNDMORE, copy=False)
555 self.engine_stream.send(target, flags=zmq.SNDMORE, copy=False)
555 self.engine_stream.send_multipart(job.raw_msg, copy=False)
556 self.engine_stream.send_multipart(job.raw_msg, copy=False)
556 # update load
557 # update load
557 self.add_job(idx)
558 self.add_job(idx)
558 self.pending[target][job.msg_id] = job
559 self.pending[target][job.msg_id] = job
559 # notify Hub
560 # notify Hub
560 content = dict(msg_id=job.msg_id, engine_id=target.decode('ascii'))
561 content = dict(msg_id=job.msg_id, engine_id=target.decode('ascii'))
561 self.session.send(self.mon_stream, 'task_destination', content=content,
562 self.session.send(self.mon_stream, 'task_destination', content=content,
562 ident=[b'tracktask',self.ident])
563 ident=[b'tracktask',self.ident])
563
564
564
565
565 #-----------------------------------------------------------------------
566 #-----------------------------------------------------------------------
566 # Result Handling
567 # Result Handling
567 #-----------------------------------------------------------------------
568 #-----------------------------------------------------------------------
568
569
569
570
570 @util.log_errors
571 @util.log_errors
571 def dispatch_result(self, raw_msg):
572 def dispatch_result(self, raw_msg):
572 """dispatch method for result replies"""
573 """dispatch method for result replies"""
573 try:
574 try:
574 idents,msg = self.session.feed_identities(raw_msg, copy=False)
575 idents,msg = self.session.feed_identities(raw_msg, copy=False)
575 msg = self.session.unserialize(msg, content=False, copy=False)
576 msg = self.session.unserialize(msg, content=False, copy=False)
576 engine = idents[0]
577 engine = idents[0]
577 try:
578 try:
578 idx = self.targets.index(engine)
579 idx = self.targets.index(engine)
579 except ValueError:
580 except ValueError:
580 pass # skip load-update for dead engines
581 pass # skip load-update for dead engines
581 else:
582 else:
582 self.finish_job(idx)
583 self.finish_job(idx)
583 except Exception:
584 except Exception:
584 self.log.error("task::Invalid result: %r", raw_msg, exc_info=True)
585 self.log.error("task::Invalid result: %r", raw_msg, exc_info=True)
585 return
586 return
586
587
587 header = msg['header']
588 header = msg['header']
588 parent = msg['parent_header']
589 parent = msg['parent_header']
589 if header.get('dependencies_met', True):
590 if header.get('dependencies_met', True):
590 success = (header['status'] == 'ok')
591 success = (header['status'] == 'ok')
591 msg_id = parent['msg_id']
592 msg_id = parent['msg_id']
592 retries = self.retries[msg_id]
593 retries = self.retries[msg_id]
593 if not success and retries > 0:
594 if not success and retries > 0:
594 # failed
595 # failed
595 self.retries[msg_id] = retries - 1
596 self.retries[msg_id] = retries - 1
596 self.handle_unmet_dependency(idents, parent)
597 self.handle_unmet_dependency(idents, parent)
597 else:
598 else:
598 del self.retries[msg_id]
599 del self.retries[msg_id]
599 # relay to client and update graph
600 # relay to client and update graph
600 self.handle_result(idents, parent, raw_msg, success)
601 self.handle_result(idents, parent, raw_msg, success)
601 # send to Hub monitor
602 # send to Hub monitor
602 self.mon_stream.send_multipart([b'outtask']+raw_msg, copy=False)
603 self.mon_stream.send_multipart([b'outtask']+raw_msg, copy=False)
603 else:
604 else:
604 self.handle_unmet_dependency(idents, parent)
605 self.handle_unmet_dependency(idents, parent)
605
606
606 def handle_result(self, idents, parent, raw_msg, success=True):
607 def handle_result(self, idents, parent, raw_msg, success=True):
607 """handle a real task result, either success or failure"""
608 """handle a real task result, either success or failure"""
608 # first, relay result to client
609 # first, relay result to client
609 engine = idents[0]
610 engine = idents[0]
610 client = idents[1]
611 client = idents[1]
611 # swap_ids for XREP-XREP mirror
612 # swap_ids for XREP-XREP mirror
612 raw_msg[:2] = [client,engine]
613 raw_msg[:2] = [client,engine]
613 # print (map(str, raw_msg[:4]))
614 # print (map(str, raw_msg[:4]))
614 self.client_stream.send_multipart(raw_msg, copy=False)
615 self.client_stream.send_multipart(raw_msg, copy=False)
615 # now, update our data structures
616 # now, update our data structures
616 msg_id = parent['msg_id']
617 msg_id = parent['msg_id']
617 self.pending[engine].pop(msg_id)
618 self.pending[engine].pop(msg_id)
618 if success:
619 if success:
619 self.completed[engine].add(msg_id)
620 self.completed[engine].add(msg_id)
620 self.all_completed.add(msg_id)
621 self.all_completed.add(msg_id)
621 else:
622 else:
622 self.failed[engine].add(msg_id)
623 self.failed[engine].add(msg_id)
623 self.all_failed.add(msg_id)
624 self.all_failed.add(msg_id)
624 self.all_done.add(msg_id)
625 self.all_done.add(msg_id)
625 self.destinations[msg_id] = engine
626 self.destinations[msg_id] = engine
626
627
627 self.update_graph(msg_id, success)
628 self.update_graph(msg_id, success)
628
629
629 def handle_unmet_dependency(self, idents, parent):
630 def handle_unmet_dependency(self, idents, parent):
630 """handle an unmet dependency"""
631 """handle an unmet dependency"""
631 engine = idents[0]
632 engine = idents[0]
632 msg_id = parent['msg_id']
633 msg_id = parent['msg_id']
633
634
634 job = self.pending[engine].pop(msg_id)
635 job = self.pending[engine].pop(msg_id)
635 job.blacklist.add(engine)
636 job.blacklist.add(engine)
636
637
637 if job.blacklist == job.targets:
638 if job.blacklist == job.targets:
638 self.depending[msg_id] = job
639 self.depending[msg_id] = job
639 self.fail_unreachable(msg_id)
640 self.fail_unreachable(msg_id)
640 elif not self.maybe_run(job):
641 elif not self.maybe_run(job):
641 # resubmit failed
642 # resubmit failed
642 if msg_id not in self.all_failed:
643 if msg_id not in self.all_failed:
643 # put it back in our dependency tree
644 # put it back in our dependency tree
644 self.save_unmet(job)
645 self.save_unmet(job)
645
646
646 if self.hwm:
647 if self.hwm:
647 try:
648 try:
648 idx = self.targets.index(engine)
649 idx = self.targets.index(engine)
649 except ValueError:
650 except ValueError:
650 pass # skip load-update for dead engines
651 pass # skip load-update for dead engines
651 else:
652 else:
652 if self.loads[idx] == self.hwm-1:
653 if self.loads[idx] == self.hwm-1:
653 self.update_graph(None)
654 self.update_graph(None)
654
655
655
656
656
657
657 def update_graph(self, dep_id=None, success=True):
658 def update_graph(self, dep_id=None, success=True):
658 """dep_id just finished. Update our dependency
659 """dep_id just finished. Update our dependency
659 graph and submit any jobs that just became runnable.
660 graph and submit any jobs that just became runnable.
660
661
661 Called with dep_id=None to update entire graph for hwm, but without finishing
662 Called with dep_id=None to update entire graph for hwm, but without finishing
662 a task.
663 a task.
663 """
664 """
664 # print ("\n\n***********")
665 # print ("\n\n***********")
665 # pprint (dep_id)
666 # pprint (dep_id)
666 # pprint (self.graph)
667 # pprint (self.graph)
667 # pprint (self.depending)
668 # pprint (self.depending)
668 # pprint (self.all_completed)
669 # pprint (self.all_completed)
669 # pprint (self.all_failed)
670 # pprint (self.all_failed)
670 # print ("\n\n***********\n\n")
671 # print ("\n\n***********\n\n")
671 # update any jobs that depended on the dependency
672 # update any jobs that depended on the dependency
672 jobs = self.graph.pop(dep_id, [])
673 jobs = self.graph.pop(dep_id, [])
673
674
674 # recheck *all* jobs if
675 # recheck *all* jobs if
675 # a) we have HWM and an engine just became no longer full
676 # a) we have HWM and an engine just became no longer full
676 # or b) dep_id was given as None
677 # or b) dep_id was given as None
677
678
678 if dep_id is None or self.hwm and any( [ load==self.hwm-1 for load in self.loads ]):
679 if dep_id is None or self.hwm and any( [ load==self.hwm-1 for load in self.loads ]):
679 jobs = self.depending.keys()
680 jobs = self.depending.keys()
680
681
681 for msg_id in sorted(jobs, key=lambda msg_id: self.depending[msg_id].timestamp):
682 for msg_id in sorted(jobs, key=lambda msg_id: self.depending[msg_id].timestamp):
682 job = self.depending[msg_id]
683 job = self.depending[msg_id]
683
684
684 if job.after.unreachable(self.all_completed, self.all_failed)\
685 if job.after.unreachable(self.all_completed, self.all_failed)\
685 or job.follow.unreachable(self.all_completed, self.all_failed):
686 or job.follow.unreachable(self.all_completed, self.all_failed):
686 self.fail_unreachable(msg_id)
687 self.fail_unreachable(msg_id)
687
688
688 elif job.after.check(self.all_completed, self.all_failed): # time deps met, maybe run
689 elif job.after.check(self.all_completed, self.all_failed): # time deps met, maybe run
689 if self.maybe_run(job):
690 if self.maybe_run(job):
690
691
691 self.depending.pop(msg_id)
692 self.depending.pop(msg_id)
692 for mid in job.dependents:
693 for mid in job.dependents:
693 if mid in self.graph:
694 if mid in self.graph:
694 self.graph[mid].remove(msg_id)
695 self.graph[mid].remove(msg_id)
695
696
696 #----------------------------------------------------------------------
697 #----------------------------------------------------------------------
697 # methods to be overridden by subclasses
698 # methods to be overridden by subclasses
698 #----------------------------------------------------------------------
699 #----------------------------------------------------------------------
699
700
700 def add_job(self, idx):
701 def add_job(self, idx):
701 """Called after self.targets[idx] just got the job with header.
702 """Called after self.targets[idx] just got the job with header.
702 Override with subclasses. The default ordering is simple LRU.
703 Override with subclasses. The default ordering is simple LRU.
703 The default loads are the number of outstanding jobs."""
704 The default loads are the number of outstanding jobs."""
704 self.loads[idx] += 1
705 self.loads[idx] += 1
705 for lis in (self.targets, self.loads):
706 for lis in (self.targets, self.loads):
706 lis.append(lis.pop(idx))
707 lis.append(lis.pop(idx))
707
708
708
709
709 def finish_job(self, idx):
710 def finish_job(self, idx):
710 """Called after self.targets[idx] just finished a job.
711 """Called after self.targets[idx] just finished a job.
711 Override in subclasses."""
712 Override in subclasses."""
712 self.loads[idx] -= 1
713 self.loads[idx] -= 1
713
714
714
715
715
716
716 def launch_scheduler(in_addr, out_addr, mon_addr, not_addr, config=None,
717 def launch_scheduler(in_addr, out_addr, mon_addr, not_addr, config=None,
717 logname='root', log_url=None, loglevel=logging.DEBUG,
718 logname='root', log_url=None, loglevel=logging.DEBUG,
718 identity=b'task', in_thread=False):
719 identity=b'task', in_thread=False):
719
720
720 ZMQStream = zmqstream.ZMQStream
721 ZMQStream = zmqstream.ZMQStream
721
722
722 if config:
723 if config:
723 # unwrap dict back into Config
724 # unwrap dict back into Config
724 config = Config(config)
725 config = Config(config)
725
726
726 if in_thread:
727 if in_thread:
727 # use instance() to get the same Context/Loop as our parent
728 # use instance() to get the same Context/Loop as our parent
728 ctx = zmq.Context.instance()
729 ctx = zmq.Context.instance()
729 loop = ioloop.IOLoop.instance()
730 loop = ioloop.IOLoop.instance()
730 else:
731 else:
731 # in a process, don't use instance()
732 # in a process, don't use instance()
732 # for safety with multiprocessing
733 # for safety with multiprocessing
733 ctx = zmq.Context()
734 ctx = zmq.Context()
734 loop = ioloop.IOLoop()
735 loop = ioloop.IOLoop()
735 ins = ZMQStream(ctx.socket(zmq.ROUTER),loop)
736 ins = ZMQStream(ctx.socket(zmq.ROUTER),loop)
736 ins.setsockopt(zmq.IDENTITY, identity)
737 ins.setsockopt(zmq.IDENTITY, identity)
737 ins.bind(in_addr)
738 ins.bind(in_addr)
738
739
739 outs = ZMQStream(ctx.socket(zmq.ROUTER),loop)
740 outs = ZMQStream(ctx.socket(zmq.ROUTER),loop)
740 outs.setsockopt(zmq.IDENTITY, identity)
741 outs.setsockopt(zmq.IDENTITY, identity)
741 outs.bind(out_addr)
742 outs.bind(out_addr)
742 mons = zmqstream.ZMQStream(ctx.socket(zmq.PUB),loop)
743 mons = zmqstream.ZMQStream(ctx.socket(zmq.PUB),loop)
743 mons.connect(mon_addr)
744 mons.connect(mon_addr)
744 nots = zmqstream.ZMQStream(ctx.socket(zmq.SUB),loop)
745 nots = zmqstream.ZMQStream(ctx.socket(zmq.SUB),loop)
745 nots.setsockopt(zmq.SUBSCRIBE, b'')
746 nots.setsockopt(zmq.SUBSCRIBE, b'')
746 nots.connect(not_addr)
747 nots.connect(not_addr)
747
748
748 # setup logging.
749 # setup logging.
749 if in_thread:
750 if in_thread:
750 log = Application.instance().log
751 log = Application.instance().log
751 else:
752 else:
752 if log_url:
753 if log_url:
753 log = connect_logger(logname, ctx, log_url, root="scheduler", loglevel=loglevel)
754 log = connect_logger(logname, ctx, log_url, root="scheduler", loglevel=loglevel)
754 else:
755 else:
755 log = local_logger(logname, loglevel)
756 log = local_logger(logname, loglevel)
756
757
757 scheduler = TaskScheduler(client_stream=ins, engine_stream=outs,
758 scheduler = TaskScheduler(client_stream=ins, engine_stream=outs,
758 mon_stream=mons, notifier_stream=nots,
759 mon_stream=mons, notifier_stream=nots,
759 loop=loop, log=log,
760 loop=loop, log=log,
760 config=config)
761 config=config)
761 scheduler.start()
762 scheduler.start()
762 if not in_thread:
763 if not in_thread:
763 try:
764 try:
764 loop.start()
765 loop.start()
765 except KeyboardInterrupt:
766 except KeyboardInterrupt:
766 scheduler.log.critical("Interrupted, exiting...")
767 scheduler.log.critical("Interrupted, exiting...")
767
768
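A minimal usage sketch with made-up TCP endpoints; unless in_thread=True,
launch_scheduler blocks in its own IOLoop:

    launch_scheduler('tcp://127.0.0.1:5555',  # in_addr: client-facing ROUTER
                     'tcp://127.0.0.1:5556',  # out_addr: engine-facing ROUTER
                     'tcp://127.0.0.1:5557',  # mon_addr: PUB to Hub monitor
                     'tcp://127.0.0.1:5558',  # not_addr: SUB for notifications
                     identity=b'task')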
@@ -1,236 +1,236 b''
1 """A simple engine that talks to a controller over 0MQ.
1 """A simple engine that talks to a controller over 0MQ.
2 It handles registration, etc., and launches a kernel
2 It handles registration, etc., and launches a kernel
3 connected to the Controller's Schedulers.
3 connected to the Controller's Schedulers.
4
4
5 Authors:
5 Authors:
6
6
7 * Min RK
7 * Min RK
8 """
8 """
9 #-----------------------------------------------------------------------------
9 #-----------------------------------------------------------------------------
10 # Copyright (C) 2010-2011 The IPython Development Team
10 # Copyright (C) 2010-2011 The IPython Development Team
11 #
11 #
12 # Distributed under the terms of the BSD License. The full license is in
12 # Distributed under the terms of the BSD License. The full license is in
13 # the file COPYING, distributed as part of this software.
13 # the file COPYING, distributed as part of this software.
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15
15
16 from __future__ import print_function
16 from __future__ import print_function
17
17
18 import sys
18 import sys
19 import time
19 import time
20 from getpass import getpass
20 from getpass import getpass
21
21
22 import zmq
22 import zmq
23 from zmq.eventloop import ioloop, zmqstream
23 from zmq.eventloop import ioloop, zmqstream
24
24
25 from IPython.external.ssh import tunnel
25 from IPython.external.ssh import tunnel
26 # internal
26 # internal
27 from IPython.utils.traitlets import (
27 from IPython.utils.traitlets import (
28 Instance, Dict, Integer, Type, CFloat, Unicode, CBytes, Bool
28 Instance, Dict, Integer, Type, CFloat, Unicode, CBytes, Bool
29 )
29 )
30 from IPython.utils import py3compat
30 from IPython.utils.py3compat import cast_bytes
31
31
32 from IPython.parallel.controller.heartmonitor import Heart
32 from IPython.parallel.controller.heartmonitor import Heart
33 from IPython.parallel.factory import RegistrationFactory
33 from IPython.parallel.factory import RegistrationFactory
34 from IPython.parallel.util import disambiguate_url, asbytes
34 from IPython.parallel.util import disambiguate_url
35
35
36 from IPython.zmq.session import Message
36 from IPython.zmq.session import Message
37 from IPython.zmq.ipkernel import Kernel
37 from IPython.zmq.ipkernel import Kernel
38
38
39 class EngineFactory(RegistrationFactory):
39 class EngineFactory(RegistrationFactory):
40 """IPython engine"""
40 """IPython engine"""
41
41
42 # configurables:
42 # configurables:
43 out_stream_factory=Type('IPython.zmq.iostream.OutStream', config=True,
43 out_stream_factory=Type('IPython.zmq.iostream.OutStream', config=True,
44 help="""The OutStream for handling stdout/err.
44 help="""The OutStream for handling stdout/err.
45 Typically 'IPython.zmq.iostream.OutStream'""")
45 Typically 'IPython.zmq.iostream.OutStream'""")
46 display_hook_factory=Type('IPython.zmq.displayhook.ZMQDisplayHook', config=True,
46 display_hook_factory=Type('IPython.zmq.displayhook.ZMQDisplayHook', config=True,
47 help="""The class for handling displayhook.
47 help="""The class for handling displayhook.
48 Typically 'IPython.zmq.displayhook.ZMQDisplayHook'""")
48 Typically 'IPython.zmq.displayhook.ZMQDisplayHook'""")
49 location=Unicode(config=True,
49 location=Unicode(config=True,
50 help="""The location (an IP address) of the controller. This is
50 help="""The location (an IP address) of the controller. This is
51 used for disambiguating URLs, to determine whether to
51 used for disambiguating URLs, to determine whether to
52 connect via loopback or via the public address.""")
52 connect via loopback or via the public address.""")
53 timeout=CFloat(2,config=True,
53 timeout=CFloat(2,config=True,
54 help="""The time (in seconds) to wait for the Controller to respond
54 help="""The time (in seconds) to wait for the Controller to respond
55 to registration requests before giving up.""")
55 to registration requests before giving up.""")
56 sshserver=Unicode(config=True,
56 sshserver=Unicode(config=True,
57 help="""The SSH server to use for tunneling connections to the Controller.""")
57 help="""The SSH server to use for tunneling connections to the Controller.""")
58 sshkey=Unicode(config=True,
58 sshkey=Unicode(config=True,
59 help="""The SSH private key file to use when tunneling connections to the Controller.""")
59 help="""The SSH private key file to use when tunneling connections to the Controller.""")
60 paramiko=Bool(sys.platform == 'win32', config=True,
60 paramiko=Bool(sys.platform == 'win32', config=True,
61 help="""Whether to use paramiko instead of openssh for tunnels.""")
61 help="""Whether to use paramiko instead of openssh for tunnels.""")
62
62
63 # not configurable:
63 # not configurable:
64 user_ns=Dict()
64 user_ns=Dict()
65 id=Integer(allow_none=True)
65 id=Integer(allow_none=True)
66 registrar=Instance('zmq.eventloop.zmqstream.ZMQStream')
66 registrar=Instance('zmq.eventloop.zmqstream.ZMQStream')
67 kernel=Instance(Kernel)
67 kernel=Instance(Kernel)
68
68
69 bident = CBytes()
69 bident = CBytes()
70 ident = Unicode()
70 ident = Unicode()
71 def _ident_changed(self, name, old, new):
71 def _ident_changed(self, name, old, new):
72 self.bident = asbytes(new)
72 self.bident = cast_bytes(new)
73 using_ssh=Bool(False)
73 using_ssh=Bool(False)
74
74
75
75
76 def __init__(self, **kwargs):
76 def __init__(self, **kwargs):
77 super(EngineFactory, self).__init__(**kwargs)
77 super(EngineFactory, self).__init__(**kwargs)
78 self.ident = self.session.session
78 self.ident = self.session.session
79
79
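On the asbytes -> cast_bytes swap above: both coerce unicode identities to
bytes, so bident remains a valid ZMQ identity on Python 2 and 3. An
illustrative check, assuming cast_bytes defaults to utf8 encoding:

    assert cast_bytes(u'engine-id') == b'engine-id'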
80 def init_connector(self):
80 def init_connector(self):
81 """construct connection function, which handles tunnels."""
81 """construct connection function, which handles tunnels."""
82 self.using_ssh = bool(self.sshkey or self.sshserver)
82 self.using_ssh = bool(self.sshkey or self.sshserver)
83
83
84 if self.sshkey and not self.sshserver:
84 if self.sshkey and not self.sshserver:
85 # We are using ssh directly to the controller, tunneling localhost to localhost
85 # We are using ssh directly to the controller, tunneling localhost to localhost
86 self.sshserver = self.url.split('://')[1].split(':')[0]
86 self.sshserver = self.url.split('://')[1].split(':')[0]
87
87
88 if self.using_ssh:
88 if self.using_ssh:
89 if tunnel.try_passwordless_ssh(self.sshserver, self.sshkey, self.paramiko):
89 if tunnel.try_passwordless_ssh(self.sshserver, self.sshkey, self.paramiko):
90 password=False
90 password=False
91 else:
91 else:
92 password = getpass("SSH Password for %s: "%self.sshserver)
92 password = getpass("SSH Password for %s: "%self.sshserver)
93 else:
93 else:
94 password = False
94 password = False
95
95
96 def connect(s, url):
96 def connect(s, url):
97 url = disambiguate_url(url, self.location)
97 url = disambiguate_url(url, self.location)
98 if self.using_ssh:
98 if self.using_ssh:
99 self.log.debug("Tunneling connection to %s via %s"%(url, self.sshserver))
99 self.log.debug("Tunneling connection to %s via %s"%(url, self.sshserver))
100 return tunnel.tunnel_connection(s, url, self.sshserver,
100 return tunnel.tunnel_connection(s, url, self.sshserver,
101 keyfile=self.sshkey, paramiko=self.paramiko,
101 keyfile=self.sshkey, paramiko=self.paramiko,
102 password=password,
102 password=password,
103 )
103 )
104 else:
104 else:
105 return s.connect(url)
105 return s.connect(url)
106
106
107 def maybe_tunnel(url):
107 def maybe_tunnel(url):
108 """like connect, but don't complete the connection (for use by heartbeat)"""
108 """like connect, but don't complete the connection (for use by heartbeat)"""
109 url = disambiguate_url(url, self.location)
109 url = disambiguate_url(url, self.location)
110 if self.using_ssh:
110 if self.using_ssh:
111 self.log.debug("Tunneling connection to %s via %s"%(url, self.sshserver))
111 self.log.debug("Tunneling connection to %s via %s"%(url, self.sshserver))
112 url,tunnelobj = tunnel.open_tunnel(url, self.sshserver,
112 url,tunnelobj = tunnel.open_tunnel(url, self.sshserver,
113 keyfile=self.sshkey, paramiko=self.paramiko,
113 keyfile=self.sshkey, paramiko=self.paramiko,
114 password=password,
114 password=password,
115 )
115 )
116 return url
116 return url
117 return connect, maybe_tunnel
117 return connect, maybe_tunnel
118
118
119 def register(self):
119 def register(self):
120 """send the registration_request"""
120 """send the registration_request"""
121
121
122 self.log.info("Registering with controller at %s"%self.url)
122 self.log.info("Registering with controller at %s"%self.url)
123 ctx = self.context
123 ctx = self.context
124 connect,maybe_tunnel = self.init_connector()
124 connect,maybe_tunnel = self.init_connector()
125 reg = ctx.socket(zmq.DEALER)
125 reg = ctx.socket(zmq.DEALER)
126 reg.setsockopt(zmq.IDENTITY, self.bident)
126 reg.setsockopt(zmq.IDENTITY, self.bident)
127 connect(reg, self.url)
127 connect(reg, self.url)
128 self.registrar = zmqstream.ZMQStream(reg, self.loop)
128 self.registrar = zmqstream.ZMQStream(reg, self.loop)
129
129
130
130
131 content = dict(queue=self.ident, heartbeat=self.ident, control=self.ident)
131 content = dict(queue=self.ident, heartbeat=self.ident, control=self.ident)
132 self.registrar.on_recv(lambda msg: self.complete_registration(msg, connect, maybe_tunnel))
132 self.registrar.on_recv(lambda msg: self.complete_registration(msg, connect, maybe_tunnel))
133 # print (self.session.key)
133 # print (self.session.key)
134 self.session.send(self.registrar, "registration_request",content=content)
134 self.session.send(self.registrar, "registration_request",content=content)
135
135
136 def complete_registration(self, msg, connect, maybe_tunnel):
137 # print msg
138 self._abort_dc.stop()
139 ctx = self.context
140 loop = self.loop
141 identity = self.bident
142 idents,msg = self.session.feed_identities(msg)
143 msg = Message(self.session.unserialize(msg))
144
145 if msg.content.status == 'ok':
146 self.id = int(msg.content.id)
147
148 # launch heartbeat
149 hb_addrs = msg.content.heartbeat
150
151 # possibly forward hb ports with tunnels
152 hb_addrs = [ maybe_tunnel(addr) for addr in hb_addrs ]
153 heart = Heart(*map(str, hb_addrs), heart_id=identity)
154 heart.start()
155
156 # create Shell Streams (MUX, Task, etc.):
157 queue_addr = msg.content.mux
158 shell_addrs = [ str(queue_addr) ]
159 task_addr = msg.content.task
160 if task_addr:
161 shell_addrs.append(str(task_addr))
162
163 # Uncomment this to go back to two-socket model
164 # shell_streams = []
165 # for addr in shell_addrs:
166 # stream = zmqstream.ZMQStream(ctx.socket(zmq.ROUTER), loop)
167 # stream.setsockopt(zmq.IDENTITY, identity)
168 # stream.connect(disambiguate_url(addr, self.location))
169 # shell_streams.append(stream)
170
171 # Now use only one shell stream for mux and tasks
172 stream = zmqstream.ZMQStream(ctx.socket(zmq.ROUTER), loop)
173 stream.setsockopt(zmq.IDENTITY, identity)
174 shell_streams = [stream]
175 for addr in shell_addrs:
176 connect(stream, addr)
177 # end single stream-socket
178
179 # control stream:
180 control_addr = str(msg.content.control)
181 control_stream = zmqstream.ZMQStream(ctx.socket(zmq.ROUTER), loop)
182 control_stream.setsockopt(zmq.IDENTITY, identity)
183 connect(control_stream, control_addr)
184
185 # create iopub stream:
186 iopub_addr = msg.content.iopub
187 iopub_socket = ctx.socket(zmq.PUB)
188 iopub_socket.setsockopt(zmq.IDENTITY, identity)
189 connect(iopub_socket, iopub_addr)
190
191 # disable history:
192 self.config.HistoryManager.hist_file = ':memory:'
193
194 # Redirect input streams and set a display hook.
195 if self.out_stream_factory:
196 sys.stdout = self.out_stream_factory(self.session, iopub_socket, u'stdout')
197 sys.stdout.topic = py3compat.cast_bytes('engine.%i.stdout' % self.id)
197 sys.stdout.topic = cast_bytes('engine.%i.stdout' % self.id)
198 sys.stderr = self.out_stream_factory(self.session, iopub_socket, u'stderr')
199 sys.stderr.topic = py3compat.cast_bytes('engine.%i.stderr' % self.id)
199 sys.stderr.topic = cast_bytes('engine.%i.stderr' % self.id)
200 if self.display_hook_factory:
201 sys.displayhook = self.display_hook_factory(self.session, iopub_socket)
202 sys.displayhook.topic = py3compat.cast_bytes('engine.%i.pyout' % self.id)
202 sys.displayhook.topic = cast_bytes('engine.%i.pyout' % self.id)
203
204 self.kernel = Kernel(config=self.config, int_id=self.id, ident=self.ident, session=self.session,
205 control_stream=control_stream, shell_streams=shell_streams, iopub_socket=iopub_socket,
206 loop=loop, user_ns=self.user_ns, log=self.log)
207 self.kernel.start()
208
209
210 else:
211 self.log.fatal("Registration Failed: %s"%msg)
212 raise Exception("Registration Failed: %s"%msg)
213
214 self.log.info("Completed registration with id %i"%self.id)
215
216
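# For reference, a sketch of the registration reply that complete_registration
# above consumes -- the field names come from the attribute accesses in the
# method; the values are illustrative only, not an authoritative protocol spec:
#
#     content = {
#         'status': 'ok',        # anything else aborts registration
#         'id': 0,               # integer engine id assigned by the controller
#         'heartbeat': [...],    # heartbeat addresses (possibly tunneled)
#         'mux': 'tcp://127.0.0.1:5567',   # shell (MUX) address
#         'task': 'tcp://127.0.0.1:5568',  # may be empty, in which case it is skipped
#         'control': 'tcp://127.0.0.1:5569',
#         'iopub': 'tcp://127.0.0.1:5570',
#     }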
217 def abort(self):
218 self.log.fatal("Registration timed out after %.1f seconds"%self.timeout)
219 if self.url.startswith('127.'):
220 self.log.fatal("""
221 If the controller and engines are not on the same machine,
222 you will have to instruct the controller to listen on an external IP (in ipcontroller_config.py):
223 c.HubFactory.ip='*' # for all interfaces, internal and external
224 c.HubFactory.ip='192.168.1.101' # or any interface that the engines can see
225 or tunnel connections via ssh.
226 """)
227 self.session.send(self.registrar, "unregistration_request", content=dict(id=self.id))
228 time.sleep(1)
229 sys.exit(255)
230
231 def start(self):
232 dc = ioloop.DelayedCallback(self.register, 0, self.loop)
233 dc.start()
234 self._abort_dc = ioloop.DelayedCallback(self.abort, self.timeout*1000, self.loop)
235 self._abort_dc.start()
236
@@ -1,364 +1,358 b''
1 """some generic utilities for dealing with classes, urls, and serialization
1 """some generic utilities for dealing with classes, urls, and serialization
2
2
3 Authors:
3 Authors:
4
4
5 * Min RK
5 * Min RK
6 """
6 """
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2010-2011 The IPython Development Team
8 # Copyright (C) 2010-2011 The IPython Development Team
9 #
9 #
10 # Distributed under the terms of the BSD License. The full license is in
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13
13
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15 # Imports
15 # Imports
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 # Standard library imports.
18 # Standard library imports.
19 import logging
19 import logging
20 import os
20 import os
21 import re
21 import re
22 import stat
22 import stat
23 import socket
23 import socket
24 import sys
24 import sys
25 from signal import signal, SIGINT, SIGABRT, SIGTERM
25 from signal import signal, SIGINT, SIGABRT, SIGTERM
26 try:
26 try:
27 from signal import SIGKILL
27 from signal import SIGKILL
28 except ImportError:
28 except ImportError:
29 SIGKILL=None
29 SIGKILL=None
30
30
31 try:
32 import cPickle
33 pickle = cPickle
34 except ImportError:
35 cPickle = None
36 import pickle
37
38 # System library imports
39 import zmq
40 from zmq.log import handlers
41
42 from IPython.external.decorator import decorator
43
44 # IPython imports
45 from IPython.config.application import Application
46 from IPython.utils import py3compat
47 from IPython.utils.pickleutil import can, uncan, canSequence, uncanSequence
48 from IPython.utils.newserialized import serialize, unserialize
49 from IPython.zmq.log import EnginePUBHandler
50 from IPython.zmq.serialize import (
51 unserialize_object, serialize_object, pack_apply_message, unpack_apply_message
52 )
53
54 if py3compat.PY3:
55 buffer = memoryview
56
57 #-----------------------------------------------------------------------------
58 # Classes
59 #-----------------------------------------------------------------------------
60
61 class Namespace(dict):
62 """Subclass of dict for attribute access to keys."""
63
64 def __getattr__(self, key):
65 """getattr aliased to getitem"""
66 if key in self.iterkeys():
67 return self[key]
68 else:
69 raise NameError(key)
70
71 def __setattr__(self, key, value):
72 """setattr aliased to setitem, with strict"""
73 if hasattr(dict, key):
74 raise KeyError("Cannot override dict keys %r"%key)
75 self[key] = value
76
77
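# Usage sketch for Namespace (illustrative; mirrors the aliasing above):
#
#     >>> ns = Namespace(a=1)
#     >>> ns.a                  # getattr aliased to getitem
#     1
#     >>> ns.b = 2              # setattr aliased to setitem
#     >>> ns['b']
#     2
#     >>> ns.keys = 3           # dict attributes are protected
#     Traceback (most recent call last):
#     ...
#     KeyError: "Cannot override dict keys 'keys'"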
78 class ReverseDict(dict):
79 """simple double-keyed subset of dict methods."""
80
81 def __init__(self, *args, **kwargs):
82 dict.__init__(self, *args, **kwargs)
83 self._reverse = dict()
84 for key, value in self.iteritems():
85 self._reverse[value] = key
86
87 def __getitem__(self, key):
88 try:
89 return dict.__getitem__(self, key)
90 except KeyError:
91 return self._reverse[key]
92
93 def __setitem__(self, key, value):
94 if key in self._reverse:
95 raise KeyError("Can't have key %r on both sides!"%key)
96 dict.__setitem__(self, key, value)
97 self._reverse[value] = key
98
99 def pop(self, key):
100 value = dict.pop(self, key)
101 self._reverse.pop(value)
102 return value
103
104 def get(self, key, default=None):
105 try:
106 return self[key]
107 except KeyError:
108 return default
109
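# Usage sketch for ReverseDict (illustrative):
#
#     >>> rd = ReverseDict({'engine0': 0})
#     >>> rd['engine0']         # forward lookup
#     0
#     >>> rd[0]                 # falls back to the value->key map
#     'engine0'
#     >>> rd['engine1'] = 1
#     >>> rd.get(1)             # get() resolves reverse keys too
#     'engine1'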
110 #-----------------------------------------------------------------------------
111 # Functions
112 #-----------------------------------------------------------------------------
113
114 @decorator
115 def log_errors(f, self, *args, **kwargs):
116 """decorator to log unhandled exceptions raised in a method.
117
118 For use wrapping on_recv callbacks, so that exceptions
119 do not cause the stream to be closed.
120 """
121 try:
122 return f(self, *args, **kwargs)
123 except Exception:
124 self.log.error("Uncaught exception in %r" % f, exc_info=True)
125
126
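# Usage sketch for log_errors: guard an on_recv callback so a stray exception
# is logged instead of tearing down the stream. `HandlerStub`, `dispatch_reply`,
# and `handle` are hypothetical names; the only real requirement is that the
# instance has a `self.log` attribute:
#
#     class HandlerStub(object):
#         log = logging.getLogger('handler')
#
#         @log_errors
#         def dispatch_reply(self, msg):
#             handle(msg)   # any exception here is logged, not raised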
127 def asbytes(s):
128 """ensure that an object is ascii bytes"""
129 if isinstance(s, unicode):
130 s = s.encode('ascii')
131 return s
132
127 def is_url(url):
128 """boolean check for whether a string is a zmq url"""
129 if '://' not in url:
130 return False
131 proto, addr = url.split('://', 1)
132 if proto.lower() not in ['tcp','pgm','epgm','ipc','inproc']:
133 return False
134 return True
135
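# Quick sketch of is_url in use (illustrative):
#
#     >>> is_url('tcp://192.168.1.101:10101')
#     True
#     >>> is_url('http://example.com')    # http is not a zmq transport
#     False
#     >>> is_url('not-a-url')             # no '://' separator
#     False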
136 def validate_url(url):
137 """validate a url for zeromq"""
138 if not isinstance(url, basestring):
139 raise TypeError("url must be a string, not %r"%type(url))
140 url = url.lower()
141
142 proto_addr = url.split('://')
143 assert len(proto_addr) == 2, 'Invalid url: %r'%url
144 proto, addr = proto_addr
145 assert proto in ['tcp','pgm','epgm','ipc','inproc'], "Invalid protocol: %r"%proto
146
147 # domain pattern adapted from http://www.regexlib.com/REDetails.aspx?regexp_id=391
148 # author: Remi Sabourin
149 pat = re.compile(r'^([\w\d]([\w\d\-]{0,61}[\w\d])?\.)*[\w\d]([\w\d\-]{0,61}[\w\d])?$')
150
151 if proto == 'tcp':
152 lis = addr.split(':')
153 assert len(lis) == 2, 'Invalid url: %r'%url
154 addr,s_port = lis
155 try:
156 port = int(s_port)
157 except ValueError:
158 raise AssertionError("Invalid port %r in url: %r"%(s_port, url))
159
160 assert addr == '*' or pat.match(addr) is not None, 'Invalid url: %r'%url
161
162 else:
163 # only validate tcp urls currently
164 pass
165
166 return True
167
168
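# validate_url raises on bad input rather than returning False (illustrative):
#
#     >>> validate_url('tcp://127.0.0.1:10101')
#     True
#     >>> validate_url('tcp://127.0.0.1')     # missing port
#     Traceback (most recent call last):
#     ...
#     AssertionError: Invalid url: 'tcp://127.0.0.1'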
169 def validate_url_container(container):
170 """validate a potentially nested collection of urls."""
171 if isinstance(container, basestring):
172 url = container
173 return validate_url(url)
174 elif isinstance(container, dict):
175 container = container.itervalues()
176
177 for element in container:
178 validate_url_container(element)
179
180
181 def split_url(url):
182 """split a zmq url (tcp://ip:port) into ('tcp','ip','port')."""
183 proto_addr = url.split('://')
184 assert len(proto_addr) == 2, 'Invalid url: %r'%url
185 proto, addr = proto_addr
186 lis = addr.split(':')
187 assert len(lis) == 2, 'Invalid url: %r'%url
188 addr,s_port = lis
189 return proto,addr,s_port
190
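# split_url keeps the port as a string (illustrative):
#
#     >>> split_url('tcp://10.0.0.1:5555')
#     ('tcp', '10.0.0.1', '5555')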
191 def disambiguate_ip_address(ip, location=None):
192 """turn multi-ip interfaces '0.0.0.0' and '*' into connectable
193 ones, based on the location (default interpretation of location is localhost)."""
194 if ip in ('0.0.0.0', '*'):
195 try:
196 external_ips = socket.gethostbyname_ex(socket.gethostname())[2]
197 except (socket.gaierror, IndexError):
198 # couldn't identify this machine, assume localhost
199 external_ips = []
200 if location is None or location in external_ips or not external_ips:
201 # If location is unspecified or cannot be determined, assume local
202 ip='127.0.0.1'
203 elif location:
204 return location
205 return ip
206
207 def disambiguate_url(url, location=None):
208 """turn multi-ip interfaces '0.0.0.0' and '*' into connectable
209 ones, based on the location (default interpretation is localhost).
210
211 This is for zeromq urls, such as tcp://*:10101."""
212 try:
213 proto,ip,port = split_url(url)
214 except AssertionError:
215 # probably not tcp url; could be ipc, etc.
216 return url
217
218 ip = disambiguate_ip_address(ip,location)
219
220 return "%s://%s:%s"%(proto,ip,port)
221
222
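# disambiguate_url in use (illustrative; the second call assumes
# 192.168.1.101 is not one of this machine's own IPs):
#
#     >>> disambiguate_url('tcp://*:10101')                   # no location: assume local
#     'tcp://127.0.0.1:10101'
#     >>> disambiguate_url('tcp://*:10101', '192.168.1.101')  # engines on another host
#     'tcp://192.168.1.101:10101'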
223 #--------------------------------------------------------------------------
224 # helpers for implementing old MEC API via view.apply
225 #--------------------------------------------------------------------------
226
227 def interactive(f):
228 """decorator for making functions appear as interactively defined.
229 This results in the function being linked to the user_ns as globals()
230 instead of the module globals().
231 """
232 f.__module__ = '__main__'
233 return f
234
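# interactive() in use (illustrative): the decorated function behaves as if
# typed at the prompt, so engines resolve its globals in their user namespace:
#
#     @interactive
#     def where_am_i():
#         import socket
#         return socket.gethostname()
#
#     where_am_i.__module__   # -> '__main__'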
235 @interactive
236 def _push(**ns):
237 """helper method for implementing `client.push` via `client.apply`"""
238 globals().update(ns)
239
240 @interactive
241 def _pull(keys):
242 """helper method for implementing `client.pull` via `client.apply`"""
243 user_ns = globals()
244 if isinstance(keys, (list,tuple, set)):
245 for key in keys:
246 if not user_ns.has_key(key):
247 raise NameError("name '%s' is not defined"%key)
248 return map(user_ns.get, keys)
249 else:
250 if not user_ns.has_key(keys):
251 raise NameError("name '%s' is not defined"%keys)
252 return user_ns.get(keys)
253
254 @interactive
255 def _execute(code):
256 """helper method for implementing `client.execute` via `client.apply`"""
257 exec code in globals()
258
259 #--------------------------------------------------------------------------
260 # extra process management utilities
261 #--------------------------------------------------------------------------
262
263 _random_ports = set()
264
265 def select_random_ports(n):
266 """Select and return n random ports that are available."""
267 ports = []
268 for i in xrange(n):
269 sock = socket.socket()
270 sock.bind(('', 0))
271 while sock.getsockname()[1] in _random_ports:
272 sock.close()
273 sock = socket.socket()
274 sock.bind(('', 0))
275 ports.append(sock)
276 for i, sock in enumerate(ports):
277 port = sock.getsockname()[1]
278 sock.close()
279 ports[i] = port
280 _random_ports.add(port)
281 return ports
282
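# select_random_ports binds throwaway sockets to port 0 so the OS picks free
# ports, and _random_ports keeps later calls from reusing them (illustrative):
#
#     >>> ports = select_random_ports(3)   # e.g. [52101, 52102, 52105]
#     >>> len(ports) == len(set(ports))
#     True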
283 def signal_children(children):
284 """Relay interrupt/term signals to children, for more solid process cleanup."""
285 def terminate_children(sig, frame):
286 log = Application.instance().log
287 log.critical("Got signal %i, terminating children..."%sig)
288 for child in children:
289 child.terminate()
290
291 sys.exit(sig != SIGINT)
292 # sys.exit(sig)
293 for sig in (SIGINT, SIGABRT, SIGTERM):
294 signal(sig, terminate_children)
295
296 def generate_exec_key(keyfile):
297 import uuid
298 newkey = str(uuid.uuid4())
299 with open(keyfile, 'w') as f:
300 # f.write('ipython-key ')
301 f.write(newkey+'\n')
302 # set user-only RW permissions (0600)
303 # this will have no effect on Windows
304 os.chmod(keyfile, stat.S_IRUSR|stat.S_IWUSR)
305
306
307 def integer_loglevel(loglevel):
308 try:
309 loglevel = int(loglevel)
310 except ValueError:
311 if isinstance(loglevel, str):
312 loglevel = getattr(logging, loglevel)
313 return loglevel
314
315 def connect_logger(logname, context, iface, root="ip", loglevel=logging.DEBUG):
316 logger = logging.getLogger(logname)
317 if any([isinstance(h, handlers.PUBHandler) for h in logger.handlers]):
318 # don't add a second PUBHandler
319 return
320 loglevel = integer_loglevel(loglevel)
321 lsock = context.socket(zmq.PUB)
322 lsock.connect(iface)
323 handler = handlers.PUBHandler(lsock)
324 handler.setLevel(loglevel)
325 handler.root_topic = root
326 logger.addHandler(handler)
327 logger.setLevel(loglevel)
328
329 def connect_engine_logger(context, iface, engine, loglevel=logging.DEBUG):
330 logger = logging.getLogger()
331 if any([isinstance(h, handlers.PUBHandler) for h in logger.handlers]):
332 # don't add a second PUBHandler
333 return
334 loglevel = integer_loglevel(loglevel)
335 lsock = context.socket(zmq.PUB)
336 lsock.connect(iface)
337 handler = EnginePUBHandler(engine, lsock)
338 handler.setLevel(loglevel)
339 logger.addHandler(handler)
340 logger.setLevel(loglevel)
341 return logger
342
343 def local_logger(logname, loglevel=logging.DEBUG):
344 loglevel = integer_loglevel(loglevel)
345 logger = logging.getLogger(logname)
346 if any([isinstance(h, logging.StreamHandler) for h in logger.handlers]):
347 # don't add a second StreamHandler
348 return
349 handler = logging.StreamHandler()
350 handler.setLevel(loglevel)
351 formatter = logging.Formatter("%(asctime)s.%(msecs).03d [%(name)s] %(message)s",
352 datefmt="%Y-%m-%d %H:%M:%S")
353 handler.setFormatter(formatter)
354
355 logger.addHandler(handler)
356 logger.setLevel(loglevel)
357 return logger
358
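# local_logger in use (illustrative): loglevel may be an int or a level name,
# thanks to integer_loglevel above. Output format per the Formatter:
#
#     log = local_logger('ipengine', 'INFO')
#     log.info("engine ready")
#     # 2012-06-01 12:00:00.000 [ipengine] engine ready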