##// END OF EJS Templates
Merge pull request #1471 from minrk/connection...
Fernando Perez -
r7962:bff463b5 merge
parent child Browse files
Show More
@@ -1,491 +1,528
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # encoding: utf-8
2 # encoding: utf-8
3 """
3 """
4 The IPython controller application.
4 The IPython controller application.
5
5
6 Authors:
6 Authors:
7
7
8 * Brian Granger
8 * Brian Granger
9 * MinRK
9 * MinRK
10
10
11 """
11 """
12
12
13 #-----------------------------------------------------------------------------
13 #-----------------------------------------------------------------------------
14 # Copyright (C) 2008-2011 The IPython Development Team
14 # Copyright (C) 2008-2011 The IPython Development Team
15 #
15 #
16 # Distributed under the terms of the BSD License. The full license is in
16 # Distributed under the terms of the BSD License. The full license is in
17 # the file COPYING, distributed as part of this software.
17 # the file COPYING, distributed as part of this software.
18 #-----------------------------------------------------------------------------
18 #-----------------------------------------------------------------------------
19
19
20 #-----------------------------------------------------------------------------
20 #-----------------------------------------------------------------------------
21 # Imports
21 # Imports
22 #-----------------------------------------------------------------------------
22 #-----------------------------------------------------------------------------
23
23
24 from __future__ import with_statement
24 from __future__ import with_statement
25
25
26 import json
26 import json
27 import os
27 import os
28 import socket
28 import socket
29 import stat
29 import stat
30 import sys
30 import sys
31
31
32 from multiprocessing import Process
32 from multiprocessing import Process
33 from signal import signal, SIGINT, SIGABRT, SIGTERM
33 from signal import signal, SIGINT, SIGABRT, SIGTERM
34
34
35 import zmq
35 import zmq
36 from zmq.devices import ProcessMonitoredQueue
36 from zmq.devices import ProcessMonitoredQueue
37 from zmq.log.handlers import PUBHandler
37 from zmq.log.handlers import PUBHandler
38
38
39 from IPython.core.profiledir import ProfileDir
39 from IPython.core.profiledir import ProfileDir
40
40
41 from IPython.parallel.apps.baseapp import (
41 from IPython.parallel.apps.baseapp import (
42 BaseParallelApplication,
42 BaseParallelApplication,
43 base_aliases,
43 base_aliases,
44 base_flags,
44 base_flags,
45 catch_config_error,
45 catch_config_error,
46 )
46 )
47 from IPython.utils.importstring import import_item
47 from IPython.utils.importstring import import_item
48 from IPython.utils.traitlets import Instance, Unicode, Bool, List, Dict, TraitError
48 from IPython.utils.traitlets import Instance, Unicode, Bool, List, Dict, TraitError
49
49
50 from IPython.zmq.session import (
50 from IPython.zmq.session import (
51 Session, session_aliases, session_flags, default_secure
51 Session, session_aliases, session_flags, default_secure
52 )
52 )
53
53
54 from IPython.parallel.controller.heartmonitor import HeartMonitor
54 from IPython.parallel.controller.heartmonitor import HeartMonitor
55 from IPython.parallel.controller.hub import HubFactory
55 from IPython.parallel.controller.hub import HubFactory
56 from IPython.parallel.controller.scheduler import TaskScheduler,launch_scheduler
56 from IPython.parallel.controller.scheduler import TaskScheduler,launch_scheduler
57 from IPython.parallel.controller.sqlitedb import SQLiteDB
57 from IPython.parallel.controller.sqlitedb import SQLiteDB
58
58
59 from IPython.parallel.util import split_url, disambiguate_url
59 from IPython.parallel.util import split_url, disambiguate_url
60
60
61 # conditional import of MongoDB backend class
61 # conditional import of MongoDB backend class
62
62
63 try:
63 try:
64 from IPython.parallel.controller.mongodb import MongoDB
64 from IPython.parallel.controller.mongodb import MongoDB
65 except ImportError:
65 except ImportError:
66 maybe_mongo = []
66 maybe_mongo = []
67 else:
67 else:
68 maybe_mongo = [MongoDB]
68 maybe_mongo = [MongoDB]
69
69
70
70
71 #-----------------------------------------------------------------------------
71 #-----------------------------------------------------------------------------
72 # Module level variables
72 # Module level variables
73 #-----------------------------------------------------------------------------
73 #-----------------------------------------------------------------------------
74
74
75
75
76 #: The default config file name for this application
76 #: The default config file name for this application
77 default_config_file_name = u'ipcontroller_config.py'
77 default_config_file_name = u'ipcontroller_config.py'
78
78
79
79
80 _description = """Start the IPython controller for parallel computing.
80 _description = """Start the IPython controller for parallel computing.
81
81
82 The IPython controller provides a gateway between the IPython engines and
82 The IPython controller provides a gateway between the IPython engines and
83 clients. The controller needs to be started before the engines and can be
83 clients. The controller needs to be started before the engines and can be
84 configured using command line options or using a cluster directory. Cluster
84 configured using command line options or using a cluster directory. Cluster
85 directories contain config, log and security files and are usually located in
85 directories contain config, log and security files and are usually located in
86 your ipython directory and named as "profile_name". See the `profile`
86 your ipython directory and named as "profile_name". See the `profile`
87 and `profile-dir` options for details.
87 and `profile-dir` options for details.
88 """
88 """
89
89
90 _examples = """
90 _examples = """
91 ipcontroller --ip=192.168.0.1 --port=1000 # listen on ip, port for engines
91 ipcontroller --ip=192.168.0.1 --port=1000 # listen on ip, port for engines
92 ipcontroller --scheme=pure # use the pure zeromq scheduler
92 ipcontroller --scheme=pure # use the pure zeromq scheduler
93 """
93 """
94
94
95
95
96 #-----------------------------------------------------------------------------
96 #-----------------------------------------------------------------------------
97 # The main application
97 # The main application
98 #-----------------------------------------------------------------------------
98 #-----------------------------------------------------------------------------
99 flags = {}
99 flags = {}
100 flags.update(base_flags)
100 flags.update(base_flags)
101 flags.update({
101 flags.update({
102 'usethreads' : ( {'IPControllerApp' : {'use_threads' : True}},
102 'usethreads' : ( {'IPControllerApp' : {'use_threads' : True}},
103 'Use threads instead of processes for the schedulers'),
103 'Use threads instead of processes for the schedulers'),
104 'sqlitedb' : ({'HubFactory' : {'db_class' : 'IPython.parallel.controller.sqlitedb.SQLiteDB'}},
104 'sqlitedb' : ({'HubFactory' : {'db_class' : 'IPython.parallel.controller.sqlitedb.SQLiteDB'}},
105 'use the SQLiteDB backend'),
105 'use the SQLiteDB backend'),
106 'mongodb' : ({'HubFactory' : {'db_class' : 'IPython.parallel.controller.mongodb.MongoDB'}},
106 'mongodb' : ({'HubFactory' : {'db_class' : 'IPython.parallel.controller.mongodb.MongoDB'}},
107 'use the MongoDB backend'),
107 'use the MongoDB backend'),
108 'dictdb' : ({'HubFactory' : {'db_class' : 'IPython.parallel.controller.dictdb.DictDB'}},
108 'dictdb' : ({'HubFactory' : {'db_class' : 'IPython.parallel.controller.dictdb.DictDB'}},
109 'use the in-memory DictDB backend'),
109 'use the in-memory DictDB backend'),
110 'nodb' : ({'HubFactory' : {'db_class' : 'IPython.parallel.controller.dictdb.NoDB'}},
110 'nodb' : ({'HubFactory' : {'db_class' : 'IPython.parallel.controller.dictdb.NoDB'}},
111 """use dummy DB backend, which doesn't store any information.
111 """use dummy DB backend, which doesn't store any information.
112
112
113 This is the default as of IPython 0.13.
113 This is the default as of IPython 0.13.
114
114
115 To enable delayed or repeated retrieval of results from the Hub,
115 To enable delayed or repeated retrieval of results from the Hub,
116 select one of the true db backends.
116 select one of the true db backends.
117 """),
117 """),
118 'reuse' : ({'IPControllerApp' : {'reuse_files' : True}},
118 'reuse' : ({'IPControllerApp' : {'reuse_files' : True}},
119 'reuse existing json connection files')
119 'reuse existing json connection files'),
120 'restore' : ({'IPControllerApp' : {'restore_engines' : True, 'reuse_files' : True}},
121 'Attempt to restore engines from a JSON file. '
122 'For use when resuming a crashed controller'),
120 })
123 })
121
124
122 flags.update(session_flags)
125 flags.update(session_flags)
123
126
124 aliases = dict(
127 aliases = dict(
125 ssh = 'IPControllerApp.ssh_server',
128 ssh = 'IPControllerApp.ssh_server',
126 enginessh = 'IPControllerApp.engine_ssh_server',
129 enginessh = 'IPControllerApp.engine_ssh_server',
127 location = 'IPControllerApp.location',
130 location = 'IPControllerApp.location',
128
131
129 url = 'HubFactory.url',
132 url = 'HubFactory.url',
130 ip = 'HubFactory.ip',
133 ip = 'HubFactory.ip',
131 transport = 'HubFactory.transport',
134 transport = 'HubFactory.transport',
132 port = 'HubFactory.regport',
135 port = 'HubFactory.regport',
133
136
134 ping = 'HeartMonitor.period',
137 ping = 'HeartMonitor.period',
135
138
136 scheme = 'TaskScheduler.scheme_name',
139 scheme = 'TaskScheduler.scheme_name',
137 hwm = 'TaskScheduler.hwm',
140 hwm = 'TaskScheduler.hwm',
138 )
141 )
139 aliases.update(base_aliases)
142 aliases.update(base_aliases)
140 aliases.update(session_aliases)
143 aliases.update(session_aliases)
141
144
142 class IPControllerApp(BaseParallelApplication):
145 class IPControllerApp(BaseParallelApplication):
143
146
144 name = u'ipcontroller'
147 name = u'ipcontroller'
145 description = _description
148 description = _description
146 examples = _examples
149 examples = _examples
147 config_file_name = Unicode(default_config_file_name)
150 config_file_name = Unicode(default_config_file_name)
148 classes = [ProfileDir, Session, HubFactory, TaskScheduler, HeartMonitor, SQLiteDB] + maybe_mongo
151 classes = [ProfileDir, Session, HubFactory, TaskScheduler, HeartMonitor, SQLiteDB] + maybe_mongo
149
152
150 # change default to True
153 # change default to True
151 auto_create = Bool(True, config=True,
154 auto_create = Bool(True, config=True,
152 help="""Whether to create profile dir if it doesn't exist.""")
155 help="""Whether to create profile dir if it doesn't exist.""")
153
156
154 reuse_files = Bool(False, config=True,
157 reuse_files = Bool(False, config=True,
155 help="""Whether to reuse existing json connection files.
158 help="""Whether to reuse existing json connection files.
156 If False, connection files will be removed on a clean exit.
159 If False, connection files will be removed on a clean exit.
157 """
160 """
158 )
161 )
162 restore_engines = Bool(False, config=True,
163 help="""Reload engine state from JSON file
164 """
165 )
159 ssh_server = Unicode(u'', config=True,
166 ssh_server = Unicode(u'', config=True,
160 help="""ssh url for clients to use when connecting to the Controller
167 help="""ssh url for clients to use when connecting to the Controller
161 processes. It should be of the form: [user@]server[:port]. The
168 processes. It should be of the form: [user@]server[:port]. The
162 Controller's listening addresses must be accessible from the ssh server""",
169 Controller's listening addresses must be accessible from the ssh server""",
163 )
170 )
164 engine_ssh_server = Unicode(u'', config=True,
171 engine_ssh_server = Unicode(u'', config=True,
165 help="""ssh url for engines to use when connecting to the Controller
172 help="""ssh url for engines to use when connecting to the Controller
166 processes. It should be of the form: [user@]server[:port]. The
173 processes. It should be of the form: [user@]server[:port]. The
167 Controller's listening addresses must be accessible from the ssh server""",
174 Controller's listening addresses must be accessible from the ssh server""",
168 )
175 )
169 location = Unicode(u'', config=True,
176 location = Unicode(u'', config=True,
170 help="""The external IP or domain name of the Controller, used for disambiguating
177 help="""The external IP or domain name of the Controller, used for disambiguating
171 engine and client connections.""",
178 engine and client connections.""",
172 )
179 )
173 import_statements = List([], config=True,
180 import_statements = List([], config=True,
174 help="import statements to be run at startup. Necessary in some environments"
181 help="import statements to be run at startup. Necessary in some environments"
175 )
182 )
176
183
177 use_threads = Bool(False, config=True,
184 use_threads = Bool(False, config=True,
178 help='Use threads instead of processes for the schedulers',
185 help='Use threads instead of processes for the schedulers',
179 )
186 )
180
187
181 engine_json_file = Unicode('ipcontroller-engine.json', config=True,
188 engine_json_file = Unicode('ipcontroller-engine.json', config=True,
182 help="JSON filename where engine connection info will be stored.")
189 help="JSON filename where engine connection info will be stored.")
183 client_json_file = Unicode('ipcontroller-client.json', config=True,
190 client_json_file = Unicode('ipcontroller-client.json', config=True,
184 help="JSON filename where client connection info will be stored.")
191 help="JSON filename where client connection info will be stored.")
185
192
186 def _cluster_id_changed(self, name, old, new):
193 def _cluster_id_changed(self, name, old, new):
187 super(IPControllerApp, self)._cluster_id_changed(name, old, new)
194 super(IPControllerApp, self)._cluster_id_changed(name, old, new)
188 self.engine_json_file = "%s-engine.json" % self.name
195 self.engine_json_file = "%s-engine.json" % self.name
189 self.client_json_file = "%s-client.json" % self.name
196 self.client_json_file = "%s-client.json" % self.name
190
197
191
198
192 # internal
199 # internal
193 children = List()
200 children = List()
194 mq_class = Unicode('zmq.devices.ProcessMonitoredQueue')
201 mq_class = Unicode('zmq.devices.ProcessMonitoredQueue')
195
202
196 def _use_threads_changed(self, name, old, new):
203 def _use_threads_changed(self, name, old, new):
197 self.mq_class = 'zmq.devices.%sMonitoredQueue'%('Thread' if new else 'Process')
204 self.mq_class = 'zmq.devices.%sMonitoredQueue'%('Thread' if new else 'Process')
198
205
199 write_connection_files = Bool(True,
206 write_connection_files = Bool(True,
200 help="""Whether to write connection files to disk.
207 help="""Whether to write connection files to disk.
201 True in all cases other than runs with `reuse_files=True` *after the first*
208 True in all cases other than runs with `reuse_files=True` *after the first*
202 """
209 """
203 )
210 )
204
211
205 aliases = Dict(aliases)
212 aliases = Dict(aliases)
206 flags = Dict(flags)
213 flags = Dict(flags)
207
214
208
215
209 def save_connection_dict(self, fname, cdict):
216 def save_connection_dict(self, fname, cdict):
210 """save a connection dict to json file."""
217 """save a connection dict to json file."""
211 c = self.config
218 c = self.config
212 url = cdict['url']
219 url = cdict['registration']
213 location = cdict['location']
220 location = cdict['location']
221
214 if not location:
222 if not location:
215 try:
223 try:
216 proto,ip,port = split_url(url)
217 except AssertionError:
218 pass
219 else:
220 try:
221 location = socket.gethostbyname_ex(socket.gethostname())[2][-1]
224 location = socket.gethostbyname_ex(socket.gethostname())[2][-1]
222 except (socket.gaierror, IndexError):
225 except (socket.gaierror, IndexError):
223 self.log.warn("Could not identify this machine's IP, assuming 127.0.0.1."
226 self.log.warn("Could not identify this machine's IP, assuming 127.0.0.1."
224 " You may need to specify '--location=<external_ip_address>' to help"
227 " You may need to specify '--location=<external_ip_address>' to help"
225 " IPython decide when to connect via loopback.")
228 " IPython decide when to connect via loopback.")
226 location = '127.0.0.1'
229 location = '127.0.0.1'
227 cdict['location'] = location
230 cdict['location'] = location
228 fname = os.path.join(self.profile_dir.security_dir, fname)
231 fname = os.path.join(self.profile_dir.security_dir, fname)
229 self.log.info("writing connection info to %s", fname)
232 self.log.info("writing connection info to %s", fname)
230 with open(fname, 'w') as f:
233 with open(fname, 'w') as f:
231 f.write(json.dumps(cdict, indent=2))
234 f.write(json.dumps(cdict, indent=2))
232 os.chmod(fname, stat.S_IRUSR|stat.S_IWUSR)
235 os.chmod(fname, stat.S_IRUSR|stat.S_IWUSR)
233
236
234 def load_config_from_json(self):
237 def load_config_from_json(self):
235 """load config from existing json connector files."""
238 """load config from existing json connector files."""
236 c = self.config
239 c = self.config
237 self.log.debug("loading config from JSON")
240 self.log.debug("loading config from JSON")
238 # load from engine config
241
242 # load engine config
243
239 fname = os.path.join(self.profile_dir.security_dir, self.engine_json_file)
244 fname = os.path.join(self.profile_dir.security_dir, self.engine_json_file)
240 self.log.info("loading connection info from %s", fname)
245 self.log.info("loading connection info from %s", fname)
241 with open(fname) as f:
246 with open(fname) as f:
242 cfg = json.loads(f.read())
247 ecfg = json.loads(f.read())
243 key = cfg['exec_key']
248
244 # json gives unicode, Session.key wants bytes
249 # json gives unicode, Session.key wants bytes
245 c.Session.key = key.encode('ascii')
250 c.Session.key = ecfg['exec_key'].encode('ascii')
246 xport,addr = cfg['url'].split('://')
251
247 c.HubFactory.engine_transport = xport
252 xport,ip = ecfg['interface'].split('://')
248 ip,ports = addr.split(':')
253
249 c.HubFactory.engine_ip = ip
254 c.HubFactory.engine_ip = ip
250 c.HubFactory.regport = int(ports)
255 c.HubFactory.engine_transport = xport
251 self.location = cfg['location']
256
257 self.location = ecfg['location']
252 if not self.engine_ssh_server:
258 if not self.engine_ssh_server:
253 self.engine_ssh_server = cfg['ssh']
259 self.engine_ssh_server = ecfg['ssh']
260
254 # load client config
261 # load client config
262
255 fname = os.path.join(self.profile_dir.security_dir, self.client_json_file)
263 fname = os.path.join(self.profile_dir.security_dir, self.client_json_file)
256 self.log.info("loading connection info from %s", fname)
264 self.log.info("loading connection info from %s", fname)
257 with open(fname) as f:
265 with open(fname) as f:
258 cfg = json.loads(f.read())
266 ccfg = json.loads(f.read())
259 assert key == cfg['exec_key'], "exec_key mismatch between engine and client keys"
267
260 xport,addr = cfg['url'].split('://')
268 for key in ('exec_key', 'registration', 'pack', 'unpack'):
269 assert ccfg[key] == ecfg[key], "mismatch between engine and client info: %r" % key
270
271 xport,addr = ccfg['interface'].split('://')
272
261 c.HubFactory.client_transport = xport
273 c.HubFactory.client_transport = xport
262 ip,ports = addr.split(':')
263 c.HubFactory.client_ip = ip
274 c.HubFactory.client_ip = ip
264 if not self.ssh_server:
275 if not self.ssh_server:
265 self.ssh_server = cfg['ssh']
276 self.ssh_server = ccfg['ssh']
266 assert int(ports) == c.HubFactory.regport, "regport mismatch"
277
278 # load port config:
279 c.HubFactory.regport = ecfg['registration']
280 c.HubFactory.hb = (ecfg['hb_ping'], ecfg['hb_pong'])
281 c.HubFactory.control = (ccfg['control'], ecfg['control'])
282 c.HubFactory.mux = (ccfg['mux'], ecfg['mux'])
283 c.HubFactory.task = (ccfg['task'], ecfg['task'])
284 c.HubFactory.iopub = (ccfg['iopub'], ecfg['iopub'])
285 c.HubFactory.notifier_port = ccfg['notification']
267
286
268 def cleanup_connection_files(self):
287 def cleanup_connection_files(self):
269 if self.reuse_files:
288 if self.reuse_files:
270 self.log.debug("leaving JSON connection files for reuse")
289 self.log.debug("leaving JSON connection files for reuse")
271 return
290 return
272 self.log.debug("cleaning up JSON connection files")
291 self.log.debug("cleaning up JSON connection files")
273 for f in (self.client_json_file, self.engine_json_file):
292 for f in (self.client_json_file, self.engine_json_file):
274 f = os.path.join(self.profile_dir.security_dir, f)
293 f = os.path.join(self.profile_dir.security_dir, f)
275 try:
294 try:
276 os.remove(f)
295 os.remove(f)
277 except Exception as e:
296 except Exception as e:
278 self.log.error("Failed to cleanup connection file: %s", e)
297 self.log.error("Failed to cleanup connection file: %s", e)
279 else:
298 else:
280 self.log.debug(u"removed %s", f)
299 self.log.debug(u"removed %s", f)
281
300
282 def load_secondary_config(self):
301 def load_secondary_config(self):
283 """secondary config, loading from JSON and setting defaults"""
302 """secondary config, loading from JSON and setting defaults"""
284 if self.reuse_files:
303 if self.reuse_files:
285 try:
304 try:
286 self.load_config_from_json()
305 self.load_config_from_json()
287 except (AssertionError,IOError) as e:
306 except (AssertionError,IOError) as e:
288 self.log.error("Could not load config from JSON: %s" % e)
307 self.log.error("Could not load config from JSON: %s" % e)
289 else:
308 else:
290 # successfully loaded config from JSON, and reuse=True
309 # successfully loaded config from JSON, and reuse=True
291 # no need to write back the same file
310 # no need to write back the same file
292 self.write_connection_files = False
311 self.write_connection_files = False
293
312
294 # switch Session.key default to secure
313 # switch Session.key default to secure
295 default_secure(self.config)
314 default_secure(self.config)
296 self.log.debug("Config changed")
315 self.log.debug("Config changed")
297 self.log.debug(repr(self.config))
316 self.log.debug(repr(self.config))
298
317
299 def init_hub(self):
318 def init_hub(self):
300 c = self.config
319 c = self.config
301
320
302 self.do_import_statements()
321 self.do_import_statements()
303
322
304 try:
323 try:
305 self.factory = HubFactory(config=c, log=self.log)
324 self.factory = HubFactory(config=c, log=self.log)
306 # self.start_logging()
325 # self.start_logging()
307 self.factory.init_hub()
326 self.factory.init_hub()
308 except TraitError:
327 except TraitError:
309 raise
328 raise
310 except Exception:
329 except Exception:
311 self.log.error("Couldn't construct the Controller", exc_info=True)
330 self.log.error("Couldn't construct the Controller", exc_info=True)
312 self.exit(1)
331 self.exit(1)
313
332
314 if self.write_connection_files:
333 if self.write_connection_files:
315 # save to new json config files
334 # save to new json config files
316 f = self.factory
335 f = self.factory
317 cdict = {'exec_key' : f.session.key.decode('ascii'),
336 base = {
318 'ssh' : self.ssh_server,
337 'exec_key' : f.session.key.decode('ascii'),
319 'url' : "%s://%s:%s"%(f.client_transport, f.client_ip, f.regport),
338 'location' : self.location,
320 'location' : self.location
339 'pack' : f.session.packer,
340 'unpack' : f.session.unpacker,
321 }
341 }
342
343 cdict = {'ssh' : self.ssh_server}
344 cdict.update(f.client_info)
345 cdict.update(base)
322 self.save_connection_dict(self.client_json_file, cdict)
346 self.save_connection_dict(self.client_json_file, cdict)
323 edict = cdict
347
324 edict['url']="%s://%s:%s"%((f.client_transport, f.client_ip, f.regport))
348 edict = {'ssh' : self.engine_ssh_server}
325 edict['ssh'] = self.engine_ssh_server
349 edict.update(f.engine_info)
350 edict.update(base)
326 self.save_connection_dict(self.engine_json_file, edict)
351 self.save_connection_dict(self.engine_json_file, edict)
327
352
353 fname = "engines%s.json" % self.cluster_id
354 self.factory.hub.engine_state_file = os.path.join(self.profile_dir.log_dir, fname)
355 if self.restore_engines:
356 self.factory.hub._load_engine_state()
357
328 def init_schedulers(self):
358 def init_schedulers(self):
329 children = self.children
359 children = self.children
330 mq = import_item(str(self.mq_class))
360 mq = import_item(str(self.mq_class))
331
361
332 hub = self.factory
362 f = self.factory
363 ident = f.session.bsession
333 # disambiguate url, in case of *
364 # disambiguate url, in case of *
334 monitor_url = disambiguate_url(hub.monitor_url)
365 monitor_url = disambiguate_url(f.monitor_url)
335 # maybe_inproc = 'inproc://monitor' if self.use_threads else monitor_url
366 # maybe_inproc = 'inproc://monitor' if self.use_threads else monitor_url
336 # IOPub relay (in a Process)
367 # IOPub relay (in a Process)
337 q = mq(zmq.PUB, zmq.SUB, zmq.PUB, b'N/A',b'iopub')
368 q = mq(zmq.PUB, zmq.SUB, zmq.PUB, b'N/A',b'iopub')
338 q.bind_in(hub.client_info['iopub'])
369 q.bind_in(f.client_url('iopub'))
339 q.bind_out(hub.engine_info['iopub'])
370 q.setsockopt_in(zmq.IDENTITY, ident + b"_iopub")
371 q.bind_out(f.engine_url('iopub'))
340 q.setsockopt_out(zmq.SUBSCRIBE, b'')
372 q.setsockopt_out(zmq.SUBSCRIBE, b'')
341 q.connect_mon(monitor_url)
373 q.connect_mon(monitor_url)
342 q.daemon=True
374 q.daemon=True
343 children.append(q)
375 children.append(q)
344
376
345 # Multiplexer Queue (in a Process)
377 # Multiplexer Queue (in a Process)
346 q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'in', b'out')
378 q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'in', b'out')
347 q.bind_in(hub.client_info['mux'])
379 q.bind_in(f.client_url('mux'))
348 q.setsockopt_in(zmq.IDENTITY, b'mux')
380 q.setsockopt_in(zmq.IDENTITY, b'mux_in')
349 q.bind_out(hub.engine_info['mux'])
381 q.bind_out(f.engine_url('mux'))
382 q.setsockopt_out(zmq.IDENTITY, b'mux_out')
350 q.connect_mon(monitor_url)
383 q.connect_mon(monitor_url)
351 q.daemon=True
384 q.daemon=True
352 children.append(q)
385 children.append(q)
353
386
354 # Control Queue (in a Process)
387 # Control Queue (in a Process)
355 q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'incontrol', b'outcontrol')
388 q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'incontrol', b'outcontrol')
356 q.bind_in(hub.client_info['control'])
389 q.bind_in(f.client_url('control'))
357 q.setsockopt_in(zmq.IDENTITY, b'control')
390 q.setsockopt_in(zmq.IDENTITY, b'control_in')
358 q.bind_out(hub.engine_info['control'])
391 q.bind_out(f.engine_url('control'))
392 q.setsockopt_out(zmq.IDENTITY, b'control_out')
359 q.connect_mon(monitor_url)
393 q.connect_mon(monitor_url)
360 q.daemon=True
394 q.daemon=True
361 children.append(q)
395 children.append(q)
362 try:
396 try:
363 scheme = self.config.TaskScheduler.scheme_name
397 scheme = self.config.TaskScheduler.scheme_name
364 except AttributeError:
398 except AttributeError:
365 scheme = TaskScheduler.scheme_name.get_default_value()
399 scheme = TaskScheduler.scheme_name.get_default_value()
366 # Task Queue (in a Process)
400 # Task Queue (in a Process)
367 if scheme == 'pure':
401 if scheme == 'pure':
368 self.log.warn("task::using pure DEALER Task scheduler")
402 self.log.warn("task::using pure DEALER Task scheduler")
369 q = mq(zmq.ROUTER, zmq.DEALER, zmq.PUB, b'intask', b'outtask')
403 q = mq(zmq.ROUTER, zmq.DEALER, zmq.PUB, b'intask', b'outtask')
370 # q.setsockopt_out(zmq.HWM, hub.hwm)
404 # q.setsockopt_out(zmq.HWM, hub.hwm)
371 q.bind_in(hub.client_info['task'][1])
405 q.bind_in(f.client_url('task'))
372 q.setsockopt_in(zmq.IDENTITY, b'task')
406 q.setsockopt_in(zmq.IDENTITY, b'task_in')
373 q.bind_out(hub.engine_info['task'])
407 q.bind_out(f.engine_url('task'))
408 q.setsockopt_out(zmq.IDENTITY, b'task_out')
374 q.connect_mon(monitor_url)
409 q.connect_mon(monitor_url)
375 q.daemon=True
410 q.daemon=True
376 children.append(q)
411 children.append(q)
377 elif scheme == 'none':
412 elif scheme == 'none':
378 self.log.warn("task::using no Task scheduler")
413 self.log.warn("task::using no Task scheduler")
379
414
380 else:
415 else:
381 self.log.info("task::using Python %s Task scheduler"%scheme)
416 self.log.info("task::using Python %s Task scheduler"%scheme)
382 sargs = (hub.client_info['task'][1], hub.engine_info['task'],
417 sargs = (f.client_url('task'), f.engine_url('task'),
383 monitor_url, disambiguate_url(hub.client_info['notification']))
418 monitor_url, disambiguate_url(f.client_url('notification')),
419 disambiguate_url(f.client_url('registration')),
420 )
384 kwargs = dict(logname='scheduler', loglevel=self.log_level,
421 kwargs = dict(logname='scheduler', loglevel=self.log_level,
385 log_url = self.log_url, config=dict(self.config))
422 log_url = self.log_url, config=dict(self.config))
386 if 'Process' in self.mq_class:
423 if 'Process' in self.mq_class:
387 # run the Python scheduler in a Process
424 # run the Python scheduler in a Process
388 q = Process(target=launch_scheduler, args=sargs, kwargs=kwargs)
425 q = Process(target=launch_scheduler, args=sargs, kwargs=kwargs)
389 q.daemon=True
426 q.daemon=True
390 children.append(q)
427 children.append(q)
391 else:
428 else:
392 # single-threaded Controller
429 # single-threaded Controller
393 kwargs['in_thread'] = True
430 kwargs['in_thread'] = True
394 launch_scheduler(*sargs, **kwargs)
431 launch_scheduler(*sargs, **kwargs)
395
432
396 def terminate_children(self):
433 def terminate_children(self):
397 child_procs = []
434 child_procs = []
398 for child in self.children:
435 for child in self.children:
399 if isinstance(child, ProcessMonitoredQueue):
436 if isinstance(child, ProcessMonitoredQueue):
400 child_procs.append(child.launcher)
437 child_procs.append(child.launcher)
401 elif isinstance(child, Process):
438 elif isinstance(child, Process):
402 child_procs.append(child)
439 child_procs.append(child)
403 if child_procs:
440 if child_procs:
404 self.log.critical("terminating children...")
441 self.log.critical("terminating children...")
405 for child in child_procs:
442 for child in child_procs:
406 try:
443 try:
407 child.terminate()
444 child.terminate()
408 except OSError:
445 except OSError:
409 # already dead
446 # already dead
410 pass
447 pass
411
448
412 def handle_signal(self, sig, frame):
449 def handle_signal(self, sig, frame):
413 self.log.critical("Received signal %i, shutting down", sig)
450 self.log.critical("Received signal %i, shutting down", sig)
414 self.terminate_children()
451 self.terminate_children()
415 self.loop.stop()
452 self.loop.stop()
416
453
417 def init_signal(self):
454 def init_signal(self):
418 for sig in (SIGINT, SIGABRT, SIGTERM):
455 for sig in (SIGINT, SIGABRT, SIGTERM):
419 signal(sig, self.handle_signal)
456 signal(sig, self.handle_signal)
420
457
421 def do_import_statements(self):
458 def do_import_statements(self):
422 statements = self.import_statements
459 statements = self.import_statements
423 for s in statements:
460 for s in statements:
424 try:
461 try:
425 self.log.msg("Executing statement: '%s'" % s)
462 self.log.msg("Executing statement: '%s'" % s)
426 exec s in globals(), locals()
463 exec s in globals(), locals()
427 except:
464 except:
428 self.log.msg("Error running statement: %s" % s)
465 self.log.msg("Error running statement: %s" % s)
429
466
430 def forward_logging(self):
467 def forward_logging(self):
431 if self.log_url:
468 if self.log_url:
432 self.log.info("Forwarding logging to %s"%self.log_url)
469 self.log.info("Forwarding logging to %s"%self.log_url)
433 context = zmq.Context.instance()
470 context = zmq.Context.instance()
434 lsock = context.socket(zmq.PUB)
471 lsock = context.socket(zmq.PUB)
435 lsock.connect(self.log_url)
472 lsock.connect(self.log_url)
436 handler = PUBHandler(lsock)
473 handler = PUBHandler(lsock)
437 handler.root_topic = 'controller'
474 handler.root_topic = 'controller'
438 handler.setLevel(self.log_level)
475 handler.setLevel(self.log_level)
439 self.log.addHandler(handler)
476 self.log.addHandler(handler)
440
477
441 @catch_config_error
478 @catch_config_error
442 def initialize(self, argv=None):
479 def initialize(self, argv=None):
443 super(IPControllerApp, self).initialize(argv)
480 super(IPControllerApp, self).initialize(argv)
444 self.forward_logging()
481 self.forward_logging()
445 self.load_secondary_config()
482 self.load_secondary_config()
446 self.init_hub()
483 self.init_hub()
447 self.init_schedulers()
484 self.init_schedulers()
448
485
449 def start(self):
486 def start(self):
450 # Start the subprocesses:
487 # Start the subprocesses:
451 self.factory.start()
488 self.factory.start()
452 # children must be started before signals are setup,
489 # children must be started before signals are setup,
453 # otherwise signal-handling will fire multiple times
490 # otherwise signal-handling will fire multiple times
454 for child in self.children:
491 for child in self.children:
455 child.start()
492 child.start()
456 self.init_signal()
493 self.init_signal()
457
494
458 self.write_pid_file(overwrite=True)
495 self.write_pid_file(overwrite=True)
459
496
460 try:
497 try:
461 self.factory.loop.start()
498 self.factory.loop.start()
462 except KeyboardInterrupt:
499 except KeyboardInterrupt:
463 self.log.critical("Interrupted, Exiting...\n")
500 self.log.critical("Interrupted, Exiting...\n")
464 finally:
501 finally:
465 self.cleanup_connection_files()
502 self.cleanup_connection_files()
466
503
467
504
468
505
469 def launch_new_instance():
506 def launch_new_instance():
470 """Create and run the IPython controller"""
507 """Create and run the IPython controller"""
471 if sys.platform == 'win32':
508 if sys.platform == 'win32':
472 # make sure we don't get called from a multiprocessing subprocess
509 # make sure we don't get called from a multiprocessing subprocess
473 # this can result in infinite Controllers being started on Windows
510 # this can result in infinite Controllers being started on Windows
474 # which doesn't have a proper fork, so multiprocessing is wonky
511 # which doesn't have a proper fork, so multiprocessing is wonky
475
512
476 # this only comes up when IPython has been installed using vanilla
513 # this only comes up when IPython has been installed using vanilla
477 # setuptools, and *not* distribute.
514 # setuptools, and *not* distribute.
478 import multiprocessing
515 import multiprocessing
479 p = multiprocessing.current_process()
516 p = multiprocessing.current_process()
480 # the main process has name 'MainProcess'
517 # the main process has name 'MainProcess'
481 # subprocesses will have names like 'Process-1'
518 # subprocesses will have names like 'Process-1'
482 if p.name != 'MainProcess':
519 if p.name != 'MainProcess':
483 # we are a subprocess, don't start another Controller!
520 # we are a subprocess, don't start another Controller!
484 return
521 return
485 app = IPControllerApp.instance()
522 app = IPControllerApp.instance()
486 app.initialize()
523 app.initialize()
487 app.start()
524 app.start()
488
525
489
526
490 if __name__ == '__main__':
527 if __name__ == '__main__':
491 launch_new_instance()
528 launch_new_instance()
@@ -1,377 +1,391
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # encoding: utf-8
2 # encoding: utf-8
3 """
3 """
4 The IPython engine application
4 The IPython engine application
5
5
6 Authors:
6 Authors:
7
7
8 * Brian Granger
8 * Brian Granger
9 * MinRK
9 * MinRK
10
10
11 """
11 """
12
12
13 #-----------------------------------------------------------------------------
13 #-----------------------------------------------------------------------------
14 # Copyright (C) 2008-2011 The IPython Development Team
14 # Copyright (C) 2008-2011 The IPython Development Team
15 #
15 #
16 # Distributed under the terms of the BSD License. The full license is in
16 # Distributed under the terms of the BSD License. The full license is in
17 # the file COPYING, distributed as part of this software.
17 # the file COPYING, distributed as part of this software.
18 #-----------------------------------------------------------------------------
18 #-----------------------------------------------------------------------------
19
19
20 #-----------------------------------------------------------------------------
20 #-----------------------------------------------------------------------------
21 # Imports
21 # Imports
22 #-----------------------------------------------------------------------------
22 #-----------------------------------------------------------------------------
23
23
24 import json
24 import json
25 import os
25 import os
26 import sys
26 import sys
27 import time
27 import time
28
28
29 import zmq
29 import zmq
30 from zmq.eventloop import ioloop
30 from zmq.eventloop import ioloop
31
31
32 from IPython.core.profiledir import ProfileDir
32 from IPython.core.profiledir import ProfileDir
33 from IPython.parallel.apps.baseapp import (
33 from IPython.parallel.apps.baseapp import (
34 BaseParallelApplication,
34 BaseParallelApplication,
35 base_aliases,
35 base_aliases,
36 base_flags,
36 base_flags,
37 catch_config_error,
37 catch_config_error,
38 )
38 )
39 from IPython.zmq.log import EnginePUBHandler
39 from IPython.zmq.log import EnginePUBHandler
40 from IPython.zmq.ipkernel import Kernel, IPKernelApp
40 from IPython.zmq.ipkernel import Kernel, IPKernelApp
41 from IPython.zmq.session import (
41 from IPython.zmq.session import (
42 Session, session_aliases, session_flags
42 Session, session_aliases, session_flags
43 )
43 )
44
44
45 from IPython.config.configurable import Configurable
45 from IPython.config.configurable import Configurable
46
46
47 from IPython.parallel.engine.engine import EngineFactory
47 from IPython.parallel.engine.engine import EngineFactory
48 from IPython.parallel.util import disambiguate_url
48 from IPython.parallel.util import disambiguate_ip_address
49
49
50 from IPython.utils.importstring import import_item
50 from IPython.utils.importstring import import_item
51 from IPython.utils.py3compat import cast_bytes
51 from IPython.utils.py3compat import cast_bytes
52 from IPython.utils.traitlets import Bool, Unicode, Dict, List, Float, Instance
52 from IPython.utils.traitlets import Bool, Unicode, Dict, List, Float, Instance
53
53
54
54
55 #-----------------------------------------------------------------------------
55 #-----------------------------------------------------------------------------
56 # Module level variables
56 # Module level variables
57 #-----------------------------------------------------------------------------
57 #-----------------------------------------------------------------------------
58
58
59 #: The default config file name for this application
59 #: The default config file name for this application
60 default_config_file_name = u'ipengine_config.py'
60 default_config_file_name = u'ipengine_config.py'
61
61
62 _description = """Start an IPython engine for parallel computing.
62 _description = """Start an IPython engine for parallel computing.
63
63
64 IPython engines run in parallel and perform computations on behalf of a client
64 IPython engines run in parallel and perform computations on behalf of a client
65 and controller. A controller needs to be started before the engines. The
65 and controller. A controller needs to be started before the engines. The
66 engine can be configured using command line options or using a cluster
66 engine can be configured using command line options or using a cluster
67 directory. Cluster directories contain config, log and security files and are
67 directory. Cluster directories contain config, log and security files and are
68 usually located in your ipython directory and named as "profile_name".
68 usually located in your ipython directory and named as "profile_name".
69 See the `profile` and `profile-dir` options for details.
69 See the `profile` and `profile-dir` options for details.
70 """
70 """
71
71
72 _examples = """
72 _examples = """
73 ipengine --ip=192.168.0.1 --port=1000 # connect to hub at ip and port
73 ipengine --ip=192.168.0.1 --port=1000 # connect to hub at ip and port
74 ipengine --log-to-file --log-level=DEBUG # log to a file with DEBUG verbosity
74 ipengine --log-to-file --log-level=DEBUG # log to a file with DEBUG verbosity
75 """
75 """
76
76
77 #-----------------------------------------------------------------------------
77 #-----------------------------------------------------------------------------
78 # MPI configuration
78 # MPI configuration
79 #-----------------------------------------------------------------------------
79 #-----------------------------------------------------------------------------
80
80
81 mpi4py_init = """from mpi4py import MPI as mpi
81 mpi4py_init = """from mpi4py import MPI as mpi
82 mpi.size = mpi.COMM_WORLD.Get_size()
82 mpi.size = mpi.COMM_WORLD.Get_size()
83 mpi.rank = mpi.COMM_WORLD.Get_rank()
83 mpi.rank = mpi.COMM_WORLD.Get_rank()
84 """
84 """
85
85
86
86
87 pytrilinos_init = """from PyTrilinos import Epetra
87 pytrilinos_init = """from PyTrilinos import Epetra
88 class SimpleStruct:
88 class SimpleStruct:
89 pass
89 pass
90 mpi = SimpleStruct()
90 mpi = SimpleStruct()
91 mpi.rank = 0
91 mpi.rank = 0
92 mpi.size = 0
92 mpi.size = 0
93 """
93 """
94
94
95 class MPI(Configurable):
95 class MPI(Configurable):
96 """Configurable for MPI initialization"""
96 """Configurable for MPI initialization"""
97 use = Unicode('', config=True,
97 use = Unicode('', config=True,
98 help='How to enable MPI (mpi4py, pytrilinos, or empty string to disable).'
98 help='How to enable MPI (mpi4py, pytrilinos, or empty string to disable).'
99 )
99 )
100
100
101 def _use_changed(self, name, old, new):
101 def _use_changed(self, name, old, new):
102 # load default init script if it's not set
102 # load default init script if it's not set
103 if not self.init_script:
103 if not self.init_script:
104 self.init_script = self.default_inits.get(new, '')
104 self.init_script = self.default_inits.get(new, '')
105
105
106 init_script = Unicode('', config=True,
106 init_script = Unicode('', config=True,
107 help="Initialization code for MPI")
107 help="Initialization code for MPI")
108
108
109 default_inits = Dict({'mpi4py' : mpi4py_init, 'pytrilinos':pytrilinos_init},
109 default_inits = Dict({'mpi4py' : mpi4py_init, 'pytrilinos':pytrilinos_init},
110 config=True)
110 config=True)
111
111
112
112
113 #-----------------------------------------------------------------------------
113 #-----------------------------------------------------------------------------
114 # Main application
114 # Main application
115 #-----------------------------------------------------------------------------
115 #-----------------------------------------------------------------------------
116 aliases = dict(
116 aliases = dict(
117 file = 'IPEngineApp.url_file',
117 file = 'IPEngineApp.url_file',
118 c = 'IPEngineApp.startup_command',
118 c = 'IPEngineApp.startup_command',
119 s = 'IPEngineApp.startup_script',
119 s = 'IPEngineApp.startup_script',
120
120
121 url = 'EngineFactory.url',
121 url = 'EngineFactory.url',
122 ssh = 'EngineFactory.sshserver',
122 ssh = 'EngineFactory.sshserver',
123 sshkey = 'EngineFactory.sshkey',
123 sshkey = 'EngineFactory.sshkey',
124 ip = 'EngineFactory.ip',
124 ip = 'EngineFactory.ip',
125 transport = 'EngineFactory.transport',
125 transport = 'EngineFactory.transport',
126 port = 'EngineFactory.regport',
126 port = 'EngineFactory.regport',
127 location = 'EngineFactory.location',
127 location = 'EngineFactory.location',
128
128
129 timeout = 'EngineFactory.timeout',
129 timeout = 'EngineFactory.timeout',
130
130
131 mpi = 'MPI.use',
131 mpi = 'MPI.use',
132
132
133 )
133 )
134 aliases.update(base_aliases)
134 aliases.update(base_aliases)
135 aliases.update(session_aliases)
135 aliases.update(session_aliases)
136 flags = {}
136 flags = {}
137 flags.update(base_flags)
137 flags.update(base_flags)
138 flags.update(session_flags)
138 flags.update(session_flags)
139
139
140 class IPEngineApp(BaseParallelApplication):
140 class IPEngineApp(BaseParallelApplication):
141
141
142 name = 'ipengine'
142 name = 'ipengine'
143 description = _description
143 description = _description
144 examples = _examples
144 examples = _examples
145 config_file_name = Unicode(default_config_file_name)
145 config_file_name = Unicode(default_config_file_name)
146 classes = List([ProfileDir, Session, EngineFactory, Kernel, MPI])
146 classes = List([ProfileDir, Session, EngineFactory, Kernel, MPI])
147
147
148 startup_script = Unicode(u'', config=True,
148 startup_script = Unicode(u'', config=True,
149 help='specify a script to be run at startup')
149 help='specify a script to be run at startup')
150 startup_command = Unicode('', config=True,
150 startup_command = Unicode('', config=True,
151 help='specify a command to be run at startup')
151 help='specify a command to be run at startup')
152
152
153 url_file = Unicode(u'', config=True,
153 url_file = Unicode(u'', config=True,
154 help="""The full location of the file containing the connection information for
154 help="""The full location of the file containing the connection information for
155 the controller. If this is not given, the file must be in the
155 the controller. If this is not given, the file must be in the
156 security directory of the cluster directory. This location is
156 security directory of the cluster directory. This location is
157 resolved using the `profile` or `profile_dir` options.""",
157 resolved using the `profile` or `profile_dir` options.""",
158 )
158 )
159 wait_for_url_file = Float(5, config=True,
159 wait_for_url_file = Float(5, config=True,
160 help="""The maximum number of seconds to wait for url_file to exist.
160 help="""The maximum number of seconds to wait for url_file to exist.
161 This is useful for batch-systems and shared-filesystems where the
161 This is useful for batch-systems and shared-filesystems where the
162 controller and engine are started at the same time and it
162 controller and engine are started at the same time and it
163 may take a moment for the controller to write the connector files.""")
163 may take a moment for the controller to write the connector files.""")
164
164
165 url_file_name = Unicode(u'ipcontroller-engine.json', config=True)
165 url_file_name = Unicode(u'ipcontroller-engine.json', config=True)
166
166
167 def _cluster_id_changed(self, name, old, new):
167 def _cluster_id_changed(self, name, old, new):
168 if new:
168 if new:
169 base = 'ipcontroller-%s' % new
169 base = 'ipcontroller-%s' % new
170 else:
170 else:
171 base = 'ipcontroller'
171 base = 'ipcontroller'
172 self.url_file_name = "%s-engine.json" % base
172 self.url_file_name = "%s-engine.json" % base
173
173
174 log_url = Unicode('', config=True,
174 log_url = Unicode('', config=True,
175 help="""The URL for the iploggerapp instance, for forwarding
175 help="""The URL for the iploggerapp instance, for forwarding
176 logging to a central location.""")
176 logging to a central location.""")
177
177
178 # an IPKernelApp instance, used to setup listening for shell frontends
178 # an IPKernelApp instance, used to setup listening for shell frontends
179 kernel_app = Instance(IPKernelApp)
179 kernel_app = Instance(IPKernelApp)
180
180
181 aliases = Dict(aliases)
181 aliases = Dict(aliases)
182 flags = Dict(flags)
182 flags = Dict(flags)
183
183
184 @property
184 @property
185 def kernel(self):
185 def kernel(self):
186 """allow access to the Kernel object, so I look like IPKernelApp"""
186 """allow access to the Kernel object, so I look like IPKernelApp"""
187 return self.engine.kernel
187 return self.engine.kernel
188
188
189 def find_url_file(self):
189 def find_url_file(self):
190 """Set the url file.
190 """Set the url file.
191
191
192 Here we don't try to actually see if it exists or is valid as that
192 Here we don't try to actually see if it exists or is valid as that
193 is handled by the connection logic.
193 is handled by the connection logic.
194 """
194 """
195 config = self.config
195 config = self.config
196 # Find the actual controller key file
196 # Find the actual controller key file
197 if not self.url_file:
197 if not self.url_file:
198 self.url_file = os.path.join(
198 self.url_file = os.path.join(
199 self.profile_dir.security_dir,
199 self.profile_dir.security_dir,
200 self.url_file_name
200 self.url_file_name
201 )
201 )
202
202
203 def load_connector_file(self):
203 def load_connector_file(self):
204 """load config from a JSON connector file,
204 """load config from a JSON connector file,
205 at a *lower* priority than command-line/config files.
205 at a *lower* priority than command-line/config files.
206 """
206 """
207
207
208 self.log.info("Loading url_file %r", self.url_file)
208 self.log.info("Loading url_file %r", self.url_file)
209 config = self.config
209 config = self.config
210
210
211 with open(self.url_file) as f:
211 with open(self.url_file) as f:
212 d = json.loads(f.read())
212 d = json.loads(f.read())
213
213
214 if 'exec_key' in d:
214 # allow hand-override of location for disambiguation
215 config.Session.key = cast_bytes(d['exec_key'])
215 # and ssh-server
216
217 try:
216 try:
218 config.EngineFactory.location
217 config.EngineFactory.location
219 except AttributeError:
218 except AttributeError:
220 config.EngineFactory.location = d['location']
219 config.EngineFactory.location = d['location']
221
220
222 d['url'] = disambiguate_url(d['url'], config.EngineFactory.location)
223 try:
224 config.EngineFactory.url
225 except AttributeError:
226 config.EngineFactory.url = d['url']
227
228 try:
221 try:
229 config.EngineFactory.sshserver
222 config.EngineFactory.sshserver
230 except AttributeError:
223 except AttributeError:
231 config.EngineFactory.sshserver = d['ssh']
224 config.EngineFactory.sshserver = d.get('ssh')
225
226 location = config.EngineFactory.location
227
228 proto, ip = d['interface'].split('://')
229 ip = disambiguate_ip_address(ip)
230 d['interface'] = '%s://%s' % (proto, ip)
231
232 # DO NOT allow override of basic URLs, serialization, or exec_key
233 # JSON file takes top priority there
234 config.Session.key = cast_bytes(d['exec_key'])
235
236 config.EngineFactory.url = d['interface'] + ':%i' % d['registration']
237
238 config.Session.packer = d['pack']
239 config.Session.unpacker = d['unpack']
240
241 self.log.debug("Config changed:")
242 self.log.debug("%r", config)
243 self.connection_info = d
232
244
233 def bind_kernel(self, **kwargs):
245 def bind_kernel(self, **kwargs):
234 """Promote engine to listening kernel, accessible to frontends."""
246 """Promote engine to listening kernel, accessible to frontends."""
235 if self.kernel_app is not None:
247 if self.kernel_app is not None:
236 return
248 return
237
249
238 self.log.info("Opening ports for direct connections as an IPython kernel")
250 self.log.info("Opening ports for direct connections as an IPython kernel")
239
251
240 kernel = self.kernel
252 kernel = self.kernel
241
253
242 kwargs.setdefault('config', self.config)
254 kwargs.setdefault('config', self.config)
243 kwargs.setdefault('log', self.log)
255 kwargs.setdefault('log', self.log)
244 kwargs.setdefault('profile_dir', self.profile_dir)
256 kwargs.setdefault('profile_dir', self.profile_dir)
245 kwargs.setdefault('session', self.engine.session)
257 kwargs.setdefault('session', self.engine.session)
246
258
247 app = self.kernel_app = IPKernelApp(**kwargs)
259 app = self.kernel_app = IPKernelApp(**kwargs)
248
260
249 # allow IPKernelApp.instance():
261 # allow IPKernelApp.instance():
250 IPKernelApp._instance = app
262 IPKernelApp._instance = app
251
263
252 app.init_connection_file()
264 app.init_connection_file()
253 # relevant contents of init_sockets:
265 # relevant contents of init_sockets:
254
266
255 app.shell_port = app._bind_socket(kernel.shell_streams[0], app.shell_port)
267 app.shell_port = app._bind_socket(kernel.shell_streams[0], app.shell_port)
256 app.log.debug("shell ROUTER Channel on port: %i", app.shell_port)
268 app.log.debug("shell ROUTER Channel on port: %i", app.shell_port)
257
269
258 app.iopub_port = app._bind_socket(kernel.iopub_socket, app.iopub_port)
270 app.iopub_port = app._bind_socket(kernel.iopub_socket, app.iopub_port)
259 app.log.debug("iopub PUB Channel on port: %i", app.iopub_port)
271 app.log.debug("iopub PUB Channel on port: %i", app.iopub_port)
260
272
261 kernel.stdin_socket = self.engine.context.socket(zmq.ROUTER)
273 kernel.stdin_socket = self.engine.context.socket(zmq.ROUTER)
262 app.stdin_port = app._bind_socket(kernel.stdin_socket, app.stdin_port)
274 app.stdin_port = app._bind_socket(kernel.stdin_socket, app.stdin_port)
263 app.log.debug("stdin ROUTER Channel on port: %i", app.stdin_port)
275 app.log.debug("stdin ROUTER Channel on port: %i", app.stdin_port)
264
276
265 # start the heartbeat, and log connection info:
277 # start the heartbeat, and log connection info:
266
278
267 app.init_heartbeat()
279 app.init_heartbeat()
268
280
269 app.log_connection_info()
281 app.log_connection_info()
270 app.write_connection_file()
282 app.write_connection_file()
271
283
272
284
273 def init_engine(self):
285 def init_engine(self):
274 # This is the working dir by now.
286 # This is the working dir by now.
275 sys.path.insert(0, '')
287 sys.path.insert(0, '')
276 config = self.config
288 config = self.config
277 # print config
289 # print config
278 self.find_url_file()
290 self.find_url_file()
279
291
280 # was the url manually specified?
292 # was the url manually specified?
281 keys = set(self.config.EngineFactory.keys())
293 keys = set(self.config.EngineFactory.keys())
282 keys = keys.union(set(self.config.RegistrationFactory.keys()))
294 keys = keys.union(set(self.config.RegistrationFactory.keys()))
283
295
284 if keys.intersection(set(['ip', 'url', 'port'])):
296 if keys.intersection(set(['ip', 'url', 'port'])):
285 # Connection info was specified, don't wait for the file
297 # Connection info was specified, don't wait for the file
286 url_specified = True
298 url_specified = True
287 self.wait_for_url_file = 0
299 self.wait_for_url_file = 0
288 else:
300 else:
289 url_specified = False
301 url_specified = False
290
302
291 if self.wait_for_url_file and not os.path.exists(self.url_file):
303 if self.wait_for_url_file and not os.path.exists(self.url_file):
292 self.log.warn("url_file %r not found", self.url_file)
304 self.log.warn("url_file %r not found", self.url_file)
293 self.log.warn("Waiting up to %.1f seconds for it to arrive.", self.wait_for_url_file)
305 self.log.warn("Waiting up to %.1f seconds for it to arrive.", self.wait_for_url_file)
294 tic = time.time()
306 tic = time.time()
295 while not os.path.exists(self.url_file) and (time.time()-tic < self.wait_for_url_file):
307 while not os.path.exists(self.url_file) and (time.time()-tic < self.wait_for_url_file):
296 # wait for url_file to exist, or until time limit
308 # wait for url_file to exist, or until time limit
297 time.sleep(0.1)
309 time.sleep(0.1)
298
310
299 if os.path.exists(self.url_file):
311 if os.path.exists(self.url_file):
300 self.load_connector_file()
312 self.load_connector_file()
301 elif not url_specified:
313 elif not url_specified:
302 self.log.fatal("Fatal: url file never arrived: %s", self.url_file)
314 self.log.fatal("Fatal: url file never arrived: %s", self.url_file)
303 self.exit(1)
315 self.exit(1)
304
316
305
317
306 try:
318 try:
307 exec_lines = config.Kernel.exec_lines
319 exec_lines = config.Kernel.exec_lines
308 except AttributeError:
320 except AttributeError:
309 config.Kernel.exec_lines = []
321 config.Kernel.exec_lines = []
310 exec_lines = config.Kernel.exec_lines
322 exec_lines = config.Kernel.exec_lines
311
323
312 if self.startup_script:
324 if self.startup_script:
313 enc = sys.getfilesystemencoding() or 'utf8'
325 enc = sys.getfilesystemencoding() or 'utf8'
314 cmd="execfile(%r)" % self.startup_script.encode(enc)
326 cmd="execfile(%r)" % self.startup_script.encode(enc)
315 exec_lines.append(cmd)
327 exec_lines.append(cmd)
316 if self.startup_command:
328 if self.startup_command:
317 exec_lines.append(self.startup_command)
329 exec_lines.append(self.startup_command)
318
330
319 # Create the underlying shell class and Engine
331 # Create the underlying shell class and Engine
320 # shell_class = import_item(self.master_config.Global.shell_class)
332 # shell_class = import_item(self.master_config.Global.shell_class)
321 # print self.config
333 # print self.config
322 try:
334 try:
323 self.engine = EngineFactory(config=config, log=self.log)
335 self.engine = EngineFactory(config=config, log=self.log,
336 connection_info=self.connection_info,
337 )
324 except:
338 except:
325 self.log.error("Couldn't start the Engine", exc_info=True)
339 self.log.error("Couldn't start the Engine", exc_info=True)
326 self.exit(1)
340 self.exit(1)
327
341
328 def forward_logging(self):
342 def forward_logging(self):
329 if self.log_url:
343 if self.log_url:
330 self.log.info("Forwarding logging to %s", self.log_url)
344 self.log.info("Forwarding logging to %s", self.log_url)
331 context = self.engine.context
345 context = self.engine.context
332 lsock = context.socket(zmq.PUB)
346 lsock = context.socket(zmq.PUB)
333 lsock.connect(self.log_url)
347 lsock.connect(self.log_url)
334 handler = EnginePUBHandler(self.engine, lsock)
348 handler = EnginePUBHandler(self.engine, lsock)
335 handler.setLevel(self.log_level)
349 handler.setLevel(self.log_level)
336 self.log.addHandler(handler)
350 self.log.addHandler(handler)
337
351
338 def init_mpi(self):
352 def init_mpi(self):
339 global mpi
353 global mpi
340 self.mpi = MPI(config=self.config)
354 self.mpi = MPI(config=self.config)
341
355
342 mpi_import_statement = self.mpi.init_script
356 mpi_import_statement = self.mpi.init_script
343 if mpi_import_statement:
357 if mpi_import_statement:
344 try:
358 try:
345 self.log.info("Initializing MPI:")
359 self.log.info("Initializing MPI:")
346 self.log.info(mpi_import_statement)
360 self.log.info(mpi_import_statement)
347 exec mpi_import_statement in globals()
361 exec mpi_import_statement in globals()
348 except:
362 except:
349 mpi = None
363 mpi = None
350 else:
364 else:
351 mpi = None
365 mpi = None
352
366
353 @catch_config_error
367 @catch_config_error
354 def initialize(self, argv=None):
368 def initialize(self, argv=None):
355 super(IPEngineApp, self).initialize(argv)
369 super(IPEngineApp, self).initialize(argv)
356 self.init_mpi()
370 self.init_mpi()
357 self.init_engine()
371 self.init_engine()
358 self.forward_logging()
372 self.forward_logging()
359
373
360 def start(self):
374 def start(self):
361 self.engine.start()
375 self.engine.start()
362 try:
376 try:
363 self.engine.loop.start()
377 self.engine.loop.start()
364 except KeyboardInterrupt:
378 except KeyboardInterrupt:
365 self.log.critical("Engine Interrupted, shutting down...\n")
379 self.log.critical("Engine Interrupted, shutting down...\n")
366
380
367
381
368 def launch_new_instance():
382 def launch_new_instance():
369 """Create and run the IPython engine"""
383 """Create and run the IPython engine"""
370 app = IPEngineApp.instance()
384 app = IPEngineApp.instance()
371 app.initialize()
385 app.initialize()
372 app.start()
386 app.start()
373
387
374
388
375 if __name__ == '__main__':
389 if __name__ == '__main__':
376 launch_new_instance()
390 launch_new_instance()
377
391
@@ -1,1721 +1,1700
1 """A semi-synchronous Client for the ZMQ cluster
1 """A semi-synchronous Client for the ZMQ cluster
2
2
3 Authors:
3 Authors:
4
4
5 * MinRK
5 * MinRK
6 """
6 """
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2010-2011 The IPython Development Team
8 # Copyright (C) 2010-2011 The IPython Development Team
9 #
9 #
10 # Distributed under the terms of the BSD License. The full license is in
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13
13
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15 # Imports
15 # Imports
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 import os
18 import os
19 import json
19 import json
20 import sys
20 import sys
21 from threading import Thread, Event
21 from threading import Thread, Event
22 import time
22 import time
23 import warnings
23 import warnings
24 from datetime import datetime
24 from datetime import datetime
25 from getpass import getpass
25 from getpass import getpass
26 from pprint import pprint
26 from pprint import pprint
27
27
28 pjoin = os.path.join
28 pjoin = os.path.join
29
29
30 import zmq
30 import zmq
31 # from zmq.eventloop import ioloop, zmqstream
31 # from zmq.eventloop import ioloop, zmqstream
32
32
33 from IPython.config.configurable import MultipleInstanceError
33 from IPython.config.configurable import MultipleInstanceError
34 from IPython.core.application import BaseIPythonApplication
34 from IPython.core.application import BaseIPythonApplication
35 from IPython.core.profiledir import ProfileDir, ProfileDirError
35 from IPython.core.profiledir import ProfileDir, ProfileDirError
36
36
37 from IPython.utils.coloransi import TermColors
37 from IPython.utils.coloransi import TermColors
38 from IPython.utils.jsonutil import rekey
38 from IPython.utils.jsonutil import rekey
39 from IPython.utils.localinterfaces import LOCAL_IPS
39 from IPython.utils.localinterfaces import LOCAL_IPS
40 from IPython.utils.path import get_ipython_dir
40 from IPython.utils.path import get_ipython_dir
41 from IPython.utils.py3compat import cast_bytes
41 from IPython.utils.py3compat import cast_bytes
42 from IPython.utils.traitlets import (HasTraits, Integer, Instance, Unicode,
42 from IPython.utils.traitlets import (HasTraits, Integer, Instance, Unicode,
43 Dict, List, Bool, Set, Any)
43 Dict, List, Bool, Set, Any)
44 from IPython.external.decorator import decorator
44 from IPython.external.decorator import decorator
45 from IPython.external.ssh import tunnel
45 from IPython.external.ssh import tunnel
46
46
47 from IPython.parallel import Reference
47 from IPython.parallel import Reference
48 from IPython.parallel import error
48 from IPython.parallel import error
49 from IPython.parallel import util
49 from IPython.parallel import util
50
50
51 from IPython.zmq.session import Session, Message
51 from IPython.zmq.session import Session, Message
52
52
53 from .asyncresult import AsyncResult, AsyncHubResult
53 from .asyncresult import AsyncResult, AsyncHubResult
54 from .view import DirectView, LoadBalancedView
54 from .view import DirectView, LoadBalancedView
55
55
56 if sys.version_info[0] >= 3:
56 if sys.version_info[0] >= 3:
57 # xrange is used in a couple 'isinstance' tests in py2
57 # xrange is used in a couple 'isinstance' tests in py2
58 # should be just 'range' in 3k
58 # should be just 'range' in 3k
59 xrange = range
59 xrange = range
60
60
61 #--------------------------------------------------------------------------
61 #--------------------------------------------------------------------------
62 # Decorators for Client methods
62 # Decorators for Client methods
63 #--------------------------------------------------------------------------
63 #--------------------------------------------------------------------------
64
64
65 @decorator
65 @decorator
66 def spin_first(f, self, *args, **kwargs):
66 def spin_first(f, self, *args, **kwargs):
67 """Call spin() to sync state prior to calling the method."""
67 """Call spin() to sync state prior to calling the method."""
68 self.spin()
68 self.spin()
69 return f(self, *args, **kwargs)
69 return f(self, *args, **kwargs)
70
70
71
71
72 #--------------------------------------------------------------------------
72 #--------------------------------------------------------------------------
73 # Classes
73 # Classes
74 #--------------------------------------------------------------------------
74 #--------------------------------------------------------------------------
75
75
76
76
77 class ExecuteReply(object):
77 class ExecuteReply(object):
78 """wrapper for finished Execute results"""
78 """wrapper for finished Execute results"""
79 def __init__(self, msg_id, content, metadata):
79 def __init__(self, msg_id, content, metadata):
80 self.msg_id = msg_id
80 self.msg_id = msg_id
81 self._content = content
81 self._content = content
82 self.execution_count = content['execution_count']
82 self.execution_count = content['execution_count']
83 self.metadata = metadata
83 self.metadata = metadata
84
84
85 def __getitem__(self, key):
85 def __getitem__(self, key):
86 return self.metadata[key]
86 return self.metadata[key]
87
87
88 def __getattr__(self, key):
88 def __getattr__(self, key):
89 if key not in self.metadata:
89 if key not in self.metadata:
90 raise AttributeError(key)
90 raise AttributeError(key)
91 return self.metadata[key]
91 return self.metadata[key]
92
92
93 def __repr__(self):
93 def __repr__(self):
94 pyout = self.metadata['pyout'] or {'data':{}}
94 pyout = self.metadata['pyout'] or {'data':{}}
95 text_out = pyout['data'].get('text/plain', '')
95 text_out = pyout['data'].get('text/plain', '')
96 if len(text_out) > 32:
96 if len(text_out) > 32:
97 text_out = text_out[:29] + '...'
97 text_out = text_out[:29] + '...'
98
98
99 return "<ExecuteReply[%i]: %s>" % (self.execution_count, text_out)
99 return "<ExecuteReply[%i]: %s>" % (self.execution_count, text_out)
100
100
101 def _repr_pretty_(self, p, cycle):
101 def _repr_pretty_(self, p, cycle):
102 pyout = self.metadata['pyout'] or {'data':{}}
102 pyout = self.metadata['pyout'] or {'data':{}}
103 text_out = pyout['data'].get('text/plain', '')
103 text_out = pyout['data'].get('text/plain', '')
104
104
105 if not text_out:
105 if not text_out:
106 return
106 return
107
107
108 try:
108 try:
109 ip = get_ipython()
109 ip = get_ipython()
110 except NameError:
110 except NameError:
111 colors = "NoColor"
111 colors = "NoColor"
112 else:
112 else:
113 colors = ip.colors
113 colors = ip.colors
114
114
115 if colors == "NoColor":
115 if colors == "NoColor":
116 out = normal = ""
116 out = normal = ""
117 else:
117 else:
118 out = TermColors.Red
118 out = TermColors.Red
119 normal = TermColors.Normal
119 normal = TermColors.Normal
120
120
121 if '\n' in text_out and not text_out.startswith('\n'):
121 if '\n' in text_out and not text_out.startswith('\n'):
122 # add newline for multiline reprs
122 # add newline for multiline reprs
123 text_out = '\n' + text_out
123 text_out = '\n' + text_out
124
124
125 p.text(
125 p.text(
126 out + u'Out[%i:%i]: ' % (
126 out + u'Out[%i:%i]: ' % (
127 self.metadata['engine_id'], self.execution_count
127 self.metadata['engine_id'], self.execution_count
128 ) + normal + text_out
128 ) + normal + text_out
129 )
129 )
130
130
131 def _repr_html_(self):
131 def _repr_html_(self):
132 pyout = self.metadata['pyout'] or {'data':{}}
132 pyout = self.metadata['pyout'] or {'data':{}}
133 return pyout['data'].get("text/html")
133 return pyout['data'].get("text/html")
134
134
135 def _repr_latex_(self):
135 def _repr_latex_(self):
136 pyout = self.metadata['pyout'] or {'data':{}}
136 pyout = self.metadata['pyout'] or {'data':{}}
137 return pyout['data'].get("text/latex")
137 return pyout['data'].get("text/latex")
138
138
139 def _repr_json_(self):
139 def _repr_json_(self):
140 pyout = self.metadata['pyout'] or {'data':{}}
140 pyout = self.metadata['pyout'] or {'data':{}}
141 return pyout['data'].get("application/json")
141 return pyout['data'].get("application/json")
142
142
143 def _repr_javascript_(self):
143 def _repr_javascript_(self):
144 pyout = self.metadata['pyout'] or {'data':{}}
144 pyout = self.metadata['pyout'] or {'data':{}}
145 return pyout['data'].get("application/javascript")
145 return pyout['data'].get("application/javascript")
146
146
147 def _repr_png_(self):
147 def _repr_png_(self):
148 pyout = self.metadata['pyout'] or {'data':{}}
148 pyout = self.metadata['pyout'] or {'data':{}}
149 return pyout['data'].get("image/png")
149 return pyout['data'].get("image/png")
150
150
151 def _repr_jpeg_(self):
151 def _repr_jpeg_(self):
152 pyout = self.metadata['pyout'] or {'data':{}}
152 pyout = self.metadata['pyout'] or {'data':{}}
153 return pyout['data'].get("image/jpeg")
153 return pyout['data'].get("image/jpeg")
154
154
155 def _repr_svg_(self):
155 def _repr_svg_(self):
156 pyout = self.metadata['pyout'] or {'data':{}}
156 pyout = self.metadata['pyout'] or {'data':{}}
157 return pyout['data'].get("image/svg+xml")
157 return pyout['data'].get("image/svg+xml")
158
158
159
159
class Metadata(dict):
    """Subclass of dict for initializing metadata values.

    Attribute access works on keys.

    These objects have a strict set of keys - errors will raise if you try
    to add new keys.
    """
    def __init__(self, *args, **kwargs):
        """Populate the default keys, then apply ``dict(*args, **kwargs)``.

        NOTE(review): both steps go through ``dict.update``, which in CPython
        does not call the overridden ``__setitem__``; values passed to the
        constructor are therefore not subject to the strict-key check.
        """
        dict.__init__(self)
        # built per instance, so the mutable 'outputs' list is never shared
        md = {'msg_id' : None,
              'submitted' : None,
              'started' : None,
              'completed' : None,
              'received' : None,
              'engine_uuid' : None,
              'engine_id' : None,
              'follow' : None,
              'after' : None,
              'status' : None,

              'pyin' : None,
              'pyout' : None,
              'pyerr' : None,
              'stdout' : '',
              'stderr' : '',
              'outputs' : [],
              'outputs_ready' : False,
              }
        self.update(md)
        self.update(dict(*args, **kwargs))

    def __getattr__(self, key):
        """getattr aliased to getitem"""
        # direct membership test: a hash lookup instead of scanning all keys
        if key in self:
            return self[key]
        else:
            raise AttributeError(key)

    def __setattr__(self, key, value):
        """setattr aliased to setitem, with strict"""
        if key in self:
            self[key] = value
        else:
            raise AttributeError(key)

    def __setitem__(self, key, value):
        """strict static key enforcement"""
        if key in self:
            dict.__setitem__(self, key, value)
        else:
            raise KeyError(key)
212
212
213
213
214 class Client(HasTraits):
214 class Client(HasTraits):
215 """A semi-synchronous client to the IPython ZMQ cluster
215 """A semi-synchronous client to the IPython ZMQ cluster
216
216
217 Parameters
217 Parameters
218 ----------
218 ----------
219
219
220 url_or_file : bytes or unicode; zmq url or path to ipcontroller-client.json
220 url_file : str/unicode; path to ipcontroller-client.json
221 This JSON file should contain all the information needed to connect to a cluster,
222 and is likely the only argument needed.
221 Connection information for the Hub's registration. If a json connector
223 Connection information for the Hub's registration. If a json connector
222 file is given, then likely no further configuration is necessary.
224 file is given, then likely no further configuration is necessary.
223 [Default: use profile]
225 [Default: use profile]
224 profile : bytes
226 profile : bytes
225 The name of the Cluster profile to be used to find connector information.
227 The name of the Cluster profile to be used to find connector information.
226 If run from an IPython application, the default profile will be the same
228 If run from an IPython application, the default profile will be the same
227 as the running application, otherwise it will be 'default'.
229 as the running application, otherwise it will be 'default'.
228 context : zmq.Context
230 context : zmq.Context
229 Pass an existing zmq.Context instance, otherwise the client will create its own.
231 Pass an existing zmq.Context instance, otherwise the client will create its own.
230 debug : bool
232 debug : bool
231 flag for lots of message printing for debug purposes
233 flag for lots of message printing for debug purposes
232 timeout : int/float
234 timeout : int/float
233 time (in seconds) to wait for connection replies from the Hub
235 time (in seconds) to wait for connection replies from the Hub
234 [Default: 10]
236 [Default: 10]
235
237
236 #-------------- session related args ----------------
238 #-------------- session related args ----------------
237
239
238 config : Config object
240 config : Config object
239 If specified, this will be relayed to the Session for configuration
241 If specified, this will be relayed to the Session for configuration
240 username : str
242 username : str
241 set username for the session object
243 set username for the session object
242 packer : str (import_string) or callable
243 Can be either the simple keyword 'json' or 'pickle', or an import_string to a
244 function to serialize messages. Must support same input as
245 JSON, and output must be bytes.
246 You can pass a callable directly as `pack`
247 unpacker : str (import_string) or callable
248 The inverse of packer. Only necessary if packer is specified as *not* one
249 of 'json' or 'pickle'.
250
244
251 #-------------- ssh related args ----------------
245 #-------------- ssh related args ----------------
252 # These are args for configuring the ssh tunnel to be used
246 # These are args for configuring the ssh tunnel to be used
253 # credentials are used to forward connections over ssh to the Controller
247 # credentials are used to forward connections over ssh to the Controller
254 # Note that the ip given in `addr` needs to be relative to sshserver
248 # Note that the ip given in `addr` needs to be relative to sshserver
255 # The most basic case is to leave addr as pointing to localhost (127.0.0.1),
249 # The most basic case is to leave addr as pointing to localhost (127.0.0.1),
256 # and set sshserver as the same machine the Controller is on. However,
250 # and set sshserver as the same machine the Controller is on. However,
257 # the only requirement is that sshserver is able to see the Controller
251 # the only requirement is that sshserver is able to see the Controller
258 # (i.e. is within the same trusted network).
252 # (i.e. is within the same trusted network).
259
253
260 sshserver : str
254 sshserver : str
261 A string of the form passed to ssh, i.e. 'server.tld' or 'user@server.tld:port'
255 A string of the form passed to ssh, i.e. 'server.tld' or 'user@server.tld:port'
262 If keyfile or password is specified, and this is not, it will default to
256 If keyfile or password is specified, and this is not, it will default to
263 the ip given in addr.
257 the ip given in addr.
264 sshkey : str; path to ssh private key file
258 sshkey : str; path to ssh private key file
265 This specifies a key to be used in ssh login, default None.
259 This specifies a key to be used in ssh login, default None.
266 Regular default ssh keys will be used without specifying this argument.
260 Regular default ssh keys will be used without specifying this argument.
267 password : str
261 password : str
268 Your ssh password to sshserver. Note that if this is left None,
262 Your ssh password to sshserver. Note that if this is left None,
269 you will be prompted for it if passwordless key based login is unavailable.
263 you will be prompted for it if passwordless key based login is unavailable.
270 paramiko : bool
264 paramiko : bool
271 flag for whether to use paramiko instead of shell ssh for tunneling.
265 flag for whether to use paramiko instead of shell ssh for tunneling.
272 [default: True on win32, False else]
266 [default: True on win32, False else]
273
267
274 ------- exec authentication args -------
275 If even localhost is untrusted, you can have some protection against
276 unauthorized execution by signing messages with HMAC digests.
277 Messages are still sent as cleartext, so if someone can snoop your
278 loopback traffic this will not protect your privacy, but will prevent
279 unauthorized execution.
280
281 exec_key : str
282 an authentication key or file containing a key
283 default: None
284
285
268
286 Attributes
269 Attributes
287 ----------
270 ----------
288
271
289 ids : list of int engine IDs
272 ids : list of int engine IDs
290 requesting the ids attribute always synchronizes
273 requesting the ids attribute always synchronizes
291 the registration state. To request ids without synchronization,
274 the registration state. To request ids without synchronization,
292 use semi-private _ids attributes.
275 use semi-private _ids attributes.
293
276
294 history : list of msg_ids
277 history : list of msg_ids
295 a list of msg_ids, keeping track of all the execution
278 a list of msg_ids, keeping track of all the execution
296 messages you have submitted in order.
279 messages you have submitted in order.
297
280
298 outstanding : set of msg_ids
281 outstanding : set of msg_ids
299 a set of msg_ids that have been submitted, but whose
282 a set of msg_ids that have been submitted, but whose
300 results have not yet been received.
283 results have not yet been received.
301
284
302 results : dict
285 results : dict
303 a dict of all our results, keyed by msg_id
286 a dict of all our results, keyed by msg_id
304
287
305 block : bool
288 block : bool
306 determines default behavior when block not specified
289 determines default behavior when block not specified
307 in execution methods
290 in execution methods
308
291
309 Methods
292 Methods
310 -------
293 -------
311
294
312 spin
295 spin
313 flushes incoming results and registration state changes
296 flushes incoming results and registration state changes
314 control methods spin, and requesting `ids` also ensures up to date
297 control methods spin, and requesting `ids` also ensures up to date
315
298
316 wait
299 wait
317 wait on one or more msg_ids
300 wait on one or more msg_ids
318
301
319 execution methods
302 execution methods
320 apply
303 apply
321 legacy: execute, run
304 legacy: execute, run
322
305
323 data movement
306 data movement
324 push, pull, scatter, gather
307 push, pull, scatter, gather
325
308
326 query methods
309 query methods
327 queue_status, get_result, purge, result_status
310 queue_status, get_result, purge, result_status
328
311
329 control methods
312 control methods
330 abort, shutdown
313 abort, shutdown
331
314
332 """
315 """
333
316
334
317
335 block = Bool(False)
318 block = Bool(False)
336 outstanding = Set()
319 outstanding = Set()
337 results = Instance('collections.defaultdict', (dict,))
320 results = Instance('collections.defaultdict', (dict,))
338 metadata = Instance('collections.defaultdict', (Metadata,))
321 metadata = Instance('collections.defaultdict', (Metadata,))
339 history = List()
322 history = List()
340 debug = Bool(False)
323 debug = Bool(False)
341 _spin_thread = Any()
324 _spin_thread = Any()
342 _stop_spinning = Any()
325 _stop_spinning = Any()
343
326
344 profile=Unicode()
327 profile=Unicode()
def _profile_default(self):
    """Default value for the `profile` trait.

    Returns the profile of the running IPython application when one is
    initialized and can be retrieved, otherwise ``u'default'``.
    """
    if not BaseIPythonApplication.initialized():
        return u'default'
    # an IPython app *might* be running, try to get its profile
    try:
        return BaseIPythonApplication.instance().profile
    except (AttributeError, MultipleInstanceError):
        # could be a *different* subclass of config.Application,
        # which would raise one of these two errors.
        return u'default'
356
339
357
340
358 _outstanding_dict = Instance('collections.defaultdict', (set,))
341 _outstanding_dict = Instance('collections.defaultdict', (set,))
359 _ids = List()
342 _ids = List()
360 _connected=Bool(False)
343 _connected=Bool(False)
361 _ssh=Bool(False)
344 _ssh=Bool(False)
362 _context = Instance('zmq.Context')
345 _context = Instance('zmq.Context')
363 _config = Dict()
346 _config = Dict()
364 _engines=Instance(util.ReverseDict, (), {})
347 _engines=Instance(util.ReverseDict, (), {})
365 # _hub_socket=Instance('zmq.Socket')
348 # _hub_socket=Instance('zmq.Socket')
366 _query_socket=Instance('zmq.Socket')
349 _query_socket=Instance('zmq.Socket')
367 _control_socket=Instance('zmq.Socket')
350 _control_socket=Instance('zmq.Socket')
368 _iopub_socket=Instance('zmq.Socket')
351 _iopub_socket=Instance('zmq.Socket')
369 _notification_socket=Instance('zmq.Socket')
352 _notification_socket=Instance('zmq.Socket')
370 _mux_socket=Instance('zmq.Socket')
353 _mux_socket=Instance('zmq.Socket')
371 _task_socket=Instance('zmq.Socket')
354 _task_socket=Instance('zmq.Socket')
372 _task_scheme=Unicode()
355 _task_scheme=Unicode()
373 _closed = False
356 _closed = False
374 _ignored_control_replies=Integer(0)
357 _ignored_control_replies=Integer(0)
375 _ignored_hub_replies=Integer(0)
358 _ignored_hub_replies=Integer(0)
376
359
def __new__(cls, *args, **kw):
    """Create the instance, forwarding only keyword arguments to HasTraits.

    Positional arguments are accepted and dropped here so that
    ``HasTraits.__new__`` does not raise on them.
    """
    # don't raise on positional args
    return HasTraits.__new__(cls, **kw)
380
363
def __init__(self, url_file=None, profile=None, profile_dir=None, ipython_dir=None,
        context=None, debug=False,
        sshserver=None, sshkey=None, password=None, paramiko=None,
        timeout=10, **extra_args
        ):
    """Load the connection file, build the Session and connect to the Hub.

    Parameters
    ----------
    url_file : path to a JSON connection file; when None it defaults to
        ``ipcontroller-client.json`` in the profile's security dir.
    profile, profile_dir, ipython_dir : used to locate the profile directory.
    context : zmq.Context; ``zmq.Context.instance()`` is used when None.
    debug : bool, copied to the session's ``debug`` attribute.
    sshserver, sshkey, password, paramiko : ssh tunnel settings.
    timeout : seconds to wait for the Hub's connection reply.
    extra_args : forwarded to ``Session``. The deprecated ``url_or_file``
        keyword is still accepted here and emits a DeprecationWarning.

    Raises
    ------
    ValueError
        if `url_file` is a url rather than a file, or if no connection file
        can be determined.
    """
    if profile:
        super(Client, self).__init__(debug=debug, profile=profile)
    else:
        super(Client, self).__init__(debug=debug)
    if context is None:
        context = zmq.Context.instance()
    self._context = context
    self._stop_spinning = Event()

    if 'url_or_file' in extra_args:
        # pop (rather than read) so the deprecated name is not forwarded
        # to Session(**extra_args) below
        url_file = extra_args.pop('url_or_file')
        warnings.warn("url_or_file arg no longer supported, use url_file", DeprecationWarning)

    if url_file and util.is_url(url_file):
        raise ValueError("single urls cannot be specified, url-files must be used.")

    self._setup_profile_dir(self.profile, profile_dir, ipython_dir)

    if self._cd is not None:
        if url_file is None:
            url_file = pjoin(self._cd.security_dir, 'ipcontroller-client.json')
    if url_file is None:
        raise ValueError(
            "I can't find enough information to connect to a hub!"
            " Please specify at least one of url_file or profile."
        )

    with open(url_file) as f:
        cfg = json.load(f)

    self._task_scheme = cfg['task_scheme']

    # sync defaults from args, json:
    if sshserver:
        cfg['ssh'] = sshserver

    location = cfg.setdefault('location', None)

    proto, addr = cfg['interface'].split('://')
    addr = util.disambiguate_ip_address(addr)
    cfg['interface'] = "%s://%s" % (proto, addr)

    # turn interface,port into full urls:
    for key in ('control', 'task', 'mux', 'iopub', 'notification', 'registration'):
        cfg[key] = cfg['interface'] + ':%i' % cfg[key]

    if location is not None and addr == '127.0.0.1':
        # location specified, and connection is expected to be local
        if location not in LOCAL_IPS and not sshserver:
            # load ssh from JSON *only* if the controller is not on
            # this machine
            sshserver = cfg['ssh']
        if location not in LOCAL_IPS and not sshserver:
            # warn if no ssh specified, but SSH is probably needed
            # This is only a warning, because the most likely cause
            # is a local Controller on a laptop whose IP is dynamic
            # NOTE(review): leading whitespace inside this message could not
            # be recovered from the rendered diff - confirm against the file.
            warnings.warn(
                "\nController appears to be listening on localhost, but not on this machine."
                "\nIf this is true, you should specify Client(...,sshserver='you@%s')"
                "\nor instruct your controller to listen on an external IP." % location,
                RuntimeWarning)
    elif not sshserver:
        # otherwise sync with cfg
        sshserver = cfg['ssh']

    self._config = cfg

    self._ssh = bool(sshserver or sshkey or password)
    if self._ssh and not sshserver:
        # default to ssh via localhost; `not sshserver` also covers an empty
        # 'ssh' entry picked up from the connection file, not just None
        sshserver = addr
    if self._ssh and password is None:
        if tunnel.try_passwordless_ssh(sshserver, sshkey, paramiko):
            password = False
        else:
            password = getpass("SSH Password for %s: " % sshserver)
    ssh_kwargs = dict(keyfile=sshkey, password=password, paramiko=paramiko)

    # configure and construct the session
    extra_args['packer'] = cfg['pack']
    extra_args['unpacker'] = cfg['unpack']
    extra_args['key'] = cast_bytes(cfg['exec_key'])

    self.session = Session(**extra_args)

    self._query_socket = self._context.socket(zmq.DEALER)

    if self._ssh:
        tunnel.tunnel_connection(self._query_socket, cfg['registration'], sshserver, **ssh_kwargs)
    else:
        self._query_socket.connect(cfg['registration'])

    self.session.debug = self.debug

    self._notification_handlers = {'registration_notification' : self._register_engine,
                                'unregistration_notification' : self._unregister_engine,
                                'shutdown_notification' : lambda msg: self.close(),
                                }
    self._queue_handlers = {'execute_reply' : self._handle_execute_reply,
                            'apply_reply' : self._handle_apply_reply}
    self._connect(sshserver, ssh_kwargs, timeout)

    # last step: setup magics, if we are in IPython:

    try:
        ip = get_ipython()
    except NameError:
        return
    else:
        if 'px' not in ip.magics_manager.magics:
            # in IPython but we are the first Client.
            # activate a default view for parallel magics.
            self.activate()
496
484
497 def __del__(self):
485 def __del__(self):
498 """cleanup sockets, but _not_ context."""
486 """cleanup sockets, but _not_ context."""
499 self.close()
487 self.close()
500
488
def _setup_profile_dir(self, profile, profile_dir, ipython_dir):
    """Locate the profile directory and store it in ``self._cd``.

    `profile_dir` takes precedence when given; otherwise `profile` is looked
    up by name under `ipython_dir` (``get_ipython_dir()`` when None).
    ``self._cd`` is set to None when the lookup raises ProfileDirError or
    when neither argument is given.
    """
    if ipython_dir is None:
        ipython_dir = get_ipython_dir()
    if profile_dir is not None:
        try:
            self._cd = ProfileDir.find_profile_dir(profile_dir)
            return
        except ProfileDirError:
            # fall through to the `self._cd = None` default
            pass
    elif profile is not None:
        try:
            self._cd = ProfileDir.find_profile_dir_by_name(
                ipython_dir, profile)
            return
        except ProfileDirError:
            pass
    self._cd = None
518
506
519 def _update_engines(self, engines):
507 def _update_engines(self, engines):
520 """Update our engines dict and _ids from a dict of the form: {id:uuid}."""
508 """Update our engines dict and _ids from a dict of the form: {id:uuid}."""
521 for k,v in engines.iteritems():
509 for k,v in engines.iteritems():
522 eid = int(k)
510 eid = int(k)
523 self._engines[eid] = v
511 if eid not in self._engines:
524 self._ids.append(eid)
512 self._ids.append(eid)
513 self._engines[eid] = v
525 self._ids = sorted(self._ids)
514 self._ids = sorted(self._ids)
526 if sorted(self._engines.keys()) != range(len(self._engines)) and \
515 if sorted(self._engines.keys()) != range(len(self._engines)) and \
527 self._task_scheme == 'pure' and self._task_socket:
516 self._task_scheme == 'pure' and self._task_socket:
528 self._stop_scheduling_tasks()
517 self._stop_scheduling_tasks()
529
518
530 def _stop_scheduling_tasks(self):
519 def _stop_scheduling_tasks(self):
531 """Stop scheduling tasks because an engine has been unregistered
520 """Stop scheduling tasks because an engine has been unregistered
532 from a pure ZMQ scheduler.
521 from a pure ZMQ scheduler.
533 """
522 """
534 self._task_socket.close()
523 self._task_socket.close()
535 self._task_socket = None
524 self._task_socket = None
536 msg = "An engine has been unregistered, and we are using pure " +\
525 msg = "An engine has been unregistered, and we are using pure " +\
537 "ZMQ task scheduling. Task farming will be disabled."
526 "ZMQ task scheduling. Task farming will be disabled."
538 if self.outstanding:
527 if self.outstanding:
539 msg += " If you were running tasks when this happened, " +\
528 msg += " If you were running tasks when this happened, " +\
540 "some `outstanding` msg_ids may never resolve."
529 "some `outstanding` msg_ids may never resolve."
541 warnings.warn(msg, RuntimeWarning)
530 warnings.warn(msg, RuntimeWarning)
542
531
543 def _build_targets(self, targets):
def _build_targets(self, targets):
    """Turn valid target IDs or 'all' into two lists:
    (uuids, int_ids).

    Parameters
    ----------
    targets : None, 'all', int, slice, or tuple/list/xrange of ints
        None and 'all' select every known engine id. A negative int
        indexes into ``self.ids``. A slice is applied to the positions
        of the known ids.

    Returns
    -------
    (uuids, int_ids) : the uuids are the ``self._engines`` values for each
        id, passed through ``cast_bytes``; int_ids is ``list(targets)``.

    Raises
    ------
    error.NoEnginesRegistered
        if no engines are known, even after reading ``self.ids``.
    TypeError
        for a string other than 'all', or an unsupported target type.
    IndexError
        for an int that is not a known engine id.
    """
    if not self._ids:
        # flush notification socket if no engines yet, just in case
        if not self.ids:
            raise error.NoEnginesRegistered("Can't build targets without any engines")

    if targets is None:
        targets = self._ids
    elif isinstance(targets, basestring):
        if targets.lower() == 'all':
            targets = self._ids
        else:
            raise TypeError("%r not valid str target, must be 'all'"%(targets))
    elif isinstance(targets, int):
        if targets < 0:
            targets = self.ids[targets]
        if targets not in self._ids:
            raise IndexError("No such engine: %i"%targets)
        targets = [targets]

    if isinstance(targets, slice):
        # slice over positions, then map positions back to engine ids
        indices = range(len(self._ids))[targets]
        ids = self.ids
        targets = [ ids[i] for i in indices ]

    if not isinstance(targets, (tuple, list, xrange)):
        raise TypeError("targets by int/slice/collection of ints only, not %s"%(type(targets)))

    return [cast_bytes(self._engines[t]) for t in targets], list(targets)
564
576 def _connect(self, sshserver, ssh_kwargs, timeout):
def _connect(self, sshserver, ssh_kwargs, timeout):
    """setup all our socket connections to the cluster. This is called from
    __init__.

    Sends a 'connection_request' on the query socket, waits up to `timeout`
    seconds for the reply, then creates and connects the mux, task,
    notification, control and iopub sockets using the urls stored in
    ``self._config``. Returns immediately if already connected.

    Parameters
    ----------
    sshserver : passed to ``tunnel.tunnel_connection`` when ``self._ssh`` is set.
    ssh_kwargs : dict of extra keyword arguments for ``tunnel.tunnel_connection``.
    timeout : int/float, seconds to wait for the reply.

    Raises
    ------
    error.TimeoutError
        if no reply arrives within `timeout`.
    Exception
        if the reply's status is not 'ok'.
    """

    # Maybe allow reconnecting?
    if self._connected:
        return
    self._connected = True

    def connect_socket(s, url):
        """Connect `s` to `url`, through an ssh tunnel when enabled."""
        if self._ssh:
            return tunnel.tunnel_connection(s, url, sshserver, **ssh_kwargs)
        else:
            return s.connect(url)

    self.session.send(self._query_socket, 'connection_request')
    # use Poller because zmq.select has wrong units in pyzmq 2.1.7
    poller = zmq.Poller()
    poller.register(self._query_socket, zmq.POLLIN)
    # poll expects milliseconds, timeout is seconds
    evts = poller.poll(timeout*1000)
    if not evts:
        # undo the flag set above so that a later call can try again,
        # as the failure branch at the end of this method already does
        self._connected = False
        raise error.TimeoutError("Hub connection request timed out")
    _idents, msg = self.session.recv(self._query_socket, mode=0)
    if self.debug:
        pprint(msg)
    content = msg['content']
    cfg = self._config
    if content['status'] == 'ok':
        self._mux_socket = self._context.socket(zmq.DEALER)
        connect_socket(self._mux_socket, cfg['mux'])

        self._task_socket = self._context.socket(zmq.DEALER)
        connect_socket(self._task_socket, cfg['task'])

        self._notification_socket = self._context.socket(zmq.SUB)
        self._notification_socket.setsockopt(zmq.SUBSCRIBE, b'')
        connect_socket(self._notification_socket, cfg['notification'])

        self._control_socket = self._context.socket(zmq.DEALER)
        connect_socket(self._control_socket, cfg['control'])

        self._iopub_socket = self._context.socket(zmq.SUB)
        self._iopub_socket.setsockopt(zmq.SUBSCRIBE, b'')
        connect_socket(self._iopub_socket, cfg['iopub'])

        self._update_engines(dict(content['engines']))
    else:
        self._connected = False
        raise Exception("Failed to connect!")
617
639 #--------------------------------------------------------------------------
618 #--------------------------------------------------------------------------
640 # handlers and callbacks for incoming messages
619 # handlers and callbacks for incoming messages
641 #--------------------------------------------------------------------------
620 #--------------------------------------------------------------------------
642
621
643 def _unwrap_exception(self, content):
622 def _unwrap_exception(self, content):
644 """unwrap exception, and remap engine_id to int."""
623 """unwrap exception, and remap engine_id to int."""
645 e = error.unwrap_exception(content)
624 e = error.unwrap_exception(content)
646 # print e.traceback
625 # print e.traceback
647 if e.engine_info:
626 if e.engine_info:
648 e_uuid = e.engine_info['engine_uuid']
627 e_uuid = e.engine_info['engine_uuid']
649 eid = self._engines[e_uuid]
628 eid = self._engines[e_uuid]
650 e.engine_info['engine_id'] = eid
629 e.engine_info['engine_id'] = eid
651 return e
630 return e
652
631
653 def _extract_metadata(self, header, parent, content):
632 def _extract_metadata(self, header, parent, content):
654 md = {'msg_id' : parent['msg_id'],
633 md = {'msg_id' : parent['msg_id'],
655 'received' : datetime.now(),
634 'received' : datetime.now(),
656 'engine_uuid' : header.get('engine', None),
635 'engine_uuid' : header.get('engine', None),
657 'follow' : parent.get('follow', []),
636 'follow' : parent.get('follow', []),
658 'after' : parent.get('after', []),
637 'after' : parent.get('after', []),
659 'status' : content['status'],
638 'status' : content['status'],
660 }
639 }
661
640
662 if md['engine_uuid'] is not None:
641 if md['engine_uuid'] is not None:
663 md['engine_id'] = self._engines.get(md['engine_uuid'], None)
642 md['engine_id'] = self._engines.get(md['engine_uuid'], None)
664
643
665 if 'date' in parent:
644 if 'date' in parent:
666 md['submitted'] = parent['date']
645 md['submitted'] = parent['date']
667 if 'started' in header:
646 if 'started' in header:
668 md['started'] = header['started']
647 md['started'] = header['started']
669 if 'date' in header:
648 if 'date' in header:
670 md['completed'] = header['date']
649 md['completed'] = header['date']
671 return md
650 return md
672
651
673 def _register_engine(self, msg):
652 def _register_engine(self, msg):
674 """Register a new engine, and update our connection info."""
653 """Register a new engine, and update our connection info."""
675 content = msg['content']
654 content = msg['content']
676 eid = content['id']
655 eid = content['id']
677 d = {eid : content['queue']}
656 d = {eid : content['uuid']}
678 self._update_engines(d)
657 self._update_engines(d)
679
658
680 def _unregister_engine(self, msg):
659 def _unregister_engine(self, msg):
681 """Unregister an engine that has died."""
660 """Unregister an engine that has died."""
682 content = msg['content']
661 content = msg['content']
683 eid = int(content['id'])
662 eid = int(content['id'])
684 if eid in self._ids:
663 if eid in self._ids:
685 self._ids.remove(eid)
664 self._ids.remove(eid)
686 uuid = self._engines.pop(eid)
665 uuid = self._engines.pop(eid)
687
666
688 self._handle_stranded_msgs(eid, uuid)
667 self._handle_stranded_msgs(eid, uuid)
689
668
690 if self._task_socket and self._task_scheme == 'pure':
669 if self._task_socket and self._task_scheme == 'pure':
691 self._stop_scheduling_tasks()
670 self._stop_scheduling_tasks()
692
671
693 def _handle_stranded_msgs(self, eid, uuid):
672 def _handle_stranded_msgs(self, eid, uuid):
694 """Handle messages known to be on an engine when the engine unregisters.
673 """Handle messages known to be on an engine when the engine unregisters.
695
674
696 It is possible that this will fire prematurely - that is, an engine will
675 It is possible that this will fire prematurely - that is, an engine will
697 go down after completing a result, and the client will be notified
676 go down after completing a result, and the client will be notified
698 of the unregistration and later receive the successful result.
677 of the unregistration and later receive the successful result.
699 """
678 """
700
679
701 outstanding = self._outstanding_dict[uuid]
680 outstanding = self._outstanding_dict[uuid]
702
681
703 for msg_id in list(outstanding):
682 for msg_id in list(outstanding):
704 if msg_id in self.results:
683 if msg_id in self.results:
705 # we already
684 # we already
706 continue
685 continue
707 try:
686 try:
708 raise error.EngineError("Engine %r died while running task %r"%(eid, msg_id))
687 raise error.EngineError("Engine %r died while running task %r"%(eid, msg_id))
709 except:
688 except:
710 content = error.wrap_exception()
689 content = error.wrap_exception()
711 # build a fake message:
690 # build a fake message:
712 parent = {}
691 parent = {}
713 header = {}
692 header = {}
714 parent['msg_id'] = msg_id
693 parent['msg_id'] = msg_id
715 header['engine'] = uuid
694 header['engine'] = uuid
716 header['date'] = datetime.now()
695 header['date'] = datetime.now()
717 msg = dict(parent_header=parent, header=header, content=content)
696 msg = dict(parent_header=parent, header=header, content=content)
718 self._handle_apply_reply(msg)
697 self._handle_apply_reply(msg)
719
698
720 def _handle_execute_reply(self, msg):
699 def _handle_execute_reply(self, msg):
721 """Save the reply to an execute_request into our results.
700 """Save the reply to an execute_request into our results.
722
701
723 execute messages are never actually used. apply is used instead.
702 execute messages are never actually used. apply is used instead.
724 """
703 """
725
704
726 parent = msg['parent_header']
705 parent = msg['parent_header']
727 msg_id = parent['msg_id']
706 msg_id = parent['msg_id']
728 if msg_id not in self.outstanding:
707 if msg_id not in self.outstanding:
729 if msg_id in self.history:
708 if msg_id in self.history:
730 print ("got stale result: %s"%msg_id)
709 print ("got stale result: %s"%msg_id)
731 else:
710 else:
732 print ("got unknown result: %s"%msg_id)
711 print ("got unknown result: %s"%msg_id)
733 else:
712 else:
734 self.outstanding.remove(msg_id)
713 self.outstanding.remove(msg_id)
735
714
736 content = msg['content']
715 content = msg['content']
737 header = msg['header']
716 header = msg['header']
738
717
739 # construct metadata:
718 # construct metadata:
740 md = self.metadata[msg_id]
719 md = self.metadata[msg_id]
741 md.update(self._extract_metadata(header, parent, content))
720 md.update(self._extract_metadata(header, parent, content))
742 # is this redundant?
721 # is this redundant?
743 self.metadata[msg_id] = md
722 self.metadata[msg_id] = md
744
723
745 e_outstanding = self._outstanding_dict[md['engine_uuid']]
724 e_outstanding = self._outstanding_dict[md['engine_uuid']]
746 if msg_id in e_outstanding:
725 if msg_id in e_outstanding:
747 e_outstanding.remove(msg_id)
726 e_outstanding.remove(msg_id)
748
727
749 # construct result:
728 # construct result:
750 if content['status'] == 'ok':
729 if content['status'] == 'ok':
751 self.results[msg_id] = ExecuteReply(msg_id, content, md)
730 self.results[msg_id] = ExecuteReply(msg_id, content, md)
752 elif content['status'] == 'aborted':
731 elif content['status'] == 'aborted':
753 self.results[msg_id] = error.TaskAborted(msg_id)
732 self.results[msg_id] = error.TaskAborted(msg_id)
754 elif content['status'] == 'resubmitted':
733 elif content['status'] == 'resubmitted':
755 # TODO: handle resubmission
734 # TODO: handle resubmission
756 pass
735 pass
757 else:
736 else:
758 self.results[msg_id] = self._unwrap_exception(content)
737 self.results[msg_id] = self._unwrap_exception(content)
759
738
760 def _handle_apply_reply(self, msg):
739 def _handle_apply_reply(self, msg):
761 """Save the reply to an apply_request into our results."""
740 """Save the reply to an apply_request into our results."""
762 parent = msg['parent_header']
741 parent = msg['parent_header']
763 msg_id = parent['msg_id']
742 msg_id = parent['msg_id']
764 if msg_id not in self.outstanding:
743 if msg_id not in self.outstanding:
765 if msg_id in self.history:
744 if msg_id in self.history:
766 print ("got stale result: %s"%msg_id)
745 print ("got stale result: %s"%msg_id)
767 print self.results[msg_id]
746 print self.results[msg_id]
768 print msg
747 print msg
769 else:
748 else:
770 print ("got unknown result: %s"%msg_id)
749 print ("got unknown result: %s"%msg_id)
771 else:
750 else:
772 self.outstanding.remove(msg_id)
751 self.outstanding.remove(msg_id)
773 content = msg['content']
752 content = msg['content']
774 header = msg['header']
753 header = msg['header']
775
754
776 # construct metadata:
755 # construct metadata:
777 md = self.metadata[msg_id]
756 md = self.metadata[msg_id]
778 md.update(self._extract_metadata(header, parent, content))
757 md.update(self._extract_metadata(header, parent, content))
779 # is this redundant?
758 # is this redundant?
780 self.metadata[msg_id] = md
759 self.metadata[msg_id] = md
781
760
782 e_outstanding = self._outstanding_dict[md['engine_uuid']]
761 e_outstanding = self._outstanding_dict[md['engine_uuid']]
783 if msg_id in e_outstanding:
762 if msg_id in e_outstanding:
784 e_outstanding.remove(msg_id)
763 e_outstanding.remove(msg_id)
785
764
786 # construct result:
765 # construct result:
787 if content['status'] == 'ok':
766 if content['status'] == 'ok':
788 self.results[msg_id] = util.unserialize_object(msg['buffers'])[0]
767 self.results[msg_id] = util.unserialize_object(msg['buffers'])[0]
789 elif content['status'] == 'aborted':
768 elif content['status'] == 'aborted':
790 self.results[msg_id] = error.TaskAborted(msg_id)
769 self.results[msg_id] = error.TaskAborted(msg_id)
791 elif content['status'] == 'resubmitted':
770 elif content['status'] == 'resubmitted':
792 # TODO: handle resubmission
771 # TODO: handle resubmission
793 pass
772 pass
794 else:
773 else:
795 self.results[msg_id] = self._unwrap_exception(content)
774 self.results[msg_id] = self._unwrap_exception(content)
796
775
797 def _flush_notifications(self):
776 def _flush_notifications(self):
798 """Flush notifications of engine registrations waiting
777 """Flush notifications of engine registrations waiting
799 in ZMQ queue."""
778 in ZMQ queue."""
800 idents,msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)
779 idents,msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)
801 while msg is not None:
780 while msg is not None:
802 if self.debug:
781 if self.debug:
803 pprint(msg)
782 pprint(msg)
804 msg_type = msg['header']['msg_type']
783 msg_type = msg['header']['msg_type']
805 handler = self._notification_handlers.get(msg_type, None)
784 handler = self._notification_handlers.get(msg_type, None)
806 if handler is None:
785 if handler is None:
807 raise Exception("Unhandled message type: %s"%msg.msg_type)
786 raise Exception("Unhandled message type: %s"%msg.msg_type)
808 else:
787 else:
809 handler(msg)
788 handler(msg)
810 idents,msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)
789 idents,msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)
811
790
812 def _flush_results(self, sock):
791 def _flush_results(self, sock):
813 """Flush task or queue results waiting in ZMQ queue."""
792 """Flush task or queue results waiting in ZMQ queue."""
814 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
793 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
815 while msg is not None:
794 while msg is not None:
816 if self.debug:
795 if self.debug:
817 pprint(msg)
796 pprint(msg)
818 msg_type = msg['header']['msg_type']
797 msg_type = msg['header']['msg_type']
819 handler = self._queue_handlers.get(msg_type, None)
798 handler = self._queue_handlers.get(msg_type, None)
820 if handler is None:
799 if handler is None:
821 raise Exception("Unhandled message type: %s"%msg.msg_type)
800 raise Exception("Unhandled message type: %s"%msg.msg_type)
822 else:
801 else:
823 handler(msg)
802 handler(msg)
824 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
803 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
825
804
826 def _flush_control(self, sock):
805 def _flush_control(self, sock):
827 """Flush replies from the control channel waiting
806 """Flush replies from the control channel waiting
828 in the ZMQ queue.
807 in the ZMQ queue.
829
808
830 Currently: ignore them."""
809 Currently: ignore them."""
831 if self._ignored_control_replies <= 0:
810 if self._ignored_control_replies <= 0:
832 return
811 return
833 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
812 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
834 while msg is not None:
813 while msg is not None:
835 self._ignored_control_replies -= 1
814 self._ignored_control_replies -= 1
836 if self.debug:
815 if self.debug:
837 pprint(msg)
816 pprint(msg)
838 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
817 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
839
818
840 def _flush_ignored_control(self):
819 def _flush_ignored_control(self):
841 """flush ignored control replies"""
820 """flush ignored control replies"""
842 while self._ignored_control_replies > 0:
821 while self._ignored_control_replies > 0:
843 self.session.recv(self._control_socket)
822 self.session.recv(self._control_socket)
844 self._ignored_control_replies -= 1
823 self._ignored_control_replies -= 1
845
824
846 def _flush_ignored_hub_replies(self):
825 def _flush_ignored_hub_replies(self):
847 ident,msg = self.session.recv(self._query_socket, mode=zmq.NOBLOCK)
826 ident,msg = self.session.recv(self._query_socket, mode=zmq.NOBLOCK)
848 while msg is not None:
827 while msg is not None:
849 ident,msg = self.session.recv(self._query_socket, mode=zmq.NOBLOCK)
828 ident,msg = self.session.recv(self._query_socket, mode=zmq.NOBLOCK)
850
829
851 def _flush_iopub(self, sock):
830 def _flush_iopub(self, sock):
852 """Flush replies from the iopub channel waiting
831 """Flush replies from the iopub channel waiting
853 in the ZMQ queue.
832 in the ZMQ queue.
854 """
833 """
855 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
834 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
856 while msg is not None:
835 while msg is not None:
857 if self.debug:
836 if self.debug:
858 pprint(msg)
837 pprint(msg)
859 parent = msg['parent_header']
838 parent = msg['parent_header']
860 # ignore IOPub messages with no parent.
839 # ignore IOPub messages with no parent.
861 # Caused by print statements or warnings from before the first execution.
840 # Caused by print statements or warnings from before the first execution.
862 if not parent:
841 if not parent:
863 continue
842 continue
864 msg_id = parent['msg_id']
843 msg_id = parent['msg_id']
865 content = msg['content']
844 content = msg['content']
866 header = msg['header']
845 header = msg['header']
867 msg_type = msg['header']['msg_type']
846 msg_type = msg['header']['msg_type']
868
847
869 # init metadata:
848 # init metadata:
870 md = self.metadata[msg_id]
849 md = self.metadata[msg_id]
871
850
872 if msg_type == 'stream':
851 if msg_type == 'stream':
873 name = content['name']
852 name = content['name']
874 s = md[name] or ''
853 s = md[name] or ''
875 md[name] = s + content['data']
854 md[name] = s + content['data']
876 elif msg_type == 'pyerr':
855 elif msg_type == 'pyerr':
877 md.update({'pyerr' : self._unwrap_exception(content)})
856 md.update({'pyerr' : self._unwrap_exception(content)})
878 elif msg_type == 'pyin':
857 elif msg_type == 'pyin':
879 md.update({'pyin' : content['code']})
858 md.update({'pyin' : content['code']})
880 elif msg_type == 'display_data':
859 elif msg_type == 'display_data':
881 md['outputs'].append(content)
860 md['outputs'].append(content)
882 elif msg_type == 'pyout':
861 elif msg_type == 'pyout':
883 md['pyout'] = content
862 md['pyout'] = content
884 elif msg_type == 'status':
863 elif msg_type == 'status':
885 # idle message comes after all outputs
864 # idle message comes after all outputs
886 if content['execution_state'] == 'idle':
865 if content['execution_state'] == 'idle':
887 md['outputs_ready'] = True
866 md['outputs_ready'] = True
888 else:
867 else:
889 # unhandled msg_type (status, etc.)
868 # unhandled msg_type (status, etc.)
890 pass
869 pass
891
870
892 # reduntant?
871 # reduntant?
893 self.metadata[msg_id] = md
872 self.metadata[msg_id] = md
894
873
895 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
874 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
896
875
897 #--------------------------------------------------------------------------
876 #--------------------------------------------------------------------------
898 # len, getitem
877 # len, getitem
899 #--------------------------------------------------------------------------
878 #--------------------------------------------------------------------------
900
879
901 def __len__(self):
880 def __len__(self):
902 """len(client) returns # of engines."""
881 """len(client) returns # of engines."""
903 return len(self.ids)
882 return len(self.ids)
904
883
905 def __getitem__(self, key):
884 def __getitem__(self, key):
906 """index access returns DirectView multiplexer objects
885 """index access returns DirectView multiplexer objects
907
886
908 Must be int, slice, or list/tuple/xrange of ints"""
887 Must be int, slice, or list/tuple/xrange of ints"""
909 if not isinstance(key, (int, slice, tuple, list, xrange)):
888 if not isinstance(key, (int, slice, tuple, list, xrange)):
910 raise TypeError("key by int/slice/iterable of ints only, not %s"%(type(key)))
889 raise TypeError("key by int/slice/iterable of ints only, not %s"%(type(key)))
911 else:
890 else:
912 return self.direct_view(key)
891 return self.direct_view(key)
913
892
914 #--------------------------------------------------------------------------
893 #--------------------------------------------------------------------------
915 # Begin public methods
894 # Begin public methods
916 #--------------------------------------------------------------------------
895 #--------------------------------------------------------------------------
917
896
918 @property
897 @property
919 def ids(self):
898 def ids(self):
920 """Always up-to-date ids property."""
899 """Always up-to-date ids property."""
921 self._flush_notifications()
900 self._flush_notifications()
922 # always copy:
901 # always copy:
923 return list(self._ids)
902 return list(self._ids)
924
903
925 def activate(self, targets='all', suffix=''):
904 def activate(self, targets='all', suffix=''):
926 """Create a DirectView and register it with IPython magics
905 """Create a DirectView and register it with IPython magics
927
906
928 Defines the magics `%px, %autopx, %pxresult, %%px`
907 Defines the magics `%px, %autopx, %pxresult, %%px`
929
908
930 Parameters
909 Parameters
931 ----------
910 ----------
932
911
933 targets: int, list of ints, or 'all'
912 targets: int, list of ints, or 'all'
934 The engines on which the view's magics will run
913 The engines on which the view's magics will run
935 suffix: str [default: '']
914 suffix: str [default: '']
936 The suffix, if any, for the magics. This allows you to have
915 The suffix, if any, for the magics. This allows you to have
937 multiple views associated with parallel magics at the same time.
916 multiple views associated with parallel magics at the same time.
938
917
939 e.g. ``rc.activate(targets=0, suffix='0')`` will give you
918 e.g. ``rc.activate(targets=0, suffix='0')`` will give you
940 the magics ``%px0``, ``%pxresult0``, etc. for running magics just
919 the magics ``%px0``, ``%pxresult0``, etc. for running magics just
941 on engine 0.
920 on engine 0.
942 """
921 """
943 view = self.direct_view(targets)
922 view = self.direct_view(targets)
944 view.block = True
923 view.block = True
945 view.activate(suffix)
924 view.activate(suffix)
946 return view
925 return view
947
926
948 def close(self):
927 def close(self):
949 if self._closed:
928 if self._closed:
950 return
929 return
951 self.stop_spin_thread()
930 self.stop_spin_thread()
952 snames = filter(lambda n: n.endswith('socket'), dir(self))
931 snames = filter(lambda n: n.endswith('socket'), dir(self))
953 for socket in map(lambda name: getattr(self, name), snames):
932 for socket in map(lambda name: getattr(self, name), snames):
954 if isinstance(socket, zmq.Socket) and not socket.closed:
933 if isinstance(socket, zmq.Socket) and not socket.closed:
955 socket.close()
934 socket.close()
956 self._closed = True
935 self._closed = True
957
936
958 def _spin_every(self, interval=1):
937 def _spin_every(self, interval=1):
959 """target func for use in spin_thread"""
938 """target func for use in spin_thread"""
960 while True:
939 while True:
961 if self._stop_spinning.is_set():
940 if self._stop_spinning.is_set():
962 return
941 return
963 time.sleep(interval)
942 time.sleep(interval)
964 self.spin()
943 self.spin()
965
944
966 def spin_thread(self, interval=1):
945 def spin_thread(self, interval=1):
967 """call Client.spin() in a background thread on some regular interval
946 """call Client.spin() in a background thread on some regular interval
968
947
969 This helps ensure that messages don't pile up too much in the zmq queue
948 This helps ensure that messages don't pile up too much in the zmq queue
970 while you are working on other things, or just leaving an idle terminal.
949 while you are working on other things, or just leaving an idle terminal.
971
950
972 It also helps limit potential padding of the `received` timestamp
951 It also helps limit potential padding of the `received` timestamp
973 on AsyncResult objects, used for timings.
952 on AsyncResult objects, used for timings.
974
953
975 Parameters
954 Parameters
976 ----------
955 ----------
977
956
978 interval : float, optional
957 interval : float, optional
979 The interval on which to spin the client in the background thread
958 The interval on which to spin the client in the background thread
980 (simply passed to time.sleep).
959 (simply passed to time.sleep).
981
960
982 Notes
961 Notes
983 -----
962 -----
984
963
985 For precision timing, you may want to use this method to put a bound
964 For precision timing, you may want to use this method to put a bound
986 on the jitter (in seconds) in `received` timestamps used
965 on the jitter (in seconds) in `received` timestamps used
987 in AsyncResult.wall_time.
966 in AsyncResult.wall_time.
988
967
989 """
968 """
990 if self._spin_thread is not None:
969 if self._spin_thread is not None:
991 self.stop_spin_thread()
970 self.stop_spin_thread()
992 self._stop_spinning.clear()
971 self._stop_spinning.clear()
993 self._spin_thread = Thread(target=self._spin_every, args=(interval,))
972 self._spin_thread = Thread(target=self._spin_every, args=(interval,))
994 self._spin_thread.daemon = True
973 self._spin_thread.daemon = True
995 self._spin_thread.start()
974 self._spin_thread.start()
996
975
997 def stop_spin_thread(self):
976 def stop_spin_thread(self):
998 """stop background spin_thread, if any"""
977 """stop background spin_thread, if any"""
999 if self._spin_thread is not None:
978 if self._spin_thread is not None:
1000 self._stop_spinning.set()
979 self._stop_spinning.set()
1001 self._spin_thread.join()
980 self._spin_thread.join()
1002 self._spin_thread = None
981 self._spin_thread = None
1003
982
1004 def spin(self):
983 def spin(self):
1005 """Flush any registration notifications and execution results
984 """Flush any registration notifications and execution results
1006 waiting in the ZMQ queue.
985 waiting in the ZMQ queue.
1007 """
986 """
1008 if self._notification_socket:
987 if self._notification_socket:
1009 self._flush_notifications()
988 self._flush_notifications()
1010 if self._iopub_socket:
989 if self._iopub_socket:
1011 self._flush_iopub(self._iopub_socket)
990 self._flush_iopub(self._iopub_socket)
1012 if self._mux_socket:
991 if self._mux_socket:
1013 self._flush_results(self._mux_socket)
992 self._flush_results(self._mux_socket)
1014 if self._task_socket:
993 if self._task_socket:
1015 self._flush_results(self._task_socket)
994 self._flush_results(self._task_socket)
1016 if self._control_socket:
995 if self._control_socket:
1017 self._flush_control(self._control_socket)
996 self._flush_control(self._control_socket)
1018 if self._query_socket:
997 if self._query_socket:
1019 self._flush_ignored_hub_replies()
998 self._flush_ignored_hub_replies()
1020
999
1021 def wait(self, jobs=None, timeout=-1):
1000 def wait(self, jobs=None, timeout=-1):
1022 """waits on one or more `jobs`, for up to `timeout` seconds.
1001 """waits on one or more `jobs`, for up to `timeout` seconds.
1023
1002
1024 Parameters
1003 Parameters
1025 ----------
1004 ----------
1026
1005
1027 jobs : int, str, or list of ints and/or strs, or one or more AsyncResult objects
1006 jobs : int, str, or list of ints and/or strs, or one or more AsyncResult objects
1028 ints are indices to self.history
1007 ints are indices to self.history
1029 strs are msg_ids
1008 strs are msg_ids
1030 default: wait on all outstanding messages
1009 default: wait on all outstanding messages
1031 timeout : float
1010 timeout : float
1032 a time in seconds, after which to give up.
1011 a time in seconds, after which to give up.
1033 default is -1, which means no timeout
1012 default is -1, which means no timeout
1034
1013
1035 Returns
1014 Returns
1036 -------
1015 -------
1037
1016
1038 True : when all msg_ids are done
1017 True : when all msg_ids are done
1039 False : timeout reached, some msg_ids still outstanding
1018 False : timeout reached, some msg_ids still outstanding
1040 """
1019 """
1041 tic = time.time()
1020 tic = time.time()
1042 if jobs is None:
1021 if jobs is None:
1043 theids = self.outstanding
1022 theids = self.outstanding
1044 else:
1023 else:
1045 if isinstance(jobs, (int, basestring, AsyncResult)):
1024 if isinstance(jobs, (int, basestring, AsyncResult)):
1046 jobs = [jobs]
1025 jobs = [jobs]
1047 theids = set()
1026 theids = set()
1048 for job in jobs:
1027 for job in jobs:
1049 if isinstance(job, int):
1028 if isinstance(job, int):
1050 # index access
1029 # index access
1051 job = self.history[job]
1030 job = self.history[job]
1052 elif isinstance(job, AsyncResult):
1031 elif isinstance(job, AsyncResult):
1053 map(theids.add, job.msg_ids)
1032 map(theids.add, job.msg_ids)
1054 continue
1033 continue
1055 theids.add(job)
1034 theids.add(job)
1056 if not theids.intersection(self.outstanding):
1035 if not theids.intersection(self.outstanding):
1057 return True
1036 return True
1058 self.spin()
1037 self.spin()
1059 while theids.intersection(self.outstanding):
1038 while theids.intersection(self.outstanding):
1060 if timeout >= 0 and ( time.time()-tic ) > timeout:
1039 if timeout >= 0 and ( time.time()-tic ) > timeout:
1061 break
1040 break
1062 time.sleep(1e-3)
1041 time.sleep(1e-3)
1063 self.spin()
1042 self.spin()
1064 return len(theids.intersection(self.outstanding)) == 0
1043 return len(theids.intersection(self.outstanding)) == 0
1065
1044
1066 #--------------------------------------------------------------------------
1045 #--------------------------------------------------------------------------
1067 # Control methods
1046 # Control methods
1068 #--------------------------------------------------------------------------
1047 #--------------------------------------------------------------------------
1069
1048
1070 @spin_first
1049 @spin_first
1071 def clear(self, targets=None, block=None):
1050 def clear(self, targets=None, block=None):
1072 """Clear the namespace in target(s)."""
1051 """Clear the namespace in target(s)."""
1073 block = self.block if block is None else block
1052 block = self.block if block is None else block
1074 targets = self._build_targets(targets)[0]
1053 targets = self._build_targets(targets)[0]
1075 for t in targets:
1054 for t in targets:
1076 self.session.send(self._control_socket, 'clear_request', content={}, ident=t)
1055 self.session.send(self._control_socket, 'clear_request', content={}, ident=t)
1077 error = False
1056 error = False
1078 if block:
1057 if block:
1079 self._flush_ignored_control()
1058 self._flush_ignored_control()
1080 for i in range(len(targets)):
1059 for i in range(len(targets)):
1081 idents,msg = self.session.recv(self._control_socket,0)
1060 idents,msg = self.session.recv(self._control_socket,0)
1082 if self.debug:
1061 if self.debug:
1083 pprint(msg)
1062 pprint(msg)
1084 if msg['content']['status'] != 'ok':
1063 if msg['content']['status'] != 'ok':
1085 error = self._unwrap_exception(msg['content'])
1064 error = self._unwrap_exception(msg['content'])
1086 else:
1065 else:
1087 self._ignored_control_replies += len(targets)
1066 self._ignored_control_replies += len(targets)
1088 if error:
1067 if error:
1089 raise error
1068 raise error
1090
1069
1091
1070
1092 @spin_first
1071 @spin_first
def abort(self, jobs=None, targets=None, block=None):
    """Send an abort request for the given jobs to the target engine(s).

    This keeps jobs that were already submitted from being executed.

    Parameters
    ----------

    jobs : msg_id, list of msg_ids, or AsyncResult
        The jobs to abort.  When None, every msg_id currently held in
        `self.outstanding` is used.
    targets : anything accepted by `self._build_targets`
        The engines that receive the abort request.
    block : bool
        Whether to wait for one reply per target.  Falls back to
        `self.block` when None.

    Raises
    ------

    TypeError
        If an entry of `jobs` is neither a string nor an AsyncResult.
    """
    if block is None:
        block = self.block
    if jobs is None:
        jobs = list(self.outstanding)
    targets = self._build_targets(targets)[0]

    # a lone msg_id / AsyncResult is treated as a one-element list
    if isinstance(jobs, (basestring, AsyncResult)):
        jobs = [jobs]
    bad_ids = [job for job in jobs
               if not isinstance(job, (basestring, AsyncResult))]
    if bad_ids:
        raise TypeError("Invalid msg_id type %r, expected str or AsyncResult"%bad_ids[0])

    # flatten AsyncResults into their underlying msg_ids
    msg_ids = []
    for job in jobs:
        if isinstance(job, AsyncResult):
            msg_ids.extend(job.msg_ids)
        else:
            msg_ids.append(job)

    request = dict(msg_ids=msg_ids)
    for engine in targets:
        self.session.send(self._control_socket, 'abort_request',
                content=request, ident=engine)

    failure = False
    if not block:
        # nobody waits for these replies; remember how many to discard later
        self._ignored_control_replies += len(targets)
    else:
        self._flush_ignored_control()
        for _ in range(len(targets)):
            idents, reply = self.session.recv(self._control_socket, 0)
            if self.debug:
                pprint(reply)
            if reply['content']['status'] != 'ok':
                # only the most recent failure is kept and raised
                failure = self._unwrap_exception(reply['content'])
    if failure:
        raise failure
1140
1119
1141 @spin_first
1120 @spin_first
def shutdown(self, targets='all', restart=False, hub=False, block=None):
    """Shut down one or more engine processes, and optionally the hub.

    Parameters
    ----------

    targets: list of ints or 'all' [default: all]
        The engines to shut down.
    hub: bool [default: False]
        Also shut down the Hub.  hub=True forces targets='all' and
        always waits for the engine replies.
    block: bool [default: self.block]
        Whether to wait for the shutdown replies.
    restart: bool [default: False]
        NOT IMPLEMENTED
        whether to restart engines after shutting them down.

    Raises
    ------

    NotImplementedError
        If `restart` is true.
    """

    if restart:
        raise NotImplementedError("Engine restart is not yet implemented")

    if block is None:
        block = self.block
    if hub:
        targets = 'all'
    targets = self._build_targets(targets)[0]

    for engine in targets:
        self.session.send(self._control_socket, 'shutdown_request',
                    content={'restart':restart},ident=engine)

    failure = False
    if not (block or hub):
        # replies will arrive later and must be discarded
        self._ignored_control_replies += len(targets)
    else:
        self._flush_ignored_control()
        for _ in range(len(targets)):
            idents, reply = self.session.recv(self._control_socket, 0)
            if self.debug:
                pprint(reply)
            if reply['content']['status'] != 'ok':
                failure = self._unwrap_exception(reply['content'])

    if hub:
        # short pause before asking the hub itself to shut down
        time.sleep(0.25)
        self.session.send(self._query_socket, 'shutdown_request')
        idents, reply = self.session.recv(self._query_socket, 0)
        if self.debug:
            pprint(reply)
        if reply['content']['status'] != 'ok':
            failure = self._unwrap_exception(reply['content'])

    if failure:
        raise failure
1192
1171
1193 #--------------------------------------------------------------------------
1172 #--------------------------------------------------------------------------
1194 # Execution related methods
1173 # Execution related methods
1195 #--------------------------------------------------------------------------
1174 #--------------------------------------------------------------------------
1196
1175
def _maybe_raise(self, result):
    """Return `result`, unless it is an error.RemoteError, in which case raise it."""
    if not isinstance(result, error.RemoteError):
        return result
    raise result
1203
1182
def send_apply_request(self, socket, f, args=None, kwargs=None, subheader=None, track=False,
                            ident=None):
    """Build an "apply_request" message and send it over `socket`.

    Views use this method to perform all engine execution.

    The new msg_id is added to `self.outstanding` and `self.history`, and
    its submission time is stored in `self.metadata`.  The message
    returned by `self.session.send` is returned.

    Raises
    ------

    RuntimeError
        If the client's sockets have been closed.
    TypeError
        If `f` is neither callable nor a Reference, `args` is not a tuple
        or list, or `kwargs` / `subheader` is not a dict.
    """

    if self._closed:
        raise RuntimeError("Client cannot be used after its sockets have been closed")

    # fill in defaults
    if args is None:
        args = []
    if kwargs is None:
        kwargs = {}
    if subheader is None:
        subheader = {}

    # validate arguments
    if not (callable(f) or isinstance(f, Reference)):
        raise TypeError("f must be callable, not %s"%type(f))
    if not isinstance(args, (tuple, list)):
        raise TypeError("args must be tuple or list, not %s"%type(args))
    if not isinstance(kwargs, dict):
        raise TypeError("kwargs must be dict, not %s"%type(kwargs))
    if not isinstance(subheader, dict):
        raise TypeError("subheader must be dict, not %s"%type(subheader))

    packed = util.pack_apply_message(f,args,kwargs)

    msg = self.session.send(socket, "apply_request", buffers=packed, ident=ident,
                        subheader=subheader, track=track)

    msg_id = msg['header']['msg_id']
    self.outstanding.add(msg_id)
    if ident:
        # possibly routed to a specific engine; a list ident ends with the engine's
        engine_ident = ident[-1] if isinstance(ident, list) else ident
        if engine_ident in self._engines.values():
            # save for later, in case of engine death
            self._outstanding_dict[engine_ident].add(msg_id)
    self.history.append(msg_id)
    self.metadata[msg_id]['submitted'] = datetime.now()

    return msg
1247
1226
def send_execute_request(self, socket, code, silent=True, subheader=None, ident=None):
    """Build an "execute_request" message and send it over `socket`.

    The new msg_id is added to `self.outstanding` and `self.history`, and
    its submission time is stored in `self.metadata`.  The message
    returned by `self.session.send` is returned.

    Raises
    ------

    RuntimeError
        If the client's sockets have been closed.
    TypeError
        If `code` is not a string or `subheader` is not a dict.
    """

    if self._closed:
        raise RuntimeError("Client cannot be used after its sockets have been closed")

    # fill in defaults
    if subheader is None:
        subheader = {}

    # validate arguments
    if not isinstance(code, basestring):
        raise TypeError("code must be text, not %s" % type(code))
    if not isinstance(subheader, dict):
        raise TypeError("subheader must be dict, not %s" % type(subheader))

    request = dict(code=code, silent=bool(silent), user_variables=[], user_expressions={})

    msg = self.session.send(socket, "execute_request", content=request, ident=ident,
                        subheader=subheader)

    msg_id = msg['header']['msg_id']
    self.outstanding.add(msg_id)
    if ident:
        # possibly routed to a specific engine; a list ident ends with the engine's
        engine_ident = ident[-1] if isinstance(ident, list) else ident
        if engine_ident in self._engines.values():
            # save for later, in case of engine death
            self._outstanding_dict[engine_ident].add(msg_id)
    self.history.append(msg_id)
    self.metadata[msg_id]['submitted'] = datetime.now()

    return msg
1284
1263
1285 #--------------------------------------------------------------------------
1264 #--------------------------------------------------------------------------
1286 # construct a View object
1265 # construct a View object
1287 #--------------------------------------------------------------------------
1266 #--------------------------------------------------------------------------
1288
1267
def load_balanced_view(self, targets=None):
    """Construct a LoadBalancedView object on the task socket.

    With no arguments (or targets='all') the view is created with
    targets=None, i.e. it is not restricted to a subset of engines.

    Parameters
    ----------

    targets: list,slice,int,etc. [default: use all engines]
        The subset of engines across which to load-balance
    """
    engine_ids = None if targets == 'all' else targets
    if engine_ids is not None:
        engine_ids = self._build_targets(engine_ids)[1]
    return LoadBalancedView(client=self, socket=self._task_socket, targets=engine_ids)
1306
1285
def direct_view(self, targets='all'):
    """Construct a DirectView object on the mux socket.

    With the default targets='all', the string 'all' is handed to the
    view unchanged so that the engine list is evaluated at each
    execution.  By contrast, rc[:] connects to all *current* engines and
    does not pick up engines added after the view is constructed.

    Parameters
    ----------

    targets: list,slice,int,etc. [default: use all engines]
        The engines to use for the View.  A single int yields a view on
        that one engine rather than on a one-element list.
    """
    view_targets = targets
    # allow 'all' to be lazily evaluated at each execution
    if targets != 'all':
        view_targets = self._build_targets(targets)[1]
        if isinstance(targets, int):
            view_targets = view_targets[0]
    return DirectView(client=self, socket=self._mux_socket, targets=view_targets)
1332
1311
1333 #--------------------------------------------------------------------------
1312 #--------------------------------------------------------------------------
1334 # Query methods
1313 # Query methods
1335 #--------------------------------------------------------------------------
1314 #--------------------------------------------------------------------------
1336
1315
1337 @spin_first
1316 @spin_first
def get_result(self, indices_or_msg_ids=None, block=None):
    """Wrap results, selected by msg_id or history index, in an AsyncResult.

    When every requested msg_id is found in `self.history` or
    `self.results`, a plain AsyncResult is built.  Otherwise an
    AsyncHubResult is built, which retrieves results from the Hub; this
    allows awaiting results that were not submitted by this Client.

    Parameters
    ----------

    indices_or_msg_ids : integer history index, str msg_id, or list of either
        The results to retrieve.  Defaults to -1, the most recent entry
        of `self.history`.

    block : bool
        Whether to wait for the result to be done.  Falls back to
        `self.block` when None.

    Returns
    -------

    AsyncResult
        A single AsyncResult object will always be returned.

    AsyncHubResult
        A subclass of AsyncResult that retrieves results from the Hub

    Raises
    ------

    TypeError
        If an entry is neither an int nor a string.
    """
    if block is None:
        block = self.block
    if indices_or_msg_ids is None:
        indices_or_msg_ids = -1

    if not isinstance(indices_or_msg_ids, (list,tuple)):
        indices_or_msg_ids = [indices_or_msg_ids]

    # resolve history indices to msg_ids
    theids = []
    for entry in indices_or_msg_ids:
        if isinstance(entry, int):
            entry = self.history[entry]
        if not isinstance(entry, basestring):
            raise TypeError("indices must be str or int, not %r"%entry)
        theids.append(entry)

    local_ids = [msg_id for msg_id in theids
                 if msg_id in self.history or msg_id in self.results]
    remote_ids = [msg_id for msg_id in theids if msg_id not in local_ids]

    if remote_ids:
        ar = AsyncHubResult(self, msg_ids=theids)
    else:
        ar = AsyncResult(self, msg_ids=theids)

    if block:
        ar.wait()

    return ar
1402
1381
1403 @spin_first
1382 @spin_first
def resubmit(self, indices_or_msg_ids=None, subheader=None, block=None):
    """Ask the Hub to resubmit one or more tasks.

    in-flight tasks may not be resubmitted.

    Parameters
    ----------

    indices_or_msg_ids : integer history index, str msg_id, or list of either
        The tasks to resubmit.  Defaults to -1, the most recent entry of
        `self.history`.

    subheader : unused by this method

    block : bool
        Whether to wait for the result to be done.  Falls back to
        `self.block` when None.

    Returns
    -------

    AsyncHubResult
        A subclass of AsyncResult that retrieves results from the Hub,
        built from the new msg_ids reported in the reply.

    Raises
    ------

    TypeError
        If an entry is neither an int nor a string.
    """
    if block is None:
        block = self.block
    if indices_or_msg_ids is None:
        indices_or_msg_ids = -1

    if not isinstance(indices_or_msg_ids, (list,tuple)):
        indices_or_msg_ids = [indices_or_msg_ids]

    # resolve history indices to msg_ids
    theids = []
    for entry in indices_or_msg_ids:
        if isinstance(entry, int):
            entry = self.history[entry]
        if not isinstance(entry, basestring):
            raise TypeError("indices must be str or int, not %r"%entry)
        theids.append(entry)

    request = dict(msg_ids = theids)

    self.session.send(self._query_socket, 'resubmit_request', request)

    # wait until the query socket is readable, then fetch the reply
    zmq.select([self._query_socket], [], [])
    idents, reply = self.session.recv(self._query_socket, zmq.NOBLOCK)
    if self.debug:
        pprint(reply)
    content = reply['content']
    if content['status'] != 'ok':
        raise self._unwrap_exception(content)
    # 'resubmitted' is indexed by the original msg_id
    mapping = content['resubmitted']
    new_ids = [ mapping[msg_id] for msg_id in theids ]

    ar = AsyncHubResult(self, msg_ids=new_ids)

    if block:
        ar.wait()

    return ar
1460
1439
1461 @spin_first
1440 @spin_first
def result_status(self, msg_ids, status_only=True):
    """Check on the status of the result(s) of the apply request with `msg_ids`.

    If status_only is False, then the actual results will be retrieved, else
    only the status of the results will be checked.

    msg_ids already present in `self.results` are answered locally; only
    the remaining ones are requested from the Hub.

    Parameters
    ----------

    msg_ids : list of msg_ids
        if int:
            Passed as index to self.history for convenience.
    status_only : bool (default: True)
        if False:
            Retrieve the actual results of completed tasks.

    Returns
    -------

    results : dict
        There will always be the keys 'pending' and 'completed', which will
        be lists of msg_ids that are incomplete or complete. If `status_only`
        is False, then completed results will be keyed by their `msg_id`.

    Raises
    ------

    TypeError
        If an entry of `msg_ids` is neither an int nor a string.
    KeyError
        If a completed record carries an unhandled reply msg_type.
    """
    if not isinstance(msg_ids, (list,tuple)):
        msg_ids = [msg_ids]

    # resolve history indices to msg_ids
    requested = []
    for msg_id in msg_ids:
        if isinstance(msg_id, int):
            msg_id = self.history[msg_id]
        if not isinstance(msg_id, basestring):
            raise TypeError("msg_ids must be str, not %r"%msg_id)
        requested.append(msg_id)

    completed = []
    local_results = {}

    # Local shortcut: split the ids into locally cached and still-remote.
    # A second list is built instead of removing from the list being
    # iterated, which would skip the element after every removal.
    # To temporarily disable the shortcut, set theids = requested instead.
    theids = []
    for msg_id in requested:
        if msg_id in self.results:
            completed.append(msg_id)
            local_results[msg_id] = self.results[msg_id]
        else:
            theids.append(msg_id)

    if theids: # some not locally cached
        content = dict(msg_ids=theids, status_only=status_only)
        self.session.send(self._query_socket, "result_request", content=content)
        # wait until the query socket is readable, then fetch the reply
        zmq.select([self._query_socket], [], [])
        idents,msg = self.session.recv(self._query_socket, zmq.NOBLOCK)
        if self.debug:
            pprint(msg)
        content = msg['content']
        if content['status'] != 'ok':
            raise self._unwrap_exception(content)
        buffers = msg['buffers']
    else:
        content = dict(completed=[],pending=[])

    content['completed'].extend(completed)

    if status_only:
        return content

    failures = []
    # load cached results into result:
    content.update(local_results)

    # update cache with results:
    for msg_id in sorted(theids):
        if msg_id in content['completed']:
            rec = content[msg_id]
            parent = rec['header']
            header = rec['result_header']
            rcontent = rec['result_content']
            iodict = rec['io']
            if isinstance(rcontent, str):
                rcontent = self.session.unpack(rcontent)

            md = self.metadata[msg_id]
            md.update(self._extract_metadata(header, parent, rcontent))
            if rec.get('received'):
                md['received'] = rec['received']
            md.update(iodict)

            if rcontent['status'] == 'ok':
                if header['msg_type'] == 'apply_reply':
                    # consumes this result's share of the reply buffers
                    res,buffers = util.unserialize_object(buffers)
                elif header['msg_type'] == 'execute_reply':
                    res = ExecuteReply(msg_id, rcontent, md)
                else:
                    raise KeyError("unhandled msg type: %r" % header['msg_type'])
            else:
                res = self._unwrap_exception(rcontent)
                failures.append(res)

            self.results[msg_id] = res
            content[msg_id] = res

    # a lone failing result is raised directly rather than wrapped
    if len(theids) == 1 and failures:
        raise failures[0]

    error.collect_exceptions(failures, "result_status")
    return content
1566
1545
1567 @spin_first
1546 @spin_first
def queue_status(self, targets='all', verbose=False):
    """Request the status of engine queues over the query socket.

    Parameters
    ----------

    targets : int/str/list of ints/strs
        the engines whose states are to be queried.
        default : all
    verbose : bool
        Whether to return lengths only, or lists of ids for each element

    Returns
    -------

    The reply content, minus its 'status' entry, after passing through
    `rekey`.  When `targets` is a single int, only that engine's entry is
    returned.
    """
    # 'all' is sent as targets=None rather than expanded to an id list here
    engine_ids = None if targets == 'all' else self._build_targets(targets)[1]
    request = dict(targets=engine_ids, verbose=verbose)
    self.session.send(self._query_socket, "queue_request", content=request)
    idents, reply = self.session.recv(self._query_socket, 0)
    if self.debug:
        pprint(reply)
    content = reply['content']
    if content.pop('status') != 'ok':
        raise self._unwrap_exception(content)
    content = rekey(content)
    if not isinstance(targets, int):
        return content
    return content[targets]
1599
1578
1600 @spin_first
1579 @spin_first
1601 def purge_results(self, jobs=[], targets=[]):
1580 def purge_results(self, jobs=[], targets=[]):
1602 """Tell the Hub to forget results.
1581 """Tell the Hub to forget results.
1603
1582
1604 Individual results can be purged by msg_id, or the entire
1583 Individual results can be purged by msg_id, or the entire
1605 history of specific targets can be purged.
1584 history of specific targets can be purged.
1606
1585
1607 Use `purge_results('all')` to scrub everything from the Hub's db.
1586 Use `purge_results('all')` to scrub everything from the Hub's db.
1608
1587
1609 Parameters
1588 Parameters
1610 ----------
1589 ----------
1611
1590
1612 jobs : str or list of str or AsyncResult objects
1591 jobs : str or list of str or AsyncResult objects
1613 the msg_ids whose results should be forgotten.
1592 the msg_ids whose results should be forgotten.
1614 targets : int/str/list of ints/strs
1593 targets : int/str/list of ints/strs
1615 The targets, by int_id, whose entire history is to be purged.
1594 The targets, by int_id, whose entire history is to be purged.
1616
1595
1617 default : None
1596 default : None
1618 """
1597 """
1619 if not targets and not jobs:
1598 if not targets and not jobs:
1620 raise ValueError("Must specify at least one of `targets` and `jobs`")
1599 raise ValueError("Must specify at least one of `targets` and `jobs`")
1621 if targets:
1600 if targets:
1622 targets = self._build_targets(targets)[1]
1601 targets = self._build_targets(targets)[1]
1623
1602
1624 # construct msg_ids from jobs
1603 # construct msg_ids from jobs
1625 if jobs == 'all':
1604 if jobs == 'all':
1626 msg_ids = jobs
1605 msg_ids = jobs
1627 else:
1606 else:
1628 msg_ids = []
1607 msg_ids = []
1629 if isinstance(jobs, (basestring,AsyncResult)):
1608 if isinstance(jobs, (basestring,AsyncResult)):
1630 jobs = [jobs]
1609 jobs = [jobs]
1631 bad_ids = filter(lambda obj: not isinstance(obj, (basestring, AsyncResult)), jobs)
1610 bad_ids = filter(lambda obj: not isinstance(obj, (basestring, AsyncResult)), jobs)
1632 if bad_ids:
1611 if bad_ids:
1633 raise TypeError("Invalid msg_id type %r, expected str or AsyncResult"%bad_ids[0])
1612 raise TypeError("Invalid msg_id type %r, expected str or AsyncResult"%bad_ids[0])
1634 for j in jobs:
1613 for j in jobs:
1635 if isinstance(j, AsyncResult):
1614 if isinstance(j, AsyncResult):
1636 msg_ids.extend(j.msg_ids)
1615 msg_ids.extend(j.msg_ids)
1637 else:
1616 else:
1638 msg_ids.append(j)
1617 msg_ids.append(j)
1639
1618
1640 content = dict(engine_ids=targets, msg_ids=msg_ids)
1619 content = dict(engine_ids=targets, msg_ids=msg_ids)
1641 self.session.send(self._query_socket, "purge_request", content=content)
1620 self.session.send(self._query_socket, "purge_request", content=content)
1642 idents, msg = self.session.recv(self._query_socket, 0)
1621 idents, msg = self.session.recv(self._query_socket, 0)
1643 if self.debug:
1622 if self.debug:
1644 pprint(msg)
1623 pprint(msg)
1645 content = msg['content']
1624 content = msg['content']
1646 if content['status'] != 'ok':
1625 if content['status'] != 'ok':
1647 raise self._unwrap_exception(content)
1626 raise self._unwrap_exception(content)
1648
1627
1649 @spin_first
1628 @spin_first
1650 def hub_history(self):
1629 def hub_history(self):
1651 """Get the Hub's history
1630 """Get the Hub's history
1652
1631
1653 Just like the Client, the Hub has a history, which is a list of msg_ids.
1632 Just like the Client, the Hub has a history, which is a list of msg_ids.
1654 This will contain the history of all clients, and, depending on configuration,
1633 This will contain the history of all clients, and, depending on configuration,
1655 may contain history across multiple cluster sessions.
1634 may contain history across multiple cluster sessions.
1656
1635
1657 Any msg_id returned here is a valid argument to `get_result`.
1636 Any msg_id returned here is a valid argument to `get_result`.
1658
1637
1659 Returns
1638 Returns
1660 -------
1639 -------
1661
1640
1662 msg_ids : list of strs
1641 msg_ids : list of strs
1663 list of all msg_ids, ordered by task submission time.
1642 list of all msg_ids, ordered by task submission time.
1664 """
1643 """
1665
1644
1666 self.session.send(self._query_socket, "history_request", content={})
1645 self.session.send(self._query_socket, "history_request", content={})
1667 idents, msg = self.session.recv(self._query_socket, 0)
1646 idents, msg = self.session.recv(self._query_socket, 0)
1668
1647
1669 if self.debug:
1648 if self.debug:
1670 pprint(msg)
1649 pprint(msg)
1671 content = msg['content']
1650 content = msg['content']
1672 if content['status'] != 'ok':
1651 if content['status'] != 'ok':
1673 raise self._unwrap_exception(content)
1652 raise self._unwrap_exception(content)
1674 else:
1653 else:
1675 return content['history']
1654 return content['history']
1676
1655
1677 @spin_first
1656 @spin_first
1678 def db_query(self, query, keys=None):
1657 def db_query(self, query, keys=None):
1679 """Query the Hub's TaskRecord database
1658 """Query the Hub's TaskRecord database
1680
1659
1681 This will return a list of task record dicts that match `query`
1660 This will return a list of task record dicts that match `query`
1682
1661
1683 Parameters
1662 Parameters
1684 ----------
1663 ----------
1685
1664
1686 query : mongodb query dict
1665 query : mongodb query dict
1687 The search dict. See mongodb query docs for details.
1666 The search dict. See mongodb query docs for details.
1688 keys : list of strs [optional]
1667 keys : list of strs [optional]
1689 The subset of keys to be returned. The default is to fetch everything but buffers.
1668 The subset of keys to be returned. The default is to fetch everything but buffers.
1690 'msg_id' will *always* be included.
1669 'msg_id' will *always* be included.
1691 """
1670 """
1692 if isinstance(keys, basestring):
1671 if isinstance(keys, basestring):
1693 keys = [keys]
1672 keys = [keys]
1694 content = dict(query=query, keys=keys)
1673 content = dict(query=query, keys=keys)
1695 self.session.send(self._query_socket, "db_request", content=content)
1674 self.session.send(self._query_socket, "db_request", content=content)
1696 idents, msg = self.session.recv(self._query_socket, 0)
1675 idents, msg = self.session.recv(self._query_socket, 0)
1697 if self.debug:
1676 if self.debug:
1698 pprint(msg)
1677 pprint(msg)
1699 content = msg['content']
1678 content = msg['content']
1700 if content['status'] != 'ok':
1679 if content['status'] != 'ok':
1701 raise self._unwrap_exception(content)
1680 raise self._unwrap_exception(content)
1702
1681
1703 records = content['records']
1682 records = content['records']
1704
1683
1705 buffer_lens = content['buffer_lens']
1684 buffer_lens = content['buffer_lens']
1706 result_buffer_lens = content['result_buffer_lens']
1685 result_buffer_lens = content['result_buffer_lens']
1707 buffers = msg['buffers']
1686 buffers = msg['buffers']
1708 has_bufs = buffer_lens is not None
1687 has_bufs = buffer_lens is not None
1709 has_rbufs = result_buffer_lens is not None
1688 has_rbufs = result_buffer_lens is not None
1710 for i,rec in enumerate(records):
1689 for i,rec in enumerate(records):
1711 # relink buffers
1690 # relink buffers
1712 if has_bufs:
1691 if has_bufs:
1713 blen = buffer_lens[i]
1692 blen = buffer_lens[i]
1714 rec['buffers'], buffers = buffers[:blen],buffers[blen:]
1693 rec['buffers'], buffers = buffers[:blen],buffers[blen:]
1715 if has_rbufs:
1694 if has_rbufs:
1716 blen = result_buffer_lens[i]
1695 blen = result_buffer_lens[i]
1717 rec['result_buffers'], buffers = buffers[:blen],buffers[blen:]
1696 rec['result_buffers'], buffers = buffers[:blen],buffers[blen:]
1718
1697
1719 return records
1698 return records
1720
1699
1721 __all__ = [ 'Client' ]
1700 __all__ = [ 'Client' ]
@@ -1,1337 +1,1401
1 """The IPython Controller Hub with 0MQ
1 """The IPython Controller Hub with 0MQ
2 This is the master object that handles connections from engines and clients,
2 This is the master object that handles connections from engines and clients,
3 and monitors traffic through the various queues.
3 and monitors traffic through the various queues.
4
4
5 Authors:
5 Authors:
6
6
7 * Min RK
7 * Min RK
8 """
8 """
9 #-----------------------------------------------------------------------------
9 #-----------------------------------------------------------------------------
10 # Copyright (C) 2010-2011 The IPython Development Team
10 # Copyright (C) 2010-2011 The IPython Development Team
11 #
11 #
12 # Distributed under the terms of the BSD License. The full license is in
12 # Distributed under the terms of the BSD License. The full license is in
13 # the file COPYING, distributed as part of this software.
13 # the file COPYING, distributed as part of this software.
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15
15
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17 # Imports
17 # Imports
18 #-----------------------------------------------------------------------------
18 #-----------------------------------------------------------------------------
19 from __future__ import print_function
19 from __future__ import print_function
20
20
21 import json
22 import os
21 import sys
23 import sys
22 import time
24 import time
23 from datetime import datetime
25 from datetime import datetime
24
26
25 import zmq
27 import zmq
26 from zmq.eventloop import ioloop
28 from zmq.eventloop import ioloop
27 from zmq.eventloop.zmqstream import ZMQStream
29 from zmq.eventloop.zmqstream import ZMQStream
28
30
29 # internal:
31 # internal:
30 from IPython.utils.importstring import import_item
32 from IPython.utils.importstring import import_item
31 from IPython.utils.py3compat import cast_bytes
33 from IPython.utils.py3compat import cast_bytes
32 from IPython.utils.traitlets import (
34 from IPython.utils.traitlets import (
33 HasTraits, Instance, Integer, Unicode, Dict, Set, Tuple, CBytes, DottedObjectName
35 HasTraits, Instance, Integer, Unicode, Dict, Set, Tuple, CBytes, DottedObjectName
34 )
36 )
35
37
36 from IPython.parallel import error, util
38 from IPython.parallel import error, util
37 from IPython.parallel.factory import RegistrationFactory
39 from IPython.parallel.factory import RegistrationFactory
38
40
39 from IPython.zmq.session import SessionFactory
41 from IPython.zmq.session import SessionFactory
40
42
41 from .heartmonitor import HeartMonitor
43 from .heartmonitor import HeartMonitor
42
44
43 #-----------------------------------------------------------------------------
45 #-----------------------------------------------------------------------------
44 # Code
46 # Code
45 #-----------------------------------------------------------------------------
47 #-----------------------------------------------------------------------------
46
48
47 def _passer(*args, **kwargs):
49 def _passer(*args, **kwargs):
48 return
50 return
49
51
50 def _printer(*args, **kwargs):
52 def _printer(*args, **kwargs):
51 print (args)
53 print (args)
52 print (kwargs)
54 print (kwargs)
53
55
54 def empty_record():
56 def empty_record():
55 """Return an empty dict with all record keys."""
57 """Return an empty dict with all record keys."""
56 return {
58 return {
57 'msg_id' : None,
59 'msg_id' : None,
58 'header' : None,
60 'header' : None,
59 'content': None,
61 'content': None,
60 'buffers': None,
62 'buffers': None,
61 'submitted': None,
63 'submitted': None,
62 'client_uuid' : None,
64 'client_uuid' : None,
63 'engine_uuid' : None,
65 'engine_uuid' : None,
64 'started': None,
66 'started': None,
65 'completed': None,
67 'completed': None,
66 'resubmitted': None,
68 'resubmitted': None,
67 'received': None,
69 'received': None,
68 'result_header' : None,
70 'result_header' : None,
69 'result_content' : None,
71 'result_content' : None,
70 'result_buffers' : None,
72 'result_buffers' : None,
71 'queue' : None,
73 'queue' : None,
72 'pyin' : None,
74 'pyin' : None,
73 'pyout': None,
75 'pyout': None,
74 'pyerr': None,
76 'pyerr': None,
75 'stdout': '',
77 'stdout': '',
76 'stderr': '',
78 'stderr': '',
77 }
79 }
78
80
79 def init_record(msg):
81 def init_record(msg):
80 """Initialize a TaskRecord based on a request."""
82 """Initialize a TaskRecord based on a request."""
81 header = msg['header']
83 header = msg['header']
82 return {
84 return {
83 'msg_id' : header['msg_id'],
85 'msg_id' : header['msg_id'],
84 'header' : header,
86 'header' : header,
85 'content': msg['content'],
87 'content': msg['content'],
86 'buffers': msg['buffers'],
88 'buffers': msg['buffers'],
87 'submitted': header['date'],
89 'submitted': header['date'],
88 'client_uuid' : None,
90 'client_uuid' : None,
89 'engine_uuid' : None,
91 'engine_uuid' : None,
90 'started': None,
92 'started': None,
91 'completed': None,
93 'completed': None,
92 'resubmitted': None,
94 'resubmitted': None,
93 'received': None,
95 'received': None,
94 'result_header' : None,
96 'result_header' : None,
95 'result_content' : None,
97 'result_content' : None,
96 'result_buffers' : None,
98 'result_buffers' : None,
97 'queue' : None,
99 'queue' : None,
98 'pyin' : None,
100 'pyin' : None,
99 'pyout': None,
101 'pyout': None,
100 'pyerr': None,
102 'pyerr': None,
101 'stdout': '',
103 'stdout': '',
102 'stderr': '',
104 'stderr': '',
103 }
105 }
104
106
105
107
106 class EngineConnector(HasTraits):
108 class EngineConnector(HasTraits):
107 """A simple object for accessing the various zmq connections of an object.
109 """A simple object for accessing the various zmq connections of an object.
108 Attributes are:
110 Attributes are:
109 id (int): engine ID
111 id (int): engine ID
110 uuid (str): uuid (unused?)
112 uuid (unicode): engine UUID
111 queue (str): identity of queue's DEALER socket
113 pending: set of msg_ids
112 registration (str): identity of registration DEALER socket
114 stallback: DelayedCallback for stalled registration
113 heartbeat (str): identity of heartbeat DEALER socket
114 """
115 """
116
115 id=Integer(0)
117 id = Integer(0)
116 queue=CBytes()
118 uuid = Unicode()
117 control=CBytes()
118 registration=CBytes()
119 heartbeat=CBytes()
120 pending=Set()
119 pending = Set()
120 stallback = Instance(ioloop.DelayedCallback)
121
121
122
122 _db_shortcuts = {
123 _db_shortcuts = {
123 'sqlitedb' : 'IPython.parallel.controller.sqlitedb.SQLiteDB',
124 'sqlitedb' : 'IPython.parallel.controller.sqlitedb.SQLiteDB',
124 'mongodb' : 'IPython.parallel.controller.mongodb.MongoDB',
125 'mongodb' : 'IPython.parallel.controller.mongodb.MongoDB',
125 'dictdb' : 'IPython.parallel.controller.dictdb.DictDB',
126 'dictdb' : 'IPython.parallel.controller.dictdb.DictDB',
126 'nodb' : 'IPython.parallel.controller.dictdb.NoDB',
127 'nodb' : 'IPython.parallel.controller.dictdb.NoDB',
127 }
128 }
128
129
129 class HubFactory(RegistrationFactory):
130 class HubFactory(RegistrationFactory):
130 """The Configurable for setting up a Hub."""
131 """The Configurable for setting up a Hub."""
131
132
132 # port-pairs for monitoredqueues:
133 # port-pairs for monitoredqueues:
133 hb = Tuple(Integer,Integer,config=True,
134 hb = Tuple(Integer,Integer,config=True,
134 help="""DEALER/SUB Port pair for Engine heartbeats""")
135 help="""PUB/ROUTER Port pair for Engine heartbeats""")
135 def _hb_default(self):
136 def _hb_default(self):
136 return tuple(util.select_random_ports(2))
137 return tuple(util.select_random_ports(2))
137
138
138 mux = Tuple(Integer,Integer,config=True,
139 mux = Tuple(Integer,Integer,config=True,
139 help="""Engine/Client Port pair for MUX queue""")
140 help="""Client/Engine Port pair for MUX queue""")
140
141
141 def _mux_default(self):
142 def _mux_default(self):
142 return tuple(util.select_random_ports(2))
143 return tuple(util.select_random_ports(2))
143
144
144 task = Tuple(Integer,Integer,config=True,
145 task = Tuple(Integer,Integer,config=True,
145 help="""Engine/Client Port pair for Task queue""")
146 help="""Client/Engine Port pair for Task queue""")
146 def _task_default(self):
147 def _task_default(self):
147 return tuple(util.select_random_ports(2))
148 return tuple(util.select_random_ports(2))
148
149
149 control = Tuple(Integer,Integer,config=True,
150 control = Tuple(Integer,Integer,config=True,
150 help="""Engine/Client Port pair for Control queue""")
151 help="""Client/Engine Port pair for Control queue""")
151
152
152 def _control_default(self):
153 def _control_default(self):
153 return tuple(util.select_random_ports(2))
154 return tuple(util.select_random_ports(2))
154
155
155 iopub = Tuple(Integer,Integer,config=True,
156 iopub = Tuple(Integer,Integer,config=True,
156 help="""Engine/Client Port pair for IOPub relay""")
157 help="""Client/Engine Port pair for IOPub relay""")
157
158
158 def _iopub_default(self):
159 def _iopub_default(self):
159 return tuple(util.select_random_ports(2))
160 return tuple(util.select_random_ports(2))
160
161
161 # single ports:
162 # single ports:
162 mon_port = Integer(config=True,
163 mon_port = Integer(config=True,
163 help="""Monitor (SUB) port for queue traffic""")
164 help="""Monitor (SUB) port for queue traffic""")
164
165
165 def _mon_port_default(self):
166 def _mon_port_default(self):
166 return util.select_random_ports(1)[0]
167 return util.select_random_ports(1)[0]
167
168
168 notifier_port = Integer(config=True,
169 notifier_port = Integer(config=True,
169 help="""PUB port for sending engine status notifications""")
170 help="""PUB port for sending engine status notifications""")
170
171
171 def _notifier_port_default(self):
172 def _notifier_port_default(self):
172 return util.select_random_ports(1)[0]
173 return util.select_random_ports(1)[0]
173
174
174 engine_ip = Unicode('127.0.0.1', config=True,
175 engine_ip = Unicode('127.0.0.1', config=True,
175 help="IP on which to listen for engine connections. [default: loopback]")
176 help="IP on which to listen for engine connections. [default: loopback]")
176 engine_transport = Unicode('tcp', config=True,
177 engine_transport = Unicode('tcp', config=True,
177 help="0MQ transport for engine connections. [default: tcp]")
178 help="0MQ transport for engine connections. [default: tcp]")
178
179
179 client_ip = Unicode('127.0.0.1', config=True,
180 client_ip = Unicode('127.0.0.1', config=True,
180 help="IP on which to listen for client connections. [default: loopback]")
181 help="IP on which to listen for client connections. [default: loopback]")
181 client_transport = Unicode('tcp', config=True,
182 client_transport = Unicode('tcp', config=True,
182 help="0MQ transport for client connections. [default : tcp]")
183 help="0MQ transport for client connections. [default : tcp]")
183
184
184 monitor_ip = Unicode('127.0.0.1', config=True,
185 monitor_ip = Unicode('127.0.0.1', config=True,
185 help="IP on which to listen for monitor messages. [default: loopback]")
186 help="IP on which to listen for monitor messages. [default: loopback]")
186 monitor_transport = Unicode('tcp', config=True,
187 monitor_transport = Unicode('tcp', config=True,
187 help="0MQ transport for monitor messages. [default : tcp]")
188 help="0MQ transport for monitor messages. [default : tcp]")
188
189
189 monitor_url = Unicode('')
190 monitor_url = Unicode('')
190
191
191 db_class = DottedObjectName('NoDB',
192 db_class = DottedObjectName('NoDB',
192 config=True, help="""The class to use for the DB backend
193 config=True, help="""The class to use for the DB backend
193
194
194 Options include:
195 Options include:
195
196
196 SQLiteDB: SQLite
197 SQLiteDB: SQLite
197 MongoDB : use MongoDB
198 MongoDB : use MongoDB
198 DictDB : in-memory storage (fastest, but be mindful of memory growth of the Hub)
199 DictDB : in-memory storage (fastest, but be mindful of memory growth of the Hub)
199 NoDB : disable database altogether (default)
200 NoDB : disable database altogether (default)
200
201
201 """)
202 """)
202
203
203 # not configurable
204 # not configurable
204 db = Instance('IPython.parallel.controller.dictdb.BaseDB')
205 db = Instance('IPython.parallel.controller.dictdb.BaseDB')
205 heartmonitor = Instance('IPython.parallel.controller.heartmonitor.HeartMonitor')
206 heartmonitor = Instance('IPython.parallel.controller.heartmonitor.HeartMonitor')
206
207
207 def _ip_changed(self, name, old, new):
208 def _ip_changed(self, name, old, new):
208 self.engine_ip = new
209 self.engine_ip = new
209 self.client_ip = new
210 self.client_ip = new
210 self.monitor_ip = new
211 self.monitor_ip = new
211 self._update_monitor_url()
212 self._update_monitor_url()
212
213
213 def _update_monitor_url(self):
214 def _update_monitor_url(self):
214 self.monitor_url = "%s://%s:%i" % (self.monitor_transport, self.monitor_ip, self.mon_port)
215 self.monitor_url = "%s://%s:%i" % (self.monitor_transport, self.monitor_ip, self.mon_port)
215
216
216 def _transport_changed(self, name, old, new):
217 def _transport_changed(self, name, old, new):
217 self.engine_transport = new
218 self.engine_transport = new
218 self.client_transport = new
219 self.client_transport = new
219 self.monitor_transport = new
220 self.monitor_transport = new
220 self._update_monitor_url()
221 self._update_monitor_url()
221
222
222 def __init__(self, **kwargs):
223 def __init__(self, **kwargs):
223 super(HubFactory, self).__init__(**kwargs)
224 super(HubFactory, self).__init__(**kwargs)
224 self._update_monitor_url()
225 self._update_monitor_url()
225
226
226
227
227 def construct(self):
228 def construct(self):
228 self.init_hub()
229 self.init_hub()
229
230
230 def start(self):
231 def start(self):
231 self.heartmonitor.start()
232 self.heartmonitor.start()
232 self.log.info("Heartmonitor started")
233 self.log.info("Heartmonitor started")
233
234
235 def client_url(self, channel):
236 """return full zmq url for a named client channel"""
237 return "%s://%s:%i" % (self.client_transport, self.client_ip, self.client_info[channel])
238
239 def engine_url(self, channel):
240 """return full zmq url for a named engine channel"""
241 return "%s://%s:%i" % (self.engine_transport, self.engine_ip, self.engine_info[channel])
242
234 def init_hub(self):
243 def init_hub(self):
235 """construct"""
244 """construct Hub object"""
236 client_iface = "%s://%s:" % (self.client_transport, self.client_ip) + "%i"
237 engine_iface = "%s://%s:" % (self.engine_transport, self.engine_ip) + "%i"
238
245
239 ctx = self.context
246 ctx = self.context
240 loop = self.loop
247 loop = self.loop
241
248
249 try:
250 scheme = self.config.TaskScheduler.scheme_name
251 except AttributeError:
252 from .scheduler import TaskScheduler
253 scheme = TaskScheduler.scheme_name.get_default_value()
254
255 # build connection dicts
256 engine = self.engine_info = {
257 'interface' : "%s://%s" % (self.engine_transport, self.engine_ip),
258 'registration' : self.regport,
259 'control' : self.control[1],
260 'mux' : self.mux[1],
261 'hb_ping' : self.hb[0],
262 'hb_pong' : self.hb[1],
263 'task' : self.task[1],
264 'iopub' : self.iopub[1],
265 }
266
267 client = self.client_info = {
268 'interface' : "%s://%s" % (self.client_transport, self.client_ip),
269 'registration' : self.regport,
270 'control' : self.control[0],
271 'mux' : self.mux[0],
272 'task' : self.task[0],
273 'task_scheme' : scheme,
274 'iopub' : self.iopub[0],
275 'notification' : self.notifier_port,
276 }
277
278 self.log.debug("Hub engine addrs: %s", self.engine_info)
279 self.log.debug("Hub client addrs: %s", self.client_info)
280
242 # Registrar socket
281 # Registrar socket
243 q = ZMQStream(ctx.socket(zmq.ROUTER), loop)
282 q = ZMQStream(ctx.socket(zmq.ROUTER), loop)
244 q.bind(client_iface % self.regport)
283 q.bind(self.client_url('registration'))
245 self.log.info("Hub listening on %s for registration.", client_iface % self.regport)
284 self.log.info("Hub listening on %s for registration.", self.client_url('registration'))
246 if self.client_ip != self.engine_ip:
285 if self.client_ip != self.engine_ip:
247 q.bind(engine_iface % self.regport)
286 q.bind(self.engine_url('registration'))
248 self.log.info("Hub listening on %s for registration.", engine_iface % self.regport)
287 self.log.info("Hub listening on %s for registration.", self.engine_url('registration'))
249
288
250 ### Engine connections ###
289 ### Engine connections ###
251
290
252 # heartbeat
291 # heartbeat
253 hpub = ctx.socket(zmq.PUB)
292 hpub = ctx.socket(zmq.PUB)
254 hpub.bind(engine_iface % self.hb[0])
293 hpub.bind(self.engine_url('hb_ping'))
255 hrep = ctx.socket(zmq.ROUTER)
294 hrep = ctx.socket(zmq.ROUTER)
256 hrep.bind(engine_iface % self.hb[1])
295 hrep.bind(self.engine_url('hb_pong'))
257 self.heartmonitor = HeartMonitor(loop=loop, config=self.config, log=self.log,
296 self.heartmonitor = HeartMonitor(loop=loop, config=self.config, log=self.log,
258 pingstream=ZMQStream(hpub,loop),
297 pingstream=ZMQStream(hpub,loop),
259 pongstream=ZMQStream(hrep,loop)
298 pongstream=ZMQStream(hrep,loop)
260 )
299 )
261
300
262 ### Client connections ###
301 ### Client connections ###
302
263 # Notifier socket
303 # Notifier socket
264 n = ZMQStream(ctx.socket(zmq.PUB), loop)
304 n = ZMQStream(ctx.socket(zmq.PUB), loop)
265 n.bind(client_iface%self.notifier_port)
305 n.bind(self.client_url('notification'))
266
306
267 ### build and launch the queues ###
307 ### build and launch the queues ###
268
308
269 # monitor socket
309 # monitor socket
270 sub = ctx.socket(zmq.SUB)
310 sub = ctx.socket(zmq.SUB)
271 sub.setsockopt(zmq.SUBSCRIBE, b"")
311 sub.setsockopt(zmq.SUBSCRIBE, b"")
272 sub.bind(self.monitor_url)
312 sub.bind(self.monitor_url)
273 sub.bind('inproc://monitor')
313 sub.bind('inproc://monitor')
274 sub = ZMQStream(sub, loop)
314 sub = ZMQStream(sub, loop)
275
315
276 # connect the db
316 # connect the db
277 db_class = _db_shortcuts.get(self.db_class.lower(), self.db_class)
317 db_class = _db_shortcuts.get(self.db_class.lower(), self.db_class)
278 self.log.info('Hub using DB backend: %r', (db_class.split('.')[-1]))
318 self.log.info('Hub using DB backend: %r', (db_class.split('.')[-1]))
279 self.db = import_item(str(db_class))(session=self.session.session,
319 self.db = import_item(str(db_class))(session=self.session.session,
280 config=self.config, log=self.log)
320 config=self.config, log=self.log)
281 time.sleep(.25)
321 time.sleep(.25)
282 try:
283 scheme = self.config.TaskScheduler.scheme_name
284 except AttributeError:
285 from .scheduler import TaskScheduler
286 scheme = TaskScheduler.scheme_name.get_default_value()
287 # build connection dicts
288 self.engine_info = {
289 'control' : engine_iface%self.control[1],
290 'mux': engine_iface%self.mux[1],
291 'heartbeat': (engine_iface%self.hb[0], engine_iface%self.hb[1]),
292 'task' : engine_iface%self.task[1],
293 'iopub' : engine_iface%self.iopub[1],
294 # 'monitor' : engine_iface%self.mon_port,
295 }
296
297 self.client_info = {
298 'control' : client_iface%self.control[0],
299 'mux': client_iface%self.mux[0],
300 'task' : (scheme, client_iface%self.task[0]),
301 'iopub' : client_iface%self.iopub[0],
302 'notification': client_iface%self.notifier_port
303 }
304 self.log.debug("Hub engine addrs: %s", self.engine_info)
305 self.log.debug("Hub client addrs: %s", self.client_info)
306
322
307 # resubmit stream
323 # resubmit stream
308 r = ZMQStream(ctx.socket(zmq.DEALER), loop)
324 r = ZMQStream(ctx.socket(zmq.DEALER), loop)
309 url = util.disambiguate_url(self.client_info['task'][-1])
325 url = util.disambiguate_url(self.client_url('task'))
310 r.setsockopt(zmq.IDENTITY, self.session.bsession)
311 r.connect(url)
326 r.connect(url)
312
327
313 self.hub = Hub(loop=loop, session=self.session, monitor=sub, heartmonitor=self.heartmonitor,
328 self.hub = Hub(loop=loop, session=self.session, monitor=sub, heartmonitor=self.heartmonitor,
314 query=q, notifier=n, resubmit=r, db=self.db,
329 query=q, notifier=n, resubmit=r, db=self.db,
315 engine_info=self.engine_info, client_info=self.client_info,
330 engine_info=self.engine_info, client_info=self.client_info,
316 log=self.log)
331 log=self.log)
317
332
318
333
319 class Hub(SessionFactory):
334 class Hub(SessionFactory):
320 """The IPython Controller Hub with 0MQ connections
335 """The IPython Controller Hub with 0MQ connections
321
336
322 Parameters
337 Parameters
323 ==========
338 ==========
324 loop: zmq IOLoop instance
339 loop: zmq IOLoop instance
325 session: Session object
340 session: Session object
326 <removed> context: zmq context for creating new connections (?)
341 <removed> context: zmq context for creating new connections (?)
327 monitor: ZMQStream for monitoring the command queue (SUB)
342 monitor: ZMQStream for monitoring the command queue (SUB)
328 query: ZMQStream for engine registration and client query requests (ROUTER)
343 query: ZMQStream for engine registration and client query requests (ROUTER)
329 heartmonitor: HeartMonitor object checking the pulse of the engines
344 heartmonitor: HeartMonitor object checking the pulse of the engines
330 notifier: ZMQStream for broadcasting engine registration changes (PUB)
345 notifier: ZMQStream for broadcasting engine registration changes (PUB)
331 db: connection to db for out of memory logging of commands
346 db: connection to db for out of memory logging of commands
332 NotImplemented
347 NotImplemented
333 engine_info: dict of zmq connection information for engines to connect
348 engine_info: dict of zmq connection information for engines to connect
334 to the queues.
349 to the queues.
335 client_info: dict of zmq connection information for clients to connect
350 client_info: dict of zmq connection information for clients to connect
336 to the queues.
351 to the queues.
337 """
352 """
353
354 engine_state_file = Unicode()
355
338 # internal data structures:
356 # internal data structures:
339 ids=Set() # engine IDs
357 ids=Set() # engine IDs
340 keytable=Dict()
358 keytable=Dict()
341 by_ident=Dict()
359 by_ident=Dict()
342 engines=Dict()
360 engines=Dict()
343 clients=Dict()
361 clients=Dict()
344 hearts=Dict()
362 hearts=Dict()
345 pending=Set()
363 pending=Set()
346 queues=Dict() # pending msg_ids keyed by engine_id
364 queues=Dict() # pending msg_ids keyed by engine_id
347 tasks=Dict() # pending msg_ids submitted as tasks, keyed by client_id
365 tasks=Dict() # pending msg_ids submitted as tasks, keyed by client_id
348 completed=Dict() # completed msg_ids keyed by engine_id
366 completed=Dict() # completed msg_ids keyed by engine_id
349 all_completed=Set() # flat set of completed msg_ids (not keyed by engine)
367 all_completed=Set() # flat set of completed msg_ids (not keyed by engine)
350 dead_engines=Set() # set of engines considered dead (identifier type: TODO confirm)
368 dead_engines=Set() # set of engines considered dead (identifier type: TODO confirm)
351 unassigned=Set() # set of task msg_ids not yet assigned a destination
369 unassigned=Set() # set of task msg_ids not yet assigned a destination
352 incoming_registrations=Dict()
370 incoming_registrations=Dict()
353 registration_timeout=Integer()
371 registration_timeout=Integer()
354 _idcounter=Integer(0)
372 _idcounter=Integer(0)
355
373
356 # objects from constructor:
374 # objects from constructor:
357 query=Instance(ZMQStream)
375 query=Instance(ZMQStream)
358 monitor=Instance(ZMQStream)
376 monitor=Instance(ZMQStream)
359 notifier=Instance(ZMQStream)
377 notifier=Instance(ZMQStream)
360 resubmit=Instance(ZMQStream)
378 resubmit=Instance(ZMQStream)
361 heartmonitor=Instance(HeartMonitor)
379 heartmonitor=Instance(HeartMonitor)
362 db=Instance(object)
380 db=Instance(object)
363 client_info=Dict()
381 client_info=Dict()
364 engine_info=Dict()
382 engine_info=Dict()
365
383
366
384
367 def __init__(self, **kwargs):
385 def __init__(self, **kwargs):
368 """
386 """
369 # universal:
387 # universal:
370 loop: IOLoop for creating future connections
388 loop: IOLoop for creating future connections
371 session: streamsession for sending serialized data
389 session: streamsession for sending serialized data
372 # engine:
390 # engine:
373 monitor: ZMQStream for monitoring queue messages
391 monitor: ZMQStream for monitoring queue messages
374 query: ZMQStream for engine+client registration and client requests
392 query: ZMQStream for engine+client registration and client requests
375 heartmonitor: HeartMonitor object for tracking engines
393 heartmonitor: HeartMonitor object for tracking engines
376 # extra:
394 # extra:
377 db: ZMQStream for db connection (NotImplemented)
395 db: ZMQStream for db connection (NotImplemented)
378 engine_info: zmq address/protocol dict for engine connections
396 engine_info: zmq address/protocol dict for engine connections
379 client_info: zmq address/protocol dict for client connections
397 client_info: zmq address/protocol dict for client connections
380 """
398 """
381
399
382 super(Hub, self).__init__(**kwargs)
400 super(Hub, self).__init__(**kwargs)
383 self.registration_timeout = max(5000, 2*self.heartmonitor.period)
401 self.registration_timeout = max(5000, 2*self.heartmonitor.period)
384
402
385 # validate connection dicts:
386 for k,v in self.client_info.iteritems():
387 if k == 'task':
388 util.validate_url_container(v[1])
389 else:
390 util.validate_url_container(v)
391 # util.validate_url_container(self.client_info)
392 util.validate_url_container(self.engine_info)
393
394 # register our callbacks
403 # register our callbacks
395 self.query.on_recv(self.dispatch_query)
404 self.query.on_recv(self.dispatch_query)
396 self.monitor.on_recv(self.dispatch_monitor_traffic)
405 self.monitor.on_recv(self.dispatch_monitor_traffic)
397
406
398 self.heartmonitor.add_heart_failure_handler(self.handle_heart_failure)
407 self.heartmonitor.add_heart_failure_handler(self.handle_heart_failure)
399 self.heartmonitor.add_new_heart_handler(self.handle_new_heart)
408 self.heartmonitor.add_new_heart_handler(self.handle_new_heart)
400
409
401 self.monitor_handlers = {b'in' : self.save_queue_request,
410 self.monitor_handlers = {b'in' : self.save_queue_request,
402 b'out': self.save_queue_result,
411 b'out': self.save_queue_result,
403 b'intask': self.save_task_request,
412 b'intask': self.save_task_request,
404 b'outtask': self.save_task_result,
413 b'outtask': self.save_task_result,
405 b'tracktask': self.save_task_destination,
414 b'tracktask': self.save_task_destination,
406 b'incontrol': _passer,
415 b'incontrol': _passer,
407 b'outcontrol': _passer,
416 b'outcontrol': _passer,
408 b'iopub': self.save_iopub_message,
417 b'iopub': self.save_iopub_message,
409 }
418 }
410
419
411 self.query_handlers = {'queue_request': self.queue_status,
420 self.query_handlers = {'queue_request': self.queue_status,
412 'result_request': self.get_results,
421 'result_request': self.get_results,
413 'history_request': self.get_history,
422 'history_request': self.get_history,
414 'db_request': self.db_query,
423 'db_request': self.db_query,
415 'purge_request': self.purge_results,
424 'purge_request': self.purge_results,
416 'load_request': self.check_load,
425 'load_request': self.check_load,
417 'resubmit_request': self.resubmit_task,
426 'resubmit_request': self.resubmit_task,
418 'shutdown_request': self.shutdown_request,
427 'shutdown_request': self.shutdown_request,
419 'registration_request' : self.register_engine,
428 'registration_request' : self.register_engine,
420 'unregistration_request' : self.unregister_engine,
429 'unregistration_request' : self.unregister_engine,
421 'connection_request': self.connection_request,
430 'connection_request': self.connection_request,
422 }
431 }
423
432
424 # ignore resubmit replies
433 # ignore resubmit replies
425 self.resubmit.on_recv(lambda msg: None, copy=False)
434 self.resubmit.on_recv(lambda msg: None, copy=False)
426
435
427 self.log.info("hub::created hub")
436 self.log.info("hub::created hub")
428
437
429 @property
438 @property
430 def _next_id(self):
439 def _next_id(self):
431 """gemerate a new ID.
440 """gemerate a new ID.
432
441
433 No longer reuse old ids, just count from 0."""
442 No longer reuse old ids, just count from 0."""
434 newid = self._idcounter
443 newid = self._idcounter
435 self._idcounter += 1
444 self._idcounter += 1
436 return newid
445 return newid
437 # newid = 0
446 # newid = 0
438 # incoming = [id[0] for id in self.incoming_registrations.itervalues()]
447 # incoming = [id[0] for id in self.incoming_registrations.itervalues()]
439 # # print newid, self.ids, self.incoming_registrations
448 # # print newid, self.ids, self.incoming_registrations
440 # while newid in self.ids or newid in incoming:
449 # while newid in self.ids or newid in incoming:
441 # newid += 1
450 # newid += 1
442 # return newid
451 # return newid
443
452
444 #-----------------------------------------------------------------------------
453 #-----------------------------------------------------------------------------
445 # message validation
454 # message validation
446 #-----------------------------------------------------------------------------
455 #-----------------------------------------------------------------------------
447
456
448 def _validate_targets(self, targets):
457 def _validate_targets(self, targets):
449 """turn any valid targets argument into a list of integer ids"""
458 """turn any valid targets argument into a list of integer ids"""
450 if targets is None:
459 if targets is None:
451 # default to all
460 # default to all
452 return self.ids
461 return self.ids
453
462
454 if isinstance(targets, (int,str,unicode)):
463 if isinstance(targets, (int,str,unicode)):
455 # only one target specified
464 # only one target specified
456 targets = [targets]
465 targets = [targets]
457 _targets = []
466 _targets = []
458 for t in targets:
467 for t in targets:
459 # map raw identities to ids
468 # map raw identities to ids
460 if isinstance(t, (str,unicode)):
469 if isinstance(t, (str,unicode)):
461 t = self.by_ident.get(cast_bytes(t), t)
470 t = self.by_ident.get(cast_bytes(t), t)
462 _targets.append(t)
471 _targets.append(t)
463 targets = _targets
472 targets = _targets
464 bad_targets = [ t for t in targets if t not in self.ids ]
473 bad_targets = [ t for t in targets if t not in self.ids ]
465 if bad_targets:
474 if bad_targets:
466 raise IndexError("No Such Engine: %r" % bad_targets)
475 raise IndexError("No Such Engine: %r" % bad_targets)
467 if not targets:
476 if not targets:
468 raise IndexError("No Engines Registered")
477 raise IndexError("No Engines Registered")
469 return targets
478 return targets
470
479
471 #-----------------------------------------------------------------------------
480 #-----------------------------------------------------------------------------
472 # dispatch methods (1 per stream)
481 # dispatch methods (1 per stream)
473 #-----------------------------------------------------------------------------
482 #-----------------------------------------------------------------------------
474
483
475
484
@util.log_errors
def dispatch_monitor_traffic(self, msg):
    """Route one message received on the monitor stream.

    All ME and Task queue messages come through here, as well as IOPub
    traffic.  The first frame is the topic used to pick a handler from
    self.monitor_handlers; the remaining frames are split into
    identities and message by the session.
    """
    topic = msg[0]
    self.log.debug("monitor traffic: %r", topic)
    try:
        idents, msg = self.session.feed_identities(msg[1:])
    except ValueError:
        idents = []
    if not idents:
        self.log.error("Monitor message without topic: %r", msg)
        return

    handler = self.monitor_handlers.get(topic, None)
    if handler is None:
        self.log.error("Unrecognized monitor topic: %r", topic)
        return
    handler(idents, msg)
494
503
495
504
@util.log_errors
def dispatch_query(self, msg):
    """Route registration requests and queries from clients.

    The raw multipart `msg` is split into identities and message, the
    message is unserialized, and the handler registered for its
    msg_type in self.query_handlers is called with (idents, msg).

    A message that cannot be unserialized, or whose msg_type has no
    handler, is answered with a "hub_error" reply to the first identity.
    """
    try:
        idents, msg = self.session.feed_identities(msg)
    except ValueError:
        idents = []
    if not idents:
        self.log.error("Bad Query Message: %r", msg)
        return
    client_id = idents[0]
    try:
        msg = self.session.unserialize(msg, content=True)
    except Exception:
        content = error.wrap_exception()
        self.log.error("Bad Query Message: %r", msg, exc_info=True)
        self.session.send(self.query, "hub_error", ident=client_id,
                content=content)
        return

    # switch on message type:
    msg_type = msg['header']['msg_type']
    self.log.info("client::client %r requested %r", client_id, msg_type)
    handler = self.query_handlers.get(msg_type, None)
    if handler is None:
        # Explicit check instead of an `assert` statement: asserts are
        # removed under `python -O`, which would let a None handler be
        # called.  An AssertionError is still raised and wrapped so the
        # error reply sent to the client is unchanged.
        try:
            raise AssertionError("Bad Message Type: %r" % msg_type)
        except AssertionError:
            content = error.wrap_exception()
            self.log.error("Bad Message Type: %r", msg_type, exc_info=True)
            self.session.send(self.query, "hub_error", ident=client_id,
                    content=content)
        return

    handler(idents, msg)
531
540
def dispatch_db(self, msg):
    """Placeholder dispatcher; always raises NotImplementedError."""
    raise NotImplementedError()
535
544
536 #---------------------------------------------------------------------------
545 #---------------------------------------------------------------------------
537 # handler methods (1 per event)
546 # handler methods (1 per event)
538 #---------------------------------------------------------------------------
547 #---------------------------------------------------------------------------
539
548
540 #----------------------- Heartbeat --------------------------------------
549 #----------------------- Heartbeat --------------------------------------
541
550
def handle_new_heart(self, heart):
    """Handler to attach to heartbeater.

    Called when a new heart starts to beat.  If the heart belongs to a
    pending entry in self.incoming_registrations, registration is
    completed; any other heart is ignored.
    """
    self.log.debug("heartbeat::handle_new_heart(%r)", heart)
    if heart in self.incoming_registrations:
        self.finish_registration(heart)
        return
    self.log.info("heartbeat::ignoring new heart: %r", heart)
551
560
552
561
def handle_heart_failure(self, heart):
    """Handler to attach to heartbeater.

    Called when a previously registered heart fails to respond to a
    beat request.  Triggers unregistration of the matching engine.

    Hearts that do not map to an engine id in self.hearts, or whose
    engine is already listed in self.dead_engines, are ignored.
    """
    self.log.debug("heartbeat::handle_heart_failure(%r)", heart)
    eid = self.hearts.get(heart, None)
    if eid is None or self.keytable[eid] in self.dead_engines:
        self.log.info("heartbeat::ignoring heart failure %r (not an engine or already dead)", heart)
        return

    # Look the engine up only after the guard above: indexing
    # self.engines with an unknown (None) id would raise KeyError.
    uuid = self.engines[eid].uuid
    self.unregister_engine(heart, dict(content=dict(id=eid, queue=uuid)))
564
573
565 #----------------------- MUX Queue Traffic ------------------------------
574 #----------------------- MUX Queue Traffic ------------------------------
566
575
def save_queue_request(self, idents, msg):
    """Record a request submitted to an engine through the MUX queue.

    `idents` must start with (queue_id, client_id).  A record for the
    request is built and merged with any record already stored under
    the same msg_id, then the msg_id is marked pending and appended to
    the target engine's queue.
    """
    if len(idents) < 2:
        self.log.error("invalid identity prefix: %r", idents)
        return
    queue_id, client_id = idents[:2]

    try:
        msg = self.session.unserialize(msg)
    except Exception:
        self.log.error("queue::client %r sent invalid message to %r: %r", client_id, queue_id, msg, exc_info=True)
        return

    eid = self.by_ident.get(queue_id, None)
    if eid is None:
        self.log.error("queue::target %r not registered", queue_id)
        self.log.debug("queue:: valid are: %r", self.by_ident.keys())
        return

    record = init_record(msg)
    msg_id = record['msg_id']
    self.log.info("queue::client %r submitted request %r to %s", client_id, msg_id, eid)
    # Unicode in records
    record['engine_uuid'] = queue_id.decode('ascii')
    record['client_uuid'] = msg['header']['session']
    record['queue'] = 'mux'

    try:
        # it's possible iopub arrived first, in which case a record
        # already exists and must be merged rather than replaced
        existing = self.db.get_record(msg_id)
        for key, stored in existing.iteritems():
            fresh = record.get(key, None)
            if not stored:
                continue
            if not fresh:
                # keep what the db already knows
                record[key] = stored
            elif stored != fresh:
                self.log.warn("conflicting initial state for record: %r:%r <%r> %r", msg_id, fresh, key, stored)
        try:
            self.db.update_record(msg_id, record)
        except Exception:
            self.log.error("DB Error updating record %r", msg_id, exc_info=True)
    except KeyError:
        # no record yet: this is the first we hear of msg_id
        try:
            self.db.add_record(msg_id, record)
        except Exception:
            self.log.error("DB Error adding record %r", msg_id, exc_info=True)

    self.pending.add(msg_id)
    self.queues[eid].append(msg_id)
613
622
def save_queue_result(self, idents, msg):
    """Record a reply coming back from an engine through the MUX queue.

    `idents` must start with (client_id, queue_id).  A pending request
    is moved to the completed bookkeeping; the result is then written
    to the db record of the parent msg_id.  Replies for a msg_id that is
    neither pending nor already completed are logged and dropped.
    """
    if len(idents) < 2:
        self.log.error("invalid identity prefix: %r", idents)
        return
    client_id, queue_id = idents[:2]

    try:
        msg = self.session.unserialize(msg)
    except Exception:
        self.log.error("queue::engine %r sent invalid message to %r: %r",
                queue_id, client_id, msg, exc_info=True)
        return

    eid = self.by_ident.get(queue_id, None)
    if eid is None:
        self.log.error("queue::unknown engine %r is sending a reply: ", queue_id)
        return

    parent = msg['parent_header']
    if not parent:
        return
    msg_id = parent['msg_id']

    if msg_id in self.pending:
        self.pending.remove(msg_id)
        self.all_completed.add(msg_id)
        self.queues[eid].remove(msg_id)
        self.completed[eid].append(msg_id)
        self.log.info("queue::request %r completed on %s", msg_id, eid)
    elif msg_id not in self.all_completed:
        # it could be a result from a dead engine that died before
        # delivering the result
        self.log.warn("queue:: unknown msg finished %r", msg_id)
        return

    # update record anyway, because the unregistration could have been premature
    reply_header = msg['header']
    finished_at = reply_header['date']
    started_at = reply_header.get('started', None)
    update = dict(
        result_header=reply_header,
        result_content=msg['content'],
        received=datetime.now(),
        started=started_at,
        completed=finished_at,
        result_buffers=msg['buffers'],
    )
    try:
        self.db.update_record(msg_id, update)
    except Exception:
        self.log.error("DB Error updating record %r", msg_id, exc_info=True)
664
673
665
674
666 #--------------------- Task Queue Traffic ------------------------------
675 #--------------------- Task Queue Traffic ------------------------------
667
676
def save_task_request(self, idents, msg):
    """Save the submission of a task.

    A record is built from the message, the msg_id is marked both
    pending and unassigned, and the record is merged into an existing
    db record if there is one, or added as a new record otherwise.
    """
    client_id = idents[0]

    try:
        msg = self.session.unserialize(msg)
    except Exception:
        self.log.error("task::client %r sent invalid task message: %r",
                client_id, msg, exc_info=True)
        return

    record = init_record(msg)
    record['client_uuid'] = msg['header']['session']
    record['queue'] = 'task'

    msg_id = msg['header']['msg_id']
    self.pending.add(msg_id)
    self.unassigned.add(msg_id)

    try:
        # it's possible iopub arrived first:
        existing = self.db.get_record(msg_id)
        if existing['resubmitted']:
            # don't clobber these keys on resubmit:
            # submitted and client_uuid should be different,
            # and buffers might be big, and shouldn't have changed
            for protected in ('submitted', 'client_uuid', 'buffers'):
                record.pop(protected)
            # still check content,header which should not change
            # but are not expensive to compare as buffers

        for key, stored in existing.iteritems():
            if key.endswith('buffers'):
                # don't compare buffers
                continue
            fresh = record.get(key, None)
            if not stored:
                continue
            if not fresh:
                record[key] = stored
            elif stored != fresh:
                self.log.warn("conflicting initial state for record: %r:%r <%r> %r", msg_id, fresh, key, stored)
        try:
            self.db.update_record(msg_id, record)
        except Exception:
            self.log.error("DB Error updating record %r", msg_id, exc_info=True)
    except KeyError:
        try:
            self.db.add_record(msg_id, record)
        except Exception:
            self.log.error("DB Error adding record %r", msg_id, exc_info=True)
    except Exception:
        self.log.error("DB Error saving task request %r", msg_id, exc_info=True)
718
727
def save_task_result(self, idents, msg):
    """Save the result of a completed task.

    The parent msg_id is dropped from the unassigned set.  If it is
    still pending it is moved to the completed bookkeeping and the
    result is written to its db record; otherwise the reply is only
    logged at debug level.
    """
    client_id = idents[0]
    try:
        msg = self.session.unserialize(msg)
    except Exception:
        self.log.error("task::invalid task result message send to %r: %r",
                client_id, msg, exc_info=True)
        return

    parent = msg['parent_header']
    if not parent:
        self.log.warn("Task %r had no parent!", msg)
        return
    msg_id = parent['msg_id']
    self.unassigned.discard(msg_id)

    header = msg['header']
    engine_uuid = header.get('engine', u'')
    eid = self.by_ident.get(cast_bytes(engine_uuid), None)
    status = header.get('status', None)

    if msg_id not in self.pending:
        self.log.debug("task::unknown task %r finished", msg_id)
        return

    self.log.info("task::task %r finished on %s", msg_id, eid)
    self.pending.remove(msg_id)
    self.all_completed.add(msg_id)
    if eid is not None:
        # aborted tasks are not counted as completed by the engine
        if status != 'aborted':
            self.completed[eid].append(msg_id)
        engine_tasks = self.tasks[eid]
        if msg_id in engine_tasks:
            engine_tasks.remove(msg_id)

    finished_at = header['date']
    started_at = header.get('started', None)
    update = dict(
        result_header=header,
        result_content=msg['content'],
        started=started_at,
        completed=finished_at,
        received=datetime.now(),
        engine_uuid=engine_uuid,
        result_buffers=msg['buffers'],
    )
    try:
        self.db.update_record(msg_id, update)
    except Exception:
        self.log.error("DB Error saving task request %r", msg_id, exc_info=True)
772
781
def save_task_destination(self, idents, msg):
    """Record which engine a task was assigned to.

    Reads msg_id and engine_id from the message content, removes the
    task from the unassigned set, appends it to that engine's task list
    and stores the engine uuid in the task's db record.
    """
    try:
        msg = self.session.unserialize(msg, content=True)
    except Exception:
        self.log.error("task::invalid task tracking message", exc_info=True)
        return

    tracking = msg['content']
    msg_id = tracking['msg_id']
    engine_uuid = tracking['engine_id']
    eid = self.by_ident[cast_bytes(engine_uuid)]

    self.log.info("task::task %r arrived on %r", msg_id, eid)
    self.unassigned.discard(msg_id)
    self.tasks[eid].append(msg_id)

    try:
        self.db.update_record(msg_id, {'engine_uuid': engine_uuid})
    except Exception:
        self.log.error("DB Error saving task destination %r", msg_id, exc_info=True)
797
806
798
807
def mia_task_request(self, idents, msg):
    """Unimplemented handler for MIA task requests.

    Always raises NotImplementedError.  The statement that used to
    follow the raise could never run and has been removed.
    """
    raise NotImplementedError
804
813
805
814
806 #--------------------- IOPub Traffic ------------------------------
815 #--------------------- IOPub Traffic ------------------------------
807
816
def save_iopub_message(self, topics, msg):
    """Save an iopub message into the db.

    The message is filed under its parent's msg_id; an empty record is
    created first if the db has none.  Stream output is appended to what
    is already stored under the stream's name, while pyerr, pyin,
    display_data and pyout overwrite their key.  Status messages are
    ignored and any other msg_type is logged as unhandled.
    """
    try:
        msg = self.session.unserialize(msg, content=True)
    except Exception:
        self.log.error("iopub::invalid IOPub message", exc_info=True)
        return

    parent = msg['parent_header']
    if not parent:
        self.log.warn("iopub::IOPub message lacks parent: %r", msg)
        return
    msg_id = parent['msg_id']
    msg_type = msg['header']['msg_type']
    content = msg['content']

    # ensure msg_id is in db
    try:
        rec = self.db.get_record(msg_id)
    except KeyError:
        rec = empty_record()
        rec['msg_id'] = msg_id
        self.db.add_record(msg_id, rec)

    update = {}
    if msg_type == 'stream':
        stream_name = content['name']
        previous = rec[stream_name] or ''
        update[stream_name] = previous + content['data']
    elif msg_type in ('pyerr', 'display_data', 'pyout'):
        update[msg_type] = content
    elif msg_type == 'pyin':
        update['pyin'] = content['code']
    elif msg_type != 'status':
        self.log.warn("unhandled iopub msg_type: %r", msg_type)

    if not update:
        return

    try:
        self.db.update_record(msg_id, update)
    except Exception:
        self.log.error("DB Error saving iopub message %r", msg_id, exc_info=True)
857
866
858
867
859
868
860 #-------------------------------------------------------------------------
869 #-------------------------------------------------------------------------
861 # Registration requests
870 # Registration requests
862 #-------------------------------------------------------------------------
871 #-------------------------------------------------------------------------
863
872
def connection_request(self, client_id, msg):
    """Answer a client's connection request.

    Sends a 'connection_reply' on the query stream whose content holds
    status 'ok' and an 'engines' dict mapping str(engine id) to the
    self.keytable value of every engine not in self.dead_engines.
    """
    self.log.info("client::client %r connected", client_id)
    # keys are stringified so the mapping is jsonable
    live_engines = dict(
        (str(eid), uuid)
        for eid, uuid in self.keytable.items()
        if uuid not in self.dead_engines
    )
    reply = {'status': 'ok', 'engines': live_engines}
    self.session.send(self.query, 'connection_reply', reply, parent=msg, ident=client_id)
882 self.session.send(self.query, 'connection_reply', content, parent=msg, ident=client_id)
875
883
def register_engine(self, reg, msg):
    """Register a new engine.

    Reads the engine's ``uuid`` from the message content, takes the next
    engine id from ``self._next_id`` and sends a ``registration_reply`` on
    the query socket.  The reply has ``status='ok'`` and the new id, or is a
    wrapped ``KeyError`` when the uuid is already known in ``self.by_ident``
    or already has a registration waiting in ``self.incoming_registrations``.

    On success the registration is stored in ``self.incoming_registrations``
    under the bytes form of the uuid.  If that heart is already in
    ``self.heartmonitor.hearts`` the registration is finished right away;
    otherwise a delayed callback is started that purges the registration
    after ``self.registration_timeout``.

    Returns the engine id, or None when the message carries no uuid.
    """
    content = msg['content']
    try:
        uuid = content['uuid']
    except KeyError:
        self.log.error("registration::uuid not specified", exc_info=True)
        return

    eid = self._next_id
    # by_ident and incoming_registrations are keyed by the bytes form of the
    # uuid, so compare bytes with bytes (text == bytes is never true on
    # Python 3).
    heart = cast_bytes(uuid)

    self.log.debug("registration::register_engine(%i, %r)", eid, uuid)

    content = dict(id=eid, status='ok')

    # check whether the requested identity is still available:
    conflict = None
    if heart in self.by_ident:
        conflict = "uuid %r in use" % uuid
    elif heart in self.incoming_registrations:
        conflict = "heart_id %r in use" % uuid
    elif any(ec.uuid == uuid for ec in self.incoming_registrations.values()):
        conflict = "uuid %r in use" % uuid

    if conflict is not None:
        # raise and catch so that error.wrap_exception() is called from
        # inside an except block, as everywhere else in this file
        try:
            raise KeyError(conflict)
        except KeyError:
            self.log.error(conflict, exc_info=True)
            content = error.wrap_exception()

    self.session.send(self.query, "registration_reply",
            content=content,
            ident=reg)

    if content['status'] == 'ok':
        if heart in self.heartmonitor.hearts:
            # already beating
            self.incoming_registrations[heart] = EngineConnector(id=eid, uuid=uuid)
            self.finish_registration(heart)
        else:
            purge = lambda: self._purge_stalled_registration(heart)
            dc = ioloop.DelayedCallback(purge, self.registration_timeout, self.loop)
            dc.start()
            self.incoming_registrations[heart] = EngineConnector(id=eid, uuid=uuid, stallback=dc)
    else:
        self.log.error("registration::registration %i failed: %r", eid, content['evalue'])

    return eid
942
942
def unregister_engine(self, ident, msg):
    """Unregister an engine that explicitly requested to leave.

    The engine id is read from ``msg['content']['id']``.  A missing or
    unknown id is logged and the request is ignored.  Otherwise the engine's
    uuid is added to ``self.dead_engines``, a delayed call to
    ``_handle_stranded_msgs`` is scheduled after
    ``self.registration_timeout``, the engine state is saved, and an
    ``unregistration_notification`` is sent when a notifier is set.
    """
    try:
        eid = msg['content']['id']
    except (KeyError, TypeError):
        self.log.error("registration::bad engine id for unregistration: %r", ident, exc_info=True)
        return
    self.log.info("registration::unregister_engine(%r)", eid)
    try:
        uuid = self.keytable[eid]
    except (KeyError, TypeError):
        # an id we never registered must not take the handler down
        self.log.error("registration::unknown engine id for unregistration: %r", eid, exc_info=True)
        return
    content = dict(id=eid, uuid=uuid)
    self.dead_engines.add(uuid)
    # The engine's bookkeeping (ids, keytable, engines, hearts, by_ident,
    # completed) is deliberately left in place here.
    handleit = lambda: self._handle_stranded_msgs(eid, uuid)
    dc = ioloop.DelayedCallback(handleit, self.registration_timeout, self.loop)
    dc.start()
    ############## TODO: HANDLE IT ################

    self._save_engine_state()

    if self.notifier:
        self.session.send(self.notifier, "unregistration_notification", content=content)
969
971
def _handle_stranded_msgs(self, eid, uuid):
    """Handle messages known to be on an engine when the engine unregisters.

    Every msg_id in ``self.queues[eid]`` is moved from ``self.pending`` to
    ``self.all_completed`` and its DB record is updated with a wrapped
    ``EngineError`` as the result.

    It is possible that this will fire prematurely - that is, an engine will
    go down after completing a result, and the client will be notified
    that the result failed and later receive the actual result.
    """
    for stranded_id in self.queues[eid]:
        self.pending.remove(stranded_id)
        self.all_completed.add(stranded_id)
        try:
            raise error.EngineError("Engine %r died while running task %r" % (eid, stranded_id))
        except:
            failure = error.wrap_exception()
        # fake result header; the same timestamp doubles as completion time
        died_at = datetime.now()
        fake_header = {'engine': uuid, 'date': died_at}
        update = {
            'result_content': failure,
            'result_header': fake_header,
            'result_buffers': [],
            'completed': died_at,
            'engine_uuid': uuid,
        }
        try:
            self.db.update_record(stranded_id, update)
        except Exception:
            self.log.error("DB Error handling stranded msg %r", stranded_id, exc_info=True)
998
1000
999
1001
def finish_registration(self, heart):
    """Second half of engine registration, called after our HeartMonitor
    has received a beat from the Engine's Heart.

    Pops the connector stored under `heart` in ``incoming_registrations``,
    stops its stallback if it has one, records the engine in the hub's
    lookup tables, notifies listeners (when a notifier is set) and saves the
    engine state.  An unknown heart is logged and ignored.
    """
    try:
        connector = self.incoming_registrations.pop(heart)
    except KeyError:
        self.log.error("registration::tried to finish nonexistant registration", exc_info=True)
        return

    eid = connector.id
    uuid = connector.uuid
    self.log.info("registration::finished registering engine %i:%s", eid, uuid)

    stallback = connector.stallback
    if stallback is not None:
        stallback.stop()

    self.ids.add(eid)
    self.keytable[eid] = uuid
    self.engines[eid] = connector
    self.by_ident[cast_bytes(uuid)] = eid
    # fresh, independent job lists for the new engine
    for table in (self.queues, self.tasks, self.completed):
        table[eid] = []
    self.hearts[heart] = eid

    if self.notifier:
        self.session.send(self.notifier, "registration_notification",
                          content=dict(id=eid, uuid=uuid))
    self.log.info("engine::Engine Connected: %i", eid)

    self._save_engine_state()
1028
def _purge_stalled_registration(self, heart):
    """Drop the registration stored under `heart`, if there still is one."""
    if heart not in self.incoming_registrations:
        return
    stalled = self.incoming_registrations.pop(heart)
    self.log.info("registration::purging stalled registration: %i", stalled.id)
1032
1035
1033 #-------------------------------------------------------------------------
1036 #-------------------------------------------------------------------------
1037 # Engine State
1038 #-------------------------------------------------------------------------
1039
1040
def _cleanup_engine_state_file(self):
    """Remove the engine state file, if one is configured and exists.

    A failure to remove the file is logged rather than raised.
    """
    state_file = self.engine_state_file
    if not state_file or not os.path.exists(state_file):
        return
    self.log.debug("cleaning up engine state: %s", state_file)
    try:
        os.remove(state_file)
    except (IOError, OSError):
        # os.remove raises OSError, which is not an IOError on Python 2
        self.log.error("Couldn't cleanup file: %s", state_file, exc_info=True)
1050
1051
def _save_engine_state(self):
    """Save the engine mapping to the JSON engine state file.

    Writes ``{'engines': {id: uuid}, 'next_id': counter}``, leaving out
    engines whose uuid is in ``self.dead_engines``.  Does nothing when no
    state file is configured.
    """
    if not self.engine_state_file:
        return
    self.log.debug("save engine state to %s" % self.engine_state_file)

    live = dict(
        (eid, connector.uuid)
        for eid, connector in self.engines.iteritems()
        if connector.uuid not in self.dead_engines
    )
    snapshot = dict(engines=live, next_id=self._idcounter)

    with open(self.engine_state_file, 'w') as f:
        json.dump(snapshot, f)
1069
1070
def _load_engine_state(self):
    """Load the engine mapping from the JSON engine state file.

    Every saved engine is replayed through ``finish_registration`` with its
    heart pre-marked as beating in the heart monitor.  ``self.notifier`` is
    set to None while replaying and is always restored afterwards, even if
    replaying fails.  Does nothing when the state file does not exist.
    """
    if not os.path.exists(self.engine_state_file):
        return

    self.log.info("loading engine state from %s", self.engine_state_file)

    with open(self.engine_state_file) as f:
        state = json.load(f)

    # Restore the id counter first: finish_registration() calls
    # _save_engine_state(), which writes self._idcounter back to the file.
    # Restoring it afterwards would persist a stale 'next_id'.
    self._idcounter = state['next_id']

    save_notifier = self.notifier
    self.notifier = None
    try:
        for eid, uuid in state['engines'].items():
            heart = uuid.encode('ascii')
            # start with this heart as current and beating:
            self.heartmonitor.responses.add(heart)
            self.heartmonitor.hearts.add(heart)

            self.incoming_registrations[heart] = EngineConnector(id=int(eid), uuid=uuid)
            self.finish_registration(heart)
    finally:
        # never leave notifications switched off
        self.notifier = save_notifier
1095
1096 #-------------------------------------------------------------------------
1034 # Client Requests
1097 # Client Requests
1035 #-------------------------------------------------------------------------
1098 #-------------------------------------------------------------------------
1036
1099
def shutdown_request(self, client_id, msg):
    """Handle a shutdown request.

    Replies to the requesting client, sends a shutdown notice on the
    notifier socket, then schedules ``_shutdown`` with a delay of 1000
    (passed to ``ioloop.DelayedCallback``).
    """
    send = self.session.send
    send(self.query, 'shutdown_reply', content=dict(status='ok'), ident=client_id)
    # also notify other clients of shutdown
    send(self.notifier, 'shutdown_notice', content=dict(status='ok'))
    delayed = ioloop.DelayedCallback(lambda : self._shutdown(), 1000, self.loop)
    delayed.start()
1044
1107
def _shutdown(self):
    """Log the shutdown, pause briefly, then exit with status 0."""
    self.log.info("hub::hub shutting down.")
    time.sleep(0.1)
    # the exception that sys.exit(0) raises
    raise SystemExit(0)
1049
1112
1050
1113
def check_load(self, client_id, msg):
    """Reply with the number of outstanding jobs on each requested target.

    The ``load_reply`` content maps each engine id (as a string, matching
    ``queue_status``) to ``len(queues[id]) + len(tasks[id])``.  If the
    targets are missing or fail validation, a ``hub_error`` carrying the
    wrapped exception is sent instead.
    """
    content = msg['content']
    try:
        targets = content['targets']
        targets = self._validate_targets(targets)
    except Exception:
        content = error.wrap_exception()
        self.session.send(self.query, "hub_error",
                content=content, ident=client_id)
        return

    content = dict(status='ok')
    for t in targets:
        # str(t), not bytes(t): on Python 3 bytes(3) is b'\x00\x00\x00'
        content[str(t)] = len(self.queues[t]) + len(self.tasks[t])
    self.session.send(self.query, "load_reply", content=content, ident=client_id)
1067
1130
1068
1131
def queue_status(self, client_id, msg):
    """Return the Queue status of one or more targets.

    if verbose: return the msg_ids
    else: return len of each type.

    keys per target:
        queue (pending MUX jobs)
        tasks (pending Task jobs)
        completed (finished jobs from both queues)

    The reply also carries ``unassigned``, built from ``self.unassigned``
    in the same list-or-count form.
    """
    request = msg['content']
    targets = request['targets']
    try:
        targets = self._validate_targets(targets)
    except:
        failure = error.wrap_exception()
        self.session.send(self.query, "hub_error",
                content=failure, ident=client_id)
        return

    verbose = request.get('verbose', False)
    # verbose replies carry the job lists themselves, terse ones the counts
    report = (lambda jobs: jobs) if verbose else len

    reply = dict(status='ok')
    for t in targets:
        reply[str(t)] = {
            'queue': report(self.queues[t]),
            'completed': report(self.completed[t]),
            'tasks': report(self.tasks[t]),
        }
    if verbose:
        reply['unassigned'] = list(self.unassigned)
    else:
        reply['unassigned'] = len(self.unassigned)
    self.session.send(self.query, "queue_reply", content=reply, ident=client_id)
1099
1162
def purge_results(self, client_id, msg):
    """Purge results from memory. This method is more valuable before we move
    to a DB based message storage mechanism.

    ``content['msg_ids']`` is either the string ``'all'`` (drop every
    completed record) or a list of msg_ids to drop; a request naming a
    msg_id that is still pending is rejected.  ``content['engine_ids']``
    additionally drops the completed records of the listed engines.  The
    outcome is sent as a ``purge_reply``: ``status='ok'`` or a wrapped
    exception.
    """
    content = msg['content']
    self.log.info("Dropping records with %s", content)
    msg_ids = content.get('msg_ids', [])
    reply = dict(status='ok')
    if msg_ids == 'all':
        try:
            self.db.drop_matching_records(dict(completed={'$ne':None}))
        except Exception:
            reply = error.wrap_exception()
    else:
        # a real list: filter() is a lazy, always-truthy iterator on
        # Python 3 and cannot be indexed
        pending = [m for m in msg_ids if m in self.pending]
        if pending:
            try:
                raise IndexError("msg pending: %r" % pending[0])
            except IndexError:
                reply = error.wrap_exception()
        else:
            try:
                self.db.drop_matching_records(dict(msg_id={'$in':msg_ids}))
            except Exception:
                reply = error.wrap_exception()

    if reply['status'] == 'ok':
        for eid in content.get('engine_ids', []):
            if eid not in self.engines:
                try:
                    raise IndexError("No such engine: %i" % eid)
                except Exception:
                    # also covers the TypeError from formatting a non-int id
                    reply = error.wrap_exception()
                break
            uid = self.engines[eid].uuid
            try:
                self.db.drop_matching_records(dict(engine_uuid=uid, completed={'$ne':None}))
            except Exception:
                reply = error.wrap_exception()
                break

    self.session.send(self.query, 'purge_reply', content=reply, ident=client_id)
1142
1205
def resubmit_task(self, client_id, msg):
    """Resubmit one or more tasks.

    Looks up the records for ``content['msg_ids']``, re-sends each one on
    ``self.resubmit`` under a fresh msg_id, and replies with a
    ``resubmit_reply`` mapping original ids to the new ones.  The reply is a
    wrapped exception when the lookup fails, when the number of records
    found does not match the request, or when a new record cannot be added.
    """
    def finish(reply):
        self.session.send(self.query, 'resubmit_reply', content=reply, ident=client_id)

    msg_ids = msg['content']['msg_ids']
    try:
        records = self.db.find_records({'msg_id' : {'$in' : msg_ids}}, keys=[
            'header', 'content', 'buffers'])
    except Exception:
        self.log.error('db::db error finding tasks to resubmit', exc_info=True)
        return finish(error.wrap_exception())

    # validate msg_ids
    found_ids = [ rec['msg_id'] for rec in records ]
    n_found = len(records)
    n_requested = len(msg_ids)
    if n_found > n_requested:
        try:
            raise RuntimeError("DB appears to be in an inconsistent state."
                "More matching records were found than should exist")
        except Exception:
            return finish(error.wrap_exception())
    if n_found < n_requested:
        missing = [ m for m in msg_ids if m not in found_ids ]
        try:
            raise KeyError("No such msg(s): %r" % missing)
        except KeyError:
            return finish(error.wrap_exception())
    # Tasks that are still pending are resubmitted as well: each resubmission
    # runs under a new msg_id, so no error is raised for them.

    # mapping of original IDs to resubmitted IDs
    resubmitted = {}

    # send the messages
    for rec in records:
        old_header = rec['header']
        new_msg = self.session.msg(old_header['msg_type'], parent=old_header)
        new_id = new_msg['msg_id']
        new_msg['content'] = rec['content']

        # use the old header, but update msg_id and timestamp
        fresh_header = new_msg['header']
        old_header['msg_id'] = fresh_header['msg_id']
        old_header['date'] = fresh_header['date']
        new_msg['header'] = old_header

        self.session.send(self.resubmit, new_msg, buffers=rec['buffers'])

        resubmitted[rec['msg_id']] = new_id
        self.pending.add(new_id)
        new_msg['buffers'] = rec['buffers']
        try:
            self.db.add_record(new_id, init_record(new_msg))
        except Exception:
            self.log.error("db::DB Error updating record: %s", new_id, exc_info=True)
            return finish(error.wrap_exception())

    finish(dict(status='ok', resubmitted=resubmitted))

    # store the new IDs in the Task DB
    for original_id, resubmit_id in resubmitted.iteritems():
        try:
            self.db.update_record(original_id, {'resubmitted' : resubmit_id})
        except Exception:
            self.log.error("db::DB Error updating record: %s", original_id, exc_info=True)
1217
1281
1218
1282
def _extract_record(self, rec):
    """Decompose a TaskRecord dict into subsection of reply for get_result.

    Returns ``(content, buffers)``: `content` holds the record's
    result_content, header, result_header, received timestamp and an ``io``
    dict with the pyin/pyout/pyerr/stdout/stderr fields; `buffers` is always
    a list with each result buffer converted to ``bytes`` (empty when the
    record has no result buffers).
    """
    io_dict = dict(
        (key, rec[key])
        for key in ('pyin', 'pyout', 'pyerr', 'stdout', 'stderr')
    )
    content = {
        'result_content': rec['result_content'],
        'header': rec['header'],
        'result_header': rec['result_header'],
        'received': rec['received'],
        'io': io_dict,
    }
    # build a real list: map() is a lazy one-shot iterator on Python 3
    buffers = [bytes(buf) for buf in rec['result_buffers'] or []]

    return content, buffers
1236
1300
def get_results(self, client_id, msg):
    """Get the result of 1 or more messages.

    Replies with a ``result_reply`` whose content lists the requested
    msg_ids under ``pending`` or ``completed``.  Unless
    ``content['status_only']`` is set, the extracted record of each completed
    message is included under its msg_id and its result buffers are
    attached to the reply.  An unknown msg_id, or a failing DB lookup,
    turns the reply content into a wrapped exception.
    """
    content = msg['content']
    msg_ids = sorted(set(content['msg_ids']))
    statusonly = content.get('status_only', False)
    pending = []
    completed = []
    content = dict(status='ok')
    content['pending'] = pending
    content['completed'] = completed
    buffers = []
    if not statusonly:
        try:
            matches = self.db.find_records(dict(msg_id={'$in':msg_ids}))
            # turn match list into dict, for faster lookup
            records = {}
            for rec in matches:
                records[rec['msg_id']] = rec
        except Exception:
            content = error.wrap_exception()
            self.session.send(self.query, "result_reply", content=content,
                                                parent=msg, ident=client_id)
            return
    else:
        records = {}
    for msg_id in msg_ids:
        if msg_id in self.pending:
            pending.append(msg_id)
        elif msg_id in self.all_completed:
            completed.append(msg_id)
            if not statusonly:
                c, bufs = self._extract_record(records[msg_id])
                content[msg_id] = c
                buffers.extend(bufs)
        elif msg_id in records:
            # look at this message's own record, not at whichever record
            # the lookup loop above happened to visit last
            record = records[msg_id]
            if record['completed']:
                completed.append(msg_id)
                c, bufs = self._extract_record(record)
                content[msg_id] = c
                buffers.extend(bufs)
            else:
                pending.append(msg_id)
        else:
            try:
                raise KeyError('No such message: '+msg_id)
            except Exception:
                content = error.wrap_exception()
            break
    self.session.send(self.query, "result_reply", content=content,
                                        parent=msg, ident=client_id,
                                        buffers=buffers)
1288
1352
def get_history(self, client_id, msg):
    """Get a list of all msg_ids in our DB records.

    Sends a ``history_reply`` whose content is either
    ``{'status': 'ok', 'history': msg_ids}`` or a wrapped exception when
    the DB lookup fails.
    """
    try:
        history = self.db.get_history()
    except Exception:
        reply = error.wrap_exception()
    else:
        reply = dict(status='ok', history=history)

    send_kwargs = dict(content=reply, parent=msg, ident=client_id)
    self.session.send(self.query, "history_reply", **send_kwargs)
1300
1364
def db_query(self, client_id, msg):
    """Perform a raw query on the task record database.

    ``content['query']`` and ``content['keys']`` are passed to
    ``self.db.find_records``.  ``buffers`` and ``result_buffers`` are removed
    from every record; for whichever of the two was named in `keys`, the
    buffers are attached to the reply and their per-record counts are
    reported as ``buffer_lens`` / ``result_buffer_lens`` (None otherwise).
    A failing lookup yields a wrapped exception as the reply content.
    """
    request = msg['content']
    query = request.get('query', {})
    keys = request.get('keys', None)
    buffers = []
    try:
        records = self.db.find_records(query, keys)
    except Exception:
        reply = error.wrap_exception()
    else:
        # lengths are only tracked for buffer fields that were asked for
        requested = keys if keys is not None else ()
        buffer_lens = [] if 'buffers' in requested else None
        result_buffer_lens = [] if 'result_buffers' in requested else None
        tracked = (('buffers', buffer_lens), ('result_buffers', result_buffer_lens))

        for rec in records:
            for field, lens in tracked:
                # the stored value may be None, so fall back to no buffers
                bufs = rec.pop(field, None) or []
                if lens is not None:
                    lens.append(len(bufs))
                    buffers.extend(bufs)
        reply = dict(status='ok', records=records, buffer_lens=buffer_lens,
                     result_buffer_lens=result_buffer_lens)
    self.session.send(self.query, "db_reply", content=reply,
                      parent=msg, ident=client_id,
                      buffers=buffers)
1337
1401
@@ -1,768 +1,794
1 """The Python scheduler for rich scheduling.
1 """The Python scheduler for rich scheduling.
2
2
3 The Pure ZMQ scheduler does not allow routing schemes other than LRU,
3 The Pure ZMQ scheduler does not allow routing schemes other than LRU,
4 nor does it check msg_id DAG dependencies. For those, a slightly slower
4 nor does it check msg_id DAG dependencies. For those, a slightly slower
5 Python Scheduler exists.
5 Python Scheduler exists.
6
6
7 Authors:
7 Authors:
8
8
9 * Min RK
9 * Min RK
10 """
10 """
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12 # Copyright (C) 2010-2011 The IPython Development Team
12 # Copyright (C) 2010-2011 The IPython Development Team
13 #
13 #
14 # Distributed under the terms of the BSD License. The full license is in
14 # Distributed under the terms of the BSD License. The full license is in
15 # the file COPYING, distributed as part of this software.
15 # the file COPYING, distributed as part of this software.
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 #----------------------------------------------------------------------
18 #----------------------------------------------------------------------
19 # Imports
19 # Imports
20 #----------------------------------------------------------------------
20 #----------------------------------------------------------------------
21
21
22 from __future__ import print_function
22 from __future__ import print_function
23
23
24 import logging
24 import logging
25 import sys
25 import sys
26 import time
26 import time
27
27
28 from datetime import datetime, timedelta
28 from datetime import datetime, timedelta
29 from random import randint, random
29 from random import randint, random
30 from types import FunctionType
30 from types import FunctionType
31
31
32 try:
32 try:
33 import numpy
33 import numpy
34 except ImportError:
34 except ImportError:
35 numpy = None
35 numpy = None
36
36
37 import zmq
37 import zmq
38 from zmq.eventloop import ioloop, zmqstream
38 from zmq.eventloop import ioloop, zmqstream
39
39
40 # local imports
40 # local imports
41 from IPython.external.decorator import decorator
41 from IPython.external.decorator import decorator
42 from IPython.config.application import Application
42 from IPython.config.application import Application
43 from IPython.config.loader import Config
43 from IPython.config.loader import Config
44 from IPython.utils.traitlets import Instance, Dict, List, Set, Integer, Enum, CBytes
44 from IPython.utils.traitlets import Instance, Dict, List, Set, Integer, Enum, CBytes
45 from IPython.utils.py3compat import cast_bytes
45 from IPython.utils.py3compat import cast_bytes
46
46
47 from IPython.parallel import error, util
47 from IPython.parallel import error, util
48 from IPython.parallel.factory import SessionFactory
48 from IPython.parallel.factory import SessionFactory
49 from IPython.parallel.util import connect_logger, local_logger
49 from IPython.parallel.util import connect_logger, local_logger
50
50
51 from .dependency import Dependency
51 from .dependency import Dependency
52
52
@decorator
def logged(f, self, *args, **kwargs):
    """Log a call to the wrapped method at debug level, then perform it.

    `self` must expose a ``log`` attribute with a ``debug`` method.
    The wrapped callable's return value is passed through unchanged.
    """
    # ``__name__`` is available on both Python 2 and Python 3 functions,
    # whereas ``func_name`` only exists on Python 2.
    self.log.debug("scheduler::%s(*%s,**%s)", f.__name__, args, kwargs)
    return f(self, *args, **kwargs)
59
59
60 #----------------------------------------------------------------------
60 #----------------------------------------------------------------------
61 # Chooser functions
61 # Chooser functions
62 #----------------------------------------------------------------------
62 #----------------------------------------------------------------------
63
63
def plainrandom(loads):
    """Choose an index into `loads` uniformly at random.

    Only the length of `loads` matters; its values are never inspected.
    """
    last = len(loads) - 1
    return randint(0, last)
68
68
def lru(loads):
    """Pick whatever sits at the head of the line.

    `loads` is assumed to be kept in LRU order (oldest first), so the
    first slot is always the answer; the load values themselves are
    not read.
    """
    front = 0
    return front
77
77
def twobin(loads):
    """Draw two random positions and keep the one nearer the front.

    `loads` is assumed to be in LRU order (oldest first), which makes
    the smaller index the less recently used of the two candidates.
    The load values themselves are ignored.
    """
    last = len(loads) - 1
    first_pick = randint(0, last)
    second_pick = randint(0, last)
    return first_pick if first_pick < second_pick else second_pick
89
89
def weighted(loads):
    """Sample two indices with inverse load as weight, return the lighter.

    Of the two sampled indices, the one with the strictly larger weight
    (i.e. the smaller load) is returned; on a tie the first sample wins.
    Requires numpy.
    """
    # the 1e-6 offset weights a load of 0 a million times more than 1
    weights = 1. / (1e-6 + numpy.array(loads))
    cumulative = weights.cumsum()
    total = cumulative[-1]
    first_draw = random() * total
    second_draw = random() * total

    def locate(draw):
        # index of the first cumulative weight that is not below `draw`
        pos = 0
        while cumulative[pos] < draw:
            pos += 1
        return pos

    idx = locate(first_draw)
    idy = locate(second_draw)
    return idy if weights[idy] > weights[idx] else idx
111
111
def leastload(loads):
    """Return the index of the smallest entry in `loads`.

    When the lowest load occurs more than once, the first occurrence
    is used.  With LRU-ordered loads this means the least recently
    used of the least-loaded entries is chosen.
    """
    lowest = min(loads)
    return loads.index(lowest)
120
120
121 #---------------------------------------------------------------------
121 #---------------------------------------------------------------------
122 # Classes
122 # Classes
123 #---------------------------------------------------------------------
123 #---------------------------------------------------------------------
124
124
125
125
126 # store empty default dependency:
126 # store empty default dependency:
127 MET = Dependency([])
127 MET = Dependency([])
128
128
129
129
class Job(object):
    """Plain record holding everything the scheduler tracks for one task."""

    def __init__(self, msg_id, raw_msg, idents, msg, header, targets, after, follow, timeout):
        # the submission itself
        self.msg_id = msg_id
        self.raw_msg = raw_msg
        self.idents = idents
        self.msg = msg
        self.header = header

        # scheduling constraints
        self.targets = targets
        self.after = after
        self.follow = follow
        self.timeout = timeout

        # bookkeeping: creation time, and a set that starts out empty
        self.timestamp = time.time()
        self.blacklist = set()

    @property
    def dependents(self):
        """Union of the `follow` and `after` collections."""
        location_deps = self.follow
        time_deps = self.after
        return location_deps.union(time_deps)
150
150
151 class TaskScheduler(SessionFactory):
151 class TaskScheduler(SessionFactory):
152 """Python TaskScheduler object.
152 """Python TaskScheduler object.
153
153
154 This is the simplest object that supports msg_id based
154 This is the simplest object that supports msg_id based
155 DAG dependencies. *Only* task msg_ids are checked, not
155 DAG dependencies. *Only* task msg_ids are checked, not
156 msg_ids of jobs submitted via the MUX queue.
156 msg_ids of jobs submitted via the MUX queue.
157
157
158 """
158 """
159
159
160 hwm = Integer(1, config=True,
160 hwm = Integer(1, config=True,
161 help="""specify the High Water Mark (HWM) for the downstream
161 help="""specify the High Water Mark (HWM) for the downstream
162 socket in the Task scheduler. This is the maximum number
162 socket in the Task scheduler. This is the maximum number
163 of allowed outstanding tasks on each engine.
163 of allowed outstanding tasks on each engine.
164
164
165 The default (1) means that only one task can be outstanding on each
165 The default (1) means that only one task can be outstanding on each
166 engine. Setting TaskScheduler.hwm=0 means there is no limit, and the
166 engine. Setting TaskScheduler.hwm=0 means there is no limit, and the
167 engines continue to be assigned tasks while they are working,
167 engines continue to be assigned tasks while they are working,
168 effectively hiding network latency behind computation, but can result
168 effectively hiding network latency behind computation, but can result
169 in an imbalance of work when submitting many heterogenous tasks all at
169 in an imbalance of work when submitting many heterogenous tasks all at
170 once. Any positive value greater than one is a compromise between the
170 once. Any positive value greater than one is a compromise between the
171 two.
171 two.
172
172
173 """
173 """
174 )
174 )
175 scheme_name = Enum(('leastload', 'pure', 'lru', 'plainrandom', 'weighted', 'twobin'),
175 scheme_name = Enum(('leastload', 'pure', 'lru', 'plainrandom', 'weighted', 'twobin'),
176 'leastload', config=True, allow_none=False,
176 'leastload', config=True, allow_none=False,
177 help="""select the task scheduler scheme [default: Python LRU]
177 help="""select the task scheduler scheme [default: Python LRU]
178 Options are: 'pure', 'lru', 'plainrandom', 'weighted', 'twobin','leastload'"""
178 Options are: 'pure', 'lru', 'plainrandom', 'weighted', 'twobin','leastload'"""
179 )
179 )
def _scheme_name_changed(self, old, new):
    """Install the chooser function named by the new `scheme_name` value.

    The function is looked up by name among this module's globals, so
    `new` must be the name of a module-level chooser.
    """
    self.log.debug("Using scheme %r"%new)
    chooser = globals()[new]
    self.scheme = chooser
183
183
184 # input arguments:
184 # input arguments:
185 scheme = Instance(FunctionType) # function for determining the destination
185 scheme = Instance(FunctionType) # function for determining the destination
def _scheme_default(self):
    """Provide `leastload` as the initial chooser function."""
    default_chooser = leastload
    return default_chooser
188 client_stream = Instance(zmqstream.ZMQStream) # client-facing stream
188 client_stream = Instance(zmqstream.ZMQStream) # client-facing stream
189 engine_stream = Instance(zmqstream.ZMQStream) # engine-facing stream
189 engine_stream = Instance(zmqstream.ZMQStream) # engine-facing stream
190 notifier_stream = Instance(zmqstream.ZMQStream) # hub-facing sub stream
190 notifier_stream = Instance(zmqstream.ZMQStream) # hub-facing sub stream
191 mon_stream = Instance(zmqstream.ZMQStream) # hub-facing pub stream
191 mon_stream = Instance(zmqstream.ZMQStream) # hub-facing pub stream
192 query_stream = Instance(zmqstream.ZMQStream) # hub-facing DEALER stream
192
193
193 # internals:
194 # internals:
194 graph = Dict() # dict by msg_id of [ msg_ids that depend on key ]
195 graph = Dict() # dict by msg_id of [ msg_ids that depend on key ]
195 retries = Dict() # dict by msg_id of retries remaining (non-neg ints)
196 retries = Dict() # dict by msg_id of retries remaining (non-neg ints)
196 # waiting = List() # list of msg_ids ready to run, but haven't due to HWM
197 # waiting = List() # list of msg_ids ready to run, but haven't due to HWM
197 depending = Dict() # dict by msg_id of Jobs
198 depending = Dict() # dict by msg_id of Jobs
198 pending = Dict() # dict by engine_uuid of submitted tasks
199 pending = Dict() # dict by engine_uuid of submitted tasks
199 completed = Dict() # dict by engine_uuid of completed tasks
200 completed = Dict() # dict by engine_uuid of completed tasks
200 failed = Dict() # dict by engine_uuid of failed tasks
201 failed = Dict() # dict by engine_uuid of failed tasks
201 destinations = Dict() # dict by msg_id of engine_uuids where jobs ran (reverse of completed+failed)
202 destinations = Dict() # dict by msg_id of engine_uuids where jobs ran (reverse of completed+failed)
202 clients = Dict() # dict by msg_id for who submitted the task
203 clients = Dict() # dict by msg_id for who submitted the task
203 targets = List() # list of target IDENTs
204 targets = List() # list of target IDENTs
204 loads = List() # list of engine loads
205 loads = List() # list of engine loads
205 # full = Set() # set of IDENTs that have HWM outstanding tasks
206 # full = Set() # set of IDENTs that have HWM outstanding tasks
206 all_completed = Set() # set of all completed tasks
207 all_completed = Set() # set of all completed tasks
207 all_failed = Set() # set of all failed tasks
208 all_failed = Set() # set of all failed tasks
208 all_done = Set() # set of all finished tasks=union(completed,failed)
209 all_done = Set() # set of all finished tasks=union(completed,failed)
209 all_ids = Set() # set of all submitted task IDs
210 all_ids = Set() # set of all submitted task IDs
210
211
211 auditor = Instance('zmq.eventloop.ioloop.PeriodicCallback')
212 auditor = Instance('zmq.eventloop.ioloop.PeriodicCallback')
212
213
213 ident = CBytes() # ZMQ identity. This should just be self.session.session
214 ident = CBytes() # ZMQ identity. This should just be self.session.session
214 # but ensure Bytes
215 # but ensure Bytes
215 def _ident_default(self):
216 def _ident_default(self):
216 return self.session.bsession
217 return self.session.bsession
217
218
def start(self):
    """Hook up all stream handlers and start the timeout auditor."""
    # send a connection request on the query stream; the reply comes
    # back through dispatch_query_reply
    self.query_stream.on_recv(self.dispatch_query_reply)
    self.session.send(self.query_stream, "connection_request", {})

    self.engine_stream.on_recv(self.dispatch_result, copy=False)
    self.client_stream.on_recv(self.dispatch_submission, copy=False)

    # notification msg_type -> handler taking the engine ident
    self._notification_handlers = {
        'registration_notification': self._register_engine,
        'unregistration_notification': self._unregister_engine,
    }
    self.notifier_stream.on_recv(self.dispatch_notification)

    # audit timeouts every 2000 ms, i.e. once every two seconds
    audit_interval_ms = 2e3
    self.auditor = ioloop.PeriodicCallback(self.audit_timeouts, audit_interval_ms, self.loop)
    self.auditor.start()
    self.log.info("Scheduler started [%s]"%self.scheme_name)
230
234
def resume_receiving(self):
    """Start handling client submissions again."""
    submissions = self.client_stream
    submissions.on_recv(self.dispatch_submission, copy=False)
234
238
def stop_receiving(self):
    """Detach the submission handler from the client stream.

    Meant for when there are no engines: jobs are not accepted and
    are left in the ZMQ queue instead.
    """
    submissions = self.client_stream
    submissions.on_recv(None)
239
243
240 #-----------------------------------------------------------------------
244 #-----------------------------------------------------------------------
241 # [Un]Registration Handling
245 # [Un]Registration Handling
242 #-----------------------------------------------------------------------
246 #-----------------------------------------------------------------------
243
247
244
248
@util.log_errors
def dispatch_query_reply(self, msg):
    """Handle the reply to our initial connection request.

    Every engine uuid listed under the reply's ``engines`` content key
    is registered with the scheduler.  Messages that cannot be split
    into identities or unserialized are logged and dropped.

    Decorated with ``util.log_errors`` like the other stream callbacks
    in this class, so all handlers treat errors the same way.
    """
    try:
        idents, msg = self.session.feed_identities(msg)
    except ValueError:
        self.log.warn("task::Invalid Message: %r",msg)
        return
    try:
        msg = self.session.unserialize(msg)
    except ValueError:
        self.log.warn("task::Unauthorized message from: %r"%idents)
        return

    content = msg['content']
    # a reply without an 'engines' entry registers nothing
    for uuid in content.get('engines', {}).values():
        self._register_engine(cast_bytes(uuid))
265
266
@util.log_errors
def dispatch_notification(self, msg):
    """Route an engine register/unregister notification to its handler.

    The handler is selected by the message's ``msg_type`` and called
    with the engine uuid (as bytes) taken from the message content.
    """
    try:
        idents, msg = self.session.feed_identities(msg)
    except ValueError:
        self.log.warn("task::Invalid Message: %r",msg)
        return
    try:
        msg = self.session.unserialize(msg)
    except ValueError:
        self.log.warn("task::Unauthorized message from: %r"%idents)
        return

    msg_type = msg['header']['msg_type']
    handler = self._notification_handlers.get(msg_type)
    if handler is None:
        self.log.error("Unhandled message type: %r"%msg_type)
        return

    try:
        engine_uuid = cast_bytes(msg['content']['uuid'])
        handler(engine_uuid)
    except Exception:
        self.log.error("task::Invalid notification msg: %r", msg, exc_info=True)
269
291
def _register_engine(self, uid):
    """Start tracking the newly available engine with ident `uid`."""
    # a new engine goes to the head of the line, with zero load
    self.targets.insert(0, uid)
    self.loads.insert(0, 0)

    # fresh, empty per-engine bookkeeping
    for history in (self.completed, self.failed):
        history[uid] = set()
    self.pending[uid] = {}

    # rescan the graph now that there is a new engine
    self.update_graph(None)
283
305
def _unregister_engine(self, uid):
    """Stop scheduling onto engine `uid` and arrange cleanup of its state."""
    # handle any potentially finished tasks first
    self.engine_stream.flush()

    # NOTE: entries in self.destinations are deliberately not popped,
    # because they might be used later

    # take the engine out of rotation so it receives no more work
    position = self.targets.index(uid)
    self.targets.pop(position)
    self.loads.pop(position)

    if not self.pending[uid]:
        # nothing outstanding: drop the per-engine history right away
        self.completed.pop(uid)
        self.failed.pop(uid)
        return

    # results for the outstanding tasks might still be incoming, so
    # wait 5 seconds before cleaning up the pending jobs
    def cleanup():
        self.handle_stranded_tasks(uid)

    dc = ioloop.DelayedCallback(cleanup, 5000, self.loop)
    dc.start()
310
332
311
333
def handle_stranded_tasks(self, engine):
    """Fail every task still pending on `engine`, which has died.

    For each such task a fake error ``apply_reply`` is built and handed
    to `dispatch_result`.  Afterwards the engine's completed/failed
    records are dropped.
    """
    lost = self.pending[engine]
    # iterate over a snapshot of the ids
    for msg_id in list(lost):
        if msg_id not in self.pending[engine]:
            # prevent double-handling of messages
            continue

        raw_msg = lost[msg_id].raw_msg
        idents, frames = self.session.feed_identities(raw_msg, copy=False)
        # NOTE(review): frames[1] is presumably the header frame of the
        # original request -- confirm against Session's wire format
        parent = self.session.unpack(frames[1].bytes)
        reply_idents = [engine, idents[0]]

        # raise and immediately catch, so that wrap_exception() runs
        # while the exception is being handled
        try:
            raise error.EngineError("Engine %r died while running task %r"%(engine, msg_id))
        except:
            content = error.wrap_exception()

        # build fake header
        header = dict(status='error', engine=engine, date=datetime.now())
        reply = self.session.msg('apply_reply', content, parent=parent, subheader=header)
        raw_reply = [zmq.Message(part)
                     for part in self.session.serialize(reply, ident=reply_idents)]
        # and dispatch it
        self.dispatch_result(raw_reply)

    # finally scrub completed/failed lists
    self.completed.pop(engine)
    self.failed.pop(engine)
344
366
345
367
346 #-----------------------------------------------------------------------
368 #-----------------------------------------------------------------------
347 # Job Submission
369 # Job Submission
348 #-----------------------------------------------------------------------
370 #-----------------------------------------------------------------------
349
371
350
372
@util.log_errors
def dispatch_submission(self, raw_msg):
    """Accept a task submission and run it, park it, or fail it.

    The header's ``targets``, ``retries``, ``after``, ``follow`` and
    ``timeout`` entries are read and turned into a `Job`.  A job with
    invalid or unreachable dependencies is failed at once; otherwise it
    is run if possible, and saved as unmet if not.
    """
    # ensure targets up to date:
    self.notifier_stream.flush()
    try:
        idents, msg = self.session.feed_identities(raw_msg, copy=False)
        msg = self.session.unserialize(msg, content=False, copy=False)
    except Exception:
        self.log.error("task::Invaid task msg: %r"%raw_msg, exc_info=True)
        return

    # send to monitor
    self.mon_stream.send_multipart([b'intask']+raw_msg, copy=False)

    header = msg['header']
    msg_id = header['msg_id']
    self.all_ids.add(msg_id)

    # targets arrive as a list of unicode objects; keep a set of bytes
    targets = set(map(cast_bytes, header.get('targets', [])))

    self.retries[msg_id] = header.get('retries', 0)

    # time dependencies
    after = header.get('after', None)
    if not after:
        after = MET
    else:
        after = Dependency(after)
        if after.all:
            # drop the ids that are already done
            if after.success:
                after = Dependency(
                    after.difference(self.all_completed),
                    success=after.success,
                    failure=after.failure,
                    all=after.all,
                )
            if after.failure:
                after = Dependency(
                    after.difference(self.all_failed),
                    success=after.success,
                    failure=after.failure,
                    all=after.all,
                )
        if after.check(self.all_completed, self.all_failed):
            # already met: recast as the shared empty dependency,
            # to prevent unnecessary set comparisons
            after = MET

    # location dependencies
    follow = Dependency(header.get('follow', []))

    # turn timeouts into datetime objects:
    timeout = header.get('timeout', None)
    if timeout:
        # cast to float, because jsonlib returns floats as decimal.Decimal,
        # which timedelta does not accept
        timeout = datetime.now() + timedelta(seconds=float(timeout))

    job = Job(
        msg_id=msg_id, raw_msg=raw_msg, idents=idents, msg=msg,
        header=header, targets=targets, after=after, follow=follow,
        timeout=timeout,
    )

    # validate and reduce dependencies:
    for dep in (after, follow):
        if not dep:
            # empty dependency
            continue
        if msg_id in dep or dep.difference(self.all_ids):
            # depends on itself, or on ids that were never submitted
            why = error.InvalidDependency
        elif dep.unreachable(self.all_completed, self.all_failed):
            why = error.ImpossibleDependency
        else:
            continue
        # fail_unreachable only acts on jobs found in self.depending
        self.depending[msg_id] = job
        return self.fail_unreachable(msg_id, why)

    if not after.check(self.all_completed, self.all_failed):
        # time deps not met yet
        self.save_unmet(job)
    elif not self.maybe_run(job) and msg_id not in self.all_failed:
        # can't run yet (and did not fail as unreachable)
        self.save_unmet(job)
441
463
def audit_timeouts(self):
    """Fail every waiting task whose timeout has expired."""
    now = datetime.now()
    for msg_id in list(self.depending):
        # one failure may cascade to another, so make sure this task
        # is still waiting before looking at it
        if msg_id not in self.depending:
            continue
        job = self.depending[msg_id]
        if job.timeout and job.timeout < now:
            self.fail_unreachable(msg_id, error.TaskTimeout)
451
473
def fail_unreachable(self, msg_id, why=error.ImpossibleDependency):
    """Fail a waiting task that can no longer run.

    An error ``apply_reply`` wrapping an instance of `why` (by default
    ImpossibleDependency) is sent on the client stream and then on the
    monitor stream, and the graph is updated with the failure.  Nothing
    happens beyond an error log if `msg_id` is not in `self.depending`.
    """
    if msg_id not in self.depending:
        self.log.error("msg %r already failed!", msg_id)
        return

    job = self.depending.pop(msg_id)
    # this task no longer waits on anything: remove it from the graph
    # entries of the tasks it depended on
    for mid in job.dependents:
        if mid not in self.graph:
            continue
        self.graph[mid].remove(msg_id)

    # raise and immediately catch, so that wrap_exception() runs
    # while the exception is being handled
    try:
        raise why()
    except:
        content = error.wrap_exception()

    self.all_done.add(msg_id)
    self.all_failed.add(msg_id)

    reply = self.session.send(self.client_stream, 'apply_reply', content,
                              parent=job.header, ident=job.idents)
    self.session.send(self.mon_stream, reply, ident=[b'outtask']+job.idents)

    self.update_graph(msg_id, success=False)
476
498
477 def maybe_run(self, job):
499 def maybe_run(self, job):
478 """check location dependencies, and run if they are met."""
500 """check location dependencies, and run if they are met."""
479 msg_id = job.msg_id
501 msg_id = job.msg_id
480 self.log.debug("Attempting to assign task %s", msg_id)
502 self.log.debug("Attempting to assign task %s", msg_id)
481 if not self.targets:
503 if not self.targets:
482 # no engines, definitely can't run
504 # no engines, definitely can't run
483 return False
505 return False
484
506
485 if job.follow or job.targets or job.blacklist or self.hwm:
507 if job.follow or job.targets or job.blacklist or self.hwm:
486 # we need a can_run filter
508 # we need a can_run filter
487 def can_run(idx):
509 def can_run(idx):
488 # check hwm
510 # check hwm
489 if self.hwm and self.loads[idx] == self.hwm:
511 if self.hwm and self.loads[idx] == self.hwm:
490 return False
512 return False
491 target = self.targets[idx]
513 target = self.targets[idx]
492 # check blacklist
514 # check blacklist
493 if target in job.blacklist:
515 if target in job.blacklist:
494 return False
516 return False
495 # check targets
517 # check targets
496 if job.targets and target not in job.targets:
518 if job.targets and target not in job.targets:
497 return False
519 return False
498 # check follow
520 # check follow
499 return job.follow.check(self.completed[target], self.failed[target])
521 return job.follow.check(self.completed[target], self.failed[target])
500
522
501 indices = filter(can_run, range(len(self.targets)))
523 indices = filter(can_run, range(len(self.targets)))
502
524
503 if not indices:
525 if not indices:
504 # couldn't run
526 # couldn't run
505 if job.follow.all:
527 if job.follow.all:
506 # check follow for impossibility
528 # check follow for impossibility
507 dests = set()
529 dests = set()
508 relevant = set()
530 relevant = set()
509 if job.follow.success:
531 if job.follow.success:
510 relevant = self.all_completed
532 relevant = self.all_completed
511 if job.follow.failure:
533 if job.follow.failure:
512 relevant = relevant.union(self.all_failed)
534 relevant = relevant.union(self.all_failed)
513 for m in job.follow.intersection(relevant):
535 for m in job.follow.intersection(relevant):
514 dests.add(self.destinations[m])
536 dests.add(self.destinations[m])
515 if len(dests) > 1:
537 if len(dests) > 1:
516 self.depending[msg_id] = job
538 self.depending[msg_id] = job
517 self.fail_unreachable(msg_id)
539 self.fail_unreachable(msg_id)
518 return False
540 return False
519 if job.targets:
541 if job.targets:
520 # check blacklist+targets for impossibility
542 # check blacklist+targets for impossibility
521 job.targets.difference_update(job.blacklist)
543 job.targets.difference_update(job.blacklist)
522 if not job.targets or not job.targets.intersection(self.targets):
544 if not job.targets or not job.targets.intersection(self.targets):
523 self.depending[msg_id] = job
545 self.depending[msg_id] = job
524 self.fail_unreachable(msg_id)
546 self.fail_unreachable(msg_id)
525 return False
547 return False
526 return False
548 return False
527 else:
549 else:
528 indices = None
550 indices = None
529
551
530 self.submit_task(job, indices)
552 self.submit_task(job, indices)
531 return True
553 return True
532
554
533 def save_unmet(self, job):
555 def save_unmet(self, job):
534 """Save a message for later submission when its dependencies are met."""
556 """Save a message for later submission when its dependencies are met."""
535 msg_id = job.msg_id
557 msg_id = job.msg_id
536 self.depending[msg_id] = job
558 self.depending[msg_id] = job
537 # track the ids in follow or after, but not those already finished
559 # track the ids in follow or after, but not those already finished
538 for dep_id in job.after.union(job.follow).difference(self.all_done):
560 for dep_id in job.after.union(job.follow).difference(self.all_done):
539 if dep_id not in self.graph:
561 if dep_id not in self.graph:
540 self.graph[dep_id] = set()
562 self.graph[dep_id] = set()
541 self.graph[dep_id].add(msg_id)
563 self.graph[dep_id].add(msg_id)
542
564
543 def submit_task(self, job, indices=None):
565 def submit_task(self, job, indices=None):
544 """Submit a task to any of a subset of our targets."""
566 """Submit a task to any of a subset of our targets."""
545 if indices:
567 if indices:
546 loads = [self.loads[i] for i in indices]
568 loads = [self.loads[i] for i in indices]
547 else:
569 else:
548 loads = self.loads
570 loads = self.loads
549 idx = self.scheme(loads)
571 idx = self.scheme(loads)
550 if indices:
572 if indices:
551 idx = indices[idx]
573 idx = indices[idx]
552 target = self.targets[idx]
574 target = self.targets[idx]
553 # print (target, map(str, msg[:3]))
575 # print (target, map(str, msg[:3]))
554 # send job to the engine
576 # send job to the engine
555 self.engine_stream.send(target, flags=zmq.SNDMORE, copy=False)
577 self.engine_stream.send(target, flags=zmq.SNDMORE, copy=False)
556 self.engine_stream.send_multipart(job.raw_msg, copy=False)
578 self.engine_stream.send_multipart(job.raw_msg, copy=False)
557 # update load
579 # update load
558 self.add_job(idx)
580 self.add_job(idx)
559 self.pending[target][job.msg_id] = job
581 self.pending[target][job.msg_id] = job
560 # notify Hub
582 # notify Hub
561 content = dict(msg_id=job.msg_id, engine_id=target.decode('ascii'))
583 content = dict(msg_id=job.msg_id, engine_id=target.decode('ascii'))
562 self.session.send(self.mon_stream, 'task_destination', content=content,
584 self.session.send(self.mon_stream, 'task_destination', content=content,
563 ident=[b'tracktask',self.ident])
585 ident=[b'tracktask',self.ident])
564
586
565
587
566 #-----------------------------------------------------------------------
588 #-----------------------------------------------------------------------
567 # Result Handling
589 # Result Handling
568 #-----------------------------------------------------------------------
590 #-----------------------------------------------------------------------
569
591
570
592
571 @util.log_errors
593 @util.log_errors
572 def dispatch_result(self, raw_msg):
594 def dispatch_result(self, raw_msg):
573 """dispatch method for result replies"""
595 """dispatch method for result replies"""
574 try:
596 try:
575 idents,msg = self.session.feed_identities(raw_msg, copy=False)
597 idents,msg = self.session.feed_identities(raw_msg, copy=False)
576 msg = self.session.unserialize(msg, content=False, copy=False)
598 msg = self.session.unserialize(msg, content=False, copy=False)
577 engine = idents[0]
599 engine = idents[0]
578 try:
600 try:
579 idx = self.targets.index(engine)
601 idx = self.targets.index(engine)
580 except ValueError:
602 except ValueError:
581 pass # skip load-update for dead engines
603 pass # skip load-update for dead engines
582 else:
604 else:
583 self.finish_job(idx)
605 self.finish_job(idx)
584 except Exception:
606 except Exception:
585 self.log.error("task::Invaid result: %r", raw_msg, exc_info=True)
607 self.log.error("task::Invaid result: %r", raw_msg, exc_info=True)
586 return
608 return
587
609
588 header = msg['header']
610 header = msg['header']
589 parent = msg['parent_header']
611 parent = msg['parent_header']
590 if header.get('dependencies_met', True):
612 if header.get('dependencies_met', True):
591 success = (header['status'] == 'ok')
613 success = (header['status'] == 'ok')
592 msg_id = parent['msg_id']
614 msg_id = parent['msg_id']
593 retries = self.retries[msg_id]
615 retries = self.retries[msg_id]
594 if not success and retries > 0:
616 if not success and retries > 0:
595 # failed
617 # failed
596 self.retries[msg_id] = retries - 1
618 self.retries[msg_id] = retries - 1
597 self.handle_unmet_dependency(idents, parent)
619 self.handle_unmet_dependency(idents, parent)
598 else:
620 else:
599 del self.retries[msg_id]
621 del self.retries[msg_id]
600 # relay to client and update graph
622 # relay to client and update graph
601 self.handle_result(idents, parent, raw_msg, success)
623 self.handle_result(idents, parent, raw_msg, success)
602 # send to Hub monitor
624 # send to Hub monitor
603 self.mon_stream.send_multipart([b'outtask']+raw_msg, copy=False)
625 self.mon_stream.send_multipart([b'outtask']+raw_msg, copy=False)
604 else:
626 else:
605 self.handle_unmet_dependency(idents, parent)
627 self.handle_unmet_dependency(idents, parent)
606
628
607 def handle_result(self, idents, parent, raw_msg, success=True):
629 def handle_result(self, idents, parent, raw_msg, success=True):
608 """handle a real task result, either success or failure"""
630 """handle a real task result, either success or failure"""
609 # first, relay result to client
631 # first, relay result to client
610 engine = idents[0]
632 engine = idents[0]
611 client = idents[1]
633 client = idents[1]
612 # swap_ids for ROUTER-ROUTER mirror
634 # swap_ids for ROUTER-ROUTER mirror
613 raw_msg[:2] = [client,engine]
635 raw_msg[:2] = [client,engine]
614 # print (map(str, raw_msg[:4]))
636 # print (map(str, raw_msg[:4]))
615 self.client_stream.send_multipart(raw_msg, copy=False)
637 self.client_stream.send_multipart(raw_msg, copy=False)
616 # now, update our data structures
638 # now, update our data structures
617 msg_id = parent['msg_id']
639 msg_id = parent['msg_id']
618 self.pending[engine].pop(msg_id)
640 self.pending[engine].pop(msg_id)
619 if success:
641 if success:
620 self.completed[engine].add(msg_id)
642 self.completed[engine].add(msg_id)
621 self.all_completed.add(msg_id)
643 self.all_completed.add(msg_id)
622 else:
644 else:
623 self.failed[engine].add(msg_id)
645 self.failed[engine].add(msg_id)
624 self.all_failed.add(msg_id)
646 self.all_failed.add(msg_id)
625 self.all_done.add(msg_id)
647 self.all_done.add(msg_id)
626 self.destinations[msg_id] = engine
648 self.destinations[msg_id] = engine
627
649
628 self.update_graph(msg_id, success)
650 self.update_graph(msg_id, success)
629
651
630 def handle_unmet_dependency(self, idents, parent):
652 def handle_unmet_dependency(self, idents, parent):
631 """handle an unmet dependency"""
653 """handle an unmet dependency"""
632 engine = idents[0]
654 engine = idents[0]
633 msg_id = parent['msg_id']
655 msg_id = parent['msg_id']
634
656
635 job = self.pending[engine].pop(msg_id)
657 job = self.pending[engine].pop(msg_id)
636 job.blacklist.add(engine)
658 job.blacklist.add(engine)
637
659
638 if job.blacklist == job.targets:
660 if job.blacklist == job.targets:
639 self.depending[msg_id] = job
661 self.depending[msg_id] = job
640 self.fail_unreachable(msg_id)
662 self.fail_unreachable(msg_id)
641 elif not self.maybe_run(job):
663 elif not self.maybe_run(job):
642 # resubmit failed
664 # resubmit failed
643 if msg_id not in self.all_failed:
665 if msg_id not in self.all_failed:
644 # put it back in our dependency tree
666 # put it back in our dependency tree
645 self.save_unmet(job)
667 self.save_unmet(job)
646
668
647 if self.hwm:
669 if self.hwm:
648 try:
670 try:
649 idx = self.targets.index(engine)
671 idx = self.targets.index(engine)
650 except ValueError:
672 except ValueError:
651 pass # skip load-update for dead engines
673 pass # skip load-update for dead engines
652 else:
674 else:
653 if self.loads[idx] == self.hwm-1:
675 if self.loads[idx] == self.hwm-1:
654 self.update_graph(None)
676 self.update_graph(None)
655
677
656
678
657
679
658 def update_graph(self, dep_id=None, success=True):
680 def update_graph(self, dep_id=None, success=True):
659 """dep_id just finished. Update our dependency
681 """dep_id just finished. Update our dependency
660 graph and submit any jobs that just became runnable.
682 graph and submit any jobs that just became runnable.
661
683
662 Called with dep_id=None to update entire graph for hwm, but without finishing
684 Called with dep_id=None to update entire graph for hwm, but without finishing
663 a task.
685 a task.
664 """
686 """
665 # print ("\n\n***********")
687 # print ("\n\n***********")
666 # pprint (dep_id)
688 # pprint (dep_id)
667 # pprint (self.graph)
689 # pprint (self.graph)
668 # pprint (self.depending)
690 # pprint (self.depending)
669 # pprint (self.all_completed)
691 # pprint (self.all_completed)
670 # pprint (self.all_failed)
692 # pprint (self.all_failed)
671 # print ("\n\n***********\n\n")
693 # print ("\n\n***********\n\n")
672 # update any jobs that depended on the dependency
694 # update any jobs that depended on the dependency
673 jobs = self.graph.pop(dep_id, [])
695 jobs = self.graph.pop(dep_id, [])
674
696
675 # recheck *all* jobs if
697 # recheck *all* jobs if
676 # a) we have HWM and an engine just became no longer full
698 # a) we have HWM and an engine just became no longer full
677 # or b) dep_id was given as None
699 # or b) dep_id was given as None
678
700
679 if dep_id is None or self.hwm and any( [ load==self.hwm-1 for load in self.loads ]):
701 if dep_id is None or self.hwm and any( [ load==self.hwm-1 for load in self.loads ]):
680 jobs = self.depending.keys()
702 jobs = self.depending.keys()
681
703
682 for msg_id in sorted(jobs, key=lambda msg_id: self.depending[msg_id].timestamp):
704 for msg_id in sorted(jobs, key=lambda msg_id: self.depending[msg_id].timestamp):
683 job = self.depending[msg_id]
705 job = self.depending[msg_id]
684
706
685 if job.after.unreachable(self.all_completed, self.all_failed)\
707 if job.after.unreachable(self.all_completed, self.all_failed)\
686 or job.follow.unreachable(self.all_completed, self.all_failed):
708 or job.follow.unreachable(self.all_completed, self.all_failed):
687 self.fail_unreachable(msg_id)
709 self.fail_unreachable(msg_id)
688
710
689 elif job.after.check(self.all_completed, self.all_failed): # time deps met, maybe run
711 elif job.after.check(self.all_completed, self.all_failed): # time deps met, maybe run
690 if self.maybe_run(job):
712 if self.maybe_run(job):
691
713
692 self.depending.pop(msg_id)
714 self.depending.pop(msg_id)
693 for mid in job.dependents:
715 for mid in job.dependents:
694 if mid in self.graph:
716 if mid in self.graph:
695 self.graph[mid].remove(msg_id)
717 self.graph[mid].remove(msg_id)
696
718
697 #----------------------------------------------------------------------
719 #----------------------------------------------------------------------
698 # methods to be overridden by subclasses
720 # methods to be overridden by subclasses
699 #----------------------------------------------------------------------
721 #----------------------------------------------------------------------
700
722
701 def add_job(self, idx):
723 def add_job(self, idx):
702 """Called after self.targets[idx] just got the job with header.
724 """Called after self.targets[idx] just got the job with header.
703 Override with subclasses. The default ordering is simple LRU.
725 Override with subclasses. The default ordering is simple LRU.
704 The default loads are the number of outstanding jobs."""
726 The default loads are the number of outstanding jobs."""
705 self.loads[idx] += 1
727 self.loads[idx] += 1
706 for lis in (self.targets, self.loads):
728 for lis in (self.targets, self.loads):
707 lis.append(lis.pop(idx))
729 lis.append(lis.pop(idx))
708
730
709
731
710 def finish_job(self, idx):
732 def finish_job(self, idx):
711 """Called after self.targets[idx] just finished a job.
733 """Called after self.targets[idx] just finished a job.
712 Override with subclasses."""
734 Override with subclasses."""
713 self.loads[idx] -= 1
735 self.loads[idx] -= 1
714
736
715
737
716
738
717 def launch_scheduler(in_addr, out_addr, mon_addr, not_addr, config=None,
739 def launch_scheduler(in_addr, out_addr, mon_addr, not_addr, reg_addr, config=None,
718 logname='root', log_url=None, loglevel=logging.DEBUG,
740 logname='root', log_url=None, loglevel=logging.DEBUG,
719 identity=b'task', in_thread=False):
741 identity=b'task', in_thread=False):
720
742
721 ZMQStream = zmqstream.ZMQStream
743 ZMQStream = zmqstream.ZMQStream
722
744
723 if config:
745 if config:
724 # unwrap dict back into Config
746 # unwrap dict back into Config
725 config = Config(config)
747 config = Config(config)
726
748
727 if in_thread:
749 if in_thread:
728 # use instance() to get the same Context/Loop as our parent
750 # use instance() to get the same Context/Loop as our parent
729 ctx = zmq.Context.instance()
751 ctx = zmq.Context.instance()
730 loop = ioloop.IOLoop.instance()
752 loop = ioloop.IOLoop.instance()
731 else:
753 else:
732 # in a process, don't use instance()
754 # in a process, don't use instance()
733 # for safety with multiprocessing
755 # for safety with multiprocessing
734 ctx = zmq.Context()
756 ctx = zmq.Context()
735 loop = ioloop.IOLoop()
757 loop = ioloop.IOLoop()
736 ins = ZMQStream(ctx.socket(zmq.ROUTER),loop)
758 ins = ZMQStream(ctx.socket(zmq.ROUTER),loop)
737 ins.setsockopt(zmq.IDENTITY, identity)
759 ins.setsockopt(zmq.IDENTITY, identity + b'_in')
738 ins.bind(in_addr)
760 ins.bind(in_addr)
739
761
740 outs = ZMQStream(ctx.socket(zmq.ROUTER),loop)
762 outs = ZMQStream(ctx.socket(zmq.ROUTER),loop)
741 outs.setsockopt(zmq.IDENTITY, identity)
763 outs.setsockopt(zmq.IDENTITY, identity + b'_out')
742 outs.bind(out_addr)
764 outs.bind(out_addr)
743 mons = zmqstream.ZMQStream(ctx.socket(zmq.PUB),loop)
765 mons = zmqstream.ZMQStream(ctx.socket(zmq.PUB),loop)
744 mons.connect(mon_addr)
766 mons.connect(mon_addr)
745 nots = zmqstream.ZMQStream(ctx.socket(zmq.SUB),loop)
767 nots = zmqstream.ZMQStream(ctx.socket(zmq.SUB),loop)
746 nots.setsockopt(zmq.SUBSCRIBE, b'')
768 nots.setsockopt(zmq.SUBSCRIBE, b'')
747 nots.connect(not_addr)
769 nots.connect(not_addr)
748
770
771 querys = ZMQStream(ctx.socket(zmq.DEALER),loop)
772 querys.connect(reg_addr)
773
749 # setup logging.
774 # setup logging.
750 if in_thread:
775 if in_thread:
751 log = Application.instance().log
776 log = Application.instance().log
752 else:
777 else:
753 if log_url:
778 if log_url:
754 log = connect_logger(logname, ctx, log_url, root="scheduler", loglevel=loglevel)
779 log = connect_logger(logname, ctx, log_url, root="scheduler", loglevel=loglevel)
755 else:
780 else:
756 log = local_logger(logname, loglevel)
781 log = local_logger(logname, loglevel)
757
782
758 scheduler = TaskScheduler(client_stream=ins, engine_stream=outs,
783 scheduler = TaskScheduler(client_stream=ins, engine_stream=outs,
759 mon_stream=mons, notifier_stream=nots,
784 mon_stream=mons, notifier_stream=nots,
785 query_stream=querys,
760 loop=loop, log=log,
786 loop=loop, log=log,
761 config=config)
787 config=config)
762 scheduler.start()
788 scheduler.start()
763 if not in_thread:
789 if not in_thread:
764 try:
790 try:
765 loop.start()
791 loop.start()
766 except KeyboardInterrupt:
792 except KeyboardInterrupt:
767 scheduler.log.critical("Interrupted, exiting...")
793 scheduler.log.critical("Interrupted, exiting...")
768
794
@@ -1,237 +1,231
1 """A simple engine that talks to a controller over 0MQ.
1 """A simple engine that talks to a controller over 0MQ.
2 It handles registration, etc. and launches a kernel
2 It handles registration, etc. and launches a kernel
3 connected to the Controller's Schedulers.
3 connected to the Controller's Schedulers.
4
4
5 Authors:
5 Authors:
6
6
7 * Min RK
7 * Min RK
8 """
8 """
9 #-----------------------------------------------------------------------------
9 #-----------------------------------------------------------------------------
10 # Copyright (C) 2010-2011 The IPython Development Team
10 # Copyright (C) 2010-2011 The IPython Development Team
11 #
11 #
12 # Distributed under the terms of the BSD License. The full license is in
12 # Distributed under the terms of the BSD License. The full license is in
13 # the file COPYING, distributed as part of this software.
13 # the file COPYING, distributed as part of this software.
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15
15
16 from __future__ import print_function
16 from __future__ import print_function
17
17
18 import sys
18 import sys
19 import time
19 import time
20 from getpass import getpass
20 from getpass import getpass
21
21
22 import zmq
22 import zmq
23 from zmq.eventloop import ioloop, zmqstream
23 from zmq.eventloop import ioloop, zmqstream
24
24
25 from IPython.external.ssh import tunnel
25 from IPython.external.ssh import tunnel
26 # internal
26 # internal
27 from IPython.utils.traitlets import (
27 from IPython.utils.traitlets import (
28 Instance, Dict, Integer, Type, CFloat, Unicode, CBytes, Bool
28 Instance, Dict, Integer, Type, CFloat, Unicode, CBytes, Bool
29 )
29 )
30 from IPython.utils.py3compat import cast_bytes
30 from IPython.utils.py3compat import cast_bytes
31
31
32 from IPython.parallel.controller.heartmonitor import Heart
32 from IPython.parallel.controller.heartmonitor import Heart
33 from IPython.parallel.factory import RegistrationFactory
33 from IPython.parallel.factory import RegistrationFactory
34 from IPython.parallel.util import disambiguate_url
34 from IPython.parallel.util import disambiguate_url
35
35
36 from IPython.zmq.session import Message
36 from IPython.zmq.session import Message
37 from IPython.zmq.ipkernel import Kernel
37 from IPython.zmq.ipkernel import Kernel
38
38
39 class EngineFactory(RegistrationFactory):
39 class EngineFactory(RegistrationFactory):
40 """IPython engine"""
40 """IPython engine"""
41
41
42 # configurables:
42 # configurables:
43 out_stream_factory=Type('IPython.zmq.iostream.OutStream', config=True,
43 out_stream_factory=Type('IPython.zmq.iostream.OutStream', config=True,
44 help="""The OutStream for handling stdout/err.
44 help="""The OutStream for handling stdout/err.
45 Typically 'IPython.zmq.iostream.OutStream'""")
45 Typically 'IPython.zmq.iostream.OutStream'""")
46 display_hook_factory=Type('IPython.zmq.displayhook.ZMQDisplayHook', config=True,
46 display_hook_factory=Type('IPython.zmq.displayhook.ZMQDisplayHook', config=True,
47 help="""The class for handling displayhook.
47 help="""The class for handling displayhook.
48 Typically 'IPython.zmq.displayhook.ZMQDisplayHook'""")
48 Typically 'IPython.zmq.displayhook.ZMQDisplayHook'""")
49 location=Unicode(config=True,
49 location=Unicode(config=True,
50 help="""The location (an IP address) of the controller. This is
50 help="""The location (an IP address) of the controller. This is
51 used for disambiguating URLs, to determine whether
51 used for disambiguating URLs, to determine whether
52 loopback should be used to connect or the public address.""")
52 loopback should be used to connect or the public address.""")
53 timeout=CFloat(2,config=True,
53 timeout=CFloat(5, config=True,
54 help="""The time (in seconds) to wait for the Controller to respond
54 help="""The time (in seconds) to wait for the Controller to respond
55 to registration requests before giving up.""")
55 to registration requests before giving up.""")
56 sshserver=Unicode(config=True,
56 sshserver=Unicode(config=True,
57 help="""The SSH server to use for tunneling connections to the Controller.""")
57 help="""The SSH server to use for tunneling connections to the Controller.""")
58 sshkey=Unicode(config=True,
58 sshkey=Unicode(config=True,
59 help="""The SSH private key file to use when tunneling connections to the Controller.""")
59 help="""The SSH private key file to use when tunneling connections to the Controller.""")
60 paramiko=Bool(sys.platform == 'win32', config=True,
60 paramiko=Bool(sys.platform == 'win32', config=True,
61 help="""Whether to use paramiko instead of openssh for tunnels.""")
61 help="""Whether to use paramiko instead of openssh for tunnels.""")
62
62
63 # not configurable:
63 # not configurable:
64 connection_info = Dict()
64 user_ns=Dict()
65 user_ns = Dict()
65 id=Integer(allow_none=True)
66 id = Integer(allow_none=True)
66 registrar=Instance('zmq.eventloop.zmqstream.ZMQStream')
67 registrar = Instance('zmq.eventloop.zmqstream.ZMQStream')
67 kernel=Instance(Kernel)
68 kernel = Instance(Kernel)
68
69
69 bident = CBytes()
70 bident = CBytes()
70 ident = Unicode()
71 ident = Unicode()
71 def _ident_changed(self, name, old, new):
72 def _ident_changed(self, name, old, new):
72 self.bident = cast_bytes(new)
73 self.bident = cast_bytes(new)
73 using_ssh=Bool(False)
74 using_ssh=Bool(False)
74
75
75
76
76 def __init__(self, **kwargs):
77 def __init__(self, **kwargs):
77 super(EngineFactory, self).__init__(**kwargs)
78 super(EngineFactory, self).__init__(**kwargs)
78 self.ident = self.session.session
79 self.ident = self.session.session
79
80
80 def init_connector(self):
81 def init_connector(self):
81 """construct connection function, which handles tunnels."""
82 """construct connection function, which handles tunnels."""
82 self.using_ssh = bool(self.sshkey or self.sshserver)
83 self.using_ssh = bool(self.sshkey or self.sshserver)
83
84
84 if self.sshkey and not self.sshserver:
85 if self.sshkey and not self.sshserver:
85 # We are using ssh directly to the controller, tunneling localhost to localhost
86 # We are using ssh directly to the controller, tunneling localhost to localhost
86 self.sshserver = self.url.split('://')[1].split(':')[0]
87 self.sshserver = self.url.split('://')[1].split(':')[0]
87
88
88 if self.using_ssh:
89 if self.using_ssh:
89 if tunnel.try_passwordless_ssh(self.sshserver, self.sshkey, self.paramiko):
90 if tunnel.try_passwordless_ssh(self.sshserver, self.sshkey, self.paramiko):
90 password=False
91 password=False
91 else:
92 else:
92 password = getpass("SSH Password for %s: "%self.sshserver)
93 password = getpass("SSH Password for %s: "%self.sshserver)
93 else:
94 else:
94 password = False
95 password = False
95
96
96 def connect(s, url):
97 def connect(s, url):
97 url = disambiguate_url(url, self.location)
98 url = disambiguate_url(url, self.location)
98 if self.using_ssh:
99 if self.using_ssh:
99 self.log.debug("Tunneling connection to %s via %s"%(url, self.sshserver))
100 self.log.debug("Tunneling connection to %s via %s", url, self.sshserver)
100 return tunnel.tunnel_connection(s, url, self.sshserver,
101 return tunnel.tunnel_connection(s, url, self.sshserver,
101 keyfile=self.sshkey, paramiko=self.paramiko,
102 keyfile=self.sshkey, paramiko=self.paramiko,
102 password=password,
103 password=password,
103 )
104 )
104 else:
105 else:
105 return s.connect(url)
106 return s.connect(url)
106
107
107 def maybe_tunnel(url):
108 def maybe_tunnel(url):
108 """like connect, but don't complete the connection (for use by heartbeat)"""
109 """like connect, but don't complete the connection (for use by heartbeat)"""
109 url = disambiguate_url(url, self.location)
110 url = disambiguate_url(url, self.location)
110 if self.using_ssh:
111 if self.using_ssh:
111 self.log.debug("Tunneling connection to %s via %s"%(url, self.sshserver))
112 self.log.debug("Tunneling connection to %s via %s", url, self.sshserver)
112 url,tunnelobj = tunnel.open_tunnel(url, self.sshserver,
113 url,tunnelobj = tunnel.open_tunnel(url, self.sshserver,
113 keyfile=self.sshkey, paramiko=self.paramiko,
114 keyfile=self.sshkey, paramiko=self.paramiko,
114 password=password,
115 password=password,
115 )
116 )
116 return url
117 return str(url)
117 return connect, maybe_tunnel
118 return connect, maybe_tunnel
118
119
119 def register(self):
120 def register(self):
120 """send the registration_request"""
121 """send the registration_request"""
121
122
122 self.log.info("Registering with controller at %s"%self.url)
123 self.log.info("Registering with controller at %s"%self.url)
123 ctx = self.context
124 ctx = self.context
124 connect,maybe_tunnel = self.init_connector()
125 connect,maybe_tunnel = self.init_connector()
125 reg = ctx.socket(zmq.DEALER)
126 reg = ctx.socket(zmq.DEALER)
126 reg.setsockopt(zmq.IDENTITY, self.bident)
127 reg.setsockopt(zmq.IDENTITY, self.bident)
127 connect(reg, self.url)
128 connect(reg, self.url)
128 self.registrar = zmqstream.ZMQStream(reg, self.loop)
129 self.registrar = zmqstream.ZMQStream(reg, self.loop)
129
130
130
131
131 content = dict(queue=self.ident, heartbeat=self.ident, control=self.ident)
132 content = dict(uuid=self.ident)
132 self.registrar.on_recv(lambda msg: self.complete_registration(msg, connect, maybe_tunnel))
133 self.registrar.on_recv(lambda msg: self.complete_registration(msg, connect, maybe_tunnel))
133 # print (self.session.key)
134 # print (self.session.key)
134 self.session.send(self.registrar, "registration_request",content=content)
135 self.session.send(self.registrar, "registration_request", content=content)
135
136
136 def complete_registration(self, msg, connect, maybe_tunnel):
137 def complete_registration(self, msg, connect, maybe_tunnel):
137 # print msg
138 # print msg
138 self._abort_dc.stop()
139 self._abort_dc.stop()
139 ctx = self.context
140 ctx = self.context
140 loop = self.loop
141 loop = self.loop
141 identity = self.bident
142 identity = self.bident
142 idents,msg = self.session.feed_identities(msg)
143 idents,msg = self.session.feed_identities(msg)
143 msg = Message(self.session.unserialize(msg))
144 msg = self.session.unserialize(msg)
145 content = msg['content']
146 info = self.connection_info
144
147
145 if msg.content.status == 'ok':
148 def url(key):
146 self.id = int(msg.content.id)
149 """get zmq url for given channel"""
150 return str(info["interface"] + ":%i" % info[key])
147
151
148 # launch heartbeat
152 if content['status'] == 'ok':
149 hb_addrs = msg.content.heartbeat
153 self.id = int(content['id'])
150
154
155 # launch heartbeat
151 # possibly forward hb ports with tunnels
156 # possibly forward hb ports with tunnels
152 hb_addrs = [ maybe_tunnel(addr) for addr in hb_addrs ]
157 hb_ping = maybe_tunnel(url('hb_ping'))
153 heart = Heart(*map(str, hb_addrs), heart_id=identity)
158 hb_pong = maybe_tunnel(url('hb_pong'))
159
160 heart = Heart(hb_ping, hb_pong, heart_id=identity)
154 heart.start()
161 heart.start()
155
162
156 # create Shell Streams (MUX, Task, etc.):
163 # create Shell Connections (MUX, Task, etc.):
157 queue_addr = msg.content.mux
164 shell_addrs = url('mux'), url('task')
158 shell_addrs = [ str(queue_addr) ]
165
159 task_addr = msg.content.task
166 # Use only one shell stream for mux and tasks
160 if task_addr:
161 shell_addrs.append(str(task_addr))
162
163 # Uncomment this to go back to two-socket model
164 # shell_streams = []
165 # for addr in shell_addrs:
166 # stream = zmqstream.ZMQStream(ctx.socket(zmq.ROUTER), loop)
167 # stream.setsockopt(zmq.IDENTITY, identity)
168 # stream.connect(disambiguate_url(addr, self.location))
169 # shell_streams.append(stream)
170
171 # Now use only one shell stream for mux and tasks
172 stream = zmqstream.ZMQStream(ctx.socket(zmq.ROUTER), loop)
167 stream = zmqstream.ZMQStream(ctx.socket(zmq.ROUTER), loop)
173 stream.setsockopt(zmq.IDENTITY, identity)
168 stream.setsockopt(zmq.IDENTITY, identity)
174 shell_streams = [stream]
169 shell_streams = [stream]
175 for addr in shell_addrs:
170 for addr in shell_addrs:
176 connect(stream, addr)
171 connect(stream, addr)
177 # end single stream-socket
178
172
179 # control stream:
173 # control stream:
180 control_addr = str(msg.content.control)
174 control_addr = url('control')
181 control_stream = zmqstream.ZMQStream(ctx.socket(zmq.ROUTER), loop)
175 control_stream = zmqstream.ZMQStream(ctx.socket(zmq.ROUTER), loop)
182 control_stream.setsockopt(zmq.IDENTITY, identity)
176 control_stream.setsockopt(zmq.IDENTITY, identity)
183 connect(control_stream, control_addr)
177 connect(control_stream, control_addr)
184
178
185 # create iopub stream:
179 # create iopub stream:
186 iopub_addr = msg.content.iopub
180 iopub_addr = url('iopub')
187 iopub_socket = ctx.socket(zmq.PUB)
181 iopub_socket = ctx.socket(zmq.PUB)
188 iopub_socket.setsockopt(zmq.IDENTITY, identity)
182 iopub_socket.setsockopt(zmq.IDENTITY, identity)
189 connect(iopub_socket, iopub_addr)
183 connect(iopub_socket, iopub_addr)
190
184
191 # disable history:
185 # disable history:
192 self.config.HistoryManager.hist_file = ':memory:'
186 self.config.HistoryManager.hist_file = ':memory:'
193
187
194 # Redirect input streams and set a display hook.
188 # Redirect input streams and set a display hook.
195 if self.out_stream_factory:
189 if self.out_stream_factory:
196 sys.stdout = self.out_stream_factory(self.session, iopub_socket, u'stdout')
190 sys.stdout = self.out_stream_factory(self.session, iopub_socket, u'stdout')
197 sys.stdout.topic = cast_bytes('engine.%i.stdout' % self.id)
191 sys.stdout.topic = cast_bytes('engine.%i.stdout' % self.id)
198 sys.stderr = self.out_stream_factory(self.session, iopub_socket, u'stderr')
192 sys.stderr = self.out_stream_factory(self.session, iopub_socket, u'stderr')
199 sys.stderr.topic = cast_bytes('engine.%i.stderr' % self.id)
193 sys.stderr.topic = cast_bytes('engine.%i.stderr' % self.id)
200 if self.display_hook_factory:
194 if self.display_hook_factory:
201 sys.displayhook = self.display_hook_factory(self.session, iopub_socket)
195 sys.displayhook = self.display_hook_factory(self.session, iopub_socket)
202 sys.displayhook.topic = cast_bytes('engine.%i.pyout' % self.id)
196 sys.displayhook.topic = cast_bytes('engine.%i.pyout' % self.id)
203
197
204 self.kernel = Kernel(config=self.config, int_id=self.id, ident=self.ident, session=self.session,
198 self.kernel = Kernel(config=self.config, int_id=self.id, ident=self.ident, session=self.session,
205 control_stream=control_stream, shell_streams=shell_streams, iopub_socket=iopub_socket,
199 control_stream=control_stream, shell_streams=shell_streams, iopub_socket=iopub_socket,
206 loop=loop, user_ns=self.user_ns, log=self.log)
200 loop=loop, user_ns=self.user_ns, log=self.log)
207 self.kernel.shell.display_pub.topic = cast_bytes('engine.%i.displaypub' % self.id)
201 self.kernel.shell.display_pub.topic = cast_bytes('engine.%i.displaypub' % self.id)
208 self.kernel.start()
202 self.kernel.start()
209
203
210
204
211 else:
205 else:
212 self.log.fatal("Registration Failed: %s"%msg)
206 self.log.fatal("Registration Failed: %s"%msg)
213 raise Exception("Registration Failed: %s"%msg)
207 raise Exception("Registration Failed: %s"%msg)
214
208
215 self.log.info("Completed registration with id %i"%self.id)
209 self.log.info("Completed registration with id %i"%self.id)
216
210
217
211
218 def abort(self):
212 def abort(self):
219 self.log.fatal("Registration timed out after %.1f seconds"%self.timeout)
213 self.log.fatal("Registration timed out after %.1f seconds"%self.timeout)
220 if self.url.startswith('127.'):
214 if self.url.startswith('127.'):
221 self.log.fatal("""
215 self.log.fatal("""
222 If the controller and engines are not on the same machine,
216 If the controller and engines are not on the same machine,
223 you will have to instruct the controller to listen on an external IP (in ipcontroller_config.py):
217 you will have to instruct the controller to listen on an external IP (in ipcontroller_config.py):
224 c.HubFactory.ip='*' # for all interfaces, internal and external
218 c.HubFactory.ip='*' # for all interfaces, internal and external
225 c.HubFactory.ip='192.168.1.101' # or any interface that the engines can see
219 c.HubFactory.ip='192.168.1.101' # or any interface that the engines can see
226 or tunnel connections via ssh.
220 or tunnel connections via ssh.
227 """)
221 """)
228 self.session.send(self.registrar, "unregistration_request", content=dict(id=self.id))
222 self.session.send(self.registrar, "unregistration_request", content=dict(id=self.id))
229 time.sleep(1)
223 time.sleep(1)
230 sys.exit(255)
224 sys.exit(255)
231
225
232 def start(self):
226 def start(self):
233 dc = ioloop.DelayedCallback(self.register, 0, self.loop)
227 dc = ioloop.DelayedCallback(self.register, 0, self.loop)
234 dc.start()
228 dc.start()
235 self._abort_dc = ioloop.DelayedCallback(self.abort, self.timeout*1000, self.loop)
229 self._abort_dc = ioloop.DelayedCallback(self.abort, self.timeout*1000, self.loop)
236 self._abort_dc.start()
230 self._abort_dc.start()
237
231
@@ -1,756 +1,760
1 """Session object for building, serializing, sending, and receiving messages in
1 """Session object for building, serializing, sending, and receiving messages in
2 IPython. The Session object supports serialization, HMAC signatures, and
2 IPython. The Session object supports serialization, HMAC signatures, and
3 metadata on messages.
3 metadata on messages.
4
4
5 Also defined here are utilities for working with Sessions:
5 Also defined here are utilities for working with Sessions:
6 * A SessionFactory to be used as a base class for configurables that work with
6 * A SessionFactory to be used as a base class for configurables that work with
7 Sessions.
7 Sessions.
8 * A Message object for convenience that allows attribute-access to the msg dict.
8 * A Message object for convenience that allows attribute-access to the msg dict.
9
9
10 Authors:
10 Authors:
11
11
12 * Min RK
12 * Min RK
13 * Brian Granger
13 * Brian Granger
14 * Fernando Perez
14 * Fernando Perez
15 """
15 """
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17 # Copyright (C) 2010-2011 The IPython Development Team
17 # Copyright (C) 2010-2011 The IPython Development Team
18 #
18 #
19 # Distributed under the terms of the BSD License. The full license is in
19 # Distributed under the terms of the BSD License. The full license is in
20 # the file COPYING, distributed as part of this software.
20 # the file COPYING, distributed as part of this software.
21 #-----------------------------------------------------------------------------
21 #-----------------------------------------------------------------------------
22
22
23 #-----------------------------------------------------------------------------
23 #-----------------------------------------------------------------------------
24 # Imports
24 # Imports
25 #-----------------------------------------------------------------------------
25 #-----------------------------------------------------------------------------
26
26
27 import hmac
27 import hmac
28 import logging
28 import logging
29 import os
29 import os
30 import pprint
30 import pprint
31 import uuid
31 import uuid
32 from datetime import datetime
32 from datetime import datetime
33
33
34 try:
34 try:
35 import cPickle
35 import cPickle
36 pickle = cPickle
36 pickle = cPickle
37 except:
37 except:
38 cPickle = None
38 cPickle = None
39 import pickle
39 import pickle
40
40
41 import zmq
41 import zmq
42 from zmq.utils import jsonapi
42 from zmq.utils import jsonapi
43 from zmq.eventloop.ioloop import IOLoop
43 from zmq.eventloop.ioloop import IOLoop
44 from zmq.eventloop.zmqstream import ZMQStream
44 from zmq.eventloop.zmqstream import ZMQStream
45
45
46 from IPython.config.application import Application, boolean_flag
46 from IPython.config.application import Application, boolean_flag
47 from IPython.config.configurable import Configurable, LoggingConfigurable
47 from IPython.config.configurable import Configurable, LoggingConfigurable
48 from IPython.utils.importstring import import_item
48 from IPython.utils.importstring import import_item
49 from IPython.utils.jsonutil import extract_dates, squash_dates, date_default
49 from IPython.utils.jsonutil import extract_dates, squash_dates, date_default
50 from IPython.utils.py3compat import str_to_bytes
50 from IPython.utils.py3compat import str_to_bytes
51 from IPython.utils.traitlets import (CBytes, Unicode, Bool, Any, Instance, Set,
51 from IPython.utils.traitlets import (CBytes, Unicode, Bool, Any, Instance, Set,
52 DottedObjectName, CUnicode)
52 DottedObjectName, CUnicode)
53
53
54 #-----------------------------------------------------------------------------
54 #-----------------------------------------------------------------------------
55 # utility functions
55 # utility functions
56 #-----------------------------------------------------------------------------
56 #-----------------------------------------------------------------------------
57
57
58 def squash_unicode(obj):
58 def squash_unicode(obj):
59 """coerce unicode back to bytestrings."""
59 """coerce unicode back to bytestrings."""
60 if isinstance(obj,dict):
60 if isinstance(obj,dict):
61 for key in obj.keys():
61 for key in obj.keys():
62 obj[key] = squash_unicode(obj[key])
62 obj[key] = squash_unicode(obj[key])
63 if isinstance(key, unicode):
63 if isinstance(key, unicode):
64 obj[squash_unicode(key)] = obj.pop(key)
64 obj[squash_unicode(key)] = obj.pop(key)
65 elif isinstance(obj, list):
65 elif isinstance(obj, list):
66 for i,v in enumerate(obj):
66 for i,v in enumerate(obj):
67 obj[i] = squash_unicode(v)
67 obj[i] = squash_unicode(v)
68 elif isinstance(obj, unicode):
68 elif isinstance(obj, unicode):
69 obj = obj.encode('utf8')
69 obj = obj.encode('utf8')
70 return obj
70 return obj
71
71
72 #-----------------------------------------------------------------------------
72 #-----------------------------------------------------------------------------
73 # globals and defaults
73 # globals and defaults
74 #-----------------------------------------------------------------------------
74 #-----------------------------------------------------------------------------
75
75
76
76
77 # ISO8601-ify datetime objects
77 # ISO8601-ify datetime objects
78 json_packer = lambda obj: jsonapi.dumps(obj, default=date_default)
78 json_packer = lambda obj: jsonapi.dumps(obj, default=date_default)
79 json_unpacker = lambda s: extract_dates(jsonapi.loads(s))
79 json_unpacker = lambda s: extract_dates(jsonapi.loads(s))
80
80
81 pickle_packer = lambda o: pickle.dumps(o,-1)
81 pickle_packer = lambda o: pickle.dumps(o,-1)
82 pickle_unpacker = pickle.loads
82 pickle_unpacker = pickle.loads
83
83
84 default_packer = json_packer
84 default_packer = json_packer
85 default_unpacker = json_unpacker
85 default_unpacker = json_unpacker
86
86
87 DELIM=b"<IDS|MSG>"
87 DELIM=b"<IDS|MSG>"
88
88
89
89
90 #-----------------------------------------------------------------------------
90 #-----------------------------------------------------------------------------
91 # Mixin tools for apps that use Sessions
91 # Mixin tools for apps that use Sessions
92 #-----------------------------------------------------------------------------
92 #-----------------------------------------------------------------------------
93
93
94 session_aliases = dict(
94 session_aliases = dict(
95 ident = 'Session.session',
95 ident = 'Session.session',
96 user = 'Session.username',
96 user = 'Session.username',
97 keyfile = 'Session.keyfile',
97 keyfile = 'Session.keyfile',
98 )
98 )
99
99
100 session_flags = {
100 session_flags = {
101 'secure' : ({'Session' : { 'key' : str_to_bytes(str(uuid.uuid4())),
101 'secure' : ({'Session' : { 'key' : str_to_bytes(str(uuid.uuid4())),
102 'keyfile' : '' }},
102 'keyfile' : '' }},
103 """Use HMAC digests for authentication of messages.
103 """Use HMAC digests for authentication of messages.
104 Setting this flag will generate a new UUID to use as the HMAC key.
104 Setting this flag will generate a new UUID to use as the HMAC key.
105 """),
105 """),
106 'no-secure' : ({'Session' : { 'key' : b'', 'keyfile' : '' }},
106 'no-secure' : ({'Session' : { 'key' : b'', 'keyfile' : '' }},
107 """Don't authenticate messages."""),
107 """Don't authenticate messages."""),
108 }
108 }
109
109
110 def default_secure(cfg):
110 def default_secure(cfg):
111 """Set the default behavior for a config environment to be secure.
111 """Set the default behavior for a config environment to be secure.
112
112
113 If Session.key/keyfile have not been set, set Session.key to
113 If Session.key/keyfile have not been set, set Session.key to
114 a new random UUID.
114 a new random UUID.
115 """
115 """
116
116
117 if 'Session' in cfg:
117 if 'Session' in cfg:
118 if 'key' in cfg.Session or 'keyfile' in cfg.Session:
118 if 'key' in cfg.Session or 'keyfile' in cfg.Session:
119 return
119 return
120 # key/keyfile not specified, generate new UUID:
120 # key/keyfile not specified, generate new UUID:
121 cfg.Session.key = str_to_bytes(str(uuid.uuid4()))
121 cfg.Session.key = str_to_bytes(str(uuid.uuid4()))
122
122
123
123
124 #-----------------------------------------------------------------------------
124 #-----------------------------------------------------------------------------
125 # Classes
125 # Classes
126 #-----------------------------------------------------------------------------
126 #-----------------------------------------------------------------------------
127
127
128 class SessionFactory(LoggingConfigurable):
128 class SessionFactory(LoggingConfigurable):
129 """The Base class for configurables that have a Session, Context, logger,
129 """The Base class for configurables that have a Session, Context, logger,
130 and IOLoop.
130 and IOLoop.
131 """
131 """
132
132
133 logname = Unicode('')
133 logname = Unicode('')
134 def _logname_changed(self, name, old, new):
134 def _logname_changed(self, name, old, new):
135 self.log = logging.getLogger(new)
135 self.log = logging.getLogger(new)
136
136
137 # not configurable:
137 # not configurable:
138 context = Instance('zmq.Context')
138 context = Instance('zmq.Context')
139 def _context_default(self):
139 def _context_default(self):
140 return zmq.Context.instance()
140 return zmq.Context.instance()
141
141
142 session = Instance('IPython.zmq.session.Session')
142 session = Instance('IPython.zmq.session.Session')
143
143
144 loop = Instance('zmq.eventloop.ioloop.IOLoop', allow_none=False)
144 loop = Instance('zmq.eventloop.ioloop.IOLoop', allow_none=False)
145 def _loop_default(self):
145 def _loop_default(self):
146 return IOLoop.instance()
146 return IOLoop.instance()
147
147
148 def __init__(self, **kwargs):
148 def __init__(self, **kwargs):
149 super(SessionFactory, self).__init__(**kwargs)
149 super(SessionFactory, self).__init__(**kwargs)
150
150
151 if self.session is None:
151 if self.session is None:
152 # construct the session
152 # construct the session
153 self.session = Session(**kwargs)
153 self.session = Session(**kwargs)
154
154
155
155
156 class Message(object):
156 class Message(object):
157 """A simple message object that maps dict keys to attributes.
157 """A simple message object that maps dict keys to attributes.
158
158
159 A Message can be created from a dict and a dict from a Message instance
159 A Message can be created from a dict and a dict from a Message instance
160 simply by calling dict(msg_obj)."""
160 simply by calling dict(msg_obj)."""
161
161
162 def __init__(self, msg_dict):
162 def __init__(self, msg_dict):
163 dct = self.__dict__
163 dct = self.__dict__
164 for k, v in dict(msg_dict).iteritems():
164 for k, v in dict(msg_dict).iteritems():
165 if isinstance(v, dict):
165 if isinstance(v, dict):
166 v = Message(v)
166 v = Message(v)
167 dct[k] = v
167 dct[k] = v
168
168
169 # Having this iterator lets dict(msg_obj) work out of the box.
169 # Having this iterator lets dict(msg_obj) work out of the box.
170 def __iter__(self):
170 def __iter__(self):
171 return iter(self.__dict__.iteritems())
171 return iter(self.__dict__.iteritems())
172
172
173 def __repr__(self):
173 def __repr__(self):
174 return repr(self.__dict__)
174 return repr(self.__dict__)
175
175
176 def __str__(self):
176 def __str__(self):
177 return pprint.pformat(self.__dict__)
177 return pprint.pformat(self.__dict__)
178
178
179 def __contains__(self, k):
179 def __contains__(self, k):
180 return k in self.__dict__
180 return k in self.__dict__
181
181
182 def __getitem__(self, k):
182 def __getitem__(self, k):
183 return self.__dict__[k]
183 return self.__dict__[k]
184
184
185
185
186 def msg_header(msg_id, msg_type, username, session):
186 def msg_header(msg_id, msg_type, username, session):
187 date = datetime.now()
187 date = datetime.now()
188 return locals()
188 return locals()
189
189
190 def extract_header(msg_or_header):
190 def extract_header(msg_or_header):
191 """Given a message or header, return the header."""
191 """Given a message or header, return the header."""
192 if not msg_or_header:
192 if not msg_or_header:
193 return {}
193 return {}
194 try:
194 try:
195 # See if msg_or_header is the entire message.
195 # See if msg_or_header is the entire message.
196 h = msg_or_header['header']
196 h = msg_or_header['header']
197 except KeyError:
197 except KeyError:
198 try:
198 try:
199 # See if msg_or_header is just the header
199 # See if msg_or_header is just the header
200 h = msg_or_header['msg_id']
200 h = msg_or_header['msg_id']
201 except KeyError:
201 except KeyError:
202 raise
202 raise
203 else:
203 else:
204 h = msg_or_header
204 h = msg_or_header
205 if not isinstance(h, dict):
205 if not isinstance(h, dict):
206 h = dict(h)
206 h = dict(h)
207 return h
207 return h
208
208
209 class Session(Configurable):
209 class Session(Configurable):
210 """Object for handling serialization and sending of messages.
210 """Object for handling serialization and sending of messages.
211
211
212 The Session object handles building messages and sending them
212 The Session object handles building messages and sending them
213 with ZMQ sockets or ZMQStream objects. Objects can communicate with each
213 with ZMQ sockets or ZMQStream objects. Objects can communicate with each
214 other over the network via Session objects, and only need to work with the
214 other over the network via Session objects, and only need to work with the
215 dict-based IPython message spec. The Session will handle
215 dict-based IPython message spec. The Session will handle
216 serialization/deserialization, security, and metadata.
216 serialization/deserialization, security, and metadata.
217
217
218 Sessions support configurable serialization via packer/unpacker traits,
218 Sessions support configurable serialization via packer/unpacker traits,
219 and signing with HMAC digests via the key/keyfile traits.
219 and signing with HMAC digests via the key/keyfile traits.
220
220
221 Parameters
221 Parameters
222 ----------
222 ----------
223
223
224 debug : bool
224 debug : bool
225 whether to trigger extra debugging statements
225 whether to trigger extra debugging statements
226 packer/unpacker : str : 'json', 'pickle' or import_string
226 packer/unpacker : str : 'json', 'pickle' or import_string
227 importstrings for methods to serialize message parts. If just
227 importstrings for methods to serialize message parts. If just
228 'json' or 'pickle', predefined JSON and pickle packers will be used.
228 'json' or 'pickle', predefined JSON and pickle packers will be used.
229 Otherwise, the entire importstring must be used.
229 Otherwise, the entire importstring must be used.
230
230
231 The functions must accept at least valid JSON input, and output *bytes*.
231 The functions must accept at least valid JSON input, and output *bytes*.
232
232
233 For example, to use msgpack:
233 For example, to use msgpack:
234 packer = 'msgpack.packb', unpacker='msgpack.unpackb'
234 packer = 'msgpack.packb', unpacker='msgpack.unpackb'
235 pack/unpack : callables
235 pack/unpack : callables
236 You can also set the pack/unpack callables for serialization directly.
236 You can also set the pack/unpack callables for serialization directly.
237 session : unicode (must be ascii)
237 session : unicode (must be ascii)
238 the ID of this Session object. The default is to generate a new UUID.
238 the ID of this Session object. The default is to generate a new UUID.
239 username : unicode
239 username : unicode
240 username added to message headers. The default is to ask the OS.
240 username added to message headers. The default is to ask the OS.
241 key : bytes
241 key : bytes
242 The key used to initialize an HMAC signature. If unset, messages
242 The key used to initialize an HMAC signature. If unset, messages
243 will not be signed or checked.
243 will not be signed or checked.
244 keyfile : filepath
244 keyfile : filepath
245 The file containing a key. If this is set, `key` will be initialized
245 The file containing a key. If this is set, `key` will be initialized
246 to the contents of the file.
246 to the contents of the file.
247
247
248 """
248 """
249
249
250 debug=Bool(False, config=True, help="""Debug output in the Session""")
250 debug=Bool(False, config=True, help="""Debug output in the Session""")
251
251
252 packer = DottedObjectName('json',config=True,
252 packer = DottedObjectName('json',config=True,
253 help="""The name of the packer for serializing messages.
253 help="""The name of the packer for serializing messages.
254 Should be one of 'json', 'pickle', or an import name
254 Should be one of 'json', 'pickle', or an import name
255 for a custom callable serializer.""")
255 for a custom callable serializer.""")
256 def _packer_changed(self, name, old, new):
256 def _packer_changed(self, name, old, new):
257 if new.lower() == 'json':
257 if new.lower() == 'json':
258 self.pack = json_packer
258 self.pack = json_packer
259 self.unpack = json_unpacker
259 self.unpack = json_unpacker
260 self.unpacker = new
260 elif new.lower() == 'pickle':
261 elif new.lower() == 'pickle':
261 self.pack = pickle_packer
262 self.pack = pickle_packer
262 self.unpack = pickle_unpacker
263 self.unpack = pickle_unpacker
264 self.unpacker = new
263 else:
265 else:
264 self.pack = import_item(str(new))
266 self.pack = import_item(str(new))
265
267
266 unpacker = DottedObjectName('json', config=True,
268 unpacker = DottedObjectName('json', config=True,
267 help="""The name of the unpacker for unserializing messages.
269 help="""The name of the unpacker for unserializing messages.
268 Only used with custom functions for `packer`.""")
270 Only used with custom functions for `packer`.""")
269 def _unpacker_changed(self, name, old, new):
271 def _unpacker_changed(self, name, old, new):
270 if new.lower() == 'json':
272 if new.lower() == 'json':
271 self.pack = json_packer
273 self.pack = json_packer
272 self.unpack = json_unpacker
274 self.unpack = json_unpacker
275 self.packer = new
273 elif new.lower() == 'pickle':
276 elif new.lower() == 'pickle':
274 self.pack = pickle_packer
277 self.pack = pickle_packer
275 self.unpack = pickle_unpacker
278 self.unpack = pickle_unpacker
279 self.packer = new
276 else:
280 else:
277 self.unpack = import_item(str(new))
281 self.unpack = import_item(str(new))
278
282
279 session = CUnicode(u'', config=True,
283 session = CUnicode(u'', config=True,
280 help="""The UUID identifying this session.""")
284 help="""The UUID identifying this session.""")
281 def _session_default(self):
285 def _session_default(self):
282 u = unicode(uuid.uuid4())
286 u = unicode(uuid.uuid4())
283 self.bsession = u.encode('ascii')
287 self.bsession = u.encode('ascii')
284 return u
288 return u
285
289
286 def _session_changed(self, name, old, new):
290 def _session_changed(self, name, old, new):
287 self.bsession = self.session.encode('ascii')
291 self.bsession = self.session.encode('ascii')
288
292
289 # bsession is the session as bytes
293 # bsession is the session as bytes
290 bsession = CBytes(b'')
294 bsession = CBytes(b'')
291
295
292 username = Unicode(os.environ.get('USER',u'username'), config=True,
296 username = Unicode(os.environ.get('USER',u'username'), config=True,
293 help="""Username for the Session. Default is your system username.""")
297 help="""Username for the Session. Default is your system username.""")
294
298
295 # message signature related traits:
299 # message signature related traits:
296
300
297 key = CBytes(b'', config=True,
301 key = CBytes(b'', config=True,
298 help="""execution key, for extra authentication.""")
302 help="""execution key, for extra authentication.""")
299 def _key_changed(self, name, old, new):
303 def _key_changed(self, name, old, new):
300 if new:
304 if new:
301 self.auth = hmac.HMAC(new)
305 self.auth = hmac.HMAC(new)
302 else:
306 else:
303 self.auth = None
307 self.auth = None
304 auth = Instance(hmac.HMAC)
308 auth = Instance(hmac.HMAC)
305 digest_history = Set()
309 digest_history = Set()
306
310
307 keyfile = Unicode('', config=True,
311 keyfile = Unicode('', config=True,
308 help="""path to file containing execution key.""")
312 help="""path to file containing execution key.""")
309 def _keyfile_changed(self, name, old, new):
313 def _keyfile_changed(self, name, old, new):
310 with open(new, 'rb') as f:
314 with open(new, 'rb') as f:
311 self.key = f.read().strip()
315 self.key = f.read().strip()
312
316
313 # serialization traits:
317 # serialization traits:
314
318
315 pack = Any(default_packer) # the actual packer function
319 pack = Any(default_packer) # the actual packer function
316 def _pack_changed(self, name, old, new):
320 def _pack_changed(self, name, old, new):
317 if not callable(new):
321 if not callable(new):
318 raise TypeError("packer must be callable, not %s"%type(new))
322 raise TypeError("packer must be callable, not %s"%type(new))
319
323
320 unpack = Any(default_unpacker) # the actual unpacker function
324 unpack = Any(default_unpacker) # the actual unpacker function
321 def _unpack_changed(self, name, old, new):
325 def _unpack_changed(self, name, old, new):
322 # only the callability of the unpacker is checked here
326 # only the callability of the unpacker is checked here
323 if not callable(new):
327 if not callable(new):
324 raise TypeError("unpacker must be callable, not %s"%type(new))
328 raise TypeError("unpacker must be callable, not %s"%type(new))
325
329
326 def __init__(self, **kwargs):
330 def __init__(self, **kwargs):
327 """create a Session object
331 """create a Session object
328
332
329 Parameters
333 Parameters
330 ----------
334 ----------
331
335
332 debug : bool
336 debug : bool
333 whether to trigger extra debugging statements
337 whether to trigger extra debugging statements
334 packer/unpacker : str : 'json', 'pickle' or import_string
338 packer/unpacker : str : 'json', 'pickle' or import_string
335 importstrings for methods to serialize message parts. If just
339 importstrings for methods to serialize message parts. If just
336 'json' or 'pickle', predefined JSON and pickle packers will be used.
340 'json' or 'pickle', predefined JSON and pickle packers will be used.
337 Otherwise, the entire importstring must be used.
341 Otherwise, the entire importstring must be used.
338
342
339 The functions must accept at least valid JSON input, and output
343 The functions must accept at least valid JSON input, and output
340 *bytes*.
344 *bytes*.
341
345
342 For example, to use msgpack:
346 For example, to use msgpack:
343 packer = 'msgpack.packb', unpacker='msgpack.unpackb'
347 packer = 'msgpack.packb', unpacker='msgpack.unpackb'
344 pack/unpack : callables
348 pack/unpack : callables
345 You can also set the pack/unpack callables for serialization
349 You can also set the pack/unpack callables for serialization
346 directly.
350 directly.
347 session : unicode (must be ascii)
351 session : unicode (must be ascii)
348 the ID of this Session object. The default is to generate a new
352 the ID of this Session object. The default is to generate a new
349 UUID.
353 UUID.
350 bsession : bytes
354 bsession : bytes
351 The session as bytes
355 The session as bytes
352 username : unicode
356 username : unicode
353 username added to message headers. The default is to ask the OS.
357 username added to message headers. The default is to ask the OS.
354 key : bytes
358 key : bytes
355 The key used to initialize an HMAC signature. If unset, messages
359 The key used to initialize an HMAC signature. If unset, messages
356 will not be signed or checked.
360 will not be signed or checked.
357 keyfile : filepath
361 keyfile : filepath
358 The file containing a key. If this is set, `key` will be
362 The file containing a key. If this is set, `key` will be
359 initialized to the contents of the file.
363 initialized to the contents of the file.
360 """
364 """
361 super(Session, self).__init__(**kwargs)
365 super(Session, self).__init__(**kwargs)
362 self._check_packers()
366 self._check_packers()
363 self.none = self.pack({})
367 self.none = self.pack({})
364 # ensure self._session_default() if necessary, so bsession is defined:
368 # ensure self._session_default() if necessary, so bsession is defined:
365 self.session
369 self.session
366
370
367 @property
371 @property
368 def msg_id(self):
372 def msg_id(self):
369 """always return new uuid"""
373 """always return new uuid"""
370 return str(uuid.uuid4())
374 return str(uuid.uuid4())
371
375
372 def _check_packers(self):
376 def _check_packers(self):
373 """check packers for binary data and datetime support."""
377 """check packers for binary data and datetime support."""
374 pack = self.pack
378 pack = self.pack
375 unpack = self.unpack
379 unpack = self.unpack
376
380
377 # check simple serialization
381 # check simple serialization
378 msg = dict(a=[1,'hi'])
382 msg = dict(a=[1,'hi'])
379 try:
383 try:
380 packed = pack(msg)
384 packed = pack(msg)
381 except Exception:
385 except Exception:
382 raise ValueError("packer could not serialize a simple message")
386 raise ValueError("packer could not serialize a simple message")
383
387
384 # ensure packed message is bytes
388 # ensure packed message is bytes
385 if not isinstance(packed, bytes):
389 if not isinstance(packed, bytes):
386 raise ValueError("message packed to %r, but bytes are required"%type(packed))
390 raise ValueError("message packed to %r, but bytes are required"%type(packed))
387
391
388 # check that unpack is pack's inverse
392 # check that unpack is pack's inverse
389 try:
393 try:
390 unpacked = unpack(packed)
394 unpacked = unpack(packed)
391 except Exception:
395 except Exception:
392 raise ValueError("unpacker could not handle the packer's output")
396 raise ValueError("unpacker could not handle the packer's output")
393
397
394 # check datetime support
398 # check datetime support
395 msg = dict(t=datetime.now())
399 msg = dict(t=datetime.now())
396 try:
400 try:
397 unpacked = unpack(pack(msg))
401 unpacked = unpack(pack(msg))
398 except Exception:
402 except Exception:
399 self.pack = lambda o: pack(squash_dates(o))
403 self.pack = lambda o: pack(squash_dates(o))
400 self.unpack = lambda s: extract_dates(unpack(s))
404 self.unpack = lambda s: extract_dates(unpack(s))
401
405
402 def msg_header(self, msg_type):
406 def msg_header(self, msg_type):
403 return msg_header(self.msg_id, msg_type, self.username, self.session)
407 return msg_header(self.msg_id, msg_type, self.username, self.session)
404
408
405 def msg(self, msg_type, content=None, parent=None, subheader=None, header=None):
409 def msg(self, msg_type, content=None, parent=None, subheader=None, header=None):
406 """Return the nested message dict.
410 """Return the nested message dict.
407
411
408 This format is different from what is sent over the wire. The
412 This format is different from what is sent over the wire. The
409 serialize/unserialize methods convert this nested message dict to the wire
413 serialize/unserialize methods convert this nested message dict to the wire
410 format, which is a list of message parts.
414 format, which is a list of message parts.
411 """
415 """
412 msg = {}
416 msg = {}
413 header = self.msg_header(msg_type) if header is None else header
417 header = self.msg_header(msg_type) if header is None else header
414 msg['header'] = header
418 msg['header'] = header
415 msg['msg_id'] = header['msg_id']
419 msg['msg_id'] = header['msg_id']
416 msg['msg_type'] = header['msg_type']
420 msg['msg_type'] = header['msg_type']
417 msg['parent_header'] = {} if parent is None else extract_header(parent)
421 msg['parent_header'] = {} if parent is None else extract_header(parent)
418 msg['content'] = {} if content is None else content
422 msg['content'] = {} if content is None else content
419 sub = {} if subheader is None else subheader
423 sub = {} if subheader is None else subheader
420 msg['header'].update(sub)
424 msg['header'].update(sub)
421 return msg
425 return msg
422
426
423 def sign(self, msg_list):
427 def sign(self, msg_list):
424 """Sign a message with HMAC digest. If no auth, return b''.
428 """Sign a message with HMAC digest. If no auth, return b''.
425
429
426 Parameters
430 Parameters
427 ----------
431 ----------
428 msg_list : list
432 msg_list : list
429 The [p_header,p_parent,p_content] part of the message list.
433 The [p_header,p_parent,p_content] part of the message list.
430 """
434 """
431 if self.auth is None:
435 if self.auth is None:
432 return b''
436 return b''
433 h = self.auth.copy()
437 h = self.auth.copy()
434 for m in msg_list:
438 for m in msg_list:
435 h.update(m)
439 h.update(m)
436 return str_to_bytes(h.hexdigest())
440 return str_to_bytes(h.hexdigest())
437
441
438 def serialize(self, msg, ident=None):
442 def serialize(self, msg, ident=None):
439 """Serialize the message components to bytes.
443 """Serialize the message components to bytes.
440
444
441 This is roughly the inverse of unserialize. The serialize/unserialize
445 This is roughly the inverse of unserialize. The serialize/unserialize
442 methods work with full message lists, whereas pack/unpack work with
446 methods work with full message lists, whereas pack/unpack work with
443 the individual message parts in the message list.
447 the individual message parts in the message list.
444
448
445 Parameters
449 Parameters
446 ----------
450 ----------
447 msg : dict or Message
451 msg : dict or Message
448 The nested message dict as returned by the self.msg method.
452 The nested message dict as returned by the self.msg method.
449
453
450 Returns
454 Returns
451 -------
455 -------
452 msg_list : list
456 msg_list : list
453 The list of bytes objects to be sent with the format:
457 The list of bytes objects to be sent with the format:
454 [ident1,ident2,...,DELIM,HMAC,p_header,p_parent,p_content,
458 [ident1,ident2,...,DELIM,HMAC,p_header,p_parent,p_content,
455 buffer1,buffer2,...]. In this list, the p_* entities are
459 buffer1,buffer2,...]. In this list, the p_* entities are
456 the packed or serialized versions, so if JSON is used, these
460 the packed or serialized versions, so if JSON is used, these
457 are utf8 encoded JSON strings.
461 are utf8 encoded JSON strings.
458 """
462 """
459 content = msg.get('content', {})
463 content = msg.get('content', {})
460 if content is None:
464 if content is None:
461 content = self.none
465 content = self.none
462 elif isinstance(content, dict):
466 elif isinstance(content, dict):
463 content = self.pack(content)
467 content = self.pack(content)
464 elif isinstance(content, bytes):
468 elif isinstance(content, bytes):
465 # content is already packed, as in a relayed message
469 # content is already packed, as in a relayed message
466 pass
470 pass
467 elif isinstance(content, unicode):
471 elif isinstance(content, unicode):
468 # should be bytes, but JSON often spits out unicode
472 # should be bytes, but JSON often spits out unicode
469 content = content.encode('utf8')
473 content = content.encode('utf8')
470 else:
474 else:
471 raise TypeError("Content incorrect type: %s"%type(content))
475 raise TypeError("Content incorrect type: %s"%type(content))
472
476
473 real_message = [self.pack(msg['header']),
477 real_message = [self.pack(msg['header']),
474 self.pack(msg['parent_header']),
478 self.pack(msg['parent_header']),
475 content
479 content
476 ]
480 ]
477
481
478 to_send = []
482 to_send = []
479
483
480 if isinstance(ident, list):
484 if isinstance(ident, list):
481 # accept list of idents
485 # accept list of idents
482 to_send.extend(ident)
486 to_send.extend(ident)
483 elif ident is not None:
487 elif ident is not None:
484 to_send.append(ident)
488 to_send.append(ident)
485 to_send.append(DELIM)
489 to_send.append(DELIM)
486
490
487 signature = self.sign(real_message)
491 signature = self.sign(real_message)
488 to_send.append(signature)
492 to_send.append(signature)
489
493
490 to_send.extend(real_message)
494 to_send.extend(real_message)
491
495
492 return to_send
496 return to_send
493
497
def send(self, stream, msg_or_type, content=None, parent=None, ident=None,
         buffers=None, subheader=None, track=False, header=None):
    """Serialize a message and push it out over a socket or stream.

    On the wire the message is a multipart list of the form::

        [ident1,ident2,...,DELIM,HMAC,p_header,p_parent,p_content,
         buffer1,buffer2,...]

    The part up to and including ``p_content`` comes from
    ``self.serialize``; each entry of `buffers` is then sent as an
    additional frame of the same multipart message.

    Parameters
    ----------
    stream : zmq.Socket or ZMQStream
        The socket-like object used to send the data.
    msg_or_type : str or Message/dict
        Either a msg_type, from which a new message is built with
        ``self.msg``, or an already-built Message/dict, which is sent as is.
    content : dict or None
        Content for the new message (ignored if msg_or_type is a message).
    parent : Message or dict or None
        Parent (or parent header) of the new message (ignored if
        msg_or_type is a message).
    ident : bytes or list of bytes
        The zmq.IDENTITY routing path, handed to ``self.serialize``.
    buffers : list or None
        Already-serialized buffers to append to the message.
    subheader : dict or None
        Extra header keys for the new message (ignored if msg_or_type is
        a message).
    track : bool
        Whether to ask zmq to track the send.  Only valid with a plain
        Socket; a ZMQStream raises TypeError.
    header : dict or None
        Header for the new message (ignored if msg_or_type is a message).

    Returns
    -------
    msg : dict
        The message that was sent.  The return value of the final send
        call is stored under ``msg['tracker']``.

    Raises
    ------
    TypeError
        If `stream` is neither a Socket nor a ZMQStream, or if tracking is
        requested on a ZMQStream.
    """
    if not isinstance(stream, (zmq.Socket, ZMQStream)):
        raise TypeError("stream must be Socket or ZMQStream, not %r"%type(stream))
    if track and isinstance(stream, ZMQStream):
        raise TypeError("ZMQStream cannot track messages")

    if isinstance(msg_or_type, (Message, dict)):
        # Already a full message: send it untouched instead of rebuilding.
        msg = msg_or_type
    else:
        msg = self.msg(msg_or_type, content=content, parent=parent,
                       subheader=subheader, header=header)

    if buffers is None:
        buffers = []
    has_buffers = bool(buffers)
    to_send = self.serialize(msg, ident)

    # With trailing buffers the serialized part is not the last frame, so it
    # is flagged SNDMORE and tracking is deferred to the final buffer.
    flag = zmq.SNDMORE if has_buffers else 0
    if track:
        tracker = stream.send_multipart(
            to_send, flag, copy=False, track=(False if has_buffers else track))
    else:
        # the track keyword is only passed when tracking was requested
        tracker = stream.send_multipart(to_send, flag, copy=False)

    if has_buffers:
        for frame in buffers[:-1]:
            stream.send(frame, flag, copy=False)
        # The last buffer closes the multipart message (no SNDMORE).
        if track:
            tracker = stream.send(buffers[-1], copy=False, track=track)
        else:
            tracker = stream.send(buffers[-1], copy=False)

    if self.debug:
        for item in (msg, to_send, buffers):
            pprint.pprint(item)

    msg['tracker'] = tracker
    return msg
587
591
def send_raw(self, stream, msg_list, flags=0, copy=True, ident=None):
    """Send an already-serialized message along an ident path.

    The frames actually sent are::

        [ident1,ident2,...,DELIM,HMAC] + msg_list

    Parameters
    ----------
    stream : ZMQStream or Socket
        The ZMQ stream or socket to use for sending the message.
    msg_list : list
        The serialized message parts.  This only includes the
        [p_header,p_parent,p_content,buffer1,buffer2,...] portion of
        the message; the signature is computed here with ``self.sign``.
    flags : int
        Flags passed through to ``stream.send_multipart``.
    copy : bool
        Passed through to ``stream.send_multipart``.
    ident : bytes or list of bytes or None
        A single ident or a list of idents to prepend as routing prefix.
    """
    to_send = []
    if isinstance(ident, bytes):
        ident = [ident]
    if ident is not None:
        to_send.extend(ident)

    to_send.append(DELIM)
    to_send.append(self.sign(msg_list))
    to_send.extend(msg_list)
    # Send the assembled frames.  Sending msg_list here would drop the
    # idents, the delimiter and the signature from the wire message.
    stream.send_multipart(to_send, flags, copy=copy)
614
618
def recv(self, socket, mode=zmq.NOBLOCK, content=True, copy=True):
    """Receive and unpack a message.

    Parameters
    ----------
    socket : ZMQStream or Socket
        The socket or stream to use in receiving.  For a ZMQStream the
        underlying ``.socket`` is read directly.
    mode : int
        Flags passed to ``recv_multipart`` (non-blocking by default).
    content : bool
        Passed to ``self.unserialize``: whether to unpack the content.
    copy : bool
        Passed to ``recv_multipart``, ``self.feed_identities`` and
        ``self.unserialize``.

    Returns
    -------
    [idents], msg
        [idents] is a list of idents and msg is a nested message dict of
        same format as self.msg returns.  ``(None, None)`` is returned when
        the receive fails with EAGAIN.

    Raises
    ------
    zmq.ZMQError
        For any receive error other than EAGAIN.  Errors raised while
        splitting or unserializing the message propagate unchanged.
    """
    if isinstance(socket, ZMQStream):
        socket = socket.socket
    try:
        msg_list = socket.recv_multipart(mode, copy=copy)
    except zmq.ZMQError as e:
        if e.errno == zmq.EAGAIN:
            # We can convert EAGAIN to None as we know in this case
            # recv_multipart won't return None.
            return None, None
        raise
    # split multipart message into identity list and message dict
    # invalid large messages can cause very expensive string comparisons
    idents, msg_list = self.feed_identities(msg_list, copy)
    # No try/except here: the old ``except Exception as e: raise e`` handled
    # nothing and, on Python 2, replaced the original traceback with one
    # that started at the re-raise.
    return idents, self.unserialize(msg_list, content=content, copy=copy)
648
652
def feed_identities(self, msg_list, copy=True):
    """Cut a received multipart message in two at the DELIM frame.

    Everything before the first DELIM is the routing prefix (idents);
    everything after it is the signed message proper.  An ident equal to
    DELIM would break the split.

    Parameters
    ----------
    msg_list : a list of Message or bytes objects
        The message to be split.
    copy : bool
        True if `msg_list` holds bytes; False if it holds frame objects
        whose payload is read from their ``.bytes`` attribute.

    Returns
    -------
    (idents, msg_list) : two lists
        idents is the prefix before DELIM; in non-copy mode each entry is
        converted to bytes via ``.bytes``.  msg_list is the remainder, left
        as received, of the form
        [HMAC,p_header,p_parent,p_content,buffer1,buffer2,...].

    Raises
    ------
    ValueError
        If no DELIM frame is present.
    """
    if copy:
        # list.index raises ValueError itself when DELIM is missing
        split = msg_list.index(DELIM)
    else:
        for split, frame in enumerate(msg_list):
            if frame.bytes == DELIM:
                break
        else:
            raise ValueError("DELIM not in msg_list")

    prefix = msg_list[:split]
    remainder = msg_list[split+1:]
    if copy:
        return prefix, remainder
    return [frame.bytes for frame in prefix], remainder
685
689
def unserialize(self, msg_list, content=True, copy=True):
    """Unserialize a msg_list to a nested message dict.

    This is roughly the inverse of serialize. The serialize/unserialize
    methods work with full message lists, whereas pack/unpack work with
    the individual message parts in the message list.

    Parameters
    ----------
    msg_list : list of bytes or Message objects
        The list of message parts of the form [HMAC,p_header,p_parent,
        p_content,buffer1,buffer2,...].
    content : bool (True)
        Whether to unpack the content dict (True), or leave it packed
        (False).
    copy : bool (True)
        True if the parts are bytes.  If False, the first four entries are
        frame objects and are replaced in place by their ``.bytes``; any
        remaining buffers are left untouched.

    Returns
    -------
    msg : dict
        The nested message dict with keys [header, msg_id, msg_type,
        parent_header, content, buffers].

    Raises
    ------
    TypeError
        If `msg_list` has fewer than four parts.
    ValueError
        If authentication is enabled and the signature is empty, has been
        seen before, or does not match the message parts.
    """
    minlen = 4
    # Check the length before indexing, so a truncated message is reported
    # as malformed instead of raising IndexError or a misleading
    # signature error.
    if len(msg_list) < minlen:
        raise TypeError("malformed message, must have at least %i elements"%minlen)
    if not copy:
        for i in range(minlen):
            msg_list[i] = msg_list[i].bytes
    if self.auth is not None:
        signature = msg_list[0]
        if not signature:
            raise ValueError("Unsigned Message")
        if signature in self.digest_history:
            raise ValueError("Duplicate Signature: %r"%signature)
        check = self.sign(msg_list[1:4])
        if signature != check:
            raise ValueError("Invalid Signature: %r"%signature)
        # Only remember signatures that verified; forged ones must not be
        # able to fill the replay history.
        self.digest_history.add(signature)

    header = self.unpack(msg_list[1])
    message = {}
    message['header'] = header
    message['msg_id'] = header['msg_id']
    message['msg_type'] = header['msg_type']
    message['parent_header'] = self.unpack(msg_list[2])
    if content:
        message['content'] = self.unpack(msg_list[3])
    else:
        message['content'] = msg_list[3]

    message['buffers'] = msg_list[4:]
    return message
740
744
def test_msg2obj():
    """Check that Message mirrors a dict through attributes and items.

    Covers attribute access on a flat dict, attribute access through a
    nested dict, item access with computed keys, and conversion back to a
    plain dict with ``dict()``.
    """
    source = dict(x=1)
    wrapped = Message(source)
    assert wrapped.x == source['x']

    # nested dicts must be reachable through chained attributes
    source['y'] = dict(z=1)
    wrapped = Message(source)
    assert wrapped.y.z == source['y']['z']

    outer, inner = 'y', 'z'
    assert wrapped[outer][inner] == source[outer][inner]

    # and the object converts back to a plain mapping
    roundtrip = dict(wrapped)
    assert source['x'] == roundtrip['x']
    assert source['y']['z'] == roundtrip['y']['z']
756
760
@@ -1,378 +1,368
1 .. _parallel_messages:
1 .. _parallel_messages:
2
2
3 Messaging for Parallel Computing
3 Messaging for Parallel Computing
4 ================================
4 ================================
5
5
6 This is an extension of the :ref:`messaging <messaging>` doc. Diagrams of the connections
6 This is an extension of the :ref:`messaging <messaging>` doc. Diagrams of the connections
7 can be found in the :ref:`parallel connections <parallel_connections>` doc.
7 can be found in the :ref:`parallel connections <parallel_connections>` doc.
8
8
9
9
10 ZMQ messaging is also used in the parallel computing IPython system. All messages to/from
10 ZMQ messaging is also used in the parallel computing IPython system. All messages to/from
11 kernels remain the same as the single kernel model, and are forwarded through a ZMQ Queue
11 kernels remain the same as the single kernel model, and are forwarded through a ZMQ Queue
12 device. The controller receives all messages and replies in these channels, and saves
12 device. The controller receives all messages and replies in these channels, and saves
13 results for future use.
13 results for future use.
14
14
15 The Controller
15 The Controller
16 --------------
16 --------------
17
17
18 The controller is the central collection of processes in the IPython parallel computing
18 The controller is the central collection of processes in the IPython parallel computing
19 model. It has two major components:
19 model. It has two major components:
20
20
21 * The Hub
21 * The Hub
22 * A collection of Schedulers
22 * A collection of Schedulers
23
23
24 The Hub
24 The Hub
25 -------
25 -------
26
26
27 The Hub is the central process for monitoring the state of the engines, and all task
27 The Hub is the central process for monitoring the state of the engines, and all task
28 requests and results. It has no role in execution and does no relay of messages, so
28 requests and results. It has no role in execution and does no relay of messages, so
29 large blocking requests or database actions in the Hub do not have the ability to impede
29 large blocking requests or database actions in the Hub do not have the ability to impede
30 job submission and results.
30 job submission and results.
31
31
32 Registration (``ROUTER``)
32 Registration (``ROUTER``)
33 *************************
33 *************************
34
34
35 The first function of the Hub is to facilitate and monitor connections of clients
35 The first function of the Hub is to facilitate and monitor connections of clients
36 and engines. Both client and engine registration are handled by the same socket, so only
36 and engines. Both client and engine registration are handled by the same socket, so only
37 one ip/port pair is needed to connect any number of engines and clients.
37 one ip/port pair is needed to connect any number of engines and clients.
38
38
39 Engines register with the ``zmq.IDENTITY`` of their two ``DEALER`` sockets, one for the
39 Engines register with the ``zmq.IDENTITY`` of their two ``DEALER`` sockets, one for the
40 queue, which receives execute requests, and one for the heartbeat, which is used to
40 queue, which receives execute requests, and one for the heartbeat, which is used to
41 monitor the survival of the Engine process.
41 monitor the survival of the Engine process.
42
42
43 Message type: ``registration_request``::
43 Message type: ``registration_request``::
44
44
45 content = {
45 content = {
46 'queue' : 'abcd-1234-...', # the MUX queue zmq.IDENTITY
46 'uuid' : 'abcd-1234-...', # the zmq.IDENTITY of the engine's sockets
47 'control' : 'abcd-1234-...', # the control queue zmq.IDENTITY
48 'heartbeat' : 'abcd-1234-...' # the heartbeat zmq.IDENTITY
49 }
47 }
50
48
51 .. note::
49 .. note::
52
50
53 these are always the same, at least for now.
51 these are always the same, at least for now.
54
52
55 The Controller replies to an Engine's registration request with the engine's integer ID,
53 The Controller replies to an Engine's registration request with the engine's integer ID,
56 and all the remaining connection information for connecting the heartbeat process, and
54 and all the remaining connection information for connecting the heartbeat process, and
57 kernel queue socket(s). The message status will be an error if the Engine requests IDs that
55 kernel queue socket(s). The message status will be an error if the Engine requests IDs that
58 are already in use.
56 are already in use.
59
57
60 Message type: ``registration_reply``::
58 Message type: ``registration_reply``::
61
59
62 content = {
60 content = {
63 'status' : 'ok', # or 'error'
61 'status' : 'ok', # or 'error'
64 # if ok:
62 # if ok:
65 'id' : 0, # int, the engine id
63 'id' : 0, # int, the engine id
66 'queue' : 'tcp://127.0.0.1:12345', # connection for engine side of the queue
67 'control' : 'tcp://...', # addr for control queue
68 'heartbeat' : ('tcp://...','tcp://...'), # tuple containing two interfaces needed for heartbeat
69 'task' : 'tcp://...', # addr for task queue, or None if no task queue running
70 }
64 }
71
65
72 Clients use the same socket as engines to start their connections. Connection requests
66 Clients use the same socket as engines to start their connections. Connection requests
73 from clients need no information:
67 from clients need no information:
74
68
75 Message type: ``connection_request``::
69 Message type: ``connection_request``::
76
70
77 content = {}
71 content = {}
78
72
79 The reply to a Client registration request contains the connection information for the
73 The reply to a Client registration request contains the connection information for the
80 multiplexer and load balanced queues, as well as the address for direct hub
74 multiplexer and load balanced queues, as well as the address for direct hub
81 queries. If any of these addresses is `None`, that functionality is not available.
75 queries. If any of these addresses is `None`, that functionality is not available.
82
76
83 Message type: ``connection_reply``::
77 Message type: ``connection_reply``::
84
78
85 content = {
79 content = {
86 'status' : 'ok', # or 'error'
80 'status' : 'ok', # or 'error'
87 # if ok:
88 'queue' : 'tcp://127.0.0.1:12345', # connection for client side of the MUX queue
89 'task' : ('lru','tcp...'), # routing scheme and addr for task queue (len 2 tuple)
90 'query' : 'tcp...', # addr for methods to query the hub, like queue_request, etc.
91 'control' : 'tcp...', # addr for control methods, like abort, etc.
92 }
81 }
93
82
94 Heartbeat
83 Heartbeat
95 *********
84 *********
96
85
97 The hub uses a heartbeat system to monitor engines, and track when they become
86 The hub uses a heartbeat system to monitor engines, and track when they become
98 unresponsive. The mechanism is described in :ref:`messaging <messaging>`, and shown in :ref:`connections
87 unresponsive. The mechanism is described in :ref:`messaging <messaging>`, and shown in :ref:`connections
99 <parallel_connections>`.
88 <parallel_connections>`.
100
89
101 Notification (``PUB``)
90 Notification (``PUB``)
102 **********************
91 **********************
103
92
104 The hub publishes all engine registration/unregistration events on a ``PUB`` socket.
93 The hub publishes all engine registration/unregistration events on a ``PUB`` socket.
105 This allows clients to have up-to-date engine ID sets without polling. Registration
94 This allows clients to have up-to-date engine ID sets without polling. Registration
106 notifications contain both the integer engine ID and the queue ID, which is necessary for
95 notifications contain both the integer engine ID and the queue ID, which is necessary for
107 sending messages via the Multiplexer Queue and Control Queues.
96 sending messages via the Multiplexer Queue and Control Queues.
108
97
109 Message type: ``registration_notification``::
98 Message type: ``registration_notification``::
110
99
111 content = {
100 content = {
112 'id' : 0, # engine ID that has been registered
101 'id' : 0, # engine ID that has been registered
113 'queue' : 'engine_id' # the IDENT for the engine's queue
102 'uuid' : 'engine_id' # the IDENT for the engine's sockets
114 }
103 }
115
104
116 Message type : ``unregistration_notification``::
105 Message type : ``unregistration_notification``::
117
106
118 content = {
107 content = {
119 'id' : 0 # engine ID that has been unregistered
108 'id' : 0, # engine ID that has been unregistered
109 'uuid' : 'engine_id' # the IDENT for the engine's sockets
120 }
110 }
121
111
122
112
123 Client Queries (``ROUTER``)
113 Client Queries (``ROUTER``)
124 ***************************
114 ***************************
125
115
126 The hub monitors and logs all queue traffic, so that clients can retrieve past
116 The hub monitors and logs all queue traffic, so that clients can retrieve past
127 results or monitor pending tasks. This information may reside in-memory on the Hub, or
117 results or monitor pending tasks. This information may reside in-memory on the Hub, or
128 on disk in a database (SQLite and MongoDB are currently supported). These requests are
118 on disk in a database (SQLite and MongoDB are currently supported). These requests are
129 handled by the same socket as registration.
119 handled by the same socket as registration.
130
120
131
121
132 :func:`queue_request` requests can specify multiple engines to query via the `targets`
122 :func:`queue_request` requests can specify multiple engines to query via the `targets`
133 element. A verbose flag can be passed, to determine whether the result should be the list
123 element. A verbose flag can be passed, to determine whether the result should be the list
134 of `msg_ids` in the queue or simply the length of each list.
124 of `msg_ids` in the queue or simply the length of each list.
135
125
136 Message type: ``queue_request``::
126 Message type: ``queue_request``::
137
127
138 content = {
128 content = {
139 'verbose' : True, # whether return should be lists themselves or just lens
129 'verbose' : True, # whether return should be lists themselves or just lens
140 'targets' : [0,3,1] # list of ints
130 'targets' : [0,3,1] # list of ints
141 }
131 }
142
132
143 The content of a reply to a :func:`queue_request` request is a dict, keyed by the engine
133 The content of a reply to a :func:`queue_request` request is a dict, keyed by the engine
144 IDs. Note that they will be the string representation of the integer keys, since JSON
134 IDs. Note that they will be the string representation of the integer keys, since JSON
145 cannot handle number keys. The three keys of each dict are::
135 cannot handle number keys. The three keys of each dict are::
146
136
147 'completed' : messages submitted via any queue that ran on the engine
137 'completed' : messages submitted via any queue that ran on the engine
148 'queue' : jobs submitted via MUX queue, whose results have not been received
138 'queue' : jobs submitted via MUX queue, whose results have not been received
149 'tasks' : tasks that are known to have been submitted to the engine, but
139 'tasks' : tasks that are known to have been submitted to the engine, but
150 have not completed. Note that with the pure zmq scheduler, this will
140 have not completed. Note that with the pure zmq scheduler, this will
151 always be 0/[].
141 always be 0/[].
152
142
153 Message type: ``queue_reply``::
143 Message type: ``queue_reply``::
154
144
155 content = {
145 content = {
156 'status' : 'ok', # or 'error'
146 'status' : 'ok', # or 'error'
157 # if verbose=False:
147 # if verbose=False:
158 '0' : {'completed' : 1, 'queue' : 7, 'tasks' : 0},
148 '0' : {'completed' : 1, 'queue' : 7, 'tasks' : 0},
159 # if verbose=True:
149 # if verbose=True:
160 '1' : {'completed' : ['abcd-...','1234-...'], 'queue' : ['58008-'], 'tasks' : []},
150 '1' : {'completed' : ['abcd-...','1234-...'], 'queue' : ['58008-'], 'tasks' : []},
161 }
151 }
162
152
163 Clients can request individual results directly from the hub. This is primarily for
153 Clients can request individual results directly from the hub. This is primarily for
164 gathering results of executions not submitted by the requesting client, as the client
154 gathering results of executions not submitted by the requesting client, as the client
165 will have all its own results already. Requests are made by msg_id, and can contain one or
155 will have all its own results already. Requests are made by msg_id, and can contain one or
166 more msg_id. An additional boolean key 'statusonly' can be used to not request the
156 more msg_id. An additional boolean key 'statusonly' can be used to not request the
167 results, but simply poll the status of the jobs.
157 results, but simply poll the status of the jobs.
168
158
169 Message type: ``result_request``::
159 Message type: ``result_request``::
170
160
171 content = {
161 content = {
172 'msg_ids' : ['uuid','...'], # list of strs
162 'msg_ids' : ['uuid','...'], # list of strs
173 'targets' : [1,2,3], # list of int ids or uuids
163 'targets' : [1,2,3], # list of int ids or uuids
174 'statusonly' : False, # bool
164 'statusonly' : False, # bool
175 }
165 }
176
166
177 The :func:`result_request` reply contains the content objects of the actual execution
167 The :func:`result_request` reply contains the content objects of the actual execution
178 reply messages. If `statusonly=True`, then there will be only the 'pending' and
168 reply messages. If `statusonly=True`, then there will be only the 'pending' and
179 'completed' lists.
169 'completed' lists.
180
170
181
171
182 Message type: ``result_reply``::
172 Message type: ``result_reply``::
183
173
184 content = {
174 content = {
185 'status' : 'ok', # else error
175 'status' : 'ok', # else error
186 # if ok:
176 # if ok:
187 'acbd-...' : msg, # the content dict is keyed by msg_ids,
177 'acbd-...' : msg, # the content dict is keyed by msg_ids,
188 # values are the result messages
178 # values are the result messages
189 # there will be none of these if `statusonly=True`
179 # there will be none of these if `statusonly=True`
190 'pending' : ['msg_id','...'], # msg_ids still pending
180 'pending' : ['msg_id','...'], # msg_ids still pending
191 'completed' : ['msg_id','...'], # list of completed msg_ids
181 'completed' : ['msg_id','...'], # list of completed msg_ids
192 }
182 }
193 buffers = ['bufs','...'] # the buffers that contained the results of the objects.
183 buffers = ['bufs','...'] # the buffers that contained the results of the objects.
194 # this will be empty if no messages are complete, or if
184 # this will be empty if no messages are complete, or if
195 # statusonly is True.
185 # statusonly is True.
196
186
197 For memory management purposes, Clients can also instruct the hub to forget the
187 For memory management purposes, Clients can also instruct the hub to forget the
198 results of messages. This can be done by message ID or engine ID. Individual messages are
188 results of messages. This can be done by message ID or engine ID. Individual messages are
199 dropped by msg_id, and all messages completed on an engine are dropped by engine ID. This
189 dropped by msg_id, and all messages completed on an engine are dropped by engine ID. This
200 may no longer be necessary with the mongodb-based message logging backend.
190 may no longer be necessary with the mongodb-based message logging backend.
201
191
202 If the msg_ids element is the string ``'all'`` instead of a list, then all completed
192 If the msg_ids element is the string ``'all'`` instead of a list, then all completed
203 results are forgotten.
193 results are forgotten.
204
194
205 Message type: ``purge_request``::
195 Message type: ``purge_request``::
206
196
207 content = {
197 content = {
208 'msg_ids' : ['id1', 'id2',...], # list of msg_ids or 'all'
198 'msg_ids' : ['id1', 'id2',...], # list of msg_ids or 'all'
209 'engine_ids' : [0,2,4] # list of engine IDs
199 'engine_ids' : [0,2,4] # list of engine IDs
210 }
200 }
211
201
212 The reply to a purge request is simply the status 'ok' if the request succeeded, or an
202 The reply to a purge request is simply the status 'ok' if the request succeeded, or an
213 explanation of why it failed, such as requesting the purge of a nonexistent or pending
203 explanation of why it failed, such as requesting the purge of a nonexistent or pending
214 message.
204 message.
215
205
216 Message type: ``purge_reply``::
206 Message type: ``purge_reply``::
217
207
218 content = {
208 content = {
219 'status' : 'ok', # or 'error'
209 'status' : 'ok', # or 'error'
220 }
210 }
221
211
222
212
223 Schedulers
213 Schedulers
224 ----------
214 ----------
225
215
226 There are three basic schedulers:
216 There are three basic schedulers:
227
217
228 * Task Scheduler
218 * Task Scheduler
229 * MUX Scheduler
219 * MUX Scheduler
230 * Control Scheduler
220 * Control Scheduler
231
221
232 The MUX and Control schedulers are simple MonitoredQueue ØMQ devices, with ``ROUTER``
222 The MUX and Control schedulers are simple MonitoredQueue ØMQ devices, with ``ROUTER``
233 sockets on either side. This allows the queue to relay individual messages to particular
223 sockets on either side. This allows the queue to relay individual messages to particular
234 targets via ``zmq.IDENTITY`` routing. The Task scheduler may be a MonitoredQueue ØMQ
224 targets via ``zmq.IDENTITY`` routing. The Task scheduler may be a MonitoredQueue ØMQ
235 device, in which case the client-facing socket is ``ROUTER``, and the engine-facing socket
225 device, in which case the client-facing socket is ``ROUTER``, and the engine-facing socket
236 is ``DEALER``. The result of this is that client-submitted messages are load-balanced via
226 is ``DEALER``. The result of this is that client-submitted messages are load-balanced via
237 the ``DEALER`` socket, but the engine's replies to each message go to the requesting client.
227 the ``DEALER`` socket, but the engine's replies to each message go to the requesting client.
238
228
239 Raw ``DEALER`` scheduling is quite primitive, and doesn't allow message introspection, so
229 Raw ``DEALER`` scheduling is quite primitive, and doesn't allow message introspection, so
240 there are also Python Schedulers that can be used. These Schedulers behave in much the
230 there are also Python Schedulers that can be used. These Schedulers behave in much the
241 same way as a MonitoredQueue does from the outside, but have rich internal logic to
231 same way as a MonitoredQueue does from the outside, but have rich internal logic to
242 determine destinations, as well as handle dependency graphs. Their sockets are always
232 determine destinations, as well as handle dependency graphs. Their sockets are always
243 ``ROUTER`` on both sides.
233 ``ROUTER`` on both sides.
244
234
245 The Python task schedulers have an additional message type, which informs the Hub of
235 The Python task schedulers have an additional message type, which informs the Hub of
246 the destination of a task as soon as that destination is known.
236 the destination of a task as soon as that destination is known.
247
237
248 Message type: ``task_destination``::
238 Message type: ``task_destination``::
249
239
250 content = {
240 content = {
251 'msg_id' : 'abcd-1234-...', # the msg's uuid
241 'msg_id' : 'abcd-1234-...', # the msg's uuid
252 'engine_id' : '1234-abcd-...', # the destination engine's zmq.IDENTITY
242 'engine_id' : '1234-abcd-...', # the destination engine's zmq.IDENTITY
253 }
243 }
254
244
255 :func:`apply` and :func:`apply_bound`
245 :func:`apply` and :func:`apply_bound`
256 *************************************
246 *************************************
257
247
258 In terms of message classes, the MUX scheduler and Task scheduler relay the exact same
248 In terms of message classes, the MUX scheduler and Task scheduler relay the exact same
259 message types. Their only difference lies in how the destination is selected.
249 message types. Their only difference lies in how the destination is selected.
260
250
261 The `Namespace <http://gist.github.com/483294>`_ model suggests that execution be able to
251 The `Namespace <http://gist.github.com/483294>`_ model suggests that execution be able to
262 use the model::
252 use the model::
263
253
264 ns.apply(f, *args, **kwargs)
254 ns.apply(f, *args, **kwargs)
265
255
266 which takes `f`, a function in the user's namespace, and executes ``f(*args, **kwargs)``
256 which takes `f`, a function in the user's namespace, and executes ``f(*args, **kwargs)``
267 on a remote engine, returning the result (or, for non-blocking, information facilitating
257 on a remote engine, returning the result (or, for non-blocking, information facilitating
268 later retrieval of the result). This model, unlike the execute message which just uses a
258 later retrieval of the result). This model, unlike the execute message which just uses a
269 code string, must be able to send arbitrary (pickleable) Python objects and, ideally, copy
259 code string, must be able to send arbitrary (pickleable) Python objects and, ideally, copy
270 as little data as possible. The `buffers` property of a Message was introduced for this
260 as little data as possible. The `buffers` property of a Message was introduced for this
271 purpose.
261 purpose.
272
262
273 Utility method :func:`build_apply_message` in :mod:`IPython.zmq.streamsession` wraps a
263 Utility method :func:`build_apply_message` in :mod:`IPython.zmq.streamsession` wraps a
274 function signature and builds a sendable buffer format for minimal data copying (exactly
264 function signature and builds a sendable buffer format for minimal data copying (exactly
275 zero copies of numpy array data or buffers or large strings).
265 zero copies of numpy array data or buffers or large strings).
276
266
277 Message type: ``apply_request``::
267 Message type: ``apply_request``::
278
268
279 content = {
269 content = {
280 'bound' : True, # whether to execute in the engine's namespace or unbound
270 'bound' : True, # whether to execute in the engine's namespace or unbound
281 'after' : ['msg_id',...], # list of msg_ids or output of Dependency.as_dict()
271 'after' : ['msg_id',...], # list of msg_ids or output of Dependency.as_dict()
282 'follow' : ['msg_id',...], # list of msg_ids or output of Dependency.as_dict()
272 'follow' : ['msg_id',...], # list of msg_ids or output of Dependency.as_dict()
283
273
284 }
274 }
285 buffers = ['...'] # at least 3 in length
275 buffers = ['...'] # at least 3 in length
286 # as built by build_apply_message(f,args,kwargs)
276 # as built by build_apply_message(f,args,kwargs)
287
277
288 after/follow represent task dependencies. 'after' corresponds to a time dependency. The
278 after/follow represent task dependencies. 'after' corresponds to a time dependency. The
289 request will not arrive at an engine until the 'after' dependency tasks have completed.
279 request will not arrive at an engine until the 'after' dependency tasks have completed.
290 'follow' corresponds to a location dependency. The task will be submitted to the same
280 'follow' corresponds to a location dependency. The task will be submitted to the same
291 engine as these msg_ids (see :class:`Dependency` docs for details).
281 engine as these msg_ids (see :class:`Dependency` docs for details).
292
282
293 Message type: ``apply_reply``::
283 Message type: ``apply_reply``::
294
284
295 content = {
285 content = {
296 'status' : 'ok' # 'ok' or 'error'
286 'status' : 'ok' # 'ok' or 'error'
297 # other error info here, as in other messages
287 # other error info here, as in other messages
298 }
288 }
299 buffers = ['...'] # either 1 or 2 in length
289 buffers = ['...'] # either 1 or 2 in length
300 # a serialization of the return value of f(*args,**kwargs)
290 # a serialization of the return value of f(*args,**kwargs)
301 # only populated if status is 'ok'
291 # only populated if status is 'ok'
302
292
303 All engine execution and data movement is performed via apply messages.
293 All engine execution and data movement is performed via apply messages.
304
294
305 Control Messages
295 Control Messages
306 ----------------
296 ----------------
307
297
308 Messages that interact with the engines, but are not meant to execute code, are submitted
298 Messages that interact with the engines, but are not meant to execute code, are submitted
309 via the Control queue. These messages have high priority, and are thus received and
299 via the Control queue. These messages have high priority, and are thus received and
310 handled before any execution requests.
300 handled before any execution requests.
311
301
312 Clients may want to clear the namespace on the engine. There are no arguments nor
302 Clients may want to clear the namespace on the engine. There are no arguments nor
313 information involved in this request, so the content is empty.
303 information involved in this request, so the content is empty.
314
304
315 Message type: ``clear_request``::
305 Message type: ``clear_request``::
316
306
317 content = {}
307 content = {}
318
308
319 Message type: ``clear_reply``::
309 Message type: ``clear_reply``::
320
310
321 content = {
311 content = {
322 'status' : 'ok' # 'ok' or 'error'
312 'status' : 'ok' # 'ok' or 'error'
323 # other error info here, as in other messages
313 # other error info here, as in other messages
324 }
314 }
325
315
326 Clients may want to abort tasks that have not yet run. This can be done by message id, or
316 Clients may want to abort tasks that have not yet run. This can be done by message id, or
327 all enqueued messages can be aborted if None is specified.
317 all enqueued messages can be aborted if None is specified.
328
318
329 Message type: ``abort_request``::
319 Message type: ``abort_request``::
330
320
331 content = {
321 content = {
332 'msg_ids' : ['1234-...', '...'] # list of msg_ids or None
322 'msg_ids' : ['1234-...', '...'] # list of msg_ids or None
333 }
323 }
334
324
335 Message type: ``abort_reply``::
325 Message type: ``abort_reply``::
336
326
337 content = {
327 content = {
338 'status' : 'ok' # 'ok' or 'error'
328 'status' : 'ok' # 'ok' or 'error'
339 # other error info here, as in other messages
329 # other error info here, as in other messages
340 }
330 }
341
331
342 The last action a client may want to do is shut down the kernel. If a kernel receives a
332 The last action a client may want to do is shut down the kernel. If a kernel receives a
343 shutdown request, then it aborts all queued messages, replies to the request, and exits.
333 shutdown request, then it aborts all queued messages, replies to the request, and exits.
344
334
345 Message type: ``shutdown_request``::
335 Message type: ``shutdown_request``::
346
336
347 content = {}
337 content = {}
348
338
349 Message type: ``shutdown_reply``::
339 Message type: ``shutdown_reply``::
350
340
351 content = {
341 content = {
352 'status' : 'ok' # 'ok' or 'error'
342 'status' : 'ok' # 'ok' or 'error'
353 # other error info here, as in other messages
343 # other error info here, as in other messages
354 }
344 }
355
345
356
346
357 Implementation
347 Implementation
358 --------------
348 --------------
359
349
360 There are a few differences in implementation between the `StreamSession` object used in
350 There are a few differences in implementation between the `StreamSession` object used in
361 the newparallel branch and the `Session` object, the main one being that messages are
351 the newparallel branch and the `Session` object, the main one being that messages are
362 sent in parts, rather than as a single serialized object. `StreamSession` objects also
352 sent in parts, rather than as a single serialized object. `StreamSession` objects also
363 take pack/unpack functions, which are to be used when serializing/deserializing objects.
353 take pack/unpack functions, which are to be used when serializing/deserializing objects.
364 These can be any functions that translate to/from formats that ZMQ sockets can send
354 These can be any functions that translate to/from formats that ZMQ sockets can send
365 (buffers, bytes, etc.).
355 (buffers, bytes, etc.).
366
356
367 Split Sends
357 Split Sends
368 ***********
358 ***********
369
359
370 Previously, messages were bundled as a single json object and one call to
360 Previously, messages were bundled as a single json object and one call to
371 :func:`socket.send_json`. Since the hub inspects all messages, and doesn't need to
361 :func:`socket.send_json`. Since the hub inspects all messages, and doesn't need to
372 see the content of the messages, which can be large, messages are now serialized and sent in
362 see the content of the messages, which can be large, messages are now serialized and sent in
373 pieces. All messages are sent in at least 3 parts: the header, the parent header, and the
363 pieces. All messages are sent in at least 3 parts: the header, the parent header, and the
374 content. This allows the controller to unpack and inspect the (always small) header,
364 content. This allows the controller to unpack and inspect the (always small) header,
375 without spending time unpacking the content unless the message is bound for the
365 without spending time unpacking the content unless the message is bound for the
376 controller. Buffers are added on to the end of the message, and can be any objects that
366 controller. Buffers are added on to the end of the message, and can be any objects that
377 present the buffer interface.
367 present the buffer interface.
378
368
General Comments 0
You need to be logged in to leave comments. Login now