use ROUTER/DEALER socket names instead of XREP/XREQ...
MinRK
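The rename is purely cosmetic: libzmq deprecated the XREP/XREQ socket names in favor of ROUTER/DEALER, and pyzmq keeps the old names as aliases for the same socket-type constants, so nothing changes on the wire. A minimal pyzmq sketch (standalone illustration, not part of this changeset):

import zmq

# XREP/XREQ are legacy aliases for ROUTER/DEALER in pyzmq;
# the underlying socket-type integers are identical.
assert zmq.XREP == zmq.ROUTER
assert zmq.XREQ == zmq.DEALER

ctx = zmq.Context.instance()
router = ctx.socket(zmq.ROUTER)   # was: ctx.socket(zmq.XREP)
dealer = ctx.socket(zmq.DEALER)   # was: ctx.socket(zmq.XREQ)
router.close()
dealer.close()
ctx.term()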
@@ -1,46 +1,40 @@
 """The IPython ZMQ-based parallel computing interface.
 
 Authors:
 
 * MinRK
 """
 #-----------------------------------------------------------------------------
 # Copyright (C) 2011 The IPython Development Team
 #
 # Distributed under the terms of the BSD License. The full license is in
 # the file COPYING, distributed as part of this software.
 #-----------------------------------------------------------------------------
 
 #-----------------------------------------------------------------------------
 # Imports
 #-----------------------------------------------------------------------------
 
 import os
 import warnings
 
 import zmq
 
+from IPython.zmq import check_for_zmq
 
 if os.name == 'nt':
-    if zmq.__version__ < '2.1.7':
-        raise ImportError("IPython.parallel requires pyzmq/0MQ >= 2.1.7 on Windows, "
-        "and you appear to have %s"%zmq.__version__)
-elif zmq.__version__ < '2.1.4':
-    raise ImportError("IPython.parallel requires pyzmq/0MQ >= 2.1.4, you appear to have %s"%zmq.__version__)
-
-if zmq.zmq_version() >= '3.0.0':
-    warnings.warn("""libzmq 3 detected.
-    It is unlikely that IPython's zmq code will work properly.
-    Please install libzmq stable, which is 2.1.x or 2.2.x""",
-    RuntimeWarning)
+    min_pyzmq = '2.1.7'
+else:
+    min_pyzmq = '2.1.4'
 
+check_for_zmq(min_pyzmq, 'IPython.parallel')
 
 from IPython.utils.pickleutil import Reference
 
 from .client.asyncresult import *
 from .client.client import Client
 from .client.remotefunction import *
 from .client.view import *
 from .controller.dependency import *
 
 
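The hunk above also folds the inline pyzmq version checks into a single call to IPython.zmq.check_for_zmq. As a rough sketch of what such a helper has to do, assuming it takes a minimum version string and the name of the requiring package (the real implementation lives in IPython.zmq and may differ in detail):

# Hypothetical stand-in for IPython.zmq.check_for_zmq, for illustration only.
def check_for_zmq(minimum_version, required_by='Someone'):
    try:
        import zmq
    except ImportError:
        raise ImportError("%s requires pyzmq >= %s" % (required_by, minimum_version))
    # the replaced code compared version strings directly, so this sketch does too
    if zmq.__version__ < minimum_version:
        raise ImportError("%s requires pyzmq >= %s, but you have %s"
                          % (required_by, minimum_version, zmq.__version__))

check_for_zmq('2.1.4', 'IPython.parallel')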
@@ -1,441 +1,441 @@
 #!/usr/bin/env python
 # encoding: utf-8
 """
 The IPython controller application.
 
 Authors:
 
 * Brian Granger
 * MinRK
 
 """
 
 #-----------------------------------------------------------------------------
 # Copyright (C) 2008-2011 The IPython Development Team
 #
 # Distributed under the terms of the BSD License. The full license is in
 # the file COPYING, distributed as part of this software.
 #-----------------------------------------------------------------------------
 
 #-----------------------------------------------------------------------------
 # Imports
 #-----------------------------------------------------------------------------
 
 from __future__ import with_statement
 
 import os
 import socket
 import stat
 import sys
 import uuid
 
 from multiprocessing import Process
 
 import zmq
 from zmq.devices import ProcessMonitoredQueue
 from zmq.log.handlers import PUBHandler
 from zmq.utils import jsonapi as json
 
 from IPython.config.application import boolean_flag
 from IPython.core.profiledir import ProfileDir
 
 from IPython.parallel.apps.baseapp import (
     BaseParallelApplication,
     base_aliases,
     base_flags,
 )
 from IPython.utils.importstring import import_item
 from IPython.utils.traitlets import Instance, Unicode, Bool, List, Dict
 
 # from IPython.parallel.controller.controller import ControllerFactory
 from IPython.zmq.session import Session
 from IPython.parallel.controller.heartmonitor import HeartMonitor
 from IPython.parallel.controller.hub import HubFactory
 from IPython.parallel.controller.scheduler import TaskScheduler,launch_scheduler
 from IPython.parallel.controller.sqlitedb import SQLiteDB
 
 from IPython.parallel.util import signal_children, split_url, asbytes
 
 # conditional import of MongoDB backend class
 
 try:
     from IPython.parallel.controller.mongodb import MongoDB
 except ImportError:
     maybe_mongo = []
 else:
     maybe_mongo = [MongoDB]
 
 
 #-----------------------------------------------------------------------------
 # Module level variables
 #-----------------------------------------------------------------------------
 
 
 #: The default config file name for this application
 default_config_file_name = u'ipcontroller_config.py'
 
 
 _description = """Start the IPython controller for parallel computing.
 
 The IPython controller provides a gateway between the IPython engines and
 clients. The controller needs to be started before the engines and can be
 configured using command line options or using a cluster directory. Cluster
 directories contain config, log and security files and are usually located in
 your ipython directory and named as "profile_name". See the `profile`
 and `profile-dir` options for details.
 """
 
 _examples = """
 ipcontroller --ip=192.168.0.1 --port=1000  # listen on ip, port for engines
 ipcontroller --scheme=pure  # use the pure zeromq scheduler
 """
 
 
 #-----------------------------------------------------------------------------
 # The main application
 #-----------------------------------------------------------------------------
 flags = {}
 flags.update(base_flags)
 flags.update({
     'usethreads' : ( {'IPControllerApp' : {'use_threads' : True}},
                     'Use threads instead of processes for the schedulers'),
     'sqlitedb' : ({'HubFactory' : {'db_class' : 'IPython.parallel.controller.sqlitedb.SQLiteDB'}},
                     'use the SQLiteDB backend'),
     'mongodb' : ({'HubFactory' : {'db_class' : 'IPython.parallel.controller.mongodb.MongoDB'}},
                     'use the MongoDB backend'),
     'dictdb' : ({'HubFactory' : {'db_class' : 'IPython.parallel.controller.dictdb.DictDB'}},
                     'use the in-memory DictDB backend'),
     'reuse' : ({'IPControllerApp' : {'reuse_files' : True}},
                     'reuse existing json connection files')
 })
 
 flags.update(boolean_flag('secure', 'IPControllerApp.secure',
     "Use HMAC digests for authentication of messages.",
     "Don't authenticate messages."
 ))
 aliases = dict(
     secure = 'IPControllerApp.secure',
     ssh = 'IPControllerApp.ssh_server',
     enginessh = 'IPControllerApp.engine_ssh_server',
     location = 'IPControllerApp.location',
 
     ident = 'Session.session',
     user = 'Session.username',
     keyfile = 'Session.keyfile',
 
     url = 'HubFactory.url',
     ip = 'HubFactory.ip',
     transport = 'HubFactory.transport',
     port = 'HubFactory.regport',
 
     ping = 'HeartMonitor.period',
 
     scheme = 'TaskScheduler.scheme_name',
     hwm = 'TaskScheduler.hwm',
 )
 aliases.update(base_aliases)
 
 
 class IPControllerApp(BaseParallelApplication):
 
     name = u'ipcontroller'
     description = _description
     examples = _examples
     config_file_name = Unicode(default_config_file_name)
     classes = [ProfileDir, Session, HubFactory, TaskScheduler, HeartMonitor, SQLiteDB] + maybe_mongo
 
     # change default to True
     auto_create = Bool(True, config=True,
         help="""Whether to create profile dir if it doesn't exist.""")
 
     reuse_files = Bool(False, config=True,
         help='Whether to reuse existing json connection files.'
     )
     secure = Bool(True, config=True,
         help='Whether to use HMAC digests for extra message authentication.'
     )
     ssh_server = Unicode(u'', config=True,
         help="""ssh url for clients to use when connecting to the Controller
         processes. It should be of the form: [user@]server[:port]. The
         Controller's listening addresses must be accessible from the ssh server""",
     )
     engine_ssh_server = Unicode(u'', config=True,
         help="""ssh url for engines to use when connecting to the Controller
         processes. It should be of the form: [user@]server[:port]. The
         Controller's listening addresses must be accessible from the ssh server""",
     )
     location = Unicode(u'', config=True,
         help="""The external IP or domain name of the Controller, used for disambiguating
         engine and client connections.""",
     )
     import_statements = List([], config=True,
         help="import statements to be run at startup. Necessary in some environments"
     )
 
     use_threads = Bool(False, config=True,
         help='Use threads instead of processes for the schedulers',
     )
 
     # internal
     children = List()
     mq_class = Unicode('zmq.devices.ProcessMonitoredQueue')
 
     def _use_threads_changed(self, name, old, new):
         self.mq_class = 'zmq.devices.%sMonitoredQueue'%('Thread' if new else 'Process')
 
     aliases = Dict(aliases)
     flags = Dict(flags)
 
 
     def save_connection_dict(self, fname, cdict):
         """save a connection dict to json file."""
         c = self.config
         url = cdict['url']
         location = cdict['location']
         if not location:
             try:
                 proto,ip,port = split_url(url)
             except AssertionError:
                 pass
             else:
                 try:
                     location = socket.gethostbyname_ex(socket.gethostname())[2][-1]
                 except (socket.gaierror, IndexError):
                     self.log.warn("Could not identify this machine's IP, assuming 127.0.0.1."
                     " You may need to specify '--location=<external_ip_address>' to help"
                     " IPython decide when to connect via loopback.")
                     location = '127.0.0.1'
             cdict['location'] = location
         fname = os.path.join(self.profile_dir.security_dir, fname)
         with open(fname, 'wb') as f:
             f.write(json.dumps(cdict, indent=2))
         os.chmod(fname, stat.S_IRUSR|stat.S_IWUSR)
 
     def load_config_from_json(self):
         """load config from existing json connector files."""
         c = self.config
         # load from engine config
         with open(os.path.join(self.profile_dir.security_dir, 'ipcontroller-engine.json')) as f:
             cfg = json.loads(f.read())
         key = c.Session.key = asbytes(cfg['exec_key'])
         xport,addr = cfg['url'].split('://')
         c.HubFactory.engine_transport = xport
         ip,ports = addr.split(':')
         c.HubFactory.engine_ip = ip
         c.HubFactory.regport = int(ports)
         self.location = cfg['location']
         if not self.engine_ssh_server:
             self.engine_ssh_server = cfg['ssh']
         # load client config
         with open(os.path.join(self.profile_dir.security_dir, 'ipcontroller-client.json')) as f:
             cfg = json.loads(f.read())
         assert key == cfg['exec_key'], "exec_key mismatch between engine and client keys"
         xport,addr = cfg['url'].split('://')
         c.HubFactory.client_transport = xport
         ip,ports = addr.split(':')
         c.HubFactory.client_ip = ip
         if not self.ssh_server:
             self.ssh_server = cfg['ssh']
         assert int(ports) == c.HubFactory.regport, "regport mismatch"
 
     def init_hub(self):
         c = self.config
 
         self.do_import_statements()
         reusing = self.reuse_files
         if reusing:
             try:
                 self.load_config_from_json()
             except (AssertionError,IOError):
                 reusing=False
         # check again, because reusing may have failed:
         if reusing:
             pass
         elif self.secure:
             key = str(uuid.uuid4())
             # keyfile = os.path.join(self.profile_dir.security_dir, self.exec_key)
             # with open(keyfile, 'w') as f:
             #     f.write(key)
             # os.chmod(keyfile, stat.S_IRUSR|stat.S_IWUSR)
             c.Session.key = asbytes(key)
         else:
             key = c.Session.key = b''
 
         try:
             self.factory = HubFactory(config=c, log=self.log)
             # self.start_logging()
             self.factory.init_hub()
         except:
             self.log.error("Couldn't construct the Controller", exc_info=True)
             self.exit(1)
 
         if not reusing:
             # save to new json config files
             f = self.factory
             cdict = {'exec_key' : key,
                     'ssh' : self.ssh_server,
                     'url' : "%s://%s:%s"%(f.client_transport, f.client_ip, f.regport),
                     'location' : self.location
                     }
             self.save_connection_dict('ipcontroller-client.json', cdict)
             edict = cdict
             edict['url']="%s://%s:%s"%((f.client_transport, f.client_ip, f.regport))
             edict['ssh'] = self.engine_ssh_server
             self.save_connection_dict('ipcontroller-engine.json', edict)
 
     #
     def init_schedulers(self):
         children = self.children
         mq = import_item(str(self.mq_class))
 
         hub = self.factory
         # maybe_inproc = 'inproc://monitor' if self.use_threads else self.monitor_url
         # IOPub relay (in a Process)
         q = mq(zmq.PUB, zmq.SUB, zmq.PUB, b'N/A',b'iopub')
         q.bind_in(hub.client_info['iopub'])
         q.bind_out(hub.engine_info['iopub'])
         q.setsockopt_out(zmq.SUBSCRIBE, b'')
         q.connect_mon(hub.monitor_url)
         q.daemon=True
         children.append(q)
 
         # Multiplexer Queue (in a Process)
-        q = mq(zmq.XREP, zmq.XREP, zmq.PUB, b'in', b'out')
+        q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'in', b'out')
         q.bind_in(hub.client_info['mux'])
         q.setsockopt_in(zmq.IDENTITY, b'mux')
         q.bind_out(hub.engine_info['mux'])
         q.connect_mon(hub.monitor_url)
         q.daemon=True
         children.append(q)
 
         # Control Queue (in a Process)
-        q = mq(zmq.XREP, zmq.XREP, zmq.PUB, b'incontrol', b'outcontrol')
+        q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'incontrol', b'outcontrol')
         q.bind_in(hub.client_info['control'])
         q.setsockopt_in(zmq.IDENTITY, b'control')
         q.bind_out(hub.engine_info['control'])
         q.connect_mon(hub.monitor_url)
         q.daemon=True
         children.append(q)
         try:
             scheme = self.config.TaskScheduler.scheme_name
         except AttributeError:
             scheme = TaskScheduler.scheme_name.get_default_value()
         # Task Queue (in a Process)
         if scheme == 'pure':
             self.log.warn("task::using pure XREQ Task scheduler")
-            q = mq(zmq.XREP, zmq.XREQ, zmq.PUB, b'intask', b'outtask')
+            q = mq(zmq.ROUTER, zmq.DEALER, zmq.PUB, b'intask', b'outtask')
             # q.setsockopt_out(zmq.HWM, hub.hwm)
             q.bind_in(hub.client_info['task'][1])
             q.setsockopt_in(zmq.IDENTITY, b'task')
             q.bind_out(hub.engine_info['task'])
             q.connect_mon(hub.monitor_url)
             q.daemon=True
             children.append(q)
         elif scheme == 'none':
             self.log.warn("task::using no Task scheduler")
 
         else:
             self.log.info("task::using Python %s Task scheduler"%scheme)
             sargs = (hub.client_info['task'][1], hub.engine_info['task'],
                                 hub.monitor_url, hub.client_info['notification'])
             kwargs = dict(logname='scheduler', loglevel=self.log_level,
                             log_url = self.log_url, config=dict(self.config))
             if 'Process' in self.mq_class:
                 # run the Python scheduler in a Process
                 q = Process(target=launch_scheduler, args=sargs, kwargs=kwargs)
                 q.daemon=True
                 children.append(q)
             else:
                 # single-threaded Controller
                 kwargs['in_thread'] = True
                 launch_scheduler(*sargs, **kwargs)
 
 
     def save_urls(self):
         """save the registration urls to files."""
         c = self.config
 
         sec_dir = self.profile_dir.security_dir
         cf = self.factory
 
         with open(os.path.join(sec_dir, 'ipcontroller-engine.url'), 'w') as f:
             f.write("%s://%s:%s"%(cf.engine_transport, cf.engine_ip, cf.regport))
 
         with open(os.path.join(sec_dir, 'ipcontroller-client.url'), 'w') as f:
             f.write("%s://%s:%s"%(cf.client_transport, cf.client_ip, cf.regport))
 
 
     def do_import_statements(self):
         statements = self.import_statements
         for s in statements:
             try:
                 self.log.msg("Executing statement: '%s'" % s)
                 exec s in globals(), locals()
             except:
                 self.log.msg("Error running statement: %s" % s)
 
     def forward_logging(self):
         if self.log_url:
             self.log.info("Forwarding logging to %s"%self.log_url)
             context = zmq.Context.instance()
             lsock = context.socket(zmq.PUB)
             lsock.connect(self.log_url)
             handler = PUBHandler(lsock)
             self.log.removeHandler(self._log_handler)
             handler.root_topic = 'controller'
             handler.setLevel(self.log_level)
             self.log.addHandler(handler)
             self._log_handler = handler
     # #
 
     def initialize(self, argv=None):
         super(IPControllerApp, self).initialize(argv)
         self.forward_logging()
         self.init_hub()
         self.init_schedulers()
 
     def start(self):
         # Start the subprocesses:
         self.factory.start()
         child_procs = []
         for child in self.children:
             child.start()
             if isinstance(child, ProcessMonitoredQueue):
                 child_procs.append(child.launcher)
             elif isinstance(child, Process):
                 child_procs.append(child)
         if child_procs:
             signal_children(child_procs)
 
         self.write_pid_file(overwrite=True)
 
         try:
             self.factory.loop.start()
         except KeyboardInterrupt:
             self.log.critical("Interrupted, Exiting...\n")
 
 
 
 def launch_new_instance():
     """Create and run the IPython controller"""
     if sys.platform == 'win32':
         # make sure we don't get called from a multiprocessing subprocess
         # this can result in infinite Controllers being started on Windows
         # which doesn't have a proper fork, so multiprocessing is wonky
 
         # this only comes up when IPython has been installed using vanilla
         # setuptools, and *not* distribute.
         import multiprocessing
         p = multiprocessing.current_process()
         # the main process has name 'MainProcess'
         # subprocesses will have names like 'Process-1'
         if p.name != 'MainProcess':
             # we are a subprocess, don't start another Controller!
             return
     app = IPControllerApp.instance()
     app.initialize()
     app.start()
 
 
 if __name__ == '__main__':
     launch_new_instance()
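The mux and control relays above become MonitoredQueue devices with a ROUTER socket on both faces, each given an explicit IDENTITY so peers can address it. A toy sketch of the client-facing pattern those devices implement, using plain sockets rather than zmq.devices and a hypothetical endpoint (the real app binds hub.client_info['mux']):

import zmq

ctx = zmq.Context.instance()

# Stand-in for the controller's mux relay: a ROUTER facing clients.
router = ctx.socket(zmq.ROUTER)
router.setsockopt(zmq.IDENTITY, b'mux')   # fixed identity, as in the diff
router.bind('tcp://127.0.0.1:5555')       # hypothetical endpoint

# Stand-in for a connecting client: a DEALER, as Client uses for queries.
dealer = ctx.socket(zmq.DEALER)
dealer.setsockopt(zmq.IDENTITY, b'client-1')
dealer.connect('tcp://127.0.0.1:5555')

dealer.send_multipart([b'apply_request'])
# ROUTER prepends the sender's identity frame; that frame is how
# replies get routed back to the right peer.
ident, msg = router.recv_multipart()
router.send_multipart([ident, b'apply_reply'])
print(dealer.recv_multipart())   # [b'apply_reply']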
@@ -1,1435 +1,1435 @@
 """A semi-synchronous Client for the ZMQ cluster
 
 Authors:
 
 * MinRK
 """
 #-----------------------------------------------------------------------------
 # Copyright (C) 2010-2011 The IPython Development Team
 #
 # Distributed under the terms of the BSD License. The full license is in
 # the file COPYING, distributed as part of this software.
 #-----------------------------------------------------------------------------
 
 #-----------------------------------------------------------------------------
 # Imports
 #-----------------------------------------------------------------------------
 
 import os
 import json
 import sys
 import time
 import warnings
 from datetime import datetime
 from getpass import getpass
 from pprint import pprint
 
 pjoin = os.path.join
 
 import zmq
 # from zmq.eventloop import ioloop, zmqstream
 
 from IPython.config.configurable import MultipleInstanceError
 from IPython.core.application import BaseIPythonApplication
 
 from IPython.utils.jsonutil import rekey
 from IPython.utils.localinterfaces import LOCAL_IPS
 from IPython.utils.path import get_ipython_dir
 from IPython.utils.traitlets import (HasTraits, Int, Instance, Unicode,
                                     Dict, List, Bool, Set)
 from IPython.external.decorator import decorator
 from IPython.external.ssh import tunnel
 
 from IPython.parallel import error
 from IPython.parallel import util
 
 from IPython.zmq.session import Session, Message
 
 from .asyncresult import AsyncResult, AsyncHubResult
 from IPython.core.profiledir import ProfileDir, ProfileDirError
 from .view import DirectView, LoadBalancedView
 
 if sys.version_info[0] >= 3:
     # xrange is used in a couple 'isinstance' tests in py2
     # should be just 'range' in 3k
     xrange = range
 
 #--------------------------------------------------------------------------
 # Decorators for Client methods
 #--------------------------------------------------------------------------
 
 @decorator
 def spin_first(f, self, *args, **kwargs):
     """Call spin() to sync state prior to calling the method."""
     self.spin()
     return f(self, *args, **kwargs)
 
 
 #--------------------------------------------------------------------------
 # Classes
 #--------------------------------------------------------------------------
 
73 """Subclass of dict for initializing metadata values.
73 """Subclass of dict for initializing metadata values.
74
74
75 Attribute access works on keys.
75 Attribute access works on keys.
76
76
77 These objects have a strict set of keys - errors will raise if you try
77 These objects have a strict set of keys - errors will raise if you try
78 to add new keys.
78 to add new keys.
79 """
79 """
80 def __init__(self, *args, **kwargs):
80 def __init__(self, *args, **kwargs):
81 dict.__init__(self)
81 dict.__init__(self)
82 md = {'msg_id' : None,
82 md = {'msg_id' : None,
83 'submitted' : None,
83 'submitted' : None,
84 'started' : None,
84 'started' : None,
85 'completed' : None,
85 'completed' : None,
86 'received' : None,
86 'received' : None,
87 'engine_uuid' : None,
87 'engine_uuid' : None,
88 'engine_id' : None,
88 'engine_id' : None,
89 'follow' : None,
89 'follow' : None,
90 'after' : None,
90 'after' : None,
91 'status' : None,
91 'status' : None,
92
92
93 'pyin' : None,
93 'pyin' : None,
94 'pyout' : None,
94 'pyout' : None,
95 'pyerr' : None,
95 'pyerr' : None,
96 'stdout' : '',
96 'stdout' : '',
97 'stderr' : '',
97 'stderr' : '',
98 }
98 }
99 self.update(md)
99 self.update(md)
100 self.update(dict(*args, **kwargs))
100 self.update(dict(*args, **kwargs))
101
101
102 def __getattr__(self, key):
102 def __getattr__(self, key):
103 """getattr aliased to getitem"""
103 """getattr aliased to getitem"""
104 if key in self.iterkeys():
104 if key in self.iterkeys():
105 return self[key]
105 return self[key]
106 else:
106 else:
107 raise AttributeError(key)
107 raise AttributeError(key)
108
108
109 def __setattr__(self, key, value):
109 def __setattr__(self, key, value):
110 """setattr aliased to setitem, with strict"""
110 """setattr aliased to setitem, with strict"""
111 if key in self.iterkeys():
111 if key in self.iterkeys():
112 self[key] = value
112 self[key] = value
113 else:
113 else:
114 raise AttributeError(key)
114 raise AttributeError(key)
115
115
116 def __setitem__(self, key, value):
116 def __setitem__(self, key, value):
117 """strict static key enforcement"""
117 """strict static key enforcement"""
118 if key in self.iterkeys():
118 if key in self.iterkeys():
119 dict.__setitem__(self, key, value)
119 dict.__setitem__(self, key, value)
120 else:
120 else:
121 raise KeyError(key)
121 raise KeyError(key)
122
122
123
123
124 class Client(HasTraits):
124 class Client(HasTraits):
125 """A semi-synchronous client to the IPython ZMQ cluster
125 """A semi-synchronous client to the IPython ZMQ cluster
126
126
127 Parameters
127 Parameters
128 ----------
128 ----------
129
129
130 url_or_file : bytes or unicode; zmq url or path to ipcontroller-client.json
130 url_or_file : bytes or unicode; zmq url or path to ipcontroller-client.json
131 Connection information for the Hub's registration. If a json connector
131 Connection information for the Hub's registration. If a json connector
132 file is given, then likely no further configuration is necessary.
132 file is given, then likely no further configuration is necessary.
133 [Default: use profile]
133 [Default: use profile]
134 profile : bytes
134 profile : bytes
135 The name of the Cluster profile to be used to find connector information.
135 The name of the Cluster profile to be used to find connector information.
136 If run from an IPython application, the default profile will be the same
136 If run from an IPython application, the default profile will be the same
137 as the running application, otherwise it will be 'default'.
137 as the running application, otherwise it will be 'default'.
138 context : zmq.Context
138 context : zmq.Context
139 Pass an existing zmq.Context instance, otherwise the client will create its own.
139 Pass an existing zmq.Context instance, otherwise the client will create its own.
140 debug : bool
140 debug : bool
141 flag for lots of message printing for debug purposes
141 flag for lots of message printing for debug purposes
142 timeout : int/float
142 timeout : int/float
143 time (in seconds) to wait for connection replies from the Hub
143 time (in seconds) to wait for connection replies from the Hub
144 [Default: 10]
144 [Default: 10]
145
145
146 #-------------- session related args ----------------
146 #-------------- session related args ----------------
147
147
148 config : Config object
148 config : Config object
149 If specified, this will be relayed to the Session for configuration
149 If specified, this will be relayed to the Session for configuration
150 username : str
150 username : str
151 set username for the session object
151 set username for the session object
152 packer : str (import_string) or callable
152 packer : str (import_string) or callable
153 Can be either the simple keyword 'json' or 'pickle', or an import_string to a
153 Can be either the simple keyword 'json' or 'pickle', or an import_string to a
154 function to serialize messages. Must support same input as
154 function to serialize messages. Must support same input as
155 JSON, and output must be bytes.
155 JSON, and output must be bytes.
156 You can pass a callable directly as `pack`
156 You can pass a callable directly as `pack`
157 unpacker : str (import_string) or callable
157 unpacker : str (import_string) or callable
158 The inverse of packer. Only necessary if packer is specified as *not* one
158 The inverse of packer. Only necessary if packer is specified as *not* one
159 of 'json' or 'pickle'.
159 of 'json' or 'pickle'.
160
160
161 #-------------- ssh related args ----------------
161 #-------------- ssh related args ----------------
162 # These are args for configuring the ssh tunnel to be used
162 # These are args for configuring the ssh tunnel to be used
163 # credentials are used to forward connections over ssh to the Controller
163 # credentials are used to forward connections over ssh to the Controller
164 # Note that the ip given in `addr` needs to be relative to sshserver
164 # Note that the ip given in `addr` needs to be relative to sshserver
165 # The most basic case is to leave addr as pointing to localhost (127.0.0.1),
165 # The most basic case is to leave addr as pointing to localhost (127.0.0.1),
166 # and set sshserver as the same machine the Controller is on. However,
166 # and set sshserver as the same machine the Controller is on. However,
167 # the only requirement is that sshserver is able to see the Controller
167 # the only requirement is that sshserver is able to see the Controller
168 # (i.e. is within the same trusted network).
168 # (i.e. is within the same trusted network).
169
169
170 sshserver : str
170 sshserver : str
171 A string of the form passed to ssh, i.e. 'server.tld' or 'user@server.tld:port'
171 A string of the form passed to ssh, i.e. 'server.tld' or 'user@server.tld:port'
172 If keyfile or password is specified, and this is not, it will default to
172 If keyfile or password is specified, and this is not, it will default to
173 the ip given in addr.
173 the ip given in addr.
174 sshkey : str; path to ssh private key file
174 sshkey : str; path to ssh private key file
175 This specifies a key to be used in ssh login, default None.
175 This specifies a key to be used in ssh login, default None.
176 Regular default ssh keys will be used without specifying this argument.
176 Regular default ssh keys will be used without specifying this argument.
177 password : str
177 password : str
178 Your ssh password to sshserver. Note that if this is left None,
178 Your ssh password to sshserver. Note that if this is left None,
179 you will be prompted for it if passwordless key based login is unavailable.
179 you will be prompted for it if passwordless key based login is unavailable.
180 paramiko : bool
180 paramiko : bool
181 flag for whether to use paramiko instead of shell ssh for tunneling.
181 flag for whether to use paramiko instead of shell ssh for tunneling.
182 [default: True on win32, False else]
182 [default: True on win32, False else]
183
183
184 ------- exec authentication args -------
184 ------- exec authentication args -------
185 If even localhost is untrusted, you can have some protection against
185 If even localhost is untrusted, you can have some protection against
186 unauthorized execution by signing messages with HMAC digests.
186 unauthorized execution by signing messages with HMAC digests.
187 Messages are still sent as cleartext, so if someone can snoop your
187 Messages are still sent as cleartext, so if someone can snoop your
188 loopback traffic this will not protect your privacy, but will prevent
188 loopback traffic this will not protect your privacy, but will prevent
189 unauthorized execution.
189 unauthorized execution.
190
190
191 exec_key : str
191 exec_key : str
192 an authentication key or file containing a key
192 an authentication key or file containing a key
193 default: None
193 default: None
194
194
195
195
196 Attributes
196 Attributes
197 ----------
197 ----------
198
198
199 ids : list of int engine IDs
199 ids : list of int engine IDs
200 requesting the ids attribute always synchronizes
200 requesting the ids attribute always synchronizes
201 the registration state. To request ids without synchronization,
201 the registration state. To request ids without synchronization,
202 use semi-private _ids attributes.
202 use semi-private _ids attributes.
203
203
204 history : list of msg_ids
204 history : list of msg_ids
205 a list of msg_ids, keeping track of all the execution
205 a list of msg_ids, keeping track of all the execution
206 messages you have submitted in order.
206 messages you have submitted in order.
207
207
208 outstanding : set of msg_ids
208 outstanding : set of msg_ids
209 a set of msg_ids that have been submitted, but whose
209 a set of msg_ids that have been submitted, but whose
210 results have not yet been received.
210 results have not yet been received.
211
211
212 results : dict
212 results : dict
213 a dict of all our results, keyed by msg_id
213 a dict of all our results, keyed by msg_id
214
214
215 block : bool
215 block : bool
216 determines default behavior when block not specified
216 determines default behavior when block not specified
217 in execution methods
217 in execution methods
218
218
219 Methods
219 Methods
220 -------
220 -------
221
221
222 spin
222 spin
223 flushes incoming results and registration state changes
223 flushes incoming results and registration state changes
224 control methods spin, and requesting `ids` also ensures up to date
224 control methods spin, and requesting `ids` also ensures up to date
225
225
226 wait
226 wait
227 wait on one or more msg_ids
227 wait on one or more msg_ids
228
228
229 execution methods
229 execution methods
230 apply
230 apply
231 legacy: execute, run
231 legacy: execute, run
232
232
233 data movement
233 data movement
234 push, pull, scatter, gather
234 push, pull, scatter, gather
235
235
236 query methods
236 query methods
237 queue_status, get_result, purge, result_status
237 queue_status, get_result, purge, result_status
238
238
239 control methods
239 control methods
240 abort, shutdown
240 abort, shutdown
241
241
242 """
242 """
243
243
244
244
245 block = Bool(False)
245 block = Bool(False)
246 outstanding = Set()
246 outstanding = Set()
247 results = Instance('collections.defaultdict', (dict,))
247 results = Instance('collections.defaultdict', (dict,))
248 metadata = Instance('collections.defaultdict', (Metadata,))
248 metadata = Instance('collections.defaultdict', (Metadata,))
249 history = List()
249 history = List()
250 debug = Bool(False)
250 debug = Bool(False)
251
251
252 profile=Unicode()
252 profile=Unicode()
253 def _profile_default(self):
253 def _profile_default(self):
254 if BaseIPythonApplication.initialized():
254 if BaseIPythonApplication.initialized():
255 # an IPython app *might* be running, try to get its profile
255 # an IPython app *might* be running, try to get its profile
256 try:
256 try:
257 return BaseIPythonApplication.instance().profile
257 return BaseIPythonApplication.instance().profile
258 except (AttributeError, MultipleInstanceError):
258 except (AttributeError, MultipleInstanceError):
259 # could be a *different* subclass of config.Application,
259 # could be a *different* subclass of config.Application,
260 # which would raise one of these two errors.
260 # which would raise one of these two errors.
261 return u'default'
261 return u'default'
262 else:
262 else:
263 return u'default'
263 return u'default'
264
264
265
265
266 _outstanding_dict = Instance('collections.defaultdict', (set,))
266 _outstanding_dict = Instance('collections.defaultdict', (set,))
267 _ids = List()
267 _ids = List()
268 _connected=Bool(False)
268 _connected=Bool(False)
269 _ssh=Bool(False)
269 _ssh=Bool(False)
270 _context = Instance('zmq.Context')
270 _context = Instance('zmq.Context')
271 _config = Dict()
271 _config = Dict()
272 _engines=Instance(util.ReverseDict, (), {})
272 _engines=Instance(util.ReverseDict, (), {})
273 # _hub_socket=Instance('zmq.Socket')
273 # _hub_socket=Instance('zmq.Socket')
274 _query_socket=Instance('zmq.Socket')
274 _query_socket=Instance('zmq.Socket')
275 _control_socket=Instance('zmq.Socket')
275 _control_socket=Instance('zmq.Socket')
276 _iopub_socket=Instance('zmq.Socket')
276 _iopub_socket=Instance('zmq.Socket')
277 _notification_socket=Instance('zmq.Socket')
277 _notification_socket=Instance('zmq.Socket')
278 _mux_socket=Instance('zmq.Socket')
278 _mux_socket=Instance('zmq.Socket')
279 _task_socket=Instance('zmq.Socket')
279 _task_socket=Instance('zmq.Socket')
280 _task_scheme=Unicode()
280 _task_scheme=Unicode()
281 _closed = False
281 _closed = False
282 _ignored_control_replies=Int(0)
282 _ignored_control_replies=Int(0)
283 _ignored_hub_replies=Int(0)
283 _ignored_hub_replies=Int(0)
284
284
285 def __new__(self, *args, **kw):
285 def __new__(self, *args, **kw):
286 # don't raise on positional args
286 # don't raise on positional args
287 return HasTraits.__new__(self, **kw)
287 return HasTraits.__new__(self, **kw)
288
288
289 def __init__(self, url_or_file=None, profile=None, profile_dir=None, ipython_dir=None,
289 def __init__(self, url_or_file=None, profile=None, profile_dir=None, ipython_dir=None,
290 context=None, debug=False, exec_key=None,
290 context=None, debug=False, exec_key=None,
291 sshserver=None, sshkey=None, password=None, paramiko=None,
291 sshserver=None, sshkey=None, password=None, paramiko=None,
292 timeout=10, **extra_args
292 timeout=10, **extra_args
293 ):
293 ):
294 if profile:
294 if profile:
295 super(Client, self).__init__(debug=debug, profile=profile)
295 super(Client, self).__init__(debug=debug, profile=profile)
296 else:
296 else:
297 super(Client, self).__init__(debug=debug)
297 super(Client, self).__init__(debug=debug)
298 if context is None:
298 if context is None:
299 context = zmq.Context.instance()
299 context = zmq.Context.instance()
300 self._context = context
300 self._context = context
301
301
302 self._setup_profile_dir(self.profile, profile_dir, ipython_dir)
302 self._setup_profile_dir(self.profile, profile_dir, ipython_dir)
303 if self._cd is not None:
303 if self._cd is not None:
304 if url_or_file is None:
304 if url_or_file is None:
305 url_or_file = pjoin(self._cd.security_dir, 'ipcontroller-client.json')
305 url_or_file = pjoin(self._cd.security_dir, 'ipcontroller-client.json')
306 assert url_or_file is not None, "I can't find enough information to connect to a hub!"\
306 assert url_or_file is not None, "I can't find enough information to connect to a hub!"\
307 " Please specify at least one of url_or_file or profile."
307 " Please specify at least one of url_or_file or profile."
308
308
309 try:
309 try:
310 util.validate_url(url_or_file)
310 util.validate_url(url_or_file)
311 except AssertionError:
311 except AssertionError:
312 if not os.path.exists(url_or_file):
312 if not os.path.exists(url_or_file):
313 if self._cd:
313 if self._cd:
314 url_or_file = os.path.join(self._cd.security_dir, url_or_file)
314 url_or_file = os.path.join(self._cd.security_dir, url_or_file)
315 assert os.path.exists(url_or_file), "Not a valid connection file or url: %r"%url_or_file
315 assert os.path.exists(url_or_file), "Not a valid connection file or url: %r"%url_or_file
316 with open(url_or_file) as f:
316 with open(url_or_file) as f:
317 cfg = json.loads(f.read())
317 cfg = json.loads(f.read())
318 else:
318 else:
319 cfg = {'url':url_or_file}
319 cfg = {'url':url_or_file}
320
320
321 # sync defaults from args, json:
321 # sync defaults from args, json:
322 if sshserver:
322 if sshserver:
323 cfg['ssh'] = sshserver
323 cfg['ssh'] = sshserver
324 if exec_key:
324 if exec_key:
325 cfg['exec_key'] = exec_key
325 cfg['exec_key'] = exec_key
326 exec_key = cfg['exec_key']
326 exec_key = cfg['exec_key']
327 location = cfg.setdefault('location', None)
327 location = cfg.setdefault('location', None)
328 cfg['url'] = util.disambiguate_url(cfg['url'], location)
328 cfg['url'] = util.disambiguate_url(cfg['url'], location)
329 url = cfg['url']
329 url = cfg['url']
330 proto,addr,port = util.split_url(url)
330 proto,addr,port = util.split_url(url)
331 if location is not None and addr == '127.0.0.1':
331 if location is not None and addr == '127.0.0.1':
332 # location specified, and connection is expected to be local
332 # location specified, and connection is expected to be local
333 if location not in LOCAL_IPS and not sshserver:
333 if location not in LOCAL_IPS and not sshserver:
334 # load ssh from JSON *only* if the controller is not on
334 # load ssh from JSON *only* if the controller is not on
335 # this machine
335 # this machine
336 sshserver=cfg['ssh']
336 sshserver=cfg['ssh']
337 if location not in LOCAL_IPS and not sshserver:
337 if location not in LOCAL_IPS and not sshserver:
338 # warn if no ssh specified, but SSH is probably needed
338 # warn if no ssh specified, but SSH is probably needed
339 # This is only a warning, because the most likely cause
339 # This is only a warning, because the most likely cause
340 # is a local Controller on a laptop whose IP is dynamic
340 # is a local Controller on a laptop whose IP is dynamic
341 warnings.warn("""
341 warnings.warn("""
342 Controller appears to be listening on localhost, but not on this machine.
342 Controller appears to be listening on localhost, but not on this machine.
343 If this is true, you should specify Client(...,sshserver='you@%s')
343 If this is true, you should specify Client(...,sshserver='you@%s')
344 or instruct your controller to listen on an external IP."""%location,
344 or instruct your controller to listen on an external IP."""%location,
345 RuntimeWarning)
345 RuntimeWarning)
346 elif not sshserver:
346 elif not sshserver:
347 # otherwise sync with cfg
347 # otherwise sync with cfg
348 sshserver = cfg['ssh']
348 sshserver = cfg['ssh']
349
349
350 self._config = cfg
350 self._config = cfg
351
351
352 self._ssh = bool(sshserver or sshkey or password)
352 self._ssh = bool(sshserver or sshkey or password)
353 if self._ssh and sshserver is None:
353 if self._ssh and sshserver is None:
354 # default to ssh via localhost
354 # default to ssh via localhost
355 sshserver = url.split('://')[1].split(':')[0]
355 sshserver = url.split('://')[1].split(':')[0]
356 if self._ssh and password is None:
356 if self._ssh and password is None:
357 if tunnel.try_passwordless_ssh(sshserver, sshkey, paramiko):
357 if tunnel.try_passwordless_ssh(sshserver, sshkey, paramiko):
358 password=False
358 password=False
359 else:
359 else:
360 password = getpass("SSH Password for %s: "%sshserver)
360 password = getpass("SSH Password for %s: "%sshserver)
361 ssh_kwargs = dict(keyfile=sshkey, password=password, paramiko=paramiko)
361 ssh_kwargs = dict(keyfile=sshkey, password=password, paramiko=paramiko)
362
362
363 # configure and construct the session
363 # configure and construct the session
364 if exec_key is not None:
364 if exec_key is not None:
365 if os.path.isfile(exec_key):
365 if os.path.isfile(exec_key):
366 extra_args['keyfile'] = exec_key
366 extra_args['keyfile'] = exec_key
367 else:
367 else:
368 exec_key = util.asbytes(exec_key)
368 exec_key = util.asbytes(exec_key)
369 extra_args['key'] = exec_key
369 extra_args['key'] = exec_key
370 self.session = Session(**extra_args)
370 self.session = Session(**extra_args)
371
371
372 self._query_socket = self._context.socket(zmq.XREQ)
372 self._query_socket = self._context.socket(zmq.DEALER)
373 self._query_socket.setsockopt(zmq.IDENTITY, util.asbytes(self.session.session))
373 self._query_socket.setsockopt(zmq.IDENTITY, util.asbytes(self.session.session))
374 if self._ssh:
374 if self._ssh:
375 tunnel.tunnel_connection(self._query_socket, url, sshserver, **ssh_kwargs)
375 tunnel.tunnel_connection(self._query_socket, url, sshserver, **ssh_kwargs)
376 else:
376 else:
377 self._query_socket.connect(url)
377 self._query_socket.connect(url)
378
378
379 self.session.debug = self.debug
379 self.session.debug = self.debug
380
380
381 self._notification_handlers = {'registration_notification' : self._register_engine,
381 self._notification_handlers = {'registration_notification' : self._register_engine,
382 'unregistration_notification' : self._unregister_engine,
382 'unregistration_notification' : self._unregister_engine,
383 'shutdown_notification' : lambda msg: self.close(),
383 'shutdown_notification' : lambda msg: self.close(),
384 }
384 }
385 self._queue_handlers = {'execute_reply' : self._handle_execute_reply,
385 self._queue_handlers = {'execute_reply' : self._handle_execute_reply,
386 'apply_reply' : self._handle_apply_reply}
386 'apply_reply' : self._handle_apply_reply}
387 self._connect(sshserver, ssh_kwargs, timeout)
387 self._connect(sshserver, ssh_kwargs, timeout)
388
388
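A Client is normally constructed from the pieces set up above: a JSON connection
file (looked up in the profile's security dir when a bare name is given) or a url,
optionally tunneled over SSH. A minimal sketch; the file paths and hostname are
hypothetical:

    from IPython.parallel import Client

    # local controller: point at the connection file written by ipcontroller
    rc = Client('/path/to/security/ipcontroller-client.json')

    # remote controller: tunnel over SSH; a password is prompted for unless
    # passwordless ssh (or sshkey=...) succeeds
    rc = Client('/path/to/ipcontroller-client.json',
                sshserver='user@controller.example.com')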
389 def __del__(self):
389 def __del__(self):
390 """cleanup sockets, but _not_ context."""
390 """cleanup sockets, but _not_ context."""
391 self.close()
391 self.close()
392
392
393 def _setup_profile_dir(self, profile, profile_dir, ipython_dir):
393 def _setup_profile_dir(self, profile, profile_dir, ipython_dir):
394 if ipython_dir is None:
394 if ipython_dir is None:
395 ipython_dir = get_ipython_dir()
395 ipython_dir = get_ipython_dir()
396 if profile_dir is not None:
396 if profile_dir is not None:
397 try:
397 try:
398 self._cd = ProfileDir.find_profile_dir(profile_dir)
398 self._cd = ProfileDir.find_profile_dir(profile_dir)
399 return
399 return
400 except ProfileDirError:
400 except ProfileDirError:
401 pass
401 pass
402 elif profile is not None:
402 elif profile is not None:
403 try:
403 try:
404 self._cd = ProfileDir.find_profile_dir_by_name(
404 self._cd = ProfileDir.find_profile_dir_by_name(
405 ipython_dir, profile)
405 ipython_dir, profile)
406 return
406 return
407 except ProfileDirError:
407 except ProfileDirError:
408 pass
408 pass
409 self._cd = None
409 self._cd = None
410
410
411 def _update_engines(self, engines):
411 def _update_engines(self, engines):
412 """Update our engines dict and _ids from a dict of the form: {id:uuid}."""
412 """Update our engines dict and _ids from a dict of the form: {id:uuid}."""
413 for k,v in engines.iteritems():
413 for k,v in engines.iteritems():
414 eid = int(k)
414 eid = int(k)
415 self._engines[eid] = v
415 self._engines[eid] = v
416 self._ids.append(eid)
416 self._ids.append(eid)
417 self._ids = sorted(self._ids)
417 self._ids = sorted(self._ids)
418 if sorted(self._engines.keys()) != range(len(self._engines)) and \
418 if sorted(self._engines.keys()) != range(len(self._engines)) and \
419 self._task_scheme == 'pure' and self._task_socket:
419 self._task_scheme == 'pure' and self._task_socket:
420 self._stop_scheduling_tasks()
420 self._stop_scheduling_tasks()
421
421
422 def _stop_scheduling_tasks(self):
422 def _stop_scheduling_tasks(self):
423 """Stop scheduling tasks because an engine has been unregistered
423 """Stop scheduling tasks because an engine has been unregistered
424 from a pure ZMQ scheduler.
424 from a pure ZMQ scheduler.
425 """
425 """
426 self._task_socket.close()
426 self._task_socket.close()
427 self._task_socket = None
427 self._task_socket = None
428 msg = "An engine has been unregistered, and we are using pure " +\
428 msg = "An engine has been unregistered, and we are using pure " +\
429 "ZMQ task scheduling. Task farming will be disabled."
429 "ZMQ task scheduling. Task farming will be disabled."
430 if self.outstanding:
430 if self.outstanding:
431 msg += " If you were running tasks when this happened, " +\
431 msg += " If you were running tasks when this happened, " +\
432 "some `outstanding` msg_ids may never resolve."
432 "some `outstanding` msg_ids may never resolve."
433 warnings.warn(msg, RuntimeWarning)
433 warnings.warn(msg, RuntimeWarning)
434
434
435 def _build_targets(self, targets):
435 def _build_targets(self, targets):
436 """Turn valid target IDs or 'all' into two lists:
436 """Turn valid target IDs or 'all' into two lists:
437 (int_ids, uuids).
437 (int_ids, uuids).
438 """
438 """
439 if not self._ids:
439 if not self._ids:
440 # flush notification socket if no engines yet, just in case
440 # flush notification socket if no engines yet, just in case
441 if not self.ids:
441 if not self.ids:
442 raise error.NoEnginesRegistered("Can't build targets without any engines")
442 raise error.NoEnginesRegistered("Can't build targets without any engines")
443
443
444 if targets is None:
444 if targets is None:
445 targets = self._ids
445 targets = self._ids
446 elif isinstance(targets, basestring):
446 elif isinstance(targets, basestring):
447 if targets.lower() == 'all':
447 if targets.lower() == 'all':
448 targets = self._ids
448 targets = self._ids
449 else:
449 else:
450 raise TypeError("%r is not a valid str target; must be 'all'"%(targets))
450 raise TypeError("%r is not a valid str target; must be 'all'"%(targets))
451 elif isinstance(targets, int):
451 elif isinstance(targets, int):
452 if targets < 0:
452 if targets < 0:
453 targets = self.ids[targets]
453 targets = self.ids[targets]
454 if targets not in self._ids:
454 if targets not in self._ids:
455 raise IndexError("No such engine: %i"%targets)
455 raise IndexError("No such engine: %i"%targets)
456 targets = [targets]
456 targets = [targets]
457
457
458 if isinstance(targets, slice):
458 if isinstance(targets, slice):
459 indices = range(len(self._ids))[targets]
459 indices = range(len(self._ids))[targets]
460 ids = self.ids
460 ids = self.ids
461 targets = [ ids[i] for i in indices ]
461 targets = [ ids[i] for i in indices ]
462
462
463 if not isinstance(targets, (tuple, list, xrange)):
463 if not isinstance(targets, (tuple, list, xrange)):
464 raise TypeError("targets by int/slice/collection of ints only, not %s"%(type(targets)))
464 raise TypeError("targets by int/slice/collection of ints only, not %s"%(type(targets)))
465
465
466 return [util.asbytes(self._engines[t]) for t in targets], list(targets)
466 return [util.asbytes(self._engines[t]) for t in targets], list(targets)
467
467
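The targets argument is forgiving about spelling; a sketch of the forms
_build_targets normalizes, assuming a connected client rc with engines 0-3:

    rc._build_targets('all')           # every registered engine
    rc._build_targets(3)               # single id -> ([uuid], [3])
    rc._build_targets(-1)              # negative ints index into rc.ids
    rc._build_targets(slice(0, 4, 2))  # engines 0 and 2
    rc._build_targets([0, 2, 3])       # explicit list of ids

Each call returns the pair (uuids, int_ids) used for routing and bookkeeping.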
468 def _connect(self, sshserver, ssh_kwargs, timeout):
468 def _connect(self, sshserver, ssh_kwargs, timeout):
469 """setup all our socket connections to the cluster. This is called from
469 """setup all our socket connections to the cluster. This is called from
470 __init__."""
470 __init__."""
471
471
472 # Maybe allow reconnecting?
472 # Maybe allow reconnecting?
473 if self._connected:
473 if self._connected:
474 return
474 return
475 self._connected=True
475 self._connected=True
476
476
477 def connect_socket(s, url):
477 def connect_socket(s, url):
478 url = util.disambiguate_url(url, self._config['location'])
478 url = util.disambiguate_url(url, self._config['location'])
479 if self._ssh:
479 if self._ssh:
480 return tunnel.tunnel_connection(s, url, sshserver, **ssh_kwargs)
480 return tunnel.tunnel_connection(s, url, sshserver, **ssh_kwargs)
481 else:
481 else:
482 return s.connect(url)
482 return s.connect(url)
483
483
484 self.session.send(self._query_socket, 'connection_request')
484 self.session.send(self._query_socket, 'connection_request')
485 # use Poller because zmq.select has wrong units in pyzmq 2.1.7
485 # use Poller because zmq.select has wrong units in pyzmq 2.1.7
486 poller = zmq.Poller()
486 poller = zmq.Poller()
487 poller.register(self._query_socket, zmq.POLLIN)
487 poller.register(self._query_socket, zmq.POLLIN)
488 # poll expects milliseconds, timeout is seconds
488 # poll expects milliseconds, timeout is seconds
489 evts = poller.poll(timeout*1000)
489 evts = poller.poll(timeout*1000)
490 if not evts:
490 if not evts:
491 raise error.TimeoutError("Hub connection request timed out")
491 raise error.TimeoutError("Hub connection request timed out")
492 idents,msg = self.session.recv(self._query_socket,mode=0)
492 idents,msg = self.session.recv(self._query_socket,mode=0)
493 if self.debug:
493 if self.debug:
494 pprint(msg)
494 pprint(msg)
495 msg = Message(msg)
495 msg = Message(msg)
496 content = msg.content
496 content = msg.content
497 self._config['registration'] = dict(content)
497 self._config['registration'] = dict(content)
498 if content.status == 'ok':
498 if content.status == 'ok':
499 ident = util.asbytes(self.session.session)
499 ident = util.asbytes(self.session.session)
500 if content.mux:
500 if content.mux:
501 self._mux_socket = self._context.socket(zmq.XREQ)
501 self._mux_socket = self._context.socket(zmq.DEALER)
502 self._mux_socket.setsockopt(zmq.IDENTITY, ident)
502 self._mux_socket.setsockopt(zmq.IDENTITY, ident)
503 connect_socket(self._mux_socket, content.mux)
503 connect_socket(self._mux_socket, content.mux)
504 if content.task:
504 if content.task:
505 self._task_scheme, task_addr = content.task
505 self._task_scheme, task_addr = content.task
506 self._task_socket = self._context.socket(zmq.XREQ)
506 self._task_socket = self._context.socket(zmq.DEALER)
507 self._task_socket.setsockopt(zmq.IDENTITY, ident)
507 self._task_socket.setsockopt(zmq.IDENTITY, ident)
508 connect_socket(self._task_socket, task_addr)
508 connect_socket(self._task_socket, task_addr)
509 if content.notification:
509 if content.notification:
510 self._notification_socket = self._context.socket(zmq.SUB)
510 self._notification_socket = self._context.socket(zmq.SUB)
511 connect_socket(self._notification_socket, content.notification)
511 connect_socket(self._notification_socket, content.notification)
512 self._notification_socket.setsockopt(zmq.SUBSCRIBE, b'')
512 self._notification_socket.setsockopt(zmq.SUBSCRIBE, b'')
513 # if content.query:
513 # if content.query:
514 # self._query_socket = self._context.socket(zmq.XREQ)
514 # self._query_socket = self._context.socket(zmq.DEALER)
515 # self._query_socket.setsockopt(zmq.IDENTITY, self.session.session)
515 # self._query_socket.setsockopt(zmq.IDENTITY, self.session.session)
516 # connect_socket(self._query_socket, content.query)
516 # connect_socket(self._query_socket, content.query)
517 if content.control:
517 if content.control:
518 self._control_socket = self._context.socket(zmq.XREQ)
518 self._control_socket = self._context.socket(zmq.DEALER)
519 self._control_socket.setsockopt(zmq.IDENTITY, ident)
519 self._control_socket.setsockopt(zmq.IDENTITY, ident)
520 connect_socket(self._control_socket, content.control)
520 connect_socket(self._control_socket, content.control)
521 if content.iopub:
521 if content.iopub:
522 self._iopub_socket = self._context.socket(zmq.SUB)
522 self._iopub_socket = self._context.socket(zmq.SUB)
523 self._iopub_socket.setsockopt(zmq.SUBSCRIBE, b'')
523 self._iopub_socket.setsockopt(zmq.SUBSCRIBE, b'')
524 self._iopub_socket.setsockopt(zmq.IDENTITY, ident)
524 self._iopub_socket.setsockopt(zmq.IDENTITY, ident)
525 connect_socket(self._iopub_socket, content.iopub)
525 connect_socket(self._iopub_socket, content.iopub)
526 self._update_engines(dict(content.engines))
526 self._update_engines(dict(content.engines))
527 else:
527 else:
528 self._connected = False
528 self._connected = False
529 raise Exception("Failed to connect!")
529 raise Exception("Failed to connect!")
530
530
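The Poller above is the portable way to put a timeout on a blocking recv; the
same idiom in isolation (a sketch, with socket setup elided):

    import zmq

    def recv_with_timeout(socket, timeout):
        """Receive from socket, or raise if nothing arrives in timeout seconds."""
        poller = zmq.Poller()
        poller.register(socket, zmq.POLLIN)
        # Poller.poll() expects milliseconds, so convert from seconds
        if not poller.poll(timeout * 1000):
            raise RuntimeError("recv timed out after %s seconds" % timeout)
        return socket.recv_multipart()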
531 #--------------------------------------------------------------------------
531 #--------------------------------------------------------------------------
532 # handlers and callbacks for incoming messages
532 # handlers and callbacks for incoming messages
533 #--------------------------------------------------------------------------
533 #--------------------------------------------------------------------------
534
534
535 def _unwrap_exception(self, content):
535 def _unwrap_exception(self, content):
536 """unwrap exception, and remap engine_id to int."""
536 """unwrap exception, and remap engine_id to int."""
537 e = error.unwrap_exception(content)
537 e = error.unwrap_exception(content)
538 # print e.traceback
538 # print e.traceback
539 if e.engine_info:
539 if e.engine_info:
540 e_uuid = e.engine_info['engine_uuid']
540 e_uuid = e.engine_info['engine_uuid']
541 eid = self._engines[e_uuid]
541 eid = self._engines[e_uuid]
542 e.engine_info['engine_id'] = eid
542 e.engine_info['engine_id'] = eid
543 return e
543 return e
544
544
545 def _extract_metadata(self, header, parent, content):
545 def _extract_metadata(self, header, parent, content):
546 md = {'msg_id' : parent['msg_id'],
546 md = {'msg_id' : parent['msg_id'],
547 'received' : datetime.now(),
547 'received' : datetime.now(),
548 'engine_uuid' : header.get('engine', None),
548 'engine_uuid' : header.get('engine', None),
549 'follow' : parent.get('follow', []),
549 'follow' : parent.get('follow', []),
550 'after' : parent.get('after', []),
550 'after' : parent.get('after', []),
551 'status' : content['status'],
551 'status' : content['status'],
552 }
552 }
553
553
554 if md['engine_uuid'] is not None:
554 if md['engine_uuid'] is not None:
555 md['engine_id'] = self._engines.get(md['engine_uuid'], None)
555 md['engine_id'] = self._engines.get(md['engine_uuid'], None)
556
556
557 if 'date' in parent:
557 if 'date' in parent:
558 md['submitted'] = parent['date']
558 md['submitted'] = parent['date']
559 if 'started' in header:
559 if 'started' in header:
560 md['started'] = header['started']
560 md['started'] = header['started']
561 if 'date' in header:
561 if 'date' in header:
562 md['completed'] = header['date']
562 md['completed'] = header['date']
563 return md
563 return md
564
564
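Because 'submitted', 'started', and 'completed' are datetime objects, timing
questions reduce to subtraction; a sketch, assuming md is a fully populated
metadata entry:

    queue_delay  = md['started'] - md['submitted']    # waiting in queues
    compute_time = md['completed'] - md['started']    # actually executing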
565 def _register_engine(self, msg):
565 def _register_engine(self, msg):
566 """Register a new engine, and update our connection info."""
566 """Register a new engine, and update our connection info."""
567 content = msg['content']
567 content = msg['content']
568 eid = content['id']
568 eid = content['id']
569 d = {eid : content['queue']}
569 d = {eid : content['queue']}
570 self._update_engines(d)
570 self._update_engines(d)
571
571
572 def _unregister_engine(self, msg):
572 def _unregister_engine(self, msg):
573 """Unregister an engine that has died."""
573 """Unregister an engine that has died."""
574 content = msg['content']
574 content = msg['content']
575 eid = int(content['id'])
575 eid = int(content['id'])
576 if eid in self._ids:
576 if eid in self._ids:
577 self._ids.remove(eid)
577 self._ids.remove(eid)
578 uuid = self._engines.pop(eid)
578 uuid = self._engines.pop(eid)
579
579
580 self._handle_stranded_msgs(eid, uuid)
580 self._handle_stranded_msgs(eid, uuid)
581
581
582 if self._task_socket and self._task_scheme == 'pure':
582 if self._task_socket and self._task_scheme == 'pure':
583 self._stop_scheduling_tasks()
583 self._stop_scheduling_tasks()
584
584
585 def _handle_stranded_msgs(self, eid, uuid):
585 def _handle_stranded_msgs(self, eid, uuid):
586 """Handle messages known to be on an engine when the engine unregisters.
586 """Handle messages known to be on an engine when the engine unregisters.
587
587
588 It is possible that this will fire prematurely - that is, an engine will
588 It is possible that this will fire prematurely - that is, an engine will
589 go down after completing a result, and the client will be notified
589 go down after completing a result, and the client will be notified
590 of the unregistration and later receive the successful result.
590 of the unregistration and later receive the successful result.
591 """
591 """
592
592
593 outstanding = self._outstanding_dict[uuid]
593 outstanding = self._outstanding_dict[uuid]
594
594
595 for msg_id in list(outstanding):
595 for msg_id in list(outstanding):
596 if msg_id in self.results:
596 if msg_id in self.results:
597 # we already have this result
597 # we already have this result
598 continue
598 continue
599 try:
599 try:
600 raise error.EngineError("Engine %r died while running task %r"%(eid, msg_id))
600 raise error.EngineError("Engine %r died while running task %r"%(eid, msg_id))
601 except:
601 except:
602 content = error.wrap_exception()
602 content = error.wrap_exception()
603 # build a fake message:
603 # build a fake message:
604 parent = {}
604 parent = {}
605 header = {}
605 header = {}
606 parent['msg_id'] = msg_id
606 parent['msg_id'] = msg_id
607 header['engine'] = uuid
607 header['engine'] = uuid
608 header['date'] = datetime.now()
608 header['date'] = datetime.now()
609 msg = dict(parent_header=parent, header=header, content=content)
609 msg = dict(parent_header=parent, header=header, content=content)
610 self._handle_apply_reply(msg)
610 self._handle_apply_reply(msg)
611
611
612 def _handle_execute_reply(self, msg):
612 def _handle_execute_reply(self, msg):
613 """Save the reply to an execute_request into our results.
613 """Save the reply to an execute_request into our results.
614
614
615 execute messages are never actually used; apply is used instead.
615 execute messages are never actually used; apply is used instead.
616 """
616 """
617
617
618 parent = msg['parent_header']
618 parent = msg['parent_header']
619 msg_id = parent['msg_id']
619 msg_id = parent['msg_id']
620 if msg_id not in self.outstanding:
620 if msg_id not in self.outstanding:
621 if msg_id in self.history:
621 if msg_id in self.history:
622 print ("got stale result: %s"%msg_id)
622 print ("got stale result: %s"%msg_id)
623 else:
623 else:
624 print ("got unknown result: %s"%msg_id)
624 print ("got unknown result: %s"%msg_id)
625 else:
625 else:
626 self.outstanding.remove(msg_id)
626 self.outstanding.remove(msg_id)
627 self.results[msg_id] = self._unwrap_exception(msg['content'])
627 self.results[msg_id] = self._unwrap_exception(msg['content'])
628
628
629 def _handle_apply_reply(self, msg):
629 def _handle_apply_reply(self, msg):
630 """Save the reply to an apply_request into our results."""
630 """Save the reply to an apply_request into our results."""
631 parent = msg['parent_header']
631 parent = msg['parent_header']
632 msg_id = parent['msg_id']
632 msg_id = parent['msg_id']
633 if msg_id not in self.outstanding:
633 if msg_id not in self.outstanding:
634 if msg_id in self.history:
634 if msg_id in self.history:
635 print ("got stale result: %s"%msg_id)
635 print ("got stale result: %s"%msg_id)
636 print self.results[msg_id]
636 print self.results[msg_id]
637 print msg
637 print msg
638 else:
638 else:
639 print ("got unknown result: %s"%msg_id)
639 print ("got unknown result: %s"%msg_id)
640 else:
640 else:
641 self.outstanding.remove(msg_id)
641 self.outstanding.remove(msg_id)
642 content = msg['content']
642 content = msg['content']
643 header = msg['header']
643 header = msg['header']
644
644
645 # construct metadata:
645 # construct metadata:
646 md = self.metadata[msg_id]
646 md = self.metadata[msg_id]
647 md.update(self._extract_metadata(header, parent, content))
647 md.update(self._extract_metadata(header, parent, content))
648 # is this redundant?
648 # is this redundant?
649 self.metadata[msg_id] = md
649 self.metadata[msg_id] = md
650
650
651 e_outstanding = self._outstanding_dict[md['engine_uuid']]
651 e_outstanding = self._outstanding_dict[md['engine_uuid']]
652 if msg_id in e_outstanding:
652 if msg_id in e_outstanding:
653 e_outstanding.remove(msg_id)
653 e_outstanding.remove(msg_id)
654
654
655 # construct result:
655 # construct result:
656 if content['status'] == 'ok':
656 if content['status'] == 'ok':
657 self.results[msg_id] = util.unserialize_object(msg['buffers'])[0]
657 self.results[msg_id] = util.unserialize_object(msg['buffers'])[0]
658 elif content['status'] == 'aborted':
658 elif content['status'] == 'aborted':
659 self.results[msg_id] = error.TaskAborted(msg_id)
659 self.results[msg_id] = error.TaskAborted(msg_id)
660 elif content['status'] == 'resubmitted':
660 elif content['status'] == 'resubmitted':
661 # TODO: handle resubmission
661 # TODO: handle resubmission
662 pass
662 pass
663 else:
663 else:
664 self.results[msg_id] = self._unwrap_exception(content)
664 self.results[msg_id] = self._unwrap_exception(content)
665
665
666 def _flush_notifications(self):
666 def _flush_notifications(self):
667 """Flush notifications of engine registrations waiting
667 """Flush notifications of engine registrations waiting
668 in ZMQ queue."""
668 in ZMQ queue."""
669 idents,msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)
669 idents,msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)
670 while msg is not None:
670 while msg is not None:
671 if self.debug:
671 if self.debug:
672 pprint(msg)
672 pprint(msg)
673 msg_type = msg['header']['msg_type']
673 msg_type = msg['header']['msg_type']
674 handler = self._notification_handlers.get(msg_type, None)
674 handler = self._notification_handlers.get(msg_type, None)
675 if handler is None:
675 if handler is None:
676 raise Exception("Unhandled message type: %s"%msg_type)
676 raise Exception("Unhandled message type: %s"%msg_type)
677 else:
677 else:
678 handler(msg)
678 handler(msg)
679 idents,msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)
679 idents,msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)
680
680
681 def _flush_results(self, sock):
681 def _flush_results(self, sock):
682 """Flush task or queue results waiting in ZMQ queue."""
682 """Flush task or queue results waiting in ZMQ queue."""
683 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
683 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
684 while msg is not None:
684 while msg is not None:
685 if self.debug:
685 if self.debug:
686 pprint(msg)
686 pprint(msg)
687 msg_type = msg['header']['msg_type']
687 msg_type = msg['header']['msg_type']
688 handler = self._queue_handlers.get(msg_type, None)
688 handler = self._queue_handlers.get(msg_type, None)
689 if handler is None:
689 if handler is None:
690 raise Exception("Unhandled message type: %s"%msg.msg_type)
690 raise Exception("Unhandled message type: %s"%msg.msg_type)
691 else:
691 else:
692 handler(msg)
692 handler(msg)
693 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
693 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
694
694
695 def _flush_control(self, sock):
695 def _flush_control(self, sock):
696 """Flush replies from the control channel waiting
696 """Flush replies from the control channel waiting
697 in the ZMQ queue.
697 in the ZMQ queue.
698
698
699 Currently: ignore them."""
699 Currently: ignore them."""
700 if self._ignored_control_replies <= 0:
700 if self._ignored_control_replies <= 0:
701 return
701 return
702 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
702 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
703 while msg is not None:
703 while msg is not None:
704 self._ignored_control_replies -= 1
704 self._ignored_control_replies -= 1
705 if self.debug:
705 if self.debug:
706 pprint(msg)
706 pprint(msg)
707 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
707 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
708
708
709 def _flush_ignored_control(self):
709 def _flush_ignored_control(self):
710 """flush ignored control replies"""
710 """flush ignored control replies"""
711 while self._ignored_control_replies > 0:
711 while self._ignored_control_replies > 0:
712 self.session.recv(self._control_socket)
712 self.session.recv(self._control_socket)
713 self._ignored_control_replies -= 1
713 self._ignored_control_replies -= 1
714
714
715 def _flush_ignored_hub_replies(self):
715 def _flush_ignored_hub_replies(self):
716 ident,msg = self.session.recv(self._query_socket, mode=zmq.NOBLOCK)
716 ident,msg = self.session.recv(self._query_socket, mode=zmq.NOBLOCK)
717 while msg is not None:
717 while msg is not None:
718 ident,msg = self.session.recv(self._query_socket, mode=zmq.NOBLOCK)
718 ident,msg = self.session.recv(self._query_socket, mode=zmq.NOBLOCK)
719
719
720 def _flush_iopub(self, sock):
720 def _flush_iopub(self, sock):
721 """Flush replies from the iopub channel waiting
721 """Flush replies from the iopub channel waiting
722 in the ZMQ queue.
722 in the ZMQ queue.
723 """
723 """
724 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
724 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
725 while msg is not None:
725 while msg is not None:
726 if self.debug:
726 if self.debug:
727 pprint(msg)
727 pprint(msg)
728 parent = msg['parent_header']
728 parent = msg['parent_header']
729 msg_id = parent['msg_id']
729 msg_id = parent['msg_id']
730 content = msg['content']
730 content = msg['content']
731 header = msg['header']
731 header = msg['header']
732 msg_type = msg['header']['msg_type']
732 msg_type = msg['header']['msg_type']
733
733
734 # init metadata:
734 # init metadata:
735 md = self.metadata[msg_id]
735 md = self.metadata[msg_id]
736
736
737 if msg_type == 'stream':
737 if msg_type == 'stream':
738 name = content['name']
738 name = content['name']
739 s = md[name] or ''
739 s = md[name] or ''
740 md[name] = s + content['data']
740 md[name] = s + content['data']
741 elif msg_type == 'pyerr':
741 elif msg_type == 'pyerr':
742 md.update({'pyerr' : self._unwrap_exception(content)})
742 md.update({'pyerr' : self._unwrap_exception(content)})
743 elif msg_type == 'pyin':
743 elif msg_type == 'pyin':
744 md.update({'pyin' : content['code']})
744 md.update({'pyin' : content['code']})
745 else:
745 else:
746 md.update({msg_type : content.get('data', '')})
746 md.update({msg_type : content.get('data', '')})
747
747
748 # redundant?
748 # redundant?
749 self.metadata[msg_id] = md
749 self.metadata[msg_id] = md
750
750
751 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
751 idents,msg = self.session.recv(sock, mode=zmq.NOBLOCK)
752
752
753 #--------------------------------------------------------------------------
753 #--------------------------------------------------------------------------
754 # len, getitem
754 # len, getitem
755 #--------------------------------------------------------------------------
755 #--------------------------------------------------------------------------
756
756
757 def __len__(self):
757 def __len__(self):
758 """len(client) returns # of engines."""
758 """len(client) returns # of engines."""
759 return len(self.ids)
759 return len(self.ids)
760
760
761 def __getitem__(self, key):
761 def __getitem__(self, key):
762 """index access returns DirectView multiplexer objects
762 """index access returns DirectView multiplexer objects
763
763
764 Must be int, slice, or list/tuple/xrange of ints"""
764 Must be int, slice, or list/tuple/xrange of ints"""
765 if not isinstance(key, (int, slice, tuple, list, xrange)):
765 if not isinstance(key, (int, slice, tuple, list, xrange)):
766 raise TypeError("key by int/slice/iterable of ints only, not %s"%(type(key)))
766 raise TypeError("key by int/slice/iterable of ints only, not %s"%(type(key)))
767 else:
767 else:
768 return self.direct_view(key)
768 return self.direct_view(key)
769
769
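Indexing is the quickest way to get DirectViews; a few sketches, assuming a
connected client rc:

    dv_all  = rc[:]     # DirectView on every engine
    dv_one  = rc[0]     # DirectView on engine 0
    dv_even = rc[::2]   # DirectView on every other engine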
770 #--------------------------------------------------------------------------
770 #--------------------------------------------------------------------------
771 # Begin public methods
771 # Begin public methods
772 #--------------------------------------------------------------------------
772 #--------------------------------------------------------------------------
773
773
774 @property
774 @property
775 def ids(self):
775 def ids(self):
776 """Always up-to-date ids property."""
776 """Always up-to-date ids property."""
777 self._flush_notifications()
777 self._flush_notifications()
778 # always copy:
778 # always copy:
779 return list(self._ids)
779 return list(self._ids)
780
780
781 def close(self):
781 def close(self):
782 if self._closed:
782 if self._closed:
783 return
783 return
784 snames = filter(lambda n: n.endswith('socket'), dir(self))
784 snames = filter(lambda n: n.endswith('socket'), dir(self))
785 for socket in map(lambda name: getattr(self, name), snames):
785 for socket in map(lambda name: getattr(self, name), snames):
786 if isinstance(socket, zmq.Socket) and not socket.closed:
786 if isinstance(socket, zmq.Socket) and not socket.closed:
787 socket.close()
787 socket.close()
788 self._closed = True
788 self._closed = True
789
789
790 def spin(self):
790 def spin(self):
791 """Flush any registration notifications and execution results
791 """Flush any registration notifications and execution results
792 waiting in the ZMQ queue.
792 waiting in the ZMQ queue.
793 """
793 """
794 if self._notification_socket:
794 if self._notification_socket:
795 self._flush_notifications()
795 self._flush_notifications()
796 if self._mux_socket:
796 if self._mux_socket:
797 self._flush_results(self._mux_socket)
797 self._flush_results(self._mux_socket)
798 if self._task_socket:
798 if self._task_socket:
799 self._flush_results(self._task_socket)
799 self._flush_results(self._task_socket)
800 if self._control_socket:
800 if self._control_socket:
801 self._flush_control(self._control_socket)
801 self._flush_control(self._control_socket)
802 if self._iopub_socket:
802 if self._iopub_socket:
803 self._flush_iopub(self._iopub_socket)
803 self._flush_iopub(self._iopub_socket)
804 if self._query_socket:
804 if self._query_socket:
805 self._flush_ignored_hub_replies()
805 self._flush_ignored_hub_replies()
806
806
807 def wait(self, jobs=None, timeout=-1):
807 def wait(self, jobs=None, timeout=-1):
808 """waits on one or more `jobs`, for up to `timeout` seconds.
808 """waits on one or more `jobs`, for up to `timeout` seconds.
809
809
810 Parameters
810 Parameters
811 ----------
811 ----------
812
812
813 jobs : int, str, or list of ints and/or strs, or one or more AsyncResult objects
813 jobs : int, str, or list of ints and/or strs, or one or more AsyncResult objects
814 ints are indices to self.history
814 ints are indices to self.history
815 strs are msg_ids
815 strs are msg_ids
816 default: wait on all outstanding messages
816 default: wait on all outstanding messages
817 timeout : float
817 timeout : float
818 a time in seconds, after which to give up.
818 a time in seconds, after which to give up.
819 default is -1, which means no timeout
819 default is -1, which means no timeout
820
820
821 Returns
821 Returns
822 -------
822 -------
823
823
824 True : when all msg_ids are done
824 True : when all msg_ids are done
825 False : timeout reached, some msg_ids still outstanding
825 False : timeout reached, some msg_ids still outstanding
826 """
826 """
827 tic = time.time()
827 tic = time.time()
828 if jobs is None:
828 if jobs is None:
829 theids = self.outstanding
829 theids = self.outstanding
830 else:
830 else:
831 if isinstance(jobs, (int, basestring, AsyncResult)):
831 if isinstance(jobs, (int, basestring, AsyncResult)):
832 jobs = [jobs]
832 jobs = [jobs]
833 theids = set()
833 theids = set()
834 for job in jobs:
834 for job in jobs:
835 if isinstance(job, int):
835 if isinstance(job, int):
836 # index access
836 # index access
837 job = self.history[job]
837 job = self.history[job]
838 elif isinstance(job, AsyncResult):
838 elif isinstance(job, AsyncResult):
839 map(theids.add, job.msg_ids)
839 map(theids.add, job.msg_ids)
840 continue
840 continue
841 theids.add(job)
841 theids.add(job)
842 if not theids.intersection(self.outstanding):
842 if not theids.intersection(self.outstanding):
843 return True
843 return True
844 self.spin()
844 self.spin()
845 while theids.intersection(self.outstanding):
845 while theids.intersection(self.outstanding):
846 if timeout >= 0 and ( time.time()-tic ) > timeout:
846 if timeout >= 0 and ( time.time()-tic ) > timeout:
847 break
847 break
848 time.sleep(1e-3)
848 time.sleep(1e-3)
849 self.spin()
849 self.spin()
850 return len(theids.intersection(self.outstanding)) == 0
850 return len(theids.intersection(self.outstanding)) == 0
851
851
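A sketch of typical wait() calls, assuming a connected client rc and an
AsyncResult ar from an earlier submission:

    rc.wait()                       # block until all outstanding work is done
    finished = rc.wait(timeout=5)   # give up after 5s; False if jobs remain
    rc.wait(ar)                     # wait only on one AsyncResult's msg_ids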
852 #--------------------------------------------------------------------------
852 #--------------------------------------------------------------------------
853 # Control methods
853 # Control methods
854 #--------------------------------------------------------------------------
854 #--------------------------------------------------------------------------
855
855
856 @spin_first
856 @spin_first
857 def clear(self, targets=None, block=None):
857 def clear(self, targets=None, block=None):
858 """Clear the namespace in target(s)."""
858 """Clear the namespace in target(s)."""
859 block = self.block if block is None else block
859 block = self.block if block is None else block
860 targets = self._build_targets(targets)[0]
860 targets = self._build_targets(targets)[0]
861 for t in targets:
861 for t in targets:
862 self.session.send(self._control_socket, 'clear_request', content={}, ident=t)
862 self.session.send(self._control_socket, 'clear_request', content={}, ident=t)
863 error = False
863 error = False
864 if block:
864 if block:
865 self._flush_ignored_control()
865 self._flush_ignored_control()
866 for i in range(len(targets)):
866 for i in range(len(targets)):
867 idents,msg = self.session.recv(self._control_socket,0)
867 idents,msg = self.session.recv(self._control_socket,0)
868 if self.debug:
868 if self.debug:
869 pprint(msg)
869 pprint(msg)
870 if msg['content']['status'] != 'ok':
870 if msg['content']['status'] != 'ok':
871 error = self._unwrap_exception(msg['content'])
871 error = self._unwrap_exception(msg['content'])
872 else:
872 else:
873 self._ignored_control_replies += len(targets)
873 self._ignored_control_replies += len(targets)
874 if error:
874 if error:
875 raise error
875 raise error
876
876
877
877
878 @spin_first
878 @spin_first
879 def abort(self, jobs=None, targets=None, block=None):
879 def abort(self, jobs=None, targets=None, block=None):
880 """Abort specific jobs from the execution queues of target(s).
880 """Abort specific jobs from the execution queues of target(s).
881
881
882 This is a mechanism to prevent jobs that have already been submitted
882 This is a mechanism to prevent jobs that have already been submitted
883 from executing.
883 from executing.
884
884
885 Parameters
885 Parameters
886 ----------
886 ----------
887
887
888 jobs : msg_id, list of msg_ids, or AsyncResult
888 jobs : msg_id, list of msg_ids, or AsyncResult
889 The jobs to be aborted
889 The jobs to be aborted
890
890
891
891
892 """
892 """
893 block = self.block if block is None else block
893 block = self.block if block is None else block
894 targets = self._build_targets(targets)[0]
894 targets = self._build_targets(targets)[0]
895 msg_ids = []
895 msg_ids = []
896 if isinstance(jobs, (basestring,AsyncResult)):
896 if isinstance(jobs, (basestring,AsyncResult)):
897 jobs = [jobs]
897 jobs = [jobs]
898 bad_ids = filter(lambda obj: not isinstance(obj, (basestring, AsyncResult)), jobs)
898 bad_ids = filter(lambda obj: not isinstance(obj, (basestring, AsyncResult)), jobs)
899 if bad_ids:
899 if bad_ids:
900 raise TypeError("Invalid msg_id type %r, expected str or AsyncResult"%bad_ids[0])
900 raise TypeError("Invalid msg_id type %r, expected str or AsyncResult"%bad_ids[0])
901 for j in jobs:
901 for j in jobs:
902 if isinstance(j, AsyncResult):
902 if isinstance(j, AsyncResult):
903 msg_ids.extend(j.msg_ids)
903 msg_ids.extend(j.msg_ids)
904 else:
904 else:
905 msg_ids.append(j)
905 msg_ids.append(j)
906 content = dict(msg_ids=msg_ids)
906 content = dict(msg_ids=msg_ids)
907 for t in targets:
907 for t in targets:
908 self.session.send(self._control_socket, 'abort_request',
908 self.session.send(self._control_socket, 'abort_request',
909 content=content, ident=t)
909 content=content, ident=t)
910 error = False
910 error = False
911 if block:
911 if block:
912 self._flush_ignored_control()
912 self._flush_ignored_control()
913 for i in range(len(targets)):
913 for i in range(len(targets)):
914 idents,msg = self.session.recv(self._control_socket,0)
914 idents,msg = self.session.recv(self._control_socket,0)
915 if self.debug:
915 if self.debug:
916 pprint(msg)
916 pprint(msg)
917 if msg['content']['status'] != 'ok':
917 if msg['content']['status'] != 'ok':
918 error = self._unwrap_exception(msg['content'])
918 error = self._unwrap_exception(msg['content'])
919 else:
919 else:
920 self._ignored_control_replies += len(targets)
920 self._ignored_control_replies += len(targets)
921 if error:
921 if error:
922 raise error
922 raise error
923
923
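A sketch of aborting queued (not yet running) work; ar is an assumed
AsyncResult, and the explicit msg_id is hypothetical:

    rc.abort(ar)                                          # abort one submission
    rc.abort(jobs=['a-msg-id'], targets=[0], block=True)  # abort by msg_id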
924 @spin_first
924 @spin_first
925 def shutdown(self, targets=None, restart=False, hub=False, block=None):
925 def shutdown(self, targets=None, restart=False, hub=False, block=None):
926 """Terminates one or more engine processes, optionally including the hub."""
926 """Terminates one or more engine processes, optionally including the hub."""
927 block = self.block if block is None else block
927 block = self.block if block is None else block
928 if hub:
928 if hub:
929 targets = 'all'
929 targets = 'all'
930 targets = self._build_targets(targets)[0]
930 targets = self._build_targets(targets)[0]
931 for t in targets:
931 for t in targets:
932 self.session.send(self._control_socket, 'shutdown_request',
932 self.session.send(self._control_socket, 'shutdown_request',
933 content={'restart':restart},ident=t)
933 content={'restart':restart},ident=t)
934 error = False
934 error = False
935 if block or hub:
935 if block or hub:
936 self._flush_ignored_control()
936 self._flush_ignored_control()
937 for i in range(len(targets)):
937 for i in range(len(targets)):
938 idents,msg = self.session.recv(self._control_socket, 0)
938 idents,msg = self.session.recv(self._control_socket, 0)
939 if self.debug:
939 if self.debug:
940 pprint(msg)
940 pprint(msg)
941 if msg['content']['status'] != 'ok':
941 if msg['content']['status'] != 'ok':
942 error = self._unwrap_exception(msg['content'])
942 error = self._unwrap_exception(msg['content'])
943 else:
943 else:
944 self._ignored_control_replies += len(targets)
944 self._ignored_control_replies += len(targets)
945
945
946 if hub:
946 if hub:
947 time.sleep(0.25)
947 time.sleep(0.25)
948 self.session.send(self._query_socket, 'shutdown_request')
948 self.session.send(self._query_socket, 'shutdown_request')
949 idents,msg = self.session.recv(self._query_socket, 0)
949 idents,msg = self.session.recv(self._query_socket, 0)
950 if self.debug:
950 if self.debug:
951 pprint(msg)
951 pprint(msg)
952 if msg['content']['status'] != 'ok':
952 if msg['content']['status'] != 'ok':
953 error = self._unwrap_exception(msg['content'])
953 error = self._unwrap_exception(msg['content'])
954
954
955 if error:
955 if error:
956 raise error
956 raise error
957
957
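A sketch of the shutdown variants:

    rc.shutdown(targets=[0, 1])   # stop two engines
    rc.shutdown(hub=True)         # stop all engines, then the hub itself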
958 #--------------------------------------------------------------------------
958 #--------------------------------------------------------------------------
959 # Execution related methods
959 # Execution related methods
960 #--------------------------------------------------------------------------
960 #--------------------------------------------------------------------------
961
961
962 def _maybe_raise(self, result):
962 def _maybe_raise(self, result):
963 """wrapper for maybe raising an exception if apply failed."""
963 """wrapper for maybe raising an exception if apply failed."""
964 if isinstance(result, error.RemoteError):
964 if isinstance(result, error.RemoteError):
965 raise result
965 raise result
966
966
967 return result
967 return result
968
968
969 def send_apply_message(self, socket, f, args=None, kwargs=None, subheader=None, track=False,
969 def send_apply_message(self, socket, f, args=None, kwargs=None, subheader=None, track=False,
970 ident=None):
970 ident=None):
971 """construct and send an apply message via a socket.
971 """construct and send an apply message via a socket.
972
972
973 This is the principal method with which all engine execution is performed by views.
973 This is the principal method with which all engine execution is performed by views.
974 """
974 """
975
975
976 assert not self._closed, "cannot use me anymore, I'm closed!"
976 assert not self._closed, "cannot use me anymore, I'm closed!"
977 # defaults:
977 # defaults:
978 args = args if args is not None else []
978 args = args if args is not None else []
979 kwargs = kwargs if kwargs is not None else {}
979 kwargs = kwargs if kwargs is not None else {}
980 subheader = subheader if subheader is not None else {}
980 subheader = subheader if subheader is not None else {}
981
981
982 # validate arguments
982 # validate arguments
983 if not callable(f):
983 if not callable(f):
984 raise TypeError("f must be callable, not %s"%type(f))
984 raise TypeError("f must be callable, not %s"%type(f))
985 if not isinstance(args, (tuple, list)):
985 if not isinstance(args, (tuple, list)):
986 raise TypeError("args must be tuple or list, not %s"%type(args))
986 raise TypeError("args must be tuple or list, not %s"%type(args))
987 if not isinstance(kwargs, dict):
987 if not isinstance(kwargs, dict):
988 raise TypeError("kwargs must be dict, not %s"%type(kwargs))
988 raise TypeError("kwargs must be dict, not %s"%type(kwargs))
989 if not isinstance(subheader, dict):
989 if not isinstance(subheader, dict):
990 raise TypeError("subheader must be dict, not %s"%type(subheader))
990 raise TypeError("subheader must be dict, not %s"%type(subheader))
991
991
992 bufs = util.pack_apply_message(f,args,kwargs)
992 bufs = util.pack_apply_message(f,args,kwargs)
993
993
994 msg = self.session.send(socket, "apply_request", buffers=bufs, ident=ident,
994 msg = self.session.send(socket, "apply_request", buffers=bufs, ident=ident,
995 subheader=subheader, track=track)
995 subheader=subheader, track=track)
996
996
997 msg_id = msg['header']['msg_id']
997 msg_id = msg['header']['msg_id']
998 self.outstanding.add(msg_id)
998 self.outstanding.add(msg_id)
999 if ident:
999 if ident:
1000 # possibly routed to a specific engine
1000 # possibly routed to a specific engine
1001 if isinstance(ident, list):
1001 if isinstance(ident, list):
1002 ident = ident[-1]
1002 ident = ident[-1]
1003 if ident in self._engines.values():
1003 if ident in self._engines.values():
1004 # save for later, in case of engine death
1004 # save for later, in case of engine death
1005 self._outstanding_dict[ident].add(msg_id)
1005 self._outstanding_dict[ident].add(msg_id)
1006 self.history.append(msg_id)
1006 self.history.append(msg_id)
1007 self.metadata[msg_id]['submitted'] = datetime.now()
1007 self.metadata[msg_id]['submitted'] = datetime.now()
1008
1008
1009 return msg
1009 return msg
1010
1010
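User code rarely calls send_apply_message directly; views do, from their apply
methods. A sketch of the public path that ends up here, assuming a connected
client rc with at least one engine:

    def double(x):
        return 2 * x

    dv = rc[0]                        # DirectView on engine 0
    ar = dv.apply_async(double, 21)   # routed through send_apply_message
    print ar.get()                    # 42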
1011 #--------------------------------------------------------------------------
1011 #--------------------------------------------------------------------------
1012 # construct a View object
1012 # construct a View object
1013 #--------------------------------------------------------------------------
1013 #--------------------------------------------------------------------------
1014
1014
1015 def load_balanced_view(self, targets=None):
1015 def load_balanced_view(self, targets=None):
1016 """construct a DirectView object.
1016 """construct a DirectView object.
1017
1017
1018 If no arguments are specified, create a LoadBalancedView
1018 If no arguments are specified, create a LoadBalancedView
1019 using all engines.
1019 using all engines.
1020
1020
1021 Parameters
1021 Parameters
1022 ----------
1022 ----------
1023
1023
1024 targets: list,slice,int,etc. [default: use all engines]
1024 targets: list,slice,int,etc. [default: use all engines]
1025 The subset of engines across which to load-balance
1025 The subset of engines across which to load-balance
1026 """
1026 """
1027 if targets == 'all':
1027 if targets == 'all':
1028 targets = None
1028 targets = None
1029 if targets is not None:
1029 if targets is not None:
1030 targets = self._build_targets(targets)[1]
1030 targets = self._build_targets(targets)[1]
1031 return LoadBalancedView(client=self, socket=self._task_socket, targets=targets)
1031 return LoadBalancedView(client=self, socket=self._task_socket, targets=targets)
1032
1032
1033 def direct_view(self, targets='all'):
1033 def direct_view(self, targets='all'):
1034 """construct a DirectView object.
1034 """construct a DirectView object.
1035
1035
1036 If no targets are specified, create a DirectView
1036 If no targets are specified, create a DirectView
1037 using all engines.
1037 using all engines.
1038
1038
1039 Parameters
1039 Parameters
1040 ----------
1040 ----------
1041
1041
1042 targets: list,slice,int,etc. [default: use all engines]
1042 targets: list,slice,int,etc. [default: use all engines]
1043 The engines to use for the View
1043 The engines to use for the View
1044 """
1044 """
1045 single = isinstance(targets, int)
1045 single = isinstance(targets, int)
1046 # allow 'all' to be lazily evaluated at each execution
1046 # allow 'all' to be lazily evaluated at each execution
1047 if targets != 'all':
1047 if targets != 'all':
1048 targets = self._build_targets(targets)[1]
1048 targets = self._build_targets(targets)[1]
1049 if single:
1049 if single:
1050 targets = targets[0]
1050 targets = targets[0]
1051 return DirectView(client=self, socket=self._mux_socket, targets=targets)
1051 return DirectView(client=self, socket=self._mux_socket, targets=targets)
1052
1052
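A sketch contrasting the two view constructors:

    dview = rc.direct_view()                    # explicit multiplexing, all engines
    lview = rc.load_balanced_view()             # dynamic scheduling, all engines
    lhalf = rc.load_balanced_view(rc.ids[::2])  # balance over a subset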
1053 #--------------------------------------------------------------------------
1053 #--------------------------------------------------------------------------
1054 # Query methods
1054 # Query methods
1055 #--------------------------------------------------------------------------
1055 #--------------------------------------------------------------------------
1056
1056
1057 @spin_first
1057 @spin_first
1058 def get_result(self, indices_or_msg_ids=None, block=None):
1058 def get_result(self, indices_or_msg_ids=None, block=None):
1059 """Retrieve a result by msg_id or history index, wrapped in an AsyncResult object.
1059 """Retrieve a result by msg_id or history index, wrapped in an AsyncResult object.
1060
1060
1061 If the client already has the results, no request to the Hub will be made.
1061 If the client already has the results, no request to the Hub will be made.
1062
1062
1063 This is a convenient way to construct AsyncResult objects, which are wrappers
1063 This is a convenient way to construct AsyncResult objects, which are wrappers
1064 that include metadata about execution, and allow for awaiting results that
1064 that include metadata about execution, and allow for awaiting results that
1065 were not submitted by this Client.
1065 were not submitted by this Client.
1066
1066
1067 It can also be a convenient way to retrieve the metadata associated with
1067 It can also be a convenient way to retrieve the metadata associated with
1068 blocking execution, since it always returns an AsyncResult carrying that metadata.
1068 blocking execution, since it always returns an AsyncResult carrying that metadata.
1069
1069
1070 Examples
1070 Examples
1071 --------
1071 --------
1072 ::
1072 ::
1073
1073
1074 In [10]: ar = client.get_result(-1)  # AsyncResult for the most recent request
1074 In [10]: ar = client.get_result(-1)  # AsyncResult for the most recent request
1075
1075
1076 Parameters
1076 Parameters
1077 ----------
1077 ----------
1078
1078
1079 indices_or_msg_ids : integer history index, str msg_id, or list of either
1079 indices_or_msg_ids : integer history index, str msg_id, or list of either
1080 The history indices or msg_ids of results to be retrieved
1080 The history indices or msg_ids of results to be retrieved
1081
1081
1082 block : bool
1082 block : bool
1083 Whether to wait for the result to be done
1083 Whether to wait for the result to be done
1084
1084
1085 Returns
1085 Returns
1086 -------
1086 -------
1087
1087
1088 AsyncResult
1088 AsyncResult
1089 A single AsyncResult object will always be returned.
1089 A single AsyncResult object will always be returned.
1090
1090
1091 AsyncHubResult
1091 AsyncHubResult
1092 A subclass of AsyncResult that retrieves results from the Hub
1092 A subclass of AsyncResult that retrieves results from the Hub
1093
1093
1094 """
1094 """
1095 block = self.block if block is None else block
1095 block = self.block if block is None else block
1096 if indices_or_msg_ids is None:
1096 if indices_or_msg_ids is None:
1097 indices_or_msg_ids = -1
1097 indices_or_msg_ids = -1
1098
1098
1099 if not isinstance(indices_or_msg_ids, (list,tuple)):
1099 if not isinstance(indices_or_msg_ids, (list,tuple)):
1100 indices_or_msg_ids = [indices_or_msg_ids]
1100 indices_or_msg_ids = [indices_or_msg_ids]
1101
1101
1102 theids = []
1102 theids = []
1103 for id in indices_or_msg_ids:
1103 for id in indices_or_msg_ids:
1104 if isinstance(id, int):
1104 if isinstance(id, int):
1105 id = self.history[id]
1105 id = self.history[id]
1106 if not isinstance(id, basestring):
1106 if not isinstance(id, basestring):
1107 raise TypeError("indices must be str or int, not %r"%id)
1107 raise TypeError("indices must be str or int, not %r"%id)
1108 theids.append(id)
1108 theids.append(id)
1109
1109
1110 local_ids = filter(lambda msg_id: msg_id in self.history or msg_id in self.results, theids)
1110 local_ids = filter(lambda msg_id: msg_id in self.history or msg_id in self.results, theids)
1111 remote_ids = filter(lambda msg_id: msg_id not in local_ids, theids)
1111 remote_ids = filter(lambda msg_id: msg_id not in local_ids, theids)
1112
1112
1113 if remote_ids:
1113 if remote_ids:
1114 ar = AsyncHubResult(self, msg_ids=theids)
1114 ar = AsyncHubResult(self, msg_ids=theids)
1115 else:
1115 else:
1116 ar = AsyncResult(self, msg_ids=theids)
1116 ar = AsyncResult(self, msg_ids=theids)
1117
1117
1118 if block:
1118 if block:
1119 ar.wait()
1119 ar.wait()
1120
1120
1121 return ar
1121 return ar
1122
1122
1123 @spin_first
1123 @spin_first
1124 def resubmit(self, indices_or_msg_ids=None, subheader=None, block=None):
1124 def resubmit(self, indices_or_msg_ids=None, subheader=None, block=None):
1125 """Resubmit one or more tasks.
1125 """Resubmit one or more tasks.
1126
1126
1127 in-flight tasks may not be resubmitted.
1127 in-flight tasks may not be resubmitted.
1128
1128
1129 Parameters
1129 Parameters
1130 ----------
1130 ----------
1131
1131
1132 indices_or_msg_ids : integer history index, str msg_id, or list of either
1132 indices_or_msg_ids : integer history index, str msg_id, or list of either
1133 The history indices or msg_ids of results to be retrieved
1133 The history indices or msg_ids of results to be retrieved
1134
1134
1135 block : bool
1135 block : bool
1136 Whether to wait for the result to be done
1136 Whether to wait for the result to be done
1137
1137
1138 Returns
1138 Returns
1139 -------
1139 -------
1140
1140
1141 AsyncHubResult
1141 AsyncHubResult
1142 A subclass of AsyncResult that retrieves results from the Hub
1142 A subclass of AsyncResult that retrieves results from the Hub
1143
1143
1144 """
1144 """
1145 block = self.block if block is None else block
1145 block = self.block if block is None else block
1146 if indices_or_msg_ids is None:
1146 if indices_or_msg_ids is None:
1147 indices_or_msg_ids = -1
1147 indices_or_msg_ids = -1
1148
1148
1149 if not isinstance(indices_or_msg_ids, (list,tuple)):
1149 if not isinstance(indices_or_msg_ids, (list,tuple)):
1150 indices_or_msg_ids = [indices_or_msg_ids]
1150 indices_or_msg_ids = [indices_or_msg_ids]
1151
1151
1152 theids = []
1152 theids = []
1153 for id in indices_or_msg_ids:
1153 for id in indices_or_msg_ids:
1154 if isinstance(id, int):
1154 if isinstance(id, int):
1155 id = self.history[id]
1155 id = self.history[id]
1156 if not isinstance(id, basestring):
1156 if not isinstance(id, basestring):
1157 raise TypeError("indices must be str or int, not %r"%id)
1157 raise TypeError("indices must be str or int, not %r"%id)
1158 theids.append(id)
1158 theids.append(id)
1159
1159
1160 for msg_id in theids:
1160 for msg_id in theids:
1161 self.outstanding.discard(msg_id)
1161 self.outstanding.discard(msg_id)
1162 if msg_id in self.history:
1162 if msg_id in self.history:
1163 self.history.remove(msg_id)
1163 self.history.remove(msg_id)
1164 self.results.pop(msg_id, None)
1164 self.results.pop(msg_id, None)
1165 self.metadata.pop(msg_id, None)
1165 self.metadata.pop(msg_id, None)
1166 content = dict(msg_ids = theids)
1166 content = dict(msg_ids = theids)
1167
1167
1168 self.session.send(self._query_socket, 'resubmit_request', content)
1168 self.session.send(self._query_socket, 'resubmit_request', content)
1169
1169
1170 zmq.select([self._query_socket], [], [])
1170 zmq.select([self._query_socket], [], [])
1171 idents,msg = self.session.recv(self._query_socket, zmq.NOBLOCK)
1171 idents,msg = self.session.recv(self._query_socket, zmq.NOBLOCK)
1172 if self.debug:
1172 if self.debug:
1173 pprint(msg)
1173 pprint(msg)
1174 content = msg['content']
1174 content = msg['content']
1175 if content['status'] != 'ok':
1175 if content['status'] != 'ok':
1176 raise self._unwrap_exception(content)
1176 raise self._unwrap_exception(content)
1177
1177
1178 ar = AsyncHubResult(self, msg_ids=theids)
1178 ar = AsyncHubResult(self, msg_ids=theids)
1179
1179
1180 if block:
1180 if block:
1181 ar.wait()
1181 ar.wait()
1182
1182
1183 return ar
1183 return ar
1184
1184
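A sketch of resubmitting completed (or failed) work; the Hub refuses tasks
that are still in flight:

    ar = rc.resubmit()                          # re-run the most recent task
    ar = rc.resubmit(range(-5, 0), block=True)  # re-run the last five and wait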
1185 @spin_first
1185 @spin_first
1186 def result_status(self, msg_ids, status_only=True):
1186 def result_status(self, msg_ids, status_only=True):
1187 """Check on the status of the result(s) of the apply request with `msg_ids`.
1187 """Check on the status of the result(s) of the apply request with `msg_ids`.
1188
1188
1189 If status_only is False, then the actual results will be retrieved, else
1189 If status_only is False, then the actual results will be retrieved, else
1190 only the status of the results will be checked.
1190 only the status of the results will be checked.
1191
1191
1192 Parameters
1192 Parameters
1193 ----------
1193 ----------
1194
1194
1195 msg_ids : list of msg_ids
1195 msg_ids : list of msg_ids
1196 if int:
1196 if int:
1197 Passed as index to self.history for convenience.
1197 Passed as index to self.history for convenience.
1198 status_only : bool (default: True)
1198 status_only : bool (default: True)
1199 if False:
1199 if False:
1200 Retrieve the actual results of completed tasks.
1200 Retrieve the actual results of completed tasks.
1201
1201
1202 Returns
1202 Returns
1203 -------
1203 -------
1204
1204
1205 results : dict
1205 results : dict
1206 There will always be the keys 'pending' and 'completed', which will
1206 There will always be the keys 'pending' and 'completed', which will
1207 be lists of msg_ids that are incomplete or complete. If `status_only`
1207 be lists of msg_ids that are incomplete or complete. If `status_only`
1208 is False, then completed results will be keyed by their `msg_id`.
1208 is False, then completed results will be keyed by their `msg_id`.
1209 """
1209 """
1210 if not isinstance(msg_ids, (list,tuple)):
1210 if not isinstance(msg_ids, (list,tuple)):
1211 msg_ids = [msg_ids]
1211 msg_ids = [msg_ids]
1212
1212
1213 theids = []
1213 theids = []
1214 for msg_id in msg_ids:
1214 for msg_id in msg_ids:
1215 if isinstance(msg_id, int):
1215 if isinstance(msg_id, int):
1216 msg_id = self.history[msg_id]
1216 msg_id = self.history[msg_id]
1217 if not isinstance(msg_id, basestring):
1217 if not isinstance(msg_id, basestring):
1218 raise TypeError("msg_ids must be str, not %r"%msg_id)
1218 raise TypeError("msg_ids must be str, not %r"%msg_id)
1219 theids.append(msg_id)
1219 theids.append(msg_id)
1220
1220
1221 completed = []
1221 completed = []
1222 local_results = {}
1222 local_results = {}
1223
1223
1224 # comment this block out to temporarily disable local shortcut:
1224 # comment this block out to temporarily disable local shortcut:
1225 for msg_id in list(theids): # iterate over a copy; we remove from theids below
1225 for msg_id in list(theids): # iterate over a copy; we remove from theids below
1226 if msg_id in self.results:
1226 if msg_id in self.results:
1227 completed.append(msg_id)
1227 completed.append(msg_id)
1228 local_results[msg_id] = self.results[msg_id]
1228 local_results[msg_id] = self.results[msg_id]
1229 theids.remove(msg_id)
1229 theids.remove(msg_id)
1230
1230
1231 if theids: # some not locally cached
1231 if theids: # some not locally cached
1232 content = dict(msg_ids=theids, status_only=status_only)
1232 content = dict(msg_ids=theids, status_only=status_only)
1233 msg = self.session.send(self._query_socket, "result_request", content=content)
1233 msg = self.session.send(self._query_socket, "result_request", content=content)
1234 zmq.select([self._query_socket], [], [])
1234 zmq.select([self._query_socket], [], [])
1235 idents,msg = self.session.recv(self._query_socket, zmq.NOBLOCK)
1235 idents,msg = self.session.recv(self._query_socket, zmq.NOBLOCK)
1236 if self.debug:
1236 if self.debug:
1237 pprint(msg)
1237 pprint(msg)
1238 content = msg['content']
1238 content = msg['content']
1239 if content['status'] != 'ok':
1239 if content['status'] != 'ok':
1240 raise self._unwrap_exception(content)
1240 raise self._unwrap_exception(content)
1241 buffers = msg['buffers']
1241 buffers = msg['buffers']
1242 else:
1242 else:
1243 content = dict(completed=[],pending=[])
1243 content = dict(completed=[],pending=[])
1244
1244
1245 content['completed'].extend(completed)
1245 content['completed'].extend(completed)
1246
1246
1247 if status_only:
1247 if status_only:
1248 return content
1248 return content
1249
1249
1250 failures = []
1250 failures = []
1251 # load cached results into the reply content:
1251 # load cached results into the reply content:
1252 content.update(local_results)
1252 content.update(local_results)
1253
1253
1254 # update cache with results:
1254 # update cache with results:
1255 for msg_id in sorted(theids):
1255 for msg_id in sorted(theids):
1256 if msg_id in content['completed']:
1256 if msg_id in content['completed']:
1257 rec = content[msg_id]
1257 rec = content[msg_id]
1258 parent = rec['header']
1258 parent = rec['header']
1259 header = rec['result_header']
1259 header = rec['result_header']
1260 rcontent = rec['result_content']
1260 rcontent = rec['result_content']
1261 iodict = rec['io']
1261 iodict = rec['io']
1262 if isinstance(rcontent, str):
1262 if isinstance(rcontent, str):
1263 rcontent = self.session.unpack(rcontent)
1263 rcontent = self.session.unpack(rcontent)
1264
1264
1265 md = self.metadata[msg_id]
1265 md = self.metadata[msg_id]
1266 md.update(self._extract_metadata(header, parent, rcontent))
1266 md.update(self._extract_metadata(header, parent, rcontent))
1267 md.update(iodict)
1267 md.update(iodict)
1268
1268
1269 if rcontent['status'] == 'ok':
1269 if rcontent['status'] == 'ok':
1270 res,buffers = util.unserialize_object(buffers)
1270 res,buffers = util.unserialize_object(buffers)
1271 else:
1271 else:
1272 print rcontent
1272 print rcontent
1273 res = self._unwrap_exception(rcontent)
1273 res = self._unwrap_exception(rcontent)
1274 failures.append(res)
1274 failures.append(res)
1275
1275
1276 self.results[msg_id] = res
1276 self.results[msg_id] = res
1277 content[msg_id] = res
1277 content[msg_id] = res
1278
1278
1279 if len(theids) == 1 and failures:
1279 if len(theids) == 1 and failures:
1280 raise failures[0]
1280 raise failures[0]
1281
1281
1282 error.collect_exceptions(failures, "result_status")
1282 error.collect_exceptions(failures, "result_status")
1283 return content
1283 return content
1284
1284
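# Usage sketch for result_status (hedged: `rc` is assumed to be a connected
# Client with some completed work recorded in rc.history):
#
#     status = rc.result_status(rc.history)
#     # -> {'pending': [...], 'completed': [...]}
#     full = rc.result_status(rc.history, status_only=False)
#     # -> same keys, plus completed results keyed by msg_id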
1285 @spin_first
1285 @spin_first
1286 def queue_status(self, targets='all', verbose=False):
1286 def queue_status(self, targets='all', verbose=False):
1287 """Fetch the status of engine queues.
1287 """Fetch the status of engine queues.
1288
1288
1289 Parameters
1289 Parameters
1290 ----------
1290 ----------
1291
1291
1292 targets : int/str/list of ints/strs
1292 targets : int/str/list of ints/strs
1293 the engines whose states are to be queried.
1293 the engines whose states are to be queried.
1294 default : all
1294 default : all
1295 verbose : bool
1295 verbose : bool
1296 Whether to return lengths only, or lists of ids for each element
1296 Whether to return lengths only, or lists of ids for each element
1297 """
1297 """
1298 engine_ids = self._build_targets(targets)[1]
1298 engine_ids = self._build_targets(targets)[1]
1299 content = dict(targets=engine_ids, verbose=verbose)
1299 content = dict(targets=engine_ids, verbose=verbose)
1300 self.session.send(self._query_socket, "queue_request", content=content)
1300 self.session.send(self._query_socket, "queue_request", content=content)
1301 idents,msg = self.session.recv(self._query_socket, 0)
1301 idents,msg = self.session.recv(self._query_socket, 0)
1302 if self.debug:
1302 if self.debug:
1303 pprint(msg)
1303 pprint(msg)
1304 content = msg['content']
1304 content = msg['content']
1305 status = content.pop('status')
1305 status = content.pop('status')
1306 if status != 'ok':
1306 if status != 'ok':
1307 raise self._unwrap_exception(content)
1307 raise self._unwrap_exception(content)
1308 content = rekey(content)
1308 content = rekey(content)
1309 if isinstance(targets, int):
1309 if isinstance(targets, int):
1310 return content[targets]
1310 return content[targets]
1311 else:
1311 else:
1312 return content
1312 return content
1313
1313
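# Usage sketch (assumes `rc` is a connected Client; the per-engine dict shape
# shown is illustrative):
#
#     rc.queue_status()              # e.g. {0: {'queue': 2, 'tasks': 0, 'completed': 5}, ...}
#     rc.queue_status(targets=0)     # status dict for engine 0 only
#     rc.queue_status(verbose=True)  # lists of msg_ids instead of counts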
1314 @spin_first
1314 @spin_first
1315 def purge_results(self, jobs=[], targets=[]):
1315 def purge_results(self, jobs=[], targets=[]):
1316 """Tell the Hub to forget results.
1316 """Tell the Hub to forget results.
1317
1317
1318 Individual results can be purged by msg_id, or the entire
1318 Individual results can be purged by msg_id, or the entire
1319 history of specific targets can be purged.
1319 history of specific targets can be purged.
1320
1320
1321 Use `purge_results('all')` to scrub everything from the Hub's db.
1321 Use `purge_results('all')` to scrub everything from the Hub's db.
1322
1322
1323 Parameters
1323 Parameters
1324 ----------
1324 ----------
1325
1325
1326 jobs : str or list of str or AsyncResult objects
1326 jobs : str or list of str or AsyncResult objects
1327 the msg_ids whose results should be forgotten.
1327 the msg_ids whose results should be forgotten.
1328 targets : int/str/list of ints/strs
1328 targets : int/str/list of ints/strs
1329 The targets, by int_id, whose entire history is to be purged.
1329 The targets, by int_id, whose entire history is to be purged.
1330
1330
1331 default : None
1331 default : None
1332 """
1332 """
1333 if not targets and not jobs:
1333 if not targets and not jobs:
1334 raise ValueError("Must specify at least one of `targets` and `jobs`")
1334 raise ValueError("Must specify at least one of `targets` and `jobs`")
1335 if targets:
1335 if targets:
1336 targets = self._build_targets(targets)[1]
1336 targets = self._build_targets(targets)[1]
1337
1337
1338 # construct msg_ids from jobs
1338 # construct msg_ids from jobs
1339 if jobs == 'all':
1339 if jobs == 'all':
1340 msg_ids = jobs
1340 msg_ids = jobs
1341 else:
1341 else:
1342 msg_ids = []
1342 msg_ids = []
1343 if isinstance(jobs, (basestring,AsyncResult)):
1343 if isinstance(jobs, (basestring,AsyncResult)):
1344 jobs = [jobs]
1344 jobs = [jobs]
1345 bad_ids = filter(lambda obj: not isinstance(obj, (basestring, AsyncResult)), jobs)
1345 bad_ids = filter(lambda obj: not isinstance(obj, (basestring, AsyncResult)), jobs)
1346 if bad_ids:
1346 if bad_ids:
1347 raise TypeError("Invalid msg_id type %r, expected str or AsyncResult"%bad_ids[0])
1347 raise TypeError("Invalid msg_id type %r, expected str or AsyncResult"%bad_ids[0])
1348 for j in jobs:
1348 for j in jobs:
1349 if isinstance(j, AsyncResult):
1349 if isinstance(j, AsyncResult):
1350 msg_ids.extend(j.msg_ids)
1350 msg_ids.extend(j.msg_ids)
1351 else:
1351 else:
1352 msg_ids.append(j)
1352 msg_ids.append(j)
1353
1353
1354 content = dict(engine_ids=targets, msg_ids=msg_ids)
1354 content = dict(engine_ids=targets, msg_ids=msg_ids)
1355 self.session.send(self._query_socket, "purge_request", content=content)
1355 self.session.send(self._query_socket, "purge_request", content=content)
1356 idents, msg = self.session.recv(self._query_socket, 0)
1356 idents, msg = self.session.recv(self._query_socket, 0)
1357 if self.debug:
1357 if self.debug:
1358 pprint(msg)
1358 pprint(msg)
1359 content = msg['content']
1359 content = msg['content']
1360 if content['status'] != 'ok':
1360 if content['status'] != 'ok':
1361 raise self._unwrap_exception(content)
1361 raise self._unwrap_exception(content)
1362
1362
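# Usage sketch (assumes `rc` is a connected Client and `ar` an AsyncResult):
#
#     rc.purge_results(jobs=ar)          # forget one result by AsyncResult
#     rc.purge_results(targets=[0, 1])   # drop the full history of engines 0 and 1
#     rc.purge_results('all')            # scrub everything from the Hub's db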
1363 @spin_first
1363 @spin_first
1364 def hub_history(self):
1364 def hub_history(self):
1365 """Get the Hub's history
1365 """Get the Hub's history
1366
1366
1367 Just like the Client, the Hub has a history, which is a list of msg_ids.
1367 Just like the Client, the Hub has a history, which is a list of msg_ids.
1368 This will contain the history of all clients, and, depending on configuration,
1368 This will contain the history of all clients, and, depending on configuration,
1369 may contain history across multiple cluster sessions.
1369 may contain history across multiple cluster sessions.
1370
1370
1371 Any msg_id returned here is a valid argument to `get_result`.
1371 Any msg_id returned here is a valid argument to `get_result`.
1372
1372
1373 Returns
1373 Returns
1374 -------
1374 -------
1375
1375
1376 msg_ids : list of strs
1376 msg_ids : list of strs
1377 list of all msg_ids, ordered by task submission time.
1377 list of all msg_ids, ordered by task submission time.
1378 """
1378 """
1379
1379
1380 self.session.send(self._query_socket, "history_request", content={})
1380 self.session.send(self._query_socket, "history_request", content={})
1381 idents, msg = self.session.recv(self._query_socket, 0)
1381 idents, msg = self.session.recv(self._query_socket, 0)
1382
1382
1383 if self.debug:
1383 if self.debug:
1384 pprint(msg)
1384 pprint(msg)
1385 content = msg['content']
1385 content = msg['content']
1386 if content['status'] != 'ok':
1386 if content['status'] != 'ok':
1387 raise self._unwrap_exception(content)
1387 raise self._unwrap_exception(content)
1388 else:
1388 else:
1389 return content['history']
1389 return content['history']
1390
1390
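# Usage sketch (assumes `rc` is a connected Client; per the docstring, any
# returned msg_id is a valid argument to get_result):
#
#     hist = rc.hub_history()
#     ar = rc.get_result(hist[-1])  # fetch the most recently submitted task
#     ar.get()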
1391 @spin_first
1391 @spin_first
1392 def db_query(self, query, keys=None):
1392 def db_query(self, query, keys=None):
1393 """Query the Hub's TaskRecord database
1393 """Query the Hub's TaskRecord database
1394
1394
1395 This will return a list of task record dicts that match `query`
1395 This will return a list of task record dicts that match `query`
1396
1396
1397 Parameters
1397 Parameters
1398 ----------
1398 ----------
1399
1399
1400 query : mongodb query dict
1400 query : mongodb query dict
1401 The search dict. See mongodb query docs for details.
1401 The search dict. See mongodb query docs for details.
1402 keys : list of strs [optional]
1402 keys : list of strs [optional]
1403 The subset of keys to be returned. The default is to fetch everything but buffers.
1403 The subset of keys to be returned. The default is to fetch everything but buffers.
1404 'msg_id' will *always* be included.
1404 'msg_id' will *always* be included.
1405 """
1405 """
1406 if isinstance(keys, basestring):
1406 if isinstance(keys, basestring):
1407 keys = [keys]
1407 keys = [keys]
1408 content = dict(query=query, keys=keys)
1408 content = dict(query=query, keys=keys)
1409 self.session.send(self._query_socket, "db_request", content=content)
1409 self.session.send(self._query_socket, "db_request", content=content)
1410 idents, msg = self.session.recv(self._query_socket, 0)
1410 idents, msg = self.session.recv(self._query_socket, 0)
1411 if self.debug:
1411 if self.debug:
1412 pprint(msg)
1412 pprint(msg)
1413 content = msg['content']
1413 content = msg['content']
1414 if content['status'] != 'ok':
1414 if content['status'] != 'ok':
1415 raise self._unwrap_exception(content)
1415 raise self._unwrap_exception(content)
1416
1416
1417 records = content['records']
1417 records = content['records']
1418
1418
1419 buffer_lens = content['buffer_lens']
1419 buffer_lens = content['buffer_lens']
1420 result_buffer_lens = content['result_buffer_lens']
1420 result_buffer_lens = content['result_buffer_lens']
1421 buffers = msg['buffers']
1421 buffers = msg['buffers']
1422 has_bufs = buffer_lens is not None
1422 has_bufs = buffer_lens is not None
1423 has_rbufs = result_buffer_lens is not None
1423 has_rbufs = result_buffer_lens is not None
1424 for i,rec in enumerate(records):
1424 for i,rec in enumerate(records):
1425 # relink buffers
1425 # relink buffers
1426 if has_bufs:
1426 if has_bufs:
1427 blen = buffer_lens[i]
1427 blen = buffer_lens[i]
1428 rec['buffers'], buffers = buffers[:blen],buffers[blen:]
1428 rec['buffers'], buffers = buffers[:blen],buffers[blen:]
1429 if has_rbufs:
1429 if has_rbufs:
1430 blen = result_buffer_lens[i]
1430 blen = result_buffer_lens[i]
1431 rec['result_buffers'], buffers = buffers[:blen],buffers[blen:]
1431 rec['result_buffers'], buffers = buffers[:blen],buffers[blen:]
1432
1432
1433 return records
1433 return records
1434
1434
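# Usage sketch (assumes `rc` is a connected Client; the query syntax follows
# MongoDB, as the docstring notes):
#
#     from datetime import datetime, timedelta
#     yesterday = datetime.now() - timedelta(1)
#     recs = rc.db_query({'submitted': {'$gt': yesterday}},
#                        keys=['msg_id', 'completed', 'engine_uuid'])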
1435 __all__ = [ 'Client' ]
1435 __all__ = [ 'Client' ]
@@ -1,173 +1,173 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 """
2 """
3 A multi-heart Heartbeat system using PUB and ROUTER sockets. Pings are sent out on the PUB,
3 A multi-heart Heartbeat system using PUB and ROUTER sockets. Pings are sent out on the PUB,
4 and hearts are tracked based on their DEALER identities.
4 and hearts are tracked based on their DEALER identities.
5
5
6 Authors:
6 Authors:
7
7
8 * Min RK
8 * Min RK
9 """
9 """
10 #-----------------------------------------------------------------------------
10 #-----------------------------------------------------------------------------
11 # Copyright (C) 2010-2011 The IPython Development Team
11 # Copyright (C) 2010-2011 The IPython Development Team
12 #
12 #
13 # Distributed under the terms of the BSD License. The full license is in
13 # Distributed under the terms of the BSD License. The full license is in
14 # the file COPYING, distributed as part of this software.
14 # the file COPYING, distributed as part of this software.
15 #-----------------------------------------------------------------------------
15 #-----------------------------------------------------------------------------
16
16
17 from __future__ import print_function
17 from __future__ import print_function
18 import time
18 import time
19 import uuid
19 import uuid
20
20
21 import zmq
21 import zmq
22 from zmq.devices import ThreadDevice
22 from zmq.devices import ThreadDevice
23 from zmq.eventloop import ioloop, zmqstream
23 from zmq.eventloop import ioloop, zmqstream
24
24
25 from IPython.config.configurable import LoggingConfigurable
25 from IPython.config.configurable import LoggingConfigurable
26 from IPython.utils.traitlets import Set, Instance, CFloat
26 from IPython.utils.traitlets import Set, Instance, CFloat
27
27
28 from IPython.parallel.util import asbytes
28 from IPython.parallel.util import asbytes
29
29
30 class Heart(object):
30 class Heart(object):
31 """A basic heart object for responding to a HeartMonitor.
31 """A basic heart object for responding to a HeartMonitor.
32 This is a simple wrapper with defaults for the most common
32 This is a simple wrapper with defaults for the most common
33 Device model for responding to heartbeats.
33 Device model for responding to heartbeats.
34
34
35 It simply builds a threadsafe zmq.FORWARDER Device, defaulting to using
35 It simply builds a threadsafe zmq.FORWARDER Device, defaulting to using
36 SUB/DEALER for in/out.
36 SUB/DEALER for in/out.
37
37
38 You can specify the DEALER's IDENTITY via the optional heart_id argument."""
38 You can specify the DEALER's IDENTITY via the optional heart_id argument."""
39 device=None
39 device=None
40 id=None
40 id=None
41 def __init__(self, in_addr, out_addr, in_type=zmq.SUB, out_type=zmq.XREQ, heart_id=None):
41 def __init__(self, in_addr, out_addr, in_type=zmq.SUB, out_type=zmq.DEALER, heart_id=None):
42 self.device = ThreadDevice(zmq.FORWARDER, in_type, out_type)
42 self.device = ThreadDevice(zmq.FORWARDER, in_type, out_type)
43 self.device.daemon=True
43 self.device.daemon=True
44 self.device.connect_in(in_addr)
44 self.device.connect_in(in_addr)
45 self.device.connect_out(out_addr)
45 self.device.connect_out(out_addr)
46 if in_type == zmq.SUB:
46 if in_type == zmq.SUB:
47 self.device.setsockopt_in(zmq.SUBSCRIBE, b"")
47 self.device.setsockopt_in(zmq.SUBSCRIBE, b"")
48 if heart_id is None:
48 if heart_id is None:
49 heart_id = uuid.uuid4().bytes
49 heart_id = uuid.uuid4().bytes
50 self.device.setsockopt_out(zmq.IDENTITY, heart_id)
50 self.device.setsockopt_out(zmq.IDENTITY, heart_id)
51 self.id = heart_id
51 self.id = heart_id
52
52
53 def start(self):
53 def start(self):
54 return self.device.start()
54 return self.device.start()
55
55
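# A minimal sketch of running a standalone heart against a monitor
# (assumption: the ping PUB is bound on port 5555 and the pong ROUTER on
# 5556, as in the __main__ block at the bottom of this file):
#
#     heart = Heart('tcp://127.0.0.1:5555', 'tcp://127.0.0.1:5556',
#                   heart_id=b'my-engine')
#     heart.start()  # echoes each ping back out under its DEALER identity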
56 class HeartMonitor(LoggingConfigurable):
56 class HeartMonitor(LoggingConfigurable):
57 """A basic HeartMonitor class
57 """A basic HeartMonitor class
58 pingstream: a PUB stream
58 pingstream: a PUB stream
59 pongstream: a ROUTER stream
59 pongstream: a ROUTER stream
60 period: the period of the heartbeat in milliseconds"""
60 period: the period of the heartbeat in milliseconds"""
61
61
62 period=CFloat(1000, config=True,
62 period=CFloat(1000, config=True,
63 help='The period (in ms) at which the Hub pings the engines for heartbeats '
63 help='The period (in ms) at which the Hub pings the engines for heartbeats '
64 ' [default: 1000]',
64 ' [default: 1000]',
65 )
65 )
66
66
67 pingstream=Instance('zmq.eventloop.zmqstream.ZMQStream')
67 pingstream=Instance('zmq.eventloop.zmqstream.ZMQStream')
68 pongstream=Instance('zmq.eventloop.zmqstream.ZMQStream')
68 pongstream=Instance('zmq.eventloop.zmqstream.ZMQStream')
69 loop = Instance('zmq.eventloop.ioloop.IOLoop')
69 loop = Instance('zmq.eventloop.ioloop.IOLoop')
70 def _loop_default(self):
70 def _loop_default(self):
71 return ioloop.IOLoop.instance()
71 return ioloop.IOLoop.instance()
72
72
73 # not settable:
73 # not settable:
74 hearts=Set()
74 hearts=Set()
75 responses=Set()
75 responses=Set()
76 on_probation=Set()
76 on_probation=Set()
77 last_ping=CFloat(0)
77 last_ping=CFloat(0)
78 _new_handlers = Set()
78 _new_handlers = Set()
79 _failure_handlers = Set()
79 _failure_handlers = Set()
80 lifetime = CFloat(0)
80 lifetime = CFloat(0)
81 tic = CFloat(0)
81 tic = CFloat(0)
82
82
83 def __init__(self, **kwargs):
83 def __init__(self, **kwargs):
84 super(HeartMonitor, self).__init__(**kwargs)
84 super(HeartMonitor, self).__init__(**kwargs)
85
85
86 self.pongstream.on_recv(self.handle_pong)
86 self.pongstream.on_recv(self.handle_pong)
87
87
88 def start(self):
88 def start(self):
89 self.caller = ioloop.PeriodicCallback(self.beat, self.period, self.loop)
89 self.caller = ioloop.PeriodicCallback(self.beat, self.period, self.loop)
90 self.caller.start()
90 self.caller.start()
91
91
92 def add_new_heart_handler(self, handler):
92 def add_new_heart_handler(self, handler):
93 """add a new handler for new hearts"""
93 """add a new handler for new hearts"""
94 self.log.debug("heartbeat::new_heart_handler: %s"%handler)
94 self.log.debug("heartbeat::new_heart_handler: %s"%handler)
95 self._new_handlers.add(handler)
95 self._new_handlers.add(handler)
96
96
97 def add_heart_failure_handler(self, handler):
97 def add_heart_failure_handler(self, handler):
98 """add a new handler for heart failure"""
98 """add a new handler for heart failure"""
99 self.log.debug("heartbeat::new heart failure handler: %s"%handler)
99 self.log.debug("heartbeat::new heart failure handler: %s"%handler)
100 self._failure_handlers.add(handler)
100 self._failure_handlers.add(handler)
101
101
102 def beat(self):
102 def beat(self):
103 self.pongstream.flush()
103 self.pongstream.flush()
104 self.last_ping = self.lifetime
104 self.last_ping = self.lifetime
105
105
106 toc = time.time()
106 toc = time.time()
107 self.lifetime += toc-self.tic
107 self.lifetime += toc-self.tic
108 self.tic = toc
108 self.tic = toc
109 # self.log.debug("heartbeat::%s"%self.lifetime)
109 # self.log.debug("heartbeat::%s"%self.lifetime)
110 goodhearts = self.hearts.intersection(self.responses)
110 goodhearts = self.hearts.intersection(self.responses)
111 missed_beats = self.hearts.difference(goodhearts)
111 missed_beats = self.hearts.difference(goodhearts)
112 heartfailures = self.on_probation.intersection(missed_beats)
112 heartfailures = self.on_probation.intersection(missed_beats)
113 newhearts = self.responses.difference(goodhearts)
113 newhearts = self.responses.difference(goodhearts)
114 map(self.handle_new_heart, newhearts)
114 map(self.handle_new_heart, newhearts)
115 map(self.handle_heart_failure, heartfailures)
115 map(self.handle_heart_failure, heartfailures)
116 self.on_probation = missed_beats.intersection(self.hearts)
116 self.on_probation = missed_beats.intersection(self.hearts)
117 self.responses = set()
117 self.responses = set()
118 # print self.on_probation, self.hearts
118 # print self.on_probation, self.hearts
119 # self.log.debug("heartbeat::beat %.3f, %i beating hearts"%(self.lifetime, len(self.hearts)))
119 # self.log.debug("heartbeat::beat %.3f, %i beating hearts"%(self.lifetime, len(self.hearts)))
120 self.pingstream.send(asbytes(str(self.lifetime)))
120 self.pingstream.send(asbytes(str(self.lifetime)))
121
121
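# Worked example of the set algebra in beat() above (illustrative values):
# with hearts={a,b,c}, responses={a,c,d}, on_probation={b}:
#   goodhearts    = {a,c}  hearts that answered the last ping
#   missed_beats  = {b}    registered hearts that stayed silent
#   heartfailures = {b}    silent for two consecutive beats -> failure
#   newhearts     = {d}    responders we had not registered yet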
122 def handle_new_heart(self, heart):
122 def handle_new_heart(self, heart):
123 if self._new_handlers:
123 if self._new_handlers:
124 for handler in self._new_handlers:
124 for handler in self._new_handlers:
125 handler(heart)
125 handler(heart)
126 else:
126 else:
127 self.log.info("heartbeat::yay, got new heart %s!"%heart)
127 self.log.info("heartbeat::yay, got new heart %s!"%heart)
128 self.hearts.add(heart)
128 self.hearts.add(heart)
129
129
130 def handle_heart_failure(self, heart):
130 def handle_heart_failure(self, heart):
131 if self._failure_handlers:
131 if self._failure_handlers:
132 for handler in self._failure_handlers:
132 for handler in self._failure_handlers:
133 try:
133 try:
134 handler(heart)
134 handler(heart)
135 except Exception:
135 except Exception:
136 self.log.error("heartbeat::Bad Handler! %s"%handler, exc_info=True)
136 self.log.error("heartbeat::Bad Handler! %s"%handler, exc_info=True)
138 else:
138 else:
139 self.log.info("heartbeat::Heart %s failed :("%heart)
139 self.log.info("heartbeat::Heart %s failed :("%heart)
140 self.hearts.remove(heart)
140 self.hearts.remove(heart)
141
141
142
142
143 def handle_pong(self, msg):
143 def handle_pong(self, msg):
144 "a heart just beat"
144 "a heart just beat"
145 current = asbytes(str(self.lifetime))
145 current = asbytes(str(self.lifetime))
146 last = asbytes(str(self.last_ping))
146 last = asbytes(str(self.last_ping))
147 if msg[1] == current:
147 if msg[1] == current:
148 delta = time.time()-self.tic
148 delta = time.time()-self.tic
149 # self.log.debug("heartbeat::heart %r took %.2f ms to respond"%(msg[0], 1000*delta))
149 # self.log.debug("heartbeat::heart %r took %.2f ms to respond"%(msg[0], 1000*delta))
150 self.responses.add(msg[0])
150 self.responses.add(msg[0])
151 elif msg[1] == last:
151 elif msg[1] == last:
152 delta = time.time()-self.tic + (self.lifetime-self.last_ping)
152 delta = time.time()-self.tic + (self.lifetime-self.last_ping)
153 self.log.warn("heartbeat::heart %r missed a beat, and took %.2f ms to respond"%(msg[0], 1000*delta))
153 self.log.warn("heartbeat::heart %r missed a beat, and took %.2f ms to respond"%(msg[0], 1000*delta))
154 self.responses.add(msg[0])
154 self.responses.add(msg[0])
155 else:
155 else:
156 self.log.warn("heartbeat::got bad heartbeat (possibly old?): %s (current=%.3f)"%
156 self.log.warn("heartbeat::got bad heartbeat (possibly old?): %s (current=%.3f)"%
157 (msg[1],self.lifetime))
157 (msg[1],self.lifetime))
158
158
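# Frame format assumed by handle_pong (per the checks above): the ROUTER
# socket prepends the heart's identity, so msg == [heart_id, ping_payload],
# where ping_payload is the str(lifetime) bytes sent by beat(), e.g.:
#
#     msg = [b'my-engine', b'12.5']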
159
159
160 if __name__ == '__main__':
160 if __name__ == '__main__':
161 loop = ioloop.IOLoop.instance()
161 loop = ioloop.IOLoop.instance()
162 context = zmq.Context()
162 context = zmq.Context()
163 pub = context.socket(zmq.PUB)
163 pub = context.socket(zmq.PUB)
164 pub.bind('tcp://127.0.0.1:5555')
164 pub.bind('tcp://127.0.0.1:5555')
165 xrep = context.socket(zmq.XREP)
165 xrep = context.socket(zmq.ROUTER)
166 xrep.bind('tcp://127.0.0.1:5556')
166 xrep.bind('tcp://127.0.0.1:5556')
167
167
168 outstream = zmqstream.ZMQStream(pub, loop)
168 outstream = zmqstream.ZMQStream(pub, loop)
169 instream = zmqstream.ZMQStream(xrep, loop)
169 instream = zmqstream.ZMQStream(xrep, loop)
170
170
171 hb = HeartMonitor(loop=loop, pingstream=outstream, pongstream=instream)
171 hb = HeartMonitor(loop=loop, pingstream=outstream, pongstream=instream)
172 hb.start() # start the periodic beat; without this, no pings are ever sent
172 hb.start() # start the periodic beat; without this, no pings are ever sent
173 loop.start()
173 loop.start()
@@ -1,1290 +1,1290 b''
1 """The IPython Controller Hub with 0MQ
1 """The IPython Controller Hub with 0MQ
2 This is the master object that handles connections from engines and clients,
2 This is the master object that handles connections from engines and clients,
3 and monitors traffic through the various queues.
3 and monitors traffic through the various queues.
4
4
5 Authors:
5 Authors:
6
6
7 * Min RK
7 * Min RK
8 """
8 """
9 #-----------------------------------------------------------------------------
9 #-----------------------------------------------------------------------------
10 # Copyright (C) 2010 The IPython Development Team
10 # Copyright (C) 2010 The IPython Development Team
11 #
11 #
12 # Distributed under the terms of the BSD License. The full license is in
12 # Distributed under the terms of the BSD License. The full license is in
13 # the file COPYING, distributed as part of this software.
13 # the file COPYING, distributed as part of this software.
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15
15
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17 # Imports
17 # Imports
18 #-----------------------------------------------------------------------------
18 #-----------------------------------------------------------------------------
19 from __future__ import print_function
19 from __future__ import print_function
20
20
21 import sys
21 import sys
22 import time
22 import time
23 from datetime import datetime
23 from datetime import datetime
24
24
25 import zmq
25 import zmq
26 from zmq.eventloop import ioloop
26 from zmq.eventloop import ioloop
27 from zmq.eventloop.zmqstream import ZMQStream
27 from zmq.eventloop.zmqstream import ZMQStream
28
28
29 # internal:
29 # internal:
30 from IPython.utils.importstring import import_item
30 from IPython.utils.importstring import import_item
31 from IPython.utils.traitlets import (
31 from IPython.utils.traitlets import (
32 HasTraits, Instance, Int, Unicode, Dict, Set, Tuple, CBytes, DottedObjectName
32 HasTraits, Instance, Int, Unicode, Dict, Set, Tuple, CBytes, DottedObjectName
33 )
33 )
34
34
35 from IPython.parallel import error, util
35 from IPython.parallel import error, util
36 from IPython.parallel.factory import RegistrationFactory
36 from IPython.parallel.factory import RegistrationFactory
37
37
38 from IPython.zmq.session import SessionFactory
38 from IPython.zmq.session import SessionFactory
39
39
40 from .heartmonitor import HeartMonitor
40 from .heartmonitor import HeartMonitor
41
41
42 #-----------------------------------------------------------------------------
42 #-----------------------------------------------------------------------------
43 # Code
43 # Code
44 #-----------------------------------------------------------------------------
44 #-----------------------------------------------------------------------------
45
45
46 def _passer(*args, **kwargs):
46 def _passer(*args, **kwargs):
47 return
47 return
48
48
49 def _printer(*args, **kwargs):
49 def _printer(*args, **kwargs):
50 print (args)
50 print (args)
51 print (kwargs)
51 print (kwargs)
52
52
53 def empty_record():
53 def empty_record():
54 """Return an empty dict with all record keys."""
54 """Return an empty dict with all record keys."""
55 return {
55 return {
56 'msg_id' : None,
56 'msg_id' : None,
57 'header' : None,
57 'header' : None,
58 'content': None,
58 'content': None,
59 'buffers': None,
59 'buffers': None,
60 'submitted': None,
60 'submitted': None,
61 'client_uuid' : None,
61 'client_uuid' : None,
62 'engine_uuid' : None,
62 'engine_uuid' : None,
63 'started': None,
63 'started': None,
64 'completed': None,
64 'completed': None,
65 'resubmitted': None,
65 'resubmitted': None,
66 'result_header' : None,
66 'result_header' : None,
67 'result_content' : None,
67 'result_content' : None,
68 'result_buffers' : None,
68 'result_buffers' : None,
69 'queue' : None,
69 'queue' : None,
70 'pyin' : None,
70 'pyin' : None,
71 'pyout': None,
71 'pyout': None,
72 'pyerr': None,
72 'pyerr': None,
73 'stdout': '',
73 'stdout': '',
74 'stderr': '',
74 'stderr': '',
75 }
75 }
76
76
77 def init_record(msg):
77 def init_record(msg):
78 """Initialize a TaskRecord based on a request."""
78 """Initialize a TaskRecord based on a request."""
79 header = msg['header']
79 header = msg['header']
80 return {
80 return {
81 'msg_id' : header['msg_id'],
81 'msg_id' : header['msg_id'],
82 'header' : header,
82 'header' : header,
83 'content': msg['content'],
83 'content': msg['content'],
84 'buffers': msg['buffers'],
84 'buffers': msg['buffers'],
85 'submitted': header['date'],
85 'submitted': header['date'],
86 'client_uuid' : None,
86 'client_uuid' : None,
87 'engine_uuid' : None,
87 'engine_uuid' : None,
88 'started': None,
88 'started': None,
89 'completed': None,
89 'completed': None,
90 'resubmitted': None,
90 'resubmitted': None,
91 'result_header' : None,
91 'result_header' : None,
92 'result_content' : None,
92 'result_content' : None,
93 'result_buffers' : None,
93 'result_buffers' : None,
94 'queue' : None,
94 'queue' : None,
95 'pyin' : None,
95 'pyin' : None,
96 'pyout': None,
96 'pyout': None,
97 'pyerr': None,
97 'pyerr': None,
98 'stdout': '',
98 'stdout': '',
99 'stderr': '',
99 'stderr': '',
100 }
100 }
101
101
102
102
103 class EngineConnector(HasTraits):
103 class EngineConnector(HasTraits):
104 """A simple object for accessing the various zmq connections of an object.
104 """A simple object for accessing the various zmq connections of an object.
105 Attributes are:
105 Attributes are:
106 id (int): engine ID
106 id (int): engine ID
107 uuid (str): uuid (unused?)
107 uuid (str): uuid (unused?)
108 queue (str): identity of queue's DEALER socket
108 queue (str): identity of queue's DEALER socket
109 registration (str): identity of registration DEALER socket
109 registration (str): identity of registration DEALER socket
110 heartbeat (str): identity of heartbeat DEALER socket
110 heartbeat (str): identity of heartbeat DEALER socket
111 """
111 """
112 id=Int(0)
112 id=Int(0)
113 queue=CBytes()
113 queue=CBytes()
114 control=CBytes()
114 control=CBytes()
115 registration=CBytes()
115 registration=CBytes()
116 heartbeat=CBytes()
116 heartbeat=CBytes()
117 pending=Set()
117 pending=Set()
118
118
119 class HubFactory(RegistrationFactory):
119 class HubFactory(RegistrationFactory):
120 """The Configurable for setting up a Hub."""
120 """The Configurable for setting up a Hub."""
121
121
122 # port-pairs for monitoredqueues:
122 # port-pairs for monitoredqueues:
123 hb = Tuple(Int,Int,config=True,
123 hb = Tuple(Int,Int,config=True,
124 help="""DEALER/SUB Port pair for Engine heartbeats""")
124 help="""DEALER/SUB Port pair for Engine heartbeats""")
125 def _hb_default(self):
125 def _hb_default(self):
126 return tuple(util.select_random_ports(2))
126 return tuple(util.select_random_ports(2))
127
127
128 mux = Tuple(Int,Int,config=True,
128 mux = Tuple(Int,Int,config=True,
129 help="""Engine/Client Port pair for MUX queue""")
129 help="""Engine/Client Port pair for MUX queue""")
130
130
131 def _mux_default(self):
131 def _mux_default(self):
132 return tuple(util.select_random_ports(2))
132 return tuple(util.select_random_ports(2))
133
133
134 task = Tuple(Int,Int,config=True,
134 task = Tuple(Int,Int,config=True,
135 help="""Engine/Client Port pair for Task queue""")
135 help="""Engine/Client Port pair for Task queue""")
136 def _task_default(self):
136 def _task_default(self):
137 return tuple(util.select_random_ports(2))
137 return tuple(util.select_random_ports(2))
138
138
139 control = Tuple(Int,Int,config=True,
139 control = Tuple(Int,Int,config=True,
140 help="""Engine/Client Port pair for Control queue""")
140 help="""Engine/Client Port pair for Control queue""")
141
141
142 def _control_default(self):
142 def _control_default(self):
143 return tuple(util.select_random_ports(2))
143 return tuple(util.select_random_ports(2))
144
144
145 iopub = Tuple(Int,Int,config=True,
145 iopub = Tuple(Int,Int,config=True,
146 help="""Engine/Client Port pair for IOPub relay""")
146 help="""Engine/Client Port pair for IOPub relay""")
147
147
148 def _iopub_default(self):
148 def _iopub_default(self):
149 return tuple(util.select_random_ports(2))
149 return tuple(util.select_random_ports(2))
150
150
151 # single ports:
151 # single ports:
152 mon_port = Int(config=True,
152 mon_port = Int(config=True,
153 help="""Monitor (SUB) port for queue traffic""")
153 help="""Monitor (SUB) port for queue traffic""")
154
154
155 def _mon_port_default(self):
155 def _mon_port_default(self):
156 return util.select_random_ports(1)[0]
156 return util.select_random_ports(1)[0]
157
157
158 notifier_port = Int(config=True,
158 notifier_port = Int(config=True,
159 help="""PUB port for sending engine status notifications""")
159 help="""PUB port for sending engine status notifications""")
160
160
161 def _notifier_port_default(self):
161 def _notifier_port_default(self):
162 return util.select_random_ports(1)[0]
162 return util.select_random_ports(1)[0]
163
163
164 engine_ip = Unicode('127.0.0.1', config=True,
164 engine_ip = Unicode('127.0.0.1', config=True,
165 help="IP on which to listen for engine connections. [default: loopback]")
165 help="IP on which to listen for engine connections. [default: loopback]")
166 engine_transport = Unicode('tcp', config=True,
166 engine_transport = Unicode('tcp', config=True,
167 help="0MQ transport for engine connections. [default: tcp]")
167 help="0MQ transport for engine connections. [default: tcp]")
168
168
169 client_ip = Unicode('127.0.0.1', config=True,
169 client_ip = Unicode('127.0.0.1', config=True,
170 help="IP on which to listen for client connections. [default: loopback]")
170 help="IP on which to listen for client connections. [default: loopback]")
171 client_transport = Unicode('tcp', config=True,
171 client_transport = Unicode('tcp', config=True,
172 help="0MQ transport for client connections. [default : tcp]")
172 help="0MQ transport for client connections. [default : tcp]")
173
173
174 monitor_ip = Unicode('127.0.0.1', config=True,
174 monitor_ip = Unicode('127.0.0.1', config=True,
175 help="IP on which to listen for monitor messages. [default: loopback]")
175 help="IP on which to listen for monitor messages. [default: loopback]")
176 monitor_transport = Unicode('tcp', config=True,
176 monitor_transport = Unicode('tcp', config=True,
177 help="0MQ transport for monitor messages. [default : tcp]")
177 help="0MQ transport for monitor messages. [default : tcp]")
178
178
179 monitor_url = Unicode('')
179 monitor_url = Unicode('')
180
180
181 db_class = DottedObjectName('IPython.parallel.controller.dictdb.DictDB',
181 db_class = DottedObjectName('IPython.parallel.controller.dictdb.DictDB',
182 config=True, help="""The class to use for the DB backend""")
182 config=True, help="""The class to use for the DB backend""")
183
183
184 # not configurable
184 # not configurable
185 db = Instance('IPython.parallel.controller.dictdb.BaseDB')
185 db = Instance('IPython.parallel.controller.dictdb.BaseDB')
186 heartmonitor = Instance('IPython.parallel.controller.heartmonitor.HeartMonitor')
186 heartmonitor = Instance('IPython.parallel.controller.heartmonitor.HeartMonitor')
187
187
188 def _ip_changed(self, name, old, new):
188 def _ip_changed(self, name, old, new):
189 self.engine_ip = new
189 self.engine_ip = new
190 self.client_ip = new
190 self.client_ip = new
191 self.monitor_ip = new
191 self.monitor_ip = new
192 self._update_monitor_url()
192 self._update_monitor_url()
193
193
194 def _update_monitor_url(self):
194 def _update_monitor_url(self):
195 self.monitor_url = "%s://%s:%i"%(self.monitor_transport, self.monitor_ip, self.mon_port)
195 self.monitor_url = "%s://%s:%i"%(self.monitor_transport, self.monitor_ip, self.mon_port)
196
196
197 def _transport_changed(self, name, old, new):
197 def _transport_changed(self, name, old, new):
198 self.engine_transport = new
198 self.engine_transport = new
199 self.client_transport = new
199 self.client_transport = new
200 self.monitor_transport = new
200 self.monitor_transport = new
201 self._update_monitor_url()
201 self._update_monitor_url()
202
202
203 def __init__(self, **kwargs):
203 def __init__(self, **kwargs):
204 super(HubFactory, self).__init__(**kwargs)
204 super(HubFactory, self).__init__(**kwargs)
205 self._update_monitor_url()
205 self._update_monitor_url()
206
206
207
207
208 def construct(self):
208 def construct(self):
209 self.init_hub()
209 self.init_hub()
210
210
211 def start(self):
211 def start(self):
212 self.heartmonitor.start()
212 self.heartmonitor.start()
213 self.log.info("Heartmonitor started")
213 self.log.info("Heartmonitor started")
214
214
215 def init_hub(self):
215 def init_hub(self):
216 """construct"""
216 """construct"""
217 client_iface = "%s://%s:"%(self.client_transport, self.client_ip) + "%i"
217 client_iface = "%s://%s:"%(self.client_transport, self.client_ip) + "%i"
218 engine_iface = "%s://%s:"%(self.engine_transport, self.engine_ip) + "%i"
218 engine_iface = "%s://%s:"%(self.engine_transport, self.engine_ip) + "%i"
219
219
220 ctx = self.context
220 ctx = self.context
221 loop = self.loop
221 loop = self.loop
222
222
223 # Registrar socket
223 # Registrar socket
224 q = ZMQStream(ctx.socket(zmq.XREP), loop)
224 q = ZMQStream(ctx.socket(zmq.ROUTER), loop)
225 q.bind(client_iface % self.regport)
225 q.bind(client_iface % self.regport)
226 self.log.info("Hub listening on %s for registration."%(client_iface%self.regport))
226 self.log.info("Hub listening on %s for registration."%(client_iface%self.regport))
227 if self.client_ip != self.engine_ip:
227 if self.client_ip != self.engine_ip:
228 q.bind(engine_iface % self.regport)
228 q.bind(engine_iface % self.regport)
229 self.log.info("Hub listening on %s for registration."%(engine_iface%self.regport))
229 self.log.info("Hub listening on %s for registration."%(engine_iface%self.regport))
230
230
231 ### Engine connections ###
231 ### Engine connections ###
232
232
233 # heartbeat
233 # heartbeat
234 hpub = ctx.socket(zmq.PUB)
234 hpub = ctx.socket(zmq.PUB)
235 hpub.bind(engine_iface % self.hb[0])
235 hpub.bind(engine_iface % self.hb[0])
236 hrep = ctx.socket(zmq.XREP)
236 hrep = ctx.socket(zmq.ROUTER)
237 hrep.bind(engine_iface % self.hb[1])
237 hrep.bind(engine_iface % self.hb[1])
238 self.heartmonitor = HeartMonitor(loop=loop, config=self.config, log=self.log,
238 self.heartmonitor = HeartMonitor(loop=loop, config=self.config, log=self.log,
239 pingstream=ZMQStream(hpub,loop),
239 pingstream=ZMQStream(hpub,loop),
240 pongstream=ZMQStream(hrep,loop)
240 pongstream=ZMQStream(hrep,loop)
241 )
241 )
242
242
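# Heartbeat wiring sketch (matches the sockets created just above):
#
#   Hub PUB (hb[0])    --ping-->  engine SUB    \
#                                                Heart (zmq.FORWARDER device)
#   Hub ROUTER (hb[1]) <--pong--  engine DEALER, identified by heart_id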
243 ### Client connections ###
243 ### Client connections ###
244 # Notifier socket
244 # Notifier socket
245 n = ZMQStream(ctx.socket(zmq.PUB), loop)
245 n = ZMQStream(ctx.socket(zmq.PUB), loop)
246 n.bind(client_iface%self.notifier_port)
246 n.bind(client_iface%self.notifier_port)
247
247
248 ### build and launch the queues ###
248 ### build and launch the queues ###
249
249
250 # monitor socket
250 # monitor socket
251 sub = ctx.socket(zmq.SUB)
251 sub = ctx.socket(zmq.SUB)
252 sub.setsockopt(zmq.SUBSCRIBE, b"")
252 sub.setsockopt(zmq.SUBSCRIBE, b"")
253 sub.bind(self.monitor_url)
253 sub.bind(self.monitor_url)
254 sub.bind('inproc://monitor')
254 sub.bind('inproc://monitor')
255 sub = ZMQStream(sub, loop)
255 sub = ZMQStream(sub, loop)
256
256
257 # connect the db
257 # connect the db
258 self.log.info('Hub using DB backend: %r'%(self.db_class.split()[-1]))
258 self.log.info('Hub using DB backend: %r'%(self.db_class.split()[-1]))
259 # cdir = self.config.Global.cluster_dir
259 # cdir = self.config.Global.cluster_dir
260 self.db = import_item(str(self.db_class))(session=self.session.session,
260 self.db = import_item(str(self.db_class))(session=self.session.session,
261 config=self.config, log=self.log)
261 config=self.config, log=self.log)
262 time.sleep(.25)
262 time.sleep(.25)
263 try:
263 try:
264 scheme = self.config.TaskScheduler.scheme_name
264 scheme = self.config.TaskScheduler.scheme_name
265 except AttributeError:
265 except AttributeError:
266 from .scheduler import TaskScheduler
266 from .scheduler import TaskScheduler
267 scheme = TaskScheduler.scheme_name.get_default_value()
267 scheme = TaskScheduler.scheme_name.get_default_value()
268 # build connection dicts
268 # build connection dicts
269 self.engine_info = {
269 self.engine_info = {
270 'control' : engine_iface%self.control[1],
270 'control' : engine_iface%self.control[1],
271 'mux': engine_iface%self.mux[1],
271 'mux': engine_iface%self.mux[1],
272 'heartbeat': (engine_iface%self.hb[0], engine_iface%self.hb[1]),
272 'heartbeat': (engine_iface%self.hb[0], engine_iface%self.hb[1]),
273 'task' : engine_iface%self.task[1],
273 'task' : engine_iface%self.task[1],
274 'iopub' : engine_iface%self.iopub[1],
274 'iopub' : engine_iface%self.iopub[1],
275 # 'monitor' : engine_iface%self.mon_port,
275 # 'monitor' : engine_iface%self.mon_port,
276 }
276 }
277
277
278 self.client_info = {
278 self.client_info = {
279 'control' : client_iface%self.control[0],
279 'control' : client_iface%self.control[0],
280 'mux': client_iface%self.mux[0],
280 'mux': client_iface%self.mux[0],
281 'task' : (scheme, client_iface%self.task[0]),
281 'task' : (scheme, client_iface%self.task[0]),
282 'iopub' : client_iface%self.iopub[0],
282 'iopub' : client_iface%self.iopub[0],
283 'notification': client_iface%self.notifier_port
283 'notification': client_iface%self.notifier_port
284 }
284 }
285 self.log.debug("Hub engine addrs: %s"%self.engine_info)
285 self.log.debug("Hub engine addrs: %s"%self.engine_info)
286 self.log.debug("Hub client addrs: %s"%self.client_info)
286 self.log.debug("Hub client addrs: %s"%self.client_info)
287
287
288 # resubmit stream
288 # resubmit stream
289 r = ZMQStream(ctx.socket(zmq.XREQ), loop)
289 r = ZMQStream(ctx.socket(zmq.DEALER), loop)
290 url = util.disambiguate_url(self.client_info['task'][-1])
290 url = util.disambiguate_url(self.client_info['task'][-1])
291 r.setsockopt(zmq.IDENTITY, util.asbytes(self.session.session))
291 r.setsockopt(zmq.IDENTITY, util.asbytes(self.session.session))
292 r.connect(url)
292 r.connect(url)
293
293
294 self.hub = Hub(loop=loop, session=self.session, monitor=sub, heartmonitor=self.heartmonitor,
294 self.hub = Hub(loop=loop, session=self.session, monitor=sub, heartmonitor=self.heartmonitor,
295 query=q, notifier=n, resubmit=r, db=self.db,
295 query=q, notifier=n, resubmit=r, db=self.db,
296 engine_info=self.engine_info, client_info=self.client_info,
296 engine_info=self.engine_info, client_info=self.client_info,
297 log=self.log)
297 log=self.log)
298
298
299
299
300 class Hub(SessionFactory):
300 class Hub(SessionFactory):
301 """The IPython Controller Hub with 0MQ connections
301 """The IPython Controller Hub with 0MQ connections
302
302
303 Parameters
303 Parameters
304 ==========
304 ==========
305 loop: zmq IOLoop instance
305 loop: zmq IOLoop instance
306 session: Session object
306 session: Session object
307 <removed> context: zmq context for creating new connections (?)
307 <removed> context: zmq context for creating new connections (?)
308 queue: ZMQStream for monitoring the command queue (SUB)
308 queue: ZMQStream for monitoring the command queue (SUB)
309 query: ZMQStream for engine registration and client query requests (ROUTER)
309 query: ZMQStream for engine registration and client query requests (ROUTER)
310 heartbeat: HeartMonitor object checking the pulse of the engines
310 heartbeat: HeartMonitor object checking the pulse of the engines
311 notifier: ZMQStream for broadcasting engine registration changes (PUB)
311 notifier: ZMQStream for broadcasting engine registration changes (PUB)
312 db: connection to db for out of memory logging of commands
312 db: connection to db for out of memory logging of commands
313 NotImplemented
313 NotImplemented
314 engine_info: dict of zmq connection information for engines to connect
314 engine_info: dict of zmq connection information for engines to connect
315 to the queues.
315 to the queues.
316 client_info: dict of zmq connection information for engines to connect
316 client_info: dict of zmq connection information for engines to connect
317 to the queues.
317 to the queues.
318 """
318 """
319 # internal data structures:
319 # internal data structures:
320 ids=Set() # engine IDs
320 ids=Set() # engine IDs
321 keytable=Dict()
321 keytable=Dict()
322 by_ident=Dict()
322 by_ident=Dict()
323 engines=Dict()
323 engines=Dict()
324 clients=Dict()
324 clients=Dict()
325 hearts=Dict()
325 hearts=Dict()
326 pending=Set()
326 pending=Set()
327 queues=Dict() # pending msg_ids keyed by engine_id
327 queues=Dict() # pending msg_ids keyed by engine_id
328 tasks=Dict() # pending msg_ids submitted as tasks, keyed by client_id
328 tasks=Dict() # pending msg_ids submitted as tasks, keyed by client_id
329 completed=Dict() # completed msg_ids keyed by engine_id
329 completed=Dict() # completed msg_ids keyed by engine_id
330 all_completed=Set() # set of all completed msg_ids
330 all_completed=Set() # set of all completed msg_ids
331 dead_engines=Set() # set of uuids of engines that have died
331 dead_engines=Set() # set of uuids of engines that have died
332 unassigned=Set() # set of task msg_ids not yet assigned a destination
332 unassigned=Set() # set of task msg_ids not yet assigned a destination
333 incoming_registrations=Dict()
333 incoming_registrations=Dict()
334 registration_timeout=Int()
334 registration_timeout=Int()
335 _idcounter=Int(0)
335 _idcounter=Int(0)
336
336
337 # objects from constructor:
337 # objects from constructor:
338 query=Instance(ZMQStream)
338 query=Instance(ZMQStream)
339 monitor=Instance(ZMQStream)
339 monitor=Instance(ZMQStream)
340 notifier=Instance(ZMQStream)
340 notifier=Instance(ZMQStream)
341 resubmit=Instance(ZMQStream)
341 resubmit=Instance(ZMQStream)
342 heartmonitor=Instance(HeartMonitor)
342 heartmonitor=Instance(HeartMonitor)
343 db=Instance(object)
343 db=Instance(object)
344 client_info=Dict()
344 client_info=Dict()
345 engine_info=Dict()
345 engine_info=Dict()
346
346
347
347
348 def __init__(self, **kwargs):
348 def __init__(self, **kwargs):
349 """
349 """
350 # universal:
350 # universal:
351 loop: IOLoop for creating future connections
351 loop: IOLoop for creating future connections
352 session: streamsession for sending serialized data
352 session: streamsession for sending serialized data
353 # engine:
353 # engine:
354 queue: ZMQStream for monitoring queue messages
354 queue: ZMQStream for monitoring queue messages
355 query: ZMQStream for engine+client registration and client requests
355 query: ZMQStream for engine+client registration and client requests
356 heartbeat: HeartMonitor object for tracking engines
356 heartbeat: HeartMonitor object for tracking engines
357 # extra:
357 # extra:
358 db: ZMQStream for db connection (NotImplemented)
358 db: ZMQStream for db connection (NotImplemented)
359 engine_info: zmq address/protocol dict for engine connections
359 engine_info: zmq address/protocol dict for engine connections
360 client_info: zmq address/protocol dict for client connections
360 client_info: zmq address/protocol dict for client connections
361 """
361 """
362
362
363 super(Hub, self).__init__(**kwargs)
363 super(Hub, self).__init__(**kwargs)
364 self.registration_timeout = max(5000, 2*self.heartmonitor.period)
364 self.registration_timeout = max(5000, 2*self.heartmonitor.period)
365
365
366 # validate connection dicts:
366 # validate connection dicts:
367 for k,v in self.client_info.iteritems():
367 for k,v in self.client_info.iteritems():
368 if k == 'task':
368 if k == 'task':
369 util.validate_url_container(v[1])
369 util.validate_url_container(v[1])
370 else:
370 else:
371 util.validate_url_container(v)
371 util.validate_url_container(v)
372 # util.validate_url_container(self.client_info)
372 # util.validate_url_container(self.client_info)
373 util.validate_url_container(self.engine_info)
373 util.validate_url_container(self.engine_info)
374
374
375 # register our callbacks
375 # register our callbacks
376 self.query.on_recv(self.dispatch_query)
376 self.query.on_recv(self.dispatch_query)
377 self.monitor.on_recv(self.dispatch_monitor_traffic)
377 self.monitor.on_recv(self.dispatch_monitor_traffic)
378
378
379 self.heartmonitor.add_heart_failure_handler(self.handle_heart_failure)
379 self.heartmonitor.add_heart_failure_handler(self.handle_heart_failure)
380 self.heartmonitor.add_new_heart_handler(self.handle_new_heart)
380 self.heartmonitor.add_new_heart_handler(self.handle_new_heart)
381
381
382 self.monitor_handlers = {b'in' : self.save_queue_request,
382 self.monitor_handlers = {b'in' : self.save_queue_request,
383 b'out': self.save_queue_result,
383 b'out': self.save_queue_result,
384 b'intask': self.save_task_request,
384 b'intask': self.save_task_request,
385 b'outtask': self.save_task_result,
385 b'outtask': self.save_task_result,
386 b'tracktask': self.save_task_destination,
386 b'tracktask': self.save_task_destination,
387 b'incontrol': _passer,
387 b'incontrol': _passer,
388 b'outcontrol': _passer,
388 b'outcontrol': _passer,
389 b'iopub': self.save_iopub_message,
389 b'iopub': self.save_iopub_message,
390 }
390 }
391
391
392 self.query_handlers = {'queue_request': self.queue_status,
392 self.query_handlers = {'queue_request': self.queue_status,
393 'result_request': self.get_results,
393 'result_request': self.get_results,
394 'history_request': self.get_history,
394 'history_request': self.get_history,
395 'db_request': self.db_query,
395 'db_request': self.db_query,
396 'purge_request': self.purge_results,
396 'purge_request': self.purge_results,
397 'load_request': self.check_load,
397 'load_request': self.check_load,
398 'resubmit_request': self.resubmit_task,
398 'resubmit_request': self.resubmit_task,
399 'shutdown_request': self.shutdown_request,
399 'shutdown_request': self.shutdown_request,
400 'registration_request' : self.register_engine,
400 'registration_request' : self.register_engine,
401 'unregistration_request' : self.unregister_engine,
401 'unregistration_request' : self.unregister_engine,
402 'connection_request': self.connection_request,
402 'connection_request': self.connection_request,
403 }
403 }
404
404
405 # ignore resubmit replies
405 # ignore resubmit replies
406 self.resubmit.on_recv(lambda msg: None, copy=False)
406 self.resubmit.on_recv(lambda msg: None, copy=False)
407
407
408 self.log.info("hub::created hub")
408 self.log.info("hub::created hub")
409
409
410 @property
410 @property
411 def _next_id(self):
411 def _next_id(self):
412 """gemerate a new ID.
412 """gemerate a new ID.
413
413
414 No longer reuse old ids, just count from 0."""
414 No longer reuse old ids, just count from 0."""
415 newid = self._idcounter
415 newid = self._idcounter
416 self._idcounter += 1
416 self._idcounter += 1
417 return newid
417 return newid
418 # newid = 0
418 # newid = 0
419 # incoming = [id[0] for id in self.incoming_registrations.itervalues()]
419 # incoming = [id[0] for id in self.incoming_registrations.itervalues()]
420 # # print newid, self.ids, self.incoming_registrations
420 # # print newid, self.ids, self.incoming_registrations
421 # while newid in self.ids or newid in incoming:
421 # while newid in self.ids or newid in incoming:
422 # newid += 1
422 # newid += 1
423 # return newid
423 # return newid
424
424
425 #-----------------------------------------------------------------------------
425 #-----------------------------------------------------------------------------
426 # message validation
426 # message validation
427 #-----------------------------------------------------------------------------
427 #-----------------------------------------------------------------------------
428
428
429 def _validate_targets(self, targets):
429 def _validate_targets(self, targets):
430 """turn any valid targets argument into a list of integer ids"""
430 """turn any valid targets argument into a list of integer ids"""
431 if targets is None:
431 if targets is None:
432 # default to all
432 # default to all
433 targets = self.ids
433 targets = self.ids
434
434
435 if isinstance(targets, (int,str,unicode)):
435 if isinstance(targets, (int,str,unicode)):
436 # only one target specified
436 # only one target specified
437 targets = [targets]
437 targets = [targets]
438 _targets = []
438 _targets = []
439 for t in targets:
439 for t in targets:
440 # map raw identities to ids
440 # map raw identities to ids
441 if isinstance(t, (str,unicode)):
441 if isinstance(t, (str,unicode)):
442 t = self.by_ident.get(t, t)
442 t = self.by_ident.get(t, t)
443 _targets.append(t)
443 _targets.append(t)
444 targets = _targets
444 targets = _targets
445 bad_targets = [ t for t in targets if t not in self.ids ]
445 bad_targets = [ t for t in targets if t not in self.ids ]
446 if bad_targets:
446 if bad_targets:
447 raise IndexError("No Such Engine: %r"%bad_targets)
447 raise IndexError("No Such Engine: %r"%bad_targets)
448 if not targets:
448 if not targets:
449 raise IndexError("No Engines Registered")
449 raise IndexError("No Engines Registered")
450 return targets
450 return targets
451
451
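# Examples of accepted `targets` values (illustrative, assuming engines 0-3
# are registered): None -> all registered ids; 2 -> [2];
# [0, 'engine-uuid'] -> [0, 1], with the raw identity string mapped to its
# integer id via self.by_ident.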
452 #-----------------------------------------------------------------------------
452 #-----------------------------------------------------------------------------
453 # dispatch methods (1 per stream)
453 # dispatch methods (1 per stream)
454 #-----------------------------------------------------------------------------
454 #-----------------------------------------------------------------------------
455
455
456
456
457 def dispatch_monitor_traffic(self, msg):
457 def dispatch_monitor_traffic(self, msg):
458 """all ME and Task queue messages come through here, as well as
458 """all ME and Task queue messages come through here, as well as
459 IOPub traffic."""
459 IOPub traffic."""
460 self.log.debug("monitor traffic: %r"%msg[:2])
460 self.log.debug("monitor traffic: %r"%msg[:2])
461 switch = msg[0]
461 switch = msg[0]
462 try:
462 try:
463 idents, msg = self.session.feed_identities(msg[1:])
463 idents, msg = self.session.feed_identities(msg[1:])
464 except ValueError:
464 except ValueError:
465 idents=[]
465 idents=[]
466 if not idents:
466 if not idents:
467 self.log.error("Bad Monitor Message: %r"%msg)
467 self.log.error("Bad Monitor Message: %r"%msg)
468 return
468 return
469 handler = self.monitor_handlers.get(switch, None)
469 handler = self.monitor_handlers.get(switch, None)
470 if handler is not None:
470 if handler is not None:
471 handler(idents, msg)
471 handler(idents, msg)
472 else:
472 else:
473 self.log.error("Invalid monitor topic: %r"%switch)
473 self.log.error("Invalid monitor topic: %r"%switch)
474
474
475
475
476 def dispatch_query(self, msg):
476 def dispatch_query(self, msg):
477 """Route registration requests and queries from clients."""
477 """Route registration requests and queries from clients."""
478 try:
478 try:
479 idents, msg = self.session.feed_identities(msg)
479 idents, msg = self.session.feed_identities(msg)
480 except ValueError:
480 except ValueError:
481 idents = []
481 idents = []
482 if not idents:
482 if not idents:
483 self.log.error("Bad Query Message: %r"%msg)
483 self.log.error("Bad Query Message: %r"%msg)
484 return
484 return
485 client_id = idents[0]
485 client_id = idents[0]
486 try:
486 try:
487 msg = self.session.unserialize(msg, content=True)
487 msg = self.session.unserialize(msg, content=True)
488 except Exception:
488 except Exception:
489 content = error.wrap_exception()
489 content = error.wrap_exception()
490 self.log.error("Bad Query Message: %r"%msg, exc_info=True)
490 self.log.error("Bad Query Message: %r"%msg, exc_info=True)
491 self.session.send(self.query, "hub_error", ident=client_id,
491 self.session.send(self.query, "hub_error", ident=client_id,
492 content=content)
492 content=content)
493 return
493 return
494 # print client_id, header, parent, content
494 # print client_id, header, parent, content
495 #switch on message type:
495 #switch on message type:
496 msg_type = msg['header']['msg_type']
496 msg_type = msg['header']['msg_type']
497 self.log.info("client::client %r requested %r"%(client_id, msg_type))
497 self.log.info("client::client %r requested %r"%(client_id, msg_type))
498 handler = self.query_handlers.get(msg_type, None)
498 handler = self.query_handlers.get(msg_type, None)
499 try:
499 try:
500 assert handler is not None, "Bad Message Type: %r"%msg_type
500 assert handler is not None, "Bad Message Type: %r"%msg_type
501 except:
501 except:
502 content = error.wrap_exception()
502 content = error.wrap_exception()
503 self.log.error("Bad Message Type: %r"%msg_type, exc_info=True)
503 self.log.error("Bad Message Type: %r"%msg_type, exc_info=True)
504 self.session.send(self.query, "hub_error", ident=client_id,
504 self.session.send(self.query, "hub_error", ident=client_id,
505 content=content)
505 content=content)
506 return
506 return
507
507
508 else:
508 else:
509 handler(idents, msg)
509 handler(idents, msg)
510
510
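# session.feed_identities() splits the zmq routing prefix off a multipart
# message. On the wire, the routing idents precede a delimiter frame
# (b'<IDS|MSG>' in IPython's Session); a simplified model of the split:

DELIM = b'<IDS|MSG>'

def feed_identities(frames):
    """Split [ident, ..., DELIM, payload, ...] into (idents, payload)."""
    if DELIM not in frames:
        raise ValueError("DELIM not in msg_list")
    i = frames.index(DELIM)
    return frames[:i], frames[i + 1:]

# feed_identities([b'client-1', DELIM, b'header', b'content'])
# returns ([b'client-1'], [b'header', b'content'])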
511 def dispatch_db(self, msg):
511 def dispatch_db(self, msg):
512 """"""
512 """"""
513 raise NotImplementedError
513 raise NotImplementedError
514
514
515 #---------------------------------------------------------------------------
515 #---------------------------------------------------------------------------
516 # handler methods (1 per event)
516 # handler methods (1 per event)
517 #---------------------------------------------------------------------------
517 #---------------------------------------------------------------------------
518
518
519 #----------------------- Heartbeat --------------------------------------
519 #----------------------- Heartbeat --------------------------------------
520
520
521 def handle_new_heart(self, heart):
521 def handle_new_heart(self, heart):
522 """handler to attach to heartbeater.
522 """handler to attach to heartbeater.
523 Called when a new heart starts to beat.
523 Called when a new heart starts to beat.
524 Triggers completion of registration."""
524 Triggers completion of registration."""
525 self.log.debug("heartbeat::handle_new_heart(%r)"%heart)
525 self.log.debug("heartbeat::handle_new_heart(%r)"%heart)
526 if heart not in self.incoming_registrations:
526 if heart not in self.incoming_registrations:
527 self.log.info("heartbeat::ignoring new heart: %r"%heart)
527 self.log.info("heartbeat::ignoring new heart: %r"%heart)
528 else:
528 else:
529 self.finish_registration(heart)
529 self.finish_registration(heart)
530
530
531
531
532 def handle_heart_failure(self, heart):
532 def handle_heart_failure(self, heart):
533 """handler to attach to heartbeater.
533 """handler to attach to heartbeater.
534 called when a previously registered heart fails to respond to beat request.
534 called when a previously registered heart fails to respond to beat request.
535 triggers unregistration"""
535 triggers unregistration"""
536 self.log.debug("heartbeat::handle_heart_failure(%r)"%heart)
536 self.log.debug("heartbeat::handle_heart_failure(%r)"%heart)
537 eid = self.hearts.get(heart, None)
537 eid = self.hearts.get(heart, None)
538 if eid is None:
538 if eid is None:
539 self.log.info("heartbeat::ignoring heart failure %r"%heart)
539 self.log.info("heartbeat::ignoring heart failure %r"%heart)
540 else:
540 else:
541 queue = self.engines[eid].queue
541 queue = self.engines[eid].queue
542 self.unregister_engine(heart, dict(content=dict(id=eid, queue=queue)))
542 self.unregister_engine(heart, dict(content=dict(id=eid, queue=queue)))
543
543
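# The two heartbeat callbacks above maintain a simple invariant: a heart
# ident is either parked in incoming_registrations (waiting for its first
# beat) or mapped to an engine id in self.hearts. A toy model of that
# lifecycle (the data values are made up):

incoming_registrations = {b'heart-1': ('registration data',)}
hearts = {}  # heart ident -> engine id, filled in on the first beat

def on_new_heart(heart):
    if heart in incoming_registrations:
        incoming_registrations.pop(heart)
        hearts[heart] = len(hearts)  # simplified id assignment

def on_heart_failure(heart):
    eid = hearts.pop(heart, None)
    if eid is not None:
        print('unregistering engine', eid)

on_new_heart(b'heart-1')      # completes registration: hearts == {b'heart-1': 0}
on_heart_failure(b'heart-1')  # prints 'unregistering engine 0'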
544 #----------------------- MUX Queue Traffic ------------------------------
544 #----------------------- MUX Queue Traffic ------------------------------
545
545
546 def save_queue_request(self, idents, msg):
546 def save_queue_request(self, idents, msg):
547 if len(idents) < 2:
547 if len(idents) < 2:
548 self.log.error("invalid identity prefix: %r"%idents)
548 self.log.error("invalid identity prefix: %r"%idents)
549 return
549 return
550 queue_id, client_id = idents[:2]
550 queue_id, client_id = idents[:2]
551 try:
551 try:
552 msg = self.session.unserialize(msg)
552 msg = self.session.unserialize(msg)
553 except Exception:
553 except Exception:
554 self.log.error("queue::client %r sent invalid message to %r: %r"%(client_id, queue_id, msg), exc_info=True)
554 self.log.error("queue::client %r sent invalid message to %r: %r"%(client_id, queue_id, msg), exc_info=True)
555 return
555 return
556
556
557 eid = self.by_ident.get(queue_id, None)
557 eid = self.by_ident.get(queue_id, None)
558 if eid is None:
558 if eid is None:
559 self.log.error("queue::target %r not registered"%queue_id)
559 self.log.error("queue::target %r not registered"%queue_id)
560 self.log.debug("queue:: valid are: %r"%(self.by_ident.keys()))
560 self.log.debug("queue:: valid are: %r"%(self.by_ident.keys()))
561 return
561 return
562 record = init_record(msg)
562 record = init_record(msg)
563 msg_id = record['msg_id']
563 msg_id = record['msg_id']
564 # Unicode in records
564 # Unicode in records
565 record['engine_uuid'] = queue_id.decode('ascii')
565 record['engine_uuid'] = queue_id.decode('ascii')
566 record['client_uuid'] = client_id.decode('ascii')
566 record['client_uuid'] = client_id.decode('ascii')
567 record['queue'] = 'mux'
567 record['queue'] = 'mux'
568
568
569 try:
569 try:
570 # it's possible iopub arrived first:
570 # it's possible iopub arrived first:
571 existing = self.db.get_record(msg_id)
571 existing = self.db.get_record(msg_id)
572 for key,evalue in existing.iteritems():
572 for key,evalue in existing.iteritems():
573 rvalue = record.get(key, None)
573 rvalue = record.get(key, None)
574 if evalue and rvalue and evalue != rvalue:
574 if evalue and rvalue and evalue != rvalue:
575 self.log.warn("conflicting initial state for record: %r:%r <%r> %r"%(msg_id, rvalue, key, evalue))
575 self.log.warn("conflicting initial state for record: %r:%r <%r> %r"%(msg_id, rvalue, key, evalue))
576 elif evalue and not rvalue:
576 elif evalue and not rvalue:
577 record[key] = evalue
577 record[key] = evalue
578 try:
578 try:
579 self.db.update_record(msg_id, record)
579 self.db.update_record(msg_id, record)
580 except Exception:
580 except Exception:
581 self.log.error("DB Error updating record %r"%msg_id, exc_info=True)
581 self.log.error("DB Error updating record %r"%msg_id, exc_info=True)
582 except KeyError:
582 except KeyError:
583 try:
583 try:
584 self.db.add_record(msg_id, record)
584 self.db.add_record(msg_id, record)
585 except Exception:
585 except Exception:
586 self.log.error("DB Error adding record %r"%msg_id, exc_info=True)
586 self.log.error("DB Error adding record %r"%msg_id, exc_info=True)
587
587
588
588
589 self.pending.add(msg_id)
589 self.pending.add(msg_id)
590 self.queues[eid].append(msg_id)
590 self.queues[eid].append(msg_id)
591
591
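# The merge loop above (and its twin in save_task_request) reconciles an
# existing DB record with a freshly built one: keys present in both must
# agree, and keys only present in the DB are carried over. The same logic
# as a pure function, for clarity:

def merge_record(existing, record, warn=print):
    for key, evalue in existing.items():
        rvalue = record.get(key, None)
        if evalue and rvalue and evalue != rvalue:
            warn("conflicting initial state for %r: %r != %r"
                 % (key, rvalue, evalue))
        elif evalue and not rvalue:
            record[key] = evalue
    return record

# merge_record({'engine_uuid': 'abc', 'stdout': 'hi'}, {'engine_uuid': 'abc'})
# returns {'engine_uuid': 'abc', 'stdout': 'hi'}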
592 def save_queue_result(self, idents, msg):
592 def save_queue_result(self, idents, msg):
593 if len(idents) < 2:
593 if len(idents) < 2:
594 self.log.error("invalid identity prefix: %r"%idents)
594 self.log.error("invalid identity prefix: %r"%idents)
595 return
595 return
596
596
597 client_id, queue_id = idents[:2]
597 client_id, queue_id = idents[:2]
598 try:
598 try:
599 msg = self.session.unserialize(msg)
599 msg = self.session.unserialize(msg)
600 except Exception:
600 except Exception:
601 self.log.error("queue::engine %r sent invalid message to %r: %r"%(
601 self.log.error("queue::engine %r sent invalid message to %r: %r"%(
602 queue_id,client_id, msg), exc_info=True)
602 queue_id,client_id, msg), exc_info=True)
603 return
603 return
604
604
605 eid = self.by_ident.get(queue_id, None)
605 eid = self.by_ident.get(queue_id, None)
606 if eid is None:
606 if eid is None:
607 self.log.error("queue::unknown engine %r is sending a reply: "%queue_id)
607 self.log.error("queue::unknown engine %r is sending a reply: "%queue_id)
608 return
608 return
609
609
610 parent = msg['parent_header']
610 parent = msg['parent_header']
611 if not parent:
611 if not parent:
612 return
612 return
613 msg_id = parent['msg_id']
613 msg_id = parent['msg_id']
614 if msg_id in self.pending:
614 if msg_id in self.pending:
615 self.pending.remove(msg_id)
615 self.pending.remove(msg_id)
616 self.all_completed.add(msg_id)
616 self.all_completed.add(msg_id)
617 self.queues[eid].remove(msg_id)
617 self.queues[eid].remove(msg_id)
618 self.completed[eid].append(msg_id)
618 self.completed[eid].append(msg_id)
619 elif msg_id not in self.all_completed:
619 elif msg_id not in self.all_completed:
620 # it could be a result from a dead engine that died before delivering the
620 # it could be a result from a dead engine that died before delivering the
621 # result
621 # result
622 self.log.warn("queue:: unknown msg finished %r"%msg_id)
622 self.log.warn("queue:: unknown msg finished %r"%msg_id)
623 return
623 return
624 # update record anyway, because the unregistration could have been premature
624 # update record anyway, because the unregistration could have been premature
625 rheader = msg['header']
625 rheader = msg['header']
626 completed = rheader['date']
626 completed = rheader['date']
627 started = rheader.get('started', None)
627 started = rheader.get('started', None)
628 result = {
628 result = {
629 'result_header' : rheader,
629 'result_header' : rheader,
630 'result_content': msg['content'],
630 'result_content': msg['content'],
631 'started' : started,
631 'started' : started,
632 'completed' : completed
632 'completed' : completed
633 }
633 }
634
634
635 result['result_buffers'] = msg['buffers']
635 result['result_buffers'] = msg['buffers']
636 try:
636 try:
637 self.db.update_record(msg_id, result)
637 self.db.update_record(msg_id, result)
638 except Exception:
638 except Exception:
639 self.log.error("DB Error updating record %r"%msg_id, exc_info=True)
639 self.log.error("DB Error updating record %r"%msg_id, exc_info=True)
640
640
641
641
642 #--------------------- Task Queue Traffic ------------------------------
642 #--------------------- Task Queue Traffic ------------------------------
643
643
644 def save_task_request(self, idents, msg):
644 def save_task_request(self, idents, msg):
645 """Save the submission of a task."""
645 """Save the submission of a task."""
646 client_id = idents[0]
646 client_id = idents[0]
647
647
648 try:
648 try:
649 msg = self.session.unserialize(msg)
649 msg = self.session.unserialize(msg)
650 except Exception:
650 except Exception:
651 self.log.error("task::client %r sent invalid task message: %r"%(
651 self.log.error("task::client %r sent invalid task message: %r"%(
652 client_id, msg), exc_info=True)
652 client_id, msg), exc_info=True)
653 return
653 return
654 record = init_record(msg)
654 record = init_record(msg)
655
655
656 record['client_uuid'] = client_id
656 record['client_uuid'] = client_id
657 record['queue'] = 'task'
657 record['queue'] = 'task'
658 header = msg['header']
658 header = msg['header']
659 msg_id = header['msg_id']
659 msg_id = header['msg_id']
660 self.pending.add(msg_id)
660 self.pending.add(msg_id)
661 self.unassigned.add(msg_id)
661 self.unassigned.add(msg_id)
662 try:
662 try:
663 # it's possible iopub arrived first:
663 # it's possible iopub arrived first:
664 existing = self.db.get_record(msg_id)
664 existing = self.db.get_record(msg_id)
665 if existing['resubmitted']:
665 if existing['resubmitted']:
666 for key in ('submitted', 'client_uuid', 'buffers'):
666 for key in ('submitted', 'client_uuid', 'buffers'):
667 # don't clobber these keys on resubmit
667 # don't clobber these keys on resubmit
668 # submitted and client_uuid should be different
668 # submitted and client_uuid should be different
669 # and buffers might be big, and shouldn't have changed
669 # and buffers might be big, and shouldn't have changed
670 record.pop(key)
670 record.pop(key)
671 # still check content and header, which should not change
671 # still check content and header, which should not change
672 # and are much cheaper to compare than the buffers
672 # and are much cheaper to compare than the buffers
673
673
674 for key,evalue in existing.iteritems():
674 for key,evalue in existing.iteritems():
675 if key.endswith('buffers'):
675 if key.endswith('buffers'):
676 # don't compare buffers
676 # don't compare buffers
677 continue
677 continue
678 rvalue = record.get(key, None)
678 rvalue = record.get(key, None)
679 if evalue and rvalue and evalue != rvalue:
679 if evalue and rvalue and evalue != rvalue:
680 self.log.warn("conflicting initial state for record: %r:%r <%r> %r"%(msg_id, rvalue, key, evalue))
680 self.log.warn("conflicting initial state for record: %r:%r <%r> %r"%(msg_id, rvalue, key, evalue))
681 elif evalue and not rvalue:
681 elif evalue and not rvalue:
682 record[key] = evalue
682 record[key] = evalue
683 try:
683 try:
684 self.db.update_record(msg_id, record)
684 self.db.update_record(msg_id, record)
685 except Exception:
685 except Exception:
686 self.log.error("DB Error updating record %r"%msg_id, exc_info=True)
686 self.log.error("DB Error updating record %r"%msg_id, exc_info=True)
687 except KeyError:
687 except KeyError:
688 try:
688 try:
689 self.db.add_record(msg_id, record)
689 self.db.add_record(msg_id, record)
690 except Exception:
690 except Exception:
691 self.log.error("DB Error adding record %r"%msg_id, exc_info=True)
691 self.log.error("DB Error adding record %r"%msg_id, exc_info=True)
692 except Exception:
692 except Exception:
693 self.log.error("DB Error saving task request %r"%msg_id, exc_info=True)
693 self.log.error("DB Error saving task request %r"%msg_id, exc_info=True)
694
694
695 def save_task_result(self, idents, msg):
695 def save_task_result(self, idents, msg):
696 """save the result of a completed task."""
696 """save the result of a completed task."""
697 client_id = idents[0]
697 client_id = idents[0]
698 try:
698 try:
699 msg = self.session.unserialize(msg)
699 msg = self.session.unserialize(msg)
700 except Exception:
700 except Exception:
701 self.log.error("task::invalid task result message send to %r: %r"%(
701 self.log.error("task::invalid task result message send to %r: %r"%(
702 client_id, msg), exc_info=True)
702 client_id, msg), exc_info=True)
703 return
703 return
704
704
705 parent = msg['parent_header']
705 parent = msg['parent_header']
706 if not parent:
706 if not parent:
707 # print msg
707 # print msg
708 self.log.warn("Task %r had no parent!"%msg)
708 self.log.warn("Task %r had no parent!"%msg)
709 return
709 return
710 msg_id = parent['msg_id']
710 msg_id = parent['msg_id']
711 if msg_id in self.unassigned:
711 if msg_id in self.unassigned:
712 self.unassigned.remove(msg_id)
712 self.unassigned.remove(msg_id)
713
713
714 header = msg['header']
714 header = msg['header']
715 engine_uuid = header.get('engine', None)
715 engine_uuid = header.get('engine', None)
716 eid = self.by_ident.get(engine_uuid, None)
716 eid = self.by_ident.get(engine_uuid, None)
717
717
718 if msg_id in self.pending:
718 if msg_id in self.pending:
719 self.pending.remove(msg_id)
719 self.pending.remove(msg_id)
720 self.all_completed.add(msg_id)
720 self.all_completed.add(msg_id)
721 if eid is not None:
721 if eid is not None:
722 self.completed[eid].append(msg_id)
722 self.completed[eid].append(msg_id)
723 if msg_id in self.tasks[eid]:
723 if msg_id in self.tasks[eid]:
724 self.tasks[eid].remove(msg_id)
724 self.tasks[eid].remove(msg_id)
725 completed = header['date']
725 completed = header['date']
726 started = header.get('started', None)
726 started = header.get('started', None)
727 result = {
727 result = {
728 'result_header' : header,
728 'result_header' : header,
729 'result_content': msg['content'],
729 'result_content': msg['content'],
730 'started' : started,
730 'started' : started,
731 'completed' : completed,
731 'completed' : completed,
732 'engine_uuid': engine_uuid
732 'engine_uuid': engine_uuid
733 }
733 }
734
734
735 result['result_buffers'] = msg['buffers']
735 result['result_buffers'] = msg['buffers']
736 try:
736 try:
737 self.db.update_record(msg_id, result)
737 self.db.update_record(msg_id, result)
738 except Exception:
738 except Exception:
739 self.log.error("DB Error saving task request %r"%msg_id, exc_info=True)
739 self.log.error("DB Error saving task request %r"%msg_id, exc_info=True)
740
740
741 else:
741 else:
742 self.log.debug("task::unknown task %r finished"%msg_id)
742 self.log.debug("task::unknown task %r finished"%msg_id)
743
743
744 def save_task_destination(self, idents, msg):
744 def save_task_destination(self, idents, msg):
745 try:
745 try:
746 msg = self.session.unserialize(msg, content=True)
746 msg = self.session.unserialize(msg, content=True)
747 except Exception:
747 except Exception:
748 self.log.error("task::invalid task tracking message", exc_info=True)
748 self.log.error("task::invalid task tracking message", exc_info=True)
749 return
749 return
750 content = msg['content']
750 content = msg['content']
751 # print (content)
751 # print (content)
752 msg_id = content['msg_id']
752 msg_id = content['msg_id']
753 engine_uuid = content['engine_id']
753 engine_uuid = content['engine_id']
754 eid = self.by_ident[util.asbytes(engine_uuid)]
754 eid = self.by_ident[util.asbytes(engine_uuid)]
755
755
756 self.log.info("task::task %r arrived on %r"%(msg_id, eid))
756 self.log.info("task::task %r arrived on %r"%(msg_id, eid))
757 if msg_id in self.unassigned:
757 if msg_id in self.unassigned:
758 self.unassigned.remove(msg_id)
758 self.unassigned.remove(msg_id)
759 # else:
759 # else:
760 # self.log.debug("task::task %r not listed as MIA?!"%(msg_id))
760 # self.log.debug("task::task %r not listed as MIA?!"%(msg_id))
761
761
762 self.tasks[eid].append(msg_id)
762 self.tasks[eid].append(msg_id)
763 # self.pending[msg_id][1].update(received=datetime.now(),engine=(eid,engine_uuid))
763 # self.pending[msg_id][1].update(received=datetime.now(),engine=(eid,engine_uuid))
764 try:
764 try:
765 self.db.update_record(msg_id, dict(engine_uuid=engine_uuid))
765 self.db.update_record(msg_id, dict(engine_uuid=engine_uuid))
766 except Exception:
766 except Exception:
767 self.log.error("DB Error saving task destination %r"%msg_id, exc_info=True)
767 self.log.error("DB Error saving task destination %r"%msg_id, exc_info=True)
768
768
769
769
770 def mia_task_request(self, idents, msg):
770 def mia_task_request(self, idents, msg):
771 raise NotImplementedError
771 raise NotImplementedError
772 client_id = idents[0]
772 client_id = idents[0]
773 # content = dict(mia=self.mia,status='ok')
773 # content = dict(mia=self.mia,status='ok')
774 # self.session.send('mia_reply', content=content, idents=client_id)
774 # self.session.send('mia_reply', content=content, idents=client_id)
775
775
776
776
777 #--------------------- IOPub Traffic ------------------------------
777 #--------------------- IOPub Traffic ------------------------------
778
778
779 def save_iopub_message(self, topics, msg):
779 def save_iopub_message(self, topics, msg):
780 """save an iopub message into the db"""
780 """save an iopub message into the db"""
781 # print (topics)
781 # print (topics)
782 try:
782 try:
783 msg = self.session.unserialize(msg, content=True)
783 msg = self.session.unserialize(msg, content=True)
784 except Exception:
784 except Exception:
785 self.log.error("iopub::invalid IOPub message", exc_info=True)
785 self.log.error("iopub::invalid IOPub message", exc_info=True)
786 return
786 return
787
787
788 parent = msg['parent_header']
788 parent = msg['parent_header']
789 if not parent:
789 if not parent:
790 self.log.error("iopub::invalid IOPub message: %r"%msg)
790 self.log.error("iopub::invalid IOPub message: %r"%msg)
791 return
791 return
792 msg_id = parent['msg_id']
792 msg_id = parent['msg_id']
793 msg_type = msg['header']['msg_type']
793 msg_type = msg['header']['msg_type']
794 content = msg['content']
794 content = msg['content']
795
795
796 # ensure msg_id is in db
796 # ensure msg_id is in db
797 try:
797 try:
798 rec = self.db.get_record(msg_id)
798 rec = self.db.get_record(msg_id)
799 except KeyError:
799 except KeyError:
800 rec = empty_record()
800 rec = empty_record()
801 rec['msg_id'] = msg_id
801 rec['msg_id'] = msg_id
802 self.db.add_record(msg_id, rec)
802 self.db.add_record(msg_id, rec)
803 # stream
803 # stream
804 d = {}
804 d = {}
805 if msg_type == 'stream':
805 if msg_type == 'stream':
806 name = content['name']
806 name = content['name']
807 s = rec[name] or ''
807 s = rec[name] or ''
808 d[name] = s + content['data']
808 d[name] = s + content['data']
809
809
810 elif msg_type == 'pyerr':
810 elif msg_type == 'pyerr':
811 d['pyerr'] = content
811 d['pyerr'] = content
812 elif msg_type == 'pyin':
812 elif msg_type == 'pyin':
813 d['pyin'] = content['code']
813 d['pyin'] = content['code']
814 else:
814 else:
815 d[msg_type] = content.get('data', '')
815 d[msg_type] = content.get('data', '')
816
816
817 try:
817 try:
818 self.db.update_record(msg_id, d)
818 self.db.update_record(msg_id, d)
819 except Exception:
819 except Exception:
820 self.log.error("DB Error saving iopub message %r"%msg_id, exc_info=True)
820 self.log.error("DB Error saving iopub message %r"%msg_id, exc_info=True)
821
821
822
822
823
823
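# For 'stream' messages, save_iopub_message() appends each chunk to the text
# already stored on the record, so rec['stdout'] grows as output arrives.
# A minimal model of that accumulation:

rec = {'stdout': None, 'stderr': None}

def on_stream(rec, name, data):
    rec[name] = (rec[name] or '') + data

on_stream(rec, 'stdout', 'hello ')
on_stream(rec, 'stdout', 'world\n')
# rec['stdout'] is now 'hello world\n'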
824 #-------------------------------------------------------------------------
824 #-------------------------------------------------------------------------
825 # Registration requests
825 # Registration requests
826 #-------------------------------------------------------------------------
826 #-------------------------------------------------------------------------
827
827
828 def connection_request(self, client_id, msg):
828 def connection_request(self, client_id, msg):
829 """Reply with connection addresses for clients."""
829 """Reply with connection addresses for clients."""
830 self.log.info("client::client %r connected"%client_id)
830 self.log.info("client::client %r connected"%client_id)
831 content = dict(status='ok')
831 content = dict(status='ok')
832 content.update(self.client_info)
832 content.update(self.client_info)
833 jsonable = {}
833 jsonable = {}
834 for k,v in self.keytable.iteritems():
834 for k,v in self.keytable.iteritems():
835 if v not in self.dead_engines:
835 if v not in self.dead_engines:
836 jsonable[str(k)] = v.decode('ascii')
836 jsonable[str(k)] = v.decode('ascii')
837 content['engines'] = jsonable
837 content['engines'] = jsonable
838 self.session.send(self.query, 'connection_reply', content, parent=msg, ident=client_id)
838 self.session.send(self.query, 'connection_reply', content, parent=msg, ident=client_id)
839
839
840 def register_engine(self, reg, msg):
840 def register_engine(self, reg, msg):
841 """Register a new engine."""
841 """Register a new engine."""
842 content = msg['content']
842 content = msg['content']
843 try:
843 try:
844 queue = util.asbytes(content['queue'])
844 queue = util.asbytes(content['queue'])
845 except KeyError:
845 except KeyError:
846 self.log.error("registration::queue not specified", exc_info=True)
846 self.log.error("registration::queue not specified", exc_info=True)
847 return
847 return
848 heart = content.get('heartbeat', None)
848 heart = content.get('heartbeat', None)
849 if heart:
849 if heart:
850 heart = util.asbytes(heart)
850 heart = util.asbytes(heart)
851 """register a new engine, and create the socket(s) necessary"""
851 """register a new engine, and create the socket(s) necessary"""
852 eid = self._next_id
852 eid = self._next_id
853 # print (eid, queue, reg, heart)
853 # print (eid, queue, reg, heart)
854
854
855 self.log.debug("registration::register_engine(%i, %r, %r, %r)"%(eid, queue, reg, heart))
855 self.log.debug("registration::register_engine(%i, %r, %r, %r)"%(eid, queue, reg, heart))
856
856
857 content = dict(id=eid,status='ok')
857 content = dict(id=eid,status='ok')
858 content.update(self.engine_info)
858 content.update(self.engine_info)
859 # check if requesting available IDs:
859 # check if requesting available IDs:
860 if queue in self.by_ident:
860 if queue in self.by_ident:
861 try:
861 try:
862 raise KeyError("queue_id %r in use"%queue)
862 raise KeyError("queue_id %r in use"%queue)
863 except:
863 except:
864 content = error.wrap_exception()
864 content = error.wrap_exception()
865 self.log.error("queue_id %r in use"%queue, exc_info=True)
865 self.log.error("queue_id %r in use"%queue, exc_info=True)
866 elif heart in self.hearts: # need to check unique hearts?
866 elif heart in self.hearts: # need to check unique hearts?
867 try:
867 try:
868 raise KeyError("heart_id %r in use"%heart)
868 raise KeyError("heart_id %r in use"%heart)
869 except:
869 except:
870 self.log.error("heart_id %r in use"%heart, exc_info=True)
870 self.log.error("heart_id %r in use"%heart, exc_info=True)
871 content = error.wrap_exception()
871 content = error.wrap_exception()
872 else:
872 else:
873 for h, pack in self.incoming_registrations.iteritems():
873 for h, pack in self.incoming_registrations.iteritems():
874 if heart == h:
874 if heart == h:
875 try:
875 try:
876 raise KeyError("heart_id %r in use"%heart)
876 raise KeyError("heart_id %r in use"%heart)
877 except:
877 except:
878 self.log.error("heart_id %r in use"%heart, exc_info=True)
878 self.log.error("heart_id %r in use"%heart, exc_info=True)
879 content = error.wrap_exception()
879 content = error.wrap_exception()
880 break
880 break
881 elif queue == pack[1]:
881 elif queue == pack[1]:
882 try:
882 try:
883 raise KeyError("queue_id %r in use"%queue)
883 raise KeyError("queue_id %r in use"%queue)
884 except:
884 except:
885 self.log.error("queue_id %r in use"%queue, exc_info=True)
885 self.log.error("queue_id %r in use"%queue, exc_info=True)
886 content = error.wrap_exception()
886 content = error.wrap_exception()
887 break
887 break
888
888
889 msg = self.session.send(self.query, "registration_reply",
889 msg = self.session.send(self.query, "registration_reply",
890 content=content,
890 content=content,
891 ident=reg)
891 ident=reg)
892
892
893 if content['status'] == 'ok':
893 if content['status'] == 'ok':
894 if heart in self.heartmonitor.hearts:
894 if heart in self.heartmonitor.hearts:
895 # already beating
895 # already beating
896 self.incoming_registrations[heart] = (eid,queue,reg[0],None)
896 self.incoming_registrations[heart] = (eid,queue,reg[0],None)
897 self.finish_registration(heart)
897 self.finish_registration(heart)
898 else:
898 else:
899 purge = lambda : self._purge_stalled_registration(heart)
899 purge = lambda : self._purge_stalled_registration(heart)
900 dc = ioloop.DelayedCallback(purge, self.registration_timeout, self.loop)
900 dc = ioloop.DelayedCallback(purge, self.registration_timeout, self.loop)
901 dc.start()
901 dc.start()
902 self.incoming_registrations[heart] = (eid,queue,reg[0],dc)
902 self.incoming_registrations[heart] = (eid,queue,reg[0],dc)
903 else:
903 else:
904 self.log.error("registration::registration %i failed: %r"%(eid, content['evalue']))
904 self.log.error("registration::registration %i failed: %r"%(eid, content['evalue']))
905 return eid
905 return eid
906
906
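# register_engine() accepts a registration only if both the queue ident and
# the heart ident are unused; completion is then gated on the heartbeat.
# A compressed sketch of the decision (names are stand-ins):

def decide_registration(queue, heart, by_ident, hearts, beating):
    if queue in by_ident:
        return 'reject: queue_id in use'
    if heart in hearts:
        return 'reject: heart_id in use'
    if heart in beating:
        return 'finish registration immediately'
    return 'park in incoming_registrations; purge on registration_timeout'

# decide_registration(b'q1', b'h1', {}, {}, set())
# returns 'park in incoming_registrations; purge on registration_timeout'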
907 def unregister_engine(self, ident, msg):
907 def unregister_engine(self, ident, msg):
908 """Unregister an engine that explicitly requested to leave."""
908 """Unregister an engine that explicitly requested to leave."""
909 try:
909 try:
910 eid = msg['content']['id']
910 eid = msg['content']['id']
911 except:
911 except:
912 self.log.error("registration::bad engine id for unregistration: %r"%ident, exc_info=True)
912 self.log.error("registration::bad engine id for unregistration: %r"%ident, exc_info=True)
913 return
913 return
914 self.log.info("registration::unregister_engine(%r)"%eid)
914 self.log.info("registration::unregister_engine(%r)"%eid)
915 # print (eid)
915 # print (eid)
916 uuid = self.keytable[eid]
916 uuid = self.keytable[eid]
917 content=dict(id=eid, queue=uuid.decode('ascii'))
917 content=dict(id=eid, queue=uuid.decode('ascii'))
918 self.dead_engines.add(uuid)
918 self.dead_engines.add(uuid)
919 # self.ids.remove(eid)
919 # self.ids.remove(eid)
920 # uuid = self.keytable.pop(eid)
920 # uuid = self.keytable.pop(eid)
921 #
921 #
922 # ec = self.engines.pop(eid)
922 # ec = self.engines.pop(eid)
923 # self.hearts.pop(ec.heartbeat)
923 # self.hearts.pop(ec.heartbeat)
924 # self.by_ident.pop(ec.queue)
924 # self.by_ident.pop(ec.queue)
925 # self.completed.pop(eid)
925 # self.completed.pop(eid)
926 handleit = lambda : self._handle_stranded_msgs(eid, uuid)
926 handleit = lambda : self._handle_stranded_msgs(eid, uuid)
927 dc = ioloop.DelayedCallback(handleit, self.registration_timeout, self.loop)
927 dc = ioloop.DelayedCallback(handleit, self.registration_timeout, self.loop)
928 dc.start()
928 dc.start()
929 ############## TODO: HANDLE IT ################
929 ############## TODO: HANDLE IT ################
930
930
931 if self.notifier:
931 if self.notifier:
932 self.session.send(self.notifier, "unregistration_notification", content=content)
932 self.session.send(self.notifier, "unregistration_notification", content=content)
933
933
934 def _handle_stranded_msgs(self, eid, uuid):
934 def _handle_stranded_msgs(self, eid, uuid):
935 """Handle messages known to be on an engine when the engine unregisters.
935 """Handle messages known to be on an engine when the engine unregisters.
936
936
937 It is possible that this will fire prematurely - that is, an engine will
937 It is possible that this will fire prematurely - that is, an engine will
938 go down after completing a result, and the client will be notified
938 go down after completing a result, and the client will be notified
939 that the result failed and later receive the actual result.
939 that the result failed and later receive the actual result.
940 """
940 """
941
941
942 outstanding = self.queues[eid]
942 outstanding = self.queues[eid]
943
943
944 for msg_id in outstanding:
944 for msg_id in outstanding:
945 self.pending.remove(msg_id)
945 self.pending.remove(msg_id)
946 self.all_completed.add(msg_id)
946 self.all_completed.add(msg_id)
947 try:
947 try:
948 raise error.EngineError("Engine %r died while running task %r"%(eid, msg_id))
948 raise error.EngineError("Engine %r died while running task %r"%(eid, msg_id))
949 except:
949 except:
950 content = error.wrap_exception()
950 content = error.wrap_exception()
951 # build a fake header:
951 # build a fake header:
952 header = {}
952 header = {}
953 header['engine'] = uuid
953 header['engine'] = uuid
954 header['date'] = datetime.now()
954 header['date'] = datetime.now()
955 rec = dict(result_content=content, result_header=header, result_buffers=[])
955 rec = dict(result_content=content, result_header=header, result_buffers=[])
956 rec['completed'] = header['date']
956 rec['completed'] = header['date']
957 rec['engine_uuid'] = uuid
957 rec['engine_uuid'] = uuid
958 try:
958 try:
959 self.db.update_record(msg_id, rec)
959 self.db.update_record(msg_id, rec)
960 except Exception:
960 except Exception:
961 self.log.error("DB Error handling stranded msg %r"%msg_id, exc_info=True)
961 self.log.error("DB Error handling stranded msg %r"%msg_id, exc_info=True)
962
962
963
963
964 def finish_registration(self, heart):
964 def finish_registration(self, heart):
965 """Second half of engine registration, called after our HeartMonitor
965 """Second half of engine registration, called after our HeartMonitor
966 has received a beat from the Engine's Heart."""
966 has received a beat from the Engine's Heart."""
967 try:
967 try:
968 (eid,queue,reg,purge) = self.incoming_registrations.pop(heart)
968 (eid,queue,reg,purge) = self.incoming_registrations.pop(heart)
969 except KeyError:
969 except KeyError:
970 self.log.error("registration::tried to finish nonexistant registration", exc_info=True)
970 self.log.error("registration::tried to finish nonexistant registration", exc_info=True)
971 return
971 return
972 self.log.info("registration::finished registering engine %i:%r"%(eid,queue))
972 self.log.info("registration::finished registering engine %i:%r"%(eid,queue))
973 if purge is not None:
973 if purge is not None:
974 purge.stop()
974 purge.stop()
975 control = queue
975 control = queue
976 self.ids.add(eid)
976 self.ids.add(eid)
977 self.keytable[eid] = queue
977 self.keytable[eid] = queue
978 self.engines[eid] = EngineConnector(id=eid, queue=queue, registration=reg,
978 self.engines[eid] = EngineConnector(id=eid, queue=queue, registration=reg,
979 control=control, heartbeat=heart)
979 control=control, heartbeat=heart)
980 self.by_ident[queue] = eid
980 self.by_ident[queue] = eid
981 self.queues[eid] = list()
981 self.queues[eid] = list()
982 self.tasks[eid] = list()
982 self.tasks[eid] = list()
983 self.completed[eid] = list()
983 self.completed[eid] = list()
984 self.hearts[heart] = eid
984 self.hearts[heart] = eid
985 content = dict(id=eid, queue=self.engines[eid].queue.decode('ascii'))
985 content = dict(id=eid, queue=self.engines[eid].queue.decode('ascii'))
986 if self.notifier:
986 if self.notifier:
987 self.session.send(self.notifier, "registration_notification", content=content)
987 self.session.send(self.notifier, "registration_notification", content=content)
988 self.log.info("engine::Engine Connected: %i"%eid)
988 self.log.info("engine::Engine Connected: %i"%eid)
989
989
990 def _purge_stalled_registration(self, heart):
990 def _purge_stalled_registration(self, heart):
991 if heart in self.incoming_registrations:
991 if heart in self.incoming_registrations:
992 eid = self.incoming_registrations.pop(heart)[0]
992 eid = self.incoming_registrations.pop(heart)[0]
993 self.log.info("registration::purging stalled registration: %i"%eid)
993 self.log.info("registration::purging stalled registration: %i"%eid)
994 else:
994 else:
995 pass
995 pass
996
996
997 #-------------------------------------------------------------------------
997 #-------------------------------------------------------------------------
998 # Client Requests
998 # Client Requests
999 #-------------------------------------------------------------------------
999 #-------------------------------------------------------------------------
1000
1000
1001 def shutdown_request(self, client_id, msg):
1001 def shutdown_request(self, client_id, msg):
1002 """handle shutdown request."""
1002 """handle shutdown request."""
1003 self.session.send(self.query, 'shutdown_reply', content={'status': 'ok'}, ident=client_id)
1003 self.session.send(self.query, 'shutdown_reply', content={'status': 'ok'}, ident=client_id)
1004 # also notify other clients of shutdown
1004 # also notify other clients of shutdown
1005 self.session.send(self.notifier, 'shutdown_notice', content={'status': 'ok'})
1005 self.session.send(self.notifier, 'shutdown_notice', content={'status': 'ok'})
1006 dc = ioloop.DelayedCallback(lambda : self._shutdown(), 1000, self.loop)
1006 dc = ioloop.DelayedCallback(lambda : self._shutdown(), 1000, self.loop)
1007 dc.start()
1007 dc.start()
1008
1008
1009 def _shutdown(self):
1009 def _shutdown(self):
1010 self.log.info("hub::hub shutting down.")
1010 self.log.info("hub::hub shutting down.")
1011 time.sleep(0.1)
1011 time.sleep(0.1)
1012 sys.exit(0)
1012 sys.exit(0)
1013
1013
1014
1014
1015 def check_load(self, client_id, msg):
1015 def check_load(self, client_id, msg):
1016 content = msg['content']
1016 content = msg['content']
1017 try:
1017 try:
1018 targets = content['targets']
1018 targets = content['targets']
1019 targets = self._validate_targets(targets)
1019 targets = self._validate_targets(targets)
1020 except:
1020 except:
1021 content = error.wrap_exception()
1021 content = error.wrap_exception()
1022 self.session.send(self.query, "hub_error",
1022 self.session.send(self.query, "hub_error",
1023 content=content, ident=client_id)
1023 content=content, ident=client_id)
1024 return
1024 return
1025
1025
1026 content = dict(status='ok')
1026 content = dict(status='ok')
1027 # loads = {}
1027 # loads = {}
1028 for t in targets:
1028 for t in targets:
1029 content[bytes(t)] = len(self.queues[t])+len(self.tasks[t])
1029 content[bytes(t)] = len(self.queues[t])+len(self.tasks[t])
1030 self.session.send(self.query, "load_reply", content=content, ident=client_id)
1030 self.session.send(self.query, "load_reply", content=content, ident=client_id)
1031
1031
1032
1032
1033 def queue_status(self, client_id, msg):
1033 def queue_status(self, client_id, msg):
1034 """Return the Queue status of one or more targets.
1034 """Return the Queue status of one or more targets.
1035 if verbose: return the msg_ids
1035 if verbose: return the msg_ids
1036 else: return len of each type.
1036 else: return len of each type.
1037 keys: queue (pending MUX jobs)
1037 keys: queue (pending MUX jobs)
1038 tasks (pending Task jobs)
1038 tasks (pending Task jobs)
1039 completed (finished jobs from both queues)"""
1039 completed (finished jobs from both queues)"""
1040 content = msg['content']
1040 content = msg['content']
1041 targets = content['targets']
1041 targets = content['targets']
1042 try:
1042 try:
1043 targets = self._validate_targets(targets)
1043 targets = self._validate_targets(targets)
1044 except:
1044 except:
1045 content = error.wrap_exception()
1045 content = error.wrap_exception()
1046 self.session.send(self.query, "hub_error",
1046 self.session.send(self.query, "hub_error",
1047 content=content, ident=client_id)
1047 content=content, ident=client_id)
1048 return
1048 return
1049 verbose = content.get('verbose', False)
1049 verbose = content.get('verbose', False)
1050 content = dict(status='ok')
1050 content = dict(status='ok')
1051 for t in targets:
1051 for t in targets:
1052 queue = self.queues[t]
1052 queue = self.queues[t]
1053 completed = self.completed[t]
1053 completed = self.completed[t]
1054 tasks = self.tasks[t]
1054 tasks = self.tasks[t]
1055 if not verbose:
1055 if not verbose:
1056 queue = len(queue)
1056 queue = len(queue)
1057 completed = len(completed)
1057 completed = len(completed)
1058 tasks = len(tasks)
1058 tasks = len(tasks)
1059 content[str(t)] = {'queue': queue, 'completed': completed, 'tasks': tasks}
1059 content[str(t)] = {'queue': queue, 'completed': completed, 'tasks': tasks}
1060 content['unassigned'] = list(self.unassigned) if verbose else len(self.unassigned)
1060 content['unassigned'] = list(self.unassigned) if verbose else len(self.unassigned)
1061 # print (content)
1061 # print (content)
1062 self.session.send(self.query, "queue_reply", content=content, ident=client_id)
1062 self.session.send(self.query, "queue_reply", content=content, ident=client_id)
1063
1063
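# The shape of a queue_reply content dict for two engines. With
# verbose=False each field collapses to a count; with verbose=True the
# msg_id lists themselves are returned (the values below are made up):

terse_reply = {
    'status': 'ok',
    '0': {'queue': 2, 'completed': 10, 'tasks': 1},
    '1': {'queue': 0, 'completed': 12, 'tasks': 3},
    'unassigned': 1,
}

verbose_reply = {
    'status': 'ok',
    '0': {'queue': ['msg-a', 'msg-b'], 'completed': [], 'tasks': ['msg-c']},
    '1': {'queue': [], 'completed': ['msg-d'], 'tasks': []},
    'unassigned': ['msg-e'],
}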
1064 def purge_results(self, client_id, msg):
1064 def purge_results(self, client_id, msg):
1065 """Purge results from memory. This method is more valuable before we move
1065 """Purge results from memory. This method is more valuable before we move
1066 to a DB based message storage mechanism."""
1066 to a DB based message storage mechanism."""
1067 content = msg['content']
1067 content = msg['content']
1068 self.log.info("Dropping records with %s", content)
1068 self.log.info("Dropping records with %s", content)
1069 msg_ids = content.get('msg_ids', [])
1069 msg_ids = content.get('msg_ids', [])
1070 reply = dict(status='ok')
1070 reply = dict(status='ok')
1071 if msg_ids == 'all':
1071 if msg_ids == 'all':
1072 try:
1072 try:
1073 self.db.drop_matching_records(dict(completed={'$ne':None}))
1073 self.db.drop_matching_records(dict(completed={'$ne':None}))
1074 except Exception:
1074 except Exception:
1075 reply = error.wrap_exception()
1075 reply = error.wrap_exception()
1076 else:
1076 else:
1077 pending = filter(lambda m: m in self.pending, msg_ids)
1077 pending = filter(lambda m: m in self.pending, msg_ids)
1078 if pending:
1078 if pending:
1079 try:
1079 try:
1080 raise IndexError("msg pending: %r"%pending[0])
1080 raise IndexError("msg pending: %r"%pending[0])
1081 except:
1081 except:
1082 reply = error.wrap_exception()
1082 reply = error.wrap_exception()
1083 else:
1083 else:
1084 try:
1084 try:
1085 self.db.drop_matching_records(dict(msg_id={'$in':msg_ids}))
1085 self.db.drop_matching_records(dict(msg_id={'$in':msg_ids}))
1086 except Exception:
1086 except Exception:
1087 reply = error.wrap_exception()
1087 reply = error.wrap_exception()
1088
1088
1089 if reply['status'] == 'ok':
1089 if reply['status'] == 'ok':
1090 eids = content.get('engine_ids', [])
1090 eids = content.get('engine_ids', [])
1091 for eid in eids:
1091 for eid in eids:
1092 if eid not in self.engines:
1092 if eid not in self.engines:
1093 try:
1093 try:
1094 raise IndexError("No such engine: %i"%eid)
1094 raise IndexError("No such engine: %i"%eid)
1095 except:
1095 except:
1096 reply = error.wrap_exception()
1096 reply = error.wrap_exception()
1097 break
1097 break
1098 uid = self.engines[eid].queue
1098 uid = self.engines[eid].queue
1099 try:
1099 try:
1100 self.db.drop_matching_records(dict(engine_uuid=uid, completed={'$ne':None}))
1100 self.db.drop_matching_records(dict(engine_uuid=uid, completed={'$ne':None}))
1101 except Exception:
1101 except Exception:
1102 reply = error.wrap_exception()
1102 reply = error.wrap_exception()
1103 break
1103 break
1104
1104
1105 self.session.send(self.query, 'purge_reply', content=reply, ident=client_id)
1105 self.session.send(self.query, 'purge_reply', content=reply, ident=client_id)
1106
1106
1107 def resubmit_task(self, client_id, msg):
1107 def resubmit_task(self, client_id, msg):
1108 """Resubmit one or more tasks."""
1108 """Resubmit one or more tasks."""
1109 def finish(reply):
1109 def finish(reply):
1110 self.session.send(self.query, 'resubmit_reply', content=reply, ident=client_id)
1110 self.session.send(self.query, 'resubmit_reply', content=reply, ident=client_id)
1111
1111
1112 content = msg['content']
1112 content = msg['content']
1113 msg_ids = content['msg_ids']
1113 msg_ids = content['msg_ids']
1114 reply = dict(status='ok')
1114 reply = dict(status='ok')
1115 try:
1115 try:
1116 records = self.db.find_records({'msg_id' : {'$in' : msg_ids}}, keys=[
1116 records = self.db.find_records({'msg_id' : {'$in' : msg_ids}}, keys=[
1117 'header', 'content', 'buffers'])
1117 'header', 'content', 'buffers'])
1118 except Exception:
1118 except Exception:
1119 self.log.error('db::db error finding tasks to resubmit', exc_info=True)
1119 self.log.error('db::db error finding tasks to resubmit', exc_info=True)
1120 return finish(error.wrap_exception())
1120 return finish(error.wrap_exception())
1121
1121
1122 # validate msg_ids
1122 # validate msg_ids
1123 found_ids = [ rec['msg_id'] for rec in records ]
1123 found_ids = [ rec['msg_id'] for rec in records ]
1124 invalid_ids = filter(lambda m: m in self.pending, found_ids)
1124 invalid_ids = filter(lambda m: m in self.pending, found_ids)
1125 if len(records) > len(msg_ids):
1125 if len(records) > len(msg_ids):
1126 try:
1126 try:
1127 raise RuntimeError("DB appears to be in an inconsistent state."
1127 raise RuntimeError("DB appears to be in an inconsistent state."
1128 "More matching records were found than should exist")
1128 "More matching records were found than should exist")
1129 except Exception:
1129 except Exception:
1130 return finish(error.wrap_exception())
1130 return finish(error.wrap_exception())
1131 elif len(records) < len(msg_ids):
1131 elif len(records) < len(msg_ids):
1132 missing = [ m for m in msg_ids if m not in found_ids ]
1132 missing = [ m for m in msg_ids if m not in found_ids ]
1133 try:
1133 try:
1134 raise KeyError("No such msg(s): %r"%missing)
1134 raise KeyError("No such msg(s): %r"%missing)
1135 except KeyError:
1135 except KeyError:
1136 return finish(error.wrap_exception())
1136 return finish(error.wrap_exception())
1137 elif invalid_ids:
1137 elif invalid_ids:
1138 msg_id = invalid_ids[0]
1138 msg_id = invalid_ids[0]
1139 try:
1139 try:
1140 raise ValueError("Task %r appears to be inflight"%(msg_id))
1140 raise ValueError("Task %r appears to be inflight"%(msg_id))
1141 except Exception:
1141 except Exception:
1142 return finish(error.wrap_exception())
1142 return finish(error.wrap_exception())
1143
1143
1144 # clear the existing records
1144 # clear the existing records
1145 now = datetime.now()
1145 now = datetime.now()
1146 rec = empty_record()
1146 rec = empty_record()
1147 map(rec.pop, ['msg_id', 'header', 'content', 'buffers', 'submitted'])
1147 map(rec.pop, ['msg_id', 'header', 'content', 'buffers', 'submitted'])
1148 rec['resubmitted'] = now
1148 rec['resubmitted'] = now
1149 rec['queue'] = 'task'
1149 rec['queue'] = 'task'
1150 rec['client_uuid'] = client_id[0]
1150 rec['client_uuid'] = client_id[0]
1151 try:
1151 try:
1152 for msg_id in msg_ids:
1152 for msg_id in msg_ids:
1153 self.all_completed.discard(msg_id)
1153 self.all_completed.discard(msg_id)
1154 self.db.update_record(msg_id, rec)
1154 self.db.update_record(msg_id, rec)
1155 except Exception:
1155 except Exception:
1156 self.log.error('db::db error updating record', exc_info=True)
1156 self.log.error('db::db error updating record', exc_info=True)
1157 reply = error.wrap_exception()
1157 reply = error.wrap_exception()
1158 else:
1158 else:
1159 # send the messages
1159 # send the messages
1160 for rec in records:
1160 for rec in records:
1161 header = rec['header']
1161 header = rec['header']
1162 # include resubmitted in header to prevent digest collision
1162 # include resubmitted in header to prevent digest collision
1163 header['resubmitted'] = now
1163 header['resubmitted'] = now
1164 msg = self.session.msg(header['msg_type'])
1164 msg = self.session.msg(header['msg_type'])
1165 msg['content'] = rec['content']
1165 msg['content'] = rec['content']
1166 msg['header'] = header
1166 msg['header'] = header
1167 msg['header']['msg_id'] = rec['msg_id']
1167 msg['header']['msg_id'] = rec['msg_id']
1168 self.session.send(self.resubmit, msg, buffers=rec['buffers'])
1168 self.session.send(self.resubmit, msg, buffers=rec['buffers'])
1169
1169
1170 finish(dict(status='ok'))
1170 finish(dict(status='ok'))
1171
1171
1172
1172
1173 def _extract_record(self, rec):
1173 def _extract_record(self, rec):
1174 """decompose a TaskRecord dict into subsection of reply for get_result"""
1174 """decompose a TaskRecord dict into subsection of reply for get_result"""
1175 io_dict = {}
1175 io_dict = {}
1176 for key in 'pyin pyout pyerr stdout stderr'.split():
1176 for key in 'pyin pyout pyerr stdout stderr'.split():
1177 io_dict[key] = rec[key]
1177 io_dict[key] = rec[key]
1178 content = { 'result_content': rec['result_content'],
1178 content = { 'result_content': rec['result_content'],
1179 'header': rec['header'],
1179 'header': rec['header'],
1180 'result_header' : rec['result_header'],
1180 'result_header' : rec['result_header'],
1181 'io' : io_dict,
1181 'io' : io_dict,
1182 }
1182 }
1183 if rec['result_buffers']:
1183 if rec['result_buffers']:
1184 buffers = map(bytes, rec['result_buffers'])
1184 buffers = map(bytes, rec['result_buffers'])
1185 else:
1185 else:
1186 buffers = []
1186 buffers = []
1187
1187
1188 return content, buffers
1188 return content, buffers
1189
1189
1190 def get_results(self, client_id, msg):
1190 def get_results(self, client_id, msg):
1191 """Get the result of 1 or more messages."""
1191 """Get the result of 1 or more messages."""
1192 content = msg['content']
1192 content = msg['content']
1193 msg_ids = sorted(set(content['msg_ids']))
1193 msg_ids = sorted(set(content['msg_ids']))
1194 statusonly = content.get('status_only', False)
1194 statusonly = content.get('status_only', False)
1195 pending = []
1195 pending = []
1196 completed = []
1196 completed = []
1197 content = dict(status='ok')
1197 content = dict(status='ok')
1198 content['pending'] = pending
1198 content['pending'] = pending
1199 content['completed'] = completed
1199 content['completed'] = completed
1200 buffers = []
1200 buffers = []
1201 if not statusonly:
1201 if not statusonly:
1202 try:
1202 try:
1203 matches = self.db.find_records(dict(msg_id={'$in':msg_ids}))
1203 matches = self.db.find_records(dict(msg_id={'$in':msg_ids}))
1204 # turn match list into dict, for faster lookup
1204 # turn match list into dict, for faster lookup
1205 records = {}
1205 records = {}
1206 for rec in matches:
1206 for rec in matches:
1207 records[rec['msg_id']] = rec
1207 records[rec['msg_id']] = rec
1208 except Exception:
1208 except Exception:
1209 content = error.wrap_exception()
1209 content = error.wrap_exception()
1210 self.session.send(self.query, "result_reply", content=content,
1210 self.session.send(self.query, "result_reply", content=content,
1211 parent=msg, ident=client_id)
1211 parent=msg, ident=client_id)
1212 return
1212 return
1213 else:
1213 else:
1214 records = {}
1214 records = {}
1215 for msg_id in msg_ids:
1215 for msg_id in msg_ids:
1216 if msg_id in self.pending:
1216 if msg_id in self.pending:
1217 pending.append(msg_id)
1217 pending.append(msg_id)
1218 elif msg_id in self.all_completed:
1218 elif msg_id in self.all_completed:
1219 completed.append(msg_id)
1219 completed.append(msg_id)
1220 if not statusonly:
1220 if not statusonly:
1221 c,bufs = self._extract_record(records[msg_id])
1221 c,bufs = self._extract_record(records[msg_id])
1222 content[msg_id] = c
1222 content[msg_id] = c
1223 buffers.extend(bufs)
1223 buffers.extend(bufs)
1224 elif msg_id in records:
1224 elif msg_id in records:
1225 if records[msg_id]['completed']:
1225 if records[msg_id]['completed']:
1226 completed.append(msg_id)
1226 completed.append(msg_id)
1227 c,bufs = self._extract_record(records[msg_id])
1227 c,bufs = self._extract_record(records[msg_id])
1228 content[msg_id] = c
1228 content[msg_id] = c
1229 buffers.extend(bufs)
1229 buffers.extend(bufs)
1230 else:
1230 else:
1231 pending.append(msg_id)
1231 pending.append(msg_id)
1232 else:
1232 else:
1233 try:
1233 try:
1234 raise KeyError('No such message: '+msg_id)
1234 raise KeyError('No such message: '+msg_id)
1235 except:
1235 except:
1236 content = error.wrap_exception()
1236 content = error.wrap_exception()
1237 break
1237 break
1238 self.session.send(self.query, "result_reply", content=content,
1238 self.session.send(self.query, "result_reply", content=content,
1239 parent=msg, ident=client_id,
1239 parent=msg, ident=client_id,
1240 buffers=buffers)
1240 buffers=buffers)
1241
1241
1242 def get_history(self, client_id, msg):
1242 def get_history(self, client_id, msg):
1243 """Get a list of all msg_ids in our DB records"""
1243 """Get a list of all msg_ids in our DB records"""
1244 try:
1244 try:
1245 msg_ids = self.db.get_history()
1245 msg_ids = self.db.get_history()
1246 except Exception:
1246 except Exception:
1247 content = error.wrap_exception()
1247 content = error.wrap_exception()
1248 else:
1248 else:
1249 content = dict(status='ok', history=msg_ids)
1249 content = dict(status='ok', history=msg_ids)
1250
1250
1251 self.session.send(self.query, "history_reply", content=content,
1251 self.session.send(self.query, "history_reply", content=content,
1252 parent=msg, ident=client_id)
1252 parent=msg, ident=client_id)
1253
1253
1254 def db_query(self, client_id, msg):
1254 def db_query(self, client_id, msg):
1255 """Perform a raw query on the task record database."""
1255 """Perform a raw query on the task record database."""
1256 content = msg['content']
1256 content = msg['content']
1257 query = content.get('query', {})
1257 query = content.get('query', {})
1258 keys = content.get('keys', None)
1258 keys = content.get('keys', None)
1259 buffers = []
1259 buffers = []
1260 empty = list()
1260 empty = list()
1261 try:
1261 try:
1262 records = self.db.find_records(query, keys)
1262 records = self.db.find_records(query, keys)
1263 except Exception:
1263 except Exception:
1264 content = error.wrap_exception()
1264 content = error.wrap_exception()
1265 else:
1265 else:
1266 # extract buffers from reply content:
1266 # extract buffers from reply content:
1267 if keys is not None:
1267 if keys is not None:
1268 buffer_lens = [] if 'buffers' in keys else None
1268 buffer_lens = [] if 'buffers' in keys else None
1269 result_buffer_lens = [] if 'result_buffers' in keys else None
1269 result_buffer_lens = [] if 'result_buffers' in keys else None
1270 else:
1270 else:
1271 buffer_lens = []
1271 buffer_lens = []
1272 result_buffer_lens = []
1272 result_buffer_lens = []
1273
1273
1274 for rec in records:
1274 for rec in records:
1275 # buffers may be None, so double check
1275 # buffers may be None, so double check
1276 if buffer_lens is not None:
1276 if buffer_lens is not None:
1277 b = rec.pop('buffers', empty) or empty
1277 b = rec.pop('buffers', empty) or empty
1278 buffer_lens.append(len(b))
1278 buffer_lens.append(len(b))
1279 buffers.extend(b)
1279 buffers.extend(b)
1280 if result_buffer_lens is not None:
1280 if result_buffer_lens is not None:
1281 rb = rec.pop('result_buffers', empty) or empty
1281 rb = rec.pop('result_buffers', empty) or empty
1282 result_buffer_lens.append(len(rb))
1282 result_buffer_lens.append(len(rb))
1283 buffers.extend(rb)
1283 buffers.extend(rb)
1284 content = dict(status='ok', records=records, buffer_lens=buffer_lens,
1284 content = dict(status='ok', records=records, buffer_lens=buffer_lens,
1285 result_buffer_lens=result_buffer_lens)
1285 result_buffer_lens=result_buffer_lens)
1286 # self.log.debug (content)
1286 # self.log.debug (content)
1287 self.session.send(self.query, "db_reply", content=content,
1287 self.session.send(self.query, "db_reply", content=content,
1288 parent=msg, ident=client_id,
1288 parent=msg, ident=client_id,
1289 buffers=buffers)
1289 buffers=buffers)
1290
1290
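# db_query() flattens every record's buffers into a single list and reports
# the per-record lengths alongside, so the client can re-slice them. A
# sketch of both directions (the client-side helper is a stand-in):

def flatten_buffers(records, key='buffers'):
    lens, flat = [], []
    for rec in records:
        b = rec.pop(key, []) or []
        lens.append(len(b))
        flat.extend(b)
    return lens, flat

def unflatten_buffers(lens, flat):
    out, i = [], 0
    for n in lens:
        out.append(flat[i:i + n])
        i += n
    return out

# lens, flat = flatten_buffers([{'buffers': [b'x', b'y']}, {'buffers': []}])
# lens == [2, 0] and unflatten_buffers(lens, flat) == [[b'x', b'y'], []]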
@@ -1,714 +1,714 b''
1 """The Python scheduler for rich scheduling.
1 """The Python scheduler for rich scheduling.
2
2
3 The Pure ZMQ scheduler does not allow routing schemes other than LRU,
3 The Pure ZMQ scheduler does not allow routing schemes other than LRU,
4 nor does it check msg_id DAG dependencies. For those, a slightly slower
4 nor does it check msg_id DAG dependencies. For those, a slightly slower
5 Python Scheduler exists.
5 Python Scheduler exists.
6
6
7 Authors:
7 Authors:
8
8
9 * Min RK
9 * Min RK
10 """
10 """
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12 # Copyright (C) 2010-2011 The IPython Development Team
12 # Copyright (C) 2010-2011 The IPython Development Team
13 #
13 #
14 # Distributed under the terms of the BSD License. The full license is in
14 # Distributed under the terms of the BSD License. The full license is in
15 # the file COPYING, distributed as part of this software.
15 # the file COPYING, distributed as part of this software.
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 #----------------------------------------------------------------------
18 #----------------------------------------------------------------------
19 # Imports
19 # Imports
20 #----------------------------------------------------------------------
20 #----------------------------------------------------------------------
21
21
22 from __future__ import print_function
22 from __future__ import print_function
23
23
24 import logging
24 import logging
25 import sys
25 import sys
26
26
27 from datetime import datetime, timedelta
27 from datetime import datetime, timedelta
28 from random import randint, random
28 from random import randint, random
29 from types import FunctionType
29 from types import FunctionType
30
30
31 try:
31 try:
32 import numpy
32 import numpy
33 except ImportError:
33 except ImportError:
34 numpy = None
34 numpy = None
35
35
36 import zmq
36 import zmq
37 from zmq.eventloop import ioloop, zmqstream
37 from zmq.eventloop import ioloop, zmqstream
38
38
39 # local imports
39 # local imports
40 from IPython.external.decorator import decorator
40 from IPython.external.decorator import decorator
41 from IPython.config.application import Application
41 from IPython.config.application import Application
42 from IPython.config.loader import Config
42 from IPython.config.loader import Config
43 from IPython.utils.traitlets import Instance, Dict, List, Set, Int, Enum, CBytes
43 from IPython.utils.traitlets import Instance, Dict, List, Set, Int, Enum, CBytes
44
44
45 from IPython.parallel import error
45 from IPython.parallel import error
46 from IPython.parallel.factory import SessionFactory
46 from IPython.parallel.factory import SessionFactory
47 from IPython.parallel.util import connect_logger, local_logger, asbytes
47 from IPython.parallel.util import connect_logger, local_logger, asbytes
48
48
49 from .dependency import Dependency
49 from .dependency import Dependency
50
50
51 @decorator
51 @decorator
52 def logged(f,self,*args,**kwargs):
52 def logged(f,self,*args,**kwargs):
53 # print ("#--------------------")
53 # print ("#--------------------")
54 self.log.debug("scheduler::%s(*%s,**%s)", f.func_name, args, kwargs)
54 self.log.debug("scheduler::%s(*%s,**%s)", f.func_name, args, kwargs)
55 # print ("#--")
55 # print ("#--")
56 return f(self,*args, **kwargs)
56 return f(self,*args, **kwargs)
57
57
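The @logged decorator above traces every wrapped call through self.log before delegating to the original method. A minimal sketch of its effect, using a hypothetical class that is not part of this changeset:

import logging

class Demo(object):
    def __init__(self, log):
        self.log = log
    @logged
    def step(self, n):
        return n + 1

# Demo(logging.getLogger()).step(1) logs
# "scheduler::step(*(1,),**{})" at DEBUG level, then returns 2.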
58 #----------------------------------------------------------------------
58 #----------------------------------------------------------------------
59 # Chooser functions
59 # Chooser functions
60 #----------------------------------------------------------------------
60 #----------------------------------------------------------------------
61
61
62 def plainrandom(loads):
62 def plainrandom(loads):
63 """Plain random pick."""
63 """Plain random pick."""
64 n = len(loads)
64 n = len(loads)
65 return randint(0,n-1)
65 return randint(0,n-1)
66
66
67 def lru(loads):
67 def lru(loads):
68 """Always pick the front of the line.
68 """Always pick the front of the line.
69
69
70 The content of `loads` is ignored.
70 The content of `loads` is ignored.
71
71
72 Assumes LRU ordering of loads, with oldest first.
72 Assumes LRU ordering of loads, with oldest first.
73 """
73 """
74 return 0
74 return 0
75
75
76 def twobin(loads):
76 def twobin(loads):
77 """Pick two at random, use the LRU of the two.
77 """Pick two at random, use the LRU of the two.
78
78
79 The content of loads is ignored.
79 The content of loads is ignored.
80
80
81 Assumes LRU ordering of loads, with oldest first.
81 Assumes LRU ordering of loads, with oldest first.
82 """
82 """
83 n = len(loads)
83 n = len(loads)
84 a = randint(0,n-1)
84 a = randint(0,n-1)
85 b = randint(0,n-1)
85 b = randint(0,n-1)
86 return min(a,b)
86 return min(a,b)
87
87
88 def weighted(loads):
88 def weighted(loads):
89 """Pick two at random using inverse load as weight.
89 """Pick two at random using inverse load as weight.
90
90
91 Return the less loaded of the two.
91 Return the less loaded of the two.
92 """
92 """
93 # weight 0 a million times more than 1:
93 # weight 0 a million times more than 1:
94 weights = 1./(1e-6+numpy.array(loads))
94 weights = 1./(1e-6+numpy.array(loads))
95 sums = weights.cumsum()
95 sums = weights.cumsum()
96 t = sums[-1]
96 t = sums[-1]
97 x = random()*t
97 x = random()*t
98 y = random()*t
98 y = random()*t
99 idx = 0
99 idx = 0
100 idy = 0
100 idy = 0
101 while sums[idx] < x:
101 while sums[idx] < x:
102 idx += 1
102 idx += 1
103 while sums[idy] < y:
103 while sums[idy] < y:
104 idy += 1
104 idy += 1
105 if weights[idy] > weights[idx]:
105 if weights[idy] > weights[idx]:
106 return idy
106 return idy
107 else:
107 else:
108 return idx
108 return idx
109
109
110 def leastload(loads):
110 def leastload(loads):
111 """Always choose the lowest load.
111 """Always choose the lowest load.
112
112
113 If the lowest load occurs more than once, the first
113 If the lowest load occurs more than once, the first
114 occurrence will be used. If loads has LRU ordering, this means
114 occurrence will be used. If loads has LRU ordering, this means
115 the LRU of those with the lowest load is chosen.
115 the LRU of those with the lowest load is chosen.
116 """
116 """
117 return loads.index(min(loads))
117 return loads.index(min(loads))
118
118
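Each chooser above maps a list of per-engine loads (LRU-ordered, oldest first) to the index of the engine that should receive the next task. A quick sketch exercising them with hypothetical load values (weighted additionally assumes numpy is importable):

loads = [3, 0, 1, 2]              # hypothetical outstanding-task counts

assert lru(loads) == 0            # always the front of the line
assert leastload(loads) == 1      # index of the minimum load
assert 0 <= plainrandom(loads) < 4
assert 0 <= twobin(loads) < 4     # LRU of two uniform random picks
assert 0 <= weighted(loads) < 4   # inverse-load weighted pick (needs numpy)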
119 #---------------------------------------------------------------------
119 #---------------------------------------------------------------------
120 # Classes
120 # Classes
121 #---------------------------------------------------------------------
121 #---------------------------------------------------------------------
122 # store empty default dependency:
122 # store empty default dependency:
123 MET = Dependency([])
123 MET = Dependency([])
124
124
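MET is the reusable "already satisfied" dependency. Illustratively, and assuming Dependency behaves as it is used throughout this file (set-like over msg_ids, with check(completed, failed) turning True once the dependency is met), a time dependency gates a task like this; the msg_ids are hypothetical:

after = Dependency(['msg-1', 'msg-2'])     # default: all must succeed
completed, failed = set(['msg-1']), set()
assert not after.check(completed, failed)  # msg-2 still outstanding
completed.add('msg-2')
assert after.check(completed, failed)      # dependency met, task may run
assert MET.check(completed, failed)        # the empty dependency is always met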
125 class TaskScheduler(SessionFactory):
125 class TaskScheduler(SessionFactory):
126 """Python TaskScheduler object.
126 """Python TaskScheduler object.
127
127
128 This is the simplest object that supports msg_id based
128 This is the simplest object that supports msg_id based
129 DAG dependencies. *Only* task msg_ids are checked, not
129 DAG dependencies. *Only* task msg_ids are checked, not
130 msg_ids of jobs submitted via the MUX queue.
130 msg_ids of jobs submitted via the MUX queue.
131
131
132 """
132 """
133
133
134 hwm = Int(0, config=True, shortname='hwm',
134 hwm = Int(0, config=True, shortname='hwm',
135 help="""specify the High Water Mark (HWM) for the downstream
135 help="""specify the High Water Mark (HWM) for the downstream
136 socket in the Task scheduler. This is the maximum number
136 socket in the Task scheduler. This is the maximum number
137 of allowed outstanding tasks on each engine."""
137 of allowed outstanding tasks on each engine."""
138 )
138 )
139 scheme_name = Enum(('leastload', 'pure', 'lru', 'plainrandom', 'weighted', 'twobin'),
139 scheme_name = Enum(('leastload', 'pure', 'lru', 'plainrandom', 'weighted', 'twobin'),
140 'leastload', config=True, shortname='scheme', allow_none=False,
140 'leastload', config=True, shortname='scheme', allow_none=False,
141 help="""select the task scheduler scheme [default: Python LRU]
141 help="""select the task scheduler scheme [default: Python LRU]
142 Options are: 'pure', 'lru', 'plainrandom', 'weighted', 'twobin','leastload'"""
142 Options are: 'pure', 'lru', 'plainrandom', 'weighted', 'twobin','leastload'"""
143 )
143 )
144 def _scheme_name_changed(self, old, new):
144 def _scheme_name_changed(self, old, new):
145 self.log.debug("Using scheme %r"%new)
145 self.log.debug("Using scheme %r"%new)
146 self.scheme = globals()[new]
146 self.scheme = globals()[new]
147
147
148 # input arguments:
148 # input arguments:
149 scheme = Instance(FunctionType) # function for determining the destination
149 scheme = Instance(FunctionType) # function for determining the destination
150 def _scheme_default(self):
150 def _scheme_default(self):
151 return leastload
151 return leastload
152 client_stream = Instance(zmqstream.ZMQStream) # client-facing stream
152 client_stream = Instance(zmqstream.ZMQStream) # client-facing stream
153 engine_stream = Instance(zmqstream.ZMQStream) # engine-facing stream
153 engine_stream = Instance(zmqstream.ZMQStream) # engine-facing stream
154 notifier_stream = Instance(zmqstream.ZMQStream) # hub-facing sub stream
154 notifier_stream = Instance(zmqstream.ZMQStream) # hub-facing sub stream
155 mon_stream = Instance(zmqstream.ZMQStream) # hub-facing pub stream
155 mon_stream = Instance(zmqstream.ZMQStream) # hub-facing pub stream
156
156
157 # internals:
157 # internals:
158 graph = Dict() # dict by msg_id of [ msg_ids that depend on key ]
158 graph = Dict() # dict by msg_id of [ msg_ids that depend on key ]
159 retries = Dict() # dict by msg_id of retries remaining (non-neg ints)
159 retries = Dict() # dict by msg_id of retries remaining (non-neg ints)
160 # waiting = List() # list of msg_ids ready to run, but haven't due to HWM
160 # waiting = List() # list of msg_ids ready to run, but haven't due to HWM
161 depending = Dict() # dict by msg_id of (msg_id, raw_msg, after, follow)
161 depending = Dict() # dict by msg_id of (msg_id, raw_msg, after, follow)
162 pending = Dict() # dict by engine_uuid of submitted tasks
162 pending = Dict() # dict by engine_uuid of submitted tasks
163 completed = Dict() # dict by engine_uuid of completed tasks
163 completed = Dict() # dict by engine_uuid of completed tasks
164 failed = Dict() # dict by engine_uuid of failed tasks
164 failed = Dict() # dict by engine_uuid of failed tasks
165 destinations = Dict() # dict by msg_id of engine_uuids where jobs ran (reverse of completed+failed)
165 destinations = Dict() # dict by msg_id of engine_uuids where jobs ran (reverse of completed+failed)
166 clients = Dict() # dict by msg_id for who submitted the task
166 clients = Dict() # dict by msg_id for who submitted the task
167 targets = List() # list of target IDENTs
167 targets = List() # list of target IDENTs
168 loads = List() # list of engine loads
168 loads = List() # list of engine loads
169 # full = Set() # set of IDENTs that have HWM outstanding tasks
169 # full = Set() # set of IDENTs that have HWM outstanding tasks
170 all_completed = Set() # set of all completed tasks
170 all_completed = Set() # set of all completed tasks
171 all_failed = Set() # set of all failed tasks
171 all_failed = Set() # set of all failed tasks
172 all_done = Set() # set of all finished tasks=union(completed,failed)
172 all_done = Set() # set of all finished tasks=union(completed,failed)
173 all_ids = Set() # set of all submitted task IDs
173 all_ids = Set() # set of all submitted task IDs
174 blacklist = Dict() # dict by msg_id of locations where a job has encountered UnmetDependency
174 blacklist = Dict() # dict by msg_id of locations where a job has encountered UnmetDependency
175 auditor = Instance('zmq.eventloop.ioloop.PeriodicCallback')
175 auditor = Instance('zmq.eventloop.ioloop.PeriodicCallback')
176
176
177 ident = CBytes() # ZMQ identity. This should just be self.session.session
177 ident = CBytes() # ZMQ identity. This should just be self.session.session
178 # but ensure Bytes
178 # but ensure Bytes
179 def _ident_default(self):
179 def _ident_default(self):
180 return asbytes(self.session.session)
180 return asbytes(self.session.session)
181
181
182 def start(self):
182 def start(self):
183 self.engine_stream.on_recv(self.dispatch_result, copy=False)
183 self.engine_stream.on_recv(self.dispatch_result, copy=False)
184 self._notification_handlers = dict(
184 self._notification_handlers = dict(
185 registration_notification = self._register_engine,
185 registration_notification = self._register_engine,
186 unregistration_notification = self._unregister_engine
186 unregistration_notification = self._unregister_engine
187 )
187 )
188 self.notifier_stream.on_recv(self.dispatch_notification)
188 self.notifier_stream.on_recv(self.dispatch_notification)
189 self.auditor = ioloop.PeriodicCallback(self.audit_timeouts, 2e3, self.loop) # every 2 seconds (0.5 Hz)
189 self.auditor = ioloop.PeriodicCallback(self.audit_timeouts, 2e3, self.loop) # every 2 seconds (0.5 Hz)
190 self.auditor.start()
190 self.auditor.start()
191 self.log.info("Scheduler started [%s]"%self.scheme_name)
191 self.log.info("Scheduler started [%s]"%self.scheme_name)
192
192
193 def resume_receiving(self):
193 def resume_receiving(self):
194 """Resume accepting jobs."""
194 """Resume accepting jobs."""
195 self.client_stream.on_recv(self.dispatch_submission, copy=False)
195 self.client_stream.on_recv(self.dispatch_submission, copy=False)
196
196
197 def stop_receiving(self):
197 def stop_receiving(self):
198 """Stop accepting jobs while there are no engines.
198 """Stop accepting jobs while there are no engines.
199 Leave them in the ZMQ queue."""
199 Leave them in the ZMQ queue."""
200 self.client_stream.on_recv(None)
200 self.client_stream.on_recv(None)
201
201
202 #-----------------------------------------------------------------------
202 #-----------------------------------------------------------------------
203 # [Un]Registration Handling
203 # [Un]Registration Handling
204 #-----------------------------------------------------------------------
204 #-----------------------------------------------------------------------
205
205
206 def dispatch_notification(self, msg):
206 def dispatch_notification(self, msg):
207 """dispatch register/unregister events."""
207 """dispatch register/unregister events."""
208 try:
208 try:
209 idents,msg = self.session.feed_identities(msg)
209 idents,msg = self.session.feed_identities(msg)
210 except ValueError:
210 except ValueError:
211 self.log.warn("task::Invalid Message: %r",msg)
211 self.log.warn("task::Invalid Message: %r",msg)
212 return
212 return
213 try:
213 try:
214 msg = self.session.unserialize(msg)
214 msg = self.session.unserialize(msg)
215 except ValueError:
215 except ValueError:
216 self.log.warn("task::Unauthorized message from: %r"%idents)
216 self.log.warn("task::Unauthorized message from: %r"%idents)
217 return
217 return
218
218
219 msg_type = msg['header']['msg_type']
219 msg_type = msg['header']['msg_type']
220
220
221 handler = self._notification_handlers.get(msg_type, None)
221 handler = self._notification_handlers.get(msg_type, None)
222 if handler is None:
222 if handler is None:
223 self.log.error("Unhandled message type: %r"%msg_type)
223 self.log.error("Unhandled message type: %r"%msg_type)
224 else:
224 else:
225 try:
225 try:
226 handler(asbytes(msg['content']['queue']))
226 handler(asbytes(msg['content']['queue']))
227 except Exception:
227 except Exception:
228 self.log.error("task::Invalid notification msg: %r",msg)
228 self.log.error("task::Invalid notification msg: %r",msg)
229
229
230 def _register_engine(self, uid):
230 def _register_engine(self, uid):
231 """New engine with ident `uid` became available."""
231 """New engine with ident `uid` became available."""
232 # head of the line:
232 # head of the line:
233 self.targets.insert(0,uid)
233 self.targets.insert(0,uid)
234 self.loads.insert(0,0)
234 self.loads.insert(0,0)
235
235
236 # initialize sets
236 # initialize sets
237 self.completed[uid] = set()
237 self.completed[uid] = set()
238 self.failed[uid] = set()
238 self.failed[uid] = set()
239 self.pending[uid] = {}
239 self.pending[uid] = {}
240 if len(self.targets) == 1:
240 if len(self.targets) == 1:
241 self.resume_receiving()
241 self.resume_receiving()
242 # rescan the graph:
242 # rescan the graph:
243 self.update_graph(None)
243 self.update_graph(None)
244
244
245 def _unregister_engine(self, uid):
245 def _unregister_engine(self, uid):
246 """Existing engine with ident `uid` became unavailable."""
246 """Existing engine with ident `uid` became unavailable."""
247 if len(self.targets) == 1:
247 if len(self.targets) == 1:
248 # this was our only engine
248 # this was our only engine
249 self.stop_receiving()
249 self.stop_receiving()
250
250
251 # handle any potentially finished tasks:
251 # handle any potentially finished tasks:
252 self.engine_stream.flush()
252 self.engine_stream.flush()
253
253
254 # don't pop destinations, because they might be used later
254 # don't pop destinations, because they might be used later
255 # map(self.destinations.pop, self.completed.pop(uid))
255 # map(self.destinations.pop, self.completed.pop(uid))
256 # map(self.destinations.pop, self.failed.pop(uid))
256 # map(self.destinations.pop, self.failed.pop(uid))
257
257
258 # prevent this engine from receiving work
258 # prevent this engine from receiving work
259 idx = self.targets.index(uid)
259 idx = self.targets.index(uid)
260 self.targets.pop(idx)
260 self.targets.pop(idx)
261 self.loads.pop(idx)
261 self.loads.pop(idx)
262
262
263 # wait 5 seconds before cleaning up pending jobs, since the results might
263 # wait 5 seconds before cleaning up pending jobs, since the results might
264 # still be incoming
264 # still be incoming
265 if self.pending[uid]:
265 if self.pending[uid]:
266 dc = ioloop.DelayedCallback(lambda : self.handle_stranded_tasks(uid), 5000, self.loop)
266 dc = ioloop.DelayedCallback(lambda : self.handle_stranded_tasks(uid), 5000, self.loop)
267 dc.start()
267 dc.start()
268 else:
268 else:
269 self.completed.pop(uid)
269 self.completed.pop(uid)
270 self.failed.pop(uid)
270 self.failed.pop(uid)
271
271
272
272
273 def handle_stranded_tasks(self, engine):
273 def handle_stranded_tasks(self, engine):
274 """Deal with jobs resident in an engine that died."""
274 """Deal with jobs resident in an engine that died."""
275 lost = self.pending[engine]
275 lost = self.pending[engine]
276 for msg_id in lost.keys():
276 for msg_id in lost.keys():
277 if msg_id not in self.pending[engine]:
277 if msg_id not in self.pending[engine]:
278 # prevent double-handling of messages
278 # prevent double-handling of messages
279 continue
279 continue
280
280
281 raw_msg = lost[msg_id][0]
281 raw_msg = lost[msg_id][0]
282 idents,msg = self.session.feed_identities(raw_msg, copy=False)
282 idents,msg = self.session.feed_identities(raw_msg, copy=False)
283 parent = self.session.unpack(msg[1].bytes)
283 parent = self.session.unpack(msg[1].bytes)
284 idents = [engine, idents[0]]
284 idents = [engine, idents[0]]
285
285
286 # build fake error reply
286 # build fake error reply
287 try:
287 try:
288 raise error.EngineError("Engine %r died while running task %r"%(engine, msg_id))
288 raise error.EngineError("Engine %r died while running task %r"%(engine, msg_id))
289 except:
289 except:
290 content = error.wrap_exception()
290 content = error.wrap_exception()
291 msg = self.session.msg('apply_reply', content, parent=parent, subheader={'status':'error'})
291 msg = self.session.msg('apply_reply', content, parent=parent, subheader={'status':'error'})
292 raw_reply = map(zmq.Message, self.session.serialize(msg, ident=idents))
292 raw_reply = map(zmq.Message, self.session.serialize(msg, ident=idents))
293 # and dispatch it
293 # and dispatch it
294 self.dispatch_result(raw_reply)
294 self.dispatch_result(raw_reply)
295
295
296 # finally scrub completed/failed lists
296 # finally scrub completed/failed lists
297 self.completed.pop(engine)
297 self.completed.pop(engine)
298 self.failed.pop(engine)
298 self.failed.pop(engine)
299
299
300
300
301 #-----------------------------------------------------------------------
301 #-----------------------------------------------------------------------
302 # Job Submission
302 # Job Submission
303 #-----------------------------------------------------------------------
303 #-----------------------------------------------------------------------
304 def dispatch_submission(self, raw_msg):
304 def dispatch_submission(self, raw_msg):
305 """Dispatch job submission to appropriate handlers."""
305 """Dispatch job submission to appropriate handlers."""
306 # ensure targets up to date:
306 # ensure targets up to date:
307 self.notifier_stream.flush()
307 self.notifier_stream.flush()
308 try:
308 try:
309 idents, msg = self.session.feed_identities(raw_msg, copy=False)
309 idents, msg = self.session.feed_identities(raw_msg, copy=False)
310 msg = self.session.unserialize(msg, content=False, copy=False)
310 msg = self.session.unserialize(msg, content=False, copy=False)
311 except Exception:
311 except Exception:
312 self.log.error("task::Invaid task msg: %r"%raw_msg, exc_info=True)
312 self.log.error("task::Invaid task msg: %r"%raw_msg, exc_info=True)
313 return
313 return
314
314
315
315
316 # send to monitor
316 # send to monitor
317 self.mon_stream.send_multipart([b'intask']+raw_msg, copy=False)
317 self.mon_stream.send_multipart([b'intask']+raw_msg, copy=False)
318
318
319 header = msg['header']
319 header = msg['header']
320 msg_id = header['msg_id']
320 msg_id = header['msg_id']
321 self.all_ids.add(msg_id)
321 self.all_ids.add(msg_id)
322
322
323 # get targets as a set of bytes objects
323 # get targets as a set of bytes objects
324 # from a list of unicode objects
324 # from a list of unicode objects
325 targets = header.get('targets', [])
325 targets = header.get('targets', [])
326 targets = map(asbytes, targets)
326 targets = map(asbytes, targets)
327 targets = set(targets)
327 targets = set(targets)
328
328
329 retries = header.get('retries', 0)
329 retries = header.get('retries', 0)
330 self.retries[msg_id] = retries
330 self.retries[msg_id] = retries
331
331
332 # time dependencies
332 # time dependencies
333 after = header.get('after', None)
333 after = header.get('after', None)
334 if after:
334 if after:
335 after = Dependency(after)
335 after = Dependency(after)
336 if after.all:
336 if after.all:
337 if after.success:
337 if after.success:
338 after = Dependency(after.difference(self.all_completed),
338 after = Dependency(after.difference(self.all_completed),
339 success=after.success,
339 success=after.success,
340 failure=after.failure,
340 failure=after.failure,
341 all=after.all,
341 all=after.all,
342 )
342 )
343 if after.failure:
343 if after.failure:
344 after = Dependency(after.difference(self.all_failed),
344 after = Dependency(after.difference(self.all_failed),
345 success=after.success,
345 success=after.success,
346 failure=after.failure,
346 failure=after.failure,
347 all=after.all,
347 all=after.all,
348 )
348 )
349 if after.check(self.all_completed, self.all_failed):
349 if after.check(self.all_completed, self.all_failed):
350 # recast as empty set, if `after` already met,
350 # recast as empty set, if `after` already met,
351 # to prevent unnecessary set comparisons
351 # to prevent unnecessary set comparisons
352 after = MET
352 after = MET
353 else:
353 else:
354 after = MET
354 after = MET
355
355
356 # location dependencies
356 # location dependencies
357 follow = Dependency(header.get('follow', []))
357 follow = Dependency(header.get('follow', []))
358
358
359 # turn timeouts into datetime objects:
359 # turn timeouts into datetime objects:
360 timeout = header.get('timeout', None)
360 timeout = header.get('timeout', None)
361 if timeout:
361 if timeout:
362 timeout = datetime.now() + timedelta(0,timeout,0)
362 timeout = datetime.now() + timedelta(0,timeout,0)
363
363
364 args = [raw_msg, targets, after, follow, timeout]
364 args = [raw_msg, targets, after, follow, timeout]
365
365
366 # validate and reduce dependencies:
366 # validate and reduce dependencies:
367 for dep in after,follow:
367 for dep in after,follow:
368 if not dep: # empty dependency
368 if not dep: # empty dependency
369 continue
369 continue
370 # check valid:
370 # check valid:
371 if msg_id in dep or dep.difference(self.all_ids):
371 if msg_id in dep or dep.difference(self.all_ids):
372 self.depending[msg_id] = args
372 self.depending[msg_id] = args
373 return self.fail_unreachable(msg_id, error.InvalidDependency)
373 return self.fail_unreachable(msg_id, error.InvalidDependency)
374 # check if unreachable:
374 # check if unreachable:
375 if dep.unreachable(self.all_completed, self.all_failed):
375 if dep.unreachable(self.all_completed, self.all_failed):
376 self.depending[msg_id] = args
376 self.depending[msg_id] = args
377 return self.fail_unreachable(msg_id)
377 return self.fail_unreachable(msg_id)
378
378
379 if after.check(self.all_completed, self.all_failed):
379 if after.check(self.all_completed, self.all_failed):
380 # time deps already met, try to run
380 # time deps already met, try to run
381 if not self.maybe_run(msg_id, *args):
381 if not self.maybe_run(msg_id, *args):
382 # can't run yet
382 # can't run yet
383 if msg_id not in self.all_failed:
383 if msg_id not in self.all_failed:
384 # could have failed as unreachable
384 # could have failed as unreachable
385 self.save_unmet(msg_id, *args)
385 self.save_unmet(msg_id, *args)
386 else:
386 else:
387 self.save_unmet(msg_id, *args)
387 self.save_unmet(msg_id, *args)
388
388
389 def audit_timeouts(self):
389 def audit_timeouts(self):
390 """Audit all waiting tasks for expired timeouts."""
390 """Audit all waiting tasks for expired timeouts."""
391 now = datetime.now()
391 now = datetime.now()
392 for msg_id in self.depending.keys():
392 for msg_id in self.depending.keys():
393 # must recheck, in case one failure cascaded to another:
393 # must recheck, in case one failure cascaded to another:
394 if msg_id in self.depending:
394 if msg_id in self.depending:
395 raw_msg,targets,after,follow,timeout = self.depending[msg_id]
395 raw_msg,targets,after,follow,timeout = self.depending[msg_id]
396 if timeout and timeout < now:
396 if timeout and timeout < now:
397 self.fail_unreachable(msg_id, error.TaskTimeout)
397 self.fail_unreachable(msg_id, error.TaskTimeout)
398
398
399 def fail_unreachable(self, msg_id, why=error.ImpossibleDependency):
399 def fail_unreachable(self, msg_id, why=error.ImpossibleDependency):
400 """a task has become unreachable, send a reply with an ImpossibleDependency
400 """a task has become unreachable, send a reply with an ImpossibleDependency
401 error."""
401 error."""
402 if msg_id not in self.depending:
402 if msg_id not in self.depending:
403 self.log.error("msg %r already failed!", msg_id)
403 self.log.error("msg %r already failed!", msg_id)
404 return
404 return
405 raw_msg,targets,after,follow,timeout = self.depending.pop(msg_id)
405 raw_msg,targets,after,follow,timeout = self.depending.pop(msg_id)
406 for mid in follow.union(after):
406 for mid in follow.union(after):
407 if mid in self.graph:
407 if mid in self.graph:
408 self.graph[mid].remove(msg_id)
408 self.graph[mid].remove(msg_id)
409
409
410 # FIXME: unpacking a message I've already unpacked, but didn't save:
410 # FIXME: unpacking a message I've already unpacked, but didn't save:
411 idents,msg = self.session.feed_identities(raw_msg, copy=False)
411 idents,msg = self.session.feed_identities(raw_msg, copy=False)
412 header = self.session.unpack(msg[1].bytes)
412 header = self.session.unpack(msg[1].bytes)
413
413
414 try:
414 try:
415 raise why()
415 raise why()
416 except:
416 except:
417 content = error.wrap_exception()
417 content = error.wrap_exception()
418
418
419 self.all_done.add(msg_id)
419 self.all_done.add(msg_id)
420 self.all_failed.add(msg_id)
420 self.all_failed.add(msg_id)
421
421
422 msg = self.session.send(self.client_stream, 'apply_reply', content,
422 msg = self.session.send(self.client_stream, 'apply_reply', content,
423 parent=header, ident=idents)
423 parent=header, ident=idents)
424 self.session.send(self.mon_stream, msg, ident=[b'outtask']+idents)
424 self.session.send(self.mon_stream, msg, ident=[b'outtask']+idents)
425
425
426 self.update_graph(msg_id, success=False)
426 self.update_graph(msg_id, success=False)
427
427
428 def maybe_run(self, msg_id, raw_msg, targets, after, follow, timeout):
428 def maybe_run(self, msg_id, raw_msg, targets, after, follow, timeout):
429 """check location dependencies, and run if they are met."""
429 """check location dependencies, and run if they are met."""
430 blacklist = self.blacklist.setdefault(msg_id, set())
430 blacklist = self.blacklist.setdefault(msg_id, set())
431 if follow or targets or blacklist or self.hwm:
431 if follow or targets or blacklist or self.hwm:
432 # we need a can_run filter
432 # we need a can_run filter
433 def can_run(idx):
433 def can_run(idx):
434 # check hwm
434 # check hwm
435 if self.hwm and self.loads[idx] == self.hwm:
435 if self.hwm and self.loads[idx] == self.hwm:
436 return False
436 return False
437 target = self.targets[idx]
437 target = self.targets[idx]
438 # check blacklist
438 # check blacklist
439 if target in blacklist:
439 if target in blacklist:
440 return False
440 return False
441 # check targets
441 # check targets
442 if targets and target not in targets:
442 if targets and target not in targets:
443 return False
443 return False
444 # check follow
444 # check follow
445 return follow.check(self.completed[target], self.failed[target])
445 return follow.check(self.completed[target], self.failed[target])
446
446
447 indices = filter(can_run, range(len(self.targets)))
447 indices = filter(can_run, range(len(self.targets)))
448
448
449 if not indices:
449 if not indices:
450 # couldn't run
450 # couldn't run
451 if follow.all:
451 if follow.all:
452 # check follow for impossibility
452 # check follow for impossibility
453 dests = set()
453 dests = set()
454 relevant = set()
454 relevant = set()
455 if follow.success:
455 if follow.success:
456 relevant = self.all_completed
456 relevant = self.all_completed
457 if follow.failure:
457 if follow.failure:
458 relevant = relevant.union(self.all_failed)
458 relevant = relevant.union(self.all_failed)
459 for m in follow.intersection(relevant):
459 for m in follow.intersection(relevant):
460 dests.add(self.destinations[m])
460 dests.add(self.destinations[m])
461 if len(dests) > 1:
461 if len(dests) > 1:
462 self.depending[msg_id] = (raw_msg, targets, after, follow, timeout)
462 self.depending[msg_id] = (raw_msg, targets, after, follow, timeout)
463 self.fail_unreachable(msg_id)
463 self.fail_unreachable(msg_id)
464 return False
464 return False
465 if targets:
465 if targets:
466 # check blacklist+targets for impossibility
466 # check blacklist+targets for impossibility
467 targets.difference_update(blacklist)
467 targets.difference_update(blacklist)
468 if not targets or not targets.intersection(self.targets):
468 if not targets or not targets.intersection(self.targets):
469 self.depending[msg_id] = (raw_msg, targets, after, follow, timeout)
469 self.depending[msg_id] = (raw_msg, targets, after, follow, timeout)
470 self.fail_unreachable(msg_id)
470 self.fail_unreachable(msg_id)
471 return False
471 return False
472 return False
472 return False
473 else:
473 else:
474 indices = None
474 indices = None
475
475
476 self.submit_task(msg_id, raw_msg, targets, follow, timeout, indices)
476 self.submit_task(msg_id, raw_msg, targets, follow, timeout, indices)
477 return True
477 return True
478
478
479 def save_unmet(self, msg_id, raw_msg, targets, after, follow, timeout):
479 def save_unmet(self, msg_id, raw_msg, targets, after, follow, timeout):
480 """Save a message for later submission when its dependencies are met."""
480 """Save a message for later submission when its dependencies are met."""
481 self.depending[msg_id] = [raw_msg,targets,after,follow,timeout]
481 self.depending[msg_id] = [raw_msg,targets,after,follow,timeout]
482 # track the ids in follow or after, but not those already finished
482 # track the ids in follow or after, but not those already finished
483 for dep_id in after.union(follow).difference(self.all_done):
483 for dep_id in after.union(follow).difference(self.all_done):
484 if dep_id not in self.graph:
484 if dep_id not in self.graph:
485 self.graph[dep_id] = set()
485 self.graph[dep_id] = set()
486 self.graph[dep_id].add(msg_id)
486 self.graph[dep_id].add(msg_id)
487
487
488 def submit_task(self, msg_id, raw_msg, targets, follow, timeout, indices=None):
488 def submit_task(self, msg_id, raw_msg, targets, follow, timeout, indices=None):
489 """Submit a task to any of a subset of our targets."""
489 """Submit a task to any of a subset of our targets."""
490 if indices:
490 if indices:
491 loads = [self.loads[i] for i in indices]
491 loads = [self.loads[i] for i in indices]
492 else:
492 else:
493 loads = self.loads
493 loads = self.loads
494 idx = self.scheme(loads)
494 idx = self.scheme(loads)
495 if indices:
495 if indices:
496 idx = indices[idx]
496 idx = indices[idx]
497 target = self.targets[idx]
497 target = self.targets[idx]
498 # print (target, map(str, msg[:3]))
498 # print (target, map(str, msg[:3]))
499 # send job to the engine
499 # send job to the engine
500 self.engine_stream.send(target, flags=zmq.SNDMORE, copy=False)
500 self.engine_stream.send(target, flags=zmq.SNDMORE, copy=False)
501 self.engine_stream.send_multipart(raw_msg, copy=False)
501 self.engine_stream.send_multipart(raw_msg, copy=False)
502 # update load
502 # update load
503 self.add_job(idx)
503 self.add_job(idx)
504 self.pending[target][msg_id] = (raw_msg, targets, MET, follow, timeout)
504 self.pending[target][msg_id] = (raw_msg, targets, MET, follow, timeout)
505 # notify Hub
505 # notify Hub
506 content = dict(msg_id=msg_id, engine_id=target.decode('ascii'))
506 content = dict(msg_id=msg_id, engine_id=target.decode('ascii'))
507 self.session.send(self.mon_stream, 'task_destination', content=content,
507 self.session.send(self.mon_stream, 'task_destination', content=content,
508 ident=[b'tracktask',self.ident])
508 ident=[b'tracktask',self.ident])
509
509
510
510
511 #-----------------------------------------------------------------------
511 #-----------------------------------------------------------------------
512 # Result Handling
512 # Result Handling
513 #-----------------------------------------------------------------------
513 #-----------------------------------------------------------------------
514 def dispatch_result(self, raw_msg):
514 def dispatch_result(self, raw_msg):
515 """dispatch method for result replies"""
515 """dispatch method for result replies"""
516 try:
516 try:
517 idents,msg = self.session.feed_identities(raw_msg, copy=False)
517 idents,msg = self.session.feed_identities(raw_msg, copy=False)
518 msg = self.session.unserialize(msg, content=False, copy=False)
518 msg = self.session.unserialize(msg, content=False, copy=False)
519 engine = idents[0]
519 engine = idents[0]
520 try:
520 try:
521 idx = self.targets.index(engine)
521 idx = self.targets.index(engine)
522 except ValueError:
522 except ValueError:
523 pass # skip load-update for dead engines
523 pass # skip load-update for dead engines
524 else:
524 else:
525 self.finish_job(idx)
525 self.finish_job(idx)
526 except Exception:
526 except Exception:
527 self.log.error("task::Invaid result: %r", raw_msg, exc_info=True)
527 self.log.error("task::Invaid result: %r", raw_msg, exc_info=True)
528 return
528 return
529
529
530 header = msg['header']
530 header = msg['header']
531 parent = msg['parent_header']
531 parent = msg['parent_header']
532 if header.get('dependencies_met', True):
532 if header.get('dependencies_met', True):
533 success = (header['status'] == 'ok')
533 success = (header['status'] == 'ok')
534 msg_id = parent['msg_id']
534 msg_id = parent['msg_id']
535 retries = self.retries[msg_id]
535 retries = self.retries[msg_id]
536 if not success and retries > 0:
536 if not success and retries > 0:
537 # failed
537 # failed
538 self.retries[msg_id] = retries - 1
538 self.retries[msg_id] = retries - 1
539 self.handle_unmet_dependency(idents, parent)
539 self.handle_unmet_dependency(idents, parent)
540 else:
540 else:
541 del self.retries[msg_id]
541 del self.retries[msg_id]
542 # relay to client and update graph
542 # relay to client and update graph
543 self.handle_result(idents, parent, raw_msg, success)
543 self.handle_result(idents, parent, raw_msg, success)
544 # send to Hub monitor
544 # send to Hub monitor
545 self.mon_stream.send_multipart([b'outtask']+raw_msg, copy=False)
545 self.mon_stream.send_multipart([b'outtask']+raw_msg, copy=False)
546 else:
546 else:
547 self.handle_unmet_dependency(idents, parent)
547 self.handle_unmet_dependency(idents, parent)
548
548
549 def handle_result(self, idents, parent, raw_msg, success=True):
549 def handle_result(self, idents, parent, raw_msg, success=True):
550 """handle a real task result, either success or failure"""
550 """handle a real task result, either success or failure"""
551 # first, relay result to client
551 # first, relay result to client
552 engine = idents[0]
552 engine = idents[0]
553 client = idents[1]
553 client = idents[1]
554 # swap_ids for ROUTER-ROUTER mirror
554 # swap_ids for ROUTER-ROUTER mirror
555 raw_msg[:2] = [client,engine]
555 raw_msg[:2] = [client,engine]
556 # print (map(str, raw_msg[:4]))
556 # print (map(str, raw_msg[:4]))
557 self.client_stream.send_multipart(raw_msg, copy=False)
557 self.client_stream.send_multipart(raw_msg, copy=False)
558 # now, update our data structures
558 # now, update our data structures
559 msg_id = parent['msg_id']
559 msg_id = parent['msg_id']
560 self.blacklist.pop(msg_id, None)
560 self.blacklist.pop(msg_id, None)
561 self.pending[engine].pop(msg_id)
561 self.pending[engine].pop(msg_id)
562 if success:
562 if success:
563 self.completed[engine].add(msg_id)
563 self.completed[engine].add(msg_id)
564 self.all_completed.add(msg_id)
564 self.all_completed.add(msg_id)
565 else:
565 else:
566 self.failed[engine].add(msg_id)
566 self.failed[engine].add(msg_id)
567 self.all_failed.add(msg_id)
567 self.all_failed.add(msg_id)
568 self.all_done.add(msg_id)
568 self.all_done.add(msg_id)
569 self.destinations[msg_id] = engine
569 self.destinations[msg_id] = engine
570
570
571 self.update_graph(msg_id, success)
571 self.update_graph(msg_id, success)
572
572
573 def handle_unmet_dependency(self, idents, parent):
573 def handle_unmet_dependency(self, idents, parent):
574 """handle an unmet dependency"""
574 """handle an unmet dependency"""
575 engine = idents[0]
575 engine = idents[0]
576 msg_id = parent['msg_id']
576 msg_id = parent['msg_id']
577
577
578 if msg_id not in self.blacklist:
578 if msg_id not in self.blacklist:
579 self.blacklist[msg_id] = set()
579 self.blacklist[msg_id] = set()
580 self.blacklist[msg_id].add(engine)
580 self.blacklist[msg_id].add(engine)
581
581
582 args = self.pending[engine].pop(msg_id)
582 args = self.pending[engine].pop(msg_id)
583 raw,targets,after,follow,timeout = args
583 raw,targets,after,follow,timeout = args
584
584
585 if self.blacklist[msg_id] == targets:
585 if self.blacklist[msg_id] == targets:
586 self.depending[msg_id] = args
586 self.depending[msg_id] = args
587 self.fail_unreachable(msg_id)
587 self.fail_unreachable(msg_id)
588 elif not self.maybe_run(msg_id, *args):
588 elif not self.maybe_run(msg_id, *args):
589 # resubmit failed
589 # resubmit failed
590 if msg_id not in self.all_failed:
590 if msg_id not in self.all_failed:
591 # put it back in our dependency tree
591 # put it back in our dependency tree
592 self.save_unmet(msg_id, *args)
592 self.save_unmet(msg_id, *args)
593
593
594 if self.hwm:
594 if self.hwm:
595 try:
595 try:
596 idx = self.targets.index(engine)
596 idx = self.targets.index(engine)
597 except ValueError:
597 except ValueError:
598 pass # skip load-update for dead engines
598 pass # skip load-update for dead engines
599 else:
599 else:
600 if self.loads[idx] == self.hwm-1:
600 if self.loads[idx] == self.hwm-1:
601 self.update_graph(None)
601 self.update_graph(None)
602
602
603
603
604
604
605 def update_graph(self, dep_id=None, success=True):
605 def update_graph(self, dep_id=None, success=True):
606 """dep_id just finished. Update our dependency
606 """dep_id just finished. Update our dependency
607 graph and submit any jobs that just became runnable.
607 graph and submit any jobs that just became runnable.
608
608
609 Called with dep_id=None to update entire graph for hwm, but without finishing
609 Called with dep_id=None to update entire graph for hwm, but without finishing
610 a task.
610 a task.
611 """
611 """
612 # print ("\n\n***********")
612 # print ("\n\n***********")
613 # pprint (dep_id)
613 # pprint (dep_id)
614 # pprint (self.graph)
614 # pprint (self.graph)
615 # pprint (self.depending)
615 # pprint (self.depending)
616 # pprint (self.all_completed)
616 # pprint (self.all_completed)
617 # pprint (self.all_failed)
617 # pprint (self.all_failed)
618 # print ("\n\n***********\n\n")
618 # print ("\n\n***********\n\n")
619 # update any jobs that depended on the dependency
619 # update any jobs that depended on the dependency
620 jobs = self.graph.pop(dep_id, [])
620 jobs = self.graph.pop(dep_id, [])
621
621
622 # recheck *all* jobs if
622 # recheck *all* jobs if
623 # a) we have HWM and an engine just became no longer full
623 # a) we have HWM and an engine just became no longer full
624 # or b) dep_id was given as None
624 # or b) dep_id was given as None
625 if dep_id is None or self.hwm and any( [ load==self.hwm-1 for load in self.loads ]):
625 if dep_id is None or self.hwm and any( [ load==self.hwm-1 for load in self.loads ]):
626 jobs = self.depending.keys()
626 jobs = self.depending.keys()
627
627
628 for msg_id in jobs:
628 for msg_id in jobs:
629 raw_msg, targets, after, follow, timeout = self.depending[msg_id]
629 raw_msg, targets, after, follow, timeout = self.depending[msg_id]
630
630
631 if after.unreachable(self.all_completed, self.all_failed)\
631 if after.unreachable(self.all_completed, self.all_failed)\
632 or follow.unreachable(self.all_completed, self.all_failed):
632 or follow.unreachable(self.all_completed, self.all_failed):
633 self.fail_unreachable(msg_id)
633 self.fail_unreachable(msg_id)
634
634
635 elif after.check(self.all_completed, self.all_failed): # time deps met, maybe run
635 elif after.check(self.all_completed, self.all_failed): # time deps met, maybe run
636 if self.maybe_run(msg_id, raw_msg, targets, MET, follow, timeout):
636 if self.maybe_run(msg_id, raw_msg, targets, MET, follow, timeout):
637
637
638 self.depending.pop(msg_id)
638 self.depending.pop(msg_id)
639 for mid in follow.union(after):
639 for mid in follow.union(after):
640 if mid in self.graph:
640 if mid in self.graph:
641 self.graph[mid].remove(msg_id)
641 self.graph[mid].remove(msg_id)
642
642
643 #----------------------------------------------------------------------
643 #----------------------------------------------------------------------
644 # methods to be overridden by subclasses
644 # methods to be overridden by subclasses
645 #----------------------------------------------------------------------
645 #----------------------------------------------------------------------
646
646
647 def add_job(self, idx):
647 def add_job(self, idx):
648 """Called after self.targets[idx] just got the job with header.
648 """Called after self.targets[idx] just got the job with header.
649 Override with subclasses. The default ordering is simple LRU.
649 Override with subclasses. The default ordering is simple LRU.
650 The default loads are the number of outstanding jobs."""
650 The default loads are the number of outstanding jobs."""
651 self.loads[idx] += 1
651 self.loads[idx] += 1
652 for lis in (self.targets, self.loads):
652 for lis in (self.targets, self.loads):
653 lis.append(lis.pop(idx))
653 lis.append(lis.pop(idx))
654
654
655
655
656 def finish_job(self, idx):
656 def finish_job(self, idx):
657 """Called after self.targets[idx] just finished a job.
657 """Called after self.targets[idx] just finished a job.
658 Override with subclasses."""
658 Override with subclasses."""
659 self.loads[idx] -= 1
659 self.loads[idx] -= 1
660
660
661
661
662
662
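The default add_job above implements the LRU ordering that lru() and twobin() rely on: the engine that just received work is rotated to the back of both lists. A standalone sketch of that rotation with hypothetical values:

targets = [b'a', b'b', b'c']
loads = [0, 0, 0]
idx = 0                           # engine b'a' just received a job
loads[idx] += 1
for lis in (targets, loads):
    lis.append(lis.pop(idx))      # rotate the chosen engine to the back
assert targets == [b'b', b'c', b'a']
assert loads == [0, 0, 1]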
663 def launch_scheduler(in_addr, out_addr, mon_addr, not_addr, config=None,
663 def launch_scheduler(in_addr, out_addr, mon_addr, not_addr, config=None,
664 logname='root', log_url=None, loglevel=logging.DEBUG,
664 logname='root', log_url=None, loglevel=logging.DEBUG,
665 identity=b'task', in_thread=False):
665 identity=b'task', in_thread=False):
666
666
667 ZMQStream = zmqstream.ZMQStream
667 ZMQStream = zmqstream.ZMQStream
668
668
669 if config:
669 if config:
670 # unwrap dict back into Config
670 # unwrap dict back into Config
671 config = Config(config)
671 config = Config(config)
672
672
673 if in_thread:
673 if in_thread:
674 # use instance() to get the same Context/Loop as our parent
674 # use instance() to get the same Context/Loop as our parent
675 ctx = zmq.Context.instance()
675 ctx = zmq.Context.instance()
676 loop = ioloop.IOLoop.instance()
676 loop = ioloop.IOLoop.instance()
677 else:
677 else:
678 # in a process, don't use instance()
678 # in a process, don't use instance()
679 # for safety with multiprocessing
679 # for safety with multiprocessing
680 ctx = zmq.Context()
680 ctx = zmq.Context()
681 loop = ioloop.IOLoop()
681 loop = ioloop.IOLoop()
682 ins = ZMQStream(ctx.socket(zmq.XREP),loop)
682 ins = ZMQStream(ctx.socket(zmq.ROUTER),loop)
683 ins.setsockopt(zmq.IDENTITY, identity)
683 ins.setsockopt(zmq.IDENTITY, identity)
684 ins.bind(in_addr)
684 ins.bind(in_addr)
685
685
686 outs = ZMQStream(ctx.socket(zmq.XREP),loop)
686 outs = ZMQStream(ctx.socket(zmq.ROUTER),loop)
687 outs.setsockopt(zmq.IDENTITY, identity)
687 outs.setsockopt(zmq.IDENTITY, identity)
688 outs.bind(out_addr)
688 outs.bind(out_addr)
689 mons = zmqstream.ZMQStream(ctx.socket(zmq.PUB),loop)
689 mons = zmqstream.ZMQStream(ctx.socket(zmq.PUB),loop)
690 mons.connect(mon_addr)
690 mons.connect(mon_addr)
691 nots = zmqstream.ZMQStream(ctx.socket(zmq.SUB),loop)
691 nots = zmqstream.ZMQStream(ctx.socket(zmq.SUB),loop)
692 nots.setsockopt(zmq.SUBSCRIBE, b'')
692 nots.setsockopt(zmq.SUBSCRIBE, b'')
693 nots.connect(not_addr)
693 nots.connect(not_addr)
694
694
695 # setup logging.
695 # setup logging.
696 if in_thread:
696 if in_thread:
697 log = Application.instance().log
697 log = Application.instance().log
698 else:
698 else:
699 if log_url:
699 if log_url:
700 log = connect_logger(logname, ctx, log_url, root="scheduler", loglevel=loglevel)
700 log = connect_logger(logname, ctx, log_url, root="scheduler", loglevel=loglevel)
701 else:
701 else:
702 log = local_logger(logname, loglevel)
702 log = local_logger(logname, loglevel)
703
703
704 scheduler = TaskScheduler(client_stream=ins, engine_stream=outs,
704 scheduler = TaskScheduler(client_stream=ins, engine_stream=outs,
705 mon_stream=mons, notifier_stream=nots,
705 mon_stream=mons, notifier_stream=nots,
706 loop=loop, log=log,
706 loop=loop, log=log,
707 config=config)
707 config=config)
708 scheduler.start()
708 scheduler.start()
709 if not in_thread:
709 if not in_thread:
710 try:
710 try:
711 loop.start()
711 loop.start()
712 except KeyboardInterrupt:
712 except KeyboardInterrupt:
713 print ("interrupted, exiting...", file=sys.__stderr__)
713 print ("interrupted, exiting...", file=sys.__stderr__)
714
714
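For reference, a sketch of calling launch_scheduler directly, following the signature above; all four endpoint addresses are hypothetical, and in the normal case the controller application assembles them itself:

launch_scheduler(
    in_addr='tcp://127.0.0.1:5555',   # client-facing ROUTER binds here
    out_addr='tcp://127.0.0.1:5556',  # engine-facing ROUTER binds here
    mon_addr='tcp://127.0.0.1:5557',  # PUB stream connects to the Hub monitor
    not_addr='tcp://127.0.0.1:5558',  # SUB stream for registration notifications
    identity=b'task',
    in_thread=False,                  # own Context/IOLoop; blocks in loop.start()
)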
@@ -1,226 +1,226 b''
1 """A simple engine that talks to a controller over 0MQ.
1 """A simple engine that talks to a controller over 0MQ.
2 It handles registration, etc., and launches a kernel
2 It handles registration, etc., and launches a kernel
3 connected to the Controller's Schedulers.
3 connected to the Controller's Schedulers.
4
4
5 Authors:
5 Authors:
6
6
7 * Min RK
7 * Min RK
8 """
8 """
9 #-----------------------------------------------------------------------------
9 #-----------------------------------------------------------------------------
10 # Copyright (C) 2010-2011 The IPython Development Team
10 # Copyright (C) 2010-2011 The IPython Development Team
11 #
11 #
12 # Distributed under the terms of the BSD License. The full license is in
12 # Distributed under the terms of the BSD License. The full license is in
13 # the file COPYING, distributed as part of this software.
13 # the file COPYING, distributed as part of this software.
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15
15
16 from __future__ import print_function
16 from __future__ import print_function
17
17
18 import sys
18 import sys
19 import time
19 import time
20 from getpass import getpass
20 from getpass import getpass
21
21
22 import zmq
22 import zmq
23 from zmq.eventloop import ioloop, zmqstream
23 from zmq.eventloop import ioloop, zmqstream
24
24
25 from IPython.external.ssh import tunnel
25 from IPython.external.ssh import tunnel
26 # internal
26 # internal
27 from IPython.utils.traitlets import (
27 from IPython.utils.traitlets import (
28 Instance, Dict, Int, Type, CFloat, Unicode, CBytes, Bool
28 Instance, Dict, Int, Type, CFloat, Unicode, CBytes, Bool
29 )
29 )
30 # from IPython.utils.localinterfaces import LOCALHOST
30 # from IPython.utils.localinterfaces import LOCALHOST
31
31
32 from IPython.parallel.controller.heartmonitor import Heart
32 from IPython.parallel.controller.heartmonitor import Heart
33 from IPython.parallel.factory import RegistrationFactory
33 from IPython.parallel.factory import RegistrationFactory
34 from IPython.parallel.util import disambiguate_url, asbytes
34 from IPython.parallel.util import disambiguate_url, asbytes
35
35
36 from IPython.zmq.session import Message
36 from IPython.zmq.session import Message
37
37
38 from .streamkernel import Kernel
38 from .streamkernel import Kernel
39
39
40 class EngineFactory(RegistrationFactory):
40 class EngineFactory(RegistrationFactory):
41 """IPython engine"""
41 """IPython engine"""
42
42
43 # configurables:
43 # configurables:
44 out_stream_factory=Type('IPython.zmq.iostream.OutStream', config=True,
44 out_stream_factory=Type('IPython.zmq.iostream.OutStream', config=True,
45 help="""The OutStream for handling stdout/err.
45 help="""The OutStream for handling stdout/err.
46 Typically 'IPython.zmq.iostream.OutStream'""")
46 Typically 'IPython.zmq.iostream.OutStream'""")
47 display_hook_factory=Type('IPython.zmq.displayhook.ZMQDisplayHook', config=True,
47 display_hook_factory=Type('IPython.zmq.displayhook.ZMQDisplayHook', config=True,
48 help="""The class for handling displayhook.
48 help="""The class for handling displayhook.
49 Typically 'IPython.zmq.displayhook.ZMQDisplayHook'""")
49 Typically 'IPython.zmq.displayhook.ZMQDisplayHook'""")
50 location=Unicode(config=True,
50 location=Unicode(config=True,
51 help="""The location (an IP address) of the controller. This is
51 help="""The location (an IP address) of the controller. This is
52 used for disambiguating URLs, to determine whether
52 used for disambiguating URLs, to determine whether
53 loopback should be used to connect or the public address.""")
53 loopback should be used to connect or the public address.""")
54 timeout=CFloat(2,config=True,
54 timeout=CFloat(2,config=True,
55 help="""The time (in seconds) to wait for the Controller to respond
55 help="""The time (in seconds) to wait for the Controller to respond
56 to registration requests before giving up.""")
56 to registration requests before giving up.""")
57 sshserver=Unicode(config=True,
57 sshserver=Unicode(config=True,
58 help="""The SSH server to use for tunneling connections to the Controller.""")
58 help="""The SSH server to use for tunneling connections to the Controller.""")
59 sshkey=Unicode(config=True,
59 sshkey=Unicode(config=True,
60 help="""The SSH private key file to use when tunneling connections to the Controller.""")
60 help="""The SSH private key file to use when tunneling connections to the Controller.""")
61 paramiko=Bool(sys.platform == 'win32', config=True,
61 paramiko=Bool(sys.platform == 'win32', config=True,
62 help="""Whether to use paramiko instead of openssh for tunnels.""")
62 help="""Whether to use paramiko instead of openssh for tunnels.""")
63
63
64 # not configurable:
64 # not configurable:
65 user_ns=Dict()
65 user_ns=Dict()
66 id=Int(allow_none=True)
66 id=Int(allow_none=True)
67 registrar=Instance('zmq.eventloop.zmqstream.ZMQStream')
67 registrar=Instance('zmq.eventloop.zmqstream.ZMQStream')
68 kernel=Instance(Kernel)
68 kernel=Instance(Kernel)
69
69
70 bident = CBytes()
70 bident = CBytes()
71 ident = Unicode()
71 ident = Unicode()
72 def _ident_changed(self, name, old, new):
72 def _ident_changed(self, name, old, new):
73 self.bident = asbytes(new)
73 self.bident = asbytes(new)
74 using_ssh=Bool(False)
74 using_ssh=Bool(False)
75
75
76
76
77 def __init__(self, **kwargs):
77 def __init__(self, **kwargs):
78 super(EngineFactory, self).__init__(**kwargs)
78 super(EngineFactory, self).__init__(**kwargs)
79 self.ident = self.session.session
79 self.ident = self.session.session
80
80
81 def init_connector(self):
81 def init_connector(self):
82 """construct connection function, which handles tunnels."""
82 """construct connection function, which handles tunnels."""
83 self.using_ssh = bool(self.sshkey or self.sshserver)
83 self.using_ssh = bool(self.sshkey or self.sshserver)
84
84
85 if self.sshkey and not self.sshserver:
85 if self.sshkey and not self.sshserver:
86 # We are using ssh directly to the controller, tunneling localhost to localhost
86 # We are using ssh directly to the controller, tunneling localhost to localhost
87 self.sshserver = self.url.split('://')[1].split(':')[0]
87 self.sshserver = self.url.split('://')[1].split(':')[0]
88
88
89 if self.using_ssh:
89 if self.using_ssh:
90 if tunnel.try_passwordless_ssh(self.sshserver, self.sshkey, self.paramiko):
90 if tunnel.try_passwordless_ssh(self.sshserver, self.sshkey, self.paramiko):
91 password=False
91 password=False
92 else:
92 else:
93 password = getpass("SSH Password for %s: "%self.sshserver)
93 password = getpass("SSH Password for %s: "%self.sshserver)
94 else:
94 else:
95 password = False
95 password = False
96
96
97 def connect(s, url):
97 def connect(s, url):
98 url = disambiguate_url(url, self.location)
98 url = disambiguate_url(url, self.location)
99 if self.using_ssh:
99 if self.using_ssh:
100 self.log.debug("Tunneling connection to %s via %s"%(url, self.sshserver))
100 self.log.debug("Tunneling connection to %s via %s"%(url, self.sshserver))
101 return tunnel.tunnel_connection(s, url, self.sshserver,
101 return tunnel.tunnel_connection(s, url, self.sshserver,
102 keyfile=self.sshkey, paramiko=self.paramiko,
102 keyfile=self.sshkey, paramiko=self.paramiko,
103 password=password,
103 password=password,
104 )
104 )
105 else:
105 else:
106 return s.connect(url)
106 return s.connect(url)
107
107
108 def maybe_tunnel(url):
108 def maybe_tunnel(url):
109 """like connect, but don't complete the connection (for use by heartbeat)"""
109 """like connect, but don't complete the connection (for use by heartbeat)"""
110 url = disambiguate_url(url, self.location)
110 url = disambiguate_url(url, self.location)
111 if self.using_ssh:
111 if self.using_ssh:
112 self.log.debug("Tunneling connection to %s via %s"%(url, self.sshserver))
112 self.log.debug("Tunneling connection to %s via %s"%(url, self.sshserver))
113 url,tunnelobj = tunnel.open_tunnel(url, self.sshserver,
113 url,tunnelobj = tunnel.open_tunnel(url, self.sshserver,
114 keyfile=self.sshkey, paramiko=self.paramiko,
114 keyfile=self.sshkey, paramiko=self.paramiko,
115 password=password,
115 password=password,
116 )
116 )
117 return url
117 return url
118 return connect, maybe_tunnel
118 return connect, maybe_tunnel
119
119
120 def register(self):
120 def register(self):
121 """send the registration_request"""
121 """send the registration_request"""
122
122
123 self.log.info("Registering with controller at %s"%self.url)
123 self.log.info("Registering with controller at %s"%self.url)
124 ctx = self.context
124 ctx = self.context
125 connect,maybe_tunnel = self.init_connector()
125 connect,maybe_tunnel = self.init_connector()
126 reg = ctx.socket(zmq.XREQ)
126 reg = ctx.socket(zmq.DEALER)
127 reg.setsockopt(zmq.IDENTITY, self.bident)
127 reg.setsockopt(zmq.IDENTITY, self.bident)
128 connect(reg, self.url)
128 connect(reg, self.url)
129 self.registrar = zmqstream.ZMQStream(reg, self.loop)
129 self.registrar = zmqstream.ZMQStream(reg, self.loop)
130
130
131
131
132 content = dict(queue=self.ident, heartbeat=self.ident, control=self.ident)
132 content = dict(queue=self.ident, heartbeat=self.ident, control=self.ident)
133 self.registrar.on_recv(lambda msg: self.complete_registration(msg, connect, maybe_tunnel))
133 self.registrar.on_recv(lambda msg: self.complete_registration(msg, connect, maybe_tunnel))
134 # print (self.session.key)
134 # print (self.session.key)
135 self.session.send(self.registrar, "registration_request",content=content)
135 self.session.send(self.registrar, "registration_request",content=content)
136
136
137 def complete_registration(self, msg, connect, maybe_tunnel):
137 def complete_registration(self, msg, connect, maybe_tunnel):
138 # print msg
138 # print msg
139 self._abort_dc.stop()
139 self._abort_dc.stop()
140 ctx = self.context
140 ctx = self.context
141 loop = self.loop
141 loop = self.loop
142 identity = self.bident
142 identity = self.bident
143 idents,msg = self.session.feed_identities(msg)
143 idents,msg = self.session.feed_identities(msg)
144 msg = Message(self.session.unserialize(msg))
144 msg = Message(self.session.unserialize(msg))
145
145
146 if msg.content.status == 'ok':
146 if msg.content.status == 'ok':
147 self.id = int(msg.content.id)
147 self.id = int(msg.content.id)
148
148
149 # launch heartbeat
149 # launch heartbeat
150 hb_addrs = msg.content.heartbeat
150 hb_addrs = msg.content.heartbeat
151
151
152 # possibly forward hb ports with tunnels
152 # possibly forward hb ports with tunnels
153 hb_addrs = [ maybe_tunnel(addr) for addr in hb_addrs ]
153 hb_addrs = [ maybe_tunnel(addr) for addr in hb_addrs ]
154 heart = Heart(*map(str, hb_addrs), heart_id=identity)
154 heart = Heart(*map(str, hb_addrs), heart_id=identity)
155 heart.start()
155 heart.start()
156
156
157 # create Shell Streams (MUX, Task, etc.):
157 # create Shell Streams (MUX, Task, etc.):
158 queue_addr = msg.content.mux
158 queue_addr = msg.content.mux
159 shell_addrs = [ str(queue_addr) ]
159 shell_addrs = [ str(queue_addr) ]
160 task_addr = msg.content.task
160 task_addr = msg.content.task
161 if task_addr:
161 if task_addr:
162 shell_addrs.append(str(task_addr))
162 shell_addrs.append(str(task_addr))
163
163
164 # Uncomment this to go back to two-socket model
164 # Uncomment this to go back to two-socket model
165 # shell_streams = []
165 # shell_streams = []
166 # for addr in shell_addrs:
166 # for addr in shell_addrs:
167 # stream = zmqstream.ZMQStream(ctx.socket(zmq.XREP), loop)
167 # stream = zmqstream.ZMQStream(ctx.socket(zmq.ROUTER), loop)
168 # stream.setsockopt(zmq.IDENTITY, identity)
168 # stream.setsockopt(zmq.IDENTITY, identity)
169 # stream.connect(disambiguate_url(addr, self.location))
169 # stream.connect(disambiguate_url(addr, self.location))
170 # shell_streams.append(stream)
170 # shell_streams.append(stream)
171
171
172 # Now use only one shell stream for mux and tasks
172 # Now use only one shell stream for mux and tasks
173 stream = zmqstream.ZMQStream(ctx.socket(zmq.XREP), loop)
173 stream = zmqstream.ZMQStream(ctx.socket(zmq.ROUTER), loop)
174 stream.setsockopt(zmq.IDENTITY, identity)
174 stream.setsockopt(zmq.IDENTITY, identity)
175 shell_streams = [stream]
175 shell_streams = [stream]
176 for addr in shell_addrs:
176 for addr in shell_addrs:
177 connect(stream, addr)
177 connect(stream, addr)
178 # end single stream-socket
178 # end single stream-socket
179
179
180 # control stream:
180 # control stream:
181 control_addr = str(msg.content.control)
181 control_addr = str(msg.content.control)
182 control_stream = zmqstream.ZMQStream(ctx.socket(zmq.XREP), loop)
182 control_stream = zmqstream.ZMQStream(ctx.socket(zmq.ROUTER), loop)
183 control_stream.setsockopt(zmq.IDENTITY, identity)
183 control_stream.setsockopt(zmq.IDENTITY, identity)
184 connect(control_stream, control_addr)
184 connect(control_stream, control_addr)
185
185
186 # create iopub stream:
186 # create iopub stream:
187 iopub_addr = msg.content.iopub
187 iopub_addr = msg.content.iopub
188 iopub_stream = zmqstream.ZMQStream(ctx.socket(zmq.PUB), loop)
188 iopub_stream = zmqstream.ZMQStream(ctx.socket(zmq.PUB), loop)
189 iopub_stream.setsockopt(zmq.IDENTITY, identity)
189 iopub_stream.setsockopt(zmq.IDENTITY, identity)
190 connect(iopub_stream, iopub_addr)
190 connect(iopub_stream, iopub_addr)
191
191
192 # # Redirect input streams and set a display hook.
192 # # Redirect input streams and set a display hook.
193 if self.out_stream_factory:
193 if self.out_stream_factory:
194 sys.stdout = self.out_stream_factory(self.session, iopub_stream, u'stdout')
194 sys.stdout = self.out_stream_factory(self.session, iopub_stream, u'stdout')
195 sys.stdout.topic = 'engine.%i.stdout'%self.id
195 sys.stdout.topic = 'engine.%i.stdout'%self.id
196 sys.stderr = self.out_stream_factory(self.session, iopub_stream, u'stderr')
196 sys.stderr = self.out_stream_factory(self.session, iopub_stream, u'stderr')
197 sys.stderr.topic = 'engine.%i.stderr'%self.id
197 sys.stderr.topic = 'engine.%i.stderr'%self.id
198 if self.display_hook_factory:
198 if self.display_hook_factory:
199 sys.displayhook = self.display_hook_factory(self.session, iopub_stream)
199 sys.displayhook = self.display_hook_factory(self.session, iopub_stream)
200 sys.displayhook.topic = 'engine.%i.pyout'%self.id
200 sys.displayhook.topic = 'engine.%i.pyout'%self.id
201
201
202 self.kernel = Kernel(config=self.config, int_id=self.id, ident=self.ident, session=self.session,
202 self.kernel = Kernel(config=self.config, int_id=self.id, ident=self.ident, session=self.session,
203 control_stream=control_stream, shell_streams=shell_streams, iopub_stream=iopub_stream,
203 control_stream=control_stream, shell_streams=shell_streams, iopub_stream=iopub_stream,
204 loop=loop, user_ns = self.user_ns, log=self.log)
204 loop=loop, user_ns = self.user_ns, log=self.log)
205 self.kernel.start()
205 self.kernel.start()
206
206
207
207
208 else:
208 else:
209 self.log.fatal("Registration Failed: %s"%msg)
209 self.log.fatal("Registration Failed: %s"%msg)
210 raise Exception("Registration Failed: %s"%msg)
210 raise Exception("Registration Failed: %s"%msg)
211
211
212 self.log.info("Completed registration with id %i"%self.id)
212 self.log.info("Completed registration with id %i"%self.id)
213
213
214
214
215 def abort(self):
215 def abort(self):
216 self.log.fatal("Registration timed out after %.1f seconds"%self.timeout)
216 self.log.fatal("Registration timed out after %.1f seconds"%self.timeout)
217 self.session.send(self.registrar, "unregistration_request", content=dict(id=self.id))
217 self.session.send(self.registrar, "unregistration_request", content=dict(id=self.id))
218 time.sleep(1)
218 time.sleep(1)
219 sys.exit(255)
219 sys.exit(255)
220
220
221 def start(self):
221 def start(self):
222 dc = ioloop.DelayedCallback(self.register, 0, self.loop)
222 dc = ioloop.DelayedCallback(self.register, 0, self.loop)
223 dc.start()
223 dc.start()
224 self._abort_dc = ioloop.DelayedCallback(self.abort, self.timeout*1000, self.loop)
224 self._abort_dc = ioloop.DelayedCallback(self.abort, self.timeout*1000, self.loop)
225 self._abort_dc.start()
225 self._abort_dc.start()
226
226
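The rename above is purely cosmetic: XREQ becomes DEALER and XREP becomes ROUTER, with identical socket semantics. The handshake works because the engine's DEALER sets an explicit IDENTITY before connecting, so the controller's ROUTER receives that identity as the first frame of every message and uses it to route the reply back. A minimal sketch of the pattern; the endpoint and identity are illustrative, not IPython's actual values:

import zmq

ctx = zmq.Context.instance()

router = ctx.socket(zmq.ROUTER)
router.bind('tcp://127.0.0.1:5555')            # illustrative endpoint

dealer = ctx.socket(zmq.DEALER)
dealer.setsockopt(zmq.IDENTITY, b'engine-0')   # must be set before connect()
dealer.connect('tcp://127.0.0.1:5555')

dealer.send(b'registration_request')
ident, payload = router.recv_multipart()       # ROUTER prepends the sender identity
router.send_multipart([ident, b'ok'])          # the identity frame routes the reply
print(dealer.recv())                           # b'ok'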
@@ -1,230 +1,230 b''
1 """KernelStarter class that intercepts Control Queue messages, and handles process management.
1 """KernelStarter class that intercepts Control Queue messages, and handles process management.
2
2
3 Authors:
3 Authors:
4
4
5 * Min RK
5 * Min RK
6 """
6 """
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2010-2011 The IPython Development Team
8 # Copyright (C) 2010-2011 The IPython Development Team
9 #
9 #
10 # Distributed under the terms of the BSD License. The full license is in
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13
13
14 from zmq.eventloop import ioloop
14 from zmq.eventloop import ioloop
15
15
16 from IPython.zmq.session import Session
16 from IPython.zmq.session import Session
17
17
18 class KernelStarter(object):
18 class KernelStarter(object):
19 """Object for resetting/killing the Kernel."""
19 """Object for resetting/killing the Kernel."""
20
20
21
21
22 def __init__(self, session, upstream, downstream, *kernel_args, **kernel_kwargs):
22 def __init__(self, session, upstream, downstream, *kernel_args, **kernel_kwargs):
23 self.session = session
23 self.session = session
24 self.upstream = upstream
24 self.upstream = upstream
25 self.downstream = downstream
25 self.downstream = downstream
26 self.kernel_args = kernel_args
26 self.kernel_args = kernel_args
27 self.kernel_kwargs = kernel_kwargs
27 self.kernel_kwargs = kernel_kwargs
28 self.handlers = {}
28 self.handlers = {}
29 for method in 'shutdown_request shutdown_reply'.split():
29 for method in 'shutdown_request shutdown_reply'.split():
30 self.handlers[method] = getattr(self, method)
30 self.handlers[method] = getattr(self, method)
31
31
32 def start(self):
32 def start(self):
33 self.upstream.on_recv(self.dispatch_request)
33 self.upstream.on_recv(self.dispatch_request)
34 self.downstream.on_recv(self.dispatch_reply)
34 self.downstream.on_recv(self.dispatch_reply)
35
35
36 #--------------------------------------------------------------------------
36 #--------------------------------------------------------------------------
37 # Dispatch methods
37 # Dispatch methods
38 #--------------------------------------------------------------------------
38 #--------------------------------------------------------------------------
39
39
40 def dispatch_request(self, raw_msg):
40 def dispatch_request(self, raw_msg):
41 idents, msg = self.session.feed_identities(raw_msg)
41 idents, msg = self.session.feed_identities(raw_msg)
42 try:
42 try:
43 msg = self.session.unserialize(msg, content=False)
43 msg = self.session.unserialize(msg, content=False)
44 except:
44 except:
45 print ("bad msg: %s"%msg)
45 print ("bad msg: %s"%msg)
46
46
47 msgtype = msg['header']['msg_type']
47 msgtype = msg['header']['msg_type']
48 handler = self.handlers.get(msgtype, None)
48 handler = self.handlers.get(msgtype, None)
49 if handler is None:
49 if handler is None:
50 self.downstream.send_multipart(raw_msg, copy=False)
50 self.downstream.send_multipart(raw_msg, copy=False)
51 else:
51 else:
52 handler(msg)
52 handler(msg)
53
53
54 def dispatch_reply(self, raw_msg):
54 def dispatch_reply(self, raw_msg):
55 idents, msg = self.session.feed_identities(raw_msg)
55 idents, msg = self.session.feed_identities(raw_msg)
56 try:
56 try:
57 msg = self.session.unserialize(msg, content=False)
57 msg = self.session.unserialize(msg, content=False)
58 except:
58 except:
59 print ("bad msg: %s"%msg)
59 print ("bad msg: %s"%msg)
60
60
61 msgtype = msg['header']['msg_type']
61 msgtype = msg['header']['msg_type']
62 handler = self.handlers.get(msgtype, None)
62 handler = self.handlers.get(msgtype, None)
63 if handler is None:
63 if handler is None:
64 self.upstream.send_multipart(raw_msg, copy=False)
64 self.upstream.send_multipart(raw_msg, copy=False)
65 else:
65 else:
66 handler(msg)
66 handler(msg)
67
67
68 #--------------------------------------------------------------------------
68 #--------------------------------------------------------------------------
69 # Handlers
69 # Handlers
70 #--------------------------------------------------------------------------
70 #--------------------------------------------------------------------------
71
71
72 def shutdown_request(self, msg):
72 def shutdown_request(self, msg):
73 """"""
73 """"""
74 self.downstream.send_multipart(msg)
74 self.downstream.send_multipart(msg)
75
75
76 #--------------------------------------------------------------------------
76 #--------------------------------------------------------------------------
77 # Kernel process management methods, from KernelManager:
77 # Kernel process management methods, from KernelManager:
78 #--------------------------------------------------------------------------
78 #--------------------------------------------------------------------------
79
79
80 def _check_local(addr):
80 def _check_local(addr):
81 if isinstance(addr, tuple):
81 if isinstance(addr, tuple):
82 addr = addr[0]
82 addr = addr[0]
83 return addr in LOCAL_IPS
83 return addr in LOCAL_IPS
84
84
85 def start_kernel(self, **kw):
85 def start_kernel(self, **kw):
86 """Starts a kernel process and configures the manager to use it.
86 """Starts a kernel process and configures the manager to use it.
87
87
88 If random ports (port=0) are being used, this method must be called
88 If random ports (port=0) are being used, this method must be called
89 before the channels are created.
89 before the channels are created.
90
90
91 Parameters:
91 Parameters:
92 -----------
92 -----------
93 ipython : bool, optional (default True)
93 ipython : bool, optional (default True)
94 Whether to use an IPython kernel instead of a plain Python kernel.
94 Whether to use an IPython kernel instead of a plain Python kernel.
95 """
95 """
96 self.kernel = Process(target=make_kernel, args=self.kernel_args,
96 self.kernel = Process(target=make_kernel, args=self.kernel_args,
97 kwargs=self.kernel_kwargs)
97 kwargs=self.kernel_kwargs)
98
98
99 def shutdown_kernel(self, restart=False):
99 def shutdown_kernel(self, restart=False):
100 """ Attempts to stop the kernel process cleanly. If the kernel
100 """ Attempts to stop the kernel process cleanly. If the kernel
101 cannot be stopped, it is killed, if possible.
101 cannot be stopped, it is killed, if possible.
102 """
102 """
103 # FIXME: Shutdown does not work on Windows due to ZMQ errors!
103 # FIXME: Shutdown does not work on Windows due to ZMQ errors!
104 if sys.platform == 'win32':
104 if sys.platform == 'win32':
105 self.kill_kernel()
105 self.kill_kernel()
106 return
106 return
107
107
108 # Don't send any additional kernel kill messages immediately, to give
108 # Don't send any additional kernel kill messages immediately, to give
109 # the kernel a chance to properly execute shutdown actions. Wait for at
109 # the kernel a chance to properly execute shutdown actions. Wait for at
110 # most 1s, checking every 0.1s.
110 # most 1s, checking every 0.1s.
111 self.xreq_channel.shutdown(restart=restart)
111 self.xreq_channel.shutdown(restart=restart)
112 for i in range(10):
112 for i in range(10):
113 if self.is_alive:
113 if self.is_alive:
114 time.sleep(0.1)
114 time.sleep(0.1)
115 else:
115 else:
116 break
116 break
117 else:
117 else:
118 # OK, we've waited long enough.
118 # OK, we've waited long enough.
119 if self.has_kernel:
119 if self.has_kernel:
120 self.kill_kernel()
120 self.kill_kernel()
121
121
122 def restart_kernel(self, now=False):
122 def restart_kernel(self, now=False):
123 """Restarts a kernel with the same arguments that were used to launch
123 """Restarts a kernel with the same arguments that were used to launch
124 it. If the old kernel was launched with random ports, the same ports
124 it. If the old kernel was launched with random ports, the same ports
125 will be used for the new kernel.
125 will be used for the new kernel.
126
126
127 Parameters
127 Parameters
128 ----------
128 ----------
129 now : bool, optional
129 now : bool, optional
130 If True, the kernel is forcefully restarted *immediately*, without
130 If True, the kernel is forcefully restarted *immediately*, without
131 having a chance to do any cleanup action. Otherwise the kernel is
131 having a chance to do any cleanup action. Otherwise the kernel is
132 given 1s to clean up before a forceful restart is issued.
132 given 1s to clean up before a forceful restart is issued.
133
133
134 In all cases the kernel is restarted, the only difference is whether
134 In all cases the kernel is restarted, the only difference is whether
135 it is given a chance to perform a clean shutdown or not.
135 it is given a chance to perform a clean shutdown or not.
136 """
136 """
137 if self._launch_args is None:
137 if self._launch_args is None:
138 raise RuntimeError("Cannot restart the kernel. "
138 raise RuntimeError("Cannot restart the kernel. "
139 "No previous call to 'start_kernel'.")
139 "No previous call to 'start_kernel'.")
140 else:
140 else:
141 if self.has_kernel:
141 if self.has_kernel:
142 if now:
142 if now:
143 self.kill_kernel()
143 self.kill_kernel()
144 else:
144 else:
145 self.shutdown_kernel(restart=True)
145 self.shutdown_kernel(restart=True)
146 self.start_kernel(**self._launch_args)
146 self.start_kernel(**self._launch_args)
147
147
148 # FIXME: Messages get dropped in Windows due to probable ZMQ bug
148 # FIXME: Messages get dropped in Windows due to probable ZMQ bug
149 # unless there is some delay here.
149 # unless there is some delay here.
150 if sys.platform == 'win32':
150 if sys.platform == 'win32':
151 time.sleep(0.2)
151 time.sleep(0.2)
152
152
153 @property
153 @property
154 def has_kernel(self):
154 def has_kernel(self):
155 """Returns whether a kernel process has been specified for the kernel
155 """Returns whether a kernel process has been specified for the kernel
156 manager.
156 manager.
157 """
157 """
158 return self.kernel is not None
158 return self.kernel is not None
159
159
160 def kill_kernel(self):
160 def kill_kernel(self):
161 """ Kill the running kernel. """
161 """ Kill the running kernel. """
162 if self.has_kernel:
162 if self.has_kernel:
163 # Pause the heart beat channel if it exists.
163 # Pause the heart beat channel if it exists.
164 if self._hb_channel is not None:
164 if self._hb_channel is not None:
165 self._hb_channel.pause()
165 self._hb_channel.pause()
166
166
167 # Attempt to kill the kernel.
167 # Attempt to kill the kernel.
168 try:
168 try:
169 self.kernel.kill()
169 self.kernel.kill()
170 except OSError, e:
170 except OSError, e:
171 # In Windows, we will get an Access Denied error if the process
171 # In Windows, we will get an Access Denied error if the process
172 # has already terminated. Ignore it.
172 # has already terminated. Ignore it.
173 if not (sys.platform == 'win32' and e.winerror == 5):
173 if not (sys.platform == 'win32' and e.winerror == 5):
174 raise
174 raise
175 self.kernel = None
175 self.kernel = None
176 else:
176 else:
177 raise RuntimeError("Cannot kill kernel. No kernel is running!")
177 raise RuntimeError("Cannot kill kernel. No kernel is running!")
178
178
179 def interrupt_kernel(self):
179 def interrupt_kernel(self):
180 """ Interrupts the kernel. Unlike ``signal_kernel``, this operation is
180 """ Interrupts the kernel. Unlike ``signal_kernel``, this operation is
181 well supported on all platforms.
181 well supported on all platforms.
182 """
182 """
183 if self.has_kernel:
183 if self.has_kernel:
184 if sys.platform == 'win32':
184 if sys.platform == 'win32':
185 from parentpoller import ParentPollerWindows as Poller
185 from parentpoller import ParentPollerWindows as Poller
186 Poller.send_interrupt(self.kernel.win32_interrupt_event)
186 Poller.send_interrupt(self.kernel.win32_interrupt_event)
187 else:
187 else:
188 self.kernel.send_signal(signal.SIGINT)
188 self.kernel.send_signal(signal.SIGINT)
189 else:
189 else:
190 raise RuntimeError("Cannot interrupt kernel. No kernel is running!")
190 raise RuntimeError("Cannot interrupt kernel. No kernel is running!")
191
191
192 def signal_kernel(self, signum):
192 def signal_kernel(self, signum):
193 """ Sends a signal to the kernel. Note that since only SIGTERM is
193 """ Sends a signal to the kernel. Note that since only SIGTERM is
194 supported on Windows, this function is only useful on Unix systems.
194 supported on Windows, this function is only useful on Unix systems.
195 """
195 """
196 if self.has_kernel:
196 if self.has_kernel:
197 self.kernel.send_signal(signum)
197 self.kernel.send_signal(signum)
198 else:
198 else:
199 raise RuntimeError("Cannot signal kernel. No kernel is running!")
199 raise RuntimeError("Cannot signal kernel. No kernel is running!")
200
200
201 @property
201 @property
202 def is_alive(self):
202 def is_alive(self):
203 """Is the kernel process still running?"""
203 """Is the kernel process still running?"""
204 # FIXME: not using a heartbeat means this method is broken for any
204 # FIXME: not using a heartbeat means this method is broken for any
205 # remote kernel, it's only capable of handling local kernels.
205 # remote kernel, it's only capable of handling local kernels.
206 if self.has_kernel:
206 if self.has_kernel:
207 if self.kernel.poll() is None:
207 if self.kernel.poll() is None:
208 return True
208 return True
209 else:
209 else:
210 return False
210 return False
211 else:
211 else:
212 # We didn't start the kernel with this KernelManager so we don't
212 # We didn't start the kernel with this KernelManager so we don't
213 # know if it is running. We should use a heartbeat for this case.
213 # know if it is running. We should use a heartbeat for this case.
214 return True
214 return True
215
215
216
216
217 def make_starter(up_addr, down_addr, *args, **kwargs):
217 def make_starter(up_addr, down_addr, *args, **kwargs):
218 """entry point function for launching a kernelstarter in a subprocess"""
218 """entry point function for launching a kernelstarter in a subprocess"""
219 loop = ioloop.IOLoop.instance()
219 loop = ioloop.IOLoop.instance()
220 ctx = zmq.Context()
220 ctx = zmq.Context()
221 session = Session()
221 session = Session()
222 upstream = zmqstream.ZMQStream(ctx.socket(zmq.XREQ),loop)
222 upstream = zmqstream.ZMQStream(ctx.socket(zmq.DEALER),loop)
223 upstream.connect(up_addr)
223 upstream.connect(up_addr)
224 downstream = zmqstream.ZMQStream(ctx.socket(zmq.XREQ),loop)
224 downstream = zmqstream.ZMQStream(ctx.socket(zmq.DEALER),loop)
225 downstream.connect(down_addr)
225 downstream.connect(down_addr)
226
226
227 starter = KernelStarter(session, upstream, downstream, *args, **kwargs)
227 starter = KernelStarter(session, upstream, downstream, *args, **kwargs)
228 starter.start()
228 starter.start()
229 loop.start()
229 loop.start()
230
230
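As rendered, kernelstarter.py references several names it never imports (sys, time, signal, zmq, zmqstream, Process, LOCAL_IPS), and make_kernel is never defined, so parts of this module are clearly vestigial and it will not run as shown. A plausible import block, inferred from usage rather than taken from the original header:

# Sketch of the imports kernelstarter.py would need; inferred from the
# names used above, not copied from the module's actual header.
import signal
import sys
import time
from multiprocessing import Process

import zmq
from zmq.eventloop import ioloop, zmqstream

from IPython.utils.localinterfaces import LOCAL_IPS
from IPython.zmq.session import Session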
@@ -1,34 +1,42 b''
1 #-----------------------------------------------------------------------------
1 #-----------------------------------------------------------------------------
2 # Copyright (C) 2010 The IPython Development Team
2 # Copyright (C) 2010 The IPython Development Team
3 #
3 #
4 # Distributed under the terms of the BSD License. The full license is in
4 # Distributed under the terms of the BSD License. The full license is in
5 # the file COPYING.txt, distributed as part of this software.
5 # the file COPYING.txt, distributed as part of this software.
6 #-----------------------------------------------------------------------------
6 #-----------------------------------------------------------------------------
7
7
8 #-----------------------------------------------------------------------------
8 #-----------------------------------------------------------------------------
9 # Verify zmq version dependency >= 2.1.4
9 # Verify zmq version dependency >= 2.1.4
10 #-----------------------------------------------------------------------------
10 #-----------------------------------------------------------------------------
11
11
12 import re
12 import warnings
13 import warnings
13
14
14 minimum_pyzmq_version = "2.1.4"
15 def check_for_zmq(minimum_version, module='IPython.zmq'):
16 min_vlist = [int(n) for n in minimum_version.split('.')]
15
17
16 try:
18 try:
17 import zmq
19 import zmq
18 except ImportError:
20 except ImportError:
19 raise ImportError("IPython.zmq requires pyzmq >= %s"%minimum_pyzmq_version)
21 raise ImportError("%s requires pyzmq >= %s"%(module, minimum_version))
20
22
21 pyzmq_version = zmq.__version__
23 pyzmq_version = zmq.__version__
24 vlist = [int(n) for n in re.findall(r'\d+', pyzmq_version)]
22
25
23 if pyzmq_version < minimum_pyzmq_version:
26 if 'dev' not in pyzmq_version and vlist < min_vlist:
24 raise ImportError("IPython.zmq requires pyzmq >= %s, but you have %s"%(
27 raise ImportError("%s requires pyzmq >= %s, but you have %s"%(
25 minimum_pyzmq_version, pyzmq_version))
28 module, minimum_version, pyzmq_version))
26
29
27 del pyzmq_version
30 # fix missing DEALER/ROUTER aliases in pyzmq < 2.1.9
31 if not hasattr(zmq, 'DEALER'):
32 zmq.DEALER = zmq.XREQ
33 if not hasattr(zmq, 'ROUTER'):
34 zmq.ROUTER = zmq.XREP
28
35
29 if zmq.zmq_version() >= '3.0.0':
36 if zmq.zmq_version() >= '4.0.0':
30 warnings.warn("""libzmq 3 detected.
37 warnings.warn("""libzmq 4 detected.
31 It is unlikely that IPython's zmq code will work properly.
38 It is unlikely that IPython's zmq code will work properly.
32 Please install libzmq stable, which is 2.1.x or 2.2.x""",
39 Please install libzmq stable, which is 2.1.x or 2.2.x""",
33 RuntimeWarning)
40 RuntimeWarning)
34
41
42 check_for_zmq('2.1.4')
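The move from comparing version strings to comparing lists of integers is the substantive fix here: string comparison is lexicographic, so it mis-orders any multi-digit component, which is exactly the bug the old pyzmq_version < minimum_pyzmq_version check had. A short demonstration:

import re

print('2.1.10' < '2.1.4')   # True -- lexicographic, and wrong

# check_for_zmq's approach: compare integer components element-wise
vlist = [int(n) for n in re.findall(r'\d+', '2.1.10')]   # [2, 1, 10]
min_vlist = [int(n) for n in '2.1.4'.split('.')]         # [2, 1, 4]
print(vlist < min_vlist)    # False -- correct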
@@ -1,196 +1,196 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 """A simple interactive frontend that talks to a kernel over 0MQ.
2 """A simple interactive frontend that talks to a kernel over 0MQ.
3 """
3 """
4
4
5 #-----------------------------------------------------------------------------
5 #-----------------------------------------------------------------------------
6 # Imports
6 # Imports
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8 # stdlib
8 # stdlib
9 import cPickle as pickle
9 import cPickle as pickle
10 import code
10 import code
11 import readline
11 import readline
12 import sys
12 import sys
13 import time
13 import time
14 import uuid
14 import uuid
15
15
16 # our own
16 # our own
17 import zmq
17 import zmq
18 import session
18 import session
19 import completer
19 import completer
20 from IPython.utils.localinterfaces import LOCALHOST
20 from IPython.utils.localinterfaces import LOCALHOST
21
21
22 #-----------------------------------------------------------------------------
22 #-----------------------------------------------------------------------------
23 # Classes and functions
23 # Classes and functions
24 #-----------------------------------------------------------------------------
24 #-----------------------------------------------------------------------------
25
25
26 class Console(code.InteractiveConsole):
26 class Console(code.InteractiveConsole):
27
27
28 def __init__(self, locals=None, filename="<console>",
28 def __init__(self, locals=None, filename="<console>",
29 session = session,
29 session = session,
30 request_socket=None,
30 request_socket=None,
31 sub_socket=None):
31 sub_socket=None):
32 code.InteractiveConsole.__init__(self, locals, filename)
32 code.InteractiveConsole.__init__(self, locals, filename)
33 self.session = session
33 self.session = session
34 self.request_socket = request_socket
34 self.request_socket = request_socket
35 self.sub_socket = sub_socket
35 self.sub_socket = sub_socket
36 self.backgrounded = 0
36 self.backgrounded = 0
37 self.messages = {}
37 self.messages = {}
38
38
39 # Set tab completion
39 # Set tab completion
40 self.completer = completer.ClientCompleter(self, session, request_socket)
40 self.completer = completer.ClientCompleter(self, session, request_socket)
41 readline.parse_and_bind('tab: complete')
41 readline.parse_and_bind('tab: complete')
42 readline.parse_and_bind('set show-all-if-ambiguous on')
42 readline.parse_and_bind('set show-all-if-ambiguous on')
43 readline.set_completer(self.completer.complete)
43 readline.set_completer(self.completer.complete)
44
44
45 # Set system prompts
45 # Set system prompts
46 sys.ps1 = 'Py>>> '
46 sys.ps1 = 'Py>>> '
47 sys.ps2 = ' ... '
47 sys.ps2 = ' ... '
48 sys.ps3 = 'Out : '
48 sys.ps3 = 'Out : '
49 # Build dict of handlers for message types
49 # Build dict of handlers for message types
50 self.handlers = {}
50 self.handlers = {}
51 for msg_type in ['pyin', 'pyout', 'pyerr', 'stream']:
51 for msg_type in ['pyin', 'pyout', 'pyerr', 'stream']:
52 self.handlers[msg_type] = getattr(self, 'handle_%s' % msg_type)
52 self.handlers[msg_type] = getattr(self, 'handle_%s' % msg_type)
53
53
54 def handle_pyin(self, omsg):
54 def handle_pyin(self, omsg):
55 if omsg.parent_header.session == self.session.session:
55 if omsg.parent_header.session == self.session.session:
56 return
56 return
57 c = omsg.content.code.rstrip()
57 c = omsg.content.code.rstrip()
58 if c:
58 if c:
59 print '[IN from %s]' % omsg.parent_header.username
59 print '[IN from %s]' % omsg.parent_header.username
60 print c
60 print c
61
61
62 def handle_pyout(self, omsg):
62 def handle_pyout(self, omsg):
63 #print omsg # dbg
63 #print omsg # dbg
64 if omsg.parent_header.session == self.session.session:
64 if omsg.parent_header.session == self.session.session:
65 print "%s%s" % (sys.ps3, omsg.content.data)
65 print "%s%s" % (sys.ps3, omsg.content.data)
66 else:
66 else:
67 print '[Out from %s]' % omsg.parent_header.username
67 print '[Out from %s]' % omsg.parent_header.username
68 print omsg.content.data
68 print omsg.content.data
69
69
70 def print_pyerr(self, err):
70 def print_pyerr(self, err):
71 print >> sys.stderr, err.etype,':', err.evalue
71 print >> sys.stderr, err.etype,':', err.evalue
72 print >> sys.stderr, ''.join(err.traceback)
72 print >> sys.stderr, ''.join(err.traceback)
73
73
74 def handle_pyerr(self, omsg):
74 def handle_pyerr(self, omsg):
75 if omsg.parent_header.session == self.session.session:
75 if omsg.parent_header.session == self.session.session:
76 return
76 return
77 print >> sys.stderr, '[ERR from %s]' % omsg.parent_header.username
77 print >> sys.stderr, '[ERR from %s]' % omsg.parent_header.username
78 self.print_pyerr(omsg.content)
78 self.print_pyerr(omsg.content)
79
79
80 def handle_stream(self, omsg):
80 def handle_stream(self, omsg):
81 if omsg.content.name == 'stdout':
81 if omsg.content.name == 'stdout':
82 outstream = sys.stdout
82 outstream = sys.stdout
83 else:
83 else:
84 outstream = sys.stderr
84 outstream = sys.stderr
85 print >> outstream, '*ERR*',
85 print >> outstream, '*ERR*',
86 print >> outstream, omsg.content.data,
86 print >> outstream, omsg.content.data,
87
87
88 def handle_output(self, omsg):
88 def handle_output(self, omsg):
89 handler = self.handlers.get(omsg.msg_type, None)
89 handler = self.handlers.get(omsg.msg_type, None)
90 if handler is not None:
90 if handler is not None:
91 handler(omsg)
91 handler(omsg)
92
92
93 def recv_output(self):
93 def recv_output(self):
94 while True:
94 while True:
95 ident,msg = self.session.recv(self.sub_socket)
95 ident,msg = self.session.recv(self.sub_socket)
96 if msg is None:
96 if msg is None:
97 break
97 break
98 self.handle_output(Message(msg))
98 self.handle_output(Message(msg))
99
99
100 def handle_reply(self, rep):
100 def handle_reply(self, rep):
101 # Handle any side effects on output channels
101 # Handle any side effects on output channels
102 self.recv_output()
102 self.recv_output()
103 # Now, dispatch on the possible reply types we must handle
103 # Now, dispatch on the possible reply types we must handle
104 if rep is None:
104 if rep is None:
105 return
105 return
106 if rep.content.status == 'error':
106 if rep.content.status == 'error':
107 self.print_pyerr(rep.content)
107 self.print_pyerr(rep.content)
108 elif rep.content.status == 'aborted':
108 elif rep.content.status == 'aborted':
109 print >> sys.stderr, "ERROR: ABORTED"
109 print >> sys.stderr, "ERROR: ABORTED"
110 ab = self.messages[rep.parent_header.msg_id].content
110 ab = self.messages[rep.parent_header.msg_id].content
111 if 'code' in ab:
111 if 'code' in ab:
112 print >> sys.stderr, ab.code
112 print >> sys.stderr, ab.code
113 else:
113 else:
114 print >> sys.stderr, ab
114 print >> sys.stderr, ab
115
115
116 def recv_reply(self):
116 def recv_reply(self):
117 ident,rep = self.session.recv(self.request_socket)
117 ident,rep = self.session.recv(self.request_socket)
118 mrep = Message(rep)
118 mrep = Message(rep)
119 self.handle_reply(mrep)
119 self.handle_reply(mrep)
120 return mrep
120 return mrep
121
121
122 def runcode(self, code):
122 def runcode(self, code):
123 # We can't pickle code objects, so fetch the actual source
123 # We can't pickle code objects, so fetch the actual source
124 src = '\n'.join(self.buffer)
124 src = '\n'.join(self.buffer)
125
125
126 # for non-background inputs, if we do have previously backgrounded
126 # for non-background inputs, if we do have previously backgrounded
127 # jobs, check to see if they've produced results
127 # jobs, check to see if they've produced results
128 if not src.endswith(';'):
128 if not src.endswith(';'):
129 while self.backgrounded > 0:
129 while self.backgrounded > 0:
130 #print 'checking background'
130 #print 'checking background'
131 rep = self.recv_reply()
131 rep = self.recv_reply()
132 if rep:
132 if rep:
133 self.backgrounded -= 1
133 self.backgrounded -= 1
134 time.sleep(0.05)
134 time.sleep(0.05)
135
135
136 # Send code execution message to kernel
136 # Send code execution message to kernel
137 omsg = self.session.send(self.request_socket,
137 omsg = self.session.send(self.request_socket,
138 'execute_request', dict(code=src))
138 'execute_request', dict(code=src))
139 self.messages[omsg.header.msg_id] = omsg
139 self.messages[omsg.header.msg_id] = omsg
140
140
141 # Fake asynchronicity by letting the user put ';' at the end of the line
141 # Fake asynchronicity by letting the user put ';' at the end of the line
142 if src.endswith(';'):
142 if src.endswith(';'):
143 self.backgrounded += 1
143 self.backgrounded += 1
144 return
144 return
145
145
146 # For foreground jobs, wait for reply
146 # For foreground jobs, wait for reply
147 while True:
147 while True:
148 rep = self.recv_reply()
148 rep = self.recv_reply()
149 if rep is not None:
149 if rep is not None:
150 break
150 break
151 self.recv_output()
151 self.recv_output()
152 time.sleep(0.05)
152 time.sleep(0.05)
153 else:
153 else:
154 # We exited without hearing back from the kernel!
154 # We exited without hearing back from the kernel!
155 print >> sys.stderr, 'ERROR!!! kernel never got back to us!!!'
155 print >> sys.stderr, 'ERROR!!! kernel never got back to us!!!'
156
156
157
157
158 class InteractiveClient(object):
158 class InteractiveClient(object):
159 def __init__(self, session, request_socket, sub_socket):
159 def __init__(self, session, request_socket, sub_socket):
160 self.session = session
160 self.session = session
161 self.request_socket = request_socket
161 self.request_socket = request_socket
162 self.sub_socket = sub_socket
162 self.sub_socket = sub_socket
163 self.console = Console(None, '<zmq-console>',
163 self.console = Console(None, '<zmq-console>',
164 session, request_socket, sub_socket)
164 session, request_socket, sub_socket)
165
165
166 def interact(self):
166 def interact(self):
167 self.console.interact()
167 self.console.interact()
168
168
169
169
170 def main():
170 def main():
171 # Defaults
171 # Defaults
172 #ip = '192.168.2.109'
172 #ip = '192.168.2.109'
173 ip = LOCALHOST
173 ip = LOCALHOST
174 #ip = '99.146.222.252'
174 #ip = '99.146.222.252'
175 port_base = 5575
175 port_base = 5575
176 connection = ('tcp://%s' % ip) + ':%i'
176 connection = ('tcp://%s' % ip) + ':%i'
177 req_conn = connection % port_base
177 req_conn = connection % port_base
178 sub_conn = connection % (port_base+1)
178 sub_conn = connection % (port_base+1)
179
179
180 # Create initial sockets
180 # Create initial sockets
181 c = zmq.Context()
181 c = zmq.Context()
182 request_socket = c.socket(zmq.XREQ)
182 request_socket = c.socket(zmq.DEALER)
183 request_socket.connect(req_conn)
183 request_socket.connect(req_conn)
184
184
185 sub_socket = c.socket(zmq.SUB)
185 sub_socket = c.socket(zmq.SUB)
186 sub_socket.connect(sub_conn)
186 sub_socket.connect(sub_conn)
187 sub_socket.setsockopt(zmq.SUBSCRIBE, '')
187 sub_socket.setsockopt(zmq.SUBSCRIBE, '')
188
188
189 # Make session and user-facing client
189 # Make session and user-facing client
190 sess = session.Session()
190 sess = session.Session()
191 client = InteractiveClient(sess, request_socket, sub_socket)
191 client = InteractiveClient(sess, request_socket, sub_socket)
192 client.interact()
192 client.interact()
193
193
194
194
195 if __name__ == '__main__':
195 if __name__ == '__main__':
196 main()
196 main()
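The frontend's wiring is a DEALER socket for the request/reply path plus a SUB socket subscribed to everything for side-effect output. A stripped-down sketch of that socket pair; the frame contents are placeholders, since the real wire format is built by session.Session.send:

import zmq

ctx = zmq.Context()

request_socket = ctx.socket(zmq.DEALER)        # execute_request / reply
request_socket.connect('tcp://127.0.0.1:5575')

sub_socket = ctx.socket(zmq.SUB)               # published output (pyin/pyout/stream)
sub_socket.connect('tcp://127.0.0.1:5576')
sub_socket.setsockopt(zmq.SUBSCRIBE, b'')      # empty prefix subscribes to every topic

# placeholder frame -- the real message is assembled by Session.send()
request_socket.send(b'execute_request: 1+1')
while sub_socket.poll(timeout=100):            # drain whatever was published
    print(sub_socket.recv())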
@@ -1,226 +1,226 b''
1 """An Application for launching a kernel
1 """An Application for launching a kernel
2
2
3 Authors
3 Authors
4 -------
4 -------
5 * MinRK
5 * MinRK
6 """
6 """
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2011 The IPython Development Team
8 # Copyright (C) 2011 The IPython Development Team
9 #
9 #
10 # Distributed under the terms of the BSD License. The full license is in
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING.txt, distributed as part of this software.
11 # the file COPYING.txt, distributed as part of this software.
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13
13
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15 # Imports
15 # Imports
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 # Standard library imports.
18 # Standard library imports.
19 import os
19 import os
20 import sys
20 import sys
21
21
22 # System library imports.
22 # System library imports.
23 import zmq
23 import zmq
24
24
25 # IPython imports.
25 # IPython imports.
26 from IPython.core.ultratb import FormattedTB
26 from IPython.core.ultratb import FormattedTB
27 from IPython.core.application import (
27 from IPython.core.application import (
28 BaseIPythonApplication, base_flags, base_aliases
28 BaseIPythonApplication, base_flags, base_aliases
29 )
29 )
30 from IPython.utils import io
30 from IPython.utils import io
31 from IPython.utils.localinterfaces import LOCALHOST
31 from IPython.utils.localinterfaces import LOCALHOST
32 from IPython.utils.traitlets import (Any, Instance, Dict, Unicode, Int, Bool,
32 from IPython.utils.traitlets import (Any, Instance, Dict, Unicode, Int, Bool,
33 DottedObjectName)
33 DottedObjectName)
34 from IPython.utils.importstring import import_item
34 from IPython.utils.importstring import import_item
35 # local imports
35 # local imports
36 from IPython.zmq.heartbeat import Heartbeat
36 from IPython.zmq.heartbeat import Heartbeat
37 from IPython.zmq.parentpoller import ParentPollerUnix, ParentPollerWindows
37 from IPython.zmq.parentpoller import ParentPollerUnix, ParentPollerWindows
38 from IPython.zmq.session import Session
38 from IPython.zmq.session import Session
39
39
40
40
41 #-----------------------------------------------------------------------------
41 #-----------------------------------------------------------------------------
42 # Flags and Aliases
42 # Flags and Aliases
43 #-----------------------------------------------------------------------------
43 #-----------------------------------------------------------------------------
44
44
45 kernel_aliases = dict(base_aliases)
45 kernel_aliases = dict(base_aliases)
46 kernel_aliases.update({
46 kernel_aliases.update({
47 'ip' : 'KernelApp.ip',
47 'ip' : 'KernelApp.ip',
48 'hb' : 'KernelApp.hb_port',
48 'hb' : 'KernelApp.hb_port',
49 'shell' : 'KernelApp.shell_port',
49 'shell' : 'KernelApp.shell_port',
50 'iopub' : 'KernelApp.iopub_port',
50 'iopub' : 'KernelApp.iopub_port',
51 'stdin' : 'KernelApp.stdin_port',
51 'stdin' : 'KernelApp.stdin_port',
52 'parent': 'KernelApp.parent',
52 'parent': 'KernelApp.parent',
53 })
53 })
54 if sys.platform.startswith('win'):
54 if sys.platform.startswith('win'):
55 kernel_aliases['interrupt'] = 'KernelApp.interrupt'
55 kernel_aliases['interrupt'] = 'KernelApp.interrupt'
56
56
57 kernel_flags = dict(base_flags)
57 kernel_flags = dict(base_flags)
58 kernel_flags.update({
58 kernel_flags.update({
59 'no-stdout' : (
59 'no-stdout' : (
60 {'KernelApp' : {'no_stdout' : True}},
60 {'KernelApp' : {'no_stdout' : True}},
61 "redirect stdout to the null device"),
61 "redirect stdout to the null device"),
62 'no-stderr' : (
62 'no-stderr' : (
63 {'KernelApp' : {'no_stderr' : True}},
63 {'KernelApp' : {'no_stderr' : True}},
64 "redirect stderr to the null device"),
64 "redirect stderr to the null device"),
65 })
65 })
66
66
67
67
68 #-----------------------------------------------------------------------------
68 #-----------------------------------------------------------------------------
69 # Application class for starting a Kernel
69 # Application class for starting a Kernel
70 #-----------------------------------------------------------------------------
70 #-----------------------------------------------------------------------------
71
71
72 class KernelApp(BaseIPythonApplication):
72 class KernelApp(BaseIPythonApplication):
73 name='pykernel'
73 name='pykernel'
74 aliases = Dict(kernel_aliases)
74 aliases = Dict(kernel_aliases)
75 flags = Dict(kernel_flags)
75 flags = Dict(kernel_flags)
76 classes = [Session]
76 classes = [Session]
77 # the kernel class, as an importstring
77 # the kernel class, as an importstring
78 kernel_class = DottedObjectName('IPython.zmq.pykernel.Kernel')
78 kernel_class = DottedObjectName('IPython.zmq.pykernel.Kernel')
79 kernel = Any()
79 kernel = Any()
80 poller = Any() # don't restrict this even though current pollers are all Threads
80 poller = Any() # don't restrict this even though current pollers are all Threads
81 heartbeat = Instance(Heartbeat)
81 heartbeat = Instance(Heartbeat)
82 session = Instance('IPython.zmq.session.Session')
82 session = Instance('IPython.zmq.session.Session')
83 ports = Dict()
83 ports = Dict()
84
84
85 # inherit config file name from parent:
85 # inherit config file name from parent:
86 parent_appname = Unicode(config=True)
86 parent_appname = Unicode(config=True)
87 def _parent_appname_changed(self, name, old, new):
87 def _parent_appname_changed(self, name, old, new):
88 if self.config_file_specified:
88 if self.config_file_specified:
89 # it was manually specified, ignore
89 # it was manually specified, ignore
90 return
90 return
91 self.config_file_name = new.replace('-','_') + u'_config.py'
91 self.config_file_name = new.replace('-','_') + u'_config.py'
92 # don't let this count as specifying the config file
92 # don't let this count as specifying the config file
93 self.config_file_specified = False
93 self.config_file_specified = False
94
94
95 # connection info:
95 # connection info:
96 ip = Unicode(LOCALHOST, config=True,
96 ip = Unicode(LOCALHOST, config=True,
97 help="Set the IP or interface on which the kernel will listen.")
97 help="Set the IP or interface on which the kernel will listen.")
98 hb_port = Int(0, config=True, help="set the heartbeat port [default: random]")
98 hb_port = Int(0, config=True, help="set the heartbeat port [default: random]")
99 shell_port = Int(0, config=True, help="set the shell (XREP) port [default: random]")
99 shell_port = Int(0, config=True, help="set the shell (XREP) port [default: random]")
100 iopub_port = Int(0, config=True, help="set the iopub (PUB) port [default: random]")
100 iopub_port = Int(0, config=True, help="set the iopub (PUB) port [default: random]")
101 stdin_port = Int(0, config=True, help="set the stdin (XREQ) port [default: random]")
101 stdin_port = Int(0, config=True, help="set the stdin (XREQ) port [default: random]")
102
102
103 # streams, etc.
103 # streams, etc.
104 no_stdout = Bool(False, config=True, help="redirect stdout to the null device")
104 no_stdout = Bool(False, config=True, help="redirect stdout to the null device")
105 no_stderr = Bool(False, config=True, help="redirect stderr to the null device")
105 no_stderr = Bool(False, config=True, help="redirect stderr to the null device")
106 outstream_class = DottedObjectName('IPython.zmq.iostream.OutStream',
106 outstream_class = DottedObjectName('IPython.zmq.iostream.OutStream',
107 config=True, help="The importstring for the OutStream factory")
107 config=True, help="The importstring for the OutStream factory")
108 displayhook_class = DottedObjectName('IPython.zmq.displayhook.ZMQDisplayHook',
108 displayhook_class = DottedObjectName('IPython.zmq.displayhook.ZMQDisplayHook',
109 config=True, help="The importstring for the DisplayHook factory")
109 config=True, help="The importstring for the DisplayHook factory")
110
110
111 # polling
111 # polling
112 parent = Int(0, config=True,
112 parent = Int(0, config=True,
113 help="""kill this process if its parent dies. On Windows, the argument
113 help="""kill this process if its parent dies. On Windows, the argument
114 specifies the HANDLE of the parent process, otherwise it is simply boolean.
114 specifies the HANDLE of the parent process, otherwise it is simply boolean.
115 """)
115 """)
116 interrupt = Int(0, config=True,
116 interrupt = Int(0, config=True,
117 help="""ONLY USED ON WINDOWS
117 help="""ONLY USED ON WINDOWS
118 Interrupt this process when the parent is signalled.
118 Interrupt this process when the parent is signalled.
119 """)
119 """)
120
120
121 def init_crash_handler(self):
121 def init_crash_handler(self):
122 # Install minimal exception handling
122 # Install minimal exception handling
123 sys.excepthook = FormattedTB(mode='Verbose', color_scheme='NoColor',
123 sys.excepthook = FormattedTB(mode='Verbose', color_scheme='NoColor',
124 ostream=sys.__stdout__)
124 ostream=sys.__stdout__)
125
125
126 def init_poller(self):
126 def init_poller(self):
127 if sys.platform == 'win32':
127 if sys.platform == 'win32':
128 if self.interrupt or self.parent:
128 if self.interrupt or self.parent:
129 self.poller = ParentPollerWindows(self.interrupt, self.parent)
129 self.poller = ParentPollerWindows(self.interrupt, self.parent)
130 elif self.parent:
130 elif self.parent:
131 self.poller = ParentPollerUnix()
131 self.poller = ParentPollerUnix()
132
132
133 def _bind_socket(self, s, port):
133 def _bind_socket(self, s, port):
134 iface = 'tcp://%s' % self.ip
134 iface = 'tcp://%s' % self.ip
135 if port <= 0:
135 if port <= 0:
136 port = s.bind_to_random_port(iface)
136 port = s.bind_to_random_port(iface)
137 else:
137 else:
138 s.bind(iface + ':%i'%port)
138 s.bind(iface + ':%i'%port)
139 return port
139 return port
140
140
141 def init_sockets(self):
141 def init_sockets(self):
142 # Create a context, a session, and the kernel sockets.
142 # Create a context, a session, and the kernel sockets.
143 self.log.info("Starting the kernel at pid: %i", os.getpid())
143 self.log.info("Starting the kernel at pid: %i", os.getpid())
144 context = zmq.Context.instance()
144 context = zmq.Context.instance()
145 # Uncomment this to try closing the context.
145 # Uncomment this to try closing the context.
146 # atexit.register(context.term)
146 # atexit.register(context.term)
147
147
148 self.shell_socket = context.socket(zmq.XREP)
148 self.shell_socket = context.socket(zmq.ROUTER)
149 self.shell_port = self._bind_socket(self.shell_socket, self.shell_port)
149 self.shell_port = self._bind_socket(self.shell_socket, self.shell_port)
150 self.log.debug("shell XREP Channel on port: %i"%self.shell_port)
150 self.log.debug("shell ROUTER Channel on port: %i"%self.shell_port)
151
151
152 self.iopub_socket = context.socket(zmq.PUB)
152 self.iopub_socket = context.socket(zmq.PUB)
153 self.iopub_port = self._bind_socket(self.iopub_socket, self.iopub_port)
153 self.iopub_port = self._bind_socket(self.iopub_socket, self.iopub_port)
154 self.log.debug("iopub PUB Channel on port: %i"%self.iopub_port)
154 self.log.debug("iopub PUB Channel on port: %i"%self.iopub_port)
155
155
156 self.stdin_socket = context.socket(zmq.XREQ)
156 self.stdin_socket = context.socket(zmq.XREQ)
157 self.stdin_port = self._bind_socket(self.stdin_socket, self.stdin_port)
157 self.stdin_port = self._bind_socket(self.stdin_socket, self.stdin_port)
158 self.log.debug("stdin XREQ Channel on port: %i"%self.stdin_port)
158 self.log.debug("stdin XREQ Channel on port: %i"%self.stdin_port)
159
159
160 self.heartbeat = Heartbeat(context, (self.ip, self.hb_port))
160 self.heartbeat = Heartbeat(context, (self.ip, self.hb_port))
161 self.hb_port = self.heartbeat.port
161 self.hb_port = self.heartbeat.port
162 self.log.debug("Heartbeat REP Channel on port: %i"%self.hb_port)
162 self.log.debug("Heartbeat REP Channel on port: %i"%self.hb_port)
163
163
164 # Helper to make it easier to connect to an existing kernel, until we have
164 # Helper to make it easier to connect to an existing kernel, until we have
165 # single-port connection negotiation fully implemented.
165 # single-port connection negotiation fully implemented.
166 # set log-level to critical, to make sure it is output
166 # set log-level to critical, to make sure it is output
167 self.log.critical("To connect another client to this kernel, use:")
167 self.log.critical("To connect another client to this kernel, use:")
168 self.log.critical("--existing --shell={0} --iopub={1} --stdin={2} --hb={3}".format(
168 self.log.critical("--existing --shell={0} --iopub={1} --stdin={2} --hb={3}".format(
169 self.shell_port, self.iopub_port, self.stdin_port, self.hb_port))
169 self.shell_port, self.iopub_port, self.stdin_port, self.hb_port))
170
170
171
171
172 self.ports = dict(shell=self.shell_port, iopub=self.iopub_port,
172 self.ports = dict(shell=self.shell_port, iopub=self.iopub_port,
173 stdin=self.stdin_port, hb=self.hb_port)
173 stdin=self.stdin_port, hb=self.hb_port)
174
174
175 def init_session(self):
175 def init_session(self):
176 """create our session object"""
176 """create our session object"""
177 self.session = Session(config=self.config, username=u'kernel')
177 self.session = Session(config=self.config, username=u'kernel')
178
178
179 def init_blackhole(self):
179 def init_blackhole(self):
180 """redirects stdout/stderr to devnull if necessary"""
180 """redirects stdout/stderr to devnull if necessary"""
181 if self.no_stdout or self.no_stderr:
181 if self.no_stdout or self.no_stderr:
182 blackhole = file(os.devnull, 'w')
182 blackhole = file(os.devnull, 'w')
183 if self.no_stdout:
183 if self.no_stdout:
184 sys.stdout = sys.__stdout__ = blackhole
184 sys.stdout = sys.__stdout__ = blackhole
185 if self.no_stderr:
185 if self.no_stderr:
186 sys.stderr = sys.__stderr__ = blackhole
186 sys.stderr = sys.__stderr__ = blackhole
187
187
188 def init_io(self):
188 def init_io(self):
189 """Redirect input streams and set a display hook."""
189 """Redirect input streams and set a display hook."""
190 if self.outstream_class:
190 if self.outstream_class:
191 outstream_factory = import_item(str(self.outstream_class))
191 outstream_factory = import_item(str(self.outstream_class))
192 sys.stdout = outstream_factory(self.session, self.iopub_socket, u'stdout')
192 sys.stdout = outstream_factory(self.session, self.iopub_socket, u'stdout')
193 sys.stderr = outstream_factory(self.session, self.iopub_socket, u'stderr')
193 sys.stderr = outstream_factory(self.session, self.iopub_socket, u'stderr')
194 if self.displayhook_class:
194 if self.displayhook_class:
195 displayhook_factory = import_item(str(self.displayhook_class))
195 displayhook_factory = import_item(str(self.displayhook_class))
196 sys.displayhook = displayhook_factory(self.session, self.iopub_socket)
196 sys.displayhook = displayhook_factory(self.session, self.iopub_socket)
197
197
198 def init_kernel(self):
198 def init_kernel(self):
199 """Create the Kernel object itself"""
199 """Create the Kernel object itself"""
200 kernel_factory = import_item(str(self.kernel_class))
200 kernel_factory = import_item(str(self.kernel_class))
201 self.kernel = kernel_factory(config=self.config, session=self.session,
201 self.kernel = kernel_factory(config=self.config, session=self.session,
202 shell_socket=self.shell_socket,
202 shell_socket=self.shell_socket,
203 iopub_socket=self.iopub_socket,
203 iopub_socket=self.iopub_socket,
204 stdin_socket=self.stdin_socket,
204 stdin_socket=self.stdin_socket,
205 log=self.log
205 log=self.log
206 )
206 )
207 self.kernel.record_ports(self.ports)
207 self.kernel.record_ports(self.ports)
208
208
209 def initialize(self, argv=None):
209 def initialize(self, argv=None):
210 super(KernelApp, self).initialize(argv)
210 super(KernelApp, self).initialize(argv)
211 self.init_blackhole()
211 self.init_blackhole()
212 self.init_session()
212 self.init_session()
213 self.init_poller()
213 self.init_poller()
214 self.init_sockets()
214 self.init_sockets()
215 self.init_io()
215 self.init_io()
216 self.init_kernel()
216 self.init_kernel()
217
217
218 def start(self):
218 def start(self):
219 self.heartbeat.start()
219 self.heartbeat.start()
220 if self.poller is not None:
220 if self.poller is not None:
221 self.poller.start()
221 self.poller.start()
222 try:
222 try:
223 self.kernel.start()
223 self.kernel.start()
224 except KeyboardInterrupt:
224 except KeyboardInterrupt:
225 pass
225 pass
226
226
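_bind_socket above captures the app's port-selection rule: a configured port <= 0 means "bind to a random free port and report which one was chosen", which is how all the [default: random] ports work. The same logic in isolation, with an illustrative interface:

import zmq

def bind_socket(s, ip, port):
    iface = 'tcp://%s' % ip
    if port <= 0:
        # let libzmq pick an ephemeral port and hand the choice back
        port = s.bind_to_random_port(iface)
    else:
        s.bind(iface + ':%i' % port)
    return port

ctx = zmq.Context.instance()
shell = ctx.socket(zmq.ROUTER)
print(bind_socket(shell, '127.0.0.1', 0))   # prints the randomly chosen port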
@@ -1,983 +1,983 b''
1 """Base classes to manage the interaction with a running kernel.
1 """Base classes to manage the interaction with a running kernel.
2
2
3 TODO
3 TODO
4 * Create logger to handle debugging and console messages.
4 * Create logger to handle debugging and console messages.
5 """
5 """
6
6
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2008-2010 The IPython Development Team
8 # Copyright (C) 2008-2010 The IPython Development Team
9 #
9 #
10 # Distributed under the terms of the BSD License. The full license is in
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13
13
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15 # Imports
15 # Imports
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 # Standard library imports.
18 # Standard library imports.
19 import atexit
19 import atexit
20 import errno
20 import errno
21 from Queue import Queue, Empty
21 from Queue import Queue, Empty
22 from subprocess import Popen
22 from subprocess import Popen
23 import signal
23 import signal
24 import sys
24 import sys
25 from threading import Thread
25 from threading import Thread
26 import time
26 import time
27 import logging
27 import logging
28
28
29 # System library imports.
29 # System library imports.
30 import zmq
30 import zmq
31 from zmq import POLLIN, POLLOUT, POLLERR
31 from zmq import POLLIN, POLLOUT, POLLERR
32 from zmq.eventloop import ioloop
32 from zmq.eventloop import ioloop
33
33
34 # Local imports.
34 # Local imports.
35 from IPython.config.loader import Config
35 from IPython.config.loader import Config
36 from IPython.utils import io
36 from IPython.utils import io
37 from IPython.utils.localinterfaces import LOCALHOST, LOCAL_IPS
37 from IPython.utils.localinterfaces import LOCALHOST, LOCAL_IPS
38 from IPython.utils.traitlets import HasTraits, Any, Instance, Type, TCPAddress
38 from IPython.utils.traitlets import HasTraits, Any, Instance, Type, TCPAddress
39 from session import Session, Message
39 from session import Session, Message
40
40
41 #-----------------------------------------------------------------------------
41 #-----------------------------------------------------------------------------
42 # Constants and exceptions
42 # Constants and exceptions
43 #-----------------------------------------------------------------------------
43 #-----------------------------------------------------------------------------
44
44
45 class InvalidPortNumber(Exception):
45 class InvalidPortNumber(Exception):
46 pass
46 pass
47
47
48 #-----------------------------------------------------------------------------
48 #-----------------------------------------------------------------------------
49 # Utility functions
49 # Utility functions
50 #-----------------------------------------------------------------------------
50 #-----------------------------------------------------------------------------
51
51
52 # some utilities to validate message structure; these might get moved elsewhere
52 # some utilities to validate message structure; these might get moved elsewhere
53 # if they prove to have more generic utility
53 # if they prove to have more generic utility
54
54
55 def validate_string_list(lst):
55 def validate_string_list(lst):
56 """Validate that the input is a list of strings.
56 """Validate that the input is a list of strings.
57
57
58 Raises ValueError if not."""
58 Raises ValueError if not."""
59 if not isinstance(lst, list):
59 if not isinstance(lst, list):
60 raise ValueError('input %r must be a list' % lst)
60 raise ValueError('input %r must be a list' % lst)
61 for x in lst:
61 for x in lst:
62 if not isinstance(x, basestring):
62 if not isinstance(x, basestring):
63 raise ValueError('element %r in list must be a string' % x)
63 raise ValueError('element %r in list must be a string' % x)
64
64
65
65
66 def validate_string_dict(dct):
66 def validate_string_dict(dct):
67 """Validate that the input is a dict with string keys and values.
67 """Validate that the input is a dict with string keys and values.
68
68
69 Raises ValueError if not."""
69 Raises ValueError if not."""
70 for k,v in dct.iteritems():
70 for k,v in dct.iteritems():
71 if not isinstance(k, basestring):
71 if not isinstance(k, basestring):
72 raise ValueError('key %r in dict must be a string' % k)
72 raise ValueError('key %r in dict must be a string' % k)
73 if not isinstance(v, basestring):
73 if not isinstance(v, basestring):
74 raise ValueError('value %r in dict must be a string' % v)
74 raise ValueError('value %r in dict must be a string' % v)
75
75
76
76
77 #-----------------------------------------------------------------------------
77 #-----------------------------------------------------------------------------
78 # ZMQ Socket Channel classes
78 # ZMQ Socket Channel classes
79 #-----------------------------------------------------------------------------
79 #-----------------------------------------------------------------------------
80
80
81 class ZMQSocketChannel(Thread):
81 class ZMQSocketChannel(Thread):
82 """The base class for the channels that use ZMQ sockets.
82 """The base class for the channels that use ZMQ sockets.
83 """
83 """
84 context = None
84 context = None
85 session = None
85 session = None
86 socket = None
86 socket = None
87 ioloop = None
87 ioloop = None
88 iostate = None
88 iostate = None
89 _address = None
89 _address = None
90
90
91 def __init__(self, context, session, address):
91 def __init__(self, context, session, address):
92 """Create a channel
92 """Create a channel
93
93
94 Parameters
94 Parameters
95 ----------
95 ----------
96 context : :class:`zmq.Context`
96 context : :class:`zmq.Context`
97 The ZMQ context to use.
97 The ZMQ context to use.
98 session : :class:`session.Session`
98 session : :class:`session.Session`
99 The session to use.
99 The session to use.
100 address : tuple
100 address : tuple
101 Standard (ip, port) tuple that the kernel is listening on.
101 Standard (ip, port) tuple that the kernel is listening on.
102 """
102 """
103 super(ZMQSocketChannel, self).__init__()
103 super(ZMQSocketChannel, self).__init__()
104 self.daemon = True
104 self.daemon = True
105
105
106 self.context = context
106 self.context = context
107 self.session = session
107 self.session = session
108 if address[1] == 0:
108 if address[1] == 0:
109 message = 'The port number for a channel cannot be 0.'
109 message = 'The port number for a channel cannot be 0.'
110 raise InvalidPortNumber(message)
110 raise InvalidPortNumber(message)
111 self._address = address
111 self._address = address
112
112
113 def _run_loop(self):
113 def _run_loop(self):
114 """Run my loop, ignoring EINTR events in the poller"""
114 """Run my loop, ignoring EINTR events in the poller"""
115 while True:
115 while True:
116 try:
116 try:
117 self.ioloop.start()
117 self.ioloop.start()
118 except zmq.ZMQError as e:
118 except zmq.ZMQError as e:
119 if e.errno == errno.EINTR:
119 if e.errno == errno.EINTR:
120 continue
120 continue
121 else:
121 else:
122 raise
122 raise
123 else:
123 else:
124 break
124 break
125
125
126 def stop(self):
126 def stop(self):
127 """Stop the channel's activity.
127 """Stop the channel's activity.
128
128
129 This calls :meth:`Thread.join` and returns when the thread
129 This calls :meth:`Thread.join` and returns when the thread
130 terminates. :class:`RuntimeError` will be raised if
130 terminates. :class:`RuntimeError` will be raised if
131 :meth:`self.start` is called again.
131 :meth:`self.start` is called again.
132 """
132 """
133 self.join()
133 self.join()
134
134
135 @property
135 @property
136 def address(self):
136 def address(self):
137 """Get the channel's address as an (ip, port) tuple.
137 """Get the channel's address as an (ip, port) tuple.
138
138
139 By default, the address is (localhost, 0), where 0 means a random
139 By default, the address is (localhost, 0), where 0 means a random
140 port.
140 port.
141 """
141 """
142 return self._address
142 return self._address
143
143
144 def add_io_state(self, state):
144 def add_io_state(self, state):
145 """Add IO state to the eventloop.
145 """Add IO state to the eventloop.
146
146
147 Parameters
147 Parameters
148 ----------
148 ----------
149 state : zmq.POLLIN|zmq.POLLOUT|zmq.POLLERR
149 state : zmq.POLLIN|zmq.POLLOUT|zmq.POLLERR
150 The IO state flag to set.
150 The IO state flag to set.
151
151
152 This is thread safe as it uses the thread safe IOLoop.add_callback.
152 This is thread safe as it uses the thread safe IOLoop.add_callback.
153 """
153 """
154 def add_io_state_callback():
154 def add_io_state_callback():
155 if not self.iostate & state:
155 if not self.iostate & state:
156 self.iostate = self.iostate | state
156 self.iostate = self.iostate | state
157 self.ioloop.update_handler(self.socket, self.iostate)
157 self.ioloop.update_handler(self.socket, self.iostate)
158 self.ioloop.add_callback(add_io_state_callback)
158 self.ioloop.add_callback(add_io_state_callback)
159
159
160 def drop_io_state(self, state):
160 def drop_io_state(self, state):
161 """Drop IO state from the eventloop.
161 """Drop IO state from the eventloop.
162
162
163 Parameters
163 Parameters
164 ----------
164 ----------
165 state : zmq.POLLIN|zmq.POLLOUT|zmq.POLLERR
165 state : zmq.POLLIN|zmq.POLLOUT|zmq.POLLERR
166 The IO state flag to drop.
166 The IO state flag to drop.
167
167
168 This is thread safe as it uses the thread safe IOLoop.add_callback.
168 This is thread safe as it uses the thread safe IOLoop.add_callback.
169 """
169 """
170 def drop_io_state_callback():
170 def drop_io_state_callback():
171 if self.iostate & state:
171 if self.iostate & state:
172 self.iostate = self.iostate & (~state)
172 self.iostate = self.iostate & (~state)
173 self.ioloop.update_handler(self.socket, self.iostate)
173 self.ioloop.update_handler(self.socket, self.iostate)
174 self.ioloop.add_callback(drop_io_state_callback)
174 self.ioloop.add_callback(drop_io_state_callback)
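# Subclasses pair these two helpers to toggle POLLOUT around an outgoing
# queue. A minimal sketch of the pattern (mirroring _queue_request and
# _handle_send in the channels below):
#
#   self.command_queue.put(msg)
#   self.add_io_state(POLLOUT)       # ask the loop to signal writability
#   ...
#   if self.command_queue.empty():
#       self.drop_io_state(POLLOUT)  # queue drained; stop polling for writes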
175
175
176
176
177 class ShellSocketChannel(ZMQSocketChannel):
177 class ShellSocketChannel(ZMQSocketChannel):
178 """The XREQ channel for issues request/replies to the kernel.
178 """The XREQ channel for issues request/replies to the kernel.
179 """
179 """
180
180
181 command_queue = None
181 command_queue = None
182
182
183 def __init__(self, context, session, address):
183 def __init__(self, context, session, address):
184 super(ShellSocketChannel, self).__init__(context, session, address)
184 super(ShellSocketChannel, self).__init__(context, session, address)
185 self.command_queue = Queue()
185 self.command_queue = Queue()
186 self.ioloop = ioloop.IOLoop()
186 self.ioloop = ioloop.IOLoop()
187
187
188 def run(self):
188 def run(self):
189 """The thread's main activity. Call start() instead."""
189 """The thread's main activity. Call start() instead."""
190 self.socket = self.context.socket(zmq.XREQ)
190 self.socket = self.context.socket(zmq.DEALER)
191 self.socket.setsockopt(zmq.IDENTITY, self.session.session)
191 self.socket.setsockopt(zmq.IDENTITY, self.session.session)
192 self.socket.connect('tcp://%s:%i' % self.address)
192 self.socket.connect('tcp://%s:%i' % self.address)
193 self.iostate = POLLERR|POLLIN
193 self.iostate = POLLERR|POLLIN
194 self.ioloop.add_handler(self.socket, self._handle_events,
194 self.ioloop.add_handler(self.socket, self._handle_events,
195 self.iostate)
195 self.iostate)
196 self._run_loop()
196 self._run_loop()
197
197
198 def stop(self):
198 def stop(self):
199 self.ioloop.stop()
199 self.ioloop.stop()
200 super(ShellSocketChannel, self).stop()
200 super(ShellSocketChannel, self).stop()
201
201
202 def call_handlers(self, msg):
202 def call_handlers(self, msg):
203 """This method is called in the ioloop thread when a message arrives.
203 """This method is called in the ioloop thread when a message arrives.
204
204
205 Subclasses should override this method to handle incoming messages.
205 Subclasses should override this method to handle incoming messages.
206 It is important to remember that this method is called in the ioloop
206 It is important to remember that this method is called in the ioloop
207 thread, so some logic must be used to ensure that the application-level
207 thread, so some logic must be used to ensure that the application-level
208 handlers are called in the application thread.
208 handlers are called in the application thread.
209 """
209 """
210 raise NotImplementedError('call_handlers must be defined in a subclass.')
210 raise NotImplementedError('call_handlers must be defined in a subclass.')
211
211
212 def execute(self, code, silent=False,
212 def execute(self, code, silent=False,
213 user_variables=None, user_expressions=None):
213 user_variables=None, user_expressions=None):
214 """Execute code in the kernel.
214 """Execute code in the kernel.
215
215
216 Parameters
216 Parameters
217 ----------
217 ----------
218 code : str
218 code : str
219 A string of Python code.
219 A string of Python code.
220
220
221 silent : bool, optional (default False)
221 silent : bool, optional (default False)
222 If set, the kernel will execute the code as quietly as possible.
222 If set, the kernel will execute the code as quietly as possible.
223
223
224 user_variables : list, optional
224 user_variables : list, optional
225 A list of variable names to pull from the user's namespace. They
225 A list of variable names to pull from the user's namespace. They
226 will come back as a dict with these names as keys and their
226 will come back as a dict with these names as keys and their
227 :func:`repr` as values.
227 :func:`repr` as values.
228
228
229 user_expressions : dict, optional
229 user_expressions : dict, optional
230 A dict with string keys and expressions to evaluate in the user's
230 A dict with string keys and expressions to evaluate in the user's
231 namespace. They will come back as a dict with these names as keys
231 namespace. They will come back as a dict with these names as keys
232 and their :func:`repr` as values.
232 and their :func:`repr` as values.
233
233
234 Returns
234 Returns
235 -------
235 -------
236 The msg_id of the message sent.
236 The msg_id of the message sent.
237 """
237 """
238 if user_variables is None:
238 if user_variables is None:
239 user_variables = []
239 user_variables = []
240 if user_expressions is None:
240 if user_expressions is None:
241 user_expressions = {}
241 user_expressions = {}
242
242
243 # Don't waste network traffic if inputs are invalid
243 # Don't waste network traffic if inputs are invalid
244 if not isinstance(code, basestring):
244 if not isinstance(code, basestring):
245 raise ValueError('code %r must be a string' % code)
245 raise ValueError('code %r must be a string' % code)
246 validate_string_list(user_variables)
246 validate_string_list(user_variables)
247 validate_string_dict(user_expressions)
247 validate_string_dict(user_expressions)
248
248
249 # Create class for content/msg creation. Related to, but possibly
249 # Create class for content/msg creation. Related to, but possibly
250 # not in Session.
250 # not in Session.
251 content = dict(code=code, silent=silent,
251 content = dict(code=code, silent=silent,
252 user_variables=user_variables,
252 user_variables=user_variables,
253 user_expressions=user_expressions)
253 user_expressions=user_expressions)
254 msg = self.session.msg('execute_request', content)
254 msg = self.session.msg('execute_request', content)
255 self._queue_request(msg)
255 self._queue_request(msg)
256 return msg['header']['msg_id']
256 return msg['header']['msg_id']
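# Usage sketch: with a started channel (here called `shell`), the returned
# msg_id lets a frontend match the eventual execute_reply in call_handlers:
#
#   msg_id = shell.execute('a = 1 + 1', user_variables=['a'])
#   # later, in call_handlers(msg):
#   #     if msg['parent_header']['msg_id'] == msg_id:
#   #         ...  # this is the reply; it carries the repr of 'a'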
257
257
258 def complete(self, text, line, cursor_pos, block=None):
258 def complete(self, text, line, cursor_pos, block=None):
259 """Tab complete text in the kernel's namespace.
259 """Tab complete text in the kernel's namespace.
260
260
261 Parameters
261 Parameters
262 ----------
262 ----------
263 text : str
263 text : str
264 The text to complete.
264 The text to complete.
265 line : str
265 line : str
266 The full line of text that is the surrounding context for the
266 The full line of text that is the surrounding context for the
267 text to complete.
267 text to complete.
268 cursor_pos : int
268 cursor_pos : int
269 The position of the cursor in the line where the completion was
269 The position of the cursor in the line where the completion was
270 requested.
270 requested.
271 block : str, optional
271 block : str, optional
272 The full block of code in which the completion is being requested.
272 The full block of code in which the completion is being requested.
273
273
274 Returns
274 Returns
275 -------
275 -------
276 The msg_id of the message sent.
276 The msg_id of the message sent.
277 """
277 """
278 content = dict(text=text, line=line, block=block, cursor_pos=cursor_pos)
278 content = dict(text=text, line=line, block=block, cursor_pos=cursor_pos)
279 msg = self.session.msg('complete_request', content)
279 msg = self.session.msg('complete_request', content)
280 self._queue_request(msg)
280 self._queue_request(msg)
281 return msg['header']['msg_id']
281 return msg['header']['msg_id']
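# Sketch: completing `os.pa` at the end of the line `import os; os.pa`
# (cursor position 16) would be requested as:
#
#   msg_id = shell.complete(text='os.pa', line='import os; os.pa',
#                           cursor_pos=16)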
282
282
283 def object_info(self, oname):
283 def object_info(self, oname):
284 """Get metadata information about an object.
284 """Get metadata information about an object.
285
285
286 Parameters
286 Parameters
287 ----------
287 ----------
288 oname : str
288 oname : str
289 A string specifying the object name.
289 A string specifying the object name.
290
290
291 Returns
291 Returns
292 -------
292 -------
293 The msg_id of the message sent.
293 The msg_id of the message sent.
294 """
294 """
295 content = dict(oname=oname)
295 content = dict(oname=oname)
296 msg = self.session.msg('object_info_request', content)
296 msg = self.session.msg('object_info_request', content)
297 self._queue_request(msg)
297 self._queue_request(msg)
298 return msg['header']['msg_id']
298 return msg['header']['msg_id']
299
299
300 def history(self, raw=True, output=False, hist_access_type='range', **kwargs):
300 def history(self, raw=True, output=False, hist_access_type='range', **kwargs):
301 """Get entries from the history list.
301 """Get entries from the history list.
302
302
303 Parameters
303 Parameters
304 ----------
304 ----------
305 raw : bool
305 raw : bool
306 If True, return the raw input.
306 If True, return the raw input.
307 output : bool
307 output : bool
308 If True, then return the output as well.
308 If True, then return the output as well.
309 hist_access_type : str
309 hist_access_type : str
310 'range' (fill in session, start and stop params), 'tail' (fill in n)
310 'range' (fill in session, start and stop params), 'tail' (fill in n)
311 or 'search' (fill in pattern param).
311 or 'search' (fill in pattern param).
312
312
313 session : int
313 session : int
314 For a range request, the session from which to get lines. Session
314 For a range request, the session from which to get lines. Session
315 numbers are positive integers; negative ones count back from the
315 numbers are positive integers; negative ones count back from the
316 current session.
316 current session.
317 start : int
317 start : int
318 The first line number of a history range.
318 The first line number of a history range.
319 stop : int
319 stop : int
320 The final (excluded) line number of a history range.
320 The final (excluded) line number of a history range.
321
321
322 n : int
322 n : int
323 The number of lines of history to get for a tail request.
323 The number of lines of history to get for a tail request.
324
324
325 pattern : str
325 pattern : str
326 The glob-syntax pattern for a search request.
326 The glob-syntax pattern for a search request.
327
327
328 Returns
328 Returns
329 -------
329 -------
330 The msg_id of the message sent.
330 The msg_id of the message sent.
331 """
331 """
332 content = dict(raw=raw, output=output, hist_access_type=hist_access_type,
332 content = dict(raw=raw, output=output, hist_access_type=hist_access_type,
333 **kwargs)
333 **kwargs)
334 msg = self.session.msg('history_request', content)
334 msg = self.session.msg('history_request', content)
335 self._queue_request(msg)
335 self._queue_request(msg)
336 return msg['header']['msg_id']
336 return msg['header']['msg_id']
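# Sketch of the three access styles on a started channel `shell`:
#
#   shell.history(hist_access_type='range', session=-1, start=1, stop=50)
#   shell.history(hist_access_type='tail', n=10)
#   shell.history(hist_access_type='search', pattern='plot*')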
337
337
338 def shutdown(self, restart=False):
338 def shutdown(self, restart=False):
339 """Request an immediate kernel shutdown.
339 """Request an immediate kernel shutdown.
340
340
341 Upon receipt of the (empty) reply, client code can safely assume that
341 Upon receipt of the (empty) reply, client code can safely assume that
342 the kernel has shut down and it's safe to forcefully terminate it if
342 the kernel has shut down and it's safe to forcefully terminate it if
343 it's still alive.
343 it's still alive.
344
344
345 The kernel will send the reply via a function registered with Python's
345 The kernel will send the reply via a function registered with Python's
346 atexit module, ensuring it's truly done as the kernel is done with all
346 atexit module, ensuring it's truly done as the kernel is done with all
347 normal operation.
347 normal operation.
348 """
348 """
349 # Send quit message to kernel. Once we implement kernel-side setattr,
349 # Send quit message to kernel. Once we implement kernel-side setattr,
350 # this should probably be done that way, but for now this will do.
350 # this should probably be done that way, but for now this will do.
351 msg = self.session.msg('shutdown_request', {'restart':restart})
351 msg = self.session.msg('shutdown_request', {'restart':restart})
352 self._queue_request(msg)
352 self._queue_request(msg)
353 return msg['header']['msg_id']
353 return msg['header']['msg_id']
354
354
355 def _handle_events(self, socket, events):
355 def _handle_events(self, socket, events):
356 if events & POLLERR:
356 if events & POLLERR:
357 self._handle_err()
357 self._handle_err()
358 if events & POLLOUT:
358 if events & POLLOUT:
359 self._handle_send()
359 self._handle_send()
360 if events & POLLIN:
360 if events & POLLIN:
361 self._handle_recv()
361 self._handle_recv()
362
362
363 def _handle_recv(self):
363 def _handle_recv(self):
364 ident,msg = self.session.recv(self.socket, 0)
364 ident,msg = self.session.recv(self.socket, 0)
365 self.call_handlers(msg)
365 self.call_handlers(msg)
366
366
367 def _handle_send(self):
367 def _handle_send(self):
368 try:
368 try:
369 msg = self.command_queue.get(False)
369 msg = self.command_queue.get(False)
370 except Empty:
370 except Empty:
371 pass
371 pass
372 else:
372 else:
373 self.session.send(self.socket,msg)
373 self.session.send(self.socket,msg)
374 if self.command_queue.empty():
374 if self.command_queue.empty():
375 self.drop_io_state(POLLOUT)
375 self.drop_io_state(POLLOUT)
376
376
377 def _handle_err(self):
377 def _handle_err(self):
378 # We don't want to let this go silently, so eventually we should log.
378 # We don't want to let this go silently, so eventually we should log.
379 raise zmq.ZMQError()
379 raise zmq.ZMQError()
380
380
381 def _queue_request(self, msg):
381 def _queue_request(self, msg):
382 self.command_queue.put(msg)
382 self.command_queue.put(msg)
383 self.add_io_state(POLLOUT)
383 self.add_io_state(POLLOUT)
384
384
385
385
386 class SubSocketChannel(ZMQSocketChannel):
386 class SubSocketChannel(ZMQSocketChannel):
387 """The SUB channel which listens for messages that the kernel publishes.
387 """The SUB channel which listens for messages that the kernel publishes.
388 """
388 """
389
389
390 def __init__(self, context, session, address):
390 def __init__(self, context, session, address):
391 super(SubSocketChannel, self).__init__(context, session, address)
391 super(SubSocketChannel, self).__init__(context, session, address)
392 self.ioloop = ioloop.IOLoop()
392 self.ioloop = ioloop.IOLoop()
393
393
394 def run(self):
394 def run(self):
395 """The thread's main activity. Call start() instead."""
395 """The thread's main activity. Call start() instead."""
396 self.socket = self.context.socket(zmq.SUB)
396 self.socket = self.context.socket(zmq.SUB)
397 self.socket.setsockopt(zmq.SUBSCRIBE,'')
397 self.socket.setsockopt(zmq.SUBSCRIBE,'')
398 self.socket.setsockopt(zmq.IDENTITY, self.session.session)
398 self.socket.setsockopt(zmq.IDENTITY, self.session.session)
399 self.socket.connect('tcp://%s:%i' % self.address)
399 self.socket.connect('tcp://%s:%i' % self.address)
400 self.iostate = POLLIN|POLLERR
400 self.iostate = POLLIN|POLLERR
401 self.ioloop.add_handler(self.socket, self._handle_events,
401 self.ioloop.add_handler(self.socket, self._handle_events,
402 self.iostate)
402 self.iostate)
403 self._run_loop()
403 self._run_loop()
404
404
405 def stop(self):
405 def stop(self):
406 self.ioloop.stop()
406 self.ioloop.stop()
407 super(SubSocketChannel, self).stop()
407 super(SubSocketChannel, self).stop()
408
408
409 def call_handlers(self, msg):
409 def call_handlers(self, msg):
410 """This method is called in the ioloop thread when a message arrives.
410 """This method is called in the ioloop thread when a message arrives.
411
411
412 Subclasses should override this method to handle incoming messages.
412 Subclasses should override this method to handle incoming messages.
413 It is important to remember that this method is called in the ioloop
413 It is important to remember that this method is called in the ioloop
414 thread, so some logic must be used to ensure that the application-level
414 thread, so some logic must be used to ensure that the application-level
415 handlers are called in the application thread.
415 handlers are called in the application thread.
416 """
416 """
417 raise NotImplementedError('call_handlers must be defined in a subclass.')
417 raise NotImplementedError('call_handlers must be defined in a subclass.')
418
418
419 def flush(self, timeout=1.0):
419 def flush(self, timeout=1.0):
420 """Immediately processes all pending messages on the SUB channel.
420 """Immediately processes all pending messages on the SUB channel.
421
421
422 Callers should use this method to ensure that :meth:`call_handlers`
422 Callers should use this method to ensure that :meth:`call_handlers`
423 has been called for all messages that have been received on the
423 has been called for all messages that have been received on the
424 0MQ SUB socket of this channel.
424 0MQ SUB socket of this channel.
425
425
426 This method is thread safe.
426 This method is thread safe.
427
427
428 Parameters
428 Parameters
429 ----------
429 ----------
430 timeout : float, optional
430 timeout : float, optional
431 The maximum amount of time to spend flushing, in seconds. The
431 The maximum amount of time to spend flushing, in seconds. The
432 default is one second.
432 default is one second.
433 """
433 """
434 # We do the IOLoop callback process twice to ensure that the IOLoop
434 # We do the IOLoop callback process twice to ensure that the IOLoop
435 # gets to perform at least one full poll.
435 # gets to perform at least one full poll.
436 stop_time = time.time() + timeout
436 stop_time = time.time() + timeout
437 for i in xrange(2):
437 for i in xrange(2):
438 self._flushed = False
438 self._flushed = False
439 self.ioloop.add_callback(self._flush)
439 self.ioloop.add_callback(self._flush)
440 while not self._flushed and time.time() < stop_time:
440 while not self._flushed and time.time() < stop_time:
441 time.sleep(0.01)
441 time.sleep(0.01)
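# Usage sketch: a frontend that wants all published output before
# redrawing would call flush() and let call_handlers drain, e.g.:
#
#   sub.flush(timeout=0.5)
#
# The two-pass loop above guarantees the IOLoop completes at least one
# full poll within the timeout before flush() returns.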
442
442
443 def _handle_events(self, socket, events):
443 def _handle_events(self, socket, events):
444 # SUB sockets are receive-only here: handle errors and incoming messages
444 # SUB sockets are receive-only here: handle errors and incoming messages
445 if events & POLLERR:
445 if events & POLLERR:
446 self._handle_err()
446 self._handle_err()
447 if events & POLLIN:
447 if events & POLLIN:
448 self._handle_recv()
448 self._handle_recv()
449
449
450 def _handle_err(self):
450 def _handle_err(self):
451 # We don't want to let this go silently, so eventually we should log.
451 # We don't want to let this go silently, so eventually we should log.
452 raise zmq.ZMQError()
452 raise zmq.ZMQError()
453
453
454 def _handle_recv(self):
454 def _handle_recv(self):
455 # Get all of the messages we can
455 # Get all of the messages we can
456 while True:
456 while True:
457 try:
457 try:
458 ident,msg = self.session.recv(self.socket)
458 ident,msg = self.session.recv(self.socket)
459 except zmq.ZMQError:
459 except zmq.ZMQError:
460 # Check the errno?
460 # Check the errno?
461 # Will this trigger POLLERR?
461 # Will this trigger POLLERR?
462 break
462 break
463 else:
463 else:
464 if msg is None:
464 if msg is None:
465 break
465 break
466 self.call_handlers(msg)
466 self.call_handlers(msg)
467
467
468 def _flush(self):
468 def _flush(self):
469 """Callback for :method:`self.flush`."""
469 """Callback for :method:`self.flush`."""
470 self._flushed = True
470 self._flushed = True
471
471
472
472
473 class StdInSocketChannel(ZMQSocketChannel):
473 class StdInSocketChannel(ZMQSocketChannel):
474 """A reply channel to handle raw_input requests that the kernel makes."""
474 """A reply channel to handle raw_input requests that the kernel makes."""
475
475
476 msg_queue = None
476 msg_queue = None
477
477
478 def __init__(self, context, session, address):
478 def __init__(self, context, session, address):
479 super(StdInSocketChannel, self).__init__(context, session, address)
479 super(StdInSocketChannel, self).__init__(context, session, address)
480 self.ioloop = ioloop.IOLoop()
480 self.ioloop = ioloop.IOLoop()
481 self.msg_queue = Queue()
481 self.msg_queue = Queue()
482
482
483 def run(self):
483 def run(self):
484 """The thread's main activity. Call start() instead."""
484 """The thread's main activity. Call start() instead."""
485 self.socket = self.context.socket(zmq.XREQ)
485 self.socket = self.context.socket(zmq.DEALER)
486 self.socket.setsockopt(zmq.IDENTITY, self.session.session)
486 self.socket.setsockopt(zmq.IDENTITY, self.session.session)
487 self.socket.connect('tcp://%s:%i' % self.address)
487 self.socket.connect('tcp://%s:%i' % self.address)
488 self.iostate = POLLERR|POLLIN
488 self.iostate = POLLERR|POLLIN
489 self.ioloop.add_handler(self.socket, self._handle_events,
489 self.ioloop.add_handler(self.socket, self._handle_events,
490 self.iostate)
490 self.iostate)
491 self._run_loop()
491 self._run_loop()
492
492
493 def stop(self):
493 def stop(self):
494 self.ioloop.stop()
494 self.ioloop.stop()
495 super(StdInSocketChannel, self).stop()
495 super(StdInSocketChannel, self).stop()
496
496
497 def call_handlers(self, msg):
497 def call_handlers(self, msg):
498 """This method is called in the ioloop thread when a message arrives.
498 """This method is called in the ioloop thread when a message arrives.
499
499
500 Subclasses should override this method to handle incoming messages.
500 Subclasses should override this method to handle incoming messages.
501 It is important to remember that this method is called in the ioloop
501 It is important to remember that this method is called in the ioloop
502 thread, so some logic must be used to ensure that the application-level
502 thread, so some logic must be used to ensure that the application-level
503 handlers are called in the application thread.
503 handlers are called in the application thread.
504 """
504 """
505 raise NotImplementedError('call_handlers must be defined in a subclass.')
505 raise NotImplementedError('call_handlers must be defined in a subclass.')
506
506
507 def input(self, string):
507 def input(self, string):
508 """Send a string of raw input to the kernel."""
508 """Send a string of raw input to the kernel."""
509 content = dict(value=string)
509 content = dict(value=string)
510 msg = self.session.msg('input_reply', content)
510 msg = self.session.msg('input_reply', content)
511 self._queue_reply(msg)
511 self._queue_reply(msg)
512
512
513 def _handle_events(self, socket, events):
513 def _handle_events(self, socket, events):
514 if events & POLLERR:
514 if events & POLLERR:
515 self._handle_err()
515 self._handle_err()
516 if events & POLLOUT:
516 if events & POLLOUT:
517 self._handle_send()
517 self._handle_send()
518 if events & POLLIN:
518 if events & POLLIN:
519 self._handle_recv()
519 self._handle_recv()
520
520
521 def _handle_recv(self):
521 def _handle_recv(self):
522 ident,msg = self.session.recv(self.socket, 0)
522 ident,msg = self.session.recv(self.socket, 0)
523 self.call_handlers(msg)
523 self.call_handlers(msg)
524
524
525 def _handle_send(self):
525 def _handle_send(self):
526 try:
526 try:
527 msg = self.msg_queue.get(False)
527 msg = self.msg_queue.get(False)
528 except Empty:
528 except Empty:
529 pass
529 pass
530 else:
530 else:
531 self.session.send(self.socket,msg)
531 self.session.send(self.socket,msg)
532 if self.msg_queue.empty():
532 if self.msg_queue.empty():
533 self.drop_io_state(POLLOUT)
533 self.drop_io_state(POLLOUT)
534
534
535 def _handle_err(self):
535 def _handle_err(self):
536 # We don't want to let this go silently, so eventually we should log.
536 # We don't want to let this go silently, so eventually we should log.
537 raise zmq.ZMQError()
537 raise zmq.ZMQError()
538
538
539 def _queue_reply(self, msg):
539 def _queue_reply(self, msg):
540 self.msg_queue.put(msg)
540 self.msg_queue.put(msg)
541 self.add_io_state(POLLOUT)
541 self.add_io_state(POLLOUT)
542
542
543
543
544 class HBSocketChannel(ZMQSocketChannel):
544 class HBSocketChannel(ZMQSocketChannel):
545 """The heartbeat channel which monitors the kernel heartbeat.
545 """The heartbeat channel which monitors the kernel heartbeat.
546
546
547 Note that the heartbeat channel is paused by default. Once you start
547 Note that the heartbeat channel is paused by default. Once you start
548 this channel, the kernel manager will ensure that it is paused and un-paused
548 this channel, the kernel manager will ensure that it is paused and un-paused
549 as appropriate.
549 as appropriate.
550 """
550 """
551
551
552 time_to_dead = 3.0
552 time_to_dead = 3.0
553 socket = None
553 socket = None
554 poller = None
554 poller = None
555 _running = None
555 _running = None
556 _pause = None
556 _pause = None
557
557
558 def __init__(self, context, session, address):
558 def __init__(self, context, session, address):
559 super(HBSocketChannel, self).__init__(context, session, address)
559 super(HBSocketChannel, self).__init__(context, session, address)
560 self._running = False
560 self._running = False
561 self._pause = True
561 self._pause = True
562
562
563 def _create_socket(self):
563 def _create_socket(self):
564 self.socket = self.context.socket(zmq.REQ)
564 self.socket = self.context.socket(zmq.REQ)
565 self.socket.setsockopt(zmq.IDENTITY, self.session.session)
565 self.socket.setsockopt(zmq.IDENTITY, self.session.session)
566 self.socket.connect('tcp://%s:%i' % self.address)
566 self.socket.connect('tcp://%s:%i' % self.address)
567 self.poller = zmq.Poller()
567 self.poller = zmq.Poller()
568 self.poller.register(self.socket, zmq.POLLIN)
568 self.poller.register(self.socket, zmq.POLLIN)
569
569
570 def run(self):
570 def run(self):
571 """The thread's main activity. Call start() instead."""
571 """The thread's main activity. Call start() instead."""
572 self._create_socket()
572 self._create_socket()
573 self._running = True
573 self._running = True
574 while self._running:
574 while self._running:
575 if self._pause:
575 if self._pause:
576 time.sleep(self.time_to_dead)
576 time.sleep(self.time_to_dead)
577 else:
577 else:
578 since_last_heartbeat = 0.0
578 since_last_heartbeat = 0.0
579 request_time = time.time()
579 request_time = time.time()
580 try:
580 try:
581 #io.rprint('Ping from HB channel') # dbg
581 #io.rprint('Ping from HB channel') # dbg
582 self.socket.send(b'ping')
582 self.socket.send(b'ping')
583 except zmq.ZMQError as e:
583 except zmq.ZMQError as e:
584 #io.rprint('*** HB Error:', e) # dbg
584 #io.rprint('*** HB Error:', e) # dbg
585 if e.errno == zmq.EFSM:
585 if e.errno == zmq.EFSM:
586 #io.rprint('sleep...', self.time_to_dead) # dbg
586 #io.rprint('sleep...', self.time_to_dead) # dbg
587 time.sleep(self.time_to_dead)
587 time.sleep(self.time_to_dead)
588 self._create_socket()
588 self._create_socket()
589 else:
589 else:
590 raise
590 raise
591 else:
591 else:
592 while True:
592 while True:
593 try:
593 try:
594 self.socket.recv(zmq.NOBLOCK)
594 self.socket.recv(zmq.NOBLOCK)
595 except zmq.ZMQError as e:
595 except zmq.ZMQError as e:
596 #io.rprint('*** HB Error 2:', e) # dbg
596 #io.rprint('*** HB Error 2:', e) # dbg
597 if e.errno == zmq.EAGAIN:
597 if e.errno == zmq.EAGAIN:
598 before_poll = time.time()
598 before_poll = time.time()
599 until_dead = self.time_to_dead - (before_poll -
599 until_dead = self.time_to_dead - (before_poll -
600 request_time)
600 request_time)
601
601
602 # When the return value of poll() is an empty
602 # When the return value of poll() is an empty
603 # list, that is when things have gone wrong
603 # list, that is when things have gone wrong
604 # (zeromq bug). As long as it is not an empty
604 # (zeromq bug). As long as it is not an empty
605 # list, poll is working correctly even if it
605 # list, poll is working correctly even if it
606 # returns quickly. Note: poll timeout is in
606 # returns quickly. Note: poll timeout is in
607 # milliseconds.
607 # milliseconds.
608 if until_dead > 0.0:
608 if until_dead > 0.0:
609 while True:
609 while True:
610 try:
610 try:
611 self.poller.poll(1000 * until_dead)
611 self.poller.poll(1000 * until_dead)
612 except zmq.ZMQError as e:
612 except zmq.ZMQError as e:
613 if e.errno == errno.EINTR:
613 if e.errno == errno.EINTR:
614 continue
614 continue
615 else:
615 else:
616 raise
616 raise
617 else:
617 else:
618 break
618 break
619
619
620 since_last_heartbeat = time.time()-request_time
620 since_last_heartbeat = time.time()-request_time
621 if since_last_heartbeat > self.time_to_dead:
621 if since_last_heartbeat > self.time_to_dead:
622 self.call_handlers(since_last_heartbeat)
622 self.call_handlers(since_last_heartbeat)
623 break
623 break
624 else:
624 else:
625 # FIXME: We should probably log this instead.
625 # FIXME: We should probably log this instead.
626 raise
626 raise
627 else:
627 else:
628 until_dead = self.time_to_dead - (time.time() -
628 until_dead = self.time_to_dead - (time.time() -
629 request_time)
629 request_time)
630 if until_dead > 0.0:
630 if until_dead > 0.0:
631 #io.rprint('sleep...', self.time_to_dead) # dbg
631 #io.rprint('sleep...', self.time_to_dead) # dbg
632 time.sleep(until_dead)
632 time.sleep(until_dead)
633 break
633 break
634
634
635 def pause(self):
635 def pause(self):
636 """Pause the heartbeat."""
636 """Pause the heartbeat."""
637 self._pause = True
637 self._pause = True
638
638
639 def unpause(self):
639 def unpause(self):
640 """Unpause the heartbeat."""
640 """Unpause the heartbeat."""
641 self._pause = False
641 self._pause = False
642
642
643 def is_beating(self):
643 def is_beating(self):
644 """Is the heartbeat running and not paused."""
644 """Is the heartbeat running and not paused."""
645 if self.is_alive() and not self._pause:
645 if self.is_alive() and not self._pause:
646 return True
646 return True
647 else:
647 else:
648 return False
648 return False
649
649
650 def stop(self):
650 def stop(self):
651 self._running = False
651 self._running = False
652 super(HBSocketChannel, self).stop()
652 super(HBSocketChannel, self).stop()
653
653
654 def call_handlers(self, since_last_heartbeat):
654 def call_handlers(self, since_last_heartbeat):
655 """This method is called in the ioloop thread when a message arrives.
655 """This method is called in the ioloop thread when a message arrives.
656
656
657 Subclasses should override this method to handle incoming messages.
657 Subclasses should override this method to handle incoming messages.
658 It is important to remember that this method is called in the ioloop
658 It is important to remember that this method is called in the ioloop
659 thread, so some logic must be used to ensure that the application-level
659 thread, so some logic must be used to ensure that the application-level
660 handlers are called in the application thread.
660 handlers are called in the application thread.
661 """
661 """
662 raise NotImplementedError('call_handlers must be defined in a subclass.')
662 raise NotImplementedError('call_handlers must be defined in a subclass.')
663
663
664
664
665 #-----------------------------------------------------------------------------
665 #-----------------------------------------------------------------------------
666 # Main kernel manager class
666 # Main kernel manager class
667 #-----------------------------------------------------------------------------
667 #-----------------------------------------------------------------------------
668
668
669 class KernelManager(HasTraits):
669 class KernelManager(HasTraits):
670 """ Manages a kernel for a frontend.
670 """ Manages a kernel for a frontend.
671
671
672 The SUB channel is for the frontend to receive messages published by the
672 The SUB channel is for the frontend to receive messages published by the
673 kernel.
673 kernel.
674
674
675 The SHELL (DEALER) channel is for the frontend to make requests of the kernel.
675 The SHELL (DEALER) channel is for the frontend to make requests of the kernel.
676
676
677 The STDIN channel is for the kernel to request stdin (raw_input) from the
677 The STDIN channel is for the kernel to request stdin (raw_input) from the
678 frontend.
678 frontend.
679 """
679 """
680 # config object for passing to child configurables
680 # config object for passing to child configurables
681 config = Instance(Config)
681 config = Instance(Config)
682
682
683 # The PyZMQ Context to use for communication with the kernel.
683 # The PyZMQ Context to use for communication with the kernel.
684 context = Instance(zmq.Context)
684 context = Instance(zmq.Context)
685 def _context_default(self):
685 def _context_default(self):
686 return zmq.Context.instance()
686 return zmq.Context.instance()
687
687
688 # The Session to use for communication with the kernel.
688 # The Session to use for communication with the kernel.
689 session = Instance(Session)
689 session = Instance(Session)
690
690
691 # The kernel process with which the KernelManager is communicating.
691 # The kernel process with which the KernelManager is communicating.
692 kernel = Instance(Popen)
692 kernel = Instance(Popen)
693
693
694 # The addresses for the communication channels.
694 # The addresses for the communication channels.
695 shell_address = TCPAddress((LOCALHOST, 0))
695 shell_address = TCPAddress((LOCALHOST, 0))
696 sub_address = TCPAddress((LOCALHOST, 0))
696 sub_address = TCPAddress((LOCALHOST, 0))
697 stdin_address = TCPAddress((LOCALHOST, 0))
697 stdin_address = TCPAddress((LOCALHOST, 0))
698 hb_address = TCPAddress((LOCALHOST, 0))
698 hb_address = TCPAddress((LOCALHOST, 0))
699
699
700 # The classes to use for the various channels.
700 # The classes to use for the various channels.
701 shell_channel_class = Type(ShellSocketChannel)
701 shell_channel_class = Type(ShellSocketChannel)
702 sub_channel_class = Type(SubSocketChannel)
702 sub_channel_class = Type(SubSocketChannel)
703 stdin_channel_class = Type(StdInSocketChannel)
703 stdin_channel_class = Type(StdInSocketChannel)
704 hb_channel_class = Type(HBSocketChannel)
704 hb_channel_class = Type(HBSocketChannel)
705
705
706 # Protected traits.
706 # Protected traits.
707 _launch_args = Any
707 _launch_args = Any
708 _shell_channel = Any
708 _shell_channel = Any
709 _sub_channel = Any
709 _sub_channel = Any
710 _stdin_channel = Any
710 _stdin_channel = Any
711 _hb_channel = Any
711 _hb_channel = Any
712
712
713 def __init__(self, **kwargs):
713 def __init__(self, **kwargs):
714 super(KernelManager, self).__init__(**kwargs)
714 super(KernelManager, self).__init__(**kwargs)
715 if self.session is None:
715 if self.session is None:
716 self.session = Session(config=self.config)
716 self.session = Session(config=self.config)
717 # Uncomment this to try closing the context.
717 # Uncomment this to try closing the context.
718 # atexit.register(self.context.term)
718 # atexit.register(self.context.term)
719
719
720 #--------------------------------------------------------------------------
720 #--------------------------------------------------------------------------
721 # Channel management methods:
721 # Channel management methods:
722 #--------------------------------------------------------------------------
722 #--------------------------------------------------------------------------
723
723
724 def start_channels(self, shell=True, sub=True, stdin=True, hb=True):
724 def start_channels(self, shell=True, sub=True, stdin=True, hb=True):
725 """Starts the channels for this kernel.
725 """Starts the channels for this kernel.
726
726
727 This will create the channels if they do not exist and then start
727 This will create the channels if they do not exist and then start
728 them. If port numbers of 0 are being used (random ports) then you
728 them. If port numbers of 0 are being used (random ports) then you
729 must first call :meth:`start_kernel`. If the channels have been
729 must first call :meth:`start_kernel`. If the channels have been
730 stopped and you call this, :class:`RuntimeError` will be raised.
730 stopped and you call this, :class:`RuntimeError` will be raised.
731 """
731 """
732 if shell:
732 if shell:
733 self.shell_channel.start()
733 self.shell_channel.start()
734 if sub:
734 if sub:
735 self.sub_channel.start()
735 self.sub_channel.start()
736 if stdin:
736 if stdin:
737 self.stdin_channel.start()
737 self.stdin_channel.start()
738 if hb:
738 if hb:
739 self.hb_channel.start()
739 self.hb_channel.start()
740
740
741 def stop_channels(self):
741 def stop_channels(self):
742 """Stops all the running channels for this kernel.
742 """Stops all the running channels for this kernel.
743 """
743 """
744 if self.shell_channel.is_alive():
744 if self.shell_channel.is_alive():
745 self.shell_channel.stop()
745 self.shell_channel.stop()
746 if self.sub_channel.is_alive():
746 if self.sub_channel.is_alive():
747 self.sub_channel.stop()
747 self.sub_channel.stop()
748 if self.stdin_channel.is_alive():
748 if self.stdin_channel.is_alive():
749 self.stdin_channel.stop()
749 self.stdin_channel.stop()
750 if self.hb_channel.is_alive():
750 if self.hb_channel.is_alive():
751 self.hb_channel.stop()
751 self.hb_channel.stop()
752
752
753 @property
753 @property
754 def channels_running(self):
754 def channels_running(self):
755 """Are any of the channels created and running?"""
755 """Are any of the channels created and running?"""
756 return (self.shell_channel.is_alive() or self.sub_channel.is_alive() or
756 return (self.shell_channel.is_alive() or self.sub_channel.is_alive() or
757 self.stdin_channel.is_alive() or self.hb_channel.is_alive())
757 self.stdin_channel.is_alive() or self.hb_channel.is_alive())
758
758
759 #--------------------------------------------------------------------------
759 #--------------------------------------------------------------------------
760 # Kernel process management methods:
760 # Kernel process management methods:
761 #--------------------------------------------------------------------------
761 #--------------------------------------------------------------------------
762
762
763 def start_kernel(self, **kw):
763 def start_kernel(self, **kw):
764 """Starts a kernel process and configures the manager to use it.
764 """Starts a kernel process and configures the manager to use it.
765
765
766 If random ports (port=0) are being used, this method must be called
766 If random ports (port=0) are being used, this method must be called
767 before the channels are created.
767 before the channels are created.
768
768
769 Parameters
769 Parameters
770 ----------
770 ----------
771 ipython : bool, optional (default True)
771 ipython : bool, optional (default True)
772 Whether to use an IPython kernel instead of a plain Python kernel.
772 Whether to use an IPython kernel instead of a plain Python kernel.
773
773
774 launcher : callable, optional (default None)
774 launcher : callable, optional (default None)
775 A custom function for launching the kernel process (generally a
775 A custom function for launching the kernel process (generally a
776 wrapper around ``entry_point.base_launch_kernel``). In most cases,
776 wrapper around ``entry_point.base_launch_kernel``). In most cases,
777 it should not be necessary to use this parameter.
777 it should not be necessary to use this parameter.
778
778
779 **kw : optional
779 **kw : optional
780 See respective options for IPython and Python kernels.
780 See respective options for IPython and Python kernels.
781 """
781 """
782 shell, sub, stdin, hb = self.shell_address, self.sub_address, \
782 shell, sub, stdin, hb = self.shell_address, self.sub_address, \
783 self.stdin_address, self.hb_address
783 self.stdin_address, self.hb_address
784 if shell[0] not in LOCAL_IPS or sub[0] not in LOCAL_IPS or \
784 if shell[0] not in LOCAL_IPS or sub[0] not in LOCAL_IPS or \
785 stdin[0] not in LOCAL_IPS or hb[0] not in LOCAL_IPS:
785 stdin[0] not in LOCAL_IPS or hb[0] not in LOCAL_IPS:
786 raise RuntimeError("Can only launch a kernel on a local interface. "
786 raise RuntimeError("Can only launch a kernel on a local interface. "
787 "Make sure that the '*_address' attributes are "
787 "Make sure that the '*_address' attributes are "
788 "configured properly. "
788 "configured properly. "
789 "Currently valid addresses are: %s"%LOCAL_IPS
789 "Currently valid addresses are: %s"%LOCAL_IPS
790 )
790 )
791
791
792 self._launch_args = kw.copy()
792 self._launch_args = kw.copy()
793 launch_kernel = kw.pop('launcher', None)
793 launch_kernel = kw.pop('launcher', None)
794 if launch_kernel is None:
794 if launch_kernel is None:
795 if kw.pop('ipython', True):
795 if kw.pop('ipython', True):
796 from ipkernel import launch_kernel
796 from ipkernel import launch_kernel
797 else:
797 else:
798 from pykernel import launch_kernel
798 from pykernel import launch_kernel
799 self.kernel, xrep, pub, req, _hb = launch_kernel(
799 self.kernel, xrep, pub, req, _hb = launch_kernel(
800 shell_port=shell[1], iopub_port=sub[1],
800 shell_port=shell[1], iopub_port=sub[1],
801 stdin_port=stdin[1], hb_port=hb[1], **kw)
801 stdin_port=stdin[1], hb_port=hb[1], **kw)
802 self.shell_address = (shell[0], xrep)
802 self.shell_address = (shell[0], xrep)
803 self.sub_address = (sub[0], pub)
803 self.sub_address = (sub[0], pub)
804 self.stdin_address = (stdin[0], req)
804 self.stdin_address = (stdin[0], req)
805 self.hb_address = (hb[0], _hb)
805 self.hb_address = (hb[0], _hb)
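# Sketch of the `launcher` hook: the callable receives the same port
# keywords and must return the 5-tuple (process, shell, iopub, stdin, hb
# ports) unpacked above. `my_launch_kernel` is hypothetical:
#
#   from ipkernel import launch_kernel as base
#
#   def my_launch_kernel(**kw):
#       # add logging, env tweaks, etc., then delegate
#       return base(**kw)
#
#   km.start_kernel(launcher=my_launch_kernel)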
806
806
807 def shutdown_kernel(self, restart=False):
807 def shutdown_kernel(self, restart=False):
808 """ Attempts to the stop the kernel process cleanly. If the kernel
808 """ Attempts to the stop the kernel process cleanly. If the kernel
809 cannot be stopped, it is killed, if possible.
809 cannot be stopped, it is killed, if possible.
810 """
810 """
811 # FIXME: Shutdown does not work on Windows due to ZMQ errors!
811 # FIXME: Shutdown does not work on Windows due to ZMQ errors!
812 if sys.platform == 'win32':
812 if sys.platform == 'win32':
813 self.kill_kernel()
813 self.kill_kernel()
814 return
814 return
815
815
816 # Pause the heart beat channel if it exists.
816 # Pause the heart beat channel if it exists.
817 if self._hb_channel is not None:
817 if self._hb_channel is not None:
818 self._hb_channel.pause()
818 self._hb_channel.pause()
819
819
820 # Don't send any additional kernel kill messages immediately, to give
820 # Don't send any additional kernel kill messages immediately, to give
821 # the kernel a chance to properly execute shutdown actions. Wait for at
821 # the kernel a chance to properly execute shutdown actions. Wait for at
822 # most 1s, checking every 0.1s.
822 # most 1s, checking every 0.1s.
823 self.shell_channel.shutdown(restart=restart)
823 self.shell_channel.shutdown(restart=restart)
824 for i in range(10):
824 for i in range(10):
825 if self.is_alive:
825 if self.is_alive:
826 time.sleep(0.1)
826 time.sleep(0.1)
827 else:
827 else:
828 break
828 break
829 else:
829 else:
830 # OK, we've waited long enough.
830 # OK, we've waited long enough.
831 if self.has_kernel:
831 if self.has_kernel:
832 self.kill_kernel()
832 self.kill_kernel()
833
833
834 def restart_kernel(self, now=False, **kw):
834 def restart_kernel(self, now=False, **kw):
835 """Restarts a kernel with the arguments that were used to launch it.
835 """Restarts a kernel with the arguments that were used to launch it.
836
836
837 If the old kernel was launched with random ports, the same ports will be
837 If the old kernel was launched with random ports, the same ports will be
838 used for the new kernel.
838 used for the new kernel.
839
839
840 Parameters
840 Parameters
841 ----------
841 ----------
842 now : bool, optional
842 now : bool, optional
843 If True, the kernel is forcefully restarted *immediately*, without
843 If True, the kernel is forcefully restarted *immediately*, without
844 having a chance to do any cleanup action. Otherwise the kernel is
844 having a chance to do any cleanup action. Otherwise the kernel is
845 given 1s to clean up before a forceful restart is issued.
845 given 1s to clean up before a forceful restart is issued.
846
846
847 In all cases the kernel is restarted; the only difference is whether
847 In all cases the kernel is restarted; the only difference is whether
848 it is given a chance to perform a clean shutdown or not.
848 it is given a chance to perform a clean shutdown or not.
849
849
850 **kw : optional
850 **kw : optional
851 Any options specified here will replace those used to launch the
851 Any options specified here will replace those used to launch the
852 kernel.
852 kernel.
853 """
853 """
854 if self._launch_args is None:
854 if self._launch_args is None:
855 raise RuntimeError("Cannot restart the kernel. "
855 raise RuntimeError("Cannot restart the kernel. "
856 "No previous call to 'start_kernel'.")
856 "No previous call to 'start_kernel'.")
857 else:
857 else:
858 # Stop currently running kernel.
858 # Stop currently running kernel.
859 if self.has_kernel:
859 if self.has_kernel:
860 if now:
860 if now:
861 self.kill_kernel()
861 self.kill_kernel()
862 else:
862 else:
863 self.shutdown_kernel(restart=True)
863 self.shutdown_kernel(restart=True)
864
864
865 # Start new kernel.
865 # Start new kernel.
866 self._launch_args.update(kw)
866 self._launch_args.update(kw)
867 self.start_kernel(**self._launch_args)
867 self.start_kernel(**self._launch_args)
868
868
869 # FIXME: Messages get dropped in Windows due to probable ZMQ bug
869 # FIXME: Messages get dropped in Windows due to probable ZMQ bug
870 # unless there is some delay here.
870 # unless there is some delay here.
871 if sys.platform == 'win32':
871 if sys.platform == 'win32':
872 time.sleep(0.2)
872 time.sleep(0.2)
873
873
874 @property
874 @property
875 def has_kernel(self):
875 def has_kernel(self):
876 """Returns whether a kernel process has been specified for the kernel
876 """Returns whether a kernel process has been specified for the kernel
877 manager.
877 manager.
878 """
878 """
879 return self.kernel is not None
879 return self.kernel is not None
880
880
881 def kill_kernel(self):
881 def kill_kernel(self):
882 """ Kill the running kernel. """
882 """ Kill the running kernel. """
883 if self.has_kernel:
883 if self.has_kernel:
884 # Pause the heart beat channel if it exists.
884 # Pause the heart beat channel if it exists.
885 if self._hb_channel is not None:
885 if self._hb_channel is not None:
886 self._hb_channel.pause()
886 self._hb_channel.pause()
887
887
888 # Attempt to kill the kernel.
888 # Attempt to kill the kernel.
889 try:
889 try:
890 self.kernel.kill()
890 self.kernel.kill()
891 except OSError as e:
891 except OSError as e:
892 # In Windows, we will get an Access Denied error if the process
892 # In Windows, we will get an Access Denied error if the process
893 # has already terminated. Ignore it.
893 # has already terminated. Ignore it.
894 if sys.platform == 'win32':
894 if sys.platform == 'win32':
895 if e.winerror != 5:
895 if e.winerror != 5:
896 raise
896 raise
897 # On Unix, we may get an ESRCH error if the process has already
897 # On Unix, we may get an ESRCH error if the process has already
898 # terminated. Ignore it.
898 # terminated. Ignore it.
899 else:
899 else:
900 from errno import ESRCH
900 from errno import ESRCH
901 if e.errno != ESRCH:
901 if e.errno != ESRCH:
902 raise
902 raise
903 self.kernel = None
903 self.kernel = None
904 else:
904 else:
905 raise RuntimeError("Cannot kill kernel. No kernel is running!")
905 raise RuntimeError("Cannot kill kernel. No kernel is running!")
906
906
907 def interrupt_kernel(self):
907 def interrupt_kernel(self):
908 """ Interrupts the kernel. Unlike ``signal_kernel``, this operation is
908 """ Interrupts the kernel. Unlike ``signal_kernel``, this operation is
909 well supported on all platforms.
909 well supported on all platforms.
910 """
910 """
911 if self.has_kernel:
911 if self.has_kernel:
912 if sys.platform == 'win32':
912 if sys.platform == 'win32':
913 from parentpoller import ParentPollerWindows as Poller
913 from parentpoller import ParentPollerWindows as Poller
914 Poller.send_interrupt(self.kernel.win32_interrupt_event)
914 Poller.send_interrupt(self.kernel.win32_interrupt_event)
915 else:
915 else:
916 self.kernel.send_signal(signal.SIGINT)
916 self.kernel.send_signal(signal.SIGINT)
917 else:
917 else:
918 raise RuntimeError("Cannot interrupt kernel. No kernel is running!")
918 raise RuntimeError("Cannot interrupt kernel. No kernel is running!")
919
919
920 def signal_kernel(self, signum):
920 def signal_kernel(self, signum):
921 """ Sends a signal to the kernel. Note that since only SIGTERM is
921 """ Sends a signal to the kernel. Note that since only SIGTERM is
922 supported on Windows, this function is only useful on Unix systems.
922 supported on Windows, this function is only useful on Unix systems.
923 """
923 """
924 if self.has_kernel:
924 if self.has_kernel:
925 self.kernel.send_signal(signum)
925 self.kernel.send_signal(signum)
926 else:
926 else:
927 raise RuntimeError("Cannot signal kernel. No kernel is running!")
927 raise RuntimeError("Cannot signal kernel. No kernel is running!")
928
928
929 @property
929 @property
930 def is_alive(self):
930 def is_alive(self):
931 """Is the kernel process still running?"""
931 """Is the kernel process still running?"""
932 # FIXME: not using a heartbeat means this method is broken for any
932 # FIXME: not using a heartbeat means this method is broken for any
933 # remote kernel, it's only capable of handling local kernels.
933 # remote kernel, it's only capable of handling local kernels.
934 if self.has_kernel:
934 if self.has_kernel:
935 if self.kernel.poll() is None:
935 if self.kernel.poll() is None:
936 return True
936 return True
937 else:
937 else:
938 return False
938 return False
939 else:
939 else:
940 # We didn't start the kernel with this KernelManager so we don't
940 # We didn't start the kernel with this KernelManager so we don't
941 # know if it is running. We should use a heartbeat for this case.
941 # know if it is running. We should use a heartbeat for this case.
942 return True
942 return True
943
943
944 #--------------------------------------------------------------------------
944 #--------------------------------------------------------------------------
945 # Channels used for communication with the kernel:
945 # Channels used for communication with the kernel:
946 #--------------------------------------------------------------------------
946 #--------------------------------------------------------------------------
947
947
948 @property
948 @property
949 def shell_channel(self):
949 def shell_channel(self):
950 """Get the REQ socket channel object to make requests of the kernel."""
950 """Get the REQ socket channel object to make requests of the kernel."""
951 if self._shell_channel is None:
951 if self._shell_channel is None:
952 self._shell_channel = self.shell_channel_class(self.context,
952 self._shell_channel = self.shell_channel_class(self.context,
953 self.session,
953 self.session,
954 self.shell_address)
954 self.shell_address)
955 return self._shell_channel
955 return self._shell_channel
956
956
957 @property
957 @property
958 def sub_channel(self):
958 def sub_channel(self):
959 """Get the SUB socket channel object."""
959 """Get the SUB socket channel object."""
960 if self._sub_channel is None:
960 if self._sub_channel is None:
961 self._sub_channel = self.sub_channel_class(self.context,
961 self._sub_channel = self.sub_channel_class(self.context,
962 self.session,
962 self.session,
963 self.sub_address)
963 self.sub_address)
964 return self._sub_channel
964 return self._sub_channel
965
965
966 @property
966 @property
967 def stdin_channel(self):
967 def stdin_channel(self):
968 """Get the REP socket channel object to handle stdin (raw_input)."""
968 """Get the REP socket channel object to handle stdin (raw_input)."""
969 if self._stdin_channel is None:
969 if self._stdin_channel is None:
970 self._stdin_channel = self.stdin_channel_class(self.context,
970 self._stdin_channel = self.stdin_channel_class(self.context,
971 self.session,
971 self.session,
972 self.stdin_address)
972 self.stdin_address)
973 return self._stdin_channel
973 return self._stdin_channel
974
974
975 @property
975 @property
976 def hb_channel(self):
976 def hb_channel(self):
977 """Get the heartbeat socket channel object to check that the
977 """Get the heartbeat socket channel object to check that the
978 kernel is alive."""
978 kernel is alive."""
979 if self._hb_channel is None:
979 if self._hb_channel is None:
980 self._hb_channel = self.hb_channel_class(self.context,
980 self._hb_channel = self.hb_channel_class(self.context,
981 self.session,
981 self.session,
982 self.hb_address)
982 self.hb_address)
983 return self._hb_channel
983 return self._hb_channel
@@ -1,114 +1,114 b''
1 .. _ipython_qt:
1 .. _ipython_qt:
2
2
3 ====================
3 ====================
4 IPython Qt interface
4 IPython Qt interface
5 ====================
5 ====================
6
6
7 Abstract
7 Abstract
8 --------
8 --------
9
9
10 This is about the implementation of a Qt-based Graphical User Interface (GUI)
10 This is about the implementation of a Qt-based Graphical User Interface (GUI)
11 to execute Python code with an interpreter that runs in a separate process, with
11 to execute Python code with an interpreter that runs in a separate process, with
12 the two systems (GUI frontend and interpreter kernel) communicating via the
12 the two systems (GUI frontend and interpreter kernel) communicating via the
13 ZeroMQ Messaging library. The bulk of the implementation will be done without
13 ZeroMQ Messaging library. The bulk of the implementation will be done without
14 dependencies on IPython (only on Zmq). Once the key features are ready,
14 dependencies on IPython (only on Zmq). Once the key features are ready,
15 IPython-specific features can be added using the IPython codebase.
15 IPython-specific features can be added using the IPython codebase.
16
16
17
17
18 Project details
18 Project details
19 ---------------
19 ---------------
20
20
21 For a long time there has been demand for a graphical user interface for
21 For a long time there has been demand for a graphical user interface for
22 IPython, and the project already ships Wx-based prototypes thereof. But these
22 IPython, and the project already ships Wx-based prototypes thereof. But these
23 run all code in a single process, making them extremely brittle, as a crash of
23 run all code in a single process, making them extremely brittle, as a crash of
24 the Python interpreter kills the entire user session. Here I propose to build
24 the Python interpreter kills the entire user session. Here I propose to build
25 a Qt-based GUI that will communicate with a separate process for the code
25 a Qt-based GUI that will communicate with a separate process for the code
26 execution, so that if the interpreter kernel dies, the frontend can continue to
26 execution, so that if the interpreter kernel dies, the frontend can continue to
27 function after restarting a new kernel (and offering the user the option to
27 function after restarting a new kernel (and offering the user the option to
28 re-execute all inputs, which the frontend keeps track of).
28 re-execute all inputs, which the frontend keeps track of).
29
29
30 This GUI will allow for the easy editing of multi-line input and the convenient
30 This GUI will allow for the easy editing of multi-line input and the convenient
31 re-editing of previous blocks of input, which can be displayed in a 2-d
31 re-editing of previous blocks of input, which can be displayed in a 2-d
32 workspace instead of a line-driven one like today's IPython. This makes it much
32 workspace instead of a line-driven one like today's IPython. This makes it much
33 easier to incrementally build and tune code, by combining the rapid feedback
33 easier to incrementally build and tune code, by combining the rapid feedback
34 cycle of IPython with the ability to edit multiline code with good graphical
34 cycle of IPython with the ability to edit multiline code with good graphical
35 support.
35 support.
36
36
37
37
38 2-process model pyzmq base
38 2-process model pyzmq base
39 ~~~~~~~~~~~~~~~~~~~~~~~~~~
39 ~~~~~~~~~~~~~~~~~~~~~~~~~~
40
40
41 Given the necessity for users to keep their data safe, the design is based on
41 Given the necessity for users to keep their data safe, the design is based on
42 a 2-process model that will be achieved with a simple client/server system
42 a 2-process model that will be achieved with a simple client/server system
43 using `pyzmq <http://www.zeromq.org/bindings:python>`_, so that the GUI session
43 using `pyzmq <http://www.zeromq.org/bindings:python>`_, so that the GUI session
44 does not crash if the kernel process does. This will be achieved using this
44 does not crash if the kernel process does. This will be achieved using this
45 test `code
45 test `code
46 <http://github.com/fperez/pyzmq/blob/completer/examples/kernel/kernel.py>`_ and
46 <http://github.com/fperez/pyzmq/blob/completer/examples/kernel/kernel.py>`_ and
47 customizing it to the needs of the GUI, such as queue management that
47 customizing it to the needs of the GUI, such as queue management that
48 discriminates between different frontends connected to the same kernel, and tab
48 discriminates between different frontends connected to the same kernel, and tab
49 completion. A piece of drafted code for the kernel (server) should look like
49 completion. A piece of drafted code for the kernel (server) should look like
50 this::
50 this::
51
51
52 def main():
52 def main():
53 c = zmq.Context(1, 1)
53 c = zmq.Context(1, 1)
54 rep_conn = connection % port_base
54 rep_conn = connection % port_base
55 pub_conn = connection % (port_base+1)
55 pub_conn = connection % (port_base+1)
56 print >>sys.__stdout__, "Starting the kernel..."
56 print >>sys.__stdout__, "Starting the kernel..."
57 print >>sys.__stdout__, "On:",rep_conn, pub_conn
57 print >>sys.__stdout__, "On:",rep_conn, pub_conn
58 session = Session(username=u'kernel')
58 session = Session(username=u'kernel')
59 reply_socket = c.socket(zmq.XREP)
59 reply_socket = c.socket(zmq.ROUTER)
60 reply_socket.bind(rep_conn)
60 reply_socket.bind(rep_conn)
61 pub_socket = c.socket(zmq.PUB)
61 pub_socket = c.socket(zmq.PUB)
62 pub_socket.bind(pub_conn)
62 pub_socket.bind(pub_conn)
63 stdout = OutStream(session, pub_socket, u'stdout')
63 stdout = OutStream(session, pub_socket, u'stdout')
64 stderr = OutStream(session, pub_socket, u'stderr')
64 stderr = OutStream(session, pub_socket, u'stderr')
65 sys.stdout = stdout
65 sys.stdout = stdout
66 sys.stderr = stderr
66 sys.stderr = stderr
67 display_hook = DisplayHook(session, pub_socket)
67 display_hook = DisplayHook(session, pub_socket)
68 sys.displayhook = display_hook
68 sys.displayhook = display_hook
69 kernel = Kernel(session, reply_socket, pub_socket)
69 kernel = Kernel(session, reply_socket, pub_socket)
70
70
71 This kernel will use two queues (input and output). Each entry in the input
71 This kernel will use two queues (input and output). Each entry in the input
72 queue will have the id of the process (frontend) making the request, the type
72 queue will have the id of the process (frontend) making the request, the type
73 of request (execute, complete, help, etc.), the id of the request itself, and
73 of request (execute, complete, help, etc.), the id of the request itself, and
74 the string of code to be executed; the output queue will have basically the
74 the string of code to be executed; the output queue will have basically the
75 same information, except that the string is the output to be displayed. This
75 same information, except that the string is the output to be displayed. This
76 model is used because the kernel needs to maintain control of timeouts when
76 model is used because the kernel needs to maintain control of timeouts when
77 multiple requests are sent, and to keep the requests indexed.
77 multiple requests are sent, and to keep the requests indexed.
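
For illustration, one entry in these queues might be represented as a simple
dict (the field names are tentative, not part of the draft above)::

    request = {
        'frontend_id' : 2,           # id of the frontend making the request
        'type'        : 'execute',   # execute, complete, help, etc.
        'request_id'  : 37,          # id used to pair replies with requests
        'string'      : 'a = 10',    # code to run, or text to display
    }                                # (for the output queue)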
77
77
78 Qt based GUI
78 Qt based GUI
79 ~~~~~~~~~~~~
79 ~~~~~~~~~~~~
80
80
81 The design of the interface is going to be based on cells of code executed on
81 The design of the interface is going to be based on cells of code executed on
82 the previously defined kernel. It will also have GUI facilities such as
82 the previously defined kernel. It will also have GUI facilities such as
83 toolboxes, tooltips for code autocompletion and function summaries,
83 toolboxes, tooltips for code autocompletion and function summaries,
84 highlighting, and autoindentation. It will have a cell-style multiline editing
84 highlighting, and autoindentation. It will have a cell-style multiline editing
85 mode so that each block of code can be edited and executed independently; this
85 mode so that each block of code can be edited and executed independently; this
86 can be achieved by queuing QTextEdit objects (the cells) and formatting them so
86 can be achieved by queuing QTextEdit objects (the cells) and formatting them so
87 we can discriminate outputs from inputs. One of the main characteristics will
87 we can discriminate outputs from inputs. One of the main characteristics will
88 be debugging support that shows the requested outputs as the debugger (on a
88 be debugging support that shows the requested outputs as the debugger (on a
89 popup widget) "walks" through the code; this design is to be reviewed with the
89 popup widget) "walks" through the code; this design is to be reviewed with the
90 mentor. `This <http://gfif.udea.edu.co/IPythonQt_snapshot.png>`_ is a
90 mentor. `This <http://gfif.udea.edu.co/IPythonQt_snapshot.png>`_ is a
91 tentative view of the main window.
91 tentative view of the main window.
92
92
93 The GUI will continuously check the output queue from the kernel for new
93 The GUI will continuously check the output queue from the kernel for new
94 information to handle. This information has to be handled with care, since any
94 information to handle. This information has to be handled with care, since any
95 output may come at any time, possibly in a different order than requested, or
95 output may come at any time, possibly in a different order than requested, or
96 may not appear at all; this can happen for a variety of reasons (for example, a
96 may not appear at all; this can happen for a variety of reasons (for example, a
97 tab completion request made while the kernel is busy processing another
97 tab completion request made while the kernel is busy processing another
98 frontend's request). That is, if the kernel is busy it won't be possible to
98 frontend's request). That is, if the kernel is busy it won't be possible to
99 fulfill the request for a while, so the GUI will be prepared to abandon waiting
99 fulfill the request for a while, so the GUI will be prepared to abandon waiting
100 for the reply if the user moves on or a certain timeout expires.
100 for the reply if the user moves on or a certain timeout expires.
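
A sketch of that polling strategy, assuming the output queue is exposed as a
standard Python ``Queue``, that ``pending`` maps request ids to the cells
awaiting replies, and that cells have a hypothetical ``show_output`` method
(all three are assumptions, not part of the design above)::

    from Queue import Empty

    def poll_output(output_queue, pending, timeout=0.1):
        """Handle one reply if available; never block the GUI for long."""
        try:
            reply = output_queue.get(timeout=timeout)
        except Empty:
            return  # nothing yet; the GUI moves on and retries later
        cell = pending.pop(reply['request_id'], None)
        if cell is not None:  # ignore replies we gave up waiting for
            cell.show_output(reply['string'])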
101
101
102
102
103 POSSIBLE FUTURE DIRECTIONS
103 POSSIBLE FUTURE DIRECTIONS
104 ---------------------------
104 ---------------------------
105
105
106 The near future will bring the ability to save and load sessions, as well as
106 The near future will bring the ability to save and load sessions, as well as
107 importing and exporting to different formats like rst, html, pdf and
107 importing and exporting to different formats like rst, html, pdf and
108 python/ipython code; a discussion about this is taking place on the ipython-dev
108 python/ipython code; a discussion about this is taking place on the ipython-dev
109 mailing list. It will also bring interaction with a remote kernel and
109 mailing list. It will also bring interaction with a remote kernel and
110 distributed computation, an IPython project already in development.
110 distributed computation, an IPython project already in development.
111
111
112 The idea of a mathematica-like help widget (i.e. there will be parts of it that
112 The idea of a mathematica-like help widget (i.e. there will be parts of it that
113 will execute as a native session of IPythonQt) is still to be discussed on the
113 will execute as a native session of IPythonQt) is still to be discussed on the
114 development mailing list, but it's definitely a great idea.
114 development mailing list, but it's definitely a great idea.
@@ -1,937 +1,937 b''
1 .. _messaging:
1 .. _messaging:
2
2
3 ======================
3 ======================
4 Messaging in IPython
4 Messaging in IPython
5 ======================
5 ======================
6
6
7
7
8 Introduction
8 Introduction
9 ============
9 ============
10
10
11 This document explains the basic communications design and messaging
11 This document explains the basic communications design and messaging
12 specification for how the various IPython objects interact over a network
12 specification for how the various IPython objects interact over a network
13 transport. The current implementation uses the ZeroMQ_ library for messaging
13 transport. The current implementation uses the ZeroMQ_ library for messaging
14 within and between hosts.
14 within and between hosts.
15
15
16 .. Note::
16 .. Note::
17
17
18 This document should be considered the authoritative description of the
18 This document should be considered the authoritative description of the
19 IPython messaging protocol, and all developers are strongly encouraged to
19 IPython messaging protocol, and all developers are strongly encouraged to
20 keep it updated as the implementation evolves, so that we have a single
20 keep it updated as the implementation evolves, so that we have a single
21 common reference for all protocol details.
21 common reference for all protocol details.
22
22
23 The basic design is explained in the following diagram:
23 The basic design is explained in the following diagram:
24
24
25 .. image:: figs/frontend-kernel.png
25 .. image:: figs/frontend-kernel.png
26 :width: 450px
26 :width: 450px
27 :alt: IPython kernel/frontend messaging architecture.
27 :alt: IPython kernel/frontend messaging architecture.
28 :align: center
28 :align: center
29 :target: ../_images/frontend-kernel.png
29 :target: ../_images/frontend-kernel.png
30
30
31 A single kernel can be simultaneously connected to one or more frontends. The
31 A single kernel can be simultaneously connected to one or more frontends. The
32 kernel has three sockets that serve the following functions:
32 kernel has three sockets that serve the following functions:
33
33
34 1. REQ: this socket is connected to a *single* frontend at a time, and it allows
34 1. REQ: this socket is connected to a *single* frontend at a time, and it allows
35 the kernel to request input from a frontend when :func:`raw_input` is called.
35 the kernel to request input from a frontend when :func:`raw_input` is called.
36 The frontend holding the matching REP socket acts as a 'virtual keyboard'
36 The frontend holding the matching REP socket acts as a 'virtual keyboard'
37 for the kernel while this communication is happening (illustrated in the
37 for the kernel while this communication is happening (illustrated in the
38 figure by the black outline around the central keyboard). In practice,
38 figure by the black outline around the central keyboard). In practice,
39 frontends may display such kernel requests using a special input widget or
39 frontends may display such kernel requests using a special input widget or
40 otherwise indicate that the user is to type input for the kernel instead
40 otherwise indicate that the user is to type input for the kernel instead
41 of normal commands in the frontend.
41 of normal commands in the frontend.
42
42
43 2. XREP: this single socket allows multiple incoming connections from
43 2. ROUTER: this single socket allows multiple incoming connections from
44 frontends, and this is the socket where requests for code execution, object
44 frontends, and this is the socket where requests for code execution, object
45 information, prompts, etc. are made to the kernel by any frontend. The
45 information, prompts, etc. are made to the kernel by any frontend. The
46 communication on this socket is a sequence of request/reply actions from
46 communication on this socket is a sequence of request/reply actions from
47 each frontend and the kernel.
47 each frontend and the kernel.
48
48
49 3. PUB: this socket is the 'broadcast channel' where the kernel publishes all
49 3. PUB: this socket is the 'broadcast channel' where the kernel publishes all
50 side effects (stdout, stderr, etc.) as well as the requests coming from any
50 side effects (stdout, stderr, etc.) as well as the requests coming from any
51 client over the XREP socket and its own requests on the REP socket. There
51 client over the ROUTER socket and its own requests on the REP socket. There
52 are a number of actions in Python which generate side effects: :func:`print`
52 are a number of actions in Python which generate side effects: :func:`print`
53 writes to ``sys.stdout``, errors generate tracebacks, etc. Additionally, in
53 writes to ``sys.stdout``, errors generate tracebacks, etc. Additionally, in
54 a multi-client scenario, we want all frontends to be able to know what each
54 a multi-client scenario, we want all frontends to be able to know what each
55 other has sent to the kernel (this can be useful in collaborative scenarios,
55 other has sent to the kernel (this can be useful in collaborative scenarios,
56 for example). This socket allows both side effects and the information
56 for example). This socket allows both side effects and the information
57 about communications taking place with one client over the XREQ/XREP channel
57 about communications taking place with one client over the ROUTER/DEALER channel
58 to be made available to all clients in a uniform manner.
58 to be made available to all clients in a uniform manner.
59
59
60 All messages are tagged with enough information (details below) for clients
60 All messages are tagged with enough information (details below) for clients
61 to know which messages come from their own interaction with the kernel and
61 to know which messages come from their own interaction with the kernel and
62 which ones are from other clients, so they can display each type
62 which ones are from other clients, so they can display each type
63 appropriately.
63 appropriately.
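
For illustration, the frontend's side of this layout could be set up with
pyzmq roughly as follows (the addresses are placeholders)::

    import zmq

    ctx = zmq.Context()

    # DEALER socket: request/reply traffic with the kernel's ROUTER socket.
    shell = ctx.socket(zmq.DEALER)
    shell.connect('tcp://127.0.0.1:5555')

    # SUB socket: receives everything the kernel broadcasts on its PUB
    # socket; subscribe to all messages.
    sub = ctx.socket(zmq.SUB)
    sub.setsockopt(zmq.SUBSCRIBE, '')
    sub.connect('tcp://127.0.0.1:5556')

    # REP socket: answers the kernel's raw_input requests (its REQ side).
    stdin = ctx.socket(zmq.REP)
    stdin.connect('tcp://127.0.0.1:5557')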
64
64
65 The actual format of the messages allowed on each of these channels is
65 The actual format of the messages allowed on each of these channels is
66 specified below. Messages are dicts of dicts with string keys and values that
66 specified below. Messages are dicts of dicts with string keys and values that
67 are reasonably representable in JSON. Our current implementation uses JSON
67 are reasonably representable in JSON. Our current implementation uses JSON
68 explicitly as its message format, but this shouldn't be considered a permanent
68 explicitly as its message format, but this shouldn't be considered a permanent
69 feature. As we've discovered that JSON has non-trivial performance issues due
69 feature. As we've discovered that JSON has non-trivial performance issues due
70 to excessive copying, we may in the future move to a pure pickle-based raw
70 to excessive copying, we may in the future move to a pure pickle-based raw
71 message format. However, it should be possible to easily convert from the raw
71 message format. However, it should be possible to easily convert from the raw
72 objects to JSON, since we may have non-python clients (e.g. a web frontend).
72 objects to JSON, since we may have non-python clients (e.g. a web frontend).
73 As long as it's easy to make a JSON version of the objects that is a faithful
73 As long as it's easy to make a JSON version of the objects that is a faithful
74 representation of all the data, we can communicate with such clients.
74 representation of all the data, we can communicate with such clients.
75
75
76 .. Note::
76 .. Note::
77
77
78 Not all of these have yet been fully fleshed out, but the key ones are; see
78 Not all of these have yet been fully fleshed out, but the key ones are; see
79 the kernel and frontend files for actual implementation details.
79 the kernel and frontend files for actual implementation details.
80
80
81
81
82 Python functional API
82 Python functional API
83 =====================
83 =====================
84
84
85 As messages are dicts, they map naturally to a ``func(**kw)`` call form. We
85 As messages are dicts, they map naturally to a ``func(**kw)`` call form. We
86 should develop, at a few key points, functional forms of all the requests that
86 should develop, at a few key points, functional forms of all the requests that
87 take arguments in this manner and automatically construct the necessary dict
87 take arguments in this manner and automatically construct the necessary dict
88 for sending.
88 for sending.
89
89
90
90
91 General Message Format
91 General Message Format
92 ======================
92 ======================
93
93
94 All messages sent or received by any IPython process should have the following
94 All messages sent or received by any IPython process should have the following
95 generic structure::
95 generic structure::
96
96
97 {
97 {
98 # The message header contains a pair of unique identifiers for the
98 # The message header contains a pair of unique identifiers for the
99 # originating session and the actual message id, in addition to the
99 # originating session and the actual message id, in addition to the
100 # username for the process that generated the message. This is useful in
100 # username for the process that generated the message. This is useful in
101 # collaborative settings where multiple users may be interacting with the
101 # collaborative settings where multiple users may be interacting with the
102 # same kernel simultaneously, so that frontends can label the various
102 # same kernel simultaneously, so that frontends can label the various
103 # messages in a meaningful way.
103 # messages in a meaningful way.
104 'header' : {
104 'header' : {
105 'msg_id' : uuid,
105 'msg_id' : uuid,
106 'username' : str,
106 'username' : str,
107 'session' : uuid,
107 'session' : uuid,
108 # All recognized message type strings are listed below.
108 # All recognized message type strings are listed below.
109 'msg_type' : str,
109 'msg_type' : str,
110 },
110 },
111
111
112 # In a chain of messages, the header from the parent is copied so that
112 # In a chain of messages, the header from the parent is copied so that
113 # clients can track where messages come from.
113 # clients can track where messages come from.
114 'parent_header' : dict,
114 'parent_header' : dict,
115
115
116 # The actual content of the message must be a dict, whose structure
116 # The actual content of the message must be a dict, whose structure
117 # depends on the message type.
117 # depends on the message type.
118 'content' : dict,
118 'content' : dict,
119 }
119 }
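
As a concrete example, a fully populated message might look like this (the
identifiers are shortened, illustrative values)::

    {
        'header' : {
            'msg_id' : 'c67ac1a4-...',
            'username' : 'user',
            'session' : '5c6ee4c4-...',
            'msg_type' : 'execute_request',
        },
        'parent_header' : {},
        'content' : {
            'code' : '2+2',
            'silent' : False,
        },
    }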
120
120
121 For each message type, the actual content will differ and all existing message
121 For each message type, the actual content will differ and all existing message
122 types are specified in what follows of this document.
122 types are specified in what follows of this document.
123
123
124
124
125 Messages on the XREP/XREQ socket
125 Messages on the ROUTER/DEALER socket
126 ================================
126 ====================================
127
127
128 .. _execute:
128 .. _execute:
129
129
130 Execute
130 Execute
131 -------
131 -------
132
132
133 This message type is used by frontends to ask the kernel to execute code on
133 This message type is used by frontends to ask the kernel to execute code on
134 behalf of the user, in a namespace reserved to the user's variables (and thus
134 behalf of the user, in a namespace reserved to the user's variables (and thus
135 separate from the kernel's own internal code and variables).
135 separate from the kernel's own internal code and variables).
136
136
137 Message type: ``execute_request``::
137 Message type: ``execute_request``::
138
138
139 content = {
139 content = {
140 # Source code to be executed by the kernel, one or more lines.
140 # Source code to be executed by the kernel, one or more lines.
141 'code' : str,
141 'code' : str,
142
142
143 # A boolean flag which, if True, signals the kernel to execute this
143 # A boolean flag which, if True, signals the kernel to execute this
144 # code as quietly as possible. This means that the kernel will compile
144 # code as quietly as possible. This means that the kernel will compile
145 # the code with 'exec' instead of 'single' (so
145 # the code with 'exec' instead of 'single' (so
146 # sys.displayhook will not fire), and will *not*:
146 # sys.displayhook will not fire), and will *not*:
147 # - broadcast exceptions on the PUB socket
147 # - broadcast exceptions on the PUB socket
148 # - do any logging
148 # - do any logging
149 # - populate any history
149 # - populate any history
150 #
150 #
151 # The default is False.
151 # The default is False.
152 'silent' : bool,
152 'silent' : bool,
153
153
154 # A list of variable names from the user's namespace to be retrieved. What
154 # A list of variable names from the user's namespace to be retrieved. What
155 # is returned is a JSON string of the variable's repr(), not a python object.
155 # is returned is a JSON string of the variable's repr(), not a python object.
156 'user_variables' : list,
156 'user_variables' : list,
157
157
158 # Similarly, a dict mapping names to expressions to be evaluated in the
158 # Similarly, a dict mapping names to expressions to be evaluated in the
159 # user's dict.
159 # user's dict.
160 'user_expressions' : dict,
160 'user_expressions' : dict,
161 }
161 }
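
For example, a frontend that wants to run an assignment while also watching
one variable and one derived expression might send (illustrative values)::

    content = {
        'code' : 'x = 2**10',
        'silent' : False,
        'user_variables' : ['x'],
        'user_expressions' : {'sqrt_x' : 'x**0.5'},
    }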
162
162
163 The ``code`` field contains a single string (possibly multiline). The kernel
163 The ``code`` field contains a single string (possibly multiline). The kernel
164 is responsible for splitting this into one or more independent execution blocks
164 is responsible for splitting this into one or more independent execution blocks
165 and deciding whether to compile these in 'single' or 'exec' mode (see below for
165 and deciding whether to compile these in 'single' or 'exec' mode (see below for
166 detailed execution semantics).
166 detailed execution semantics).
167
167
168 The ``user_`` fields deserve a detailed explanation. In the past, IPython had
168 The ``user_`` fields deserve a detailed explanation. In the past, IPython had
169 the notion of a prompt string that allowed arbitrary code to be evaluated, and
169 the notion of a prompt string that allowed arbitrary code to be evaluated, and
170 this was put to good use by many in creating prompts that displayed system
170 this was put to good use by many in creating prompts that displayed system
171 status, path information, and even more esoteric uses like remote instrument
171 status, path information, and even more esoteric uses like remote instrument
172 status aqcuired over the network. But now that IPython has a clean separation
172 status aqcuired over the network. But now that IPython has a clean separation
173 between the kernel and the clients, the kernel has no prompt knowledge; prompts
173 between the kernel and the clients, the kernel has no prompt knowledge; prompts
174 are a frontend-side feature, and it should even be possible for different
174 are a frontend-side feature, and it should even be possible for different
175 frontends to display different prompts while interacting with the same kernel.
175 frontends to display different prompts while interacting with the same kernel.
176
176
177 The kernel now provides the ability to retrieve data from the user's namespace
177 The kernel now provides the ability to retrieve data from the user's namespace
178 after the execution of the main ``code``, thanks to two fields in the
178 after the execution of the main ``code``, thanks to two fields in the
179 ``execute_request`` message:
179 ``execute_request`` message:
180
180
181 - ``user_variables``: If only variables from the user's namespace are needed, a
181 - ``user_variables``: If only variables from the user's namespace are needed, a
182 list of variable names can be passed and a dict with these names as keys and
182 list of variable names can be passed and a dict with these names as keys and
183 their :func:`repr()` as values will be returned.
183 their :func:`repr()` as values will be returned.
184
184
185 - ``user_expressions``: For more complex expressions that require function
185 - ``user_expressions``: For more complex expressions that require function
186 evaluations, a dict can be provided with string keys and arbitrary python
186 evaluations, a dict can be provided with string keys and arbitrary python
187 expressions as values. The return message will also contain a dict with the
187 expressions as values. The return message will also contain a dict with the
188 same keys and the :func:`repr()` of the evaluated expressions as values.
188 same keys and the :func:`repr()` of the evaluated expressions as values.
189
189
190 With this information, frontends can display any status information they wish
190 With this information, frontends can display any status information they wish
191 in the form that best suits each frontend (a status line, a popup, inline for a
191 in the form that best suits each frontend (a status line, a popup, inline for a
192 terminal, etc).
192 terminal, etc).
193
193
194 .. Note::
194 .. Note::
195
195
196 In order to obtain the current execution counter for the purposes of
196 In order to obtain the current execution counter for the purposes of
197 displaying input prompts, frontends simply make an execution request with an
197 displaying input prompts, frontends simply make an execution request with an
198 empty code string and ``silent=True``.
198 empty code string and ``silent=True``.
199
199
200 Execution semantics
200 Execution semantics
201 ~~~~~~~~~~~~~~~~~~~
201 ~~~~~~~~~~~~~~~~~~~
202
202
203 When the silent flag is false, the execution of user code consists of the
203 When the silent flag is false, the execution of user code consists of the
204 following phases (in silent mode, only the ``code`` field is executed):
204 following phases (in silent mode, only the ``code`` field is executed):
205
205
206 1. Run the ``pre_runcode_hook``.
206 1. Run the ``pre_runcode_hook``.
207
207
208 2. Execute the ``code`` field, see below for details.
208 2. Execute the ``code`` field, see below for details.
209
209
210 3. If #2 succeeds, ``user_variables`` and ``user_expressions`` are
210 3. If #2 succeeds, ``user_variables`` and ``user_expressions`` are
211 computed. This ensures that any errors in the latter don't harm the main
211 computed. This ensures that any errors in the latter don't harm the main
212 code execution.
212 code execution.
213
213
214 4. Call any method registered with :meth:`register_post_execute`.
214 4. Call any method registered with :meth:`register_post_execute`.
215
215
216 .. warning::
216 .. warning::
217
217
218 The API for running code before/after the main code block is likely to
218 The API for running code before/after the main code block is likely to
219 change soon. Both the ``pre_runcode_hook`` and the
219 change soon. Both the ``pre_runcode_hook`` and the
220 :meth:`register_post_execute` are susceptible to modification, as we find a
220 :meth:`register_post_execute` are susceptible to modification, as we find a
221 consistent model for both.
221 consistent model for both.
222
222
223 To understand how the ``code`` field is executed, one must know that Python
223 To understand how the ``code`` field is executed, one must know that Python
224 code can be compiled in one of three modes (controlled by the ``mode`` argument
224 code can be compiled in one of three modes (controlled by the ``mode`` argument
225 to the :func:`compile` builtin):
225 to the :func:`compile` builtin):
226
226
227 *single*
227 *single*
228 Valid for a single interactive statement (though the source can contain
228 Valid for a single interactive statement (though the source can contain
229 multiple lines, such as a for loop). When compiled in this mode, the
229 multiple lines, such as a for loop). When compiled in this mode, the
230 generated bytecode contains special instructions that trigger the calling of
230 generated bytecode contains special instructions that trigger the calling of
231 :func:`sys.displayhook` for any expression in the block that returns a value.
231 :func:`sys.displayhook` for any expression in the block that returns a value.
232 This means that a single statement can actually produce multiple calls to
232 This means that a single statement can actually produce multiple calls to
233 :func:`sys.displayhook`; for example, a loop where each iteration
233 :func:`sys.displayhook`; for example, a loop where each iteration
234 computes an unassigned expression would generate 10 calls::
234 computes an unassigned expression would generate 10 calls::
235
235
236 for i in range(10):
236 for i in range(10):
237 i**2
237 i**2
238
238
239 *exec*
239 *exec*
240 An arbitrary amount of source code; this is how modules are compiled.
240 An arbitrary amount of source code; this is how modules are compiled.
241 :func:`sys.displayhook` is *never* implicitly called.
241 :func:`sys.displayhook` is *never* implicitly called.
242
242
243 *eval*
243 *eval*
244 A single expression that returns a value. :func:`sys.displayhook` is *never*
244 A single expression that returns a value. :func:`sys.displayhook` is *never*
245 implicitly called.
245 implicitly called.
246
246
247
247
248 The ``code`` field is split into individual blocks each of which is valid for
248 The ``code`` field is split into individual blocks each of which is valid for
249 execution in 'single' mode, and then:
249 execution in 'single' mode, and then:
250
250
251 - If there is only a single block: it is executed in 'single' mode.
251 - If there is only a single block: it is executed in 'single' mode.
252
252
253 - If there is more than one block:
253 - If there is more than one block:
254
254
255 * if the last one is no more than two lines long, run all but the last in
255 * if the last one is no more than two lines long, run all but the last in
256 'exec' mode and the very last one in 'single' mode. This makes it easy to
256 'exec' mode and the very last one in 'single' mode. This makes it easy to
257 type simple expressions at the end to see computed values (an example
257 type simple expressions at the end to see computed values (an example
258 follows this list).
258 follows this list).
259
259
260 * otherwise (the last one is also more than two lines long), run all in
260 * otherwise (the last one is also more than two lines long), run all in
261 'exec' mode as a single unit.
261 'exec' mode as a single unit.
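
For example, the following ``code`` field splits into two blocks; under these
rules the function definition runs in 'exec' mode, while the final one-line
block runs in 'single' mode, so its value is displayed::

    def f(x):
        return x + 1

    f(10)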
266
266
267 Any error in retrieving the ``user_variables`` or evaluating the
267 Any error in retrieving the ``user_variables`` or evaluating the
268 ``user_expressions`` will result in a simple error message in the return fields
268 ``user_expressions`` will result in a simple error message in the return fields
269 of the form::
269 of the form::
270
270
271 [ERROR] ExceptionType: Exception message
271 [ERROR] ExceptionType: Exception message
272
272
273 The user can simply send the same variable name or expression for evaluation to
273 The user can simply send the same variable name or expression for evaluation to
274 see a regular traceback.
274 see a regular traceback.
275
275
276 Errors in any registered post_execute functions are also reported similarly,
276 Errors in any registered post_execute functions are also reported similarly,
277 and the failing function is removed from the post_execution set so that it does
277 and the failing function is removed from the post_execution set so that it does
278 not continue triggering failures.
278 not continue triggering failures.
279
279
280 Upon completion of the execution request, the kernel *always* sends a reply,
280 Upon completion of the execution request, the kernel *always* sends a reply,
281 with a status code indicating what happened and additional data depending on
281 with a status code indicating what happened and additional data depending on
282 the outcome. See :ref:`below <execution_results>` for the possible return
282 the outcome. See :ref:`below <execution_results>` for the possible return
283 codes and associated data.
283 codes and associated data.
284
284
285
285
286 Execution counter (old prompt number)
286 Execution counter (old prompt number)
287 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
287 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
288
288
289 The kernel has a single, monotonically increasing counter of all execution
289 The kernel has a single, monotonically increasing counter of all execution
290 requests that are made with ``silent=False``. This counter is used to populate
290 requests that are made with ``silent=False``. This counter is used to populate
291 the ``In[n]``, ``Out[n]`` and ``_n`` variables, so clients will likely want to
291 the ``In[n]``, ``Out[n]`` and ``_n`` variables, so clients will likely want to
292 display it in some form to the user, which will typically (but not necessarily)
292 display it in some form to the user, which will typically (but not necessarily)
293 be done in the prompts. The value of this counter will be returned as the
293 be done in the prompts. The value of this counter will be returned as the
294 ``execution_count`` field of all ``execute_reply`` messages.
294 ``execution_count`` field of all ``execute_reply`` messages.
295
295
296 .. _execution_results:
296 .. _execution_results:
297
297
298 Execution results
298 Execution results
299 ~~~~~~~~~~~~~~~~~
299 ~~~~~~~~~~~~~~~~~
300
300
301 Message type: ``execute_reply``::
301 Message type: ``execute_reply``::
302
302
303 content = {
303 content = {
304 # One of: 'ok' OR 'error' OR 'abort'
304 # One of: 'ok' OR 'error' OR 'abort'
305 'status' : str,
305 'status' : str,
306
306
307 # The global kernel counter that increases by one with each non-silent
307 # The global kernel counter that increases by one with each non-silent
308 # executed request. This will typically be used by clients to display
308 # executed request. This will typically be used by clients to display
309 # prompt numbers to the user. If the request was a silent one, this will
309 # prompt numbers to the user. If the request was a silent one, this will
310 # be the current value of the counter in the kernel.
310 # be the current value of the counter in the kernel.
311 'execution_count' : int,
311 'execution_count' : int,
312 }
312 }
313
313
314 When status is 'ok', the following extra fields are present::
314 When status is 'ok', the following extra fields are present::
315
315
316 {
316 {
317 # The execution payload is a dict with string keys that may have been
317 # The execution payload is a dict with string keys that may have been
318 # produced by the code being executed. It is retrieved by the kernel at
318 # produced by the code being executed. It is retrieved by the kernel at
319 # the end of the execution and sent back to the front end, which can take
319 # the end of the execution and sent back to the front end, which can take
320 # action on it as needed. See main text for further details.
320 # action on it as needed. See main text for further details.
321 'payload' : dict,
321 'payload' : dict,
322
322
323 # Results for the user_variables and user_expressions.
323 # Results for the user_variables and user_expressions.
324 'user_variables' : dict,
324 'user_variables' : dict,
325 'user_expressions' : dict,
325 'user_expressions' : dict,
326
326
327 # The kernel will often transform the input provided to it. If the
327 # The kernel will often transform the input provided to it. If the
328 # '---->' transform had been applied, this is filled, otherwise it's the
328 # '---->' transform had been applied, this is filled, otherwise it's the
329 # empty string. So transformations like magics don't appear here, only
329 # empty string. So transformations like magics don't appear here, only
330 # autocall ones.
330 # autocall ones.
331 'transformed_code' : str,
331 'transformed_code' : str,
332 }
332 }
333
333
334 .. admonition:: Execution payloads
334 .. admonition:: Execution payloads
335
335
336 The notion of an 'execution payload' is different from a return value of a
336 The notion of an 'execution payload' is different from a return value of a
337 given set of code, which normally is just displayed on the pyout stream
337 given set of code, which normally is just displayed on the pyout stream
338 through the PUB socket. The idea of a payload is to allow special types of
338 through the PUB socket. The idea of a payload is to allow special types of
339 code, typically magics, to populate a data container in the IPython kernel
339 code, typically magics, to populate a data container in the IPython kernel
340 that will be shipped back to the caller via this channel. The kernel will
340 that will be shipped back to the caller via this channel. The kernel will
341 have an API for this, probably something along the lines of::
341 have an API for this, probably something along the lines of::
342
342
343 ip.exec_payload_add(key, value)
343 ip.exec_payload_add(key, value)
344
344
345 though this API is still in the design stages. The data returned in this
345 though this API is still in the design stages. The data returned in this
346 payload will allow frontends to present special views of what just happened.
346 payload will allow frontends to present special views of what just happened.
347
347
348
348
349 When status is 'error', the following extra fields are present::
349 When status is 'error', the following extra fields are present::
350
350
351 {
351 {
352 'exc_name' : str, # Exception name, as a string
352 'exc_name' : str, # Exception name, as a string
353 'exc_value' : str, # Exception value, as a string
353 'exc_value' : str, # Exception value, as a string
354
354
355 # The traceback will contain a list of frames, represented each as a
355 # The traceback will contain a list of frames, represented each as a
356 # string. For now we'll stick to the existing design of ultraTB, which
356 # string. For now we'll stick to the existing design of ultraTB, which
357 # controls exception level of detail statefully. But eventually we'll
357 # controls exception level of detail statefully. But eventually we'll
358 # want to grow into a model where more information is collected and
358 # want to grow into a model where more information is collected and
359 # packed into the traceback object, with clients deciding how little or
359 # packed into the traceback object, with clients deciding how little or
360 # how much of it to unpack. But for now, let's start with a simple list
360 # how much of it to unpack. But for now, let's start with a simple list
361 # of strings, since that requires only minimal changes to ultratb as
361 # of strings, since that requires only minimal changes to ultratb as
362 # written.
362 # written.
363 'traceback' : list,
363 'traceback' : list,
364 }
364 }
365
365
366
366
367 When status is 'abort', there are for now no additional data fields. This
367 When status is 'abort', there are for now no additional data fields. This
368 happens when the kernel was interrupted by a signal.
368 happens when the kernel was interrupted by a signal.
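
A client-side sketch of acting on these replies (``msg`` is assumed to be a
received message dict of the general shape described earlier)::

    content = msg['content']
    if content['status'] == 'ok':
        prompt_number = content['execution_count']
    elif content['status'] == 'error':
        print '%s: %s' % (content['exc_name'], content['exc_value'])
        for frame in content['traceback']:
            print frame
    else:  # 'abort': the kernel was interrupted by a signal
        print 'Request aborted'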
369
369
370 Kernel attribute access
370 Kernel attribute access
371 -----------------------
371 -----------------------
372
372
373 .. warning::
373 .. warning::
374
374
375 This part of the messaging spec is not actually implemented in the kernel
375 This part of the messaging spec is not actually implemented in the kernel
376 yet.
376 yet.
377
377
378 While this protocol does not specify full RPC access to arbitrary methods of
378 While this protocol does not specify full RPC access to arbitrary methods of
379 the kernel object, the kernel does allow read (and in some cases write) access
379 the kernel object, the kernel does allow read (and in some cases write) access
380 to certain attributes.
380 to certain attributes.
381
381
382 The policy for which attributes can be read is: any attribute of the kernel, or
382 The policy for which attributes can be read is: any attribute of the kernel, or
383 its sub-objects, that belongs to a :class:`Configurable` object and has been
383 its sub-objects, that belongs to a :class:`Configurable` object and has been
384 declared at the class-level with Traits validation, is in principle accessible
384 declared at the class-level with Traits validation, is in principle accessible
385 as long as its name does not begin with a leading underscore. The attribute
385 as long as its name does not begin with a leading underscore. The attribute
386 itself will have metadata indicating whether it allows remote read and/or write
386 itself will have metadata indicating whether it allows remote read and/or write
387 access. The message spec follows for attribute read and write requests.
387 access. The message spec follows for attribute read and write requests.
388
388
389 Message type: ``getattr_request``::
389 Message type: ``getattr_request``::
390
390
391 content = {
391 content = {
392 # The (possibly dotted) name of the attribute
392 # The (possibly dotted) name of the attribute
393 'name' : str,
393 'name' : str,
394 }
394 }
395
395
396 When a ``getattr_request`` fails, there are two possible error types:
396 When a ``getattr_request`` fails, there are two possible error types:
397
397
398 - AttributeError: this type of error was raised by the kernel itself when
398 - AttributeError: this type of error was raised by the kernel itself when
399 trying to access the given name. This means that the attribute likely
399 trying to access the given name. This means that the attribute likely
400 doesn't exist.
400 doesn't exist.
401
401
402 - AccessError: the attribute exists but its value is not readable remotely.
402 - AccessError: the attribute exists but its value is not readable remotely.
403
403
404
404
405 Message type: ``getattr_reply``::
405 Message type: ``getattr_reply``::
406
406
407 content = {
407 content = {
408 # One of ['ok', 'AttributeError', 'AccessError'].
408 # One of ['ok', 'AttributeError', 'AccessError'].
409 'status' : str,
409 'status' : str,
410 # If status is 'ok', a JSON object.
410 # If status is 'ok', a JSON object.
411 'value' : object,
411 'value' : object,
412 }
412 }
413
413
414 Message type: ``setattr_request``::
414 Message type: ``setattr_request``::
415
415
416 content = {
416 content = {
417 # The (possibly dotted) name of the attribute
417 # The (possibly dotted) name of the attribute
418 'name' : str,
418 'name' : str,
419
419
420 # A JSON-encoded object, that will be validated by the Traits
420 # A JSON-encoded object, that will be validated by the Traits
421 # information in the kernel
421 # information in the kernel
422 'value' : object,
422 'value' : object,
423 }
423 }
424
424
425 When a ``setattr_request`` fails, there are also two possible error types with
425 When a ``setattr_request`` fails, there are also two possible error types with
426 similar meanings as those of the ``getattr_request`` case, but for writing.
426 similar meanings as those of the ``getattr_request`` case, but for writing.
427
427
428 Message type: ``setattr_reply``::
428 Message type: ``setattr_reply``::
429
429
430 content = {
430 content = {
431 # One of ['ok', 'AttributeError', 'AccessError'].
431 # One of ['ok', 'AttributeError', 'AccessError'].
432 'status' : str,
432 'status' : str,
433 }
433 }
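
As an illustration (the attribute name here is hypothetical), a successful
read followed by a rejected write might look like::

    # getattr_request -> getattr_reply
    content = {'name' : 'displayhook.prompt_count'}
    content = {'status' : 'ok', 'value' : 7}

    # setattr_request -> setattr_reply, for an attribute whose metadata
    # does not allow remote writes
    content = {'name' : 'displayhook.prompt_count', 'value' : 0}
    content = {'status' : 'AccessError'}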
434
434
435
435
436
436
437 Object information
437 Object information
438 ------------------
438 ------------------
439
439
440 One of IPython's most used capabilities is the introspection of Python objects
440 One of IPython's most used capabilities is the introspection of Python objects
441 in the user's namespace, typically invoked via the ``?`` and ``??`` characters
441 in the user's namespace, typically invoked via the ``?`` and ``??`` characters
442 (which in reality are shorthands for the ``%pinfo`` magic). This is used often
442 (which in reality are shorthands for the ``%pinfo`` magic). This is used often
443 enough that it warrants an explicit message type, especially because frontends
443 enough that it warrants an explicit message type, especially because frontends
444 may want to get object information in response to user keystrokes (like Tab or
444 may want to get object information in response to user keystrokes (like Tab or
445 F1) besides the user explicitly typing code like ``x??``.
445 F1) besides the user explicitly typing code like ``x??``.
446
446
447 Message type: ``object_info_request``::
447 Message type: ``object_info_request``::
448
448
449 content = {
449 content = {
450 # The (possibly dotted) name of the object to be searched in all
450 # The (possibly dotted) name of the object to be searched in all
451 # relevant namespaces
451 # relevant namespaces
452 'name' : str,
452 'name' : str,
453
453
454 # The level of detail desired. The default (0) is equivalent to typing
454 # The level of detail desired. The default (0) is equivalent to typing
455 # 'x?' at the prompt, 1 is equivalent to 'x??'.
455 # 'x?' at the prompt, 1 is equivalent to 'x??'.
456 'detail_level' : int,
456 'detail_level' : int,
457 }
457 }
458
458
459 The returned information will be a dictionary with keys very similar to the
459 The returned information will be a dictionary with keys very similar to the
460 field names that IPython prints at the terminal.
460 field names that IPython prints at the terminal.
461
461
462 Message type: ``object_info_reply``::
462 Message type: ``object_info_reply``::
463
463
464 content = {
464 content = {
465 # The name the object was requested under
465 # The name the object was requested under
466 'name' : str,
466 'name' : str,
467
467
468 # Boolean flag indicating whether the named object was found or not. If
468 # Boolean flag indicating whether the named object was found or not. If
469 # it's false, all other fields will be empty.
469 # it's false, all other fields will be empty.
470 'found' : bool,
470 'found' : bool,
471
471
472 # Flags for magics and system aliases
472 # Flags for magics and system aliases
473 'ismagic' : bool,
473 'ismagic' : bool,
474 'isalias' : bool,
474 'isalias' : bool,
475
475
476 # The name of the namespace where the object was found ('builtin',
476 # The name of the namespace where the object was found ('builtin',
477 # 'magics', 'alias', 'interactive', etc.)
477 # 'magics', 'alias', 'interactive', etc.)
478 'namespace' : str,
478 'namespace' : str,
479
479
480 # The type name will be type.__name__ for normal Python objects, but it
480 # The type name will be type.__name__ for normal Python objects, but it
481 # can also be a string like 'Magic function' or 'System alias'
481 # can also be a string like 'Magic function' or 'System alias'
482 'type_name' : str,
482 'type_name' : str,
483
483
484 # The string form of the object, possibly truncated for length if
484 # The string form of the object, possibly truncated for length if
485 # detail_level is 0
485 # detail_level is 0
486 'string_form' : str,
486 'string_form' : str,
487
487
488 # For objects with a __class__ attribute this will be set
488 # For objects with a __class__ attribute this will be set
489 'base_class' : str,
489 'base_class' : str,
490
490
491 # For objects with a __len__ attribute this will be set
491 # For objects with a __len__ attribute this will be set
492 'length' : int,
492 'length' : int,
493
493
494 # If the object is a function, class or method whose file we can find,
494 # If the object is a function, class or method whose file we can find,
495 # we give its full path
495 # we give its full path
496 'file' : str,
496 'file' : str,
497
497
498 # For pure Python callable objects, we can reconstruct the object
498 # For pure Python callable objects, we can reconstruct the object
499 # definition line which provides its call signature. For convenience this
499 # definition line which provides its call signature. For convenience this
500 # is returned as a single 'definition' field, but below the raw parts that
500 # is returned as a single 'definition' field, but below the raw parts that
501 # compose it are also returned as the argspec field.
501 # compose it are also returned as the argspec field.
502 'definition' : str,
502 'definition' : str,
503
503
504 # The individual parts that together form the definition string. Clients
504 # The individual parts that together form the definition string. Clients
505 # with rich display capabilities may use this to provide a richer and more
505 # with rich display capabilities may use this to provide a richer and more
506 # precise representation of the definition line (e.g. by highlighting
506 # precise representation of the definition line (e.g. by highlighting
507 # arguments based on the user's cursor position). For non-callable
507 # arguments based on the user's cursor position). For non-callable
508 # objects, this field is empty.
508 # objects, this field is empty.
509 'argspec' : {
509 'argspec' : {
509 # The names of all the arguments
509 # The names of all the arguments
510 args : list,
510 args : list,
511 # The name of the varargs (*args), if any
511 # The name of the varargs (*args), if any
512 varargs : str,
512 varargs : str,
513 # The name of the varkw (**kw), if any
513 # The name of the varkw (**kw), if any
514 varkw : str,
514 varkw : str,
515 # The values (as strings) of all default arguments. Note
515 # The values (as strings) of all default arguments. Note
516 # that these must be matched *in reverse* with the 'args'
516 # that these must be matched *in reverse* with the 'args'
517 # list above, since the first positional args have no default
517 # list above, since the first positional args have no default
518 # value at all.
518 # value at all.
519 defaults : list,
519 defaults : list,
520 },
520 },
521
521
522 # For instances, provide the constructor signature (the definition of
522 # For instances, provide the constructor signature (the definition of
523 # the __init__ method):
523 # the __init__ method):
524 'init_definition' : str,
524 'init_definition' : str,
525
525
526 # Docstrings: for any object (function, method, module, package) with a
526 # Docstrings: for any object (function, method, module, package) with a
527 # docstring, we show it. But in addition, we may provide additional
527 # docstring, we show it. But in addition, we may provide additional
528 # docstrings. For example, for instances we will show the constructor
528 # docstrings. For example, for instances we will show the constructor
529 # and class docstrings as well, if available.
529 # and class docstrings as well, if available.
530 'docstring' : str,
530 'docstring' : str,
531
531
532 # For instances, provide the constructor and class docstrings
532 # For instances, provide the constructor and class docstrings
533 'init_docstring' : str,
533 'init_docstring' : str,
534 'class_docstring' : str,
534 'class_docstring' : str,
535
535
536 # If it's a callable object whose call method has a separate docstring and
536 # If it's a callable object whose call method has a separate docstring and
537 # definition line:
537 # definition line:
538 'call_def' : str,
538 'call_def' : str,
539 'call_docstring' : str,
539 'call_docstring' : str,
540
540
541 # If detail_level was 1, we also try to find the source code that
541 # If detail_level was 1, we also try to find the source code that
542 # defines the object, if possible. The string 'None' will indicate
542 # defines the object, if possible. The string 'None' will indicate
543 # that no source was found.
543 # that no source was found.
544 'source' : str,
544 'source' : str,
545 }
545 }
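
An abridged reply for a simple user-defined function might look like this
(illustrative values, with most empty fields omitted)::

    content = {
        'name' : 'f',
        'found' : True,
        'ismagic' : False,
        'isalias' : False,
        'namespace' : 'interactive',
        'type_name' : 'function',
        'definition' : 'f(x)',
        'docstring' : 'Add one to x.',
    }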
547
547
Complete
--------

Message type: ``complete_request``::

    content = {
        # The text to be completed, such as 'a.is'
        'text' : str,

        # The full line, such as 'print a.is'. This allows completers to
        # make decisions that may require information about more than just the
        # current word.
        'line' : str,

        # The entire block of text where the line is. This may be useful in the
        # case of multiline completions where more context may be needed. Note: if
        # in practice this field proves unnecessary, remove it to lighten the
        # messages.
        'block' : str,

        # The position of the cursor where the user hit 'TAB' on the line.
        'cursor_pos' : int,
    }

Message type: ``complete_reply``::

    content = {
        # The list of all matches to the completion request, such as
        # ['a.isalnum', 'a.isalpha'] for the above example.
        'matches' : list
    }
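
As a concrete illustration, a completion exchange for the ``a.is`` example
above might carry content like this (values are hypothetical, and the
surrounding message envelope is omitted)::

    # client -> kernel
    content = {
        'text' : 'a.is',
        'line' : 'print a.is',
        'block' : 'a = "hello"\nprint a.is',
        'cursor_pos' : 10,
    }

    # kernel -> client
    content = {
        'matches' : ['a.isalnum', 'a.isalpha', 'a.isdigit'],
    }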
580
580
581
581
History
-------

For clients to explicitly request history from a kernel. The kernel has all
the actual execution history stored in a single location, so clients can
request it from the kernel when needed.

Message type: ``history_request``::

    content = {

        # If True, also return output history in the resulting dict.
        'output' : bool,

        # If True, return the raw input history, else the transformed input.
        'raw' : bool,

        # So far, this can be 'range', 'tail' or 'search'.
        'hist_access_type' : str,

        # If hist_access_type is 'range', get a range of input cells. session can
        # be a positive session number, or a negative number to count back from
        # the current session.
        'session' : int,
        # start and stop are line numbers within that session.
        'start' : int,
        'stop' : int,

        # If hist_access_type is 'tail', get the last n cells.
        'n' : int,

        # If hist_access_type is 'search', get cells matching the specified glob
        # pattern (with * and ? as wildcards).
        'pattern' : str,

    }

Message type: ``history_reply``::

    content = {
        # A list of 3-tuples, either:
        # (session, line_number, input) or
        # (session, line_number, (input, output)),
        # depending on whether output was False or True, respectively.
        'history' : list,
    }
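
For instance, a client asking for the last two raw input cells of the current
session, and a sketch of the corresponding reply (history values are
hypothetical)::

    # history_request
    content = {
        'output' : False,
        'raw' : True,
        'hist_access_type' : 'tail',
        'n' : 2,
    }

    # history_reply
    content = {
        'history' : [(1, 7, 'x = 10'), (1, 8, 'print x')],
    }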
628
628
629
629
Connect
-------

When a client connects to the request/reply socket of the kernel, it can issue
a connect request to get basic information about the kernel, such as the ports
the other ZeroMQ sockets are listening on. This allows clients to only have
to know about a single port (the DEALER/ROUTER channel) to connect to a kernel.

Message type: ``connect_request``::

    content = {}

Message type: ``connect_reply``::

    content = {
        'xrep_port' : int,  # The port the ROUTER socket is listening on.
        'pub_port' : int,   # The port the PUB socket is listening on.
        'req_port' : int,   # The port the REQ socket is listening on.
        'hb_port' : int,    # The port the heartbeat socket is listening on.
    }
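
A filled-in reply might look like this (port numbers are hypothetical)::

    content = {
        'xrep_port' : 57023,
        'pub_port' : 57024,
        'req_port' : 57025,
        'hb_port' : 57026,
    }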
651
651
652
652
653
653
Kernel shutdown
---------------

The clients can request the kernel to shut itself down; this is used in
multiple cases:

- when the user chooses to close the client application via a menu or window
  control.
- when the user types 'exit' or 'quit' (or their uppercase magic equivalents).
- when the user chooses a GUI method (like the 'Ctrl-C' shortcut in the
  IPythonQt client) to force a kernel restart to get a clean kernel without
  losing client-side state like history or inlined figures.

The client sends a shutdown request to the kernel, and once it receives the
reply message (which is otherwise empty), it can assume that the kernel has
completed shutdown safely.

Upon their own shutdown, client applications will typically execute a
last-minute sanity check and forcefully terminate any kernel that is still
alive, to avoid leaving stray processes on the user's machine.

Both the shutdown request and its reply carry only a single ``restart`` flag,
indicating whether the shutdown is final or precedes a restart.

Message type: ``shutdown_request``::

    content = {
        'restart' : bool  # whether the shutdown is final, or precedes a restart
    }

Message type: ``shutdown_reply``::

    content = {
        'restart' : bool  # whether the shutdown is final, or precedes a restart
    }

.. Note::

   When the clients detect a dead kernel thanks to inactivity on the heartbeat
   socket, they simply send a forceful process termination signal, since a dead
   process is unlikely to respond in any useful way to messages.
695
695
696
696
Messages on the PUB/SUB socket
==============================

Streams (stdout, stderr, etc)
-----------------------------

Message type: ``stream``::

    content = {
        # The name of the stream is one of 'stdin', 'stdout', 'stderr'
        'name' : str,

        # The data is an arbitrary string to be written to that stream
        'data' : str,
    }
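
For example, a ``print`` statement executed on the kernel might be broadcast
as (data illustrative)::

    content = {
        'name' : 'stdout',
        'data' : 'hello, world\n',
    }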
712
712
When a kernel receives a raw_input call, it should also broadcast it on the pub
socket with the names 'stdin' and 'stdin_reply'. This will allow other clients
to monitor/display kernel interactions and possibly replay them to their user
or otherwise expose them.
717
717
Display Data
------------

This type of message is used to bring back data that should be displayed (text,
html, svg, etc.) in the frontends. This data is published to all frontends.
Each message can have multiple representations of the data; it is up to the
frontend to decide which to use and how. A single message should contain all
possible representations of the same information. Each representation should
be a JSON'able data structure, and should be a valid MIME type.

Some questions remain about this design:

* Do we use this message type for pyout/displayhook? Probably not, because
  the displayhook also has to handle the Out prompt display. On the other hand
  we could put that information into the metadata section.

Message type: ``display_data``::

    content = {

        # Who created the data
        'source' : str,

        # The data dict contains key/value pairs, where the keys are MIME
        # types and the values are the raw data of the representation in that
        # format. The data dict must minimally contain the ``text/plain``
        # MIME type which is used as a backup representation.
        'data' : dict,

        # Any metadata that describes the data
        'metadata' : dict
    }
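
For instance, a figure might be published with two representations at once; a
sketch, with hypothetical source name and payloads::

    content = {
        'source' : 'IPython.core.displaypub',  # hypothetical source name
        'data' : {
            'text/plain' : '<matplotlib.figure.Figure at 0x30a9bd0>',
            'image/svg+xml' : '<svg xmlns="http://www.w3.org/2000/svg">...</svg>',
        },
        'metadata' : {},
    }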
750
750
Python inputs
-------------

These messages are the re-broadcast of the ``execute_request``.

Message type: ``pyin``::

    content = {
        'code' : str  # Source code to be executed, one or more lines
    }
761
761
Python outputs
--------------

When Python produces output from code that has been compiled in with the
'single' flag to :func:`compile`, any expression that produces a value (such as
``1+1``) is passed to ``sys.displayhook``, which is a callable that can do with
this value whatever it wants. The default behavior of ``sys.displayhook`` in
the Python interactive prompt is to print to ``sys.stdout`` the :func:`repr` of
the value as long as it is not ``None`` (which isn't printed at all). In our
case, the kernel instantiates as ``sys.displayhook`` an object which has
similar behavior, but which instead of printing to stdout, broadcasts these
values as ``pyout`` messages for clients to display appropriately.
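
A minimal sketch of this idea, not the actual kernel code (the ``publish``
callable standing in for the real ZMQ session machinery is hypothetical)::

    import sys

    class PublishingDisplayHook(object):
        """Broadcast reprs as pyout-style content instead of printing them."""
        def __init__(self, publish):
            self.publish = publish          # hypothetical: wraps a PUB send
            self.execution_count = 0

        def __call__(self, value):
            if value is None:               # mirror the default hook: skip None
                return
            self.execution_count += 1
            self.publish({'execution_count' : self.execution_count,
                          'data' : {'text/plain' : repr(value)}})

    # sys.displayhook = PublishingDisplayHook(my_publish_function)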
774
774
IPython's displayhook can handle multiple simultaneous formats depending on its
configuration. The default pretty-printed repr text is always given with the
``data`` entry in this message. Any other formats are provided in the
``extra_formats`` list. Frontends are free to display any or all of these
according to their capabilities. The ``extra_formats`` list contains 3-tuples
of an ID string, a type string, and the data. The ID is unique to the formatter
implementation that created the data. Frontends will typically ignore the ID
unless they have requested a particular formatter. The type string tells the
frontend how to interpret the data. It is often, but not always, a MIME type.
Frontends should ignore types that they do not understand. The data itself is
any JSON object and depends on the format. It is often, but not always, a string.
786
786
Message type: ``pyout``::

    content = {

        # The counter for this execution is also provided so that clients can
        # display it, since IPython automatically creates variables called _N
        # (for prompt N).
        'execution_count' : int,

        # The data dict contains key/value pairs, where the keys are MIME
        # types and the values are the raw data of the representation in that
        # format. The data dict must minimally contain the ``text/plain``
        # MIME type which is used as a backup representation.
        'data' : dict,

    }
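
For example, evaluating ``1+1`` at the fourth prompt might broadcast
(values illustrative)::

    content = {
        'execution_count' : 4,
        'data' : {'text/plain' : '2'},
    }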
803
803
Python errors
-------------

When an error occurs during code execution, the kernel broadcasts it so that
all frontends can report the failure.

Message type: ``pyerr``::

    content = {
        # Similar content to the execute_reply messages for the 'error' case,
        # except the 'status' field is omitted.
    }
815
815
Kernel status
-------------

This message type is used by frontends to monitor the status of the kernel.

Message type: ``status``::

    content = {
        # When the kernel starts to execute code, it will enter the 'busy'
        # state and when it finishes, it will enter the 'idle' state.
        'execution_state' : ('busy', 'idle')
    }
828
828
Kernel crashes
--------------

When the kernel has an unexpected exception, caught by the last-resort
sys.excepthook, we should broadcast the crash handler's output before exiting.
This will allow clients to notice that a kernel died, inform the user and
propose further actions.

Message type: ``crash``::

    content = {
        # Similarly to the 'error' case for execute_reply messages, this will
        # contain exc_name, exc_type and traceback fields.

        # An additional field with supplementary information such as where to
        # send the crash message
        'info' : str,
    }
847
847
848
848
Future ideas
------------

Other potential message types, currently unimplemented, are listed below as ideas.

Message type: ``file``::

    content = {
        'path' : 'cool.jpg',
        'mimetype' : str,
        'data' : str,
    }
861
861
862
862
Messages on the REQ/REP socket
==============================

This is a socket that goes in the opposite direction: from the kernel to a
*single* frontend, and its purpose is to allow ``raw_input`` and similar
operations that read from ``sys.stdin`` on the kernel to be fulfilled by the
client. For now we will keep these messages as simple as possible, since they
basically only mean to convey the ``raw_input(prompt)`` call.

Message type: ``input_request``::

    content = { 'prompt' : str }

Message type: ``input_reply``::

    content = { 'value' : str }
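
For example, ``name = raw_input('Name: ')`` in kernel code would drive an
exchange like this (the reply value is hypothetical)::

    # kernel -> client
    content = { 'prompt' : 'Name: ' }

    # client -> kernel
    content = { 'value' : 'Brian' }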
879
879
.. Note::

   We do not explicitly try to forward the raw ``sys.stdin`` object, because in
   practice the kernel should behave like an interactive program. When a
   program is opened on the console, the keyboard effectively takes over the
   ``stdin`` file descriptor, and it can't be used for raw reading anymore.
   Since the IPython kernel effectively behaves like a console program (albeit
   one whose "keyboard" is actually living in a separate process and
   transported over the zmq connection), raw ``stdin`` isn't expected to be
   available.
890
890
891
891
Heartbeat for kernels
=====================

Initially we had considered using messages like those above over ZMQ for a
kernel 'heartbeat' (a way to detect quickly and reliably whether a kernel is
alive at all, even if it may be busy executing user code). But this has the
problem that if the kernel is locked inside extension code, it wouldn't execute
the python heartbeat code. It turns out, however, that we can implement a basic
heartbeat with pure ZMQ, without using any Python messaging at all.

The monitor sends out a single zmq message (right now, it is a str of the
monitor's lifetime in seconds), and gets the same message right back, prefixed
with the zmq identity of the DEALER socket in the heartbeat process. This can be
a uuid, or even a full message, but there doesn't seem to be a need for packing
up a message when the sender and receiver are the exact same Python object.
907
907
The model is this::

    monitor.send(str(self.lifetime))  # '1.2345678910'

and the monitor receives some number of messages of the form::

    ['uuid-abcd-dead-beef', '1.2345678910']
915
915
where the first part is the zmq.IDENTITY of the heart's DEALER on the engine, and
the rest is the message sent by the monitor. No Python code ever has any
access to the message between the monitor's send and the monitor's recv.
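
A self-contained sketch of the scheme in pyzmq (the localhost ports and
identity value are hypothetical; the real heart runs its ``zmq.FORWARDER``
device in a separate process)::

    import threading
    import time
    import zmq

    ctx = zmq.Context()

    # Monitor side: pings go out on a PUB socket, pongs come back on a ROUTER.
    ping = ctx.socket(zmq.PUB)
    ping.bind('tcp://127.0.0.1:5555')
    pong = ctx.socket(zmq.ROUTER)
    pong.bind('tcp://127.0.0.1:5556')

    def heart():
        # Heart side: a pure-zmq FORWARDER relays SUB -> DEALER, with no
        # Python code in the message path.
        sub = ctx.socket(zmq.SUB)
        sub.setsockopt(zmq.SUBSCRIBE, b'')
        sub.connect('tcp://127.0.0.1:5555')
        dealer = ctx.socket(zmq.DEALER)
        dealer.setsockopt(zmq.IDENTITY, b'uuid-abcd-dead-beef')
        dealer.connect('tcp://127.0.0.1:5556')
        zmq.device(zmq.FORWARDER, sub, dealer)

    t = threading.Thread(target=heart)
    t.daemon = True                 # the FORWARDER loop never returns
    t.start()
    time.sleep(1)                   # give the connections a moment to settle

    ping.send(b'1.2345678910')      # monitor.send(str(self.lifetime))
    print(pong.recv_multipart())    # [b'uuid-abcd-dead-beef', b'1.2345678910']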
919
919
920
920
ToDo
====

Missing things include:

* Important: finish thinking through the payload concept and API.

* Important: ensure that we have a good solution for magics like %edit. It's
  likely that with the payload concept we can build a full solution, but it is
  not 100% clear yet.

* Finishing the details of the heartbeat protocol.

* Signal handling: specify what kind of information the kernel should broadcast
  (or not) when it receives signals.

.. include:: ../links.rst

.. _parallel_connections:

==============================================
Connection Diagrams of The IPython ZMQ Cluster
==============================================

This is a quick summary and illustration of the connections involved in the ZeroMQ-based
IPython cluster for parallel computing.
9
9
All Connections
===============

The IPython cluster consists of a Controller, and one or more each of clients and engines.
The goal of the Controller is to manage and monitor the connections and communications
between the clients and the engines. The Controller is no longer a single process entity,
but rather a collection of processes - specifically one Hub, and 4 (or more) Schedulers.

It is important for security/practicality reasons that all connections be inbound to the
controller processes. The arrows in the figures indicate the direction of the
connection.
21
21
22
22
.. figure:: figs/allconnections.png
    :width: 432px
    :alt: IPython cluster connections
    :align: center

    All the connections involved in connecting one client to one engine.

The Controller consists of 1-4 processes. Central to the cluster is the **Hub**, which monitors
engine state, execution traffic, and handles registration and notification. The Hub includes a
Heartbeat Monitor for keeping track of engines that are alive. Outside the Hub are 4
**Schedulers**. These devices are very small pure-C MonitoredQueue processes (or optionally
threads) that relay messages very fast, but also send a copy of each message along a side socket
to the Hub. The MUX queue and Control queue are MonitoredQueue ØMQ devices which relay
explicitly addressed messages from clients to engines, and their replies back up. The Balanced
queue performs load-balanced, destination-agnostic scheduling. It may be a MonitoredQueue
device, but may also be a Python Scheduler that behaves externally in an identical fashion to MQ
devices, but with additional internal logic. stdout/err are also propagated from the Engines to
the clients via a PUB/SUB MonitoredQueue.
41
41
42
42
Registration
------------

.. figure:: figs/queryfade.png
    :width: 432px
    :alt: IPython Registration connections
    :align: center

    Engines and Clients only need to know where the Query ``ROUTER`` is located to start
    connecting.

Once a controller is launched, the only information needed for connecting clients and/or
engines is the IP/port of the Hub's ``ROUTER`` socket, called the Registrar. This socket
handles connections from both clients and engines, and replies with the remaining
information necessary to establish the other connections. Clients use this same socket for
querying the Hub for state information.
59
59
Heartbeat
---------

.. figure:: figs/hbfade.png
    :width: 432px
    :alt: IPython Heartbeat connections
    :align: center

    The heartbeat sockets.

The heartbeat process has been described elsewhere. To summarize: the Heartbeat Monitor
publishes a distinct message periodically via a ``PUB`` socket. Each engine has a
``zmq.FORWARDER`` device with a ``SUB`` socket for input, and a ``DEALER`` socket for output.
The ``SUB`` socket is connected to the ``PUB`` socket labeled *ping*, and the ``DEALER`` is
connected to the ``ROUTER`` labeled *pong*. This results in the same message being relayed
back to the Heartbeat Monitor with the addition of the ``DEALER`` prefix. The Heartbeat
Monitor receives all the replies via a ``ROUTER`` socket, and identifies which hearts are
still beating by the ``zmq.IDENTITY`` prefix of the ``DEALER`` sockets, information that
the Hub uses to notify clients of any changes in the available engines.
79
79
Schedulers
----------

.. figure:: figs/queuefade.png
    :width: 432px
    :alt: IPython Queue connections
    :align: center

    Control message scheduler on the left, execution (apply) schedulers on the right.

The controller has at least three Schedulers. These devices are primarily for
relaying messages between clients and engines, but the Hub needs to see those
messages for its own purposes. Since no Python code may exist between the two sockets in a
queue, all messages sent through these queues (both directions) are also sent via a
``PUB`` socket to a monitor, which allows the Hub to monitor queue traffic without
interfering with it.

For tasks, the engine need not be specified. Messages sent to the ``ROUTER`` socket from the
client side are assigned to an engine via ZMQ's ``DEALER`` round-robin load balancing.
Engine replies are directed to specific clients via the IDENTITY of the client, which is
received as a prefix at the Engine.

For Multiplexing, ``ROUTER`` is used for both input and output sockets in the device. Clients
must specify the destination by the ``zmq.IDENTITY`` of the ``ROUTER`` socket connected to
the downstream end of the device.

At the Kernel level, both of these ``ROUTER`` sockets are treated in the same way as the ``REP``
socket in the serial version (except using ZMQStreams instead of explicit sockets).

Execution can be done in a load-balanced (engine-agnostic) or multiplexed (engine-specified)
manner. The sockets on the Client and Engine are the same for these two actions, but the
scheduler used determines the actual behavior. This routing is done via the ``zmq.IDENTITY`` of
the upstream sockets in each MonitoredQueue.
113
113
IOPub
-----

.. figure:: figs/iopubfade.png
    :width: 432px
    :alt: IOPub connections
    :align: center

    stdout/err are published via a ``PUB/SUB`` MonitoredQueue

On the kernels, stdout/stderr are captured and published via a ``PUB`` socket. These ``PUB``
sockets all connect to a ``SUB`` socket input of a MonitoredQueue, which subscribes to all
messages. They are then republished via another ``PUB`` socket, which can be
subscribed by the clients.
129
129
Client connections
------------------

.. figure:: figs/queryfade.png
    :width: 432px
    :alt: IPython client query connections
    :align: center

    Clients connect to a ``ROUTER`` socket to query the hub.

The hub's registrar ``ROUTER`` socket also listens for queries from clients as to queue status,
and control instructions. Clients connect to this socket via a ``DEALER`` during registration.

.. figure:: figs/notiffade.png
    :width: 432px
    :alt: IPython Registration connections
    :align: center

    Engine registration events are published via a ``PUB`` socket.

The Hub publishes all registration/unregistration events via a ``PUB`` socket. This
allows clients to stay up to date with what engines are available by subscribing to the
feed with a ``SUB`` socket. Other processes could selectively subscribe to just
registration or unregistration events.
154
154
.. _parallel_messages:

Messaging for Parallel Computing
================================

This is an extension of the :ref:`messaging <messaging>` doc. Diagrams of the connections
can be found in the :ref:`parallel connections <parallel_connections>` doc.

ZMQ messaging is also used in the parallel computing IPython system. All messages to/from
kernels remain the same as the single kernel model, and are forwarded through a ZMQ Queue
device. The controller receives all messages and replies in these channels, and saves
results for future use.
14
14
The Controller
--------------

The controller is the central collection of processes in the IPython parallel computing
model. It has two major components:

* The Hub
* A collection of Schedulers

The Hub
-------

The Hub is the central process for monitoring the state of the engines, and all task
requests and results. It has no role in execution and does no relay of messages, so
large blocking requests or database actions in the Hub do not have the ability to impede
job submission and results.
31
31
Registration (``ROUTER``)
*************************

The first function of the Hub is to facilitate and monitor connections of clients
and engines. Both client and engine registration are handled by the same socket, so only
one ip/port pair is needed to register any number of engines and clients.

Engines register with the ``zmq.IDENTITY`` of their two ``DEALER`` sockets, one for the
queue, which receives execute requests, and one for the heartbeat, which is used to
monitor the survival of the Engine process.
42
42
Message type: ``registration_request``::

    content = {
        'queue' : 'abcd-1234-...',     # the MUX queue zmq.IDENTITY
        'control' : 'abcd-1234-...',   # the control queue zmq.IDENTITY
        'heartbeat' : 'abcd-1234-...'  # the heartbeat zmq.IDENTITY
    }

.. note::

    these are always the same, at least for now.
54
54
The Controller replies to an Engine's registration request with the engine's integer ID,
and all the remaining connection information for connecting the heartbeat process, and
kernel queue socket(s). The message status will be an error if the Engine requests IDs that
are already in use.

Message type: ``registration_reply``::

    content = {
        'status' : 'ok',  # or 'error'
        # if ok:
        'id' : 0,  # int, the engine id
        'queue' : 'tcp://127.0.0.1:12345',  # connection for engine side of the queue
        'control' : 'tcp://...',  # addr for control queue
        'heartbeat' : ('tcp://...','tcp://...'),  # tuple containing two interfaces needed for heartbeat
        'task' : 'tcp://...',  # addr for task queue, or None if no task queue running
    }
71
71
Clients use the same socket as engines to start their connections. Connection requests
from clients need no information:

Message type: ``connection_request``::

    content = {}

The reply to a Client registration request contains the connection information for the
multiplexer and load balanced queues, as well as the address for direct hub
queries. If any of these addresses is `None`, that functionality is not available.

Message type: ``connection_reply``::

    content = {
        'status' : 'ok',  # or 'error'
        # if ok:
        'queue' : 'tcp://127.0.0.1:12345',  # connection for client side of the MUX queue
        'task' : ('lru','tcp...'),  # routing scheme and addr for task queue (len 2 tuple)
        'query' : 'tcp...',  # addr for methods to query the hub, like queue_request, etc.
        'control' : 'tcp...',  # addr for control methods, like abort, etc.
    }
93
93
Heartbeat
*********

The hub uses a heartbeat system to monitor engines, and track when they become
unresponsive, as described in :ref:`messaging <messaging>` and shown in
:ref:`connections <parallel_connections>`.
100
100
Notification (``PUB``)
**********************

The hub publishes all engine registration/unregistration events on a ``PUB`` socket.
This allows clients to have up-to-date engine ID sets without polling. Registration
notifications contain both the integer engine ID and the queue ID, which is necessary for
sending messages via the Multiplexer Queue and Control Queues.

Message type: ``registration_notification``::

    content = {
        'id' : 0,  # engine ID that has been registered
        'queue' : 'engine_id'  # the IDENT for the engine's queue
    }

Message type: ``unregistration_notification``::

    content = {
        'id' : 0  # engine ID that has been unregistered
    }
121
121
122
122
Client Queries (``ROUTER``)
***************************

The hub monitors and logs all queue traffic, so that clients can retrieve past
results or monitor pending tasks. This information may reside in-memory on the Hub, or
on disk in a database (SQLite and MongoDB are currently supported). These requests are
handled by the same socket as registration.

:func:`queue_request` requests can specify multiple engines to query via the `targets`
element. A verbose flag can be passed, to determine whether the result should be the list
of `msg_ids` in the queue or simply the length of each list.
135
135
Message type: ``queue_request``::

    content = {
        'verbose' : True,  # whether return should be the lists themselves or just their lengths
        'targets' : [0,3,1]  # list of ints
    }

The content of a reply to a :func:`queue_request` request is a dict, keyed by the engine
IDs. Note that they will be the string representation of the integer keys, since JSON
cannot handle number keys. The three keys of each dict are::

    'completed' : messages submitted via any queue that ran on the engine
    'queue' : jobs submitted via MUX queue, whose results have not been received
    'tasks' : tasks that are known to have been submitted to the engine, but
              have not completed. Note that with the pure zmq scheduler, this will
              always be 0/[].

Message type: ``queue_reply``::

    content = {
        'status' : 'ok',  # or 'error'
        # if verbose=False:
        '0' : {'completed' : 1, 'queue' : 7, 'tasks' : 0},
        # if verbose=True:
        '1' : {'completed' : ['abcd-...','1234-...'], 'queue' : ['58008-'], 'tasks' : []},
    }
162
162
Clients can request individual results directly from the hub. This is primarily for
gathering results of executions not submitted by the requesting client, as the client
will have all its own results already. Requests are made by msg_id, and can contain one
or more msg_ids. An additional boolean key 'statusonly' can be used to not request the
results, but simply poll the status of the jobs.

Message type: ``result_request``::

    content = {
        'msg_ids' : ['uuid','...'],  # list of strs
        'targets' : [1,2,3],  # list of int ids or uuids
        'statusonly' : False,  # bool
    }

The :func:`result_request` reply contains the content objects of the actual execution
reply messages. If `statusonly=True`, then there will be only the 'pending' and
'completed' lists.

Message type: ``result_reply``::

    content = {
        'status' : 'ok',  # else error
        # if ok:
        'acbd-...' : msg,  # the content dict is keyed by msg_ids,
                           # values are the result messages
        # there will be none of these if `statusonly=True`
        'pending' : ['msg_id','...'],  # msg_ids still pending
        'completed' : ['msg_id','...'],  # list of completed msg_ids
    }
    buffers = ['bufs','...']  # the buffers that contained the results of the objects.
                              # this will be empty if no messages are complete, or if
                              # statusonly is True.

For memory management purposes, Clients can also instruct the hub to forget the
results of messages. This can be done by message ID or engine ID. Individual messages are
dropped by msg_id, and all messages completed on an engine are dropped by engine ID. This
may no longer be necessary with the mongodb-based message logging backend.

If the msg_ids element is the string ``'all'`` instead of a list, then all completed
results are forgotten.

Message type: ``purge_request``::

    content = {
        'msg_ids' : ['id1', 'id2',...], # list of msg_ids or 'all'
        'engine_ids' : [0,2,4] # list of engine IDs
    }

The reply to a purge request is simply the status 'ok' if the request succeeded, or an
explanation of why it failed, such as requesting the purge of a nonexistent or pending
message.

Message type: ``purge_reply``::

    content = {
        'status' : 'ok', # or 'error'
    }
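
On the client side, a purge is typically issued through the :class:`Client`; a minimal
sketch (assuming ``rc`` is a connected :class:`Client`, and that its :meth:`purge_results`
method accepts msg_ids or ``'all'`` as described above):

.. sourcecode:: ipython

    In [7]: rc.purge_results(jobs=['id1', 'id2'])  # drop two results by msg_id

    In [8]: rc.purge_results(jobs='all')           # forget all completed results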


Schedulers
----------

There are three basic schedulers:

* Task Scheduler
* MUX Scheduler
* Control Scheduler

The MUX and Control schedulers are simple MonitoredQueue ØMQ devices, with ``ROUTER``
sockets on either side. This allows the queue to relay individual messages to particular
targets via ``zmq.IDENTITY`` routing. The Task scheduler may be a MonitoredQueue ØMQ
device, in which case the client-facing socket is ``ROUTER``, and the engine-facing socket
is ``DEALER``. The result of this is that client-submitted messages are load-balanced via
the ``DEALER`` socket, but the engine's replies to each message go to the requesting client.

Raw ``DEALER`` scheduling is quite primitive, and doesn't allow message introspection, so
there are also Python Schedulers that can be used. These Schedulers behave in much the
same way as a MonitoredQueue does from the outside, but have rich internal logic to
determine destinations, as well as handle dependency graphs. Their sockets are always
``ROUTER`` on both sides.

The Python task schedulers have an additional message type, which informs the Hub of
the destination of a task as soon as that destination is known.

Message type: ``task_destination``::

    content = {
        'msg_id' : 'abcd-1234-...', # the msg's uuid
        'engine_id' : '1234-abcd-...', # the destination engine's zmq.IDENTITY
    }

:func:`apply` and :func:`apply_bound`
*************************************

In terms of message classes, the MUX scheduler and Task scheduler relay the exact same
message types. Their only difference lies in how the destination is selected.

The `Namespace <http://gist.github.com/483294>`_ model suggests that execution be able to
use the model::

    ns.apply(f, *args, **kwargs)

which takes `f`, a function in the user's namespace, and executes ``f(*args, **kwargs)``
on a remote engine, returning the result (or, for non-blocking, information facilitating
later retrieval of the result). This model, unlike the execute message, which just uses a
code string, must be able to send arbitrary (pickleable) Python objects. Ideally, it
should also copy as little data as possible. The `buffers` property of a Message was
introduced for this purpose.

Utility method :func:`build_apply_message` in :mod:`IPython.zmq.streamsession` wraps a
function signature and builds a sendable buffer format for minimal data copying (exactly
zero copies of numpy array data or buffers or large strings).
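
A hedged sketch of building such a message (this assumes :func:`build_apply_message` is
importable from :mod:`IPython.zmq.streamsession` as a module-level function, per the
description above):

.. sourcecode:: python

    # a sketch; the import location follows the text above
    from IPython.zmq.streamsession import build_apply_message

    def add(a, b):
        return a + b

    # a list of sendable buffers: the function, args, and kwargs serialized
    # without copying any large buffers they contain
    bufs = build_apply_message(add, (1, 2), {})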

Message type: ``apply_request``::

    content = {
        'bound' : True, # whether to execute in the engine's namespace or unbound
        'after' : ['msg_id',...], # list of msg_ids or output of Dependency.as_dict()
        'follow' : ['msg_id',...], # list of msg_ids or output of Dependency.as_dict()
    }
    buffers = ['...'] # at least 3 in length
                      # as built by build_apply_message(f,args,kwargs)

after/follow represent task dependencies. 'after' corresponds to a time dependency. The
request will not arrive at an engine until the 'after' dependency tasks have completed.
'follow' corresponds to a location dependency. The task will be submitted to the same
engine as these msg_ids (see :class:`Dependency` docs for details).

Message type: ``apply_reply``::

    content = {
        'status' : 'ok' # 'ok' or 'error'
        # other error info here, as in other messages
    }
    buffers = ['...'] # either 1 or 2 in length
                      # a serialization of the return value of f(*args,**kwargs)
                      # only populated if status is 'ok'

All engine execution and data movement is performed via apply messages.

Control Messages
----------------

Messages that interact with the engines, but are not meant to execute code, are submitted
via the Control queue. These messages have high priority, and are thus received and
handled before any execution requests.

Clients may want to clear the namespace on the engine. There are no arguments or
information involved in this request, so the content is empty.

Message type: ``clear_request``::

    content = {}

Message type: ``clear_reply``::

    content = {
        'status' : 'ok' # 'ok' or 'error'
        # other error info here, as in other messages
    }

Clients may want to abort tasks that have not yet run. This can be done by message ID, or
all enqueued messages can be aborted if None is specified.

Message type: ``abort_request``::

    content = {
        'msg_ids' : ['1234-...', '...'] # list of msg_ids or None
    }

Message type: ``abort_reply``::

    content = {
        'status' : 'ok' # 'ok' or 'error'
        # other error info here, as in other messages
    }

The last action a client may want to take is to shut down the kernel. If a kernel
receives a shutdown request, then it aborts all queued messages, replies to the request,
and exits.

Message type: ``shutdown_request``::

    content = {}

Message type: ``shutdown_reply``::

    content = {
        'status' : 'ok' # 'ok' or 'error'
        # other error info here, as in other messages
    }
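
From the client's perspective, these control messages correspond to :class:`Client`
methods; a hedged sketch (assuming ``rc`` is a connected :class:`Client`; the exact
keyword names are assumptions):

.. sourcecode:: ipython

    In [9]: rc.clear(targets=[0, 1])      # clear_request: wipe those engines' namespaces

    In [10]: rc.abort()                   # abort_request: abort all queued tasks

    In [11]: rc.shutdown(targets='all')   # shutdown_request: stop every engine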


Implementation
--------------

There are a few differences in implementation between the `StreamSession` object used in
the newparallel branch and the `Session` object, the main one being that messages are
sent in parts, rather than as a single serialized object. `StreamSession` objects also
take pack/unpack functions, which are to be used when serializing/deserializing objects.
These can be any functions that translate to/from formats that ZMQ sockets can send
(buffers, bytes, etc.).

Split Sends
***********

Previously, messages were bundled as a single JSON object and sent with one call to
:func:`socket.send_json`. Since the hub inspects all messages, but doesn't need to
see the content of the messages, which can be large, messages are now serialized and sent in
pieces. All messages are sent in at least 3 parts: the header, the parent header, and the
content. This allows the controller to unpack and inspect the (always small) header,
without spending time unpacking the content unless the message is bound for the
controller. Buffers are added on to the end of the message, and can be any objects that
present the buffer interface.
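
A hedged sketch of a split send (this assumes ``session`` is a `StreamSession`,
``socket`` is a connected 0MQ socket, and that :meth:`send` accepts a ``buffers``
keyword; treat the exact signature as an assumption):

.. sourcecode:: python

    # a sketch; the message parts and buffers behave as described above
    import numpy

    data = numpy.arange(10)  # any object exposing the buffer interface
    session.send(socket, 'apply_request',
                 content=dict(bound=True),  # small: unpacked and inspected by the Hub
                 buffers=[data])            # large: relayed without unpacking
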
.. _parallel_task:

==========================
The IPython task interface
==========================

The task interface to the cluster presents the engines as a fault tolerant,
dynamic load-balanced system of workers. Unlike the multiengine interface, in
the task interface the user has no direct access to individual engines. By
allowing the IPython scheduler to assign work, this interface is simultaneously
simpler and more powerful.

Best of all, the user can use both of these interfaces running at the same time
to take advantage of their respective strengths. When users can break up
their work into segments that do not depend on previous execution, the
task interface is ideal. But it also has more power and flexibility, allowing
the user to guide the distribution of jobs, without having to assign tasks to
engines explicitly.

Starting the IPython controller and engines
===========================================

To follow along with this tutorial, you will need to start the IPython
controller and four IPython engines. The simplest way of doing this is to use
the :command:`ipcluster` command::

    $ ipcluster start -n 4

For more detailed information about starting the controller and engines, see
our :ref:`introduction <parallel_overview>` to using IPython for parallel computing.

Creating a ``Client`` instance
==============================

The first step is to import the :mod:`IPython.parallel`
module and then create a :class:`.Client` instance, and we will also be using
a :class:`LoadBalancedView`, here called `lview`:

.. sourcecode:: ipython

    In [1]: from IPython.parallel import Client

    In [2]: rc = Client()


This form assumes that the controller was started on localhost with default
configuration. If not, the location of the controller must be given as an
argument to the constructor:

.. sourcecode:: ipython

    # for a visible LAN controller listening on an external port:
    In [2]: rc = Client('tcp://192.168.1.16:10101')
    # or to connect with a specific profile you have set up:
    In [3]: rc = Client(profile='mpi')

For load-balanced execution, we will make use of a :class:`LoadBalancedView` object, which can
be constructed via the client's :meth:`load_balanced_view` method:

.. sourcecode:: ipython

    In [4]: lview = rc.load_balanced_view() # default load-balanced view

.. seealso::

    For more information, see the in-depth explanation of :ref:`Views <parallel_details>`.


Quick and easy parallelism
==========================

In many cases, you simply want to apply a Python function to a sequence of
objects, but *in parallel*. Like the multiengine interface, these can be
implemented via the task interface. The exact same tools can perform these
actions in load-balanced ways as well as multiplexed ways: a parallel version
of :func:`map` and the :func:`@parallel` function decorator. If one specifies the
argument `balanced=True`, then they are dynamically load balanced. Thus, if the
execution time per item varies significantly, you should use the versions in
the task interface.

Parallel map
------------

To load-balance :meth:`map`, simply use a LoadBalancedView:

.. sourcecode:: ipython

    In [62]: lview.block = True

    In [63]: serial_result = map(lambda x:x**10, range(32))

    In [64]: parallel_result = lview.map(lambda x:x**10, range(32))

    In [65]: serial_result==parallel_result
    Out[65]: True

Parallel function decorator
---------------------------

Parallel functions are just like normal functions, but they can be called on
sequences and *in parallel*. The multiengine interface provides a decorator
that turns any Python function into a parallel function:

.. sourcecode:: ipython

    In [10]: @lview.parallel()
       ....: def f(x):
       ....:     return 10.0*x**4
       ....:

    In [11]: f.map(range(32)) # this is done in parallel
    Out[11]: [0.0,10.0,160.0,...]

.. _parallel_dependencies:

Dependencies
============

Often, pure atomic load-balancing is too primitive for your work. In these cases, you
may want to associate some kind of `Dependency` that describes when, where, or whether
a task can be run. In IPython, we provide two types of dependencies:
`Functional Dependencies`_ and `Graph Dependencies`_.

.. note::

    It is important to note that the pure ZeroMQ scheduler does not support dependencies,
    and you will see errors or warnings if you try to use dependencies with the pure
    scheduler.

Functional Dependencies
-----------------------

Functional dependencies are used to determine whether a given engine is capable of running
a particular task. This is implemented via a special :class:`Exception` class,
:class:`UnmetDependency`, found in `IPython.parallel.error`. Its use is very simple:
if a task fails with an UnmetDependency exception, then the scheduler, instead of relaying
the error up to the client like any other error, catches the error, and submits the task
to a different engine. This will repeat indefinitely, but a task will never be submitted
to a given engine a second time.

You can manually raise the :class:`UnmetDependency` yourself, but IPython has provided
some decorators for facilitating this behavior.
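
For example, a minimal sketch of raising it by hand (the platform check and
``do_linux_stuff`` are arbitrary placeholders):

.. sourcecode:: ipython

    In [7]: from IPython.parallel.error import UnmetDependency

    In [8]: def linux_only_task():
       ...:     import sys
       ...:     if not sys.platform.startswith('linux'):
       ...:         raise UnmetDependency("this task requires Linux")
       ...:     return do_linux_stuff()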

There are two decorators and a class used for functional dependencies:

.. sourcecode:: ipython

    In [9]: from IPython.parallel import depend, require, dependent

@require
********

The simplest sort of dependency is requiring that a Python module is available. The
``@require`` decorator lets you define a function that will only run on engines where names
you specify are importable:

.. sourcecode:: ipython

    In [10]: @require('numpy', 'zmq')
       ....: def myfunc():
       ....:     return dostuff()

Now, any time you apply :func:`myfunc`, the task will only run on a machine that has
numpy and pyzmq available, and when :func:`myfunc` is called, numpy and zmq will be imported.

@depend
*******

The ``@depend`` decorator lets you decorate any function with any *other* function to
evaluate the dependency. The dependency function will be called at the start of the task,
and if it returns ``False``, then the dependency will be considered unmet, and the task
will be assigned to another engine. If the dependency returns *anything other than
``False``*, the rest of the task will continue.

.. sourcecode:: ipython

    In [10]: def platform_specific(plat):
       ....:     import sys
       ....:     return sys.platform == plat

    In [11]: @depend(platform_specific, 'darwin')
       ....: def mactask():
       ....:     do_mac_stuff()

    In [12]: @depend(platform_specific, 'nt')
       ....: def wintask():
       ....:     do_windows_stuff()

In this case, any time you apply ``mactask``, it will only run on an OSX machine.
``@depend`` is just like ``apply``, in that it has a ``@depend(f,*args,**kwargs)``
signature.

dependents
**********

You don't have to use the decorators on your tasks. If, for instance, you want
to run tasks with a single function but varying dependencies, you can directly construct
the :class:`dependent` object that the decorators use:

.. sourcecode:: ipython

    In [13]: def mytask(*args):
       ....:     dostuff()

    In [14]: mactask = dependent(mytask, platform_specific, 'darwin')
    # this is the same as decorating the declaration of mytask with @depend
    # but you can do it again:

    In [15]: wintask = dependent(mytask, platform_specific, 'nt')

    # in general:
    In [16]: t = dependent(f, g, *dargs, **dkwargs)

    # is equivalent to:
    In [17]: @depend(g, *dargs, **dkwargs)
       ....: def t(a,b,c):
       ....:     # contents of f

Graph Dependencies
------------------

Sometimes you want to restrict the time and/or location to run a given task as a function
of the time and/or location of other tasks. This is implemented via a subclass of
:class:`set`, called a :class:`Dependency`. A Dependency is just a set of `msg_ids`
corresponding to tasks, and a few attributes to guide how to decide when the Dependency
has been met.

The switches we provide for interpreting whether a given dependency set has been met:

any|all
    Whether the dependency is considered met if *any* of the dependencies are done, or
    only after *all* of them have finished. This is set by a Dependency's :attr:`all`
    boolean attribute, which defaults to ``True``.

success [default: True]
    Whether to consider tasks that succeeded as fulfilling dependencies.

failure [default: False]
    Whether to consider tasks that failed as fulfilling dependencies.
    Using `failure=True,success=False` is useful for setting up cleanup tasks, to be run
    only when tasks have failed.

Sometimes you want to run a task after another, but only if that task succeeded. In this case,
``success`` should be ``True`` and ``failure`` should be ``False``. However, sometimes you may
not care whether the task succeeds, and always want the second task to run, in which case you
should use `success=failure=True`. The default behavior is to only use successes.
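
For example, a hedged sketch of constructing a cleanup dependency directly (this assumes
:class:`Dependency` is importable from :mod:`IPython.parallel`, and that ``ar1`` and
``ar2`` are :class:`AsyncResult` objects from earlier submissions):

.. sourcecode:: ipython

    In [18]: from IPython.parallel import Dependency

    # met if *any* of the listed tasks failed: a cleanup dependency
    In [19]: cleanup_dep = Dependency(ar1.msg_ids + ar2.msg_ids,
       ....:                          all=False, success=False, failure=True)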

There are other switches for interpretation that are made at the *task* level. These are
specified via keyword arguments to the client's :meth:`apply` method.

after,follow
    You may want to run a task *after* a given set of dependencies have been run and/or
    run it *where* another set of dependencies are met. To support this, every task has an
    `after` dependency to restrict time, and a `follow` dependency to restrict
    destination.

timeout
    You may also want to set a time-limit for how long the scheduler should wait before a
    task's dependencies are met. This is done via a `timeout`, which defaults to 0, meaning
    that the task should never time out. If the timeout is reached, and the
    scheduler still hasn't been able to assign the task to an engine, the task will fail
    with a :class:`DependencyTimeout`.

.. note::

    Dependencies only work within the task scheduler. You cannot instruct a load-balanced
    task to run after a job submitted via the MUX interface.

The simplest form of Dependencies is with `all=True,success=True,failure=False`. In these cases,
you can skip using Dependency objects, and just pass msg_ids or AsyncResult objects as the
`follow` and `after` keywords to :meth:`client.apply`:

.. sourcecode:: ipython

    In [14]: client.block=False

    In [15]: ar = lview.apply(f, args, kwargs)

    In [16]: ar2 = lview.apply(f2)

    In [17]: ar3 = lview.apply_with_flags(f3, after=[ar,ar2])

    In [18]: ar4 = lview.apply_with_flags(f3, follow=[ar], timeout=2.5)


.. seealso::

    Some parallel workloads can be described as a `Directed Acyclic Graph
    <http://en.wikipedia.org/wiki/Directed_acyclic_graph>`_, or DAG. See :ref:`DAG
    Dependencies <dag_dependencies>` for an example demonstrating how to map a NetworkX DAG
    onto task dependencies.


Impossible Dependencies
***********************

The schedulers do perform some analysis on graph dependencies to determine whether they
can possibly be met. If the scheduler discovers that a dependency cannot be
met, then the task will fail with an :class:`ImpossibleDependency` error. This way, if the
scheduler realizes that a task can never be run, it won't sit indefinitely in the
scheduler clogging the pipeline.

The basic cases that are checked:

* depending on nonexistent messages
* `follow` dependencies were run on more than one machine and `all=True`
* any dependencies failed and `all=True,success=True,failure=False`
* all dependencies failed and `all=False,success=True,failure=False`

.. warning::

    This analysis has not been proven to be rigorous, so it is possible for tasks
    to become impossible to run in obscure situations; a timeout may be a good safeguard.


Retries and Resubmit
====================

Retries
-------

Another flag for tasks is `retries`. This is an integer, specifying how many times
a task should be resubmitted after failure. This is useful for tasks that should still run
if their engine was shut down, or that may have some statistical chance of failing. The default
is to not retry tasks.
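
A hedged sketch (this assumes `retries` is accepted by :meth:`apply_with_flags` like the
`after`/`follow`/`timeout` flags shown earlier, and that ``flaky_task`` is a placeholder):

.. sourcecode:: ipython

    In [20]: ar = lview.apply_with_flags(flaky_task, retries=5)  # re-run up to 5 times on failure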

Resubmit
--------

Sometimes you may want to re-run a task. This could be because it failed for some reason, and
you have fixed the error, or because you want to restore the cluster to an interrupted state.
For this, the :class:`Client` has a :meth:`rc.resubmit` method. This simply takes one or more
msg_ids, and returns an :class:`AsyncHubResult` for the result(s). You cannot resubmit
a task that is pending; only those that have finished, whether successful or unsuccessful,
can be resubmitted.
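
A minimal sketch (assuming ``rc`` is a connected :class:`Client` and ``ar`` is the
:class:`AsyncResult` of a finished task):

.. sourcecode:: ipython

    In [21]: ahr = rc.resubmit(ar.msg_ids)  # an AsyncHubResult for the re-run task(s)

    In [22]: ahr.get()  # wait for and return the new result(s)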

.. _parallel_schedulers:

Schedulers
==========

There are a variety of valid ways to determine where jobs should be assigned in a
load-balancing situation. In IPython, we support several standard schemes, and
even make it easy to define your own. The scheme can be selected via the ``scheme``
argument to :command:`ipcontroller`, or in the :attr:`TaskScheduler.schemename` attribute
of a controller config object.

The built-in routing schemes:

To select one of these schemes, simply do::

    $ ipcontroller --scheme=<schemename>
    for instance:
    $ ipcontroller --scheme=lru
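
The same choice can be made in a config file via the :attr:`TaskScheduler.schemename`
attribute mentioned above; a sketch (the attribute name follows the text above, and the
``ipcontroller_config.py`` location is an assumption):

.. sourcecode:: python

    # in ipcontroller_config.py
    c = get_config()
    c.TaskScheduler.schemename = 'weighted'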

lru: Least Recently Used

    Always assign work to the least-recently-used engine. A close relative of
    round-robin, it will be fair with respect to the number of tasks, agnostic
    with respect to runtime of each task.

plainrandom: Plain Random

    Randomly picks an engine on which to run.

twobin: Two-Bin Random

    **Requires numpy**

    Pick two engines at random, and use the LRU of the two. This is known to be better
    than plain random in many cases, but requires a small amount of computation.

leastload: Least Load

    **This is the default scheme**

    Always assign tasks to the engine with the fewest outstanding tasks (LRU breaks ties).

weighted: Weighted Two-Bin Random

    **Requires numpy**

    Pick two engines at random using the number of outstanding tasks as inverse weights,
    and use the one with the lower load.


Pure ZMQ Scheduler
------------------

For maximum throughput, the 'pure' scheme is not Python at all, but a C-level
:class:`MonitoredQueue` from PyZMQ, which uses a ZeroMQ ``DEALER`` socket to perform all
load-balancing. This scheduler does not support any of the advanced features of the Python
:class:`.Scheduler`.
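
To select it, use the scheme name ``pure``::

    $ ipcontroller --scheme=pure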

Disabled features when using the ZMQ Scheduler:

* Engine unregistration
  Task farming will be disabled if an engine unregisters.
  Further, if an engine is unregistered during computation, the scheduler may not recover.
* Dependencies
  Since there is no Python logic inside the Scheduler, routing decisions cannot be made
  based on message content.
* Early destination notification
  The Python schedulers know which engine gets which task, and notify the Hub. This
  allows graceful handling of Engines coming and going. There is no way to know
  where ZeroMQ messages have gone, so there is no way to know what tasks are on which
  engine until they *finish*. This makes recovery from engine shutdown very difficult.


.. note::

    TODO: performance comparisons



More details
============

The :class:`LoadBalancedView` has many more powerful features that allow quite a bit
of flexibility in how tasks are defined and run. The next places to look are
in the following classes:

* :class:`~IPython.parallel.client.view.LoadBalancedView`
* :class:`~IPython.parallel.client.asyncresult.AsyncResult`
* :meth:`~IPython.parallel.client.view.LoadBalancedView.apply`
* :mod:`~IPython.parallel.controller.dependency`

The following is an overview of how to use these classes together:

1. Create a :class:`Client` and :class:`LoadBalancedView`
2. Define some functions to be run as tasks
3. Submit your tasks using the :meth:`apply` method of your
   :class:`LoadBalancedView` instance.
4. Use :meth:`Client.get_result` to get the results of the
   tasks, or use the :meth:`AsyncResult.get` method of the results to wait
   for and then receive the results.
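
A minimal end-to-end sketch of these steps (assumes a running cluster, as in the
tutorial above):

.. sourcecode:: ipython

    In [1]: from IPython.parallel import Client

    In [2]: rc = Client()

    In [3]: lview = rc.load_balanced_view()

    In [4]: def square(x):
       ...:     return x**2

    In [5]: ar = lview.apply(square, 4)  # submit one task to the scheduler

    In [6]: ar.get()  # wait for and receive the result
    Out[6]: 16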

.. seealso::

    A demo of :ref:`DAG Dependencies <dag_dependencies>` with NetworkX and IPython.