add NoDB for non-recording Hub...
MinRK
@@ -1,452 +1,459 @@
#!/usr/bin/env python
# encoding: utf-8
"""
The IPython controller application.

Authors:

* Brian Granger
* MinRK

"""

#-----------------------------------------------------------------------------
# Copyright (C) 2008-2011 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
#-----------------------------------------------------------------------------

#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------

from __future__ import with_statement

import json
import os
import socket
import stat
import sys

from multiprocessing import Process

import zmq
from zmq.devices import ProcessMonitoredQueue
from zmq.log.handlers import PUBHandler

from IPython.core.profiledir import ProfileDir

from IPython.parallel.apps.baseapp import (
    BaseParallelApplication,
    base_aliases,
    base_flags,
    catch_config_error,
)
from IPython.utils.importstring import import_item
from IPython.utils.traitlets import Instance, Unicode, Bool, List, Dict, TraitError

from IPython.zmq.session import (
    Session, session_aliases, session_flags, default_secure
)

from IPython.parallel.controller.heartmonitor import HeartMonitor
from IPython.parallel.controller.hub import HubFactory
from IPython.parallel.controller.scheduler import TaskScheduler, launch_scheduler
from IPython.parallel.controller.sqlitedb import SQLiteDB

from IPython.parallel.util import signal_children, split_url, disambiguate_url

# conditional import of MongoDB backend class

try:
    from IPython.parallel.controller.mongodb import MongoDB
except ImportError:
    maybe_mongo = []
else:
    maybe_mongo = [MongoDB]


#-----------------------------------------------------------------------------
# Module level variables
#-----------------------------------------------------------------------------


#: The default config file name for this application
default_config_file_name = u'ipcontroller_config.py'


_description = """Start the IPython controller for parallel computing.

The IPython controller provides a gateway between the IPython engines and
clients. The controller needs to be started before the engines and can be
configured using command line options or using a cluster directory. Cluster
directories contain config, log and security files and are usually located in
your ipython directory and named as "profile_name". See the `profile`
and `profile-dir` options for details.
"""

_examples = """
ipcontroller --ip=192.168.0.1 --port=1000  # listen on ip, port for engines
ipcontroller --scheme=pure  # use the pure zeromq scheduler
"""


#-----------------------------------------------------------------------------
# The main application
#-----------------------------------------------------------------------------
flags = {}
flags.update(base_flags)
flags.update({
    'usethreads' : ( {'IPControllerApp' : {'use_threads' : True}},
                    'Use threads instead of processes for the schedulers'),
    'sqlitedb' : ({'HubFactory' : {'db_class' : 'IPython.parallel.controller.sqlitedb.SQLiteDB'}},
                    'use the SQLiteDB backend'),
    'mongodb' : ({'HubFactory' : {'db_class' : 'IPython.parallel.controller.mongodb.MongoDB'}},
                    'use the MongoDB backend'),
    'dictdb' : ({'HubFactory' : {'db_class' : 'IPython.parallel.controller.dictdb.DictDB'}},
                    'use the in-memory DictDB backend'),
+    'nodb' : ({'HubFactory' : {'db_class' : 'IPython.parallel.controller.dictdb.NoDB'}},
+                    """use dummy DB backend, which doesn't store any information.
+
+                    This can be used to prevent growth of the memory footprint of the Hub
+                    in cases where its record-keeping is not required. Requesting results
+                    of tasks submitted by other clients, db_queries, and task resubmission
+                    will not be available."""),
    'reuse' : ({'IPControllerApp' : {'reuse_files' : True}},
                    'reuse existing json connection files')
})

flags.update(session_flags)

aliases = dict(
    ssh = 'IPControllerApp.ssh_server',
    enginessh = 'IPControllerApp.engine_ssh_server',
    location = 'IPControllerApp.location',

    url = 'HubFactory.url',
    ip = 'HubFactory.ip',
    transport = 'HubFactory.transport',
    port = 'HubFactory.regport',

    ping = 'HeartMonitor.period',

    scheme = 'TaskScheduler.scheme_name',
    hwm = 'TaskScheduler.hwm',
)
aliases.update(base_aliases)
aliases.update(session_aliases)


class IPControllerApp(BaseParallelApplication):

    name = u'ipcontroller'
    description = _description
    examples = _examples
    config_file_name = Unicode(default_config_file_name)
    classes = [ProfileDir, Session, HubFactory, TaskScheduler, HeartMonitor, SQLiteDB] + maybe_mongo

    # change default to True
    auto_create = Bool(True, config=True,
        help="""Whether to create profile dir if it doesn't exist.""")

    reuse_files = Bool(False, config=True,
        help='Whether to reuse existing json connection files.'
    )
    ssh_server = Unicode(u'', config=True,
        help="""ssh url for clients to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller's listening addresses must be accessible from the ssh server""",
    )
    engine_ssh_server = Unicode(u'', config=True,
        help="""ssh url for engines to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller's listening addresses must be accessible from the ssh server""",
    )
    location = Unicode(u'', config=True,
        help="""The external IP or domain name of the Controller, used for disambiguating
        engine and client connections.""",
    )
    import_statements = List([], config=True,
        help="import statements to be run at startup. Necessary in some environments"
    )

    use_threads = Bool(False, config=True,
        help='Use threads instead of processes for the schedulers',
    )

    engine_json_file = Unicode('ipcontroller-engine.json', config=True,
        help="JSON filename where engine connection info will be stored.")
    client_json_file = Unicode('ipcontroller-client.json', config=True,
        help="JSON filename where client connection info will be stored.")

    def _cluster_id_changed(self, name, old, new):
        super(IPControllerApp, self)._cluster_id_changed(name, old, new)
        self.engine_json_file = "%s-engine.json" % self.name
        self.client_json_file = "%s-client.json" % self.name


    # internal
    children = List()
    mq_class = Unicode('zmq.devices.ProcessMonitoredQueue')

    def _use_threads_changed(self, name, old, new):
        self.mq_class = 'zmq.devices.%sMonitoredQueue' % ('Thread' if new else 'Process')

    aliases = Dict(aliases)
    flags = Dict(flags)


    def save_connection_dict(self, fname, cdict):
        """save a connection dict to json file."""
        c = self.config
        url = cdict['url']
        location = cdict['location']
        if not location:
            try:
                proto, ip, port = split_url(url)
            except AssertionError:
                pass
            else:
                try:
                    location = socket.gethostbyname_ex(socket.gethostname())[2][-1]
                except (socket.gaierror, IndexError):
                    self.log.warn("Could not identify this machine's IP, assuming 127.0.0.1."
                        " You may need to specify '--location=<external_ip_address>' to help"
                        " IPython decide when to connect via loopback.")
                    location = '127.0.0.1'
            cdict['location'] = location
        fname = os.path.join(self.profile_dir.security_dir, fname)
        self.log.info("writing connection info to %s", fname)
        with open(fname, 'w') as f:
            f.write(json.dumps(cdict, indent=2))
        os.chmod(fname, stat.S_IRUSR | stat.S_IWUSR)

    def load_config_from_json(self):
        """load config from existing json connector files."""
        c = self.config
        self.log.debug("loading config from JSON")
        # load from engine config
        fname = os.path.join(self.profile_dir.security_dir, self.engine_json_file)
        self.log.info("loading connection info from %s", fname)
        with open(fname) as f:
            cfg = json.loads(f.read())
        key = cfg['exec_key']
        # json gives unicode, Session.key wants bytes
        c.Session.key = key.encode('ascii')
        xport, addr = cfg['url'].split('://')
        c.HubFactory.engine_transport = xport
        ip, ports = addr.split(':')
        c.HubFactory.engine_ip = ip
        c.HubFactory.regport = int(ports)
        self.location = cfg['location']
        if not self.engine_ssh_server:
            self.engine_ssh_server = cfg['ssh']
        # load client config
        fname = os.path.join(self.profile_dir.security_dir, self.client_json_file)
        self.log.info("loading connection info from %s", fname)
        with open(fname) as f:
            cfg = json.loads(f.read())
        assert key == cfg['exec_key'], "exec_key mismatch between engine and client keys"
        xport, addr = cfg['url'].split('://')
        c.HubFactory.client_transport = xport
        ip, ports = addr.split(':')
        c.HubFactory.client_ip = ip
        if not self.ssh_server:
            self.ssh_server = cfg['ssh']
        assert int(ports) == c.HubFactory.regport, "regport mismatch"

    def load_secondary_config(self):
        """secondary config, loading from JSON and setting defaults"""
        if self.reuse_files:
            try:
                self.load_config_from_json()
            except (AssertionError, IOError) as e:
                self.log.error("Could not load config from JSON: %s" % e)
                self.reuse_files = False
        # switch Session.key default to secure
        default_secure(self.config)
        self.log.debug("Config changed")
        self.log.debug(repr(self.config))

    def init_hub(self):
        c = self.config

        self.do_import_statements()

        try:
            self.factory = HubFactory(config=c, log=self.log)
            # self.start_logging()
            self.factory.init_hub()
        except TraitError:
            raise
        except Exception:
            self.log.error("Couldn't construct the Controller", exc_info=True)
            self.exit(1)

        if not self.reuse_files:
            # save to new json config files
            f = self.factory
            cdict = {'exec_key' : f.session.key.decode('ascii'),
                    'ssh' : self.ssh_server,
                    'url' : "%s://%s:%s" % (f.client_transport, f.client_ip, f.regport),
                    'location' : self.location
                    }
            self.save_connection_dict(self.client_json_file, cdict)
            edict = cdict
            # the engine file should carry the engine-side address, to mirror
            # what load_config_from_json reads back into engine_transport/engine_ip
            edict['url'] = "%s://%s:%s" % (f.engine_transport, f.engine_ip, f.regport)
            edict['ssh'] = self.engine_ssh_server
            self.save_connection_dict(self.engine_json_file, edict)

    #
    def init_schedulers(self):
        children = self.children
        mq = import_item(str(self.mq_class))

        hub = self.factory
        # disambiguate url, in case of *
        monitor_url = disambiguate_url(hub.monitor_url)
        # maybe_inproc = 'inproc://monitor' if self.use_threads else monitor_url
        # IOPub relay (in a Process)
        q = mq(zmq.PUB, zmq.SUB, zmq.PUB, b'N/A', b'iopub')
        q.bind_in(hub.client_info['iopub'])
        q.bind_out(hub.engine_info['iopub'])
        q.setsockopt_out(zmq.SUBSCRIBE, b'')
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)

        # Multiplexer Queue (in a Process)
        q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'in', b'out')
        q.bind_in(hub.client_info['mux'])
        q.setsockopt_in(zmq.IDENTITY, b'mux')
        q.bind_out(hub.engine_info['mux'])
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)

        # Control Queue (in a Process)
        q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'incontrol', b'outcontrol')
        q.bind_in(hub.client_info['control'])
        q.setsockopt_in(zmq.IDENTITY, b'control')
        q.bind_out(hub.engine_info['control'])
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)
        try:
            scheme = self.config.TaskScheduler.scheme_name
        except AttributeError:
            scheme = TaskScheduler.scheme_name.get_default_value()
        # Task Queue (in a Process)
        if scheme == 'pure':
            self.log.warn("task::using pure XREQ Task scheduler")
            q = mq(zmq.ROUTER, zmq.DEALER, zmq.PUB, b'intask', b'outtask')
            # q.setsockopt_out(zmq.HWM, hub.hwm)
            q.bind_in(hub.client_info['task'][1])
            q.setsockopt_in(zmq.IDENTITY, b'task')
            q.bind_out(hub.engine_info['task'])
            q.connect_mon(monitor_url)
            q.daemon = True
            children.append(q)
        elif scheme == 'none':
            self.log.warn("task::using no Task scheduler")

        else:
            self.log.info("task::using Python %s Task scheduler" % scheme)
            sargs = (hub.client_info['task'][1], hub.engine_info['task'],
                                monitor_url, disambiguate_url(hub.client_info['notification']))
            kwargs = dict(logname='scheduler', loglevel=self.log_level,
                            log_url = self.log_url, config=dict(self.config))
            if 'Process' in self.mq_class:
                # run the Python scheduler in a Process
                q = Process(target=launch_scheduler, args=sargs, kwargs=kwargs)
                q.daemon = True
                children.append(q)
            else:
                # single-threaded Controller
                kwargs['in_thread'] = True
                launch_scheduler(*sargs, **kwargs)


    def save_urls(self):
        """save the registration urls to files."""
        c = self.config

        sec_dir = self.profile_dir.security_dir
        cf = self.factory

        with open(os.path.join(sec_dir, 'ipcontroller-engine.url'), 'w') as f:
            f.write("%s://%s:%s" % (cf.engine_transport, cf.engine_ip, cf.regport))

        with open(os.path.join(sec_dir, 'ipcontroller-client.url'), 'w') as f:
            f.write("%s://%s:%s" % (cf.client_transport, cf.client_ip, cf.regport))


    def do_import_statements(self):
        statements = self.import_statements
        for s in statements:
            try:
                self.log.info("Executing statement: '%s'" % s)
                exec s in globals(), locals()
            except:
                self.log.error("Error running statement: %s" % s)

    def forward_logging(self):
        if self.log_url:
            self.log.info("Forwarding logging to %s" % self.log_url)
            context = zmq.Context.instance()
            lsock = context.socket(zmq.PUB)
            lsock.connect(self.log_url)
            handler = PUBHandler(lsock)
            self.log.removeHandler(self._log_handler)
            handler.root_topic = 'controller'
            handler.setLevel(self.log_level)
            self.log.addHandler(handler)
            self._log_handler = handler

    @catch_config_error
    def initialize(self, argv=None):
        super(IPControllerApp, self).initialize(argv)
        self.forward_logging()
        self.load_secondary_config()
        self.init_hub()
        self.init_schedulers()

    def start(self):
        # Start the subprocesses:
        self.factory.start()
        child_procs = []
        for child in self.children:
            child.start()
            if isinstance(child, ProcessMonitoredQueue):
                child_procs.append(child.launcher)
            elif isinstance(child, Process):
                child_procs.append(child)
        if child_procs:
            signal_children(child_procs)

        self.write_pid_file(overwrite=True)

        try:
            self.factory.loop.start()
        except KeyboardInterrupt:
            self.log.critical("Interrupted, Exiting...\n")



def launch_new_instance():
    """Create and run the IPython controller"""
    if sys.platform == 'win32':
        # make sure we don't get called from a multiprocessing subprocess
        # this can result in infinite Controllers being started on Windows
        # which doesn't have a proper fork, so multiprocessing is wonky

        # this only comes up when IPython has been installed using vanilla
        # setuptools, and *not* distribute.
        import multiprocessing
        p = multiprocessing.current_process()
        # the main process has name 'MainProcess'
        # subprocesses will have names like 'Process-1'
        if p.name != 'MainProcess':
            # we are a subprocess, don't start another Controller!
            return
    app = IPControllerApp.instance()
    app.initialize()
    app.start()


if __name__ == '__main__':
    launch_new_instance()
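
The new ``nodb`` flag above is ordinary traitlets-application shorthand: passing ``--nodb`` to :command:`ipcontroller` applies the config fragment that sets ``HubFactory.db_class``. A minimal sketch of the equivalent explicit configuration, assuming the standard :file:`ipcontroller_config.py` mechanism:

.. sourcecode:: python

    # ipcontroller_config.py -- sketch of the equivalent of `ipcontroller --nodb`
    c = get_config()

    # Hub keeps no task records; db_query and fetching other clients'
    # results will raise KeyError, but Hub memory stays flat.
    c.HubFactory.db_class = 'IPython.parallel.controller.dictdb.NoDB'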
@@ -1,185 +1,216 @@
1 """A Task logger that presents our DB interface,
1 """A Task logger that presents our DB interface,
2 but exists entirely in memory and implemented with dicts.
2 but exists entirely in memory and implemented with dicts.
3
3
4 Authors:
4 Authors:
5
5
6 * Min RK
6 * Min RK
7
7
8
8
9 TaskRecords are dicts of the form:
9 TaskRecords are dicts of the form:
10 {
10 {
11 'msg_id' : str(uuid),
11 'msg_id' : str(uuid),
12 'client_uuid' : str(uuid),
12 'client_uuid' : str(uuid),
13 'engine_uuid' : str(uuid) or None,
13 'engine_uuid' : str(uuid) or None,
14 'header' : dict(header),
14 'header' : dict(header),
15 'content': dict(content),
15 'content': dict(content),
16 'buffers': list(buffers),
16 'buffers': list(buffers),
17 'submitted': datetime,
17 'submitted': datetime,
18 'started': datetime or None,
18 'started': datetime or None,
19 'completed': datetime or None,
19 'completed': datetime or None,
20 'resubmitted': datetime or None,
20 'resubmitted': datetime or None,
21 'result_header' : dict(header) or None,
21 'result_header' : dict(header) or None,
22 'result_content' : dict(content) or None,
22 'result_content' : dict(content) or None,
23 'result_buffers' : list(buffers) or None,
23 'result_buffers' : list(buffers) or None,
24 }
24 }
25 With this info, many of the special categories of tasks can be defined by query:
25 With this info, many of the special categories of tasks can be defined by query:
26
26
27 pending: completed is None
27 pending: completed is None
28 client's outstanding: client_uuid = uuid && completed is None
28 client's outstanding: client_uuid = uuid && completed is None
29 MIA: arrived is None (and completed is None)
29 MIA: arrived is None (and completed is None)
30 etc.
30 etc.
31
31
32 EngineRecords are dicts of the form:
32 EngineRecords are dicts of the form:
33 {
33 {
34 'eid' : int(id),
34 'eid' : int(id),
35 'uuid': str(uuid)
35 'uuid': str(uuid)
36 }
36 }
This may be extended, but this is all that is currently stored.

We support a subset of mongodb operators:
    $lt,$gt,$lte,$gte,$ne,$in,$nin,$all,$mod,$exists
"""
#-----------------------------------------------------------------------------
# Copyright (C) 2010-2011 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
#-----------------------------------------------------------------------------


from datetime import datetime

from IPython.config.configurable import LoggingConfigurable

from IPython.utils.traitlets import Dict, Unicode, Instance

filters = {
    '$lt' : lambda a,b: a < b,
    '$gt' : lambda a,b: a > b,
    '$eq' : lambda a,b: a == b,
    '$ne' : lambda a,b: a != b,
    '$lte': lambda a,b: a <= b,
    '$gte': lambda a,b: a >= b,
    '$in' : lambda a,b: a in b,
    '$nin': lambda a,b: a not in b,
    '$all': lambda a,b: all([ a in bb for bb in b ]),
    '$mod': lambda a,b: a % b[0] == b[1],
    '$exists' : lambda a,b: (b and a is not None) or (a is None and not b)
}


class CompositeFilter(object):
    """Composite filter for matching multiple properties."""

    def __init__(self, dikt):
        self.tests = []
        self.values = []
        for key, value in dikt.iteritems():
            self.tests.append(filters[key])
            self.values.append(value)

    def __call__(self, value):
        for test, check in zip(self.tests, self.values):
            if not test(value, check):
                return False
        return True

class BaseDB(LoggingConfigurable):
    """Empty Parent class so traitlets work on DB."""
    # base configurable traits:
    session = Unicode("")

class DictDB(BaseDB):
    """Basic in-memory dict-based object for saving Task Records.

    This is the first object to present the DB interface
    for logging tasks out of memory.

    The interface is based on MongoDB, so adding a MongoDB
    backend should be straightforward.
    """

    _records = Dict()

    def _match_one(self, rec, tests):
        """Check if a specific record matches tests."""
        for key, test in tests.iteritems():
            if not test(rec.get(key, None)):
                return False
        return True

    def _match(self, check):
        """Find all the matches for a check dict."""
        matches = []
        tests = {}
        for k, v in check.iteritems():
            if isinstance(v, dict):
                tests[k] = CompositeFilter(v)
            else:
                # bind v now; a bare closure would compare every key
                # against the final value of the loop variable
                tests[k] = lambda o, v=v: o == v

        for rec in self._records.itervalues():
            if self._match_one(rec, tests):
                matches.append(rec)
        return matches

    def _extract_subdict(self, rec, keys):
        """extract subdict of keys"""
        d = {}
        d['msg_id'] = rec['msg_id']
        for key in keys:
            d[key] = rec[key]
        return d

    def add_record(self, msg_id, rec):
        """Add a new Task Record, by msg_id."""
        if msg_id in self._records:
            raise KeyError("Already have msg_id %r" % (msg_id))
        self._records[msg_id] = rec

    def get_record(self, msg_id):
        """Get a specific Task Record, by msg_id."""
        if msg_id not in self._records:
            raise KeyError("No such msg_id %r" % (msg_id))
        return self._records[msg_id]

    def update_record(self, msg_id, rec):
        """Update the data in an existing record."""
        self._records[msg_id].update(rec)

    def drop_matching_records(self, check):
        """Remove a record from the DB."""
        matches = self._match(check)
        for m in matches:
            del self._records[m['msg_id']]

    def drop_record(self, msg_id):
        """Remove a record from the DB."""
        del self._records[msg_id]


    def find_records(self, check, keys=None):
        """Find records matching a query dict, optionally extracting subset of keys.

        Returns list of matching Task Records.

        Parameters
        ----------

        check: dict
            mongodb-style query argument
        keys: list of strs [optional]
            if specified, the subset of keys to extract. msg_id will *always* be
            included.
        """
        matches = self._match(check)
        if keys:
            return [ self._extract_subdict(rec, keys) for rec in matches ]
        else:
            return matches


    def get_history(self):
        """get all msg_ids, ordered by time submitted."""
        msg_ids = self._records.keys()
        return sorted(msg_ids, key=lambda m: self._records[m]['submitted'])
+
+class NoDB(DictDB):
+    """A blackhole db backend that actually stores no information.
+
+    Provides the full DB interface, but raises KeyErrors on any
+    method that tries to access the records. This can be used to
+    minimize the memory footprint of the Hub when its record-keeping
+    functionality is not required.
+    """
+
+    def add_record(self, msg_id, record):
+        pass
+
+    def get_record(self, msg_id):
+        raise KeyError("NoDB does not support record access")
+
+    def update_record(self, msg_id, record):
+        pass
+
+    def drop_matching_records(self, check):
+        pass
+
+    def drop_record(self, msg_id):
+        pass
+
+    def find_records(self, check, keys=None):
+        raise KeyError("NoDB does not store information")
+
+    def get_history(self):
+        raise KeyError("NoDB does not store information")
+
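
As a usage sketch (Python 2 to match the module above; the record values here are invented for illustration), the two backends behave like this:

.. sourcecode:: python

    from datetime import datetime

    from IPython.parallel.controller.dictdb import DictDB, NoDB

    db = DictDB()
    db.add_record('abc123', {'msg_id': 'abc123',
                             'client_uuid': 'client-1',
                             'submitted': datetime.now(),
                             'completed': None})

    # mongodb-style query: pending tasks submitted by client-1
    pending = db.find_records({'client_uuid': 'client-1',
                               'completed': {'$eq': None}})

    nodb = NoDB()
    nodb.add_record('abc123', {'msg_id': 'abc123'})  # silently discarded
    try:
        nodb.get_record('abc123')
    except KeyError:
        pass  # NoDB raises on any record access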
@@ -1,114 +1,137 @@
.. _parallel_db:

=======================
IPython's Task Database
=======================

The IPython Hub stores all task requests and results in a database. Currently supported backends
are: MongoDB, SQLite (the default), and an in-memory DictDB. The most common use case for
this is clients requesting results for tasks they did not submit, via:

.. sourcecode:: ipython

    In [1]: rc.get_result(task_id)

However, since we have this DB backend, we provide a direct query method in the :class:`client`
for users who want deeper introspection into their task history. The :meth:`db_query` method of
the Client is modeled after MongoDB queries, so if you have used MongoDB it should look
familiar. In fact, when the MongoDB backend is in use, the query is relayed directly. However,
when using other backends, the interface is emulated and only a subset of queries is possible.

.. seealso::

    MongoDB query docs: http://www.mongodb.org/display/DOCS/Querying

:meth:`Client.db_query` takes a dictionary query object, with keys from the TaskRecord key list,
and values that are either exact values to test against, or MongoDB queries, which are dicts of the form:
``{'operator' : 'argument(s)'}``. There is also an optional `keys` argument, that specifies
which subset of keys should be retrieved. The default is to retrieve all keys excluding the
request and result buffers. :meth:`db_query` returns a list of TaskRecord dicts. Also like
MongoDB, the `msg_id` key will always be included, whether requested or not.

TaskRecord keys:

=============== =============== =============
Key             Type            Description
=============== =============== =============
msg_id          uuid(bytes)     The msg ID
header          dict            The request header
content         dict            The request content (likely empty)
buffers         list(bytes)     buffers containing serialized request objects
submitted       datetime        timestamp for time of submission (set by client)
client_uuid     uuid(bytes)     IDENT of client's socket
engine_uuid     uuid(bytes)     IDENT of engine's socket
started         datetime        time task began execution on engine
completed       datetime        time task finished execution (success or failure) on engine
resubmitted     datetime        time of resubmission (if applicable)
result_header   dict            header for result
result_content  dict            content for result
result_buffers  list(bytes)     buffers containing serialized result objects
queue           bytes           The name of the queue for the task ('mux' or 'task')
pyin            <unused>        Python input (unused)
pyout           <unused>        Python output (unused)
pyerr           <unused>        Python traceback (unused)
stdout          str             Stream of stdout data
stderr          str             Stream of stderr data
=============== =============== =============

MongoDB operators we emulate on all backends:

========== =================
Operator   Python equivalent
========== =================
'$in'      in
'$nin'     not in
'$eq'      ==
'$ne'      !=
'$gt'      >
'$gte'     >=
'$lt'      <
'$lte'     <=
========== =================


The DB Query is useful for two primary cases:

1. deep polling of task status or metadata
2. selecting a subset of tasks, on which to perform a later operation (e.g. wait on result, purge records, resubmit, ...)

Example Queries
===============


To get all msg_ids that are not completed, only retrieving their ID and start time:

.. sourcecode:: ipython

    In [1]: incomplete = rc.db_query({'completed' : None}, keys=['msg_id', 'started'])

All jobs started in the last hour by me:

.. sourcecode:: ipython

    In [1]: from datetime import datetime, timedelta

    In [2]: hourago = datetime.now() - timedelta(1./24)

    In [3]: recent = rc.db_query({'started' : {'$gte' : hourago },
                                  'client_uuid' : rc.session.session})

All jobs started more than an hour ago, by clients *other than me*:

.. sourcecode:: ipython

    In [3]: recent = rc.db_query({'started' : {'$lte' : hourago },
                                  'client_uuid' : {'$ne' : rc.session.session}})

Result headers for all jobs on engine 3 or 4:

.. sourcecode:: ipython

    In [1]: uuids = map(rc._engines.get, (3,4))

    In [2]: hist34 = rc.db_query({'engine_uuid' : {'$in' : uuids }}, keys=['result_header'])
+
+
+Cost
+====
+
+The advantage of the database backends is, of course, that large amounts of
+data can be stored that won't fit in memory. The default 'backend' is actually
+to just store all of this information in a Python dictionary. This is very fast,
+but will run out of memory quickly if you move a lot of data around, or your
+cluster runs for a long time.
+
+Unfortunately, the DB backends (SQLite and MongoDB) right now are rather slow,
+and can still consume large amounts of resources, particularly if large tasks
+or results are being created at a high frequency.
+
+For this reason, we have added :class:`~.NoDB`, a dummy backend that doesn't
+actually store any information. When you use this database, nothing is stored,
+and any request for results will raise a KeyError. This obviously prevents
+later requests for results and task resubmission from functioning, but
+sometimes those nice features are not as useful as keeping Hub memory under
+control.
+
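
To select the dummy backend, one can use the ``nodb`` flag added in this commit:

.. sourcecode:: bash

    $> ipcontroller --nodb

or, equivalently, set the backend class directly in :file:`ipcontroller_config.py` (class path as defined above):

.. sourcecode:: python

    c.HubFactory.db_class = "IPython.parallel.controller.dictdb.NoDB"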
@@ -1,838 +1,858 @@
.. _parallel_process:

===========================================
Starting the IPython controller and engines
===========================================

To use IPython for parallel computing, you need to start one instance of
the controller and one or more instances of the engine. The controller
and each engine can run on different machines or on the same machine.
Because of this, there are many different possibilities.

Broadly speaking, there are two ways of going about starting a controller and engines:

* In an automated manner using the :command:`ipcluster` command.
* In a more manual way using the :command:`ipcontroller` and
  :command:`ipengine` commands.

This document describes both of these methods. We recommend that new users
start with the :command:`ipcluster` command as it simplifies many common usage
cases.

General considerations
======================

Before delving into the details about how you can start a controller and
engines using the various methods, we outline some of the general issues that
come up when starting the controller and engines. These things come up no
matter which method you use to start your IPython cluster.

If you are running engines on multiple machines, you will likely need to instruct the
controller to listen for connections on an external interface. This can be done by specifying
the ``ip`` argument on the command-line, or the ``HubFactory.ip`` configurable in
:file:`ipcontroller_config.py`.

If your machines are on a trusted network, you can safely instruct the controller to listen
on all public interfaces with::

    $> ipcontroller --ip=*

Or you can set this behavior as the default by adding the following line to your :file:`ipcontroller_config.py`:

.. sourcecode:: python

    c.HubFactory.ip = '*'

.. note::

    Due to the lack of security in ZeroMQ, the controller will only listen for connections on
    localhost by default. If you see Timeout errors on engines or clients, then the first
    thing you should check is the ip address the controller is listening on, and make sure
    that it is visible from the timing out machine.

.. seealso::

    Our `notes <parallel_security>`_ on security in the new parallel computing code.

Let's say that you want to start the controller on ``host0`` and engines on
hosts ``host1``-``hostn``. The following steps are then required:

1. Start the controller on ``host0`` by running :command:`ipcontroller` on
   ``host0``. The controller must be instructed to listen on an interface visible
   to the engine machines, via the ``ip`` command-line argument or ``HubFactory.ip``
   in :file:`ipcontroller_config.py`.
2. Move the JSON file (:file:`ipcontroller-engine.json`) created by the
   controller from ``host0`` to hosts ``host1``-``hostn``.
3. Start the engines on hosts ``host1``-``hostn`` by running
   :command:`ipengine`. This command has to be told where the JSON file
   (:file:`ipcontroller-engine.json`) is located, as shown in the example below.
70 At this point, the controller and engines will be connected. By default, the JSON files
70 At this point, the controller and engines will be connected. By default, the JSON files
71 created by the controller are put into the :file:`~/.ipython/profile_default/security`
71 created by the controller are put into the :file:`~/.ipython/profile_default/security`
72 directory. If the engines share a filesystem with the controller, step 2 can be skipped as
72 directory. If the engines share a filesystem with the controller, step 2 can be skipped as
73 the engines will automatically look at that location.
73 the engines will automatically look at that location.
74
74
75 The final step required to actually use the running controller from a client is to move
75 The final step required to actually use the running controller from a client is to move
76 the JSON file :file:`ipcontroller-client.json` from ``host0`` to any host where clients
76 the JSON file :file:`ipcontroller-client.json` from ``host0`` to any host where clients
77 will be run. If these file are put into the :file:`~/.ipython/profile_default/security`
77 will be run. If these file are put into the :file:`~/.ipython/profile_default/security`
78 directory of the client's host, they will be found automatically. Otherwise, the full path
78 directory of the client's host, they will be found automatically. Otherwise, the full path
79 to them has to be passed to the client's constructor.
79 to them has to be passed to the client's constructor.
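
For example, a client might connect like this (a minimal sketch; the path shown
is hypothetical):

.. sourcecode:: python

    from IPython.parallel import Client

    # pass the full path to the connection file written by ipcontroller,
    # or omit it if the file is in the profile's security directory
    rc = Client('/path/to/ipcontroller-client.json')
    print rc.ids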

Using :command:`ipcluster`
===========================

The :command:`ipcluster` command provides a simple way of starting a
controller and engines in the following situations:

1. When the controller and engines are all run on localhost. This is useful
   for testing or running on a multicore computer.
2. When engines are started using the :command:`mpiexec` command that comes
   with most MPI [MPI]_ implementations.
3. When engines are started using the PBS [PBS]_ batch system
   (or other `qsub` systems, such as SGE).
4. When the controller is started on localhost and the engines are started on
   remote nodes using :command:`ssh`.
5. When engines are started using the Windows HPC Server batch system.

.. note::

    Currently :command:`ipcluster` requires that the
    :file:`~/.ipython/profile_<name>/security` directory live on a shared filesystem that is
    seen by both the controller and engines. If you don't have a shared file
    system you will need to use :command:`ipcontroller` and
    :command:`ipengine` directly.

Under the hood, :command:`ipcluster` just uses :command:`ipcontroller`
and :command:`ipengine` to perform the steps described above.

The simplest way to use ipcluster requires no configuration, and will
launch a controller and a number of engines on the local machine. For instance,
to start one controller and 4 engines on localhost, just do::

    $ ipcluster start -n 4

To see other command line options, do::

    $ ipcluster -h


Configuring an IPython cluster
==============================

Cluster configurations are stored as `profiles`. You can create a new profile with::

    $ ipython profile create --parallel --profile=myprofile

This will create the directory :file:`IPYTHONDIR/profile_myprofile`, and populate it
with the default configuration files for the three IPython cluster commands. Once
you edit those files, you can continue to call ipcluster/ipcontroller/ipengine
with no arguments beyond ``--profile=myprofile``, and any configuration will be maintained.
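
For example, a trimmed :file:`ipcluster_config.py` in that profile might contain
just the settings you care about (a sketch; the values shown are illustrative):

.. sourcecode:: python

    # IPYTHONDIR/profile_myprofile/ipcluster_config.py
    # config files expose the config object via get_config()
    c = get_config()

    # always start eight engines when this profile is used
    c.IPClusterEngines.n = 8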

There is no limit to the number of profiles you can have, so you can maintain a profile for each
of your common use cases. The default profile will be used whenever the
profile argument is not specified, so edit :file:`IPYTHONDIR/profile_default/*_config.py` to
represent your most common use case.

The configuration files are loaded with commented-out settings and explanations,
which should cover most of the available possibilities.

Using various batch systems with :command:`ipcluster`
-----------------------------------------------------

:command:`ipcluster` has a notion of Launchers that can start controllers
and engines with various remote execution schemes. Currently supported
models include :command:`ssh`, :command:`mpiexec`, PBS-style (Torque, SGE, LSF),
and Windows HPC Server.

In general, these are configured by the :attr:`IPClusterEngines.engine_launcher_class`
and :attr:`IPClusterStart.controller_launcher_class` configurables, which can be the
fully specified object name (e.g. ``'IPython.parallel.apps.launcher.LocalControllerLauncher'``),
but if you are using IPython's builtin launchers, you can specify just the class name,
or even just the prefix, e.g.:

.. sourcecode:: python

    c.IPClusterEngines.engine_launcher_class = 'SSH'
    # equivalent to
    c.IPClusterEngines.engine_launcher_class = 'SSHEngineSetLauncher'
    # both of which expand to
    c.IPClusterEngines.engine_launcher_class = 'IPython.parallel.apps.launcher.SSHEngineSetLauncher'

The shortest form is of particular use on the command line, where all you need to do to
get an IPython cluster running with engines started with MPI is:

.. sourcecode:: bash

    $> ipcluster start --engines=MPI

assuming that the default MPI config is sufficient.

.. note::

    Shortcuts for builtin launcher names were added in 0.12, as was the ``_class`` suffix
    on the configurable names. If you use the old 0.11 names (e.g. ``engine_set_launcher``),
    they will still work, but you will get a deprecation warning that the name has changed.


.. note::

    The Launchers and configuration are designed in such a way that advanced
    users can subclass and configure them to fit systems that we do not yet
    support (such as Condor).

Using :command:`ipcluster` in mpiexec/mpirun mode
-------------------------------------------------

The mpiexec/mpirun mode is useful if:

1. You have MPI installed.
2. Your systems are configured to use the :command:`mpiexec` or
   :command:`mpirun` commands to start MPI processes.

If these requirements are satisfied, you can create a new profile::

    $ ipython profile create --parallel --profile=mpi

and edit the file :file:`IPYTHONDIR/profile_mpi/ipcluster_config.py`.

There, instruct ipcluster to use the MPI launchers by adding the line:

.. sourcecode:: python

    c.IPClusterEngines.engine_launcher_class = 'MPIEngineSetLauncher'

If the default MPI configuration is correct, then you can now start your cluster, with::

    $ ipcluster start -n 4 --profile=mpi

This does the following:

1. Starts the IPython controller on the current host.
2. Uses :command:`mpiexec` to start 4 engines.

If you have a reason to also start the Controller with MPI, you can specify:

.. sourcecode:: python

    c.IPClusterStart.controller_launcher_class = 'MPIControllerLauncher'

.. note::

    The Controller *will not* be in the same MPI universe as the engines, so there is not
    much reason to do this unless sysadmins demand it.

On newer MPI implementations (such as OpenMPI), this will work even if you
don't make any calls to MPI or call :func:`MPI_Init`. However, older MPI
implementations actually require each process to call :func:`MPI_Init` upon
starting. The easiest way of having this done is to install the mpi4py
[mpi4py]_ package and then specify the ``c.MPI.use`` option in :file:`ipengine_config.py`:

.. sourcecode:: python

    c.MPI.use = 'mpi4py'

Unfortunately, even this won't work for some MPI implementations. If you are
having problems with this, you will likely have to use a custom Python
executable that itself calls :func:`MPI_Init` at the appropriate time.
Fortunately, mpi4py comes with such a custom Python executable that is easy to
install and use. However, this custom Python executable approach will not work
with :command:`ipcluster` currently.

More details on using MPI with IPython can be found :ref:`here <parallelmpi>`.


Using :command:`ipcluster` in PBS mode
--------------------------------------

The PBS mode uses the Portable Batch System (PBS) to start the engines.

As usual, we will start by creating a fresh profile::

    $ ipython profile create --parallel --profile=pbs

And in :file:`ipcluster_config.py`, we will select the PBS launchers for the controller
and engines:

.. sourcecode:: python

    c.IPClusterStart.controller_launcher_class = 'PBSControllerLauncher'
    c.IPClusterEngines.engine_launcher_class = 'PBSEngineSetLauncher'

.. note::

    Note that the configurable is IPClusterEngines for the engine launcher, and
    IPClusterStart for the controller launcher. This is because the start command is a
    subclass of the engine command, adding a controller launcher. Since it is a subclass,
    any configuration made in IPClusterEngines is inherited by IPClusterStart unless it is
    overridden.
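
A sketch of what that inheritance means in practice (the values are illustrative):

.. sourcecode:: python

    # set once on the IPClusterEngines base class; IPClusterStart inherits it
    c.IPClusterEngines.n = 16
    # the controller launcher is only meaningful on IPClusterStart
    c.IPClusterStart.controller_launcher_class = 'PBSControllerLauncher'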

IPython does provide simple default batch templates for PBS and SGE, but you may need
to specify your own. Here is a sample PBS script template:

.. sourcecode:: bash

    #PBS -N ipython
    #PBS -j oe
    #PBS -l walltime=00:10:00
    #PBS -l nodes={n/4}:ppn=4
    #PBS -q {queue}

    cd $PBS_O_WORKDIR
    export PATH=$HOME/usr/local/bin
    export PYTHONPATH=$HOME/usr/local/lib/python2.7/site-packages
    /usr/local/bin/mpiexec -n {n} ipengine --profile-dir={profile_dir}

There are a few important points about this template:

1. This template will be rendered at runtime using IPython's :class:`EvalFormatter`.
   This is simply a subclass of :class:`string.Formatter` that allows simple expressions
   on keys (see the sketch after this list).

2. Instead of putting in the actual number of engines, use the notation
   ``{n}`` to indicate the number of engines to be started. You can also use
   expressions like ``{n/4}`` in the template to indicate the number of nodes.
   There will always be ``{n}`` and ``{profile_dir}`` variables passed to the formatter.
   These tell the batch system how many engines to start, and where the configuration
   files reside. The same is true for the batch queue, with the template variable
   ``{queue}``.

3. Any options to :command:`ipengine` can be given in the batch script
   template, or in :file:`ipengine_config.py`.

4. Depending on the configuration of your system, you may have to set
   environment variables in the script template.
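
As a sketch of point 1 (this assumes :class:`EvalFormatter` is importable from
:mod:`IPython.utils.text`):

.. sourcecode:: python

    from IPython.utils.text import EvalFormatter

    template = "#PBS -l nodes={n/4}:ppn=4"
    # keys may contain simple expressions, evaluated against the passed values
    print EvalFormatter().format(template, n=16)
    # prints: #PBS -l nodes=4:ppn=4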

The controller template should be similar, but simpler:

.. sourcecode:: bash

    #PBS -N ipython
    #PBS -j oe
    #PBS -l walltime=00:10:00
    #PBS -l nodes=1:ppn=4
    #PBS -q {queue}

    cd $PBS_O_WORKDIR
    export PATH=$HOME/usr/local/bin
    export PYTHONPATH=$HOME/usr/local/lib/python2.7/site-packages
    ipcontroller --profile-dir={profile_dir}

Once you have created these scripts, save them with names like
:file:`pbs.engine.template`. Now you can load them into :file:`ipcluster_config` with:

.. sourcecode:: python

    c.PBSEngineSetLauncher.batch_template_file = "pbs.engine.template"

    c.PBSControllerLauncher.batch_template_file = "pbs.controller.template"

Alternatively, you can just define the templates as strings inside :file:`ipcluster_config`.

Whether you are using your own templates or our defaults, the extra configurables available are
the number of engines to launch (``{n}``) and the batch system queue to which the jobs are to be
submitted (``{queue}``). These are configurables, and can be specified in
:file:`ipcluster_config`:

.. sourcecode:: python

    c.PBSLauncher.queue = 'veryshort.q'
    c.IPClusterEngines.n = 64

Note that if you are running PBS on a multi-node cluster, the Controller's default behavior
of listening only on localhost is likely too restrictive. In this case, also assuming the
nodes are safely behind a firewall, you can simply instruct the Controller to listen for
connections on all its interfaces, by adding in :file:`ipcontroller_config`:

.. sourcecode:: python

    c.HubFactory.ip = '*'

You can now run the cluster with::

    $ ipcluster start --profile=pbs -n 128

Additional configuration options can be found in the PBS section of :file:`ipcluster_config`.

.. note::

    Due to the flexibility of configuration, the PBS launchers work with simple changes
    to the template for other :command:`qsub`-using systems, such as Sun Grid Engine,
    and with further configuration in similar batch systems like Condor.


Using :command:`ipcluster` in SSH mode
--------------------------------------

The SSH mode uses :command:`ssh` to execute :command:`ipengine` on remote
nodes; :command:`ipcontroller` can be run remotely as well, or on localhost.

.. note::

    When using this mode it is highly recommended that you have set up SSH keys
    and are using ssh-agent [SSH]_ for password-less logins.

As usual, we start by creating a clean profile::

    $ ipython profile create --parallel --profile=ssh

To use this mode, select the SSH launchers in :file:`ipcluster_config.py`:

.. sourcecode:: python

    c.IPClusterEngines.engine_launcher_class = 'SSHEngineSetLauncher'
    # and if the Controller is also to be remote:
    c.IPClusterStart.controller_launcher_class = 'SSHControllerLauncher'

The controller's remote location and configuration can be specified:

.. sourcecode:: python

    # Set the user and hostname for the controller
    # c.SSHControllerLauncher.hostname = 'controller.example.com'
    # c.SSHControllerLauncher.user = os.environ.get('USER','username')

    # Set the arguments to be passed to ipcontroller
    # note that remotely launched ipcontroller will not get the contents of
    # the local ipcontroller_config.py unless it resides on the *remote host*
    # in the location specified by the `profile-dir` argument.
    # c.SSHControllerLauncher.controller_args = ['--reuse', '--ip=*', '--profile-dir=/path/to/cd']

.. note::

    SSH mode does not do any file movement, so you will need to distribute configuration
    files manually. To aid in this, the `reuse_files` flag defaults to True for ssh-launched
    Controllers, so you will only need to do this once, unless you override this flag back
    to False.

Engines are specified in a dictionary, by hostname and the number of engines to be run
on that host.

.. sourcecode:: python

    c.SSHEngineSetLauncher.engines = { 'host1.example.com' : 2,
                                       'host2.example.com' : 5,
                                       'host3.example.com' : (1, ['--profile-dir=/home/different/location']),
                                       'host4.example.com' : 8 }

* In the `engines` dict, the keys are the hosts we want to run engines on, and
  the values are the number of engines to run on each host.
* On host3, the value is a tuple, where the number of engines is the first element,
  and the arguments to be passed to :command:`ipengine` are the second element.

For engines without explicitly specified arguments, the default arguments are set in
a single location:

.. sourcecode:: python

    c.SSHEngineSetLauncher.engine_args = ['--profile-dir=/path/to/profile_ssh']

Current limitations of the SSH mode of :command:`ipcluster` are:

* Untested on Windows. Would require a working :command:`ssh` on Windows.
  Also, we are using shell scripts to set up and execute commands on remote
  hosts.
* No file movement - This is a regression from 0.10, which moved connection files
  around with scp. This will be improved; Pull Requests are welcome.


IPython on EC2 with StarCluster
===============================

The excellent StarCluster_ toolkit for managing `Amazon EC2`_ clusters has a plugin
which makes deploying IPython on EC2 quite simple. The starcluster plugin uses
:command:`ipcluster` with the SGE launchers to distribute engines across the
EC2 cluster. See their `ipcluster plugin documentation`_ for more information.

.. _StarCluster: http://web.mit.edu/starcluster
.. _Amazon EC2: http://aws.amazon.com/ec2/
.. _ipcluster plugin documentation: http://web.mit.edu/starcluster/docs/latest/plugins/ipython.html


Using the :command:`ipcontroller` and :command:`ipengine` commands
==================================================================

It is also possible to use the :command:`ipcontroller` and :command:`ipengine`
commands to start your controller and engines. This approach gives you full
control over all aspects of the startup process.

Starting the controller and engine on your local machine
--------------------------------------------------------

To use :command:`ipcontroller` and :command:`ipengine` to start things on your
local machine, do the following.

First start the controller::

    $ ipcontroller

Next, start however many instances of the engine you want using (repeatedly)
the command::

    $ ipengine

The engines should start and automatically connect to the controller using the
JSON files in :file:`~/.ipython/profile_default/security`. You are now ready to use the
controller and engines from IPython.

.. warning::

    The order of the above operations may be important. You *must*
    start the controller before the engines, unless you are reusing connection
    information (via ``--reuse``), in which case ordering is not important.

.. note::

    On some platforms (OS X), to put the controller and engine into the
    background you may need to give these commands in the form ``(ipcontroller
    &)`` and ``(ipengine &)`` (with the parentheses) for them to work
    properly.

Starting the controller and engines on different hosts
------------------------------------------------------

When the controller and engines are running on different hosts, things are
slightly more complicated, but the underlying ideas are the same:

1. Start the controller on a host using :command:`ipcontroller`. The controller must be
   instructed to listen on an interface visible to the engine machines, via the ``ip``
   command-line argument or ``HubFactory.ip`` in :file:`ipcontroller_config.py`::

       $ ipcontroller --ip=192.168.1.16

   .. sourcecode:: python

       # in ipcontroller_config.py
       c.HubFactory.ip = '192.168.1.16'

2. Copy :file:`ipcontroller-engine.json` from :file:`~/.ipython/profile_<name>/security` on
   the controller's host to the host where the engines will run.
3. Use :command:`ipengine` on the engines' hosts to start the engines.

The only thing you have to be careful of is to tell :command:`ipengine` where
the :file:`ipcontroller-engine.json` file is located. There are two ways you
can do this:

* Put :file:`ipcontroller-engine.json` in the :file:`~/.ipython/profile_<name>/security`
  directory on the engine's host, where it will be found automatically.
* Call :command:`ipengine` with the ``--file=full_path_to_the_file``
  flag.

The ``file`` flag works like this::

    $ ipengine --file=/path/to/my/ipcontroller-engine.json

.. note::

    If the controller's and engines' hosts all have a shared file system
    (:file:`~/.ipython/profile_<name>/security` is the same on all of them), then things
    will just work!

SSH Tunnels
***********

If your engines are not on the same LAN as the controller, or you are on a highly
restricted network where your nodes cannot see each other's ports, then you can
use SSH tunnels to connect engines to the controller.

.. note::

    This does not work in all cases. Manual tunnels may be an option, but are
    highly inconvenient. Support for manual tunnels will be improved.

You can instruct all engines to use ssh, by specifying the ssh server in
:file:`ipcontroller-engine.json`:

.. I know this is really JSON, but the example is a subset of Python:
.. sourcecode:: python

    {
      "url":"tcp://192.168.1.123:56951",
      "exec_key":"26f4c040-587d-4a4e-b58b-030b96399584",
      "ssh":"user@example.com",
      "location":"192.168.1.123"
    }

This will be specified if you give the ``--enginessh=user@example.com`` argument when
starting :command:`ipcontroller`.

Or you can specify an ssh server on the command-line when starting an engine::

    $> ipengine --profile=foo --ssh=my.login.node

For example, if your system is totally restricted, then all connections will actually be
loopback, and ssh tunnels will be used to connect engines to the controller::

    [node1] $> ipcontroller --enginessh=node1
    [node2] $> ipengine
    [node3] $> ipcluster engines --n=4

If you want to start many engines on each node, the command ``ipcluster engines --n=4``
without any configuration is equivalent to running ipengine 4 times.
577
577
578 An example using ipcontroller/engine with ssh
578 An example using ipcontroller/engine with ssh
579 ---------------------------------------------
579 ---------------------------------------------
580
580
581 No configuration files are necessary to use ipcontroller/engine in an SSH environment
581 No configuration files are necessary to use ipcontroller/engine in an SSH environment
582 without a shared filesystem. You simply need to make sure that the controller is listening
582 without a shared filesystem. You simply need to make sure that the controller is listening
583 on an interface visible to the engines, and move the connection file from the controller to
583 on an interface visible to the engines, and move the connection file from the controller to
584 the engines.
584 the engines.
585
585
586 1. start the controller, listening on an ip-address visible to the engine machines::
586 1. start the controller, listening on an ip-address visible to the engine machines::
587
587
588 [controller.host] $ ipcontroller --ip=192.168.1.16
588 [controller.host] $ ipcontroller --ip=192.168.1.16
589
589
590 [IPControllerApp] Using existing profile dir: u'/Users/me/.ipython/profile_default'
590 [IPControllerApp] Using existing profile dir: u'/Users/me/.ipython/profile_default'
591 [IPControllerApp] Hub listening on tcp://192.168.1.16:63320 for registration.
591 [IPControllerApp] Hub listening on tcp://192.168.1.16:63320 for registration.
592 [IPControllerApp] Hub using DB backend: 'IPython.parallel.controller.dictdb.DictDB'
592 [IPControllerApp] Hub using DB backend: 'IPython.parallel.controller.dictdb.DictDB'
593 [IPControllerApp] hub::created hub
593 [IPControllerApp] hub::created hub
594 [IPControllerApp] writing connection info to /Users/me/.ipython/profile_default/security/ipcontroller-client.json
594 [IPControllerApp] writing connection info to /Users/me/.ipython/profile_default/security/ipcontroller-client.json
595 [IPControllerApp] writing connection info to /Users/me/.ipython/profile_default/security/ipcontroller-engine.json
595 [IPControllerApp] writing connection info to /Users/me/.ipython/profile_default/security/ipcontroller-engine.json
596 [IPControllerApp] task::using Python leastload Task scheduler
596 [IPControllerApp] task::using Python leastload Task scheduler
597 [IPControllerApp] Heartmonitor started
597 [IPControllerApp] Heartmonitor started
598 [IPControllerApp] Creating pid file: /Users/me/.ipython/profile_default/pid/ipcontroller.pid
598 [IPControllerApp] Creating pid file: /Users/me/.ipython/profile_default/pid/ipcontroller.pid
599 Scheduler started [leastload]
599 Scheduler started [leastload]
600
600
601 2. on each engine, fetch the connection file with scp::
601 2. on each engine, fetch the connection file with scp::
602
602
603 [engine.host.n] $ scp controller.host:.ipython/profile_default/security/ipcontroller-engine.json ./
603 [engine.host.n] $ scp controller.host:.ipython/profile_default/security/ipcontroller-engine.json ./
604
604
605 .. note::
605 .. note::
606
606
607 The log output of ipcontroller above shows you where the json files were written.
607 The log output of ipcontroller above shows you where the json files were written.
608 They will be in :file:`~/.ipython` (or :file:`~/.config/ipython`) under
608 They will be in :file:`~/.ipython` (or :file:`~/.config/ipython`) under
609 :file:`profile_default/security/ipcontroller-engine.json`
609 :file:`profile_default/security/ipcontroller-engine.json`
610
610
611 3. start the engines, using the connection file::
611 3. start the engines, using the connection file::
612
612
613 [engine.host.n] $ ipengine --file=./ipcontroller-engine.json
613 [engine.host.n] $ ipengine --file=./ipcontroller-engine.json
614
614
615 A couple of notes:
615 A couple of notes:
616
616
617 * You can avoid having to fetch the connection file every time by adding ``--reuse`` flag
617 * You can avoid having to fetch the connection file every time by adding ``--reuse`` flag
618 to ipcontroller, which instructs the controller to read the previous connection file for
618 to ipcontroller, which instructs the controller to read the previous connection file for
619 connection info, rather than generate a new one with randomized ports.
619 connection info, rather than generate a new one with randomized ports.
620
620
621 * In step 2, if you fetch the connection file directly into the security dir of a profile,
621 * In step 2, if you fetch the connection file directly into the security dir of a profile,
622 then you need not specify its path directly, only the profile (assumes the path exists,
622 then you need not specify its path directly, only the profile (assumes the path exists,
623 otherwise you must create it first)::
623 otherwise you must create it first)::
624
624
625 [engine.host.n] $ scp controller.host:.ipython/profile_default/security/ipcontroller-engine.json ~/.ipython/profile_ssh/security/
625 [engine.host.n] $ scp controller.host:.ipython/profile_default/security/ipcontroller-engine.json ~/.ipython/profile_ssh/security/
626 [engine.host.n] $ ipengine --profile=ssh
626 [engine.host.n] $ ipengine --profile=ssh
627
627
628 Of course, if you fetch the file into the default profile, no arguments must be passed to
628 Of course, if you fetch the file into the default profile, no arguments must be passed to
629 ipengine at all.
629 ipengine at all.
630
630
631 * Note that ipengine *did not* specify the ip argument. In general, it is unlikely for any
631 * Note that ipengine *did not* specify the ip argument. In general, it is unlikely for any
632 connection information to be specified at the command-line to ipengine, as all of this
632 connection information to be specified at the command-line to ipengine, as all of this
633 information should be contained in the connection file written by ipcontroller.
633 information should be contained in the connection file written by ipcontroller.
634
634
635 Make JSON files persistent
635 Make JSON files persistent
636 --------------------------
636 --------------------------
637
637
638 At fist glance it may seem that that managing the JSON files is a bit
638 At fist glance it may seem that that managing the JSON files is a bit
639 annoying. Going back to the house and key analogy, copying the JSON around
639 annoying. Going back to the house and key analogy, copying the JSON around
640 each time you start the controller is like having to make a new key every time
640 each time you start the controller is like having to make a new key every time
641 you want to unlock the door and enter your house. As with your house, you want
641 you want to unlock the door and enter your house. As with your house, you want
642 to be able to create the key (or JSON file) once, and then simply use it at
642 to be able to create the key (or JSON file) once, and then simply use it at
643 any point in the future.
643 any point in the future.
644
644
645 To do this, the only thing you have to do is specify the `--reuse` flag, so that
645 To do this, the only thing you have to do is specify the `--reuse` flag, so that
646 the connection information in the JSON files remains accurate::
646 the connection information in the JSON files remains accurate::
647
647
648 $ ipcontroller --reuse
648 $ ipcontroller --reuse
649
649
650 Then, just copy the JSON files over the first time and you are set. You can
650 Then, just copy the JSON files over the first time and you are set. You can
651 start and stop the controller and engines any many times as you want in the
651 start and stop the controller and engines any many times as you want in the
652 future, just make sure to tell the controller to reuse the file.
652 future, just make sure to tell the controller to reuse the file.
653
653
654 .. note::
654 .. note::
655
655
656 You may ask the question: what ports does the controller listen on if you
656 You may ask the question: what ports does the controller listen on if you
657 don't tell is to use specific ones? The default is to use high random port
657 don't tell is to use specific ones? The default is to use high random port
658 numbers. We do this for two reasons: i) to increase security through
658 numbers. We do this for two reasons: i) to increase security through
659 obscurity and ii) to multiple controllers on a given host to start and
659 obscurity and ii) to multiple controllers on a given host to start and
660 automatically use different ports.
660 automatically use different ports.
661
661
662 Log files
662 Log files
663 ---------
663 ---------
664
664
665 All of the components of IPython have log files associated with them.
665 All of the components of IPython have log files associated with them.
666 These log files can be extremely useful in debugging problems with
666 These log files can be extremely useful in debugging problems with
667 IPython and can be found in the directory :file:`~/.ipython/profile_<name>/log`.
667 IPython and can be found in the directory :file:`~/.ipython/profile_<name>/log`.
668 Sending the log files to us will often help us to debug any problems.
668 Sending the log files to us will often help us to debug any problems.
669
669
670
670
671 Configuring `ipcontroller`
671 Configuring `ipcontroller`
672 ---------------------------
672 ---------------------------
673
673
674 The IPython Controller takes its configuration from the file :file:`ipcontroller_config.py`
The IPython Controller takes its configuration from the file :file:`ipcontroller_config.py`
in the active profile directory.

Ports and addresses
*******************

In many cases, you will want to configure the Controller's network identity. By default,
the Controller listens only on loopback, which is the most secure but often impractical.
To instruct the controller to listen on a specific interface, you can set the
:attr:`HubFactory.ip` trait. To listen on all interfaces, simply specify:

.. sourcecode:: python

    c.HubFactory.ip = '*'

When connecting to a Controller that is listening on loopback or behind a firewall, it may
be necessary to specify an SSH server to use for tunnels, and the external IP of the
Controller. If you specified that the HubFactory listen on loopback, or all interfaces,
then IPython will try to guess the external IP. If you are on a system with VM network
devices, or many interfaces, this guess may be incorrect. In these cases, you will want
to specify the 'location' of the Controller. This is the IP of the machine the Controller
is on, as seen by the clients, engines, or the SSH server used to tunnel connections.

For example, to set up a cluster with a Controller on a work node, using ssh tunnels
through the login node, an example :file:`ipcontroller_config.py` might contain:

.. sourcecode:: python

    # allow connections on all interfaces from engines
    # engines on the same node will use loopback, while engines
    # from other nodes will use an external IP
    c.HubFactory.ip = '*'

    # you typically only need to specify the location when there are extra
    # interfaces that may not be visible to peer nodes (e.g. VM interfaces)
    c.HubFactory.location = '10.0.1.5'
    # or to get an automatic value, try this:
    import socket
    ex_ip = socket.gethostbyname_ex(socket.gethostname())[-1][0]
    c.HubFactory.location = ex_ip

    # now instruct clients to use the login node for SSH tunnels:
    c.HubFactory.ssh_server = 'login.mycluster.net'

After doing this, your :file:`ipcontroller-client.json` file will look something like this:

.. this can be Python, despite the fact that it's actually JSON, because it's
.. still valid Python

.. sourcecode:: python

    {
      "url":"tcp:\/\/*:43447",
      "exec_key":"9c7779e4-d08a-4c3b-ba8e-db1f80b562c1",
      "ssh":"login.mycluster.net",
      "location":"10.0.1.5"
    }

Then this file will be all you need for a client to connect to the controller, tunneling
SSH connections through login.mycluster.net.
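
Once that file is in hand, connecting is just a matter of pointing
:class:`~IPython.parallel.Client` at it. A minimal sketch (the path below is
hypothetical):

.. sourcecode:: python

    from IPython.parallel import Client

    # the JSON file supplies the url, exec_key, ssh server, and location
    rc = Client('/path/to/ipcontroller-client.json')

    # the ssh server recorded in the file is used automatically,
    # but it can also be overridden at connect time:
    # rc = Client('/path/to/ipcontroller-client.json',
    #             sshserver='login.mycluster.net')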

Database Backend
****************

The Hub stores all messages and results passed between Clients and Engines.
For large and/or long-running clusters, it would be unreasonable to keep all
of this information in memory. For this reason, we have two database backends:
[MongoDB]_ via PyMongo_, and SQLite with the stdlib :py:mod:`sqlite3`.

MongoDB is our design target, and the dict-like model it uses has driven our design. As far
as we are concerned, BSON can be considered essentially the same as JSON, adding support
for binary data and datetime objects, and any new database backend must support the same
data types.

.. seealso::

    MongoDB `BSON doc <http://www.mongodb.org/display/DOCS/BSON>`_

To use one of these backends, you must set the :attr:`HubFactory.db_class` trait:

.. sourcecode:: python

    # for a simple dict-based in-memory implementation, use dictdb
    # This is the default and the fastest, since it doesn't involve the filesystem
    c.HubFactory.db_class = 'IPython.parallel.controller.dictdb.DictDB'

    # To use MongoDB:
    c.HubFactory.db_class = 'IPython.parallel.controller.mongodb.MongoDB'

    # and SQLite:
    c.HubFactory.db_class = 'IPython.parallel.controller.sqlitedb.SQLiteDB'

    # You can use NoDB to disable the database altogether, in case you don't need
    # to reuse tasks or results, and want to keep memory consumption under control.
    c.HubFactory.db_class = 'IPython.parallel.controller.dictdb.NoDB'

When using one of the proper database backends, you can allow tasks to persist from
one session to the next by specifying the MongoDB database or SQLite table in
which tasks are to be stored. The default is to use a table named for the Hub's Session,
which is a UUID, and thus different every time.

.. sourcecode:: python

    # To keep persistent task history in MongoDB:
    c.MongoDB.database = 'tasks'

    # and in SQLite:
    c.SQLiteDB.table = 'tasks'
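
Once records persist, a Client attached to the same database can look up tasks
from earlier sessions, including ones it did not submit, using Mongo-style
queries via :meth:`Client.db_query`. A brief sketch, assuming a running cluster:

.. sourcecode:: python

    from IPython.parallel import Client

    rc = Client()
    # fetch the records of every completed task stored by the Hub,
    # including tasks submitted in a previous session
    records = rc.db_query({'completed': {'$ne': None}})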

Since MongoDB servers can be running remotely or configured to listen on a particular port,
you can specify any arguments you may need to the PyMongo `Connection
<http://api.mongodb.org/python/1.9/api/pymongo/connection.html#pymongo.connection.Connection>`_:

.. sourcecode:: python

    # positional args to pymongo.Connection
    c.MongoDB.connection_args = []

    # keyword args to pymongo.Connection
    c.MongoDB.connection_kwargs = {}
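
For instance, to point the Hub at a MongoDB server on another machine, something
like the following should work (the hostname here is purely illustrative):

.. sourcecode:: python

    # forwarded as pymongo.Connection(host, port)
    c.MongoDB.connection_args = ['mongodb.mycluster.net', 27017]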

But sometimes you are moving lots of data around quickly, and you don't need
that information to be stored for later access, even by other Clients connected
to this same session. For this case, we have a dummy database, which doesn't
actually store anything. This lets the Hub stay small in memory, at the obvious
cost of being unable to access the information that would have been stored in
the database (used for task resubmission, requesting results of tasks you
didn't submit, etc.). To use this backend, simply pass ``--nodb`` to
:command:`ipcontroller` on the command-line, or specify the :class:`NoDB` class
in your :file:`ipcontroller_config.py` as described above.

.. seealso::

    For more information on the database backends, see the :ref:`db backend reference <parallel_db>`.

.. _PyMongo: http://api.mongodb.org/python/1.9/

Configuring `ipengine`
-----------------------

The IPython Engine takes its configuration from the file :file:`ipengine_config.py`
in the active profile directory.

The Engine itself also has some amount of configuration. Most of this
has to do with initializing MPI or connecting to the controller.

To instruct the Engine to initialize with an MPI environment set up by
mpi4py, add:

.. sourcecode:: python

    c.MPI.use = 'mpi4py'

In this case, the Engine will use our default mpi4py init script to set up
the MPI environment prior to execution. We have default init scripts for
mpi4py and pytrilinos. If you want to specify your own code to be run
at the beginning, specify `c.MPI.init_script`.
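
`c.MPI.init_script` holds a string of Python source that is run on the engine
before anything else. A hypothetical custom script (illustrative only, not the
built-in default):

.. sourcecode:: python

    # run custom MPI setup code on each engine before any tasks execute
    c.MPI.init_script = """
    from mpi4py import MPI
    rank = MPI.COMM_WORLD.Get_rank()
    size = MPI.COMM_WORLD.Get_size()
    """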

You can also specify a file or python command to be run at startup of the
Engine:

.. sourcecode:: python

    c.IPEngineApp.startup_script = u'/path/to/my/startup.py'

    c.IPEngineApp.startup_command = 'import numpy, scipy, mpi4py'

These commands/files will be run again, after each engine restart.
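
A startup script is just ordinary Python, executed on each engine as it comes up.
A minimal, purely hypothetical :file:`startup.py`:

.. sourcecode:: python

    # /path/to/my/startup.py (hypothetical)
    # pre-import heavy libraries once, so individual tasks don't pay the cost
    import os
    import numpy

    print("engine starting in %s" % os.getcwd())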

It's also useful on systems with shared filesystems to run the engines
in some scratch directory. This can be set with:

.. sourcecode:: python

    c.IPEngineApp.work_dir = u'/path/to/scratch/'


.. [MongoDB] MongoDB database http://www.mongodb.org

.. [PBS] Portable Batch System http://www.openpbs.org

.. [SSH] SSH-Agent http://en.wikipedia.org/wiki/ssh-agent