##// END OF EJS Templates
parallel docs, tests, default config updated to newconfig
MinRK -
Show More
@@ -1,544 +1,544 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # encoding: utf-8
2 # encoding: utf-8
3 """
3 """
4 The IPython cluster directory
4 The IPython cluster directory
5 """
5 """
6
6
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2008-2009 The IPython Development Team
8 # Copyright (C) 2008-2009 The IPython Development Team
9 #
9 #
10 # Distributed under the terms of the BSD License. The full license is in
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13
13
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15 # Imports
15 # Imports
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 from __future__ import with_statement
18 from __future__ import with_statement
19
19
20 import os
20 import os
21 import logging
21 import logging
22 import re
22 import re
23 import shutil
23 import shutil
24 import sys
24 import sys
25
25
26 from subprocess import Popen, PIPE
26 from subprocess import Popen, PIPE
27
27
28 from IPython.config.loader import PyFileConfigLoader, Config
28 from IPython.config.loader import PyFileConfigLoader, Config
29 from IPython.config.configurable import Configurable
29 from IPython.config.configurable import Configurable
30 from IPython.config.application import Application
30 from IPython.config.application import Application
31 from IPython.core.crashhandler import CrashHandler
31 from IPython.core.crashhandler import CrashHandler
32 from IPython.core.newapplication import BaseIPythonApplication
32 from IPython.core.newapplication import BaseIPythonApplication
33 from IPython.core import release
33 from IPython.core import release
34 from IPython.utils.path import (
34 from IPython.utils.path import (
35 get_ipython_package_dir,
35 get_ipython_package_dir,
36 get_ipython_dir,
36 get_ipython_dir,
37 expand_path
37 expand_path
38 )
38 )
39 from IPython.utils.traitlets import Unicode, Bool, Instance, Dict
39 from IPython.utils.traitlets import Unicode, Bool, Instance, Dict
40
40
41 #-----------------------------------------------------------------------------
41 #-----------------------------------------------------------------------------
42 # Module errors
42 # Module errors
43 #-----------------------------------------------------------------------------
43 #-----------------------------------------------------------------------------
44
44
class ClusterDirError(Exception):
    """Raised when a cluster directory cannot be found or created."""
47
47
48
48
class PIDFileError(Exception):
    """Raised for problems reading or writing a process's PID file."""
51
51
52
52
53 #-----------------------------------------------------------------------------
53 #-----------------------------------------------------------------------------
54 # Class for managing cluster directories
54 # Class for managing cluster directories
55 #-----------------------------------------------------------------------------
55 #-----------------------------------------------------------------------------
56
56
class ClusterDir(Configurable):
    """An object to manage the cluster directory and its resources.

    The cluster directory is used by :command:`ipengine`,
    :command:`ipcontroller` and :command:`ipcluster` to manage the
    configuration, logging and security of these applications.

    This object knows how to find, create and manage these directories. This
    should be used by any code that wants to handle cluster directories.
    """

    # Names of the managed subdirectories inside the cluster dir.
    security_dir_name = Unicode('security')
    log_dir_name = Unicode('log')
    pid_dir_name = Unicode('pid')
    # Absolute paths to the subdirectories; filled in by _location_changed.
    security_dir = Unicode(u'')
    log_dir = Unicode(u'')
    pid_dir = Unicode(u'')

    auto_create = Bool(False,
        help="""Whether to automatically create the ClusterDirectory if it does
        not exist""")
    overwrite = Bool(False,
        help="""Whether to overwrite existing config files""")
    location = Unicode(u'', config=True,
        help="""Set the cluster dir. This overrides the logic used by the
        `profile` option.""",
        )
    profile = Unicode(u'default', config=True,
        help="""The string name of the profile to be used. This determines the name
        of the cluster dir as: cluster_<profile>. The default profile is named
        'default'. The cluster directory is resolved this way if the
        `cluster_dir` option is not used."""
        )

    _location_isset = Bool(False) # flag for detecting multiply set location
    _new_dir = Bool(False) # flag for whether a new dir was created

    def __init__(self, **kwargs):
        """Create a ClusterDir.

        ``auto_create`` and ``overwrite`` are popped from ``kwargs`` and set
        *before* the superclass constructor so that ``_location_changed``
        (which may fire during construction) sees their final values.
        """
        for name in ('auto_create', 'overwrite'):
            v = kwargs.pop(name, None)
            if v is not None:
                setattr(self, name, v)
        super(ClusterDir, self).__init__(**kwargs)
        if not self.location:
            # No explicit location given: derive one from the profile name.
            self._profile_changed('profile', 'default', self.profile)

    def _location_changed(self, name, old, new):
        """Trait observer: validate/create `location` and set up its subdirs.

        Raises RuntimeError if the location was already set once, and
        ClusterDirError if the directory is missing and auto_create is False.
        """
        if self._location_isset:
            raise RuntimeError("Cannot set ClusterDir more than once.")
        self._location_isset = True
        if not os.path.isdir(new):
            if self.auto_create:
                os.makedirs(new)
                self._new_dir = True
            else:
                raise ClusterDirError('Directory not found: %s' % new)

        # ensure config files exist:
        self.copy_all_config_files(overwrite=self.overwrite)
        self.security_dir = os.path.join(new, self.security_dir_name)
        self.log_dir = os.path.join(new, self.log_dir_name)
        self.pid_dir = os.path.join(new, self.pid_dir_name)
        self.check_dirs()

    def _profile_changed(self, name, old, new):
        """Trait observer: resolve `location` as <ipython_dir>/cluster_<profile>."""
        if self._location_isset:
            raise RuntimeError("ClusterDir already set. Cannot set by profile.")
        self.location = os.path.join(get_ipython_dir(), 'cluster_' + new)

    def _log_dir_changed(self, name, old, new):
        self.check_log_dir()

    def check_log_dir(self):
        """Create the log directory if it does not exist."""
        if not os.path.isdir(self.log_dir):
            os.mkdir(self.log_dir)

    def _security_dir_changed(self, name, old, new):
        self.check_security_dir()

    def check_security_dir(self):
        """Ensure the security dir exists with owner-only (0o700) permissions.

        Connection files and keys live here, so the mode is always enforced,
        even for a pre-existing directory.
        """
        if not os.path.isdir(self.security_dir):
            os.mkdir(self.security_dir, 0o700)
        os.chmod(self.security_dir, 0o700)

    def _pid_dir_changed(self, name, old, new):
        self.check_pid_dir()

    def check_pid_dir(self):
        """Ensure the pid dir exists with owner-only (0o700) permissions."""
        if not os.path.isdir(self.pid_dir):
            os.mkdir(self.pid_dir, 0o700)
        os.chmod(self.pid_dir, 0o700)

    def check_dirs(self):
        """Ensure all managed subdirectories exist with correct permissions."""
        self.check_security_dir()
        self.check_log_dir()
        self.check_pid_dir()

    def copy_config_file(self, config_file, path=None, overwrite=False):
        """Copy a default config file into the active cluster directory.

        Default configuration files are kept in :mod:`IPython.config.default`.
        This function moves these from that location to the working cluster
        directory.  An existing destination file is only replaced when
        `overwrite` is True.
        """
        if path is None:
            # Locate the package directory holding the default config files.
            import IPython.config.default
            path = IPython.config.default.__file__.split(os.path.sep)[:-1]
            path = os.path.sep.join(path)
        src = os.path.join(path, config_file)
        dst = os.path.join(self.location, config_file)
        if not os.path.isfile(dst) or overwrite:
            shutil.copy(src, dst)

    def copy_all_config_files(self, path=None, overwrite=False):
        """Copy all config files into the active cluster directory."""
        for f in [u'ipcontroller_config.py', u'ipengine_config.py',
                  u'ipcluster_config.py']:
            self.copy_config_file(f, path=path, overwrite=overwrite)

    @classmethod
    def create_cluster_dir(cls, cluster_dir):
        """Create a new cluster directory given a full path.

        Parameters
        ----------
        cluster_dir : str
            The full path to the cluster directory. If it does exist, it will
            be used. If not, it will be created.
        """
        return ClusterDir(location=cluster_dir)

    @classmethod
    def create_cluster_dir_by_profile(cls, path, profile=u'default'):
        """Create a cluster dir by profile name and path.

        Parameters
        ----------
        path : str
            The path (directory) to put the cluster directory in.
        profile : str
            The name of the profile. The name of the cluster directory will
            be "cluster_<profile>".
        """
        if not os.path.isdir(path):
            raise ClusterDirError('Directory not found: %s' % path)
        cluster_dir = os.path.join(path, u'cluster_' + profile)
        return ClusterDir(location=cluster_dir)

    @classmethod
    def find_cluster_dir_by_profile(cls, ipython_dir, profile=u'default'):
        """Find an existing cluster dir by profile name, return its ClusterDir.

        This searches through a sequence of paths for a cluster dir. If it
        is not found, a :class:`ClusterDirError` exception will be raised.

        The search path algorithm is:
        1. ``os.getcwd()``
        2. ``ipython_dir``
        3. The directories found in the ":" separated
           :env:`IPCLUSTER_DIR_PATH` environment variable.

        Parameters
        ----------
        ipython_dir : unicode or str
            The IPython directory to use.
        profile : unicode or str
            The name of the profile. The name of the cluster directory
            will be "cluster_<profile>".
        """
        dirname = u'cluster_' + profile
        cluster_dir_paths = os.environ.get('IPCLUSTER_DIR_PATH','')
        if cluster_dir_paths:
            cluster_dir_paths = cluster_dir_paths.split(':')
        else:
            cluster_dir_paths = []
        paths = [os.getcwd(), ipython_dir] + cluster_dir_paths
        for p in paths:
            cluster_dir = os.path.join(p, dirname)
            if os.path.isdir(cluster_dir):
                return ClusterDir(location=cluster_dir)
        else:
            raise ClusterDirError('Cluster directory not found in paths: %s' % dirname)

    @classmethod
    def find_cluster_dir(cls, cluster_dir):
        """Find/create a cluster dir and return its ClusterDir.

        This will create the cluster directory if it doesn't exist.

        Parameters
        ----------
        cluster_dir : unicode or str
            The path of the cluster directory. This is expanded using
            :func:`IPython.utils.path.expand_path`.
        """
        cluster_dir = expand_path(cluster_dir)
        if not os.path.isdir(cluster_dir):
            raise ClusterDirError('Cluster directory not found: %s' % cluster_dir)
        return ClusterDir(location=cluster_dir)
257
257
258
258
259 #-----------------------------------------------------------------------------
259 #-----------------------------------------------------------------------------
260 # Crash handler for this application
260 # Crash handler for this application
261 #-----------------------------------------------------------------------------
261 #-----------------------------------------------------------------------------
262
262
263
263
264 _message_template = """\
264 _message_template = """\
265 Oops, $self.app_name crashed. We do our best to make it stable, but...
265 Oops, $self.app_name crashed. We do our best to make it stable, but...
266
266
267 A crash report was automatically generated with the following information:
267 A crash report was automatically generated with the following information:
268 - A verbatim copy of the crash traceback.
268 - A verbatim copy of the crash traceback.
269 - Data on your current $self.app_name configuration.
269 - Data on your current $self.app_name configuration.
270
270
271 It was left in the file named:
271 It was left in the file named:
272 \t'$self.crash_report_fname'
272 \t'$self.crash_report_fname'
273 If you can email this file to the developers, the information in it will help
273 If you can email this file to the developers, the information in it will help
274 them in understanding and correcting the problem.
274 them in understanding and correcting the problem.
275
275
276 You can mail it to: $self.contact_name at $self.contact_email
276 You can mail it to: $self.contact_name at $self.contact_email
277 with the subject '$self.app_name Crash Report'.
277 with the subject '$self.app_name Crash Report'.
278
278
279 If you want to do it now, the following command will work (under Unix):
279 If you want to do it now, the following command will work (under Unix):
280 mail -s '$self.app_name Crash Report' $self.contact_email < $self.crash_report_fname
280 mail -s '$self.app_name Crash Report' $self.contact_email < $self.crash_report_fname
281
281
282 To ensure accurate tracking of this issue, please file a report about it at:
282 To ensure accurate tracking of this issue, please file a report about it at:
283 $self.bug_tracker
283 $self.bug_tracker
284 """
284 """
285
285
class ClusterDirCrashHandler(CrashHandler):
    """sys.excepthook for IPython itself, leaves a detailed report on disk."""

    message_template = _message_template

    def __init__(self, app):
        # Crash reports for the parallel apps are routed to the parallel
        # maintainer listed in IPython.core.release.
        contact_name, contact_email = release.authors['Min']
        bug_tracker = 'http://github.com/ipython/ipython/issues'
        super(ClusterDirCrashHandler, self).__init__(
            app, contact_name, contact_email, bug_tracker
        )
298
298
299
299
300 #-----------------------------------------------------------------------------
300 #-----------------------------------------------------------------------------
301 # Main application
301 # Main application
302 #-----------------------------------------------------------------------------
302 #-----------------------------------------------------------------------------
# Command-line aliases shared by all cluster-dir applications: each maps a
# short flag name to the fully-qualified configurable trait it sets.
base_aliases = {
    'profile': 'ClusterDir.profile',
    'cluster_dir': 'ClusterDir.location',
    'auto_create': 'ClusterDirApplication.auto_create',
    'log_level': 'ClusterApplication.log_level',
    'work_dir': 'ClusterApplication.work_dir',
    'log_to_file': 'ClusterApplication.log_to_file',
    'clean_logs': 'ClusterApplication.clean_logs',
    'log_url': 'ClusterApplication.log_url',
}
313
313
# Boolean command-line flags shared by all cluster-dir applications.
# Each value is (config-dict, help-string); the loop below wraps the raw
# dicts in Config objects so they merge correctly with loaded config.
base_flags = {
    'debug' : ( {"ClusterApplication" : {"log_level" : logging.DEBUG}}, "set loglevel to DEBUG"),
    'quiet' : ( {"ClusterApplication" : {"log_level" : logging.CRITICAL}}, "set loglevel to CRITICAL (minimal output)"),
    'log-to-file' : ( {"ClusterApplication" : {"log_to_file" : True}}, "redirect log output to a file"),
}
for flag_name in base_flags:
    cfg, help_msg = base_flags[flag_name]
    base_flags[flag_name] = (Config(cfg), help_msg)
321
321
322 class ClusterApplication(BaseIPythonApplication):
322 class ClusterApplication(BaseIPythonApplication):
323 """An application that puts everything into a cluster directory.
323 """An application that puts everything into a cluster directory.
324
324
325 Instead of looking for things in the ipython_dir, this type of application
325 Instead of looking for things in the ipython_dir, this type of application
326 will use its own private directory called the "cluster directory"
326 will use its own private directory called the "cluster directory"
327 for things like config files, log files, etc.
327 for things like config files, log files, etc.
328
328
329 The cluster directory is resolved as follows:
329 The cluster directory is resolved as follows:
330
330
331 * If the ``--cluster-dir`` option is given, it is used.
331 * If the ``cluster_dir`` option is given, it is used.
332 * If ``--cluster-dir`` is not given, the application directory is
332 * If ``cluster_dir`` is not given, the application directory is
333 resolve using the profile name as ``cluster_<profile>``. The search
333 resolve using the profile name as ``cluster_<profile>``. The search
334 path for this directory is then i) cwd if it is found there
334 path for this directory is then i) cwd if it is found there
335 and ii) in ipython_dir otherwise.
335 and ii) in ipython_dir otherwise.
336
336
337 The config file for the application is to be put in the cluster
337 The config file for the application is to be put in the cluster
338 dir and named the value of the ``config_file_name`` class attribute.
338 dir and named the value of the ``config_file_name`` class attribute.
339 """
339 """
340
340
341 crash_handler_class = ClusterDirCrashHandler
341 crash_handler_class = ClusterDirCrashHandler
342 auto_create_cluster_dir = Bool(True, config=True,
342 auto_create_cluster_dir = Bool(True, config=True,
343 help="whether to create the cluster_dir if it doesn't exist")
343 help="whether to create the cluster_dir if it doesn't exist")
344 cluster_dir = Instance(ClusterDir)
344 cluster_dir = Instance(ClusterDir)
345 classes = [ClusterDir]
345 classes = [ClusterDir]
346
346
347 def _log_level_default(self):
347 def _log_level_default(self):
348 # temporarily override default_log_level to INFO
348 # temporarily override default_log_level to INFO
349 return logging.INFO
349 return logging.INFO
350
350
351 work_dir = Unicode(os.getcwdu(), config=True,
351 work_dir = Unicode(os.getcwdu(), config=True,
352 help='Set the working dir for the process.'
352 help='Set the working dir for the process.'
353 )
353 )
354 def _work_dir_changed(self, name, old, new):
354 def _work_dir_changed(self, name, old, new):
355 self.work_dir = unicode(expand_path(new))
355 self.work_dir = unicode(expand_path(new))
356
356
357 log_to_file = Bool(config=True,
357 log_to_file = Bool(config=True,
358 help="whether to log to a file")
358 help="whether to log to a file")
359
359
360 clean_logs = Bool(False, shortname='--clean-logs', config=True,
360 clean_logs = Bool(False, shortname='--clean-logs', config=True,
361 help="whether to cleanup old logfiles before starting")
361 help="whether to cleanup old logfiles before starting")
362
362
363 log_url = Unicode('', shortname='--log-url', config=True,
363 log_url = Unicode('', shortname='--log-url', config=True,
364 help="The ZMQ URL of the iplogger to aggregate logging.")
364 help="The ZMQ URL of the iplogger to aggregate logging.")
365
365
366 config_file = Unicode(u'', config=True,
366 config_file = Unicode(u'', config=True,
367 help="""Path to ipcontroller configuration file. The default is to use
367 help="""Path to ipcontroller configuration file. The default is to use
368 <appname>_config.py, as found by cluster-dir."""
368 <appname>_config.py, as found by cluster-dir."""
369 )
369 )
370
370
371 loop = Instance('zmq.eventloop.ioloop.IOLoop')
371 loop = Instance('zmq.eventloop.ioloop.IOLoop')
372 def _loop_default(self):
372 def _loop_default(self):
373 from zmq.eventloop.ioloop import IOLoop
373 from zmq.eventloop.ioloop import IOLoop
374 return IOLoop.instance()
374 return IOLoop.instance()
375
375
376 aliases = Dict(base_aliases)
376 aliases = Dict(base_aliases)
377 flags = Dict(base_flags)
377 flags = Dict(base_flags)
378
378
379 def init_clusterdir(self):
379 def init_clusterdir(self):
380 """This resolves the cluster directory.
380 """This resolves the cluster directory.
381
381
382 This tries to find the cluster directory and if successful, it will
382 This tries to find the cluster directory and if successful, it will
383 have done:
383 have done:
384 * Sets ``self.cluster_dir_obj`` to the :class:`ClusterDir` object for
384 * Sets ``self.cluster_dir_obj`` to the :class:`ClusterDir` object for
385 the application.
385 the application.
386 * Sets ``self.cluster_dir`` attribute of the application and config
386 * Sets ``self.cluster_dir`` attribute of the application and config
387 objects.
387 objects.
388
388
389 The algorithm used for this is as follows:
389 The algorithm used for this is as follows:
390 1. Try ``Global.cluster_dir``.
390 1. Try ``Global.cluster_dir``.
391 2. Try using ``Global.profile``.
391 2. Try using ``Global.profile``.
392 3. If both of these fail and ``self.auto_create_cluster_dir`` is
392 3. If both of these fail and ``self.auto_create_cluster_dir`` is
393 ``True``, then create the new cluster dir in the IPython directory.
393 ``True``, then create the new cluster dir in the IPython directory.
394 4. If all fails, then raise :class:`ClusterDirError`.
394 4. If all fails, then raise :class:`ClusterDirError`.
395 """
395 """
396 try:
396 try:
397 self.cluster_dir = ClusterDir(auto_create=self.auto_create_cluster_dir, config=self.config)
397 self.cluster_dir = ClusterDir(auto_create=self.auto_create_cluster_dir, config=self.config)
398 except ClusterDirError as e:
398 except ClusterDirError as e:
399 self.log.fatal("Error initializing cluster dir: %s"%e)
399 self.log.fatal("Error initializing cluster dir: %s"%e)
400 self.log.fatal("A cluster dir must be created before running this command.")
400 self.log.fatal("A cluster dir must be created before running this command.")
401 self.log.fatal("Do 'ipcluster create -h' or 'ipcluster list -h' for more "
401 self.log.fatal("Do 'ipcluster create -h' or 'ipcluster list -h' for more "
402 "information about creating and listing cluster dirs."
402 "information about creating and listing cluster dirs."
403 )
403 )
404 self.exit(1)
404 self.exit(1)
405
405
406 if self.cluster_dir._new_dir:
406 if self.cluster_dir._new_dir:
407 self.log.info('Creating new cluster dir: %s' % \
407 self.log.info('Creating new cluster dir: %s' % \
408 self.cluster_dir.location)
408 self.cluster_dir.location)
409 else:
409 else:
410 self.log.info('Using existing cluster dir: %s' % \
410 self.log.info('Using existing cluster dir: %s' % \
411 self.cluster_dir.location)
411 self.cluster_dir.location)
412
412
413 def initialize(self, argv=None):
413 def initialize(self, argv=None):
414 """initialize the app"""
414 """initialize the app"""
415 self.init_crash_handler()
415 self.init_crash_handler()
416 self.parse_command_line(argv)
416 self.parse_command_line(argv)
417 cl_config = self.config
417 cl_config = self.config
418 self.init_clusterdir()
418 self.init_clusterdir()
419 if self.config_file:
419 if self.config_file:
420 self.load_config_file(self.config_file)
420 self.load_config_file(self.config_file)
421 elif self.default_config_file_name:
421 elif self.default_config_file_name:
422 try:
422 try:
423 self.load_config_file(self.default_config_file_name,
423 self.load_config_file(self.default_config_file_name,
424 path=self.cluster_dir.location)
424 path=self.cluster_dir.location)
425 except IOError:
425 except IOError:
426 self.log.warn("Warning: Default config file not found")
426 self.log.warn("Warning: Default config file not found")
427 # command-line should *override* config file, but command-line is necessary
427 # command-line should *override* config file, but command-line is necessary
428 # to determine clusterdir, etc.
428 # to determine clusterdir, etc.
429 self.update_config(cl_config)
429 self.update_config(cl_config)
430 self.to_work_dir()
430 self.to_work_dir()
431 self.reinit_logging()
431 self.reinit_logging()
432
432
433 def to_work_dir(self):
433 def to_work_dir(self):
434 wd = self.work_dir
434 wd = self.work_dir
435 if unicode(wd) != os.getcwdu():
435 if unicode(wd) != os.getcwdu():
436 os.chdir(wd)
436 os.chdir(wd)
437 self.log.info("Changing to working dir: %s" % wd)
437 self.log.info("Changing to working dir: %s" % wd)
438 # This is the working dir by now.
438 # This is the working dir by now.
439 sys.path.insert(0, '')
439 sys.path.insert(0, '')
440
440
def load_config_file(self, filename, path=None):
    """Load a .py based config file by filename and path.

    Deliberately delegates to ``config.application.Application`` rather
    than the less flexible
    ``core.newapplication.BaseIPythonApplication.load_config_file``.
    """
    # NOTE: the commented-out load_default_config_file stub that used to
    # live here was dead code and has been removed.
    return Application.load_config_file(self, filename, path=path)
450
450
# disable URL-logging
def reinit_logging(self):
    """Redirect logging to a per-process file in the cluster log dir.

    If ``clean_logs`` is set, stale ``<name>-<pid>.(log|err|out)`` files
    are deleted first.  If ``log_to_file`` is set, the current handler is
    swapped for one writing to ``<name>-<pid>.log``.
    """
    log_dir = self.cluster_dir.log_dir
    if self.clean_logs:
        # Remove log files left behind by previous runs of this app.
        stale = re.compile(r'%s-\d+\.(log|err|out)' % self.name)
        for fname in os.listdir(log_dir):
            if stale.match(fname):
                os.remove(os.path.join(log_dir, fname))
    if self.log_to_file:
        # Start logging to the new, pid-stamped log file.
        logfile = os.path.join(log_dir,
                               self.name + u'-' + str(os.getpid()) + u'.log')
        open_log_file = open(logfile, 'w')
    else:
        open_log_file = None
    if open_log_file is not None:
        self.log.removeHandler(self._log_handler)
        self._log_handler = logging.StreamHandler(open_log_file)
        self._log_formatter = logging.Formatter("[%(name)s] %(message)s")
        self._log_handler.setFormatter(self._log_formatter)
        self.log.addHandler(self._log_handler)
472
472
def write_pid_file(self, overwrite=False):
    """Create a .pid file in the pid_dir holding my pid.

    This must be called after pre_construct, which sets `self.pid_dir`.

    Raises
    ------
    PIDFileError
        If the pid file already exists and *overwrite* is false.
    """
    pid_file = os.path.join(self.cluster_dir.pid_dir, self.name + u'.pid')
    if os.path.isfile(pid_file):
        pid = self.get_pid_from_file()
        if not overwrite:
            raise PIDFileError(
                'The pid file [%s] already exists. \nThis could mean that this '
                'server is already running with [pid=%s].' % (pid_file, pid)
            )
    self.log.info("Creating pid file: %s" % pid_file)
    with open(pid_file, 'w') as f:
        f.write(repr(os.getpid())+'\n')
490
490
def remove_pid_file(self):
    """Remove the pid file, tolerating filesystem failures.

    This should be called at shutdown by registering a callback with
    :func:`reactor.addSystemEventTrigger`. This needs to return
    ``None``.
    """
    pid_file = os.path.join(self.cluster_dir.pid_dir, self.name + u'.pid')
    if os.path.isfile(pid_file):
        try:
            self.log.info("Removing pid file: %s" % pid_file)
            os.remove(pid_file)
        except OSError:
            # Only filesystem errors are expected here; a bare except
            # would also swallow KeyboardInterrupt/SystemExit.
            self.log.warn("Error removing the pid file: %s" % pid_file)
505
505
def get_pid_from_file(self):
    """Return the pid stored in the pid file.

    Raises
    ------
    PIDFileError
        If the pid file does not exist.
    """
    pid_file = os.path.join(self.cluster_dir.pid_dir, self.name + u'.pid')
    if not os.path.isfile(pid_file):
        raise PIDFileError('pid file not found: %s' % pid_file)
    with open(pid_file, 'r') as f:
        return int(f.read().strip())
518
518
def check_pid(self, pid):
    """Best-effort check of whether *pid* is a running process.

    On Windows, uses ``OpenProcess`` via ctypes; elsewhere, scans the
    output of ``ps x``.  If the check itself fails, assume the process
    *is* running — the safer default for callers deciding whether a
    cluster is already up.
    """
    if os.name == 'nt':
        try:
            import ctypes
            # returns 0 if no such process (of ours) exists
            # positive int otherwise
            p = ctypes.windll.kernel32.OpenProcess(1,0,pid)
        except Exception:
            self.log.warn(
                "Could not determine whether pid %i is running via `OpenProcess`. "
                " Making the likely assumption that it is."%pid
            )
            return True
        return bool(p)
    else:
        try:
            p = Popen(['ps','x'], stdout=PIPE, stderr=PIPE)
            output,_ = p.communicate()
        except OSError:
            self.log.warn(
                "Could not determine whether pid %i is running via `ps x`. "
                " Making the likely assumption that it is."%pid
            )
            return True
        # BUGFIX: communicate() returns bytes on Python 3; matching a str
        # pattern against bytes raises TypeError, so decode first.
        if isinstance(output, bytes):
            output = output.decode('utf-8', 'replace')
        pids = map(int, re.findall(r'^\W*\d+', output, re.MULTILINE))
        return pid in pids
@@ -1,537 +1,542 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # encoding: utf-8
2 # encoding: utf-8
3 """
3 """
4 The ipcluster application.
4 The ipcluster application.
5 """
5 """
6
6
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2008-2009 The IPython Development Team
8 # Copyright (C) 2008-2009 The IPython Development Team
9 #
9 #
10 # Distributed under the terms of the BSD License. The full license is in
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13
13
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15 # Imports
15 # Imports
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 import errno
18 import errno
19 import logging
19 import logging
20 import os
20 import os
21 import re
21 import re
22 import signal
22 import signal
23
23
24 from subprocess import check_call, CalledProcessError, PIPE
24 from subprocess import check_call, CalledProcessError, PIPE
25 import zmq
25 import zmq
26 from zmq.eventloop import ioloop
26 from zmq.eventloop import ioloop
27
27
28 from IPython.config.application import Application, boolean_flag
28 from IPython.config.application import Application, boolean_flag
29 from IPython.config.loader import Config
29 from IPython.config.loader import Config
30 from IPython.core.newapplication import BaseIPythonApplication
30 from IPython.core.newapplication import BaseIPythonApplication
31 from IPython.utils.importstring import import_item
31 from IPython.utils.importstring import import_item
32 from IPython.utils.traitlets import Int, Unicode, Bool, CFloat, Dict, List
32 from IPython.utils.traitlets import Int, Unicode, Bool, CFloat, Dict, List
33
33
34 from IPython.parallel.apps.clusterdir import (
34 from IPython.parallel.apps.clusterdir import (
35 ClusterApplication, ClusterDirError, ClusterDir,
35 ClusterApplication, ClusterDirError, ClusterDir,
36 PIDFileError,
36 PIDFileError,
37 base_flags, base_aliases
37 base_flags, base_aliases
38 )
38 )
39
39
40
40
41 #-----------------------------------------------------------------------------
41 #-----------------------------------------------------------------------------
42 # Module level variables
42 # Module level variables
43 #-----------------------------------------------------------------------------
43 #-----------------------------------------------------------------------------
44
44
45
45
default_config_file_name = u'ipcluster_config.py'


_description = """Start an IPython cluster for parallel computing.

An IPython cluster consists of 1 controller and 1 or more engines.
This command automates the startup of these processes using a wide
range of startup methods (SSH, local processes, PBS, mpiexec,
Windows HPC Server 2008). To start a cluster with 4 engines on your
local host simply do 'ipcluster start n=4'. For more complex usage
you will typically do 'ipcluster create profile=mycluster', then edit
configuration files, followed by 'ipcluster start profile=mycluster n=4'.
"""


# Exit codes for ipcluster

# This will be the exit code if the ipcluster appears to be running because
# a .pid file exists
ALREADY_STARTED = 10


# This will be the exit code if ipcluster stop is run, but there is not .pid
# file to be found.
ALREADY_STOPPED = 11

# This will be the exit code if ipcluster engines is run, but there is not .pid
# file to be found.
NO_CLUSTER = 12


#-----------------------------------------------------------------------------
# Main application
#-----------------------------------------------------------------------------

# Per-subcommand help text; first line is the short summary.
start_help = """Start an IPython cluster for parallel computing

Start an ipython cluster by its profile name or cluster
directory. Cluster directories contain configuration, log and
security related files and are named using the convention
'cluster_<profile>' and should be creating using the 'start'
subcommand of 'ipcluster'. If your cluster directory is in
the cwd or the ipython directory, you can simply refer to it
using its profile name, 'ipcluster start n=4 profile=<profile>`,
otherwise use the 'cluster_dir' option.
"""
stop_help = """Stop a running IPython cluster

Stop a running ipython cluster by its profile name or cluster
directory. Cluster directories are named using the convention
'cluster_<profile>'. If your cluster directory is in
the cwd or the ipython directory, you can simply refer to it
using its profile name, 'ipcluster stop profile=<profile>`, otherwise
use the 'cluster_dir' option.
"""
engines_help = """Start engines connected to an existing IPython cluster

Start one or more engines to connect to an existing Cluster
by profile name or cluster directory.
Cluster directories contain configuration, log and
security related files and are named using the convention
'cluster_<profile>' and should be creating using the 'start'
subcommand of 'ipcluster'. If your cluster directory is in
the cwd or the ipython directory, you can simply refer to it
using its profile name, 'ipcluster engines n=4 profile=<profile>`,
otherwise use the 'cluster_dir' option.
"""
create_help = """Create an ipcluster profile by name

Create an ipython cluster directory by its profile name or
cluster directory path. Cluster directories contain
configuration, log and security related files and are named
using the convention 'cluster_<profile>'. By default they are
located in your ipython directory. Once created, you will
probably need to edit the configuration files in the cluster
directory to configure your cluster. Most users will create a
cluster directory by profile name,
`ipcluster create profile=mycluster`, which will put the directory
in `<ipython_dir>/cluster_mycluster`.
"""
list_help = """List available cluster profiles

List all available clusters, by cluster directory, that can
be found in the current working directly or in the ipython
directory. Cluster directories are named using the convention
'cluster_<profile>'.
"""
class IPClusterList(BaseIPythonApplication):
    """'ipcluster list': print every cluster directory that can be found."""

    name = u'ipcluster-list'
    description = list_help

    # empty aliases
    aliases = Dict()
    flags = Dict(base_flags)

    def _log_level_default(self):
        # INFO, so the search-path message below is visible by default.
        return 20

    def list_cluster_dirs(self):
        """Search cwd, the ipython dir and $IPCLUSTER_DIR_PATH for clusters."""
        env_paths = os.environ.get('IPCLUSTER_DIR_PATH', '')
        extra_paths = env_paths.split(':') if env_paths else []

        # De-duplicate while combining the default and extra search paths.
        paths = list(set([os.getcwd(), self.ipython_dir] + extra_paths))

        self.log.info('Searching for cluster dirs in paths: %r' % paths)
        for path in paths:
            for entry in os.listdir(path):
                full_path = os.path.join(path, entry)
                if os.path.isdir(full_path) and entry.startswith('cluster_'):
                    profile = full_path.split('_')[-1]
                    start_cmd = 'ipcluster start profile=%s n=4' % profile
                    print(start_cmd + " ==> " + full_path)

    def start(self):
        self.list_cluster_dirs()
165
170
# Flags for 'ipcluster create': the base flags plus a reset/no-reset pair
# controlling whether existing config files are overwritten.
create_flags = dict(base_flags)
create_flags.update(boolean_flag('reset', 'IPClusterCreate.reset',
                "reset config files to defaults", "leave existing config files"))
class IPClusterCreate(ClusterApplication):
    """'ipcluster create': build a cluster dir and seed its config files."""

    name = u'ipcluster'
    description = create_help
    # Creating the cluster dir is the whole point of this subcommand.
    auto_create_cluster_dir = Bool(True,
        help="whether to create the cluster_dir if it doesn't exist")
    default_config_file_name = default_config_file_name

    reset = Bool(False, config=True,
        help="Whether to reset config files as part of 'create'."
    )

    flags = Dict(create_flags)

    aliases = Dict(dict(profile='ClusterDir.profile'))

    classes = [ClusterDir]

    def init_clusterdir(self):
        super(IPClusterCreate, self).init_clusterdir()
        self.log.info('Copying default config files to cluster directory '
                      '[overwrite=%r]' % (self.reset,))
        self.cluster_dir.copy_all_config_files(overwrite=self.reset)

    def initialize(self, argv=None):
        # 'create' needs no config-file loading; just the cluster dir.
        self.parse_command_line(argv)
        self.init_clusterdir()
197
202
# Command-line aliases specific to 'ipcluster stop'.
stop_aliases = {
    'signal': 'IPClusterStop.signal',
    'profile': 'ClusterDir.profile',
    'cluster_dir': 'ClusterDir.location',
}
203
208
class IPClusterStop(ClusterApplication):
    """'ipcluster stop': signal a running cluster to shut down."""

    name = u'ipcluster'
    description = stop_help
    # Stopping a cluster must never create a cluster dir as a side effect.
    auto_create_cluster_dir = Bool(False)
    default_config_file_name = default_config_file_name

    signal = Int(signal.SIGINT, config=True,
        help="signal to use for stopping processes.")

    aliases = Dict(stop_aliases)

    def init_clusterdir(self):
        try:
            super(IPClusterStop, self).init_clusterdir()
        except ClusterDirError as e:
            self.log.fatal("Failed ClusterDir init: %s"%e)
            self.exit(1)

    def start(self):
        """Start the app for the stop subcommand."""
        try:
            pid = self.get_pid_from_file()
        except PIDFileError:
            self.log.critical(
                'Could not read pid file, cluster is probably not running.'
            )
            # Here I exit with a unusual exit status that other processes
            # can watch for to learn how I existed.
            self.remove_pid_file()
            self.exit(ALREADY_STOPPED)

        if not self.check_pid(pid):
            self.log.critical(
                'Cluster [pid=%r] is not running.' % pid
            )
            self.remove_pid_file()
            # Here I exit with a unusual exit status that other processes
            # can watch for to learn how I existed.
            self.exit(ALREADY_STOPPED)

        elif os.name=='posix':
            sig = self.signal
            self.log.info(
                "Stopping cluster [pid=%r] with [signal=%r]" % (pid, sig)
            )
            try:
                os.kill(pid, sig)
            except OSError:
                self.log.error("Stopping cluster failed, assuming already dead.",
                        exc_info=True)
            self.remove_pid_file()
        elif os.name=='nt':
            try:
                # kill the whole tree
                p = check_call(['taskkill', '-pid', str(pid), '-t', '-f'], stdout=PIPE,stderr=PIPE)
            except (CalledProcessError, OSError):
                self.log.error("Stopping cluster failed, assuming already dead.",
                        exc_info=True)
            self.remove_pid_file()
263
268
# Command-line aliases for 'ipcluster engines': base aliases plus the
# engine count and engine-set launcher class.
engine_aliases = dict(base_aliases)
engine_aliases['n'] = 'IPClusterEngines.n'
engine_aliases['elauncher'] = 'IPClusterEngines.engine_launcher_class'
270 class IPClusterEngines(ClusterApplication):
275 class IPClusterEngines(ClusterApplication):
271
276
272 name = u'ipcluster'
277 name = u'ipcluster'
273 description = engines_help
278 description = engines_help
274 usage = None
279 usage = None
275 default_config_file_name = default_config_file_name
280 default_config_file_name = default_config_file_name
276 default_log_level = logging.INFO
281 default_log_level = logging.INFO
277 auto_create_cluster_dir = Bool(False)
282 auto_create_cluster_dir = Bool(False)
278 classes = List()
283 classes = List()
279 def _classes_default(self):
284 def _classes_default(self):
280 from IPython.parallel.apps import launcher
285 from IPython.parallel.apps import launcher
281 launchers = launcher.all_launchers
286 launchers = launcher.all_launchers
282 eslaunchers = [ l for l in launchers if 'EngineSet' in l.__name__]
287 eslaunchers = [ l for l in launchers if 'EngineSet' in l.__name__]
283 return [ClusterDir]+eslaunchers
288 return [ClusterDir]+eslaunchers
284
289
285 n = Int(2, config=True,
290 n = Int(2, config=True,
286 help="The number of engines to start.")
291 help="The number of engines to start.")
287
292
288 engine_launcher_class = Unicode('LocalEngineSetLauncher',
293 engine_launcher_class = Unicode('LocalEngineSetLauncher',
289 config=True,
294 config=True,
290 help="The class for launching a set of Engines."
295 help="The class for launching a set of Engines."
291 )
296 )
292 daemonize = Bool(False, config=True,
297 daemonize = Bool(False, config=True,
293 help='Daemonize the ipcluster program. This implies --log-to-file')
298 help='Daemonize the ipcluster program. This implies --log-to-file')
294
299
295 def _daemonize_changed(self, name, old, new):
300 def _daemonize_changed(self, name, old, new):
296 if new:
301 if new:
297 self.log_to_file = True
302 self.log_to_file = True
298
303
299 aliases = Dict(engine_aliases)
304 aliases = Dict(engine_aliases)
300 # flags = Dict(flags)
305 # flags = Dict(flags)
301 _stopping = False
306 _stopping = False
302
307
303 def initialize(self, argv=None):
308 def initialize(self, argv=None):
304 super(IPClusterEngines, self).initialize(argv)
309 super(IPClusterEngines, self).initialize(argv)
305 self.init_signal()
310 self.init_signal()
306 self.init_launchers()
311 self.init_launchers()
307
312
308 def init_launchers(self):
313 def init_launchers(self):
309 self.engine_launcher = self.build_launcher(self.engine_launcher_class)
314 self.engine_launcher = self.build_launcher(self.engine_launcher_class)
310 self.engine_launcher.on_stop(lambda r: self.loop.stop())
315 self.engine_launcher.on_stop(lambda r: self.loop.stop())
311
316
312 def init_signal(self):
317 def init_signal(self):
313 # Setup signals
318 # Setup signals
314 signal.signal(signal.SIGINT, self.sigint_handler)
319 signal.signal(signal.SIGINT, self.sigint_handler)
315
320
316 def build_launcher(self, clsname):
321 def build_launcher(self, clsname):
317 """import and instantiate a Launcher based on importstring"""
322 """import and instantiate a Launcher based on importstring"""
318 if '.' not in clsname:
323 if '.' not in clsname:
319 # not a module, presume it's the raw name in apps.launcher
324 # not a module, presume it's the raw name in apps.launcher
320 clsname = 'IPython.parallel.apps.launcher.'+clsname
325 clsname = 'IPython.parallel.apps.launcher.'+clsname
321 # print repr(clsname)
326 # print repr(clsname)
322 klass = import_item(clsname)
327 klass = import_item(clsname)
323
328
324 launcher = klass(
329 launcher = klass(
325 work_dir=self.cluster_dir.location, config=self.config, logname=self.log.name
330 work_dir=self.cluster_dir.location, config=self.config, logname=self.log.name
326 )
331 )
327 return launcher
332 return launcher
328
333
329 def start_engines(self):
334 def start_engines(self):
330 self.log.info("Starting %i engines"%self.n)
335 self.log.info("Starting %i engines"%self.n)
331 self.engine_launcher.start(
336 self.engine_launcher.start(
332 self.n,
337 self.n,
333 cluster_dir=self.cluster_dir.location
338 cluster_dir=self.cluster_dir.location
334 )
339 )
335
340
336 def stop_engines(self):
341 def stop_engines(self):
337 self.log.info("Stopping Engines...")
342 self.log.info("Stopping Engines...")
338 if self.engine_launcher.running:
343 if self.engine_launcher.running:
339 d = self.engine_launcher.stop()
344 d = self.engine_launcher.stop()
340 return d
345 return d
341 else:
346 else:
342 return None
347 return None
343
348
344 def stop_launchers(self, r=None):
349 def stop_launchers(self, r=None):
345 if not self._stopping:
350 if not self._stopping:
346 self._stopping = True
351 self._stopping = True
347 self.log.error("IPython cluster: stopping")
352 self.log.error("IPython cluster: stopping")
348 self.stop_engines()
353 self.stop_engines()
349 # Wait a few seconds to let things shut down.
354 # Wait a few seconds to let things shut down.
350 dc = ioloop.DelayedCallback(self.loop.stop, 4000, self.loop)
355 dc = ioloop.DelayedCallback(self.loop.stop, 4000, self.loop)
351 dc.start()
356 dc.start()
352
357
353 def sigint_handler(self, signum, frame):
358 def sigint_handler(self, signum, frame):
354 self.log.debug("SIGINT received, stopping launchers...")
359 self.log.debug("SIGINT received, stopping launchers...")
355 self.stop_launchers()
360 self.stop_launchers()
356
361
357 def start_logging(self):
362 def start_logging(self):
358 # Remove old log files of the controller and engine
363 # Remove old log files of the controller and engine
359 if self.clean_logs:
364 if self.clean_logs:
360 log_dir = self.cluster_dir.log_dir
365 log_dir = self.cluster_dir.log_dir
361 for f in os.listdir(log_dir):
366 for f in os.listdir(log_dir):
362 if re.match(r'ip(engine|controller)z-\d+\.(log|err|out)',f):
367 if re.match(r'ip(engine|controller)z-\d+\.(log|err|out)',f):
363 os.remove(os.path.join(log_dir, f))
368 os.remove(os.path.join(log_dir, f))
364 # This will remove old log files for ipcluster itself
369 # This will remove old log files for ipcluster itself
365 # super(IPClusterApp, self).start_logging()
370 # super(IPClusterApp, self).start_logging()
366
371
367 def start(self):
372 def start(self):
368 """Start the app for the engines subcommand."""
373 """Start the app for the engines subcommand."""
369 self.log.info("IPython cluster: started")
374 self.log.info("IPython cluster: started")
370 # First see if the cluster is already running
375 # First see if the cluster is already running
371
376
372 # Now log and daemonize
377 # Now log and daemonize
373 self.log.info(
378 self.log.info(
374 'Starting engines with [daemon=%r]' % self.daemonize
379 'Starting engines with [daemon=%r]' % self.daemonize
375 )
380 )
376 # TODO: Get daemonize working on Windows or as a Windows Server.
381 # TODO: Get daemonize working on Windows or as a Windows Server.
377 if self.daemonize:
382 if self.daemonize:
378 if os.name=='posix':
383 if os.name=='posix':
379 from twisted.scripts._twistd_unix import daemonize
384 from twisted.scripts._twistd_unix import daemonize
380 daemonize()
385 daemonize()
381
386
382 dc = ioloop.DelayedCallback(self.start_engines, 0, self.loop)
387 dc = ioloop.DelayedCallback(self.start_engines, 0, self.loop)
383 dc.start()
388 dc.start()
384 # Now write the new pid file AFTER our new forked pid is active.
389 # Now write the new pid file AFTER our new forked pid is active.
385 # self.write_pid_file()
390 # self.write_pid_file()
386 try:
391 try:
387 self.loop.start()
392 self.loop.start()
388 except KeyboardInterrupt:
393 except KeyboardInterrupt:
389 pass
394 pass
390 except zmq.ZMQError as e:
395 except zmq.ZMQError as e:
391 if e.errno == errno.EINTR:
396 if e.errno == errno.EINTR:
392 pass
397 pass
393 else:
398 else:
394 raise
399 raise
395
400
# Command-line aliases for the `ipcluster start` subcommand: everything the
# engines subcommand accepts, plus start-only options.
start_aliases = {}
start_aliases.update(engine_aliases)
start_aliases.update(dict(
    delay='IPClusterStart.delay',
    clean_logs='IPClusterStart.clean_logs',
))
402
407
class IPClusterStart(IPClusterEngines):
    """`ipcluster start`: launch a controller plus N engines.

    Extends the engines subcommand with a controller launcher, pid-file
    bookkeeping, and a configurable delay between starting the controller
    and starting the engines.
    """

    name = u'ipcluster'
    description = start_help
    usage = None
    default_config_file_name = default_config_file_name
    default_log_level = logging.INFO
    auto_create_cluster_dir = Bool(True, config=True,
        help="whether to create the cluster_dir if it doesn't exist")
    classes = List()
    def _classes_default(self,):
        # Imported lazily to avoid a hard dependency on launcher at import time.
        from IPython.parallel.apps import launcher
        return [ClusterDir]+launcher.all_launchers

    clean_logs = Bool(True, config=True,
        help="whether to cleanup old logs before starting")

    delay = CFloat(1., config=True,
        help="delay (in s) between starting the controller and the engines")

    controller_launcher_class = Unicode('LocalControllerLauncher',
        config=True,
        help="The class for launching a Controller."
        )
    reset = Bool(False, config=True,
        help="Whether to reset config files as part of '--create'."
        )

    # flags = Dict(flags)
    aliases = Dict(start_aliases)

    def init_launchers(self):
        """Build both launchers; stop everything when the controller stops."""
        self.controller_launcher = self.build_launcher(self.controller_launcher_class)
        self.engine_launcher = self.build_launcher(self.engine_launcher_class)
        self.controller_launcher.on_stop(self.stop_launchers)

    def start_controller(self):
        """Start the controller process via its launcher."""
        self.controller_launcher.start(
            cluster_dir=self.cluster_dir.location
        )

    def stop_controller(self):
        """Stop the controller if it is running; returns the launcher's stop result."""
        # self.log.info("In stop_controller")
        if self.controller_launcher and self.controller_launcher.running:
            return self.controller_launcher.stop()

    def stop_launchers(self, r=None):
        # Stop the controller first, then let the base class stop the engines
        # and schedule loop shutdown. _stopping is set by the super() call.
        if not self._stopping:
            self.stop_controller()
            super(IPClusterStart, self).stop_launchers()

    def start(self):
        """Start the app for the start subcommand."""
        # First see if the cluster is already running
        try:
            pid = self.get_pid_from_file()
        except PIDFileError:
            pass
        else:
            if self.check_pid(pid):
                self.log.critical(
                    'Cluster is already running with [pid=%s]. '
                    'use "ipcluster stop" to stop the cluster.' % pid
                )
                # Here I exit with a unusual exit status that other processes
                # can watch for to learn how I exited.
                self.exit(ALREADY_STARTED)
            else:
                # Stale pid file from a dead process; clean it up and continue.
                self.remove_pid_file()


        # Now log and daemonize
        self.log.info(
            'Starting ipcluster with [daemon=%r]' % self.daemonize
        )
        # TODO: Get daemonize working on Windows or as a Windows Server.
        if self.daemonize:
            if os.name=='posix':
                from twisted.scripts._twistd_unix import daemonize
                daemonize()

        # Start the controller immediately, the engines after `delay` seconds
        # so the controller has time to bind its sockets.
        dc = ioloop.DelayedCallback(self.start_controller, 0, self.loop)
        dc.start()
        dc = ioloop.DelayedCallback(self.start_engines, 1000*self.delay, self.loop)
        dc.start()
        # Now write the new pid file AFTER our new forked pid is active.
        self.write_pid_file()
        try:
            self.loop.start()
        except KeyboardInterrupt:
            pass
        except zmq.ZMQError as e:
            # EINTR during shutdown is expected; re-raise anything else.
            if e.errno == errno.EINTR:
                pass
            else:
                raise
        finally:
            # Always drop the pid file, however the loop exited.
            self.remove_pid_file()
501
506
#: Common prefix for the fully-qualified names of the subcommand Applications.
base='IPython.parallel.apps.ipclusterapp.IPCluster'

class IPClusterApp(Application):
    """Parent `ipcluster` application: dispatches to one of its subcommands."""
    name = u'ipcluster'
    description = _description

    # Map subcommand name -> (fully-qualified Application class name, help text).
    subcommands = {'create' : (base+'Create', create_help),
                'list' : (base+'List', list_help),
                'start' : (base+'Start', start_help),
                'stop' : (base+'Stop', stop_help),
                'engines' : (base+'Engines', engines_help),
                }

    # no aliases or flags for parent App
    aliases = Dict()
    flags = Dict()

    def start(self):
        """Run the selected subcommand, or print usage and exit if none was given."""
        if self.subapp is None:
            # NOTE: Python 2 print statements — this file predates py3 support.
            print "No subcommand specified! Must specify one of: %s"%(self.subcommands.keys())
            print
            self.print_subcommands()
            self.exit(1)
        else:
            return self.subapp.start()
527
532
def launch_new_instance():
    """Create and run the IPython cluster."""
    cluster_app = IPClusterApp()
    cluster_app.initialize()
    cluster_app.start()


if __name__ == '__main__':
    launch_new_instance()
537
542
@@ -1,403 +1,405 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # encoding: utf-8
2 # encoding: utf-8
3 """
3 """
4 The IPython controller application.
4 The IPython controller application.
5 """
5 """
6
6
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2008-2009 The IPython Development Team
8 # Copyright (C) 2008-2009 The IPython Development Team
9 #
9 #
10 # Distributed under the terms of the BSD License. The full license is in
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13
13
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15 # Imports
15 # Imports
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 from __future__ import with_statement
18 from __future__ import with_statement
19
19
20 import copy
20 import copy
21 import os
21 import os
22 import logging
22 import logging
23 import socket
23 import socket
24 import stat
24 import stat
25 import sys
25 import sys
26 import uuid
26 import uuid
27
27
28 from multiprocessing import Process
28 from multiprocessing import Process
29
29
30 import zmq
30 import zmq
31 from zmq.devices import ProcessMonitoredQueue
31 from zmq.devices import ProcessMonitoredQueue
32 from zmq.log.handlers import PUBHandler
32 from zmq.log.handlers import PUBHandler
33 from zmq.utils import jsonapi as json
33 from zmq.utils import jsonapi as json
34
34
35 from IPython.config.loader import Config
35 from IPython.config.loader import Config
36
36
37 from IPython.parallel import factory
37 from IPython.parallel import factory
38
38
39 from IPython.parallel.apps.clusterdir import (
39 from IPython.parallel.apps.clusterdir import (
40 ClusterDir,
40 ClusterDir,
41 ClusterApplication,
41 ClusterApplication,
42 base_flags
42 base_flags
43 # ClusterDirConfigLoader
43 # ClusterDirConfigLoader
44 )
44 )
45 from IPython.utils.importstring import import_item
45 from IPython.utils.importstring import import_item
46 from IPython.utils.traitlets import Instance, Unicode, Bool, List, Dict
46 from IPython.utils.traitlets import Instance, Unicode, Bool, List, Dict
47
47
48 # from IPython.parallel.controller.controller import ControllerFactory
48 # from IPython.parallel.controller.controller import ControllerFactory
49 from IPython.parallel.streamsession import StreamSession
49 from IPython.parallel.streamsession import StreamSession
50 from IPython.parallel.controller.heartmonitor import HeartMonitor
50 from IPython.parallel.controller.heartmonitor import HeartMonitor
51 from IPython.parallel.controller.hub import Hub, HubFactory
51 from IPython.parallel.controller.hub import Hub, HubFactory
52 from IPython.parallel.controller.scheduler import TaskScheduler,launch_scheduler
52 from IPython.parallel.controller.scheduler import TaskScheduler,launch_scheduler
53 from IPython.parallel.controller.sqlitedb import SQLiteDB
53 from IPython.parallel.controller.sqlitedb import SQLiteDB
54
54
55 from IPython.parallel.util import signal_children,disambiguate_ip_address, split_url
55 from IPython.parallel.util import signal_children,disambiguate_ip_address, split_url
56
56
57 # conditional import of MongoDB backend class
57 # conditional import of MongoDB backend class
58
58
59 try:
59 try:
60 from IPython.parallel.controller.mongodb import MongoDB
60 from IPython.parallel.controller.mongodb import MongoDB
61 except ImportError:
61 except ImportError:
62 maybe_mongo = []
62 maybe_mongo = []
63 else:
63 else:
64 maybe_mongo = [MongoDB]
64 maybe_mongo = [MongoDB]
65
65
66
66
67 #-----------------------------------------------------------------------------
67 #-----------------------------------------------------------------------------
68 # Module level variables
68 # Module level variables
69 #-----------------------------------------------------------------------------
69 #-----------------------------------------------------------------------------
70
70
71
71
#: The default config file name for this application
default_config_file_name = u'ipcontroller_config.py'


#: Help text shown for the ipcontroller application.
_description = """Start the IPython controller for parallel computing.

The IPython controller provides a gateway between the IPython engines and
clients. The controller needs to be started before the engines and can be
configured using command line options or using a cluster directory. Cluster
directories contain config, log and security files and are usually located in
your ipython directory and named as "cluster_<profile>". See the `profile`
and `cluster_dir` options for details.
"""
85
85
86
86
87
87
88
88
89 #-----------------------------------------------------------------------------
89 #-----------------------------------------------------------------------------
90 # The main application
90 # The main application
91 #-----------------------------------------------------------------------------
91 #-----------------------------------------------------------------------------
# Command-line flags for ipcontroller: the common base flags plus
# controller-specific switches (scheduler threading, hub DB backend,
# connection-file reuse). Each flag maps to (config-dict, help-string).
flags = {}
flags.update(base_flags)
flags.update({
    # Wrapped in Config(...) like the sibling entries so the settings merge
    # as nested config rather than plain dicts.
    'usethreads' : ( {'IPControllerApp' : Config({'use_threads' : True})},
                    'Use threads instead of processes for the schedulers'),
    'sqlitedb' : ({'HubFactory' : Config({'db_class' : 'IPython.parallel.controller.sqlitedb.SQLiteDB'})},
                    'use the SQLiteDB backend'),
    'mongodb' : ({'HubFactory' : Config({'db_class' : 'IPython.parallel.controller.mongodb.MongoDB'})},
                    'use the MongoDB backend'),
    'dictdb' : ({'HubFactory' : Config({'db_class' : 'IPython.parallel.controller.dictdb.DictDB'})},
                    'use the in-memory DictDB backend'),
    'reuse' : ({'IPControllerApp' : Config({'reuse_files' : True})},
                    'reuse existing json connection files')
})
# (a trailing no-op `flags.update()` call was removed)
106
108
class IPControllerApp(ClusterApplication):
    """The IPython controller application: hub + schedulers + monitored queues."""

    name = u'ipcontroller'
    description = _description
    # command_line_loader = IPControllerAppConfigLoader
    default_config_file_name = default_config_file_name
    classes = [ClusterDir, StreamSession, HubFactory, TaskScheduler, HeartMonitor, SQLiteDB] + maybe_mongo

    # Fixed wording: previously read "if it exists", which inverted the meaning.
    auto_create_cluster_dir = Bool(True, config=True,
        help="Whether to create cluster_dir if it doesn't exist.")
    reuse_files = Bool(False, config=True,
        help='Whether to reuse existing json connection files [default: False]'
    )
    secure = Bool(True, config=True,
        help='Whether to use exec_keys for extra authentication [default: True]'
    )
    ssh_server = Unicode(u'', config=True,
        help="""ssh url for clients to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller\'s listening addresses must be accessible from the ssh server""",
    )
    location = Unicode(u'', config=True,
        help="""The external IP or domain name of the Controller, used for disambiguating
        engine and client connections.""",
    )
    import_statements = List([], config=True,
        help="import statements to be run at startup. Necessary in some environments"
    )

    use_threads = Bool(False, config=True,
        help='Use threads instead of processes for the schedulers',
        )

    # internal
    children = List()  # scheduler/queue subprocesses (or threads)
    mq_class = Unicode('zmq.devices.ProcessMonitoredQueue')

    def _use_threads_changed(self, name, old, new):
        # Switch the monitored-queue implementation to match the threading mode.
        self.mq_class = 'zmq.devices.%sMonitoredQueue'%('Thread' if new else 'Process')

    aliases = Dict(dict(
        config = 'IPControllerApp.config_file',
        # file = 'IPControllerApp.url_file',
        log_level = 'IPControllerApp.log_level',
        log_url = 'IPControllerApp.log_url',
        reuse_files = 'IPControllerApp.reuse_files',
        secure = 'IPControllerApp.secure',
        ssh = 'IPControllerApp.ssh_server',
        use_threads = 'IPControllerApp.use_threads',
        import_statements = 'IPControllerApp.import_statements',
        location = 'IPControllerApp.location',

        ident = 'StreamSession.session',
        user = 'StreamSession.username',
        exec_key = 'StreamSession.keyfile',

        url = 'HubFactory.url',
        ip = 'HubFactory.ip',
        transport = 'HubFactory.transport',
        port = 'HubFactory.regport',

        ping = 'HeartMonitor.period',

        scheme = 'TaskScheduler.scheme_name',
        hwm = 'TaskScheduler.hwm',


        profile = "ClusterDir.profile",
        cluster_dir = 'ClusterDir.location',

    ))
    flags = Dict(flags)
179
181
180
182
181 def save_connection_dict(self, fname, cdict):
183 def save_connection_dict(self, fname, cdict):
182 """save a connection dict to json file."""
184 """save a connection dict to json file."""
183 c = self.config
185 c = self.config
184 url = cdict['url']
186 url = cdict['url']
185 location = cdict['location']
187 location = cdict['location']
186 if not location:
188 if not location:
187 try:
189 try:
188 proto,ip,port = split_url(url)
190 proto,ip,port = split_url(url)
189 except AssertionError:
191 except AssertionError:
190 pass
192 pass
191 else:
193 else:
192 location = socket.gethostbyname_ex(socket.gethostname())[2][-1]
194 location = socket.gethostbyname_ex(socket.gethostname())[2][-1]
193 cdict['location'] = location
195 cdict['location'] = location
194 fname = os.path.join(self.cluster_dir.security_dir, fname)
196 fname = os.path.join(self.cluster_dir.security_dir, fname)
195 with open(fname, 'w') as f:
197 with open(fname, 'w') as f:
196 f.write(json.dumps(cdict, indent=2))
198 f.write(json.dumps(cdict, indent=2))
197 os.chmod(fname, stat.S_IRUSR|stat.S_IWUSR)
199 os.chmod(fname, stat.S_IRUSR|stat.S_IWUSR)
198
200
199 def load_config_from_json(self):
201 def load_config_from_json(self):
200 """load config from existing json connector files."""
202 """load config from existing json connector files."""
201 c = self.config
203 c = self.config
202 # load from engine config
204 # load from engine config
203 with open(os.path.join(self.cluster_dir.security_dir, 'ipcontroller-engine.json')) as f:
205 with open(os.path.join(self.cluster_dir.security_dir, 'ipcontroller-engine.json')) as f:
204 cfg = json.loads(f.read())
206 cfg = json.loads(f.read())
205 key = c.StreamSession.key = cfg['exec_key']
207 key = c.StreamSession.key = cfg['exec_key']
206 xport,addr = cfg['url'].split('://')
208 xport,addr = cfg['url'].split('://')
207 c.HubFactory.engine_transport = xport
209 c.HubFactory.engine_transport = xport
208 ip,ports = addr.split(':')
210 ip,ports = addr.split(':')
209 c.HubFactory.engine_ip = ip
211 c.HubFactory.engine_ip = ip
210 c.HubFactory.regport = int(ports)
212 c.HubFactory.regport = int(ports)
211 self.location = cfg['location']
213 self.location = cfg['location']
212
214
213 # load client config
215 # load client config
214 with open(os.path.join(self.cluster_dir.security_dir, 'ipcontroller-client.json')) as f:
216 with open(os.path.join(self.cluster_dir.security_dir, 'ipcontroller-client.json')) as f:
215 cfg = json.loads(f.read())
217 cfg = json.loads(f.read())
216 assert key == cfg['exec_key'], "exec_key mismatch between engine and client keys"
218 assert key == cfg['exec_key'], "exec_key mismatch between engine and client keys"
217 xport,addr = cfg['url'].split('://')
219 xport,addr = cfg['url'].split('://')
218 c.HubFactory.client_transport = xport
220 c.HubFactory.client_transport = xport
219 ip,ports = addr.split(':')
221 ip,ports = addr.split(':')
220 c.HubFactory.client_ip = ip
222 c.HubFactory.client_ip = ip
221 self.ssh_server = cfg['ssh']
223 self.ssh_server = cfg['ssh']
222 assert int(ports) == c.HubFactory.regport, "regport mismatch"
224 assert int(ports) == c.HubFactory.regport, "regport mismatch"
223
225
    def init_hub(self):
        """Construct the HubFactory, picking up or creating connection info.

        If reuse_files is set, try to load keys/ports from existing json
        connector files; otherwise generate a fresh exec key (when secure)
        and write new connector files after the hub is built.
        """
        c = self.config

        self.do_import_statements()
        reusing = self.reuse_files
        if reusing:
            try:
                self.load_config_from_json()
            except (AssertionError,IOError):
                # Missing or inconsistent connector files: fall back to fresh setup.
                reusing=False
        # check again, because reusing may have failed:
        if reusing:
            pass
        elif self.secure:
            key = str(uuid.uuid4())
            # keyfile = os.path.join(self.cluster_dir.security_dir, self.exec_key)
            # with open(keyfile, 'w') as f:
            #     f.write(key)
            # os.chmod(keyfile, stat.S_IRUSR|stat.S_IWUSR)
            c.StreamSession.key = key
        else:
            key = c.StreamSession.key = ''

        try:
            self.factory = HubFactory(config=c, log=self.log)
            # self.start_logging()
            self.factory.init_hub()
        except:
            self.log.error("Couldn't construct the Controller", exc_info=True)
            self.exit(1)

        if not reusing:
            # save to new json config files
            f = self.factory
            cdict = {'exec_key' : key,
                    'ssh' : self.ssh_server,
                    'url' : "%s://%s:%s"%(f.client_transport, f.client_ip, f.regport),
                    'location' : self.location
                    }
            self.save_connection_dict('ipcontroller-client.json', cdict)
            # NOTE(review): edict aliases cdict (no copy), so the 'url'
            # assignment below also mutates cdict after it has been saved.
            edict = cdict
            # NOTE(review): the engine file's url also uses client_transport /
            # client_ip here — confirm this is intended rather than engine_*.
            edict['url']="%s://%s:%s"%((f.client_transport, f.client_ip, f.regport))
            self.save_connection_dict('ipcontroller-engine.json', edict)
267
269
268 #
270 #
    def init_schedulers(self):
        """Wire up the monitored queues and the task scheduler.

        Builds the iopub relay, multiplexer, and control queues as monitored
        queue devices (process- or thread-based depending on mq_class), and
        either a pure-ZMQ task queue, no task scheduler, or a Python
        TaskScheduler subprocess depending on the configured scheme.
        All children are appended to self.children for later start/stop.
        """
        children = self.children
        # mq_class is a dotted name ('zmq.devices.{Process,Thread}MonitoredQueue').
        mq = import_item(str(self.mq_class))

        hub = self.factory
        # maybe_inproc = 'inproc://monitor' if self.use_threads else self.monitor_url
        # IOPub relay (in a Process)
        q = mq(zmq.PUB, zmq.SUB, zmq.PUB, 'N/A','iopub')
        q.bind_in(hub.client_info['iopub'])
        q.bind_out(hub.engine_info['iopub'])
        # Subscribe to everything coming from the engines.
        q.setsockopt_out(zmq.SUBSCRIBE, '')
        q.connect_mon(hub.monitor_url)
        q.daemon=True
        children.append(q)

        # Multiplexer Queue (in a Process)
        q = mq(zmq.XREP, zmq.XREP, zmq.PUB, 'in', 'out')
        q.bind_in(hub.client_info['mux'])
        q.setsockopt_in(zmq.IDENTITY, 'mux')
        q.bind_out(hub.engine_info['mux'])
        q.connect_mon(hub.monitor_url)
        q.daemon=True
        children.append(q)

        # Control Queue (in a Process)
        q = mq(zmq.XREP, zmq.XREP, zmq.PUB, 'incontrol', 'outcontrol')
        q.bind_in(hub.client_info['control'])
        q.setsockopt_in(zmq.IDENTITY, 'control')
        q.bind_out(hub.engine_info['control'])
        q.connect_mon(hub.monitor_url)
        q.daemon=True
        children.append(q)
        try:
            scheme = self.config.TaskScheduler.scheme_name
        except AttributeError:
            # Not set in config: fall back to the trait's declared default.
            scheme = TaskScheduler.scheme_name.get_default_value()
        # Task Queue (in a Process)
        if scheme == 'pure':
            self.log.warn("task::using pure XREQ Task scheduler")
            q = mq(zmq.XREP, zmq.XREQ, zmq.PUB, 'intask', 'outtask')
            # q.setsockopt_out(zmq.HWM, hub.hwm)
            q.bind_in(hub.client_info['task'][1])
            q.setsockopt_in(zmq.IDENTITY, 'task')
            q.bind_out(hub.engine_info['task'])
            q.connect_mon(hub.monitor_url)
            q.daemon=True
            children.append(q)
        elif scheme == 'none':
            self.log.warn("task::using no Task scheduler")

        else:
            # Any other scheme name: run the Python TaskScheduler in a subprocess.
            self.log.info("task::using Python %s Task scheduler"%scheme)
            sargs = (hub.client_info['task'][1], hub.engine_info['task'],
                                hub.monitor_url, hub.client_info['notification'])
            kwargs = dict(logname='scheduler', loglevel=self.log_level,
                            log_url = self.log_url, config=dict(self.config))
            q = Process(target=launch_scheduler, args=sargs, kwargs=kwargs)
            q.daemon=True
            children.append(q)
328
330
329
331
330 def save_urls(self):
332 def save_urls(self):
331 """save the registration urls to files."""
333 """save the registration urls to files."""
332 c = self.config
334 c = self.config
333
335
334 sec_dir = self.cluster_dir.security_dir
336 sec_dir = self.cluster_dir.security_dir
335 cf = self.factory
337 cf = self.factory
336
338
337 with open(os.path.join(sec_dir, 'ipcontroller-engine.url'), 'w') as f:
339 with open(os.path.join(sec_dir, 'ipcontroller-engine.url'), 'w') as f:
338 f.write("%s://%s:%s"%(cf.engine_transport, cf.engine_ip, cf.regport))
340 f.write("%s://%s:%s"%(cf.engine_transport, cf.engine_ip, cf.regport))
339
341
340 with open(os.path.join(sec_dir, 'ipcontroller-client.url'), 'w') as f:
342 with open(os.path.join(sec_dir, 'ipcontroller-client.url'), 'w') as f:
341 f.write("%s://%s:%s"%(cf.client_transport, cf.client_ip, cf.regport))
343 f.write("%s://%s:%s"%(cf.client_transport, cf.client_ip, cf.regport))
342
344
343
345
344 def do_import_statements(self):
346 def do_import_statements(self):
345 statements = self.import_statements
347 statements = self.import_statements
346 for s in statements:
348 for s in statements:
347 try:
349 try:
348 self.log.msg("Executing statement: '%s'" % s)
350 self.log.msg("Executing statement: '%s'" % s)
349 exec s in globals(), locals()
351 exec s in globals(), locals()
350 except:
352 except:
351 self.log.msg("Error running statement: %s" % s)
353 self.log.msg("Error running statement: %s" % s)
352
354
353 def forward_logging(self):
355 def forward_logging(self):
354 if self.log_url:
356 if self.log_url:
355 self.log.info("Forwarding logging to %s"%self.log_url)
357 self.log.info("Forwarding logging to %s"%self.log_url)
356 context = zmq.Context.instance()
358 context = zmq.Context.instance()
357 lsock = context.socket(zmq.PUB)
359 lsock = context.socket(zmq.PUB)
358 lsock.connect(self.log_url)
360 lsock.connect(self.log_url)
359 handler = PUBHandler(lsock)
361 handler = PUBHandler(lsock)
360 self.log.removeHandler(self._log_handler)
362 self.log.removeHandler(self._log_handler)
361 handler.root_topic = 'controller'
363 handler.root_topic = 'controller'
362 handler.setLevel(self.log_level)
364 handler.setLevel(self.log_level)
363 self.log.addHandler(handler)
365 self.log.addHandler(handler)
364 self._log_handler = handler
366 self._log_handler = handler
365 # #
367 # #
366
368
367 def initialize(self, argv=None):
369 def initialize(self, argv=None):
368 super(IPControllerApp, self).initialize(argv)
370 super(IPControllerApp, self).initialize(argv)
369 self.forward_logging()
371 self.forward_logging()
370 self.init_hub()
372 self.init_hub()
371 self.init_schedulers()
373 self.init_schedulers()
372
374
373 def start(self):
375 def start(self):
374 # Start the subprocesses:
376 # Start the subprocesses:
375 self.factory.start()
377 self.factory.start()
376 child_procs = []
378 child_procs = []
377 for child in self.children:
379 for child in self.children:
378 child.start()
380 child.start()
379 if isinstance(child, ProcessMonitoredQueue):
381 if isinstance(child, ProcessMonitoredQueue):
380 child_procs.append(child.launcher)
382 child_procs.append(child.launcher)
381 elif isinstance(child, Process):
383 elif isinstance(child, Process):
382 child_procs.append(child)
384 child_procs.append(child)
383 if child_procs:
385 if child_procs:
384 signal_children(child_procs)
386 signal_children(child_procs)
385
387
386 self.write_pid_file(overwrite=True)
388 self.write_pid_file(overwrite=True)
387
389
388 try:
390 try:
389 self.factory.loop.start()
391 self.factory.loop.start()
390 except KeyboardInterrupt:
392 except KeyboardInterrupt:
391 self.log.critical("Interrupted, Exiting...\n")
393 self.log.critical("Interrupted, Exiting...\n")
392
394
393
395
394
396
395 def launch_new_instance():
397 def launch_new_instance():
396 """Create and run the IPython controller"""
398 """Create and run the IPython controller"""
397 app = IPControllerApp()
399 app = IPControllerApp()
398 app.initialize()
400 app.initialize()
399 app.start()
401 app.start()
400
402
401
403
402 if __name__ == '__main__':
404 if __name__ == '__main__':
403 launch_new_instance()
405 launch_new_instance()
@@ -1,277 +1,277 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # encoding: utf-8
2 # encoding: utf-8
3 """
3 """
4 The IPython engine application
4 The IPython engine application
5 """
5 """
6
6
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2008-2009 The IPython Development Team
8 # Copyright (C) 2008-2009 The IPython Development Team
9 #
9 #
10 # Distributed under the terms of the BSD License. The full license is in
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13
13
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15 # Imports
15 # Imports
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 import json
18 import json
19 import os
19 import os
20 import sys
20 import sys
21
21
22 import zmq
22 import zmq
23 from zmq.eventloop import ioloop
23 from zmq.eventloop import ioloop
24
24
25 from IPython.parallel.apps.clusterdir import (
25 from IPython.parallel.apps.clusterdir import (
26 ClusterApplication,
26 ClusterApplication,
27 ClusterDir,
27 ClusterDir,
28 # ClusterDirConfigLoader
28 # ClusterDirConfigLoader
29 )
29 )
30 from IPython.zmq.log import EnginePUBHandler
30 from IPython.zmq.log import EnginePUBHandler
31
31
32 from IPython.config.configurable import Configurable
32 from IPython.config.configurable import Configurable
33 from IPython.parallel.streamsession import StreamSession
33 from IPython.parallel.streamsession import StreamSession
34 from IPython.parallel.engine.engine import EngineFactory
34 from IPython.parallel.engine.engine import EngineFactory
35 from IPython.parallel.engine.streamkernel import Kernel
35 from IPython.parallel.engine.streamkernel import Kernel
36 from IPython.parallel.util import disambiguate_url
36 from IPython.parallel.util import disambiguate_url
37
37
38 from IPython.utils.importstring import import_item
38 from IPython.utils.importstring import import_item
39 from IPython.utils.traitlets import Bool, Unicode, Dict, List
39 from IPython.utils.traitlets import Bool, Unicode, Dict, List
40
40
41
41
42 #-----------------------------------------------------------------------------
42 #-----------------------------------------------------------------------------
43 # Module level variables
43 # Module level variables
44 #-----------------------------------------------------------------------------
44 #-----------------------------------------------------------------------------
45
45
46 #: The default config file name for this application
46 #: The default config file name for this application
47 default_config_file_name = u'ipengine_config.py'
47 default_config_file_name = u'ipengine_config.py'
48
48
49 _description = """Start an IPython engine for parallel computing.\n\n
49 _description = """Start an IPython engine for parallel computing.
50
50
51 IPython engines run in parallel and perform computations on behalf of a client
51 IPython engines run in parallel and perform computations on behalf of a client
52 and controller. A controller needs to be started before the engines. The
52 and controller. A controller needs to be started before the engines. The
53 engine can be configured using command line options or using a cluster
53 engine can be configured using command line options or using a cluster
54 directory. Cluster directories contain config, log and security files and are
54 directory. Cluster directories contain config, log and security files and are
55 usually located in your ipython directory and named as "cluster_<profile>".
55 usually located in your ipython directory and named as "cluster_<profile>".
56 See the `profile` and `cluster_dir` options for details.
56 See the `profile` and `cluster_dir` options for details.
57 """
57 """
58
58
59
59
60 #-----------------------------------------------------------------------------
60 #-----------------------------------------------------------------------------
61 # MPI configuration
61 # MPI configuration
62 #-----------------------------------------------------------------------------
62 #-----------------------------------------------------------------------------
63
63
64 mpi4py_init = """from mpi4py import MPI as mpi
64 mpi4py_init = """from mpi4py import MPI as mpi
65 mpi.size = mpi.COMM_WORLD.Get_size()
65 mpi.size = mpi.COMM_WORLD.Get_size()
66 mpi.rank = mpi.COMM_WORLD.Get_rank()
66 mpi.rank = mpi.COMM_WORLD.Get_rank()
67 """
67 """
68
68
69
69
70 pytrilinos_init = """from PyTrilinos import Epetra
70 pytrilinos_init = """from PyTrilinos import Epetra
71 class SimpleStruct:
71 class SimpleStruct:
72 pass
72 pass
73 mpi = SimpleStruct()
73 mpi = SimpleStruct()
74 mpi.rank = 0
74 mpi.rank = 0
75 mpi.size = 0
75 mpi.size = 0
76 """
76 """
77
77
78 class MPI(Configurable):
78 class MPI(Configurable):
79 """Configurable for MPI initialization"""
79 """Configurable for MPI initialization"""
80 use = Unicode('', config=True,
80 use = Unicode('', config=True,
81 help='How to enable MPI (mpi4py, pytrilinos, or empty string to disable).'
81 help='How to enable MPI (mpi4py, pytrilinos, or empty string to disable).'
82 )
82 )
83
83
84 def _on_use_changed(self, old, new):
84 def _on_use_changed(self, old, new):
85 # load default init script if it's not set
85 # load default init script if it's not set
86 if not self.init_script:
86 if not self.init_script:
87 self.init_script = self.default_inits.get(new, '')
87 self.init_script = self.default_inits.get(new, '')
88
88
89 init_script = Unicode('', config=True,
89 init_script = Unicode('', config=True,
90 help="Initialization code for MPI")
90 help="Initialization code for MPI")
91
91
92 default_inits = Dict({'mpi4py' : mpi4py_init, 'pytrilinos':pytrilinos_init},
92 default_inits = Dict({'mpi4py' : mpi4py_init, 'pytrilinos':pytrilinos_init},
93 config=True)
93 config=True)
94
94
95
95
96 #-----------------------------------------------------------------------------
96 #-----------------------------------------------------------------------------
97 # Main application
97 # Main application
98 #-----------------------------------------------------------------------------
98 #-----------------------------------------------------------------------------
99
99
100
100
101 class IPEngineApp(ClusterApplication):
101 class IPEngineApp(ClusterApplication):
102
102
103 app_name = Unicode(u'ipengine')
103 app_name = Unicode(u'ipengine')
104 description = Unicode(_description)
104 description = Unicode(_description)
105 default_config_file_name = default_config_file_name
105 default_config_file_name = default_config_file_name
106 classes = List([ClusterDir, StreamSession, EngineFactory, Kernel, MPI])
106 classes = List([ClusterDir, StreamSession, EngineFactory, Kernel, MPI])
107
107
108 auto_create_cluster_dir = Bool(False,
108 auto_create_cluster_dir = Bool(False,
109 help="whether to create the cluster_dir if it doesn't exist")
109 help="whether to create the cluster_dir if it doesn't exist")
110
110
111 startup_script = Unicode(u'', config=True,
111 startup_script = Unicode(u'', config=True,
112 help='specify a script to be run at startup')
112 help='specify a script to be run at startup')
113 startup_command = Unicode('', config=True,
113 startup_command = Unicode('', config=True,
114 help='specify a command to be run at startup')
114 help='specify a command to be run at startup')
115
115
116 url_file = Unicode(u'', config=True,
116 url_file = Unicode(u'', config=True,
117 help="""The full location of the file containing the connection information for
117 help="""The full location of the file containing the connection information for
118 the controller. If this is not given, the file must be in the
118 the controller. If this is not given, the file must be in the
119 security directory of the cluster directory. This location is
119 security directory of the cluster directory. This location is
120 resolved using the `profile` or `cluster_dir` options.""",
120 resolved using the `profile` or `cluster_dir` options.""",
121 )
121 )
122
122
123 url_file_name = Unicode(u'ipcontroller-engine.json')
123 url_file_name = Unicode(u'ipcontroller-engine.json')
124 log_url = Unicode('', config=True,
124 log_url = Unicode('', config=True,
125 help="""The URL for the iploggerapp instance, for forwarding
125 help="""The URL for the iploggerapp instance, for forwarding
126 logging to a central location.""")
126 logging to a central location.""")
127
127
128 aliases = Dict(dict(
128 aliases = Dict(dict(
129 config = 'IPEngineApp.config_file',
129 config = 'IPEngineApp.config_file',
130 file = 'IPEngineApp.url_file',
130 file = 'IPEngineApp.url_file',
131 c = 'IPEngineApp.startup_command',
131 c = 'IPEngineApp.startup_command',
132 s = 'IPEngineApp.startup_script',
132 s = 'IPEngineApp.startup_script',
133
133
134 ident = 'StreamSession.session',
134 ident = 'StreamSession.session',
135 user = 'StreamSession.username',
135 user = 'StreamSession.username',
136 exec_key = 'StreamSession.keyfile',
136 exec_key = 'StreamSession.keyfile',
137
137
138 url = 'EngineFactory.url',
138 url = 'EngineFactory.url',
139 ip = 'EngineFactory.ip',
139 ip = 'EngineFactory.ip',
140 transport = 'EngineFactory.transport',
140 transport = 'EngineFactory.transport',
141 port = 'EngineFactory.regport',
141 port = 'EngineFactory.regport',
142 location = 'EngineFactory.location',
142 location = 'EngineFactory.location',
143
143
144 timeout = 'EngineFactory.timeout',
144 timeout = 'EngineFactory.timeout',
145
145
146 profile = "ClusterDir.profile",
146 profile = "ClusterDir.profile",
147 cluster_dir = 'ClusterDir.location',
147 cluster_dir = 'ClusterDir.location',
148
148
149 mpi = 'MPI.use',
149 mpi = 'MPI.use',
150
150
151 log_level = 'IPEngineApp.log_level',
151 log_level = 'IPEngineApp.log_level',
152 log_url = 'IPEngineApp.log_url'
152 log_url = 'IPEngineApp.log_url'
153 ))
153 ))
154
154
155 # def find_key_file(self):
155 # def find_key_file(self):
156 # """Set the key file.
156 # """Set the key file.
157 #
157 #
158 # Here we don't try to actually see if it exists for is valid as that
158 # Here we don't try to actually see if it exists for is valid as that
159 # is hadled by the connection logic.
159 # is hadled by the connection logic.
160 # """
160 # """
161 # config = self.master_config
161 # config = self.master_config
162 # # Find the actual controller key file
162 # # Find the actual controller key file
163 # if not config.Global.key_file:
163 # if not config.Global.key_file:
164 # try_this = os.path.join(
164 # try_this = os.path.join(
165 # config.Global.cluster_dir,
165 # config.Global.cluster_dir,
166 # config.Global.security_dir,
166 # config.Global.security_dir,
167 # config.Global.key_file_name
167 # config.Global.key_file_name
168 # )
168 # )
169 # config.Global.key_file = try_this
169 # config.Global.key_file = try_this
170
170
171 def find_url_file(self):
171 def find_url_file(self):
172 """Set the key file.
172 """Set the key file.
173
173
174 Here we don't try to actually see if it exists for is valid as that
174 Here we don't try to actually see if it exists for is valid as that
175 is hadled by the connection logic.
175 is hadled by the connection logic.
176 """
176 """
177 config = self.config
177 config = self.config
178 # Find the actual controller key file
178 # Find the actual controller key file
179 if not self.url_file:
179 if not self.url_file:
180 self.url_file = os.path.join(
180 self.url_file = os.path.join(
181 self.cluster_dir.security_dir,
181 self.cluster_dir.security_dir,
182 self.url_file_name
182 self.url_file_name
183 )
183 )
184 def init_engine(self):
184 def init_engine(self):
185 # This is the working dir by now.
185 # This is the working dir by now.
186 sys.path.insert(0, '')
186 sys.path.insert(0, '')
187 config = self.config
187 config = self.config
188 # print config
188 # print config
189 self.find_url_file()
189 self.find_url_file()
190
190
191 # if os.path.exists(config.Global.key_file) and config.Global.secure:
191 # if os.path.exists(config.Global.key_file) and config.Global.secure:
192 # config.SessionFactory.exec_key = config.Global.key_file
192 # config.SessionFactory.exec_key = config.Global.key_file
193 if os.path.exists(self.url_file):
193 if os.path.exists(self.url_file):
194 with open(self.url_file) as f:
194 with open(self.url_file) as f:
195 d = json.loads(f.read())
195 d = json.loads(f.read())
196 for k,v in d.iteritems():
196 for k,v in d.iteritems():
197 if isinstance(v, unicode):
197 if isinstance(v, unicode):
198 d[k] = v.encode()
198 d[k] = v.encode()
199 if d['exec_key']:
199 if d['exec_key']:
200 config.StreamSession.key = d['exec_key']
200 config.StreamSession.key = d['exec_key']
201 d['url'] = disambiguate_url(d['url'], d['location'])
201 d['url'] = disambiguate_url(d['url'], d['location'])
202 config.EngineFactory.url = d['url']
202 config.EngineFactory.url = d['url']
203 config.EngineFactory.location = d['location']
203 config.EngineFactory.location = d['location']
204
204
205 try:
205 try:
206 exec_lines = config.Kernel.exec_lines
206 exec_lines = config.Kernel.exec_lines
207 except AttributeError:
207 except AttributeError:
208 config.Kernel.exec_lines = []
208 config.Kernel.exec_lines = []
209 exec_lines = config.Kernel.exec_lines
209 exec_lines = config.Kernel.exec_lines
210
210
211 if self.startup_script:
211 if self.startup_script:
212 enc = sys.getfilesystemencoding() or 'utf8'
212 enc = sys.getfilesystemencoding() or 'utf8'
213 cmd="execfile(%r)"%self.startup_script.encode(enc)
213 cmd="execfile(%r)"%self.startup_script.encode(enc)
214 exec_lines.append(cmd)
214 exec_lines.append(cmd)
215 if self.startup_command:
215 if self.startup_command:
216 exec_lines.append(self.startup_command)
216 exec_lines.append(self.startup_command)
217
217
218 # Create the underlying shell class and Engine
218 # Create the underlying shell class and Engine
219 # shell_class = import_item(self.master_config.Global.shell_class)
219 # shell_class = import_item(self.master_config.Global.shell_class)
220 # print self.config
220 # print self.config
221 try:
221 try:
222 self.engine = EngineFactory(config=config, log=self.log)
222 self.engine = EngineFactory(config=config, log=self.log)
223 except:
223 except:
224 self.log.error("Couldn't start the Engine", exc_info=True)
224 self.log.error("Couldn't start the Engine", exc_info=True)
225 self.exit(1)
225 self.exit(1)
226
226
227 def forward_logging(self):
227 def forward_logging(self):
228 if self.log_url:
228 if self.log_url:
229 self.log.info("Forwarding logging to %s"%self.log_url)
229 self.log.info("Forwarding logging to %s"%self.log_url)
230 context = self.engine.context
230 context = self.engine.context
231 lsock = context.socket(zmq.PUB)
231 lsock = context.socket(zmq.PUB)
232 lsock.connect(self.log_url)
232 lsock.connect(self.log_url)
233 self.log.removeHandler(self._log_handler)
233 self.log.removeHandler(self._log_handler)
234 handler = EnginePUBHandler(self.engine, lsock)
234 handler = EnginePUBHandler(self.engine, lsock)
235 handler.setLevel(self.log_level)
235 handler.setLevel(self.log_level)
236 self.log.addHandler(handler)
236 self.log.addHandler(handler)
237 self._log_handler = handler
237 self._log_handler = handler
238 #
238 #
239 def init_mpi(self):
239 def init_mpi(self):
240 global mpi
240 global mpi
241 self.mpi = MPI(config=self.config)
241 self.mpi = MPI(config=self.config)
242
242
243 mpi_import_statement = self.mpi.init_script
243 mpi_import_statement = self.mpi.init_script
244 if mpi_import_statement:
244 if mpi_import_statement:
245 try:
245 try:
246 self.log.info("Initializing MPI:")
246 self.log.info("Initializing MPI:")
247 self.log.info(mpi_import_statement)
247 self.log.info(mpi_import_statement)
248 exec mpi_import_statement in globals()
248 exec mpi_import_statement in globals()
249 except:
249 except:
250 mpi = None
250 mpi = None
251 else:
251 else:
252 mpi = None
252 mpi = None
253
253
254 def initialize(self, argv=None):
254 def initialize(self, argv=None):
255 super(IPEngineApp, self).initialize(argv)
255 super(IPEngineApp, self).initialize(argv)
256 self.init_mpi()
256 self.init_mpi()
257 self.init_engine()
257 self.init_engine()
258 self.forward_logging()
258 self.forward_logging()
259
259
260 def start(self):
260 def start(self):
261 self.engine.start()
261 self.engine.start()
262 try:
262 try:
263 self.engine.loop.start()
263 self.engine.loop.start()
264 except KeyboardInterrupt:
264 except KeyboardInterrupt:
265 self.log.critical("Engine Interrupted, shutting down...\n")
265 self.log.critical("Engine Interrupted, shutting down...\n")
266
266
267
267
268 def launch_new_instance():
268 def launch_new_instance():
269 """Create and run the IPython engine"""
269 """Create and run the IPython engine"""
270 app = IPEngineApp()
270 app = IPEngineApp()
271 app.initialize()
271 app.initialize()
272 app.start()
272 app.start()
273
273
274
274
275 if __name__ == '__main__':
275 if __name__ == '__main__':
276 launch_new_instance()
276 launch_new_instance()
277
277
@@ -1,97 +1,97 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # encoding: utf-8
2 # encoding: utf-8
3 """
3 """
4 A simple IPython logger application
4 A simple IPython logger application
5 """
5 """
6
6
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2011 The IPython Development Team
8 # Copyright (C) 2011 The IPython Development Team
9 #
9 #
10 # Distributed under the terms of the BSD License. The full license is in
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13
13
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15 # Imports
15 # Imports
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 import os
18 import os
19 import sys
19 import sys
20
20
21 import zmq
21 import zmq
22
22
23 from IPython.utils.traitlets import Bool, Dict
23 from IPython.utils.traitlets import Bool, Dict
24
24
25 from IPython.parallel.apps.clusterdir import (
25 from IPython.parallel.apps.clusterdir import (
26 ClusterApplication,
26 ClusterApplication,
27 ClusterDir,
27 ClusterDir,
28 base_aliases
28 base_aliases
29 )
29 )
30 from IPython.parallel.apps.logwatcher import LogWatcher
30 from IPython.parallel.apps.logwatcher import LogWatcher
31
31
32 #-----------------------------------------------------------------------------
32 #-----------------------------------------------------------------------------
33 # Module level variables
33 # Module level variables
34 #-----------------------------------------------------------------------------
34 #-----------------------------------------------------------------------------
35
35
36 #: The default config file name for this application
36 #: The default config file name for this application
37 default_config_file_name = u'iplogger_config.py'
37 default_config_file_name = u'iplogger_config.py'
38
38
39 _description = """Start an IPython logger for parallel computing.\n\n
39 _description = """Start an IPython logger for parallel computing.
40
40
41 IPython controllers and engines (and your own processes) can broadcast log messages
41 IPython controllers and engines (and your own processes) can broadcast log messages
42 by registering a `zmq.log.handlers.PUBHandler` with the `logging` module. The
42 by registering a `zmq.log.handlers.PUBHandler` with the `logging` module. The
43 logger can be configured using command line options or using a cluster
43 logger can be configured using command line options or using a cluster
44 directory. Cluster directories contain config, log and security files and are
44 directory. Cluster directories contain config, log and security files and are
45 usually located in your ipython directory and named as "cluster_<profile>".
45 usually located in your ipython directory and named as "cluster_<profile>".
46 See the --profile and --cluster-dir options for details.
46 See the `profile` and `cluster_dir` options for details.
47 """
47 """
48
48
49
49
50 #-----------------------------------------------------------------------------
50 #-----------------------------------------------------------------------------
51 # Main application
51 # Main application
52 #-----------------------------------------------------------------------------
52 #-----------------------------------------------------------------------------
53 aliases = {}
53 aliases = {}
54 aliases.update(base_aliases)
54 aliases.update(base_aliases)
55 aliases.update(dict(url='LogWatcher.url', topics='LogWatcher.topics'))
55 aliases.update(dict(url='LogWatcher.url', topics='LogWatcher.topics'))
56
56
57 class IPLoggerApp(ClusterApplication):
57 class IPLoggerApp(ClusterApplication):
58
58
59 name = u'iploggerz'
59 name = u'iploggerz'
60 description = _description
60 description = _description
61 default_config_file_name = default_config_file_name
61 default_config_file_name = default_config_file_name
62 auto_create_cluster_dir = Bool(False)
62 auto_create_cluster_dir = Bool(False)
63
63
64 classes = [LogWatcher, ClusterDir]
64 classes = [LogWatcher, ClusterDir]
65 aliases = Dict(aliases)
65 aliases = Dict(aliases)
66
66
67 def initialize(self, argv=None):
67 def initialize(self, argv=None):
68 super(IPLoggerApp, self).initialize(argv)
68 super(IPLoggerApp, self).initialize(argv)
69 self.init_watcher()
69 self.init_watcher()
70
70
71 def init_watcher(self):
71 def init_watcher(self):
72 try:
72 try:
73 self.watcher = LogWatcher(config=self.config, logname=self.log.name)
73 self.watcher = LogWatcher(config=self.config, logname=self.log.name)
74 except:
74 except:
75 self.log.error("Couldn't start the LogWatcher", exc_info=True)
75 self.log.error("Couldn't start the LogWatcher", exc_info=True)
76 self.exit(1)
76 self.exit(1)
77 self.log.info("Listening for log messages on %r"%self.watcher.url)
77 self.log.info("Listening for log messages on %r"%self.watcher.url)
78
78
79
79
80 def start(self):
80 def start(self):
81 self.watcher.start()
81 self.watcher.start()
82 try:
82 try:
83 self.watcher.loop.start()
83 self.watcher.loop.start()
84 except KeyboardInterrupt:
84 except KeyboardInterrupt:
85 self.log.critical("Logging Interrupted, shutting down...\n")
85 self.log.critical("Logging Interrupted, shutting down...\n")
86
86
87
87
88 def launch_new_instance():
88 def launch_new_instance():
89 """Create and run the IPython LogWatcher"""
89 """Create and run the IPython LogWatcher"""
90 app = IPLoggerApp()
90 app = IPLoggerApp()
91 app.initialize()
91 app.initialize()
92 app.start()
92 app.start()
93
93
94
94
95 if __name__ == '__main__':
95 if __name__ == '__main__':
96 launch_new_instance()
96 launch_new_instance()
97
97
@@ -1,166 +1,165 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 """A simple engine that talks to a controller over 0MQ.
2 """A simple engine that talks to a controller over 0MQ.
3 it handles registration, etc. and launches a kernel
3 it handles registration, etc. and launches a kernel
4 connected to the Controller's Schedulers.
4 connected to the Controller's Schedulers.
5 """
5 """
6 #-----------------------------------------------------------------------------
6 #-----------------------------------------------------------------------------
7 # Copyright (C) 2010-2011 The IPython Development Team
7 # Copyright (C) 2010-2011 The IPython Development Team
8 #
8 #
9 # Distributed under the terms of the BSD License. The full license is in
9 # Distributed under the terms of the BSD License. The full license is in
10 # the file COPYING, distributed as part of this software.
10 # the file COPYING, distributed as part of this software.
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12
12
13 from __future__ import print_function
13 from __future__ import print_function
14
14
15 import sys
15 import sys
16 import time
16 import time
17
17
18 import zmq
18 import zmq
19 from zmq.eventloop import ioloop, zmqstream
19 from zmq.eventloop import ioloop, zmqstream
20
20
21 # internal
21 # internal
22 from IPython.utils.traitlets import Instance, Dict, Int, Type, CFloat, Unicode
22 from IPython.utils.traitlets import Instance, Dict, Int, Type, CFloat, Unicode
23 # from IPython.utils.localinterfaces import LOCALHOST
23 # from IPython.utils.localinterfaces import LOCALHOST
24
24
25 from IPython.parallel.controller.heartmonitor import Heart
25 from IPython.parallel.controller.heartmonitor import Heart
26 from IPython.parallel.factory import RegistrationFactory
26 from IPython.parallel.factory import RegistrationFactory
27 from IPython.parallel.streamsession import Message
27 from IPython.parallel.streamsession import Message
28 from IPython.parallel.util import disambiguate_url
28 from IPython.parallel.util import disambiguate_url
29
29
30 from .streamkernel import Kernel
30 from .streamkernel import Kernel
31
31
32 class EngineFactory(RegistrationFactory):
32 class EngineFactory(RegistrationFactory):
33 """IPython engine"""
33 """IPython engine"""
34
34
35 # configurables:
35 # configurables:
36 out_stream_factory=Type('IPython.zmq.iostream.OutStream', config=True,
36 out_stream_factory=Type('IPython.zmq.iostream.OutStream', config=True,
37 help="""The OutStream for handling stdout/err.
37 help="""The OutStream for handling stdout/err.
38 Typically 'IPython.zmq.iostream.OutStream'""")
38 Typically 'IPython.zmq.iostream.OutStream'""")
39 display_hook_factory=Type('IPython.zmq.displayhook.DisplayHook', config=True,
39 display_hook_factory=Type('IPython.zmq.displayhook.DisplayHook', config=True,
40 help="""The class for handling displayhook.
40 help="""The class for handling displayhook.
41 Typically 'IPython.zmq.displayhook.DisplayHook'""")
41 Typically 'IPython.zmq.displayhook.DisplayHook'""")
42 location=Unicode(config=True,
42 location=Unicode(config=True,
43 help="""The location (an IP address) of the controller. This is
43 help="""The location (an IP address) of the controller. This is
44 used for disambiguating URLs, to determine whether
44 used for disambiguating URLs, to determine whether
45 loopback should be used to connect or the public address.""")
45 loopback should be used to connect or the public address.""")
46 timeout=CFloat(2,config=True,
46 timeout=CFloat(2,config=True,
47 help="""The time (in seconds) to wait for the Controller to respond
47 help="""The time (in seconds) to wait for the Controller to respond
48 to registration requests before giving up.""")
48 to registration requests before giving up.""")
49
49
50 # not configurable:
50 # not configurable:
51 user_ns=Dict()
51 user_ns=Dict()
52 id=Int(allow_none=True)
52 id=Int(allow_none=True)
53 registrar=Instance('zmq.eventloop.zmqstream.ZMQStream')
53 registrar=Instance('zmq.eventloop.zmqstream.ZMQStream')
54 kernel=Instance(Kernel)
54 kernel=Instance(Kernel)
55
55
56
56
57 def __init__(self, **kwargs):
57 def __init__(self, **kwargs):
58 super(EngineFactory, self).__init__(**kwargs)
58 super(EngineFactory, self).__init__(**kwargs)
59 self.ident = self.session.session
59 self.ident = self.session.session
60 ctx = self.context
60 ctx = self.context
61
61
62 reg = ctx.socket(zmq.XREQ)
62 reg = ctx.socket(zmq.XREQ)
63 reg.setsockopt(zmq.IDENTITY, self.ident)
63 reg.setsockopt(zmq.IDENTITY, self.ident)
64 reg.connect(self.url)
64 reg.connect(self.url)
65 self.registrar = zmqstream.ZMQStream(reg, self.loop)
65 self.registrar = zmqstream.ZMQStream(reg, self.loop)
66
66
67 def register(self):
67 def register(self):
68 """send the registration_request"""
68 """send the registration_request"""
69
69
70 self.log.info("registering")
70 self.log.info("registering")
71 content = dict(queue=self.ident, heartbeat=self.ident, control=self.ident)
71 content = dict(queue=self.ident, heartbeat=self.ident, control=self.ident)
72 self.registrar.on_recv(self.complete_registration)
72 self.registrar.on_recv(self.complete_registration)
73 # print (self.session.key)
73 # print (self.session.key)
74 self.session.send(self.registrar, "registration_request",content=content)
74 self.session.send(self.registrar, "registration_request",content=content)
75
75
76 def complete_registration(self, msg):
76 def complete_registration(self, msg):
77 # print msg
77 # print msg
78 self._abort_dc.stop()
78 self._abort_dc.stop()
79 ctx = self.context
79 ctx = self.context
80 loop = self.loop
80 loop = self.loop
81 identity = self.ident
81 identity = self.ident
82
82
83 idents,msg = self.session.feed_identities(msg)
83 idents,msg = self.session.feed_identities(msg)
84 msg = Message(self.session.unpack_message(msg))
84 msg = Message(self.session.unpack_message(msg))
85
85
86 if msg.content.status == 'ok':
86 if msg.content.status == 'ok':
87 self.id = int(msg.content.id)
87 self.id = int(msg.content.id)
88
88
89 # create Shell Streams (MUX, Task, etc.):
89 # create Shell Streams (MUX, Task, etc.):
90 queue_addr = msg.content.mux
90 queue_addr = msg.content.mux
91 shell_addrs = [ str(queue_addr) ]
91 shell_addrs = [ str(queue_addr) ]
92 task_addr = msg.content.task
92 task_addr = msg.content.task
93 if task_addr:
93 if task_addr:
94 shell_addrs.append(str(task_addr))
94 shell_addrs.append(str(task_addr))
95
95
96 # Uncomment this to go back to two-socket model
96 # Uncomment this to go back to two-socket model
97 # shell_streams = []
97 # shell_streams = []
98 # for addr in shell_addrs:
98 # for addr in shell_addrs:
99 # stream = zmqstream.ZMQStream(ctx.socket(zmq.XREP), loop)
99 # stream = zmqstream.ZMQStream(ctx.socket(zmq.XREP), loop)
100 # stream.setsockopt(zmq.IDENTITY, identity)
100 # stream.setsockopt(zmq.IDENTITY, identity)
101 # stream.connect(disambiguate_url(addr, self.location))
101 # stream.connect(disambiguate_url(addr, self.location))
102 # shell_streams.append(stream)
102 # shell_streams.append(stream)
103
103
104 # Now use only one shell stream for mux and tasks
104 # Now use only one shell stream for mux and tasks
105 stream = zmqstream.ZMQStream(ctx.socket(zmq.XREP), loop)
105 stream = zmqstream.ZMQStream(ctx.socket(zmq.XREP), loop)
106 stream.setsockopt(zmq.IDENTITY, identity)
106 stream.setsockopt(zmq.IDENTITY, identity)
107 shell_streams = [stream]
107 shell_streams = [stream]
108 for addr in shell_addrs:
108 for addr in shell_addrs:
109 stream.connect(disambiguate_url(addr, self.location))
109 stream.connect(disambiguate_url(addr, self.location))
110 # end single stream-socket
110 # end single stream-socket
111
111
112 # control stream:
112 # control stream:
113 control_addr = str(msg.content.control)
113 control_addr = str(msg.content.control)
114 control_stream = zmqstream.ZMQStream(ctx.socket(zmq.XREP), loop)
114 control_stream = zmqstream.ZMQStream(ctx.socket(zmq.XREP), loop)
115 control_stream.setsockopt(zmq.IDENTITY, identity)
115 control_stream.setsockopt(zmq.IDENTITY, identity)
116 control_stream.connect(disambiguate_url(control_addr, self.location))
116 control_stream.connect(disambiguate_url(control_addr, self.location))
117
117
118 # create iopub stream:
118 # create iopub stream:
119 iopub_addr = msg.content.iopub
119 iopub_addr = msg.content.iopub
120 iopub_stream = zmqstream.ZMQStream(ctx.socket(zmq.PUB), loop)
120 iopub_stream = zmqstream.ZMQStream(ctx.socket(zmq.PUB), loop)
121 iopub_stream.setsockopt(zmq.IDENTITY, identity)
121 iopub_stream.setsockopt(zmq.IDENTITY, identity)
122 iopub_stream.connect(disambiguate_url(iopub_addr, self.location))
122 iopub_stream.connect(disambiguate_url(iopub_addr, self.location))
123
123
124 # launch heartbeat
124 # launch heartbeat
125 hb_addrs = msg.content.heartbeat
125 hb_addrs = msg.content.heartbeat
126 # print (hb_addrs)
126 # print (hb_addrs)
127
127
128 # # Redirect input streams and set a display hook.
128 # # Redirect input streams and set a display hook.
129 if self.out_stream_factory:
129 if self.out_stream_factory:
130 sys.stdout = self.out_stream_factory(self.session, iopub_stream, u'stdout')
130 sys.stdout = self.out_stream_factory(self.session, iopub_stream, u'stdout')
131 sys.stdout.topic = 'engine.%i.stdout'%self.id
131 sys.stdout.topic = 'engine.%i.stdout'%self.id
132 sys.stderr = self.out_stream_factory(self.session, iopub_stream, u'stderr')
132 sys.stderr = self.out_stream_factory(self.session, iopub_stream, u'stderr')
133 sys.stderr.topic = 'engine.%i.stderr'%self.id
133 sys.stderr.topic = 'engine.%i.stderr'%self.id
134 if self.display_hook_factory:
134 if self.display_hook_factory:
135 sys.displayhook = self.display_hook_factory(self.session, iopub_stream)
135 sys.displayhook = self.display_hook_factory(self.session, iopub_stream)
136 sys.displayhook.topic = 'engine.%i.pyout'%self.id
136 sys.displayhook.topic = 'engine.%i.pyout'%self.id
137
137
138 self.kernel = Kernel(config=self.config, int_id=self.id, ident=self.ident, session=self.session,
138 self.kernel = Kernel(config=self.config, int_id=self.id, ident=self.ident, session=self.session,
139 control_stream=control_stream, shell_streams=shell_streams, iopub_stream=iopub_stream,
139 control_stream=control_stream, shell_streams=shell_streams, iopub_stream=iopub_stream,
140 loop=loop, user_ns = self.user_ns, log=self.log)
140 loop=loop, user_ns = self.user_ns, log=self.log)
141 self.kernel.start()
141 self.kernel.start()
142 hb_addrs = [ disambiguate_url(addr, self.location) for addr in hb_addrs ]
142 hb_addrs = [ disambiguate_url(addr, self.location) for addr in hb_addrs ]
143 heart = Heart(*map(str, hb_addrs), heart_id=identity)
143 heart = Heart(*map(str, hb_addrs), heart_id=identity)
144 # ioloop.DelayedCallback(heart.start, 1000, self.loop).start()
145 heart.start()
144 heart.start()
146
145
147
146
148 else:
147 else:
149 self.log.fatal("Registration Failed: %s"%msg)
148 self.log.fatal("Registration Failed: %s"%msg)
150 raise Exception("Registration Failed: %s"%msg)
149 raise Exception("Registration Failed: %s"%msg)
151
150
152 self.log.info("Completed registration with id %i"%self.id)
151 self.log.info("Completed registration with id %i"%self.id)
153
152
154
153
155 def abort(self):
154 def abort(self):
156 self.log.fatal("Registration timed out after %.1f seconds"%self.timeout)
155 self.log.fatal("Registration timed out after %.1f seconds"%self.timeout)
157 self.session.send(self.registrar, "unregistration_request", content=dict(id=self.id))
156 self.session.send(self.registrar, "unregistration_request", content=dict(id=self.id))
158 time.sleep(1)
157 time.sleep(1)
159 sys.exit(255)
158 sys.exit(255)
160
159
161 def start(self):
160 def start(self):
162 dc = ioloop.DelayedCallback(self.register, 0, self.loop)
161 dc = ioloop.DelayedCallback(self.register, 0, self.loop)
163 dc.start()
162 dc.start()
164 self._abort_dc = ioloop.DelayedCallback(self.abort, self.timeout*1000, self.loop)
163 self._abort_dc = ioloop.DelayedCallback(self.abort, self.timeout*1000, self.loop)
165 self._abort_dc.start()
164 self._abort_dc.start()
166
165
@@ -1,107 +1,107 b''
1 """toplevel setup/teardown for parallel tests."""
1 """toplevel setup/teardown for parallel tests."""
2
2
3 #-------------------------------------------------------------------------------
3 #-------------------------------------------------------------------------------
4 # Copyright (C) 2011 The IPython Development Team
4 # Copyright (C) 2011 The IPython Development Team
5 #
5 #
6 # Distributed under the terms of the BSD License. The full license is in
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING, distributed as part of this software.
7 # the file COPYING, distributed as part of this software.
8 #-------------------------------------------------------------------------------
8 #-------------------------------------------------------------------------------
9
9
10 #-------------------------------------------------------------------------------
10 #-------------------------------------------------------------------------------
11 # Imports
11 # Imports
12 #-------------------------------------------------------------------------------
12 #-------------------------------------------------------------------------------
13
13
14 import os
14 import os
15 import tempfile
15 import tempfile
16 import time
16 import time
17 from subprocess import Popen
17 from subprocess import Popen
18
18
19 from IPython.utils.path import get_ipython_dir
19 from IPython.utils.path import get_ipython_dir
20 from IPython.parallel import Client
20 from IPython.parallel import Client
21 from IPython.parallel.apps.launcher import (LocalProcessLauncher,
21 from IPython.parallel.apps.launcher import (LocalProcessLauncher,
22 ipengine_cmd_argv,
22 ipengine_cmd_argv,
23 ipcontroller_cmd_argv,
23 ipcontroller_cmd_argv,
24 SIGKILL)
24 SIGKILL)
25
25
26 # globals
26 # globals
27 launchers = []
27 launchers = []
28 blackhole = open(os.devnull, 'w')
28 blackhole = open(os.devnull, 'w')
29
29
30 # Launcher class
30 # Launcher class
31 class TestProcessLauncher(LocalProcessLauncher):
31 class TestProcessLauncher(LocalProcessLauncher):
32 """subclass LocalProcessLauncher, to prevent extra sockets and threads being created on Windows"""
32 """subclass LocalProcessLauncher, to prevent extra sockets and threads being created on Windows"""
33 def start(self):
33 def start(self):
34 if self.state == 'before':
34 if self.state == 'before':
35 self.process = Popen(self.args,
35 self.process = Popen(self.args,
36 stdout=blackhole, stderr=blackhole,
36 stdout=blackhole, stderr=blackhole,
37 env=os.environ,
37 env=os.environ,
38 cwd=self.work_dir
38 cwd=self.work_dir
39 )
39 )
40 self.notify_start(self.process.pid)
40 self.notify_start(self.process.pid)
41 self.poll = self.process.poll
41 self.poll = self.process.poll
42 else:
42 else:
43 s = 'The process was already started and has state: %r' % self.state
43 s = 'The process was already started and has state: %r' % self.state
44 raise ProcessStateError(s)
44 raise ProcessStateError(s)
45
45
46 # nose setup/teardown
46 # nose setup/teardown
47
47
48 def setup():
48 def setup():
49 cp = TestProcessLauncher()
49 cp = TestProcessLauncher()
50 cp.cmd_and_args = ipcontroller_cmd_argv + \
50 cp.cmd_and_args = ipcontroller_cmd_argv + \
51 ['--profile', 'iptest', '--log-level', '99', '-r']
51 ['profile=iptest', 'log_level=50', '--reuse']
52 cp.start()
52 cp.start()
53 launchers.append(cp)
53 launchers.append(cp)
54 cluster_dir = os.path.join(get_ipython_dir(), 'cluster_iptest')
54 cluster_dir = os.path.join(get_ipython_dir(), 'cluster_iptest')
55 engine_json = os.path.join(cluster_dir, 'security', 'ipcontroller-engine.json')
55 engine_json = os.path.join(cluster_dir, 'security', 'ipcontroller-engine.json')
56 client_json = os.path.join(cluster_dir, 'security', 'ipcontroller-client.json')
56 client_json = os.path.join(cluster_dir, 'security', 'ipcontroller-client.json')
57 tic = time.time()
57 tic = time.time()
58 while not os.path.exists(engine_json) or not os.path.exists(client_json):
58 while not os.path.exists(engine_json) or not os.path.exists(client_json):
59 if cp.poll() is not None:
59 if cp.poll() is not None:
60 print cp.poll()
60 print cp.poll()
61 raise RuntimeError("The test controller failed to start.")
61 raise RuntimeError("The test controller failed to start.")
62 elif time.time()-tic > 10:
62 elif time.time()-tic > 10:
63 raise RuntimeError("Timeout waiting for the test controller to start.")
63 raise RuntimeError("Timeout waiting for the test controller to start.")
64 time.sleep(0.1)
64 time.sleep(0.1)
65 add_engines(1)
65 add_engines(1)
66
66
67 def add_engines(n=1, profile='iptest'):
67 def add_engines(n=1, profile='iptest'):
68 rc = Client(profile=profile)
68 rc = Client(profile=profile)
69 base = len(rc)
69 base = len(rc)
70 eps = []
70 eps = []
71 for i in range(n):
71 for i in range(n):
72 ep = TestProcessLauncher()
72 ep = TestProcessLauncher()
73 ep.cmd_and_args = ipengine_cmd_argv + ['--profile', profile, '--log-level', '99']
73 ep.cmd_and_args = ipengine_cmd_argv + ['profile=%s'%profile, 'log_level=50']
74 ep.start()
74 ep.start()
75 launchers.append(ep)
75 launchers.append(ep)
76 eps.append(ep)
76 eps.append(ep)
77 tic = time.time()
77 tic = time.time()
78 while len(rc) < base+n:
78 while len(rc) < base+n:
79 if any([ ep.poll() is not None for ep in eps ]):
79 if any([ ep.poll() is not None for ep in eps ]):
80 raise RuntimeError("A test engine failed to start.")
80 raise RuntimeError("A test engine failed to start.")
81 elif time.time()-tic > 10:
81 elif time.time()-tic > 10:
82 raise RuntimeError("Timeout waiting for engines to connect.")
82 raise RuntimeError("Timeout waiting for engines to connect.")
83 time.sleep(.1)
83 time.sleep(.1)
84 rc.spin()
84 rc.spin()
85 rc.close()
85 rc.close()
86 return eps
86 return eps
87
87
88 def teardown():
88 def teardown():
89 time.sleep(1)
89 time.sleep(1)
90 while launchers:
90 while launchers:
91 p = launchers.pop()
91 p = launchers.pop()
92 if p.poll() is None:
92 if p.poll() is None:
93 try:
93 try:
94 p.stop()
94 p.stop()
95 except Exception, e:
95 except Exception, e:
96 print e
96 print e
97 pass
97 pass
98 if p.poll() is None:
98 if p.poll() is None:
99 time.sleep(.25)
99 time.sleep(.25)
100 if p.poll() is None:
100 if p.poll() is None:
101 try:
101 try:
102 print 'cleaning up test process...'
102 print 'cleaning up test process...'
103 p.signal(SIGKILL)
103 p.signal(SIGKILL)
104 except:
104 except:
105 print "couldn't shutdown process: ", p
105 print "couldn't shutdown process: ", p
106 blackhole.close()
106 blackhole.close()
107
107
@@ -1,111 +1,111 b''
1 """test building messages with streamsession"""
1 """test building messages with streamsession"""
2
2
3 #-------------------------------------------------------------------------------
3 #-------------------------------------------------------------------------------
4 # Copyright (C) 2011 The IPython Development Team
4 # Copyright (C) 2011 The IPython Development Team
5 #
5 #
6 # Distributed under the terms of the BSD License. The full license is in
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING, distributed as part of this software.
7 # the file COPYING, distributed as part of this software.
8 #-------------------------------------------------------------------------------
8 #-------------------------------------------------------------------------------
9
9
10 #-------------------------------------------------------------------------------
10 #-------------------------------------------------------------------------------
11 # Imports
11 # Imports
12 #-------------------------------------------------------------------------------
12 #-------------------------------------------------------------------------------
13
13
14 import os
14 import os
15 import uuid
15 import uuid
16 import zmq
16 import zmq
17
17
18 from zmq.tests import BaseZMQTestCase
18 from zmq.tests import BaseZMQTestCase
19 from zmq.eventloop.zmqstream import ZMQStream
19 from zmq.eventloop.zmqstream import ZMQStream
20 # from IPython.zmq.tests import SessionTestCase
20 # from IPython.zmq.tests import SessionTestCase
21 from IPython.parallel import streamsession as ss
21 from IPython.parallel import streamsession as ss
22
22
23 class SessionTestCase(BaseZMQTestCase):
23 class SessionTestCase(BaseZMQTestCase):
24
24
25 def setUp(self):
25 def setUp(self):
26 BaseZMQTestCase.setUp(self)
26 BaseZMQTestCase.setUp(self)
27 self.session = ss.StreamSession()
27 self.session = ss.StreamSession()
28
28
29 class TestSession(SessionTestCase):
29 class TestSession(SessionTestCase):
30
30
31 def test_msg(self):
31 def test_msg(self):
32 """message format"""
32 """message format"""
33 msg = self.session.msg('execute')
33 msg = self.session.msg('execute')
34 thekeys = set('header msg_id parent_header msg_type content'.split())
34 thekeys = set('header msg_id parent_header msg_type content'.split())
35 s = set(msg.keys())
35 s = set(msg.keys())
36 self.assertEquals(s, thekeys)
36 self.assertEquals(s, thekeys)
37 self.assertTrue(isinstance(msg['content'],dict))
37 self.assertTrue(isinstance(msg['content'],dict))
38 self.assertTrue(isinstance(msg['header'],dict))
38 self.assertTrue(isinstance(msg['header'],dict))
39 self.assertTrue(isinstance(msg['parent_header'],dict))
39 self.assertTrue(isinstance(msg['parent_header'],dict))
40 self.assertEquals(msg['msg_type'], 'execute')
40 self.assertEquals(msg['msg_type'], 'execute')
41
41
42
42
43
43
44 def test_args(self):
44 def test_args(self):
45 """initialization arguments for StreamSession"""
45 """initialization arguments for StreamSession"""
46 s = self.session
46 s = self.session
47 self.assertTrue(s.pack is ss.default_packer)
47 self.assertTrue(s.pack is ss.default_packer)
48 self.assertTrue(s.unpack is ss.default_unpacker)
48 self.assertTrue(s.unpack is ss.default_unpacker)
49 self.assertEquals(s.username, os.environ.get('USER', 'username'))
49 self.assertEquals(s.username, os.environ.get('USER', 'username'))
50
50
51 s = ss.StreamSession(username=None)
51 s = ss.StreamSession()
52 self.assertEquals(s.username, os.environ.get('USER', 'username'))
52 self.assertEquals(s.username, os.environ.get('USER', 'username'))
53
53
54 self.assertRaises(TypeError, ss.StreamSession, packer='hi')
54 self.assertRaises(TypeError, ss.StreamSession, pack='hi')
55 self.assertRaises(TypeError, ss.StreamSession, unpacker='hi')
55 self.assertRaises(TypeError, ss.StreamSession, unpack='hi')
56 u = str(uuid.uuid4())
56 u = str(uuid.uuid4())
57 s = ss.StreamSession(username='carrot', session=u)
57 s = ss.StreamSession(username='carrot', session=u)
58 self.assertEquals(s.session, u)
58 self.assertEquals(s.session, u)
59 self.assertEquals(s.username, 'carrot')
59 self.assertEquals(s.username, 'carrot')
60
60
61 def test_tracking(self):
61 def test_tracking(self):
62 """test tracking messages"""
62 """test tracking messages"""
63 a,b = self.create_bound_pair(zmq.PAIR, zmq.PAIR)
63 a,b = self.create_bound_pair(zmq.PAIR, zmq.PAIR)
64 s = self.session
64 s = self.session
65 stream = ZMQStream(a)
65 stream = ZMQStream(a)
66 msg = s.send(a, 'hello', track=False)
66 msg = s.send(a, 'hello', track=False)
67 self.assertTrue(msg['tracker'] is None)
67 self.assertTrue(msg['tracker'] is None)
68 msg = s.send(a, 'hello', track=True)
68 msg = s.send(a, 'hello', track=True)
69 self.assertTrue(isinstance(msg['tracker'], zmq.MessageTracker))
69 self.assertTrue(isinstance(msg['tracker'], zmq.MessageTracker))
70 M = zmq.Message(b'hi there', track=True)
70 M = zmq.Message(b'hi there', track=True)
71 msg = s.send(a, 'hello', buffers=[M], track=True)
71 msg = s.send(a, 'hello', buffers=[M], track=True)
72 t = msg['tracker']
72 t = msg['tracker']
73 self.assertTrue(isinstance(t, zmq.MessageTracker))
73 self.assertTrue(isinstance(t, zmq.MessageTracker))
74 self.assertRaises(zmq.NotDone, t.wait, .1)
74 self.assertRaises(zmq.NotDone, t.wait, .1)
75 del M
75 del M
76 t.wait(1) # this will raise
76 t.wait(1) # this will raise
77
77
78
78
79 # def test_rekey(self):
79 # def test_rekey(self):
80 # """rekeying dict around json str keys"""
80 # """rekeying dict around json str keys"""
81 # d = {'0': uuid.uuid4(), 0:uuid.uuid4()}
81 # d = {'0': uuid.uuid4(), 0:uuid.uuid4()}
82 # self.assertRaises(KeyError, ss.rekey, d)
82 # self.assertRaises(KeyError, ss.rekey, d)
83 #
83 #
84 # d = {'0': uuid.uuid4(), 1:uuid.uuid4(), 'asdf':uuid.uuid4()}
84 # d = {'0': uuid.uuid4(), 1:uuid.uuid4(), 'asdf':uuid.uuid4()}
85 # d2 = {0:d['0'],1:d[1],'asdf':d['asdf']}
85 # d2 = {0:d['0'],1:d[1],'asdf':d['asdf']}
86 # rd = ss.rekey(d)
86 # rd = ss.rekey(d)
87 # self.assertEquals(d2,rd)
87 # self.assertEquals(d2,rd)
88 #
88 #
89 # d = {'1.5':uuid.uuid4(),'1':uuid.uuid4()}
89 # d = {'1.5':uuid.uuid4(),'1':uuid.uuid4()}
90 # d2 = {1.5:d['1.5'],1:d['1']}
90 # d2 = {1.5:d['1.5'],1:d['1']}
91 # rd = ss.rekey(d)
91 # rd = ss.rekey(d)
92 # self.assertEquals(d2,rd)
92 # self.assertEquals(d2,rd)
93 #
93 #
94 # d = {'1.0':uuid.uuid4(),'1':uuid.uuid4()}
94 # d = {'1.0':uuid.uuid4(),'1':uuid.uuid4()}
95 # self.assertRaises(KeyError, ss.rekey, d)
95 # self.assertRaises(KeyError, ss.rekey, d)
96 #
96 #
97 def test_unique_msg_ids(self):
97 def test_unique_msg_ids(self):
98 """test that messages receive unique ids"""
98 """test that messages receive unique ids"""
99 ids = set()
99 ids = set()
100 for i in range(2**12):
100 for i in range(2**12):
101 h = self.session.msg_header('test')
101 h = self.session.msg_header('test')
102 msg_id = h['msg_id']
102 msg_id = h['msg_id']
103 self.assertTrue(msg_id not in ids)
103 self.assertTrue(msg_id not in ids)
104 ids.add(msg_id)
104 ids.add(msg_id)
105
105
106 def test_feed_identities(self):
106 def test_feed_identities(self):
107 """scrub the front for zmq IDENTITIES"""
107 """scrub the front for zmq IDENTITIES"""
108 theids = "engine client other".split()
108 theids = "engine client other".split()
109 content = dict(code='whoda',stuff=object())
109 content = dict(code='whoda',stuff=object())
110 themsg = self.session.msg('execute',content=content)
110 themsg = self.session.msg('execute',content=content)
111 pmsg = theids
111 pmsg = theids
@@ -1,253 +1,253 b''
1 .. _ip1par:
1 .. _ip1par:
2
2
3 ============================
3 ============================
4 Overview and getting started
4 Overview and getting started
5 ============================
5 ============================
6
6
7 Introduction
7 Introduction
8 ============
8 ============
9
9
10 This section gives an overview of IPython's sophisticated and powerful
10 This section gives an overview of IPython's sophisticated and powerful
11 architecture for parallel and distributed computing. This architecture
11 architecture for parallel and distributed computing. This architecture
12 abstracts out parallelism in a very general way, which enables IPython to
12 abstracts out parallelism in a very general way, which enables IPython to
13 support many different styles of parallelism including:
13 support many different styles of parallelism including:
14
14
15 * Single program, multiple data (SPMD) parallelism.
15 * Single program, multiple data (SPMD) parallelism.
16 * Multiple program, multiple data (MPMD) parallelism.
16 * Multiple program, multiple data (MPMD) parallelism.
17 * Message passing using MPI.
17 * Message passing using MPI.
18 * Task farming.
18 * Task farming.
19 * Data parallel.
19 * Data parallel.
20 * Combinations of these approaches.
20 * Combinations of these approaches.
21 * Custom user defined approaches.
21 * Custom user defined approaches.
22
22
23 Most importantly, IPython enables all types of parallel applications to
23 Most importantly, IPython enables all types of parallel applications to
24 be developed, executed, debugged and monitored *interactively*. Hence,
24 be developed, executed, debugged and monitored *interactively*. Hence,
25 the ``I`` in IPython. The following are some example usage cases for IPython:
25 the ``I`` in IPython. The following are some example usage cases for IPython:
26
26
27 * Quickly parallelize algorithms that are embarrassingly parallel
27 * Quickly parallelize algorithms that are embarrassingly parallel
28 using a number of simple approaches. Many simple things can be
28 using a number of simple approaches. Many simple things can be
29 parallelized interactively in one or two lines of code.
29 parallelized interactively in one or two lines of code.
30
30
31 * Steer traditional MPI applications on a supercomputer from an
31 * Steer traditional MPI applications on a supercomputer from an
32 IPython session on your laptop.
32 IPython session on your laptop.
33
33
34 * Analyze and visualize large datasets (that could be remote and/or
34 * Analyze and visualize large datasets (that could be remote and/or
35 distributed) interactively using IPython and tools like
35 distributed) interactively using IPython and tools like
36 matplotlib/TVTK.
36 matplotlib/TVTK.
37
37
38 * Develop, test and debug new parallel algorithms
38 * Develop, test and debug new parallel algorithms
39 (that may use MPI) interactively.
39 (that may use MPI) interactively.
40
40
41 * Tie together multiple MPI jobs running on different systems into
41 * Tie together multiple MPI jobs running on different systems into
42 one giant distributed and parallel system.
42 one giant distributed and parallel system.
43
43
44 * Start a parallel job on your cluster and then have a remote
44 * Start a parallel job on your cluster and then have a remote
45 collaborator connect to it and pull back data into their
45 collaborator connect to it and pull back data into their
46 local IPython session for plotting and analysis.
46 local IPython session for plotting and analysis.
47
47
48 * Run a set of tasks on a set of CPUs using dynamic load balancing.
48 * Run a set of tasks on a set of CPUs using dynamic load balancing.
49
49
50 Architecture overview
50 Architecture overview
51 =====================
51 =====================
52
52
53 The IPython architecture consists of four components:
53 The IPython architecture consists of four components:
54
54
55 * The IPython engine.
55 * The IPython engine.
56 * The IPython hub.
56 * The IPython hub.
57 * The IPython schedulers.
57 * The IPython schedulers.
58 * The controller client.
58 * The controller client.
59
59
60 These components live in the :mod:`IPython.parallel` package and are
60 These components live in the :mod:`IPython.parallel` package and are
61 installed with IPython. They do, however, have additional dependencies
61 installed with IPython. They do, however, have additional dependencies
62 that must be installed. For more information, see our
62 that must be installed. For more information, see our
63 :ref:`installation documentation <install_index>`.
63 :ref:`installation documentation <install_index>`.
64
64
65 .. TODO: include zmq in install_index
65 .. TODO: include zmq in install_index
66
66
67 IPython engine
67 IPython engine
68 ---------------
68 ---------------
69
69
70 The IPython engine is a Python instance that takes Python commands over a
70 The IPython engine is a Python instance that takes Python commands over a
71 network connection. Eventually, the IPython engine will be a full IPython
71 network connection. Eventually, the IPython engine will be a full IPython
72 interpreter, but for now, it is a regular Python interpreter. The engine
72 interpreter, but for now, it is a regular Python interpreter. The engine
73 can also handle incoming and outgoing Python objects sent over a network
73 can also handle incoming and outgoing Python objects sent over a network
74 connection. When multiple engines are started, parallel and distributed
74 connection. When multiple engines are started, parallel and distributed
75 computing becomes possible. An important feature of an IPython engine is
75 computing becomes possible. An important feature of an IPython engine is
76 that it blocks while user code is being executed. Read on for how the
76 that it blocks while user code is being executed. Read on for how the
77 IPython controller solves this problem to expose a clean asynchronous API
77 IPython controller solves this problem to expose a clean asynchronous API
78 to the user.
78 to the user.
79
79
80 IPython controller
80 IPython controller
81 ------------------
81 ------------------
82
82
83 The IPython controller processes provide an interface for working with a set of engines.
83 The IPython controller processes provide an interface for working with a set of engines.
84 At a general level, the controller is a collection of processes to which IPython engines
84 At a general level, the controller is a collection of processes to which IPython engines
85 and clients can connect. The controller is composed of a :class:`Hub` and a collection of
85 and clients can connect. The controller is composed of a :class:`Hub` and a collection of
86 :class:`Schedulers`. These Schedulers are typically run in separate processes on the
86 :class:`Schedulers`. These Schedulers are typically run in separate processes on the
87 same machine as the Hub, but they can be run anywhere, from local threads to remote machines.
87 same machine as the Hub, but they can be run anywhere, from local threads to remote machines.
88
88
89 The controller also provides a single point of contact for users who wish to
89 The controller also provides a single point of contact for users who wish to
90 utilize the engines connected to the controller. There are different ways of
90 utilize the engines connected to the controller. There are different ways of
91 working with a controller. In IPython, all of these models are implemented via
91 working with a controller. In IPython, all of these models are implemented via
92 the client's :meth:`.View.apply` method, with various arguments, or
92 the client's :meth:`.View.apply` method, with various arguments, or
93 constructing :class:`.View` objects to represent subsets of engines. The two
93 constructing :class:`.View` objects to represent subsets of engines. The two
94 primary models for interacting with engines are:
94 primary models for interacting with engines are:
95
95
96 * A **Direct** interface, where engines are addressed explicitly.
96 * A **Direct** interface, where engines are addressed explicitly.
97 * A **LoadBalanced** interface, where the Scheduler is trusted with assigning work to
97 * A **LoadBalanced** interface, where the Scheduler is trusted with assigning work to
98 appropriate engines.
98 appropriate engines.
99
99
100 Advanced users can readily extend the View models to enable other
100 Advanced users can readily extend the View models to enable other
101 styles of parallelism.
101 styles of parallelism.
102
102
103 .. note::
103 .. note::
104
104
105 A single controller and set of engines can be used with multiple models
105 A single controller and set of engines can be used with multiple models
106 simultaneously. This opens the door for lots of interesting things.
106 simultaneously. This opens the door for lots of interesting things.
107
107
108
108
109 The Hub
109 The Hub
110 *******
110 *******
111
111
112 The center of an IPython cluster is the Hub. This is the process that keeps
112 The center of an IPython cluster is the Hub. This is the process that keeps
113 track of engine connections, schedulers, clients, as well as all task requests and
113 track of engine connections, schedulers, clients, as well as all task requests and
114 results. The primary role of the Hub is to facilitate queries of the cluster state, and
114 results. The primary role of the Hub is to facilitate queries of the cluster state, and
115 minimize the necessary information required to establish the many connections involved in
115 minimize the necessary information required to establish the many connections involved in
116 connecting new clients and engines.
116 connecting new clients and engines.
117
117
118
118
119 Schedulers
119 Schedulers
120 **********
120 **********
121
121
122 All actions that can be performed on the engine go through a Scheduler. While the engines
122 All actions that can be performed on the engine go through a Scheduler. While the engines
123 themselves block when user code is run, the schedulers hide that from the user to provide
123 themselves block when user code is run, the schedulers hide that from the user to provide
124 a fully asynchronous interface to a set of engines.
124 a fully asynchronous interface to a set of engines.
125
125
126
126
127 IPython client and views
127 IPython client and views
128 ------------------------
128 ------------------------
129
129
130 There is one primary object, the :class:`~.parallel.Client`, for connecting to a cluster.
130 There is one primary object, the :class:`~.parallel.Client`, for connecting to a cluster.
131 For each execution model, there is a corresponding :class:`~.parallel.View`. These views
131 For each execution model, there is a corresponding :class:`~.parallel.View`. These views
132 allow users to interact with a set of engines through the interface. Here are the two default
132 allow users to interact with a set of engines through the interface. Here are the two default
133 views:
133 views:
134
134
135 * The :class:`DirectView` class for explicit addressing.
135 * The :class:`DirectView` class for explicit addressing.
136 * The :class:`LoadBalancedView` class for destination-agnostic scheduling.
136 * The :class:`LoadBalancedView` class for destination-agnostic scheduling.
137
137
138 Security
138 Security
139 --------
139 --------
140
140
141 IPython uses ZeroMQ for networking, which has provided many advantages, but
141 IPython uses ZeroMQ for networking, which has provided many advantages, but
142 one of the setbacks is its utter lack of security [ZeroMQ]_. By default, no IPython
142 one of the setbacks is its utter lack of security [ZeroMQ]_. By default, no IPython
143 connections are encrypted, but open ports only listen on localhost. The only
143 connections are encrypted, but open ports only listen on localhost. The only
144 source of security for IPython is via ssh-tunnel. IPython supports both shell
144 source of security for IPython is via ssh-tunnel. IPython supports both shell
145 (`openssh`) and `paramiko` based tunnels for connections. There is a key necessary
145 (`openssh`) and `paramiko` based tunnels for connections. There is a key necessary
146 to submit requests, but due to the lack of encryption, it does not provide
146 to submit requests, but due to the lack of encryption, it does not provide
147 significant security if loopback traffic is compromised.
147 significant security if loopback traffic is compromised.
148
148
149 In our architecture, the controller is the only process that listens on
149 In our architecture, the controller is the only process that listens on
150 network ports, and is thus the main point of vulnerability. The standard model
150 network ports, and is thus the main point of vulnerability. The standard model
151 for secure connections is to designate that the controller listen on
151 for secure connections is to designate that the controller listen on
152 localhost, and use ssh-tunnels to connect clients and/or
152 localhost, and use ssh-tunnels to connect clients and/or
153 engines.
153 engines.
154
154
155 To connect and authenticate to the controller an engine or client needs
155 To connect and authenticate to the controller an engine or client needs
156 some information that the controller has stored in a JSON file.
156 some information that the controller has stored in a JSON file.
157 Thus, the JSON files need to be copied to a location where
157 Thus, the JSON files need to be copied to a location where
158 the clients and engines can find them. Typically, this is the
158 the clients and engines can find them. Typically, this is the
159 :file:`~/.ipython/cluster_default/security` directory on the host where the
159 :file:`~/.ipython/cluster_default/security` directory on the host where the
160 client/engine is running (which could be a different host than the controller).
160 client/engine is running (which could be a different host than the controller).
161 Once the JSON files are copied over, everything should work fine.
161 Once the JSON files are copied over, everything should work fine.
162
162
163 Currently, there are two JSON files that the controller creates:
163 Currently, there are two JSON files that the controller creates:
164
164
165 ipcontroller-engine.json
165 ipcontroller-engine.json
166 This JSON file has the information necessary for an engine to connect
166 This JSON file has the information necessary for an engine to connect
167 to a controller.
167 to a controller.
168
168
169 ipcontroller-client.json
169 ipcontroller-client.json
170 The client's connection information. This may not differ from the engine's,
170 The client's connection information. This may not differ from the engine's,
171 but since the controller may listen on different ports for clients and
171 but since the controller may listen on different ports for clients and
172 engines, it is stored separately.
172 engines, it is stored separately.
173
173
174 More details of how these JSON files are used are given below.
174 More details of how these JSON files are used are given below.
175
175
176 A detailed description of the security model and its implementation in IPython
176 A detailed description of the security model and its implementation in IPython
177 can be found :ref:`here <parallelsecurity>`.
177 can be found :ref:`here <parallelsecurity>`.
178
178
179 .. warning::
179 .. warning::
180
180
181 Even at its most secure, the Controller listens on ports on localhost, and
181 Even at its most secure, the Controller listens on ports on localhost, and
182 every time you make a tunnel, you open a localhost port on the connecting
182 every time you make a tunnel, you open a localhost port on the connecting
183 machine that points to the Controller. If localhost on the Controller's
183 machine that points to the Controller. If localhost on the Controller's
184 machine, or the machine of any client or engine, is untrusted, then your
184 machine, or the machine of any client or engine, is untrusted, then your
185 Controller is insecure. There is no way around this with ZeroMQ.
185 Controller is insecure. There is no way around this with ZeroMQ.
186
186
187
187
188
188
189 Getting Started
189 Getting Started
190 ===============
190 ===============
191
191
192 To use IPython for parallel computing, you need to start one instance of the
192 To use IPython for parallel computing, you need to start one instance of the
193 controller and one or more instances of the engine. Initially, it is best to
193 controller and one or more instances of the engine. Initially, it is best to
194 simply start a controller and engines on a single host using the
194 simply start a controller and engines on a single host using the
195 :command:`ipcluster` command. To start a controller and 4 engines on your
195 :command:`ipcluster` command. To start a controller and 4 engines on your
196 localhost, just do::
196 localhost, just do::
197
197
198 $ ipcluster start -n 4
198 $ ipcluster start n=4
199
199
200 More details about starting the IPython controller and engines can be found
200 More details about starting the IPython controller and engines can be found
201 :ref:`here <parallel_process>`
201 :ref:`here <parallel_process>`
202
202
203 Once you have started the IPython controller and one or more engines, you
203 Once you have started the IPython controller and one or more engines, you
204 are ready to use the engines to do something useful. To make sure
204 are ready to use the engines to do something useful. To make sure
205 everything is working correctly, try the following commands:
205 everything is working correctly, try the following commands:
206
206
207 .. sourcecode:: ipython
207 .. sourcecode:: ipython
208
208
209 In [1]: from IPython.parallel import Client
209 In [1]: from IPython.parallel import Client
210
210
211 In [2]: c = Client()
211 In [2]: c = Client()
212
212
213 In [4]: c.ids
213 In [4]: c.ids
214 Out[4]: set([0, 1, 2, 3])
214 Out[4]: set([0, 1, 2, 3])
215
215
216 In [5]: c[:].apply_sync(lambda : "Hello, World")
216 In [5]: c[:].apply_sync(lambda : "Hello, World")
217 Out[5]: [ 'Hello, World', 'Hello, World', 'Hello, World', 'Hello, World' ]
217 Out[5]: [ 'Hello, World', 'Hello, World', 'Hello, World', 'Hello, World' ]
218
218
219
219
220 When a client is created with no arguments, the client tries to find the corresponding JSON file
220 When a client is created with no arguments, the client tries to find the corresponding JSON file
221 in the local `~/.ipython/cluster_default/security` directory. Or if you specified a profile,
221 in the local `~/.ipython/cluster_default/security` directory. Or if you specified a profile,
222 you can use that with the Client. This should cover most cases:
222 you can use that with the Client. This should cover most cases:
223
223
224 .. sourcecode:: ipython
224 .. sourcecode:: ipython
225
225
226 In [2]: c = Client(profile='myprofile')
226 In [2]: c = Client(profile='myprofile')
227
227
228 If you have put the JSON file in a different location or it has a different name, create the
228 If you have put the JSON file in a different location or it has a different name, create the
229 client like this:
229 client like this:
230
230
231 .. sourcecode:: ipython
231 .. sourcecode:: ipython
232
232
233 In [2]: c = Client('/path/to/my/ipcontroller-client.json')
233 In [2]: c = Client('/path/to/my/ipcontroller-client.json')
234
234
235 Remember, a client needs to be able to see the Hub's ports to connect. So if they are on a
235 Remember, a client needs to be able to see the Hub's ports to connect. So if they are on a
236 different machine, you may need to use an ssh server to tunnel access to that machine,
236 different machine, you may need to use an ssh server to tunnel access to that machine,
237 then you would connect to it with:
237 then you would connect to it with:
238
238
239 .. sourcecode:: ipython
239 .. sourcecode:: ipython
240
240
241 In [2]: c = Client(sshserver='myhub.example.com')
241 In [2]: c = Client(sshserver='myhub.example.com')
242
242
243 Where 'myhub.example.com' is the url or IP address of the machine on
243 Where 'myhub.example.com' is the url or IP address of the machine on
244 which the Hub process is running (or another machine that has direct access to the Hub's ports).
244 which the Hub process is running (or another machine that has direct access to the Hub's ports).
245
245
246 The SSH server may already be specified in ipcontroller-client.json, if the controller was
246 The SSH server may already be specified in ipcontroller-client.json, if the controller was
247 instructed at its launch time.
247 instructed at its launch time.
248
248
249 You are now ready to learn more about the :ref:`Direct
249 You are now ready to learn more about the :ref:`Direct
250 <parallel_multiengine>` and :ref:`LoadBalanced <parallel_task>` interfaces to the
250 <parallel_multiengine>` and :ref:`LoadBalanced <parallel_task>` interfaces to the
251 controller.
251 controller.
252
252
253 .. [ZeroMQ] ZeroMQ. http://www.zeromq.org
253 .. [ZeroMQ] ZeroMQ. http://www.zeromq.org
@@ -1,156 +1,156 b''
1 .. _parallelmpi:
1 .. _parallelmpi:
2
2
3 =======================
3 =======================
4 Using MPI with IPython
4 Using MPI with IPython
5 =======================
5 =======================
6
6
7 .. note::
7 .. note::
8
8
9 Not adapted to zmq yet
9 Not adapted to zmq yet
10 This is out of date wrt ipcluster in general as well
10 This is out of date wrt ipcluster in general as well
11
11
12 Often, a parallel algorithm will require moving data between the engines. One
12 Often, a parallel algorithm will require moving data between the engines. One
13 way of accomplishing this is by doing a pull and then a push using the
13 way of accomplishing this is by doing a pull and then a push using the
14 multiengine client. However, this will be slow as all the data has to go
14 multiengine client. However, this will be slow as all the data has to go
15 through the controller to the client and then back through the controller, to
15 through the controller to the client and then back through the controller, to
16 its final destination.
16 its final destination.
17
17
18 A much better way of moving data between engines is to use a message passing
18 A much better way of moving data between engines is to use a message passing
19 library, such as the Message Passing Interface (MPI) [MPI]_. IPython's
19 library, such as the Message Passing Interface (MPI) [MPI]_. IPython's
20 parallel computing architecture has been designed from the ground up to
20 parallel computing architecture has been designed from the ground up to
21 integrate with MPI. This document describes how to use MPI with IPython.
21 integrate with MPI. This document describes how to use MPI with IPython.
22
22
23 Additional installation requirements
23 Additional installation requirements
24 ====================================
24 ====================================
25
25
26 If you want to use MPI with IPython, you will need to install:
26 If you want to use MPI with IPython, you will need to install:
27
27
28 * A standard MPI implementation such as OpenMPI [OpenMPI]_ or MPICH.
28 * A standard MPI implementation such as OpenMPI [OpenMPI]_ or MPICH.
29 * The mpi4py [mpi4py]_ package.
29 * The mpi4py [mpi4py]_ package.
30
30
31 .. note::
31 .. note::
32
32
33 The mpi4py package is not a strict requirement. However, you need to
33 The mpi4py package is not a strict requirement. However, you need to
34 have *some* way of calling MPI from Python. You also need some way of
34 have *some* way of calling MPI from Python. You also need some way of
35 making sure that :func:`MPI_Init` is called when the IPython engines start
35 making sure that :func:`MPI_Init` is called when the IPython engines start
36 up. There are a number of ways of doing this and a good number of
36 up. There are a number of ways of doing this and a good number of
37 associated subtleties. We highly recommend just using mpi4py as it
37 associated subtleties. We highly recommend just using mpi4py as it
38 takes care of most of these problems. If you want to do something
38 takes care of most of these problems. If you want to do something
39 different, let us know and we can help you get started.
39 different, let us know and we can help you get started.
40
40
41 Starting the engines with MPI enabled
41 Starting the engines with MPI enabled
42 =====================================
42 =====================================
43
43
44 To use code that calls MPI, there are typically two things that MPI requires.
44 To use code that calls MPI, there are typically two things that MPI requires.
45
45
46 1. The process that wants to call MPI must be started using
46 1. The process that wants to call MPI must be started using
47 :command:`mpiexec` or a batch system (like PBS) that has MPI support.
47 :command:`mpiexec` or a batch system (like PBS) that has MPI support.
48 2. Once the process starts, it must call :func:`MPI_Init`.
48 2. Once the process starts, it must call :func:`MPI_Init`.
49
49
50 There are a couple of ways that you can start the IPython engines and get
50 There are a couple of ways that you can start the IPython engines and get
51 these things to happen.
51 these things to happen.
52
52
53 Automatic starting using :command:`mpiexec` and :command:`ipcluster`
53 Automatic starting using :command:`mpiexec` and :command:`ipcluster`
54 --------------------------------------------------------------------
54 --------------------------------------------------------------------
55
55
56 The easiest approach is to use the `mpiexec` mode of :command:`ipcluster`,
56 The easiest approach is to use the `MPIExec` Launchers in :command:`ipcluster`,
57 which will first start a controller and then a set of engines using
57 which will first start a controller and then a set of engines using
58 :command:`mpiexec`::
58 :command:`mpiexec`::
59
59
60 $ ipcluster mpiexec -n 4
60 $ ipcluster start n=4 elauncher=MPIExecEngineSetLauncher
61
61
62 This approach is best as interrupting :command:`ipcluster` will automatically
62 This approach is best as interrupting :command:`ipcluster` will automatically
63 stop and clean up the controller and engines.
63 stop and clean up the controller and engines.
64
64
65 Manual starting using :command:`mpiexec`
65 Manual starting using :command:`mpiexec`
66 ----------------------------------------
66 ----------------------------------------
67
67
68 If you want to start the IPython engines using the :command:`mpiexec`, just
68 If you want to start the IPython engines using the :command:`mpiexec`, just
69 do::
69 do::
70
70
71 $ mpiexec -n 4 ipengine --mpi=mpi4py
71 $ mpiexec n=4 ipengine mpi=mpi4py
72
72
73 This requires that you already have a controller running and that the FURL
73 This requires that you already have a controller running and that the FURL
74 files for the engines are in place. We also have built in support for
74 files for the engines are in place. We also have built in support for
75 PyTrilinos [PyTrilinos]_, which can be used (assuming it is installed) by
75 PyTrilinos [PyTrilinos]_, which can be used (assuming it is installed) by
76 starting the engines with::
76 starting the engines with::
77
77
78 $ mpiexec -n 4 ipengine --mpi=pytrilinos
78 $ mpiexec n=4 ipengine mpi=pytrilinos
79
79
80 Automatic starting using PBS and :command:`ipcluster`
80 Automatic starting using PBS and :command:`ipcluster`
81 ------------------------------------------------------
81 ------------------------------------------------------
82
82
83 The :command:`ipcluster` command also has built-in integration with PBS. For
83 The :command:`ipcluster` command also has built-in integration with PBS. For
84 more information on this approach, see our documentation on :ref:`ipcluster
84 more information on this approach, see our documentation on :ref:`ipcluster
85 <parallel_process>`.
85 <parallel_process>`.
86
86
87 Actually using MPI
87 Actually using MPI
88 ==================
88 ==================
89
89
90 Once the engines are running with MPI enabled, you are ready to go. You can
90 Once the engines are running with MPI enabled, you are ready to go. You can
91 now call any code that uses MPI in the IPython engines. And, all of this can
91 now call any code that uses MPI in the IPython engines. And, all of this can
92 be done interactively. Here we show a simple example that uses mpi4py
92 be done interactively. Here we show a simple example that uses mpi4py
93 [mpi4py]_ version 1.1.0 or later.
93 [mpi4py]_ version 1.1.0 or later.
94
94
95 First, let's define a simple function that uses MPI to calculate the sum of a
95 First, let's define a simple function that uses MPI to calculate the sum of a
96 distributed array. Save the following text in a file called :file:`psum.py`:
96 distributed array. Save the following text in a file called :file:`psum.py`:
97
97
98 .. sourcecode:: python
98 .. sourcecode:: python
99
99
100 from mpi4py import MPI
100 from mpi4py import MPI
101 import numpy as np
101 import numpy as np
102
102
103 def psum(a):
103 def psum(a):
104 s = np.sum(a)
104 s = np.sum(a)
105 rcvBuf = np.array(0.0,'d')
105 rcvBuf = np.array(0.0,'d')
106 MPI.COMM_WORLD.Allreduce([s, MPI.DOUBLE],
106 MPI.COMM_WORLD.Allreduce([s, MPI.DOUBLE],
107 [rcvBuf, MPI.DOUBLE],
107 [rcvBuf, MPI.DOUBLE],
108 op=MPI.SUM)
108 op=MPI.SUM)
109 return rcvBuf
109 return rcvBuf
110
110
111 Now, start an IPython cluster::
111 Now, start an IPython cluster::
112
112
113 $ ipcluster start -p mpi -n 4
113 $ ipcluster start profile=mpi n=4
114
114
115 .. note::
115 .. note::
116
116
117 It is assumed here that the mpi profile has been set up, as described :ref:`here
117 It is assumed here that the mpi profile has been set up, as described :ref:`here
118 <parallel_process>`.
118 <parallel_process>`.
119
119
120 Finally, connect to the cluster and use this function interactively. In this
120 Finally, connect to the cluster and use this function interactively. In this
121 case, we create a random array on each engine and sum up all the random arrays
121 case, we create a random array on each engine and sum up all the random arrays
122 using our :func:`psum` function:
122 using our :func:`psum` function:
123
123
124 .. sourcecode:: ipython
124 .. sourcecode:: ipython
125
125
126 In [1]: from IPython.parallel import Client
126 In [1]: from IPython.parallel import Client
127
127
128 In [2]: %load_ext parallel_magic
128 In [2]: %load_ext parallel_magic
129
129
130 In [3]: c = Client(profile='mpi')
130 In [3]: c = Client(profile='mpi')
131
131
132 In [4]: view = c[:]
132 In [4]: view = c[:]
133
133
134 In [5]: view.activate()
134 In [5]: view.activate()
135
135
136 # run the contents of the file on each engine:
136 # run the contents of the file on each engine:
137 In [6]: view.run('psum.py')
137 In [6]: view.run('psum.py')
138
138
139 In [6]: px a = np.random.rand(100)
139 In [6]: px a = np.random.rand(100)
140 Parallel execution on engines: [0,1,2,3]
140 Parallel execution on engines: [0,1,2,3]
141
141
142 In [8]: px s = psum(a)
142 In [8]: px s = psum(a)
143 Parallel execution on engines: [0,1,2,3]
143 Parallel execution on engines: [0,1,2,3]
144
144
145 In [9]: view['s']
145 In [9]: view['s']
146 Out[9]: [187.451545803,187.451545803,187.451545803,187.451545803]
146 Out[9]: [187.451545803,187.451545803,187.451545803,187.451545803]
147
147
148 Any Python code that makes calls to MPI can be used in this manner, including
148 Any Python code that makes calls to MPI can be used in this manner, including
149 compiled C, C++ and Fortran libraries that have been exposed to Python.
149 compiled C, C++ and Fortran libraries that have been exposed to Python.
150
150
151 .. [MPI] Message Passing Interface. http://www-unix.mcs.anl.gov/mpi/
151 .. [MPI] Message Passing Interface. http://www-unix.mcs.anl.gov/mpi/
152 .. [mpi4py] MPI for Python. mpi4py: http://mpi4py.scipy.org/
152 .. [mpi4py] MPI for Python. mpi4py: http://mpi4py.scipy.org/
153 .. [OpenMPI] Open MPI. http://www.open-mpi.org/
153 .. [OpenMPI] Open MPI. http://www.open-mpi.org/
154 .. [PyTrilinos] PyTrilinos. http://trilinos.sandia.gov/packages/pytrilinos/
154 .. [PyTrilinos] PyTrilinos. http://trilinos.sandia.gov/packages/pytrilinos/
155
155
156
156
@@ -1,843 +1,843 b''
1 .. _parallel_multiengine:
1 .. _parallel_multiengine:
2
2
3 ==========================
3 ==========================
4 IPython's Direct interface
4 IPython's Direct interface
5 ==========================
5 ==========================
6
6
7 The direct, or multiengine, interface represents one possible way of working with a set of
7 The direct, or multiengine, interface represents one possible way of working with a set of
8 IPython engines. The basic idea behind the multiengine interface is that the
8 IPython engines. The basic idea behind the multiengine interface is that the
9 capabilities of each engine are directly and explicitly exposed to the user.
9 capabilities of each engine are directly and explicitly exposed to the user.
10 Thus, in the multiengine interface, each engine is given an id that is used to
10 Thus, in the multiengine interface, each engine is given an id that is used to
11 identify the engine and give it work to do. This interface is very intuitive
11 identify the engine and give it work to do. This interface is very intuitive
12 and is designed with interactive usage in mind, and is the best place for
12 and is designed with interactive usage in mind, and is the best place for
13 new users of IPython to begin.
13 new users of IPython to begin.
14
14
15 Starting the IPython controller and engines
15 Starting the IPython controller and engines
16 ===========================================
16 ===========================================
17
17
18 To follow along with this tutorial, you will need to start the IPython
18 To follow along with this tutorial, you will need to start the IPython
19 controller and four IPython engines. The simplest way of doing this is to use
19 controller and four IPython engines. The simplest way of doing this is to use
20 the :command:`ipcluster` command::
20 the :command:`ipcluster` command::
21
21
22 $ ipcluster start -n 4
22 $ ipcluster start n=4
23
23
24 For more detailed information about starting the controller and engines, see
24 For more detailed information about starting the controller and engines, see
25 our :ref:`introduction <ip1par>` to using IPython for parallel computing.
25 our :ref:`introduction <ip1par>` to using IPython for parallel computing.
26
26
27 Creating a ``Client`` instance
27 Creating a ``Client`` instance
28 ==============================
28 ==============================
29
29
30 The first step is to import the IPython :mod:`IPython.parallel`
30 The first step is to import the IPython :mod:`IPython.parallel`
31 module and then create a :class:`.Client` instance:
31 module and then create a :class:`.Client` instance:
32
32
33 .. sourcecode:: ipython
33 .. sourcecode:: ipython
34
34
35 In [1]: from IPython.parallel import Client
35 In [1]: from IPython.parallel import Client
36
36
37 In [2]: rc = Client()
37 In [2]: rc = Client()
38
38
39 This form assumes that the default connection information (stored in
39 This form assumes that the default connection information (stored in
40 :file:`ipcontroller-client.json` found in :file:`IPYTHON_DIR/cluster_default/security`) is
40 :file:`ipcontroller-client.json` found in :file:`IPYTHON_DIR/cluster_default/security`) is
41 accurate. If the controller was started on a remote machine, you must copy that connection
41 accurate. If the controller was started on a remote machine, you must copy that connection
42 file to the client machine, or enter its contents as arguments to the Client constructor:
42 file to the client machine, or enter its contents as arguments to the Client constructor:
43
43
44 .. sourcecode:: ipython
44 .. sourcecode:: ipython
45
45
46 # If you have copied the json connector file from the controller:
46 # If you have copied the json connector file from the controller:
47 In [2]: rc = Client('/path/to/ipcontroller-client.json')
47 In [2]: rc = Client('/path/to/ipcontroller-client.json')
48 # or to connect with a specific profile you have set up:
48 # or to connect with a specific profile you have set up:
49 In [3]: rc = Client(profile='mpi')
49 In [3]: rc = Client(profile='mpi')
50
50
51
51
52 To make sure there are engines connected to the controller, users can get a list
52 To make sure there are engines connected to the controller, users can get a list
53 of engine ids:
53 of engine ids:
54
54
55 .. sourcecode:: ipython
55 .. sourcecode:: ipython
56
56
57 In [3]: rc.ids
57 In [3]: rc.ids
58 Out[3]: [0, 1, 2, 3]
58 Out[3]: [0, 1, 2, 3]
59
59
60 Here we see that there are four engines ready to do work for us.
60 Here we see that there are four engines ready to do work for us.
61
61
62 For direct execution, we will make use of a :class:`DirectView` object, which can be
62 For direct execution, we will make use of a :class:`DirectView` object, which can be
63 constructed via list-access to the client:
63 constructed via list-access to the client:
64
64
65 .. sourcecode:: ipython
65 .. sourcecode:: ipython
66
66
67 In [4]: dview = rc[:] # use all engines
67 In [4]: dview = rc[:] # use all engines
68
68
69 .. seealso::
69 .. seealso::
70
70
71 For more information, see the in-depth explanation of :ref:`Views <parallel_details>`.
71 For more information, see the in-depth explanation of :ref:`Views <parallel_details>`.
72
72
73
73
74 Quick and easy parallelism
74 Quick and easy parallelism
75 ==========================
75 ==========================
76
76
77 In many cases, you simply want to apply a Python function to a sequence of
77 In many cases, you simply want to apply a Python function to a sequence of
78 objects, but *in parallel*. The client interface provides a simple way
78 objects, but *in parallel*. The client interface provides a simple way
79 of accomplishing this: using the DirectView's :meth:`~DirectView.map` method.
79 of accomplishing this: using the DirectView's :meth:`~DirectView.map` method.
80
80
81 Parallel map
81 Parallel map
82 ------------
82 ------------
83
83
84 Python's builtin :func:`map` function allows a function to be applied to a
84 Python's builtin :func:`map` function allows a function to be applied to a
85 sequence element-by-element. This type of code is typically trivial to
85 sequence element-by-element. This type of code is typically trivial to
86 parallelize. In fact, since IPython's interface is all about functions anyway,
86 parallelize. In fact, since IPython's interface is all about functions anyway,
87 you can just use the builtin :func:`map` with a :class:`RemoteFunction`, or a
87 you can just use the builtin :func:`map` with a :class:`RemoteFunction`, or a
88 DirectView's :meth:`map` method:
88 DirectView's :meth:`map` method:
89
89
90 .. sourcecode:: ipython
90 .. sourcecode:: ipython
91
91
92 In [62]: serial_result = map(lambda x:x**10, range(32))
92 In [62]: serial_result = map(lambda x:x**10, range(32))
93
93
94 In [63]: parallel_result = dview.map_sync(lambda x: x**10, range(32))
94 In [63]: parallel_result = dview.map_sync(lambda x: x**10, range(32))
95
95
96 In [67]: serial_result==parallel_result
96 In [67]: serial_result==parallel_result
97 Out[67]: True
97 Out[67]: True
98
98
99
99
100 .. note::
100 .. note::
101
101
102 The :class:`DirectView`'s version of :meth:`map` does
102 The :class:`DirectView`'s version of :meth:`map` does
103 not do dynamic load balancing. For a load balanced version, use a
103 not do dynamic load balancing. For a load balanced version, use a
104 :class:`LoadBalancedView`.
104 :class:`LoadBalancedView`.
105
105
106 .. seealso::
106 .. seealso::
107
107
108 :meth:`map` is implemented via :class:`ParallelFunction`.
108 :meth:`map` is implemented via :class:`ParallelFunction`.
109
109
110 Remote function decorators
110 Remote function decorators
111 --------------------------
111 --------------------------
112
112
113 Remote functions are just like normal functions, but when they are called,
113 Remote functions are just like normal functions, but when they are called,
114 they execute on one or more engines, rather than locally. IPython provides
114 they execute on one or more engines, rather than locally. IPython provides
115 two decorators:
115 two decorators:
116
116
117 .. sourcecode:: ipython
117 .. sourcecode:: ipython
118
118
119 In [10]: @dview.remote(block=True)
119 In [10]: @dview.remote(block=True)
120 ...: def getpid():
120 ...: def getpid():
121 ...: import os
121 ...: import os
122 ...: return os.getpid()
122 ...: return os.getpid()
123 ...:
123 ...:
124
124
125 In [11]: getpid()
125 In [11]: getpid()
126 Out[11]: [12345, 12346, 12347, 12348]
126 Out[11]: [12345, 12346, 12347, 12348]
127
127
128 The ``@parallel`` decorator creates parallel functions that break up element-wise
128 The ``@parallel`` decorator creates parallel functions that break up element-wise
129 operations and distribute them, reconstructing the result.
129 operations and distribute them, reconstructing the result.
130
130
131 .. sourcecode:: ipython
131 .. sourcecode:: ipython
132
132
133 In [12]: import numpy as np
133 In [12]: import numpy as np
134
134
135 In [13]: A = np.random.random((64,48))
135 In [13]: A = np.random.random((64,48))
136
136
137 In [14]: @dview.parallel(block=True)
137 In [14]: @dview.parallel(block=True)
138 ...: def pmul(A,B):
138 ...: def pmul(A,B):
139 ...: return A*B
139 ...: return A*B
140
140
141 In [15]: C_local = A*A
141 In [15]: C_local = A*A
142
142
143 In [16]: C_remote = pmul(A,A)
143 In [16]: C_remote = pmul(A,A)
144
144
145 In [17]: (C_local == C_remote).all()
145 In [17]: (C_local == C_remote).all()
146 Out[17]: True
146 Out[17]: True
147
147
148 .. seealso::
148 .. seealso::
149
149
150 See the docstrings for the :func:`parallel` and :func:`remote` decorators for
150 See the docstrings for the :func:`parallel` and :func:`remote` decorators for
151 options.
151 options.
152
152
153 Calling Python functions
153 Calling Python functions
154 ========================
154 ========================
155
155
156 The most basic type of operation that can be performed on the engines is to
156 The most basic type of operation that can be performed on the engines is to
157 execute Python code or call Python functions. Executing Python code can be
157 execute Python code or call Python functions. Executing Python code can be
158 done in blocking or non-blocking mode (non-blocking is default) using the
158 done in blocking or non-blocking mode (non-blocking is default) using the
159 :meth:`.View.execute` method, and calling functions can be done via the
159 :meth:`.View.execute` method, and calling functions can be done via the
160 :meth:`.View.apply` method.
160 :meth:`.View.apply` method.
161
161
162 apply
162 apply
163 -----
163 -----
164
164
165 The main method for doing remote execution (in fact, all methods that
165 The main method for doing remote execution (in fact, all methods that
166 communicate with the engines are built on top of it), is :meth:`View.apply`.
166 communicate with the engines are built on top of it), is :meth:`View.apply`.
167
167
168 We strive to provide the cleanest interface we can, so `apply` has the following
168 We strive to provide the cleanest interface we can, so `apply` has the following
169 signature:
169 signature:
170
170
171 .. sourcecode:: python
171 .. sourcecode:: python
172
172
173 view.apply(f, *args, **kwargs)
173 view.apply(f, *args, **kwargs)
174
174
175 There are various ways to call functions with IPython, and these flags are set as
175 There are various ways to call functions with IPython, and these flags are set as
176 attributes of the View. The ``DirectView`` has just two of these flags:
176 attributes of the View. The ``DirectView`` has just two of these flags:
177
177
178 dv.block : bool
178 dv.block : bool
179 whether to wait for the result, or return an :class:`AsyncResult` object
179 whether to wait for the result, or return an :class:`AsyncResult` object
180 immediately
180 immediately
181 dv.track : bool
181 dv.track : bool
182 whether to instruct pyzmq to track when zeromq is done sending the message.
182 whether to instruct pyzmq to track when zeromq is done sending the message.
183 This is primarily useful for non-copying sends of numpy arrays that you plan to
183 This is primarily useful for non-copying sends of numpy arrays that you plan to
184 edit in-place. You need to know when it becomes safe to edit the buffer
184 edit in-place. You need to know when it becomes safe to edit the buffer
185 without corrupting the message.
185 without corrupting the message.
186
186
187
187
188 Creating a view is simple: index-access on a client creates a :class:`.DirectView`.
188 Creating a view is simple: index-access on a client creates a :class:`.DirectView`.
189
189
190 .. sourcecode:: ipython
190 .. sourcecode:: ipython
191
191
192 In [4]: view = rc[1:3]
192 In [4]: view = rc[1:3]
193 Out[4]: <DirectView [1, 2]>
193 Out[4]: <DirectView [1, 2]>
194
194
195 In [5]: view.apply<tab>
195 In [5]: view.apply<tab>
196 view.apply view.apply_async view.apply_sync
196 view.apply view.apply_async view.apply_sync
197
197
198 For convenience, you can set block temporarily for a single call with the extra sync/async methods.
198 For convenience, you can set block temporarily for a single call with the extra sync/async methods.
199
199
200 Blocking execution
200 Blocking execution
201 ------------------
201 ------------------
202
202
203 In blocking mode, the :class:`.DirectView` object (called ``dview`` in
203 In blocking mode, the :class:`.DirectView` object (called ``dview`` in
204 these examples) submits the command to the controller, which places the
204 these examples) submits the command to the controller, which places the
205 command in the engines' queues for execution. The :meth:`apply` call then
205 command in the engines' queues for execution. The :meth:`apply` call then
206 blocks until the engines are done executing the command:
206 blocks until the engines are done executing the command:
207
207
208 .. sourcecode:: ipython
208 .. sourcecode:: ipython
209
209
210 In [2]: dview = rc[:] # A DirectView of all engines
210 In [2]: dview = rc[:] # A DirectView of all engines
211 In [3]: dview.block=True
211 In [3]: dview.block=True
212 In [4]: dview['a'] = 5
212 In [4]: dview['a'] = 5
213
213
214 In [5]: dview['b'] = 10
214 In [5]: dview['b'] = 10
215
215
216 In [6]: dview.apply(lambda x: a+b+x, 27)
216 In [6]: dview.apply(lambda x: a+b+x, 27)
217 Out[6]: [42, 42, 42, 42]
217 Out[6]: [42, 42, 42, 42]
218
218
219 You can also select blocking execution on a call-by-call basis with the :meth:`apply_sync`
219 You can also select blocking execution on a call-by-call basis with the :meth:`apply_sync`
220 method:
220 method:
221
221
222 In [7]: dview.block=False
222 In [7]: dview.block=False
223
223
224 In [8]: dview.apply_sync(lambda x: a+b+x, 27)
224 In [8]: dview.apply_sync(lambda x: a+b+x, 27)
225 Out[8]: [42, 42, 42, 42]
225 Out[8]: [42, 42, 42, 42]
226
226
227 Python commands can be executed as strings on specific engines by using a View's ``execute``
227 Python commands can be executed as strings on specific engines by using a View's ``execute``
228 method:
228 method:
229
229
230 .. sourcecode:: ipython
230 .. sourcecode:: ipython
231
231
232 In [6]: rc[::2].execute('c=a+b')
232 In [6]: rc[::2].execute('c=a+b')
233
233
234 In [7]: rc[1::2].execute('c=a-b')
234 In [7]: rc[1::2].execute('c=a-b')
235
235
236 In [8]: dview['c'] # shorthand for dview.pull('c', block=True)
236 In [8]: dview['c'] # shorthand for dview.pull('c', block=True)
237 Out[8]: [15, -5, 15, -5]
237 Out[8]: [15, -5, 15, -5]
238
238
239
239
240 Non-blocking execution
240 Non-blocking execution
241 ----------------------
241 ----------------------
242
242
243 In non-blocking mode, :meth:`apply` submits the command to be executed and
243 In non-blocking mode, :meth:`apply` submits the command to be executed and
244 then returns a :class:`AsyncResult` object immediately. The
244 then returns a :class:`AsyncResult` object immediately. The
245 :class:`AsyncResult` object gives you a way of getting a result at a later
245 :class:`AsyncResult` object gives you a way of getting a result at a later
246 time through its :meth:`get` method.
246 time through its :meth:`get` method.
247
247
248 .. Note::
248 .. Note::
249
249
250 The :class:`AsyncResult` object provides a superset of the interface in
250 The :class:`AsyncResult` object provides a superset of the interface in
251 :py:class:`multiprocessing.pool.AsyncResult`. See the
251 :py:class:`multiprocessing.pool.AsyncResult`. See the
252 `official Python documentation <http://docs.python.org/library/multiprocessing#multiprocessing.pool.AsyncResult>`_
252 `official Python documentation <http://docs.python.org/library/multiprocessing#multiprocessing.pool.AsyncResult>`_
253 for more.
253 for more.
254
254
255
255
256 This allows you to quickly submit long running commands without blocking your
256 This allows you to quickly submit long running commands without blocking your
257 local Python/IPython session:
257 local Python/IPython session:
258
258
259 .. sourcecode:: ipython
259 .. sourcecode:: ipython
260
260
261 # define our function
261 # define our function
262 In [6]: def wait(t):
262 In [6]: def wait(t):
263 ...: import time
263 ...: import time
264 ...: tic = time.time()
264 ...: tic = time.time()
265 ...: time.sleep(t)
265 ...: time.sleep(t)
266 ...: return time.time()-tic
266 ...: return time.time()-tic
267
267
268 # In non-blocking mode
268 # In non-blocking mode
269 In [7]: ar = dview.apply_async(wait, 2)
269 In [7]: ar = dview.apply_async(wait, 2)
270
270
271 # Now block for the result
271 # Now block for the result
272 In [8]: ar.get()
272 In [8]: ar.get()
273 Out[8]: [2.0006198883056641, 1.9997570514678955, 1.9996809959411621, 2.0003249645233154]
273 Out[8]: [2.0006198883056641, 1.9997570514678955, 1.9996809959411621, 2.0003249645233154]
274
274
275 # Again in non-blocking mode
275 # Again in non-blocking mode
276 In [9]: ar = dview.apply_async(wait, 10)
276 In [9]: ar = dview.apply_async(wait, 10)
277
277
278 # Poll to see if the result is ready
278 # Poll to see if the result is ready
279 In [10]: ar.ready()
279 In [10]: ar.ready()
280 Out[10]: False
280 Out[10]: False
281
281
282 # ask for the result, but wait a maximum of 1 second:
282 # ask for the result, but wait a maximum of 1 second:
283 In [45]: ar.get(1)
283 In [45]: ar.get(1)
284 ---------------------------------------------------------------------------
284 ---------------------------------------------------------------------------
285 TimeoutError Traceback (most recent call last)
285 TimeoutError Traceback (most recent call last)
286 /home/you/<ipython-input-45-7cd858bbb8e0> in <module>()
286 /home/you/<ipython-input-45-7cd858bbb8e0> in <module>()
287 ----> 1 ar.get(1)
287 ----> 1 ar.get(1)
288
288
289 /path/to/site-packages/IPython/parallel/asyncresult.pyc in get(self, timeout)
289 /path/to/site-packages/IPython/parallel/asyncresult.pyc in get(self, timeout)
290 62 raise self._exception
290 62 raise self._exception
291 63 else:
291 63 else:
292 ---> 64 raise error.TimeoutError("Result not ready.")
292 ---> 64 raise error.TimeoutError("Result not ready.")
293 65
293 65
294 66 def ready(self):
294 66 def ready(self):
295
295
296 TimeoutError: Result not ready.
296 TimeoutError: Result not ready.
297
297
298 .. Note::
298 .. Note::
299
299
300 Note the import inside the function. This is a common model, to ensure
300 Note the import inside the function. This is a common model, to ensure
301 that the appropriate modules are imported where the task is run. You can
301 that the appropriate modules are imported where the task is run. You can
302 also manually import modules into the engine(s) namespace(s) via
302 also manually import modules into the engine(s) namespace(s) via
303 :meth:`view.execute('import numpy')`.
303 :meth:`view.execute('import numpy')`.
304
304
305 Often, it is desirable to wait until a set of :class:`AsyncResult` objects
305 Often, it is desirable to wait until a set of :class:`AsyncResult` objects
306 are done. For this, there is the method :meth:`wait`. This method takes a
306 are done. For this, there is the method :meth:`wait`. This method takes a
307 tuple of :class:`AsyncResult` objects (or `msg_ids` or indices to the client's History),
307 tuple of :class:`AsyncResult` objects (or `msg_ids` or indices to the client's History),
308 and blocks until all of the associated results are ready:
308 and blocks until all of the associated results are ready:
309
309
310 .. sourcecode:: ipython
310 .. sourcecode:: ipython
311
311
312 In [72]: dview.block=False
312 In [72]: dview.block=False
313
313
314 # A trivial list of AsyncResults objects
314 # A trivial list of AsyncResults objects
315 In [73]: pr_list = [dview.apply_async(wait, 3) for i in range(10)]
315 In [73]: pr_list = [dview.apply_async(wait, 3) for i in range(10)]
316
316
317 # Wait until all of them are done
317 # Wait until all of them are done
318 In [74]: dview.wait(pr_list)
318 In [74]: dview.wait(pr_list)
319
319
320 # Then, their results are ready using get() or the `.r` attribute
320 # Then, their results are ready using get() or the `.r` attribute
321 In [75]: pr_list[0].get()
321 In [75]: pr_list[0].get()
322 Out[75]: [2.9982571601867676, 2.9982588291168213, 2.9987530708312988, 2.9990990161895752]
322 Out[75]: [2.9982571601867676, 2.9982588291168213, 2.9987530708312988, 2.9990990161895752]
323
323
324
324
325
325
326 The ``block`` and ``targets`` keyword arguments and attributes
326 The ``block`` and ``targets`` keyword arguments and attributes
327 --------------------------------------------------------------
327 --------------------------------------------------------------
328
328
329 Most DirectView methods (excluding :meth:`apply` and :meth:`map`) accept ``block`` and
329 Most DirectView methods (excluding :meth:`apply` and :meth:`map`) accept ``block`` and
330 ``targets`` as keyword arguments. As we have seen above, these keyword arguments control the
330 ``targets`` as keyword arguments. As we have seen above, these keyword arguments control the
331 blocking mode and which engines the command is applied to. The :class:`View` class also has
331 blocking mode and which engines the command is applied to. The :class:`View` class also has
332 :attr:`block` and :attr:`targets` attributes that control the default behavior when the keyword
332 :attr:`block` and :attr:`targets` attributes that control the default behavior when the keyword
333 arguments are not provided. Thus the following logic is used for :attr:`block` and :attr:`targets`:
333 arguments are not provided. Thus the following logic is used for :attr:`block` and :attr:`targets`:
334
334
335 * If no keyword argument is provided, the instance attributes are used.
335 * If no keyword argument is provided, the instance attributes are used.
336 * Keyword arguments, if provided, override the instance attributes for
336 * Keyword arguments, if provided, override the instance attributes for
337 the duration of a single call.
337 the duration of a single call.
338
338
339 The following examples demonstrate how to use the instance attributes:
339 The following examples demonstrate how to use the instance attributes:
340
340
341 .. sourcecode:: ipython
341 .. sourcecode:: ipython
342
342
343 In [16]: dview.targets = [0,2]
343 In [16]: dview.targets = [0,2]
344
344
345 In [17]: dview.block = False
345 In [17]: dview.block = False
346
346
347 In [18]: ar = dview.apply(lambda : 10)
347 In [18]: ar = dview.apply(lambda : 10)
348
348
349 In [19]: ar.get()
349 In [19]: ar.get()
350 Out[19]: [10, 10]
350 Out[19]: [10, 10]
351
351
352 In [20]: dview.targets = dview.client.ids # all engines (4)
352 In [20]: dview.targets = dview.client.ids # all engines (4)
353
353
354 In [21]: dview.block = True
354 In [21]: dview.block = True
355
355
356 In [22]: dview.apply(lambda : 42)
356 In [22]: dview.apply(lambda : 42)
357 Out[22]: [42, 42, 42, 42]
357 Out[22]: [42, 42, 42, 42]
358
358
359 The :attr:`block` and :attr:`targets` instance attributes of the
359 The :attr:`block` and :attr:`targets` instance attributes of the
360 :class:`.DirectView` also determine the behavior of the parallel magic commands.
360 :class:`.DirectView` also determine the behavior of the parallel magic commands.
361
361
362 Parallel magic commands
362 Parallel magic commands
363 -----------------------
363 -----------------------
364
364
365 .. warning::
365 .. warning::
366
366
367 The magics have not been changed to work with the zeromq system. The
367 The magics have not been changed to work with the zeromq system. The
368 magics do work, but *do not* print stdin/out like they used to in IPython.kernel.
368 magics do work, but *do not* print stdin/out like they used to in IPython.kernel.
369
369
370 We provide a few IPython magic commands (``%px``, ``%autopx`` and ``%result``)
370 We provide a few IPython magic commands (``%px``, ``%autopx`` and ``%result``)
371 that make it more pleasant to execute Python commands on the engines
371 that make it more pleasant to execute Python commands on the engines
372 interactively. These are simply shortcuts to :meth:`execute` and
372 interactively. These are simply shortcuts to :meth:`execute` and
373 :meth:`get_result` of the :class:`DirectView`. The ``%px`` magic executes a single
373 :meth:`get_result` of the :class:`DirectView`. The ``%px`` magic executes a single
374 Python command on the engines specified by the :attr:`targets` attribute of the
374 Python command on the engines specified by the :attr:`targets` attribute of the
375 :class:`DirectView` instance:
375 :class:`DirectView` instance:
376
376
377 .. sourcecode:: ipython
377 .. sourcecode:: ipython
378
378
379 # load the parallel magic extension:
379 # load the parallel magic extension:
380 In [21]: %load_ext parallelmagic
380 In [21]: %load_ext parallelmagic
381
381
382 # Create a DirectView for all targets
382 # Create a DirectView for all targets
383 In [22]: dv = rc[:]
383 In [22]: dv = rc[:]
384
384
385 # Make this DirectView active for parallel magic commands
385 # Make this DirectView active for parallel magic commands
386 In [23]: dv.activate()
386 In [23]: dv.activate()
387
387
388 In [24]: dv.block=True
388 In [24]: dv.block=True
389
389
390 In [25]: import numpy
390 In [25]: import numpy
391
391
392 In [26]: %px import numpy
392 In [26]: %px import numpy
393 Parallel execution on engines: [0, 1, 2, 3]
393 Parallel execution on engines: [0, 1, 2, 3]
394
394
395 In [27]: %px a = numpy.random.rand(2,2)
395 In [27]: %px a = numpy.random.rand(2,2)
396 Parallel execution on engines: [0, 1, 2, 3]
396 Parallel execution on engines: [0, 1, 2, 3]
397
397
398 In [28]: %px ev = numpy.linalg.eigvals(a)
398 In [28]: %px ev = numpy.linalg.eigvals(a)
399 Parallel execution on engines: [0, 1, 2, 3]
399 Parallel execution on engines: [0, 1, 2, 3]
400
400
401 In [28]: dv['ev']
401 In [28]: dv['ev']
402 Out[28]: [ array([ 1.09522024, -0.09645227]),
402 Out[28]: [ array([ 1.09522024, -0.09645227]),
403 array([ 1.21435496, -0.35546712]),
403 array([ 1.21435496, -0.35546712]),
404 array([ 0.72180653, 0.07133042]),
404 array([ 0.72180653, 0.07133042]),
405 array([ 1.46384341e+00, 1.04353244e-04])
405 array([ 1.46384341e+00, 1.04353244e-04])
406 ]
406 ]
407
407
408 The ``%result`` magic gets the most recent result, or takes an argument
408 The ``%result`` magic gets the most recent result, or takes an argument
409 specifying the index of the result to be requested. It is simply a shortcut to the
409 specifying the index of the result to be requested. It is simply a shortcut to the
410 :meth:`get_result` method:
410 :meth:`get_result` method:
411
411
412 .. sourcecode:: ipython
412 .. sourcecode:: ipython
413
413
414 In [29]: dv.apply_async(lambda : ev)
414 In [29]: dv.apply_async(lambda : ev)
415
415
416 In [30]: %result
416 In [30]: %result
417 Out[30]: [ [ 1.28167017 0.14197338],
417 Out[30]: [ [ 1.28167017 0.14197338],
418 [-0.14093616 1.27877273],
418 [-0.14093616 1.27877273],
419 [-0.37023573 1.06779409],
419 [-0.37023573 1.06779409],
420 [ 0.83664764 -0.25602658] ]
420 [ 0.83664764 -0.25602658] ]
421
421
422 The ``%autopx`` magic switches to a mode where everything you type is executed
422 The ``%autopx`` magic switches to a mode where everything you type is executed
423 on the engines given by the :attr:`targets` attribute:
423 on the engines given by the :attr:`targets` attribute:
424
424
425 .. sourcecode:: ipython
425 .. sourcecode:: ipython
426
426
427 In [30]: dv.block=False
427 In [30]: dv.block=False
428
428
429 In [31]: %autopx
429 In [31]: %autopx
430 Auto Parallel Enabled
430 Auto Parallel Enabled
431 Type %autopx to disable
431 Type %autopx to disable
432
432
433 In [32]: max_evals = []
433 In [32]: max_evals = []
434 <IPython.parallel.AsyncResult object at 0x17b8a70>
434 <IPython.parallel.AsyncResult object at 0x17b8a70>
435
435
436 In [33]: for i in range(100):
436 In [33]: for i in range(100):
437 ....: a = numpy.random.rand(10,10)
437 ....: a = numpy.random.rand(10,10)
438 ....: a = a+a.transpose()
438 ....: a = a+a.transpose()
439 ....: evals = numpy.linalg.eigvals(a)
439 ....: evals = numpy.linalg.eigvals(a)
440 ....: max_evals.append(evals[0].real)
440 ....: max_evals.append(evals[0].real)
441 ....:
441 ....:
442 ....:
442 ....:
443 <IPython.parallel.AsyncResult object at 0x17af8f0>
443 <IPython.parallel.AsyncResult object at 0x17af8f0>
444
444
445 In [34]: %autopx
445 In [34]: %autopx
446 Auto Parallel Disabled
446 Auto Parallel Disabled
447
447
448 In [35]: dv.block=True
448 In [35]: dv.block=True
449
449
450 In [36]: px ans= "Average max eigenvalue is: %f"%(sum(max_evals)/len(max_evals))
450 In [36]: px ans= "Average max eigenvalue is: %f"%(sum(max_evals)/len(max_evals))
451 Parallel execution on engines: [0, 1, 2, 3]
451 Parallel execution on engines: [0, 1, 2, 3]
452
452
453 In [37]: dv['ans']
453 In [37]: dv['ans']
454 Out[37]: [ 'Average max eigenvalue is: 10.1387247332',
454 Out[37]: [ 'Average max eigenvalue is: 10.1387247332',
455 'Average max eigenvalue is: 10.2076902286',
455 'Average max eigenvalue is: 10.2076902286',
456 'Average max eigenvalue is: 10.1891484655',
456 'Average max eigenvalue is: 10.1891484655',
457 'Average max eigenvalue is: 10.1158837784',]
457 'Average max eigenvalue is: 10.1158837784',]
458
458
459
459
460 Moving Python objects around
460 Moving Python objects around
461 ============================
461 ============================
462
462
463 In addition to calling functions and executing code on engines, you can
463 In addition to calling functions and executing code on engines, you can
464 transfer Python objects to and from your IPython session and the engines. In
464 transfer Python objects to and from your IPython session and the engines. In
465 IPython, these operations are called :meth:`push` (sending an object to the
465 IPython, these operations are called :meth:`push` (sending an object to the
466 engines) and :meth:`pull` (getting an object from the engines).
466 engines) and :meth:`pull` (getting an object from the engines).
467
467
468 Basic push and pull
468 Basic push and pull
469 -------------------
469 -------------------
470
470
471 Here are some examples of how you use :meth:`push` and :meth:`pull`:
471 Here are some examples of how you use :meth:`push` and :meth:`pull`:
472
472
473 .. sourcecode:: ipython
473 .. sourcecode:: ipython
474
474
475 In [38]: dview.push(dict(a=1.03234,b=3453))
475 In [38]: dview.push(dict(a=1.03234,b=3453))
476 Out[38]: [None,None,None,None]
476 Out[38]: [None,None,None,None]
477
477
478 In [39]: dview.pull('a')
478 In [39]: dview.pull('a')
479 Out[39]: [ 1.03234, 1.03234, 1.03234, 1.03234]
479 Out[39]: [ 1.03234, 1.03234, 1.03234, 1.03234]
480
480
481 In [40]: dview.pull('b', targets=0)
481 In [40]: dview.pull('b', targets=0)
482 Out[40]: 3453
482 Out[40]: 3453
483
483
484 In [41]: dview.pull(('a','b'))
484 In [41]: dview.pull(('a','b'))
485 Out[41]: [ [1.03234, 3453], [1.03234, 3453], [1.03234, 3453], [1.03234, 3453] ]
485 Out[41]: [ [1.03234, 3453], [1.03234, 3453], [1.03234, 3453], [1.03234, 3453] ]
486
486
487 In [43]: dview.push(dict(c='speed'))
487 In [43]: dview.push(dict(c='speed'))
488 Out[43]: [None,None,None,None]
488 Out[43]: [None,None,None,None]
489
489
490 In non-blocking mode :meth:`push` and :meth:`pull` also return
490 In non-blocking mode :meth:`push` and :meth:`pull` also return
491 :class:`AsyncResult` objects:
491 :class:`AsyncResult` objects:
492
492
493 .. sourcecode:: ipython
493 .. sourcecode:: ipython
494
494
495 In [48]: ar = dview.pull('a', block=False)
495 In [48]: ar = dview.pull('a', block=False)
496
496
497 In [49]: ar.get()
497 In [49]: ar.get()
498 Out[49]: [1.03234, 1.03234, 1.03234, 1.03234]
498 Out[49]: [1.03234, 1.03234, 1.03234, 1.03234]
499
499
500
500
501 Dictionary interface
501 Dictionary interface
502 --------------------
502 --------------------
503
503
504 Since a Python namespace is just a :class:`dict`, :class:`DirectView` objects provide
504 Since a Python namespace is just a :class:`dict`, :class:`DirectView` objects provide
505 dictionary-style access by key and methods such as :meth:`get` and
505 dictionary-style access by key and methods such as :meth:`get` and
506 :meth:`update` for convenience. This makes the remote namespaces of the engines
506 :meth:`update` for convenience. This makes the remote namespaces of the engines
507 appear as a local dictionary. Underneath, these methods call :meth:`apply`:
507 appear as a local dictionary. Underneath, these methods call :meth:`apply`:
508
508
509 .. sourcecode:: ipython
509 .. sourcecode:: ipython
510
510
511 In [51]: dview['a']=['foo','bar']
511 In [51]: dview['a']=['foo','bar']
512
512
513 In [52]: dview['a']
513 In [52]: dview['a']
514 Out[52]: [ ['foo', 'bar'], ['foo', 'bar'], ['foo', 'bar'], ['foo', 'bar'] ]
514 Out[52]: [ ['foo', 'bar'], ['foo', 'bar'], ['foo', 'bar'], ['foo', 'bar'] ]
515
515
516 Scatter and gather
516 Scatter and gather
517 ------------------
517 ------------------
518
518
519 Sometimes it is useful to partition a sequence and push the partitions to
519 Sometimes it is useful to partition a sequence and push the partitions to
520 different engines. In MPI language, this is known as scatter/gather and we
520 different engines. In MPI language, this is known as scatter/gather and we
521 follow that terminology. However, it is important to remember that in
521 follow that terminology. However, it is important to remember that in
522 IPython's :class:`Client` class, :meth:`scatter` is from the
522 IPython's :class:`Client` class, :meth:`scatter` is from the
523 interactive IPython session to the engines and :meth:`gather` is from the
523 interactive IPython session to the engines and :meth:`gather` is from the
524 engines back to the interactive IPython session. For scatter/gather operations
524 engines back to the interactive IPython session. For scatter/gather operations
525 between engines, MPI should be used:
525 between engines, MPI should be used:
526
526
527 .. sourcecode:: ipython
527 .. sourcecode:: ipython
528
528
529 In [58]: dview.scatter('a',range(16))
529 In [58]: dview.scatter('a',range(16))
530 Out[58]: [None,None,None,None]
530 Out[58]: [None,None,None,None]
531
531
532 In [59]: dview['a']
532 In [59]: dview['a']
533 Out[59]: [ [0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15] ]
533 Out[59]: [ [0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15] ]
534
534
535 In [60]: dview.gather('a')
535 In [60]: dview.gather('a')
536 Out[60]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
536 Out[60]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
537
537
538 Other things to look at
538 Other things to look at
539 =======================
539 =======================
540
540
541 How to do parallel list comprehensions
541 How to do parallel list comprehensions
542 --------------------------------------
542 --------------------------------------
543
543
544 In many cases list comprehensions are nicer than using the map function. While
544 In many cases list comprehensions are nicer than using the map function. While
545 we don't have fully parallel list comprehensions, it is simple to get the
545 we don't have fully parallel list comprehensions, it is simple to get the
546 basic effect using :meth:`scatter` and :meth:`gather`:
546 basic effect using :meth:`scatter` and :meth:`gather`:
547
547
548 .. sourcecode:: ipython
548 .. sourcecode:: ipython
549
549
550 In [66]: dview.scatter('x',range(64))
550 In [66]: dview.scatter('x',range(64))
551
551
552 In [67]: %px y = [i**10 for i in x]
552 In [67]: %px y = [i**10 for i in x]
553 Parallel execution on engines: [0, 1, 2, 3]
553 Parallel execution on engines: [0, 1, 2, 3]
554 Out[67]:
554 Out[67]:
555
555
556 In [68]: y = dview.gather('y')
556 In [68]: y = dview.gather('y')
557
557
558 In [69]: print y
558 In [69]: print y
559 [0, 1, 1024, 59049, 1048576, 9765625, 60466176, 282475249, 1073741824,...]
559 [0, 1, 1024, 59049, 1048576, 9765625, 60466176, 282475249, 1073741824,...]
560
560
561 Remote imports
561 Remote imports
562 --------------
562 --------------
563
563
564 Sometimes you will want to import packages both in your interactive session
564 Sometimes you will want to import packages both in your interactive session
565 and on your remote engines. This can be done with the :class:`ContextManager`
565 and on your remote engines. This can be done with the :class:`ContextManager`
566 created by a DirectView's :meth:`sync_imports` method:
566 created by a DirectView's :meth:`sync_imports` method:
567
567
568 .. sourcecode:: ipython
568 .. sourcecode:: ipython
569
569
570 In [69]: with dview.sync_imports():
570 In [69]: with dview.sync_imports():
571 ...: import numpy
571 ...: import numpy
572 importing numpy on engine(s)
572 importing numpy on engine(s)
573
573
574 Any imports made inside the block will also be performed on the view's engines.
574 Any imports made inside the block will also be performed on the view's engines.
575 sync_imports also takes a `local` boolean flag that defaults to True, which specifies
575 sync_imports also takes a `local` boolean flag that defaults to True, which specifies
576 whether the local imports should also be performed. However, support for `local=False`
576 whether the local imports should also be performed. However, support for `local=False`
577 has not been implemented, so only packages that can be imported locally will work
577 has not been implemented, so only packages that can be imported locally will work
578 this way.
578 this way.
579
579
580 You can also specify imports via the ``@require`` decorator. This is a decorator
580 You can also specify imports via the ``@require`` decorator. This is a decorator
581 designed for use in Dependencies, but can be used to handle remote imports as well.
581 designed for use in Dependencies, but can be used to handle remote imports as well.
582 Modules or module names passed to ``@require`` will be imported before the decorated
582 Modules or module names passed to ``@require`` will be imported before the decorated
583 function is called. If they cannot be imported, the decorated function will never
583 function is called. If they cannot be imported, the decorated function will never
584 execute, and will fail with an UnmetDependencyError.
584 execute, and will fail with an UnmetDependencyError.
585
585
586 .. sourcecode:: ipython
586 .. sourcecode:: ipython
587
587
588 In [69]: from IPython.parallel import require
588 In [69]: from IPython.parallel import require
589
589
590 In [70]: @require('re')
590 In [70]: @require('re')
591 ...: def findall(pat, x):
591 ...: def findall(pat, x):
592 ...: # re is guaranteed to be available
592 ...: # re is guaranteed to be available
593 ...: return re.findall(pat, x)
593 ...: return re.findall(pat, x)
594
594
595 # you can also pass modules themselves, that you already have locally:
595 # you can also pass modules themselves, that you already have locally:
596 In [71]: @require(time)
596 In [71]: @require(time)
597 ...: def wait(t):
597 ...: def wait(t):
598 ...: time.sleep(t)
598 ...: time.sleep(t)
599 ...: return t
599 ...: return t
600
600
601
601
602 Parallel exceptions
602 Parallel exceptions
603 -------------------
603 -------------------
604
604
605 In the multiengine interface, parallel commands can raise Python exceptions,
605 In the multiengine interface, parallel commands can raise Python exceptions,
606 just like serial commands. But, it is a little subtle, because a single
606 just like serial commands. But, it is a little subtle, because a single
607 parallel command can actually raise multiple exceptions (one for each engine
607 parallel command can actually raise multiple exceptions (one for each engine
608 the command was run on). To express this idea, we have a
608 the command was run on). To express this idea, we have a
609 :exc:`CompositeError` exception class that will be raised in most cases. The
609 :exc:`CompositeError` exception class that will be raised in most cases. The
610 :exc:`CompositeError` class is a special type of exception that wraps one or
610 :exc:`CompositeError` class is a special type of exception that wraps one or
611 more other types of exceptions. Here is how it works:
611 more other types of exceptions. Here is how it works:
612
612
613 .. sourcecode:: ipython
613 .. sourcecode:: ipython
614
614
615 In [76]: dview.block=True
615 In [76]: dview.block=True
616
616
617 In [77]: dview.execute('1/0')
617 In [77]: dview.execute('1/0')
618 ---------------------------------------------------------------------------
618 ---------------------------------------------------------------------------
619 CompositeError Traceback (most recent call last)
619 CompositeError Traceback (most recent call last)
620 /home/you/<ipython-input-10-15c2c22dec39> in <module>()
620 /home/you/<ipython-input-10-15c2c22dec39> in <module>()
621 ----> 1 dview.execute('1/0', block=True)
621 ----> 1 dview.execute('1/0', block=True)
622
622
623 /path/to/site-packages/IPython/parallel/view.py in execute(self, code, block)
623 /path/to/site-packages/IPython/parallel/view.py in execute(self, code, block)
624 460 default: self.block
624 460 default: self.block
625 461 """
625 461 """
626 --> 462 return self.apply_with_flags(util._execute, args=(code,), block=block)
626 --> 462 return self.apply_with_flags(util._execute, args=(code,), block=block)
627 463
627 463
628 464 def run(self, filename, block=None):
628 464 def run(self, filename, block=None):
629
629
630 /home/you/<string> in apply_with_flags(self, f, args, kwargs, block, track)
630 /home/you/<string> in apply_with_flags(self, f, args, kwargs, block, track)
631
631
632 /path/to/site-packages/IPython/parallel/view.py in sync_results(f, self, *args, **kwargs)
632 /path/to/site-packages/IPython/parallel/view.py in sync_results(f, self, *args, **kwargs)
633 46 def sync_results(f, self, *args, **kwargs):
633 46 def sync_results(f, self, *args, **kwargs):
634 47 """sync relevant results from self.client to our results attribute."""
634 47 """sync relevant results from self.client to our results attribute."""
635 ---> 48 ret = f(self, *args, **kwargs)
635 ---> 48 ret = f(self, *args, **kwargs)
636 49 delta = self.outstanding.difference(self.client.outstanding)
636 49 delta = self.outstanding.difference(self.client.outstanding)
637 50 completed = self.outstanding.intersection(delta)
637 50 completed = self.outstanding.intersection(delta)
638
638
639 /home/you/<string> in apply_with_flags(self, f, args, kwargs, block, track)
639 /home/you/<string> in apply_with_flags(self, f, args, kwargs, block, track)
640
640
641 /path/to/site-packages/IPython/parallel/view.py in save_ids(f, self, *args, **kwargs)
641 /path/to/site-packages/IPython/parallel/view.py in save_ids(f, self, *args, **kwargs)
642 35 n_previous = len(self.client.history)
642 35 n_previous = len(self.client.history)
643 36 try:
643 36 try:
644 ---> 37 ret = f(self, *args, **kwargs)
644 ---> 37 ret = f(self, *args, **kwargs)
645 38 finally:
645 38 finally:
646 39 nmsgs = len(self.client.history) - n_previous
646 39 nmsgs = len(self.client.history) - n_previous
647
647
648 /path/to/site-packages/IPython/parallel/view.py in apply_with_flags(self, f, args, kwargs, block, track)
648 /path/to/site-packages/IPython/parallel/view.py in apply_with_flags(self, f, args, kwargs, block, track)
649 398 if block:
649 398 if block:
650 399 try:
650 399 try:
651 --> 400 return ar.get()
651 --> 400 return ar.get()
652 401 except KeyboardInterrupt:
652 401 except KeyboardInterrupt:
653 402 pass
653 402 pass
654
654
655 /path/to/site-packages/IPython/parallel/asyncresult.pyc in get(self, timeout)
655 /path/to/site-packages/IPython/parallel/asyncresult.pyc in get(self, timeout)
656 87 return self._result
656 87 return self._result
657 88 else:
657 88 else:
658 ---> 89 raise self._exception
658 ---> 89 raise self._exception
659 90 else:
659 90 else:
660 91 raise error.TimeoutError("Result not ready.")
660 91 raise error.TimeoutError("Result not ready.")
661
661
662 CompositeError: one or more exceptions from call to method: _execute
662 CompositeError: one or more exceptions from call to method: _execute
663 [0:apply]: ZeroDivisionError: integer division or modulo by zero
663 [0:apply]: ZeroDivisionError: integer division or modulo by zero
664 [1:apply]: ZeroDivisionError: integer division or modulo by zero
664 [1:apply]: ZeroDivisionError: integer division or modulo by zero
665 [2:apply]: ZeroDivisionError: integer division or modulo by zero
665 [2:apply]: ZeroDivisionError: integer division or modulo by zero
666 [3:apply]: ZeroDivisionError: integer division or modulo by zero
666 [3:apply]: ZeroDivisionError: integer division or modulo by zero
667
667
668
668
669 Notice how the error message printed when :exc:`CompositeError` is raised has
669 Notice how the error message printed when :exc:`CompositeError` is raised has
670 information about the individual exceptions that were raised on each engine.
670 information about the individual exceptions that were raised on each engine.
671 If you want, you can even raise one of these original exceptions:
671 If you want, you can even raise one of these original exceptions:
672
672
673 .. sourcecode:: ipython
673 .. sourcecode:: ipython
674
674
675 In [80]: try:
675 In [80]: try:
676 ....: dview.execute('1/0')
676 ....: dview.execute('1/0')
677 ....: except client.CompositeError, e:
677 ....: except client.CompositeError, e:
678 ....: e.raise_exception()
678 ....: e.raise_exception()
679 ....:
679 ....:
680 ....:
680 ....:
681 ---------------------------------------------------------------------------
681 ---------------------------------------------------------------------------
682 ZeroDivisionError Traceback (most recent call last)
682 ZeroDivisionError Traceback (most recent call last)
683
683
684 /ipython1-client-r3021/docs/examples/<ipython console> in <module>()
684 /ipython1-client-r3021/docs/examples/<ipython console> in <module>()
685
685
686 /ipython1-client-r3021/ipython1/kernel/error.pyc in raise_exception(self, excid)
686 /ipython1-client-r3021/ipython1/kernel/error.pyc in raise_exception(self, excid)
687 156 raise IndexError("an exception with index %i does not exist"%excid)
687 156 raise IndexError("an exception with index %i does not exist"%excid)
688 157 else:
688 157 else:
689 --> 158 raise et, ev, etb
689 --> 158 raise et, ev, etb
690 159
690 159
691 160 def collect_exceptions(rlist, method):
691 160 def collect_exceptions(rlist, method):
692
692
693 ZeroDivisionError: integer division or modulo by zero
693 ZeroDivisionError: integer division or modulo by zero
694
694
695 If you are working in IPython, you can simply type ``%debug`` after one of
695 If you are working in IPython, you can simply type ``%debug`` after one of
696 these :exc:`CompositeError` exceptions is raised, and inspect the exception
696 these :exc:`CompositeError` exceptions is raised, and inspect the exception
697 instance:
697 instance:
698
698
699 .. sourcecode:: ipython
699 .. sourcecode:: ipython
700
700
701 In [81]: dview.execute('1/0')
701 In [81]: dview.execute('1/0')
702 ---------------------------------------------------------------------------
702 ---------------------------------------------------------------------------
703 CompositeError Traceback (most recent call last)
703 CompositeError Traceback (most recent call last)
704 /home/you/<ipython-input-10-15c2c22dec39> in <module>()
704 /home/you/<ipython-input-10-15c2c22dec39> in <module>()
705 ----> 1 dview.execute('1/0', block=True)
705 ----> 1 dview.execute('1/0', block=True)
706
706
707 /path/to/site-packages/IPython/parallel/view.py in execute(self, code, block)
707 /path/to/site-packages/IPython/parallel/view.py in execute(self, code, block)
708 460 default: self.block
708 460 default: self.block
709 461 """
709 461 """
710 --> 462 return self.apply_with_flags(util._execute, args=(code,), block=block)
710 --> 462 return self.apply_with_flags(util._execute, args=(code,), block=block)
711 463
711 463
712 464 def run(self, filename, block=None):
712 464 def run(self, filename, block=None):
713
713
714 /home/you/<string> in apply_with_flags(self, f, args, kwargs, block, track)
714 /home/you/<string> in apply_with_flags(self, f, args, kwargs, block, track)
715
715
716 /path/to/site-packages/IPython/parallel/view.py in sync_results(f, self, *args, **kwargs)
716 /path/to/site-packages/IPython/parallel/view.py in sync_results(f, self, *args, **kwargs)
717 46 def sync_results(f, self, *args, **kwargs):
717 46 def sync_results(f, self, *args, **kwargs):
718 47 """sync relevant results from self.client to our results attribute."""
718 47 """sync relevant results from self.client to our results attribute."""
719 ---> 48 ret = f(self, *args, **kwargs)
719 ---> 48 ret = f(self, *args, **kwargs)
720 49 delta = self.outstanding.difference(self.client.outstanding)
720 49 delta = self.outstanding.difference(self.client.outstanding)
721 50 completed = self.outstanding.intersection(delta)
721 50 completed = self.outstanding.intersection(delta)
722
722
723 /home/you/<string> in apply_with_flags(self, f, args, kwargs, block, track)
723 /home/you/<string> in apply_with_flags(self, f, args, kwargs, block, track)
724
724
725 /path/to/site-packages/IPython/parallel/view.py in save_ids(f, self, *args, **kwargs)
725 /path/to/site-packages/IPython/parallel/view.py in save_ids(f, self, *args, **kwargs)
726 35 n_previous = len(self.client.history)
726 35 n_previous = len(self.client.history)
727 36 try:
727 36 try:
728 ---> 37 ret = f(self, *args, **kwargs)
728 ---> 37 ret = f(self, *args, **kwargs)
729 38 finally:
729 38 finally:
730 39 nmsgs = len(self.client.history) - n_previous
730 39 nmsgs = len(self.client.history) - n_previous
731
731
732 /path/to/site-packages/IPython/parallel/view.py in apply_with_flags(self, f, args, kwargs, block, track)
732 /path/to/site-packages/IPython/parallel/view.py in apply_with_flags(self, f, args, kwargs, block, track)
733 398 if block:
733 398 if block:
734 399 try:
734 399 try:
735 --> 400 return ar.get()
735 --> 400 return ar.get()
736 401 except KeyboardInterrupt:
736 401 except KeyboardInterrupt:
737 402 pass
737 402 pass
738
738
739 /path/to/site-packages/IPython/parallel/asyncresult.pyc in get(self, timeout)
739 /path/to/site-packages/IPython/parallel/asyncresult.pyc in get(self, timeout)
740 87 return self._result
740 87 return self._result
741 88 else:
741 88 else:
742 ---> 89 raise self._exception
742 ---> 89 raise self._exception
743 90 else:
743 90 else:
744 91 raise error.TimeoutError("Result not ready.")
744 91 raise error.TimeoutError("Result not ready.")
745
745
746 CompositeError: one or more exceptions from call to method: _execute
746 CompositeError: one or more exceptions from call to method: _execute
747 [0:apply]: ZeroDivisionError: integer division or modulo by zero
747 [0:apply]: ZeroDivisionError: integer division or modulo by zero
748 [1:apply]: ZeroDivisionError: integer division or modulo by zero
748 [1:apply]: ZeroDivisionError: integer division or modulo by zero
749 [2:apply]: ZeroDivisionError: integer division or modulo by zero
749 [2:apply]: ZeroDivisionError: integer division or modulo by zero
750 [3:apply]: ZeroDivisionError: integer division or modulo by zero
750 [3:apply]: ZeroDivisionError: integer division or modulo by zero
751
751
752 In [82]: %debug
752 In [82]: %debug
753 > /path/to/site-packages/IPython/parallel/asyncresult.py(80)get()
753 > /path/to/site-packages/IPython/parallel/asyncresult.py(80)get()
754 79 else:
754 79 else:
755 ---> 80 raise self._exception
755 ---> 80 raise self._exception
756 81 else:
756 81 else:
757
757
758
758
759 # With the debugger running, e is the exceptions instance. We can tab complete
759 # With the debugger running, e is the exceptions instance. We can tab complete
760 # on it and see the extra methods that are available.
760 # on it and see the extra methods that are available.
761 ipdb> e.
761 ipdb> e.
762 e.__class__ e.__getitem__ e.__new__ e.__setstate__ e.args
762 e.__class__ e.__getitem__ e.__new__ e.__setstate__ e.args
763 e.__delattr__ e.__getslice__ e.__reduce__ e.__str__ e.elist
763 e.__delattr__ e.__getslice__ e.__reduce__ e.__str__ e.elist
764 e.__dict__ e.__hash__ e.__reduce_ex__ e.__weakref__ e.message
764 e.__dict__ e.__hash__ e.__reduce_ex__ e.__weakref__ e.message
765 e.__doc__ e.__init__ e.__repr__ e._get_engine_str e.print_tracebacks
765 e.__doc__ e.__init__ e.__repr__ e._get_engine_str e.print_tracebacks
766 e.__getattribute__ e.__module__ e.__setattr__ e._get_traceback e.raise_exception
766 e.__getattribute__ e.__module__ e.__setattr__ e._get_traceback e.raise_exception
767 ipdb> e.print_tracebacks()
767 ipdb> e.print_tracebacks()
768 [0:apply]:
768 [0:apply]:
769 Traceback (most recent call last):
769 Traceback (most recent call last):
770 File "/path/to/site-packages/IPython/parallel/streamkernel.py", line 332, in apply_request
770 File "/path/to/site-packages/IPython/parallel/streamkernel.py", line 332, in apply_request
771 exec code in working, working
771 exec code in working, working
772 File "<string>", line 1, in <module>
772 File "<string>", line 1, in <module>
773 File "/path/to/site-packages/IPython/parallel/client.py", line 69, in _execute
773 File "/path/to/site-packages/IPython/parallel/client.py", line 69, in _execute
774 exec code in globals()
774 exec code in globals()
775 File "<string>", line 1, in <module>
775 File "<string>", line 1, in <module>
776 ZeroDivisionError: integer division or modulo by zero
776 ZeroDivisionError: integer division or modulo by zero
777
777
778
778
779 [1:apply]:
779 [1:apply]:
780 Traceback (most recent call last):
780 Traceback (most recent call last):
781 File "/path/to/site-packages/IPython/parallel/streamkernel.py", line 332, in apply_request
781 File "/path/to/site-packages/IPython/parallel/streamkernel.py", line 332, in apply_request
782 exec code in working, working
782 exec code in working, working
783 File "<string>", line 1, in <module>
783 File "<string>", line 1, in <module>
784 File "/path/to/site-packages/IPython/parallel/client.py", line 69, in _execute
784 File "/path/to/site-packages/IPython/parallel/client.py", line 69, in _execute
785 exec code in globals()
785 exec code in globals()
786 File "<string>", line 1, in <module>
786 File "<string>", line 1, in <module>
787 ZeroDivisionError: integer division or modulo by zero
787 ZeroDivisionError: integer division or modulo by zero
788
788
789
789
790 [2:apply]:
790 [2:apply]:
791 Traceback (most recent call last):
791 Traceback (most recent call last):
792 File "/path/to/site-packages/IPython/parallel/streamkernel.py", line 332, in apply_request
792 File "/path/to/site-packages/IPython/parallel/streamkernel.py", line 332, in apply_request
793 exec code in working, working
793 exec code in working, working
794 File "<string>", line 1, in <module>
794 File "<string>", line 1, in <module>
795 File "/path/to/site-packages/IPython/parallel/client.py", line 69, in _execute
795 File "/path/to/site-packages/IPython/parallel/client.py", line 69, in _execute
796 exec code in globals()
796 exec code in globals()
797 File "<string>", line 1, in <module>
797 File "<string>", line 1, in <module>
798 ZeroDivisionError: integer division or modulo by zero
798 ZeroDivisionError: integer division or modulo by zero
799
799
800
800
801 [3:apply]:
801 [3:apply]:
802 Traceback (most recent call last):
802 Traceback (most recent call last):
803 File "/path/to/site-packages/IPython/parallel/streamkernel.py", line 332, in apply_request
803 File "/path/to/site-packages/IPython/parallel/streamkernel.py", line 332, in apply_request
804 exec code in working, working
804 exec code in working, working
805 File "<string>", line 1, in <module>
805 File "<string>", line 1, in <module>
806 File "/path/to/site-packages/IPython/parallel/client.py", line 69, in _execute
806 File "/path/to/site-packages/IPython/parallel/client.py", line 69, in _execute
807 exec code in globals()
807 exec code in globals()
808 File "<string>", line 1, in <module>
808 File "<string>", line 1, in <module>
809 ZeroDivisionError: integer division or modulo by zero
809 ZeroDivisionError: integer division or modulo by zero
810
810
811
811
812 .. note::
812 .. note::
813
813
814 TODO: The above tracebacks are not up to date
814 TODO: The above tracebacks are not up to date
815
815
816
816
817 All of this same error handling magic even works in non-blocking mode:
817 All of this same error handling magic even works in non-blocking mode:
818
818
819 .. sourcecode:: ipython
819 .. sourcecode:: ipython
820
820
821 In [83]: dview.block=False
821 In [83]: dview.block=False
822
822
823 In [84]: ar = dview.execute('1/0')
823 In [84]: ar = dview.execute('1/0')
824
824
825 In [85]: ar.get()
825 In [85]: ar.get()
826 ---------------------------------------------------------------------------
826 ---------------------------------------------------------------------------
827 CompositeError Traceback (most recent call last)
827 CompositeError Traceback (most recent call last)
828 /Users/minrk/<ipython-input-3-8531eb3d26fb> in <module>()
828 /Users/minrk/<ipython-input-3-8531eb3d26fb> in <module>()
829 ----> 1 ar.get()
829 ----> 1 ar.get()
830
830
831 /path/to/site-packages/IPython/parallel/asyncresult.pyc in get(self, timeout)
831 /path/to/site-packages/IPython/parallel/asyncresult.pyc in get(self, timeout)
832 78 return self._result
832 78 return self._result
833 79 else:
833 79 else:
834 ---> 80 raise self._exception
834 ---> 80 raise self._exception
835 81 else:
835 81 else:
836 82 raise error.TimeoutError("Result not ready.")
836 82 raise error.TimeoutError("Result not ready.")
837
837
838 CompositeError: one or more exceptions from call to method: _execute
838 CompositeError: one or more exceptions from call to method: _execute
839 [0:apply]: ZeroDivisionError: integer division or modulo by zero
839 [0:apply]: ZeroDivisionError: integer division or modulo by zero
840 [1:apply]: ZeroDivisionError: integer division or modulo by zero
840 [1:apply]: ZeroDivisionError: integer division or modulo by zero
841 [2:apply]: ZeroDivisionError: integer division or modulo by zero
841 [2:apply]: ZeroDivisionError: integer division or modulo by zero
842 [3:apply]: ZeroDivisionError: integer division or modulo by zero
842 [3:apply]: ZeroDivisionError: integer division or modulo by zero
843
843
@@ -1,506 +1,507 b''
1 .. _parallel_process:
1 .. _parallel_process:
2
2
3 ===========================================
3 ===========================================
4 Starting the IPython controller and engines
4 Starting the IPython controller and engines
5 ===========================================
5 ===========================================
6
6
7 To use IPython for parallel computing, you need to start one instance of
7 To use IPython for parallel computing, you need to start one instance of
8 the controller and one or more instances of the engine. The controller
8 the controller and one or more instances of the engine. The controller
9 and each engine can run on different machines or on the same machine.
9 and each engine can run on different machines or on the same machine.
10 Because of this, there are many different possibilities.
10 Because of this, there are many different possibilities.
11
11
12 Broadly speaking, there are two ways of going about starting a controller and engines:
12 Broadly speaking, there are two ways of going about starting a controller and engines:
13
13
14 * In an automated manner using the :command:`ipcluster` command.
14 * In an automated manner using the :command:`ipcluster` command.
15 * In a more manual way using the :command:`ipcontroller` and
15 * In a more manual way using the :command:`ipcontroller` and
16 :command:`ipengine` commands.
16 :command:`ipengine` commands.
17
17
18 This document describes both of these methods. We recommend that new users
18 This document describes both of these methods. We recommend that new users
19 start with the :command:`ipcluster` command as it simplifies many common usage
19 start with the :command:`ipcluster` command as it simplifies many common usage
20 cases.
20 cases.
21
21
22 General considerations
22 General considerations
23 ======================
23 ======================
24
24
25 Before delving into the details about how you can start a controller and
25 Before delving into the details about how you can start a controller and
26 engines using the various methods, we outline some of the general issues that
26 engines using the various methods, we outline some of the general issues that
27 come up when starting the controller and engines. These things come up no
27 come up when starting the controller and engines. These things come up no
28 matter which method you use to start your IPython cluster.
28 matter which method you use to start your IPython cluster.
29
29
30 Let's say that you want to start the controller on ``host0`` and engines on
30 Let's say that you want to start the controller on ``host0`` and engines on
31 hosts ``host1``-``hostn``. The following steps are then required:
31 hosts ``host1``-``hostn``. The following steps are then required:
32
32
33 1. Start the controller on ``host0`` by running :command:`ipcontroller` on
33 1. Start the controller on ``host0`` by running :command:`ipcontroller` on
34 ``host0``.
34 ``host0``.
35 2. Move the JSON file (:file:`ipcontroller-engine.json`) created by the
35 2. Move the JSON file (:file:`ipcontroller-engine.json`) created by the
36 controller from ``host0`` to hosts ``host1``-``hostn``.
36 controller from ``host0`` to hosts ``host1``-``hostn``.
37 3. Start the engines on hosts ``host1``-``hostn`` by running
37 3. Start the engines on hosts ``host1``-``hostn`` by running
38 :command:`ipengine`. This command has to be told where the JSON file
38 :command:`ipengine`. This command has to be told where the JSON file
39 (:file:`ipcontroller-engine.json`) is located.
39 (:file:`ipcontroller-engine.json`) is located.
40
40
41 At this point, the controller and engines will be connected. By default, the JSON files
41 At this point, the controller and engines will be connected. By default, the JSON files
42 created by the controller are put into the :file:`~/.ipython/cluster_default/security`
42 created by the controller are put into the :file:`~/.ipython/cluster_default/security`
43 directory. If the engines share a filesystem with the controller, step 2 can be skipped as
43 directory. If the engines share a filesystem with the controller, step 2 can be skipped as
44 the engines will automatically look at that location.
44 the engines will automatically look at that location.
45
45
46 The final step required to actually use the running controller from a client is to move
46 The final step required to actually use the running controller from a client is to move
47 the JSON file :file:`ipcontroller-client.json` from ``host0`` to any host where clients
47 the JSON file :file:`ipcontroller-client.json` from ``host0`` to any host where clients
48 will be run. If this file is put into the :file:`~/.ipython/cluster_default/security`
48 will be run. If this file is put into the :file:`~/.ipython/cluster_default/security`
49 directory of the client's host, they will be found automatically. Otherwise, the full path
49 directory of the client's host, they will be found automatically. Otherwise, the full path
50 to them has to be passed to the client's constructor.
50 to them has to be passed to the client's constructor.
51
51
52 Using :command:`ipcluster`
52 Using :command:`ipcluster`
53 ===========================
53 ===========================
54
54
55 The :command:`ipcluster` command provides a simple way of starting a
55 The :command:`ipcluster` command provides a simple way of starting a
56 controller and engines in the following situations:
56 controller and engines in the following situations:
57
57
58 1. When the controller and engines are all run on localhost. This is useful
58 1. When the controller and engines are all run on localhost. This is useful
59 for testing or running on a multicore computer.
59 for testing or running on a multicore computer.
60 2. When engines are started using the :command:`mpirun` command that comes
60 2. When engines are started using the :command:`mpiexec` command that comes
61 with most MPI [MPI]_ implementations
61 with most MPI [MPI]_ implementations
62 3. When engines are started using the PBS [PBS]_ batch system
62 3. When engines are started using the PBS [PBS]_ batch system
63 (or other `qsub` systems, such as SGE).
63 (or other `qsub` systems, such as SGE).
64 4. When the controller is started on localhost and the engines are started on
64 4. When the controller is started on localhost and the engines are started on
65 remote nodes using :command:`ssh`.
65 remote nodes using :command:`ssh`.
66 5. When engines are started using the Windows HPC Server batch system.
66 5. When engines are started using the Windows HPC Server batch system.
67
67
68 .. note::
68 .. note::
69
69
70 Currently :command:`ipcluster` requires that the
70 Currently :command:`ipcluster` requires that the
71 :file:`~/.ipython/cluster_<profile>/security` directory live on a shared filesystem that is
71 :file:`~/.ipython/cluster_<profile>/security` directory live on a shared filesystem that is
72 seen by both the controller and engines. If you don't have a shared file
72 seen by both the controller and engines. If you don't have a shared file
73 system you will need to use :command:`ipcontroller` and
73 system you will need to use :command:`ipcontroller` and
74 :command:`ipengine` directly.
74 :command:`ipengine` directly.
75
75
76 Under the hood, :command:`ipcluster` just uses :command:`ipcontroller`
76 Under the hood, :command:`ipcluster` just uses :command:`ipcontroller`
77 and :command:`ipengine` to perform the steps described above.
77 and :command:`ipengine` to perform the steps described above.
78
78
79 The simplest way to use ipcluster requires no configuration, and will
79 The simplest way to use ipcluster requires no configuration, and will
80 launch a controller and a number of engines on the local machine. For instance,
80 launch a controller and a number of engines on the local machine. For instance,
81 to start one controller and 4 engines on localhost, just do::
81 to start one controller and 4 engines on localhost, just do::
82
82
83 $ ipcluster start -n 4
83 $ ipcluster start n=4
84
84
85 To see other command line options for the local mode, do::
85 To see other command line options, do::
86
86
87 $ ipcluster -h
87 $ ipcluster -h
88
88
89
89
90 Configuring an IPython cluster
90 Configuring an IPython cluster
91 ==============================
91 ==============================
92
92
93 Cluster configurations are stored as `profiles`. You can create a new profile with::
93 Cluster configurations are stored as `profiles`. You can create a new profile with::
94
94
95 $ ipcluster create -p myprofile
95 $ ipcluster create profile=myprofile
96
96
97 This will create the directory :file:`IPYTHONDIR/cluster_myprofile`, and populate it
97 This will create the directory :file:`IPYTHONDIR/cluster_myprofile`, and populate it
98 with the default configuration files for the three IPython cluster commands. Once
98 with the default configuration files for the three IPython cluster commands. Once
99 you edit those files, you can continue to call ipcluster/ipcontroller/ipengine
99 you edit those files, you can continue to call ipcluster/ipcontroller/ipengine
100 with no arguments beyond ``-p myprofile``, and any configuration will be maintained.
100 with no arguments beyond ``p=myprofile``, and any configuration will be maintained.
101
101
102 There is no limit to the number of profiles you can have, so you can maintain a profile for each
102 There is no limit to the number of profiles you can have, so you can maintain a profile for each
103 of your common use cases. The default profile will be used whenever the
103 of your common use cases. The default profile will be used whenever the
104 profile argument is not specified, so edit :file:`IPYTHONDIR/cluster_default/*_config.py` to
104 profile argument is not specified, so edit :file:`IPYTHONDIR/cluster_default/*_config.py` to
105 represent your most common use case.
105 represent your most common use case.
106
106
107 The configuration files are loaded with commented-out settings and explanations,
107 The configuration files are loaded with commented-out settings and explanations,
108 which should cover most of the available possibilities.
108 which should cover most of the available possibilities.
109
109
110 Using various batch systems with :command:`ipcluster`
110 Using various batch systems with :command:`ipcluster`
111 ------------------------------------------------------
111 ------------------------------------------------------
112
112
113 :command:`ipcluster` has a notion of Launchers that can start controllers
113 :command:`ipcluster` has a notion of Launchers that can start controllers
114 and engines with various remote execution schemes. Currently supported
114 and engines with various remote execution schemes. Currently supported
115 models include `mpiexec`, PBS-style (Torque, SGE), and Windows HPC Server.
115 models include :command:`ssh`, :command:`mpiexec`, PBS-style (Torque, SGE),
116 and Windows HPC Server.
116
117
117 .. note::
118 .. note::
118
119
119 The Launchers and configuration are designed in such a way that advanced
120 The Launchers and configuration are designed in such a way that advanced
120 users can subclass and configure them to fit their own system that we
121 users can subclass and configure them to fit their own system that we
121 have not yet supported (such as Condor)
122 have not yet supported (such as Condor)
122
123
123 Using :command:`ipcluster` in mpiexec/mpirun mode
124 Using :command:`ipcluster` in mpiexec/mpirun mode
124 --------------------------------------------------
125 --------------------------------------------------
125
126
126
127
127 The mpiexec/mpirun mode is useful if you:
128 The mpiexec/mpirun mode is useful if you:
128
129
129 1. Have MPI installed.
130 1. Have MPI installed.
130 2. Your systems are configured to use the :command:`mpiexec` or
131 2. Your systems are configured to use the :command:`mpiexec` or
131 :command:`mpirun` commands to start MPI processes.
132 :command:`mpirun` commands to start MPI processes.
132
133
133 If these are satisfied, you can create a new profile::
134 If these are satisfied, you can create a new profile::
134
135
135 $ ipcluster create -p mpi
136 $ ipcluster create profile=mpi
136
137
137 and edit the file :file:`IPYTHONDIR/cluster_mpi/ipcluster_config.py`.
138 and edit the file :file:`IPYTHONDIR/cluster_mpi/ipcluster_config.py`.
138
139
139 There, instruct ipcluster to use the MPIExec launchers by adding the lines:
140 There, instruct ipcluster to use the MPIExec launchers by adding the lines:
140
141
141 .. sourcecode:: python
142 .. sourcecode:: python
142
143
143 c.Global.engine_launcher = 'IPython.parallel.apps.launcher.MPIExecEngineSetLauncher'
144 c.IPClusterEnginesApp.engine_launcher = 'IPython.parallel.apps.launcher.MPIExecEngineSetLauncher'
144
145
145 If the default MPI configuration is correct, then you can now start your cluster, with::
146 If the default MPI configuration is correct, then you can now start your cluster, with::
146
147
147 $ ipcluster start -n 4 -p mpi
148 $ ipcluster start n=4 profile=mpi
148
149
149 This does the following:
150 This does the following:
150
151
151 1. Starts the IPython controller on current host.
152 1. Starts the IPython controller on current host.
152 2. Uses :command:`mpiexec` to start 4 engines.
153 2. Uses :command:`mpiexec` to start 4 engines.
153
154
154 If you have a reason to also start the Controller with mpi, you can specify:
155 If you have a reason to also start the Controller with mpi, you can specify:
155
156
156 .. sourcecode:: python
157 .. sourcecode:: python
157
158
158 c.Global.controller_launcher = 'IPython.parallel.apps.launcher.MPIExecControllerLauncher'
159 c.IPClusterStartApp.controller_launcher = 'IPython.parallel.apps.launcher.MPIExecControllerLauncher'
159
160
160 .. note::
161 .. note::
161
162
162 The Controller *will not* be in the same MPI universe as the engines, so there is not
163 The Controller *will not* be in the same MPI universe as the engines, so there is not
163 much reason to do this unless sysadmins demand it.
164 much reason to do this unless sysadmins demand it.
164
165
165 On newer MPI implementations (such as OpenMPI), this will work even if you
166 On newer MPI implementations (such as OpenMPI), this will work even if you
166 don't make any calls to MPI or call :func:`MPI_Init`. However, older MPI
167 don't make any calls to MPI or call :func:`MPI_Init`. However, older MPI
167 implementations actually require each process to call :func:`MPI_Init` upon
168 implementations actually require each process to call :func:`MPI_Init` upon
168 starting. The easiest way of having this done is to install the mpi4py
169 starting. The easiest way of having this done is to install the mpi4py
169 [mpi4py]_ package and then specify the ``c.MPI.use`` option in :file:`ipengine_config.py`:
170 [mpi4py]_ package and then specify the ``c.MPI.use`` option in :file:`ipengine_config.py`:
170
171
171 .. sourcecode:: python
172 .. sourcecode:: python
172
173
173 c.MPI.use = 'mpi4py'
174 c.MPI.use = 'mpi4py'
174
175
175 Unfortunately, even this won't work for some MPI implementations. If you are
176 Unfortunately, even this won't work for some MPI implementations. If you are
176 having problems with this, you will likely have to use a custom Python
177 having problems with this, you will likely have to use a custom Python
177 executable that itself calls :func:`MPI_Init` at the appropriate time.
178 executable that itself calls :func:`MPI_Init` at the appropriate time.
178 Fortunately, mpi4py comes with such a custom Python executable that is easy to
179 Fortunately, mpi4py comes with such a custom Python executable that is easy to
179 install and use. However, this custom Python executable approach will not work
180 install and use. However, this custom Python executable approach will not work
180 with :command:`ipcluster` currently.
181 with :command:`ipcluster` currently.
181
182
182 More details on using MPI with IPython can be found :ref:`here <parallelmpi>`.
183 More details on using MPI with IPython can be found :ref:`here <parallelmpi>`.
183
184
184
185
185 Using :command:`ipcluster` in PBS mode
186 Using :command:`ipcluster` in PBS mode
186 ---------------------------------------
187 ---------------------------------------
187
188
188 The PBS mode uses the Portable Batch System [PBS]_ to start the engines.
189 The PBS mode uses the Portable Batch System [PBS]_ to start the engines.
189
190
190 As usual, we will start by creating a fresh profile::
191 As usual, we will start by creating a fresh profile::
191
192
192 $ ipcluster create -p pbs
193 $ ipcluster create profile=pbs
193
194
194 And in :file:`ipcluster_config.py`, we will select the PBS launchers for the controller
195 And in :file:`ipcluster_config.py`, we will select the PBS launchers for the controller
195 and engines:
196 and engines:
196
197
197 .. sourcecode:: python
198 .. sourcecode:: python
198
199
199 c.Global.controller_launcher = 'IPython.parallel.apps.launcher.PBSControllerLauncher'
200 c.Global.controller_launcher = 'IPython.parallel.apps.launcher.PBSControllerLauncher'
200 c.Global.engine_launcher = 'IPython.parallel.apps.launcher.PBSEngineSetLauncher'
201 c.Global.engine_launcher = 'IPython.parallel.apps.launcher.PBSEngineSetLauncher'
201
202
202 IPython does provide simple default batch templates for PBS and SGE, but you may need
203 IPython does provide simple default batch templates for PBS and SGE, but you may need
203 to specify your own. Here is a sample PBS script template:
204 to specify your own. Here is a sample PBS script template:
204
205
205 .. sourcecode:: bash
206 .. sourcecode:: bash
206
207
207 #PBS -N ipython
208 #PBS -N ipython
208 #PBS -j oe
209 #PBS -j oe
209 #PBS -l walltime=00:10:00
210 #PBS -l walltime=00:10:00
210 #PBS -l nodes=${n/4}:ppn=4
211 #PBS -l nodes=${n/4}:ppn=4
211 #PBS -q $queue
212 #PBS -q $queue
212
213
213 cd $$PBS_O_WORKDIR
214 cd $$PBS_O_WORKDIR
214 export PATH=$$HOME/usr/local/bin
215 export PATH=$$HOME/usr/local/bin
215 export PYTHONPATH=$$HOME/usr/local/lib/python2.7/site-packages
216 export PYTHONPATH=$$HOME/usr/local/lib/python2.7/site-packages
216 /usr/local/bin/mpiexec -n ${n} ipengine --cluster_dir=${cluster_dir}
217 /usr/local/bin/mpiexec -n ${n} ipengine cluster_dir=${cluster_dir}
217
218
218 There are a few important points about this template:
219 There are a few important points about this template:
219
220
220 1. This template will be rendered at runtime using IPython's :mod:`Itpl`
221 1. This template will be rendered at runtime using IPython's :mod:`Itpl`
221 template engine.
222 template engine.
222
223
223 2. Instead of putting in the actual number of engines, use the notation
224 2. Instead of putting in the actual number of engines, use the notation
224 ``${n}`` to indicate the number of engines to be started. You can also use
225 ``${n}`` to indicate the number of engines to be started. You can also use
225 expressions like ``${n/4}`` in the template to indicate the number of
226 expressions like ``${n/4}`` in the template to indicate the number of
226 nodes. There will always be a ${n} and ${cluster_dir} variable passed to the template.
227 nodes. There will always be a ${n} and ${cluster_dir} variable passed to the template.
227 These allow the batch system to know how many engines, and where the configuration
228 These allow the batch system to know how many engines, and where the configuration
228 files reside. The same is true for the batch queue, with the template variable ``$queue``.
229 files reside. The same is true for the batch queue, with the template variable ``$queue``.
229
230
230 3. Because ``$`` is a special character used by the template engine, you must
231 3. Because ``$`` is a special character used by the template engine, you must
231 escape any ``$`` by using ``$$``. This is important when referring to
232 escape any ``$`` by using ``$$``. This is important when referring to
232 environment variables in the template, or in SGE, where the config lines start
233 environment variables in the template, or in SGE, where the config lines start
233 with ``#$``, which will have to be ``#$$``.
234 with ``#$``, which will have to be ``#$$``.
234
235
235 4. Any options to :command:`ipengine` can be given in the batch script
236 4. Any options to :command:`ipengine` can be given in the batch script
236 template, or in :file:`ipengine_config.py`.
237 template, or in :file:`ipengine_config.py`.
237
238
238 5. Depending on the configuration of your system, you may have to set
239 5. Depending on the configuration of your system, you may have to set
239 environment variables in the script template.
240 environment variables in the script template.
240
241
241 The controller template should be similar, but simpler:
242 The controller template should be similar, but simpler:
242
243
243 .. sourcecode:: bash
244 .. sourcecode:: bash
244
245
245 #PBS -N ipython
246 #PBS -N ipython
246 #PBS -j oe
247 #PBS -j oe
247 #PBS -l walltime=00:10:00
248 #PBS -l walltime=00:10:00
248 #PBS -l nodes=1:ppn=4
249 #PBS -l nodes=1:ppn=4
249 #PBS -q $queue
250 #PBS -q $queue
250
251
251 cd $$PBS_O_WORKDIR
252 cd $$PBS_O_WORKDIR
252 export PATH=$$HOME/usr/local/bin
253 export PATH=$$HOME/usr/local/bin
253 export PYTHONPATH=$$HOME/usr/local/lib/python2.7/site-packages
254 export PYTHONPATH=$$HOME/usr/local/lib/python2.7/site-packages
254 ipcontroller --cluster_dir=${cluster_dir}
255 ipcontroller cluster_dir=${cluster_dir}
255
256
256
257
257 Once you have created these scripts, save them with names like
258 Once you have created these scripts, save them with names like
258 :file:`pbs.engine.template`. Now you can load them into the :file:`ipcluster_config` with:
259 :file:`pbs.engine.template`. Now you can load them into the :file:`ipcluster_config` with:
259
260
260 .. sourcecode:: python
261 .. sourcecode:: python
261
262
262 c.PBSEngineSetLauncher.batch_template_file = "pbs.engine.template"
263 c.PBSEngineSetLauncher.batch_template_file = "pbs.engine.template"
263
264
264 c.PBSControllerLauncher.batch_template_file = "pbs.controller.template"
265 c.PBSControllerLauncher.batch_template_file = "pbs.controller.template"
265
266
266
267
267 Alternately, you can just define the templates as strings inside :file:`ipcluster_config`.
268 Alternately, you can just define the templates as strings inside :file:`ipcluster_config`.
268
269
269 Whether you are using your own templates or our defaults, the extra configurables available are
270 Whether you are using your own templates or our defaults, the extra configurables available are
270 the number of engines to launch (``$n``, and the batch system queue to which the jobs are to be
271 the number of engines to launch (``$n``, and the batch system queue to which the jobs are to be
271 submitted (``$queue``)). These are configurables, and can be specified in
272 submitted (``$queue``)). These are configurables, and can be specified in
272 :file:`ipcluster_config`:
273 :file:`ipcluster_config`:
273
274
274 .. sourcecode:: python
275 .. sourcecode:: python
275
276
276 c.PBSLauncher.queue = 'veryshort.q'
277 c.PBSLauncher.queue = 'veryshort.q'
277 c.PBSEngineSetLauncher.n = 64
278 c.PBSEngineSetLauncher.n = 64
278
279
279 Note that assuming you are running PBS on a multi-node cluster, the Controller's default behavior
280 Note that assuming you are running PBS on a multi-node cluster, the Controller's default behavior
280 of listening only on localhost is likely too restrictive. In this case, also assuming the
281 of listening only on localhost is likely too restrictive. In this case, also assuming the
281 nodes are safely behind a firewall, you can simply instruct the Controller to listen for
282 nodes are safely behind a firewall, you can simply instruct the Controller to listen for
282 connections on all its interfaces, by adding in :file:`ipcontroller_config`:
283 connections on all its interfaces, by adding in :file:`ipcontroller_config`:
283
284
284 .. sourcecode:: python
285 .. sourcecode:: python
285
286
286 c.RegistrationFactory.ip = '*'
287 c.RegistrationFactory.ip = '*'
287
288
288 You can now run the cluster with::
289 You can now run the cluster with::
289
290
290 $ ipcluster start -p pbs -n 128
291 $ ipcluster start profile=pbs n=128
291
292
292 Additional configuration options can be found in the PBS section of :file:`ipcluster_config`.
293 Additional configuration options can be found in the PBS section of :file:`ipcluster_config`.
293
294
294 .. note::
295 .. note::
295
296
296 Due to the flexibility of configuration, the PBS launchers work with simple changes
297 Due to the flexibility of configuration, the PBS launchers work with simple changes
297 to the template for other :command:`qsub`-using systems, such as Sun Grid Engine,
298 to the template for other :command:`qsub`-using systems, such as Sun Grid Engine,
298 and with further configuration in similar batch systems like Condor.
299 and with further configuration in similar batch systems like Condor.
299
300
300
301
301 Using :command:`ipcluster` in SSH mode
302 Using :command:`ipcluster` in SSH mode
302 ---------------------------------------
303 ---------------------------------------
303
304
304
305
305 The SSH mode uses :command:`ssh` to execute :command:`ipengine` on remote
306 The SSH mode uses :command:`ssh` to execute :command:`ipengine` on remote
306 nodes and :command:`ipcontroller` can be run remotely as well, or on localhost.
307 nodes and :command:`ipcontroller` can be run remotely as well, or on localhost.
307
308
308 .. note::
309 .. note::
309
310
310 When using this mode it is highly recommended that you have set up SSH keys
311 When using this mode it is highly recommended that you have set up SSH keys
311 and are using ssh-agent [SSH]_ for password-less logins.
312 and are using ssh-agent [SSH]_ for password-less logins.
312
313
313 As usual, we start by creating a clean profile::
314 As usual, we start by creating a clean profile::
314
315
315 $ ipcluster create -p ssh
316 $ ipcluster create profile=ssh
316
317
317 To use this mode, select the SSH launchers in :file:`ipcluster_config.py`:
318 To use this mode, select the SSH launchers in :file:`ipcluster_config.py`:
318
319
319 .. sourcecode:: python
320 .. sourcecode:: python
320
321
321 c.Global.engine_launcher = 'IPython.parallel.apps.launcher.SSHEngineSetLauncher'
322 c.Global.engine_launcher = 'IPython.parallel.apps.launcher.SSHEngineSetLauncher'
322 # and if the Controller is also to be remote:
323 # and if the Controller is also to be remote:
323 c.Global.controller_launcher = 'IPython.parallel.apps.launcher.SSHControllerLauncher'
324 c.Global.controller_launcher = 'IPython.parallel.apps.launcher.SSHControllerLauncher'
324
325
325
326
326 The controller's remote location and configuration can be specified:
327 The controller's remote location and configuration can be specified:
327
328
328 .. sourcecode:: python
329 .. sourcecode:: python
329
330
330 # Set the user and hostname for the controller
331 # Set the user and hostname for the controller
331 # c.SSHControllerLauncher.hostname = 'controller.example.com'
332 # c.SSHControllerLauncher.hostname = 'controller.example.com'
332 # c.SSHControllerLauncher.user = os.environ.get('USER','username')
333 # c.SSHControllerLauncher.user = os.environ.get('USER','username')
333
334
334 # Set the arguments to be passed to ipcontroller
335 # Set the arguments to be passed to ipcontroller
335 # note that remotely launched ipcontroller will not get the contents of
336 # note that remotely launched ipcontroller will not get the contents of
336 # the local ipcontroller_config.py unless it resides on the *remote host*
337 # the local ipcontroller_config.py unless it resides on the *remote host*
337 # in the location specified by the --cluster_dir argument.
338 # in the location specified by the `cluster_dir` argument.
338 # c.SSHControllerLauncher.program_args = ['-r', '-ip', '0.0.0.0', '--cluster_dir', '/path/to/cd']
339 # c.SSHControllerLauncher.program_args = ['-r', '-ip', '0.0.0.0', '--cluster_dir', '/path/to/cd']
339
340
340 .. note::
341 .. note::
341
342
342 SSH mode does not do any file movement, so you will need to distribute configuration
343 SSH mode does not do any file movement, so you will need to distribute configuration
343 files manually. To aid in this, the `reuse_files` flag defaults to True for ssh-launched
344 files manually. To aid in this, the `reuse_files` flag defaults to True for ssh-launched
344 Controllers, so you will only need to do this once, unless you override this flag back
345 Controllers, so you will only need to do this once, unless you override this flag back
345 to False.
346 to False.
346
347
347 Engines are specified in a dictionary, by hostname and the number of engines to be run
348 Engines are specified in a dictionary, by hostname and the number of engines to be run
348 on that host.
349 on that host.
349
350
350 .. sourcecode:: python
351 .. sourcecode:: python
351
352
352 c.SSHEngineSetLauncher.engines = { 'host1.example.com' : 2,
353 c.SSHEngineSetLauncher.engines = { 'host1.example.com' : 2,
353 'host2.example.com' : 5,
354 'host2.example.com' : 5,
354 'host3.example.com' : (1, ['--cluster_dir', '/home/different/location']),
355 'host3.example.com' : (1, ['cluster_dir=/home/different/location']),
355 'host4.example.com' : 8 }
356 'host4.example.com' : 8 }
356
357
357 * The `engines` dict, where the keys are the host we want to run engines on and
358 * The `engines` dict, where the keys are the host we want to run engines on and
358 the value is the number of engines to run on that host.
359 the value is the number of engines to run on that host.
359 * on host3, the value is a tuple, where the number of engines is first, and the arguments
360 * on host3, the value is a tuple, where the number of engines is first, and the arguments
360 to be passed to :command:`ipengine` are the second element.
361 to be passed to :command:`ipengine` are the second element.
361
362
362 For engines without explicitly specified arguments, the default arguments are set in
363 For engines without explicitly specified arguments, the default arguments are set in
363 a single location:
364 a single location:
364
365
365 .. sourcecode:: python
366 .. sourcecode:: python
366
367
367 c.SSHEngineSetLauncher.engine_args = ['--cluster_dir', '/path/to/cluster_ssh']
368 c.SSHEngineSetLauncher.engine_args = ['--cluster_dir', '/path/to/cluster_ssh']
368
369
369 Current limitations of the SSH mode of :command:`ipcluster` are:
370 Current limitations of the SSH mode of :command:`ipcluster` are:
370
371
371 * Untested on Windows. Would require a working :command:`ssh` on Windows.
372 * Untested on Windows. Would require a working :command:`ssh` on Windows.
372 Also, we are using shell scripts to set up and execute commands on remote
373 Also, we are using shell scripts to set up and execute commands on remote
373 hosts.
374 hosts.
374 * No file movement - configuration files (such as the connection JSON files) must be
375 * No file movement - configuration files (such as the connection JSON files) must be
  distributed to the remote hosts manually.
375
376
376 Using the :command:`ipcontroller` and :command:`ipengine` commands
377 Using the :command:`ipcontroller` and :command:`ipengine` commands
377 ====================================================================
378 ====================================================================
378
379
379 It is also possible to use the :command:`ipcontroller` and :command:`ipengine`
380 It is also possible to use the :command:`ipcontroller` and :command:`ipengine`
380 commands to start your controller and engines. This approach gives you full
381 commands to start your controller and engines. This approach gives you full
381 control over all aspects of the startup process.
382 control over all aspects of the startup process.
382
383
383 Starting the controller and engine on your local machine
384 Starting the controller and engine on your local machine
384 --------------------------------------------------------
385 --------------------------------------------------------
385
386
386 To use :command:`ipcontroller` and :command:`ipengine` to start things on your
387 To use :command:`ipcontroller` and :command:`ipengine` to start things on your
387 local machine, do the following.
388 local machine, do the following.
388
389
389 First start the controller::
390 First start the controller::
390
391
391 $ ipcontroller
392 $ ipcontroller
392
393
393 Next, start however many instances of the engine you want using (repeatedly)
394 Next, start however many instances of the engine you want using (repeatedly)
394 the command::
395 the command::
395
396
396 $ ipengine
397 $ ipengine
397
398
398 The engines should start and automatically connect to the controller using the
399 The engines should start and automatically connect to the controller using the
399 JSON files in :file:`~/.ipython/cluster_default/security`. You are now ready to use the
400 JSON files in :file:`~/.ipython/cluster_default/security`. You are now ready to use the
400 controller and engines from IPython.
401 controller and engines from IPython.
401
402
402 .. warning::
403 .. warning::
403
404
404 The order of the above operations may be important. You *must*
405 The order of the above operations may be important. You *must*
405 start the controller before the engines, unless you are reusing connection
406 start the controller before the engines, unless you are reusing connection
406 information (via `-r`), in which case ordering is not important.
407 information (via `-r`), in which case ordering is not important.
407
408
408 .. note::
409 .. note::
409
410
410 On some platforms (OS X), to put the controller and engine into the
411 On some platforms (OS X), to put the controller and engine into the
411 background you may need to give these commands in the form ``(ipcontroller
412 background you may need to give these commands in the form ``(ipcontroller
412 &)`` and ``(ipengine &)`` (with the parentheses) for them to work
413 &)`` and ``(ipengine &)`` (with the parentheses) for them to work
413 properly.
414 properly.
414
415
415 Starting the controller and engines on different hosts
416 Starting the controller and engines on different hosts
416 ------------------------------------------------------
417 ------------------------------------------------------
417
418
418 When the controller and engines are running on different hosts, things are
419 When the controller and engines are running on different hosts, things are
419 slightly more complicated, but the underlying ideas are the same:
420 slightly more complicated, but the underlying ideas are the same:
420
421
421 1. Start the controller on a host using :command:`ipcontroller`.
422 1. Start the controller on a host using :command:`ipcontroller`.
422 2. Copy :file:`ipcontroller-engine.json` from :file:`~/.ipython/cluster_<profile>/security` on
423 2. Copy :file:`ipcontroller-engine.json` from :file:`~/.ipython/cluster_<profile>/security` on
423 the controller's host to the host where the engines will run.
424 the controller's host to the host where the engines will run.
424 3. Use :command:`ipengine` on the engine's hosts to start the engines.
425 3. Use :command:`ipengine` on the engine's hosts to start the engines.
425
426
426 The only thing you have to be careful of is to tell :command:`ipengine` where
427 The only thing you have to be careful of is to tell :command:`ipengine` where
427 the :file:`ipcontroller-engine.json` file is located. There are two ways you
428 the :file:`ipcontroller-engine.json` file is located. There are two ways you
428 can do this:
429 can do this:
429
430
430 * Put :file:`ipcontroller-engine.json` in the :file:`~/.ipython/cluster_<profile>/security`
431 * Put :file:`ipcontroller-engine.json` in the :file:`~/.ipython/cluster_<profile>/security`
431 directory on the engine's host, where it will be found automatically.
432 directory on the engine's host, where it will be found automatically.
432 * Call :command:`ipengine` with the ``--file=full_path_to_the_file``
433 * Call :command:`ipengine` with the ``--file=full_path_to_the_file``
433 flag.
434 flag.
434
435
435 The ``--file`` flag works like this::
436 The ``--file`` flag works like this::
436
437
437 $ ipengine --file=/path/to/my/ipcontroller-engine.json
438 $ ipengine --file=/path/to/my/ipcontroller-engine.json
438
439
439 .. note::
440 .. note::
440
441
441 If the controller's and engine's hosts all have a shared file system
442 If the controller's and engine's hosts all have a shared file system
442 (:file:`~/.ipython/cluster_<profile>/security` is the same on all of them), then things
443 (:file:`~/.ipython/cluster_<profile>/security` is the same on all of them), then things
443 will just work!
444 will just work!
444
445
445 Make JSON files persistent
446 Make JSON files persistent
446 --------------------------
447 --------------------------
447
448
448 At first glance it may seem that managing the JSON files is a bit
449 At first glance it may seem that managing the JSON files is a bit
449 annoying. Going back to the house and key analogy, copying the JSON around
450 annoying. Going back to the house and key analogy, copying the JSON around
450 each time you start the controller is like having to make a new key every time
451 each time you start the controller is like having to make a new key every time
451 you want to unlock the door and enter your house. As with your house, you want
452 you want to unlock the door and enter your house. As with your house, you want
452 to be able to create the key (or JSON file) once, and then simply use it at
453 to be able to create the key (or JSON file) once, and then simply use it at
453 any point in the future.
454 any point in the future.
454
455
455 To do this, the only thing you have to do is specify the `-r` flag, so that
456 To do this, the only thing you have to do is specify the `--reuse` flag, so that
456 the connection information in the JSON files remains accurate::
457 the connection information in the JSON files remains accurate::
457
458
458 $ ipcontroller -r
459 $ ipcontroller --reuse
459
460
460 Then, just copy the JSON files over the first time and you are set. You can
461 Then, just copy the JSON files over the first time and you are set. You can
461 start and stop the controller and engines as many times as you want in the
462 start and stop the controller and engines as many times as you want in the
462 future, just make sure to tell the controller to reuse the file.
463 future, just make sure to tell the controller to reuse the file.
463
464
464 .. note::
465 .. note::
465
466
466 You may ask the question: what ports does the controller listen on if you
467 You may ask the question: what ports does the controller listen on if you
467 don't tell it to use specific ones? The default is to use high random port
468 don't tell it to use specific ones? The default is to use high random port
468 numbers. We do this for two reasons: i) to increase security through
469 numbers. We do this for two reasons: i) to increase security through
469 obscurity and ii) to allow multiple controllers on a given host to start and
470 obscurity and ii) to allow multiple controllers on a given host to start and
470 automatically use different ports.
471 automatically use different ports.
471
472
472 Log files
473 Log files
473 ---------
474 ---------
474
475
475 All of the components of IPython have log files associated with them.
476 All of the components of IPython have log files associated with them.
476 These log files can be extremely useful in debugging problems with
477 These log files can be extremely useful in debugging problems with
477 IPython and can be found in the directory :file:`~/.ipython/cluster_<profile>/log`.
478 IPython and can be found in the directory :file:`~/.ipython/cluster_<profile>/log`.
478 Sending the log files to us will often help us to debug any problems.
479 Sending the log files to us will often help us to debug any problems.
479
480
480
481
481 Configuring `ipcontroller`
482 Configuring `ipcontroller`
482 ---------------------------
483 ---------------------------
483
484
484 Ports and addresses
485 Ports and addresses
485 *******************
486 *******************
486
487
487
488
488 Database Backend
489 Database Backend
489 ****************
490 ****************
490
491
491
492
492 .. seealso::
493 .. seealso::
493
494
494
495
495
496
496 Configuring `ipengine`
497 Configuring `ipengine`
497 -----------------------
498 -----------------------
498
499
499 .. note::
500 .. note::
500
501
501 TODO
502 TODO
502
503
503
504
504
505
505 .. [PBS] Portable Batch System. http://www.openpbs.org/
506 .. [PBS] Portable Batch System. http://www.openpbs.org/
506 .. [SSH] SSH-Agent http://en.wikipedia.org/wiki/ssh-agent
507 .. [SSH] SSH-Agent http://en.wikipedia.org/wiki/ssh-agent
@@ -1,442 +1,442 b''
1 .. _parallel_task:
1 .. _parallel_task:
2
2
3 ==========================
3 ==========================
4 The IPython task interface
4 The IPython task interface
5 ==========================
5 ==========================
6
6
7 The task interface to the cluster presents the engines as a fault tolerant,
7 The task interface to the cluster presents the engines as a fault tolerant,
8 dynamic load-balanced system of workers. Unlike the multiengine interface, in
8 dynamic load-balanced system of workers. Unlike the multiengine interface, in
9 the task interface the user has no direct access to individual engines. By
9 the task interface the user has no direct access to individual engines. By
10 allowing the IPython scheduler to assign work, this interface is simultaneously
10 allowing the IPython scheduler to assign work, this interface is simultaneously
11 simpler and more powerful.
11 simpler and more powerful.
12
12
13 Best of all, the user can use both of these interfaces running at the same time
13 Best of all, the user can use both of these interfaces running at the same time
14 to take advantage of their respective strengths. When the user can break up
14 to take advantage of their respective strengths. When the user can break up
15 the user's work into segments that do not depend on previous execution, the
15 the user's work into segments that do not depend on previous execution, the
16 task interface is ideal. But it also has more power and flexibility, allowing
16 task interface is ideal. But it also has more power and flexibility, allowing
17 the user to guide the distribution of jobs, without having to assign tasks to
17 the user to guide the distribution of jobs, without having to assign tasks to
18 engines explicitly.
18 engines explicitly.
19
19
20 Starting the IPython controller and engines
20 Starting the IPython controller and engines
21 ===========================================
21 ===========================================
22
22
23 To follow along with this tutorial, you will need to start the IPython
23 To follow along with this tutorial, you will need to start the IPython
24 controller and four IPython engines. The simplest way of doing this is to use
24 controller and four IPython engines. The simplest way of doing this is to use
25 the :command:`ipcluster` command::
25 the :command:`ipcluster` command::
26
26
27 $ ipcluster start -n 4
27 $ ipcluster start n=4
28
28
29 For more detailed information about starting the controller and engines, see
29 For more detailed information about starting the controller and engines, see
30 our :ref:`introduction <ip1par>` to using IPython for parallel computing.
30 our :ref:`introduction <ip1par>` to using IPython for parallel computing.
31
31
32 Creating a ``Client`` instance
32 Creating a ``Client`` instance
33 ==============================
33 ==============================
34
34
35 The first step is to import the IPython :mod:`IPython.parallel`
35 The first step is to import the IPython :mod:`IPython.parallel`
36 module and then create a :class:`.Client` instance, and we will also be using
36 module and then create a :class:`.Client` instance, and we will also be using
37 a :class:`LoadBalancedView`, here called `lview`:
37 a :class:`LoadBalancedView`, here called `lview`:
38
38
39 .. sourcecode:: ipython
39 .. sourcecode:: ipython
40
40
41 In [1]: from IPython.parallel import Client
41 In [1]: from IPython.parallel import Client
42
42
43 In [2]: rc = Client()
43 In [2]: rc = Client()
44
44
45
45
46 This form assumes that the controller was started on localhost with default
46 This form assumes that the controller was started on localhost with default
47 configuration. If not, the location of the controller must be given as an
47 configuration. If not, the location of the controller must be given as an
48 argument to the constructor:
48 argument to the constructor:
49
49
50 .. sourcecode:: ipython
50 .. sourcecode:: ipython
51
51
52 # for a visible LAN controller listening on an external port:
52 # for a visible LAN controller listening on an external port:
53 In [2]: rc = Client('tcp://192.168.1.16:10101')
53 In [2]: rc = Client('tcp://192.168.1.16:10101')
54 # or to connect with a specific profile you have set up:
54 # or to connect with a specific profile you have set up:
55 In [3]: rc = Client(profile='mpi')
55 In [3]: rc = Client(profile='mpi')
56
56
57 For load-balanced execution, we will make use of a :class:`LoadBalancedView` object, which can
57 For load-balanced execution, we will make use of a :class:`LoadBalancedView` object, which can
58 be constructed via the client's :meth:`load_balanced_view` method:
58 be constructed via the client's :meth:`load_balanced_view` method:
59
59
60 .. sourcecode:: ipython
60 .. sourcecode:: ipython
61
61
62 In [4]: lview = rc.load_balanced_view() # default load-balanced view
62 In [4]: lview = rc.load_balanced_view() # default load-balanced view
63
63
64 .. seealso::
64 .. seealso::
65
65
66 For more information, see the in-depth explanation of :ref:`Views <parallel_details>`.
66 For more information, see the in-depth explanation of :ref:`Views <parallel_details>`.
67
67
68
68
69 Quick and easy parallelism
69 Quick and easy parallelism
70 ==========================
70 ==========================
71
71
72 In many cases, you simply want to apply a Python function to a sequence of
72 In many cases, you simply want to apply a Python function to a sequence of
73 objects, but *in parallel*. Like the multiengine interface, these can be
73 objects, but *in parallel*. Like the multiengine interface, these can be
74 implemented via the task interface. The exact same tools can perform these
74 implemented via the task interface. The exact same tools can perform these
75 actions in load-balanced ways as well as multiplexed ways: a parallel version
75 actions in load-balanced ways as well as multiplexed ways: a parallel version
76 of :func:`map` and :func:`@parallel` function decorator. If one specifies the
76 of :func:`map` and :func:`@parallel` function decorator. If one specifies the
77 argument `balanced=True`, then they are dynamically load balanced. Thus, if the
77 argument `balanced=True`, then they are dynamically load balanced. Thus, if the
78 execution time per item varies significantly, you should use the versions in
78 execution time per item varies significantly, you should use the versions in
79 the task interface.
79 the task interface.
80
80
81 Parallel map
81 Parallel map
82 ------------
82 ------------
83
83
84 To load-balance :meth:`map`, simply use a LoadBalancedView:
84 To load-balance :meth:`map`, simply use a LoadBalancedView:
85
85
86 .. sourcecode:: ipython
86 .. sourcecode:: ipython
87
87
88 In [62]: lview.block = True
88 In [62]: lview.block = True
89
89
90 In [63]: serial_result = map(lambda x:x**10, range(32))
90 In [63]: serial_result = map(lambda x:x**10, range(32))
91
91
92 In [64]: parallel_result = lview.map(lambda x:x**10, range(32))
92 In [64]: parallel_result = lview.map(lambda x:x**10, range(32))
93
93
94 In [65]: serial_result==parallel_result
94 In [65]: serial_result==parallel_result
95 Out[65]: True
95 Out[65]: True
96
96
97 Parallel function decorator
97 Parallel function decorator
98 ---------------------------
98 ---------------------------
99
99
100 Parallel functions are just like normal functions, but they can be called on
100 Parallel functions are just like normal functions, but they can be called on
101 sequences and *in parallel*. The multiengine interface provides a decorator
101 sequences and *in parallel*. The multiengine interface provides a decorator
102 that turns any Python function into a parallel function:
102 that turns any Python function into a parallel function:
103
103
104 .. sourcecode:: ipython
104 .. sourcecode:: ipython
105
105
106 In [10]: @lview.parallel()
106 In [10]: @lview.parallel()
107 ....: def f(x):
107 ....: def f(x):
108 ....: return 10.0*x**4
108 ....: return 10.0*x**4
109 ....:
109 ....:
110
110
111 In [11]: f.map(range(32)) # this is done in parallel
111 In [11]: f.map(range(32)) # this is done in parallel
112 Out[11]: [0.0,10.0,160.0,...]
112 Out[11]: [0.0,10.0,160.0,...]
113
113
114 .. _parallel_dependencies:
114 .. _parallel_dependencies:
115
115
116 Dependencies
116 Dependencies
117 ============
117 ============
118
118
119 Often, pure atomic load-balancing is too primitive for your work. In these cases, you
119 Often, pure atomic load-balancing is too primitive for your work. In these cases, you
120 may want to associate some kind of `Dependency` that describes when, where, or whether
120 may want to associate some kind of `Dependency` that describes when, where, or whether
121 a task can be run. In IPython, we provide two types of dependencies:
121 a task can be run. In IPython, we provide two types of dependencies:
122 `Functional Dependencies`_ and `Graph Dependencies`_
122 `Functional Dependencies`_ and `Graph Dependencies`_
123
123
124 .. note::
124 .. note::
125
125
126 It is important to note that the pure ZeroMQ scheduler does not support dependencies,
126 It is important to note that the pure ZeroMQ scheduler does not support dependencies,
127 and you will see errors or warnings if you try to use dependencies with the pure
127 and you will see errors or warnings if you try to use dependencies with the pure
128 scheduler.
128 scheduler.
129
129
130 Functional Dependencies
130 Functional Dependencies
131 -----------------------
131 -----------------------
132
132
133 Functional dependencies are used to determine whether a given engine is capable of running
133 Functional dependencies are used to determine whether a given engine is capable of running
134 a particular task. This is implemented via a special :class:`Exception` class,
134 a particular task. This is implemented via a special :class:`Exception` class,
135 :class:`UnmetDependency`, found in `IPython.parallel.error`. Its use is very simple:
135 :class:`UnmetDependency`, found in `IPython.parallel.error`. Its use is very simple:
136 if a task fails with an UnmetDependency exception, then the scheduler, instead of relaying
136 if a task fails with an UnmetDependency exception, then the scheduler, instead of relaying
137 the error up to the client like any other error, catches the error, and submits the task
137 the error up to the client like any other error, catches the error, and submits the task
138 to a different engine. This will repeat indefinitely, and a task will never be submitted
138 to a different engine. This will repeat indefinitely, and a task will never be submitted
139 to a given engine a second time.
139 to a given engine a second time.
140
140
141 You can manually raise the :class:`UnmetDependency` yourself, but IPython has provided
141 You can manually raise the :class:`UnmetDependency` yourself, but IPython has provided
142 some decorators for facilitating this behavior.
142 some decorators for facilitating this behavior.
143
143
144 There are two decorators and a class used for functional dependencies:
144 There are two decorators and a class used for functional dependencies:
145
145
146 .. sourcecode:: ipython
146 .. sourcecode:: ipython
147
147
148 In [9]: from IPython.parallel import depend, require, dependent
148 In [9]: from IPython.parallel import depend, require, dependent
149
149
150 @require
150 @require
151 ********
151 ********
152
152
153 The simplest sort of dependency is requiring that a Python module is available. The
153 The simplest sort of dependency is requiring that a Python module is available. The
154 ``@require`` decorator lets you define a function that will only run on engines where names
154 ``@require`` decorator lets you define a function that will only run on engines where names
155 you specify are importable:
155 you specify are importable:
156
156
157 .. sourcecode:: ipython
157 .. sourcecode:: ipython
158
158
159 In [10]: @require('numpy', 'zmq')
159 In [10]: @require('numpy', 'zmq')
160 ...: def myfunc():
160 ...: def myfunc():
161 ...: return dostuff()
161 ...: return dostuff()
162
162
163 Now, any time you apply :func:`myfunc`, the task will only run on a machine that has
163 Now, any time you apply :func:`myfunc`, the task will only run on a machine that has
164 numpy and pyzmq available, and when :func:`myfunc` is called, numpy and zmq will be imported.
164 numpy and pyzmq available, and when :func:`myfunc` is called, numpy and zmq will be imported.
165
165
166 @depend
166 @depend
167 *******
167 *******
168
168
169 The ``@depend`` decorator lets you decorate any function with any *other* function to
169 The ``@depend`` decorator lets you decorate any function with any *other* function to
170 evaluate the dependency. The dependency function will be called at the start of the task,
170 evaluate the dependency. The dependency function will be called at the start of the task,
171 and if it returns ``False``, then the dependency will be considered unmet, and the task
171 and if it returns ``False``, then the dependency will be considered unmet, and the task
172 will be assigned to another engine. If the dependency returns *anything other than
172 will be assigned to another engine. If the dependency returns *anything other than
173 ``False``*, the rest of the task will continue.
173 ``False``*, the rest of the task will continue.
174
174
175 .. sourcecode:: ipython
175 .. sourcecode:: ipython
176
176
177 In [10]: def platform_specific(plat):
177 In [10]: def platform_specific(plat):
178 ...: import sys
178 ...: import sys
179 ...: return sys.platform == plat
179 ...: return sys.platform == plat
180
180
181 In [11]: @depend(platform_specific, 'darwin')
181 In [11]: @depend(platform_specific, 'darwin')
182 ...: def mactask():
182 ...: def mactask():
183 ...: do_mac_stuff()
183 ...: do_mac_stuff()
184
184
185 In [12]: @depend(platform_specific, 'nt')
185 In [12]: @depend(platform_specific, 'nt')
186 ...: def wintask():
186 ...: def wintask():
187 ...: do_windows_stuff()
187 ...: do_windows_stuff()
188
188
189 In this case, any time you apply ``mytask``, it will only run on an OSX machine.
189 In this case, any time you apply ``mytask``, it will only run on an OSX machine.
190 ``@depend`` is just like ``apply``, in that it has a ``@depend(f,*args,**kwargs)``
190 ``@depend`` is just like ``apply``, in that it has a ``@depend(f,*args,**kwargs)``
191 signature.
191 signature.
192
192
193 dependents
193 dependents
194 **********
194 **********
195
195
196 You don't have to use the decorators on your tasks, if for instance you may want
196 You don't have to use the decorators on your tasks, if for instance you may want
197 to run tasks with a single function but varying dependencies, you can directly construct
197 to run tasks with a single function but varying dependencies, you can directly construct
198 the :class:`dependent` object that the decorators use:
198 the :class:`dependent` object that the decorators use:
199
199
200 .. sourcecode:: ipython
200 .. sourcecode:: ipython
201
201
202 In [13]: def mytask(*args):
202 In [13]: def mytask(*args):
203 ...: dostuff()
203 ...: dostuff()
204
204
205 In [14]: mactask = dependent(mytask, platform_specific, 'darwin')
205 In [14]: mactask = dependent(mytask, platform_specific, 'darwin')
206 # this is the same as decorating the declaration of mytask with @depend
206 # this is the same as decorating the declaration of mytask with @depend
207 # but you can do it again:
207 # but you can do it again:
208
208
209 In [15]: wintask = dependent(mytask, platform_specific, 'nt')
209 In [15]: wintask = dependent(mytask, platform_specific, 'nt')
210
210
211 # in general:
211 # in general:
212 In [16]: t = dependent(f, g, *dargs, **dkwargs)
212 In [16]: t = dependent(f, g, *dargs, **dkwargs)
213
213
214 # is equivalent to:
214 # is equivalent to:
215 In [17]: @depend(g, *dargs, **dkwargs)
215 In [17]: @depend(g, *dargs, **dkwargs)
216 ...: def t(a,b,c):
216 ...: def t(a,b,c):
217 ...: # contents of f
217 ...: # contents of f
218
218
219 Graph Dependencies
219 Graph Dependencies
220 ------------------
220 ------------------
221
221
222 Sometimes you want to restrict the time and/or location to run a given task as a function
222 Sometimes you want to restrict the time and/or location to run a given task as a function
223 of the time and/or location of other tasks. This is implemented via a subclass of
223 of the time and/or location of other tasks. This is implemented via a subclass of
224 :class:`set`, called a :class:`Dependency`. A Dependency is just a set of `msg_ids`
224 :class:`set`, called a :class:`Dependency`. A Dependency is just a set of `msg_ids`
225 corresponding to tasks, and a few attributes to guide how to decide when the Dependency
225 corresponding to tasks, and a few attributes to guide how to decide when the Dependency
226 has been met.
226 has been met.
227
227
228 The switches we provide for interpreting whether a given dependency set has been met:
228 The switches we provide for interpreting whether a given dependency set has been met:
229
229
230 any|all
230 any|all
231 Whether the dependency is considered met if *any* of the dependencies are done, or
231 Whether the dependency is considered met if *any* of the dependencies are done, or
232 only after *all* of them have finished. This is set by a Dependency's :attr:`all`
232 only after *all* of them have finished. This is set by a Dependency's :attr:`all`
233 boolean attribute, which defaults to ``True``.
233 boolean attribute, which defaults to ``True``.
234
234
235 success [default: True]
235 success [default: True]
236 Whether to consider tasks that succeeded as fulfilling dependencies.
236 Whether to consider tasks that succeeded as fulfilling dependencies.
237
237
238 failure [default : False]
238 failure [default : False]
239 Whether to consider tasks that failed as fulfilling dependencies.
239 Whether to consider tasks that failed as fulfilling dependencies.
240 using `failure=True,success=False` is useful for setting up cleanup tasks, to be run
240 using `failure=True,success=False` is useful for setting up cleanup tasks, to be run
241 only when tasks have failed.
241 only when tasks have failed.
242
242
243 Sometimes you want to run a task after another, but only if that task succeeded. In this case,
243 Sometimes you want to run a task after another, but only if that task succeeded. In this case,
244 ``success`` should be ``True`` and ``failure`` should be ``False``. However sometimes you may
244 ``success`` should be ``True`` and ``failure`` should be ``False``. However sometimes you may
245 not care whether the task succeeds, and always want the second task to run, in which case you
245 not care whether the task succeeds, and always want the second task to run, in which case you
246 should use `success=failure=True`. The default behavior is to only use successes.
246 should use `success=failure=True`. The default behavior is to only use successes.
247
247
248 There are other switches for interpretation that are made at the *task* level. These are
248 There are other switches for interpretation that are made at the *task* level. These are
249 specified via keyword arguments to the client's :meth:`apply` method.
249 specified via keyword arguments to the client's :meth:`apply` method.
250
250
251 after,follow
251 after,follow
252 You may want to run a task *after* a given set of dependencies have been run and/or
252 You may want to run a task *after* a given set of dependencies have been run and/or
253 run it *where* another set of dependencies are met. To support this, every task has an
253 run it *where* another set of dependencies are met. To support this, every task has an
254 `after` dependency to restrict time, and a `follow` dependency to restrict
254 `after` dependency to restrict time, and a `follow` dependency to restrict
255 destination.
255 destination.
256
256
257 timeout
257 timeout
258 You may also want to set a time-limit for how long the scheduler should wait before a
258 You may also want to set a time-limit for how long the scheduler should wait before a
259 task's dependencies are met. This is done via a `timeout`, which defaults to 0, which
259 task's dependencies are met. This is done via a `timeout`, which defaults to 0, which
260 indicates that the task should never timeout. If the timeout is reached, and the
260 indicates that the task should never timeout. If the timeout is reached, and the
261 scheduler still hasn't been able to assign the task to an engine, the task will fail
261 scheduler still hasn't been able to assign the task to an engine, the task will fail
262 with a :class:`DependencyTimeout`.
262 with a :class:`DependencyTimeout`.
263
263
264 .. note::
264 .. note::
265
265
266 Dependencies only work within the task scheduler. You cannot instruct a load-balanced
266 Dependencies only work within the task scheduler. You cannot instruct a load-balanced
267 task to run after a job submitted via the MUX interface.
267 task to run after a job submitted via the MUX interface.
268
268
269 The simplest form of Dependencies is with `all=True,success=True,failure=False`. In these cases,
269 The simplest form of Dependencies is with `all=True,success=True,failure=False`. In these cases,
270 you can skip using Dependency objects, and just pass msg_ids or AsyncResult objects as the
270 you can skip using Dependency objects, and just pass msg_ids or AsyncResult objects as the
271 `follow` and `after` keywords to :meth:`client.apply`:
271 `follow` and `after` keywords to :meth:`client.apply`:
272
272
273 .. sourcecode:: ipython
273 .. sourcecode:: ipython
274
274
275 In [14]: client.block=False
275 In [14]: client.block=False
276
276
277 In [15]: ar = lview.apply(f, args, kwargs)
277 In [15]: ar = lview.apply(f, args, kwargs)
278
278
279 In [16]: ar2 = lview.apply(f2)
279 In [16]: ar2 = lview.apply(f2)
280
280
281 In [17]: ar3 = lview.apply_with_flags(f3, after=[ar,ar2])
281 In [17]: ar3 = lview.apply_with_flags(f3, after=[ar,ar2])
282
282
283 In [17]: ar4 = lview.apply_with_flags(f3, follow=[ar], timeout=2.5)
283 In [17]: ar4 = lview.apply_with_flags(f3, follow=[ar], timeout=2.5)
284
284
285
285
286 .. seealso::
286 .. seealso::
287
287
288 Some parallel workloads can be described as a `Directed Acyclic Graph
288 Some parallel workloads can be described as a `Directed Acyclic Graph
289 <http://en.wikipedia.org/wiki/Directed_acyclic_graph>`_, or DAG. See :ref:`DAG
289 <http://en.wikipedia.org/wiki/Directed_acyclic_graph>`_, or DAG. See :ref:`DAG
290 Dependencies <dag_dependencies>` for an example demonstrating how to map a NetworkX DAG
290 Dependencies <dag_dependencies>` for an example demonstrating how to map a NetworkX DAG
291 onto task dependencies.
291 onto task dependencies.
292
292
293
293
294
294
295
295
296 Impossible Dependencies
296 Impossible Dependencies
297 ***********************
297 ***********************
298
298
299 The schedulers do perform some analysis on graph dependencies to determine whether they
299 The schedulers do perform some analysis on graph dependencies to determine whether they
300 are impossible to meet. If the scheduler does discover that a dependency cannot be
300 are impossible to meet. If the scheduler does discover that a dependency cannot be
301 met, then the task will fail with an :class:`ImpossibleDependency` error. This way, if the
301 met, then the task will fail with an :class:`ImpossibleDependency` error. This way, if the
302 scheduler realizes that a task can never be run, it won't sit indefinitely in the
302 scheduler realizes that a task can never be run, it won't sit indefinitely in the
303 scheduler clogging the pipeline.
303 scheduler clogging the pipeline.
304
304
305 The basic cases that are checked:
305 The basic cases that are checked:
306
306
307 * depending on nonexistent messages
307 * depending on nonexistent messages
308 * `follow` dependencies were run on more than one machine and `all=True`
308 * `follow` dependencies were run on more than one machine and `all=True`
309 * any dependencies failed and `all=True,success=True,failure=False`
309 * any dependencies failed and `all=True,success=True,failure=False`
310 * all dependencies failed and `all=False,success=True,failure=False`
310 * all dependencies failed and `all=False,success=True,failure=False`
311
311
312 .. warning::
312 .. warning::
313
313
314 This analysis has not been proven to be rigorous, so it is likely possible for tasks
314 This analysis has not been proven to be rigorous, so it is likely possible for tasks
315 to become impossible to run in obscure situations, so a timeout may be a good choice.
315 to become impossible to run in obscure situations, so a timeout may be a good choice.
316
316
317
317
318 Retries and Resubmit
318 Retries and Resubmit
319 ====================
319 ====================
320
320
321 Retries
321 Retries
322 -------
322 -------
323
323
324 Another flag for tasks is `retries`. This is an integer, specifying how many times
324 Another flag for tasks is `retries`. This is an integer, specifying how many times
325 a task should be resubmitted after failure. This is useful for tasks that should still run
325 a task should be resubmitted after failure. This is useful for tasks that should still run
326 if their engine was shutdown, or may have some statistical chance of failing. The default
326 if their engine was shutdown, or may have some statistical chance of failing. The default
327 is to not retry tasks.
327 is to not retry tasks.
328
328
329 Resubmit
329 Resubmit
330 --------
330 --------
331
331
332 Sometimes you may want to re-run a task. This could be because it failed for some reason, and
332 Sometimes you may want to re-run a task. This could be because it failed for some reason, and
333 you have fixed the error, or because you want to restore the cluster to an interrupted state.
333 you have fixed the error, or because you want to restore the cluster to an interrupted state.
334 For this, the :class:`Client` has a :meth:`rc.resubmit` method. This simply takes one or more
334 For this, the :class:`Client` has a :meth:`rc.resubmit` method. This simply takes one or more
335 msg_ids, and returns an :class:`AsyncHubResult` for the result(s). You cannot resubmit
335 msg_ids, and returns an :class:`AsyncHubResult` for the result(s). You cannot resubmit
336 a task that is pending - only those that have finished, either successful or unsuccessful.
336 a task that is pending - only those that have finished, either successful or unsuccessful.
337
337
338 .. _parallel_schedulers:
338 .. _parallel_schedulers:
339
339
340 Schedulers
340 Schedulers
341 ==========
341 ==========
342
342
343 There are a variety of valid ways to determine where jobs should be assigned in a
343 There are a variety of valid ways to determine where jobs should be assigned in a
344 load-balancing situation. In IPython, we support several standard schemes, and
344 load-balancing situation. In IPython, we support several standard schemes, and
345 even make it easy to define your own. The scheme can be selected via the ``--scheme``
345 even make it easy to define your own. The scheme can be selected via the ``scheme``
346 argument to :command:`ipcontroller`, or in the :attr:`HubFactory.scheme` attribute
346 argument to :command:`ipcontroller`, or in the :attr:`TaskScheduler.schemename` attribute
347 of a controller config object.
347 of a controller config object.
348
348
349 The built-in routing schemes:
349 The built-in routing schemes:
350
350
351 To select one of these schemes, simply do::
351 To select one of these schemes, simply do::
352
352
353 $ ipcontroller --scheme <schemename>
353 $ ipcontroller scheme=<schemename>
354 for instance:
354 for instance:
355 $ ipcontroller --scheme lru
355 $ ipcontroller scheme=lru
356
356
357 lru: Least Recently Used
357 lru: Least Recently Used
358
358
359 Always assign work to the least-recently-used engine. A close relative of
359 Always assign work to the least-recently-used engine. A close relative of
360 round-robin, it will be fair with respect to the number of tasks, agnostic
360 round-robin, it will be fair with respect to the number of tasks, agnostic
361 with respect to runtime of each task.
361 with respect to runtime of each task.
362
362
363 plainrandom: Plain Random
363 plainrandom: Plain Random
364
364
365 Randomly picks an engine on which to run.
365 Randomly picks an engine on which to run.
366
366
367 twobin: Two-Bin Random
367 twobin: Two-Bin Random
368
368
369 **Requires numpy**
369 **Requires numpy**
370
370
371 Pick two engines at random, and use the LRU of the two. This is known to be better
371 Pick two engines at random, and use the LRU of the two. This is known to be better
372 than plain random in many cases, but requires a small amount of computation.
372 than plain random in many cases, but requires a small amount of computation.
373
373
374 leastload: Least Load
374 leastload: Least Load
375
375
376 **This is the default scheme**
376 **This is the default scheme**
377
377
378 Always assign tasks to the engine with the fewest outstanding tasks (LRU breaks tie).
378 Always assign tasks to the engine with the fewest outstanding tasks (LRU breaks tie).
379
379
380 weighted: Weighted Two-Bin Random
380 weighted: Weighted Two-Bin Random
381
381
382 **Requires numpy**
382 **Requires numpy**
383
383
384 Pick two engines at random using the number of outstanding tasks as inverse weights,
384 Pick two engines at random using the number of outstanding tasks as inverse weights,
385 and use the one with the lower load.
385 and use the one with the lower load.
386
386
387
387
388 Pure ZMQ Scheduler
388 Pure ZMQ Scheduler
389 ------------------
389 ------------------
390
390
391 For maximum throughput, the 'pure' scheme is not Python at all, but a C-level
391 For maximum throughput, the 'pure' scheme is not Python at all, but a C-level
392 :class:`MonitoredQueue` from PyZMQ, which uses a ZeroMQ ``XREQ`` socket to perform all
392 :class:`MonitoredQueue` from PyZMQ, which uses a ZeroMQ ``XREQ`` socket to perform all
393 load-balancing. This scheduler does not support any of the advanced features of the Python
393 load-balancing. This scheduler does not support any of the advanced features of the Python
394 :class:`.Scheduler`.
394 :class:`.Scheduler`.
395
395
396 Disabled features when using the ZMQ Scheduler:
396 Disabled features when using the ZMQ Scheduler:
397
397
398 * Engine unregistration
398 * Engine unregistration
399 Task farming will be disabled if an engine unregisters.
399 Task farming will be disabled if an engine unregisters.
400 Further, if an engine is unregistered during computation, the scheduler may not recover.
400 Further, if an engine is unregistered during computation, the scheduler may not recover.
401 * Dependencies
401 * Dependencies
402 Since there is no Python logic inside the Scheduler, routing decisions cannot be made
402 Since there is no Python logic inside the Scheduler, routing decisions cannot be made
403 based on message content.
403 based on message content.
404 * Early destination notification
404 * Early destination notification
405 The Python schedulers know which engine gets which task, and notify the Hub. This
405 The Python schedulers know which engine gets which task, and notify the Hub. This
406 allows graceful handling of Engines coming and going. There is no way to know
406 allows graceful handling of Engines coming and going. There is no way to know
407 where ZeroMQ messages have gone, so there is no way to know what tasks are on which
407 where ZeroMQ messages have gone, so there is no way to know what tasks are on which
408 engine until they *finish*. This makes recovery from engine shutdown very difficult.
408 engine until they *finish*. This makes recovery from engine shutdown very difficult.
409
409
410
410
411 .. note::
411 .. note::
412
412
413 TODO: performance comparisons
413 TODO: performance comparisons
414
414
415
415
416
416
417
417
418 More details
418 More details
419 ============
419 ============
420
420
421 The :class:`LoadBalancedView` has many more powerful features that allow quite a bit
421 The :class:`LoadBalancedView` has many more powerful features that allow quite a bit
422 of flexibility in how tasks are defined and run. The next places to look are
422 of flexibility in how tasks are defined and run. The next places to look are
423 in the following classes:
423 in the following classes:
424
424
425 * :class:`~IPython.parallel.client.view.LoadBalancedView`
425 * :class:`~IPython.parallel.client.view.LoadBalancedView`
426 * :class:`~IPython.parallel.client.asyncresult.AsyncResult`
426 * :class:`~IPython.parallel.client.asyncresult.AsyncResult`
427 * :meth:`~IPython.parallel.client.view.LoadBalancedView.apply`
427 * :meth:`~IPython.parallel.client.view.LoadBalancedView.apply`
428 * :mod:`~IPython.parallel.controller.dependency`
428 * :mod:`~IPython.parallel.controller.dependency`
429
429
430 The following is an overview of how to use these classes together:
430 The following is an overview of how to use these classes together:
431
431
432 1. Create a :class:`Client` and :class:`LoadBalancedView`
432 1. Create a :class:`Client` and :class:`LoadBalancedView`
433 2. Define some functions to be run as tasks
433 2. Define some functions to be run as tasks
434 3. Submit your tasks using the :meth:`apply` method of your
434 3. Submit your tasks using the :meth:`apply` method of your
435 :class:`LoadBalancedView` instance.
435 :class:`LoadBalancedView` instance.
436 4. Use :meth:`Client.get_result` to get the results of the
436 4. Use :meth:`Client.get_result` to get the results of the
437 tasks, or use the :meth:`AsyncResult.get` method of the results to wait
437 tasks, or use the :meth:`AsyncResult.get` method of the results to wait
438 for and then receive the results.
438 for and then receive the results.
439
439
440 .. seealso::
440 .. seealso::
441
441
442 A demo of :ref:`DAG Dependencies <dag_dependencies>` with NetworkX and IPython.
442 A demo of :ref:`DAG Dependencies <dag_dependencies>` with NetworkX and IPython.
@@ -1,334 +1,334 b''
1 ============================================
1 ============================================
2 Getting started with Windows HPC Server 2008
2 Getting started with Windows HPC Server 2008
3 ============================================
3 ============================================
4
4
5 .. note::
5 .. note::
6
6
7 Not adapted to zmq yet
7 Not adapted to zmq yet
8
8
9 Introduction
9 Introduction
10 ============
10 ============
11
11
12 The Python programming language is an increasingly popular language for
12 The Python programming language is an increasingly popular language for
13 numerical computing. This is due to a unique combination of factors. First,
13 numerical computing. This is due to a unique combination of factors. First,
14 Python is a high-level and *interactive* language that is well matched to
14 Python is a high-level and *interactive* language that is well matched to
15 interactive numerical work. Second, it is easy (often times trivial) to
15 interactive numerical work. Second, it is easy (often times trivial) to
16 integrate legacy C/C++/Fortran code into Python. Third, a large number of
16 integrate legacy C/C++/Fortran code into Python. Third, a large number of
17 high-quality open source projects provide all the needed building blocks for
17 high-quality open source projects provide all the needed building blocks for
18 numerical computing: numerical arrays (NumPy), algorithms (SciPy), 2D/3D
18 numerical computing: numerical arrays (NumPy), algorithms (SciPy), 2D/3D
19 Visualization (Matplotlib, Mayavi, Chaco), Symbolic Mathematics (Sage, Sympy)
19 Visualization (Matplotlib, Mayavi, Chaco), Symbolic Mathematics (Sage, Sympy)
20 and others.
20 and others.
21
21
22 The IPython project is a core part of this open-source toolchain and is
22 The IPython project is a core part of this open-source toolchain and is
23 focused on creating a comprehensive environment for interactive and
23 focused on creating a comprehensive environment for interactive and
24 exploratory computing in the Python programming language. It enables all of
24 exploratory computing in the Python programming language. It enables all of
25 the above tools to be used interactively and consists of two main components:
25 the above tools to be used interactively and consists of two main components:
26
26
27 * An enhanced interactive Python shell with support for interactive plotting
27 * An enhanced interactive Python shell with support for interactive plotting
28 and visualization.
28 and visualization.
29 * An architecture for interactive parallel computing.
29 * An architecture for interactive parallel computing.
30
30
31 With these components, it is possible to perform all aspects of a parallel
31 With these components, it is possible to perform all aspects of a parallel
32 computation interactively. This type of workflow is particularly relevant in
32 computation interactively. This type of workflow is particularly relevant in
33 scientific and numerical computing where algorithms, code and data are
33 scientific and numerical computing where algorithms, code and data are
34 continually evolving as the user/developer explores a problem. The broad
34 continually evolving as the user/developer explores a problem. The broad
35 treads in computing (commodity clusters, multicore, cloud computing, etc.)
35 treads in computing (commodity clusters, multicore, cloud computing, etc.)
36 make these capabilities of IPython particularly relevant.
36 make these capabilities of IPython particularly relevant.
37
37
38 While IPython is a cross platform tool, it has particularly strong support for
38 While IPython is a cross platform tool, it has particularly strong support for
39 Windows based compute clusters running Windows HPC Server 2008. This document
39 Windows based compute clusters running Windows HPC Server 2008. This document
40 describes how to get started with IPython on Windows HPC Server 2008. The
40 describes how to get started with IPython on Windows HPC Server 2008. The
41 content and emphasis here is practical: installing IPython, configuring
41 content and emphasis here is practical: installing IPython, configuring
42 IPython to use the Windows job scheduler and running example parallel programs
42 IPython to use the Windows job scheduler and running example parallel programs
43 interactively. A more complete description of IPython's parallel computing
43 interactively. A more complete description of IPython's parallel computing
44 capabilities can be found in IPython's online documentation
44 capabilities can be found in IPython's online documentation
45 (http://ipython.scipy.org/moin/Documentation).
45 (http://ipython.scipy.org/moin/Documentation).
46
46
47 Setting up your Windows cluster
47 Setting up your Windows cluster
48 ===============================
48 ===============================
49
49
50 This document assumes that you already have a cluster running Windows
50 This document assumes that you already have a cluster running Windows
51 HPC Server 2008. Here is a broad overview of what is involved with setting up
51 HPC Server 2008. Here is a broad overview of what is involved with setting up
52 such a cluster:
52 such a cluster:
53
53
54 1. Install Windows Server 2008 on the head and compute nodes in the cluster.
54 1. Install Windows Server 2008 on the head and compute nodes in the cluster.
55 2. Setup the network configuration on each host. Each host should have a
55 2. Setup the network configuration on each host. Each host should have a
56 static IP address.
56 static IP address.
57 3. On the head node, activate the "Active Directory Domain Services" role
57 3. On the head node, activate the "Active Directory Domain Services" role
58 and make the head node the domain controller.
58 and make the head node the domain controller.
59 4. Join the compute nodes to the newly created Active Directory (AD) domain.
59 4. Join the compute nodes to the newly created Active Directory (AD) domain.
60 5. Setup user accounts in the domain with shared home directories.
60 5. Setup user accounts in the domain with shared home directories.
61 6. Install the HPC Pack 2008 on the head node to create a cluster.
61 6. Install the HPC Pack 2008 on the head node to create a cluster.
62 7. Install the HPC Pack 2008 on the compute nodes.
62 7. Install the HPC Pack 2008 on the compute nodes.
63
63
64 More details about installing and configuring Windows HPC Server 2008 can be
64 More details about installing and configuring Windows HPC Server 2008 can be
65 found on the Windows HPC Home Page (http://www.microsoft.com/hpc). Regardless
65 found on the Windows HPC Home Page (http://www.microsoft.com/hpc). Regardless
66 of what steps you follow to set up your cluster, the remainder of this
66 of what steps you follow to set up your cluster, the remainder of this
67 document will assume that:
67 document will assume that:
68
68
69 * There are domain users that can log on to the AD domain and submit jobs
69 * There are domain users that can log on to the AD domain and submit jobs
70 to the cluster scheduler.
70 to the cluster scheduler.
71 * These domain users have shared home directories. While shared home
71 * These domain users have shared home directories. While shared home
72 directories are not required to use IPython, they make it much easier to
72 directories are not required to use IPython, they make it much easier to
73 use IPython.
73 use IPython.
74
74
75 Installation of IPython and its dependencies
75 Installation of IPython and its dependencies
76 ============================================
76 ============================================
77
77
78 IPython and all of its dependencies are freely available and open source.
78 IPython and all of its dependencies are freely available and open source.
79 These packages provide a powerful and cost-effective approach to numerical and
79 These packages provide a powerful and cost-effective approach to numerical and
80 scientific computing on Windows. The following dependencies are needed to run
80 scientific computing on Windows. The following dependencies are needed to run
81 IPython on Windows:
81 IPython on Windows:
82
82
83 * Python 2.6 or 2.7 (http://www.python.org)
83 * Python 2.6 or 2.7 (http://www.python.org)
84 * pywin32 (http://sourceforge.net/projects/pywin32/)
84 * pywin32 (http://sourceforge.net/projects/pywin32/)
85 * PyReadline (https://launchpad.net/pyreadline)
85 * PyReadline (https://launchpad.net/pyreadline)
86 * pyzmq (http://github.com/zeromq/pyzmq/downloads)
86 * pyzmq (http://github.com/zeromq/pyzmq/downloads)
87 * IPython (http://ipython.scipy.org)
87 * IPython (http://ipython.scipy.org)
88
88
89 In addition, the following dependencies are needed to run the demos described
89 In addition, the following dependencies are needed to run the demos described
90 in this document.
90 in this document.
91
91
92 * NumPy and SciPy (http://www.scipy.org)
92 * NumPy and SciPy (http://www.scipy.org)
93 * Matplotlib (http://matplotlib.sourceforge.net/)
93 * Matplotlib (http://matplotlib.sourceforge.net/)
94
94
95 The easiest way of obtaining these dependencies is through the Enthought
95 The easiest way of obtaining these dependencies is through the Enthought
96 Python Distribution (EPD) (http://www.enthought.com/products/epd.php). EPD is
96 Python Distribution (EPD) (http://www.enthought.com/products/epd.php). EPD is
97 produced by Enthought, Inc. and contains all of these packages and others in a
97 produced by Enthought, Inc. and contains all of these packages and others in a
98 single installer and is available free for academic users. While it is also
98 single installer and is available free for academic users. While it is also
99 possible to download and install each package individually, this is a tedious
99 possible to download and install each package individually, this is a tedious
100 process. Thus, we highly recommend using EPD to install these packages on
100 process. Thus, we highly recommend using EPD to install these packages on
101 Windows.
101 Windows.
102
102
103 Regardless of how you install the dependencies, here are the steps you will
103 Regardless of how you install the dependencies, here are the steps you will
104 need to follow:
104 need to follow:
105
105
106 1. Install all of the packages listed above, either individually or using EPD
106 1. Install all of the packages listed above, either individually or using EPD
107 on the head node, compute nodes and user workstations.
107 on the head node, compute nodes and user workstations.
108
108
109 2. Make sure that :file:`C:\\Python27` and :file:`C:\\Python27\\Scripts` are
109 2. Make sure that :file:`C:\\Python27` and :file:`C:\\Python27\\Scripts` are
110 in the system :envvar:`%PATH%` variable on each node.
110 in the system :envvar:`%PATH%` variable on each node.
111
111
112 3. Install the latest development version of IPython. This can be done by
112 3. Install the latest development version of IPython. This can be done by
113 downloading the development version from the IPython website
113 downloading the development version from the IPython website
114 (http://ipython.scipy.org) and following the installation instructions.
114 (http://ipython.scipy.org) and following the installation instructions.
115
115
116 Further details about installing IPython or its dependencies can be found in
116 Further details about installing IPython or its dependencies can be found in
117 the online IPython documentation (http://ipython.scipy.org/moin/Documentation)
117 the online IPython documentation (http://ipython.scipy.org/moin/Documentation)
118 Once you are finished with the installation, you can try IPython out by
118 Once you are finished with the installation, you can try IPython out by
119 opening a Windows Command Prompt and typing ``ipython``. This will
119 opening a Windows Command Prompt and typing ``ipython``. This will
120 start IPython's interactive shell and you should see something like the
120 start IPython's interactive shell and you should see something like the
121 following screenshot:
121 following screenshot:
122
122
123 .. image:: ipython_shell.*
123 .. image:: ipython_shell.*
124
124
125 Starting an IPython cluster
125 Starting an IPython cluster
126 ===========================
126 ===========================
127
127
128 To use IPython's parallel computing capabilities, you will need to start an
128 To use IPython's parallel computing capabilities, you will need to start an
129 IPython cluster. An IPython cluster consists of one controller and multiple
129 IPython cluster. An IPython cluster consists of one controller and multiple
130 engines:
130 engines:
131
131
132 IPython controller
132 IPython controller
133 The IPython controller manages the engines and acts as a gateway between
133 The IPython controller manages the engines and acts as a gateway between
134 the engines and the client, which runs in the user's interactive IPython
134 the engines and the client, which runs in the user's interactive IPython
135 session. The controller is started using the :command:`ipcontroller`
135 session. The controller is started using the :command:`ipcontroller`
136 command.
136 command.
137
137
138 IPython engine
138 IPython engine
139 IPython engines run a user's Python code in parallel on the compute nodes.
139 IPython engines run a user's Python code in parallel on the compute nodes.
140 Engines are starting using the :command:`ipengine` command.
140 Engines are starting using the :command:`ipengine` command.
141
141
142 Once these processes are started, a user can run Python code interactively and
142 Once these processes are started, a user can run Python code interactively and
143 in parallel on the engines from within the IPython shell using an appropriate
143 in parallel on the engines from within the IPython shell using an appropriate
144 client. This includes the ability to interact with, plot and visualize data
144 client. This includes the ability to interact with, plot and visualize data
145 from the engines.
145 from the engines.
146
146
147 IPython has a command line program called :command:`ipcluster` that automates
147 IPython has a command line program called :command:`ipcluster` that automates
148 all aspects of starting the controller and engines on the compute nodes.
148 all aspects of starting the controller and engines on the compute nodes.
149 :command:`ipcluster` has full support for the Windows HPC job scheduler,
149 :command:`ipcluster` has full support for the Windows HPC job scheduler,
150 meaning that :command:`ipcluster` can use this job scheduler to start the
150 meaning that :command:`ipcluster` can use this job scheduler to start the
151 controller and engines. In our experience, the Windows HPC job scheduler is
151 controller and engines. In our experience, the Windows HPC job scheduler is
152 particularly well suited for interactive applications, such as IPython. Once
152 particularly well suited for interactive applications, such as IPython. Once
153 :command:`ipcluster` is configured properly, a user can start an IPython
153 :command:`ipcluster` is configured properly, a user can start an IPython
154 cluster from their local workstation almost instantly, without having to log
154 cluster from their local workstation almost instantly, without having to log
155 on to the head node (as is typically required by Unix based job schedulers).
155 on to the head node (as is typically required by Unix based job schedulers).
156 This enables a user to move seamlessly between serial and parallel
156 This enables a user to move seamlessly between serial and parallel
157 computations.
157 computations.
158
158
159 In this section we show how to use :command:`ipcluster` to start an IPython
159 In this section we show how to use :command:`ipcluster` to start an IPython
160 cluster using the Windows HPC Server 2008 job scheduler. To make sure that
160 cluster using the Windows HPC Server 2008 job scheduler. To make sure that
161 :command:`ipcluster` is installed and working properly, you should first try
161 :command:`ipcluster` is installed and working properly, you should first try
162 to start an IPython cluster on your local host. To do this, open a Windows
162 to start an IPython cluster on your local host. To do this, open a Windows
163 Command Prompt and type the following command::
163 Command Prompt and type the following command::
164
164
165 ipcluster start -n 2
165 ipcluster start n=2
166
166
167 You should see a number of messages printed to the screen, ending with
167 You should see a number of messages printed to the screen, ending with
168 "IPython cluster: started". The result should look something like the following
168 "IPython cluster: started". The result should look something like the following
169 screenshot:
169 screenshot:
170
170
171 .. image:: ipcluster_start.*
171 .. image:: ipcluster_start.*
172
172
173 At this point, the controller and two engines are running on your local host.
173 At this point, the controller and two engines are running on your local host.
174 This configuration is useful for testing and for situations where you want to
174 This configuration is useful for testing and for situations where you want to
175 take advantage of multiple cores on your local computer.
175 take advantage of multiple cores on your local computer.
176
176
177 Now that we have confirmed that :command:`ipcluster` is working properly, we
177 Now that we have confirmed that :command:`ipcluster` is working properly, we
178 describe how to configure and run an IPython cluster on an actual compute
178 describe how to configure and run an IPython cluster on an actual compute
179 cluster running Windows HPC Server 2008. Here is an outline of the needed
179 cluster running Windows HPC Server 2008. Here is an outline of the needed
180 steps:
180 steps:
181
181
182 1. Create a cluster profile using: ``ipcluster create -p mycluster``
182 1. Create a cluster profile using: ``ipcluster create profile=mycluster``
183
183
184 2. Edit configuration files in the directory :file:`.ipython\\cluster_mycluster`
184 2. Edit configuration files in the directory :file:`.ipython\\cluster_mycluster`
185
185
186 3. Start the cluster using: ``ipcluster start -p mycluster -n 32``
186 3. Start the cluster using: ``ipcluster start profile=mycluster n=32``
187
187
188 Creating a cluster profile
188 Creating a cluster profile
189 --------------------------
189 --------------------------
190
190
191 In most cases, you will have to create a cluster profile to use IPython on a
191 In most cases, you will have to create a cluster profile to use IPython on a
192 cluster. A cluster profile is a name (like "mycluster") that is associated
192 cluster. A cluster profile is a name (like "mycluster") that is associated
193 with a particular cluster configuration. The profile name is used by
193 with a particular cluster configuration. The profile name is used by
194 :command:`ipcluster` when working with the cluster.
194 :command:`ipcluster` when working with the cluster.
195
195
196 Associated with each cluster profile is a cluster directory. This cluster
196 Associated with each cluster profile is a cluster directory. This cluster
197 directory is a specially named directory (typically located in the
197 directory is a specially named directory (typically located in the
198 :file:`.ipython` subdirectory of your home directory) that contains the
198 :file:`.ipython` subdirectory of your home directory) that contains the
199 configuration files for a particular cluster profile, as well as log files and
199 configuration files for a particular cluster profile, as well as log files and
200 security keys. The naming convention for cluster directories is:
200 security keys. The naming convention for cluster directories is:
201 :file:`cluster_<profile name>`. Thus, the cluster directory for a profile named
201 :file:`cluster_<profile name>`. Thus, the cluster directory for a profile named
202 "foo" would be :file:`.ipython\\cluster_foo`.
202 "foo" would be :file:`.ipython\\cluster_foo`.
203
203
204 To create a new cluster profile (named "mycluster") and the associated cluster
204 To create a new cluster profile (named "mycluster") and the associated cluster
205 directory, type the following command at the Windows Command Prompt::
205 directory, type the following command at the Windows Command Prompt::
206
206
207 ipcluster create -p mycluster
207 ipcluster create profile=mycluster
208
208
209 The output of this command is shown in the screenshot below. Notice how
209 The output of this command is shown in the screenshot below. Notice how
210 :command:`ipcluster` prints out the location of the newly created cluster
210 :command:`ipcluster` prints out the location of the newly created cluster
211 directory.
211 directory.
212
212
213 .. image:: ipcluster_create.*
213 .. image:: ipcluster_create.*
214
214
215 Configuring a cluster profile
215 Configuring a cluster profile
216 -----------------------------
216 -----------------------------
217
217
218 Next, you will need to configure the newly created cluster profile by editing
218 Next, you will need to configure the newly created cluster profile by editing
219 the following configuration files in the cluster directory:
219 the following configuration files in the cluster directory:
220
220
221 * :file:`ipcluster_config.py`
221 * :file:`ipcluster_config.py`
222 * :file:`ipcontroller_config.py`
222 * :file:`ipcontroller_config.py`
223 * :file:`ipengine_config.py`
223 * :file:`ipengine_config.py`
224
224
225 When :command:`ipcluster` is run, these configuration files are used to
225 When :command:`ipcluster` is run, these configuration files are used to
226 determine how the engines and controller will be started. In most cases,
226 determine how the engines and controller will be started. In most cases,
227 you will only have to set a few of the attributes in these files.
227 you will only have to set a few of the attributes in these files.
228
228
229 To configure :command:`ipcluster` to use the Windows HPC job scheduler, you
229 To configure :command:`ipcluster` to use the Windows HPC job scheduler, you
230 will need to edit the following attributes in the file
230 will need to edit the following attributes in the file
231 :file:`ipcluster_config.py`::
231 :file:`ipcluster_config.py`::
232
232
233 # Set these at the top of the file to tell ipcluster to use the
233 # Set these at the top of the file to tell ipcluster to use the
234 # Windows HPC job scheduler.
234 # Windows HPC job scheduler.
235 c.Global.controller_launcher = \
235 c.Global.controller_launcher = \
236 'IPython.parallel.apps.launcher.WindowsHPCControllerLauncher'
236 'IPython.parallel.apps.launcher.WindowsHPCControllerLauncher'
237 c.Global.engine_launcher = \
237 c.Global.engine_launcher = \
238 'IPython.parallel.apps.launcher.WindowsHPCEngineSetLauncher'
238 'IPython.parallel.apps.launcher.WindowsHPCEngineSetLauncher'
239
239
240 # Set these to the host name of the scheduler (head node) of your cluster.
240 # Set these to the host name of the scheduler (head node) of your cluster.
241 c.WindowsHPCControllerLauncher.scheduler = 'HEADNODE'
241 c.WindowsHPCControllerLauncher.scheduler = 'HEADNODE'
242 c.WindowsHPCEngineSetLauncher.scheduler = 'HEADNODE'
242 c.WindowsHPCEngineSetLauncher.scheduler = 'HEADNODE'
243
243
244 There are a number of other configuration attributes that can be set, but
244 There are a number of other configuration attributes that can be set, but
245 in most cases these will be sufficient to get you started.
245 in most cases these will be sufficient to get you started.
246
246
247 .. warning::
247 .. warning::
248 If any of your configuration attributes involve specifying the location
248 If any of your configuration attributes involve specifying the location
249 of shared directories or files, you must make sure that you use UNC paths
249 of shared directories or files, you must make sure that you use UNC paths
250 like :file:`\\\\host\\share`. It is also important that you specify
250 like :file:`\\\\host\\share`. It is also important that you specify
251 these paths using raw Python strings: ``r'\\host\share'`` to make sure
251 these paths using raw Python strings: ``r'\\host\share'`` to make sure
252 that the backslashes are properly escaped.
252 that the backslashes are properly escaped.
253
253
254 Starting the cluster profile
254 Starting the cluster profile
255 ----------------------------
255 ----------------------------
256
256
257 Once a cluster profile has been configured, starting an IPython cluster using
257 Once a cluster profile has been configured, starting an IPython cluster using
258 the profile is simple::
258 the profile is simple::
259
259
260 ipcluster start -p mycluster -n 32
260 ipcluster start profile=mycluster n=32
261
261
262 The ``n`` argument tells :command:`ipcluster` how many engines to start (in
262 The ``n`` argument tells :command:`ipcluster` how many engines to start (in
263 this case 32). Stopping the cluster is as simple as typing Control-C.
263 this case 32). Stopping the cluster is as simple as typing Control-C.
264
264
265 Using the HPC Job Manager
265 Using the HPC Job Manager
266 -------------------------
266 -------------------------
267
267
268 When ``ipcluster start`` is run the first time, :command:`ipcluster` creates
268 When ``ipcluster start`` is run the first time, :command:`ipcluster` creates
269 two XML job description files in the cluster directory:
269 two XML job description files in the cluster directory:
270
270
271 * :file:`ipcontroller_job.xml`
271 * :file:`ipcontroller_job.xml`
272 * :file:`ipengineset_job.xml`
272 * :file:`ipengineset_job.xml`
273
273
274 Once these files have been created, they can be imported into the HPC Job
274 Once these files have been created, they can be imported into the HPC Job
275 Manager application. Then, the controller and engines for that profile can be
275 Manager application. Then, the controller and engines for that profile can be
276 started using the HPC Job Manager directly, without using :command:`ipcluster`.
276 started using the HPC Job Manager directly, without using :command:`ipcluster`.
277 However, anytime the cluster profile is re-configured, ``ipcluster start``
277 However, anytime the cluster profile is re-configured, ``ipcluster start``
278 must be run again to regenerate the XML job description files. The
278 must be run again to regenerate the XML job description files. The
279 following screenshot shows what the HPC Job Manager interface looks like
279 following screenshot shows what the HPC Job Manager interface looks like
280 with a running IPython cluster.
280 with a running IPython cluster.
281
281
282 .. image:: hpc_job_manager.*
282 .. image:: hpc_job_manager.*
283
283
284 Performing a simple interactive parallel computation
284 Performing a simple interactive parallel computation
285 ====================================================
285 ====================================================
286
286
287 Once you have started your IPython cluster, you can start to use it. To do
287 Once you have started your IPython cluster, you can start to use it. To do
288 this, open up a new Windows Command Prompt and start up IPython's interactive
288 this, open up a new Windows Command Prompt and start up IPython's interactive
289 shell by typing::
289 shell by typing::
290
290
291 ipython
291 ipython
292
292
293 Then you can create a :class:`MultiEngineClient` instance for your profile and
293 Then you can create a :class:`MultiEngineClient` instance for your profile and
294 use the resulting instance to do a simple interactive parallel computation. In
294 use the resulting instance to do a simple interactive parallel computation. In
295 the code and screenshot that follows, we take a simple Python function and
295 the code and screenshot that follows, we take a simple Python function and
296 apply it to each element of an array of integers in parallel using the
296 apply it to each element of an array of integers in parallel using the
297 :meth:`MultiEngineClient.map` method:
297 :meth:`MultiEngineClient.map` method:
298
298
299 .. sourcecode:: ipython
299 .. sourcecode:: ipython
300
300
301 In [1]: from IPython.parallel import *
301 In [1]: from IPython.parallel import *
302
302
303 In [2]: mec = MultiEngineClient(profile='mycluster')
303 In [2]: mec = MultiEngineClient(profile='mycluster')
304
304
305 In [3]: mec.get_ids()
305 In [3]: mec.get_ids()
306 Out[3]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
306 Out[3]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
307
307
308 In [4]: def f(x):
308 In [4]: def f(x):
309 ...: return x**10
309 ...: return x**10
310
310
311 In [5]: mec.map(f, range(15)) # f is applied in parallel
311 In [5]: mec.map(f, range(15)) # f is applied in parallel
312 Out[5]:
312 Out[5]:
313 [0,
313 [0,
314 1,
314 1,
315 1024,
315 1024,
316 59049,
316 59049,
317 1048576,
317 1048576,
318 9765625,
318 9765625,
319 60466176,
319 60466176,
320 282475249,
320 282475249,
321 1073741824,
321 1073741824,
322 3486784401L,
322 3486784401L,
323 10000000000L,
323 10000000000L,
324 25937424601L,
324 25937424601L,
325 61917364224L,
325 61917364224L,
326 137858491849L,
326 137858491849L,
327 289254654976L]
327 289254654976L]
328
328
329 The :meth:`map` method has the same signature as Python's builtin :func:`map`
329 The :meth:`map` method has the same signature as Python's builtin :func:`map`
330 function, but runs the calculation in parallel. More involved examples of using
330 function, but runs the calculation in parallel. More involved examples of using
331 :class:`MultiEngineClient` are provided in the examples that follow.
331 :class:`MultiEngineClient` are provided in the examples that follow.
332
332
333 .. image:: mec_simple.*
333 .. image:: mec_simple.*
334
334
General Comments 0
You need to be logged in to leave comments. Login now