Merge remote branch 'trunk/newparallel'
MinRK
r3676:98436f1b merge

The requested changes are too big and content was truncated.

NO CONTENT: new file 100644
NO CONTENT: new file 100644
@@ -0,0 +1,90 b''
1 #!/usr/bin/env python
2
3 #
4 # This file is adapted from a paramiko demo, and thus licensed under LGPL 2.1.
5 # Original Copyright (C) 2003-2007 Robey Pointer <robeypointer@gmail.com>
6 # Edits Copyright (C) 2010 The IPython Team
7 #
8 # Paramiko is free software; you can redistribute it and/or modify it under the
9 # terms of the GNU Lesser General Public License as published by the Free
10 # Software Foundation; either version 2.1 of the License, or (at your option)
11 # any later version.
12 #
13 # Paramiko is distributed in the hope that it will be useful, but WITHOUT ANY
14 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
15 # A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
16 # details.
17 #
18 # You should have received a copy of the GNU Lesser General Public License
19 # along with Paramiko; if not, write to the Free Software Foundation, Inc.,
20 # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
21
22 """
23 Sample script showing how to do local port forwarding over paramiko.
24
25 This script connects to the requested SSH server and sets up local port
26 forwarding (the openssh -L option) from a local port through a tunneled
27 connection to a destination reachable from the SSH server machine.
28 """
29
30 from __future__ import print_function
31
32 import logging
33 import select
34 import SocketServer
35
36 logger = logging.getLogger('ssh')
37
38 class ForwardServer (SocketServer.ThreadingTCPServer):
39 daemon_threads = True
40 allow_reuse_address = True
41
42
43 class Handler (SocketServer.BaseRequestHandler):
44
45 def handle(self):
46 try:
47 chan = self.ssh_transport.open_channel('direct-tcpip',
48 (self.chain_host, self.chain_port),
49 self.request.getpeername())
50 except Exception, e:
51 logger.debug('Incoming request to %s:%d failed: %s' % (self.chain_host,
52 self.chain_port,
53 repr(e)))
54 return
55 if chan is None:
56 logger.debug('Incoming request to %s:%d was rejected by the SSH server.' %
57 (self.chain_host, self.chain_port))
58 return
59
60 logger.debug('Connected! Tunnel open %r -> %r -> %r' % (self.request.getpeername(),
61 chan.getpeername(), (self.chain_host, self.chain_port)))
62 while True:
63 r, w, x = select.select([self.request, chan], [], [])
64 if self.request in r:
65 data = self.request.recv(1024)
66 if len(data) == 0:
67 break
68 chan.send(data)
69 if chan in r:
70 data = chan.recv(1024)
71 if len(data) == 0:
72 break
73 self.request.send(data)
74 chan.close()
75 self.request.close()
76 logger.debug('Tunnel closed ')
77
78
79 def forward_tunnel(local_port, remote_host, remote_port, transport):
80 # this is a little convoluted, but lets me configure things for the Handler
81 # object. (SocketServer doesn't give Handlers any way to access the outer
82 # server normally.)
83 class SubHandler (Handler):
84 chain_host = remote_host
85 chain_port = remote_port
86 ssh_transport = transport
87 ForwardServer(('127.0.0.1', local_port), SubHandler).serve_forever()
88
89
90 __all__ = ['forward_tunnel'] No newline at end of file
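
For reference, a minimal sketch of how this helper is meant to be driven; the gateway host, user, and ports below are placeholders, and since forward_tunnel blocks in serve_forever(), real callers run it in a thread or subprocess (as the tunnel module below does):

    import paramiko
    from forward import forward_tunnel

    # Open an authenticated SSH session to the gateway (placeholder host/user).
    client = paramiko.SSHClient()
    client.load_system_host_keys()
    client.set_missing_host_key_policy(paramiko.WarningPolicy())
    client.connect('gateway.example.com', 22, username='user')

    # Forward local port 10022 through the gateway to db.internal:5432.
    # This call blocks, serving tunneled connections until interrupted.
    forward_tunnel(10022, 'db.internal', 5432, client.get_transport())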
@@ -0,0 +1,295 b''
1 """Basic ssh tunneling utilities."""
2
3 #-----------------------------------------------------------------------------
4 # Copyright (C) 2008-2010 The IPython Development Team
5 #
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING, distributed as part of this software.
8 #-----------------------------------------------------------------------------
9
10
11
12 #-----------------------------------------------------------------------------
13 # Imports
14 #-----------------------------------------------------------------------------
15
16 from __future__ import print_function
17
18 import os,sys, atexit
19 from multiprocessing import Process
20 from getpass import getpass, getuser
21 import warnings
22
23 try:
24 with warnings.catch_warnings():
25 warnings.simplefilter('ignore', DeprecationWarning)
26 import paramiko
27 except ImportError:
28 paramiko = None
29 else:
30 from forward import forward_tunnel
31
32 try:
33 from IPython.external import pexpect
34 except ImportError:
35 pexpect = None
36
37 from IPython.parallel.util import select_random_ports
38
39 #-----------------------------------------------------------------------------
40 # Code
41 #-----------------------------------------------------------------------------
42
43 #-----------------------------------------------------------------------------
44 # Check for passwordless login
45 #-----------------------------------------------------------------------------
46
47 def try_passwordless_ssh(server, keyfile, paramiko=None):
48 """Attempt to make an ssh connection without a password.
49 This is mainly used for requiring password input only once
50 when many tunnels may be connected to the same server.
51
52 If paramiko is None, the default for the platform is chosen.
53 """
54 if paramiko is None:
55 paramiko = sys.platform == 'win32'
56 if not paramiko:
57 f = _try_passwordless_openssh
58 else:
59 f = _try_passwordless_paramiko
60 return f(server, keyfile)
61
62 def _try_passwordless_openssh(server, keyfile):
63 """Try passwordless login with shell ssh command."""
64 if pexpect is None:
65 raise ImportError("pexpect unavailable, use paramiko")
66 cmd = 'ssh -f '+ server
67 if keyfile:
68 cmd += ' -i ' + keyfile
69 cmd += ' exit'
70 p = pexpect.spawn(cmd)
71 while True:
72 try:
73 p.expect('[Pp]assword:', timeout=.1)
74 except pexpect.TIMEOUT:
75 continue
76 except pexpect.EOF:
77 return True
78 else:
79 return False
80
81 def _try_passwordless_paramiko(server, keyfile):
82 """Try passwordless login with paramiko."""
83 if paramiko is None:
84 raise ImportError("paramiko unavailable, use openssh")
85 username, server, port = _split_server(server)
86 client = paramiko.SSHClient()
87 client.load_system_host_keys()
88 client.set_missing_host_key_policy(paramiko.WarningPolicy())
89 try:
90 client.connect(server, port, username=username, key_filename=keyfile,
91 look_for_keys=True)
92 except paramiko.AuthenticationException:
93 return False
94 else:
95 client.close()
96 return True
97
98
99 def tunnel_connection(socket, addr, server, keyfile=None, password=None, paramiko=None):
100 """Connect a socket to an address via an ssh tunnel.
101
102 This is a wrapper for socket.connect(addr), when addr is not accessible
103 from the local machine. It simply creates an ssh tunnel using the remaining args,
104 and calls socket.connect('tcp://localhost:lport') where lport is the randomly
105 selected local port of the tunnel.
106
107 """
108 lport = select_random_ports(1)[0]
109 transport, addr = addr.split('://')
110 ip,rport = addr.split(':')
111 rport = int(rport)
112 if paramiko is None:
113 paramiko = sys.platform == 'win32'
114 if paramiko:
115 tunnelf = paramiko_tunnel
116 else:
117 tunnelf = openssh_tunnel
118 tunnel = tunnelf(lport, rport, server, remoteip=ip, keyfile=keyfile, password=password)
119 socket.connect('tcp://127.0.0.1:%i'%lport)
120 return tunnel
121
122 def openssh_tunnel(lport, rport, server, remoteip='127.0.0.1', keyfile=None, password=None, timeout=15):
123 """Create an ssh tunnel using command-line ssh that connects port lport
124 on this machine to localhost:rport on server. The tunnel
125 will automatically close when not in use, remaining open
126 for a minimum of timeout seconds for an initial connection.
127
128 This creates a tunnel redirecting `localhost:lport` to `remoteip:rport`,
129 as seen from `server`.
130
131 keyfile and password may be specified, but ssh config is checked for defaults.
132
133 Parameters
134 ----------
135
136 lport : int
137 local port for connecting to the tunnel from this machine.
138 rport : int
139 port on the remote machine to connect to.
140 server : str
141 The ssh server to connect to. The full ssh server string will be parsed.
142 user@server:port
143 remoteip : str [Default: 127.0.0.1]
144 The remote ip, specifying the destination of the tunnel.
145 Default is localhost, which means that the tunnel would redirect
146 localhost:lport on this machine to localhost:rport on the *server*.
147
148 keyfile : str; path to private key file
149 This specifies a key to be used in ssh login, default None.
150 Regular default ssh keys will be used without specifying this argument.
151 password : str;
152 Your ssh password to the ssh server. Note that if this is left None,
153 you will be prompted for it if passwordless key based login is unavailable.
154
155 """
156 if pexpect is None:
157 raise ImportError("pexpect unavailable, use paramiko_tunnel")
158 ssh="ssh "
159 if keyfile:
160 ssh += "-i " + keyfile
161 cmd = ssh + " -f -L 127.0.0.1:%i:%s:%i %s sleep %i"%(lport, remoteip, rport, server, timeout)
162 tunnel = pexpect.spawn(cmd)
163 failed = False
164 while True:
165 try:
166 tunnel.expect('[Pp]assword:', timeout=.1)
167 except pexpect.TIMEOUT:
168 continue
169 except pexpect.EOF:
170 if tunnel.exitstatus:
171 print (tunnel.exitstatus)
172 print (tunnel.before)
173 print (tunnel.after)
174 raise RuntimeError("tunnel '%s' failed to start"%(cmd))
175 else:
176 return tunnel.pid
177 else:
178 if failed:
179 print("Password rejected, try again")
180 password=None
181 if password is None:
182 password = getpass("%s's password: "%(server))
183 tunnel.sendline(password)
184 failed = True
185
186 def _split_server(server):
187 if '@' in server:
188 username,server = server.split('@', 1)
189 else:
190 username = getuser()
191 if ':' in server:
192 server, port = server.split(':')
193 port = int(port)
194 else:
195 port = 22
196 return username, server, port
197
198 def paramiko_tunnel(lport, rport, server, remoteip='127.0.0.1', keyfile=None, password=None, timeout=15):
199 """launch a tunner with paramiko in a subprocess. This should only be used
200 when shell ssh is unavailable (e.g. Windows).
201
202 This creates a tunnel redirecting `localhost:lport` to `remoteip:rport`,
203 as seen from `server`.
204
205 If you are familiar with ssh tunnels, this creates the tunnel:
206
207 ssh server -L localhost:lport:remoteip:rport
208
209 keyfile and password may be specified, but ssh config is checked for defaults.
210
211
212 Parameters
213 ----------
214
215 lport : int
216 local port for connecting to the tunnel from this machine.
217 rport : int
218 port on the remote machine to connect to.
219 server : str
220 The ssh server to connect to. The full ssh server string will be parsed.
221 user@server:port
222 remoteip : str [Default: 127.0.0.1]
223 The remote ip, specifying the destination of the tunnel.
224 Default is localhost, which means that the tunnel would redirect
225 localhost:lport on this machine to localhost:rport on the *server*.
226
227 keyfile : str; path to private key file
228 This specifies a key to be used in ssh login, default None.
229 Regular default ssh keys will be used without specifying this argument.
230 password : str;
231 Your ssh password to the ssh server. Note that if this is left None,
232 you will be prompted for it if passwordless key based login is unavailable.
233
234 """
235 if paramiko is None:
236 raise ImportError("Paramiko not available")
237
238 if password is None:
239 if not _check_passwordless_paramiko(server, keyfile):
240 password = getpass("%s's password: "%(server))
241
242 p = Process(target=_paramiko_tunnel,
243 args=(lport, rport, server, remoteip),
244 kwargs=dict(keyfile=keyfile, password=password))
245 p.daemon=False
246 p.start()
247 atexit.register(_shutdown_process, p)
248 return p
249
250 def _shutdown_process(p):
251 if p.is_alive():
252 p.terminate()
253
254 def _paramiko_tunnel(lport, rport, server, remoteip, keyfile=None, password=None):
255 """Function for actually starting a paramiko tunnel, to be passed
256 to multiprocessing.Process(target=this), and not called directly.
257 """
258 username, server, port = _split_server(server)
259 client = paramiko.SSHClient()
260 client.load_system_host_keys()
261 client.set_missing_host_key_policy(paramiko.WarningPolicy())
262
263 try:
264 client.connect(server, port, username=username, key_filename=keyfile,
265 look_for_keys=True, password=password)
266 # except paramiko.AuthenticationException:
267 # if password is None:
268 # password = getpass("%s@%s's password: "%(username, server))
269 # client.connect(server, port, username=username, password=password)
270 # else:
271 # raise
272 except Exception as e:
273 print ('*** Failed to connect to %s:%d: %r' % (server, port, e))
274 sys.exit(1)
275
276 # print ('Now forwarding port %d to %s:%d ...' % (lport, server, rport))
277
278 try:
279 forward_tunnel(lport, remoteip, rport, client.get_transport())
280 except KeyboardInterrupt:
281 print ('SIGINT: Port forwarding stopped cleanly')
282 sys.exit(0)
283 except Exception as e:
284 print ("Port forwarding stopped uncleanly: %s"%e)
285 sys.exit(255)
286
287 if sys.platform == 'win32':
288 ssh_tunnel = paramiko_tunnel
289 else:
290 ssh_tunnel = openssh_tunnel
291
292
293 __all__ = ['tunnel_connection', 'ssh_tunnel', 'openssh_tunnel', 'paramiko_tunnel', 'try_passwordless_ssh']
294
295
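
A short usage sketch of how these helpers fit together with a ZeroMQ socket; the import path and addresses are placeholders (the module's final location in the package is not visible in this diff), and an SSH server reachable with the given credentials is assumed:

    import zmq
    import tunnel  # placeholder import; use wherever this module lands in the package

    ctx = zmq.Context()
    sock = ctx.socket(zmq.REQ)

    # 10.0.2.15:5555 is only reachable from login.example.com, so connect the
    # socket through an ssh tunnel bound to a random local port.
    tunnel.tunnel_connection(sock, 'tcp://10.0.2.15:5555', 'user@login.example.com')

    # Or open a tunnel explicitly and connect to its local end by hand:
    tunnel.ssh_tunnel(10101, 5555, 'user@login.example.com', remoteip='10.0.2.15')
    sock2 = ctx.socket(zmq.REQ)
    sock2.connect('tcp://127.0.0.1:10101')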
@@ -0,0 +1,26 b''
1 """The IPython ZMQ-based parallel computing interface."""
2 #-----------------------------------------------------------------------------
3 # Copyright (C) 2011 The IPython Development Team
4 #
5 # Distributed under the terms of the BSD License. The full license is in
6 # the file COPYING, distributed as part of this software.
7 #-----------------------------------------------------------------------------
8
9 #-----------------------------------------------------------------------------
10 # Imports
11 #-----------------------------------------------------------------------------
12
13 import zmq
14
15 if zmq.__version__ < '2.1.4':
16 raise ImportError("IPython.parallel requires pyzmq/0MQ >= 2.1.4, you appear to have %s"%zmq.__version__)
17
18 from IPython.utils.pickleutil import Reference
19
20 from .client.asyncresult import *
21 from .client.client import Client
22 from .client.remotefunction import *
23 from .client.view import *
24 from .controller.dependency import *
25
26
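
As an illustration of what this package exposes once a cluster is running (started, e.g., with 'ipcluster start -n 4'), the sketch below follows the names imported here; the view methods are assumptions, since client.py and view.py are not part of this diff:

    from IPython.parallel import Client

    rc = Client()          # connect using the default cluster profile
    print(rc.ids)          # engine ids registered with the controller

    # A direct view over all engines; map_sync is assumed to be the usual
    # blocking entry point for applying a function across engines.
    dview = rc[:]
    print(dview.map_sync(lambda x: x ** 2, range(8)))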
NO CONTENT: new file 100644
NO CONTENT: new file 100644
This diff has been collapsed as it changes many lines (537 lines changed).
@@ -0,0 +1,537 b''
1 #!/usr/bin/env python
2 # encoding: utf-8
3 """
4 The IPython cluster directory
5 """
6
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2008-2009 The IPython Development Team
9 #
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
13
14 #-----------------------------------------------------------------------------
15 # Imports
16 #-----------------------------------------------------------------------------
17
18 from __future__ import with_statement
19
20 import os
21 import logging
22 import re
23 import shutil
24 import sys
25
26 from IPython.config.loader import PyFileConfigLoader
27 from IPython.config.configurable import Configurable
28 from IPython.core.application import Application, BaseAppConfigLoader
29 from IPython.core.crashhandler import CrashHandler
30 from IPython.core import release
31 from IPython.utils.path import (
32 get_ipython_package_dir,
33 expand_path
34 )
35 from IPython.utils.traitlets import Unicode
36
37 #-----------------------------------------------------------------------------
38 # Module errors
39 #-----------------------------------------------------------------------------
40
41 class ClusterDirError(Exception):
42 pass
43
44
45 class PIDFileError(Exception):
46 pass
47
48
49 #-----------------------------------------------------------------------------
50 # Class for managing cluster directories
51 #-----------------------------------------------------------------------------
52
53 class ClusterDir(Configurable):
54 """An object to manage the cluster directory and its resources.
55
56 The cluster directory is used by :command:`ipengine`,
57 :command:`ipcontroller` and :command:`ipcluster` to manage the
58 configuration, logging and security of these applications.
59
60 This object knows how to find, create and manage these directories. This
61 should be used by any code that wants to handle cluster directories.
62 """
63
64 security_dir_name = Unicode('security')
65 log_dir_name = Unicode('log')
66 pid_dir_name = Unicode('pid')
67 security_dir = Unicode(u'')
68 log_dir = Unicode(u'')
69 pid_dir = Unicode(u'')
70 location = Unicode(u'')
71
72 def __init__(self, location=u''):
73 super(ClusterDir, self).__init__(location=location)
74
75 def _location_changed(self, name, old, new):
76 if not os.path.isdir(new):
77 os.makedirs(new)
78 self.security_dir = os.path.join(new, self.security_dir_name)
79 self.log_dir = os.path.join(new, self.log_dir_name)
80 self.pid_dir = os.path.join(new, self.pid_dir_name)
81 self.check_dirs()
82
83 def _log_dir_changed(self, name, old, new):
84 self.check_log_dir()
85
86 def check_log_dir(self):
87 if not os.path.isdir(self.log_dir):
88 os.mkdir(self.log_dir)
89
90 def _security_dir_changed(self, name, old, new):
91 self.check_security_dir()
92
93 def check_security_dir(self):
94 if not os.path.isdir(self.security_dir):
95 os.mkdir(self.security_dir, 0700)
96 os.chmod(self.security_dir, 0700)
97
98 def _pid_dir_changed(self, name, old, new):
99 self.check_pid_dir()
100
101 def check_pid_dir(self):
102 if not os.path.isdir(self.pid_dir):
103 os.mkdir(self.pid_dir, 0700)
104 os.chmod(self.pid_dir, 0700)
105
106 def check_dirs(self):
107 self.check_security_dir()
108 self.check_log_dir()
109 self.check_pid_dir()
110
111 def load_config_file(self, filename):
112 """Load a config file from the top level of the cluster dir.
113
114 Parameters
115 ----------
116 filename : unicode or str
117 The filename only of the config file that must be located in
118 the top-level of the cluster directory.
119 """
120 loader = PyFileConfigLoader(filename, self.location)
121 return loader.load_config()
122
123 def copy_config_file(self, config_file, path=None, overwrite=False):
124 """Copy a default config file into the active cluster directory.
125
126 Default configuration files are kept in :mod:`IPython.config.default`.
127 This function moves these from that location to the working cluster
128 directory.
129 """
130 if path is None:
131 import IPython.config.default
132 path = IPython.config.default.__file__.split(os.path.sep)[:-1]
133 path = os.path.sep.join(path)
134 src = os.path.join(path, config_file)
135 dst = os.path.join(self.location, config_file)
136 if not os.path.isfile(dst) or overwrite:
137 shutil.copy(src, dst)
138
139 def copy_all_config_files(self, path=None, overwrite=False):
140 """Copy all config files into the active cluster directory."""
141 for f in [u'ipcontroller_config.py', u'ipengine_config.py',
142 u'ipcluster_config.py']:
143 self.copy_config_file(f, path=path, overwrite=overwrite)
144
145 @classmethod
146 def create_cluster_dir(cls, cluster_dir):
147 """Create a new cluster directory given a full path.
148
149 Parameters
150 ----------
151 cluster_dir : str
152 The full path to the cluster directory. If it does exist, it will
153 be used. If not, it will be created.
154 """
155 return ClusterDir(location=cluster_dir)
156
157 @classmethod
158 def create_cluster_dir_by_profile(cls, path, profile=u'default'):
159 """Create a cluster dir by profile name and path.
160
161 Parameters
162 ----------
163 path : str
164 The path (directory) to put the cluster directory in.
165 profile : str
166 The name of the profile. The name of the cluster directory will
167 be "cluster_<profile>".
168 """
169 if not os.path.isdir(path):
170 raise ClusterDirError('Directory not found: %s' % path)
171 cluster_dir = os.path.join(path, u'cluster_' + profile)
172 return ClusterDir(location=cluster_dir)
173
174 @classmethod
175 def find_cluster_dir_by_profile(cls, ipython_dir, profile=u'default'):
176 """Find an existing cluster dir by profile name, return its ClusterDir.
177
178 This searches through a sequence of paths for a cluster dir. If it
179 is not found, a :class:`ClusterDirError` exception will be raised.
180
181 The search path algorithm is:
182 1. ``os.getcwd()``
183 2. ``ipython_dir``
184 3. The directories found in the ":" separated
185 :env:`IPCLUSTER_DIR_PATH` environment variable.
186
187 Parameters
188 ----------
189 ipython_dir : unicode or str
190 The IPython directory to use.
191 profile : unicode or str
192 The name of the profile. The name of the cluster directory
193 will be "cluster_<profile>".
194 """
195 dirname = u'cluster_' + profile
196 cluster_dir_paths = os.environ.get('IPCLUSTER_DIR_PATH','')
197 if cluster_dir_paths:
198 cluster_dir_paths = cluster_dir_paths.split(':')
199 else:
200 cluster_dir_paths = []
201 paths = [os.getcwd(), ipython_dir] + cluster_dir_paths
202 for p in paths:
203 cluster_dir = os.path.join(p, dirname)
204 if os.path.isdir(cluster_dir):
205 return ClusterDir(location=cluster_dir)
206 else:
207 raise ClusterDirError('Cluster directory not found in paths: %s' % dirname)
208
209 @classmethod
210 def find_cluster_dir(cls, cluster_dir):
211 """Find/create a cluster dir and return its ClusterDir.
212
213 This will create the cluster directory if it doesn't exist.
214
215 Parameters
216 ----------
217 cluster_dir : unicode or str
218 The path of the cluster directory. This is expanded using
219 :func:`IPython.utils.genutils.expand_path`.
220 """
221 cluster_dir = expand_path(cluster_dir)
222 if not os.path.isdir(cluster_dir):
223 raise ClusterDirError('Cluster directory not found: %s' % cluster_dir)
224 return ClusterDir(location=cluster_dir)
225
226
227 #-----------------------------------------------------------------------------
228 # Command line options
229 #-----------------------------------------------------------------------------
230
231 class ClusterDirConfigLoader(BaseAppConfigLoader):
232
233 def _add_cluster_profile(self, parser):
234 paa = parser.add_argument
235 paa('-p', '--profile',
236 dest='Global.profile',type=unicode,
237 help=
238 """The string name of the profile to be used. This determines the name
239 of the cluster dir as: cluster_<profile>. The default profile is named
240 'default'. The cluster directory is resolved this way if the
241 --cluster-dir option is not used.""",
242 metavar='Global.profile')
243
244 def _add_cluster_dir(self, parser):
245 paa = parser.add_argument
246 paa('--cluster-dir',
247 dest='Global.cluster_dir',type=unicode,
248 help="""Set the cluster dir. This overrides the logic used by the
249 --profile option.""",
250 metavar='Global.cluster_dir')
251
252 def _add_work_dir(self, parser):
253 paa = parser.add_argument
254 paa('--work-dir',
255 dest='Global.work_dir',type=unicode,
256 help='Set the working dir for the process.',
257 metavar='Global.work_dir')
258
259 def _add_clean_logs(self, parser):
260 paa = parser.add_argument
261 paa('--clean-logs',
262 dest='Global.clean_logs', action='store_true',
263 help='Delete old log files before starting.')
264
265 def _add_no_clean_logs(self, parser):
266 paa = parser.add_argument
267 paa('--no-clean-logs',
268 dest='Global.clean_logs', action='store_false',
269 help="Don't Delete old log flies before starting.")
270
271 def _add_arguments(self):
272 super(ClusterDirConfigLoader, self)._add_arguments()
273 self._add_cluster_profile(self.parser)
274 self._add_cluster_dir(self.parser)
275 self._add_work_dir(self.parser)
276 self._add_clean_logs(self.parser)
277 self._add_no_clean_logs(self.parser)
278
279
280 #-----------------------------------------------------------------------------
281 # Crash handler for this application
282 #-----------------------------------------------------------------------------
283
284
285 _message_template = """\
286 Oops, $self.app_name crashed. We do our best to make it stable, but...
287
288 A crash report was automatically generated with the following information:
289 - A verbatim copy of the crash traceback.
290 - Data on your current $self.app_name configuration.
291
292 It was left in the file named:
293 \t'$self.crash_report_fname'
294 If you can email this file to the developers, the information in it will help
295 them in understanding and correcting the problem.
296
297 You can mail it to: $self.contact_name at $self.contact_email
298 with the subject '$self.app_name Crash Report'.
299
300 If you want to do it now, the following command will work (under Unix):
301 mail -s '$self.app_name Crash Report' $self.contact_email < $self.crash_report_fname
302
303 To ensure accurate tracking of this issue, please file a report about it at:
304 $self.bug_tracker
305 """
306
307 class ClusterDirCrashHandler(CrashHandler):
308 """sys.excepthook for IPython itself, leaves a detailed report on disk."""
309
310 message_template = _message_template
311
312 def __init__(self, app):
313 contact_name = release.authors['Brian'][0]
314 contact_email = release.authors['Brian'][1]
315 bug_tracker = 'http://github.com/ipython/ipython/issues'
316 super(ClusterDirCrashHandler,self).__init__(
317 app, contact_name, contact_email, bug_tracker
318 )
319
320
321 #-----------------------------------------------------------------------------
322 # Main application
323 #-----------------------------------------------------------------------------
324
325 class ApplicationWithClusterDir(Application):
326 """An application that puts everything into a cluster directory.
327
328 Instead of looking for things in the ipython_dir, this type of application
329 will use its own private directory called the "cluster directory"
330 for things like config files, log files, etc.
331
332 The cluster directory is resolved as follows:
333
334 * If the ``--cluster-dir`` option is given, it is used.
335 * If ``--cluster-dir`` is not given, the application directory is
336 resolved using the profile name as ``cluster_<profile>``. The search
337 path for this directory is then i) cwd if it is found there
338 and ii) in ipython_dir otherwise.
339
340 The config file for the application is to be put in the cluster
341 dir and named the value of the ``config_file_name`` class attribute.
342 """
343
344 command_line_loader = ClusterDirConfigLoader
345 crash_handler_class = ClusterDirCrashHandler
346 auto_create_cluster_dir = True
347 # temporarily override default_log_level to INFO
348 default_log_level = logging.INFO
349
350 def create_default_config(self):
351 super(ApplicationWithClusterDir, self).create_default_config()
352 self.default_config.Global.profile = u'default'
353 self.default_config.Global.cluster_dir = u''
354 self.default_config.Global.work_dir = os.getcwd()
355 self.default_config.Global.log_to_file = False
356 self.default_config.Global.log_url = None
357 self.default_config.Global.clean_logs = False
358
359 def find_resources(self):
360 """This resolves the cluster directory.
361
362 This tries to find the cluster directory and if successful, it will
363 have done:
364 * Sets ``self.cluster_dir_obj`` to the :class:`ClusterDir` object for
365 the application.
366 * Sets ``self.cluster_dir`` attribute of the application and config
367 objects.
368
369 The algorithm used for this is as follows:
370 1. Try ``Global.cluster_dir``.
371 2. Try using ``Global.profile``.
372 3. If both of these fail and ``self.auto_create_cluster_dir`` is
373 ``True``, then create the new cluster dir in the IPython directory.
374 4. If all fails, then raise :class:`ClusterDirError`.
375 """
376
377 try:
378 cluster_dir = self.command_line_config.Global.cluster_dir
379 except AttributeError:
380 cluster_dir = self.default_config.Global.cluster_dir
381 cluster_dir = expand_path(cluster_dir)
382 try:
383 self.cluster_dir_obj = ClusterDir.find_cluster_dir(cluster_dir)
384 except ClusterDirError:
385 pass
386 else:
387 self.log.info('Using existing cluster dir: %s' % \
388 self.cluster_dir_obj.location
389 )
390 self.finish_cluster_dir()
391 return
392
393 try:
394 self.profile = self.command_line_config.Global.profile
395 except AttributeError:
396 self.profile = self.default_config.Global.profile
397 try:
398 self.cluster_dir_obj = ClusterDir.find_cluster_dir_by_profile(
399 self.ipython_dir, self.profile)
400 except ClusterDirError:
401 pass
402 else:
403 self.log.info('Using existing cluster dir: %s' % \
404 self.cluster_dir_obj.location
405 )
406 self.finish_cluster_dir()
407 return
408
409 if self.auto_create_cluster_dir:
410 self.cluster_dir_obj = ClusterDir.create_cluster_dir_by_profile(
411 self.ipython_dir, self.profile
412 )
413 self.log.info('Creating new cluster dir: %s' % \
414 self.cluster_dir_obj.location
415 )
416 self.finish_cluster_dir()
417 else:
418 raise ClusterDirError('Could not find a valid cluster directory.')
419
420 def finish_cluster_dir(self):
421 # Set the cluster directory
422 self.cluster_dir = self.cluster_dir_obj.location
423
424 # These have to be set because they could be different from the one
425 # that we just computed. Because command line has the highest
426 # priority, this will always end up in the master_config.
427 self.default_config.Global.cluster_dir = self.cluster_dir
428 self.command_line_config.Global.cluster_dir = self.cluster_dir
429
430 def find_config_file_name(self):
431 """Find the config file name for this application."""
432 # For this type of Application it should be set as a class attribute.
433 if not hasattr(self, 'default_config_file_name'):
434 self.log.critical("No config filename found")
435 else:
436 self.config_file_name = self.default_config_file_name
437
438 def find_config_file_paths(self):
439 # Set the search path to the cluster directory. We should NOT
440 # include IPython.config.default here as the default config files
441 # are ALWAYS automatically moved to the cluster directory.
442 conf_dir = os.path.join(get_ipython_package_dir(), 'config', 'default')
443 self.config_file_paths = (self.cluster_dir,)
444
445 def pre_construct(self):
446 # The log and security dirs were set earlier, but here we put them
447 # into the config and log them.
448 config = self.master_config
449 sdir = self.cluster_dir_obj.security_dir
450 self.security_dir = config.Global.security_dir = sdir
451 ldir = self.cluster_dir_obj.log_dir
452 self.log_dir = config.Global.log_dir = ldir
453 pdir = self.cluster_dir_obj.pid_dir
454 self.pid_dir = config.Global.pid_dir = pdir
455 self.log.info("Cluster directory set to: %s" % self.cluster_dir)
456 config.Global.work_dir = unicode(expand_path(config.Global.work_dir))
457 # Change to the working directory. We do this just before construct
458 # is called so all the components there have the right working dir.
459 self.to_work_dir()
460
461 def to_work_dir(self):
462 wd = self.master_config.Global.work_dir
463 if unicode(wd) != unicode(os.getcwd()):
464 os.chdir(wd)
465 self.log.info("Changing to working dir: %s" % wd)
466
467 def start_logging(self):
468 # Remove old log files
469 if self.master_config.Global.clean_logs:
470 log_dir = self.master_config.Global.log_dir
471 for f in os.listdir(log_dir):
472 if re.match(r'%s-\d+\.(log|err|out)'%self.name,f):
473 # if f.startswith(self.name + u'-') and f.endswith('.log'):
474 os.remove(os.path.join(log_dir, f))
475 # Start logging to the new log file
476 if self.master_config.Global.log_to_file:
477 log_filename = self.name + u'-' + str(os.getpid()) + u'.log'
478 logfile = os.path.join(self.log_dir, log_filename)
479 open_log_file = open(logfile, 'w')
480 elif self.master_config.Global.log_url:
481 open_log_file = None
482 else:
483 open_log_file = sys.stdout
484 if open_log_file is not None:
485 self.log.removeHandler(self._log_handler)
486 self._log_handler = logging.StreamHandler(open_log_file)
487 self._log_formatter = logging.Formatter("[%(name)s] %(message)s")
488 self._log_handler.setFormatter(self._log_formatter)
489 self.log.addHandler(self._log_handler)
490 # log.startLogging(open_log_file)
491
492 def write_pid_file(self, overwrite=False):
493 """Create a .pid file in the pid_dir with my pid.
494
495 This must be called after pre_construct, which sets `self.pid_dir`.
496 This raises :exc:`PIDFileError` if the pid file exists already.
497 """
498 pid_file = os.path.join(self.pid_dir, self.name + u'.pid')
499 if os.path.isfile(pid_file):
500 pid = self.get_pid_from_file()
501 if not overwrite:
502 raise PIDFileError(
503 'The pid file [%s] already exists. \nThis could mean that this '
504 'server is already running with [pid=%s].' % (pid_file, pid)
505 )
506 with open(pid_file, 'w') as f:
507 self.log.info("Creating pid file: %s" % pid_file)
508 f.write(repr(os.getpid())+'\n')
509
510 def remove_pid_file(self):
511 """Remove the pid file.
512
513 This should be called at shutdown by registering a callback with
514 :func:`reactor.addSystemEventTrigger`. This needs to return
515 ``None``.
516 """
517 pid_file = os.path.join(self.pid_dir, self.name + u'.pid')
518 if os.path.isfile(pid_file):
519 try:
520 self.log.info("Removing pid file: %s" % pid_file)
521 os.remove(pid_file)
522 except:
523 self.log.warn("Error removing the pid file: %s" % pid_file)
524
525 def get_pid_from_file(self):
526 """Get the pid from the pid file.
527
528 If the pid file doesn't exist a :exc:`PIDFileError` is raised.
529 """
530 pid_file = os.path.join(self.pid_dir, self.name + u'.pid')
531 if os.path.isfile(pid_file):
532 with open(pid_file, 'r') as f:
533 pid = int(f.read().strip())
534 return pid
535 else:
536 raise PIDFileError('pid file not found: %s' % pid_file)
537
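
A small sketch of driving ClusterDir directly; the import path is inferred from the sibling modules' relative imports, and the ipython_dir and profile names are placeholders:

    from IPython.parallel.clusterdir import ClusterDir, ClusterDirError  # path inferred from this diff

    # Create (or reuse) <ipython_dir>/cluster_mycluster, seed it with the
    # default config files, then load one of them back as a Config object.
    cd = ClusterDir.create_cluster_dir_by_profile(u'/home/user/.ipython', u'mycluster')
    cd.copy_all_config_files()
    cfg = cd.load_config_file(u'ipcluster_config.py')

    # Later lookups search cwd, ipython_dir and IPCLUSTER_DIR_PATH, raising
    # ClusterDirError if nothing matches.
    try:
        cd = ClusterDir.find_cluster_dir_by_profile(u'/home/user/.ipython', u'mycluster')
    except ClusterDirError:
        pass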
This diff has been collapsed as it changes many lines (592 lines changed).
@@ -0,0 +1,592 b''
1 #!/usr/bin/env python
2 # encoding: utf-8
3 """
4 The ipcluster application.
5 """
6
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2008-2009 The IPython Development Team
9 #
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
13
14 #-----------------------------------------------------------------------------
15 # Imports
16 #-----------------------------------------------------------------------------
17
18 import errno
19 import logging
20 import os
21 import re
22 import signal
23
24 import zmq
25 from zmq.eventloop import ioloop
26
27 from IPython.external.argparse import ArgumentParser, SUPPRESS
28 from IPython.utils.importstring import import_item
29 from .clusterdir import (
30 ApplicationWithClusterDir, ClusterDirConfigLoader,
31 ClusterDirError, PIDFileError
32 )
33
34
35 #-----------------------------------------------------------------------------
36 # Module level variables
37 #-----------------------------------------------------------------------------
38
39
40 default_config_file_name = u'ipcluster_config.py'
41
42
43 _description = """\
44 Start an IPython cluster for parallel computing.\n\n
45
46 An IPython cluster consists of 1 controller and 1 or more engines.
47 This command automates the startup of these processes using a wide
48 range of startup methods (SSH, local processes, PBS, mpiexec,
49 Windows HPC Server 2008). To start a cluster with 4 engines on your
50 local host simply do 'ipcluster start -n 4'. For more complex usage
51 you will typically do 'ipcluster create -p mycluster', then edit
52 configuration files, followed by 'ipcluster start -p mycluster -n 4'.
53 """
54
55
56 # Exit codes for ipcluster
57
58 # This will be the exit code if ipcluster appears to be running because
59 # a .pid file exists
60 ALREADY_STARTED = 10
61
62
63 # This will be the exit code if ipcluster stop is run, but there is no .pid
64 # file to be found.
65 ALREADY_STOPPED = 11
66
67 # This will be the exit code if ipcluster engines is run, but there is no .pid
68 # file to be found.
69 NO_CLUSTER = 12
70
71
72 #-----------------------------------------------------------------------------
73 # Command line options
74 #-----------------------------------------------------------------------------
75
76
77 class IPClusterAppConfigLoader(ClusterDirConfigLoader):
78
79 def _add_arguments(self):
80 # Don't call ClusterDirConfigLoader._add_arguments as we don't want
81 # its defaults on self.parser. Instead, we will set those as
82 # default options on our subparsers.
83
84 # This has all the common options that all subcommands use
85 parent_parser1 = ArgumentParser(
86 add_help=False,
87 argument_default=SUPPRESS
88 )
89 self._add_ipython_dir(parent_parser1)
90 self._add_log_level(parent_parser1)
91
92 # This has all the common options that other subcommands use
93 parent_parser2 = ArgumentParser(
94 add_help=False,
95 argument_default=SUPPRESS
96 )
97 self._add_cluster_profile(parent_parser2)
98 self._add_cluster_dir(parent_parser2)
99 self._add_work_dir(parent_parser2)
100 paa = parent_parser2.add_argument
101 paa('--log-to-file',
102 action='store_true', dest='Global.log_to_file',
103 help='Log to a file in the log directory (default is stdout)')
104
105 # Create the object used to create the subparsers.
106 subparsers = self.parser.add_subparsers(
107 dest='Global.subcommand',
108 title='ipcluster subcommands',
109 description=
110 """ipcluster has a variety of subcommands. The general way of
111 running ipcluster is 'ipcluster <cmd> [options]'. To get help
112 on a particular subcommand do 'ipcluster <cmd> -h'."""
113 # help="For more help, type 'ipcluster <cmd> -h'",
114 )
115
116 # The "list" subcommand parser
117 parser_list = subparsers.add_parser(
118 'list',
119 parents=[parent_parser1],
120 argument_default=SUPPRESS,
121 help="List all clusters in cwd and ipython_dir.",
122 description=
123 """List all available clusters, by cluster directory, that can
124 be found in the current working directory or in the ipython
125 directory. Cluster directories are named using the convention
126 'cluster_<profile>'."""
127 )
128
129 # The "create" subcommand parser
130 parser_create = subparsers.add_parser(
131 'create',
132 parents=[parent_parser1, parent_parser2],
133 argument_default=SUPPRESS,
134 help="Create a new cluster directory.",
135 description=
136 """Create an ipython cluster directory by its profile name or
137 cluster directory path. Cluster directories contain
138 configuration, log and security related files and are named
139 using the convention 'cluster_<profile>'. By default they are
140 located in your ipython directory. Once created, you will
141 probably need to edit the configuration files in the cluster
142 directory to configure your cluster. Most users will create a
143 cluster directory by profile name,
144 'ipcluster create -p mycluster', which will put the directory
145 in '<ipython_dir>/cluster_mycluster'.
146 """
147 )
148 paa = parser_create.add_argument
149 paa('--reset-config',
150 dest='Global.reset_config', action='store_true',
151 help=
152 """Recopy the default config files to the cluster directory.
153 You will lose any modifications you have made to these files.""")
154
155 # The "start" subcommand parser
156 parser_start = subparsers.add_parser(
157 'start',
158 parents=[parent_parser1, parent_parser2],
159 argument_default=SUPPRESS,
160 help="Start a cluster.",
161 description=
162 """Start an ipython cluster by its profile name or cluster
163 directory. Cluster directories contain configuration, log and
164 security related files and are named using the convention
165 'cluster_<profile>' and should be created using the 'create'
166 subcommand of 'ipcluster'. If your cluster directory is in
167 the cwd or the ipython directory, you can simply refer to it
168 using its profile name, 'ipcluster start -n 4 -p <profile>',
169 otherwise use the '--cluster-dir' option.
170 """
171 )
172
173 paa = parser_start.add_argument
174 paa('-n', '--number',
175 type=int, dest='Global.n',
176 help='The number of engines to start.',
177 metavar='Global.n')
178 paa('--clean-logs',
179 dest='Global.clean_logs', action='store_true',
180 help='Delete old log files before starting.')
181 paa('--no-clean-logs',
182 dest='Global.clean_logs', action='store_false',
183 help="Don't delete old log flies before starting.")
184 paa('--daemon',
185 dest='Global.daemonize', action='store_true',
186 help='Daemonize the ipcluster program. This implies --log-to-file')
187 paa('--no-daemon',
188 dest='Global.daemonize', action='store_false',
189 help="Dont't daemonize the ipcluster program.")
190 paa('--delay',
191 type=float, dest='Global.delay',
192 help="Specify the delay (in seconds) between starting the controller and starting the engine(s).")
193
194 # The "stop" subcommand parser
195 parser_stop = subparsers.add_parser(
196 'stop',
197 parents=[parent_parser1, parent_parser2],
198 argument_default=SUPPRESS,
199 help="Stop a running cluster.",
200 description=
201 """Stop a running ipython cluster by its profile name or cluster
202 directory. Cluster directories are named using the convention
203 'cluster_<profile>'. If your cluster directory is in
204 the cwd or the ipython directory, you can simply refer to it
205 using its profile name, 'ipcluster stop -p <profile>', otherwise
206 use the '--cluster-dir' option.
207 """
208 )
209 paa = parser_stop.add_argument
210 paa('--signal',
211 dest='Global.signal', type=int,
212 help="The signal number to use in stopping the cluster (default=2).",
213 metavar="Global.signal")
214
215 # the "engines" subcommand parser
216 parser_engines = subparsers.add_parser(
217 'engines',
218 parents=[parent_parser1, parent_parser2],
219 argument_default=SUPPRESS,
220 help="Attach some engines to an existing controller or cluster.",
221 description=
222 """Start one or more engines to connect to an existing Cluster
223 by profile name or cluster directory.
224 Cluster directories contain configuration, log and
225 security related files and are named using the convention
226 'cluster_<profile>' and should be created using the 'create'
227 subcommand of 'ipcluster'. If your cluster directory is in
228 the cwd or the ipython directory, you can simply refer to it
229 using its profile name, 'ipcluster engines -n 4 -p <profile>',
230 otherwise use the '--cluster-dir' option.
231 """
232 )
233 paa = parser_engines.add_argument
234 paa('-n', '--number',
235 type=int, dest='Global.n',
236 help='The number of engines to start.',
237 metavar='Global.n')
238 paa('--daemon',
239 dest='Global.daemonize', action='store_true',
240 help='Daemonize the ipcluster program. This implies --log-to-file')
241 paa('--no-daemon',
242 dest='Global.daemonize', action='store_false',
243 help="Dont't daemonize the ipcluster program.")
244
245 #-----------------------------------------------------------------------------
246 # Main application
247 #-----------------------------------------------------------------------------
248
249
250 class IPClusterApp(ApplicationWithClusterDir):
251
252 name = u'ipcluster'
253 description = _description
254 usage = None
255 command_line_loader = IPClusterAppConfigLoader
256 default_config_file_name = default_config_file_name
257 default_log_level = logging.INFO
258 auto_create_cluster_dir = False
259
260 def create_default_config(self):
261 super(IPClusterApp, self).create_default_config()
262 self.default_config.Global.controller_launcher = \
263 'IPython.parallel.launcher.LocalControllerLauncher'
264 self.default_config.Global.engine_launcher = \
265 'IPython.parallel.launcher.LocalEngineSetLauncher'
266 self.default_config.Global.n = 2
267 self.default_config.Global.delay = 2
268 self.default_config.Global.reset_config = False
269 self.default_config.Global.clean_logs = True
270 self.default_config.Global.signal = signal.SIGINT
271 self.default_config.Global.daemonize = False
272
273 def find_resources(self):
274 subcommand = self.command_line_config.Global.subcommand
275 if subcommand=='list':
276 self.list_cluster_dirs()
277 # Exit immediately because there is nothing left to do.
278 self.exit()
279 elif subcommand=='create':
280 self.auto_create_cluster_dir = True
281 super(IPClusterApp, self).find_resources()
282 elif subcommand=='start' or subcommand=='stop':
283 self.auto_create_cluster_dir = True
284 try:
285 super(IPClusterApp, self).find_resources()
286 except ClusterDirError:
287 raise ClusterDirError(
288 "Could not find a cluster directory. A cluster dir must "
289 "be created before running 'ipcluster start'. Do "
290 "'ipcluster create -h' or 'ipcluster list -h' for more "
291 "information about creating and listing cluster dirs."
292 )
293 elif subcommand=='engines':
294 self.auto_create_cluster_dir = False
295 try:
296 super(IPClusterApp, self).find_resources()
297 except ClusterDirError:
298 raise ClusterDirError(
299 "Could not find a cluster directory. A cluster dir must "
300 "be created before running 'ipcluster start'. Do "
301 "'ipcluster create -h' or 'ipcluster list -h' for more "
302 "information about creating and listing cluster dirs."
303 )
304
305 def list_cluster_dirs(self):
306 # Find the search paths
307 cluster_dir_paths = os.environ.get('IPCLUSTER_DIR_PATH','')
308 if cluster_dir_paths:
309 cluster_dir_paths = cluster_dir_paths.split(':')
310 else:
311 cluster_dir_paths = []
312 try:
313 ipython_dir = self.command_line_config.Global.ipython_dir
314 except AttributeError:
315 ipython_dir = self.default_config.Global.ipython_dir
316 paths = [os.getcwd(), ipython_dir] + \
317 cluster_dir_paths
318 paths = list(set(paths))
319
320 self.log.info('Searching for cluster dirs in paths: %r' % paths)
321 for path in paths:
322 files = os.listdir(path)
323 for f in files:
324 full_path = os.path.join(path, f)
325 if os.path.isdir(full_path) and f.startswith('cluster_'):
326 profile = full_path.split('_')[-1]
327 start_cmd = 'ipcluster start -p %s -n 4' % profile
328 print start_cmd + " ==> " + full_path
329
330 def pre_construct(self):
331 # IPClusterApp.pre_construct() is where we cd to the working directory.
332 super(IPClusterApp, self).pre_construct()
333 config = self.master_config
334 try:
335 daemon = config.Global.daemonize
336 if daemon:
337 config.Global.log_to_file = True
338 except AttributeError:
339 pass
340
341 def construct(self):
342 config = self.master_config
343 subcmd = config.Global.subcommand
344 reset = config.Global.reset_config
345 if subcmd == 'list':
346 return
347 if subcmd == 'create':
348 self.log.info('Copying default config files to cluster directory '
349 '[overwrite=%r]' % (reset,))
350 self.cluster_dir_obj.copy_all_config_files(overwrite=reset)
351 if subcmd =='start':
352 self.cluster_dir_obj.copy_all_config_files(overwrite=False)
353 self.start_logging()
354 self.loop = ioloop.IOLoop.instance()
355 # reactor.callWhenRunning(self.start_launchers)
356 dc = ioloop.DelayedCallback(self.start_launchers, 0, self.loop)
357 dc.start()
358 if subcmd == 'engines':
359 self.start_logging()
360 self.loop = ioloop.IOLoop.instance()
361 # reactor.callWhenRunning(self.start_launchers)
362 engine_only = lambda : self.start_launchers(controller=False)
363 dc = ioloop.DelayedCallback(engine_only, 0, self.loop)
364 dc.start()
365
366 def start_launchers(self, controller=True):
367 config = self.master_config
368
369 # Create the launchers. In both cases, we set the work_dir of
370 # the launcher to the cluster_dir. This is where the launcher's
371 # subprocesses will be launched. It is not where the controller
372 # and engine will be launched.
373 if controller:
374 cl_class = import_item(config.Global.controller_launcher)
375 self.controller_launcher = cl_class(
376 work_dir=self.cluster_dir, config=config,
377 logname=self.log.name
378 )
379 # Setup the observing of stopping. If the controller dies, shut
380 # everything down as that will be completely fatal for the engines.
381 self.controller_launcher.on_stop(self.stop_launchers)
382 # But, we don't monitor the stopping of engines. An engine dying
383 # is just fine and in principle a user could start a new engine.
384 # Also, if we did monitor engine stopping, it is difficult to
385 # know what to do when only some engines die. Currently, the
386 # observing of engine stopping is inconsistent. Some launchers
387 # might trigger on a single engine stopping, others wait until
388 # all stop. TODO: think more about how to handle this.
389 else:
390 self.controller_launcher = None
391
392 el_class = import_item(config.Global.engine_launcher)
393 self.engine_launcher = el_class(
394 work_dir=self.cluster_dir, config=config, logname=self.log.name
395 )
396
397 # Setup signals
398 signal.signal(signal.SIGINT, self.sigint_handler)
399
400 # Start the controller and engines
401 self._stopping = False # Make sure stop_launchers is not called 2x.
402 if controller:
403 self.start_controller()
404 dc = ioloop.DelayedCallback(self.start_engines, 1000*config.Global.delay*controller, self.loop)
405 dc.start()
406 self.startup_message()
407
408 def startup_message(self, r=None):
409 self.log.info("IPython cluster: started")
410 return r
411
412 def start_controller(self, r=None):
413 # self.log.info("In start_controller")
414 config = self.master_config
415 d = self.controller_launcher.start(
416 cluster_dir=config.Global.cluster_dir
417 )
418 return d
419
420 def start_engines(self, r=None):
421 # self.log.info("In start_engines")
422 config = self.master_config
423
424 d = self.engine_launcher.start(
425 config.Global.n,
426 cluster_dir=config.Global.cluster_dir
427 )
428 return d
429
430 def stop_controller(self, r=None):
431 # self.log.info("In stop_controller")
432 if self.controller_launcher and self.controller_launcher.running:
433 return self.controller_launcher.stop()
434
435 def stop_engines(self, r=None):
436 # self.log.info("In stop_engines")
437 if self.engine_launcher.running:
438 d = self.engine_launcher.stop()
439 # d.addErrback(self.log_err)
440 return d
441 else:
442 return None
443
444 def log_err(self, f):
445 self.log.error(f.getTraceback())
446 return None
447
448 def stop_launchers(self, r=None):
449 if not self._stopping:
450 self._stopping = True
451 # if isinstance(r, failure.Failure):
452 # self.log.error('Unexpected error in ipcluster:')
453 # self.log.info(r.getTraceback())
454 self.log.error("IPython cluster: stopping")
455 # These return deferreds. We are not doing anything with them
456 # but we are holding refs to them as a reminder that they
457 # do return deferreds.
458 d1 = self.stop_engines()
459 d2 = self.stop_controller()
460 # Wait a few seconds to let things shut down.
461 dc = ioloop.DelayedCallback(self.loop.stop, 4000, self.loop)
462 dc.start()
463 # reactor.callLater(4.0, reactor.stop)
464
465 def sigint_handler(self, signum, frame):
466 self.stop_launchers()
467
468 def start_logging(self):
469 # Remove old log files of the controller and engine
470 if self.master_config.Global.clean_logs:
471 log_dir = self.master_config.Global.log_dir
472 for f in os.listdir(log_dir):
473 if re.match(r'ip(engine|controller)z-\d+\.(log|err|out)',f):
474 os.remove(os.path.join(log_dir, f))
475 # This will remove old log files for ipcluster itself
476 super(IPClusterApp, self).start_logging()
477
478 def start_app(self):
479 """Start the application, depending on what subcommand is used."""
480 subcmd = self.master_config.Global.subcommand
481 if subcmd=='create' or subcmd=='list':
482 return
483 elif subcmd=='start':
484 self.start_app_start()
485 elif subcmd=='stop':
486 self.start_app_stop()
487 elif subcmd=='engines':
488 self.start_app_engines()
489
490 def start_app_start(self):
491 """Start the app for the start subcommand."""
492 config = self.master_config
493 # First see if the cluster is already running
494 try:
495 pid = self.get_pid_from_file()
496 except PIDFileError:
497 pass
498 else:
499 self.log.critical(
500 'Cluster is already running with [pid=%s]. '
501 'use "ipcluster stop" to stop the cluster.' % pid
502 )
503 # Here I exit with an unusual exit status that other processes
504 # can watch for to learn how I exited.
505 self.exit(ALREADY_STARTED)
506
507 # Now log and daemonize
508 self.log.info(
509 'Starting ipcluster with [daemon=%r]' % config.Global.daemonize
510 )
511 # TODO: Get daemonize working on Windows or as a Windows Server.
512 if config.Global.daemonize:
513 if os.name=='posix':
514 from twisted.scripts._twistd_unix import daemonize
515 daemonize()
516
517 # Now write the new pid file AFTER our new forked pid is active.
518 self.write_pid_file()
519 try:
520 self.loop.start()
521 except KeyboardInterrupt:
522 pass
523 except zmq.ZMQError as e:
524 if e.errno == errno.EINTR:
525 pass
526 else:
527 raise
528 self.remove_pid_file()
529
530 def start_app_engines(self):
531 """Start the app for the start subcommand."""
532 config = self.master_config
533 # First see if the cluster is already running
534
535 # Now log and daemonize
536 self.log.info(
537 'Starting engines with [daemon=%r]' % config.Global.daemonize
538 )
539 # TODO: Get daemonize working on Windows or as a Windows Server.
540 if config.Global.daemonize:
541 if os.name=='posix':
542 from twisted.scripts._twistd_unix import daemonize
543 daemonize()
544
545 # Now write the new pid file AFTER our new forked pid is active.
546 # self.write_pid_file()
547 try:
548 self.loop.start()
549 except KeyboardInterrupt:
550 pass
551 except zmq.ZMQError as e:
552 if e.errno == errno.EINTR:
553 pass
554 else:
555 raise
556 # self.remove_pid_file()
557
558 def start_app_stop(self):
559 """Start the app for the stop subcommand."""
560 config = self.master_config
561 try:
562 pid = self.get_pid_from_file()
563 except PIDFileError:
564 self.log.critical(
565 'Problem reading pid file, cluster is probably not running.'
566 )
567 # Here I exit with an unusual exit status that other processes
568 # can watch for to learn how I exited.
569 self.exit(ALREADY_STOPPED)
570 else:
571 if os.name=='posix':
572 sig = config.Global.signal
573 self.log.info(
574 "Stopping cluster [pid=%r] with [signal=%r]" % (pid, sig)
575 )
576 os.kill(pid, sig)
577 elif os.name=='nt':
578 # As of right now, we don't support daemonize on Windows, so
579 # stop will not do anything. Minimally, it should clean up the
580 # old .pid files.
581 self.remove_pid_file()
582
583
584 def launch_new_instance():
585 """Create and run the IPython cluster."""
586 app = IPClusterApp()
587 app.start()
588
589
590 if __name__ == '__main__':
591 launch_new_instance()
592
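
As a usage sketch, the exit codes defined above (ALREADY_STARTED=10, ALREADY_STOPPED=11, NO_CLUSTER=12) let a wrapper script react to the outcome of a subcommand; the profile name here is a placeholder:

    import subprocess

    ret = subprocess.call(['ipcluster', 'start', '-p', 'mycluster', '-n', '4', '--daemon'])
    if ret == 10:  # ALREADY_STARTED: a pid file already exists for this cluster dir
        subprocess.call(['ipcluster', 'stop', '-p', 'mycluster'])
        ret = subprocess.call(['ipcluster', 'start', '-p', 'mycluster', '-n', '4', '--daemon'])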
@@ -0,0 +1,432 b''
1 #!/usr/bin/env python
2 # encoding: utf-8
3 """
4 The IPython controller application.
5 """
6
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2008-2009 The IPython Development Team
9 #
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
13
14 #-----------------------------------------------------------------------------
15 # Imports
16 #-----------------------------------------------------------------------------
17
18 from __future__ import with_statement
19
20 import copy
21 import os
22 import logging
23 import socket
24 import stat
25 import sys
26 import uuid
27
28 import zmq
29 from zmq.log.handlers import PUBHandler
30 from zmq.utils import jsonapi as json
31
32 from IPython.config.loader import Config
33
34 from IPython.parallel import factory
35 from .clusterdir import (
36 ApplicationWithClusterDir,
37 ClusterDirConfigLoader
38 )
39 from IPython.parallel.util import disambiguate_ip_address, split_url
40 # from IPython.kernel.fcutil import FCServiceFactory, FURLError
41 from IPython.utils.traitlets import Instance, Unicode
42
43 from IPython.parallel.controller.controller import ControllerFactory
44
45
46 #-----------------------------------------------------------------------------
47 # Module level variables
48 #-----------------------------------------------------------------------------
49
50
51 #: The default config file name for this application
52 default_config_file_name = u'ipcontroller_config.py'
53
54
55 _description = """Start the IPython controller for parallel computing.
56
57 The IPython controller provides a gateway between the IPython engines and
58 clients. The controller needs to be started before the engines and can be
59 configured using command line options or using a cluster directory. Cluster
60 directories contain config, log and security files and are usually located in
61 your ipython directory and named as "cluster_<profile>". See the --profile
62 and --cluster-dir options for details.
63 """
64
65 #-----------------------------------------------------------------------------
66 # Default interfaces
67 #-----------------------------------------------------------------------------
68
69 # The default client interfaces for FCClientServiceFactory.interfaces
70 default_client_interfaces = Config()
71 default_client_interfaces.Default.url_file = 'ipcontroller-client.url'
72
73 # Make this a dict we can pass to Config.__init__ for the default
74 default_client_interfaces = dict(copy.deepcopy(default_client_interfaces.items()))
75
76
77
78 # The default engine interfaces for FCEngineServiceFactory.interfaces
79 default_engine_interfaces = Config()
80 default_engine_interfaces.Default.url_file = u'ipcontroller-engine.url'
81
82 # Make this a dict we can pass to Config.__init__ for the default
83 default_engine_interfaces = dict(copy.deepcopy(default_engine_interfaces.items()))
84
85
86 #-----------------------------------------------------------------------------
87 # Service factories
88 #-----------------------------------------------------------------------------
89
90 #
91 # class FCClientServiceFactory(FCServiceFactory):
92 # """A Foolscap implementation of the client services."""
93 #
94 # cert_file = Unicode(u'ipcontroller-client.pem', config=True)
95 # interfaces = Instance(klass=Config, kw=default_client_interfaces,
96 # allow_none=False, config=True)
97 #
98 #
99 # class FCEngineServiceFactory(FCServiceFactory):
100 # """A Foolscap implementation of the engine services."""
101 #
102 # cert_file = Unicode(u'ipcontroller-engine.pem', config=True)
103 # interfaces = Instance(klass=dict, kw=default_engine_interfaces,
104 # allow_none=False, config=True)
105 #
106
107 #-----------------------------------------------------------------------------
108 # Command line options
109 #-----------------------------------------------------------------------------
110
111
112 class IPControllerAppConfigLoader(ClusterDirConfigLoader):
113
114 def _add_arguments(self):
115 super(IPControllerAppConfigLoader, self)._add_arguments()
116 paa = self.parser.add_argument
117
118 ## Hub Config:
119 paa('--mongodb',
120 dest='HubFactory.db_class', action='store_const',
121 const='IPython.parallel.controller.mongodb.MongoDB',
122 help='Use MongoDB for task storage [default: in-memory]')
123 paa('--sqlite',
124 dest='HubFactory.db_class', action='store_const',
125 const='IPython.parallel.controller.sqlitedb.SQLiteDB',
126 help='Use SQLite3 for DB task storage [default: in-memory]')
127 paa('--hb',
128 type=int, dest='HubFactory.hb', nargs=2,
129 help='The (2) ports the Hub\'s Heartmonitor will use for the heartbeat '
130 'connections [default: random]',
131 metavar='Hub.hb_ports')
132 paa('--ping',
133 type=int, dest='HubFactory.ping',
134 help='The frequency at which the Hub pings the engines for heartbeats '
135 ' (in ms) [default: 100]',
136 metavar='Hub.ping')
137
138 # Client config
139 paa('--client-ip',
140 type=str, dest='HubFactory.client_ip',
141 help='The IP address or hostname the Hub will listen on for '
142 'client connections. Both engine-ip and client-ip can be set simultaneously '
143 'via --ip [default: loopback]',
144 metavar='Hub.client_ip')
145 paa('--client-transport',
146 type=str, dest='HubFactory.client_transport',
147 help='The ZeroMQ transport the Hub will use for '
148 'client connections. Both engine-transport and client-transport can be set simultaneously '
149 'via --transport [default: tcp]',
150 metavar='Hub.client_transport')
151 paa('--query',
152 type=int, dest='HubFactory.query_port',
153 help='The port on which the Hub XREP socket will listen for result queries from clients [default: random]',
154 metavar='Hub.query_port')
155 paa('--notifier',
156 type=int, dest='HubFactory.notifier_port',
157 help='The port on which the Hub PUB socket will listen for notification connections [default: random]',
158 metavar='Hub.notifier_port')
159
160 # Engine config
161 paa('--engine-ip',
162 type=str, dest='HubFactory.engine_ip',
163 help='The IP address or hostname the Hub will listen on for '
164 'engine connections. This applies to the Hub and its schedulers. '
165 'Both engine-ip and client-ip can be set simultaneously '
166 'via --ip [default: loopback]',
167 metavar='Hub.engine_ip')
168 paa('--engine-transport',
169 type=str, dest='HubFactory.engine_transport',
170 help='The ZeroMQ transport the Hub will use for '
171 'engine connections. Both engine-transport and client-transport can be set simultaneously '
172 'via --transport [default: tcp]',
173 metavar='Hub.engine_transport')
174
175 # Scheduler config
176 paa('--mux',
177 type=int, dest='ControllerFactory.mux', nargs=2,
178 help='The (2) ports the MUX scheduler will listen on for client,engine '
179 'connections, respectively [default: random]',
180 metavar='Scheduler.mux_ports')
181 paa('--task',
182 type=int, dest='ControllerFactory.task', nargs=2,
183 help='The (2) ports the Task scheduler will listen on for client,engine '
184 'connections, respectively [default: random]',
185 metavar='Scheduler.task_ports')
186 paa('--control',
187 type=int, dest='ControllerFactory.control', nargs=2,
188 help='The (2) ports the Control scheduler will listen on for client,engine '
189 'connections, respectively [default: random]',
190 metavar='Scheduler.control_ports')
191 paa('--iopub',
192 type=int, dest='ControllerFactory.iopub', nargs=2,
193 help='The (2) ports the IOPub scheduler will listen on for client,engine '
194 'connections, respectively [default: random]',
195 metavar='Scheduler.iopub_ports')
196
197 paa('--scheme',
198 type=str, dest='HubFactory.scheme',
199 choices = ['pure', 'lru', 'plainrandom', 'weighted', 'twobin','leastload'],
200 help='select the task scheduler scheme [default: Python LRU]',
201 metavar='Scheduler.scheme')
202 paa('--usethreads',
203 dest='ControllerFactory.usethreads', action="store_true",
204 help='Use threads instead of processes for the schedulers',
205 )
206 paa('--hwm',
207 dest='ControllerFactory.hwm', type=int,
208 help='specify the High Water Mark (HWM) for the downstream '
209 'socket in the pure ZMQ scheduler. This is the maximum number '
210 'of allowed outstanding tasks on each engine.',
211 )
212
213 ## Global config
214 paa('--log-to-file',
215 action='store_true', dest='Global.log_to_file',
216 help='Log to a file in the log directory (default is stdout)')
217 paa('--log-url',
218 type=str, dest='Global.log_url',
219 help='Broadcast logs to an iploggerz process [default: disabled]')
220 paa('-r','--reuse-files',
221 action='store_true', dest='Global.reuse_files',
222 help='Try to reuse existing json connection files.')
223 paa('--no-secure',
224 action='store_false', dest='Global.secure',
225 help='Turn off execution keys.')
226 paa('--secure',
227 action='store_true', dest='Global.secure',
228 help='Turn on execution keys (default).')
229 paa('--execkey',
230 type=str, dest='Global.exec_key',
231 help='path to a file containing an execution key.',
232 metavar='keyfile')
233 paa('--ssh',
234 type=str, dest='Global.sshserver',
235 help='ssh url for clients to use when connecting to the Controller '
236 'processes. It should be of the form: [user@]server[:port]. The '
237 'Controller\'s listening addresses must be accessible from the ssh server',
238 metavar='Global.sshserver')
239 paa('--location',
240 type=str, dest='Global.location',
241 help="The external IP or domain name of this machine, used for disambiguating "
242 "engine and client connections.",
243 metavar='Global.location')
244 factory.add_session_arguments(self.parser)
245 factory.add_registration_arguments(self.parser)
246
247
248 #-----------------------------------------------------------------------------
249 # The main application
250 #-----------------------------------------------------------------------------
251
252
253 class IPControllerApp(ApplicationWithClusterDir):
254
255 name = u'ipcontroller'
256 description = _description
257 command_line_loader = IPControllerAppConfigLoader
258 default_config_file_name = default_config_file_name
259 auto_create_cluster_dir = True
260
261
262 def create_default_config(self):
263 super(IPControllerApp, self).create_default_config()
264 # Don't set defaults for Global.secure or Global.reuse_furls
265 # as those are set in a component.
266 self.default_config.Global.import_statements = []
267 self.default_config.Global.clean_logs = True
268 self.default_config.Global.secure = True
269 self.default_config.Global.reuse_files = False
270 self.default_config.Global.exec_key = "exec_key.key"
271 self.default_config.Global.sshserver = None
272 self.default_config.Global.location = None
273
274 def pre_construct(self):
275 super(IPControllerApp, self).pre_construct()
276 c = self.master_config
277 # The defaults for these are set in FCClientServiceFactory and
278 # FCEngineServiceFactory, so we only set them here if the global
279 # options have be set to override the class level defaults.
280
281 # if hasattr(c.Global, 'reuse_furls'):
282 # c.FCClientServiceFactory.reuse_furls = c.Global.reuse_furls
283 # c.FCEngineServiceFactory.reuse_furls = c.Global.reuse_furls
284 # del c.Global.reuse_furls
285 # if hasattr(c.Global, 'secure'):
286 # c.FCClientServiceFactory.secure = c.Global.secure
287 # c.FCEngineServiceFactory.secure = c.Global.secure
288 # del c.Global.secure
289
290 def save_connection_dict(self, fname, cdict):
291 """save a connection dict to json file."""
292 c = self.master_config
293 url = cdict['url']
294 location = cdict['location']
295 if not location:
296 try:
297 proto,ip,port = split_url(url)
298 except AssertionError:
299 pass
300 else:
301 location = socket.gethostbyname_ex(socket.gethostname())[2][-1]
302 cdict['location'] = location
303 fname = os.path.join(c.Global.security_dir, fname)
304 with open(fname, 'w') as f:
305 f.write(json.dumps(cdict, indent=2))
306 os.chmod(fname, stat.S_IRUSR|stat.S_IWUSR)
307
308 def load_config_from_json(self):
309 """load config from existing json connector files."""
310 c = self.master_config
311 # load from engine config
312 with open(os.path.join(c.Global.security_dir, 'ipcontroller-engine.json')) as f:
313 cfg = json.loads(f.read())
314 key = c.SessionFactory.exec_key = cfg['exec_key']
315 xport,addr = cfg['url'].split('://')
316 c.HubFactory.engine_transport = xport
317 ip,ports = addr.split(':')
318 c.HubFactory.engine_ip = ip
319 c.HubFactory.regport = int(ports)
320 c.Global.location = cfg['location']
321
322 # load client config
323 with open(os.path.join(c.Global.security_dir, 'ipcontroller-client.json')) as f:
324 cfg = json.loads(f.read())
325 assert key == cfg['exec_key'], "exec_key mismatch between engine and client keys"
326 xport,addr = cfg['url'].split('://')
327 c.HubFactory.client_transport = xport
328 ip,ports = addr.split(':')
329 c.HubFactory.client_ip = ip
330 c.Global.sshserver = cfg['ssh']
331 assert int(ports) == c.HubFactory.regport, "regport mismatch"
332
333 def construct(self):
334 # This is the working dir by now.
335 sys.path.insert(0, '')
336 c = self.master_config
337
338 self.import_statements()
339 reusing = c.Global.reuse_files
340 if reusing:
341 try:
342 self.load_config_from_json()
343 except (AssertionError,IOError):
344 reusing=False
345 # check again, because reusing may have failed:
346 if reusing:
347 pass
348 elif c.Global.secure:
349 keyfile = os.path.join(c.Global.security_dir, c.Global.exec_key)
350 key = str(uuid.uuid4())
351 with open(keyfile, 'w') as f:
352 f.write(key)
353 os.chmod(keyfile, stat.S_IRUSR|stat.S_IWUSR)
354 c.SessionFactory.exec_key = key
355 else:
356 c.SessionFactory.exec_key = ''
357 key = None
358
359 try:
360 self.factory = ControllerFactory(config=c, logname=self.log.name)
361 self.start_logging()
362 self.factory.construct()
363 except:
364 self.log.error("Couldn't construct the Controller", exc_info=True)
365 self.exit(1)
366
367 if not reusing:
368 # save to new json config files
369 f = self.factory
370 cdict = {'exec_key' : key,
371 'ssh' : c.Global.sshserver,
372 'url' : "%s://%s:%s"%(f.client_transport, f.client_ip, f.regport),
373 'location' : c.Global.location
374 }
375 self.save_connection_dict('ipcontroller-client.json', cdict)
376 edict = cdict
377 edict['url']="%s://%s:%s"%(f.engine_transport, f.engine_ip, f.regport)
378 self.save_connection_dict('ipcontroller-engine.json', edict)
379
380
381 def save_urls(self):
382 """save the registration urls to files."""
383 c = self.master_config
384
385 sec_dir = c.Global.security_dir
386 cf = self.factory
387
388 with open(os.path.join(sec_dir, 'ipcontroller-engine.url'), 'w') as f:
389 f.write("%s://%s:%s"%(cf.engine_transport, cf.engine_ip, cf.regport))
390
391 with open(os.path.join(sec_dir, 'ipcontroller-client.url'), 'w') as f:
392 f.write("%s://%s:%s"%(cf.client_transport, cf.client_ip, cf.regport))
393
394
395 def import_statements(self):
396 statements = self.master_config.Global.import_statements
397 for s in statements:
398 try:
399 self.log.info("Executing statement: '%s'" % s)
400 exec s in globals(), locals()
401 except:
402 self.log.error("Error running statement: %s" % s)
403
404 def start_logging(self):
405 super(IPControllerApp, self).start_logging()
406 if self.master_config.Global.log_url:
407 context = self.factory.context
408 lsock = context.socket(zmq.PUB)
409 lsock.connect(self.master_config.Global.log_url)
410 handler = PUBHandler(lsock)
411 handler.root_topic = 'controller'
412 handler.setLevel(self.log_level)
413 self.log.addHandler(handler)
414 #
415 def start_app(self):
416 # Start the subprocesses:
417 self.factory.start()
418 self.write_pid_file(overwrite=True)
419 try:
420 self.factory.loop.start()
421 except KeyboardInterrupt:
422 self.log.critical("Interrupted, Exiting...\n")
423
424
425 def launch_new_instance():
426 """Create and run the IPython controller"""
427 app = IPControllerApp()
428 app.start()
429
430
431 if __name__ == '__main__':
432 launch_new_instance()
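construct() above writes a pair of JSON connection files, ipcontroller-client.json and ipcontroller-engine.json, each holding the keys 'exec_key', 'ssh', 'url' and 'location'. A hedged sketch of reading one back, mirroring the parsing done in load_config_from_json(); the path is a placeholder:

    # Sketch: read a connection file written by save_connection_dict().
    import json

    def read_connection_file(path):
        with open(path) as f:
            cfg = json.load(f)
        transport, addr = cfg['url'].split('://')   # e.g. 'tcp' and '127.0.0.1:10101'
        ip, port = addr.split(':')
        return cfg['exec_key'], transport, ip, int(port), cfg['location'], cfg['ssh']

    # read_connection_file('security/ipcontroller-client.json')  # placeholder path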
@@ -0,0 +1,295 b''
1 #!/usr/bin/env python
2 # encoding: utf-8
3 """
4 The IPython engine application
5 """
6
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2008-2009 The IPython Development Team
9 #
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
13
14 #-----------------------------------------------------------------------------
15 # Imports
16 #-----------------------------------------------------------------------------
17
18 import json
19 import os
20 import sys
21
22 import zmq
23 from zmq.eventloop import ioloop
24
25 from .clusterdir import (
26 ApplicationWithClusterDir,
27 ClusterDirConfigLoader
28 )
29 from IPython.zmq.log import EnginePUBHandler
30
31 from IPython.parallel import factory
32 from IPython.parallel.engine.engine import EngineFactory
33 from IPython.parallel.engine.streamkernel import Kernel
34 from IPython.parallel.util import disambiguate_url
35
36 from IPython.utils.importstring import import_item
37
38
39 #-----------------------------------------------------------------------------
40 # Module level variables
41 #-----------------------------------------------------------------------------
42
43 #: The default config file name for this application
44 default_config_file_name = u'ipengine_config.py'
45
46
47 mpi4py_init = """from mpi4py import MPI as mpi
48 mpi.size = mpi.COMM_WORLD.Get_size()
49 mpi.rank = mpi.COMM_WORLD.Get_rank()
50 """
51
52
53 pytrilinos_init = """from PyTrilinos import Epetra
54 class SimpleStruct:
55 pass
56 mpi = SimpleStruct()
57 mpi.rank = 0
58 mpi.size = 0
59 """
60
61
62 _description = """Start an IPython engine for parallel computing.\n\n
63
64 IPython engines run in parallel and perform computations on behalf of a client
65 and controller. A controller needs to be started before the engines. The
66 engine can be configured using command line options or using a cluster
67 directory. Cluster directories contain config, log and security files and are
68 usually located in your ipython directory and named as "cluster_<profile>".
69 See the --profile and --cluster-dir options for details.
70 """
71
72 #-----------------------------------------------------------------------------
73 # Command line options
74 #-----------------------------------------------------------------------------
75
76
77 class IPEngineAppConfigLoader(ClusterDirConfigLoader):
78
79 def _add_arguments(self):
80 super(IPEngineAppConfigLoader, self)._add_arguments()
81 paa = self.parser.add_argument
82 # Controller config
83 paa('--file', '-f',
84 type=unicode, dest='Global.url_file',
85 help='The full location of the file containing the connection information for '
86 'the controller. If this is not given, the file must be in the '
87 'security directory of the cluster directory. This location is '
88 'resolved using the --profile and --app-dir options.',
89 metavar='Global.url_file')
90 # MPI
91 paa('--mpi',
92 type=str, dest='MPI.use',
93 help='How to enable MPI (mpi4py, pytrilinos, or empty string to disable).',
94 metavar='MPI.use')
95 # Global config
96 paa('--log-to-file',
97 action='store_true', dest='Global.log_to_file',
98 help='Log to a file in the log directory (default is stdout)')
99 paa('--log-url',
100 dest='Global.log_url',
101 help="url of ZMQ logger, as started with iploggerz")
102 # paa('--execkey',
103 # type=str, dest='Global.exec_key',
104 # help='path to a file containing an execution key.',
105 # metavar='keyfile')
106 # paa('--no-secure',
107 # action='store_false', dest='Global.secure',
108 # help='Turn off execution keys.')
109 # paa('--secure',
110 # action='store_true', dest='Global.secure',
111 # help='Turn on execution keys (default).')
112 # init command
113 paa('-c',
114 type=str, dest='Global.extra_exec_lines',
115 help='specify a command to be run at startup')
116
117 factory.add_session_arguments(self.parser)
118 factory.add_registration_arguments(self.parser)
119
120
121 #-----------------------------------------------------------------------------
122 # Main application
123 #-----------------------------------------------------------------------------
124
125
126 class IPEngineApp(ApplicationWithClusterDir):
127
128 name = u'ipengine'
129 description = _description
130 command_line_loader = IPEngineAppConfigLoader
131 default_config_file_name = default_config_file_name
132 auto_create_cluster_dir = True
133
134 def create_default_config(self):
135 super(IPEngineApp, self).create_default_config()
136
137 # The engine should not clean logs as we don't want to remove the
138 # active log files of other running engines.
139 self.default_config.Global.clean_logs = False
140 self.default_config.Global.secure = True
141
142 # Global config attributes
143 self.default_config.Global.exec_lines = []
144 self.default_config.Global.extra_exec_lines = ''
145
146 # Configuration related to the controller
147 # This must match the filename (path not included) that the controller
148 # used for the connection file.
149 self.default_config.Global.url_file = u''
150 self.default_config.Global.url_file_name = u'ipcontroller-engine.json'
151 # If given, this is the actual location of the controller's connection file.
152 # If not, this is computed using the profile, app_dir and url_file_name.
153 # self.default_config.Global.key_file_name = u'exec_key.key'
154 # self.default_config.Global.key_file = u''
155
156 # MPI related config attributes
157 self.default_config.MPI.use = ''
158 self.default_config.MPI.mpi4py = mpi4py_init
159 self.default_config.MPI.pytrilinos = pytrilinos_init
160
161 def post_load_command_line_config(self):
162 pass
163
164 def pre_construct(self):
165 super(IPEngineApp, self).pre_construct()
166 # self.find_cont_url_file()
167 self.find_url_file()
168 if self.master_config.Global.extra_exec_lines:
169 self.master_config.Global.exec_lines.append(self.master_config.Global.extra_exec_lines)
170
171 # def find_key_file(self):
172 # """Set the key file.
173 #
174 # Here we don't try to actually see if it exists for is valid as that
175 # is hadled by the connection logic.
176 # """
177 # config = self.master_config
178 # # Find the actual controller key file
179 # if not config.Global.key_file:
180 # try_this = os.path.join(
181 # config.Global.cluster_dir,
182 # config.Global.security_dir,
183 # config.Global.key_file_name
184 # )
185 # config.Global.key_file = try_this
186
187 def find_url_file(self):
188 """Set the url file.
189 
190 Here we don't try to actually see if it exists or is valid, as that
191 is handled by the connection logic.
192 """
193 config = self.master_config
194 # Find the actual controller url file
195 if not config.Global.url_file:
196 try_this = os.path.join(
197 config.Global.cluster_dir,
198 config.Global.security_dir,
199 config.Global.url_file_name
200 )
201 config.Global.url_file = try_this
202
203 def construct(self):
204 # This is the working dir by now.
205 sys.path.insert(0, '')
206 config = self.master_config
207 # if os.path.exists(config.Global.key_file) and config.Global.secure:
208 # config.SessionFactory.exec_key = config.Global.key_file
209 if os.path.exists(config.Global.url_file):
210 with open(config.Global.url_file) as f:
211 d = json.loads(f.read())
212 for k,v in d.iteritems():
213 if isinstance(v, unicode):
214 d[k] = v.encode()
215 if d['exec_key']:
216 config.SessionFactory.exec_key = d['exec_key']
217 d['url'] = disambiguate_url(d['url'], d['location'])
218 config.RegistrationFactory.url=d['url']
219 config.EngineFactory.location = d['location']
220
221
222
223 config.Kernel.exec_lines = config.Global.exec_lines
224
225 self.start_mpi()
226
227 # Create the underlying shell class and EngineService
228 # shell_class = import_item(self.master_config.Global.shell_class)
229 try:
230 self.engine = EngineFactory(config=config, logname=self.log.name)
231 except:
232 self.log.error("Couldn't start the Engine", exc_info=True)
233 self.exit(1)
234
235 self.start_logging()
236
237 # Create the service hierarchy
238 # self.main_service = service.MultiService()
239 # self.engine_service.setServiceParent(self.main_service)
240 # self.tub_service = Tub()
241 # self.tub_service.setServiceParent(self.main_service)
242 # # This needs to be called before the connection is initiated
243 # self.main_service.startService()
244
245 # This initiates the connection to the controller and calls
246 # register_engine to tell the controller we are ready to do work
247 # self.engine_connector = EngineConnector(self.tub_service)
248
249 # self.log.info("Using furl file: %s" % self.master_config.Global.furl_file)
250
251 # reactor.callWhenRunning(self.call_connect)
252
253
254 def start_logging(self):
255 super(IPEngineApp, self).start_logging()
256 if self.master_config.Global.log_url:
257 context = self.engine.context
258 lsock = context.socket(zmq.PUB)
259 lsock.connect(self.master_config.Global.log_url)
260 handler = EnginePUBHandler(self.engine, lsock)
261 handler.setLevel(self.log_level)
262 self.log.addHandler(handler)
263
264 def start_mpi(self):
265 global mpi
266 mpikey = self.master_config.MPI.use
267 mpi_import_statement = self.master_config.MPI.get(mpikey, None)
268 if mpi_import_statement is not None:
269 try:
270 self.log.info("Initializing MPI:")
271 self.log.info(mpi_import_statement)
272 exec mpi_import_statement in globals()
273 except:
274 mpi = None
275 else:
276 mpi = None
277
278
279 def start_app(self):
280 self.engine.start()
281 try:
282 self.engine.loop.start()
283 except KeyboardInterrupt:
284 self.log.critical("Engine Interrupted, shutting down...\n")
285
286
287 def launch_new_instance():
288 """Create and run the IPython engine"""
289 app = IPEngineApp()
290 app.start()
291
292
293 if __name__ == '__main__':
294 launch_new_instance()
295
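start_mpi() above selects an initialization snippet from the MPI config section by the name passed with --mpi and exec's it, so 'mpi4py' and 'pytrilinos' simply pick one of the two strings defined near the top of the file. A simplified stand-alone sketch of that lookup, with a plain dict standing in for the real Config object and assuming mpi4py is importable:

    # Sketch of the --mpi lookup; a plain dict stands in for master_config.MPI.
    mpi_config = {
        'use': 'mpi4py',
        'mpi4py': ("from mpi4py import MPI as mpi\n"
                   "mpi.size = mpi.COMM_WORLD.Get_size()\n"
                   "mpi.rank = mpi.COMM_WORLD.Get_rank()\n"),
    }

    snippet = mpi_config.get(mpi_config['use'], None)
    if snippet is not None:
        exec snippet in globals()   # Python 2 exec statement, matching the code above
    else:
        mpi = None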
@@ -0,0 +1,132 b''
1 #!/usr/bin/env python
2 # encoding: utf-8
3 """
4 A simple IPython logger application
5 """
6
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2011 The IPython Development Team
9 #
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
13
14 #-----------------------------------------------------------------------------
15 # Imports
16 #-----------------------------------------------------------------------------
17
18 import os
19 import sys
20
21 import zmq
22
23 from .clusterdir import (
24 ApplicationWithClusterDir,
25 ClusterDirConfigLoader
26 )
27 from .logwatcher import LogWatcher
28
29 #-----------------------------------------------------------------------------
30 # Module level variables
31 #-----------------------------------------------------------------------------
32
33 #: The default config file name for this application
34 default_config_file_name = u'iplogger_config.py'
35
36 _description = """Start an IPython logger for parallel computing.\n\n
37
38 IPython controllers and engines (and your own processes) can broadcast log messages
39 by registering a `zmq.log.handlers.PUBHandler` with the `logging` module. The
40 logger can be configured using command line options or using a cluster
41 directory. Cluster directories contain config, log and security files and are
42 usually located in your ipython directory and named as "cluster_<profile>".
43 See the --profile and --cluster-dir options for details.
44 """
45
46 #-----------------------------------------------------------------------------
47 # Command line options
48 #-----------------------------------------------------------------------------
49
50
51 class IPLoggerAppConfigLoader(ClusterDirConfigLoader):
52
53 def _add_arguments(self):
54 super(IPLoggerAppConfigLoader, self)._add_arguments()
55 paa = self.parser.add_argument
56 # LogWatcher config
57 paa('--url',
58 type=str, dest='LogWatcher.url',
59 help='The url the LogWatcher will listen on',
60 )
61 # LogWatcher topics
62 paa('--topics',
63 type=str, dest='LogWatcher.topics', nargs='+',
64 help='What topics to subscribe to',
65 metavar='topics')
66 # Global config
67 paa('--log-to-file',
68 action='store_true', dest='Global.log_to_file',
69 help='Log to a file in the log directory (default is stdout)')
70
71
72 #-----------------------------------------------------------------------------
73 # Main application
74 #-----------------------------------------------------------------------------
75
76
77 class IPLoggerApp(ApplicationWithClusterDir):
78
79 name = u'iploggerz'
80 description = _description
81 command_line_loader = IPLoggerAppConfigLoader
82 default_config_file_name = default_config_file_name
83 auto_create_cluster_dir = True
84
85 def create_default_config(self):
86 super(IPLoggerApp, self).create_default_config()
87
88 # The logger should not clean logs as we don't want to remove the
89 # active log files of other running engines.
90 self.default_config.Global.clean_logs = False
91
92 # If given, this is the actual location of the logger's URL file.
93 # If not, this is computed using the profile, app_dir and url_file_name.
94 self.default_config.Global.url_file_name = u'iplogger.url'
95 self.default_config.Global.url_file = u''
96
97 def post_load_command_line_config(self):
98 pass
99
100 def pre_construct(self):
101 super(IPLoggerApp, self).pre_construct()
102
103 def construct(self):
104 # This is the working dir by now.
105 sys.path.insert(0, '')
106
107 self.start_logging()
108
109 try:
110 self.watcher = LogWatcher(config=self.master_config, logname=self.log.name)
111 except:
112 self.log.error("Couldn't start the LogWatcher", exc_info=True)
113 self.exit(1)
114
115
116 def start_app(self):
117 try:
118 self.watcher.start()
119 self.watcher.loop.start()
120 except KeyboardInterrupt:
121 self.log.critical("Logging Interrupted, shutting down...\n")
122
123
124 def launch_new_instance():
125 """Create and run the IPython LogWatcher"""
126 app = IPLoggerApp()
127 app.start()
128
129
130 if __name__ == '__main__':
131 launch_new_instance()
132
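Any process can feed this logger: it only needs to attach pyzmq's PUBHandler to the standard logging module and connect a PUB socket to the LogWatcher's url, which is what start_logging() does in the controller and engine apps above. A minimal sketch; the url and topic are placeholders:

    # Sketch: publish log records for iplogger/LogWatcher to subscribe to.
    import logging
    import zmq
    from zmq.log.handlers import PUBHandler

    ctx = zmq.Context()
    lsock = ctx.socket(zmq.PUB)
    lsock.connect('tcp://127.0.0.1:20202')   # placeholder; use the LogWatcher url in use
    handler = PUBHandler(lsock)
    handler.root_topic = 'myapp'             # topic prefix the watcher can subscribe to
    logging.getLogger().addHandler(handler)
    logging.getLogger().setLevel(logging.INFO)
    logging.getLogger().info("hello from a monitored process")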
This diff has been collapsed as it changes many lines (971 lines changed).
@@ -0,0 +1,971 b''
1 #!/usr/bin/env python
2 # encoding: utf-8
3 """
4 Facilities for launching IPython processes asynchronously.
5 """
6
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2008-2009 The IPython Development Team
9 #
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
13
14 #-----------------------------------------------------------------------------
15 # Imports
16 #-----------------------------------------------------------------------------
17
18 import copy
19 import logging
20 import os
21 import re
22 import stat
23
24 from signal import SIGINT, SIGTERM
25 try:
26 from signal import SIGKILL
27 except ImportError:
28 SIGKILL=SIGTERM
29
30 from subprocess import Popen, PIPE, STDOUT
31 try:
32 from subprocess import check_output
33 except ImportError:
34 # pre-2.7, define check_output with Popen
35 def check_output(*args, **kwargs):
36 kwargs.update(dict(stdout=PIPE))
37 p = Popen(*args, **kwargs)
38 out,err = p.communicate()
39 return out
40
41 from zmq.eventloop import ioloop
42
43 from IPython.external import Itpl
44 # from IPython.config.configurable import Configurable
45 from IPython.utils.traitlets import Any, Str, Int, List, Unicode, Dict, Instance, CUnicode
46 from IPython.utils.path import get_ipython_module_path
47 from IPython.utils.process import find_cmd, pycmd2argv, FindCmdError
48
49 from IPython.parallel.factory import LoggingFactory
50
51 # load winhpcjob only on Windows
52 try:
53 from .winhpcjob import (
54 IPControllerTask, IPEngineTask,
55 IPControllerJob, IPEngineSetJob
56 )
57 except ImportError:
58 pass
59
60
61 #-----------------------------------------------------------------------------
62 # Paths to the kernel apps
63 #-----------------------------------------------------------------------------
64
65
66 ipcluster_cmd_argv = pycmd2argv(get_ipython_module_path(
67 'IPython.parallel.apps.ipclusterapp'
68 ))
69
70 ipengine_cmd_argv = pycmd2argv(get_ipython_module_path(
71 'IPython.parallel.apps.ipengineapp'
72 ))
73
74 ipcontroller_cmd_argv = pycmd2argv(get_ipython_module_path(
75 'IPython.parallel.apps.ipcontrollerapp'
76 ))
77
78 #-----------------------------------------------------------------------------
79 # Base launchers and errors
80 #-----------------------------------------------------------------------------
81
82
83 class LauncherError(Exception):
84 pass
85
86
87 class ProcessStateError(LauncherError):
88 pass
89
90
91 class UnknownStatus(LauncherError):
92 pass
93
94
95 class BaseLauncher(LoggingFactory):
96 """An abstraction for starting, stopping and signaling a process."""
97
98 # In all of the launchers, the work_dir is where child processes will be
99 # run. This will usually be the cluster_dir, but may not be. Any work_dir
100 # passed into the __init__ method will override the config value.
101 # This should not be used to set the work_dir for the actual engine
102 # and controller. Instead, use their own config files or the
103 # controller_args, engine_args attributes of the launchers to add
104 # the --work-dir option.
105 work_dir = Unicode(u'.')
106 loop = Instance('zmq.eventloop.ioloop.IOLoop')
107
108 start_data = Any()
109 stop_data = Any()
110
111 def _loop_default(self):
112 return ioloop.IOLoop.instance()
113
114 def __init__(self, work_dir=u'.', config=None, **kwargs):
115 super(BaseLauncher, self).__init__(work_dir=work_dir, config=config, **kwargs)
116 self.state = 'before' # can be before, running, after
117 self.stop_callbacks = []
118 self.start_data = None
119 self.stop_data = None
120
121 @property
122 def args(self):
123 """A list of cmd and args that will be used to start the process.
124
125 This is what is passed to :func:`spawnProcess` and the first element
126 will be the process name.
127 """
128 return self.find_args()
129
130 def find_args(self):
131 """The ``.args`` property calls this to find the args list.
132
133 Subclasses should implement this to construct the cmd and args.
134 """
135 raise NotImplementedError('find_args must be implemented in a subclass')
136
137 @property
138 def arg_str(self):
139 """The string form of the program arguments."""
140 return ' '.join(self.args)
141
142 @property
143 def running(self):
144 """Am I running."""
145 if self.state == 'running':
146 return True
147 else:
148 return False
149
150 def start(self):
151 """Start the process.
152
153 This must return a deferred that fires with information about the
154 process starting (like a pid, job id, etc.).
155 """
156 raise NotImplementedError('start must be implemented in a subclass')
157
158 def stop(self):
159 """Stop the process and notify observers of stopping.
160
161 This must return a deferred that fires with information about the
162 processing stopping, like errors that occur while the process is
163 attempting to be shut down. This deferred won't fire when the process
164 actually stops. To observe the actual process stopping, see
165 :func:`on_stop`.
166 """
167 raise NotImplementedError('stop must be implemented in a subclass')
168
169 def on_stop(self, f):
170 """Get a deferred that will fire when the process stops.
171
172 The deferred will fire with data that contains information about
173 the exit status of the process.
174 """
175 if self.state=='after':
176 return f(self.stop_data)
177 else:
178 self.stop_callbacks.append(f)
179
180 def notify_start(self, data):
181 """Call this to trigger startup actions.
182
183 This logs the process startup and sets the state to 'running'. It is
184 a pass-through so it can be used as a callback.
185 """
186
187 self.log.info('Process %r started: %r' % (self.args[0], data))
188 self.start_data = data
189 self.state = 'running'
190 return data
191
192 def notify_stop(self, data):
193 """Call this to trigger process stop actions.
194
195 This logs the process stopping and sets the state to 'after'. Call
196 this to trigger all the deferreds from :func:`on_stop`."""
197
198 self.log.info('Process %r stopped: %r' % (self.args[0], data))
199 self.stop_data = data
200 self.state = 'after'
201 for i in range(len(self.stop_callbacks)):
202 d = self.stop_callbacks.pop()
203 d(data)
204 return data
205
206 def signal(self, sig):
207 """Signal the process.
208
209 Return a semi-meaningless deferred after signaling the process.
210
211 Parameters
212 ----------
213 sig : str or int
214 'KILL', 'INT', etc., or any signal number
215 """
216 raise NotImplementedError('signal must be implemented in a subclass')
217
218
219 #-----------------------------------------------------------------------------
220 # Local process launchers
221 #-----------------------------------------------------------------------------
222
223
224 class LocalProcessLauncher(BaseLauncher):
225 """Start and stop an external process in an asynchronous manner.
226
227 This will launch the external process with a working directory of
228 ``self.work_dir``.
229 """
230
231 # This is used to construct self.args, which is passed to
232 # spawnProcess.
233 cmd_and_args = List([])
234 poll_frequency = Int(100) # in ms
235
236 def __init__(self, work_dir=u'.', config=None, **kwargs):
237 super(LocalProcessLauncher, self).__init__(
238 work_dir=work_dir, config=config, **kwargs
239 )
240 self.process = None
241 self.start_deferred = None
242 self.poller = None
243
244 def find_args(self):
245 return self.cmd_and_args
246
247 def start(self):
248 if self.state == 'before':
249 self.process = Popen(self.args,
250 stdout=PIPE,stderr=PIPE,stdin=PIPE,
251 env=os.environ,
252 cwd=self.work_dir
253 )
254
255 self.loop.add_handler(self.process.stdout.fileno(), self.handle_stdout, self.loop.READ)
256 self.loop.add_handler(self.process.stderr.fileno(), self.handle_stderr, self.loop.READ)
257 self.poller = ioloop.PeriodicCallback(self.poll, self.poll_frequency, self.loop)
258 self.poller.start()
259 self.notify_start(self.process.pid)
260 else:
261 s = 'The process was already started and has state: %r' % self.state
262 raise ProcessStateError(s)
263
264 def stop(self):
265 return self.interrupt_then_kill()
266
267 def signal(self, sig):
268 if self.state == 'running':
269 self.process.send_signal(sig)
270
271 def interrupt_then_kill(self, delay=2.0):
272 """Send INT, wait a delay and then send KILL."""
273 self.signal(SIGINT)
274 self.killer = ioloop.DelayedCallback(lambda : self.signal(SIGKILL), delay*1000, self.loop)
275 self.killer.start()
276
277 # callbacks, etc:
278
279 def handle_stdout(self, fd, events):
280 line = self.process.stdout.readline()
281 # a stopped process will be readable but return empty strings
282 if line:
283 self.log.info(line[:-1])
284 else:
285 self.poll()
286
287 def handle_stderr(self, fd, events):
288 line = self.process.stderr.readline()
289 # a stopped process will be readable but return empty strings
290 if line:
291 self.log.error(line[:-1])
292 else:
293 self.poll()
294
295 def poll(self):
296 status = self.process.poll()
297 if status is not None:
298 self.poller.stop()
299 self.loop.remove_handler(self.process.stdout.fileno())
300 self.loop.remove_handler(self.process.stderr.fileno())
301 self.notify_stop(dict(exit_code=status, pid=self.process.pid))
302 return status
303
304 class LocalControllerLauncher(LocalProcessLauncher):
305 """Launch a controller as a regular external process."""
306
307 controller_cmd = List(ipcontroller_cmd_argv, config=True)
308 # Command line arguments to ipcontroller.
309 controller_args = List(['--log-to-file','--log-level', str(logging.INFO)], config=True)
310
311 def find_args(self):
312 return self.controller_cmd + self.controller_args
313
314 def start(self, cluster_dir):
315 """Start the controller by cluster_dir."""
316 self.controller_args.extend(['--cluster-dir', cluster_dir])
317 self.cluster_dir = unicode(cluster_dir)
318 self.log.info("Starting LocalControllerLauncher: %r" % self.args)
319 return super(LocalControllerLauncher, self).start()
320
321
322 class LocalEngineLauncher(LocalProcessLauncher):
323 """Launch a single engine as a regular external process."""
324
325 engine_cmd = List(ipengine_cmd_argv, config=True)
326 # Command line arguments for ipengine.
327 engine_args = List(
328 ['--log-to-file','--log-level', str(logging.INFO)], config=True
329 )
330
331 def find_args(self):
332 return self.engine_cmd + self.engine_args
333
334 def start(self, cluster_dir):
335 """Start the engine by cluster_dir."""
336 self.engine_args.extend(['--cluster-dir', cluster_dir])
337 self.cluster_dir = unicode(cluster_dir)
338 return super(LocalEngineLauncher, self).start()
339
340
341 class LocalEngineSetLauncher(BaseLauncher):
342 """Launch a set of engines as regular external processes."""
343
344 # Command line arguments for ipengine.
345 engine_args = List(
346 ['--log-to-file','--log-level', str(logging.INFO)], config=True
347 )
348 # launcher class
349 launcher_class = LocalEngineLauncher
350
351 launchers = Dict()
352 stop_data = Dict()
353
354 def __init__(self, work_dir=u'.', config=None, **kwargs):
355 super(LocalEngineSetLauncher, self).__init__(
356 work_dir=work_dir, config=config, **kwargs
357 )
358 self.stop_data = {}
359
360 def start(self, n, cluster_dir):
361 """Start n engines by profile or cluster_dir."""
362 self.cluster_dir = unicode(cluster_dir)
363 dlist = []
364 for i in range(n):
365 el = self.launcher_class(work_dir=self.work_dir, config=self.config, logname=self.log.name)
366 # Copy the engine args over to each engine launcher.
367 el.engine_args = copy.deepcopy(self.engine_args)
368 el.on_stop(self._notice_engine_stopped)
369 d = el.start(cluster_dir)
370 if i==0:
371 self.log.info("Starting LocalEngineSetLauncher: %r" % el.args)
372 self.launchers[i] = el
373 dlist.append(d)
374 self.notify_start(dlist)
375 # The consumeErrors here could be dangerous
376 # dfinal = gatherBoth(dlist, consumeErrors=True)
377 # dfinal.addCallback(self.notify_start)
378 return dlist
379
380 def find_args(self):
381 return ['engine set']
382
383 def signal(self, sig):
384 dlist = []
385 for el in self.launchers.itervalues():
386 d = el.signal(sig)
387 dlist.append(d)
388 # dfinal = gatherBoth(dlist, consumeErrors=True)
389 return dlist
390
391 def interrupt_then_kill(self, delay=1.0):
392 dlist = []
393 for el in self.launchers.itervalues():
394 d = el.interrupt_then_kill(delay)
395 dlist.append(d)
396 # dfinal = gatherBoth(dlist, consumeErrors=True)
397 return dlist
398
399 def stop(self):
400 return self.interrupt_then_kill()
401
402 def _notice_engine_stopped(self, data):
403 pid = data['pid']
404 for idx,el in self.launchers.iteritems():
405 if el.process.pid == pid:
406 break
407 self.launchers.pop(idx)
408 self.stop_data[idx] = data
409 if not self.launchers:
410 self.notify_stop(self.stop_data)
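As the BaseLauncher docstrings above spell out, a concrete launcher mostly just supplies find_args(); LocalProcessLauncher already takes care of Popen, stdout/stderr forwarding and exit polling. A toy subclass, not part of this changeset, to illustrate the contract (it assumes a POSIX `sleep` command is available):

    # Illustrative only: the smallest useful LocalProcessLauncher subclass.
    class SleepLauncher(LocalProcessLauncher):
        """Run `sleep 10` in work_dir, purely to exercise the launcher machinery."""
        def find_args(self):
            return ['sleep', '10']

    # launcher = SleepLauncher(work_dir=u'/tmp')
    # launcher.on_stop(lambda data: None)   # register a stop callback
    # launcher.start()                      # polling requires the zmq ioloop to be running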
411
412
413 #-----------------------------------------------------------------------------
414 # MPIExec launchers
415 #-----------------------------------------------------------------------------
416
417
418 class MPIExecLauncher(LocalProcessLauncher):
419 """Launch an external process using mpiexec."""
420
421 # The mpiexec command to use in starting the process.
422 mpi_cmd = List(['mpiexec'], config=True)
423 # The command line arguments to pass to mpiexec.
424 mpi_args = List([], config=True)
425 # The program to start using mpiexec.
426 program = List(['date'], config=True)
427 # The command line argument to the program.
428 program_args = List([], config=True)
429 # The number of instances of the program to start.
430 n = Int(1, config=True)
431
432 def find_args(self):
433 """Build self.args using all the fields."""
434 return self.mpi_cmd + ['-n', str(self.n)] + self.mpi_args + \
435 self.program + self.program_args
436
437 def start(self, n):
438 """Start n instances of the program using mpiexec."""
439 self.n = n
440 return super(MPIExecLauncher, self).start()
441
442
443 class MPIExecControllerLauncher(MPIExecLauncher):
444 """Launch a controller using mpiexec."""
445
446 controller_cmd = List(ipcontroller_cmd_argv, config=True)
447 # Command line arguments to ipcontroller.
448 controller_args = List(['--log-to-file','--log-level', str(logging.INFO)], config=True)
449 n = Int(1, config=False)
450
451 def start(self, cluster_dir):
452 """Start the controller by cluster_dir."""
453 self.controller_args.extend(['--cluster-dir', cluster_dir])
454 self.cluster_dir = unicode(cluster_dir)
455 self.log.info("Starting MPIExecControllerLauncher: %r" % self.args)
456 return super(MPIExecControllerLauncher, self).start(1)
457
458 def find_args(self):
459 return self.mpi_cmd + ['-n', str(self.n)] + self.mpi_args + \
460 self.controller_cmd + self.controller_args
461
462
463 class MPIExecEngineSetLauncher(MPIExecLauncher):
464
465 program = List(ipengine_cmd_argv, config=True)
466 # Command line arguments for ipengine.
467 program_args = List(
468 ['--log-to-file','--log-level', str(logging.INFO)], config=True
469 )
470 n = Int(1, config=True)
471
472 def start(self, n, cluster_dir):
473 """Start n engines by profile or cluster_dir."""
474 self.program_args.extend(['--cluster-dir', cluster_dir])
475 self.cluster_dir = unicode(cluster_dir)
476 self.n = n
477 self.log.info('Starting MPIExecEngineSetLauncher: %r' % self.args)
478 return super(MPIExecEngineSetLauncher, self).start(n)
479
480 #-----------------------------------------------------------------------------
481 # SSH launchers
482 #-----------------------------------------------------------------------------
483
484 # TODO: Get SSH Launcher working again.
485
486 class SSHLauncher(LocalProcessLauncher):
487 """A minimal launcher for ssh.
488
489 To be useful this will probably have to be extended to use the ``sshx``
490 idea for environment variables. There could be other things this needs
491 as well.
492 """
493
494 ssh_cmd = List(['ssh'], config=True)
495 ssh_args = List(['-tt'], config=True)
496 program = List(['date'], config=True)
497 program_args = List([], config=True)
498 hostname = CUnicode('', config=True)
499 user = CUnicode('', config=True)
500 location = CUnicode('')
501
502 def _hostname_changed(self, name, old, new):
503 if self.user:
504 self.location = u'%s@%s' % (self.user, new)
505 else:
506 self.location = new
507
508 def _user_changed(self, name, old, new):
509 self.location = u'%s@%s' % (new, self.hostname)
510
511 def find_args(self):
512 return self.ssh_cmd + self.ssh_args + [self.location] + \
513 self.program + self.program_args
514
515 def start(self, cluster_dir, hostname=None, user=None):
516 self.cluster_dir = unicode(cluster_dir)
517 if hostname is not None:
518 self.hostname = hostname
519 if user is not None:
520 self.user = user
521
522 return super(SSHLauncher, self).start()
523
524 def signal(self, sig):
525 if self.state == 'running':
526 # send escaped ssh connection-closer
527 self.process.stdin.write('~.')
528 self.process.stdin.flush()
529
530
531
532 class SSHControllerLauncher(SSHLauncher):
533
534 program = List(ipcontroller_cmd_argv, config=True)
535 # Command line arguments to ipcontroller.
536 program_args = List(['-r', '--log-to-file','--log-level', str(logging.INFO)], config=True)
537
538
539 class SSHEngineLauncher(SSHLauncher):
540 program = List(ipengine_cmd_argv, config=True)
541 # Command line arguments for ipengine.
542 program_args = List(
543 ['--log-to-file','--log-level', str(logging.INFO)], config=True
544 )
545
546 class SSHEngineSetLauncher(LocalEngineSetLauncher):
547 launcher_class = SSHEngineLauncher
548 engines = Dict(config=True)
549
550 def start(self, n, cluster_dir):
551 """Start engines by profile or cluster_dir.
552 `n` is ignored, and the `engines` config property is used instead.
553 """
554
555 self.cluster_dir = unicode(cluster_dir)
556 dlist = []
557 for host, n in self.engines.iteritems():
558 if isinstance(n, (tuple, list)):
559 n, args = n
560 else:
561 args = copy.deepcopy(self.engine_args)
562
563 if '@' in host:
564 user,host = host.split('@',1)
565 else:
566 user=None
567 for i in range(n):
568 el = self.launcher_class(work_dir=self.work_dir, config=self.config, logname=self.log.name)
569
570 # Copy the engine args over to each engine launcher.
571 
572 el.program_args = args
573 el.on_stop(self._notice_engine_stopped)
574 d = el.start(cluster_dir, user=user, hostname=host)
575 if i==0:
576 self.log.info("Starting SSHEngineSetLauncher: %r" % el.args)
577 self.launchers[host+str(i)] = el
578 dlist.append(d)
579 self.notify_start(dlist)
580 return dlist
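The engines dict consumed above maps a host (optionally 'user@host') either to a plain engine count or to a (count, args) tuple. A hedged sketch of what the corresponding cluster configuration fragment might look like; the hostnames, user and counts are invented, and get_config() is the usual IPython config-file convention:

    # Sketch of a cluster config fragment for SSHEngineSetLauncher; values are placeholders.
    c = get_config()
    c.SSHEngineSetLauncher.engines = {
        'alice@node1.example.com' : 2,                           # 2 engines, default args
        'node2.example.com'       : (4, ['--log-level', '20']),  # 4 engines, custom args
    }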
581
582
583
584 #-----------------------------------------------------------------------------
585 # Windows HPC Server 2008 scheduler launchers
586 #-----------------------------------------------------------------------------
587
588
589 # This is only used on Windows.
590 def find_job_cmd():
591 if os.name=='nt':
592 try:
593 return find_cmd('job')
594 except FindCmdError:
595 return 'job'
596 else:
597 return 'job'
598
599
600 class WindowsHPCLauncher(BaseLauncher):
601
602 # A regular expression used to get the job id from the output of the
603 # submit_command.
604 job_id_regexp = Str(r'\d+', config=True)
605 # The filename of the instantiated job script.
606 job_file_name = CUnicode(u'ipython_job.xml', config=True)
607 # The full path to the instantiated job script. This gets made dynamically
608 # by combining the work_dir with the job_file_name.
609 job_file = CUnicode(u'')
610 # The hostname of the scheduler to submit the job to
611 scheduler = CUnicode('', config=True)
612 job_cmd = CUnicode(find_job_cmd(), config=True)
613
614 def __init__(self, work_dir=u'.', config=None, **kwargs):
615 super(WindowsHPCLauncher, self).__init__(
616 work_dir=work_dir, config=config, **kwargs
617 )
618
619 @property
620 def job_file(self):
621 return os.path.join(self.work_dir, self.job_file_name)
622
623 def write_job_file(self, n):
624 raise NotImplementedError("Implement write_job_file in a subclass.")
625
626 def find_args(self):
627 return [u'job.exe']
628
629 def parse_job_id(self, output):
630 """Take the output of the submit command and return the job id."""
631 m = re.search(self.job_id_regexp, output)
632 if m is not None:
633 job_id = m.group()
634 else:
635 raise LauncherError("Job id couldn't be determined: %s" % output)
636 self.job_id = job_id
637 self.log.info('Job started with job id: %r' % job_id)
638 return job_id
639
640 def start(self, n):
641 """Start n copies of the process using the Win HPC job scheduler."""
642 self.write_job_file(n)
643 args = [
644 'submit',
645 '/jobfile:%s' % self.job_file,
646 '/scheduler:%s' % self.scheduler
647 ]
648 self.log.info("Starting Win HPC Job: %s" % (self.job_cmd + ' ' + ' '.join(args),))
649 # Twisted will raise DeprecationWarnings if we try to pass unicode to this
650 output = check_output([self.job_cmd]+args,
651 env=os.environ,
652 cwd=self.work_dir,
653 stderr=STDOUT
654 )
655 job_id = self.parse_job_id(output)
656 self.notify_start(job_id)
657 return job_id
658
659 def stop(self):
660 args = [
661 'cancel',
662 self.job_id,
663 '/scheduler:%s' % self.scheduler
664 ]
665 self.log.info("Stopping Win HPC Job: %s" % (self.job_cmd + ' ' + ' '.join(args),))
666 try:
667 output = check_output([self.job_cmd]+args,
668 env=os.environ,
669 cwd=self.work_dir,
670 stderr=STDOUT
671 )
672 except:
673 output = 'The job already appears to be stopped: %r' % self.job_id
674 self.notify_stop(dict(job_id=self.job_id, output=output)) # Pass the output of the kill cmd
675 return output
676
677
678 class WindowsHPCControllerLauncher(WindowsHPCLauncher):
679
680 job_file_name = CUnicode(u'ipcontroller_job.xml', config=True)
681 extra_args = List([], config=False)
682
683 def write_job_file(self, n):
684 job = IPControllerJob(config=self.config)
685
686 t = IPControllerTask(config=self.config)
687 # The tasks work directory is *not* the actual work directory of
688 # the controller. It is used as the base path for the stdout/stderr
689 # files that the scheduler redirects to.
690 t.work_directory = self.cluster_dir
691 # Add the --cluster-dir argument passed in from self.start().
692 t.controller_args.extend(self.extra_args)
693 job.add_task(t)
694
695 self.log.info("Writing job description file: %s" % self.job_file)
696 job.write(self.job_file)
697
698 @property
699 def job_file(self):
700 return os.path.join(self.cluster_dir, self.job_file_name)
701
702 def start(self, cluster_dir):
703 """Start the controller by cluster_dir."""
704 self.extra_args = ['--cluster-dir', cluster_dir]
705 self.cluster_dir = unicode(cluster_dir)
706 return super(WindowsHPCControllerLauncher, self).start(1)
707
708
709 class WindowsHPCEngineSetLauncher(WindowsHPCLauncher):
710
711 job_file_name = CUnicode(u'ipengineset_job.xml', config=True)
712 extra_args = List([], config=False)
713
714 def write_job_file(self, n):
715 job = IPEngineSetJob(config=self.config)
716
717 for i in range(n):
718 t = IPEngineTask(config=self.config)
719 # The tasks work directory is *not* the actual work directory of
720 # the engine. It is used as the base path for the stdout/stderr
721 # files that the scheduler redirects to.
722 t.work_directory = self.cluster_dir
723 # Add the --cluster-dir argument passed in from self.start().
724 t.engine_args.extend(self.extra_args)
725 job.add_task(t)
726
727 self.log.info("Writing job description file: %s" % self.job_file)
728 job.write(self.job_file)
729
730 @property
731 def job_file(self):
732 return os.path.join(self.cluster_dir, self.job_file_name)
733
734 def start(self, n, cluster_dir):
735 """Start n engines by cluster_dir."""
736 self.extra_args = ['--cluster-dir', cluster_dir]
737 self.cluster_dir = unicode(cluster_dir)
738 return super(WindowsHPCEngineSetLauncher, self).start(n)
739
740
741 #-----------------------------------------------------------------------------
742 # Batch (PBS) system launchers
743 #-----------------------------------------------------------------------------
744
745 class BatchSystemLauncher(BaseLauncher):
746 """Launch an external process using a batch system.
747
748 This class is designed to work with UNIX batch systems like PBS, LSF,
749 GridEngine, etc. The overall model is that there are different commands
750 like qsub, qdel, etc. that handle the starting and stopping of the process.
751
752 This class also has the notion of a batch script. The ``batch_template``
753 attribute can be set to a string that is a template for the batch script.
754 This template is instantiated using Itpl. Thus the template can use
755 ${n} for the number of instances. Subclasses can add additional variables
756 to the template dict.
757 """
758
759 # Subclasses must fill these in. See PBSEngineSet
760 # The name of the command line program used to submit jobs.
761 submit_command = List([''], config=True)
762 # The name of the command line program used to delete jobs.
763 delete_command = List([''], config=True)
764 # A regular expression used to get the job id from the output of the
765 # submit_command.
766 job_id_regexp = CUnicode('', config=True)
767 # The string that is the batch script template itself.
768 batch_template = CUnicode('', config=True)
769 # The file that contains the batch template
770 batch_template_file = CUnicode(u'', config=True)
771 # The filename of the instantiated batch script.
772 batch_file_name = CUnicode(u'batch_script', config=True)
773 # The PBS Queue
774 queue = CUnicode(u'', config=True)
775
776 # not configurable, override in subclasses
777 # PBS Job Array regex
778 job_array_regexp = CUnicode('')
779 job_array_template = CUnicode('')
780 # PBS Queue regex
781 queue_regexp = CUnicode('')
782 queue_template = CUnicode('')
783 # The default batch template, override in subclasses
784 default_template = CUnicode('')
785 # The full path to the instantiated batch script.
786 batch_file = CUnicode(u'')
787 # the format dict used with batch_template:
788 context = Dict()
789
790
791 def find_args(self):
792 return self.submit_command + [self.batch_file]
793
794 def __init__(self, work_dir=u'.', config=None, **kwargs):
795 super(BatchSystemLauncher, self).__init__(
796 work_dir=work_dir, config=config, **kwargs
797 )
798 self.batch_file = os.path.join(self.work_dir, self.batch_file_name)
799
800 def parse_job_id(self, output):
801 """Take the output of the submit command and return the job id."""
802 m = re.search(self.job_id_regexp, output)
803 if m is not None:
804 job_id = m.group()
805 else:
806 raise LauncherError("Job id couldn't be determined: %s" % output)
807 self.job_id = job_id
808 self.log.info('Job submitted with job id: %r' % job_id)
809 return job_id
810
811 def write_batch_script(self, n):
812 """Instantiate and write the batch script to the work_dir."""
813 self.context['n'] = n
814 self.context['queue'] = self.queue
815 self.log.debug("Writing batch script with context: %r" % self.context)
816 # first priority is batch_template if set
817 if self.batch_template_file and not self.batch_template:
818 # second priority is batch_template_file
819 with open(self.batch_template_file) as f:
820 self.batch_template = f.read()
821 if not self.batch_template:
822 # third (last) priority is default_template
823 self.batch_template = self.default_template
824
825 regex = re.compile(self.job_array_regexp)
826 # print regex.search(self.batch_template)
827 if not regex.search(self.batch_template):
828 self.log.info("adding job array settings to batch script")
829 firstline, rest = self.batch_template.split('\n',1)
830 self.batch_template = u'\n'.join([firstline, self.job_array_template, rest])
831
832 regex = re.compile(self.queue_regexp)
833 # print regex.search(self.batch_template)
834 if self.queue and not regex.search(self.batch_template):
835 self.log.info("adding PBS queue settings to batch script")
836 firstline, rest = self.batch_template.split('\n',1)
837 self.batch_template = u'\n'.join([firstline, self.queue_template, rest])
838
839 script_as_string = Itpl.itplns(self.batch_template, self.context)
840 self.log.info('Writing instantiated batch script: %s' % self.batch_file)
841
842 with open(self.batch_file, 'w') as f:
843 f.write(script_as_string)
844 os.chmod(self.batch_file, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
845
846 def start(self, n, cluster_dir):
847 """Start n copies of the process using a batch system."""
848 # Here we save profile and cluster_dir in the context so they
849 # can be used in the batch script template as ${profile} and
850 # ${cluster_dir}
851 self.context['cluster_dir'] = cluster_dir
852 self.cluster_dir = unicode(cluster_dir)
853 self.write_batch_script(n)
854 output = check_output(self.args, env=os.environ)
855
856 job_id = self.parse_job_id(output)
857 self.notify_start(job_id)
858 return job_id
859
860 def stop(self):
861 output = check_output(self.delete_command+[self.job_id], env=os.environ)
862 self.notify_stop(dict(job_id=self.job_id, output=output)) # Pass the output of the kill cmd
863 return output
864
865
866 class PBSLauncher(BatchSystemLauncher):
867 """A BatchSystemLauncher subclass for PBS."""
868
869 submit_command = List(['qsub'], config=True)
870 delete_command = List(['qdel'], config=True)
871 job_id_regexp = CUnicode(r'\d+', config=True)
872
873 batch_file = CUnicode(u'')
874 job_array_regexp = CUnicode('#PBS\W+-t\W+[\w\d\-\$]+')
875 job_array_template = CUnicode('#PBS -t 1-$n')
876 queue_regexp = CUnicode('#PBS\W+-q\W+\$?\w+')
877 queue_template = CUnicode('#PBS -q $queue')
878
879
880 class PBSControllerLauncher(PBSLauncher):
881 """Launch a controller using PBS."""
882
883 batch_file_name = CUnicode(u'pbs_controller', config=True)
884 default_template= CUnicode("""#!/bin/sh
885 #PBS -V
886 #PBS -N ipcontroller
887 %s --log-to-file --cluster-dir $cluster_dir
888 """%(' '.join(ipcontroller_cmd_argv)))
889
890 def start(self, cluster_dir):
891 """Start the controller by profile or cluster_dir."""
892 self.log.info("Starting PBSControllerLauncher: %r" % self.args)
893 return super(PBSControllerLauncher, self).start(1, cluster_dir)
894
895
896 class PBSEngineSetLauncher(PBSLauncher):
897 """Launch Engines using PBS"""
898 batch_file_name = CUnicode(u'pbs_engines', config=True)
899 default_template= CUnicode(u"""#!/bin/sh
900 #PBS -V
901 #PBS -N ipengine
902 %s --cluster-dir $cluster_dir
903 """%(' '.join(ipengine_cmd_argv)))
904
905 def start(self, n, cluster_dir):
906 """Start n engines by profile or cluster_dir."""
907 self.log.info('Starting %i engines with PBSEngineSetLauncher: %r' % (n, self.args))
908 return super(PBSEngineSetLauncher, self).start(n, cluster_dir)
909
910 #SGE is very similar to PBS
911
912 class SGELauncher(PBSLauncher):
913 """Sun GridEngine is a PBS clone with slightly different syntax"""
914 job_array_regexp = CUnicode('#$$\W+-t\W+[\w\d\-\$]+')
915 job_array_template = CUnicode('#$$ -t 1-$n')
916 queue_regexp = CUnicode('#$$\W+-q\W+\$?\w+')
917 queue_template = CUnicode('#$$ -q $queue')
918
919 class SGEControllerLauncher(SGELauncher):
920 """Launch a controller using SGE."""
921
922 batch_file_name = CUnicode(u'sge_controller', config=True)
923 default_template= CUnicode(u"""#$$ -V
924 #$$ -S /bin/sh
925 #$$ -N ipcontroller
926 %s --log-to-file --cluster-dir $cluster_dir
927 """%(' '.join(ipcontroller_cmd_argv)))
928
929 def start(self, cluster_dir):
930 """Start the controller by profile or cluster_dir."""
931 self.log.info("Starting SGEControllerLauncher: %r" % self.args)
932 return super(SGEControllerLauncher, self).start(1, cluster_dir)
933
934 class SGEEngineSetLauncher(SGELauncher):
935 """Launch Engines with SGE"""
936 batch_file_name = CUnicode(u'sge_engines', config=True)
937 default_template = CUnicode("""#$$ -V
938 #$$ -S /bin/sh
939 #$$ -N ipengine
940 %s --cluster-dir $cluster_dir
941 """%(' '.join(ipengine_cmd_argv)))
942
943 def start(self, n, cluster_dir):
944 """Start n engines by profile or cluster_dir."""
945 self.log.info('Starting %i engines with SGEEngineSetLauncher: %r' % (n, self.args))
946 return super(SGEEngineSetLauncher, self).start(n, cluster_dir)
947
948
949 #-----------------------------------------------------------------------------
950 # A launcher for ipcluster itself!
951 #-----------------------------------------------------------------------------
952
953
954 class IPClusterLauncher(LocalProcessLauncher):
955 """Launch the ipcluster program in an external process."""
956
957 ipcluster_cmd = List(ipcluster_cmd_argv, config=True)
958 # Command line arguments to pass to ipcluster.
959 ipcluster_args = List(
960 ['--clean-logs', '--log-to-file', '--log-level', str(logging.INFO)], config=True)
961 ipcluster_subcommand = Str('start')
962 ipcluster_n = Int(2)
963
964 def find_args(self):
965 return self.ipcluster_cmd + [self.ipcluster_subcommand] + \
966 ['-n', repr(self.ipcluster_n)] + self.ipcluster_args
967
968 def start(self):
969 self.log.info("Starting ipcluster: %r" % self.args)
970 return super(IPClusterLauncher, self).start()
971
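For reference, a rough sketch of the kind of script write_batch_script ends up producing for the PBS engine launcher, using string.Template as a stand-in for the Itpl interpolation used above (the queue name, engine count, and paths are made up, and ipengine is assumed to be on PATH):

from string import Template

# roughly the default engine template after the queue and job-array lines
# have been inserted below the shebang line
template = """#!/bin/sh
#PBS -V
#PBS -N ipengine
#PBS -q $queue
#PBS -t 1-$n
ipengine --cluster-dir $cluster_dir
"""
context = dict(n=8, queue='batch', cluster_dir='/home/user/.ipython/cluster_default')
print Template(template).substitute(context)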
@@ -0,0 +1,98 b''
1 #!/usr/bin/env python
2 """A simple logger object that consolidates messages incoming from ipcluster processes."""
3
4 #-----------------------------------------------------------------------------
5 # Copyright (C) 2011 The IPython Development Team
6 #
7 # Distributed under the terms of the BSD License. The full license is in
8 # the file COPYING, distributed as part of this software.
9 #-----------------------------------------------------------------------------
10
11 #-----------------------------------------------------------------------------
12 # Imports
13 #-----------------------------------------------------------------------------
14
15
16 import logging
17 import sys
18
19 import zmq
20 from zmq.eventloop import ioloop, zmqstream
21
22 from IPython.utils.traitlets import Int, Str, Instance, List
23
24 from IPython.parallel.factory import LoggingFactory
25
26 #-----------------------------------------------------------------------------
27 # Classes
28 #-----------------------------------------------------------------------------
29
30
31 class LogWatcher(LoggingFactory):
32 """A simple class that receives messages on a SUB socket, as published
33 by subclasses of `zmq.log.handlers.PUBHandler`, and logs them itself.
34
35 This can subscribe to multiple topics, but defaults to all topics.
36 """
37 # configurables
38 topics = List([''], config=True)
39 url = Str('tcp://127.0.0.1:20202', config=True)
40
41 # internals
42 context = Instance(zmq.Context, (), {})
43 stream = Instance('zmq.eventloop.zmqstream.ZMQStream')
44 loop = Instance('zmq.eventloop.ioloop.IOLoop')
45 def _loop_default(self):
46 return ioloop.IOLoop.instance()
47
48 def __init__(self, **kwargs):
49 super(LogWatcher, self).__init__(**kwargs)
50 s = self.context.socket(zmq.SUB)
51 s.bind(self.url)
52 self.stream = zmqstream.ZMQStream(s, self.loop)
53 self.subscribe()
54 self.on_trait_change(self.subscribe, 'topics')
55
56 def start(self):
57 self.stream.on_recv(self.log_message)
58
59 def stop(self):
60 self.stream.stop_on_recv()
61
62 def subscribe(self):
63 """Update our SUB socket's subscriptions."""
64 self.stream.setsockopt(zmq.UNSUBSCRIBE, '')
65 for topic in self.topics:
66 self.log.debug("Subscribing to: %r"%topic)
67 self.stream.setsockopt(zmq.SUBSCRIBE, topic)
68
69 def _extract_level(self, topic_str):
70 """Turn 'engine.0.INFO.extra' into (logging.INFO, 'engine.0.extra')"""
71 topics = topic_str.split('.')
72 for idx,t in enumerate(topics):
73 level = getattr(logging, t, None)
74 if level is not None:
75 break
76
77 if level is None:
78 level = logging.INFO
79 else:
80 topics.pop(idx)
81
82 return level, '.'.join(topics)
83
84
85 def log_message(self, raw):
86 """receive and parse a message, then log it."""
87 if len(raw) != 2 or '.' not in raw[0]:
88 self.log.error("Invalid log message: %s"%raw)
89 return
90 else:
91 topic, msg = raw
92 # strip the trailing newline, since the logging call adds its own:
93 topic,level_name = topic.rsplit('.',1)
94 level,topic = self._extract_level(topic)
95 if msg[-1] == '\n':
96 msg = msg[:-1]
97 logging.log(level, "[%s] %s" % (topic, msg))
98
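A minimal sketch of the publishing side that feeds this LogWatcher (it assumes the default url above; PUBHandler is pyzmq's zmq.log.handlers handler referenced in the class docstring, while the 'engine.0' root topic is just an illustrative choice):

import logging
import zmq
from zmq.log.handlers import PUBHandler

ctx = zmq.Context.instance()
pub = ctx.socket(zmq.PUB)
pub.connect('tcp://127.0.0.1:20202')  # the LogWatcher binds, publishers connect

handler = PUBHandler(pub)
handler.root_topic = 'engine.0'       # messages go out on topics like 'engine.0.INFO'
logger = logging.getLogger('engine')
logger.addHandler(handler)
logger.setLevel(logging.INFO)

logger.info('engine is up')           # received, level-parsed, and re-logged by the LogWatcher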
@@ -0,0 +1,316 b''
1 #!/usr/bin/env python
2 # encoding: utf-8
3 """
4 Job and task components for writing .xml files that the Windows HPC Server
5 2008 can use to start jobs.
6 """
7
8 #-----------------------------------------------------------------------------
9 # Copyright (C) 2008-2009 The IPython Development Team
10 #
11 # Distributed under the terms of the BSD License. The full license is in
12 # the file COPYING, distributed as part of this software.
13 #-----------------------------------------------------------------------------
14
15 #-----------------------------------------------------------------------------
16 # Imports
17 #-----------------------------------------------------------------------------
18
19 from __future__ import with_statement
20
21 import os
22 import re
23 import uuid
24
25 from xml.etree import ElementTree as ET
26
27 from IPython.config.configurable import Configurable
28 from IPython.utils.traitlets import (
29 Str, Int, List, Instance,
30 Enum, Bool, CStr
31 )
32
33 #-----------------------------------------------------------------------------
34 # Job and Task classes
35 #-----------------------------------------------------------------------------
36
37
38 def as_str(value):
39 if isinstance(value, str):
40 return value
41 elif isinstance(value, bool):
42 if value:
43 return 'true'
44 else:
45 return 'false'
46 elif isinstance(value, (int, float)):
47 return repr(value)
48 else:
49 return value
50
51
52 def indent(elem, level=0):
53 i = "\n" + level*" "
54 if len(elem):
55 if not elem.text or not elem.text.strip():
56 elem.text = i + " "
57 if not elem.tail or not elem.tail.strip():
58 elem.tail = i
59 for elem in elem:
60 indent(elem, level+1)
61 if not elem.tail or not elem.tail.strip():
62 elem.tail = i
63 else:
64 if level and (not elem.tail or not elem.tail.strip()):
65 elem.tail = i
66
67
68 def find_username():
69 domain = os.environ.get('USERDOMAIN')
70 username = os.environ.get('USERNAME','')
71 if domain is None:
72 return username
73 else:
74 return '%s\\%s' % (domain, username)
75
76
77 class WinHPCJob(Configurable):
78
79 job_id = Str('')
80 job_name = Str('MyJob', config=True)
81 min_cores = Int(1, config=True)
82 max_cores = Int(1, config=True)
83 min_sockets = Int(1, config=True)
84 max_sockets = Int(1, config=True)
85 min_nodes = Int(1, config=True)
86 max_nodes = Int(1, config=True)
87 unit_type = Str("Core", config=True)
88 auto_calculate_min = Bool(True, config=True)
89 auto_calculate_max = Bool(True, config=True)
90 run_until_canceled = Bool(False, config=True)
91 is_exclusive = Bool(False, config=True)
92 username = Str(find_username(), config=True)
93 job_type = Str('Batch', config=True)
94 priority = Enum(('Lowest','BelowNormal','Normal','AboveNormal','Highest'),
95 default_value='Highest', config=True)
96 requested_nodes = Str('', config=True)
97 project = Str('IPython', config=True)
98 xmlns = Str('http://schemas.microsoft.com/HPCS2008/scheduler/')
99 version = Str("2.000")
100 tasks = List([])
101
102 @property
103 def owner(self):
104 return self.username
105
106 def _write_attr(self, root, attr, key):
107 s = as_str(getattr(self, attr, ''))
108 if s:
109 root.set(key, s)
110
111 def as_element(self):
112 # We have to add _A_-style prefixes to the attribute names to get the
113 # order that the MSFT XML parser expects.
114 root = ET.Element('Job')
115 self._write_attr(root, 'version', '_A_Version')
116 self._write_attr(root, 'job_name', '_B_Name')
117 self._write_attr(root, 'unit_type', '_C_UnitType')
118 self._write_attr(root, 'min_cores', '_D_MinCores')
119 self._write_attr(root, 'max_cores', '_E_MaxCores')
120 self._write_attr(root, 'min_sockets', '_F_MinSockets')
121 self._write_attr(root, 'max_sockets', '_G_MaxSockets')
122 self._write_attr(root, 'min_nodes', '_H_MinNodes')
123 self._write_attr(root, 'max_nodes', '_I_MaxNodes')
124 self._write_attr(root, 'run_until_canceled', '_J_RunUntilCanceled')
125 self._write_attr(root, 'is_exclusive', '_K_IsExclusive')
126 self._write_attr(root, 'username', '_L_UserName')
127 self._write_attr(root, 'job_type', '_M_JobType')
128 self._write_attr(root, 'priority', '_N_Priority')
129 self._write_attr(root, 'requested_nodes', '_O_RequestedNodes')
130 self._write_attr(root, 'auto_calculate_max', '_P_AutoCalculateMax')
131 self._write_attr(root, 'auto_calculate_min', '_Q_AutoCalculateMin')
132 self._write_attr(root, 'project', '_R_Project')
133 self._write_attr(root, 'owner', '_S_Owner')
134 self._write_attr(root, 'xmlns', '_T_xmlns')
135 dependencies = ET.SubElement(root, "Dependencies")
136 etasks = ET.SubElement(root, "Tasks")
137 for t in self.tasks:
138 etasks.append(t.as_element())
139 return root
140
141 def tostring(self):
142 """Return the string representation of the job description XML."""
143 root = self.as_element()
144 indent(root)
145 txt = ET.tostring(root, encoding="utf-8")
146 # Now remove the tokens used to order the attributes.
147 txt = re.sub(r'_[A-Z]_','',txt)
148 txt = '<?xml version="1.0" encoding="utf-8"?>\n' + txt
149 return txt
150
151 def write(self, filename):
152 """Write the XML job description to a file."""
153 txt = self.tostring()
154 with open(filename, 'w') as f:
155 f.write(txt)
156
157 def add_task(self, task):
158 """Add a task to the job.
159
160 Parameters
161 ----------
162 task : :class:`WinHPCTask`
163 The task object to add.
164 """
165 self.tasks.append(task)
166
167
168 class WinHPCTask(Configurable):
169
170 task_id = Str('')
171 task_name = Str('')
172 version = Str("2.000")
173 min_cores = Int(1, config=True)
174 max_cores = Int(1, config=True)
175 min_sockets = Int(1, config=True)
176 max_sockets = Int(1, config=True)
177 min_nodes = Int(1, config=True)
178 max_nodes = Int(1, config=True)
179 unit_type = Str("Core", config=True)
180 command_line = CStr('', config=True)
181 work_directory = CStr('', config=True)
182 is_rerunnable = Bool(True, config=True)
183 std_out_file_path = CStr('', config=True)
184 std_err_file_path = CStr('', config=True)
185 is_parametric = Bool(False, config=True)
186 environment_variables = Instance(dict, args=(), config=True)
187
188 def _write_attr(self, root, attr, key):
189 s = as_str(getattr(self, attr, ''))
190 if s:
191 root.set(key, s)
192
193 def as_element(self):
194 root = ET.Element('Task')
195 self._write_attr(root, 'version', '_A_Version')
196 self._write_attr(root, 'task_name', '_B_Name')
197 self._write_attr(root, 'min_cores', '_C_MinCores')
198 self._write_attr(root, 'max_cores', '_D_MaxCores')
199 self._write_attr(root, 'min_sockets', '_E_MinSockets')
200 self._write_attr(root, 'max_sockets', '_F_MaxSockets')
201 self._write_attr(root, 'min_nodes', '_G_MinNodes')
202 self._write_attr(root, 'max_nodes', '_H_MaxNodes')
203 self._write_attr(root, 'command_line', '_I_CommandLine')
204 self._write_attr(root, 'work_directory', '_J_WorkDirectory')
205 self._write_attr(root, 'is_rerunnable', '_K_IsRerunnable')
206 self._write_attr(root, 'std_out_file_path', '_L_StdOutFilePath')
207 self._write_attr(root, 'std_err_file_path', '_M_StdErrFilePath')
208 self._write_attr(root, 'is_parametric', '_N_IsParametric')
209 self._write_attr(root, 'unit_type', '_O_UnitType')
210 root.append(self.get_env_vars())
211 return root
212
213 def get_env_vars(self):
214 env_vars = ET.Element('EnvironmentVariables')
215 for k, v in self.environment_variables.iteritems():
216 variable = ET.SubElement(env_vars, "Variable")
217 name = ET.SubElement(variable, "Name")
218 name.text = k
219 value = ET.SubElement(variable, "Value")
220 value.text = v
221 return env_vars
222
223
224
225 # By declaring these, we can configure the controller and engine separately!
226
227 class IPControllerJob(WinHPCJob):
228 job_name = Str('IPController', config=False)
229 is_exclusive = Bool(False, config=True)
230 username = Str(find_username(), config=True)
231 priority = Enum(('Lowest','BelowNormal','Normal','AboveNormal','Highest'),
232 default_value='Highest', config=True)
233 requested_nodes = Str('', config=True)
234 project = Str('IPython', config=True)
235
236
237 class IPEngineSetJob(WinHPCJob):
238 job_name = Str('IPEngineSet', config=False)
239 is_exclusive = Bool(False, config=True)
240 username = Str(find_username(), config=True)
241 priority = Enum(('Lowest','BelowNormal','Normal','AboveNormal','Highest'),
242 default_value='Highest', config=True)
243 requested_nodes = Str('', config=True)
244 project = Str('IPython', config=True)
245
246
247 class IPControllerTask(WinHPCTask):
248
249 task_name = Str('IPController', config=True)
250 controller_cmd = List(['ipcontroller.exe'], config=True)
251 controller_args = List(['--log-to-file', '--log-level', '40'], config=True)
252 # I don't want these to be configurable
253 std_out_file_path = CStr('', config=False)
254 std_err_file_path = CStr('', config=False)
255 min_cores = Int(1, config=False)
256 max_cores = Int(1, config=False)
257 min_sockets = Int(1, config=False)
258 max_sockets = Int(1, config=False)
259 min_nodes = Int(1, config=False)
260 max_nodes = Int(1, config=False)
261 unit_type = Str("Core", config=False)
262 work_directory = CStr('', config=False)
263
264 def __init__(self, config=None):
265 super(IPControllerTask, self).__init__(config=config)
266 the_uuid = uuid.uuid1()
267 self.std_out_file_path = os.path.join('log','ipcontroller-%s.out' % the_uuid)
268 self.std_err_file_path = os.path.join('log','ipcontroller-%s.err' % the_uuid)
269
270 @property
271 def command_line(self):
272 return ' '.join(self.controller_cmd + self.controller_args)
273
274
275 class IPEngineTask(WinHPCTask):
276
277 task_name = Str('IPEngine', config=True)
278 engine_cmd = List(['ipengine.exe'], config=True)
279 engine_args = List(['--log-to-file', '--log-level', '40'], config=True)
280 # I don't want these to be configurable
281 std_out_file_path = CStr('', config=False)
282 std_err_file_path = CStr('', config=False)
283 min_cores = Int(1, config=False)
284 max_cores = Int(1, config=False)
285 min_sockets = Int(1, config=False)
286 max_sockets = Int(1, config=False)
287 min_nodes = Int(1, config=False)
288 max_nodes = Int(1, config=False)
289 unit_type = Str("Core", config=False)
290 work_directory = CStr('', config=False)
291
292 def __init__(self, config=None):
293 super(IPEngineTask,self).__init__(config=config)
294 the_uuid = uuid.uuid1()
295 self.std_out_file_path = os.path.join('log','ipengine-%s.out' % the_uuid)
296 self.std_err_file_path = os.path.join('log','ipengine-%s.err' % the_uuid)
297
298 @property
299 def command_line(self):
300 return ' '.join(self.engine_cmd + self.engine_args)
301
302
303 # j = WinHPCJob(None)
304 # j.job_name = 'IPCluster'
305 # j.username = 'GNET\\bgranger'
306 # j.requested_nodes = 'GREEN'
307 #
308 # t = WinHPCTask(None)
309 # t.task_name = 'Controller'
310 # t.command_line = r"\\blue\domainusers$\bgranger\Python\Python25\Scripts\ipcontroller.exe --log-to-file -p default --log-level 10"
311 # t.work_directory = r"\\blue\domainusers$\bgranger\.ipython\cluster_default"
312 # t.std_out_file_path = 'controller-out.txt'
313 # t.std_err_file_path = 'controller-err.txt'
314 # t.environment_variables['PYTHONPATH'] = r"\\blue\domainusers$\bgranger\Python\Python25\Lib\site-packages"
315 # j.add_task(t)
316
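As an aside, a tiny sketch of the attribute-ordering trick used in as_element above (the tag and attribute names here are illustrative, not the real WinHPC schema):

import re
from xml.etree import ElementTree as ET

root = ET.Element('Job')
root.set('_A_Version', '2.000')    # ElementTree sorts attributes, so the _A_/_B_
root.set('_B_Name', 'MyJob')       # prefixes force the order the scheduler expects
txt = ET.tostring(root)
txt = re.sub(r'_[A-Z]_', '', txt)  # then the ordering tokens are stripped
print txt                          # <Job Version="2.000" Name="MyJob" />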
1 NO CONTENT: new file 100644
NO CONTENT: new file 100644
@@ -0,0 +1,340 b''
1 """AsyncResult objects for the client"""
2 #-----------------------------------------------------------------------------
3 # Copyright (C) 2010-2011 The IPython Development Team
4 #
5 # Distributed under the terms of the BSD License. The full license is in
6 # the file COPYING, distributed as part of this software.
7 #-----------------------------------------------------------------------------
8
9 #-----------------------------------------------------------------------------
10 # Imports
11 #-----------------------------------------------------------------------------
12
13 import time
14
15 from zmq import MessageTracker
16
17 from IPython.external.decorator import decorator
18 from IPython.parallel import error
19
20 #-----------------------------------------------------------------------------
21 # Classes
22 #-----------------------------------------------------------------------------
23
24 # global empty tracker that's always done:
25 finished_tracker = MessageTracker()
26
27 @decorator
28 def check_ready(f, self, *args, **kwargs):
29 """Call spin() to sync state prior to calling the method."""
30 self.wait(0)
31 if not self._ready:
32 raise error.TimeoutError("result not ready")
33 return f(self, *args, **kwargs)
34
35 class AsyncResult(object):
36 """Class for representing results of non-blocking calls.
37
38 Provides the same interface as :py:class:`multiprocessing.pool.AsyncResult`.
39 """
40
41 msg_ids = None
42 _targets = None
43 _tracker = None
44 _single_result = False
45
46 def __init__(self, client, msg_ids, fname='unknown', targets=None, tracker=None):
47 if isinstance(msg_ids, basestring):
48 # always a list
49 msg_ids = [msg_ids]
50 if tracker is None:
51 # default to always done
52 tracker = finished_tracker
53 self._client = client
54 self.msg_ids = msg_ids
55 self._fname=fname
56 self._targets = targets
57 self._tracker = tracker
58 self._ready = False
59 self._success = None
60 if len(msg_ids) == 1:
61 self._single_result = not isinstance(targets, (list, tuple))
62 else:
63 self._single_result = False
64
65 def __repr__(self):
66 if self._ready:
67 return "<%s: finished>"%(self.__class__.__name__)
68 else:
69 return "<%s: %s>"%(self.__class__.__name__,self._fname)
70
71
72 def _reconstruct_result(self, res):
73 """Reconstruct our result from actual result list (always a list)
74
75 Override me in subclasses for turning a list of results
76 into the expected form.
77 """
78 if self._single_result:
79 return res[0]
80 else:
81 return res
82
83 def get(self, timeout=-1):
84 """Return the result when it arrives.
85
86 If `timeout` is not ``None`` and the result does not arrive within
87 `timeout` seconds then ``TimeoutError`` is raised. If the
88 remote call raised an exception then that exception will be reraised
89 by get() inside a `RemoteError`.
90 """
91 if not self.ready():
92 self.wait(timeout)
93
94 if self._ready:
95 if self._success:
96 return self._result
97 else:
98 raise self._exception
99 else:
100 raise error.TimeoutError("Result not ready.")
101
102 def ready(self):
103 """Return whether the call has completed."""
104 if not self._ready:
105 self.wait(0)
106 return self._ready
107
108 def wait(self, timeout=-1):
109 """Wait until the result is available or until `timeout` seconds pass.
110
111 This method always returns None.
112 """
113 if self._ready:
114 return
115 self._ready = self._client.wait(self.msg_ids, timeout)
116 if self._ready:
117 try:
118 results = map(self._client.results.get, self.msg_ids)
119 self._result = results
120 if self._single_result:
121 r = results[0]
122 if isinstance(r, Exception):
123 raise r
124 else:
125 results = error.collect_exceptions(results, self._fname)
126 self._result = self._reconstruct_result(results)
127 except Exception, e:
128 self._exception = e
129 self._success = False
130 else:
131 self._success = True
132 finally:
133 self._metadata = map(self._client.metadata.get, self.msg_ids)
134
135
136 def successful(self):
137 """Return whether the call completed without raising an exception.
138
139 Will raise ``AssertionError`` if the result is not ready.
140 """
141 assert self.ready()
142 return self._success
143
144 #----------------------------------------------------------------
145 # Extra methods not in mp.pool.AsyncResult
146 #----------------------------------------------------------------
147
148 def get_dict(self, timeout=-1):
149 """Get the results as a dict, keyed by engine_id.
150
151 timeout behavior is described in `get()`.
152 """
153
154 results = self.get(timeout)
155 engine_ids = [ md['engine_id'] for md in self._metadata ]
156 bycount = sorted(engine_ids, key=lambda k: engine_ids.count(k))
157 maxcount = bycount.count(bycount[-1])
158 if maxcount > 1:
159 raise ValueError("Cannot build dict, %i jobs ran on engine #%i"%(
160 maxcount, bycount[-1]))
161
162 return dict(zip(engine_ids,results))
163
164 @property
165 def result(self):
166 """result property wrapper for `get(timeout=0)`."""
167 return self.get()
168
169 # abbreviated alias:
170 r = result
171
172 @property
173 @check_ready
174 def metadata(self):
175 """property for accessing execution metadata."""
176 if self._single_result:
177 return self._metadata[0]
178 else:
179 return self._metadata
180
181 @property
182 def result_dict(self):
183 """result property as a dict."""
184 return self.get_dict()
185
186 def __dict__(self):
187 return self.get_dict(0)
188
189 def abort(self):
190 """abort my tasks."""
191 assert not self.ready(), "Can't abort, I am already done!"
192 return self._client.abort(self.msg_ids, targets=self._targets, block=True)
193
194 @property
195 def sent(self):
196 """check whether my messages have been sent."""
197 return self._tracker.done
198
199 def wait_for_send(self, timeout=-1):
200 """wait for pyzmq send to complete.
201
202 This is necessary when sending arrays that you intend to edit in-place.
203 `timeout` is in seconds, and will raise TimeoutError if it is reached
204 before the send completes.
205 """
206 return self._tracker.wait(timeout)
207
208 #-------------------------------------
209 # dict-access
210 #-------------------------------------
211
212 @check_ready
213 def __getitem__(self, key):
214 """getitem returns result value(s) if keyed by int/slice, or metadata if key is str.
215 """
216 if isinstance(key, int):
217 return error.collect_exceptions([self._result[key]], self._fname)[0]
218 elif isinstance(key, slice):
219 return error.collect_exceptions(self._result[key], self._fname)
220 elif isinstance(key, basestring):
221 values = [ md[key] for md in self._metadata ]
222 if self._single_result:
223 return values[0]
224 else:
225 return values
226 else:
227 raise TypeError("Invalid key type %r, must be 'int','slice', or 'str'"%type(key))
228
229 @check_ready
230 def __getattr__(self, key):
231 """getattr maps to getitem for convenient attr access to metadata."""
232 if key not in self._metadata[0].keys():
233 raise AttributeError("%r object has no attribute %r"%(
234 self.__class__.__name__, key))
235 return self.__getitem__(key)
236
237 # asynchronous iterator:
238 def __iter__(self):
239 if self._single_result:
240 raise TypeError("AsyncResults with a single result are not iterable.")
241 try:
242 rlist = self.get(0)
243 except error.TimeoutError:
244 # wait for each result individually
245 for msg_id in self.msg_ids:
246 ar = AsyncResult(self._client, msg_id, self._fname)
247 yield ar.get()
248 else:
249 # already done
250 for r in rlist:
251 yield r
252
253
254
255 class AsyncMapResult(AsyncResult):
256 """Class for representing results of non-blocking gathers.
257
258 This will properly reconstruct the gather.
259 """
260
261 def __init__(self, client, msg_ids, mapObject, fname=''):
262 AsyncResult.__init__(self, client, msg_ids, fname=fname)
263 self._mapObject = mapObject
264 self._single_result = False
265
266 def _reconstruct_result(self, res):
267 """Perform the gather on the actual results."""
268 return self._mapObject.joinPartitions(res)
269
270 # asynchronous iterator:
271 def __iter__(self):
272 try:
273 rlist = self.get(0)
274 except error.TimeoutError:
275 # wait for each result individually
276 for msg_id in self.msg_ids:
277 ar = AsyncResult(self._client, msg_id, self._fname)
278 rlist = ar.get()
279 try:
280 for r in rlist:
281 yield r
282 except TypeError:
283 # flattened, not a list
284 # this could get broken by flattened data that returns iterables
285 # but most calls to map do not expose the `flatten` argument
286 yield rlist
287 else:
288 # already done
289 for r in rlist:
290 yield r
291
292
293 class AsyncHubResult(AsyncResult):
294 """Class to wrap pending results that must be requested from the Hub.
295
296 Note that waiting/polling on these objects requires polling the Hub over the network,
297 so use `AsyncHubResult.wait()` sparingly.
298 """
299
300 def wait(self, timeout=-1):
301 """wait for result to complete."""
302 start = time.time()
303 if self._ready:
304 return
305 local_ids = filter(lambda msg_id: msg_id in self._client.outstanding, self.msg_ids)
306 local_ready = self._client.wait(local_ids, timeout)
307 if local_ready:
308 remote_ids = filter(lambda msg_id: msg_id not in self._client.results, self.msg_ids)
309 if not remote_ids:
310 self._ready = True
311 else:
312 rdict = self._client.result_status(remote_ids, status_only=False)
313 pending = rdict['pending']
314 while pending and (timeout < 0 or time.time() < start+timeout):
315 rdict = self._client.result_status(remote_ids, status_only=False)
316 pending = rdict['pending']
317 if pending:
318 time.sleep(0.1)
319 if not pending:
320 self._ready = True
321 if self._ready:
322 try:
323 results = map(self._client.results.get, self.msg_ids)
324 self._result = results
325 if self._single_result:
326 r = results[0]
327 if isinstance(r, Exception):
328 raise r
329 else:
330 results = error.collect_exceptions(results, self._fname)
331 self._result = self._reconstruct_result(results)
332 except Exception, e:
333 self._exception = e
334 self._success = False
335 else:
336 self._success = True
337 finally:
338 self._metadata = map(self._client.metadata.get, self.msg_ids)
339
340 __all__ = ['AsyncResult', 'AsyncMapResult', 'AsyncHubResult'] No newline at end of file
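A rough usage sketch for these classes (it assumes a running cluster, with `rc` an instance of the Client defined in the next file, views obtained from it, and lambdas being shippable to the engines):

view = rc.load_balanced_view()

ar = view.apply_async(lambda x: x ** 2, 4)
ar.ready()                       # False until the result arrives
print ar.get(timeout=5)          # block up to 5 seconds; remote errors are re-raised
print ar.metadata['engine_id']   # execution metadata, also available as ar['engine_id']

amr = view.map_async(lambda x: 2 * x, range(8))
for r in amr:                    # AsyncMapResult yields results as they arrive
    print r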
@@ -0,0 +1,1279 b''
1 """A semi-synchronous Client for the ZMQ cluster"""
2 #-----------------------------------------------------------------------------
3 # Copyright (C) 2010 The IPython Development Team
4 #
5 # Distributed under the terms of the BSD License. The full license is in
6 # the file COPYING, distributed as part of this software.
7 #-----------------------------------------------------------------------------
8
9 #-----------------------------------------------------------------------------
10 # Imports
11 #-----------------------------------------------------------------------------
12
13 import os
14 import json
15 import time
16 import warnings
17 from datetime import datetime
18 from getpass import getpass
19 from pprint import pprint
20
21 pjoin = os.path.join
22
23 import zmq
24 # from zmq.eventloop import ioloop, zmqstream
25
26 from IPython.utils.path import get_ipython_dir
27 from IPython.utils.traitlets import (HasTraits, Int, Instance, CUnicode,
28 Dict, List, Bool, Str, Set)
29 from IPython.external.decorator import decorator
30 from IPython.external.ssh import tunnel
31
32 from IPython.parallel import error
33 from IPython.parallel import streamsession as ss
34 from IPython.parallel import util
35
36 from .asyncresult import AsyncResult, AsyncHubResult
37 from IPython.parallel.apps.clusterdir import ClusterDir, ClusterDirError
38 from .view import DirectView, LoadBalancedView
39
40 #--------------------------------------------------------------------------
41 # Decorators for Client methods
42 #--------------------------------------------------------------------------
43
44 @decorator
45 def spin_first(f, self, *args, **kwargs):
46 """Call spin() to sync state prior to calling the method."""
47 self.spin()
48 return f(self, *args, **kwargs)
49
50 @decorator
51 def default_block(f, self, *args, **kwargs):
52 """Default to self.block; preserve self.block."""
53 block = kwargs.get('block',None)
54 block = self.block if block is None else block
55 saveblock = self.block
56 self.block = block
57 try:
58 ret = f(self, *args, **kwargs)
59 finally:
60 self.block = saveblock
61 return ret
62
63
64 #--------------------------------------------------------------------------
65 # Classes
66 #--------------------------------------------------------------------------
67
68 class Metadata(dict):
69 """Subclass of dict for initializing metadata values.
70
71 Attribute access works on keys.
72
73 These objects have a strict set of keys - errors will raise if you try
74 to add new keys.
75 """
76 def __init__(self, *args, **kwargs):
77 dict.__init__(self)
78 md = {'msg_id' : None,
79 'submitted' : None,
80 'started' : None,
81 'completed' : None,
82 'received' : None,
83 'engine_uuid' : None,
84 'engine_id' : None,
85 'follow' : None,
86 'after' : None,
87 'status' : None,
88
89 'pyin' : None,
90 'pyout' : None,
91 'pyerr' : None,
92 'stdout' : '',
93 'stderr' : '',
94 }
95 self.update(md)
96 self.update(dict(*args, **kwargs))
97
98 def __getattr__(self, key):
99 """getattr aliased to getitem"""
100 if key in self.iterkeys():
101 return self[key]
102 else:
103 raise AttributeError(key)
104
105 def __setattr__(self, key, value):
106 """setattr aliased to setitem, with strict"""
107 if key in self.iterkeys():
108 self[key] = value
109 else:
110 raise AttributeError(key)
111
112 def __setitem__(self, key, value):
113 """strict static key enforcement"""
114 if key in self.iterkeys():
115 dict.__setitem__(self, key, value)
116 else:
117 raise KeyError(key)
118
119
120 class Client(HasTraits):
121 """A semi-synchronous client to the IPython ZMQ cluster
122
123 Parameters
124 ----------
125
126 url_or_file : bytes; zmq url or path to ipcontroller-client.json
127 Connection information for the Hub's registration. If a json connector
128 file is given, then likely no further configuration is necessary.
129 [Default: use profile]
130 profile : bytes
131 The name of the Cluster profile to be used to find connector information.
132 [Default: 'default']
133 context : zmq.Context
134 Pass an existing zmq.Context instance, otherwise the client will create its own.
135 username : bytes
136 set username to be passed to the Session object
137 debug : bool
138 flag for lots of message printing for debug purposes
139
140 #-------------- ssh related args ----------------
141 # These are args for configuring the ssh tunnel to be used
142 # credentials are used to forward connections over ssh to the Controller
143 # Note that the ip given in `addr` needs to be relative to sshserver
144 # The most basic case is to leave addr as pointing to localhost (127.0.0.1),
145 # and set sshserver as the same machine the Controller is on. However,
146 # the only requirement is that sshserver is able to see the Controller
147 # (i.e. is within the same trusted network).
148
149 sshserver : str
150 A string of the form passed to ssh, i.e. 'server.tld' or 'user@server.tld:port'
151 If keyfile or password is specified, and this is not, it will default to
152 the ip given in addr.
153 sshkey : str; path to public ssh key file
154 This specifies a key to be used in ssh login, default None.
155 Regular default ssh keys will be used without specifying this argument.
156 password : str
157 Your ssh password to sshserver. Note that if this is left None,
158 you will be prompted for it if passwordless key based login is unavailable.
159 paramiko : bool
160 flag for whether to use paramiko instead of shell ssh for tunneling.
161 [default: True on win32, False else]
162
163 ------- exec authentication args -------
164 If even localhost is untrusted, you can have some protection against
165 unauthorized execution by using a key. Messages are still sent
166 as cleartext, so if someone can snoop your loopback traffic this will
167 not help against malicious attacks.
168
169 exec_key : str
170 an authentication key or file containing a key
171 default: None
172
173
174 Attributes
175 ----------
176
177 ids : list of int engine IDs
178 requesting the ids attribute always synchronizes
179 the registration state. To request ids without synchronization,
180 use semi-private _ids attributes.
181
182 history : list of msg_ids
183 a list of msg_ids, keeping track of all the execution
184 messages you have submitted in order.
185
186 outstanding : set of msg_ids
187 a set of msg_ids that have been submitted, but whose
188 results have not yet been received.
189
190 results : dict
191 a dict of all our results, keyed by msg_id
192
193 block : bool
194 determines default behavior when block not specified
195 in execution methods
196
197 Methods
198 -------
199
200 spin
201 flushes incoming results and registration state changes
202 control methods spin, and requesting `ids` also ensures up to date
203
204 wait
205 wait on one or more msg_ids
206
207 execution methods
208 apply
209 legacy: execute, run
210
211 data movement
212 push, pull, scatter, gather
213
214 query methods
215 queue_status, get_result, purge, result_status
216
217 control methods
218 abort, shutdown
219
220 """
221
222
223 block = Bool(False)
224 outstanding = Set()
225 results = Instance('collections.defaultdict', (dict,))
226 metadata = Instance('collections.defaultdict', (Metadata,))
227 history = List()
228 debug = Bool(False)
229 profile=CUnicode('default')
230
231 _outstanding_dict = Instance('collections.defaultdict', (set,))
232 _ids = List()
233 _connected=Bool(False)
234 _ssh=Bool(False)
235 _context = Instance('zmq.Context')
236 _config = Dict()
237 _engines=Instance(util.ReverseDict, (), {})
238 # _hub_socket=Instance('zmq.Socket')
239 _query_socket=Instance('zmq.Socket')
240 _control_socket=Instance('zmq.Socket')
241 _iopub_socket=Instance('zmq.Socket')
242 _notification_socket=Instance('zmq.Socket')
243 _mux_socket=Instance('zmq.Socket')
244 _task_socket=Instance('zmq.Socket')
245 _task_scheme=Str()
246 _closed = False
247 _ignored_control_replies=Int(0)
248 _ignored_hub_replies=Int(0)
249
250 def __init__(self, url_or_file=None, profile='default', cluster_dir=None, ipython_dir=None,
251 context=None, username=None, debug=False, exec_key=None,
252 sshserver=None, sshkey=None, password=None, paramiko=None,
253 timeout=10
254 ):
255 super(Client, self).__init__(debug=debug, profile=profile)
256 if context is None:
257 context = zmq.Context.instance()
258 self._context = context
259
260
261 self._setup_cluster_dir(profile, cluster_dir, ipython_dir)
262 if self._cd is not None:
263 if url_or_file is None:
264 url_or_file = pjoin(self._cd.security_dir, 'ipcontroller-client.json')
265 assert url_or_file is not None, "I can't find enough information to connect to a hub!"\
266 " Please specify at least one of url_or_file or profile."
267
268 try:
269 util.validate_url(url_or_file)
270 except AssertionError:
271 if not os.path.exists(url_or_file):
272 if self._cd:
273 url_or_file = os.path.join(self._cd.security_dir, url_or_file)
274 assert os.path.exists(url_or_file), "Not a valid connection file or url: %r"%url_or_file
275 with open(url_or_file) as f:
276 cfg = json.loads(f.read())
277 else:
278 cfg = {'url':url_or_file}
279
280 # sync defaults from args, json:
281 if sshserver:
282 cfg['ssh'] = sshserver
283 if exec_key:
284 cfg['exec_key'] = exec_key
285 exec_key = cfg['exec_key']
286 sshserver=cfg['ssh']
287 url = cfg['url']
288 location = cfg.setdefault('location', None)
289 cfg['url'] = util.disambiguate_url(cfg['url'], location)
290 url = cfg['url']
291
292 self._config = cfg
293
294 self._ssh = bool(sshserver or sshkey or password)
295 if self._ssh and sshserver is None:
296 # default to ssh via localhost
297 sshserver = url.split('://')[1].split(':')[0]
298 if self._ssh and password is None:
299 if tunnel.try_passwordless_ssh(sshserver, sshkey, paramiko):
300 password=False
301 else:
302 password = getpass("SSH Password for %s: "%sshserver)
303 ssh_kwargs = dict(keyfile=sshkey, password=password, paramiko=paramiko)
304 if exec_key is not None and os.path.isfile(exec_key):
305 arg = 'keyfile'
306 else:
307 arg = 'key'
308 key_arg = {arg:exec_key}
309 if username is None:
310 self.session = ss.StreamSession(**key_arg)
311 else:
312 self.session = ss.StreamSession(username, **key_arg)
313 self._query_socket = self._context.socket(zmq.XREQ)
314 self._query_socket.setsockopt(zmq.IDENTITY, self.session.session)
315 if self._ssh:
316 tunnel.tunnel_connection(self._query_socket, url, sshserver, **ssh_kwargs)
317 else:
318 self._query_socket.connect(url)
319
320 self.session.debug = self.debug
321
322 self._notification_handlers = {'registration_notification' : self._register_engine,
323 'unregistration_notification' : self._unregister_engine,
324 'shutdown_notification' : lambda msg: self.close(),
325 }
326 self._queue_handlers = {'execute_reply' : self._handle_execute_reply,
327 'apply_reply' : self._handle_apply_reply}
328 self._connect(sshserver, ssh_kwargs, timeout)
329
330 def __del__(self):
331 """cleanup sockets, but _not_ context."""
332 self.close()
333
334 def _setup_cluster_dir(self, profile, cluster_dir, ipython_dir):
335 if ipython_dir is None:
336 ipython_dir = get_ipython_dir()
337 if cluster_dir is not None:
338 try:
339 self._cd = ClusterDir.find_cluster_dir(cluster_dir)
340 return
341 except ClusterDirError:
342 pass
343 elif profile is not None:
344 try:
345 self._cd = ClusterDir.find_cluster_dir_by_profile(
346 ipython_dir, profile)
347 return
348 except ClusterDirError:
349 pass
350 self._cd = None
351
352 def _update_engines(self, engines):
353 """Update our engines dict and _ids from a dict of the form: {id:uuid}."""
354 for k,v in engines.iteritems():
355 eid = int(k)
356 self._engines[eid] = bytes(v) # force not unicode
357 self._ids.append(eid)
358 self._ids = sorted(self._ids)
359 if sorted(self._engines.keys()) != range(len(self._engines)) and \
360 self._task_scheme == 'pure' and self._task_socket:
361 self._stop_scheduling_tasks()
362
363 def _stop_scheduling_tasks(self):
364 """Stop scheduling tasks because an engine has been unregistered
365 from a pure ZMQ scheduler.
366 """
367 self._task_socket.close()
368 self._task_socket = None
369 msg = "An engine has been unregistered, and we are using pure " +\
370 "ZMQ task scheduling. Task farming will be disabled."
371 if self.outstanding:
372 msg += " If you were running tasks when this happened, " +\
373 "some `outstanding` msg_ids may never resolve."
374 warnings.warn(msg, RuntimeWarning)
375
376 def _build_targets(self, targets):
377 """Turn valid target IDs or 'all' into two lists:
378 (int_ids, uuids).
379 """
380 if targets is None:
381 targets = self._ids
382 elif isinstance(targets, str):
383 if targets.lower() == 'all':
384 targets = self._ids
385 else:
386 raise TypeError("%r not valid str target, must be 'all'"%(targets))
387 elif isinstance(targets, int):
388 if targets < 0:
389 targets = self.ids[targets]
390 if targets not in self.ids:
391 raise IndexError("No such engine: %i"%targets)
392 targets = [targets]
393
394 if isinstance(targets, slice):
395 indices = range(len(self._ids))[targets]
396 ids = self.ids
397 targets = [ ids[i] for i in indices ]
398
399 if not isinstance(targets, (tuple, list, xrange)):
400 raise TypeError("targets by int/slice/collection of ints only, not %s"%(type(targets)))
401
402 return [self._engines[t] for t in targets], list(targets)
403
404 def _connect(self, sshserver, ssh_kwargs, timeout):
405 """setup all our socket connections to the cluster. This is called from
406 __init__."""
407
408 # Maybe allow reconnecting?
409 if self._connected:
410 return
411 self._connected=True
412
413 def connect_socket(s, url):
414 url = util.disambiguate_url(url, self._config['location'])
415 if self._ssh:
416 return tunnel.tunnel_connection(s, url, sshserver, **ssh_kwargs)
417 else:
418 return s.connect(url)
419
420 self.session.send(self._query_socket, 'connection_request')
421 r,w,x = zmq.select([self._query_socket],[],[], timeout)
422 if not r:
423 raise error.TimeoutError("Hub connection request timed out")
424 idents,msg = self.session.recv(self._query_socket,mode=0)
425 if self.debug:
426 pprint(msg)
427 msg = ss.Message(msg)
428 content = msg.content
429 self._config['registration'] = dict(content)
430 if content.status == 'ok':
431 if content.mux:
432 self._mux_socket = self._context.socket(zmq.XREQ)
433 self._mux_socket.setsockopt(zmq.IDENTITY, self.session.session)
434 connect_socket(self._mux_socket, content.mux)
435 if content.task:
436 self._task_scheme, task_addr = content.task
437 self._task_socket = self._context.socket(zmq.XREQ)
438 self._task_socket.setsockopt(zmq.IDENTITY, self.session.session)
439 connect_socket(self._task_socket, task_addr)
440 if content.notification:
441 self._notification_socket = self._context.socket(zmq.SUB)
442 connect_socket(self._notification_socket, content.notification)
443 self._notification_socket.setsockopt(zmq.SUBSCRIBE, b'')
444 # if content.query:
445 # self._query_socket = self._context.socket(zmq.XREQ)
446 # self._query_socket.setsockopt(zmq.IDENTITY, self.session.session)
447 # connect_socket(self._query_socket, content.query)
448 if content.control:
449 self._control_socket = self._context.socket(zmq.XREQ)
450 self._control_socket.setsockopt(zmq.IDENTITY, self.session.session)
451 connect_socket(self._control_socket, content.control)
452 if content.iopub:
453 self._iopub_socket = self._context.socket(zmq.SUB)
454 self._iopub_socket.setsockopt(zmq.SUBSCRIBE, b'')
455 self._iopub_socket.setsockopt(zmq.IDENTITY, self.session.session)
456 connect_socket(self._iopub_socket, content.iopub)
457 self._update_engines(dict(content.engines))
458 else:
459 self._connected = False
460 raise Exception("Failed to connect!")
461
462 #--------------------------------------------------------------------------
463 # handlers and callbacks for incoming messages
464 #--------------------------------------------------------------------------
465
466 def _unwrap_exception(self, content):
467 """unwrap exception, and remap engine_id to int."""
468 e = error.unwrap_exception(content)
469 # print e.traceback
470 if e.engine_info:
471 e_uuid = e.engine_info['engine_uuid']
472 eid = self._engines[e_uuid]
473 e.engine_info['engine_id'] = eid
474 return e
475
476 def _extract_metadata(self, header, parent, content):
477 md = {'msg_id' : parent['msg_id'],
478 'received' : datetime.now(),
479 'engine_uuid' : header.get('engine', None),
480 'follow' : parent.get('follow', []),
481 'after' : parent.get('after', []),
482 'status' : content['status'],
483 }
484
485 if md['engine_uuid'] is not None:
486 md['engine_id'] = self._engines.get(md['engine_uuid'], None)
487
488 if 'date' in parent:
489 md['submitted'] = datetime.strptime(parent['date'], util.ISO8601)
490 if 'started' in header:
491 md['started'] = datetime.strptime(header['started'], util.ISO8601)
492 if 'date' in header:
493 md['completed'] = datetime.strptime(header['date'], util.ISO8601)
494 return md
495
496 def _register_engine(self, msg):
497 """Register a new engine, and update our connection info."""
498 content = msg['content']
499 eid = content['id']
500 d = {eid : content['queue']}
501 self._update_engines(d)
502
503 def _unregister_engine(self, msg):
504 """Unregister an engine that has died."""
505 content = msg['content']
506 eid = int(content['id'])
507 if eid in self._ids:
508 self._ids.remove(eid)
509 uuid = self._engines.pop(eid)
510
511 self._handle_stranded_msgs(eid, uuid)
512
513 if self._task_socket and self._task_scheme == 'pure':
514 self._stop_scheduling_tasks()
515
516 def _handle_stranded_msgs(self, eid, uuid):
517 """Handle messages known to be on an engine when the engine unregisters.
518
519 It is possible that this will fire prematurely - that is, an engine will
520 go down after completing a result, and the client will be notified
521 of the unregistration and later receive the successful result.
522 """
523
524 outstanding = self._outstanding_dict[uuid]
525
526 for msg_id in list(outstanding):
527 if msg_id in self.results:
528 # we already have the result for this msg_id
529 continue
530 try:
531 raise error.EngineError("Engine %r died while running task %r"%(eid, msg_id))
532 except:
533 content = error.wrap_exception()
534 # build a fake message:
535 parent = {}
536 header = {}
537 parent['msg_id'] = msg_id
538 header['engine'] = uuid
539 header['date'] = datetime.now().strftime(util.ISO8601)
540 msg = dict(parent_header=parent, header=header, content=content)
541 self._handle_apply_reply(msg)
542
543 def _handle_execute_reply(self, msg):
544 """Save the reply to an execute_request into our results.
545
546 execute messages are never actually used. apply is used instead.
547 """
548
549 parent = msg['parent_header']
550 msg_id = parent['msg_id']
551 if msg_id not in self.outstanding:
552 if msg_id in self.history:
553 print ("got stale result: %s"%msg_id)
554 else:
555 print ("got unknown result: %s"%msg_id)
556 else:
557 self.outstanding.remove(msg_id)
558 self.results[msg_id] = self._unwrap_exception(msg['content'])
559
560 def _handle_apply_reply(self, msg):
561 """Save the reply to an apply_request into our results."""
562 parent = msg['parent_header']
563 msg_id = parent['msg_id']
564 if msg_id not in self.outstanding:
565 if msg_id in self.history:
566 print ("got stale result: %s"%msg_id)
567 print self.results[msg_id]
568 print msg
569 else:
570 print ("got unknown result: %s"%msg_id)
571 else:
572 self.outstanding.remove(msg_id)
573 content = msg['content']
574 header = msg['header']
575
576 # construct metadata:
577 md = self.metadata[msg_id]
578 md.update(self._extract_metadata(header, parent, content))
579 # is this redundant?
580 self.metadata[msg_id] = md
581
582 e_outstanding = self._outstanding_dict[md['engine_uuid']]
583 if msg_id in e_outstanding:
584 e_outstanding.remove(msg_id)
585
586 # construct result:
587 if content['status'] == 'ok':
588 self.results[msg_id] = util.unserialize_object(msg['buffers'])[0]
589 elif content['status'] == 'aborted':
590 self.results[msg_id] = error.TaskAborted(msg_id)
591 elif content['status'] == 'resubmitted':
592 # TODO: handle resubmission
593 pass
594 else:
595 self.results[msg_id] = self._unwrap_exception(content)
596
597 def _flush_notifications(self):
598 """Flush notifications of engine registrations waiting
599 in ZMQ queue."""
600 msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)
601 while msg is not None:
602 if self.debug:
603 pprint(msg)
604 msg = msg[-1]
605 msg_type = msg['msg_type']
606 handler = self._notification_handlers.get(msg_type, None)
607 if handler is None:
608 raise Exception("Unhandled message type: %s"%msg_type)
609 else:
610 handler(msg)
611 msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)
612
613 def _flush_results(self, sock):
614 """Flush task or queue results waiting in ZMQ queue."""
615 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
616 while msg is not None:
617 if self.debug:
618 pprint(msg)
619 msg = msg[-1]
620 msg_type = msg['msg_type']
621 handler = self._queue_handlers.get(msg_type, None)
622 if handler is None:
623 raise Exception("Unhandled message type: %s"%msg_type)
624 else:
625 handler(msg)
626 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
627
628 def _flush_control(self, sock):
629 """Flush replies from the control channel waiting
630 in the ZMQ queue.
631
632 Currently: ignore them."""
633 if self._ignored_control_replies <= 0:
634 return
635 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
636 while msg is not None:
637 self._ignored_control_replies -= 1
638 if self.debug:
639 pprint(msg)
640 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
641
642 def _flush_ignored_control(self):
643 """flush ignored control replies"""
644 while self._ignored_control_replies > 0:
645 self.session.recv(self._control_socket)
646 self._ignored_control_replies -= 1
647
648 def _flush_ignored_hub_replies(self):
649 msg = self.session.recv(self._query_socket, mode=zmq.NOBLOCK)
650 while msg is not None:
651 msg = self.session.recv(self._query_socket, mode=zmq.NOBLOCK)
652
653 def _flush_iopub(self, sock):
654 """Flush replies from the iopub channel waiting
655 in the ZMQ queue.
656 """
657 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
658 while msg is not None:
659 if self.debug:
660 pprint(msg)
661 msg = msg[-1]
662 parent = msg['parent_header']
663 msg_id = parent['msg_id']
664 content = msg['content']
665 header = msg['header']
666 msg_type = msg['msg_type']
667
668 # init metadata:
669 md = self.metadata[msg_id]
670
671 if msg_type == 'stream':
672 name = content['name']
673 s = md[name] or ''
674 md[name] = s + content['data']
675 elif msg_type == 'pyerr':
676 md.update({'pyerr' : self._unwrap_exception(content)})
677 else:
678 md.update({msg_type : content['data']})
679
680 # redundant?
681 self.metadata[msg_id] = md
682
683 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
684
685 #--------------------------------------------------------------------------
686 # len, getitem
687 #--------------------------------------------------------------------------
688
689 def __len__(self):
690 """len(client) returns # of engines."""
691 return len(self.ids)
692
693 def __getitem__(self, key):
694 """index access returns DirectView multiplexer objects
695
696 Must be int, slice, or list/tuple/xrange of ints"""
697 if not isinstance(key, (int, slice, tuple, list, xrange)):
698 raise TypeError("key by int/slice/iterable of ints only, not %s"%(type(key)))
699 else:
700 return self.direct_view(key)
701
702 #--------------------------------------------------------------------------
703 # Begin public methods
704 #--------------------------------------------------------------------------
705
706 @property
707 def ids(self):
708 """Always up-to-date ids property."""
709 self._flush_notifications()
710 # always copy:
711 return list(self._ids)
712
713 def close(self):
714 if self._closed:
715 return
716 snames = filter(lambda n: n.endswith('socket'), dir(self))
717 for socket in map(lambda name: getattr(self, name), snames):
718 if isinstance(socket, zmq.Socket) and not socket.closed:
719 socket.close()
720 self._closed = True
721
722 def spin(self):
723 """Flush any registration notifications and execution results
724 waiting in the ZMQ queue.
725 """
726 if self._notification_socket:
727 self._flush_notifications()
728 if self._mux_socket:
729 self._flush_results(self._mux_socket)
730 if self._task_socket:
731 self._flush_results(self._task_socket)
732 if self._control_socket:
733 self._flush_control(self._control_socket)
734 if self._iopub_socket:
735 self._flush_iopub(self._iopub_socket)
736 if self._query_socket:
737 self._flush_ignored_hub_replies()
738
739 def wait(self, jobs=None, timeout=-1):
740 """waits on one or more `jobs`, for up to `timeout` seconds.
741
742 Parameters
743 ----------
744
745 jobs : int, str, or list of ints and/or strs, or one or more AsyncResult objects
746 ints are indices to self.history
747 strs are msg_ids
748 default: wait on all outstanding messages
749 timeout : float
750 a time in seconds, after which to give up.
751 default is -1, which means no timeout
752
753 Returns
754 -------
755
756 True : when all msg_ids are done
757 False : timeout reached, some msg_ids still outstanding
758 """
759 tic = time.time()
760 if jobs is None:
761 theids = self.outstanding
762 else:
763 if isinstance(jobs, (int, str, AsyncResult)):
764 jobs = [jobs]
765 theids = set()
766 for job in jobs:
767 if isinstance(job, int):
768 # index access
769 job = self.history[job]
770 elif isinstance(job, AsyncResult):
771 map(theids.add, job.msg_ids)
772 continue
773 theids.add(job)
774 if not theids.intersection(self.outstanding):
775 return True
776 self.spin()
777 while theids.intersection(self.outstanding):
778 if timeout >= 0 and ( time.time()-tic ) > timeout:
779 break
780 time.sleep(1e-3)
781 self.spin()
782 return len(theids.intersection(self.outstanding)) == 0
783
784 #--------------------------------------------------------------------------
785 # Control methods
786 #--------------------------------------------------------------------------
787
788 @spin_first
789 @default_block
790 def clear(self, targets=None, block=None):
791 """Clear the namespace in target(s)."""
792 targets = self._build_targets(targets)[0]
793 for t in targets:
794 self.session.send(self._control_socket, 'clear_request', content={}, ident=t)
795 error = False
796 if self.block:
797 self._flush_ignored_control()
798 for i in range(len(targets)):
799 idents,msg = self.session.recv(self._control_socket,0)
800 if self.debug:
801 pprint(msg)
802 if msg['content']['status'] != 'ok':
803 error = self._unwrap_exception(msg['content'])
804 else:
805 self._ignored_control_replies += len(targets)
806 if error:
807 raise error
808
809
810 @spin_first
811 @default_block
812 def abort(self, jobs=None, targets=None, block=None):
813 """Abort specific jobs from the execution queues of target(s).
814
815 This is a mechanism to prevent jobs that have already been submitted
816 from executing.
817
818 Parameters
819 ----------
820
821 jobs : msg_id, list of msg_ids, or AsyncResult
822 The jobs to be aborted
823
824
825 """
826 targets = self._build_targets(targets)[0]
827 msg_ids = []
828 if isinstance(jobs, (basestring,AsyncResult)):
829 jobs = [jobs]
830 bad_ids = filter(lambda obj: not isinstance(obj, (basestring, AsyncResult)), jobs)
831 if bad_ids:
832 raise TypeError("Invalid msg_id type %r, expected str or AsyncResult"%bad_ids[0])
833 for j in jobs:
834 if isinstance(j, AsyncResult):
835 msg_ids.extend(j.msg_ids)
836 else:
837 msg_ids.append(j)
838 content = dict(msg_ids=msg_ids)
839 for t in targets:
840 self.session.send(self._control_socket, 'abort_request',
841 content=content, ident=t)
842 error = False
843 if self.block:
844 self._flush_ignored_control()
845 for i in range(len(targets)):
846 idents,msg = self.session.recv(self._control_socket,0)
847 if self.debug:
848 pprint(msg)
849 if msg['content']['status'] != 'ok':
850 error = self._unwrap_exception(msg['content'])
851 else:
852 self._ignored_control_replies += len(targets)
853 if error:
854 raise error
855
856 @spin_first
857 @default_block
858 def shutdown(self, targets=None, restart=False, hub=False, block=None):
859 """Terminates one or more engine processes, optionally including the hub."""
860 if hub:
861 targets = 'all'
862 targets = self._build_targets(targets)[0]
863 for t in targets:
864 self.session.send(self._control_socket, 'shutdown_request',
865 content={'restart':restart},ident=t)
866 error = False
867 if block or hub:
868 self._flush_ignored_control()
869 for i in range(len(targets)):
870 idents,msg = self.session.recv(self._control_socket, 0)
871 if self.debug:
872 pprint(msg)
873 if msg['content']['status'] != 'ok':
874 error = self._unwrap_exception(msg['content'])
875 else:
876 self._ignored_control_replies += len(targets)
877
878 if hub:
879 time.sleep(0.25)
880 self.session.send(self._query_socket, 'shutdown_request')
881 idents,msg = self.session.recv(self._query_socket, 0)
882 if self.debug:
883 pprint(msg)
884 if msg['content']['status'] != 'ok':
885 error = self._unwrap_exception(msg['content'])
886
887 if error:
888 raise error
889
890 #--------------------------------------------------------------------------
891 # Execution methods
892 #--------------------------------------------------------------------------
893
894 @default_block
895 def _execute(self, code, targets='all', block=None):
896 """Executes `code` on `targets` in blocking or nonblocking manner.
897
898 ``execute`` is always `bound` (affects engine namespace)
899
900 Parameters
901 ----------
902
903 code : str
904 the code string to be executed
905 targets : int/str/list of ints/strs
906 the engines on which to execute
907 default : all
908 block : bool
909 whether or not to wait until done to return
910 default: self.block
911 """
912 return self[targets].execute(code, block=block)
913
914 def _maybe_raise(self, result):
915 """wrapper for maybe raising an exception if apply failed."""
916 if isinstance(result, error.RemoteError):
917 raise result
918
919 return result
920
921 def send_apply_message(self, socket, f, args=None, kwargs=None, subheader=None, track=False,
922 ident=None):
923 """construct and send an apply message via a socket.
924
925 This is the principal method with which all engine execution is performed by views.
926 """
927
928 assert not self._closed, "cannot use me anymore, I'm closed!"
929 # defaults:
930 args = args if args is not None else []
931 kwargs = kwargs if kwargs is not None else {}
932 subheader = subheader if subheader is not None else {}
933
934 # validate arguments
935 if not callable(f):
936 raise TypeError("f must be callable, not %s"%type(f))
937 if not isinstance(args, (tuple, list)):
938 raise TypeError("args must be tuple or list, not %s"%type(args))
939 if not isinstance(kwargs, dict):
940 raise TypeError("kwargs must be dict, not %s"%type(kwargs))
941 if not isinstance(subheader, dict):
942 raise TypeError("subheader must be dict, not %s"%type(subheader))
943
944 if not self._ids:
945 # flush notification socket if no engines yet
946 any_ids = self.ids
947 if not any_ids:
948 raise error.NoEnginesRegistered("Can't execute without any connected engines.")
949 # enforce types of f,args,kwargs
950
951 bufs = util.pack_apply_message(f,args,kwargs)
952
953 msg = self.session.send(socket, "apply_request", buffers=bufs, ident=ident,
954 subheader=subheader, track=track)
955
956 msg_id = msg['msg_id']
957 self.outstanding.add(msg_id)
958 if ident:
959 # possibly routed to a specific engine
960 if isinstance(ident, list):
961 ident = ident[-1]
962 if ident in self._engines.values():
963 # save for later, in case of engine death
964 self._outstanding_dict[ident].add(msg_id)
965 self.history.append(msg_id)
966 self.metadata[msg_id]['submitted'] = datetime.now()
967
968 return msg
969
970 #--------------------------------------------------------------------------
971 # construct a View object
972 #--------------------------------------------------------------------------
973
974 def load_balanced_view(self, targets=None):
975 """construct a DirectView object.
976
977 If no arguments are specified, create a LoadBalancedView
978 using all engines.
979
980 Parameters
981 ----------
982
983 targets: list,slice,int,etc. [default: use all engines]
984 The subset of engines across which to load-balance
985 """
986 if targets is not None:
987 targets = self._build_targets(targets)[1]
988 return LoadBalancedView(client=self, socket=self._task_socket, targets=targets)
989
990 def direct_view(self, targets='all'):
991 """construct a DirectView object.
992
993 If no targets are specified, create a DirectView
994 using all engines.
995
996 Parameters
997 ----------
998
999 targets: list,slice,int,etc. [default: use all engines]
1000 The engines to use for the View
1001 """
1002 single = isinstance(targets, int)
1003 targets = self._build_targets(targets)[1]
1004 if single:
1005 targets = targets[0]
1006 return DirectView(client=self, socket=self._mux_socket, targets=targets)
1007
1008 #--------------------------------------------------------------------------
1009 # Data movement (TO BE REMOVED)
1010 #--------------------------------------------------------------------------
1011
1012 @default_block
1013 def _push(self, ns, targets='all', block=None, track=False):
1014 """Push the contents of `ns` into the namespace on `target`"""
1015 if not isinstance(ns, dict):
1016 raise TypeError("Must be a dict, not %s"%type(ns))
1017 result = self.apply(util._push, kwargs=ns, targets=targets, block=block, bound=True, balanced=False, track=track)
1018 if not block:
1019 return result
1020
1021 @default_block
1022 def _pull(self, keys, targets='all', block=None):
1023 """Pull objects from `target`'s namespace by `keys`"""
1024 if isinstance(keys, basestring):
1025 pass
1026 elif isinstance(keys, (list,tuple,set)):
1027 for key in keys:
1028 if not isinstance(key, basestring):
1029 raise TypeError("keys must be str, not type %r"%type(key))
1030 else:
1031 raise TypeError("keys must be strs, not %r"%keys)
1032 result = self.apply(util._pull, (keys,), targets=targets, block=block, bound=True, balanced=False)
1033 return result
1034
1035 #--------------------------------------------------------------------------
1036 # Query methods
1037 #--------------------------------------------------------------------------
1038
1039 @spin_first
1040 @default_block
1041 def get_result(self, indices_or_msg_ids=None, block=None):
1042 """Retrieve a result by msg_id or history index, wrapped in an AsyncResult object.
1043
1044 If the client already has the results, no request to the Hub will be made.
1045
1046 This is a convenient way to construct AsyncResult objects, which are wrappers
1047 that include metadata about execution, and allow for awaiting results that
1048 were not submitted by this Client.
1049
1050 It can also be a convenient way to retrieve the metadata associated with
1051 blocking execution, since it always returns an AsyncResult object.
1052
1053 Examples
1054 --------
1055 ::
1056
1057 In [10]: r = client.apply()
1058
1059 Parameters
1060 ----------
1061
1062 indices_or_msg_ids : integer history index, str msg_id, or list of either
1063 The indices or msg_ids of indices to be retrieved
1064
1065 block : bool
1066 Whether to wait for the result to be done
1067
1068 Returns
1069 -------
1070
1071 AsyncResult
1072 A single AsyncResult object will always be returned.
1073
1074 AsyncHubResult
1075 A subclass of AsyncResult that retrieves results from the Hub
1076
1077 """
1078 if indices_or_msg_ids is None:
1079 indices_or_msg_ids = -1
1080
1081 if not isinstance(indices_or_msg_ids, (list,tuple)):
1082 indices_or_msg_ids = [indices_or_msg_ids]
1083
1084 theids = []
1085 for id in indices_or_msg_ids:
1086 if isinstance(id, int):
1087 id = self.history[id]
1088 if not isinstance(id, str):
1089 raise TypeError("indices must be str or int, not %r"%id)
1090 theids.append(id)
1091
1092 local_ids = filter(lambda msg_id: msg_id in self.history or msg_id in self.results, theids)
1093 remote_ids = filter(lambda msg_id: msg_id not in local_ids, theids)
1094
1095 if remote_ids:
1096 ar = AsyncHubResult(self, msg_ids=theids)
1097 else:
1098 ar = AsyncResult(self, msg_ids=theids)
1099
1100 if block:
1101 ar.wait()
1102
1103 return ar
1104
1105 @spin_first
1106 def result_status(self, msg_ids, status_only=True):
1107 """Check on the status of the result(s) of the apply request with `msg_ids`.
1108
1109 If status_only is False, then the actual results will be retrieved, else
1110 only the status of the results will be checked.
1111
1112 Parameters
1113 ----------
1114
1115 msg_ids : list of msg_ids
1116 if int:
1117 Passed as index to self.history for convenience.
1118 status_only : bool (default: True)
1119 if False:
1120 Retrieve the actual results of completed tasks.
1121
1122 Returns
1123 -------
1124
1125 results : dict
1126 There will always be the keys 'pending' and 'completed', which will
1127 be lists of msg_ids that are incomplete or complete. If `status_only`
1128 is False, then completed results will be keyed by their `msg_id`.
1129 """
1130 if not isinstance(msg_ids, (list,tuple)):
1131 msg_ids = [msg_ids]
1132
1133 theids = []
1134 for msg_id in msg_ids:
1135 if isinstance(msg_id, int):
1136 msg_id = self.history[msg_id]
1137 if not isinstance(msg_id, basestring):
1138 raise TypeError("msg_ids must be str, not %r"%msg_id)
1139 theids.append(msg_id)
1140
1141 completed = []
1142 local_results = {}
1143
1144 # comment this block out to temporarily disable local shortcut:
1145 for msg_id in theids:
1146 if msg_id in self.results:
1147 completed.append(msg_id)
1148 local_results[msg_id] = self.results[msg_id]
1149 theids.remove(msg_id)
1150
1151 if theids: # some not locally cached
1152 content = dict(msg_ids=theids, status_only=status_only)
1153 msg = self.session.send(self._query_socket, "result_request", content=content)
1154 zmq.select([self._query_socket], [], [])
1155 idents,msg = self.session.recv(self._query_socket, zmq.NOBLOCK)
1156 if self.debug:
1157 pprint(msg)
1158 content = msg['content']
1159 if content['status'] != 'ok':
1160 raise self._unwrap_exception(content)
1161 buffers = msg['buffers']
1162 else:
1163 content = dict(completed=[],pending=[])
1164
1165 content['completed'].extend(completed)
1166
1167 if status_only:
1168 return content
1169
1170 failures = []
1171 # load cached results into result:
1172 content.update(local_results)
1173 # update cache with results:
1174 for msg_id in sorted(theids):
1175 if msg_id in content['completed']:
1176 rec = content[msg_id]
1177 parent = rec['header']
1178 header = rec['result_header']
1179 rcontent = rec['result_content']
1180 iodict = rec['io']
1181 if isinstance(rcontent, str):
1182 rcontent = self.session.unpack(rcontent)
1183
1184 md = self.metadata[msg_id]
1185 md.update(self._extract_metadata(header, parent, rcontent))
1186 md.update(iodict)
1187
1188 if rcontent['status'] == 'ok':
1189 res,buffers = util.unserialize_object(buffers)
1190 else:
1191 print rcontent
1192 res = self._unwrap_exception(rcontent)
1193 failures.append(res)
1194
1195 self.results[msg_id] = res
1196 content[msg_id] = res
1197
1198 if len(theids) == 1 and failures:
1199 raise failures[0]
1200
1201 error.collect_exceptions(failures, "result_status")
1202 return content
1203
1204 @spin_first
1205 def queue_status(self, targets='all', verbose=False):
1206 """Fetch the status of engine queues.
1207
1208 Parameters
1209 ----------
1210
1211 targets : int/str/list of ints/strs
1212 the engines whose states are to be queried.
1213 default : all
1214 verbose : bool
1215 Whether to return lengths only, or lists of ids for each element
1216 """
1217 engine_ids = self._build_targets(targets)[1]
1218 content = dict(targets=engine_ids, verbose=verbose)
1219 self.session.send(self._query_socket, "queue_request", content=content)
1220 idents,msg = self.session.recv(self._query_socket, 0)
1221 if self.debug:
1222 pprint(msg)
1223 content = msg['content']
1224 status = content.pop('status')
1225 if status != 'ok':
1226 raise self._unwrap_exception(content)
1227 content = util.rekey(content)
1228 if isinstance(targets, int):
1229 return content[targets]
1230 else:
1231 return content
1232
1233 @spin_first
1234 def purge_results(self, jobs=[], targets=[]):
1235 """Tell the Hub to forget results.
1236
1237 Individual results can be purged by msg_id, or the entire
1238 history of specific targets can be purged.
1239
1240 Parameters
1241 ----------
1242
1243 jobs : str or list of str or AsyncResult objects
1244 the msg_ids whose results should be forgotten.
1245 targets : int/str/list of ints/strs
1246 The targets, by uuid or int_id, whose entire history is to be purged.
1247 Use `targets='all'` to scrub everything from the Hub's memory.
1248
1249 default : None
1250 """
1251 if not targets and not jobs:
1252 raise ValueError("Must specify at least one of `targets` and `jobs`")
1253 if targets:
1254 targets = self._build_targets(targets)[1]
1255
1256 # construct msg_ids from jobs
1257 msg_ids = []
1258 if isinstance(jobs, (basestring,AsyncResult)):
1259 jobs = [jobs]
1260 bad_ids = filter(lambda obj: not isinstance(obj, (basestring, AsyncResult)), jobs)
1261 if bad_ids:
1262 raise TypeError("Invalid msg_id type %r, expected str or AsyncResult"%bad_ids[0])
1263 for j in jobs:
1264 if isinstance(j, AsyncResult):
1265 msg_ids.extend(j.msg_ids)
1266 else:
1267 msg_ids.append(j)
1268
1269 content = dict(targets=targets, msg_ids=msg_ids)
1270 self.session.send(self._query_socket, "purge_request", content=content)
1271 idents, msg = self.session.recv(self._query_socket, 0)
1272 if self.debug:
1273 pprint(msg)
1274 content = msg['content']
1275 if content['status'] != 'ok':
1276 raise self._unwrap_exception(content)
1277
1278
1279 __all__ = [ 'Client' ]
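A minimal usage sketch of the Client pieces above (wait, get_result, queue_status, purge_results), not part of the committed file. It assumes a controller and engines are already running, that `Client()` with no arguments can find the default connection information, and `double` is purely illustrative:

    from IPython.parallel import Client

    def double(x):
        return x * 2

    rc = Client()                    # assumes a running controller + engines
    dv = rc.direct_view('all')       # DirectView over every registered engine

    ar = dv.apply_async(double, 21)  # returns an AsyncResult immediately
    rc.wait(ar.msg_ids, timeout=5)   # True once the ids leave rc.outstanding

    print(rc.get_result(ar.msg_ids).get())  # [42, 42, ...], one per engine
    print(rc.queue_status())                # per-engine queue/task counts
    rc.purge_results(jobs=ar.msg_ids)       # ask the Hub to forget these results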
@@ -0,0 +1,158 b''
1 # encoding: utf-8
2
3 """Classes used in scattering and gathering sequences.
4
5 Scattering consists of partitioning a sequence and sending the various
6 pieces to individual nodes in a cluster.
7 """
8
9 __docformat__ = "restructuredtext en"
10
11 #-------------------------------------------------------------------------------
12 # Copyright (C) 2008 The IPython Development Team
13 #
14 # Distributed under the terms of the BSD License. The full license is in
15 # the file COPYING, distributed as part of this software.
16 #-------------------------------------------------------------------------------
17
18 #-------------------------------------------------------------------------------
19 # Imports
20 #-------------------------------------------------------------------------------
21
22 import types
23
24 from IPython.utils.data import flatten as utils_flatten
25
26 #-------------------------------------------------------------------------------
27 # Figure out which array packages are present and their array types
28 #-------------------------------------------------------------------------------
29
30 arrayModules = []
31 try:
32 import Numeric
33 except ImportError:
34 pass
35 else:
36 arrayModules.append({'module':Numeric, 'type':Numeric.arraytype})
37 try:
38 import numpy
39 except ImportError:
40 pass
41 else:
42 arrayModules.append({'module':numpy, 'type':numpy.ndarray})
43 try:
44 import numarray
45 except ImportError:
46 pass
47 else:
48 arrayModules.append({'module':numarray,
49 'type':numarray.numarraycore.NumArray})
50
51 class Map:
52 """A class for partitioning a sequence using a map."""
53
54 def getPartition(self, seq, p, q):
55 """Returns the pth partition of q partitions of seq."""
56
57 # Test for error conditions here
58 if p<0 or p>=q:
59 print "No partition exists."
60 return
61
62 remainder = len(seq)%q
63 basesize = len(seq)/q
64 hi = []
65 lo = []
66 for n in range(q):
67 if n < remainder:
68 lo.append(n * (basesize + 1))
69 hi.append(lo[-1] + basesize + 1)
70 else:
71 lo.append(n*basesize + remainder)
72 hi.append(lo[-1] + basesize)
73
74
75 result = seq[lo[p]:hi[p]]
76 return result
77
78 def joinPartitions(self, listOfPartitions):
79 return self.concatenate(listOfPartitions)
80
81 def concatenate(self, listOfPartitions):
82 testObject = listOfPartitions[0]
83 # First see if we have a known array type
84 for m in arrayModules:
85 #print m
86 if isinstance(testObject, m['type']):
87 return m['module'].concatenate(listOfPartitions)
88 # Next try for Python sequence types
89 if isinstance(testObject, (types.ListType, types.TupleType)):
90 return utils_flatten(listOfPartitions)
91 # If we have scalars, just return listOfPartitions
92 return listOfPartitions
93
94 class RoundRobinMap(Map):
95 """Partitions a sequence in a roun robin fashion.
96
97 This currently does not work!
98 """
99
100 def getPartition(self, seq, p, q):
101 # if not isinstance(seq,(list,tuple)):
102 # raise NotImplementedError("cannot RR partition type %s"%type(seq))
103 return seq[p:len(seq):q]
104 #result = []
105 #for i in range(p,len(seq),q):
106 # result.append(seq[i])
107 #return result
108
109 def joinPartitions(self, listOfPartitions):
110 testObject = listOfPartitions[0]
111 # First see if we have a known array type
112 for m in arrayModules:
113 #print m
114 if isinstance(testObject, m['type']):
115 return self.flatten_array(m['type'], listOfPartitions)
116 if isinstance(testObject, (types.ListType, types.TupleType)):
117 return self.flatten_list(listOfPartitions)
118 return listOfPartitions
119
120 def flatten_array(self, klass, listOfPartitions):
121 test = listOfPartitions[0]
122 shape = list(test.shape)
123 shape[0] = sum([ p.shape[0] for p in listOfPartitions])
124 A = klass(shape)
125 N = shape[0]
126 q = len(listOfPartitions)
127 for p,part in enumerate(listOfPartitions):
128 A[p:N:q] = part
129 return A
130
131 def flatten_list(self, listOfPartitions):
132 flat = []
133 for i in range(len(listOfPartitions[0])):
134 flat.extend([ part[i] for part in listOfPartitions if len(part) > i ])
135 return flat
136 #lengths = [len(x) for x in listOfPartitions]
137 #maxPartitionLength = len(listOfPartitions[0])
138 #numberOfPartitions = len(listOfPartitions)
139 #concat = self.concatenate(listOfPartitions)
140 #totalLength = len(concat)
141 #result = []
142 #for i in range(maxPartitionLength):
143 # result.append(concat[i:totalLength:maxPartitionLength])
144 # return self.concatenate(listOfPartitions)
145
146 def mappable(obj):
147 """return whether an object is mappable or not."""
148 if isinstance(obj, (tuple,list)):
149 return True
150 for m in arrayModules:
151 if isinstance(obj,m['type']):
152 return True
153 return False
154
155 dists = {'b':Map,'r':RoundRobinMap}
156
157
158
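To make the two partition schemes concrete, a small sketch run against a plain list (not part of the committed file; the import path follows this branch's layout, and Python 2 integer division is assumed, as in the module itself):

    from IPython.parallel.client.map import Map, RoundRobinMap

    seq = range(10)

    # 'b' (Map): contiguous chunks, with the remainder spread over the first ones
    m = Map()
    parts = [m.getPartition(seq, p, 3) for p in range(3)]
    # parts == [[0, 1, 2, 3], [4, 5, 6], [7, 8, 9]]
    assert m.joinPartitions(parts) == seq

    # 'r' (RoundRobinMap): striped slices seq[p::q]; note the class docstring
    # above warns that this scheme is not fully working yet
    rr = RoundRobinMap()
    rr_parts = [rr.getPartition(seq, p, 3) for p in range(3)]
    # rr_parts == [[0, 3, 6, 9], [1, 4, 7], [2, 5, 8]]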
@@ -0,0 +1,200 b''
1 """Remote Functions and decorators for Views."""
2 #-----------------------------------------------------------------------------
3 # Copyright (C) 2010 The IPython Development Team
4 #
5 # Distributed under the terms of the BSD License. The full license is in
6 # the file COPYING, distributed as part of this software.
7 #-----------------------------------------------------------------------------
8
9 #-----------------------------------------------------------------------------
10 # Imports
11 #-----------------------------------------------------------------------------
12
13 import warnings
14
15 from IPython.testing import decorators as testdec
16
17 from . import map as Map
18 from .asyncresult import AsyncMapResult
19
20 #-----------------------------------------------------------------------------
21 # Decorators
22 #-----------------------------------------------------------------------------
23
24 @testdec.skip_doctest
25 def remote(view, block=None, **flags):
26 """Turn a function into a remote function.
27
28 This method can be used for map:
29
30 In [1]: @remote(view,block=True)
31 ...: def func(a):
32 ...: pass
33 """
34
35 def remote_function(f):
36 return RemoteFunction(view, f, block=block, **flags)
37 return remote_function
38
39 @testdec.skip_doctest
40 def parallel(view, dist='b', block=None, **flags):
41 """Turn a function into a parallel remote function.
42
43 This method can be used for map:
44
45 In [1]: @parallel(view, block=True)
46 ...: def func(a):
47 ...: pass
48 """
49
50 def parallel_function(f):
51 return ParallelFunction(view, f, dist=dist, block=block, **flags)
52 return parallel_function
53
54 #--------------------------------------------------------------------------
55 # Classes
56 #--------------------------------------------------------------------------
57
58 class RemoteFunction(object):
59 """Turn an existing function into a remote function.
60
61 Parameters
62 ----------
63
64 view : View instance
65 The view to be used for execution
66 f : callable
67 The function to be wrapped into a remote function
68 block : bool [default: None]
69 Whether to wait for results or not. The default behavior is
70 to use the current `block` attribute of `view`
71
72 **flags : remaining kwargs are passed to View.temp_flags
73 """
74
75 view = None # the remote connection
76 func = None # the wrapped function
77 block = None # whether to block
78 flags = None # dict of extra kwargs for temp_flags
79
80 def __init__(self, view, f, block=None, **flags):
81 self.view = view
82 self.func = f
83 self.block=block
84 self.flags=flags
85
86 def __call__(self, *args, **kwargs):
87 block = self.view.block if self.block is None else self.block
88 with self.view.temp_flags(block=block, **self.flags):
89 return self.view.apply(self.func, *args, **kwargs)
90
91
92 class ParallelFunction(RemoteFunction):
93 """Class for mapping a function to sequences.
94
95 This will distribute the sequences according to a mapper, and call
96 the function on each sub-sequence. If called via map, then the function
97 will be called once on each element, rather than once per sub-sequence.
98
99 Parameters
100 ----------
101
102 view : View instance
103 The view to be used for execution
104 f : callable
105 The function to be wrapped into a remote function
106 dist : str [default: 'b']
107 The key for which mapObject to use to distribute sequences
108 options are:
109 * 'b' : use contiguous chunks in order
110 * 'r' : use round-robin striping
111 block : bool [default: None]
112 Whether to wait for results or not. The default behavior is
113 to use the current `block` attribute of `view`
114 chunksize : int or None
115 The size of chunk to use when breaking up sequences in a load-balanced manner
116 **flags : remaining kwargs are passed to View.temp_flags
117 """
118
119 chunksize=None
120 mapObject=None
121
122 def __init__(self, view, f, dist='b', block=None, chunksize=None, **flags):
123 super(ParallelFunction, self).__init__(view, f, block=block, **flags)
124 self.chunksize = chunksize
125
126 mapClass = Map.dists[dist]
127 self.mapObject = mapClass()
128
129 def __call__(self, *sequences):
130 # check that the length of sequences match
131 len_0 = len(sequences[0])
132 for s in sequences:
133 if len(s)!=len_0:
134 msg = 'all sequences must have equal length, but %i!=%i'%(len_0,len(s))
135 raise ValueError(msg)
136 balanced = 'Balanced' in self.view.__class__.__name__
137 if balanced:
138 if self.chunksize:
139 nparts = len_0/self.chunksize + int(len_0%self.chunksize > 0)
140 else:
141 nparts = len_0
142 targets = [None]*nparts
143 else:
144 if self.chunksize:
145 warnings.warn("`chunksize` is ignored unless load balancing", UserWarning)
146 # multiplexed:
147 targets = self.view.targets
148 nparts = len(targets)
149
150 msg_ids = []
151 # my_f = lambda *a: map(self.func, *a)
152 client = self.view.client
153 for index, t in enumerate(targets):
154 args = []
155 for seq in sequences:
156 part = self.mapObject.getPartition(seq, index, nparts)
157 if len(part) == 0:
158 continue
159 else:
160 args.append(part)
161 if not args:
162 continue
163
164 # print (args)
165 if hasattr(self, '_map'):
166 f = map
167 args = [self.func]+args
168 else:
169 f=self.func
170
171 view = self.view if balanced else client[t]
172 with view.temp_flags(block=False, **self.flags):
173 ar = view.apply(f, *args)
174
175 msg_ids.append(ar.msg_ids[0])
176
177 r = AsyncMapResult(self.view.client, msg_ids, self.mapObject, fname=self.func.__name__)
178
179 if self.block:
180 try:
181 return r.get()
182 except KeyboardInterrupt:
183 return r
184 else:
185 return r
186
187 def map(self, *sequences):
188 """call a function on each element of a sequence remotely.
189 This should behave very much like the builtin map, but return an AsyncMapResult
190 if self.block is False.
191 """
192 # set _map as a flag for use inside self.__call__
193 self._map = True
194 try:
195 ret = self.__call__(*sequences)
196 finally:
197 del self._map
198 return ret
199
200 __all__ = ['remote', 'parallel', 'RemoteFunction', 'ParallelFunction'] No newline at end of file
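A hedged sketch of how the `remote` and `parallel` decorators above are meant to be used, via the `view.remote()` / `view.parallel()` wrappers defined in the next file (assuming a running cluster and a reachable `Client()`; the decorated functions are illustrative only):

    from IPython.parallel import Client

    rc = Client()                  # assumes a running controller + engines
    dv = rc.direct_view('all')

    @dv.remote(block=True)
    def hostname():
        # runs once on each engine in the view
        import socket
        return socket.gethostname()

    print(hostname())              # list of hostnames, one per engine

    @dv.parallel(dist='b', block=True)
    def square(x):
        return x * x

    # .map behaves like the builtin map: one call per element, chunked per engine
    print(square.map(range(8)))    # [0, 1, 4, 9, 16, 25, 36, 49]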
This diff has been collapsed as it changes many lines (1033 lines changed).
@@ -0,0 +1,1033 b''
1 """Views of remote engines."""
2 #-----------------------------------------------------------------------------
3 # Copyright (C) 2010 The IPython Development Team
4 #
5 # Distributed under the terms of the BSD License. The full license is in
6 # the file COPYING, distributed as part of this software.
7 #-----------------------------------------------------------------------------
8
9 #-----------------------------------------------------------------------------
10 # Imports
11 #-----------------------------------------------------------------------------
12
13 import imp
14 import sys
15 import warnings
16 from contextlib import contextmanager
17 from types import ModuleType
18
19 import zmq
20
21 from IPython.testing import decorators as testdec
22 from IPython.utils.traitlets import HasTraits, Any, Bool, List, Dict, Set, Int, Instance, CFloat
23
24 from IPython.external.decorator import decorator
25
26 from IPython.parallel import util
27 from IPython.parallel.controller.dependency import Dependency, dependent
28
29 from . import map as Map
30 from .asyncresult import AsyncResult, AsyncMapResult
31 from .remotefunction import ParallelFunction, parallel, remote
32
33 #-----------------------------------------------------------------------------
34 # Decorators
35 #-----------------------------------------------------------------------------
36
37 @decorator
38 def save_ids(f, self, *args, **kwargs):
39 """Keep our history and outstanding attributes up to date after a method call."""
40 n_previous = len(self.client.history)
41 try:
42 ret = f(self, *args, **kwargs)
43 finally:
44 nmsgs = len(self.client.history) - n_previous
45 msg_ids = self.client.history[-nmsgs:]
46 self.history.extend(msg_ids)
47 map(self.outstanding.add, msg_ids)
48 return ret
49
50 @decorator
51 def sync_results(f, self, *args, **kwargs):
52 """sync relevant results from self.client to our results attribute."""
53 ret = f(self, *args, **kwargs)
54 delta = self.outstanding.difference(self.client.outstanding)
55 completed = self.outstanding.intersection(delta)
56 self.outstanding = self.outstanding.difference(completed)
57 for msg_id in completed:
58 self.results[msg_id] = self.client.results[msg_id]
59 return ret
60
61 @decorator
62 def spin_after(f, self, *args, **kwargs):
63 """call spin after the method."""
64 ret = f(self, *args, **kwargs)
65 self.spin()
66 return ret
67
68 #-----------------------------------------------------------------------------
69 # Classes
70 #-----------------------------------------------------------------------------
71
72 @testdec.skip_doctest
73 class View(HasTraits):
74 """Base View class for more convenint apply(f,*args,**kwargs) syntax via attributes.
75
76 Don't use this class, use subclasses.
77
78 Methods
79 -------
80
81 spin
82 flushes incoming results and registration state changes
83 control methods also spin, and requesting `ids` keeps the state up to date
84
85 wait
86 wait on one or more msg_ids
87
88 execution methods
89 apply
90 legacy: execute, run
91
92 data movement
93 push, pull, scatter, gather
94
95 query methods
96 get_result, queue_status, purge_results, result_status
97
98 control methods
99 abort, shutdown
100
101 """
102 # flags
103 block=Bool(False)
104 track=Bool(True)
105 targets = Any()
106
107 history=List()
108 outstanding = Set()
109 results = Dict()
110 client = Instance('IPython.parallel.Client')
111
112 _socket = Instance('zmq.Socket')
113 _flag_names = List(['targets', 'block', 'track'])
114 _targets = Any()
115 _idents = Any()
116
117 def __init__(self, client=None, socket=None, **flags):
118 super(View, self).__init__(client=client, _socket=socket)
119 self.block = client.block
120
121 self.set_flags(**flags)
122
123 assert not self.__class__ is View, "Don't use base View objects, use subclasses"
124
125
126 def __repr__(self):
127 strtargets = str(self.targets)
128 if len(strtargets) > 16:
129 strtargets = strtargets[:12]+'...]'
130 return "<%s %s>"%(self.__class__.__name__, strtargets)
131
132 def set_flags(self, **kwargs):
133 """set my attribute flags by keyword.
134
135 Views determine behavior with a few attributes (`block`, `track`, etc.).
136 These attributes can be set all at once by name with this method.
137
138 Parameters
139 ----------
140
141 block : bool
142 whether to wait for results
143 track : bool
144 whether to create a MessageTracker to allow the user to
145 safely edit arrays and buffers after non-copying
146 sends.
147 """
148 for name, value in kwargs.iteritems():
149 if name not in self._flag_names:
150 raise KeyError("Invalid name: %r"%name)
151 else:
152 setattr(self, name, value)
153
154 @contextmanager
155 def temp_flags(self, **kwargs):
156 """temporarily set flags, for use in `with` statements.
157
158 See set_flags for permanent setting of flags
159
160 Examples
161 --------
162
163 >>> view.track=False
164 ...
165 >>> with view.temp_flags(track=True):
166 ... ar = view.apply(dostuff, my_big_array)
167 ... ar.tracker.wait() # wait for send to finish
168 >>> view.track
169 False
170
171 """
172 # preflight: save flags, and set temporaries
173 saved_flags = {}
174 for f in self._flag_names:
175 saved_flags[f] = getattr(self, f)
176 self.set_flags(**kwargs)
177 # yield to the with-statement block
178 try:
179 yield
180 finally:
181 # postflight: restore saved flags
182 self.set_flags(**saved_flags)
183
184
185 #----------------------------------------------------------------
186 # apply
187 #----------------------------------------------------------------
188
189 @sync_results
190 @save_ids
191 def _really_apply(self, f, args, kwargs, block=None, **options):
192 """wrapper for client.send_apply_message"""
193 raise NotImplementedError("Implement in subclasses")
194
195 def apply(self, f, *args, **kwargs):
196 """calls f(*args, **kwargs) on remote engines, returning the result.
197
198 This method sets all apply flags via this View's attributes.
199
200 if self.block is False:
201 returns AsyncResult
202 else:
203 returns actual result of f(*args, **kwargs)
204 """
205 return self._really_apply(f, args, kwargs)
206
207 def apply_async(self, f, *args, **kwargs):
208 """calls f(*args, **kwargs) on remote engines in a nonblocking manner.
209
210 returns AsyncResult
211 """
212 return self._really_apply(f, args, kwargs, block=False)
213
214 @spin_after
215 def apply_sync(self, f, *args, **kwargs):
216 """calls f(*args, **kwargs) on remote engines in a blocking manner,
217 returning the result.
218
219 returns: actual result of f(*args, **kwargs)
220 """
221 return self._really_apply(f, args, kwargs, block=True)
222
223 #----------------------------------------------------------------
224 # wrappers for client and control methods
225 #----------------------------------------------------------------
226 @sync_results
227 def spin(self):
228 """spin the client, and sync"""
229 self.client.spin()
230
231 @sync_results
232 def wait(self, jobs=None, timeout=-1):
233 """waits on one or more `jobs`, for up to `timeout` seconds.
234
235 Parameters
236 ----------
237
238 jobs : int, str, or list of ints and/or strs, or one or more AsyncResult objects
239 ints are indices to self.history
240 strs are msg_ids
241 default: wait on all outstanding messages
242 timeout : float
243 a time in seconds, after which to give up.
244 default is -1, which means no timeout
245
246 Returns
247 -------
248
249 True : when all msg_ids are done
250 False : timeout reached, some msg_ids still outstanding
251 """
252 if jobs is None:
253 jobs = self.history
254 return self.client.wait(jobs, timeout)
255
256 def abort(self, jobs=None, targets=None, block=None):
257 """Abort jobs on my engines.
258
259 Parameters
260 ----------
261
262 jobs : None, str, list of strs, optional
263 if None: abort all jobs.
264 else: abort specific msg_id(s).
265 """
266 block = block if block is not None else self.block
267 targets = targets if targets is not None else self.targets
268 return self.client.abort(jobs=jobs, targets=targets, block=block)
269
270 def queue_status(self, targets=None, verbose=False):
271 """Fetch the Queue status of my engines"""
272 targets = targets if targets is not None else self.targets
273 return self.client.queue_status(targets=targets, verbose=verbose)
274
275 def purge_results(self, jobs=[], targets=[]):
276 """Instruct the controller to forget specific results."""
277 if targets is None or targets == 'all':
278 targets = self.targets
279 return self.client.purge_results(jobs=jobs, targets=targets)
280
281 def shutdown(self, targets=None, restart=False, hub=False, block=None):
282 """Terminates one or more engine processes, optionally including the hub.
283 """
284 block = self.block if block is None else block
285 if targets is None or targets == 'all':
286 targets = self.targets
287 return self.client.shutdown(targets=targets, restart=restart, hub=hub, block=block)
288
289 @spin_after
290 def get_result(self, indices_or_msg_ids=None):
291 """return one or more results, specified by history index or msg_id.
292
293 See client.get_result for details.
294
295 """
296
297 if indices_or_msg_ids is None:
298 indices_or_msg_ids = -1
299 if isinstance(indices_or_msg_ids, int):
300 indices_or_msg_ids = self.history[indices_or_msg_ids]
301 elif isinstance(indices_or_msg_ids, (list,tuple,set)):
302 indices_or_msg_ids = list(indices_or_msg_ids)
303 for i,index in enumerate(indices_or_msg_ids):
304 if isinstance(index, int):
305 indices_or_msg_ids[i] = self.history[index]
306 return self.client.get_result(indices_or_msg_ids)
307
308 #-------------------------------------------------------------------
309 # Map
310 #-------------------------------------------------------------------
311
312 def map(self, f, *sequences, **kwargs):
313 """override in subclasses"""
314 raise NotImplementedError
315
316 def map_async(self, f, *sequences, **kwargs):
317 """Parallel version of builtin `map`, using this view's engines.
318
319 This is equivalent to map(...block=False)
320
321 See `self.map` for details.
322 """
323 if 'block' in kwargs:
324 raise TypeError("map_async doesn't take a `block` keyword argument.")
325 kwargs['block'] = False
326 return self.map(f,*sequences,**kwargs)
327
328 def map_sync(self, f, *sequences, **kwargs):
329 """Parallel version of builtin `map`, using this view's engines.
330
331 This is equivalent to map(...block=True)
332
333 See `self.map` for details.
334 """
335 if 'block' in kwargs:
336 raise TypeError("map_sync doesn't take a `block` keyword argument.")
337 kwargs['block'] = True
338 return self.map(f,*sequences,**kwargs)
339
340 def imap(self, f, *sequences, **kwargs):
341 """Parallel version of `itertools.imap`.
342
343 See `self.map` for details.
344
345 """
346
347 return iter(self.map_async(f,*sequences, **kwargs))
348
349 #-------------------------------------------------------------------
350 # Decorators
351 #-------------------------------------------------------------------
352
353 def remote(self, block=True, **flags):
354 """Decorator for making a RemoteFunction"""
355 block = self.block if block is None else block
356 return remote(self, block=block, **flags)
357
358 def parallel(self, dist='b', block=None, **flags):
359 """Decorator for making a ParallelFunction"""
360 block = self.block if block is None else block
361 return parallel(self, dist=dist, block=block, **flags)
362
363 @testdec.skip_doctest
364 class DirectView(View):
365 """Direct Multiplexer View of one or more engines.
366
367 These are created via indexed access to a client:
368
369 >>> dv_1 = client[1]
370 >>> dv_all = client[:]
371 >>> dv_even = client[::2]
372 >>> dv_some = client[1:3]
373
374 This object provides dictionary access to engine namespaces:
375
376 # push a=5:
377 >>> dv['a'] = 5
378 # pull 'foo':
379 >>> dv['foo']
380
381 """
382
383 def __init__(self, client=None, socket=None, targets=None):
384 super(DirectView, self).__init__(client=client, socket=socket, targets=targets)
385
386 @property
387 def importer(self):
388 """sync_imports(local=True) as a property.
389
390 See sync_imports for details.
391
392 """
393 return self.sync_imports(True)
394
395 @contextmanager
396 def sync_imports(self, local=True):
397 """Context Manager for performing simultaneous local and remote imports.
398
399 'import x as y' will *not* work. The 'as y' part will simply be ignored.
400
401 >>> with view.sync_imports():
402 ... from numpy import recarray
403 importing recarray from numpy on engine(s)
404
405 """
406 import __builtin__
407 local_import = __builtin__.__import__
408 modules = set()
409 results = []
410 @util.interactive
411 def remote_import(name, fromlist, level):
412 """the function to be passed to apply, that actually performs the import
413 on the engine, and loads up the user namespace.
414 """
415 import sys
416 user_ns = globals()
417 mod = __import__(name, fromlist=fromlist, level=level)
418 if fromlist:
419 for key in fromlist:
420 user_ns[key] = getattr(mod, key)
421 else:
422 user_ns[name] = sys.modules[name]
423
424 def view_import(name, globals={}, locals={}, fromlist=[], level=-1):
425 """the drop-in replacement for __import__, that optionally imports
426 locally as well.
427 """
428 # don't override nested imports
429 save_import = __builtin__.__import__
430 __builtin__.__import__ = local_import
431
432 if imp.lock_held():
433 # this is a side-effect import, don't do it remotely, or even
434 # ignore the local effects
435 return local_import(name, globals, locals, fromlist, level)
436
437 imp.acquire_lock()
438 if local:
439 mod = local_import(name, globals, locals, fromlist, level)
440 else:
441 raise NotImplementedError("remote-only imports not yet implemented")
442 imp.release_lock()
443
444 key = name+':'+','.join(fromlist or [])
445 if level == -1 and key not in modules:
446 modules.add(key)
447 if fromlist:
448 print "importing %s from %s on engine(s)"%(','.join(fromlist), name)
449 else:
450 print "importing %s on engine(s)"%name
451 results.append(self.apply_async(remote_import, name, fromlist, level))
452 # restore override
453 __builtin__.__import__ = save_import
454
455 return mod
456
457 # override __import__
458 __builtin__.__import__ = view_import
459 try:
460 # enter the block
461 yield
462 except ImportError:
463 if not local:
464 # ignore import errors if not doing local imports
465 pass
466 finally:
467 # always restore __import__
468 __builtin__.__import__ = local_import
469
470 for r in results:
471 # raise possible remote ImportErrors here
472 r.get()
473
474
475 @sync_results
476 @save_ids
477 def _really_apply(self, f, args=None, kwargs=None, targets=None, block=None, track=None):
478 """calls f(*args, **kwargs) on remote engines, returning the result.
479
480 This method sets all of `apply`'s flags via this View's attributes.
481
482 Parameters
483 ----------
484
485 f : callable
486
487 args : list [default: empty]
488
489 kwargs : dict [default: empty]
490
491 targets : target list [default: self.targets]
492 where to run
493 block : bool [default: self.block]
494 whether to block
495 track : bool [default: self.track]
496 whether to ask zmq to track the message, for safe non-copying sends
497
498 Returns
499 -------
500
501 if self.block is False:
502 returns AsyncResult
503 else:
504 returns actual result of f(*args, **kwargs) on the engine(s)
505 This will be a list if self.targets is also a list (even length 1), or
506 the single result if self.targets is an integer engine id
507 """
508 args = [] if args is None else args
509 kwargs = {} if kwargs is None else kwargs
510 block = self.block if block is None else block
511 track = self.track if track is None else track
512 targets = self.targets if targets is None else targets
513
514 _idents = self.client._build_targets(targets)[0]
515 msg_ids = []
516 trackers = []
517 for ident in _idents:
518 msg = self.client.send_apply_message(self._socket, f, args, kwargs, track=track,
519 ident=ident)
520 if track:
521 trackers.append(msg['tracker'])
522 msg_ids.append(msg['msg_id'])
523 tracker = None if track is False else zmq.MessageTracker(*trackers)
524 ar = AsyncResult(self.client, msg_ids, fname=f.__name__, targets=targets, tracker=tracker)
525 if block:
526 try:
527 return ar.get()
528 except KeyboardInterrupt:
529 pass
530 return ar
531
532 @spin_after
533 def map(self, f, *sequences, **kwargs):
534 """view.map(f, *sequences, block=self.block) => list|AsyncMapResult
535
536 Parallel version of builtin `map`, using this View's `targets`.
537
538 There will be one task per target, so work will be chunked
539 if the sequences are longer than `targets`.
540
541 Results can be iterated as they are ready, but will become available in chunks.
542
543 Parameters
544 ----------
545
546 f : callable
547 function to be mapped
548 *sequences: one or more sequences of matching length
549 the sequences to be distributed and passed to `f`
550 block : bool
551 whether to wait for the result or not [default self.block]
552
553 Returns
554 -------
555
556 if block=False:
557 AsyncMapResult
558 An object like AsyncResult, but which reassembles the sequence of results
559 into a single list. AsyncMapResults can be iterated through before all
560 results are complete.
561 else:
562 list
563 the result of map(f,*sequences)
564 """
565
566 block = kwargs.pop('block', self.block)
567 for k in kwargs.keys():
568 if k not in ['block', 'track']:
569 raise TypeError("invalid keyword arg, %r"%k)
570
571 assert len(sequences) > 0, "must have some sequences to map onto!"
572 pf = ParallelFunction(self, f, block=block, **kwargs)
573 return pf.map(*sequences)
574
575 def execute(self, code, targets=None, block=None):
576 """Executes `code` on `targets` in blocking or nonblocking manner.
577
578 ``execute`` is always `bound` (affects engine namespace)
579
580 Parameters
581 ----------
582
583 code : str
584 the code string to be executed
585 block : bool
586 whether or not to wait until done to return
587 default: self.block
588 """
589 return self._really_apply(util._execute, args=(code,), block=block, targets=targets)
590
591 def run(self, filename, targets=None, block=None):
592 """Execute contents of `filename` on my engine(s).
593
594 This simply reads the contents of the file and calls `execute`.
595
596 Parameters
597 ----------
598
599 filename : str
600 The path to the file
601 targets : int/str/list of ints/strs
602 the engines on which to execute
603 default : all
604 block : bool
605 whether or not to wait until done
606 default: self.block
607
608 """
609 with open(filename, 'r') as f:
610 # add newline in case of trailing indented whitespace
611 # which will cause SyntaxError
612 code = f.read()+'\n'
613 return self.execute(code, block=block, targets=targets)
614
615 def update(self, ns):
616 """update remote namespace with dict `ns`
617
618 See `push` for details.
619 """
620 return self.push(ns, block=self.block, track=self.track)
621
622 def push(self, ns, targets=None, block=None, track=None):
623 """update remote namespace with dict `ns`
624
625 Parameters
626 ----------
627
628 ns : dict
629 dict of keys with which to update engine namespace(s)
630 block : bool [default : self.block]
631 whether to wait to be notified of engine receipt
632
633 """
634
635 block = block if block is not None else self.block
636 track = track if track is not None else self.track
637 targets = targets if targets is not None else self.targets
638 # applier = self.apply_sync if block else self.apply_async
639 if not isinstance(ns, dict):
640 raise TypeError("Must be a dict, not %s"%type(ns))
641 return self._really_apply(util._push, (ns,), block=block, track=track, targets=targets)
642
643 def get(self, key_s):
644 """get object(s) by `key_s` from remote namespace
645
646 see `pull` for details.
647 """
648 # block = block if block is not None else self.block
649 return self.pull(key_s, block=True)
650
651 def pull(self, names, targets=None, block=True):
652 """get object(s) by `name` from remote namespace
653
654 will return one object if it is a key.
655 can also take a list of keys, in which case it will return a list of objects.
656 """
657 block = block if block is not None else self.block
658 targets = targets if targets is not None else self.targets
659 applier = self.apply_sync if block else self.apply_async
660 if isinstance(names, basestring):
661 pass
662 elif isinstance(names, (list,tuple,set)):
663 for key in names:
664 if not isinstance(key, basestring):
665 raise TypeError("keys must be str, not type %r"%type(key))
666 else:
667 raise TypeError("names must be strs, not %r"%names)
668 return self._really_apply(util._pull, (names,), block=block, targets=targets)
669
670 def scatter(self, key, seq, dist='b', flatten=False, targets=None, block=None, track=None):
671 """
672 Partition a Python sequence and send the partitions to a set of engines.
673 """
674 block = block if block is not None else self.block
675 track = track if track is not None else self.track
676 targets = targets if targets is not None else self.targets
677
678 mapObject = Map.dists[dist]()
679 nparts = len(targets)
680 msg_ids = []
681 trackers = []
682 for index, engineid in enumerate(targets):
683 partition = mapObject.getPartition(seq, index, nparts)
684 if flatten and len(partition) == 1:
685 ns = {key: partition[0]}
686 else:
687 ns = {key: partition}
688 r = self.push(ns, block=False, track=track, targets=engineid)
689 msg_ids.extend(r.msg_ids)
690 if track:
691 trackers.append(r._tracker)
692
693 if track:
694 tracker = zmq.MessageTracker(*trackers)
695 else:
696 tracker = None
697
698 r = AsyncResult(self.client, msg_ids, fname='scatter', targets=targets, tracker=tracker)
699 if block:
700 r.wait()
701 else:
702 return r
703
704 @sync_results
705 @save_ids
706 def gather(self, key, dist='b', targets=None, block=None):
707 """
708 Gather a partitioned sequence on a set of engines as a single local seq.
709 """
710 block = block if block is not None else self.block
711 targets = targets if targets is not None else self.targets
712 mapObject = Map.dists[dist]()
713 msg_ids = []
714
715 for index, engineid in enumerate(targets):
716 msg_ids.extend(self.pull(key, block=False, targets=engineid).msg_ids)
717
718 r = AsyncMapResult(self.client, msg_ids, mapObject, fname='gather')
719
720 if block:
721 try:
722 return r.get()
723 except KeyboardInterrupt:
724 pass
725 return r
726
727 def __getitem__(self, key):
728 return self.get(key)
729
730 def __setitem__(self,key, value):
731 self.update({key:value})
732
733 def clear(self, targets=None, block=False):
734 """Clear the remote namespaces on my engines."""
735 block = block if block is not None else self.block
736 targets = targets if targets is not None else self.targets
737 return self.client.clear(targets=targets, block=block)
738
739 def kill(self, targets=None, block=True):
740 """Kill my engines."""
741 block = block if block is not None else self.block
742 targets = targets if targets is not None else self.targets
743 return self.client.kill(targets=targets, block=block)
744
745 #----------------------------------------
746 # activate for %px,%autopx magics
747 #----------------------------------------
748 def activate(self):
749 """Make this `View` active for parallel magic commands.
750
751 IPython has a magic command syntax to work with `MultiEngineClient` objects.
752 In a given IPython session there is a single active one. While
753 there can be many `Views` created and used by the user,
754 there is only one active one. The active `View` is used whenever
755 the magic commands %px and %autopx are used.
756
757 The activate() method is called on a given `View` to make it
758 active. Once this has been done, the magic commands can be used.
759 """
760
761 try:
762 # This is injected into __builtins__.
763 ip = get_ipython()
764 except NameError:
765 print "The IPython parallel magics (%result, %px, %autopx) only work within IPython."
766 else:
767 pmagic = ip.plugin_manager.get_plugin('parallelmagic')
768 if pmagic is not None:
769 pmagic.active_multiengine_client = self
770 else:
771 print "You must first load the parallelmagic extension " \
772 "by doing '%load_ext parallelmagic'"
773
774
775 @testdec.skip_doctest
776 class LoadBalancedView(View):
777 """An load-balancing View that only executes via the Task scheduler.
778
779 Load-balanced views can be created with the client's `view` method:
780
781 >>> v = client.load_balanced_view()
782
783 or targets can be specified, to restrict the potential destinations:
784
785 >>> v = client.load_balanced_view([1, 3])
786
787 which would restrict load-balancing to engines 1 and 3.
788
789 """
790
791 follow=Any()
792 after=Any()
793 timeout=CFloat()
794
795 _task_scheme = Any()
796 _flag_names = List(['targets', 'block', 'track', 'follow', 'after', 'timeout'])
797
798 def __init__(self, client=None, socket=None, **flags):
799 super(LoadBalancedView, self).__init__(client=client, socket=socket, **flags)
800 self._task_scheme=client._task_scheme
801
802 def _validate_dependency(self, dep):
803 """validate a dependency.
804
805 For use in `set_flags`.
806 """
807 if dep is None or isinstance(dep, (str, AsyncResult, Dependency)):
808 return True
809 elif isinstance(dep, (list,set, tuple)):
810 for d in dep:
811 if not isinstance(d, (str, AsyncResult)):
812 return False
813 elif isinstance(dep, dict):
814 if set(dep.keys()) != set(Dependency().as_dict().keys()):
815 return False
816 if not isinstance(dep['msg_ids'], list):
817 return False
818 for d in dep['msg_ids']:
819 if not isinstance(d, str):
820 return False
821 else:
822 return False
823
824 return True
825
826 def _render_dependency(self, dep):
827 """helper for building jsonable dependencies from various input forms."""
828 if isinstance(dep, Dependency):
829 return dep.as_dict()
830 elif isinstance(dep, AsyncResult):
831 return dep.msg_ids
832 elif dep is None:
833 return []
834 else:
835 # pass to Dependency constructor
836 return list(Dependency(dep))
837
838 def set_flags(self, **kwargs):
839 """set my attribute flags by keyword.
840
841 A View is a wrapper for the Client's apply method, but with attributes
842 that specify keyword arguments, those attributes can be set by keyword
843 argument with this method.
844
845 Parameters
846 ----------
847
848 block : bool
849 whether to wait for results
850 track : bool
851 whether to create a MessageTracker to allow the user to
852 safely edit arrays and buffers after non-copying
853 sends.
854
855 after : Dependency or collection of msg_ids
856 Only for load-balanced execution (targets=None)
857 Specify a list of msg_ids as a time-based dependency.
858 This job will only be run *after* the dependencies
859 have been met.
860
861 follow : Dependency or collection of msg_ids
862 Only for load-balanced execution (targets=None)
863 Specify a list of msg_ids as a location-based dependency.
864 This job will only be run on an engine where this dependency
865 is met.
866
867 timeout : float/int or None
868 Only for load-balanced execution (targets=None)
869 Specify an amount of time (in seconds) for the scheduler to
870 wait for dependencies to be met before failing with a
871 DependencyTimeout.
872 """
873
874 super(LoadBalancedView, self).set_flags(**kwargs)
875 for name in ('follow', 'after'):
876 if name in kwargs:
877 value = kwargs[name]
878 if self._validate_dependency(value):
879 setattr(self, name, value)
880 else:
881 raise ValueError("Invalid dependency: %r"%value)
882 if 'timeout' in kwargs:
883 t = kwargs['timeout']
884 if not isinstance(t, (int, long, float, type(None))):
885 raise TypeError("Invalid type for timeout: %r"%type(t))
886 if t is not None:
887 if t < 0:
888 raise ValueError("Invalid timeout: %s"%t)
889 self.timeout = t
890
891 @sync_results
892 @save_ids
893 def _really_apply(self, f, args=None, kwargs=None, block=None, track=None,
894 after=None, follow=None, timeout=None,
895 targets=None):
896 """calls f(*args, **kwargs) on a remote engine, returning the result.
897
898 This method temporarily sets all of `apply`'s flags for a single call.
899
900 Parameters
901 ----------
902
903 f : callable
904
905 args : list [default: empty]
906
907 kwargs : dict [default: empty]
908
909 block : bool [default: self.block]
910 whether to block
911 track : bool [default: self.track]
912 whether to ask zmq to track the message, for safe non-copying sends
913
914 after, follow, timeout : dependency flags, as documented in `set_flags`
915
916 Returns
917 -------
918
919 if self.block is False:
920 returns AsyncResult
921 else:
922 returns actual result of f(*args, **kwargs) on the engine(s)
923 This will be a list if self.targets is also a list (even length 1), or
924 the single result if self.targets is an integer engine id
925 """
926
927 # validate whether we can run
928 if self._socket.closed:
929 msg = "Task farming is disabled"
930 if self._task_scheme == 'pure':
931 msg += " because the pure ZMQ scheduler cannot handle"
932 msg += " disappearing engines."
933 raise RuntimeError(msg)
934
935 if self._task_scheme == 'pure':
936 # pure zmq scheme doesn't support dependencies
937 msg = "Pure ZMQ scheduler doesn't support dependencies"
938 if (follow or after):
939 # hard fail on DAG dependencies
940 raise RuntimeError(msg)
941 if isinstance(f, dependent):
942 # soft warn on functional dependencies
943 warnings.warn(msg, RuntimeWarning)
944
945 # build args
946 args = [] if args is None else args
947 kwargs = {} if kwargs is None else kwargs
948 block = self.block if block is None else block
949 track = self.track if track is None else track
950 after = self.after if after is None else after
951 follow = self.follow if follow is None else follow
952 timeout = self.timeout if timeout is None else timeout
953 targets = self.targets if targets is None else targets
954
955 if targets is None:
956 idents = []
957 else:
958 idents = self.client._build_targets(targets)[0]
959
960 after = self._render_dependency(after)
961 follow = self._render_dependency(follow)
962 subheader = dict(after=after, follow=follow, timeout=timeout, targets=idents)
963
964 msg = self.client.send_apply_message(self._socket, f, args, kwargs, track=track,
965 subheader=subheader)
966 tracker = None if track is False else msg['tracker']
967
968 ar = AsyncResult(self.client, msg['msg_id'], fname=f.__name__, targets=None, tracker=tracker)
969
970 if block:
971 try:
972 return ar.get()
973 except KeyboardInterrupt:
974 pass
975 return ar
976
977 @spin_after
978 @save_ids
979 def map(self, f, *sequences, **kwargs):
980 """view.map(f, *sequences, block=self.block, chunksize=1) => list|AsyncMapResult
981
982 Parallel version of builtin `map`, load-balanced by this View.
983
984 `block`, and `chunksize` can be specified by keyword only.
985
986 Each `chunksize` elements will be a separate task, and will be
987 load-balanced. This lets individual elements be available for iteration
988 as soon as they arrive.
989
990 Parameters
991 ----------
992
993 f : callable
994 function to be mapped
995 *sequences: one or more sequences of matching length
996 the sequences to be distributed and passed to `f`
997 block : bool
998 whether to wait for the result or not [default self.block]
999 track : bool
1000 whether to create a MessageTracker to allow the user to
1001 safely edit arrays and buffers after non-copying
1002 sends.
1003 chunksize : int
1004 how many elements should be in each task [default 1]
1005
1006 Returns
1007 -------
1008
1009 if block=False:
1010 AsyncMapResult
1011 An object like AsyncResult, but which reassembles the sequence of results
1012 into a single list. AsyncMapResults can be iterated through before all
1013 results are complete.
1014 else:
1015 the result of map(f,*sequences)
1016
1017 """
1018
1019 # default
1020 block = kwargs.get('block', self.block)
1021 chunksize = kwargs.get('chunksize', 1)
1022
1023 keyset = set(kwargs.keys())
1024 extra_keys = keyset.difference(set(['block', 'chunksize']))  # difference(), not difference_update(), so the leftover keys are returned
1025 if extra_keys:
1026 raise TypeError("Invalid kwargs: %s"%list(extra_keys))
1027
1028 assert len(sequences) > 0, "must have some sequences to map onto!"
1029
1030 pf = ParallelFunction(self, f, block=block, chunksize=chunksize)
1031 return pf.map(*sequences)
1032
1033 __all__ = ['LoadBalancedView', 'DirectView'] No newline at end of file
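
For readers skimming the diff, a hedged usage sketch of the views defined in this file (it assumes a running controller with engines; `Client`, `rc`, `lview`, and `double` follow the package's client API and are illustrative, not part of this change):

    from IPython.parallel import Client

    rc = Client()                        # connect to a running controller
    lview = rc.load_balanced_view()      # LoadBalancedView over all engines

    def double(x):
        return 2 * x

    ar = lview.apply(double, 21)         # AsyncResult if lview.block is False
    print(ar.get())                      # 42, once an engine finishes the task

    # map() splits the sequence into chunksize-sized tasks, load-balanced
    amr = lview.map(double, range(10), block=False, chunksize=2)
    print(list(amr))                     # results reassembled in input order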
1 NO CONTENT: new file 100644
NO CONTENT: new file 100644
@@ -0,0 +1,117 b''
1 #!/usr/bin/env python
2 """The IPython Controller with 0MQ
3 This is a collection of one Hub and several Schedulers.
4 """
5 #-----------------------------------------------------------------------------
6 # Copyright (C) 2010 The IPython Development Team
7 #
8 # Distributed under the terms of the BSD License. The full license is in
9 # the file COPYING, distributed as part of this software.
10 #-----------------------------------------------------------------------------
11
12 #-----------------------------------------------------------------------------
13 # Imports
14 #-----------------------------------------------------------------------------
15 from __future__ import print_function
16
17 from multiprocessing import Process
18
19 import zmq
20 from zmq.devices import ProcessMonitoredQueue
21 # internal:
22 from IPython.utils.importstring import import_item
23 from IPython.utils.traitlets import Int, CStr, Instance, List, Bool
24
25 from IPython.parallel.util import signal_children
26 from .hub import Hub, HubFactory
27 from .scheduler import launch_scheduler
28
29 #-----------------------------------------------------------------------------
30 # Configurable
31 #-----------------------------------------------------------------------------
32
33
34 class ControllerFactory(HubFactory):
35 """Configurable for setting up a Hub and Schedulers."""
36
37 usethreads = Bool(False, config=True)
38 # pure-zmq downstream HWM
39 hwm = Int(0, config=True)
40
41 # internal
42 children = List()
43 mq_class = CStr('zmq.devices.ProcessMonitoredQueue')
44
45 def _usethreads_changed(self, name, old, new):
46 self.mq_class = 'zmq.devices.%sMonitoredQueue'%('Thread' if new else 'Process')
47
48 def __init__(self, **kwargs):
49 super(ControllerFactory, self).__init__(**kwargs)
50 self.subconstructors.append(self.construct_schedulers)
51
52 def start(self):
53 super(ControllerFactory, self).start()
54 child_procs = []
55 for child in self.children:
56 child.start()
57 if isinstance(child, ProcessMonitoredQueue):
58 child_procs.append(child.launcher)
59 elif isinstance(child, Process):
60 child_procs.append(child)
61 if child_procs:
62 signal_children(child_procs)
63
64
65 def construct_schedulers(self):
66 children = self.children
67 mq = import_item(self.mq_class)
68
69 maybe_inproc = 'inproc://monitor' if self.usethreads else self.monitor_url
70 # IOPub relay (in a Process)
71 q = mq(zmq.PUB, zmq.SUB, zmq.PUB, 'N/A','iopub')
72 q.bind_in(self.client_info['iopub'])
73 q.bind_out(self.engine_info['iopub'])
74 q.setsockopt_out(zmq.SUBSCRIBE, '')
75 q.connect_mon(maybe_inproc)
76 q.daemon=True
77 children.append(q)
78
79 # Multiplexer Queue (in a Process)
80 q = mq(zmq.XREP, zmq.XREP, zmq.PUB, 'in', 'out')
81 q.bind_in(self.client_info['mux'])
82 q.setsockopt_in(zmq.IDENTITY, 'mux')
83 q.bind_out(self.engine_info['mux'])
84 q.connect_mon(maybe_inproc)
85 q.daemon=True
86 children.append(q)
87
88 # Control Queue (in a Process)
89 q = mq(zmq.XREP, zmq.XREP, zmq.PUB, 'incontrol', 'outcontrol')
90 q.bind_in(self.client_info['control'])
91 q.setsockopt_in(zmq.IDENTITY, 'control')
92 q.bind_out(self.engine_info['control'])
93 q.connect_mon(maybe_inproc)
94 q.daemon=True
95 children.append(q)
96 # Task Queue (in a Process)
97 if self.scheme == 'pure':
98 self.log.warn("task::using pure XREQ Task scheduler")
99 q = mq(zmq.XREP, zmq.XREQ, zmq.PUB, 'intask', 'outtask')
100 q.setsockopt_out(zmq.HWM, self.hwm)
101 q.bind_in(self.client_info['task'][1])
102 q.setsockopt_in(zmq.IDENTITY, 'task')
103 q.bind_out(self.engine_info['task'])
104 q.connect_mon(maybe_inproc)
105 q.daemon=True
106 children.append(q)
107 elif self.scheme == 'none':
108 self.log.warn("task::using no Task scheduler")
109
110 else:
111 self.log.info("task::using Python %s Task scheduler"%self.scheme)
112 sargs = (self.client_info['task'][1], self.engine_info['task'], self.monitor_url, self.client_info['notification'])
113 kwargs = dict(scheme=self.scheme,logname=self.log.name, loglevel=self.log.level, config=self.config)
114 q = Process(target=launch_scheduler, args=sargs, kwargs=kwargs)
115 q.daemon=True
116 children.append(q)
117
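
A minimal sketch of how this factory is meant to be driven, inferred from the construct/start pattern it inherits from HubFactory (the import path and the explicit IOLoop start are assumptions, not shown in this diff):

    from IPython.parallel.controller import ControllerFactory  # assumed path

    cf = ControllerFactory(usethreads=False)  # Process-based MonitoredQueues
    cf.construct()       # runs construct_hub() and construct_schedulers()
    cf.start()           # starts the Hub, the queues, and the schedulers
    cf.loop.start()      # hand control to the zmq IOLoop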
@@ -0,0 +1,196 b''
1 """Dependency utilities"""
2 #-----------------------------------------------------------------------------
3 # Copyright (C) 2010-2011 The IPython Development Team
4 #
5 # Distributed under the terms of the BSD License. The full license is in
6 # the file COPYING, distributed as part of this software.
7 #-----------------------------------------------------------------------------
8
9 from types import ModuleType
10
11 from IPython.parallel.client.asyncresult import AsyncResult
12 from IPython.parallel.error import UnmetDependency
13 from IPython.parallel.util import interactive
14
15 class depend(object):
16 """Dependency decorator, for use with tasks.
17
18 `@depend` lets you define a function for engine dependencies
19 just like you use `apply` for tasks.
20
21
22 Examples
23 --------
24 ::
25
26 @depend(df, a,b, c=5)
27 def f(m,n,p):
28
29 view.apply(f, 1,2,3)
30
31 will call df(a,b,c=5) on the engine, and if it returns False or
32 raises an UnmetDependency error, then the task will not be run
33 and another engine will be tried.
34 """
35 def __init__(self, f, *args, **kwargs):
36 self.f = f
37 self.args = args
38 self.kwargs = kwargs
39
40 def __call__(self, f):
41 return dependent(f, self.f, *self.args, **self.kwargs)
42
43 class dependent(object):
44 """A function that depends on another function.
45 This is an object to prevent the closure used
46 in traditional decorators, which are not picklable.
47 """
48
49 def __init__(self, f, df, *dargs, **dkwargs):
50 self.f = f
51 self.func_name = getattr(f, '__name__', 'f')
52 self.df = df
53 self.dargs = dargs
54 self.dkwargs = dkwargs
55
56 def __call__(self, *args, **kwargs):
57 # if hasattr(self.f, 'func_globals') and hasattr(self.df, 'func_globals'):
58 # self.df.func_globals = self.f.func_globals
59 if self.df(*self.dargs, **self.dkwargs) is False:
60 raise UnmetDependency()
61 return self.f(*args, **kwargs)
62
63 @property
64 def __name__(self):
65 return self.func_name
66
67 @interactive
68 def _require(*names):
69 """Helper for @require decorator."""
70 from IPython.parallel.error import UnmetDependency
71 user_ns = globals()
72 for name in names:
73 if name in user_ns:
74 continue
75 try:
76 exec 'import %s'%name in user_ns
77 except ImportError:
78 raise UnmetDependency(name)
79 return True
80
81 def require(*mods):
82 """Simple decorator for requiring names to be importable.
83
84 Examples
85 --------
86
87 In [1]: @require('numpy')
88 ...: def norm(a):
89 ...: import numpy
90 ...: return numpy.linalg.norm(a,2)
91 """
92 names = []
93 for mod in mods:
94 if isinstance(mod, ModuleType):
95 mod = mod.__name__
96
97 if isinstance(mod, basestring):
98 names.append(mod)
99 else:
100 raise TypeError("names must be modules or module names, not %s"%type(mod))
101
102 return depend(_require, *names)
103
104 class Dependency(set):
105 """An object for representing a set of msg_id dependencies.
106
107 Subclassed from set().
108
109 Parameters
110 ----------
111 dependencies: list/set of msg_ids or AsyncResult objects or output of Dependency.as_dict()
112 The msg_ids to depend on
113 all : bool [default True]
114 Whether the dependency should be considered met when *all* depending tasks have completed
115 or only when *any* have been completed.
116 success : bool [default True]
117 Whether to consider successes as fulfilling dependencies.
118 failure : bool [default False]
119 Whether to consider failures as fulfilling dependencies.
120
121 If `all=success=True` and `failure=False`, then the task will fail with an ImpossibleDependency
122 as soon as the first depended-upon task fails.
123 """
124
125 all=True
126 success=True
127 failure=False
128
129 def __init__(self, dependencies=[], all=True, success=True, failure=False):
130 if isinstance(dependencies, dict):
131 # load from dict
132 all = dependencies.get('all', True)
133 success = dependencies.get('success', success)
134 failure = dependencies.get('failure', failure)
135 dependencies = dependencies.get('dependencies', [])
136 ids = []
137
138 # extract ids from various sources:
139 if isinstance(dependencies, (basestring, AsyncResult)):
140 dependencies = [dependencies]
141 for d in dependencies:
142 if isinstance(d, basestring):
143 ids.append(d)
144 elif isinstance(d, AsyncResult):
145 ids.extend(d.msg_ids)
146 else:
147 raise TypeError("invalid dependency type: %r"%type(d))
148
149 set.__init__(self, ids)
150 self.all = all
151 if not (success or failure):
152 raise ValueError("Must depend on at least one of successes or failures!")
153 self.success=success
154 self.failure = failure
155
156 def check(self, completed, failed=None):
157 """check whether our dependencies have been met."""
158 if len(self) == 0:
159 return True
160 against = set()
161 if self.success:
162 against = completed
163 if failed is not None and self.failure:
164 against = against.union(failed)
165 if self.all:
166 return self.issubset(against)
167 else:
168 return not self.isdisjoint(against)
169
170 def unreachable(self, completed, failed=None):
171 """return whether this dependency has become impossible."""
172 if len(self) == 0:
173 return False
174 against = set()
175 if not self.success:
176 against = completed
177 if failed is not None and not self.failure:
178 against = against.union(failed)
179 if self.all:
180 return not self.isdisjoint(against)
181 else:
182 return self.issubset(against)
183
184
185 def as_dict(self):
186 """Represent this dependency as a dict. For json compatibility."""
187 return dict(
188 dependencies=list(self),
189 all=self.all,
190 success=self.success,
191 failure=self.failure
192 )
193
194
195 __all__ = ['depend', 'require', 'dependent', 'Dependency']
196
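
A short, hedged sketch of the pieces above in use: `@require` for an import dependency evaluated on the engine, and a `Dependency` set checked directly against completed/failed msg_ids (the msg_id strings are placeholders):

    # functional dependency: task only runs where numpy imports cleanly
    @require('numpy')
    def norm2(a):
        import numpy
        return numpy.linalg.norm(a, 2)

    # DAG dependency: met only when *all* listed msg_ids have succeeded
    dep = Dependency(['msg-a', 'msg-b'], all=True, success=True, failure=False)
    dep.check(completed={'msg-a'}, failed=set())        # False: msg-b pending
    dep.check(completed={'msg-a', 'msg-b'})             # True: all are done
    dep.unreachable(completed=set(), failed={'msg-b'})  # True: msg-b failed
    dep.as_dict()    # json-compatible dict form of the same dependency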
@@ -0,0 +1,155 b''
1 """A Task logger that presents our DB interface,
2 but exists entirely in memory and implemented with dicts.
3
4 TaskRecords are dicts of the form:
5 {
6 'msg_id' : str(uuid),
7 'client_uuid' : str(uuid),
8 'engine_uuid' : str(uuid) or None,
9 'header' : dict(header),
10 'content': dict(content),
11 'buffers': list(buffers),
12 'submitted': datetime,
13 'started': datetime or None,
14 'completed': datetime or None,
15 'resubmitted': datetime or None,
16 'result_header' : dict(header) or None,
17 'result_content' : dict(content) or None,
18 'result_buffers' : list(buffers) or None,
19 }
20 With this info, many of the special categories of tasks can be defined by query:
21
22 pending: completed is None
23 client's outstanding: client_uuid = uuid && completed is None
24 MIA: arrived is None (and completed is None)
25 etc.
26
27 EngineRecords are dicts of the form:
28 {
29 'eid' : int(id),
30 'uuid': str(uuid)
31 }
32 This may be extended, but this is all that is currently stored.
33
34 We support a subset of mongodb operators:
35 $lt,$gt,$lte,$gte,$ne,$in,$nin,$all,$mod,$exists
36 """
37 #-----------------------------------------------------------------------------
38 # Copyright (C) 2010 The IPython Development Team
39 #
40 # Distributed under the terms of the BSD License. The full license is in
41 # the file COPYING, distributed as part of this software.
42 #-----------------------------------------------------------------------------
43
44
45 from datetime import datetime
46
47 from IPython.config.configurable import Configurable
48
49 from IPython.utils.traitlets import Dict, CUnicode
50
51 filters = {
52 '$lt' : lambda a,b: a < b,
53 '$gt' : lambda a,b: a > b,
54 '$eq' : lambda a,b: a == b,
55 '$ne' : lambda a,b: a != b,
56 '$lte': lambda a,b: a <= b,
57 '$gte': lambda a,b: a >= b,
58 '$in' : lambda a,b: a in b,
59 '$nin': lambda a,b: a not in b,
60 '$all': lambda a,b: all([ a in bb for bb in b ]),
61 '$mod': lambda a,b: a%b[0] == b[1],
62 '$exists' : lambda a,b: (b and a is not None) or (a is None and not b)
63 }
64
65
66 class CompositeFilter(object):
67 """Composite filter for matching multiple properties."""
68
69 def __init__(self, dikt):
70 self.tests = []
71 self.values = []
72 for key, value in dikt.iteritems():
73 self.tests.append(filters[key])
74 self.values.append(value)
75
76 def __call__(self, value):
77 for test,check in zip(self.tests, self.values):
78 if not test(value, check):
79 return False
80 return True
81
82 class BaseDB(Configurable):
83 """Empty Parent class so traitlets work on DB."""
84 # base configurable traits:
85 session = CUnicode("")
86
87 class DictDB(BaseDB):
88 """Basic in-memory dict-based object for saving Task Records.
89
90 This is the first object to present the DB interface
91 for logging tasks out of memory.
92
93 The interface is based on MongoDB, so adding a MongoDB
94 backend should be straightforward.
95 """
96
97 _records = Dict()
98
99 def _match_one(self, rec, tests):
100 """Check if a specific record matches tests."""
101 for key,test in tests.iteritems():
102 if not test(rec.get(key, None)):
103 return False
104 return True
105
106 def _match(self, check, id_only=True):
107 """Find all the matches for a check dict."""
108 matches = {}
109 tests = {}
110 for k,v in check.iteritems():
111 if isinstance(v, dict):
112 tests[k] = CompositeFilter(v)
113 else:
114 tests[k] = lambda o, v=v: o==v  # bind v now, so each key keeps its own value
115
116 for msg_id, rec in self._records.iteritems():
117 if self._match_one(rec, tests):
118 matches[msg_id] = rec
119 if id_only:
120 return matches.keys()
121 else:
122 return matches
123
124
125 def add_record(self, msg_id, rec):
126 """Add a new Task Record, by msg_id."""
127 if self._records.has_key(msg_id):
128 raise KeyError("Already have msg_id %r"%(msg_id))
129 self._records[msg_id] = rec
130
131 def get_record(self, msg_id):
132 """Get a specific Task Record, by msg_id."""
133 if not self._records.has_key(msg_id):
134 raise KeyError("No such msg_id %r"%(msg_id))
135 return self._records[msg_id]
136
137 def update_record(self, msg_id, rec):
138 """Update the data in an existing record."""
139 self._records[msg_id].update(rec)
140
141 def drop_matching_records(self, check):
142 """Remove a record from the DB."""
143 matches = self._match(check, id_only=True)
144 for m in matches:
145 del self._records[m]
146
147 def drop_record(self, msg_id):
148 """Remove a record from the DB."""
149 del self._records[msg_id]
150
151
152 def find_records(self, check, id_only=False):
153 """Find records matching a query dict."""
154 matches = self._match(check, id_only)
155 return matches No newline at end of file
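
A toy illustration of the query interface sketched in the module docstring (hedged: the records carry only a couple of fields, and the datetimes stand in for values the Hub would normally supply):

    from datetime import datetime

    db = DictDB()
    db.add_record('msg-1', dict(msg_id='msg-1', completed=None,
                                submitted=datetime(2011, 1, 1)))
    db.add_record('msg-2', dict(msg_id='msg-2', completed=datetime.now(),
                                submitted=datetime(2011, 1, 2)))

    # plain value test: pending tasks are those whose completed field is None
    db.find_records({'completed': None}, id_only=True)           # ['msg-1']

    # mongodb-style operator, handled by CompositeFilter
    db.find_records({'submitted': {'$gte': datetime(2011, 1, 2)}},
                    id_only=True)                                 # ['msg-2']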
@@ -0,0 +1,163 b''
1 #!/usr/bin/env python
2 """
3 A multi-heart Heartbeat system using PUB and XREP sockets. pings are sent out on the PUB,
4 and hearts are tracked based on their XREQ identities.
5 """
6 #-----------------------------------------------------------------------------
7 # Copyright (C) 2010-2011 The IPython Development Team
8 #
9 # Distributed under the terms of the BSD License. The full license is in
10 # the file COPYING, distributed as part of this software.
11 #-----------------------------------------------------------------------------
12
13 from __future__ import print_function
14 import time
15 import uuid
16
17 import zmq
18 from zmq.devices import ProcessDevice, ThreadDevice
19 from zmq.eventloop import ioloop, zmqstream
20
21 from IPython.utils.traitlets import Set, Instance, CFloat, Bool
22 from IPython.parallel.factory import LoggingFactory
23
24 class Heart(object):
25 """A basic heart object for responding to a HeartMonitor.
26 This is a simple wrapper with defaults for the most common
27 Device model for responding to heartbeats.
28
29 It simply builds a threadsafe zmq.FORWARDER Device, defaulting to using
30 SUB/XREQ for in/out.
31
32 You can specify the XREQ's IDENTITY via the optional heart_id argument."""
33 device=None
34 id=None
35 def __init__(self, in_addr, out_addr, in_type=zmq.SUB, out_type=zmq.XREQ, heart_id=None):
36 self.device = ThreadDevice(zmq.FORWARDER, in_type, out_type)
37 self.device.daemon=True
38 self.device.connect_in(in_addr)
39 self.device.connect_out(out_addr)
40 if in_type == zmq.SUB:
41 self.device.setsockopt_in(zmq.SUBSCRIBE, "")
42 if heart_id is None:
43 heart_id = str(uuid.uuid4())
44 self.device.setsockopt_out(zmq.IDENTITY, heart_id)
45 self.id = heart_id
46
47 def start(self):
48 return self.device.start()
49
50 class HeartMonitor(LoggingFactory):
51 """A basic HeartMonitor class
52 pingstream: a PUB stream
53 pongstream: an XREP stream
54 period: the period of the heartbeat in milliseconds"""
55
56 period=CFloat(1000, config=True) # in milliseconds
57
58 pingstream=Instance('zmq.eventloop.zmqstream.ZMQStream')
59 pongstream=Instance('zmq.eventloop.zmqstream.ZMQStream')
60 loop = Instance('zmq.eventloop.ioloop.IOLoop')
61 def _loop_default(self):
62 return ioloop.IOLoop.instance()
63 debug=Bool(False)
64
65 # not settable:
66 hearts=Set()
67 responses=Set()
68 on_probation=Set()
69 last_ping=CFloat(0)
70 _new_handlers = Set()
71 _failure_handlers = Set()
72 lifetime = CFloat(0)
73 tic = CFloat(0)
74
75 def __init__(self, **kwargs):
76 super(HeartMonitor, self).__init__(**kwargs)
77
78 self.pongstream.on_recv(self.handle_pong)
79
80 def start(self):
81 self.caller = ioloop.PeriodicCallback(self.beat, self.period, self.loop)
82 self.caller.start()
83
84 def add_new_heart_handler(self, handler):
85 """add a new handler for new hearts"""
86 self.log.debug("heartbeat::new_heart_handler: %s"%handler)
87 self._new_handlers.add(handler)
88
89 def add_heart_failure_handler(self, handler):
90 """add a new handler for heart failure"""
91 self.log.debug("heartbeat::new heart failure handler: %s"%handler)
92 self._failure_handlers.add(handler)
93
94 def beat(self):
95 self.pongstream.flush()
96 self.last_ping = self.lifetime
97
98 toc = time.time()
99 self.lifetime += toc-self.tic
100 self.tic = toc
101 # self.log.debug("heartbeat::%s"%self.lifetime)
102 goodhearts = self.hearts.intersection(self.responses)
103 missed_beats = self.hearts.difference(goodhearts)
104 heartfailures = self.on_probation.intersection(missed_beats)
105 newhearts = self.responses.difference(goodhearts)
106 map(self.handle_new_heart, newhearts)
107 map(self.handle_heart_failure, heartfailures)
108 self.on_probation = missed_beats.intersection(self.hearts)
109 self.responses = set()
110 # print self.on_probation, self.hearts
111 # self.log.debug("heartbeat::beat %.3f, %i beating hearts"%(self.lifetime, len(self.hearts)))
112 self.pingstream.send(str(self.lifetime))
113
114 def handle_new_heart(self, heart):
115 if self._new_handlers:
116 for handler in self._new_handlers:
117 handler(heart)
118 else:
119 self.log.info("heartbeat::yay, got new heart %s!"%heart)
120 self.hearts.add(heart)
121
122 def handle_heart_failure(self, heart):
123 if self._failure_handlers:
124 for handler in self._failure_handlers:
125 try:
126 handler(heart)
127 except Exception as e:
128 self.log.error("heartbeat::Bad Handler! %s"%handler, exc_info=True)
129 pass
130 else:
131 self.log.info("heartbeat::Heart %s failed :("%heart)
132 self.hearts.remove(heart)
133
134
135 def handle_pong(self, msg):
136 "a heart just beat"
137 if msg[1] == str(self.lifetime):
138 delta = time.time()-self.tic
139 # self.log.debug("heartbeat::heart %r took %.2f ms to respond"%(msg[0], 1000*delta))
140 self.responses.add(msg[0])
141 elif msg[1] == str(self.last_ping):
142 delta = time.time()-self.tic + (self.lifetime-self.last_ping)
143 self.log.warn("heartbeat::heart %r missed a beat, and took %.2f ms to respond"%(msg[0], 1000*delta))
144 self.responses.add(msg[0])
145 else:
146 self.log.warn("heartbeat::got bad heartbeat (possibly old?): %s (current=%.3f)"%
147 (msg[1],self.lifetime))
148
149
150 if __name__ == '__main__':
151 loop = ioloop.IOLoop.instance()
152 context = zmq.Context()
153 pub = context.socket(zmq.PUB)
154 pub.bind('tcp://127.0.0.1:5555')
155 xrep = context.socket(zmq.XREP)
156 xrep.bind('tcp://127.0.0.1:5556')
157
158 outstream = zmqstream.ZMQStream(pub, loop)
159 instream = zmqstream.ZMQStream(xrep, loop)
160
161 hb = HeartMonitor(loop=loop, pingstream=outstream, pongstream=instream)
162 hb.start()
163 loop.start()
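
For completeness, a hedged sketch of the engine-side Heart that would answer the demo monitor above; the addresses mirror the demo's bind calls, and the import path is an assumption (this would run in a separate process or thread):

    import zmq
    from IPython.parallel.controller.heartmonitor import Heart  # assumed path

    # SUB socket receives pings from the monitor's PUB; the XREQ echoes them
    # back tagged with our IDENTITY, which is how the monitor tells hearts apart.
    heart = Heart('tcp://127.0.0.1:5555', 'tcp://127.0.0.1:5556',
                  heart_id='engine-0')
    heart.start()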
This diff has been collapsed as it changes many lines (1089 lines changed).
@@ -0,0 +1,1089 b''
1 #!/usr/bin/env python
2 """The IPython Controller Hub with 0MQ
3 This is the master object that handles connections from engines and clients,
4 and monitors traffic through the various queues.
5 """
6 #-----------------------------------------------------------------------------
7 # Copyright (C) 2010 The IPython Development Team
8 #
9 # Distributed under the terms of the BSD License. The full license is in
10 # the file COPYING, distributed as part of this software.
11 #-----------------------------------------------------------------------------
12
13 #-----------------------------------------------------------------------------
14 # Imports
15 #-----------------------------------------------------------------------------
16 from __future__ import print_function
17
18 import sys
19 import time
20 from datetime import datetime
21
22 import zmq
23 from zmq.eventloop import ioloop
24 from zmq.eventloop.zmqstream import ZMQStream
25
26 # internal:
27 from IPython.utils.importstring import import_item
28 from IPython.utils.traitlets import HasTraits, Instance, Int, CStr, Str, Dict, Set, List, Bool
29
30 from IPython.parallel import error
31 from IPython.parallel.factory import RegistrationFactory, LoggingFactory
32 from IPython.parallel.util import select_random_ports, validate_url_container, ISO8601
33
34 from .heartmonitor import HeartMonitor
35
36 #-----------------------------------------------------------------------------
37 # Code
38 #-----------------------------------------------------------------------------
39
40 def _passer(*args, **kwargs):
41 return
42
43 def _printer(*args, **kwargs):
44 print (args)
45 print (kwargs)
46
47 def empty_record():
48 """Return an empty dict with all record keys."""
49 return {
50 'msg_id' : None,
51 'header' : None,
52 'content': None,
53 'buffers': None,
54 'submitted': None,
55 'client_uuid' : None,
56 'engine_uuid' : None,
57 'started': None,
58 'completed': None,
59 'resubmitted': None,
60 'result_header' : None,
61 'result_content' : None,
62 'result_buffers' : None,
63 'queue' : None,
64 'pyin' : None,
65 'pyout': None,
66 'pyerr': None,
67 'stdout': '',
68 'stderr': '',
69 }
70
71 def init_record(msg):
72 """Initialize a TaskRecord based on a request."""
73 header = msg['header']
74 return {
75 'msg_id' : header['msg_id'],
76 'header' : header,
77 'content': msg['content'],
78 'buffers': msg['buffers'],
79 'submitted': datetime.strptime(header['date'], ISO8601),
80 'client_uuid' : None,
81 'engine_uuid' : None,
82 'started': None,
83 'completed': None,
84 'resubmitted': None,
85 'result_header' : None,
86 'result_content' : None,
87 'result_buffers' : None,
88 'queue' : None,
89 'pyin' : None,
90 'pyout': None,
91 'pyerr': None,
92 'stdout': '',
93 'stderr': '',
94 }
95
96
97 class EngineConnector(HasTraits):
98 """A simple object for accessing the various zmq connections of an object.
99 Attributes are:
100 id (int): engine ID
101 uuid (str): uuid (unused?)
102 queue (str): identity of queue's XREQ socket
103 registration (str): identity of registration XREQ socket
104 heartbeat (str): identity of heartbeat XREQ socket
105 """
106 id=Int(0)
107 queue=Str()
108 control=Str()
109 registration=Str()
110 heartbeat=Str()
111 pending=Set()
112
113 class HubFactory(RegistrationFactory):
114 """The Configurable for setting up a Hub."""
115
116 # name of a scheduler scheme
117 scheme = Str('leastload', config=True)
118
119 # port-pairs for monitoredqueues:
120 hb = Instance(list, config=True)
121 def _hb_default(self):
122 return select_random_ports(2)
123
124 mux = Instance(list, config=True)
125 def _mux_default(self):
126 return select_random_ports(2)
127
128 task = Instance(list, config=True)
129 def _task_default(self):
130 return select_random_ports(2)
131
132 control = Instance(list, config=True)
133 def _control_default(self):
134 return select_random_ports(2)
135
136 iopub = Instance(list, config=True)
137 def _iopub_default(self):
138 return select_random_ports(2)
139
140 # single ports:
141 mon_port = Instance(int, config=True)
142 def _mon_port_default(self):
143 return select_random_ports(1)[0]
144
145 notifier_port = Instance(int, config=True)
146 def _notifier_port_default(self):
147 return select_random_ports(1)[0]
148
149 ping = Int(1000, config=True) # ping frequency
150
151 engine_ip = CStr('127.0.0.1', config=True)
152 engine_transport = CStr('tcp', config=True)
153
154 client_ip = CStr('127.0.0.1', config=True)
155 client_transport = CStr('tcp', config=True)
156
157 monitor_ip = CStr('127.0.0.1', config=True)
158 monitor_transport = CStr('tcp', config=True)
159
160 monitor_url = CStr('')
161
162 db_class = CStr('IPython.parallel.controller.dictdb.DictDB', config=True)
163
164 # not configurable
165 db = Instance('IPython.parallel.controller.dictdb.BaseDB')
166 heartmonitor = Instance('IPython.parallel.controller.heartmonitor.HeartMonitor')
167 subconstructors = List()
168 _constructed = Bool(False)
169
170 def _ip_changed(self, name, old, new):
171 self.engine_ip = new
172 self.client_ip = new
173 self.monitor_ip = new
174 self._update_monitor_url()
175
176 def _update_monitor_url(self):
177 self.monitor_url = "%s://%s:%i"%(self.monitor_transport, self.monitor_ip, self.mon_port)
178
179 def _transport_changed(self, name, old, new):
180 self.engine_transport = new
181 self.client_transport = new
182 self.monitor_transport = new
183 self._update_monitor_url()
184
185 def __init__(self, **kwargs):
186 super(HubFactory, self).__init__(**kwargs)
187 self._update_monitor_url()
188 # self.on_trait_change(self._sync_ips, 'ip')
189 # self.on_trait_change(self._sync_transports, 'transport')
190 self.subconstructors.append(self.construct_hub)
191
192
193 def construct(self):
194 assert not self._constructed, "already constructed!"
195
196 for subc in self.subconstructors:
197 subc()
198
199 self._constructed = True
200
201
202 def start(self):
203 assert self._constructed, "must be constructed by self.construct() first!"
204 self.heartmonitor.start()
205 self.log.info("Heartmonitor started")
206
207 def construct_hub(self):
208 """construct"""
209 client_iface = "%s://%s:"%(self.client_transport, self.client_ip) + "%i"
210 engine_iface = "%s://%s:"%(self.engine_transport, self.engine_ip) + "%i"
211
212 ctx = self.context
213 loop = self.loop
214
215 # Registrar socket
216 q = ZMQStream(ctx.socket(zmq.XREP), loop)
217 q.bind(client_iface % self.regport)
218 self.log.info("Hub listening on %s for registration."%(client_iface%self.regport))
219 if self.client_ip != self.engine_ip:
220 q.bind(engine_iface % self.regport)
221 self.log.info("Hub listening on %s for registration."%(engine_iface%self.regport))
222
223 ### Engine connections ###
224
225 # heartbeat
226 hpub = ctx.socket(zmq.PUB)
227 hpub.bind(engine_iface % self.hb[0])
228 hrep = ctx.socket(zmq.XREP)
229 hrep.bind(engine_iface % self.hb[1])
230 self.heartmonitor = HeartMonitor(loop=loop, pingstream=ZMQStream(hpub,loop), pongstream=ZMQStream(hrep,loop),
231 period=self.ping, logname=self.log.name)
232
233 ### Client connections ###
234 # Notifier socket
235 n = ZMQStream(ctx.socket(zmq.PUB), loop)
236 n.bind(client_iface%self.notifier_port)
237
238 ### build and launch the queues ###
239
240 # monitor socket
241 sub = ctx.socket(zmq.SUB)
242 sub.setsockopt(zmq.SUBSCRIBE, "")
243 sub.bind(self.monitor_url)
244 sub.bind('inproc://monitor')
245 sub = ZMQStream(sub, loop)
246
247 # connect the db
248 self.log.info('Hub using DB backend: %r'%(self.db_class.split()[-1]))
249 # cdir = self.config.Global.cluster_dir
250 self.db = import_item(self.db_class)(session=self.session.session, config=self.config)
251 time.sleep(.25)
252
253 # build connection dicts
254 self.engine_info = {
255 'control' : engine_iface%self.control[1],
256 'mux': engine_iface%self.mux[1],
257 'heartbeat': (engine_iface%self.hb[0], engine_iface%self.hb[1]),
258 'task' : engine_iface%self.task[1],
259 'iopub' : engine_iface%self.iopub[1],
260 # 'monitor' : engine_iface%self.mon_port,
261 }
262
263 self.client_info = {
264 'control' : client_iface%self.control[0],
265 'mux': client_iface%self.mux[0],
266 'task' : (self.scheme, client_iface%self.task[0]),
267 'iopub' : client_iface%self.iopub[0],
268 'notification': client_iface%self.notifier_port
269 }
270 self.log.debug("Hub engine addrs: %s"%self.engine_info)
271 self.log.debug("Hub client addrs: %s"%self.client_info)
272 self.hub = Hub(loop=loop, session=self.session, monitor=sub, heartmonitor=self.heartmonitor,
273 query=q, notifier=n, db=self.db,
274 engine_info=self.engine_info, client_info=self.client_info,
275 logname=self.log.name)
276
277
278 class Hub(LoggingFactory):
279 """The IPython Controller Hub with 0MQ connections
280
281 Parameters
282 ==========
283 loop: zmq IOLoop instance
284 session: StreamSession object
285 <removed> context: zmq context for creating new connections (?)
286 queue: ZMQStream for monitoring the command queue (SUB)
287 query: ZMQStream for engine registration and client queries requests (XREP)
288 heartbeat: HeartMonitor object checking the pulse of the engines
289 notifier: ZMQStream for broadcasting engine registration changes (PUB)
290 db: connection to db for out of memory logging of commands
291 NotImplemented
292 engine_info: dict of zmq connection information for engines to connect
293 to the queues.
294 client_info: dict of zmq connection information for engines to connect
295 to the queues.
296 """
297 # internal data structures:
298 ids=Set() # engine IDs
299 keytable=Dict()
300 by_ident=Dict()
301 engines=Dict()
302 clients=Dict()
303 hearts=Dict()
304 pending=Set()
305 queues=Dict() # pending msg_ids keyed by engine_id
306 tasks=Dict() # pending msg_ids submitted as tasks, keyed by engine_id
307 completed=Dict() # completed msg_ids keyed by engine_id
308 all_completed=Set() # set of all completed msg_ids, regardless of engine
309 dead_engines=Set() # uuids of engines that have unregistered or died
310 # mia=None
311 incoming_registrations=Dict()
312 registration_timeout=Int()
313 _idcounter=Int(0)
314
315 # objects from constructor:
316 loop=Instance(ioloop.IOLoop)
317 query=Instance(ZMQStream)
318 monitor=Instance(ZMQStream)
319 heartmonitor=Instance(HeartMonitor)
320 notifier=Instance(ZMQStream)
321 db=Instance(object)
322 client_info=Dict()
323 engine_info=Dict()
324
325
326 def __init__(self, **kwargs):
327 """
328 # universal:
329 loop: IOLoop for creating future connections
330 session: streamsession for sending serialized data
331 # engine:
332 queue: ZMQStream for monitoring queue messages
333 query: ZMQStream for engine+client registration and client requests
334 heartbeat: HeartMonitor object for tracking engines
335 # extra:
336 db: ZMQStream for db connection (NotImplemented)
337 engine_info: zmq address/protocol dict for engine connections
338 client_info: zmq address/protocol dict for client connections
339 """
340
341 super(Hub, self).__init__(**kwargs)
342 self.registration_timeout = max(5000, 2*self.heartmonitor.period)
343
344 # validate connection dicts:
345 for k,v in self.client_info.iteritems():
346 if k == 'task':
347 validate_url_container(v[1])
348 else:
349 validate_url_container(v)
350 # validate_url_container(self.client_info)
351 validate_url_container(self.engine_info)
352
353 # register our callbacks
354 self.query.on_recv(self.dispatch_query)
355 self.monitor.on_recv(self.dispatch_monitor_traffic)
356
357 self.heartmonitor.add_heart_failure_handler(self.handle_heart_failure)
358 self.heartmonitor.add_new_heart_handler(self.handle_new_heart)
359
360 self.monitor_handlers = { 'in' : self.save_queue_request,
361 'out': self.save_queue_result,
362 'intask': self.save_task_request,
363 'outtask': self.save_task_result,
364 'tracktask': self.save_task_destination,
365 'incontrol': _passer,
366 'outcontrol': _passer,
367 'iopub': self.save_iopub_message,
368 }
369
370 self.query_handlers = {'queue_request': self.queue_status,
371 'result_request': self.get_results,
372 'purge_request': self.purge_results,
373 'load_request': self.check_load,
374 'resubmit_request': self.resubmit_task,
375 'shutdown_request': self.shutdown_request,
376 'registration_request' : self.register_engine,
377 'unregistration_request' : self.unregister_engine,
378 'connection_request': self.connection_request,
379 }
380
381 self.log.info("hub::created hub")
382
383 @property
384 def _next_id(self):
385 """gemerate a new ID.
386
387 No longer reuse old ids, just count from 0."""
388 newid = self._idcounter
389 self._idcounter += 1
390 return newid
391 # newid = 0
392 # incoming = [id[0] for id in self.incoming_registrations.itervalues()]
393 # # print newid, self.ids, self.incoming_registrations
394 # while newid in self.ids or newid in incoming:
395 # newid += 1
396 # return newid
397
398 #-----------------------------------------------------------------------------
399 # message validation
400 #-----------------------------------------------------------------------------
401
402 def _validate_targets(self, targets):
403 """turn any valid targets argument into a list of integer ids"""
404 if targets is None:
405 # default to all
406 targets = self.ids
407
408 if isinstance(targets, (int,str,unicode)):
409 # only one target specified
410 targets = [targets]
411 _targets = []
412 for t in targets:
413 # map raw identities to ids
414 if isinstance(t, (str,unicode)):
415 t = self.by_ident.get(t, t)
416 _targets.append(t)
417 targets = _targets
418 bad_targets = [ t for t in targets if t not in self.ids ]
419 if bad_targets:
420 raise IndexError("No Such Engine: %r"%bad_targets)
421 if not targets:
422 raise IndexError("No Engines Registered")
423 return targets
424
425 #-----------------------------------------------------------------------------
426 # dispatch methods (1 per stream)
427 #-----------------------------------------------------------------------------
428
429 # def dispatch_registration_request(self, msg):
430 # """"""
431 # self.log.debug("registration::dispatch_register_request(%s)"%msg)
432 # idents,msg = self.session.feed_identities(msg)
433 # if not idents:
434 # self.log.error("Bad Query Message: %s"%msg, exc_info=True)
435 # return
436 # try:
437 # msg = self.session.unpack_message(msg,content=True)
438 # except:
439 # self.log.error("registration::got bad registration message: %s"%msg, exc_info=True)
440 # return
441 #
442 # msg_type = msg['msg_type']
443 # content = msg['content']
444 #
445 # handler = self.query_handlers.get(msg_type, None)
446 # if handler is None:
447 # self.log.error("registration::got bad registration message: %s"%msg)
448 # else:
449 # handler(idents, msg)
450
451 def dispatch_monitor_traffic(self, msg):
452 """all ME and Task queue messages come through here, as well as
453 IOPub traffic."""
454 self.log.debug("monitor traffic: %s"%msg[:2])
455 switch = msg[0]
456 idents, msg = self.session.feed_identities(msg[1:])
457 if not idents:
458 self.log.error("Bad Monitor Message: %s"%msg)
459 return
460 handler = self.monitor_handlers.get(switch, None)
461 if handler is not None:
462 handler(idents, msg)
463 else:
464 self.log.error("Invalid monitor topic: %s"%switch)
465
466
467 def dispatch_query(self, msg):
468 """Route registration requests and queries from clients."""
469 idents, msg = self.session.feed_identities(msg)
470 if not idents:
471 self.log.error("Bad Query Message: %s"%msg)
472 return
473 client_id = idents[0]
474 try:
475 msg = self.session.unpack_message(msg, content=True)
476 except:
477 content = error.wrap_exception()
478 self.log.error("Bad Query Message: %s"%msg, exc_info=True)
479 self.session.send(self.query, "hub_error", ident=client_id,
480 content=content)
481 return
482
483 # print client_id, header, parent, content
484 #switch on message type:
485 msg_type = msg['msg_type']
486 self.log.info("client::client %s requested %s"%(client_id, msg_type))
487 handler = self.query_handlers.get(msg_type, None)
488 try:
489 assert handler is not None, "Bad Message Type: %s"%msg_type
490 except:
491 content = error.wrap_exception()
492 self.log.error("Bad Message Type: %s"%msg_type, exc_info=True)
493 self.session.send(self.query, "hub_error", ident=client_id,
494 content=content)
495 return
496 else:
497 handler(idents, msg)
498
499 def dispatch_db(self, msg):
500 """"""
501 raise NotImplementedError
502
503 #---------------------------------------------------------------------------
504 # handler methods (1 per event)
505 #---------------------------------------------------------------------------
506
507 #----------------------- Heartbeat --------------------------------------
508
509 def handle_new_heart(self, heart):
510 """handler to attach to heartbeater.
511 Called when a new heart starts to beat.
512 Triggers completion of registration."""
513 self.log.debug("heartbeat::handle_new_heart(%r)"%heart)
514 if heart not in self.incoming_registrations:
515 self.log.info("heartbeat::ignoring new heart: %r"%heart)
516 else:
517 self.finish_registration(heart)
518
519
520 def handle_heart_failure(self, heart):
521 """handler to attach to heartbeater.
522 called when a previously registered heart fails to respond to beat request.
523 triggers unregistration"""
524 self.log.debug("heartbeat::handle_heart_failure(%r)"%heart)
525 eid = self.hearts.get(heart, None)
526 if eid is None:
527 self.log.info("heartbeat::ignoring heart failure %r"%heart)
528 else:
529 queue = self.engines[eid].queue
530 self.unregister_engine(heart, dict(content=dict(id=eid, queue=queue)))
531
532 #----------------------- MUX Queue Traffic ------------------------------
533
534 def save_queue_request(self, idents, msg):
535 if len(idents) < 2:
536 self.log.error("invalid identity prefix: %s"%idents)
537 return
538 queue_id, client_id = idents[:2]
539 try:
540 msg = self.session.unpack_message(msg, content=False)
541 except:
542 self.log.error("queue::client %r sent invalid message to %r: %s"%(client_id, queue_id, msg), exc_info=True)
543 return
544
545 eid = self.by_ident.get(queue_id, None)
546 if eid is None:
547 self.log.error("queue::target %r not registered"%queue_id)
548 self.log.debug("queue:: valid are: %s"%(self.by_ident.keys()))
549 return
550
551 header = msg['header']
552 msg_id = header['msg_id']
553 record = init_record(msg)
554 record['engine_uuid'] = queue_id
555 record['client_uuid'] = client_id
556 record['queue'] = 'mux'
557
558 try:
559 # it's possible iopub arrived first:
560 existing = self.db.get_record(msg_id)
561 for key,evalue in existing.iteritems():
562 rvalue = record[key]
563 if evalue and rvalue and evalue != rvalue:
564 self.log.error("conflicting initial state for record: %s:%s <> %s"%(msg_id, rvalue, evalue))
565 elif evalue and not rvalue:
566 record[key] = evalue
567 self.db.update_record(msg_id, record)
568 except KeyError:
569 self.db.add_record(msg_id, record)
570
571 self.pending.add(msg_id)
572 self.queues[eid].append(msg_id)
573
574 def save_queue_result(self, idents, msg):
575 if len(idents) < 2:
576 self.log.error("invalid identity prefix: %s"%idents)
577 return
578
579 client_id, queue_id = idents[:2]
580 try:
581 msg = self.session.unpack_message(msg, content=False)
582 except:
583 self.log.error("queue::engine %r sent invalid message to %r: %s"%(
584 queue_id,client_id, msg), exc_info=True)
585 return
586
587 eid = self.by_ident.get(queue_id, None)
588 if eid is None:
589 self.log.error("queue::unknown engine %r is sending a reply: "%queue_id)
590 # self.log.debug("queue:: %s"%msg[2:])
591 return
592
593 parent = msg['parent_header']
594 if not parent:
595 return
596 msg_id = parent['msg_id']
597 if msg_id in self.pending:
598 self.pending.remove(msg_id)
599 self.all_completed.add(msg_id)
600 self.queues[eid].remove(msg_id)
601 self.completed[eid].append(msg_id)
602 elif msg_id not in self.all_completed:
603 # it could be a result from a dead engine that died before delivering the
604 # result
605 self.log.warn("queue:: unknown msg finished %s"%msg_id)
606 return
607 # update record anyway, because the unregistration could have been premature
608 rheader = msg['header']
609 completed = datetime.strptime(rheader['date'], ISO8601)
610 started = rheader.get('started', None)
611 if started is not None:
612 started = datetime.strptime(started, ISO8601)
613 result = {
614 'result_header' : rheader,
615 'result_content': msg['content'],
616 'started' : started,
617 'completed' : completed
618 }
619
620 result['result_buffers'] = msg['buffers']
621 self.db.update_record(msg_id, result)
622
623
624 #--------------------- Task Queue Traffic ------------------------------
625
626 def save_task_request(self, idents, msg):
627 """Save the submission of a task."""
628 client_id = idents[0]
629
630 try:
631 msg = self.session.unpack_message(msg, content=False)
632 except:
633 self.log.error("task::client %r sent invalid task message: %s"%(
634 client_id, msg), exc_info=True)
635 return
636 record = init_record(msg)
637
638 record['client_uuid'] = client_id
639 record['queue'] = 'task'
640 header = msg['header']
641 msg_id = header['msg_id']
642 self.pending.add(msg_id)
643 try:
644 # it's possible iopub arrived first:
645 existing = self.db.get_record(msg_id)
646 for key,evalue in existing.iteritems():
647 rvalue = record[key]
648 if evalue and rvalue and evalue != rvalue:
649 self.log.error("conflicting initial state for record: %s:%s <> %s"%(msg_id, rvalue, evalue))
650 elif evalue and not rvalue:
651 record[key] = evalue
652 self.db.update_record(msg_id, record)
653 except KeyError:
654 self.db.add_record(msg_id, record)
655
656 def save_task_result(self, idents, msg):
657 """save the result of a completed task."""
658 client_id = idents[0]
659 try:
660 msg = self.session.unpack_message(msg, content=False)
661 except:
662 self.log.error("task::invalid task result message send to %r: %s"%(
663 client_id, msg), exc_info=True)
664 # match the other save_* handlers: log the bad message and bail out
665 return
666
667 parent = msg['parent_header']
668 if not parent:
669 # print msg
670 self.log.warn("Task %r had no parent!"%msg)
671 return
672 msg_id = parent['msg_id']
673
674 header = msg['header']
675 engine_uuid = header.get('engine', None)
676 eid = self.by_ident.get(engine_uuid, None)
677
678 if msg_id in self.pending:
679 self.pending.remove(msg_id)
680 self.all_completed.add(msg_id)
681 if eid is not None:
682 self.completed[eid].append(msg_id)
683 if msg_id in self.tasks[eid]:
684 self.tasks[eid].remove(msg_id)
685 completed = datetime.strptime(header['date'], ISO8601)
686 started = header.get('started', None)
687 if started is not None:
688 started = datetime.strptime(started, ISO8601)
689 result = {
690 'result_header' : header,
691 'result_content': msg['content'],
692 'started' : started,
693 'completed' : completed,
694 'engine_uuid': engine_uuid
695 }
696
697 result['result_buffers'] = msg['buffers']
698 self.db.update_record(msg_id, result)
699
700 else:
701 self.log.debug("task::unknown task %s finished"%msg_id)
702
703 def save_task_destination(self, idents, msg):
704 try:
705 msg = self.session.unpack_message(msg, content=True)
706 except:
707 self.log.error("task::invalid task tracking message", exc_info=True)
708 return
709 content = msg['content']
710 # print (content)
711 msg_id = content['msg_id']
712 engine_uuid = content['engine_id']
713 eid = self.by_ident[engine_uuid]
714
715 self.log.info("task::task %s arrived on %s"%(msg_id, eid))
716 # if msg_id in self.mia:
717 # self.mia.remove(msg_id)
718 # else:
719 # self.log.debug("task::task %s not listed as MIA?!"%(msg_id))
720
721 self.tasks[eid].append(msg_id)
722 # self.pending[msg_id][1].update(received=datetime.now(),engine=(eid,engine_uuid))
723 self.db.update_record(msg_id, dict(engine_uuid=engine_uuid))
724
725 def mia_task_request(self, idents, msg):
726 raise NotImplementedError
727 client_id = idents[0]
728 # content = dict(mia=self.mia,status='ok')
729 # self.session.send('mia_reply', content=content, idents=client_id)
730
731
732 #--------------------- IOPub Traffic ------------------------------
733
734 def save_iopub_message(self, topics, msg):
735 """save an iopub message into the db"""
736 # print (topics)
737 try:
738 msg = self.session.unpack_message(msg, content=True)
739 except:
740 self.log.error("iopub::invalid IOPub message", exc_info=True)
741 return
742
743 parent = msg['parent_header']
744 if not parent:
745 self.log.error("iopub::invalid IOPub message: %s"%msg)
746 return
747 msg_id = parent['msg_id']
748 msg_type = msg['msg_type']
749 content = msg['content']
750
751 # ensure msg_id is in db
752 try:
753 rec = self.db.get_record(msg_id)
754 except KeyError:
755 rec = empty_record()
756 rec['msg_id'] = msg_id
757 self.db.add_record(msg_id, rec)
758 # stream
759 d = {}
760 if msg_type == 'stream':
761 name = content['name']
762 s = rec[name] or ''
763 d[name] = s + content['data']
764
765 elif msg_type == 'pyerr':
766 d['pyerr'] = content
767 else:
768 d[msg_type] = content['data']
769
770 self.db.update_record(msg_id, d)
771
772
773
774 #-------------------------------------------------------------------------
775 # Registration requests
776 #-------------------------------------------------------------------------
777
778 def connection_request(self, client_id, msg):
779 """Reply with connection addresses for clients."""
780 self.log.info("client::client %s connected"%client_id)
781 content = dict(status='ok')
782 content.update(self.client_info)
783 jsonable = {}
784 for k,v in self.keytable.iteritems():
785 if v not in self.dead_engines:
786 jsonable[str(k)] = v
787 content['engines'] = jsonable
788 self.session.send(self.query, 'connection_reply', content, parent=msg, ident=client_id)
789
790 def register_engine(self, reg, msg):
791 """Register a new engine."""
792 content = msg['content']
793 try:
794 queue = content['queue']
795 except KeyError:
796 self.log.error("registration::queue not specified", exc_info=True)
797 return
798 heart = content.get('heartbeat', None)
799 """register a new engine, and create the socket(s) necessary"""
800 eid = self._next_id
801 # print (eid, queue, reg, heart)
802
803 self.log.debug("registration::register_engine(%i, %r, %r, %r)"%(eid, queue, reg, heart))
804
805 content = dict(id=eid,status='ok')
806 content.update(self.engine_info)
807 # check if requesting available IDs:
808 if queue in self.by_ident:
809 try:
810 raise KeyError("queue_id %r in use"%queue)
811 except:
812 content = error.wrap_exception()
813 self.log.error("queue_id %r in use"%queue, exc_info=True)
814 elif heart in self.hearts: # need to check unique hearts?
815 try:
816 raise KeyError("heart_id %r in use"%heart)
817 except:
818 self.log.error("heart_id %r in use"%heart, exc_info=True)
819 content = error.wrap_exception()
820 else:
821 for h, pack in self.incoming_registrations.iteritems():
822 if heart == h:
823 try:
824 raise KeyError("heart_id %r in use"%heart)
825 except:
826 self.log.error("heart_id %r in use"%heart, exc_info=True)
827 content = error.wrap_exception()
828 break
829 elif queue == pack[1]:
830 try:
831 raise KeyError("queue_id %r in use"%queue)
832 except:
833 self.log.error("queue_id %r in use"%queue, exc_info=True)
834 content = error.wrap_exception()
835 break
836
837 msg = self.session.send(self.query, "registration_reply",
838 content=content,
839 ident=reg)
840
841 if content['status'] == 'ok':
842 if heart in self.heartmonitor.hearts:
843 # already beating
844 self.incoming_registrations[heart] = (eid,queue,reg[0],None)
845 self.finish_registration(heart)
846 else:
847 purge = lambda : self._purge_stalled_registration(heart)
848 dc = ioloop.DelayedCallback(purge, self.registration_timeout, self.loop)
849 dc.start()
850 self.incoming_registrations[heart] = (eid,queue,reg[0],dc)
851 else:
852 self.log.error("registration::registration %i failed: %s"%(eid, content['evalue']))
853 return eid
854
855 def unregister_engine(self, ident, msg):
856 """Unregister an engine that explicitly requested to leave."""
857 try:
858 eid = msg['content']['id']
859 except:
860 self.log.error("registration::bad engine id for unregistration: %s"%ident, exc_info=True)
861 return
862 self.log.info("registration::unregister_engine(%s)"%eid)
863 # print (eid)
864 uuid = self.keytable[eid]
865 content=dict(id=eid, queue=uuid)
866 self.dead_engines.add(uuid)
867 # self.ids.remove(eid)
868 # uuid = self.keytable.pop(eid)
869 #
870 # ec = self.engines.pop(eid)
871 # self.hearts.pop(ec.heartbeat)
872 # self.by_ident.pop(ec.queue)
873 # self.completed.pop(eid)
874 handleit = lambda : self._handle_stranded_msgs(eid, uuid)
875 dc = ioloop.DelayedCallback(handleit, self.registration_timeout, self.loop)
876 dc.start()
877 ############## TODO: HANDLE IT ################
878
879 if self.notifier:
880 self.session.send(self.notifier, "unregistration_notification", content=content)
881
882 def _handle_stranded_msgs(self, eid, uuid):
883 """Handle messages known to be on an engine when the engine unregisters.
884
885 It is possible that this will fire prematurely - that is, an engine will
886 go down after completing a result, and the client will be notified
887 that the result failed and later receive the actual result.
888 """
889
890 outstanding = self.queues[eid]
891
892 for msg_id in outstanding:
893 self.pending.remove(msg_id)
894 self.all_completed.add(msg_id)
895 try:
896 raise error.EngineError("Engine %r died while running task %r"%(eid, msg_id))
897 except:
898 content = error.wrap_exception()
899 # build a fake header:
900 header = {}
901 header['engine'] = uuid
902 header['date'] = datetime.now().strftime(ISO8601)
903 rec = dict(result_content=content, result_header=header, result_buffers=[])
904 rec['completed'] = header['date']
905 rec['engine_uuid'] = uuid
906 self.db.update_record(msg_id, rec)
907
908 def finish_registration(self, heart):
909 """Second half of engine registration, called after our HeartMonitor
910 has received a beat from the Engine's Heart."""
911 try:
912 (eid,queue,reg,purge) = self.incoming_registrations.pop(heart)
913 except KeyError:
914 self.log.error("registration::tried to finish nonexistent registration", exc_info=True)
915 return
916 self.log.info("registration::finished registering engine %i:%r"%(eid,queue))
917 if purge is not None:
918 purge.stop()
919 control = queue
920 self.ids.add(eid)
921 self.keytable[eid] = queue
922 self.engines[eid] = EngineConnector(id=eid, queue=queue, registration=reg,
923 control=control, heartbeat=heart)
924 self.by_ident[queue] = eid
925 self.queues[eid] = list()
926 self.tasks[eid] = list()
927 self.completed[eid] = list()
928 self.hearts[heart] = eid
929 content = dict(id=eid, queue=self.engines[eid].queue)
930 if self.notifier:
931 self.session.send(self.notifier, "registration_notification", content=content)
932 self.log.info("engine::Engine Connected: %i"%eid)
933
934 def _purge_stalled_registration(self, heart):
935 if heart in self.incoming_registrations:
936 eid = self.incoming_registrations.pop(heart)[0]
937 self.log.info("registration::purging stalled registration: %i"%eid)
938 else:
939 pass
940
941 #-------------------------------------------------------------------------
942 # Client Requests
943 #-------------------------------------------------------------------------
944
945 def shutdown_request(self, client_id, msg):
946 """handle shutdown request."""
947 self.session.send(self.query, 'shutdown_reply', content={'status': 'ok'}, ident=client_id)
948 # also notify other clients of shutdown
949 self.session.send(self.notifier, 'shutdown_notice', content={'status': 'ok'})
950 dc = ioloop.DelayedCallback(lambda : self._shutdown(), 1000, self.loop)
951 dc.start()
952
953 def _shutdown(self):
954 self.log.info("hub::hub shutting down.")
955 time.sleep(0.1)
956 sys.exit(0)
957
958
959 def check_load(self, client_id, msg):
960 content = msg['content']
961 try:
962 targets = content['targets']
963 targets = self._validate_targets(targets)
964 except:
965 content = error.wrap_exception()
966 self.session.send(self.query, "hub_error",
967 content=content, ident=client_id)
968 return
969
970 content = dict(status='ok')
971 # loads = {}
972 for t in targets:
973 content[bytes(t)] = len(self.queues[t])+len(self.tasks[t])
974 self.session.send(self.query, "load_reply", content=content, ident=client_id)
975
976
977 def queue_status(self, client_id, msg):
978 """Return the Queue status of one or more targets.
979 if verbose: return the msg_ids
980 else: return len of each type.
981 keys: queue (pending MUX jobs)
982 tasks (pending Task jobs)
983 completed (finished jobs from both queues)"""
984 content = msg['content']
985 targets = content['targets']
986 try:
987 targets = self._validate_targets(targets)
988 except:
989 content = error.wrap_exception()
990 self.session.send(self.query, "hub_error",
991 content=content, ident=client_id)
992 return
993 verbose = content.get('verbose', False)
994 content = dict(status='ok')
995 for t in targets:
996 queue = self.queues[t]
997 completed = self.completed[t]
998 tasks = self.tasks[t]
999 if not verbose:
1000 queue = len(queue)
1001 completed = len(completed)
1002 tasks = len(tasks)
1003 content[bytes(t)] = {'queue': queue, 'completed': completed , 'tasks': tasks}
1004 # pending
1005 self.session.send(self.query, "queue_reply", content=content, ident=client_id)
1006
1007 def purge_results(self, client_id, msg):
1008 """Purge results from memory. This method is more valuable before we move
1009 to a DB based message storage mechanism."""
1010 content = msg['content']
1011 msg_ids = content.get('msg_ids', [])
1012 reply = dict(status='ok')
1013 if msg_ids == 'all':
1014 self.db.drop_matching_records(dict(completed={'$ne':None}))
1015 else:
1016 for msg_id in msg_ids:
1017 if msg_id in self.all_completed:
1018 self.db.drop_record(msg_id)
1019 else:
1020 if msg_id in self.pending:
1021 try:
1022 raise IndexError("msg pending: %r"%msg_id)
1023 except:
1024 reply = error.wrap_exception()
1025 else:
1026 try:
1027 raise IndexError("No such msg: %r"%msg_id)
1028 except:
1029 reply = error.wrap_exception()
1030 break
1031 eids = content.get('engine_ids', [])
1032 for eid in eids:
1033 if eid not in self.engines:
1034 try:
1035 raise IndexError("No such engine: %i"%eid)
1036 except:
1037 reply = error.wrap_exception()
1038 break
1039 msg_ids = self.completed.pop(eid)
1040 uid = self.engines[eid].queue
1041 self.db.drop_matching_records(dict(engine_uuid=uid, completed={'$ne':None}))
1042
1043 self.session.send(self.query, 'purge_reply', content=reply, ident=client_id)
1044
1045 def resubmit_task(self, client_id, msg, buffers):
1046 """Resubmit a task."""
1047 raise NotImplementedError
1048
1049 def get_results(self, client_id, msg):
1050 """Get the result of 1 or more messages."""
1051 content = msg['content']
1052 msg_ids = sorted(set(content['msg_ids']))
1053 statusonly = content.get('status_only', False)
1054 pending = []
1055 completed = []
1056 content = dict(status='ok')
1057 content['pending'] = pending
1058 content['completed'] = completed
1059 buffers = []
1060 if not statusonly:
1061 content['results'] = {}
1062 records = self.db.find_records(dict(msg_id={'$in':msg_ids}))
1063 for msg_id in msg_ids:
1064 if msg_id in self.pending:
1065 pending.append(msg_id)
1066 elif msg_id in self.all_completed:
1067 completed.append(msg_id)
1068 if not statusonly:
1069 rec = records[msg_id]
1070 io_dict = {}
1071 for key in 'pyin pyout pyerr stdout stderr'.split():
1072 io_dict[key] = rec[key]
1073 content[msg_id] = { 'result_content': rec['result_content'],
1074 'header': rec['header'],
1075 'result_header' : rec['result_header'],
1076 'io' : io_dict,
1077 }
1078 if rec['result_buffers']:
1079 buffers.extend(map(str, rec['result_buffers']))
1080 else:
1081 try:
1082 raise KeyError('No such message: '+msg_id)
1083 except:
1084 content = error.wrap_exception()
1085 break
1086 self.session.send(self.query, "result_reply", content=content,
1087 parent=msg, ident=client_id,
1088 buffers=buffers)
1089
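For reference, a minimal sketch of the request payloads these Hub handlers read, matching the content keys used above (the msg_id and engine values are placeholders, and the surrounding session/transport plumbing is omitted):

    # purge_results: drop stored records by msg_id and/or by engine
    purge_content = {
        'msg_ids': ['msg-1', 'msg-2'],   # or the string 'all' to drop every completed record
        'engine_ids': [0, 3],            # additionally purge everything these engines completed
    }

    # get_results: fetch results (or just pending/completed status) for some msg_ids
    result_request = {
        'msg_ids': ['msg-1', 'msg-2'],
        'status_only': False,            # True -> only report pending/completed, no result data
    }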
@@ -0,0 +1,80 b''
1 """A TaskRecord backend using mongodb"""
2 #-----------------------------------------------------------------------------
3 # Copyright (C) 2010 The IPython Development Team
4 #
5 # Distributed under the terms of the BSD License. The full license is in
6 # the file COPYING, distributed as part of this software.
7 #-----------------------------------------------------------------------------
8
9 from datetime import datetime
10
11 from pymongo import Connection
12 from pymongo.binary import Binary
13
14 from IPython.utils.traitlets import Dict, List, CUnicode
15
16 from .dictdb import BaseDB
17
18 #-----------------------------------------------------------------------------
19 # MongoDB class
20 #-----------------------------------------------------------------------------
21
22 class MongoDB(BaseDB):
23 """MongoDB TaskRecord backend."""
24
25 connection_args = List(config=True)
26 connection_kwargs = Dict(config=True)
27 database = CUnicode(config=True)
28 _table = Dict()
29
30 def __init__(self, **kwargs):
31 super(MongoDB, self).__init__(**kwargs)
32 self._connection = Connection(*self.connection_args, **self.connection_kwargs)
33 if not self.database:
34 self.database = self.session
35 self._db = self._connection[self.database]
36 self._records = self._db['task_records']
37
38 def _binary_buffers(self, rec):
39 for key in ('buffers', 'result_buffers'):
40 if key in rec:
41 rec[key] = map(Binary, rec[key])
42
43 def add_record(self, msg_id, rec):
44 """Add a new Task Record, by msg_id."""
45 # print rec
46 self._binary_buffers(rec)
47 obj_id = self._records.insert(rec)
48 self._table[msg_id] = obj_id
49
50 def get_record(self, msg_id):
51 """Get a specific Task Record, by msg_id."""
52 return self._records.find_one(self._table[msg_id])
53
54 def update_record(self, msg_id, rec):
55 """Update the data in an existing record."""
56 self._binary_buffers(rec)
57 obj_id = self._table[msg_id]
58 self._records.update({'_id':obj_id}, {'$set': rec})
59
60 def drop_matching_records(self, check):
61 """Remove a record from the DB."""
62 self._records.remove(check)
63
64 def drop_record(self, msg_id):
65 """Remove a record from the DB."""
66 obj_id = self._table.pop(msg_id)
67 self._records.remove(obj_id)
68
69 def find_records(self, check, id_only=False):
70 """Find records matching a query dict."""
71 matches = list(self._records.find(check))
72 if id_only:
73 return [ rec['msg_id'] for rec in matches ]
74 else:
75 data = {}
76 for rec in matches:
77 data[rec['msg_id']] = rec
78 return data
79
80
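The traits above map directly onto pymongo: connection_args and connection_kwargs are handed to Connection, and database names the db holding the task_records collection. A rough equivalent in plain pymongo, where the host, port, and database name are assumptions:

    from pymongo import Connection

    conn = Connection('localhost', 27017)   # Connection(*connection_args, **connection_kwargs)
    db = conn['ipython_tasks']               # self.database (defaults to the session name)
    records = db['task_records']             # the collection used for TaskRecords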
@@ -0,0 +1,592 b''
1 """The Python scheduler for rich scheduling.
2
3 The Pure ZMQ scheduler does not allow routing schemes other than LRU,
4 nor does it check msg_id DAG dependencies. For those, a slightly slower
5 Python Scheduler exists.
6 """
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2010-2011 The IPython Development Team
9 #
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
13
14 #----------------------------------------------------------------------
15 # Imports
16 #----------------------------------------------------------------------
17
18 from __future__ import print_function
19
20 import logging
21 import sys
22
23 from datetime import datetime, timedelta
24 from random import randint, random
25 from types import FunctionType
26
27 try:
28 import numpy
29 except ImportError:
30 numpy = None
31
32 import zmq
33 from zmq.eventloop import ioloop, zmqstream
34
35 # local imports
36 from IPython.external.decorator import decorator
37 from IPython.utils.traitlets import Instance, Dict, List, Set
38
39 from IPython.parallel import error
40 from IPython.parallel.factory import SessionFactory
41 from IPython.parallel.util import connect_logger, local_logger
42
43 from .dependency import Dependency
44
45 @decorator
46 def logged(f,self,*args,**kwargs):
47 # print ("#--------------------")
48 self.log.debug("scheduler::%s(*%s,**%s)"%(f.func_name, args, kwargs))
49 # print ("#--")
50 return f(self,*args, **kwargs)
51
52 #----------------------------------------------------------------------
53 # Chooser functions
54 #----------------------------------------------------------------------
55
56 def plainrandom(loads):
57 """Plain random pick."""
58 n = len(loads)
59 return randint(0,n-1)
60
61 def lru(loads):
62 """Always pick the front of the line.
63
64 The content of `loads` is ignored.
65
66 Assumes LRU ordering of loads, with oldest first.
67 """
68 return 0
69
70 def twobin(loads):
71 """Pick two at random, use the LRU of the two.
72
73 The content of loads is ignored.
74
75 Assumes LRU ordering of loads, with oldest first.
76 """
77 n = len(loads)
78 a = randint(0,n-1)
79 b = randint(0,n-1)
80 return min(a,b)
81
82 def weighted(loads):
83 """Pick two at random using inverse load as weight.
84
85 Return the less loaded of the two.
86 """
87 # weight 0 a million times more than 1:
88 weights = 1./(1e-6+numpy.array(loads))
89 sums = weights.cumsum()
90 t = sums[-1]
91 x = random()*t
92 y = random()*t
93 idx = 0
94 idy = 0
95 while sums[idx] < x:
96 idx += 1
97 while sums[idy] < y:
98 idy += 1
99 if weights[idy] > weights[idx]:
100 return idy
101 else:
102 return idx
103
104 def leastload(loads):
105 """Always choose the lowest load.
106
107 If the lowest load occurs more than once, the first
108 occurrence will be used. If loads has LRU ordering, this means
109 the LRU of those with the lowest load is chosen.
110 """
111 return loads.index(min(loads))
112
113 #---------------------------------------------------------------------
114 # Classes
115 #---------------------------------------------------------------------
116 # store empty default dependency:
117 MET = Dependency([])
118
119 class TaskScheduler(SessionFactory):
120 """Python TaskScheduler object.
121
122 This is the simplest object that supports msg_id based
123 DAG dependencies. *Only* task msg_ids are checked, not
124 msg_ids of jobs submitted via the MUX queue.
125
126 """
127
128 # input arguments:
129 scheme = Instance(FunctionType, default=leastload) # function for determining the destination
130 client_stream = Instance(zmqstream.ZMQStream) # client-facing stream
131 engine_stream = Instance(zmqstream.ZMQStream) # engine-facing stream
132 notifier_stream = Instance(zmqstream.ZMQStream) # hub-facing sub stream
133 mon_stream = Instance(zmqstream.ZMQStream) # hub-facing pub stream
134
135 # internals:
136 graph = Dict() # dict by msg_id of [ msg_ids that depend on key ]
137 depending = Dict() # dict by msg_id of (msg_id, raw_msg, after, follow)
138 pending = Dict() # dict by engine_uuid of submitted tasks
139 completed = Dict() # dict by engine_uuid of completed tasks
140 failed = Dict() # dict by engine_uuid of failed tasks
141 destinations = Dict() # dict by msg_id of engine_uuids where jobs ran (reverse of completed+failed)
142 clients = Dict() # dict by msg_id for who submitted the task
143 targets = List() # list of target IDENTs
144 loads = List() # list of engine loads
145 all_completed = Set() # set of all completed tasks
146 all_failed = Set() # set of all failed tasks
147 all_done = Set() # set of all finished tasks=union(completed,failed)
148 all_ids = Set() # set of all submitted task IDs
149 blacklist = Dict() # dict by msg_id of locations where a job has encountered UnmetDependency
150 auditor = Instance('zmq.eventloop.ioloop.PeriodicCallback')
151
152
153 def start(self):
154 self.engine_stream.on_recv(self.dispatch_result, copy=False)
155 self._notification_handlers = dict(
156 registration_notification = self._register_engine,
157 unregistration_notification = self._unregister_engine
158 )
159 self.notifier_stream.on_recv(self.dispatch_notification)
160 self.auditor = ioloop.PeriodicCallback(self.audit_timeouts, 2e3, self.loop) # every 2 seconds
161 self.auditor.start()
162 self.log.info("Scheduler started...%r"%self)
163
164 def resume_receiving(self):
165 """Resume accepting jobs."""
166 self.client_stream.on_recv(self.dispatch_submission, copy=False)
167
168 def stop_receiving(self):
169 """Stop accepting jobs while there are no engines.
170 Leave them in the ZMQ queue."""
171 self.client_stream.on_recv(None)
172
173 #-----------------------------------------------------------------------
174 # [Un]Registration Handling
175 #-----------------------------------------------------------------------
176
177 def dispatch_notification(self, msg):
178 """dispatch register/unregister events."""
179 idents,msg = self.session.feed_identities(msg)
180 msg = self.session.unpack_message(msg)
181 msg_type = msg['msg_type']
182 handler = self._notification_handlers.get(msg_type, None)
183 if handler is None:
184 raise Exception("Unhandled message type: %s"%msg_type)
185 else:
186 try:
187 handler(str(msg['content']['queue']))
188 except KeyError:
189 self.log.error("task::Invalid notification msg: %s"%msg)
190
191 @logged
192 def _register_engine(self, uid):
193 """New engine with ident `uid` became available."""
194 # head of the line:
195 self.targets.insert(0,uid)
196 self.loads.insert(0,0)
197 # initialize sets
198 self.completed[uid] = set()
199 self.failed[uid] = set()
200 self.pending[uid] = {}
201 if len(self.targets) == 1:
202 self.resume_receiving()
203
204 def _unregister_engine(self, uid):
205 """Existing engine with ident `uid` became unavailable."""
206 if len(self.targets) == 1:
207 # this was our only engine
208 self.stop_receiving()
209
210 # handle any potentially finished tasks:
211 self.engine_stream.flush()
212
213 self.completed.pop(uid)
214 self.failed.pop(uid)
215 # don't pop destinations, because it might be used later
216 # map(self.destinations.pop, self.completed.pop(uid))
217 # map(self.destinations.pop, self.failed.pop(uid))
218
219 idx = self.targets.index(uid)
220 self.targets.pop(idx)
221 self.loads.pop(idx)
222
223 # wait 5 seconds before cleaning up pending jobs, since the results might
224 # still be incoming
225 if self.pending[uid]:
226 dc = ioloop.DelayedCallback(lambda : self.handle_stranded_tasks(uid), 5000, self.loop)
227 dc.start()
228
229 @logged
230 def handle_stranded_tasks(self, engine):
231 """Deal with jobs resident in an engine that died."""
232 lost = self.pending.pop(engine)
233
234 for msg_id, (raw_msg, targets, MET, follow, timeout) in lost.iteritems():
235 self.all_failed.add(msg_id)
236 self.all_done.add(msg_id)
237 idents,msg = self.session.feed_identities(raw_msg, copy=False)
238 msg = self.session.unpack_message(msg, copy=False, content=False)
239 parent = msg['header']
240 idents = [idents[0],engine]+idents[1:]
241 # print (idents)
242 try:
243 raise error.EngineError("Engine %r died while running task %r"%(engine, msg_id))
244 except:
245 content = error.wrap_exception()
246 msg = self.session.send(self.client_stream, 'apply_reply', content,
247 parent=parent, ident=idents)
248 self.session.send(self.mon_stream, msg, ident=['outtask']+idents)
249 self.update_graph(msg_id)
250
251
252 #-----------------------------------------------------------------------
253 # Job Submission
254 #-----------------------------------------------------------------------
255 @logged
256 def dispatch_submission(self, raw_msg):
257 """Dispatch job submission to appropriate handlers."""
258 # ensure targets up to date:
259 self.notifier_stream.flush()
260 try:
261 idents, msg = self.session.feed_identities(raw_msg, copy=False)
262 msg = self.session.unpack_message(msg, content=False, copy=False)
263 except:
264 self.log.error("task::Invalid task: %s"%raw_msg, exc_info=True)
265 return
266
267 # send to monitor
268 self.mon_stream.send_multipart(['intask']+raw_msg, copy=False)
269
270 header = msg['header']
271 msg_id = header['msg_id']
272 self.all_ids.add(msg_id)
273
274 # targets
275 targets = set(header.get('targets', []))
276
277 # time dependencies
278 after = Dependency(header.get('after', []))
279 if after.all:
280 if after.success:
281 after.difference_update(self.all_completed)
282 if after.failure:
283 after.difference_update(self.all_failed)
284 if after.check(self.all_completed, self.all_failed):
285 # recast as empty set, if `after` already met,
286 # to prevent unnecessary set comparisons
287 after = MET
288
289 # location dependencies
290 follow = Dependency(header.get('follow', []))
291
292 # turn timeouts into datetime objects:
293 timeout = header.get('timeout', None)
294 if timeout:
295 timeout = datetime.now() + timedelta(0,timeout,0)
296
297 args = [raw_msg, targets, after, follow, timeout]
298
299 # validate and reduce dependencies:
300 for dep in after,follow:
301 # check valid:
302 if msg_id in dep or dep.difference(self.all_ids):
303 self.depending[msg_id] = args
304 return self.fail_unreachable(msg_id, error.InvalidDependency)
305 # check if unreachable:
306 if dep.unreachable(self.all_completed, self.all_failed):
307 self.depending[msg_id] = args
308 return self.fail_unreachable(msg_id)
309
310 if after.check(self.all_completed, self.all_failed):
311 # time deps already met, try to run
312 if not self.maybe_run(msg_id, *args):
313 # can't run yet
314 self.save_unmet(msg_id, *args)
315 else:
316 self.save_unmet(msg_id, *args)
317
318 # @logged
319 def audit_timeouts(self):
320 """Audit all waiting tasks for expired timeouts."""
321 now = datetime.now()
322 for msg_id in self.depending.keys():
323 # must recheck, in case one failure cascaded to another:
324 if msg_id in self.depending:
325 raw,targets,after,follow,timeout = self.depending[msg_id]
326 if timeout and timeout < now:
327 self.fail_unreachable(msg_id, timeout=True)
328
329 @logged
330 def fail_unreachable(self, msg_id, why=error.ImpossibleDependency):
331 """a task has become unreachable, send a reply with an ImpossibleDependency
332 error."""
333 if msg_id not in self.depending:
334 self.log.error("msg %r already failed!"%msg_id)
335 return
336 raw_msg,targets,after,follow,timeout = self.depending.pop(msg_id)
337 for mid in follow.union(after):
338 if mid in self.graph:
339 self.graph[mid].remove(msg_id)
340
341 # FIXME: unpacking a message I've already unpacked, but didn't save:
342 idents,msg = self.session.feed_identities(raw_msg, copy=False)
343 msg = self.session.unpack_message(msg, copy=False, content=False)
344 header = msg['header']
345
346 try:
347 raise why()
348 except:
349 content = error.wrap_exception()
350
351 self.all_done.add(msg_id)
352 self.all_failed.add(msg_id)
353
354 msg = self.session.send(self.client_stream, 'apply_reply', content,
355 parent=header, ident=idents)
356 self.session.send(self.mon_stream, msg, ident=['outtask']+idents)
357
358 self.update_graph(msg_id, success=False)
359
360 @logged
361 def maybe_run(self, msg_id, raw_msg, targets, after, follow, timeout):
362 """check location dependencies, and run if they are met."""
363 blacklist = self.blacklist.setdefault(msg_id, set())
364 if follow or targets or blacklist:
365 # we need a can_run filter
366 def can_run(idx):
367 target = self.targets[idx]
368 # check targets
369 if targets and target not in targets:
370 return False
371 # check blacklist
372 if target in blacklist:
373 return False
374 # check follow
375 return follow.check(self.completed[target], self.failed[target])
376
377 indices = filter(can_run, range(len(self.targets)))
378 if not indices:
379 # couldn't run
380 if follow.all:
381 # check follow for impossibility
382 dests = set()
383 relevant = set()
384 if follow.success:
385 relevant = self.all_completed
386 if follow.failure:
387 relevant = relevant.union(self.all_failed)
388 for m in follow.intersection(relevant):
389 dests.add(self.destinations[m])
390 if len(dests) > 1:
391 self.fail_unreachable(msg_id)
392 return False
393 if targets:
394 # check blacklist+targets for impossibility
395 targets.difference_update(blacklist)
396 if not targets or not targets.intersection(self.targets):
397 self.fail_unreachable(msg_id)
398 return False
399 return False
400 else:
401 indices = None
402
403 self.submit_task(msg_id, raw_msg, targets, follow, timeout, indices)
404 return True
405
406 @logged
407 def save_unmet(self, msg_id, raw_msg, targets, after, follow, timeout):
408 """Save a message for later submission when its dependencies are met."""
409 self.depending[msg_id] = [raw_msg,targets,after,follow,timeout]
410 # track the ids in follow or after, but not those already finished
411 for dep_id in after.union(follow).difference(self.all_done):
412 if dep_id not in self.graph:
413 self.graph[dep_id] = set()
414 self.graph[dep_id].add(msg_id)
415
416 @logged
417 def submit_task(self, msg_id, raw_msg, targets, follow, timeout, indices=None):
418 """Submit a task to any of a subset of our targets."""
419 if indices:
420 loads = [self.loads[i] for i in indices]
421 else:
422 loads = self.loads
423 idx = self.scheme(loads)
424 if indices:
425 idx = indices[idx]
426 target = self.targets[idx]
427 # print (target, map(str, msg[:3]))
428 self.engine_stream.send(target, flags=zmq.SNDMORE, copy=False)
429 self.engine_stream.send_multipart(raw_msg, copy=False)
430 self.add_job(idx)
431 self.pending[target][msg_id] = (raw_msg, targets, MET, follow, timeout)
432 content = dict(msg_id=msg_id, engine_id=target)
433 self.session.send(self.mon_stream, 'task_destination', content=content,
434 ident=['tracktask',self.session.session])
435
436 #-----------------------------------------------------------------------
437 # Result Handling
438 #-----------------------------------------------------------------------
439 @logged
440 def dispatch_result(self, raw_msg):
441 """dispatch method for result replies"""
442 try:
443 idents,msg = self.session.feed_identities(raw_msg, copy=False)
444 msg = self.session.unpack_message(msg, content=False, copy=False)
445 except:
446 self.log.error("task::Invalid result: %s"%raw_msg, exc_info=True)
447 return
448
449 header = msg['header']
450 if header.get('dependencies_met', True):
451 success = (header['status'] == 'ok')
452 self.handle_result(idents, msg['parent_header'], raw_msg, success)
453 # send to Hub monitor
454 self.mon_stream.send_multipart(['outtask']+raw_msg, copy=False)
455 else:
456 self.handle_unmet_dependency(idents, msg['parent_header'])
457
458 @logged
459 def handle_result(self, idents, parent, raw_msg, success=True):
460 """handle a real task result, either success or failure"""
461 # first, relay result to client
462 engine = idents[0]
463 client = idents[1]
464 # swap_ids for XREP-XREP mirror
465 raw_msg[:2] = [client,engine]
466 # print (map(str, raw_msg[:4]))
467 self.client_stream.send_multipart(raw_msg, copy=False)
468 # now, update our data structures
469 msg_id = parent['msg_id']
470 self.blacklist.pop(msg_id, None)
471 self.pending[engine].pop(msg_id)
472 if success:
473 self.completed[engine].add(msg_id)
474 self.all_completed.add(msg_id)
475 else:
476 self.failed[engine].add(msg_id)
477 self.all_failed.add(msg_id)
478 self.all_done.add(msg_id)
479 self.destinations[msg_id] = engine
480
481 self.update_graph(msg_id, success)
482
483 @logged
484 def handle_unmet_dependency(self, idents, parent):
485 """handle an unmet dependency"""
486 engine = idents[0]
487 msg_id = parent['msg_id']
488
489 if msg_id not in self.blacklist:
490 self.blacklist[msg_id] = set()
491 self.blacklist[msg_id].add(engine)
492
493 args = self.pending[engine].pop(msg_id)
494 raw,targets,after,follow,timeout = args
495
496 if self.blacklist[msg_id] == targets:
497 self.depending[msg_id] = args
498 return self.fail_unreachable(msg_id)
499
500 elif not self.maybe_run(msg_id, *args):
501 # resubmit failed, put it back in our dependency tree
502 self.save_unmet(msg_id, *args)
503
504
505 @logged
506 def update_graph(self, dep_id, success=True):
507 """dep_id just finished. Update our dependency
508 graph and submit any jobs that just became runnable."""
509 # print ("\n\n***********")
510 # pprint (dep_id)
511 # pprint (self.graph)
512 # pprint (self.depending)
513 # pprint (self.all_completed)
514 # pprint (self.all_failed)
515 # print ("\n\n***********\n\n")
516 if dep_id not in self.graph:
517 return
518 jobs = self.graph.pop(dep_id)
519
520 for msg_id in jobs:
521 raw_msg, targets, after, follow, timeout = self.depending[msg_id]
522
523 if after.unreachable(self.all_completed, self.all_failed) or follow.unreachable(self.all_completed, self.all_failed):
524 self.fail_unreachable(msg_id)
525
526 elif after.check(self.all_completed, self.all_failed): # time deps met, maybe run
527 if self.maybe_run(msg_id, raw_msg, targets, MET, follow, timeout):
528
529 self.depending.pop(msg_id)
530 for mid in follow.union(after):
531 if mid in self.graph:
532 self.graph[mid].remove(msg_id)
533
534 #----------------------------------------------------------------------
535 # methods to be overridden by subclasses
536 #----------------------------------------------------------------------
537
538 def add_job(self, idx):
539 """Called after self.targets[idx] just got the job with header.
540 Override in subclasses. The default ordering is simple LRU.
541 The default loads are the number of outstanding jobs."""
542 self.loads[idx] += 1
543 for lis in (self.targets, self.loads):
544 lis.append(lis.pop(idx))
545
546
547 def finish_job(self, idx):
548 """Called after self.targets[idx] just finished a job.
549 Override in subclasses."""
550 self.loads[idx] -= 1
551
552
553
554 def launch_scheduler(in_addr, out_addr, mon_addr, not_addr, config=None,logname='ZMQ',
555 log_addr=None, loglevel=logging.DEBUG, scheme='lru',
556 identity=b'task'):
557 from zmq.eventloop import ioloop
558 from zmq.eventloop.zmqstream import ZMQStream
559
560 ctx = zmq.Context()
561 loop = ioloop.IOLoop()
562 print (in_addr, out_addr, mon_addr, not_addr)
563 ins = ZMQStream(ctx.socket(zmq.XREP),loop)
564 ins.setsockopt(zmq.IDENTITY, identity)
565 ins.bind(in_addr)
566
567 outs = ZMQStream(ctx.socket(zmq.XREP),loop)
568 outs.setsockopt(zmq.IDENTITY, identity)
569 outs.bind(out_addr)
570 mons = ZMQStream(ctx.socket(zmq.PUB),loop)
571 mons.connect(mon_addr)
572 nots = ZMQStream(ctx.socket(zmq.SUB),loop)
573 nots.setsockopt(zmq.SUBSCRIBE, '')
574 nots.connect(not_addr)
575
576 scheme = globals().get(scheme, None)
577 # setup logging
578 if log_addr:
579 connect_logger(logname, ctx, log_addr, root="scheduler", loglevel=loglevel)
580 else:
581 local_logger(logname, loglevel)
582
583 scheduler = TaskScheduler(client_stream=ins, engine_stream=outs,
584 mon_stream=mons, notifier_stream=nots,
585 scheme=scheme, loop=loop, logname=logname,
586 config=config)
587 scheduler.start()
588 try:
589 loop.start()
590 except KeyboardInterrupt:
591 print ("interrupted, exiting...", file=sys.__stderr__)
592
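The scheme trait above selects one of the chooser functions defined at the top of this file; each takes the current list of engine loads (LRU-ordered, oldest first) and returns the index of the engine that should receive the job. A small illustration with made-up loads:

    loads = [2, 0, 1, 0]

    lru(loads)          # -> 0: always the least-recently-used engine, loads ignored
    leastload(loads)    # -> 1: first engine with the minimum load
    twobin(loads)       # picks two indices at random, returns the smaller (more-LRU) one
    plainrandom(loads)  # uniform random index
    weighted(loads)     # requires numpy; samples two indices weighted by 1/load, keeps the lighter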
@@ -0,0 +1,284 b''
1 """A TaskRecord backend using sqlite3"""
2 #-----------------------------------------------------------------------------
3 # Copyright (C) 2011 The IPython Development Team
4 #
5 # Distributed under the terms of the BSD License. The full license is in
6 # the file COPYING, distributed as part of this software.
7 #-----------------------------------------------------------------------------
8
9 import json
10 import os
11 import cPickle as pickle
12 from datetime import datetime
13
14 import sqlite3
15
16 from zmq.eventloop import ioloop
17
18 from IPython.utils.traitlets import CUnicode, CStr, Instance, List
19 from .dictdb import BaseDB
20 from IPython.parallel.util import ISO8601
21
22 #-----------------------------------------------------------------------------
23 # SQLite operators, adapters, and converters
24 #-----------------------------------------------------------------------------
25
26 operators = {
27 '$lt' : "<",
28 '$gt' : ">",
29 # null is handled weird with ==,!=
30 '$eq' : "IS",
31 '$ne' : "IS NOT",
32 '$lte': "<=",
33 '$gte': ">=",
34 '$in' : ('IS', ' OR '),
35 '$nin': ('IS NOT', ' AND '),
36 # '$all': None,
37 # '$mod': None,
38 # '$exists' : None
39 }
40
41 def _adapt_datetime(dt):
42 return dt.strftime(ISO8601)
43
44 def _convert_datetime(ds):
45 if ds is None:
46 return ds
47 else:
48 return datetime.strptime(ds, ISO8601)
49
50 def _adapt_dict(d):
51 return json.dumps(d)
52
53 def _convert_dict(ds):
54 if ds is None:
55 return ds
56 else:
57 return json.loads(ds)
58
59 def _adapt_bufs(bufs):
60 # this is *horrible*
61 # copy buffers into single list and pickle it:
62 if bufs and isinstance(bufs[0], (bytes, buffer)):
63 return sqlite3.Binary(pickle.dumps(map(bytes, bufs),-1))
64 elif bufs:
65 return bufs
66 else:
67 return None
68
69 def _convert_bufs(bs):
70 if bs is None:
71 return []
72 else:
73 return pickle.loads(bytes(bs))
74
75 #-----------------------------------------------------------------------------
76 # SQLiteDB class
77 #-----------------------------------------------------------------------------
78
79 class SQLiteDB(BaseDB):
80 """SQLite3 TaskRecord backend."""
81
82 filename = CUnicode('tasks.db', config=True)
83 location = CUnicode('', config=True)
84 table = CUnicode("", config=True)
85
86 _db = Instance('sqlite3.Connection')
87 _keys = List(['msg_id' ,
88 'header' ,
89 'content',
90 'buffers',
91 'submitted',
92 'client_uuid' ,
93 'engine_uuid' ,
94 'started',
95 'completed',
96 'resubmitted',
97 'result_header' ,
98 'result_content' ,
99 'result_buffers' ,
100 'queue' ,
101 'pyin' ,
102 'pyout',
103 'pyerr',
104 'stdout',
105 'stderr',
106 ])
107
108 def __init__(self, **kwargs):
109 super(SQLiteDB, self).__init__(**kwargs)
110 if not self.table:
111 # use session, and prefix _, since starting with # is illegal
112 self.table = '_'+self.session.replace('-','_')
113 if not self.location:
114 if hasattr(self.config.Global, 'cluster_dir'):
115 self.location = self.config.Global.cluster_dir
116 else:
117 self.location = '.'
118 self._init_db()
119
120 # register db commit as 2s periodic callback
121 # to prevent clogging pipes
122 # assumes we are being run in a zmq ioloop app
123 loop = ioloop.IOLoop.instance()
124 pc = ioloop.PeriodicCallback(self._db.commit, 2000, loop)
125 pc.start()
126
127 def _defaults(self):
128 """create an empty record"""
129 d = {}
130 for key in self._keys:
131 d[key] = None
132 return d
133
134 def _init_db(self):
135 """Connect to the database and get new session number."""
136 # register adapters
137 sqlite3.register_adapter(datetime, _adapt_datetime)
138 sqlite3.register_converter('datetime', _convert_datetime)
139 sqlite3.register_adapter(dict, _adapt_dict)
140 sqlite3.register_converter('dict', _convert_dict)
141 sqlite3.register_adapter(list, _adapt_bufs)
142 sqlite3.register_converter('bufs', _convert_bufs)
143 # connect to the db
144 dbfile = os.path.join(self.location, self.filename)
145 self._db = sqlite3.connect(dbfile, detect_types=sqlite3.PARSE_DECLTYPES,
146 # isolation_level = None)#,
147 cached_statements=64)
148 # print dir(self._db)
149
150 self._db.execute("""CREATE TABLE IF NOT EXISTS %s
151 (msg_id text PRIMARY KEY,
152 header dict text,
153 content dict text,
154 buffers bufs blob,
155 submitted datetime text,
156 client_uuid text,
157 engine_uuid text,
158 started datetime text,
159 completed datetime text,
160 resubmitted datetime text,
161 result_header dict text,
162 result_content dict text,
163 result_buffers bufs blob,
164 queue text,
165 pyin text,
166 pyout text,
167 pyerr text,
168 stdout text,
169 stderr text)
170 """%self.table)
171 # self._db.execute("""CREATE TABLE IF NOT EXISTS %s_buffers
172 # (msg_id text, result integer, buffer blob)
173 # """%self.table)
174 self._db.commit()
175
176 def _dict_to_list(self, d):
177 """turn a mongodb-style record dict into a list."""
178
179 return [ d[key] for key in self._keys ]
180
181 def _list_to_dict(self, line):
182 """Inverse of dict_to_list"""
183 d = self._defaults()
184 for key,value in zip(self._keys, line):
185 d[key] = value
186
187 return d
188
189 def _render_expression(self, check):
190 """Turn a mongodb-style search dict into an SQL query."""
191 expressions = []
192 args = []
193
194 skeys = set(check.keys())
195 skeys.difference_update(set(self._keys))
196 skeys.difference_update(set(['buffers', 'result_buffers']))
197 if skeys:
198 raise KeyError("Illegal testing key(s): %s"%skeys)
199
200 for name,sub_check in check.iteritems():
201 if isinstance(sub_check, dict):
202 for test,value in sub_check.iteritems():
203 try:
204 op = operators[test]
205 except KeyError:
206 raise KeyError("Unsupported operator: %r"%test)
207 if isinstance(op, tuple):
208 op, join = op
209 expr = "%s %s ?"%(name, op)
210 if isinstance(value, (tuple,list)):
211 expr = '( %s )'%( join.join([expr]*len(value)) )
212 args.extend(value)
213 else:
214 args.append(value)
215 expressions.append(expr)
216 else:
217 # it's an equality check
218 expressions.append("%s IS ?"%name)
219 args.append(sub_check)
220
221 expr = " AND ".join(expressions)
222 return expr, args
223
224 def add_record(self, msg_id, rec):
225 """Add a new Task Record, by msg_id."""
226 d = self._defaults()
227 d.update(rec)
228 d['msg_id'] = msg_id
229 line = self._dict_to_list(d)
230 tups = '(%s)'%(','.join(['?']*len(line)))
231 self._db.execute("INSERT INTO %s VALUES %s"%(self.table, tups), line)
232 # self._db.commit()
233
234 def get_record(self, msg_id):
235 """Get a specific Task Record, by msg_id."""
236 cursor = self._db.execute("""SELECT * FROM %s WHERE msg_id==?"""%self.table, (msg_id,))
237 line = cursor.fetchone()
238 if line is None:
239 raise KeyError("No such msg: %r"%msg_id)
240 return self._list_to_dict(line)
241
242 def update_record(self, msg_id, rec):
243 """Update the data in an existing record."""
244 query = "UPDATE %s SET "%self.table
245 sets = []
246 keys = sorted(rec.keys())
247 values = []
248 for key in keys:
249 sets.append('%s = ?'%key)
250 values.append(rec[key])
251 query += ', '.join(sets)
252 query += ' WHERE msg_id == %r'%msg_id
253 self._db.execute(query, values)
254 # self._db.commit()
255
256 def drop_record(self, msg_id):
257 """Remove a record from the DB."""
258 self._db.execute("""DELETE FROM %s WHERE msg_id==?"""%self.table, (msg_id,))
259 # self._db.commit()
260
261 def drop_matching_records(self, check):
262 """Remove a record from the DB."""
263 expr,args = self._render_expression(check)
264 query = "DELETE FROM %s WHERE %s"%(self.table, expr)
265 self._db.execute(query,args)
266 # self._db.commit()
267
268 def find_records(self, check, id_only=False):
269 """Find records matching a query dict."""
270 req = 'msg_id' if id_only else '*'
271 expr,args = self._render_expression(check)
272 query = """SELECT %s FROM %s WHERE %s"""%(req, self.table, expr)
273 cursor = self._db.execute(query, args)
274 matches = cursor.fetchall()
275 if id_only:
276 return [ m[0] for m in matches ]
277 else:
278 records = {}
279 for line in matches:
280 rec = self._list_to_dict(line)
281 records[rec['msg_id']] = rec
282 return records
283
284 __all__ = ['SQLiteDB'] No newline at end of file
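For illustration, this is how _render_expression turns a MongoDB-style check into an SQL fragment plus bind arguments, assuming db is a SQLiteDB instance:

    expr, args = db._render_expression({'completed': {'$ne': None}})
    # expr == "completed IS NOT ?",             args == [None]

    expr, args = db._render_expression({'msg_id': {'$in': ['a', 'b']}})
    # expr == "( msg_id IS ? OR msg_id IS ? )", args == ['a', 'b']

    expr, args = db._render_expression({'queue': 'q1', 'engine_uuid': 'e1'})
    # plain values become equality tests joined with AND, e.g.
    # "queue IS ? AND engine_uuid IS ?",        args == ['q1', 'e1']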
1 NO CONTENT: new file 100644
NO CONTENT: new file 100644
@@ -0,0 +1,156 b''
1 #!/usr/bin/env python
2 """A simple engine that talks to a controller over 0MQ.
3 It handles registration, etc., and launches a kernel
4 connected to the Controller's Schedulers.
5 """
6 #-----------------------------------------------------------------------------
7 # Copyright (C) 2010-2011 The IPython Development Team
8 #
9 # Distributed under the terms of the BSD License. The full license is in
10 # the file COPYING, distributed as part of this software.
11 #-----------------------------------------------------------------------------
12
13 from __future__ import print_function
14
15 import sys
16 import time
17
18 import zmq
19 from zmq.eventloop import ioloop, zmqstream
20
21 # internal
22 from IPython.utils.traitlets import Instance, Str, Dict, Int, Type, CFloat
23 # from IPython.utils.localinterfaces import LOCALHOST
24
25 from IPython.parallel.controller.heartmonitor import Heart
26 from IPython.parallel.factory import RegistrationFactory
27 from IPython.parallel.streamsession import Message
28 from IPython.parallel.util import disambiguate_url
29
30 from .streamkernel import Kernel
31
32 class EngineFactory(RegistrationFactory):
33 """IPython engine"""
34
35 # configurables:
36 user_ns=Dict(config=True)
37 out_stream_factory=Type('IPython.zmq.iostream.OutStream', config=True)
38 display_hook_factory=Type('IPython.zmq.displayhook.DisplayHook', config=True)
39 location=Str(config=True)
40 timeout=CFloat(2,config=True)
41
42 # not configurable:
43 id=Int(allow_none=True)
44 registrar=Instance('zmq.eventloop.zmqstream.ZMQStream')
45 kernel=Instance(Kernel)
46
47
48 def __init__(self, **kwargs):
49 super(EngineFactory, self).__init__(**kwargs)
50 ctx = self.context
51
52 reg = ctx.socket(zmq.XREQ)
53 reg.setsockopt(zmq.IDENTITY, self.ident)
54 reg.connect(self.url)
55 self.registrar = zmqstream.ZMQStream(reg, self.loop)
56
57 def register(self):
58 """send the registration_request"""
59
60 self.log.info("registering")
61 content = dict(queue=self.ident, heartbeat=self.ident, control=self.ident)
62 self.registrar.on_recv(self.complete_registration)
63 # print (self.session.key)
64 self.session.send(self.registrar, "registration_request",content=content)
65
66 def complete_registration(self, msg):
67 # print msg
68 self._abort_dc.stop()
69 ctx = self.context
70 loop = self.loop
71 identity = self.ident
72
73 idents,msg = self.session.feed_identities(msg)
74 msg = Message(self.session.unpack_message(msg))
75
76 if msg.content.status == 'ok':
77 self.id = int(msg.content.id)
78
79 # create Shell Streams (MUX, Task, etc.):
80 queue_addr = msg.content.mux
81 shell_addrs = [ str(queue_addr) ]
82 task_addr = msg.content.task
83 if task_addr:
84 shell_addrs.append(str(task_addr))
85
86 # Uncomment this to go back to two-socket model
87 # shell_streams = []
88 # for addr in shell_addrs:
89 # stream = zmqstream.ZMQStream(ctx.socket(zmq.XREP), loop)
90 # stream.setsockopt(zmq.IDENTITY, identity)
91 # stream.connect(disambiguate_url(addr, self.location))
92 # shell_streams.append(stream)
93
94 # Now use only one shell stream for mux and tasks
95 stream = zmqstream.ZMQStream(ctx.socket(zmq.XREP), loop)
96 stream.setsockopt(zmq.IDENTITY, identity)
97 shell_streams = [stream]
98 for addr in shell_addrs:
99 stream.connect(disambiguate_url(addr, self.location))
100 # end single stream-socket
101
102 # control stream:
103 control_addr = str(msg.content.control)
104 control_stream = zmqstream.ZMQStream(ctx.socket(zmq.XREP), loop)
105 control_stream.setsockopt(zmq.IDENTITY, identity)
106 control_stream.connect(disambiguate_url(control_addr, self.location))
107
108 # create iopub stream:
109 iopub_addr = msg.content.iopub
110 iopub_stream = zmqstream.ZMQStream(ctx.socket(zmq.PUB), loop)
111 iopub_stream.setsockopt(zmq.IDENTITY, identity)
112 iopub_stream.connect(disambiguate_url(iopub_addr, self.location))
113
114 # launch heartbeat
115 hb_addrs = msg.content.heartbeat
116 # print (hb_addrs)
117
118 # Redirect stdout/stderr and set a display hook.
119 if self.out_stream_factory:
120 sys.stdout = self.out_stream_factory(self.session, iopub_stream, u'stdout')
121 sys.stdout.topic = 'engine.%i.stdout'%self.id
122 sys.stderr = self.out_stream_factory(self.session, iopub_stream, u'stderr')
123 sys.stderr.topic = 'engine.%i.stderr'%self.id
124 if self.display_hook_factory:
125 sys.displayhook = self.display_hook_factory(self.session, iopub_stream)
126 sys.displayhook.topic = 'engine.%i.pyout'%self.id
127
128 self.kernel = Kernel(config=self.config, int_id=self.id, ident=self.ident, session=self.session,
129 control_stream=control_stream, shell_streams=shell_streams, iopub_stream=iopub_stream,
130 loop=loop, user_ns = self.user_ns, logname=self.log.name)
131 self.kernel.start()
132 hb_addrs = [ disambiguate_url(addr, self.location) for addr in hb_addrs ]
133 heart = Heart(*map(str, hb_addrs), heart_id=identity)
134 # ioloop.DelayedCallback(heart.start, 1000, self.loop).start()
135 heart.start()
136
137
138 else:
139 self.log.fatal("Registration Failed: %s"%msg)
140 raise Exception("Registration Failed: %s"%msg)
141
142 self.log.info("Completed registration with id %i"%self.id)
143
144
145 def abort(self):
146 self.log.fatal("Registration timed out")
147 self.session.send(self.registrar, "unregistration_request", content=dict(id=self.id))
148 time.sleep(1)
149 sys.exit(255)
150
151 def start(self):
152 dc = ioloop.DelayedCallback(self.register, 0, self.loop)
153 dc.start()
154 self._abort_dc = ioloop.DelayedCallback(self.abort, self.timeout*1000, self.loop)
155 self._abort_dc.start()
156
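Note that complete_registration wires a single XREP shell stream to both the MUX and Task queue endpoints, relying on 0MQ's ability to connect one socket to several addresses. A stripped-down sketch of that pattern, with placeholder addresses and identity:

    import zmq

    ctx = zmq.Context()
    shell = ctx.socket(zmq.XREP)
    shell.setsockopt(zmq.IDENTITY, 'engine-ident')
    for addr in ('tcp://127.0.0.1:10101', 'tcp://127.0.0.1:10102'):  # mux, task
        shell.connect(addr)   # one socket, many endpoints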
@@ -0,0 +1,225 b''
1 """KernelStarter class that intercepts Control Queue messages, and handles process management."""
2 #-----------------------------------------------------------------------------
3 # Copyright (C) 2010-2011 The IPython Development Team
4 #
5 # Distributed under the terms of the BSD License. The full license is in
6 # the file COPYING, distributed as part of this software.
7 #-----------------------------------------------------------------------------
8
9 import zmq
10 from zmq.eventloop import ioloop, zmqstream
11 from IPython.parallel.streamsession import StreamSession
12
13 class KernelStarter(object):
14 """Object for resetting/killing the Kernel."""
15
16
17 def __init__(self, session, upstream, downstream, *kernel_args, **kernel_kwargs):
18 self.session = session
19 self.upstream = upstream
20 self.downstream = downstream
21 self.kernel_args = kernel_args
22 self.kernel_kwargs = kernel_kwargs
23 self.handlers = {}
24 for method in 'shutdown_request shutdown_reply'.split():
25 self.handlers[method] = getattr(self, method)
26
27 def start(self):
28 self.upstream.on_recv(self.dispatch_request)
29 self.downstream.on_recv(self.dispatch_reply)
30
31 #--------------------------------------------------------------------------
32 # Dispatch methods
33 #--------------------------------------------------------------------------
34
35 def dispatch_request(self, raw_msg):
36 idents, msg = self.session.feed_identities(raw_msg)
37 try:
38 msg = self.session.unpack_message(msg, content=False)
39 except:
40 print ("bad msg: %s"%msg)
41
42 msgtype = msg['msg_type']
43 handler = self.handlers.get(msgtype, None)
44 if handler is None:
45 self.downstream.send_multipart(raw_msg, copy=False)
46 else:
47 handler(msg)
48
49 def dispatch_reply(self, raw_msg):
50 idents, msg = self.session.feed_identities(raw_msg)
51 try:
52 msg = self.session.unpack_message(msg, content=False)
53 except:
54 print ("bad msg: %s"%msg)
55
56 msgtype = msg['msg_type']
57 handler = self.handlers.get(msgtype, None)
58 if handler is None:
59 self.upstream.send_multipart(raw_msg, copy=False)
60 else:
61 handler(msg)
62
63 #--------------------------------------------------------------------------
64 # Handlers
65 #--------------------------------------------------------------------------
66
67 def shutdown_request(self, msg):
68 """"""
69 self.downstream.send_multipart(msg)
70
71 #--------------------------------------------------------------------------
72 # Kernel process management methods, from KernelManager:
73 #--------------------------------------------------------------------------
74
75 def _check_local(addr):
76 if isinstance(addr, tuple):
77 addr = addr[0]
78 return addr in LOCAL_IPS
79
80 def start_kernel(self, **kw):
81 """Starts a kernel process and configures the manager to use it.
82
83 If random ports (port=0) are being used, this method must be called
84 before the channels are created.
85
86 Parameters
87 ----------
88 ipython : bool, optional (default True)
89 Whether to use an IPython kernel instead of a plain Python kernel.
90 """
91 self.kernel = Process(target=make_kernel, args=self.kernel_args,
92 kwargs=self.kernel_kwargs)
93
94 def shutdown_kernel(self, restart=False):
95 """ Attempts to the stop the kernel process cleanly. If the kernel
96 cannot be stopped, it is killed, if possible.
97 """
98 # FIXME: Shutdown does not work on Windows due to ZMQ errors!
99 if sys.platform == 'win32':
100 self.kill_kernel()
101 return
102
103 # Don't send any additional kernel kill messages immediately, to give
104 # the kernel a chance to properly execute shutdown actions. Wait for at
105 # most 1s, checking every 0.1s.
106 self.xreq_channel.shutdown(restart=restart)
107 for i in range(10):
108 if self.is_alive:
109 time.sleep(0.1)
110 else:
111 break
112 else:
113 # OK, we've waited long enough.
114 if self.has_kernel:
115 self.kill_kernel()
116
117 def restart_kernel(self, now=False):
118 """Restarts a kernel with the same arguments that were used to launch
119 it. If the old kernel was launched with random ports, the same ports
120 will be used for the new kernel.
121
122 Parameters
123 ----------
124 now : bool, optional
125 If True, the kernel is forcefully restarted *immediately*, without
126 having a chance to do any cleanup action. Otherwise the kernel is
127 given 1s to clean up before a forceful restart is issued.
128
129 In all cases the kernel is restarted, the only difference is whether
130 it is given a chance to perform a clean shutdown or not.
131 """
132 if self._launch_args is None:
133 raise RuntimeError("Cannot restart the kernel. "
134 "No previous call to 'start_kernel'.")
135 else:
136 if self.has_kernel:
137 if now:
138 self.kill_kernel()
139 else:
140 self.shutdown_kernel(restart=True)
141 self.start_kernel(**self._launch_args)
142
143 # FIXME: Messages get dropped in Windows due to probable ZMQ bug
144 # unless there is some delay here.
145 if sys.platform == 'win32':
146 time.sleep(0.2)
147
148 @property
149 def has_kernel(self):
150 """Returns whether a kernel process has been specified for the kernel
151 manager.
152 """
153 return self.kernel is not None
154
155 def kill_kernel(self):
156 """ Kill the running kernel. """
157 if self.has_kernel:
158 # Pause the heart beat channel if it exists.
159 if self._hb_channel is not None:
160 self._hb_channel.pause()
161
162 # Attempt to kill the kernel.
163 try:
164 self.kernel.kill()
165 except OSError, e:
166 # In Windows, we will get an Access Denied error if the process
167 # has already terminated. Ignore it.
168 if not (sys.platform == 'win32' and e.winerror == 5):
169 raise
170 self.kernel = None
171 else:
172 raise RuntimeError("Cannot kill kernel. No kernel is running!")
173
174 def interrupt_kernel(self):
175 """ Interrupts the kernel. Unlike ``signal_kernel``, this operation is
176 well supported on all platforms.
177 """
178 if self.has_kernel:
179 if sys.platform == 'win32':
180 from parentpoller import ParentPollerWindows as Poller
181 Poller.send_interrupt(self.kernel.win32_interrupt_event)
182 else:
183 self.kernel.send_signal(signal.SIGINT)
184 else:
185 raise RuntimeError("Cannot interrupt kernel. No kernel is running!")
186
187 def signal_kernel(self, signum):
188 """ Sends a signal to the kernel. Note that since only SIGTERM is
189 supported on Windows, this function is only useful on Unix systems.
190 """
191 if self.has_kernel:
192 self.kernel.send_signal(signum)
193 else:
194 raise RuntimeError("Cannot signal kernel. No kernel is running!")
195
196 @property
197 def is_alive(self):
198 """Is the kernel process still running?"""
199 # FIXME: not using a heartbeat means this method is broken for any
200 # remote kernel, it's only capable of handling local kernels.
201 if self.has_kernel:
202 if self.kernel.poll() is None:
203 return True
204 else:
205 return False
206 else:
207 # We didn't start the kernel with this KernelManager so we don't
208 # know if it is running. We should use a heartbeat for this case.
209 return True
210
211
212 def make_starter(up_addr, down_addr, *args, **kwargs):
213 """entry point function for launching a kernelstarter in a subprocess"""
214 loop = ioloop.IOLoop.instance()
215 ctx = zmq.Context()
216 session = StreamSession()
217 upstream = zmqstream.ZMQStream(ctx.socket(zmq.XREQ),loop)
218 upstream.connect(up_addr)
219 downstream = zmqstream.ZMQStream(ctx.socket(zmq.XREQ),loop)
220 downstream.connect(down_addr)
221
222 starter = KernelStarter(session, upstream, downstream, *args, **kwargs)
223 starter.start()
224 loop.start()
225 No newline at end of file
@@ -0,0 +1,423 b''
1 #!/usr/bin/env python
2 """
3 Kernel adapted from kernel.py to use ZMQ Streams
4 """
5 #-----------------------------------------------------------------------------
6 # Copyright (C) 2010-2011 The IPython Development Team
7 #
8 # Distributed under the terms of the BSD License. The full license is in
9 # the file COPYING, distributed as part of this software.
10 #-----------------------------------------------------------------------------
11
12 #-----------------------------------------------------------------------------
13 # Imports
14 #-----------------------------------------------------------------------------
15
16 # Standard library imports.
17 from __future__ import print_function
18
19 import sys
20 import time
21
22 from code import CommandCompiler
23 from datetime import datetime
24 from pprint import pprint
25
26 # System library imports.
27 import zmq
28 from zmq.eventloop import ioloop, zmqstream
29
30 # Local imports.
31 from IPython.utils.traitlets import Instance, List, Int, Dict, Set, Str
32 from IPython.zmq.completer import KernelCompleter
33
34 from IPython.parallel.error import wrap_exception
35 from IPython.parallel.factory import SessionFactory
36 from IPython.parallel.util import serialize_object, unpack_apply_message, ISO8601
37
38 def printer(*args):
39 pprint(args, stream=sys.__stdout__)
40
41
42 class _Passer:
43 """Empty class that implements `send()` that does nothing."""
44 def send(self, *args, **kwargs):
45 pass
46 send_multipart = send
47
48
49 #-----------------------------------------------------------------------------
50 # Main kernel class
51 #-----------------------------------------------------------------------------
52
53 class Kernel(SessionFactory):
54
55 #---------------------------------------------------------------------------
56 # Kernel interface
57 #---------------------------------------------------------------------------
58
59 # kwargs:
60 int_id = Int(-1, config=True)
61 user_ns = Dict(config=True)
62 exec_lines = List(config=True)
63
64 control_stream = Instance(zmqstream.ZMQStream)
65 task_stream = Instance(zmqstream.ZMQStream)
66 iopub_stream = Instance(zmqstream.ZMQStream)
67 client = Instance('IPython.parallel.Client')
68
69 # internals
70 shell_streams = List()
71 compiler = Instance(CommandCompiler, (), {})
72 completer = Instance(KernelCompleter)
73
74 aborted = Set()
75 shell_handlers = Dict()
76 control_handlers = Dict()
77
78 def _set_prefix(self):
79 self.prefix = "engine.%s"%self.int_id
80
81 def _connect_completer(self):
82 self.completer = KernelCompleter(self.user_ns)
83
84 def __init__(self, **kwargs):
85 super(Kernel, self).__init__(**kwargs)
86 self._set_prefix()
87 self._connect_completer()
88
89 self.on_trait_change(self._set_prefix, 'id')
90 self.on_trait_change(self._connect_completer, 'user_ns')
91
92 # Build dict of handlers for message types
93 for msg_type in ['execute_request', 'complete_request', 'apply_request',
94 'clear_request']:
95 self.shell_handlers[msg_type] = getattr(self, msg_type)
96
97 for msg_type in ['shutdown_request', 'abort_request']+self.shell_handlers.keys():
98 self.control_handlers[msg_type] = getattr(self, msg_type)
99
100 self._initial_exec_lines()
101
102 def _wrap_exception(self, method=None):
103 e_info = dict(engine_uuid=self.ident, engine_id=self.int_id, method=method)
104 content=wrap_exception(e_info)
105 return content
106
107 def _initial_exec_lines(self):
108 s = _Passer()
109 content = dict(silent=True, user_variables=[], user_expressions=[])
110 for line in self.exec_lines:
111 self.log.debug("executing initialization: %s"%line)
112 content.update({'code':line})
113 msg = self.session.msg('execute_request', content)
114 self.execute_request(s, [], msg)
115
116
117 #-------------------- control handlers -----------------------------
118 def abort_queues(self):
119 for stream in self.shell_streams:
120 if stream:
121 self.abort_queue(stream)
122
123 def abort_queue(self, stream):
124 while True:
125 try:
126 msg = self.session.recv(stream, zmq.NOBLOCK,content=True)
127 except zmq.ZMQError as e:
128 if e.errno == zmq.EAGAIN:
129 break
130 else:
131 return
132 else:
133 if msg is None:
134 return
135 else:
136 idents,msg = msg
137
138 # assert self.reply_socket.rcvmore(), "Unexpected missing message part."
139 # msg = self.reply_socket.recv_json()
140 self.log.info("Aborting:")
141 self.log.info(str(msg))
142 msg_type = msg['msg_type']
143 reply_type = msg_type.split('_')[0] + '_reply'
144 # reply_msg = self.session.msg(reply_type, {'status' : 'aborted'}, msg)
145 # self.reply_socket.send(ident,zmq.SNDMORE)
146 # self.reply_socket.send_json(reply_msg)
147 reply_msg = self.session.send(stream, reply_type,
148 content={'status' : 'aborted'}, parent=msg, ident=idents)[0]
149 self.log.debug(str(reply_msg))
150 # We need to wait a bit for requests to come in. This can probably
151 # be set shorter for true asynchronous clients.
152 time.sleep(0.05)
153
154 def abort_request(self, stream, ident, parent):
155 """abort a specifig msg by id"""
156 msg_ids = parent['content'].get('msg_ids', None)
157 if isinstance(msg_ids, basestring):
158 msg_ids = [msg_ids]
159 if not msg_ids:
160 self.abort_queues()
161 for mid in msg_ids:
162 self.aborted.add(str(mid))
163
164 content = dict(status='ok')
165 reply_msg = self.session.send(stream, 'abort_reply', content=content,
166 parent=parent, ident=ident)
167 self.log.debug(str(reply_msg))
168
169 def shutdown_request(self, stream, ident, parent):
170 """kill ourself. This should really be handled in an external process"""
171 try:
172 self.abort_queues()
173 except:
174 content = self._wrap_exception('shutdown')
175 else:
176 content = dict(parent['content'])
177 content['status'] = 'ok'
178 msg = self.session.send(stream, 'shutdown_reply',
179 content=content, parent=parent, ident=ident)
180 self.log.debug(str(msg))
181 dc = ioloop.DelayedCallback(lambda : sys.exit(0), 1000, self.loop)
182 dc.start()
183
184 def dispatch_control(self, msg):
185 idents,msg = self.session.feed_identities(msg, copy=False)
186 try:
187 msg = self.session.unpack_message(msg, content=True, copy=False)
188 except:
189 self.log.error("Invalid Message", exc_info=True)
190 return
191
192 header = msg['header']
193 msg_id = header['msg_id']
194
195 handler = self.control_handlers.get(msg['msg_type'], None)
196 if handler is None:
197 self.log.error("UNKNOWN CONTROL MESSAGE TYPE: %r"%msg['msg_type'])
198 else:
199 handler(self.control_stream, idents, msg)
200
201
202 #-------------------- queue helpers ------------------------------
203
204 def check_dependencies(self, dependencies):
205 if not dependencies:
206 return True
207 if len(dependencies) == 2 and dependencies[0] in 'any all'.split():
208 anyorall = dependencies[0]
209 dependencies = dependencies[1]
210 else:
211 anyorall = 'all'
212 results = self.client.get_results(dependencies,status_only=True)
213 if results['status'] != 'ok':
214 return False
215
216 if anyorall == 'any':
217 if not results['completed']:
218 return False
219 else:
220 if results['pending']:
221 return False
222
223 return True
224
225 def check_aborted(self, msg_id):
226 return msg_id in self.aborted
227
228 #-------------------- queue handlers -----------------------------
229
230 def clear_request(self, stream, idents, parent):
231 """Clear our namespace."""
232 self.user_ns = {}
233 msg = self.session.send(stream, 'clear_reply', ident=idents, parent=parent,
234 content = dict(status='ok'))
235 self._initial_exec_lines()
236
237 def execute_request(self, stream, ident, parent):
238 self.log.debug('execute request %s'%parent)
239 try:
240 code = parent[u'content'][u'code']
241 except:
242 self.log.error("Got bad msg: %s"%parent, exc_info=True)
243 return
244 self.session.send(self.iopub_stream, u'pyin', {u'code':code},parent=parent,
245 ident='%s.pyin'%self.prefix)
246 started = datetime.now().strftime(ISO8601)
247 try:
248 comp_code = self.compiler(code, '<zmq-kernel>')
249 # allow for not overriding displayhook
250 if hasattr(sys.displayhook, 'set_parent'):
251 sys.displayhook.set_parent(parent)
252 sys.stdout.set_parent(parent)
253 sys.stderr.set_parent(parent)
254 exec comp_code in self.user_ns, self.user_ns
255 except:
256 exc_content = self._wrap_exception('execute')
257 # exc_msg = self.session.msg(u'pyerr', exc_content, parent)
258 self.session.send(self.iopub_stream, u'pyerr', exc_content, parent=parent,
259 ident='%s.pyerr'%self.prefix)
260 reply_content = exc_content
261 else:
262 reply_content = {'status' : 'ok'}
263
264 reply_msg = self.session.send(stream, u'execute_reply', reply_content, parent=parent,
265 ident=ident, subheader = dict(started=started))
266 self.log.debug(str(reply_msg))
267 if reply_msg['content']['status'] == u'error':
268 self.abort_queues()
269
270 def complete_request(self, stream, ident, parent):
271 matches = {'matches' : self.complete(parent),
272 'status' : 'ok'}
273 completion_msg = self.session.send(stream, 'complete_reply',
274 matches, parent, ident)
275 # print >> sys.__stdout__, completion_msg
276
277 def complete(self, msg):
278 return self.completer.complete(msg.content.line, msg.content.text)
279
280 def apply_request(self, stream, ident, parent):
281 # flush previous reply, so this request won't block it
282 stream.flush(zmq.POLLOUT)
283
284 try:
285 content = parent[u'content']
286 bufs = parent[u'buffers']
287 msg_id = parent['header']['msg_id']
288 # bound = parent['header'].get('bound', False)
289 except:
290 self.log.error("Got bad msg: %s"%parent, exc_info=True)
291 return
292 # pyin_msg = self.session.msg(u'pyin',{u'code':code}, parent=parent)
293 # self.iopub_stream.send(pyin_msg)
294 # self.session.send(self.iopub_stream, u'pyin', {u'code':code},parent=parent)
295 sub = {'dependencies_met' : True, 'engine' : self.ident,
296 'started': datetime.now().strftime(ISO8601)}
297 try:
298 # allow for not overriding displayhook
299 if hasattr(sys.displayhook, 'set_parent'):
300 sys.displayhook.set_parent(parent)
301 sys.stdout.set_parent(parent)
302 sys.stderr.set_parent(parent)
303 # exec "f(*args,**kwargs)" in self.user_ns, self.user_ns
304 working = self.user_ns
305 # suffix =
306 prefix = "_"+str(msg_id).replace("-","")+"_"
307
308 f,args,kwargs = unpack_apply_message(bufs, working, copy=False)
309 # if bound:
310 # bound_ns = Namespace(working)
311 # args = [bound_ns]+list(args)
312
313 fname = getattr(f, '__name__', 'f')
314
315 fname = prefix+"f"
316 argname = prefix+"args"
317 kwargname = prefix+"kwargs"
318 resultname = prefix+"result"
319
320 ns = { fname : f, argname : args, kwargname : kwargs , resultname : None }
321 # print ns
322 working.update(ns)
323 code = "%s=%s(*%s,**%s)"%(resultname, fname, argname, kwargname)
324 try:
325 exec code in working,working
326 result = working.get(resultname)
327 finally:
328 for key in ns.iterkeys():
329 working.pop(key)
330 # if bound:
331 # working.update(bound_ns)
332
333 packed_result,buf = serialize_object(result)
334 result_buf = [packed_result]+buf
335 except:
336 exc_content = self._wrap_exception('apply')
337 # exc_msg = self.session.msg(u'pyerr', exc_content, parent)
338 self.session.send(self.iopub_stream, u'pyerr', exc_content, parent=parent,
339 ident='%s.pyerr'%self.prefix)
340 reply_content = exc_content
341 result_buf = []
342
343 if exc_content['ename'] == 'UnmetDependency':
344 sub['dependencies_met'] = False
345 else:
346 reply_content = {'status' : 'ok'}
347
348 # put 'ok'/'error' status in header, for scheduler introspection:
349 sub['status'] = reply_content['status']
350
351 reply_msg = self.session.send(stream, u'apply_reply', reply_content,
352 parent=parent, ident=ident,buffers=result_buf, subheader=sub)
353
354 # flush i/o
355 # should this be before reply_msg is sent, like in the single-kernel code,
356 # or should nothing get in the way of real results?
357 sys.stdout.flush()
358 sys.stderr.flush()
359
360 def dispatch_queue(self, stream, msg):
361 self.control_stream.flush()
362 idents,msg = self.session.feed_identities(msg, copy=False)
363 try:
364 msg = self.session.unpack_message(msg, content=True, copy=False)
365 except:
366 self.log.error("Invalid Message", exc_info=True)
367 return
368
369
370 header = msg['header']
371 msg_id = header['msg_id']
372 if self.check_aborted(msg_id):
373 self.aborted.remove(msg_id)
374 # is it safe to assume a msg_id will not be resubmitted?
375 reply_type = msg['msg_type'].split('_')[0] + '_reply'
376 reply_msg = self.session.send(stream, reply_type,
377 content={'status' : 'aborted'}, parent=msg, ident=idents)
378 return
379 handler = self.shell_handlers.get(msg['msg_type'], None)
380 if handler is None:
381 self.log.error("UNKNOWN MESSAGE TYPE: %r"%msg['msg_type'])
382 else:
383 handler(stream, idents, msg)
384
385 def start(self):
386 #### stream mode:
387 if self.control_stream:
388 self.control_stream.on_recv(self.dispatch_control, copy=False)
389 self.control_stream.on_err(printer)
390
391 def make_dispatcher(stream):
392 def dispatcher(msg):
393 return self.dispatch_queue(stream, msg)
394 return dispatcher
395
396 for s in self.shell_streams:
397 s.on_recv(make_dispatcher(s), copy=False)
398 s.on_err(printer)
399
400 if self.iopub_stream:
401 self.iopub_stream.on_err(printer)
402
403 #### while True mode:
404 # while True:
405 # idle = True
406 # try:
407 # msg = self.shell_stream.socket.recv_multipart(
408 # zmq.NOBLOCK, copy=False)
409 # except zmq.ZMQError, e:
410 # if e.errno != zmq.EAGAIN:
411 # raise e
412 # else:
413 # idle=False
414 # self.dispatch_queue(self.shell_stream, msg)
415 #
416 # if not self.task_stream.empty():
417 # idle=False
418 # msg = self.task_stream.recv_multipart()
419 # self.dispatch_queue(self.task_stream, msg)
420 # if idle:
421 # # don't busywait
422 # time.sleep(1e-3)
423
@@ -0,0 +1,313 b''
1 # encoding: utf-8
2
3 """Classes and functions for kernel related errors and exceptions."""
4 from __future__ import print_function
5
6 import sys
7 import traceback
8
9 __docformat__ = "restructuredtext en"
10
11 # Tell nose to skip this module
12 __test__ = {}
13
14 #-------------------------------------------------------------------------------
15 # Copyright (C) 2008 The IPython Development Team
16 #
17 # Distributed under the terms of the BSD License. The full license is in
18 # the file COPYING, distributed as part of this software.
19 #-------------------------------------------------------------------------------
20
21 #-------------------------------------------------------------------------------
22 # Error classes
23 #-------------------------------------------------------------------------------
24 class IPythonError(Exception):
25 """Base exception that all of our exceptions inherit from.
26
27 This can be raised by code that doesn't have any more specific
28 information."""
29
30 pass
31
32 # Exceptions associated with the controller objects
33 class ControllerError(IPythonError): pass
34
35 class ControllerCreationError(ControllerError): pass
36
37
38 # Exceptions associated with the Engines
39 class EngineError(IPythonError): pass
40
41 class EngineCreationError(EngineError): pass
42
43 class KernelError(IPythonError):
44 pass
45
46 class NotDefined(KernelError):
47 def __init__(self, name):
48 self.name = name
49 self.args = (name,)
50
51 def __repr__(self):
52 return '<NotDefined: %s>' % self.name
53
54 __str__ = __repr__
55
56
57 class QueueCleared(KernelError):
58 pass
59
60
61 class IdInUse(KernelError):
62 pass
63
64
65 class ProtocolError(KernelError):
66 pass
67
68
69 class ConnectionError(KernelError):
70 pass
71
72
73 class InvalidEngineID(KernelError):
74 pass
75
76
77 class NoEnginesRegistered(KernelError):
78 pass
79
80
81 class InvalidClientID(KernelError):
82 pass
83
84
85 class InvalidDeferredID(KernelError):
86 pass
87
88
89 class SerializationError(KernelError):
90 pass
91
92
93 class MessageSizeError(KernelError):
94 pass
95
96
97 class PBMessageSizeError(MessageSizeError):
98 pass
99
100
101 class ResultNotCompleted(KernelError):
102 pass
103
104
105 class ResultAlreadyRetrieved(KernelError):
106 pass
107
108 class ClientError(KernelError):
109 pass
110
111
112 class TaskAborted(KernelError):
113 pass
114
115
116 class TaskTimeout(KernelError):
117 pass
118
119
120 class NotAPendingResult(KernelError):
121 pass
122
123
124 class UnpickleableException(KernelError):
125 pass
126
127
128 class AbortedPendingDeferredError(KernelError):
129 pass
130
131
132 class InvalidProperty(KernelError):
133 pass
134
135
136 class MissingBlockArgument(KernelError):
137 pass
138
139
140 class StopLocalExecution(KernelError):
141 pass
142
143
144 class SecurityError(KernelError):
145 pass
146
147
148 class FileTimeoutError(KernelError):
149 pass
150
151 class TimeoutError(KernelError):
152 pass
153
154 class UnmetDependency(KernelError):
155 pass
156
157 class ImpossibleDependency(UnmetDependency):
158 pass
159
160 class DependencyTimeout(ImpossibleDependency):
161 pass
162
163 class InvalidDependency(ImpossibleDependency):
164 pass
165
166 class RemoteError(KernelError):
167 """Error raised elsewhere"""
168 ename=None
169 evalue=None
170 traceback=None
171 engine_info=None
172
173 def __init__(self, ename, evalue, traceback, engine_info=None):
174 self.ename=ename
175 self.evalue=evalue
176 self.traceback=traceback
177 self.engine_info=engine_info or {}
178 self.args=(ename, evalue)
179
180 def __repr__(self):
181 engineid = self.engine_info.get('engine_id', ' ')
182 return "<Remote[%s]:%s(%s)>"%(engineid, self.ename, self.evalue)
183
184 def __str__(self):
185 sig = "%s(%s)"%(self.ename, self.evalue)
186 if self.traceback:
187 return sig + '\n' + self.traceback
188 else:
189 return sig
190
191
192 class TaskRejectError(KernelError):
193 """Exception to raise when a task should be rejected by an engine.
194
195 This exception can be used to allow a task running on an engine to test
196 if the engine (or the user's namespace on the engine) has the needed
197 task dependencies. If not, the task should raise this exception. For
198 the task to be retried on another engine, the task should be created
199 with the `retries` argument > 1.
200
201 The advantage of this approach over our older properties system is that
202 tasks have full access to the user's namespace on the engines and the
203 properties don't have to be managed or tested by the controller.
204 """
205
206
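# Editor's note -- an illustrative sketch (not part of this diff) of the pattern the
# docstring above describes; the function name and its numpy dependency are hypothetical.
def needs_numpy(x):
    try:
        import numpy
    except ImportError:
        # reject so the scheduler can resubmit this task on a better-equipped engine
        raise TaskRejectError("numpy is not available on this engine")
    return numpy.asarray(x).sum()
# Submit through a load-balanced view with retries enabled, as the docstring suggests,
# e.g. view.apply_async(needs_numpy, range(10)).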
207 class CompositeError(RemoteError):
208 """Error for representing possibly multiple errors on engines"""
209 def __init__(self, message, elist):
210 Exception.__init__(self, *(message, elist))
211 # Don't use pack_exception because it will conflict with the .message
212 # attribute that is being deprecated in 2.6 and beyond.
213 self.msg = message
214 self.elist = elist
215 self.args = [ e[0] for e in elist ]
216
217 def _get_engine_str(self, ei):
218 if not ei:
219 return '[Engine Exception]'
220 else:
221 return '[%s:%s]: ' % (ei['engine_id'], ei['method'])
222
223 def _get_traceback(self, ev):
224 try:
225 tb = ev._ipython_traceback_text
226 except AttributeError:
227 return 'No traceback available'
228 else:
229 return tb
230
231 def __str__(self):
232 s = str(self.msg)
233 for en, ev, etb, ei in self.elist:
234 engine_str = self._get_engine_str(ei)
235 s = s + '\n' + engine_str + en + ': ' + str(ev)
236 return s
237
238 def __repr__(self):
239 return "CompositeError(%i)"%len(self.elist)
240
241 def print_tracebacks(self, excid=None):
242 if excid is None:
243 for (en,ev,etb,ei) in self.elist:
244 print (self._get_engine_str(ei))
245 print (etb or 'No traceback available')
246 print ()
247 else:
248 try:
249 en,ev,etb,ei = self.elist[excid]
250 except:
251 raise IndexError("an exception with index %i does not exist"%excid)
252 else:
253 print (self._get_engine_str(ei))
254 print (etb or 'No traceback available')
255
256 def raise_exception(self, excid=0):
257 try:
258 en,ev,etb,ei = self.elist[excid]
259 except:
260 raise IndexError("an exception with index %i does not exist"%excid)
261 else:
262 raise RemoteError(en, ev, etb, ei)
263
264
265 def collect_exceptions(rdict_or_list, method='unspecified'):
266 """check a result dict for errors, and raise CompositeError if any exist.
267 Passthrough otherwise."""
268 elist = []
269 if isinstance(rdict_or_list, dict):
270 rlist = rdict_or_list.values()
271 else:
272 rlist = rdict_or_list
273 for r in rlist:
274 if isinstance(r, RemoteError):
275 en, ev, etb, ei = r.ename, r.evalue, r.traceback, r.engine_info
276 # Sometimes we could have CompositeError in our list. Just take
277 # the errors out of them and put them in our new list. This
278 # has the effect of flattening lists of CompositeErrors into one
279 # CompositeError
280 if en=='CompositeError':
281 for e in ev.elist:
282 elist.append(e)
283 else:
284 elist.append((en, ev, etb, ei))
285 if len(elist)==0:
286 return rdict_or_list
287 else:
288 msg = "one or more exceptions from call to method: %s" % (method)
289 # This silliness is needed so the debugger has access to the exception
290 # instance (e in this case)
291 try:
292 raise CompositeError(msg, elist)
293 except CompositeError as e:
294 raise e
295
296 def wrap_exception(engine_info={}):
297 etype, evalue, tb = sys.exc_info()
298 stb = traceback.format_exception(etype, evalue, tb)
299 exc_content = {
300 'status' : 'error',
301 'traceback' : stb,
302 'ename' : unicode(etype.__name__),
303 'evalue' : unicode(evalue),
304 'engine_info' : engine_info
305 }
306 return exc_content
307
308 def unwrap_exception(content):
309 err = RemoteError(content['ename'], content['evalue'],
310 ''.join(content['traceback']),
311 content.get('engine_info', {}))
312 return err
313
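# Editor's note -- a minimal round-trip sketch (not part of this diff): wrap_exception()
# packages the active exception into the dict an engine ships over the wire, and
# unwrap_exception() rebuilds it client-side as a RemoteError. The engine_info values
# below are hypothetical.
try:
    1 / 0
except ZeroDivisionError:
    content = wrap_exception(engine_info={'engine_id': 0, 'method': 'apply'})
err = unwrap_exception(content)
assert err.ename == 'ZeroDivisionError'
print(err)   # the "ename(evalue)" signature followed by the formatted traceback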
@@ -0,0 +1,152 b''
1 """Base config factories."""
2
3 #-----------------------------------------------------------------------------
4 # Copyright (C) 2008-2009 The IPython Development Team
5 #
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING, distributed as part of this software.
8 #-----------------------------------------------------------------------------
9
10 #-----------------------------------------------------------------------------
11 # Imports
12 #-----------------------------------------------------------------------------
13
14
15 import logging
16 import os
17 import uuid
18
19 from zmq.eventloop.ioloop import IOLoop
20
21 from IPython.config.configurable import Configurable
22 from IPython.utils.importstring import import_item
23 from IPython.utils.traitlets import Str,Int,Instance, CUnicode, CStr
24
25 import IPython.parallel.streamsession as ss
26 from IPython.parallel.util import select_random_ports
27
28 #-----------------------------------------------------------------------------
29 # Classes
30 #-----------------------------------------------------------------------------
31 class LoggingFactory(Configurable):
32 """A most basic class, that has a `log` (type:`Logger`) attribute, set via a `logname` Trait."""
33 log = Instance('logging.Logger', ('ZMQ', logging.WARN))
34 logname = CUnicode('ZMQ')
35 def _logname_changed(self, name, old, new):
36 self.log = logging.getLogger(new)
37
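# Editor's note -- a small sketch (not part of this diff): assigning a logname is
# expected to swap the underlying logger via _logname_changed. The name is hypothetical.
#   lf = LoggingFactory(logname='ZMQ.engine')
#   lf.log.name   # -> 'ZMQ.engine'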
38
39 class SessionFactory(LoggingFactory):
40 """The Base factory from which every factory in IPython.parallel inherits"""
41
42 packer = Str('',config=True)
43 unpacker = Str('',config=True)
44 ident = CStr('',config=True)
45 def _ident_default(self):
46 return str(uuid.uuid4())
47 username = CUnicode(os.environ.get('USER','username'),config=True)
48 exec_key = CUnicode('',config=True)
49 # not configurable:
50 context = Instance('zmq.Context', (), {})
51 session = Instance('IPython.parallel.streamsession.StreamSession')
52 loop = Instance('zmq.eventloop.ioloop.IOLoop', allow_none=False)
53 def _loop_default(self):
54 return IOLoop.instance()
55
56
57 def __init__(self, **kwargs):
58 super(SessionFactory, self).__init__(**kwargs)
59 exec_key = self.exec_key or None
60 # set the packers:
61 if not self.packer:
62 packer_f = unpacker_f = None
63 elif self.packer.lower() == 'json':
64 packer_f = ss.json_packer
65 unpacker_f = ss.json_unpacker
66 elif self.packer.lower() == 'pickle':
67 packer_f = ss.pickle_packer
68 unpacker_f = ss.pickle_unpacker
69 else:
70 packer_f = import_item(self.packer)
71 unpacker_f = import_item(self.unpacker)
72
73 # construct the session
74 self.session = ss.StreamSession(self.username, self.ident, packer=packer_f, unpacker=unpacker_f, key=exec_key)
75
76
77 class RegistrationFactory(SessionFactory):
78 """The Base Configurable for objects that involve registration."""
79
80 url = Str('', config=True) # url takes precedence over ip,regport,transport
81 transport = Str('tcp', config=True)
82 ip = Str('127.0.0.1', config=True)
83 regport = Instance(int, config=True)
84 def _regport_default(self):
85 # return 10101
86 return select_random_ports(1)[0]
87
88 def __init__(self, **kwargs):
89 super(RegistrationFactory, self).__init__(**kwargs)
90 self._propagate_url()
91 self._rebuild_url()
92 self.on_trait_change(self._propagate_url, 'url')
93 self.on_trait_change(self._rebuild_url, 'ip')
94 self.on_trait_change(self._rebuild_url, 'transport')
95 self.on_trait_change(self._rebuild_url, 'regport')
96
97 def _rebuild_url(self):
98 self.url = "%s://%s:%i"%(self.transport, self.ip, self.regport)
99
100 def _propagate_url(self):
101 """Ensure self.url contains full transport://interface:port"""
102 if self.url:
103 iface = self.url.split('://',1)
104 if len(iface) == 2:
105 self.transport,iface = iface
106 iface = iface.split(':')
107 self.ip = iface[0]
108 if iface[1]:
109 self.regport = int(iface[1])
110
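# Editor's note -- an illustrative sketch (not part of this diff) of the url round trip;
# the address and port are hypothetical:
#   f = RegistrationFactory(url='tcp://10.0.0.1:12345')
#   f.transport, f.ip, f.regport   # -> 'tcp', '10.0.0.1', 12345  (via _propagate_url)
#   f.ip = '127.0.0.1'             # changing ip/transport/regport triggers _rebuild_url
#   f.url                          # -> 'tcp://127.0.0.1:12345'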
111 #-----------------------------------------------------------------------------
112 # argparse argument extenders
113 #-----------------------------------------------------------------------------
114
115
116 def add_session_arguments(parser):
117 paa = parser.add_argument
118 paa('--ident',
119 type=str, dest='SessionFactory.ident',
120 help='set the ZMQ and session identity [default: random uuid]',
121 metavar='identity')
122 # paa('--execkey',
123 # type=str, dest='SessionFactory.exec_key',
124 # help='path to a file containing an execution key.',
125 # metavar='execkey')
126 paa('--packer',
127 type=str, dest='SessionFactory.packer',
128 help='method to serialize messages: {json,pickle} [default: json]',
129 metavar='packer')
130 paa('--unpacker',
131 type=str, dest='SessionFactory.unpacker',
132 help='inverse function of `packer`. Only necessary when using something other than json|pickle',
133 metavar='packer')
134
135 def add_registration_arguments(parser):
136 paa = parser.add_argument
137 paa('--ip',
138 type=str, dest='RegistrationFactory.ip',
139 help="The IP used for registration [default: localhost]",
140 metavar='ip')
141 paa('--transport',
142 type=str, dest='RegistrationFactory.transport',
143 help="The ZeroMQ transport used for registration [default: tcp]",
144 metavar='transport')
145 paa('--url',
146 type=str, dest='RegistrationFactory.url',
147 help='set transport,ip,regport in one go, e.g. tcp://127.0.0.1:10101',
148 metavar='url')
149 paa('--regport',
150 type=int, dest='RegistrationFactory.regport',
151 help="The port used for registration [default: 10101]",
152 metavar='ip')
1 NO CONTENT: new file 100644
NO CONTENT: new file 100644
@@ -0,0 +1,16 b''
1 # encoding: utf-8
2
3 """"""
4
5 __docformat__ = "restructuredtext en"
6
7 #-------------------------------------------------------------------------------
8 # Copyright (C) 2008 The IPython Development Team
9 #
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
12 #-------------------------------------------------------------------------------
13
14 #-------------------------------------------------------------------------------
15 # Imports
16 #------------------------------------------------------------------------------- No newline at end of file
@@ -0,0 +1,18 b''
1 #!/usr/bin/env python
2 # encoding: utf-8
3
4 #-----------------------------------------------------------------------------
5 # Copyright (C) 2008-2009 The IPython Development Team
6 #
7 # Distributed under the terms of the BSD License. The full license is in
8 # the file COPYING, distributed as part of this software.
9 #-----------------------------------------------------------------------------
10
11 #-----------------------------------------------------------------------------
12 # Imports
13 #-----------------------------------------------------------------------------
14
15
16 from IPython.parallel.apps.ipclusterapp import launch_new_instance
17
18 launch_new_instance()
@@ -0,0 +1,18 b''
1 #!/usr/bin/env python
2 # encoding: utf-8
3
4 #-----------------------------------------------------------------------------
5 # Copyright (C) 2008-2009 The IPython Development Team
6 #
7 # Distributed under the terms of the BSD License. The full license is in
8 # the file COPYING, distributed as part of this software.
9 #-----------------------------------------------------------------------------
10
11 #-----------------------------------------------------------------------------
12 # Imports
13 #-----------------------------------------------------------------------------
14
15
16 from IPython.parallel.apps.ipcontrollerapp import launch_new_instance
17
18 launch_new_instance()
@@ -0,0 +1,20 b''
1 #!/usr/bin/env python
2 # encoding: utf-8
3
4 #-----------------------------------------------------------------------------
5 # Copyright (C) 2008-2009 The IPython Development Team
6 #
7 # Distributed under the terms of the BSD License. The full license is in
8 # the file COPYING, distributed as part of this software.
9 #-----------------------------------------------------------------------------
10
11 #-----------------------------------------------------------------------------
12 # Imports
13 #-----------------------------------------------------------------------------
14
15
16 from IPython.parallel.apps.ipengineapp import launch_new_instance
17
18 launch_new_instance()
19
20
@@ -0,0 +1,20 b''
1 #!/usr/bin/env python
2 # encoding: utf-8
3
4 #-----------------------------------------------------------------------------
5 # Copyright (C) 2008-2009 The IPython Development Team
6 #
7 # Distributed under the terms of the BSD License. The full license is in
8 # the file COPYING, distributed as part of this software.
9 #-----------------------------------------------------------------------------
10
11 #-----------------------------------------------------------------------------
12 # Imports
13 #-----------------------------------------------------------------------------
14
15
16 from IPython.parallel.apps.iploggerapp import launch_new_instance
17
18 launch_new_instance()
19
20
@@ -0,0 +1,418 b''
1 #!/usr/bin/env python
2 """edited session.py to work with streams, and move msg_type to the header
3 """
4 #-----------------------------------------------------------------------------
5 # Copyright (C) 2010-2011 The IPython Development Team
6 #
7 # Distributed under the terms of the BSD License. The full license is in
8 # the file COPYING, distributed as part of this software.
9 #-----------------------------------------------------------------------------
10
11
12 import os
13 import pprint
14 import uuid
15 from datetime import datetime
16
17 try:
18 import cPickle
19 pickle = cPickle
20 except:
21 cPickle = None
22 import pickle
23
24 import zmq
25 from zmq.utils import jsonapi
26 from zmq.eventloop.zmqstream import ZMQStream
27
28 from .util import ISO8601
29
30 # packer priority: jsonlib[2], cPickle, simplejson/json, pickle
31 json_name = '' if not jsonapi.jsonmod else jsonapi.jsonmod.__name__
32 if json_name in ('jsonlib', 'jsonlib2'):
33 use_json = True
34 elif json_name:
35 if cPickle is None:
36 use_json = True
37 else:
38 use_json = False
39 else:
40 use_json = False
41
42 def squash_unicode(obj):
43 if isinstance(obj,dict):
44 for key in obj.keys():
45 obj[key] = squash_unicode(obj[key])
46 if isinstance(key, unicode):
47 obj[squash_unicode(key)] = obj.pop(key)
48 elif isinstance(obj, list):
49 for i,v in enumerate(obj):
50 obj[i] = squash_unicode(v)
51 elif isinstance(obj, unicode):
52 obj = obj.encode('utf8')
53 return obj
54
55 json_packer = jsonapi.dumps
56 json_unpacker = lambda s: squash_unicode(jsonapi.loads(s))
57
58 pickle_packer = lambda o: pickle.dumps(o,-1)
59 pickle_unpacker = pickle.loads
60
61 if use_json:
62 default_packer = json_packer
63 default_unpacker = json_unpacker
64 else:
65 default_packer = pickle_packer
66 default_unpacker = pickle_unpacker
67
68
69 DELIM="<IDS|MSG>"
70
71 class Message(object):
72 """A simple message object that maps dict keys to attributes.
73
74 A Message can be created from a dict and a dict from a Message instance
75 simply by calling dict(msg_obj)."""
76
77 def __init__(self, msg_dict):
78 dct = self.__dict__
79 for k, v in dict(msg_dict).iteritems():
80 if isinstance(v, dict):
81 v = Message(v)
82 dct[k] = v
83
84 # Having this iterator lets dict(msg_obj) work out of the box.
85 def __iter__(self):
86 return iter(self.__dict__.iteritems())
87
88 def __repr__(self):
89 return repr(self.__dict__)
90
91 def __str__(self):
92 return pprint.pformat(self.__dict__)
93
94 def __contains__(self, k):
95 return k in self.__dict__
96
97 def __getitem__(self, k):
98 return self.__dict__[k]
99
100
101 def msg_header(msg_id, msg_type, username, session):
102 date=datetime.now().strftime(ISO8601)
103 return locals()
104
105 def extract_header(msg_or_header):
106 """Given a message or header, return the header."""
107 if not msg_or_header:
108 return {}
109 try:
110 # See if msg_or_header is the entire message.
111 h = msg_or_header['header']
112 except KeyError:
113 try:
114 # See if msg_or_header is just the header
115 h = msg_or_header['msg_id']
116 except KeyError:
117 raise
118 else:
119 h = msg_or_header
120 if not isinstance(h, dict):
121 h = dict(h)
122 return h
123
124 class StreamSession(object):
125 """tweaked version of IPython.zmq.session.Session, for development in Parallel"""
126 debug=False
127 key=None
128
129 def __init__(self, username=None, session=None, packer=None, unpacker=None, key=None, keyfile=None):
130 if username is None:
131 username = os.environ.get('USER','username')
132 self.username = username
133 if session is None:
134 self.session = str(uuid.uuid4())
135 else:
136 self.session = session
137 self.msg_id = str(uuid.uuid4())
138 if packer is None:
139 self.pack = default_packer
140 else:
141 if not callable(packer):
142 raise TypeError("packer must be callable, not %s"%type(packer))
143 self.pack = packer
144
145 if unpacker is None:
146 self.unpack = default_unpacker
147 else:
148 if not callable(unpacker):
149 raise TypeError("unpacker must be callable, not %s"%type(unpacker))
150 self.unpack = unpacker
151
152 if key is not None and keyfile is not None:
153 raise TypeError("Must specify key OR keyfile, not both")
154 if keyfile is not None:
155 with open(keyfile) as f:
156 self.key = f.read().strip()
157 else:
158 self.key = key
159 if isinstance(self.key, unicode):
160 self.key = self.key.encode('utf8')
161 # print key, keyfile, self.key
162 self.none = self.pack({})
163
164 def msg_header(self, msg_type):
165 h = msg_header(self.msg_id, msg_type, self.username, self.session)
166 self.msg_id = str(uuid.uuid4())
167 return h
168
169 def msg(self, msg_type, content=None, parent=None, subheader=None):
170 msg = {}
171 msg['header'] = self.msg_header(msg_type)
172 msg['msg_id'] = msg['header']['msg_id']
173 msg['parent_header'] = {} if parent is None else extract_header(parent)
174 msg['msg_type'] = msg_type
175 msg['content'] = {} if content is None else content
176 sub = {} if subheader is None else subheader
177 msg['header'].update(sub)
178 return msg
179
180 def check_key(self, msg_or_header):
181 """Check that a message's header has the right key"""
182 if self.key is None:
183 return True
184 header = extract_header(msg_or_header)
185 return header.get('key', None) == self.key
186
187
188 def send(self, stream, msg_or_type, content=None, buffers=None, parent=None, subheader=None, ident=None, track=False):
189 """Build and send a message via stream or socket.
190
191 Parameters
192 ----------
193
194 stream : zmq.Socket or ZMQStream
195 the socket-like object used to send the data
196 msg_or_type : str or Message/dict
197 Normally, msg_or_type is a msg_type string; pass an already-built Message/dict
198 only when the same message is being sent more than once.
199
200 content : dict or None
201 the content of the message (ignored if msg_or_type is a message)
202 buffers : list or None
203 the already-serialized buffers to be appended to the message
204 parent : Message or dict or None
205 the parent or parent header describing the parent of this message
206 subheader : dict or None
207 extra header keys for this message's header
208 ident : bytes or list of bytes
209 the zmq.IDENTITY routing path
210 track : bool
211 whether to track. Only for use with Sockets, because ZMQStream objects cannot track messages.
212
213 Returns
214 -------
215 msg : message dict
216 the constructed message
217 (msg,tracker) : (message dict, MessageTracker)
218 if track=True, then a 2-tuple will be returned, the first element being the constructed
219 message, and the second being the MessageTracker
220
221 """
222
223 if not isinstance(stream, (zmq.Socket, ZMQStream)):
224 raise TypeError("stream must be Socket or ZMQStream, not %r"%type(stream))
225 elif track and isinstance(stream, ZMQStream):
226 raise TypeError("ZMQStream cannot track messages")
227
228 if isinstance(msg_or_type, (Message, dict)):
229 # we got a Message, not a msg_type
230 # don't build a new Message
231 msg = msg_or_type
232 content = msg['content']
233 else:
234 msg = self.msg(msg_or_type, content, parent, subheader)
235
236 buffers = [] if buffers is None else buffers
237 to_send = []
238 if isinstance(ident, list):
239 # accept list of idents
240 to_send.extend(ident)
241 elif ident is not None:
242 to_send.append(ident)
243 to_send.append(DELIM)
244 if self.key is not None:
245 to_send.append(self.key)
246 to_send.append(self.pack(msg['header']))
247 to_send.append(self.pack(msg['parent_header']))
248
249 if content is None:
250 content = self.none
251 elif isinstance(content, dict):
252 content = self.pack(content)
253 elif isinstance(content, bytes):
254 # content is already packed, as in a relayed message
255 pass
256 else:
257 raise TypeError("Content incorrect type: %s"%type(content))
258 to_send.append(content)
259 flag = 0
260 if buffers:
261 flag = zmq.SNDMORE
262 _track = False
263 else:
264 _track=track
265 if track:
266 tracker = stream.send_multipart(to_send, flag, copy=False, track=_track)
267 else:
268 tracker = stream.send_multipart(to_send, flag, copy=False)
269 for b in buffers[:-1]:
270 stream.send(b, flag, copy=False)
271 if buffers:
272 if track:
273 tracker = stream.send(buffers[-1], copy=False, track=track)
274 else:
275 tracker = stream.send(buffers[-1], copy=False)
276
277 # omsg = Message(msg)
278 if self.debug:
279 pprint.pprint(msg)
280 pprint.pprint(to_send)
281 pprint.pprint(buffers)
282
283 msg['tracker'] = tracker
284
285 return msg
286
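# Editor's note (not part of this diff) -- the multipart layout that send() emits and
# that feed_identities()/unpack_message() consume, sketched frame by frame:
#   [ident, ...]         optional zmq routing prefix
#   DELIM                the '<IDS|MSG>' separator
#   key                  only present when the session has an exec key
#   pack(header)         serialized header dict
#   pack(parent_header)  serialized parent header dict
#   pack(content)        serialized content dict (or pre-packed bytes)
#   buffer, ...          any extra buffers, appended as raw frames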
287 def send_raw(self, stream, msg, flags=0, copy=True, ident=None):
288 """Send a raw message via ident path.
289
290 Parameters
291 ----------
292 msg : list of sendable buffers"""
293 to_send = []
294 if isinstance(ident, bytes):
295 ident = [ident]
296 if ident is not None:
297 to_send.extend(ident)
298 to_send.append(DELIM)
299 if self.key is not None:
300 to_send.append(self.key)
301 to_send.extend(msg)
302 stream.send_multipart(to_send, flags, copy=copy)  # send the assembled frames, not the bare msg
303
304 def recv(self, socket, mode=zmq.NOBLOCK, content=True, copy=True):
305 """receives and unpacks a message
306 returns [idents], msg"""
307 if isinstance(socket, ZMQStream):
308 socket = socket.socket
309 try:
310 msg = socket.recv_multipart(mode)
311 except zmq.ZMQError as e:
312 if e.errno == zmq.EAGAIN:
313 # We can convert EAGAIN to None as we know in this case
314 # recv_multipart won't return None.
315 return None
316 else:
317 raise
318 # return an actual Message object
319 # determine the number of idents by trying to unpack them.
320 # this is terrible:
321 idents, msg = self.feed_identities(msg, copy)
322 try:
323 return idents, self.unpack_message(msg, content=content, copy=copy)
324 except Exception as e:
325 print (idents, msg)
326 # TODO: handle it
327 raise e
328
329 def feed_identities(self, msg, copy=True):
330 """feed until DELIM is reached, then return the prefix as idents and remainder as
331 msg. This is easily broken by setting an IDENT to DELIM, but that would be silly.
332
333 Parameters
334 ----------
335 msg : a list of Message or bytes objects
336 the message to be split
337 copy : bool
338 flag determining whether the arguments are bytes or Messages
339
340 Returns
341 -------
342 (idents,msg) : two lists
343 idents will always be a list of bytes - the identity prefix
344 msg will be a list of bytes or Messages, unchanged from input
345 msg should be unpackable via self.unpack_message at this point.
346 """
347 ikey = int(self.key is not None)
348 minlen = 3 + ikey
349 msg = list(msg)
350 idents = []
351 while len(msg) > minlen:
352 if copy:
353 s = msg[0]
354 else:
355 s = msg[0].bytes
356 if s == DELIM:
357 msg.pop(0)
358 break
359 else:
360 idents.append(s)
361 msg.pop(0)
362
363 return idents, msg
364
365 def unpack_message(self, msg, content=True, copy=True):
366 """Return a message object from the format
367 sent by self.send.
368
369 Parameters
370 ----------
371
372 content : bool (True)
373 whether to unpack the content dict (True),
374 or leave it serialized (False)
375
376 copy : bool (True)
377 whether to return the bytes (True),
378 or the non-copying Message object in each place (False)
379
380 """
381 ikey = int(self.key is not None)
382 minlen = 3 + ikey
383 message = {}
384 if not copy:
385 for i in range(minlen):
386 msg[i] = msg[i].bytes
387 if ikey:
388 if not self.key == msg[0]:
389 raise KeyError("Invalid Session Key: %s"%msg[0])
390 if not len(msg) >= minlen:
391 raise TypeError("malformed message, must have at least %i elements"%minlen)
392 message['header'] = self.unpack(msg[ikey+0])
393 message['msg_type'] = message['header']['msg_type']
394 message['parent_header'] = self.unpack(msg[ikey+1])
395 if content:
396 message['content'] = self.unpack(msg[ikey+2])
397 else:
398 message['content'] = msg[ikey+2]
399
400 message['buffers'] = msg[ikey+3:]# [ m.buffer for m in msg[3:] ]
401 return message
402
403
404 def test_msg2obj():
405 am = dict(x=1)
406 ao = Message(am)
407 assert ao.x == am['x']
408
409 am['y'] = dict(z=1)
410 ao = Message(am)
411 assert ao.y.z == am['y']['z']
412
413 k1, k2 = 'y', 'z'
414 assert ao[k1][k2] == am[k1][k2]
415
416 am2 = dict(ao)
417 assert am['x'] == am2['x']
418 assert am['y']['z'] == am2['y']['z']
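# Editor's note -- a minimal round-trip sketch (not part of this diff), assuming a
# working pyzmq installation: push a message through send() and read it back with
# recv() over an in-process PAIR pair. Socket names and content are hypothetical.
def roundtrip_sketch():
    ctx = zmq.Context.instance()
    a = ctx.socket(zmq.PAIR)
    b = ctx.socket(zmq.PAIR)
    a.bind('inproc://streamsession-demo')
    b.connect('inproc://streamsession-demo')
    session = StreamSession(username='demo')
    session.send(a, 'apply_request', content={'answer': 42})
    idents, msg = session.recv(b, mode=0)   # mode=0 blocks until the message arrives
    assert msg['msg_type'] == 'apply_request'
    assert msg['content']['answer'] == 42
    a.close(); b.close()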
@@ -0,0 +1,69 b''
1 """toplevel setup/teardown for parallel tests."""
2
3 #-------------------------------------------------------------------------------
4 # Copyright (C) 2011 The IPython Development Team
5 #
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING, distributed as part of this software.
8 #-------------------------------------------------------------------------------
9
10 #-------------------------------------------------------------------------------
11 # Imports
12 #-------------------------------------------------------------------------------
13
14 import tempfile
15 import time
16 from subprocess import Popen, PIPE, STDOUT
17
18 from IPython.parallel import Client
19
20 processes = []
21 blackhole = tempfile.TemporaryFile()
22
23 # nose setup/teardown
24
25 def setup():
26 cp = Popen('ipcontroller --profile iptest -r --log-level 10 --log-to-file'.split(), stdout=blackhole, stderr=STDOUT)
27 processes.append(cp)
28 time.sleep(.5)
29 add_engines(1)
30 c = Client(profile='iptest')
31 while not c.ids:
32 time.sleep(.1)
33 c.spin()
34 c.close()
35
36 def add_engines(n=1, profile='iptest'):
37 rc = Client(profile=profile)
38 base = len(rc)
39 eps = []
40 for i in range(n):
41 ep = Popen(['ipengine']+ ['--profile', profile, '--log-level', '10', '--log-to-file'], stdout=blackhole, stderr=STDOUT)
42 # ep.start()
43 processes.append(ep)
44 eps.append(ep)
45 while len(rc) < base+n:
46 time.sleep(.1)
47 rc.spin()
48 rc.close()
49 return eps
50
51 def teardown():
52 time.sleep(1)
53 while processes:
54 p = processes.pop()
55 if p.poll() is None:
56 try:
57 p.terminate()
58 except Exception, e:
59 print e
60 pass
61 if p.poll() is None:
62 time.sleep(.25)
63 if p.poll() is None:
64 try:
65 print 'killing'
66 p.kill()
67 except:
68 print "couldn't shutdown process: ", p
69
@@ -0,0 +1,115 b''
1 """base class for parallel client tests"""
2
3 #-------------------------------------------------------------------------------
4 # Copyright (C) 2011 The IPython Development Team
5 #
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING, distributed as part of this software.
8 #-------------------------------------------------------------------------------
9
10 import sys
11 import tempfile
12 import time
13
14 from nose import SkipTest
15
16 import zmq
17 from zmq.tests import BaseZMQTestCase
18
19 from IPython.external.decorator import decorator
20
21 from IPython.parallel import error
22 from IPython.parallel import Client
23 from IPython.parallel.tests import processes,add_engines
24
25 # simple tasks for use in apply tests
26
27 def segfault():
28 """this will segfault"""
29 import ctypes
30 ctypes.memset(-1,0,1)
31
32 def wait(n):
33 """sleep for a time"""
34 import time
35 time.sleep(n)
36 return n
37
38 def raiser(eclass):
39 """raise an exception"""
40 raise eclass()
41
42 # test decorator for skipping tests when libraries are unavailable
43 def skip_without(*names):
44 """skip a test if some names are not importable"""
45 @decorator
46 def skip_without_names(f, *args, **kwargs):
47 """decorator to skip tests in the absence of numpy."""
48 for name in names:
49 try:
50 __import__(name)
51 except ImportError:
52 raise SkipTest
53 return f(*args, **kwargs)
54 return skip_without_names
55
56 class ClusterTestCase(BaseZMQTestCase):
57
58 def add_engines(self, n=1, block=True):
59 """add multiple engines to our cluster"""
60 self.engines.extend(add_engines(n))
61 if block:
62 self.wait_on_engines()
63
64 def wait_on_engines(self, timeout=5):
65 """wait for our engines to connect."""
66 n = len(self.engines)+self.base_engine_count
67 tic = time.time()
68 while time.time()-tic < timeout and len(self.client.ids) < n:
69 time.sleep(0.1)
70
71 assert not len(self.client.ids) < n, "waiting for engines timed out"
72
73 def connect_client(self):
74 """connect a client with my Context, and track its sockets for cleanup"""
75 c = Client(profile='iptest', context=self.context)
76 for name in filter(lambda n:n.endswith('socket'), dir(c)):
77 s = getattr(c, name)
78 s.setsockopt(zmq.LINGER, 0)
79 self.sockets.append(s)
80 return c
81
82 def assertRaisesRemote(self, etype, f, *args, **kwargs):
83 try:
84 try:
85 f(*args, **kwargs)
86 except error.CompositeError as e:
87 e.raise_exception()
88 except error.RemoteError as e:
89 self.assertEquals(etype.__name__, e.ename, "Should have raised %r, but raised %r"%(etype.__name__, e.ename))
90 else:
91 self.fail("should have raised a RemoteError")
92
93 def setUp(self):
94 BaseZMQTestCase.setUp(self)
95 self.client = self.connect_client()
96 self.base_engine_count=len(self.client.ids)
97 self.engines=[]
98
99 def tearDown(self):
100 # self.client.clear(block=True)
101 # close fds:
102 for e in filter(lambda e: e.poll() is not None, processes):
103 processes.remove(e)
104
105 # allow flushing of incoming messages to prevent crash on socket close
106 self.client.wait(timeout=2)
107 # time.sleep(2)
108 self.client.spin()
109 self.client.close()
110 BaseZMQTestCase.tearDown(self)
111 # this will be redundant when pyzmq merges PR #88
112 # self.context.term()
113 # print tempfile.TemporaryFile().fileno(),
114 # sys.stdout.flush()
115 No newline at end of file
@@ -0,0 +1,69 b''
1 """Tests for asyncresult.py"""
2
3 #-------------------------------------------------------------------------------
4 # Copyright (C) 2011 The IPython Development Team
5 #
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING, distributed as part of this software.
8 #-------------------------------------------------------------------------------
9
10 #-------------------------------------------------------------------------------
11 # Imports
12 #-------------------------------------------------------------------------------
13
14
15 from IPython.parallel.error import TimeoutError
16
17 from IPython.parallel.tests import add_engines
18 from .clienttest import ClusterTestCase
19
20 def setup():
21 add_engines(2)
22
23 def wait(n):
24 import time
25 time.sleep(n)
26 return n
27
28 class AsyncResultTest(ClusterTestCase):
29
30 def test_single_result(self):
31 eid = self.client.ids[-1]
32 ar = self.client[eid].apply_async(lambda : 42)
33 self.assertEquals(ar.get(), 42)
34 ar = self.client[[eid]].apply_async(lambda : 42)
35 self.assertEquals(ar.get(), [42])
36 ar = self.client[-1:].apply_async(lambda : 42)
37 self.assertEquals(ar.get(), [42])
38
39 def test_get_after_done(self):
40 ar = self.client[-1].apply_async(lambda : 42)
41 self.assertFalse(ar.ready())
42 ar.wait()
43 self.assertTrue(ar.ready())
44 self.assertEquals(ar.get(), 42)
45 self.assertEquals(ar.get(), 42)
46
47 def test_get_before_done(self):
48 ar = self.client[-1].apply_async(wait, 0.1)
49 self.assertRaises(TimeoutError, ar.get, 0)
50 ar.wait(0)
51 self.assertFalse(ar.ready())
52 self.assertEquals(ar.get(), 0.1)
53
54 def test_get_after_error(self):
55 ar = self.client[-1].apply_async(lambda : 1/0)
56 ar.wait()
57 self.assertRaisesRemote(ZeroDivisionError, ar.get)
58 self.assertRaisesRemote(ZeroDivisionError, ar.get)
59 self.assertRaisesRemote(ZeroDivisionError, ar.get_dict)
60
61 def test_get_dict(self):
62 n = len(self.client)
63 ar = self.client[:].apply_async(lambda : 5)
64 self.assertEquals(ar.get(), [5]*n)
65 d = ar.get_dict()
66 self.assertEquals(sorted(d.keys()), sorted(self.client.ids))
67 for eid,r in d.iteritems():
68 self.assertEquals(r, 5)
69
@@ -0,0 +1,147 b''
1 """Tests for parallel client.py"""
2
3 #-------------------------------------------------------------------------------
4 # Copyright (C) 2011 The IPython Development Team
5 #
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING, distributed as part of this software.
8 #-------------------------------------------------------------------------------
9
10 #-------------------------------------------------------------------------------
11 # Imports
12 #-------------------------------------------------------------------------------
13
14 import time
15 from tempfile import mktemp
16
17 import zmq
18
19 from IPython.parallel.client import client as clientmod
20 from IPython.parallel import error
21 from IPython.parallel import AsyncResult, AsyncHubResult
22 from IPython.parallel import LoadBalancedView, DirectView
23
24 from clienttest import ClusterTestCase, segfault, wait, add_engines
25
26 def setup():
27 add_engines(4)
28
29 class TestClient(ClusterTestCase):
30
31 def test_ids(self):
32 n = len(self.client.ids)
33 self.add_engines(3)
34 self.assertEquals(len(self.client.ids), n+3)
35
36 def test_view_indexing(self):
37 """test index access for views"""
38 self.add_engines(2)
39 targets = self.client._build_targets('all')[-1]
40 v = self.client[:]
41 self.assertEquals(v.targets, targets)
42 t = self.client.ids[2]
43 v = self.client[t]
44 self.assert_(isinstance(v, DirectView))
45 self.assertEquals(v.targets, t)
46 t = self.client.ids[2:4]
47 v = self.client[t]
48 self.assert_(isinstance(v, DirectView))
49 self.assertEquals(v.targets, t)
50 v = self.client[::2]
51 self.assert_(isinstance(v, DirectView))
52 self.assertEquals(v.targets, targets[::2])
53 v = self.client[1::3]
54 self.assert_(isinstance(v, DirectView))
55 self.assertEquals(v.targets, targets[1::3])
56 v = self.client[:-3]
57 self.assert_(isinstance(v, DirectView))
58 self.assertEquals(v.targets, targets[:-3])
59 v = self.client[-1]
60 self.assert_(isinstance(v, DirectView))
61 self.assertEquals(v.targets, targets[-1])
62 self.assertRaises(TypeError, lambda : self.client[None])
63
64 def test_lbview_targets(self):
65 """test load_balanced_view targets"""
66 v = self.client.load_balanced_view()
67 self.assertEquals(v.targets, None)
68 v = self.client.load_balanced_view(-1)
69 self.assertEquals(v.targets, [self.client.ids[-1]])
70 v = self.client.load_balanced_view('all')
71 self.assertEquals(v.targets, self.client.ids)
72
73 def test_targets(self):
74 """test various valid targets arguments"""
75 build = self.client._build_targets
76 ids = self.client.ids
77 idents,targets = build(None)
78 self.assertEquals(ids, targets)
79
80 def test_clear(self):
81 """test clear behavior"""
82 # self.add_engines(2)
83 v = self.client[:]
84 v.block=True
85 v.push(dict(a=5))
86 v.pull('a')
87 id0 = self.client.ids[-1]
88 self.client.clear(targets=id0)
89 self.client[:-1].pull('a')
90 self.assertRaisesRemote(NameError, self.client[id0].get, 'a')
91 self.client.clear(block=True)
92 for i in self.client.ids:
93 # print i
94 self.assertRaisesRemote(NameError, self.client[i].get, 'a')
95
96 def test_get_result(self):
97 """test getting results from the Hub."""
98 c = clientmod.Client(profile='iptest')
99 # self.add_engines(1)
100 t = c.ids[-1]
101 ar = c[t].apply_async(wait, 1)
102 # give the monitor time to notice the message
103 time.sleep(.25)
104 ahr = self.client.get_result(ar.msg_ids)
105 self.assertTrue(isinstance(ahr, AsyncHubResult))
106 self.assertEquals(ahr.get(), ar.get())
107 ar2 = self.client.get_result(ar.msg_ids)
108 self.assertFalse(isinstance(ar2, AsyncHubResult))
109 c.close()
110
111 def test_ids_list(self):
112 """test client.ids"""
113 # self.add_engines(2)
114 ids = self.client.ids
115 self.assertEquals(ids, self.client._ids)
116 self.assertFalse(ids is self.client._ids)
117 ids.remove(ids[-1])
118 self.assertNotEquals(ids, self.client._ids)
119
120 def test_queue_status(self):
121 # self.addEngine(4)
122 ids = self.client.ids
123 id0 = ids[0]
124 qs = self.client.queue_status(targets=id0)
125 self.assertTrue(isinstance(qs, dict))
126 self.assertEquals(sorted(qs.keys()), ['completed', 'queue', 'tasks'])
127 allqs = self.client.queue_status()
128 self.assertTrue(isinstance(allqs, dict))
129 self.assertEquals(sorted(allqs.keys()), self.client.ids)
130 for eid,qs in allqs.items():
131 self.assertTrue(isinstance(qs, dict))
132 self.assertEquals(sorted(qs.keys()), ['completed', 'queue', 'tasks'])
133
134 def test_shutdown(self):
135 # self.addEngine(4)
136 ids = self.client.ids
137 id0 = ids[0]
138 self.client.shutdown(id0, block=True)
139 while id0 in self.client.ids:
140 time.sleep(0.1)
141 self.client.spin()
142
143 self.assertRaises(IndexError, lambda : self.client[id0])
144
145 def test_result_status(self):
146 pass
147 # to be written
@@ -0,0 +1,101 b''
1 """Tests for dependency.py"""
2
3 __docformat__ = "restructuredtext en"
4
5 #-------------------------------------------------------------------------------
6 # Copyright (C) 2011 The IPython Development Team
7 #
8 # Distributed under the terms of the BSD License. The full license is in
9 # the file COPYING, distributed as part of this software.
10 #-------------------------------------------------------------------------------
11
12 #-------------------------------------------------------------------------------
13 # Imports
14 #-------------------------------------------------------------------------------
15
16 # import
17 import os
18
19 from IPython.utils.pickleutil import can, uncan
20
21 import IPython.parallel as pmod
22 from IPython.parallel.util import interactive
23
24 from IPython.parallel.tests import add_engines
25 from .clienttest import ClusterTestCase
26
27 def setup():
28 add_engines(1)
29
30 @pmod.require('time')
31 def wait(n):
32 time.sleep(n)
33 return n
34
35 mixed = map(str, range(10))
36 completed = map(str, range(0,10,2))
37 failed = map(str, range(1,10,2))
38
39 class DependencyTest(ClusterTestCase):
40
41 def setUp(self):
42 ClusterTestCase.setUp(self)
43 self.user_ns = {'__builtins__' : __builtins__}
44 self.view = self.client.load_balanced_view()
45 self.dview = self.client[-1]
46 self.succeeded = set(map(str, range(0,25,2)))
47 self.failed = set(map(str, range(1,25,2)))
48
49 def assertMet(self, dep):
50 self.assertTrue(dep.check(self.succeeded, self.failed), "Dependency should be met")
51
52 def assertUnmet(self, dep):
53 self.assertFalse(dep.check(self.succeeded, self.failed), "Dependency should not be met")
54
55 def assertUnreachable(self, dep):
56 self.assertTrue(dep.unreachable(self.succeeded, self.failed), "Dependency should be unreachable")
57
58 def assertReachable(self, dep):
59 self.assertFalse(dep.unreachable(self.succeeded, self.failed), "Dependency should be reachable")
60
61 def cancan(self, f):
62 """decorator to pass through canning into self.user_ns"""
63 return uncan(can(f), self.user_ns)
64
65 def test_require_imports(self):
66 """test that @require imports names"""
67 @self.cancan
68 @pmod.require('urllib')
69 @interactive
70 def encode(dikt):
71 return urllib.urlencode(dikt)
72 # must pass through canning to properly connect namespaces
73 self.assertEquals(encode(dict(a=5)), 'a=5')
74
75 def test_success_only(self):
76 dep = pmod.Dependency(mixed, success=True, failure=False)
77 self.assertUnmet(dep)
78 self.assertUnreachable(dep)
79 dep.all=False
80 self.assertMet(dep)
81 self.assertReachable(dep)
82 dep = pmod.Dependency(completed, success=True, failure=False)
83 self.assertMet(dep)
84 self.assertReachable(dep)
85 dep.all=False
86 self.assertMet(dep)
87 self.assertReachable(dep)
88
89 def test_failure_only(self):
90 dep = pmod.Dependency(mixed, success=False, failure=True)
91 self.assertUnmet(dep)
92 self.assertUnreachable(dep)
93 dep.all=False
94 self.assertMet(dep)
95 self.assertReachable(dep)
96 dep = pmod.Dependency(completed, success=False, failure=True)
97 self.assertUnmet(dep)
98 self.assertUnreachable(dep)
99 dep.all=False
100 self.assertUnmet(dep)
101 self.assertUnreachable(dep)
@@ -0,0 +1,108 b''
1 """test serialization with newserialized"""
2
3 #-------------------------------------------------------------------------------
4 # Copyright (C) 2011 The IPython Development Team
5 #
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING, distributed as part of this software.
8 #-------------------------------------------------------------------------------
9
10 #-------------------------------------------------------------------------------
11 # Imports
12 #-------------------------------------------------------------------------------
13
14 from unittest import TestCase
15
16 from IPython.testing.decorators import parametric
17 from IPython.utils import newserialized as ns
18 from IPython.utils.pickleutil import can, uncan, CannedObject, CannedFunction
19 from IPython.parallel.tests.clienttest import skip_without
20
21
22 class CanningTestCase(TestCase):
23 def test_canning(self):
24 d = dict(a=5,b=6)
25 cd = can(d)
26 self.assertTrue(isinstance(cd, dict))
27
28 def test_canned_function(self):
29 f = lambda : 7
30 cf = can(f)
31 self.assertTrue(isinstance(cf, CannedFunction))
32
33 @parametric
34 def test_can_roundtrip(cls):
35 objs = [
36 dict(),
37 set(),
38 list(),
39 ['a',1,['a',1],u'e'],
40 ]
41 return map(cls.run_roundtrip, objs)
42
43 @classmethod
44 def run_roundtrip(self, obj):
45 o = uncan(can(obj))
46 assert o == obj, "failed assertion: %r == %r"%(o,obj)
47
48 def test_serialized_interfaces(self):
49
50 us = {'a':10, 'b':range(10)}
51 s = ns.serialize(us)
52 uus = ns.unserialize(s)
53 self.assertTrue(isinstance(s, ns.SerializeIt))
54 self.assertEquals(uus, us)
55
56 def test_pickle_serialized(self):
57 obj = {'a':1.45345, 'b':'asdfsdf', 'c':10000L}
58 original = ns.UnSerialized(obj)
59 originalSer = ns.SerializeIt(original)
60 firstData = originalSer.getData()
61 firstTD = originalSer.getTypeDescriptor()
62 firstMD = originalSer.getMetadata()
63 self.assertEquals(firstTD, 'pickle')
64 self.assertEquals(firstMD, {})
65 unSerialized = ns.UnSerializeIt(originalSer)
66 secondObj = unSerialized.getObject()
67 for k, v in secondObj.iteritems():
68 self.assertEquals(obj[k], v)
69 secondSer = ns.SerializeIt(ns.UnSerialized(secondObj))
70 self.assertEquals(firstData, secondSer.getData())
71 self.assertEquals(firstTD, secondSer.getTypeDescriptor() )
72 self.assertEquals(firstMD, secondSer.getMetadata())
73
74 @skip_without('numpy')
75 def test_ndarray_serialized(self):
76 import numpy
77 a = numpy.linspace(0.0, 1.0, 1000)
78 unSer1 = ns.UnSerialized(a)
79 ser1 = ns.SerializeIt(unSer1)
80 td = ser1.getTypeDescriptor()
81 self.assertEquals(td, 'ndarray')
82 md = ser1.getMetadata()
83 self.assertEquals(md['shape'], a.shape)
84 self.assertEquals(md['dtype'], a.dtype.str)
85 buff = ser1.getData()
86 self.assertEquals(buff, numpy.getbuffer(a))
87 s = ns.Serialized(buff, td, md)
88 final = ns.unserialize(s)
89 self.assertEquals(numpy.getbuffer(a), numpy.getbuffer(final))
90 self.assertTrue((a==final).all())
91 self.assertEquals(a.dtype.str, final.dtype.str)
92 self.assertEquals(a.shape, final.shape)
93 # test non-copying:
94 a[2] = 1e9
95 self.assertTrue((a==final).all())
96
97 def test_uncan_function_globals(self):
98 """test that uncanning a module function restores it into its module"""
99 from re import search
100 cf = can(search)
101 csearch = uncan(cf)
102 self.assertEqual(csearch.__module__, search.__module__)
103 self.assertNotEqual(csearch('asd', 'asdf'), None)
104 csearch = uncan(cf, dict(a=5))
105 self.assertEqual(csearch.__module__, search.__module__)
106 self.assertNotEqual(csearch('asd', 'asdf'), None)
107
108 No newline at end of file
@@ -0,0 +1,111 b''
1 """test building messages with streamsession"""
2
3 #-------------------------------------------------------------------------------
4 # Copyright (C) 2011 The IPython Development Team
5 #
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING, distributed as part of this software.
8 #-------------------------------------------------------------------------------
9
10 #-------------------------------------------------------------------------------
11 # Imports
12 #-------------------------------------------------------------------------------
13
14 import os
15 import uuid
16 import zmq
17
18 from zmq.tests import BaseZMQTestCase
19 from zmq.eventloop.zmqstream import ZMQStream
20 # from IPython.zmq.tests import SessionTestCase
21 from IPython.parallel import streamsession as ss
22
23 class SessionTestCase(BaseZMQTestCase):
24
25 def setUp(self):
26 BaseZMQTestCase.setUp(self)
27 self.session = ss.StreamSession()
28
29 class TestSession(SessionTestCase):
30
31 def test_msg(self):
32 """message format"""
33 msg = self.session.msg('execute')
34 thekeys = set('header msg_id parent_header msg_type content'.split())
35 s = set(msg.keys())
36 self.assertEquals(s, thekeys)
37 self.assertTrue(isinstance(msg['content'],dict))
38 self.assertTrue(isinstance(msg['header'],dict))
39 self.assertTrue(isinstance(msg['parent_header'],dict))
40 self.assertEquals(msg['msg_type'], 'execute')
41
42
43
44 def test_args(self):
45 """initialization arguments for StreamSession"""
46 s = self.session
47 self.assertTrue(s.pack is ss.default_packer)
48 self.assertTrue(s.unpack is ss.default_unpacker)
49 self.assertEquals(s.username, os.environ.get('USER', 'username'))
50
51 s = ss.StreamSession(username=None)
52 self.assertEquals(s.username, os.environ.get('USER', 'username'))
53
54 self.assertRaises(TypeError, ss.StreamSession, packer='hi')
55 self.assertRaises(TypeError, ss.StreamSession, unpacker='hi')
56 u = str(uuid.uuid4())
57 s = ss.StreamSession(username='carrot', session=u)
58 self.assertEquals(s.session, u)
59 self.assertEquals(s.username, 'carrot')
60
61 def test_tracking(self):
62 """test tracking messages"""
63 a,b = self.create_bound_pair(zmq.PAIR, zmq.PAIR)
64 s = self.session
65 stream = ZMQStream(a)
66 msg = s.send(a, 'hello', track=False)
67 self.assertTrue(msg['tracker'] is None)
68 msg = s.send(a, 'hello', track=True)
69 self.assertTrue(isinstance(msg['tracker'], zmq.MessageTracker))
70 M = zmq.Message(b'hi there', track=True)
71 msg = s.send(a, 'hello', buffers=[M], track=True)
72 t = msg['tracker']
73 self.assertTrue(isinstance(t, zmq.MessageTracker))
74 self.assertRaises(zmq.NotDone, t.wait, .1)
75 del M
76 t.wait(1) # this will raise
77
78
79 # def test_rekey(self):
80 # """rekeying dict around json str keys"""
81 # d = {'0': uuid.uuid4(), 0:uuid.uuid4()}
82 # self.assertRaises(KeyError, ss.rekey, d)
83 #
84 # d = {'0': uuid.uuid4(), 1:uuid.uuid4(), 'asdf':uuid.uuid4()}
85 # d2 = {0:d['0'],1:d[1],'asdf':d['asdf']}
86 # rd = ss.rekey(d)
87 # self.assertEquals(d2,rd)
88 #
89 # d = {'1.5':uuid.uuid4(),'1':uuid.uuid4()}
90 # d2 = {1.5:d['1.5'],1:d['1']}
91 # rd = ss.rekey(d)
92 # self.assertEquals(d2,rd)
93 #
94 # d = {'1.0':uuid.uuid4(),'1':uuid.uuid4()}
95 # self.assertRaises(KeyError, ss.rekey, d)
96 #
97 def test_unique_msg_ids(self):
98 """test that messages receive unique ids"""
99 ids = set()
100 for i in range(2**12):
101 h = self.session.msg_header('test')
102 msg_id = h['msg_id']
103 self.assertTrue(msg_id not in ids)
104 ids.add(msg_id)
105
106 def test_feed_identities(self):
107 """scrub the front for zmq IDENTITIES"""
108 theids = "engine client other".split()
109 content = dict(code='whoda',stuff=object())
110 themsg = self.session.msg('execute',content=content)
111 pmsg = theids
@@ -0,0 +1,301 b''
1 """test View objects"""
2 #-------------------------------------------------------------------------------
3 # Copyright (C) 2011 The IPython Development Team
4 #
5 # Distributed under the terms of the BSD License. The full license is in
6 # the file COPYING, distributed as part of this software.
7 #-------------------------------------------------------------------------------
8
9 #-------------------------------------------------------------------------------
10 # Imports
11 #-------------------------------------------------------------------------------
12
13 import time
14 from tempfile import mktemp
15
16 import zmq
17
18 from IPython import parallel as pmod
19 from IPython.parallel import error
20 from IPython.parallel import AsyncResult, AsyncHubResult, AsyncMapResult
21 from IPython.parallel import LoadBalancedView, DirectView
22 from IPython.parallel.util import interactive
23
24 from IPython.parallel.tests import add_engines
25
26 from .clienttest import ClusterTestCase, segfault, wait, skip_without
27
28 def setup():
29 add_engines(3)
30
31 class TestView(ClusterTestCase):
32
33 def test_segfault_task(self):
34 """test graceful handling of engine death (balanced)"""
35 # self.add_engines(1)
36 ar = self.client[-1].apply_async(segfault)
37 self.assertRaisesRemote(error.EngineError, ar.get)
38 eid = ar.engine_id
39 while eid in self.client.ids:
40 time.sleep(.01)
41 self.client.spin()
42
43 def test_segfault_mux(self):
44 """test graceful handling of engine death (direct)"""
45 # self.add_engines(1)
46 eid = self.client.ids[-1]
47 ar = self.client[eid].apply_async(segfault)
48 self.assertRaisesRemote(error.EngineError, ar.get)
49 eid = ar.engine_id
50 while eid in self.client.ids:
51 time.sleep(.01)
52 self.client.spin()
53
54 def test_push_pull(self):
55 """test pushing and pulling"""
56 data = dict(a=10, b=1.05, c=range(10), d={'e':(1,2),'f':'hi'})
57 t = self.client.ids[-1]
58 v = self.client[t]
59 push = v.push
60 pull = v.pull
61 v.block=True
62 nengines = len(self.client)
63 push({'data':data})
64 d = pull('data')
65 self.assertEquals(d, data)
66 self.client[:].push({'data':data})
67 d = self.client[:].pull('data', block=True)
68 self.assertEquals(d, nengines*[data])
69 ar = push({'data':data}, block=False)
70 self.assertTrue(isinstance(ar, AsyncResult))
71 r = ar.get()
72 ar = self.client[:].pull('data', block=False)
73 self.assertTrue(isinstance(ar, AsyncResult))
74 r = ar.get()
75 self.assertEquals(r, nengines*[data])
76 self.client[:].push(dict(a=10,b=20))
77 r = self.client[:].pull(('a','b'))
78 self.assertEquals(r, nengines*[[10,20]])
79
80 def test_push_pull_function(self):
81 "test pushing and pulling functions"
82 def testf(x):
83 return 2.0*x
84
85 t = self.client.ids[-1]
86 self.client[t].block=True
87 push = self.client[t].push
88 pull = self.client[t].pull
89 execute = self.client[t].execute
90 push({'testf':testf})
91 r = pull('testf')
92 self.assertEqual(r(1.0), testf(1.0))
93 execute('r = testf(10)')
94 r = pull('r')
95 self.assertEquals(r, testf(10))
96 ar = self.client[:].push({'testf':testf}, block=False)
97 ar.get()
98 ar = self.client[:].pull('testf', block=False)
99 rlist = ar.get()
100 for r in rlist:
101 self.assertEqual(r(1.0), testf(1.0))
102 execute("def g(x): return x*x")
103 r = pull(('testf','g'))
104 self.assertEquals((r[0](10),r[1](10)), (testf(10), 100))
105
106 def test_push_function_globals(self):
107 """test that pushed functions have access to globals"""
108 @interactive
109 def geta():
110 return a
111 # self.add_engines(1)
112 v = self.client[-1]
113 v.block=True
114 v['f'] = geta
115 self.assertRaisesRemote(NameError, v.execute, 'b=f()')
116 v.execute('a=5')
117 v.execute('b=f()')
118 self.assertEquals(v['b'], 5)
119
120 def test_push_function_defaults(self):
121 """test that pushed functions preserve default args"""
122 def echo(a=10):
123 return a
124 v = self.client[-1]
125 v.block=True
126 v['f'] = echo
127 v.execute('b=f()')
128 self.assertEquals(v['b'], 10)
129
130 def test_get_result(self):
131 """test getting results from the Hub."""
132 c = pmod.Client(profile='iptest')
133 # self.add_engines(1)
134 t = c.ids[-1]
135 v = c[t]
136 v2 = self.client[t]
137 ar = v.apply_async(wait, 1)
138 # give the monitor time to notice the message
139 time.sleep(.25)
140 ahr = v2.get_result(ar.msg_ids)
141 self.assertTrue(isinstance(ahr, AsyncHubResult))
142 self.assertEquals(ahr.get(), ar.get())
143 ar2 = v2.get_result(ar.msg_ids)
144 self.assertFalse(isinstance(ar2, AsyncHubResult))
145 c.spin()
146 c.close()
147
148 def test_run_newline(self):
149 """test that run appends newline to files"""
150 tmpfile = mktemp()
151 with open(tmpfile, 'w') as f:
152 f.write("""def g():
153 return 5
154 """)
155 v = self.client[-1]
156 v.run(tmpfile, block=True)
157 self.assertEquals(v.apply_sync(lambda f: f(), pmod.Reference('g')), 5)
158
159 def test_apply_tracked(self):
160 """test tracking for apply"""
161 # self.add_engines(1)
162 t = self.client.ids[-1]
163 v = self.client[t]
164 v.block=False
165 def echo(n=1024*1024, **kwargs):
166 with v.temp_flags(**kwargs):
167 return v.apply(lambda x: x, 'x'*n)
168 ar = echo(1, track=False)
169 self.assertTrue(isinstance(ar._tracker, zmq.MessageTracker))
170 self.assertTrue(ar.sent)
171 ar = echo(track=True)
172 self.assertTrue(isinstance(ar._tracker, zmq.MessageTracker))
173 self.assertEquals(ar.sent, ar._tracker.done)
174 ar._tracker.wait()
175 self.assertTrue(ar.sent)
176
177 def test_push_tracked(self):
178 t = self.client.ids[-1]
179 ns = dict(x='x'*1024*1024)
180 v = self.client[t]
181 ar = v.push(ns, block=False, track=False)
182 self.assertTrue(isinstance(ar._tracker, zmq.MessageTracker))
183 self.assertTrue(ar.sent)
184
185 ar = v.push(ns, block=False, track=True)
186 self.assertTrue(isinstance(ar._tracker, zmq.MessageTracker))
187 self.assertEquals(ar.sent, ar._tracker.done)
188 ar._tracker.wait()
189 self.assertTrue(ar.sent)
190 ar.get()
191
192 def test_scatter_tracked(self):
193 t = self.client.ids
194 x='x'*1024*1024
195 ar = self.client[t].scatter('x', x, block=False, track=False)
196 self.assertTrue(isinstance(ar._tracker, zmq.MessageTracker))
197 self.assertTrue(ar.sent)
198
199 ar = self.client[t].scatter('x', x, block=False, track=True)
200 self.assertTrue(isinstance(ar._tracker, zmq.MessageTracker))
201 self.assertEquals(ar.sent, ar._tracker.done)
202 ar._tracker.wait()
203 self.assertTrue(ar.sent)
204 ar.get()
205
206 def test_remote_reference(self):
207 v = self.client[-1]
208 v['a'] = 123
209 ra = pmod.Reference('a')
210 b = v.apply_sync(lambda x: x, ra)
211 self.assertEquals(b, 123)
212
213
214 def test_scatter_gather(self):
215 view = self.client[:]
216 seq1 = range(16)
217 view.scatter('a', seq1)
218 seq2 = view.gather('a', block=True)
219 self.assertEquals(seq2, seq1)
220 self.assertRaisesRemote(NameError, view.gather, 'asdf', block=True)
221
222 @skip_without('numpy')
223 def test_scatter_gather_numpy(self):
224 import numpy
225 from numpy.testing.utils import assert_array_equal, assert_array_almost_equal
226 view = self.client[:]
227 a = numpy.arange(64)
228 view.scatter('a', a)
229 b = view.gather('a', block=True)
230 assert_array_equal(b, a)
231
232 def test_map(self):
233 view = self.client[:]
234 def f(x):
235 return x**2
236 data = range(16)
237 r = view.map_sync(f, data)
238 self.assertEquals(r, map(f, data))
239
240 def test_scatterGatherNonblocking(self):
241 data = range(16)
242 view = self.client[:]
243 view.scatter('a', data, block=False)
244 ar = view.gather('a', block=False)
245 self.assertEquals(ar.get(), data)
246
247 @skip_without('numpy')
248 def test_scatter_gather_numpy_nonblocking(self):
249 import numpy
250 from numpy.testing.utils import assert_array_equal, assert_array_almost_equal
251 a = numpy.arange(64)
252 view = self.client[:]
253 ar = view.scatter('a', a, block=False)
254 self.assertTrue(isinstance(ar, AsyncResult))
255 amr = view.gather('a', block=False)
256 self.assertTrue(isinstance(amr, AsyncMapResult))
257 assert_array_equal(amr.get(), a)
258
259 def test_execute(self):
260 view = self.client[:]
261 # self.client.debug=True
262 execute = view.execute
263 ar = execute('c=30', block=False)
264 self.assertTrue(isinstance(ar, AsyncResult))
265 ar = execute('d=[0,1,2]', block=False)
266 self.client.wait(ar, 1)
267 self.assertEquals(len(ar.get()), len(self.client))
268 for c in view['c']:
269 self.assertEquals(c, 30)
270
271 def test_abort(self):
272 view = self.client[-1]
273 ar = view.execute('import time; time.sleep(0.25)', block=False)
274 ar2 = view.apply_async(lambda : 2)
275 ar3 = view.apply_async(lambda : 3)
276 view.abort(ar2)
277 view.abort(ar3.msg_ids)
278 self.assertRaises(error.TaskAborted, ar2.get)
279 self.assertRaises(error.TaskAborted, ar3.get)
280
281 def test_temp_flags(self):
282 view = self.client[-1]
283 view.block=True
284 with view.temp_flags(block=False):
285 self.assertFalse(view.block)
286 self.assertTrue(view.block)
287
288 def test_importer(self):
289 view = self.client[-1]
290 view.clear(block=True)
291 with view.importer:
292 import re
293
294 @interactive
295 def findall(pat, s):
296 # this globals() step isn't necessary in real code
297 # only to prevent a closure in the test
298 return globals()['re'].findall(pat, s)
299
300 self.assertEquals(view.apply_sync(findall, r'\w+', 'hello world'), 'hello world'.split())
301
@@ -0,0 +1,462 b''
1 """some generic utilities for dealing with classes, urls, and serialization"""
2 #-----------------------------------------------------------------------------
3 # Copyright (C) 2010-2011 The IPython Development Team
4 #
5 # Distributed under the terms of the BSD License. The full license is in
6 # the file COPYING, distributed as part of this software.
7 #-----------------------------------------------------------------------------
8
9 #-----------------------------------------------------------------------------
10 # Imports
11 #-----------------------------------------------------------------------------
12
13 # Standard library imports.
14 import logging
15 import os
16 import re
17 import stat
18 import socket
19 import sys
20 from signal import signal, SIGINT, SIGABRT, SIGTERM
21 try:
22 from signal import SIGKILL
23 except ImportError:
24 SIGKILL=None
25
26 try:
27 import cPickle
28 pickle = cPickle
29 except ImportError:
30 cPickle = None
31 import pickle
32
33 # System library imports
34 import zmq
35 from zmq.log import handlers
36
37 # IPython imports
38 from IPython.utils.pickleutil import can, uncan, canSequence, uncanSequence
39 from IPython.utils.newserialized import serialize, unserialize
40 from IPython.zmq.log import EnginePUBHandler
41
42 # globals
43 ISO8601="%Y-%m-%dT%H:%M:%S.%f"
44
45 #-----------------------------------------------------------------------------
46 # Classes
47 #-----------------------------------------------------------------------------
48
49 class Namespace(dict):
50 """Subclass of dict for attribute access to keys."""
51
52 def __getattr__(self, key):
53 """getattr aliased to getitem"""
54 if key in self.iterkeys():
55 return self[key]
56 else:
57 raise NameError(key)
58
59 def __setattr__(self, key, value):
60 """setattr aliased to setitem, with strict"""
61 if hasattr(dict, key):
62 raise KeyError("Cannot override dict keys %r"%key)
63 self[key] = value
64
65
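# A minimal usage sketch for Namespace (illustrative only; `ns` is a throwaway name):
#
# >>> ns = Namespace(a=1)
# >>> ns.b = 2                  # setattr is aliased to setitem
# >>> ns.a, ns['b']
# (1, 2)
# >>> ns.update = 3             # raises KeyError: dict method names can't be shadowed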
66 class ReverseDict(dict):
67 """simple double-keyed subset of dict methods."""
68
69 def __init__(self, *args, **kwargs):
70 dict.__init__(self, *args, **kwargs)
71 self._reverse = dict()
72 for key, value in self.iteritems():
73 self._reverse[value] = key
74
75 def __getitem__(self, key):
76 try:
77 return dict.__getitem__(self, key)
78 except KeyError:
79 return self._reverse[key]
80
81 def __setitem__(self, key, value):
82 if key in self._reverse:
83 raise KeyError("Can't have key %r on both sides!"%key)
84 dict.__setitem__(self, key, value)
85 self._reverse[value] = key
86
87 def pop(self, key):
88 value = dict.pop(self, key)
89 self._reverse.pop(value)
90 return value
91
92 def get(self, key, default=None):
93 try:
94 return self[key]
95 except KeyError:
96 return default
97
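# A quick sketch of ReverseDict lookups (illustrative; the key/value names are arbitrary):
#
# >>> rd = ReverseDict()
# >>> rd['mux'] = 10102
# >>> rd[10102]                 # a missing forward key falls back to the value->key map
# 'mux'
# >>> rd.get('nonexistent') is None
# True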
98 #-----------------------------------------------------------------------------
99 # Functions
100 #-----------------------------------------------------------------------------
101
102 def validate_url(url):
103 """validate a url for zeromq"""
104 if not isinstance(url, basestring):
105 raise TypeError("url must be a string, not %r"%type(url))
106 url = url.lower()
107
108 proto_addr = url.split('://')
109 assert len(proto_addr) == 2, 'Invalid url: %r'%url
110 proto, addr = proto_addr
111 assert proto in ['tcp','pgm','epgm','ipc','inproc'], "Invalid protocol: %r"%proto
112
113 # domain pattern adapted from http://www.regexlib.com/REDetails.aspx?regexp_id=391
114 # author: Remi Sabourin
115 pat = re.compile(r'^([\w\d]([\w\d\-]{0,61}[\w\d])?\.)*[\w\d]([\w\d\-]{0,61}[\w\d])?$')
116
117 if proto == 'tcp':
118 lis = addr.split(':')
119 assert len(lis) == 2, 'Invalid url: %r'%url
120 addr,s_port = lis
121 try:
122 port = int(s_port)
123 except ValueError:
124 raise AssertionError("Invalid port %r in url: %r"%(s_port, url))
125
126 assert addr == '*' or pat.match(addr) is not None, 'Invalid url: %r'%url
127
128 else:
129 # only validate tcp urls currently
130 pass
131
132 return True
133
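# Illustrative checks (a sketch; only tcp urls are fully validated, per the code above):
#
# >>> validate_url('tcp://127.0.0.1:10101')
# True
# >>> validate_url('tcp://127.0.0.1')       # missing port -> AssertionError
# >>> validate_url('http://example.com:80') # unsupported protocol -> AssertionError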
134
135 def validate_url_container(container):
136 """validate a potentially nested collection of urls."""
137 if isinstance(container, basestring):
138 url = container
139 return validate_url(url)
140 elif isinstance(container, dict):
141 container = container.itervalues()
142
143 for element in container:
144 validate_url_container(element)
145
146
147 def split_url(url):
148 """split a zmq url (tcp://ip:port) into ('tcp','ip','port')."""
149 proto_addr = url.split('://')
150 assert len(proto_addr) == 2, 'Invalid url: %r'%url
151 proto, addr = proto_addr
152 lis = addr.split(':')
153 assert len(lis) == 2, 'Invalid url: %r'%url
154 addr,s_port = lis
155 return proto,addr,s_port
156
157 def disambiguate_ip_address(ip, location=None):
158 """turn multi-ip interfaces '0.0.0.0' and '*' into connectable
159 ones, based on the location (default interpretation of location is localhost)."""
160 if ip in ('0.0.0.0', '*'):
161 external_ips = socket.gethostbyname_ex(socket.gethostname())[2]
162 if location is None or location in external_ips:
163 ip='127.0.0.1'
164 elif location:
165 return location
166 return ip
167
168 def disambiguate_url(url, location=None):
169 """turn multi-ip interfaces '0.0.0.0' and '*' into connectable
170 ones, based on the location (default interpretation is localhost).
171
172 This is for zeromq urls, such as tcp://*:10101."""
173 try:
174 proto,ip,port = split_url(url)
175 except AssertionError:
176 # probably not tcp url; could be ipc, etc.
177 return url
178
179 ip = disambiguate_ip_address(ip,location)
180
181 return "%s://%s:%s"%(proto,ip,port)
182
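# Rough sketch of the intended behaviour (actual results depend on this machine's
# interfaces; 10.0.0.5 is a made-up address assumed not to be local):
#
# >>> disambiguate_url('tcp://*:10101')                      # wildcard, used locally
# 'tcp://127.0.0.1:10101'
# >>> disambiguate_url('tcp://*:10101', location='10.0.0.5') # wildcard, remote controller
# 'tcp://10.0.0.5:10101'
# >>> disambiguate_url('ipc:///tmp/ipc-socket')              # non-tcp urls pass through
# 'ipc:///tmp/ipc-socket'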
183
184 def rekey(dikt):
185 """Rekey a dict that has been forced to use str keys where there should be
186 ints by json. This belongs in the jsonutil added by fperez."""
187 for k in dikt.iterkeys():
188 if isinstance(k, str):
189 ik=fk=None
190 try:
191 ik = int(k)
192 except ValueError:
193 try:
194 fk = float(k)
195 except ValueError:
196 continue
197 if ik is not None:
198 nk = ik
199 else:
200 nk = fk
201 if nk in dikt:
202 raise KeyError("already have key %r"%nk)
203 dikt[nk] = dikt.pop(k)
204 return dikt
205
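# Illustrative round-trip (a sketch): json stringifies int keys, rekey restores them.
#
# >>> rekey({'0': 'engine-a', '10': 'engine-b', 'name': 'cluster'})
# {0: 'engine-a', 10: 'engine-b', 'name': 'cluster'}    # (key order arbitrary)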
206 def serialize_object(obj, threshold=64e-6):
207 """Serialize an object into a list of sendable buffers.
208
209 Parameters
210 ----------
211
212 obj : object
213 The object to be serialized
214 threshold : float
215 The threshold for not double-pickling the content.
216
217
218 Returns
219 -------
220 ('pmd', [bufs]) :
221 where pmd is the pickled metadata wrapper,
222 bufs is a list of data buffers
223 """
224 databuffers = []
225 if isinstance(obj, (list, tuple)):
226 clist = canSequence(obj)
227 slist = map(serialize, clist)
228 for s in slist:
229 if s.typeDescriptor in ('buffer', 'ndarray') or s.getDataSize() > threshold:
230 databuffers.append(s.getData())
231 s.data = None
232 return pickle.dumps(slist,-1), databuffers
233 elif isinstance(obj, dict):
234 sobj = {}
235 for k in sorted(obj.iterkeys()):
236 s = serialize(can(obj[k]))
237 if s.typeDescriptor in ('buffer', 'ndarray') or s.getDataSize() > threshold:
238 databuffers.append(s.getData())
239 s.data = None
240 sobj[k] = s
241 return pickle.dumps(sobj,-1),databuffers
242 else:
243 s = serialize(can(obj))
244 if s.typeDescriptor in ('buffer', 'ndarray') or s.getDataSize() > threshold:
245 databuffers.append(s.getData())
246 s.data = None
247 return pickle.dumps(s,-1),databuffers
248
249
250 def unserialize_object(bufs):
251 """reconstruct an object serialized by serialize_object from data buffers."""
252 bufs = list(bufs)
253 sobj = pickle.loads(bufs.pop(0))
254 if isinstance(sobj, (list, tuple)):
255 for s in sobj:
256 if s.data is None:
257 s.data = bufs.pop(0)
258 return uncanSequence(map(unserialize, sobj)), bufs
259 elif isinstance(sobj, dict):
260 newobj = {}
261 for k in sorted(sobj.iterkeys()):
262 s = sobj[k]
263 if s.data is None:
264 s.data = bufs.pop(0)
265 newobj[k] = uncan(unserialize(s))
266 return newobj, bufs
267 else:
268 if sobj.data is None:
269 sobj.data = bufs.pop(0)
270 return uncan(unserialize(sobj)), bufs
271
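# Rough round-trip sketch (illustrative; which values land in `bufs` depends on `threshold`):
#
# >>> pmd, bufs = serialize_object(dict(a=1, b=range(1000)))
# >>> obj, remaining = unserialize_object([pmd] + bufs)
# >>> obj['a'], len(obj['b']), remaining
# (1, 1000, [])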
272 def pack_apply_message(f, args, kwargs, threshold=64e-6):
273 """pack up a function, args, and kwargs to be sent over the wire
274 as a series of buffers. Any object whose data is larger than `threshold`
275 will not have its data copied (currently only numpy arrays support zero-copy)"""
276 msg = [pickle.dumps(can(f),-1)]
277 databuffers = [] # for large objects
278 sargs, bufs = serialize_object(args,threshold)
279 msg.append(sargs)
280 databuffers.extend(bufs)
281 skwargs, bufs = serialize_object(kwargs,threshold)
282 msg.append(skwargs)
283 databuffers.extend(bufs)
284 msg.extend(databuffers)
285 return msg
286
287 def unpack_apply_message(bufs, g=None, copy=True):
288 """unpack f,args,kwargs from buffers packed by pack_apply_message()
289 Returns: original f,args,kwargs"""
290 bufs = list(bufs) # allow us to pop
291 assert len(bufs) >= 3, "not enough buffers!"
292 if not copy:
293 for i in range(3):
294 bufs[i] = bufs[i].bytes
295 cf = pickle.loads(bufs.pop(0))
296 sargs = list(pickle.loads(bufs.pop(0)))
297 skwargs = dict(pickle.loads(bufs.pop(0)))
298 # print sargs, skwargs
299 f = uncan(cf, g)
300 for sa in sargs:
301 if sa.data is None:
302 m = bufs.pop(0)
303 if sa.getTypeDescriptor() in ('buffer', 'ndarray'):
304 if copy:
305 sa.data = buffer(m)
306 else:
307 sa.data = m.buffer
308 else:
309 if copy:
310 sa.data = m
311 else:
312 sa.data = m.bytes
313
314 args = uncanSequence(map(unserialize, sargs), g)
315 kwargs = {}
316 for k in sorted(skwargs.iterkeys()):
317 sa = skwargs[k]
318 if sa.data is None:
319 m = bufs.pop(0)
320 if sa.getTypeDescriptor() in ('buffer', 'ndarray'):
321 if copy:
322 sa.data = buffer(m)
323 else:
324 sa.data = m.buffer
325 else:
326 if copy:
327 sa.data = m
328 else:
329 sa.data = m.bytes
330
331 kwargs[k] = uncan(unserialize(sa), g)
332
333 return f,args,kwargs
334
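# Minimal sketch of the wire round-trip (illustrative; in real use the buffers are sent
# over zmq between client, controller, and engine):
#
# >>> def double(x):
# ...     return 2 * x
# >>> bufs = pack_apply_message(double, (21,), {})
# >>> f, args, kwargs = unpack_apply_message(bufs)
# >>> f(*args, **kwargs)
# 42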
335 #--------------------------------------------------------------------------
336 # helpers for implementing old MEC API via view.apply
337 #--------------------------------------------------------------------------
338
339 def interactive(f):
340 """decorator for making functions appear as interactively defined.
341 This results in the function being linked to the user_ns as globals()
342 instead of the module globals().
343 """
344 f.__module__ = '__main__'
345 return f
346
347 @interactive
348 def _push(ns):
349 """helper method for implementing `client.push` via `client.apply`"""
350 globals().update(ns)
351
352 @interactive
353 def _pull(keys):
354 """helper method for implementing `client.pull` via `client.apply`"""
355 user_ns = globals()
356 if isinstance(keys, (list,tuple, set)):
357 for key in keys:
358 if not user_ns.has_key(key):
359 raise NameError("name '%s' is not defined"%key)
360 return map(user_ns.get, keys)
361 else:
362 if not user_ns.has_key(keys):
363 raise NameError("name '%s' is not defined"%keys)
364 return user_ns.get(keys)
365
366 @interactive
367 def _execute(code):
368 """helper method for implementing `client.execute` via `client.apply`"""
369 exec code in globals()
370
371 #--------------------------------------------------------------------------
372 # extra process management utilities
373 #--------------------------------------------------------------------------
374
375 _random_ports = set()
376
377 def select_random_ports(n):
378 """Selects and return n random ports that are available."""
379 ports = []
380 for i in xrange(n):
381 sock = socket.socket()
382 sock.bind(('', 0))
383 while sock.getsockname()[1] in _random_ports:
384 sock.close()
385 sock = socket.socket()
386 sock.bind(('', 0))
387 ports.append(sock)
388 for i, sock in enumerate(ports):
389 port = sock.getsockname()[1]
390 sock.close()
391 ports[i] = port
392 _random_ports.add(port)
393 return ports
394
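# Illustrative use (a sketch; the actual port numbers are whatever the OS hands out):
#
# >>> ports = select_random_ports(3)
# >>> len(ports), len(set(ports))
# (3, 3)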
395 def signal_children(children):
396 """Relay interupt/term signals to children, for more solid process cleanup."""
397 def terminate_children(sig, frame):
398 logging.critical("Got signal %i, terminating children..."%sig)
399 for child in children:
400 child.terminate()
401
402 sys.exit(sig != SIGINT)
403 # sys.exit(sig)
404 for sig in (SIGINT, SIGABRT, SIGTERM):
405 signal(sig, terminate_children)
406
407 def generate_exec_key(keyfile):
408 import uuid
409 newkey = str(uuid.uuid4())
410 with open(keyfile, 'w') as f:
411 # f.write('ipython-key ')
412 f.write(newkey+'\n')
413 # set user-only RW permissions (0600)
414 # this will have no effect on Windows
415 os.chmod(keyfile, stat.S_IRUSR|stat.S_IWUSR)
416
417
418 def integer_loglevel(loglevel):
419 try:
420 loglevel = int(loglevel)
421 except ValueError:
422 if isinstance(loglevel, str):
423 loglevel = getattr(logging, loglevel)
424 return loglevel
425
426 def connect_logger(logname, context, iface, root="ip", loglevel=logging.DEBUG):
427 logger = logging.getLogger(logname)
428 if any([isinstance(h, handlers.PUBHandler) for h in logger.handlers]):
429 # don't add a second PUBHandler
430 return
431 loglevel = integer_loglevel(loglevel)
432 lsock = context.socket(zmq.PUB)
433 lsock.connect(iface)
434 handler = handlers.PUBHandler(lsock)
435 handler.setLevel(loglevel)
436 handler.root_topic = root
437 logger.addHandler(handler)
438 logger.setLevel(loglevel)
439
440 def connect_engine_logger(context, iface, engine, loglevel=logging.DEBUG):
441 logger = logging.getLogger()
442 if any([isinstance(h, handlers.PUBHandler) for h in logger.handlers]):
443 # don't add a second PUBHandler
444 return
445 loglevel = integer_loglevel(loglevel)
446 lsock = context.socket(zmq.PUB)
447 lsock.connect(iface)
448 handler = EnginePUBHandler(engine, lsock)
449 handler.setLevel(loglevel)
450 logger.addHandler(handler)
451 logger.setLevel(loglevel)
452
453 def local_logger(logname, loglevel=logging.DEBUG):
454 loglevel = integer_loglevel(loglevel)
455 logger = logging.getLogger(logname)
456 if any([isinstance(h, logging.StreamHandler) for h in logger.handlers]):
457 # don't add a second StreamHandler
458 return
459 handler = logging.StreamHandler()
460 handler.setLevel(loglevel)
461 logger.addHandler(handler)
462 logger.setLevel(loglevel)
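# Quick sketch of wiring up the stream logger (illustrative; 'iptest' is just an example name):
#
# >>> local_logger('iptest', 'DEBUG')
# >>> logging.getLogger('iptest').debug('engine registered')   # written to stderr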
@@ -1,11 +1,12 b''
1 build
1 build
2 ./dist
2 ./dist
3 docs/dist
3 docs/dist
4 docs/build/*
4 docs/build
5 docs/_build
5 docs/source/api/generated
6 docs/source/api/generated
6 docs/gh-pages
7 docs/gh-pages
7 *.py[co]
8 *.py[co]
8 build
9 build
9 *.egg-info
10 *.egg-info
10 *~
11 *~
11 *.bak
12 *.bak
@@ -1,184 +1,241 b''
1 import os
1 import os
2
2
3 c = get_config()
3 c = get_config()
4
4
5 #-----------------------------------------------------------------------------
5 #-----------------------------------------------------------------------------
6 # Select which launchers to use
6 # Select which launchers to use
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8
8
9 # This allows you to control what method is used to start the controller
9 # This allows you to control what method is used to start the controller
10 # and engines. The following methods are currently supported:
10 # and engines. The following methods are currently supported:
11 # - Start as a regular process on localhost.
11 # - Start as a regular process on localhost.
12 # - Start using mpiexec.
12 # - Start using mpiexec.
13 # - Start using the Windows HPC Server 2008 scheduler
13 # - Start using the Windows HPC Server 2008 scheduler
14 # - Start using PBS
14 # - Start using PBS/SGE
15 # - Start using SSH (currently broken)
15 # - Start using SSH
16
16
17
17
18 # The selected launchers can be configured below.
18 # The selected launchers can be configured below.
19
19
20 # Options are:
20 # Options are:
21 # - LocalControllerLauncher
21 # - LocalControllerLauncher
22 # - MPIExecControllerLauncher
22 # - MPIExecControllerLauncher
23 # - PBSControllerLauncher
23 # - PBSControllerLauncher
24 # - SGEControllerLauncher
24 # - WindowsHPCControllerLauncher
25 # - WindowsHPCControllerLauncher
25 # c.Global.controller_launcher = 'IPython.kernel.launcher.LocalControllerLauncher'
26 # c.Global.controller_launcher = 'IPython.parallel.apps.launcher.LocalControllerLauncher'
27 # c.Global.controller_launcher = 'IPython.parallel.apps.launcher.PBSControllerLauncher'
26
28
27 # Options are:
29 # Options are:
28 # - LocalEngineSetLauncher
30 # - LocalEngineSetLauncher
29 # - MPIExecEngineSetLauncher
31 # - MPIExecEngineSetLauncher
30 # - PBSEngineSetLauncher
32 # - PBSEngineSetLauncher
33 # - SGEEngineSetLauncher
31 # - WindowsHPCEngineSetLauncher
34 # - WindowsHPCEngineSetLauncher
32 # c.Global.engine_launcher = 'IPython.kernel.launcher.LocalEngineSetLauncher'
35 # c.Global.engine_launcher = 'IPython.parallel.apps.launcher.LocalEngineSetLauncher'
33
36
34 #-----------------------------------------------------------------------------
37 #-----------------------------------------------------------------------------
35 # Global configuration
38 # Global configuration
36 #-----------------------------------------------------------------------------
39 #-----------------------------------------------------------------------------
37
40
38 # The default number of engines that will be started. This is overridden by
41 # The default number of engines that will be started. This is overridden by
39 # the -n command line option: "ipcluster start -n 4"
42 # the -n command line option: "ipcluster start -n 4"
40 # c.Global.n = 2
43 # c.Global.n = 2
41
44
42 # Log to a file in cluster_dir/log, otherwise just log to sys.stdout.
45 # Log to a file in cluster_dir/log, otherwise just log to sys.stdout.
43 # c.Global.log_to_file = False
46 # c.Global.log_to_file = False
44
47
45 # Remove old logs from cluster_dir/log before starting.
48 # Remove old logs from cluster_dir/log before starting.
46 # c.Global.clean_logs = True
49 # c.Global.clean_logs = True
47
50
48 # The working directory for the process. The application will use os.chdir
51 # The working directory for the process. The application will use os.chdir
49 # to change to this directory before starting.
52 # to change to this directory before starting.
50 # c.Global.work_dir = os.getcwd()
53 # c.Global.work_dir = os.getcwd()
51
54
52
55
53 #-----------------------------------------------------------------------------
56 #-----------------------------------------------------------------------------
54 # Local process launchers
57 # Local process launchers
55 #-----------------------------------------------------------------------------
58 #-----------------------------------------------------------------------------
56
59
57 # The command line arguments to call the controller with.
60 # The command line arguments to call the controller with.
58 # c.LocalControllerLauncher.controller_args = \
61 # c.LocalControllerLauncher.controller_args = \
59 # ['--log-to-file','--log-level', '40']
62 # ['--log-to-file','--log-level', '40']
60
63
61 # The working directory for the controller
64 # The working directory for the controller
62 # c.LocalEngineSetLauncher.work_dir = u''
65 # c.LocalEngineSetLauncher.work_dir = u''
63
66
64 # Command line argument passed to the engines.
67 # Command line argument passed to the engines.
65 # c.LocalEngineSetLauncher.engine_args = ['--log-to-file','--log-level', '40']
68 # c.LocalEngineSetLauncher.engine_args = ['--log-to-file','--log-level', '40']
66
69
67 #-----------------------------------------------------------------------------
70 #-----------------------------------------------------------------------------
68 # MPIExec launchers
71 # MPIExec launchers
69 #-----------------------------------------------------------------------------
72 #-----------------------------------------------------------------------------
70
73
71 # The mpiexec/mpirun command to use in started the controller.
74 # The mpiexec/mpirun command to use in both the controller and engines.
72 # c.MPIExecControllerLauncher.mpi_cmd = ['mpiexec']
75 # c.MPIExecLauncher.mpi_cmd = ['mpiexec']
73
76
74 # Additional arguments to pass to the actual mpiexec command.
77 # Additional arguments to pass to the actual mpiexec command.
78 # c.MPIExecLauncher.mpi_args = []
79
80 # The mpiexec/mpirun command and args can be overridden if they should be different
81 # for controller and engines.
82 # c.MPIExecControllerLauncher.mpi_cmd = ['mpiexec']
75 # c.MPIExecControllerLauncher.mpi_args = []
83 # c.MPIExecControllerLauncher.mpi_args = []
84 # c.MPIExecEngineSetLauncher.mpi_cmd = ['mpiexec']
85 # c.MPIExecEngineSetLauncher.mpi_args = []
76
86
77 # The command line argument to call the controller with.
87 # The command line argument to call the controller with.
78 # c.MPIExecControllerLauncher.controller_args = \
88 # c.MPIExecControllerLauncher.controller_args = \
79 # ['--log-to-file','--log-level', '40']
89 # ['--log-to-file','--log-level', '40']
80
90
81
82 # The mpiexec/mpirun command to use in started the controller.
83 # c.MPIExecEngineSetLauncher.mpi_cmd = ['mpiexec']
84
85 # Additional arguments to pass to the actual mpiexec command.
86 # c.MPIExecEngineSetLauncher.mpi_args = []
87
88 # Command line argument passed to the engines.
91 # Command line argument passed to the engines.
89 # c.MPIExecEngineSetLauncher.engine_args = ['--log-to-file','--log-level', '40']
92 # c.MPIExecEngineSetLauncher.engine_args = ['--log-to-file','--log-level', '40']
90
93
91 # The default number of engines to start if not given elsewhere.
94 # The default number of engines to start if not given elsewhere.
92 # c.MPIExecEngineSetLauncher.n = 1
95 # c.MPIExecEngineSetLauncher.n = 1
93
96
94 #-----------------------------------------------------------------------------
97 #-----------------------------------------------------------------------------
95 # SSH launchers
98 # SSH launchers
96 #-----------------------------------------------------------------------------
99 #-----------------------------------------------------------------------------
97
100
98 # Todo
101 # ipclusterz can be used to launch controller and engines remotely via ssh.
102 # Note that currently ipclusterz does not do any file distribution, so if
103 # machines are not on a shared filesystem, config and json files must be
104 # distributed. For this reason, the reuse_files defaults to True on an
105 # ssh-launched Controller. This flag can be overridden by the program_args
106 # attribute of c.SSHControllerLauncher.
107
108 # set the ssh cmd for launching remote commands. The default is ['ssh']
109 # c.SSHLauncher.ssh_cmd = ['ssh']
110
111 # set the ssh cmd for launching remote commands. The default is ['ssh']
112 # c.SSHLauncher.ssh_args = ['tt']
113
114 # Set the user and hostname for the controller
115 # c.SSHControllerLauncher.hostname = 'controller.example.com'
116 # c.SSHControllerLauncher.user = os.environ.get('USER','username')
117
118 # Set the arguments to be passed to ipcontrollerz
119 # note that remotely launched ipcontrollerz will not get the contents of
120 # the local ipcontrollerz_config.py unless it resides on the *remote host*
121 # in the location specified by the --cluster_dir argument.
122 # c.SSHControllerLauncher.program_args = ['-r', '-ip', '0.0.0.0', '--cluster_dir', '/path/to/cd']
123
124 # Set the default args passed to ipenginez for SSH launched engines
125 # c.SSHEngineSetLauncher.engine_args = ['--mpi', 'mpi4py']
99
126
127 # SSH engines are launched as a dict of locations/n-engines.
128 # if a value is a tuple instead of an int, it is assumed to be of the form
129 # (n, [args]), setting the arguments to passed to ipenginez on `host`.
130 # otherwise, c.SSHEngineSetLauncher.engine_args will be used as the default.
131
132 # In this case, there will be 3 engines at my.example.com, and
133 # 2 at you@ipython.scipy.org with a special json connector location.
134 # c.SSHEngineSetLauncher.engines = {'my.example.com' : 3,
135 # 'you@ipython.scipy.org' : (2, ['-f', '/path/to/ipcontroller-engine.json'])
136 # }
100
137
101 #-----------------------------------------------------------------------------
138 #-----------------------------------------------------------------------------
102 # Unix batch (PBS) schedulers launchers
139 # Unix batch (PBS) schedulers launchers
103 #-----------------------------------------------------------------------------
140 #-----------------------------------------------------------------------------
104
141
142 # SGE and PBS are very similar. All configurables in this section called 'PBS*'
143 # also exist as 'SGE*'.
144
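# For example, following the naming rule above, the SGE analogs of the PBS options
# below would be set like this (an illustrative sketch):
# c.SGELauncher.queue = 'myqueue'
# c.SGEControllerLauncher.batch_file_name = u'sge_controller'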
105 # The command line program to use to submit a PBS job.
145 # The command line program to use to submit a PBS job.
106 # c.PBSControllerLauncher.submit_command = 'qsub'
146 # c.PBSLauncher.submit_command = ['qsub']
107
147
108 # The command line program to use to delete a PBS job.
148 # The command line program to use to delete a PBS job.
109 # c.PBSControllerLauncher.delete_command = 'qdel'
149 # c.PBSLauncher.delete_command = ['qdel']
150
151 # The PBS queue in which the job should run
152 # c.PBSLauncher.queue = 'myqueue'
110
153
111 # A regular expression that takes the output of qsub and find the job id.
154 # A regular expression that takes the output of qsub and find the job id.
112 # c.PBSControllerLauncher.job_id_regexp = r'\d+'
155 # c.PBSLauncher.job_id_regexp = r'\d+'
156
157 # If for some reason the Controller and Engines have different options above, they
158 # can be set as c.PBSControllerLauncher.<option> etc.
159
160 # PBS and SGE have default templates, but you can specify your own, either as strings
161 # or from files, as described here:
113
162
114 # The batch submission script used to start the controller. This is where
163 # The batch submission script used to start the controller. This is where
115 # environment variables would be setup, etc. This string is interpolated using
164 # environment variables would be setup, etc. This string is interpreted using
116 # the Itpl module in IPython.external. Basically, you can use ${n} for the
165 # the Itpl module in IPython.external. Basically, you can use ${n} for the
117 # number of engines and ${cluster_dir} for the cluster_dir.
166 # number of engines and ${cluster_dir} for the cluster_dir.
118 # c.PBSControllerLauncher.batch_template = """"""
167 # c.PBSControllerLauncher.batch_template = """
168 # #PBS -N ipcontroller
169 # #PBS -q $queue
170 #
171 # ipcontrollerz --cluster-dir $cluster_dir
172 # """
173
174 # You can also load this template from a file
175 # c.PBSControllerLauncher.batch_template_file = u"/path/to/my/template.sh"
119
176
120 # The name of the instantiated batch script that will actually be used to
177 # The name of the instantiated batch script that will actually be used to
121 # submit the job. This will be written to the cluster directory.
178 # submit the job. This will be written to the cluster directory.
122 # c.PBSControllerLauncher.batch_file_name = u'pbs_batch_script_controller'
179 # c.PBSControllerLauncher.batch_file_name = u'pbs_controller'
123
124
125 # The command line program to use to submit a PBS job.
126 # c.PBSEngineSetLauncher.submit_command = 'qsub'
127
128 # The command line program to use to delete a PBS job.
129 # c.PBSEngineSetLauncher.delete_command = 'qdel'
130
131 # A regular expression that takes the output of qsub and find the job id.
132 # c.PBSEngineSetLauncher.job_id_regexp = r'\d+'
133
180
134 # The batch submission script used to start the engines. This is where
181 # The batch submission script used to start the engines. This is where
135 # environment variables would be setup, etc. This string is interpolated using
182 # environment variables would be setup, etc. This string is interpreted using
136 # the Itpl module in IPython.external. Basically, you can use ${n} for the
183 # the Itpl module in IPython.external. Basically, you can use ${n} for the
137 # number of engines and ${cluster_dir} for the cluster_dir.
184 # number of engines and ${cluster_dir} for the cluster_dir.
138 # c.PBSEngineSetLauncher.batch_template = """"""
185 # c.PBSEngineSetLauncher.batch_template = """
186 # #PBS -N ipengine
187 # #PBS -l nprocs=$n
188 #
189 # ipenginez --cluster-dir $cluster_dir
190 # """
191
192 # You can also load this template from a file
193 # c.PBSEngineSetLauncher.batch_template_file = u"/path/to/my/template.sh"
139
194
140 # The name of the instantiated batch script that will actually be used to
195 # The name of the instantiated batch script that will actually be used to
141 # submit the job. This will be written to the cluster directory.
196 # submit the job. This will be written to the cluster directory.
142 # c.PBSEngineSetLauncher.batch_file_name = u'pbs_batch_script_engines'
197 # c.PBSEngineSetLauncher.batch_file_name = u'pbs_engines'
198
199
143
200
144 #-----------------------------------------------------------------------------
201 #-----------------------------------------------------------------------------
145 # Windows HPC Server 2008 launcher configuration
202 # Windows HPC Server 2008 launcher configuration
146 #-----------------------------------------------------------------------------
203 #-----------------------------------------------------------------------------
147
204
148 # c.IPControllerJob.job_name = 'IPController'
205 # c.IPControllerJob.job_name = 'IPController'
149 # c.IPControllerJob.is_exclusive = False
206 # c.IPControllerJob.is_exclusive = False
150 # c.IPControllerJob.username = r'USERDOMAIN\USERNAME'
207 # c.IPControllerJob.username = r'USERDOMAIN\USERNAME'
151 # c.IPControllerJob.priority = 'Highest'
208 # c.IPControllerJob.priority = 'Highest'
152 # c.IPControllerJob.requested_nodes = ''
209 # c.IPControllerJob.requested_nodes = ''
153 # c.IPControllerJob.project = 'MyProject'
210 # c.IPControllerJob.project = 'MyProject'
154
211
155 # c.IPControllerTask.task_name = 'IPController'
212 # c.IPControllerTask.task_name = 'IPController'
156 # c.IPControllerTask.controller_cmd = [u'ipcontroller.exe']
213 # c.IPControllerTask.controller_cmd = [u'ipcontroller.exe']
157 # c.IPControllerTask.controller_args = ['--log-to-file', '--log-level', '40']
214 # c.IPControllerTask.controller_args = ['--log-to-file', '--log-level', '40']
158 # c.IPControllerTask.environment_variables = {}
215 # c.IPControllerTask.environment_variables = {}
159
216
160 # c.WindowsHPCControllerLauncher.scheduler = 'HEADNODE'
217 # c.WindowsHPCControllerLauncher.scheduler = 'HEADNODE'
161 # c.WindowsHPCControllerLauncher.job_file_name = u'ipcontroller_job.xml'
218 # c.WindowsHPCControllerLauncher.job_file_name = u'ipcontroller_job.xml'
162
219
163
220
164 # c.IPEngineSetJob.job_name = 'IPEngineSet'
221 # c.IPEngineSetJob.job_name = 'IPEngineSet'
165 # c.IPEngineSetJob.is_exclusive = False
222 # c.IPEngineSetJob.is_exclusive = False
166 # c.IPEngineSetJob.username = r'USERDOMAIN\USERNAME'
223 # c.IPEngineSetJob.username = r'USERDOMAIN\USERNAME'
167 # c.IPEngineSetJob.priority = 'Highest'
224 # c.IPEngineSetJob.priority = 'Highest'
168 # c.IPEngineSetJob.requested_nodes = ''
225 # c.IPEngineSetJob.requested_nodes = ''
169 # c.IPEngineSetJob.project = 'MyProject'
226 # c.IPEngineSetJob.project = 'MyProject'
170
227
171 # c.IPEngineTask.task_name = 'IPEngine'
228 # c.IPEngineTask.task_name = 'IPEngine'
172 # c.IPEngineTask.engine_cmd = [u'ipengine.exe']
229 # c.IPEngineTask.engine_cmd = [u'ipengine.exe']
173 # c.IPEngineTask.engine_args = ['--log-to-file', '--log-level', '40']
230 # c.IPEngineTask.engine_args = ['--log-to-file', '--log-level', '40']
174 # c.IPEngineTask.environment_variables = {}
231 # c.IPEngineTask.environment_variables = {}
175
232
176 # c.WindowsHPCEngineSetLauncher.scheduler = 'HEADNODE'
233 # c.WindowsHPCEngineSetLauncher.scheduler = 'HEADNODE'
177 # c.WindowsHPCEngineSetLauncher.job_file_name = u'ipengineset_job.xml'
234 # c.WindowsHPCEngineSetLauncher.job_file_name = u'ipengineset_job.xml'
178
235
179
236
180
237
181
238
182
239
183
240
184
241
@@ -1,136 +1,180 b''
1 from IPython.config.loader import Config
1 from IPython.config.loader import Config
2
2
3 c = get_config()
3 c = get_config()
4
4
5 #-----------------------------------------------------------------------------
5 #-----------------------------------------------------------------------------
6 # Global configuration
6 # Global configuration
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8
8
9 # Basic Global config attributes
9 # Basic Global config attributes
10
10
11 # Start up messages are logged to stdout using the logging module.
11 # Start up messages are logged to stdout using the logging module.
12 # These all happen before the twisted reactor is started and are
12 # These all happen before the twisted reactor is started and are
13 # useful for debugging purposes. Can be (10=DEBUG,20=INFO,30=WARN,40=ERROR,50=CRITICAL)
13 # useful for debugging purposes. Can be (10=DEBUG,20=INFO,30=WARN,40=ERROR,50=CRITICAL)
14 # and smaller is more verbose.
14 # and smaller is more verbose.
15 # c.Global.log_level = 20
15 # c.Global.log_level = 20
16
16
17 # Log to a file in cluster_dir/log, otherwise just log to sys.stdout.
17 # Log to a file in cluster_dir/log, otherwise just log to sys.stdout.
18 # c.Global.log_to_file = False
18 # c.Global.log_to_file = False
19
19
20 # Remove old logs from cluster_dir/log before starting.
20 # Remove old logs from cluster_dir/log before starting.
21 # c.Global.clean_logs = True
21 # c.Global.clean_logs = True
22
22
23 # A list of Python statements that will be run before starting the
23 # A list of Python statements that will be run before starting the
24 # controller. This is provided because occasionally certain things need to
24 # controller. This is provided because occasionally certain things need to
25 # be imported in the controller for pickling to work.
25 # be imported in the controller for pickling to work.
26 # c.Global.import_statements = ['import math']
26 # c.Global.import_statements = ['import math']
27
27
28 # Reuse the controller's FURL files. If False, FURL files are regenerated
28 # Reuse the controller's JSON files. If False, JSON files are regenerated
29 # each time the controller is run. If True, they will be reused, *but*, you
29 # each time the controller is run. If True, they will be reused, *but*, you
30 # also must set the network ports by hand. If set, this will override the
30 # also must set the network ports by hand. If set, this will override the
31 # values set for the client and engine connections below.
31 # values set for the client and engine connections below.
32 # c.Global.reuse_furls = True
32 # c.Global.reuse_files = True
33
33
34 # Enable SSL encryption on all connections to the controller. If set, this
34 # Enable exec_key authentication on all messages. Default is True
35 # will override the values set for the client and engine connections below.
36 # c.Global.secure = True
35 # c.Global.secure = True
37
36
38 # The working directory for the process. The application will use os.chdir
37 # The working directory for the process. The application will use os.chdir
39 # to change to this directory before starting.
38 # to change to this directory before starting.
40 # c.Global.work_dir = os.getcwd()
39 # c.Global.work_dir = os.getcwd()
41
40
41 # The log url for logging to an `iploggerz` application. This will override
42 # log-to-file.
43 # c.Global.log_url = 'tcp://127.0.0.1:20202'
44
45 # The specific external IP that is used to disambiguate multi-interface URLs.
46 # The default behavior is to guess from external IPs gleaned from `socket`.
47 # c.Global.location = '192.168.1.123'
48
49 # The ssh server remote clients should use to connect to this controller.
50 # It must be a machine that can see the interface specified in client_ip.
51 # The default for client_ip is localhost, in which case the sshserver must
52 # be an external IP of the controller machine.
53 # c.Global.sshserver = 'controller.example.com'
54
55 # the url to use for registration. If set, this overrides engine-ip,
56 # engine-transport client-ip,client-transport, and regport.
57 # c.RegistrationFactory.url = 'tcp://*:12345'
58
59 # the port to use for registration. Clients and Engines both use this
60 # port for registration.
61 # c.RegistrationFactory.regport = 10101
62
42 #-----------------------------------------------------------------------------
63 #-----------------------------------------------------------------------------
43 # Configure the client services
64 # Configure the Task Scheduler
44 #-----------------------------------------------------------------------------
65 #-----------------------------------------------------------------------------
45
66
46 # Basic client service config attributes
67 # The routing scheme. 'pure' will use the pure-ZMQ scheduler. Any other
68 # value will use a Python scheduler with various routing schemes.
69 # python schemes are: lru, weighted, random, twobin. Default is 'weighted'.
70 # Note that the pure ZMQ scheduler does not support many features, such as
71 # dying engines, dependencies, or engine-subset load-balancing.
72 # c.ControllerFactory.scheme = 'pure'
47
73
48 # The network interface the controller will listen on for client connections.
74 # The pure ZMQ scheduler can limit the number of outstanding tasks per engine
49 # This should be an IP address or hostname of the controller's host. The empty
75 # by using the ZMQ HWM option. This allows engines with long-running tasks
50 # string means listen on all interfaces.
76 # to not steal too many tasks from other engines. The default is 0, which
51 # c.FCClientServiceFactory.ip = ''
77 # means aggressively distribute messages, never waiting for them to finish.
78 # c.ControllerFactory.hwm = 1
52
79
53 # The TCP/IP port the controller will listen on for client connections. If 0
80 # Whether to use Threads or Processes to start the Schedulers. Threads will
54 # a random port will be used. If the controller's host has a firewall running
81 # use less resources, but potentially reduce throughput. Default is to
55 # it must allow incoming traffic on this port.
82 # use processes. Note that the a Python scheduler will always be in a Process.
56 # c.FCClientServiceFactory.port = 0
83 # c.ControllerFactory.usethreads
57
84
58 # The client learns how to connect to the controller by looking at the
85 #-----------------------------------------------------------------------------
59 # location field embedded in the FURL. If this field is empty, all network
86 # Configure the Hub
60 # interfaces that the controller is listening on will be listed. To have the
87 #-----------------------------------------------------------------------------
61 # client connect on a particular interface, list it here.
88
62 # c.FCClientServiceFactory.location = ''
89 # Which class to use for the db backend. Currently supported are DictDB (the
90 # default), and MongoDB. Uncomment this line to enable MongoDB, which will
91 # slow down the Hub's responsiveness, but also reduce its memory footprint.
92 # c.HubFactory.db_class = 'IPython.parallel.controller.mongodb.MongoDB'
63
93
64 # Use SSL encryption for the client connection.
94 # The heartbeat ping frequency. This is the frequency (in ms) at which the
65 # c.FCClientServiceFactory.secure = True
95 # Hub pings engines for heartbeats. This determines how quickly the Hub
96 # will react to engines coming and going. A lower number means faster response
97 # time, but more network activity. The default is 100ms
98 # c.HubFactory.ping = 100
66
99
67 # Reuse the client FURL each time the controller is started. If set, you must
100 # HubFactory queue port pairs, to set by name: mux, iopub, control, task. Set
68 # also pick a specific network port above (FCClientServiceFactory.port).
101 # each as a tuple of length 2 of ints. The default is to find random
69 # c.FCClientServiceFactory.reuse_furls = False
102 # available ports
103 # c.HubFactory.mux = (10102,10112)
70
104
71 #-----------------------------------------------------------------------------
105 #-----------------------------------------------------------------------------
72 # Configure the engine services
106 # Configure the client connections
73 #-----------------------------------------------------------------------------
107 #-----------------------------------------------------------------------------
74
108
75 # Basic config attributes for the engine services.
109 # Basic client connection config attributes
76
110
77 # The network interface the controller will listen on for engine connections.
111 # The network interface the controller will listen on for client connections.
78 # This should be an IP address or hostname of the controller's host. The empty
112 # This should be an IP address or interface on the controller. An asterisk
79 # string means listen on all interfaces.
113 # means listen on all interfaces. The transport can be any transport
80 # c.FCEngineServiceFactory.ip = ''
114 # supported by zeromq (tcp,epgm,pgm,ib,ipc):
115 # c.HubFactory.client_ip = '*'
116 # c.HubFactory.client_transport = 'tcp'
81
117
82 # The TCP/IP port the controller will listen on for engine connections. If 0
118 # individual client ports to configure by name: query_port, notifier_port
83 # a random port will be used. If the controller's host has a firewall running
119 # c.HubFactory.query_port = 12345
84 # it must allow incoming traffic on this port.
85 # c.FCEngineServiceFactory.port = 0
86
120
87 # The engine learns how to connect to the controller by looking at the
121 #-----------------------------------------------------------------------------
88 # location field embedded in the FURL. If this field is empty, all network
122 # Configure the engine connections
89 # interfaces that the controller is listening on will be listed. To have the
123 #-----------------------------------------------------------------------------
90 # client connect on a particular interface, list it here.
91 # c.FCEngineServiceFactory.location = ''
92
124
93 # Use SSL encryption for the engine connection.
125 # Basic config attributes for the engine connections.
94 # c.FCEngineServiceFactory.secure = True
95
126
96 # Reuse the client FURL each time the controller is started. If set, you must
127 # The network interface the controller will listen on for engine connections.
97 # also pick a specific network port above (FCClientServiceFactory.port).
128 # This should be an IP address or interface on the controller. An asterisk
98 # c.FCEngineServiceFactory.reuse_furls = False
129 # means listen on all interfaces. The transport can be any transport
130 # supported by zeromq (tcp,epgm,pgm,ib,ipc):
131 # c.HubFactory.engine_ip = '*'
132 # c.HubFactory.engine_transport = 'tcp'
133
134 # set the engine heartbeat ports to use:
135 # c.HubFactory.hb = (10303,10313)
99
136
100 #-----------------------------------------------------------------------------
137 #-----------------------------------------------------------------------------
101 # Developer level configuration attributes
138 # Configure the TaskRecord database backend
102 #-----------------------------------------------------------------------------
139 #-----------------------------------------------------------------------------
103
140
104 # You shouldn't have to modify anything in this section. These attributes
141 # For memory/persistence reasons, tasks can be stored out-of-memory in a database.
105 # are more for developers who want to change the behavior of the controller
142 # Currently, only sqlite and mongodb are supported as backends, but the interface
106 # at a fundamental level.
143 # is fairly simple, so advanced developers could write their own backend.
107
144
108 # c.FCClientServiceFactory.cert_file = u'ipcontroller-client.pem'
145 # ----- in-memory configuration --------
109
146 # this line restores the default behavior: in-memory storage of all results.
110 # default_client_interfaces = Config()
147 # c.HubFactory.db_class = 'IPython.parallel.controller.dictdb.DictDB'
111 # default_client_interfaces.Task.interface_chain = [
148
112 # 'IPython.kernel.task.ITaskController',
149 # ----- sqlite configuration --------
113 # 'IPython.kernel.taskfc.IFCTaskController'
150 # use this line to activate sqlite:
114 # ]
151 # c.HubFactory.db_class = 'IPython.parallel.controller.sqlitedb.SQLiteDB'
115 #
152
116 # default_client_interfaces.Task.furl_file = u'ipcontroller-tc.furl'
153 # You can specify the name of the db-file. By default, this will be located
117 #
154 # in the active cluster_dir, e.g. ~/.ipython/clusterz_default/tasks.db
118 # default_client_interfaces.MultiEngine.interface_chain = [
155 # c.SQLiteDB.filename = 'tasks.db'
119 # 'IPython.kernel.multiengine.IMultiEngine',
156
120 # 'IPython.kernel.multienginefc.IFCSynchronousMultiEngine'
157 # You can also specify the location of the db-file, if you want it to be somewhere
121 # ]
158 # other than the cluster_dir.
122 #
159 # c.SQLiteDB.location = '/scratch/'
123 # default_client_interfaces.MultiEngine.furl_file = u'ipcontroller-mec.furl'
160
124 #
161 # This will specify the name of the table for the controller to use. The default
125 # c.FCEngineServiceFactory.interfaces = default_client_interfaces
162 # behavior is to use the session ID of the SessionFactory object (a uuid). Overriding
126
163 # this will result in results persisting for multiple sessions.
127 # c.FCEngineServiceFactory.cert_file = u'ipcontroller-engine.pem'
164 # c.SQLiteDB.table = 'results'
128
165
129 # default_engine_interfaces = Config()
166 # ----- mongodb configuration --------
130 # default_engine_interfaces.Default.interface_chain = [
167 # use this line to activate mongodb:
131 # 'IPython.kernel.enginefc.IFCControllerBase'
168 # c.HubFactory.db_class = 'IPython.parallel.controller.mongodb.MongoDB'
132 # ]
169
133 #
170 # You can specify the args and kwargs pymongo will use when creating the Connection.
134 # default_engine_interfaces.Default.furl_file = u'ipcontroller-engine.furl'
171 # For more information on what these options might be, see pymongo documentation.
135 #
172 # c.MongoDB.connection_kwargs = {}
136 # c.FCEngineServiceFactory.interfaces = default_engine_interfaces
173 # c.MongoDB.connection_args = []
174
175 # This will specify the name of the mongo database for the controller to use. The default
176 # behavior is to use the session ID of the SessionFactory object (a uuid). Overriding
177 # this will result in task results persisting through multiple sessions.
178 # c.MongoDB.database = 'ipythondb'
179
180
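If the SQLiteDB backend added above is enabled, the resulting file is an ordinary sqlite database and can be inspected outside IPython. The snippet below is only a sketch using the standard-library sqlite3 module; the path mirrors the default mentioned in the comments and the table name depends on your session ID, so both are assumptions here.

from __future__ import print_function
import os
import sqlite3

# Default location described above; adjust to your own cluster_dir.
db_path = os.path.expanduser('~/.ipython/clusterz_default/tasks.db')

conn = sqlite3.connect(db_path)
# The controller names its table after the session ID unless
# c.SQLiteDB.table is overridden, so just list whatever tables exist.
tables = conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
for (name,) in tables:
    count = conn.execute('SELECT COUNT(*) FROM "%s"' % name).fetchone()[0]
    print(name, count, 'records')
conn.close()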
@@ -1,90 +1,85 @@
c = get_config()

#-----------------------------------------------------------------------------
# Global configuration
#-----------------------------------------------------------------------------

# Start up messages are logged to stdout using the logging module.
# These all happen before the twisted reactor is started and are
# useful for debugging purposes. Can be (10=DEBUG, 20=INFO, 30=WARN, 40=CRITICAL),
# and smaller is more verbose.
# c.Global.log_level = 20

# Log to a file in cluster_dir/log, otherwise just log to sys.stdout.
# c.Global.log_to_file = False

# Remove old logs from cluster_dir/log before starting.
# c.Global.clean_logs = True

# A list of strings that will be executed in the user's namespace on the engine
# before it connects to the controller.
# c.Global.exec_lines = ['import numpy']

# The engine will try to connect to the controller multiple times, to allow
# the controller time to start up and write its FURL file. These parameters
# control the number of retries (connect_max_tries) and the initial delay
# (connect_delay) between attempts. The actual delay between attempts gets
# longer each time by a factor of 1.5 (delay[i] = 1.5*delay[i-1]).
# c.Global.connect_delay = 0.1
# c.Global.connect_max_tries = 15
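To make the two settings above concrete, here is a small worked example of the back-off schedule delay[i] = 1.5*delay[i-1] described in the comment; the helper name is invented for this sketch.

from __future__ import print_function

def connect_schedule(connect_delay=0.1, connect_max_tries=15, factor=1.5):
    """Yield (attempt, delay) pairs for the geometric back-off described above."""
    delay = connect_delay
    for attempt in range(1, connect_max_tries + 1):
        yield attempt, delay
        delay *= factor

total = 0.0
for attempt, delay in connect_schedule():
    total += delay
    print('attempt %2i: wait %.2fs' % (attempt, delay))
print('total wait if every attempt fails: ~%.0fs' % total)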

-# By default, the engine will look for the controller's FURL file in its own
-# cluster directory. Sometimes, the FURL file will be elsewhere and this
-# attribute can be set to the full path of the FURL file.
-# c.Global.furl_file = u''
+# By default, the engine will look for the controller's JSON file in its own
+# cluster directory. Sometimes, the JSON file will be elsewhere and this
+# attribute can be set to the full path of the JSON file.
+# c.Global.url_file = u'/path/to/my/ipcontroller-engine.json'

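A practical consequence of the FURL-to-JSON change above is that the connection file is now plain JSON, so it can be read with the standard library when debugging a connection problem. The path below is just the example from the comment, and no particular field names are assumed.

from __future__ import print_function
import json

# Example path from the comment above; point this at your actual file.
url_file = '/path/to/my/ipcontroller-engine.json'

with open(url_file) as f:
    connection_info = json.load(f)

# Print whatever keys the controller wrote, without assuming their names.
for key in sorted(connection_info):
    print(key, '=', connection_info[key])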
# The working directory for the process. The application will use os.chdir
# to change to this directory before starting.
# c.Global.work_dir = os.getcwd()

#-----------------------------------------------------------------------------
# MPI configuration
#-----------------------------------------------------------------------------

# Upon starting, the engine can be configured to call MPI_Init. This section
# configures that.

# Select which MPI section to execute to set up MPI. The value of this
# attribute must match the name of another attribute in the MPI config
# section (mpi4py, pytrilinos, etc.). This can also be set by the --mpi
# command line option.
# c.MPI.use = ''

# Initialize MPI using mpi4py. To use this, set c.MPI.use = 'mpi4py' or pass
# --mpi=mpi4py at the command line.
# c.MPI.mpi4py = """from mpi4py import MPI as mpi
# mpi.size = mpi.COMM_WORLD.Get_size()
# mpi.rank = mpi.COMM_WORLD.Get_rank()
# """

# Initialize MPI using pytrilinos. To use this, set c.MPI.use = 'pytrilinos'
# or pass --mpi=pytrilinos at the command line.
# c.MPI.pytrilinos = """from PyTrilinos import Epetra
# class SimpleStruct:
#     pass
# mpi = SimpleStruct()
# mpi.rank = 0
# mpi.size = 0
# """
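The selection mechanism described above (c.MPI.use names which of the other MPI attributes is executed at engine startup) can be pictured with a stand-alone sketch. This is not the engine's actual startup code; the dict, variable names, and use of exec are assumptions made purely to illustrate the dispatch, and running it requires mpi4py to be installed.

from __future__ import print_function

# Hypothetical stand-ins for the c.MPI.* attributes above.
mpi_snippets = {
    'mpi4py': ("from mpi4py import MPI as mpi\n"
               "mpi.size = mpi.COMM_WORLD.Get_size()\n"
               "mpi.rank = mpi.COMM_WORLD.Get_rank()\n"),
}
use = 'mpi4py'   # what c.MPI.use (or --mpi on the command line) would select

namespace = {}
if use in mpi_snippets:
    # Execute the snippet whose name matches the 'use' setting, leaving
    # an 'mpi' object with rank/size behind in the engine's namespace.
    exec(mpi_snippets[use], namespace)
    print('rank %i of %i' % (namespace['mpi'].rank, namespace['mpi'].size))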

#-----------------------------------------------------------------------------
# Developer level configuration attributes
#-----------------------------------------------------------------------------

# You shouldn't have to modify anything in this section. These attributes
# are more for developers who want to change the behavior of the controller
# at a fundamental level.

# You should not have to change these attributes.

-# c.Global.shell_class = 'IPython.kernel.core.interpreter.Interpreter'
-
-# c.Global.furl_file_name = u'ipcontroller-engine.furl'
+# c.Global.url_file_name = u'ipcontroller-engine.furl'
Files renamed in this commit (contents truncated in this view):

docs/examples/kernel/wordfreq.py -> docs/examples/newparallel/davinci/wordfreq.py
docs/examples/kernel/fetchparse.py -> docs/examples/newparallel/fetchparse.py
docs/examples/kernel/mcdriver.py -> docs/examples/newparallel/mcdriver.py
docs/examples/kernel/mcpricer.py -> docs/examples/newparallel/mcpricer.py
docs/examples/kernel/nwmerge.py -> docs/examples/newparallel/nwmerge.py
docs/examples/kernel/parallelpi.py -> docs/examples/newparallel/parallelpi.py
docs/examples/kernel/phistogram.py -> docs/examples/newparallel/phistogram.py
docs/examples/kernel/pidigits.py -> docs/examples/newparallel/pidigits.py
docs/examples/kernel/plotting_backend.py -> docs/examples/newparallel/plotting/plotting_backend.py
docs/examples/kernel/plotting_frontend.py -> docs/examples/newparallel/plotting/plotting_frontend.py
docs/examples/kernel/rmt.ipy -> docs/examples/newparallel/rmt/rmt.ipy
docs/examples/kernel/rmtkernel.py -> docs/examples/newparallel/rmt/rmtkernel.py
docs/examples/kernel/task_profiler.py -> docs/examples/newparallel/task_profiler.py
Additional modified and removed files (including binary files) are truncated in this view.