organize IPython.parallel into subpackages
MinRK

The requested changes are too big and the content was truncated.

1 NO CONTENT: new file 100644
1 NO CONTENT: new file 100644
1 NO CONTENT: new file 100644
1 NO CONTENT: new file 100644
1 NO CONTENT: new file 100644
@@ -1,241 +1,241 b''
1 1 import os
2 2
3 3 c = get_config()
4 4
5 5 #-----------------------------------------------------------------------------
6 6 # Select which launchers to use
7 7 #-----------------------------------------------------------------------------
8 8
9 9 # This allows you to control what method is used to start the controller
10 10 # and engines. The following methods are currently supported:
11 11 # - Start as a regular process on localhost.
12 12 # - Start using mpiexec.
13 13 # - Start using the Windows HPC Server 2008 scheduler
14 14 # - Start using PBS/SGE
15 15 # - Start using SSH
16 16
17 17
18 18 # The selected launchers can be configured below.
19 19
20 20 # Options are:
21 21 # - LocalControllerLauncher
22 22 # - MPIExecControllerLauncher
23 23 # - PBSControllerLauncher
24 24 # - SGEControllerLauncher
25 25 # - WindowsHPCControllerLauncher
26 # c.Global.controller_launcher = 'IPython.parallel.launcher.LocalControllerLauncher'
27 # c.Global.controller_launcher = 'IPython.parallel.launcher.PBSControllerLauncher'
26 # c.Global.controller_launcher = 'IPython.parallel.apps.launcher.LocalControllerLauncher'
27 # c.Global.controller_launcher = 'IPython.parallel.apps.launcher.PBSControllerLauncher'
28 28
29 29 # Options are:
30 30 # - LocalEngineSetLauncher
31 31 # - MPIExecEngineSetLauncher
32 32 # - PBSEngineSetLauncher
33 33 # - SGEEngineSetLauncher
34 34 # - WindowsHPCEngineSetLauncher
35 # c.Global.engine_launcher = 'IPython.parallel.launcher.LocalEngineSetLauncher'
35 # c.Global.engine_launcher = 'IPython.parallel.apps.launcher.LocalEngineSetLauncher'
36 36
37 37 #-----------------------------------------------------------------------------
38 38 # Global configuration
39 39 #-----------------------------------------------------------------------------
40 40
41 41 # The default number of engines that will be started. This is overridden by
42 42 # the -n command line option: "ipcluster start -n 4"
43 43 # c.Global.n = 2
44 44
45 45 # Log to a file in cluster_dir/log, otherwise just log to sys.stdout.
46 46 # c.Global.log_to_file = False
47 47
48 48 # Remove old logs from cluster_dir/log before starting.
49 49 # c.Global.clean_logs = True
50 50
51 51 # The working directory for the process. The application will use os.chdir
52 52 # to change to this directory before starting.
53 53 # c.Global.work_dir = os.getcwd()
54 54
55 55
56 56 #-----------------------------------------------------------------------------
57 57 # Local process launchers
58 58 #-----------------------------------------------------------------------------
59 59
60 60 # The command line arguments to call the controller with.
61 61 # c.LocalControllerLauncher.controller_args = \
62 62 # ['--log-to-file','--log-level', '40']
63 63
64 64 # The working directory for the controller
65 65 # c.LocalEngineSetLauncher.work_dir = u''
66 66
67 67 # Command line argument passed to the engines.
68 68 # c.LocalEngineSetLauncher.engine_args = ['--log-to-file','--log-level', '40']
69 69
70 70 #-----------------------------------------------------------------------------
71 71 # MPIExec launchers
72 72 #-----------------------------------------------------------------------------
73 73
74 74 # The mpiexec/mpirun command to use in both the controller and engines.
75 75 # c.MPIExecLauncher.mpi_cmd = ['mpiexec']
76 76
77 77 # Additional arguments to pass to the actual mpiexec command.
78 78 # c.MPIExecLauncher.mpi_args = []
79 79
80 80 # The mpiexec/mpirun command and args can be overridden if they should be different
81 81 # for controller and engines.
82 82 # c.MPIExecControllerLauncher.mpi_cmd = ['mpiexec']
83 83 # c.MPIExecControllerLauncher.mpi_args = []
84 84 # c.MPIExecEngineSetLauncher.mpi_cmd = ['mpiexec']
85 85 # c.MPIExecEngineSetLauncher.mpi_args = []
86 86
87 87 # The command line argument to call the controller with.
88 88 # c.MPIExecControllerLauncher.controller_args = \
89 89 # ['--log-to-file','--log-level', '40']
90 90
91 91 # Command line argument passed to the engines.
92 92 # c.MPIExecEngineSetLauncher.engine_args = ['--log-to-file','--log-level', '40']
93 93
94 94 # The default number of engines to start if not given elsewhere.
95 95 # c.MPIExecEngineSetLauncher.n = 1
96 96
97 97 #-----------------------------------------------------------------------------
98 98 # SSH launchers
99 99 #-----------------------------------------------------------------------------
100 100
101 101 # ipclusterz can be used to launch controller and engines remotely via ssh.
102 102 # Note that currently ipclusterz does not do any file distribution, so if
103 103 # machines are not on a shared filesystem, config and json files must be
104 104 # distributed. For this reason, the reuse_files defaults to True on an
105 105 # ssh-launched Controller. This flag can be overridden by the program_args
106 106 # attribute of c.SSHControllerLauncher.
107 107
108 108 # set the ssh cmd for launching remote commands. The default is ['ssh']
109 109 # c.SSHLauncher.ssh_cmd = ['ssh']
110 110
111 111 # set the ssh args for launching remote commands.
112 112 # c.SSHLauncher.ssh_args = ['tt']
113 113
114 114 # Set the user and hostname for the controller
115 115 # c.SSHControllerLauncher.hostname = 'controller.example.com'
116 116 # c.SSHControllerLauncher.user = os.environ.get('USER','username')
117 117
118 118 # Set the arguments to be passed to ipcontrollerz
119 119 # note that remotely launched ipcontrollerz will not get the contents of
120 120 # the local ipcontrollerz_config.py unless it resides on the *remote host*
121 121 # in the location specified by the --cluster_dir argument.
122 122 # c.SSHControllerLauncher.program_args = ['-r', '-ip', '0.0.0.0', '--cluster_dir', '/path/to/cd']
123 123
124 124 # Set the default args passed to ipenginez for SSH launched engines
125 125 # c.SSHEngineSetLauncher.engine_args = ['--mpi', 'mpi4py']
126 126
127 127 # SSH engines are launched as a dict of locations/n-engines.
128 128 # if a value is a tuple instead of an int, it is assumed to be of the form
129 129 # (n, [args]), setting the arguments passed to ipenginez on `host`.
130 130 # otherwise, c.SSHEngineSetLauncher.engine_args will be used as the default.
131 131
132 132 # In this case, there will be 3 engines at my.example.com, and
133 133 # 2 at you@ipython.scipy.org with a special json connector location.
134 134 # c.SSHEngineSetLauncher.engines = {'my.example.com' : 3,
135 135 # 'you@ipython.scipy.org' : (2, ['-f', '/path/to/ipcontroller-engine.json'])
136 136 # }
137 137
138 138 #-----------------------------------------------------------------------------
139 139 # Unix batch (PBS) schedulers launchers
140 140 #-----------------------------------------------------------------------------
141 141
142 142 # SGE and PBS are very similar. All configurables in this section called 'PBS*'
143 143 # also exist as 'SGE*'.
144 144
145 145 # The command line program to use to submit a PBS job.
146 146 # c.PBSLauncher.submit_command = ['qsub']
147 147
148 148 # The command line program to use to delete a PBS job.
149 149 # c.PBSLauncher.delete_command = ['qdel']
150 150
151 151 # The PBS queue in which the job should run
152 152 # c.PBSLauncher.queue = 'myqueue'
153 153
154 154 # A regular expression that takes the output of qsub and finds the job id.
155 155 # c.PBSLauncher.job_id_regexp = r'\d+'
156 156
157 157 # If for some reason the Controller and Engines have different options above, they
158 158 # can be set as c.PBSControllerLauncher.<option> etc.
159 159
160 160 # PBS and SGE have default templates, but you can specify your own, either as strings
161 161 # or from files, as described here:
162 162
163 163 # The batch submission script used to start the controller. This is where
164 164 # environment variables would be set up, etc. This string is interpreted using
165 165 # the Itpl module in IPython.external. Basically, you can use ${n} for the
166 166 # number of engines and ${cluster_dir} for the cluster_dir.
167 167 # c.PBSControllerLauncher.batch_template = """
168 168 # #PBS -N ipcontroller
169 169 # #PBS -q $queue
170 170 #
171 171 # ipcontrollerz --cluster-dir $cluster_dir
172 172 # """
173 173
174 174 # You can also load this template from a file
175 175 # c.PBSControllerLauncher.batch_template_file = u"/path/to/my/template.sh"
176 176
177 177 # The name of the instantiated batch script that will actually be used to
178 178 # submit the job. This will be written to the cluster directory.
179 179 # c.PBSControllerLauncher.batch_file_name = u'pbs_controller'
180 180
181 181 # The batch submission script used to start the engines. This is where
182 182 # environment variables would be set up, etc. This string is interpreted using
183 183 # the Itpl module in IPython.external. Basically, you can use ${n} for the
184 184 # number of engines and ${cluster_dir} for the cluster_dir.
185 185 # c.PBSEngineSetLauncher.batch_template = """
186 186 # #PBS -N ipengine
187 187 # #PBS -l nprocs=$n
188 188 #
189 189 # ipenginez --cluster-dir $cluster_dir
190 190 # """
191 191
192 192 # You can also load this template from a file
193 193 # c.PBSEngineSetLauncher.batch_template_file = u"/path/to/my/template.sh"
194 194
195 195 # The name of the instantiated batch script that will actually be used to
196 196 # submit the job. This will be written to the cluster directory.
197 197 # c.PBSEngineSetLauncher.batch_file_name = u'pbs_engines'
198 198
199 199
200 200
201 201 #-----------------------------------------------------------------------------
202 202 # Windows HPC Server 2008 launcher configuration
203 203 #-----------------------------------------------------------------------------
204 204
205 205 # c.IPControllerJob.job_name = 'IPController'
206 206 # c.IPControllerJob.is_exclusive = False
207 207 # c.IPControllerJob.username = r'USERDOMAIN\USERNAME'
208 208 # c.IPControllerJob.priority = 'Highest'
209 209 # c.IPControllerJob.requested_nodes = ''
210 210 # c.IPControllerJob.project = 'MyProject'
211 211
212 212 # c.IPControllerTask.task_name = 'IPController'
213 213 # c.IPControllerTask.controller_cmd = [u'ipcontroller.exe']
214 214 # c.IPControllerTask.controller_args = ['--log-to-file', '--log-level', '40']
215 215 # c.IPControllerTask.environment_variables = {}
216 216
217 217 # c.WindowsHPCControllerLauncher.scheduler = 'HEADNODE'
218 218 # c.WindowsHPCControllerLauncher.job_file_name = u'ipcontroller_job.xml'
219 219
220 220
221 221 # c.IPEngineSetJob.job_name = 'IPEngineSet'
222 222 # c.IPEngineSetJob.is_exclusive = False
223 223 # c.IPEngineSetJob.username = r'USERDOMAIN\USERNAME'
224 224 # c.IPEngineSetJob.priority = 'Highest'
225 225 # c.IPEngineSetJob.requested_nodes = ''
226 226 # c.IPEngineSetJob.project = 'MyProject'
227 227
228 228 # c.IPEngineTask.task_name = 'IPEngine'
229 229 # c.IPEngineTask.engine_cmd = [u'ipengine.exe']
230 230 # c.IPEngineTask.engine_args = ['--log-to-file', '--log-level', '40']
231 231 # c.IPEngineTask.environment_variables = {}
232 232
233 233 # c.WindowsHPCEngineSetLauncher.scheduler = 'HEADNODE'
234 234 # c.WindowsHPCEngineSetLauncher.job_file_name = u'ipengineset_job.xml'
235 235
236 236
237 237
238 238
239 239
240 240
241 241
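
The only substantive change in this hunk is the launcher import path, which now lives under the new apps subpackage. Below is a minimal sketch of a cluster config using the updated paths; the PBS choice and the engine count are illustrative, and the PBSEngineSetLauncher path is inferred by analogy with the launcher paths shown above.

    # ipcluster_config.py -- minimal sketch using the reorganized module paths
    c = get_config()

    # launchers moved from IPython.parallel.launcher to IPython.parallel.apps.launcher
    c.Global.controller_launcher = 'IPython.parallel.apps.launcher.PBSControllerLauncher'
    c.Global.engine_launcher = 'IPython.parallel.apps.launcher.PBSEngineSetLauncher'

    # illustrative defaults; override with 'ipcluster start -n <N>'
    c.Global.n = 8
    c.Global.log_to_file = True
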
@@ -1,180 +1,180 b''
1 1 from IPython.config.loader import Config
2 2
3 3 c = get_config()
4 4
5 5 #-----------------------------------------------------------------------------
6 6 # Global configuration
7 7 #-----------------------------------------------------------------------------
8 8
9 9 # Basic Global config attributes
10 10
11 11 # Start up messages are logged to stdout using the logging module.
12 12 # These all happen before the twisted reactor is started and are
13 13 # useful for debugging purposes. Can be (10=DEBUG,20=INFO,30=WARN,40=CRITICAL)
14 14 # and smaller is more verbose.
15 15 # c.Global.log_level = 20
16 16
17 17 # Log to a file in cluster_dir/log, otherwise just log to sys.stdout.
18 18 # c.Global.log_to_file = False
19 19
20 20 # Remove old logs from cluster_dir/log before starting.
21 21 # c.Global.clean_logs = True
22 22
23 23 # A list of Python statements that will be run before starting the
24 24 # controller. This is provided because occasionally certain things need to
25 25 # be imported in the controller for pickling to work.
26 26 # c.Global.import_statements = ['import math']
27 27
28 28 # Reuse the controller's JSON files. If False, JSON files are regenerated
29 29 # each time the controller is run. If True, they will be reused, *but*, you
30 30 # also must set the network ports by hand. If set, this will override the
31 31 # values set for the client and engine connections below.
32 32 # c.Global.reuse_files = True
33 33
34 34 # Enable exec_key authentication on all messages. Default is True
35 35 # c.Global.secure = True
36 36
37 37 # The working directory for the process. The application will use os.chdir
38 38 # to change to this directory before starting.
39 39 # c.Global.work_dir = os.getcwd()
40 40
41 41 # The log url for logging to an `iploggerz` application. This will override
42 42 # log-to-file.
43 43 # c.Global.log_url = 'tcp://127.0.0.1:20202'
44 44
45 45 # The specific external IP that is used to disambiguate multi-interface URLs.
46 46 # The default behavior is to guess from external IPs gleaned from `socket`.
47 47 # c.Global.location = '192.168.1.123'
48 48
49 49 # The ssh server remote clients should use to connect to this controller.
50 50 # It must be a machine that can see the interface specified in client_ip.
51 51 # The default for client_ip is localhost, in which case the sshserver must
52 52 # be an external IP of the controller machine.
53 53 # c.Global.sshserver = 'controller.example.com'
54 54
55 55 # the url to use for registration. If set, this overrides engine-ip,
56 56 # engine-transport, client-ip, client-transport, and regport.
57 57 # c.RegistrationFactory.url = 'tcp://*:12345'
58 58
59 59 # the port to use for registration. Clients and Engines both use this
60 60 # port for registration.
61 61 # c.RegistrationFactory.regport = 10101
62 62
63 63 #-----------------------------------------------------------------------------
64 64 # Configure the Task Scheduler
65 65 #-----------------------------------------------------------------------------
66 66
67 67 # The routing scheme. 'pure' will use the pure-ZMQ scheduler. Any other
68 68 # value will use a Python scheduler with various routing schemes.
69 69 # python schemes are: lru, weighted, random, twobin. Default is 'weighted'.
70 70 # Note that the pure ZMQ scheduler does not support many features, such as
71 71 # dying engines, dependencies, or engine-subset load-balancing.
72 72 # c.ControllerFactory.scheme = 'pure'
73 73
74 74 # The pure ZMQ scheduler can limit the number of outstanding tasks per engine
75 75 # by using the ZMQ HWM option. This allows engines with long-running tasks
76 76 # to not steal too many tasks from other engines. The default is 0, which
77 77 # means aggressively distribute messages, never waiting for them to finish.
78 78 # c.ControllerFactory.hwm = 1
79 79
80 80 # Whether to use Threads or Processes to start the Schedulers. Threads will
81 81 # use less resources, but potentially reduce throughput. Default is to
82 82 # use processes. Note that a Python scheduler will always be in a Process.
83 83 # c.ControllerFactory.usethreads
84 84
85 85 #-----------------------------------------------------------------------------
86 86 # Configure the Hub
87 87 #-----------------------------------------------------------------------------
88 88
89 89 # Which class to use for the db backend. Currently supported are DictDB (the
90 90 # default), and MongoDB. Uncomment this line to enable MongoDB, which will
91 91 # slow down the Hub's responsiveness, but also reduce its memory footprint.
92 # c.HubFactory.db_class = 'IPython.parallel.mongodb.MongoDB'
92 # c.HubFactory.db_class = 'IPython.parallel.controller.mongodb.MongoDB'
93 93
94 94 # The heartbeat ping frequency. This is the frequency (in ms) at which the
95 95 # Hub pings engines for heartbeats. This determines how quickly the Hub
96 96 # will react to engines coming and going. A lower number means faster response
97 97 # time, but more network activity. The default is 100ms
98 98 # c.HubFactory.ping = 100
99 99
100 100 # HubFactory queue port pairs, to set by name: mux, iopub, control, task. Set
101 101 # each as a tuple of length 2 of ints. The default is to find random
102 102 # available ports
103 103 # c.HubFactory.mux = (10102,10112)
104 104
105 105 #-----------------------------------------------------------------------------
106 106 # Configure the client connections
107 107 #-----------------------------------------------------------------------------
108 108
109 109 # Basic client connection config attributes
110 110
111 111 # The network interface the controller will listen on for client connections.
112 112 # This should be an IP address or interface on the controller. An asterisk
113 113 # means listen on all interfaces. The transport can be any transport
114 114 # supported by zeromq (tcp,epgm,pgm,ib,ipc):
115 115 # c.HubFactory.client_ip = '*'
116 116 # c.HubFactory.client_transport = 'tcp'
117 117
118 118 # individual client ports to configure by name: query_port, notifier_port
119 119 # c.HubFactory.query_port = 12345
120 120
121 121 #-----------------------------------------------------------------------------
122 122 # Configure the engine connections
123 123 #-----------------------------------------------------------------------------
124 124
125 125 # Basic config attributes for the engine connections.
126 126
127 127 # The network interface the controller will listen on for engine connections.
128 128 # This should be an IP address or interface on the controller. An asterisk
129 129 # means listen on all interfaces. The transport can be any transport
130 130 # supported by zeromq (tcp,epgm,pgm,ib,ipc):
131 131 # c.HubFactory.engine_ip = '*'
132 132 # c.HubFactory.engine_transport = 'tcp'
133 133
134 134 # set the engine heartbeat ports to use:
135 135 # c.HubFactory.hb = (10303,10313)
136 136
137 137 #-----------------------------------------------------------------------------
138 138 # Configure the TaskRecord database backend
139 139 #-----------------------------------------------------------------------------
140 140
141 141 # For memory/persistence reasons, tasks can be stored out-of-memory in a database.
142 142 # Currently, only sqlite and mongodb are supported as backends, but the interface
143 143 # is fairly simple, so advanced developers could write their own backend.
144 144
145 145 # ----- in-memory configuration --------
146 146 # this line restores the default behavior: in-memory storage of all results.
147 # c.HubFactory.db_class = 'IPython.parallel.dictdb.DictDB'
147 # c.HubFactory.db_class = 'IPython.parallel.controller.dictdb.DictDB'
148 148
149 149 # ----- sqlite configuration --------
150 150 # use this line to activate sqlite:
151 # c.HubFactory.db_class = 'IPython.parallel.sqlitedb.SQLiteDB'
151 # c.HubFactory.db_class = 'IPython.parallel.controller.sqlitedb.SQLiteDB'
152 152
153 153 # You can specify the name of the db-file. By default, this will be located
154 154 # in the active cluster_dir, e.g. ~/.ipython/clusterz_default/tasks.db
155 155 # c.SQLiteDB.filename = 'tasks.db'
156 156
157 157 # You can also specify the location of the db-file, if you want it to be somewhere
158 158 # other than the cluster_dir.
159 159 # c.SQLiteDB.location = '/scratch/'
160 160
161 161 # This will specify the name of the table for the controller to use. The default
162 162 # behavior is to use the session ID of the SessionFactory object (a uuid). Overriding
163 163 # this will result in results persisting for multiple sessions.
164 164 # c.SQLiteDB.table = 'results'
165 165
166 166 # ----- mongodb configuration --------
167 167 # use this line to activate mongodb:
168 # c.HubFactory.db_class = 'IPython.parallel.mongodb.MongoDB'
168 # c.HubFactory.db_class = 'IPython.parallel.controller.mongodb.MongoDB'
169 169
170 170 # You can specify the args and kwargs pymongo will use when creating the Connection.
171 171 # For more information on what these options might be, see pymongo documentation.
172 172 # c.MongoDB.connection_kwargs = {}
173 173 # c.MongoDB.connection_args = []
174 174
175 175 # This will specify the name of the mongo database for the controller to use. The default
176 176 # behavior is to use the session ID of the SessionFactory object (a uuid). Overriding
177 177 # this will result in task results persisting through multiple sessions.
178 178 # c.MongoDB.database = 'ipythondb'
179 179
180 180
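
The db_class entries are the only lines this hunk touches: the database backends now live under IPython.parallel.controller. A minimal sketch of selecting the relocated SQLite backend follows; the filename and location values are illustrative, not part of the diff.

    # ipcontroller_config.py -- sketch of the relocated task-database backend
    c = get_config()

    # new module path (was IPython.parallel.sqlitedb.SQLiteDB)
    c.HubFactory.db_class = 'IPython.parallel.controller.sqlitedb.SQLiteDB'

    # illustrative settings; by default tasks.db lives in the active cluster_dir
    c.SQLiteDB.filename = 'tasks.db'
    c.SQLiteDB.location = '/scratch/'
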
@@ -1,295 +1,295 b''
1 1 """Basic ssh tunneling utilities."""
2 2
3 3 #-----------------------------------------------------------------------------
4 4 # Copyright (C) 2008-2010 The IPython Development Team
5 5 #
6 6 # Distributed under the terms of the BSD License. The full license is in
7 7 # the file COPYING, distributed as part of this software.
8 8 #-----------------------------------------------------------------------------
9 9
10 10
11 11
12 12 #-----------------------------------------------------------------------------
13 13 # Imports
14 14 #-----------------------------------------------------------------------------
15 15
16 16 from __future__ import print_function
17 17
18 18 import os,sys, atexit
19 19 from multiprocessing import Process
20 20 from getpass import getpass, getuser
21 21 import warnings
22 22
23 23 try:
24 24 with warnings.catch_warnings():
25 25 warnings.simplefilter('ignore', DeprecationWarning)
26 26 import paramiko
27 27 except ImportError:
28 28 paramiko = None
29 29 else:
30 30 from forward import forward_tunnel
31 31
32 32 try:
33 33 from IPython.external import pexpect
34 34 except ImportError:
35 35 pexpect = None
36 36
37 from IPython.parallel.entry_point import select_random_ports
37 from IPython.parallel.util import select_random_ports
38 38
39 39 #-----------------------------------------------------------------------------
40 40 # Code
41 41 #-----------------------------------------------------------------------------
42 42
43 43 #-----------------------------------------------------------------------------
44 44 # Check for passwordless login
45 45 #-----------------------------------------------------------------------------
46 46
47 47 def try_passwordless_ssh(server, keyfile, paramiko=None):
48 48 """Attempt to make an ssh connection without a password.
49 49 This is mainly used for requiring password input only once
50 50 when many tunnels may be connected to the same server.
51 51
52 52 If paramiko is None, the default for the platform is chosen.
53 53 """
54 54 if paramiko is None:
55 55 paramiko = sys.platform == 'win32'
56 56 if not paramiko:
57 57 f = _try_passwordless_openssh
58 58 else:
59 59 f = _try_passwordless_paramiko
60 60 return f(server, keyfile)
61 61
62 62 def _try_passwordless_openssh(server, keyfile):
63 63 """Try passwordless login with shell ssh command."""
64 64 if pexpect is None:
65 65 raise ImportError("pexpect unavailable, use paramiko")
66 66 cmd = 'ssh -f '+ server
67 67 if keyfile:
68 68 cmd += ' -i ' + keyfile
69 69 cmd += ' exit'
70 70 p = pexpect.spawn(cmd)
71 71 while True:
72 72 try:
73 73 p.expect('[Pp]assword:', timeout=.1)
74 74 except pexpect.TIMEOUT:
75 75 continue
76 76 except pexpect.EOF:
77 77 return True
78 78 else:
79 79 return False
80 80
81 81 def _try_passwordless_paramiko(server, keyfile):
82 82 """Try passwordless login with paramiko."""
83 83 if paramiko is None:
84 84 raise ImportError("paramiko unavailable, use openssh")
85 85 username, server, port = _split_server(server)
86 86 client = paramiko.SSHClient()
87 87 client.load_system_host_keys()
88 88 client.set_missing_host_key_policy(paramiko.WarningPolicy())
89 89 try:
90 90 client.connect(server, port, username=username, key_filename=keyfile,
91 91 look_for_keys=True)
92 92 except paramiko.AuthenticationException:
93 93 return False
94 94 else:
95 95 client.close()
96 96 return True
97 97
98 98
99 99 def tunnel_connection(socket, addr, server, keyfile=None, password=None, paramiko=None):
100 100 """Connect a socket to an address via an ssh tunnel.
101 101
102 102 This is a wrapper for socket.connect(addr), when addr is not accessible
103 103 from the local machine. It simply creates an ssh tunnel using the remaining args,
104 104 and calls socket.connect('tcp://localhost:lport') where lport is the randomly
105 105 selected local port of the tunnel.
106 106
107 107 """
108 108 lport = select_random_ports(1)[0]
109 109 transport, addr = addr.split('://')
110 110 ip,rport = addr.split(':')
111 111 rport = int(rport)
112 112 if paramiko is None:
113 113 paramiko = sys.platform == 'win32'
114 114 if paramiko:
115 115 tunnelf = paramiko_tunnel
116 116 else:
117 117 tunnelf = openssh_tunnel
118 118 tunnel = tunnelf(lport, rport, server, remoteip=ip, keyfile=keyfile, password=password)
119 119 socket.connect('tcp://127.0.0.1:%i'%lport)
120 120 return tunnel
121 121
122 122 def openssh_tunnel(lport, rport, server, remoteip='127.0.0.1', keyfile=None, password=None, timeout=15):
123 123 """Create an ssh tunnel using command-line ssh that connects port lport
124 124 on this machine to localhost:rport on server. The tunnel
125 125 will automatically close when not in use, remaining open
126 126 for a minimum of timeout seconds for an initial connection.
127 127
128 128 This creates a tunnel redirecting `localhost:lport` to `remoteip:rport`,
129 129 as seen from `server`.
130 130
131 131 keyfile and password may be specified, but ssh config is checked for defaults.
132 132
133 133 Parameters
134 134 ----------
135 135
136 136 lport : int
137 137 local port for connecting to the tunnel from this machine.
138 138 rport : int
139 139 port on the remote machine to connect to.
140 140 server : str
141 141 The ssh server to connect to. The full ssh server string will be parsed.
142 142 user@server:port
143 143 remoteip : str [Default: 127.0.0.1]
144 144 The remote ip, specifying the destination of the tunnel.
145 145 Default is localhost, which means that the tunnel would redirect
146 146 localhost:lport on this machine to localhost:rport on the *server*.
147 147
148 148 keyfile : str; path to public key file
149 149 This specifies a key to be used in ssh login, default None.
150 150 Regular default ssh keys will be used without specifying this argument.
151 151 password : str;
152 152 Your ssh password to the ssh server. Note that if this is left None,
153 153 you will be prompted for it if passwordless key based login is unavailable.
154 154
155 155 """
156 156 if pexpect is None:
157 157 raise ImportError("pexpect unavailable, use paramiko_tunnel")
158 158 ssh="ssh "
159 159 if keyfile:
160 160 ssh += "-i " + keyfile
161 161 cmd = ssh + " -f -L 127.0.0.1:%i:%s:%i %s sleep %i"%(lport, remoteip, rport, server, timeout)
162 162 tunnel = pexpect.spawn(cmd)
163 163 failed = False
164 164 while True:
165 165 try:
166 166 tunnel.expect('[Pp]assword:', timeout=.1)
167 167 except pexpect.TIMEOUT:
168 168 continue
169 169 except pexpect.EOF:
170 170 if tunnel.exitstatus:
171 171 print (tunnel.exitstatus)
172 172 print (tunnel.before)
173 173 print (tunnel.after)
174 174 raise RuntimeError("tunnel '%s' failed to start"%(cmd))
175 175 else:
176 176 return tunnel.pid
177 177 else:
178 178 if failed:
179 179 print("Password rejected, try again")
180 180 password=None
181 181 if password is None:
182 182 password = getpass("%s's password: "%(server))
183 183 tunnel.sendline(password)
184 184 failed = True
185 185
186 186 def _split_server(server):
187 187 if '@' in server:
188 188 username,server = server.split('@', 1)
189 189 else:
190 190 username = getuser()
191 191 if ':' in server:
192 192 server, port = server.split(':')
193 193 port = int(port)
194 194 else:
195 195 port = 22
196 196 return username, server, port
197 197
198 198 def paramiko_tunnel(lport, rport, server, remoteip='127.0.0.1', keyfile=None, password=None, timeout=15):
199 199 """launch a tunner with paramiko in a subprocess. This should only be used
200 200 when shell ssh is unavailable (e.g. Windows).
201 201
202 202 This creates a tunnel redirecting `localhost:lport` to `remoteip:rport`,
203 203 as seen from `server`.
204 204
205 205 If you are familiar with ssh tunnels, this creates the tunnel:
206 206
207 207 ssh server -L localhost:lport:remoteip:rport
208 208
209 209 keyfile and password may be specified, but ssh config is checked for defaults.
210 210
211 211
212 212 Parameters
213 213 ----------
214 214
215 215 lport : int
216 216 local port for connecting to the tunnel from this machine.
217 217 rport : int
218 218 port on the remote machine to connect to.
219 219 server : str
220 220 The ssh server to connect to. The full ssh server string will be parsed.
221 221 user@server:port
222 222 remoteip : str [Default: 127.0.0.1]
223 223 The remote ip, specifying the destination of the tunnel.
224 224 Default is localhost, which means that the tunnel would redirect
225 225 localhost:lport on this machine to localhost:rport on the *server*.
226 226
227 227 keyfile : str; path to public key file
228 228 This specifies a key to be used in ssh login, default None.
229 229 Regular default ssh keys will be used without specifying this argument.
230 230 password : str;
231 231 Your ssh password to the ssh server. Note that if this is left None,
232 232 you will be prompted for it if passwordless key based login is unavailable.
233 233
234 234 """
235 235 if paramiko is None:
236 236 raise ImportError("Paramiko not available")
237 237
238 238 if password is None:
239 239 if not _check_passwordless_paramiko(server, keyfile):
240 240 password = getpass("%s's password: "%(server))
241 241
242 242 p = Process(target=_paramiko_tunnel,
243 243 args=(lport, rport, server, remoteip),
244 244 kwargs=dict(keyfile=keyfile, password=password))
245 245 p.daemon=False
246 246 p.start()
247 247 atexit.register(_shutdown_process, p)
248 248 return p
249 249
250 250 def _shutdown_process(p):
251 251 if p.is_alive():
252 252 p.terminate()
253 253
254 254 def _paramiko_tunnel(lport, rport, server, remoteip, keyfile=None, password=None):
255 255 """Function for actually starting a paramiko tunnel, to be passed
256 256 to multiprocessing.Process(target=this), and not called directly.
257 257 """
258 258 username, server, port = _split_server(server)
259 259 client = paramiko.SSHClient()
260 260 client.load_system_host_keys()
261 261 client.set_missing_host_key_policy(paramiko.WarningPolicy())
262 262
263 263 try:
264 264 client.connect(server, port, username=username, key_filename=keyfile,
265 265 look_for_keys=True, password=password)
266 266 # except paramiko.AuthenticationException:
267 267 # if password is None:
268 268 # password = getpass("%s@%s's password: "%(username, server))
269 269 # client.connect(server, port, username=username, password=password)
270 270 # else:
271 271 # raise
272 272 except Exception as e:
273 273 print ('*** Failed to connect to %s:%d: %r' % (server, port, e))
274 274 sys.exit(1)
275 275
276 276 # print ('Now forwarding port %d to %s:%d ...' % (lport, server, rport))
277 277
278 278 try:
279 279 forward_tunnel(lport, remoteip, rport, client.get_transport())
280 280 except KeyboardInterrupt:
281 281 print ('SIGINT: Port forwarding stopped cleanly')
282 282 sys.exit(0)
283 283 except Exception as e:
284 284 print ("Port forwarding stopped uncleanly: %s"%e)
285 285 sys.exit(255)
286 286
287 287 if sys.platform == 'win32':
288 288 ssh_tunnel = paramiko_tunnel
289 289 else:
290 290 ssh_tunnel = openssh_tunnel
291 291
292 292
293 293 __all__ = ['tunnel_connection', 'ssh_tunnel', 'openssh_tunnel', 'paramiko_tunnel', 'try_passwordless_ssh']
294 294
295 295
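
The only change in this file is the select_random_ports import, which now comes from IPython.parallel.util. For context, here is a hedged usage sketch of tunnel_connection() as defined above; the post-reorganization import path for this module is an assumption, and the addresses and hostnames are placeholders.

    import zmq
    from IPython.parallel import tunnel   # assumed location; adjust to where this module lands

    ctx = zmq.Context()
    sock = ctx.socket(zmq.REQ)
    # reach tcp://10.0.0.5:10101 (as seen from the ssh server) through a local
    # tunnel bound to a randomly selected port
    tun = tunnel.tunnel_connection(sock, 'tcp://10.0.0.5:10101',
                                   'user@login.example.com',
                                   keyfile='/home/user/.ssh/id_rsa')
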
@@ -1,25 +1,26 b''
1 1 """The IPython ZMQ-based parallel computing interface."""
2 2 #-----------------------------------------------------------------------------
3 3 # Copyright (C) 2011 The IPython Development Team
4 4 #
5 5 # Distributed under the terms of the BSD License. The full license is in
6 6 # the file COPYING, distributed as part of this software.
7 7 #-----------------------------------------------------------------------------
8 8
9 9 #-----------------------------------------------------------------------------
10 10 # Imports
11 11 #-----------------------------------------------------------------------------
12 12
13 13 import zmq
14 14
15 if zmq.__version__ < '2.1.3':
16 raise ImportError("IPython.parallel requires pyzmq/0MQ >= 2.1.3, you appear to have %s"%zmq.__version__)
15 if zmq.__version__ < '2.1.4':
16 raise ImportError("IPython.parallel requires pyzmq/0MQ >= 2.1.4, you appear to have %s"%zmq.__version__)
17 17
18 from .asyncresult import *
19 from .client import Client
20 from .dependency import *
21 from .remotefunction import *
22 from .view import *
23 18 from IPython.utils.pickleutil import Reference
24 19
20 from .client.asyncresult import *
21 from .client.client import Client
22 from .client.remotefunction import *
23 from .client.view import *
24 from .controller.dependency import *
25
25 26
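
This __init__.py keeps the flat public API while the implementation moves into client/ and controller/ subpackages, and bumps the minimum pyzmq to 2.1.4. A short sketch of what stays unchanged for users (assumes pyzmq >= 2.1.4 and a running cluster with the default profile):

    from IPython.parallel import Client, Reference

    rc = Client()                      # re-exported from .client.client
    dview = rc[:]                      # DirectView over all engines (.client.view)

    def double(x):
        return 2 * x

    ar = dview.apply_async(double, 21) # AsyncResult from .client.asyncresult
    print(ar.get())                    # one result per engine, e.g. [42, 42, ...]
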
1 NO CONTENT: file renamed from IPython/parallel/clusterdir.py to IPython/parallel/apps/clusterdir.py
@@ -1,592 +1,592 b''
1 1 #!/usr/bin/env python
2 2 # encoding: utf-8
3 3 """
4 4 The ipcluster application.
5 5 """
6 6
7 7 #-----------------------------------------------------------------------------
8 8 # Copyright (C) 2008-2009 The IPython Development Team
9 9 #
10 10 # Distributed under the terms of the BSD License. The full license is in
11 11 # the file COPYING, distributed as part of this software.
12 12 #-----------------------------------------------------------------------------
13 13
14 14 #-----------------------------------------------------------------------------
15 15 # Imports
16 16 #-----------------------------------------------------------------------------
17 17
18 18 import errno
19 19 import logging
20 20 import os
21 21 import re
22 22 import signal
23 23
24 24 import zmq
25 25 from zmq.eventloop import ioloop
26 26
27 27 from IPython.external.argparse import ArgumentParser, SUPPRESS
28 28 from IPython.utils.importstring import import_item
29 from IPython.parallel.clusterdir import (
29 from .clusterdir import (
30 30 ApplicationWithClusterDir, ClusterDirConfigLoader,
31 31 ClusterDirError, PIDFileError
32 32 )
33 33
34 34
35 35 #-----------------------------------------------------------------------------
36 36 # Module level variables
37 37 #-----------------------------------------------------------------------------
38 38
39 39
40 40 default_config_file_name = u'ipcluster_config.py'
41 41
42 42
43 43 _description = """\
44 44 Start an IPython cluster for parallel computing.\n\n
45 45
46 46 An IPython cluster consists of 1 controller and 1 or more engines.
47 47 This command automates the startup of these processes using a wide
48 48 range of startup methods (SSH, local processes, PBS, mpiexec,
49 49 Windows HPC Server 2008). To start a cluster with 4 engines on your
50 50 local host simply do 'ipcluster start -n 4'. For more complex usage
51 51 you will typically do 'ipcluster create -p mycluster', then edit
52 52 configuration files, followed by 'ipcluster start -p mycluster -n 4'.
53 53 """
54 54
55 55
56 56 # Exit codes for ipcluster
57 57
58 58 # This will be the exit code if the ipcluster appears to be running because
59 59 # a .pid file exists
60 60 ALREADY_STARTED = 10
61 61
62 62
63 63 # This will be the exit code if ipcluster stop is run, but there is not .pid
64 64 # file to be found.
65 65 ALREADY_STOPPED = 11
66 66
67 67 # This will be the exit code if ipcluster engines is run, but there is not .pid
68 68 # file to be found.
69 69 NO_CLUSTER = 12
70 70
71 71
72 72 #-----------------------------------------------------------------------------
73 73 # Command line options
74 74 #-----------------------------------------------------------------------------
75 75
76 76
77 77 class IPClusterAppConfigLoader(ClusterDirConfigLoader):
78 78
79 79 def _add_arguments(self):
80 80 # Don't call ClusterDirConfigLoader._add_arguments as we don't want
81 81 # its defaults on self.parser. Instead, we will put those on
82 82 # default options on our subparsers.
83 83
84 84 # This has all the common options that all subcommands use
85 85 parent_parser1 = ArgumentParser(
86 86 add_help=False,
87 87 argument_default=SUPPRESS
88 88 )
89 89 self._add_ipython_dir(parent_parser1)
90 90 self._add_log_level(parent_parser1)
91 91
92 92 # This has all the common options that other subcommands use
93 93 parent_parser2 = ArgumentParser(
94 94 add_help=False,
95 95 argument_default=SUPPRESS
96 96 )
97 97 self._add_cluster_profile(parent_parser2)
98 98 self._add_cluster_dir(parent_parser2)
99 99 self._add_work_dir(parent_parser2)
100 100 paa = parent_parser2.add_argument
101 101 paa('--log-to-file',
102 102 action='store_true', dest='Global.log_to_file',
103 103 help='Log to a file in the log directory (default is stdout)')
104 104
105 105 # Create the object used to create the subparsers.
106 106 subparsers = self.parser.add_subparsers(
107 107 dest='Global.subcommand',
108 108 title='ipcluster subcommands',
109 109 description=
110 110 """ipcluster has a variety of subcommands. The general way of
111 111 running ipcluster is 'ipcluster <cmd> [options]'. To get help
112 112 on a particular subcommand do 'ipcluster <cmd> -h'."""
113 113 # help="For more help, type 'ipcluster <cmd> -h'",
114 114 )
115 115
116 116 # The "list" subcommand parser
117 117 parser_list = subparsers.add_parser(
118 118 'list',
119 119 parents=[parent_parser1],
120 120 argument_default=SUPPRESS,
121 121 help="List all clusters in cwd and ipython_dir.",
122 122 description=
123 123 """List all available clusters, by cluster directory, that can
124 124 be found in the current working directly or in the ipython
125 125 directory. Cluster directories are named using the convention
126 126 'cluster_<profile>'."""
127 127 )
128 128
129 129 # The "create" subcommand parser
130 130 parser_create = subparsers.add_parser(
131 131 'create',
132 132 parents=[parent_parser1, parent_parser2],
133 133 argument_default=SUPPRESS,
134 134 help="Create a new cluster directory.",
135 135 description=
136 136 """Create an ipython cluster directory by its profile name or
137 137 cluster directory path. Cluster directories contain
138 138 configuration, log and security related files and are named
139 139 using the convention 'cluster_<profile>'. By default they are
140 140 located in your ipython directory. Once created, you will
141 141 probably need to edit the configuration files in the cluster
142 142 directory to configure your cluster. Most users will create a
143 143 cluster directory by profile name,
144 144 'ipcluster create -p mycluster', which will put the directory
145 145 in '<ipython_dir>/cluster_mycluster'.
146 146 """
147 147 )
148 148 paa = parser_create.add_argument
149 149 paa('--reset-config',
150 150 dest='Global.reset_config', action='store_true',
151 151 help=
152 152 """Recopy the default config files to the cluster directory.
153 153 You will lose any modifications you have made to these files.""")
154 154
155 155 # The "start" subcommand parser
156 156 parser_start = subparsers.add_parser(
157 157 'start',
158 158 parents=[parent_parser1, parent_parser2],
159 159 argument_default=SUPPRESS,
160 160 help="Start a cluster.",
161 161 description=
162 162 """Start an ipython cluster by its profile name or cluster
163 163 directory. Cluster directories contain configuration, log and
164 164 security related files and are named using the convention
165 165 'cluster_<profile>' and should be created using the 'create'
166 166 subcommand of 'ipcluster'. If your cluster directory is in
167 167 the cwd or the ipython directory, you can simply refer to it
168 168 using its profile name, 'ipcluster start -n 4 -p <profile>',
169 169 otherwise use the '--cluster-dir' option.
170 170 """
171 171 )
172 172
173 173 paa = parser_start.add_argument
174 174 paa('-n', '--number',
175 175 type=int, dest='Global.n',
176 176 help='The number of engines to start.',
177 177 metavar='Global.n')
178 178 paa('--clean-logs',
179 179 dest='Global.clean_logs', action='store_true',
180 180 help='Delete old log files before starting.')
181 181 paa('--no-clean-logs',
182 182 dest='Global.clean_logs', action='store_false',
183 183 help="Don't delete old log flies before starting.")
184 184 paa('--daemon',
185 185 dest='Global.daemonize', action='store_true',
186 186 help='Daemonize the ipcluster program. This implies --log-to-file')
187 187 paa('--no-daemon',
188 188 dest='Global.daemonize', action='store_false',
189 189 help="Dont't daemonize the ipcluster program.")
190 190 paa('--delay',
191 191 type=float, dest='Global.delay',
192 192 help="Specify the delay (in seconds) between starting the controller and starting the engine(s).")
193 193
194 194 # The "stop" subcommand parser
195 195 parser_stop = subparsers.add_parser(
196 196 'stop',
197 197 parents=[parent_parser1, parent_parser2],
198 198 argument_default=SUPPRESS,
199 199 help="Stop a running cluster.",
200 200 description=
201 201 """Stop a running ipython cluster by its profile name or cluster
202 202 directory. Cluster directories are named using the convention
203 203 'cluster_<profile>'. If your cluster directory is in
204 204 the cwd or the ipython directory, you can simply refer to it
205 205 using its profile name, 'ipcluster stop -p <profile>', otherwise
206 206 use the '--cluster-dir' option.
207 207 """
208 208 )
209 209 paa = parser_stop.add_argument
210 210 paa('--signal',
211 211 dest='Global.signal', type=int,
212 212 help="The signal number to use in stopping the cluster (default=2).",
213 213 metavar="Global.signal")
214 214
215 215 # the "engines" subcommand parser
216 216 parser_engines = subparsers.add_parser(
217 217 'engines',
218 218 parents=[parent_parser1, parent_parser2],
219 219 argument_default=SUPPRESS,
220 220 help="Attach some engines to an existing controller or cluster.",
221 221 description=
222 222 """Start one or more engines to connect to an existing Cluster
223 223 by profile name or cluster directory.
224 224 Cluster directories contain configuration, log and
225 225 security related files and are named using the convention
226 226 'cluster_<profile>' and should be created using the 'create'
227 227 subcommand of 'ipcluster'. If your cluster directory is in
228 228 the cwd or the ipython directory, you can simply refer to it
229 229 using its profile name, 'ipcluster engines -n 4 -p <profile>',
230 230 otherwise use the '--cluster-dir' option.
231 231 """
232 232 )
233 233 paa = parser_engines.add_argument
234 234 paa('-n', '--number',
235 235 type=int, dest='Global.n',
236 236 help='The number of engines to start.',
237 237 metavar='Global.n')
238 238 paa('--daemon',
239 239 dest='Global.daemonize', action='store_true',
240 240 help='Daemonize the ipcluster program. This implies --log-to-file')
241 241 paa('--no-daemon',
242 242 dest='Global.daemonize', action='store_false',
243 243 help="Dont't daemonize the ipcluster program.")
244 244
245 245 #-----------------------------------------------------------------------------
246 246 # Main application
247 247 #-----------------------------------------------------------------------------
248 248
249 249
250 250 class IPClusterApp(ApplicationWithClusterDir):
251 251
252 252 name = u'ipcluster'
253 253 description = _description
254 254 usage = None
255 255 command_line_loader = IPClusterAppConfigLoader
256 256 default_config_file_name = default_config_file_name
257 257 default_log_level = logging.INFO
258 258 auto_create_cluster_dir = False
259 259
260 260 def create_default_config(self):
261 261 super(IPClusterApp, self).create_default_config()
262 262 self.default_config.Global.controller_launcher = \
263 263 'IPython.parallel.launcher.LocalControllerLauncher'
264 264 self.default_config.Global.engine_launcher = \
265 265 'IPython.parallel.launcher.LocalEngineSetLauncher'
266 266 self.default_config.Global.n = 2
267 267 self.default_config.Global.delay = 2
268 268 self.default_config.Global.reset_config = False
269 269 self.default_config.Global.clean_logs = True
270 270 self.default_config.Global.signal = signal.SIGINT
271 271 self.default_config.Global.daemonize = False
272 272
273 273 def find_resources(self):
274 274 subcommand = self.command_line_config.Global.subcommand
275 275 if subcommand=='list':
276 276 self.list_cluster_dirs()
277 277 # Exit immediately because there is nothing left to do.
278 278 self.exit()
279 279 elif subcommand=='create':
280 280 self.auto_create_cluster_dir = True
281 281 super(IPClusterApp, self).find_resources()
282 282 elif subcommand=='start' or subcommand=='stop':
283 283 self.auto_create_cluster_dir = True
284 284 try:
285 285 super(IPClusterApp, self).find_resources()
286 286 except ClusterDirError:
287 287 raise ClusterDirError(
288 288 "Could not find a cluster directory. A cluster dir must "
289 289 "be created before running 'ipcluster start'. Do "
290 290 "'ipcluster create -h' or 'ipcluster list -h' for more "
291 291 "information about creating and listing cluster dirs."
292 292 )
293 293 elif subcommand=='engines':
294 294 self.auto_create_cluster_dir = False
295 295 try:
296 296 super(IPClusterApp, self).find_resources()
297 297 except ClusterDirError:
298 298 raise ClusterDirError(
299 299 "Could not find a cluster directory. A cluster dir must "
300 300 "be created before running 'ipcluster start'. Do "
301 301 "'ipcluster create -h' or 'ipcluster list -h' for more "
302 302 "information about creating and listing cluster dirs."
303 303 )
304 304
305 305 def list_cluster_dirs(self):
306 306 # Find the search paths
307 307 cluster_dir_paths = os.environ.get('IPCLUSTER_DIR_PATH','')
308 308 if cluster_dir_paths:
309 309 cluster_dir_paths = cluster_dir_paths.split(':')
310 310 else:
311 311 cluster_dir_paths = []
312 312 try:
313 313 ipython_dir = self.command_line_config.Global.ipython_dir
314 314 except AttributeError:
315 315 ipython_dir = self.default_config.Global.ipython_dir
316 316 paths = [os.getcwd(), ipython_dir] + \
317 317 cluster_dir_paths
318 318 paths = list(set(paths))
319 319
320 320 self.log.info('Searching for cluster dirs in paths: %r' % paths)
321 321 for path in paths:
322 322 files = os.listdir(path)
323 323 for f in files:
324 324 full_path = os.path.join(path, f)
325 325 if os.path.isdir(full_path) and f.startswith('cluster_'):
326 326 profile = full_path.split('_')[-1]
327 327 start_cmd = 'ipcluster start -p %s -n 4' % profile
328 328 print start_cmd + " ==> " + full_path
329 329
330 330 def pre_construct(self):
331 331 # IPClusterApp.pre_construct() is where we cd to the working directory.
332 332 super(IPClusterApp, self).pre_construct()
333 333 config = self.master_config
334 334 try:
335 335 daemon = config.Global.daemonize
336 336 if daemon:
337 337 config.Global.log_to_file = True
338 338 except AttributeError:
339 339 pass
340 340
341 341 def construct(self):
342 342 config = self.master_config
343 343 subcmd = config.Global.subcommand
344 344 reset = config.Global.reset_config
345 345 if subcmd == 'list':
346 346 return
347 347 if subcmd == 'create':
348 348 self.log.info('Copying default config files to cluster directory '
349 349 '[overwrite=%r]' % (reset,))
350 350 self.cluster_dir_obj.copy_all_config_files(overwrite=reset)
351 351 if subcmd =='start':
352 352 self.cluster_dir_obj.copy_all_config_files(overwrite=False)
353 353 self.start_logging()
354 354 self.loop = ioloop.IOLoop.instance()
355 355 # reactor.callWhenRunning(self.start_launchers)
356 356 dc = ioloop.DelayedCallback(self.start_launchers, 0, self.loop)
357 357 dc.start()
358 358 if subcmd == 'engines':
359 359 self.start_logging()
360 360 self.loop = ioloop.IOLoop.instance()
361 361 # reactor.callWhenRunning(self.start_launchers)
362 362 engine_only = lambda : self.start_launchers(controller=False)
363 363 dc = ioloop.DelayedCallback(engine_only, 0, self.loop)
364 364 dc.start()
365 365
366 366 def start_launchers(self, controller=True):
367 367 config = self.master_config
368 368
369 369 # Create the launchers. In both cases, we set the work_dir of
370 370 # the launcher to the cluster_dir. This is where the launcher's
371 371 # subprocesses will be launched. It is not where the controller
372 372 # and engine will be launched.
373 373 if controller:
374 374 cl_class = import_item(config.Global.controller_launcher)
375 375 self.controller_launcher = cl_class(
376 376 work_dir=self.cluster_dir, config=config,
377 377 logname=self.log.name
378 378 )
379 379 # Setup the observing of stopping. If the controller dies, shut
380 380 # everything down as that will be completely fatal for the engines.
381 381 self.controller_launcher.on_stop(self.stop_launchers)
382 382 # But, we don't monitor the stopping of engines. An engine dying
383 383 # is just fine and in principle a user could start a new engine.
384 384 # Also, if we did monitor engine stopping, it is difficult to
385 385 # know what to do when only some engines die. Currently, the
386 386 # observing of engine stopping is inconsistent. Some launchers
387 387 # might trigger on a single engine stopping, others wait until
388 388 # all stop. TODO: think more about how to handle this.
389 389 else:
390 390 self.controller_launcher = None
391 391
392 392 el_class = import_item(config.Global.engine_launcher)
393 393 self.engine_launcher = el_class(
394 394 work_dir=self.cluster_dir, config=config, logname=self.log.name
395 395 )
396 396
397 397 # Setup signals
398 398 signal.signal(signal.SIGINT, self.sigint_handler)
399 399
400 400 # Start the controller and engines
401 401 self._stopping = False # Make sure stop_launchers is not called 2x.
402 402 if controller:
403 403 self.start_controller()
404 404 dc = ioloop.DelayedCallback(self.start_engines, 1000*config.Global.delay*controller, self.loop)
405 405 dc.start()
406 406 self.startup_message()
407 407
408 408 def startup_message(self, r=None):
409 409 self.log.info("IPython cluster: started")
410 410 return r
411 411
412 412 def start_controller(self, r=None):
413 413 # self.log.info("In start_controller")
414 414 config = self.master_config
415 415 d = self.controller_launcher.start(
416 416 cluster_dir=config.Global.cluster_dir
417 417 )
418 418 return d
419 419
420 420 def start_engines(self, r=None):
421 421 # self.log.info("In start_engines")
422 422 config = self.master_config
423 423
424 424 d = self.engine_launcher.start(
425 425 config.Global.n,
426 426 cluster_dir=config.Global.cluster_dir
427 427 )
428 428 return d
429 429
430 430 def stop_controller(self, r=None):
431 431 # self.log.info("In stop_controller")
432 432 if self.controller_launcher and self.controller_launcher.running:
433 433 return self.controller_launcher.stop()
434 434
435 435 def stop_engines(self, r=None):
436 436 # self.log.info("In stop_engines")
437 437 if self.engine_launcher.running:
438 438 d = self.engine_launcher.stop()
439 439 # d.addErrback(self.log_err)
440 440 return d
441 441 else:
442 442 return None
443 443
444 444 def log_err(self, f):
445 445 self.log.error(f.getTraceback())
446 446 return None
447 447
448 448 def stop_launchers(self, r=None):
449 449 if not self._stopping:
450 450 self._stopping = True
451 451 # if isinstance(r, failure.Failure):
452 452 # self.log.error('Unexpected error in ipcluster:')
453 453 # self.log.info(r.getTraceback())
454 454 self.log.error("IPython cluster: stopping")
455 455 # These return deferreds. We are not doing anything with them
456 456 # but we are holding refs to them as a reminder that they
457 457 # do return deferreds.
458 458 d1 = self.stop_engines()
459 459 d2 = self.stop_controller()
460 460 # Wait a few seconds to let things shut down.
461 461 dc = ioloop.DelayedCallback(self.loop.stop, 4000, self.loop)
462 462 dc.start()
463 463 # reactor.callLater(4.0, reactor.stop)
464 464
465 465 def sigint_handler(self, signum, frame):
466 466 self.stop_launchers()
467 467
468 468 def start_logging(self):
469 469 # Remove old log files of the controller and engine
470 470 if self.master_config.Global.clean_logs:
471 471 log_dir = self.master_config.Global.log_dir
472 472 for f in os.listdir(log_dir):
473 473 if re.match(r'ip(engine|controller)z-\d+\.(log|err|out)',f):
474 474 os.remove(os.path.join(log_dir, f))
475 475 # This will remove old log files for ipcluster itself
476 476 super(IPClusterApp, self).start_logging()
477 477
478 478 def start_app(self):
479 479 """Start the application, depending on what subcommand is used."""
480 480 subcmd = self.master_config.Global.subcommand
481 481 if subcmd=='create' or subcmd=='list':
482 482 return
483 483 elif subcmd=='start':
484 484 self.start_app_start()
485 485 elif subcmd=='stop':
486 486 self.start_app_stop()
487 487 elif subcmd=='engines':
488 488 self.start_app_engines()
489 489
490 490 def start_app_start(self):
491 491 """Start the app for the start subcommand."""
492 492 config = self.master_config
493 493 # First see if the cluster is already running
494 494 try:
495 495 pid = self.get_pid_from_file()
496 496 except PIDFileError:
497 497 pass
498 498 else:
499 499 self.log.critical(
500 500 'Cluster is already running with [pid=%s]. '
501 501 'use "ipcluster stop" to stop the cluster.' % pid
502 502 )
503 503 # Here I exit with an unusual exit status that other processes
504 504 # can watch for to learn how I exited.
505 505 self.exit(ALREADY_STARTED)
506 506
507 507 # Now log and daemonize
508 508 self.log.info(
509 509 'Starting ipcluster with [daemon=%r]' % config.Global.daemonize
510 510 )
511 511 # TODO: Get daemonize working on Windows or as a Windows Server.
512 512 if config.Global.daemonize:
513 513 if os.name=='posix':
514 514 from twisted.scripts._twistd_unix import daemonize
515 515 daemonize()
516 516
517 517 # Now write the new pid file AFTER our new forked pid is active.
518 518 self.write_pid_file()
519 519 try:
520 520 self.loop.start()
521 521 except KeyboardInterrupt:
522 522 pass
523 523 except zmq.ZMQError as e:
524 524 if e.errno == errno.EINTR:
525 525 pass
526 526 else:
527 527 raise
528 528 self.remove_pid_file()
529 529
530 530 def start_app_engines(self):
531 531 """Start the app for the start subcommand."""
532 532 config = self.master_config
533 533 # First see if the cluster is already running
534 534
535 535 # Now log and daemonize
536 536 self.log.info(
537 537 'Starting engines with [daemon=%r]' % config.Global.daemonize
538 538 )
539 539 # TODO: Get daemonize working on Windows or as a Windows Server.
540 540 if config.Global.daemonize:
541 541 if os.name=='posix':
542 542 from twisted.scripts._twistd_unix import daemonize
543 543 daemonize()
544 544
545 545 # Now write the new pid file AFTER our new forked pid is active.
546 546 # self.write_pid_file()
547 547 try:
548 548 self.loop.start()
549 549 except KeyboardInterrupt:
550 550 pass
551 551 except zmq.ZMQError as e:
552 552 if e.errno == errno.EINTR:
553 553 pass
554 554 else:
555 555 raise
556 556 # self.remove_pid_file()
557 557
558 558 def start_app_stop(self):
559 559 """Start the app for the stop subcommand."""
560 560 config = self.master_config
561 561 try:
562 562 pid = self.get_pid_from_file()
563 563 except PIDFileError:
564 564 self.log.critical(
565 565 'Problem reading pid file, cluster is probably not running.'
566 566 )
567 567 # Here I exit with an unusual exit status that other processes
568 568 # can watch for to learn how I exited.
569 569 self.exit(ALREADY_STOPPED)
570 570 else:
571 571 if os.name=='posix':
572 572 sig = config.Global.signal
573 573 self.log.info(
574 574 "Stopping cluster [pid=%r] with [signal=%r]" % (pid, sig)
575 575 )
576 576 os.kill(pid, sig)
577 577 elif os.name=='nt':
578 578 # As of right now, we don't support daemonize on Windows, so
579 579 # stop will not do anything. Minimally, it should clean up the
580 580 # old .pid files.
581 581 self.remove_pid_file()
582 582
583 583
584 584 def launch_new_instance():
585 585 """Create and run the IPython cluster."""
586 586 app = IPClusterApp()
587 587 app.start()
588 588
589 589
590 590 if __name__ == '__main__':
591 591 launch_new_instance()
592 592
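
The construct()/start_launchers() flow above reduces to scheduling the controller callback immediately and the engines callback Global.delay seconds later on the same IOLoop, via the DelayedCallback calls shown in the diff. A stripped-down sketch of that sequencing pattern (the callbacks here are placeholders, not the real launcher methods):

    from zmq.eventloop import ioloop

    loop = ioloop.IOLoop.instance()

    def start_controller():
        print('starting controller')

    def start_engines():
        print('starting engines')

    delay = 2.0  # seconds, mirroring the c.Global.delay default
    ioloop.DelayedCallback(start_controller, 0, loop).start()
    ioloop.DelayedCallback(start_engines, int(1000 * delay), loop).start()
    loop.start()
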
@@ -1,431 +1,432 b''
1 1 #!/usr/bin/env python
2 2 # encoding: utf-8
3 3 """
4 4 The IPython controller application.
5 5 """
6 6
7 7 #-----------------------------------------------------------------------------
8 8 # Copyright (C) 2008-2009 The IPython Development Team
9 9 #
10 10 # Distributed under the terms of the BSD License. The full license is in
11 11 # the file COPYING, distributed as part of this software.
12 12 #-----------------------------------------------------------------------------
13 13
14 14 #-----------------------------------------------------------------------------
15 15 # Imports
16 16 #-----------------------------------------------------------------------------
17 17
18 18 from __future__ import with_statement
19 19
20 20 import copy
21 21 import os
22 22 import logging
23 23 import socket
24 24 import stat
25 25 import sys
26 26 import uuid
27 27
28 28 import zmq
29 29 from zmq.log.handlers import PUBHandler
30 30 from zmq.utils import jsonapi as json
31 31
32 32 from IPython.config.loader import Config
33
33 34 from IPython.parallel import factory
34 from IPython.parallel.controller import ControllerFactory
35 from IPython.parallel.clusterdir import (
35 from .clusterdir import (
36 36 ApplicationWithClusterDir,
37 37 ClusterDirConfigLoader
38 38 )
39 39 from IPython.parallel.util import disambiguate_ip_address, split_url
40 40 # from IPython.kernel.fcutil import FCServiceFactory, FURLError
41 41 from IPython.utils.traitlets import Instance, Unicode
42 42
43 from IPython.parallel.controller.controller import ControllerFactory
43 44
44 45
45 46 #-----------------------------------------------------------------------------
46 47 # Module level variables
47 48 #-----------------------------------------------------------------------------
48 49
49 50
50 51 #: The default config file name for this application
51 52 default_config_file_name = u'ipcontroller_config.py'
52 53
53 54
54 55 _description = """Start the IPython controller for parallel computing.
55 56
56 57 The IPython controller provides a gateway between the IPython engines and
57 58 clients. The controller needs to be started before the engines and can be
58 59 configured using command line options or using a cluster directory. Cluster
59 60 directories contain config, log and security files and are usually located in
60 61 your ipython directory and named as "cluster_<profile>". See the --profile
61 62 and --cluster-dir options for details.
62 63 """
63 64
64 65 #-----------------------------------------------------------------------------
65 66 # Default interfaces
66 67 #-----------------------------------------------------------------------------
67 68
68 69 # The default client interfaces for FCClientServiceFactory.interfaces
69 70 default_client_interfaces = Config()
70 71 default_client_interfaces.Default.url_file = 'ipcontroller-client.url'
71 72
72 73 # Make this a dict we can pass to Config.__init__ for the default
73 74 default_client_interfaces = dict(copy.deepcopy(default_client_interfaces.items()))
74 75
75 76
76 77
77 78 # The default engine interfaces for FCEngineServiceFactory.interfaces
78 79 default_engine_interfaces = Config()
79 80 default_engine_interfaces.Default.url_file = u'ipcontroller-engine.url'
80 81
81 82 # Make this a dict we can pass to Config.__init__ for the default
82 83 default_engine_interfaces = dict(copy.deepcopy(default_engine_interfaces.items()))
83 84
84 85
85 86 #-----------------------------------------------------------------------------
86 87 # Service factories
87 88 #-----------------------------------------------------------------------------
88 89
89 90 #
90 91 # class FCClientServiceFactory(FCServiceFactory):
91 92 # """A Foolscap implementation of the client services."""
92 93 #
93 94 # cert_file = Unicode(u'ipcontroller-client.pem', config=True)
94 95 # interfaces = Instance(klass=Config, kw=default_client_interfaces,
95 96 # allow_none=False, config=True)
96 97 #
97 98 #
98 99 # class FCEngineServiceFactory(FCServiceFactory):
99 100 # """A Foolscap implementation of the engine services."""
100 101 #
101 102 # cert_file = Unicode(u'ipcontroller-engine.pem', config=True)
102 103 # interfaces = Instance(klass=dict, kw=default_engine_interfaces,
103 104 # allow_none=False, config=True)
104 105 #
105 106
106 107 #-----------------------------------------------------------------------------
107 108 # Command line options
108 109 #-----------------------------------------------------------------------------
109 110
110 111
111 112 class IPControllerAppConfigLoader(ClusterDirConfigLoader):
112 113
113 114 def _add_arguments(self):
114 115 super(IPControllerAppConfigLoader, self)._add_arguments()
115 116 paa = self.parser.add_argument
116 117
117 118 ## Hub Config:
118 119 paa('--mongodb',
119 120 dest='HubFactory.db_class', action='store_const',
120 const='IPython.parallel.mongodb.MongoDB',
121 const='IPython.parallel.controller.mongodb.MongoDB',
121 122 help='Use MongoDB for task storage [default: in-memory]')
122 123 paa('--sqlite',
123 124 dest='HubFactory.db_class', action='store_const',
124 const='IPython.parallel.sqlitedb.SQLiteDB',
125 const='IPython.parallel.controller.sqlitedb.SQLiteDB',
125 126 help='Use SQLite3 for DB task storage [default: in-memory]')
126 127 paa('--hb',
127 128 type=int, dest='HubFactory.hb', nargs=2,
128 129 help='The (2) ports the Hub\'s Heartmonitor will use for the heartbeat '
129 130 'connections [default: random]',
130 131 metavar='Hub.hb_ports')
131 132 paa('--ping',
132 133 type=int, dest='HubFactory.ping',
133 134 help='The frequency at which the Hub pings the engines for heartbeats '
134 135 ' (in ms) [default: 100]',
135 136 metavar='Hub.ping')
136 137
137 138 # Client config
138 139 paa('--client-ip',
139 140 type=str, dest='HubFactory.client_ip',
140 141 help='The IP address or hostname the Hub will listen on for '
141 142 'client connections. Both engine-ip and client-ip can be set simultaneously '
142 143 'via --ip [default: loopback]',
143 144 metavar='Hub.client_ip')
144 145 paa('--client-transport',
145 146 type=str, dest='HubFactory.client_transport',
146 147 help='The ZeroMQ transport the Hub will use for '
147 148 'client connections. Both engine-transport and client-transport can be set simultaneously '
148 149 'via --transport [default: tcp]',
149 150 metavar='Hub.client_transport')
150 151 paa('--query',
151 152 type=int, dest='HubFactory.query_port',
152 153 help='The port on which the Hub XREP socket will listen for result queries from clients [default: random]',
153 154 metavar='Hub.query_port')
154 155 paa('--notifier',
155 156 type=int, dest='HubFactory.notifier_port',
156 157 help='The port on which the Hub PUB socket will listen for notification connections [default: random]',
157 158 metavar='Hub.notifier_port')
158 159
159 160 # Engine config
160 161 paa('--engine-ip',
161 162 type=str, dest='HubFactory.engine_ip',
162 163 help='The IP address or hostname the Hub will listen on for '
163 164 'engine connections. This applies to the Hub and its schedulers. '
164 165 'Both engine-ip and client-ip can be set simultaneously '
165 166 'via --ip [default: loopback]',
166 167 metavar='Hub.engine_ip')
167 168 paa('--engine-transport',
168 169 type=str, dest='HubFactory.engine_transport',
169 170 help='The ZeroMQ transport the Hub will use for '
170 171 'engine connections. Both engine-transport and client-transport can be set simultaneously '
171 172 'via --transport [default: tcp]',
172 173 metavar='Hub.engine_transport')
173 174
174 175 # Scheduler config
175 176 paa('--mux',
176 177 type=int, dest='ControllerFactory.mux', nargs=2,
177 178 help='The (2) ports the MUX scheduler will listen on for client,engine '
178 179 'connections, respectively [default: random]',
179 180 metavar='Scheduler.mux_ports')
180 181 paa('--task',
181 182 type=int, dest='ControllerFactory.task', nargs=2,
182 183 help='The (2) ports the Task scheduler will listen on for client,engine '
183 184 'connections, respectively [default: random]',
184 185 metavar='Scheduler.task_ports')
185 186 paa('--control',
186 187 type=int, dest='ControllerFactory.control', nargs=2,
187 188 help='The (2) ports the Control scheduler will listen on for client,engine '
188 189 'connections, respectively [default: random]',
189 190 metavar='Scheduler.control_ports')
190 191 paa('--iopub',
191 192 type=int, dest='ControllerFactory.iopub', nargs=2,
192 193 help='The (2) ports the IOPub scheduler will listen on for client,engine '
193 194 'connections, respectively [default: random]',
194 195 metavar='Scheduler.iopub_ports')
195 196
196 197 paa('--scheme',
197 198 type=str, dest='HubFactory.scheme',
198 199 choices = ['pure', 'lru', 'plainrandom', 'weighted', 'twobin','leastload'],
199 200 help='select the task scheduler scheme [default: Python LRU]',
200 201 metavar='Scheduler.scheme')
201 202 paa('--usethreads',
202 203 dest='ControllerFactory.usethreads', action="store_true",
203 204 help='Use threads instead of processes for the schedulers',
204 205 )
205 206 paa('--hwm',
206 207 dest='ControllerFactory.hwm', type=int,
207 208 help='specify the High Water Mark (HWM) for the downstream '
208 209 'socket in the pure ZMQ scheduler. This is the maximum number '
209 210 'of allowed outstanding tasks on each engine.',
210 211 )
211 212
212 213 ## Global config
213 214 paa('--log-to-file',
214 215 action='store_true', dest='Global.log_to_file',
215 216 help='Log to a file in the log directory (default is stdout)')
216 217 paa('--log-url',
217 218 type=str, dest='Global.log_url',
218 219 help='Broadcast logs to an iploggerz process [default: disabled]')
219 220 paa('-r','--reuse-files',
220 221 action='store_true', dest='Global.reuse_files',
221 222 help='Try to reuse existing json connection files.')
222 223 paa('--no-secure',
223 224 action='store_false', dest='Global.secure',
224 225 help='Turn off execution keys (default).')
225 226 paa('--secure',
226 227 action='store_true', dest='Global.secure',
227 228 help='Turn on execution keys.')
228 229 paa('--execkey',
229 230 type=str, dest='Global.exec_key',
230 231 help='path to a file containing an execution key.',
231 232 metavar='keyfile')
232 233 paa('--ssh',
233 234 type=str, dest='Global.sshserver',
234 235 help='ssh url for clients to use when connecting to the Controller '
235 236 'processes. It should be of the form: [user@]server[:port]. The '
236 237 'Controller\'s listening addresses must be accessible from the ssh server',
237 238 metavar='Global.sshserver')
238 239 paa('--location',
239 240 type=str, dest='Global.location',
240 241 help="The external IP or domain name of this machine, used for disambiguating "
241 242 "engine and client connections.",
242 243 metavar='Global.location')
243 244 factory.add_session_arguments(self.parser)
244 245 factory.add_registration_arguments(self.parser)
245 246
246 247
247 248 #-----------------------------------------------------------------------------
248 249 # The main application
249 250 #-----------------------------------------------------------------------------
250 251
251 252
252 253 class IPControllerApp(ApplicationWithClusterDir):
253 254
254 255 name = u'ipcontroller'
255 256 description = _description
256 257 command_line_loader = IPControllerAppConfigLoader
257 258 default_config_file_name = default_config_file_name
258 259 auto_create_cluster_dir = True
259 260
260 261
261 262 def create_default_config(self):
262 263 super(IPControllerApp, self).create_default_config()
263 264 # Don't set defaults for Global.secure or Global.reuse_furls
264 265 # as those are set in a component.
265 266 self.default_config.Global.import_statements = []
266 267 self.default_config.Global.clean_logs = True
267 268 self.default_config.Global.secure = True
268 269 self.default_config.Global.reuse_files = False
269 270 self.default_config.Global.exec_key = "exec_key.key"
270 271 self.default_config.Global.sshserver = None
271 272 self.default_config.Global.location = None
272 273
273 274 def pre_construct(self):
274 275 super(IPControllerApp, self).pre_construct()
275 276 c = self.master_config
276 277 # The defaults for these are set in FCClientServiceFactory and
277 278 # FCEngineServiceFactory, so we only set them here if the global
278 279 # options have be set to override the class level defaults.
279 280
280 281 # if hasattr(c.Global, 'reuse_furls'):
281 282 # c.FCClientServiceFactory.reuse_furls = c.Global.reuse_furls
282 283 # c.FCEngineServiceFactory.reuse_furls = c.Global.reuse_furls
283 284 # del c.Global.reuse_furls
284 285 # if hasattr(c.Global, 'secure'):
285 286 # c.FCClientServiceFactory.secure = c.Global.secure
286 287 # c.FCEngineServiceFactory.secure = c.Global.secure
287 288 # del c.Global.secure
288 289
289 290 def save_connection_dict(self, fname, cdict):
290 291 """save a connection dict to json file."""
291 292 c = self.master_config
292 293 url = cdict['url']
293 294 location = cdict['location']
294 295 if not location:
295 296 try:
296 297 proto,ip,port = split_url(url)
297 298 except AssertionError:
298 299 pass
299 300 else:
300 301 location = socket.gethostbyname_ex(socket.gethostname())[2][-1]
301 302 cdict['location'] = location
302 303 fname = os.path.join(c.Global.security_dir, fname)
303 304 with open(fname, 'w') as f:
304 305 f.write(json.dumps(cdict, indent=2))
305 306 os.chmod(fname, stat.S_IRUSR|stat.S_IWUSR)
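# A rough sketch (illustrative values only) of the JSON this writes, e.g. to
# security_dir/ipcontroller-client.json:
#   {
#     "exec_key": "e0c9...-hypothetical-uuid",
#     "ssh": null,
#     "url": "tcp://127.0.0.1:10101",
#     "location": "10.0.0.5"
#   }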
306 307
307 308 def load_config_from_json(self):
308 309 """load config from existing json connector files."""
309 310 c = self.master_config
310 311 # load from engine config
311 312 with open(os.path.join(c.Global.security_dir, 'ipcontroller-engine.json')) as f:
312 313 cfg = json.loads(f.read())
313 314 key = c.SessionFactory.exec_key = cfg['exec_key']
314 315 xport,addr = cfg['url'].split('://')
315 316 c.HubFactory.engine_transport = xport
316 317 ip,ports = addr.split(':')
317 318 c.HubFactory.engine_ip = ip
318 319 c.HubFactory.regport = int(ports)
319 320 c.Global.location = cfg['location']
320 321
321 322 # load client config
322 323 with open(os.path.join(c.Global.security_dir, 'ipcontroller-client.json')) as f:
323 324 cfg = json.loads(f.read())
324 325 assert key == cfg['exec_key'], "exec_key mismatch between engine and client keys"
325 326 xport,addr = cfg['url'].split('://')
326 327 c.HubFactory.client_transport = xport
327 328 ip,ports = addr.split(':')
328 329 c.HubFactory.client_ip = ip
329 330 c.Global.sshserver = cfg['ssh']
330 331 assert int(ports) == c.HubFactory.regport, "regport mismatch"
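# e.g. (illustrative) a reused url of 'tcp://127.0.0.1:10101' is split above
# into transport='tcp', ip='127.0.0.1' and regport=10101.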
331 332
332 333 def construct(self):
333 334 # This is the working dir by now.
334 335 sys.path.insert(0, '')
335 336 c = self.master_config
336 337
337 338 self.import_statements()
338 339 reusing = c.Global.reuse_files
339 340 if reusing:
340 341 try:
341 342 self.load_config_from_json()
342 343 except (AssertionError,IOError):
343 344 reusing=False
344 345 # check again, because reusing may have failed:
345 346 if reusing:
346 347 pass
347 348 elif c.Global.secure:
348 349 keyfile = os.path.join(c.Global.security_dir, c.Global.exec_key)
349 350 key = str(uuid.uuid4())
350 351 with open(keyfile, 'w') as f:
351 352 f.write(key)
352 353 os.chmod(keyfile, stat.S_IRUSR|stat.S_IWUSR)
353 354 c.SessionFactory.exec_key = key
354 355 else:
355 356 c.SessionFactory.exec_key = ''
356 357 key = None
357 358
358 359 try:
359 360 self.factory = ControllerFactory(config=c, logname=self.log.name)
360 361 self.start_logging()
361 362 self.factory.construct()
362 363 except:
363 364 self.log.error("Couldn't construct the Controller", exc_info=True)
364 365 self.exit(1)
365 366
366 367 if not reusing:
367 368 # save to new json config files
368 369 f = self.factory
369 370 cdict = {'exec_key' : key,
370 371 'ssh' : c.Global.sshserver,
371 372 'url' : "%s://%s:%s"%(f.client_transport, f.client_ip, f.regport),
372 373 'location' : c.Global.location
373 374 }
374 375 self.save_connection_dict('ipcontroller-client.json', cdict)
375 376 edict = cdict
376 377 edict['url']="%s://%s:%s"%((f.client_transport, f.client_ip, f.regport))
377 378 self.save_connection_dict('ipcontroller-engine.json', edict)
378 379
379 380
380 381 def save_urls(self):
381 382 """save the registration urls to files."""
382 383 c = self.master_config
383 384
384 385 sec_dir = c.Global.security_dir
385 386 cf = self.factory
386 387
387 388 with open(os.path.join(sec_dir, 'ipcontroller-engine.url'), 'w') as f:
388 389 f.write("%s://%s:%s"%(cf.engine_transport, cf.engine_ip, cf.regport))
389 390
390 391 with open(os.path.join(sec_dir, 'ipcontroller-client.url'), 'w') as f:
391 392 f.write("%s://%s:%s"%(cf.client_transport, cf.client_ip, cf.regport))
392 393
393 394
394 395 def import_statements(self):
395 396 statements = self.master_config.Global.import_statements
396 397 for s in statements:
397 398 try:
398 399 self.log.info("Executing statement: '%s'" % s)
399 400 exec s in globals(), locals()
400 401 except:
401 402 self.log.error("Error running statement: %s" % s)
402 403
403 404 def start_logging(self):
404 405 super(IPControllerApp, self).start_logging()
405 406 if self.master_config.Global.log_url:
406 407 context = self.factory.context
407 408 lsock = context.socket(zmq.PUB)
408 409 lsock.connect(self.master_config.Global.log_url)
409 410 handler = PUBHandler(lsock)
410 411 handler.root_topic = 'controller'
411 412 handler.setLevel(self.log_level)
412 413 self.log.addHandler(handler)
413 414 #
414 415 def start_app(self):
415 416 # Start the subprocesses:
416 417 self.factory.start()
417 418 self.write_pid_file(overwrite=True)
418 419 try:
419 420 self.factory.loop.start()
420 421 except KeyboardInterrupt:
421 422 self.log.critical("Interrupted, Exiting...\n")
422 423
423 424
424 425 def launch_new_instance():
425 426 """Create and run the IPython controller"""
426 427 app = IPControllerApp()
427 428 app.start()
428 429
429 430
430 431 if __name__ == '__main__':
431 432 launch_new_instance()
@@ -1,294 +1,295 b''
1 1 #!/usr/bin/env python
2 2 # encoding: utf-8
3 3 """
4 4 The IPython engine application
5 5 """
6 6
7 7 #-----------------------------------------------------------------------------
8 8 # Copyright (C) 2008-2009 The IPython Development Team
9 9 #
10 10 # Distributed under the terms of the BSD License. The full license is in
11 11 # the file COPYING, distributed as part of this software.
12 12 #-----------------------------------------------------------------------------
13 13
14 14 #-----------------------------------------------------------------------------
15 15 # Imports
16 16 #-----------------------------------------------------------------------------
17 17
18 18 import json
19 19 import os
20 20 import sys
21 21
22 22 import zmq
23 23 from zmq.eventloop import ioloop
24 24
25 from IPython.parallel.clusterdir import (
25 from .clusterdir import (
26 26 ApplicationWithClusterDir,
27 27 ClusterDirConfigLoader
28 28 )
29 29 from IPython.zmq.log import EnginePUBHandler
30 30
31 31 from IPython.parallel import factory
32 from IPython.parallel.engine import EngineFactory
33 from IPython.parallel.streamkernel import Kernel
32 from IPython.parallel.engine.engine import EngineFactory
33 from IPython.parallel.engine.streamkernel import Kernel
34 34 from IPython.parallel.util import disambiguate_url
35
35 36 from IPython.utils.importstring import import_item
36 37
37 38
38 39 #-----------------------------------------------------------------------------
39 40 # Module level variables
40 41 #-----------------------------------------------------------------------------
41 42
42 43 #: The default config file name for this application
43 44 default_config_file_name = u'ipengine_config.py'
44 45
45 46
46 47 mpi4py_init = """from mpi4py import MPI as mpi
47 48 mpi.size = mpi.COMM_WORLD.Get_size()
48 49 mpi.rank = mpi.COMM_WORLD.Get_rank()
49 50 """
50 51
51 52
52 53 pytrilinos_init = """from PyTrilinos import Epetra
53 54 class SimpleStruct:
54 55 pass
55 56 mpi = SimpleStruct()
56 57 mpi.rank = 0
57 58 mpi.size = 0
58 59 """
59 60
60 61
61 62 _description = """Start an IPython engine for parallel computing.\n\n
62 63
63 64 IPython engines run in parallel and perform computations on behalf of a client
64 65 and controller. A controller needs to be started before the engines. The
65 66 engine can be configured using command line options or using a cluster
66 67 directory. Cluster directories contain config, log and security files and are
67 68 usually located in your ipython directory and named as "cluster_<profile>".
68 69 See the --profile and --cluster-dir options for details.
69 70 """
70 71
71 72 #-----------------------------------------------------------------------------
72 73 # Command line options
73 74 #-----------------------------------------------------------------------------
74 75
75 76
76 77 class IPEngineAppConfigLoader(ClusterDirConfigLoader):
77 78
78 79 def _add_arguments(self):
79 80 super(IPEngineAppConfigLoader, self)._add_arguments()
80 81 paa = self.parser.add_argument
81 82 # Controller config
82 83 paa('--file', '-f',
83 84 type=unicode, dest='Global.url_file',
84 85 help='The full location of the file containing the connection information for the '
85 86 'controller. If this is not given, the file must be in the '
86 87 'security directory of the cluster directory. This location is '
87 88 'resolved using the --profile and --app-dir options.',
88 89 metavar='Global.url_file')
89 90 # MPI
90 91 paa('--mpi',
91 92 type=str, dest='MPI.use',
92 93 help='How to enable MPI (mpi4py, pytrilinos, or empty string to disable).',
93 94 metavar='MPI.use')
94 95 # Global config
95 96 paa('--log-to-file',
96 97 action='store_true', dest='Global.log_to_file',
97 98 help='Log to a file in the log directory (default is stdout)')
98 99 paa('--log-url',
99 100 dest='Global.log_url',
100 101 help="url of ZMQ logger, as started with iploggerz")
101 102 # paa('--execkey',
102 103 # type=str, dest='Global.exec_key',
103 104 # help='path to a file containing an execution key.',
104 105 # metavar='keyfile')
105 106 # paa('--no-secure',
106 107 # action='store_false', dest='Global.secure',
107 108 # help='Turn off execution keys.')
108 109 # paa('--secure',
109 110 # action='store_true', dest='Global.secure',
110 111 # help='Turn on execution keys (default).')
111 112 # init command
112 113 paa('-c',
113 114 type=str, dest='Global.extra_exec_lines',
114 115 help='specify a command to be run at startup')
115 116
116 117 factory.add_session_arguments(self.parser)
117 118 factory.add_registration_arguments(self.parser)
118 119
119 120
120 121 #-----------------------------------------------------------------------------
121 122 # Main application
122 123 #-----------------------------------------------------------------------------
123 124
124 125
125 126 class IPEngineApp(ApplicationWithClusterDir):
126 127
127 128 name = u'ipengine'
128 129 description = _description
129 130 command_line_loader = IPEngineAppConfigLoader
130 131 default_config_file_name = default_config_file_name
131 132 auto_create_cluster_dir = True
132 133
133 134 def create_default_config(self):
134 135 super(IPEngineApp, self).create_default_config()
135 136
136 137 # The engine should not clean logs as we don't want to remove the
137 138 # active log files of other running engines.
138 139 self.default_config.Global.clean_logs = False
139 140 self.default_config.Global.secure = True
140 141
141 142 # Global config attributes
142 143 self.default_config.Global.exec_lines = []
143 144 self.default_config.Global.extra_exec_lines = ''
144 145
145 146 # Configuration related to the controller
146 147 # This must match the filename (path not included) that the controller
147 148 # used for the FURL file.
148 149 self.default_config.Global.url_file = u''
149 150 self.default_config.Global.url_file_name = u'ipcontroller-engine.json'
150 151 # If given, this is the actual location of the controller's FURL file.
151 152 # If not, this is computed using the profile, app_dir and furl_file_name
152 153 # self.default_config.Global.key_file_name = u'exec_key.key'
153 154 # self.default_config.Global.key_file = u''
154 155
155 156 # MPI related config attributes
156 157 self.default_config.MPI.use = ''
157 158 self.default_config.MPI.mpi4py = mpi4py_init
158 159 self.default_config.MPI.pytrilinos = pytrilinos_init
159 160
160 161 def post_load_command_line_config(self):
161 162 pass
162 163
163 164 def pre_construct(self):
164 165 super(IPEngineApp, self).pre_construct()
165 166 # self.find_cont_url_file()
166 167 self.find_url_file()
167 168 if self.master_config.Global.extra_exec_lines:
168 169 self.master_config.Global.exec_lines.append(self.master_config.Global.extra_exec_lines)
169 170
170 171 # def find_key_file(self):
171 172 # """Set the key file.
172 173 #
173 174 # Here we don't try to actually see if it exists or is valid as that
174 175 # is handled by the connection logic.
175 176 # """
176 177 # config = self.master_config
177 178 # # Find the actual controller key file
178 179 # if not config.Global.key_file:
179 180 # try_this = os.path.join(
180 181 # config.Global.cluster_dir,
181 182 # config.Global.security_dir,
182 183 # config.Global.key_file_name
183 184 # )
184 185 # config.Global.key_file = try_this
185 186
186 187 def find_url_file(self):
187 188 """Set the key file.
188 189
189 190 Here we don't try to actually see if it exists for is valid as that
190 191 is hadled by the connection logic.
191 192 """
192 193 config = self.master_config
193 194 # Find the actual controller url file
194 195 if not config.Global.url_file:
195 196 try_this = os.path.join(
196 197 config.Global.cluster_dir,
197 198 config.Global.security_dir,
198 199 config.Global.url_file_name
199 200 )
200 201 config.Global.url_file = try_this
201 202
202 203 def construct(self):
203 204 # This is the working dir by now.
204 205 sys.path.insert(0, '')
205 206 config = self.master_config
206 207 # if os.path.exists(config.Global.key_file) and config.Global.secure:
207 208 # config.SessionFactory.exec_key = config.Global.key_file
208 209 if os.path.exists(config.Global.url_file):
209 210 with open(config.Global.url_file) as f:
210 211 d = json.loads(f.read())
211 212 for k,v in d.iteritems():
212 213 if isinstance(v, unicode):
213 214 d[k] = v.encode()
214 215 if d['exec_key']:
215 216 config.SessionFactory.exec_key = d['exec_key']
216 217 d['url'] = disambiguate_url(d['url'], d['location'])
217 218 config.RegistrationFactory.url=d['url']
218 219 config.EngineFactory.location = d['location']
219 220
220 221
221 222
222 223 config.Kernel.exec_lines = config.Global.exec_lines
223 224
224 225 self.start_mpi()
225 226
226 227 # Create the underlying shell class and EngineService
227 228 # shell_class = import_item(self.master_config.Global.shell_class)
228 229 try:
229 230 self.engine = EngineFactory(config=config, logname=self.log.name)
230 231 except:
231 232 self.log.error("Couldn't start the Engine", exc_info=True)
232 233 self.exit(1)
233 234
234 235 self.start_logging()
235 236
236 237 # Create the service hierarchy
237 238 # self.main_service = service.MultiService()
238 239 # self.engine_service.setServiceParent(self.main_service)
239 240 # self.tub_service = Tub()
240 241 # self.tub_service.setServiceParent(self.main_service)
241 242 # # This needs to be called before the connection is initiated
242 243 # self.main_service.startService()
243 244
244 245 # This initiates the connection to the controller and calls
245 246 # register_engine to tell the controller we are ready to do work
246 247 # self.engine_connector = EngineConnector(self.tub_service)
247 248
248 249 # self.log.info("Using furl file: %s" % self.master_config.Global.furl_file)
249 250
250 251 # reactor.callWhenRunning(self.call_connect)
251 252
252 253
253 254 def start_logging(self):
254 255 super(IPEngineApp, self).start_logging()
255 256 if self.master_config.Global.log_url:
256 257 context = self.engine.context
257 258 lsock = context.socket(zmq.PUB)
258 259 lsock.connect(self.master_config.Global.log_url)
259 260 handler = EnginePUBHandler(self.engine, lsock)
260 261 handler.setLevel(self.log_level)
261 262 self.log.addHandler(handler)
262 263
263 264 def start_mpi(self):
264 265 global mpi
265 266 mpikey = self.master_config.MPI.use
266 267 mpi_import_statement = self.master_config.MPI.get(mpikey, None)
267 268 if mpi_import_statement is not None:
268 269 try:
269 270 self.log.info("Initializing MPI:")
270 271 self.log.info(mpi_import_statement)
271 272 exec mpi_import_statement in globals()
272 273 except:
273 274 mpi = None
274 275 else:
275 276 mpi = None
276 277
277 278
278 279 def start_app(self):
279 280 self.engine.start()
280 281 try:
281 282 self.engine.loop.start()
282 283 except KeyboardInterrupt:
283 284 self.log.critical("Engine Interrupted, shutting down...\n")
284 285
285 286
286 287 def launch_new_instance():
287 288 """Create and run the IPython controller"""
288 289 app = IPEngineApp()
289 290 app.start()
290 291
291 292
292 293 if __name__ == '__main__':
293 294 launch_new_instance()
294 295
@@ -1,132 +1,132 b''
1 1 #!/usr/bin/env python
2 2 # encoding: utf-8
3 3 """
4 4 A simple IPython logger application
5 5 """
6 6
7 7 #-----------------------------------------------------------------------------
8 8 # Copyright (C) 2011 The IPython Development Team
9 9 #
10 10 # Distributed under the terms of the BSD License. The full license is in
11 11 # the file COPYING, distributed as part of this software.
12 12 #-----------------------------------------------------------------------------
13 13
14 14 #-----------------------------------------------------------------------------
15 15 # Imports
16 16 #-----------------------------------------------------------------------------
17 17
18 18 import os
19 19 import sys
20 20
21 21 import zmq
22 22
23 from IPython.parallel.clusterdir import (
23 from .clusterdir import (
24 24 ApplicationWithClusterDir,
25 25 ClusterDirConfigLoader
26 26 )
27 27 from .logwatcher import LogWatcher
28 28
29 29 #-----------------------------------------------------------------------------
30 30 # Module level variables
31 31 #-----------------------------------------------------------------------------
32 32
33 33 #: The default config file name for this application
34 34 default_config_file_name = u'iplogger_config.py'
35 35
36 36 _description = """Start an IPython logger for parallel computing.\n\n
37 37
38 38 IPython controllers and engines (and your own processes) can broadcast log messages
39 39 by registering a `zmq.log.handlers.PUBHandler` with the `logging` module. The
40 40 logger can be configured using command line options or using a cluster
41 41 directory. Cluster directories contain config, log and security files and are
42 42 usually located in your ipython directory and named as "cluster_<profile>".
43 43 See the --profile and --cluster-dir options for details.
44 44 """
45 45
46 46 #-----------------------------------------------------------------------------
47 47 # Command line options
48 48 #-----------------------------------------------------------------------------
49 49
50 50
51 51 class IPLoggerAppConfigLoader(ClusterDirConfigLoader):
52 52
53 53 def _add_arguments(self):
54 54 super(IPLoggerAppConfigLoader, self)._add_arguments()
55 55 paa = self.parser.add_argument
56 56 # Controller config
57 57 paa('--url',
58 58 type=str, dest='LogWatcher.url',
59 59 help='The url the LogWatcher will listen on',
60 60 )
61 61 # MPI
62 62 paa('--topics',
63 63 type=str, dest='LogWatcher.topics', nargs='+',
64 64 help='What topics to subscribe to',
65 65 metavar='topics')
66 66 # Global config
67 67 paa('--log-to-file',
68 68 action='store_true', dest='Global.log_to_file',
69 69 help='Log to a file in the log directory (default is stdout)')
70 70
71 71
72 72 #-----------------------------------------------------------------------------
73 73 # Main application
74 74 #-----------------------------------------------------------------------------
75 75
76 76
77 77 class IPLoggerApp(ApplicationWithClusterDir):
78 78
79 79 name = u'iploggerz'
80 80 description = _description
81 81 command_line_loader = IPLoggerAppConfigLoader
82 82 default_config_file_name = default_config_file_name
83 83 auto_create_cluster_dir = True
84 84
85 85 def create_default_config(self):
86 86 super(IPLoggerApp, self).create_default_config()
87 87
88 88 # The logger should not clean logs as we don't want to remove the
89 89 # active log files of other running engines.
90 90 self.default_config.Global.clean_logs = False
91 91
92 92 # If given, this is the actual location of the logger's URL file.
93 93 # If not, this is computed using the profile, app_dir and furl_file_name
94 94 self.default_config.Global.url_file_name = u'iplogger.url'
95 95 self.default_config.Global.url_file = u''
96 96
97 97 def post_load_command_line_config(self):
98 98 pass
99 99
100 100 def pre_construct(self):
101 101 super(IPLoggerApp, self).pre_construct()
102 102
103 103 def construct(self):
104 104 # This is the working dir by now.
105 105 sys.path.insert(0, '')
106 106
107 107 self.start_logging()
108 108
109 109 try:
110 110 self.watcher = LogWatcher(config=self.master_config, logname=self.log.name)
111 111 except:
112 112 self.log.error("Couldn't start the LogWatcher", exc_info=True)
113 113 self.exit(1)
114 114
115 115
116 116 def start_app(self):
117 117 try:
118 118 self.watcher.start()
119 119 self.watcher.loop.start()
120 120 except KeyboardInterrupt:
121 121 self.log.critical("Logging Interrupted, shutting down...\n")
122 122
123 123
124 124 def launch_new_instance():
125 125 """Create and run the IPython LogWatcher"""
126 126 app = IPLoggerApp()
127 127 app.start()
128 128
129 129
130 130 if __name__ == '__main__':
131 131 launch_new_instance()
132 132
@@ -1,971 +1,971 b''
1 1 #!/usr/bin/env python
2 2 # encoding: utf-8
3 3 """
4 4 Facilities for launching IPython processes asynchronously.
5 5 """
6 6
7 7 #-----------------------------------------------------------------------------
8 8 # Copyright (C) 2008-2009 The IPython Development Team
9 9 #
10 10 # Distributed under the terms of the BSD License. The full license is in
11 11 # the file COPYING, distributed as part of this software.
12 12 #-----------------------------------------------------------------------------
13 13
14 14 #-----------------------------------------------------------------------------
15 15 # Imports
16 16 #-----------------------------------------------------------------------------
17 17
18 18 import copy
19 19 import logging
20 20 import os
21 21 import re
22 22 import stat
23 23
24 24 from signal import SIGINT, SIGTERM
25 25 try:
26 26 from signal import SIGKILL
27 27 except ImportError:
28 28 SIGKILL=SIGTERM
29 29
30 30 from subprocess import Popen, PIPE, STDOUT
31 31 try:
32 32 from subprocess import check_output
33 33 except ImportError:
34 34 # pre-2.7, define check_output with Popen
35 35 def check_output(*args, **kwargs):
36 36 kwargs.update(dict(stdout=PIPE))
37 37 p = Popen(*args, **kwargs)
38 38 out,err = p.communicate()
39 39 return out
40 40
41 41 from zmq.eventloop import ioloop
42 42
43 43 from IPython.external import Itpl
44 44 # from IPython.config.configurable import Configurable
45 45 from IPython.utils.traitlets import Any, Str, Int, List, Unicode, Dict, Instance, CUnicode
46 46 from IPython.utils.path import get_ipython_module_path
47 47 from IPython.utils.process import find_cmd, pycmd2argv, FindCmdError
48 48
49 from .factory import LoggingFactory
49 from IPython.parallel.factory import LoggingFactory
50 50
51 51 # load winhpcjob only on Windows
52 52 try:
53 53 from .winhpcjob import (
54 54 IPControllerTask, IPEngineTask,
55 55 IPControllerJob, IPEngineSetJob
56 56 )
57 57 except ImportError:
58 58 pass
59 59
60 60
61 61 #-----------------------------------------------------------------------------
62 62 # Paths to the kernel apps
63 63 #-----------------------------------------------------------------------------
64 64
65 65
66 66 ipcluster_cmd_argv = pycmd2argv(get_ipython_module_path(
67 'IPython.parallel.ipclusterapp'
67 'IPython.parallel.apps.ipclusterapp'
68 68 ))
69 69
70 70 ipengine_cmd_argv = pycmd2argv(get_ipython_module_path(
71 'IPython.parallel.ipengineapp'
71 'IPython.parallel.apps.ipengineapp'
72 72 ))
73 73
74 74 ipcontroller_cmd_argv = pycmd2argv(get_ipython_module_path(
75 'IPython.parallel.ipcontrollerapp'
75 'IPython.parallel.apps.ipcontrollerapp'
76 76 ))
77 77
78 78 #-----------------------------------------------------------------------------
79 79 # Base launchers and errors
80 80 #-----------------------------------------------------------------------------
81 81
82 82
83 83 class LauncherError(Exception):
84 84 pass
85 85
86 86
87 87 class ProcessStateError(LauncherError):
88 88 pass
89 89
90 90
91 91 class UnknownStatus(LauncherError):
92 92 pass
93 93
94 94
95 95 class BaseLauncher(LoggingFactory):
96 96 """An asbtraction for starting, stopping and signaling a process."""
97 97
98 98 # In all of the launchers, the work_dir is where child processes will be
99 99 # run. This will usually be the cluster_dir, but may not be. any work_dir
100 100 # passed into the __init__ method will override the config value.
101 101 # This should not be used to set the work_dir for the actual engine
102 102 # and controller. Instead, use their own config files or the
103 103 # controller_args, engine_args attributes of the launchers to add
104 104 # the --work-dir option.
105 105 work_dir = Unicode(u'.')
106 106 loop = Instance('zmq.eventloop.ioloop.IOLoop')
107 107
108 108 start_data = Any()
109 109 stop_data = Any()
110 110
111 111 def _loop_default(self):
112 112 return ioloop.IOLoop.instance()
113 113
114 114 def __init__(self, work_dir=u'.', config=None, **kwargs):
115 115 super(BaseLauncher, self).__init__(work_dir=work_dir, config=config, **kwargs)
116 116 self.state = 'before' # can be before, running, after
117 117 self.stop_callbacks = []
118 118 self.start_data = None
119 119 self.stop_data = None
120 120
121 121 @property
122 122 def args(self):
123 123 """A list of cmd and args that will be used to start the process.
124 124
125 125 This is what is passed to :func:`spawnProcess` and the first element
126 126 will be the process name.
127 127 """
128 128 return self.find_args()
129 129
130 130 def find_args(self):
131 131 """The ``.args`` property calls this to find the args list.
132 132
133 133 Subclasses should implement this to construct the cmd and args.
134 134 """
135 135 raise NotImplementedError('find_args must be implemented in a subclass')
136 136
137 137 @property
138 138 def arg_str(self):
139 139 """The string form of the program arguments."""
140 140 return ' '.join(self.args)
141 141
142 142 @property
143 143 def running(self):
144 144 """Am I running."""
145 145 if self.state == 'running':
146 146 return True
147 147 else:
148 148 return False
149 149
150 150 def start(self):
151 151 """Start the process.
152 152
153 153 This must return a deferred that fires with information about the
154 154 process starting (like a pid, job id, etc.).
155 155 """
156 156 raise NotImplementedError('start must be implemented in a subclass')
157 157
158 158 def stop(self):
159 159 """Stop the process and notify observers of stopping.
160 160
161 161 This must return a deferred that fires with information about the
162 162 processing stopping, like errors that occur while the process is
163 163 attempting to be shut down. This deferred won't fire when the process
164 164 actually stops. To observe the actual process stopping, see
165 165 :func:`observe_stop`.
166 166 """
167 167 raise NotImplementedError('stop must be implemented in a subclass')
168 168
169 169 def on_stop(self, f):
170 170 """Get a deferred that will fire when the process stops.
171 171
172 172 The deferred will fire with data that contains information about
173 173 the exit status of the process.
174 174 """
175 175 if self.state=='after':
176 176 return f(self.stop_data)
177 177 else:
178 178 self.stop_callbacks.append(f)
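# Illustrative (hypothetical) use: register a callback that receives the stop data,
#   launcher.on_stop(lambda data: sys.stdout.write('stopped: %r\n' % data))
# For LocalProcessLauncher that data is a dict with 'exit_code' and 'pid'.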
179 179
180 180 def notify_start(self, data):
181 181 """Call this to trigger startup actions.
182 182
183 183 This logs the process startup and sets the state to 'running'. It is
184 184 a pass-through so it can be used as a callback.
185 185 """
186 186
187 187 self.log.info('Process %r started: %r' % (self.args[0], data))
188 188 self.start_data = data
189 189 self.state = 'running'
190 190 return data
191 191
192 192 def notify_stop(self, data):
193 193 """Call this to trigger process stop actions.
194 194
195 195 This logs the process stopping and sets the state to 'after'. Call
196 196 this to trigger all the deferreds from :func:`observe_stop`."""
197 197
198 198 self.log.info('Process %r stopped: %r' % (self.args[0], data))
199 199 self.stop_data = data
200 200 self.state = 'after'
201 201 for i in range(len(self.stop_callbacks)):
202 202 d = self.stop_callbacks.pop()
203 203 d(data)
204 204 return data
205 205
206 206 def signal(self, sig):
207 207 """Signal the process.
208 208
209 209 Return a semi-meaningless deferred after signaling the process.
210 210
211 211 Parameters
212 212 ----------
213 213 sig : str or int
214 214 'KILL', 'INT', etc., or any signal number
215 215 """
216 216 raise NotImplementedError('signal must be implemented in a subclass')
217 217
218 218
219 219 #-----------------------------------------------------------------------------
220 220 # Local process launchers
221 221 #-----------------------------------------------------------------------------
222 222
223 223
224 224 class LocalProcessLauncher(BaseLauncher):
225 225 """Start and stop an external process in an asynchronous manner.
226 226
227 227 This will launch the external process with a working directory of
228 228 ``self.work_dir``.
229 229 """
230 230
231 231 # This is used to construct self.args, which is passed to
232 232 # spawnProcess.
233 233 cmd_and_args = List([])
234 234 poll_frequency = Int(100) # in ms
235 235
236 236 def __init__(self, work_dir=u'.', config=None, **kwargs):
237 237 super(LocalProcessLauncher, self).__init__(
238 238 work_dir=work_dir, config=config, **kwargs
239 239 )
240 240 self.process = None
241 241 self.start_deferred = None
242 242 self.poller = None
243 243
244 244 def find_args(self):
245 245 return self.cmd_and_args
246 246
247 247 def start(self):
248 248 if self.state == 'before':
249 249 self.process = Popen(self.args,
250 250 stdout=PIPE,stderr=PIPE,stdin=PIPE,
251 251 env=os.environ,
252 252 cwd=self.work_dir
253 253 )
254 254
255 255 self.loop.add_handler(self.process.stdout.fileno(), self.handle_stdout, self.loop.READ)
256 256 self.loop.add_handler(self.process.stderr.fileno(), self.handle_stderr, self.loop.READ)
257 257 self.poller = ioloop.PeriodicCallback(self.poll, self.poll_frequency, self.loop)
258 258 self.poller.start()
259 259 self.notify_start(self.process.pid)
260 260 else:
261 261 s = 'The process was already started and has state: %r' % self.state
262 262 raise ProcessStateError(s)
263 263
264 264 def stop(self):
265 265 return self.interrupt_then_kill()
266 266
267 267 def signal(self, sig):
268 268 if self.state == 'running':
269 269 self.process.send_signal(sig)
270 270
271 271 def interrupt_then_kill(self, delay=2.0):
272 272 """Send INT, wait a delay and then send KILL."""
273 273 self.signal(SIGINT)
274 274 self.killer = ioloop.DelayedCallback(lambda : self.signal(SIGKILL), delay*1000, self.loop)
275 275 self.killer.start()
276 276
277 277 # callbacks, etc:
278 278
279 279 def handle_stdout(self, fd, events):
280 280 line = self.process.stdout.readline()
281 281 # a stopped process will be readable but return empty strings
282 282 if line:
283 283 self.log.info(line[:-1])
284 284 else:
285 285 self.poll()
286 286
287 287 def handle_stderr(self, fd, events):
288 288 line = self.process.stderr.readline()
289 289 # a stopped process will be readable but return empty strings
290 290 if line:
291 291 self.log.error(line[:-1])
292 292 else:
293 293 self.poll()
294 294
295 295 def poll(self):
296 296 status = self.process.poll()
297 297 if status is not None:
298 298 self.poller.stop()
299 299 self.loop.remove_handler(self.process.stdout.fileno())
300 300 self.loop.remove_handler(self.process.stderr.fileno())
301 301 self.notify_stop(dict(exit_code=status, pid=self.process.pid))
302 302 return status
303 303
304 304 class LocalControllerLauncher(LocalProcessLauncher):
305 305 """Launch a controller as a regular external process."""
306 306
307 307 controller_cmd = List(ipcontroller_cmd_argv, config=True)
308 308 # Command line arguments to ipcontroller.
309 309 controller_args = List(['--log-to-file','--log-level', str(logging.INFO)], config=True)
310 310
311 311 def find_args(self):
312 312 return self.controller_cmd + self.controller_args
313 313
314 314 def start(self, cluster_dir):
315 315 """Start the controller by cluster_dir."""
316 316 self.controller_args.extend(['--cluster-dir', cluster_dir])
317 317 self.cluster_dir = unicode(cluster_dir)
318 318 self.log.info("Starting LocalControllerLauncher: %r" % self.args)
319 319 return super(LocalControllerLauncher, self).start()
320 320
321 321
322 322 class LocalEngineLauncher(LocalProcessLauncher):
323 323 """Launch a single engine as a regular externall process."""
324 324
325 325 engine_cmd = List(ipengine_cmd_argv, config=True)
326 326 # Command line arguments for ipengine.
327 327 engine_args = List(
328 328 ['--log-to-file','--log-level', str(logging.INFO)], config=True
329 329 )
330 330
331 331 def find_args(self):
332 332 return self.engine_cmd + self.engine_args
333 333
334 334 def start(self, cluster_dir):
335 335 """Start the engine by cluster_dir."""
336 336 self.engine_args.extend(['--cluster-dir', cluster_dir])
337 337 self.cluster_dir = unicode(cluster_dir)
338 338 return super(LocalEngineLauncher, self).start()
339 339
340 340
341 341 class LocalEngineSetLauncher(BaseLauncher):
342 342 """Launch a set of engines as regular external processes."""
343 343
344 344 # Command line arguments for ipengine.
345 345 engine_args = List(
346 346 ['--log-to-file','--log-level', str(logging.INFO)], config=True
347 347 )
348 348 # launcher class
349 349 launcher_class = LocalEngineLauncher
350 350
351 351 launchers = Dict()
352 352 stop_data = Dict()
353 353
354 354 def __init__(self, work_dir=u'.', config=None, **kwargs):
355 355 super(LocalEngineSetLauncher, self).__init__(
356 356 work_dir=work_dir, config=config, **kwargs
357 357 )
358 358 self.stop_data = {}
359 359
360 360 def start(self, n, cluster_dir):
361 361 """Start n engines by profile or cluster_dir."""
362 362 self.cluster_dir = unicode(cluster_dir)
363 363 dlist = []
364 364 for i in range(n):
365 365 el = self.launcher_class(work_dir=self.work_dir, config=self.config, logname=self.log.name)
366 366 # Copy the engine args over to each engine launcher.
367 367 el.engine_args = copy.deepcopy(self.engine_args)
368 368 el.on_stop(self._notice_engine_stopped)
369 369 d = el.start(cluster_dir)
370 370 if i==0:
371 371 self.log.info("Starting LocalEngineSetLauncher: %r" % el.args)
372 372 self.launchers[i] = el
373 373 dlist.append(d)
374 374 self.notify_start(dlist)
375 375 # The consumeErrors here could be dangerous
376 376 # dfinal = gatherBoth(dlist, consumeErrors=True)
377 377 # dfinal.addCallback(self.notify_start)
378 378 return dlist
379 379
380 380 def find_args(self):
381 381 return ['engine set']
382 382
383 383 def signal(self, sig):
384 384 dlist = []
385 385 for el in self.launchers.itervalues():
386 386 d = el.signal(sig)
387 387 dlist.append(d)
388 388 # dfinal = gatherBoth(dlist, consumeErrors=True)
389 389 return dlist
390 390
391 391 def interrupt_then_kill(self, delay=1.0):
392 392 dlist = []
393 393 for el in self.launchers.itervalues():
394 394 d = el.interrupt_then_kill(delay)
395 395 dlist.append(d)
396 396 # dfinal = gatherBoth(dlist, consumeErrors=True)
397 397 return dlist
398 398
399 399 def stop(self):
400 400 return self.interrupt_then_kill()
401 401
402 402 def _notice_engine_stopped(self, data):
403 403 pid = data['pid']
404 404 for idx,el in self.launchers.iteritems():
405 405 if el.process.pid == pid:
406 406 break
407 407 self.launchers.pop(idx)
408 408 self.stop_data[idx] = data
409 409 if not self.launchers:
410 410 self.notify_stop(self.stop_data)
411 411
412 412
413 413 #-----------------------------------------------------------------------------
414 414 # MPIExec launchers
415 415 #-----------------------------------------------------------------------------
416 416
417 417
418 418 class MPIExecLauncher(LocalProcessLauncher):
419 419 """Launch an external process using mpiexec."""
420 420
421 421 # The mpiexec command to use in starting the process.
422 422 mpi_cmd = List(['mpiexec'], config=True)
423 423 # The command line arguments to pass to mpiexec.
424 424 mpi_args = List([], config=True)
425 425 # The program to start using mpiexec.
426 426 program = List(['date'], config=True)
427 427 # The command line argument to the program.
428 428 program_args = List([], config=True)
429 429 # The number of instances of the program to start.
430 430 n = Int(1, config=True)
431 431
432 432 def find_args(self):
433 433 """Build self.args using all the fields."""
434 434 return self.mpi_cmd + ['-n', str(self.n)] + self.mpi_args + \
435 435 self.program + self.program_args
436 436
437 437 def start(self, n):
438 438 """Start n instances of the program using mpiexec."""
439 439 self.n = n
440 440 return super(MPIExecLauncher, self).start()
441 441
442 442
443 443 class MPIExecControllerLauncher(MPIExecLauncher):
444 444 """Launch a controller using mpiexec."""
445 445
446 446 controller_cmd = List(ipcontroller_cmd_argv, config=True)
447 447 # Command line arguments to ipcontroller.
448 448 controller_args = List(['--log-to-file','--log-level', str(logging.INFO)], config=True)
449 449 n = Int(1, config=False)
450 450
451 451 def start(self, cluster_dir):
452 452 """Start the controller by cluster_dir."""
453 453 self.controller_args.extend(['--cluster-dir', cluster_dir])
454 454 self.cluster_dir = unicode(cluster_dir)
455 455 self.log.info("Starting MPIExecControllerLauncher: %r" % self.args)
456 456 return super(MPIExecControllerLauncher, self).start(1)
457 457
458 458 def find_args(self):
459 459 return self.mpi_cmd + ['-n', self.n] + self.mpi_args + \
460 460 self.controller_cmd + self.controller_args
461 461
462 462
463 463 class MPIExecEngineSetLauncher(MPIExecLauncher):
464 464
465 465 program = List(ipengine_cmd_argv, config=True)
466 466 # Command line arguments for ipengine.
467 467 program_args = List(
468 468 ['--log-to-file','--log-level', str(logging.INFO)], config=True
469 469 )
470 470 n = Int(1, config=True)
471 471
472 472 def start(self, n, cluster_dir):
473 473 """Start n engines by profile or cluster_dir."""
474 474 self.program_args.extend(['--cluster-dir', cluster_dir])
475 475 self.cluster_dir = unicode(cluster_dir)
476 476 self.n = n
477 477 self.log.info('Starting MPIExecEngineSetLauncher: %r' % self.args)
478 478 return super(MPIExecEngineSetLauncher, self).start(n)
479 479
480 480 #-----------------------------------------------------------------------------
481 481 # SSH launchers
482 482 #-----------------------------------------------------------------------------
483 483
484 484 # TODO: Get SSH Launcher working again.
485 485
486 486 class SSHLauncher(LocalProcessLauncher):
487 487 """A minimal launcher for ssh.
488 488
489 489 To be useful this will probably have to be extended to use the ``sshx``
490 490 idea for environment variables. There could be other things this needs
491 491 as well.
492 492 """
493 493
494 494 ssh_cmd = List(['ssh'], config=True)
495 495 ssh_args = List(['-tt'], config=True)
496 496 program = List(['date'], config=True)
497 497 program_args = List([], config=True)
498 498 hostname = CUnicode('', config=True)
499 499 user = CUnicode('', config=True)
500 500 location = CUnicode('')
501 501
502 502 def _hostname_changed(self, name, old, new):
503 503 if self.user:
504 504 self.location = u'%s@%s' % (self.user, new)
505 505 else:
506 506 self.location = new
507 507
508 508 def _user_changed(self, name, old, new):
509 509 self.location = u'%s@%s' % (new, self.hostname)
510 510
511 511 def find_args(self):
512 512 return self.ssh_cmd + self.ssh_args + [self.location] + \
513 513 self.program + self.program_args
514 514
515 515 def start(self, cluster_dir, hostname=None, user=None):
516 516 self.cluster_dir = unicode(cluster_dir)
517 517 if hostname is not None:
518 518 self.hostname = hostname
519 519 if user is not None:
520 520 self.user = user
521 521
522 522 return super(SSHLauncher, self).start()
523 523
524 524 def signal(self, sig):
525 525 if self.state == 'running':
526 526 # send escaped ssh connection-closer
527 527 self.process.stdin.write('~.')
528 528 self.process.stdin.flush()
529 529
530 530
531 531
532 532 class SSHControllerLauncher(SSHLauncher):
533 533
534 534 program = List(ipcontroller_cmd_argv, config=True)
535 535 # Command line arguments to ipcontroller.
536 536 program_args = List(['-r', '--log-to-file','--log-level', str(logging.INFO)], config=True)
537 537
538 538
539 539 class SSHEngineLauncher(SSHLauncher):
540 540 program = List(ipengine_cmd_argv, config=True)
541 541 # Command line arguments for ipengine.
542 542 program_args = List(
543 543 ['--log-to-file','--log-level', str(logging.INFO)], config=True
544 544 )
545 545
546 546 class SSHEngineSetLauncher(LocalEngineSetLauncher):
547 547 launcher_class = SSHEngineLauncher
548 548 engines = Dict(config=True)
549 549
550 550 def start(self, n, cluster_dir):
551 551 """Start engines by profile or cluster_dir.
552 552 `n` is ignored, and the `engines` config property is used instead.
553 553 """
554 554
555 555 self.cluster_dir = unicode(cluster_dir)
556 556 dlist = []
557 557 for host, n in self.engines.iteritems():
558 558 if isinstance(n, (tuple, list)):
559 559 n, args = n
560 560 else:
561 561 args = copy.deepcopy(self.engine_args)
562 562
563 563 if '@' in host:
564 564 user,host = host.split('@',1)
565 565 else:
566 566 user=None
567 567 for i in range(n):
568 568 el = self.launcher_class(work_dir=self.work_dir, config=self.config, logname=self.log.name)
569 569
570 570 # Copy the engine args over to each engine launcher.
571 571 i
572 572 el.program_args = args
573 573 el.on_stop(self._notice_engine_stopped)
574 574 d = el.start(cluster_dir, user=user, hostname=host)
575 575 if i==0:
576 576 self.log.info("Starting SSHEngineSetLauncher: %r" % el.args)
577 577 self.launchers[host+str(i)] = el
578 578 dlist.append(d)
579 579 self.notify_start(dlist)
580 580 return dlist
581 581
582 582
583 583
584 584 #-----------------------------------------------------------------------------
585 585 # Windows HPC Server 2008 scheduler launchers
586 586 #-----------------------------------------------------------------------------
587 587
588 588
589 589 # This is only used on Windows.
590 590 def find_job_cmd():
591 591 if os.name=='nt':
592 592 try:
593 593 return find_cmd('job')
594 594 except FindCmdError:
595 595 return 'job'
596 596 else:
597 597 return 'job'
598 598
599 599
600 600 class WindowsHPCLauncher(BaseLauncher):
601 601
602 602 # A regular expression used to get the job id from the output of the
603 603 # submit_command.
604 604 job_id_regexp = Str(r'\d+', config=True)
605 605 # The filename of the instantiated job script.
606 606 job_file_name = CUnicode(u'ipython_job.xml', config=True)
607 607 # The full path to the instantiated job script. This gets made dynamically
608 608 # by combining the work_dir with the job_file_name.
609 609 job_file = CUnicode(u'')
610 610 # The hostname of the scheduler to submit the job to
611 611 scheduler = CUnicode('', config=True)
612 612 job_cmd = CUnicode(find_job_cmd(), config=True)
613 613
614 614 def __init__(self, work_dir=u'.', config=None, **kwargs):
615 615 super(WindowsHPCLauncher, self).__init__(
616 616 work_dir=work_dir, config=config, **kwargs
617 617 )
618 618
619 619 @property
620 620 def job_file(self):
621 621 return os.path.join(self.work_dir, self.job_file_name)
622 622
623 623 def write_job_file(self, n):
624 624 raise NotImplementedError("Implement write_job_file in a subclass.")
625 625
626 626 def find_args(self):
627 627 return [u'job.exe']
628 628
629 629 def parse_job_id(self, output):
630 630 """Take the output of the submit command and return the job id."""
631 631 m = re.search(self.job_id_regexp, output)
632 632 if m is not None:
633 633 job_id = m.group()
634 634 else:
635 635 raise LauncherError("Job id couldn't be determined: %s" % output)
636 636 self.job_id = job_id
637 637 self.log.info('Job started with job id: %r' % job_id)
638 638 return job_id
639 639
640 640 def start(self, n):
641 641 """Start n copies of the process using the Win HPC job scheduler."""
642 642 self.write_job_file(n)
643 643 args = [
644 644 'submit',
645 645 '/jobfile:%s' % self.job_file,
646 646 '/scheduler:%s' % self.scheduler
647 647 ]
648 648 self.log.info("Starting Win HPC Job: %s" % (self.job_cmd + ' ' + ' '.join(args),))
649 649 # Twisted will raise DeprecationWarnings if we try to pass unicode to this
650 650 output = check_output([self.job_cmd]+args,
651 651 env=os.environ,
652 652 cwd=self.work_dir,
653 653 stderr=STDOUT
654 654 )
655 655 job_id = self.parse_job_id(output)
656 656 self.notify_start(job_id)
657 657 return job_id
658 658
659 659 def stop(self):
660 660 args = [
661 661 'cancel',
662 662 self.job_id,
663 663 '/scheduler:%s' % self.scheduler
664 664 ]
665 665 self.log.info("Stopping Win HPC Job: %s" % (self.job_cmd + ' ' + ' '.join(args),))
666 666 try:
667 667 output = check_output([self.job_cmd]+args,
668 668 env=os.environ,
669 669 cwd=self.work_dir,
670 670 stderr=STDOUT
671 671 )
672 672 except:
673 673 output = 'The job already appears to be stopped: %r' % self.job_id
674 674 self.notify_stop(dict(job_id=self.job_id, output=output)) # Pass the output of the kill cmd
675 675 return output
676 676
677 677
678 678 class WindowsHPCControllerLauncher(WindowsHPCLauncher):
679 679
680 680 job_file_name = CUnicode(u'ipcontroller_job.xml', config=True)
681 681 extra_args = List([], config=False)
682 682
683 683 def write_job_file(self, n):
684 684 job = IPControllerJob(config=self.config)
685 685
686 686 t = IPControllerTask(config=self.config)
687 687 # The task's work directory is *not* the actual work directory of
688 688 # the controller. It is used as the base path for the stdout/stderr
689 689 # files that the scheduler redirects to.
690 690 t.work_directory = self.cluster_dir
691 691 # Add the --cluster-dir argument set in self.start().
692 692 t.controller_args.extend(self.extra_args)
693 693 job.add_task(t)
694 694
695 695 self.log.info("Writing job description file: %s" % self.job_file)
696 696 job.write(self.job_file)
697 697
698 698 @property
699 699 def job_file(self):
700 700 return os.path.join(self.cluster_dir, self.job_file_name)
701 701
702 702 def start(self, cluster_dir):
703 703 """Start the controller by cluster_dir."""
704 704 self.extra_args = ['--cluster-dir', cluster_dir]
705 705 self.cluster_dir = unicode(cluster_dir)
706 706 return super(WindowsHPCControllerLauncher, self).start(1)
707 707
708 708
709 709 class WindowsHPCEngineSetLauncher(WindowsHPCLauncher):
710 710
711 711 job_file_name = CUnicode(u'ipengineset_job.xml', config=True)
712 712 extra_args = List([], config=False)
713 713
714 714 def write_job_file(self, n):
715 715 job = IPEngineSetJob(config=self.config)
716 716
717 717 for i in range(n):
718 718 t = IPEngineTask(config=self.config)
719 719 # The task's work directory is *not* the actual work directory of
720 720 # the engine. It is used as the base path for the stdout/stderr
721 721 # files that the scheduler redirects to.
722 722 t.work_directory = self.cluster_dir
723 723 # Add the --cluster-dir argument set in self.start().
724 724 t.engine_args.extend(self.extra_args)
725 725 job.add_task(t)
726 726
727 727 self.log.info("Writing job description file: %s" % self.job_file)
728 728 job.write(self.job_file)
729 729
730 730 @property
731 731 def job_file(self):
732 732 return os.path.join(self.cluster_dir, self.job_file_name)
733 733
734 734 def start(self, n, cluster_dir):
735 735 """Start n engines by cluster_dir."""
736 736 self.extra_args = ['--cluster-dir', cluster_dir]
737 737 self.cluster_dir = unicode(cluster_dir)
738 738 return super(WindowsHPCEngineSetLauncher, self).start(n)
739 739
740 740
741 741 #-----------------------------------------------------------------------------
742 742 # Batch (PBS) system launchers
743 743 #-----------------------------------------------------------------------------
744 744
745 745 class BatchSystemLauncher(BaseLauncher):
746 746 """Launch an external process using a batch system.
747 747
748 748 This class is designed to work with UNIX batch systems like PBS, LSF,
749 749 GridEngine, etc. The overall model is that there are different commands
750 750 like qsub, qdel, etc. that handle the starting and stopping of the process.
751 751
752 752 This class also has the notion of a batch script. The ``batch_template``
753 753 attribute can be set to a string that is a template for the batch script.
754 754 This template is instantiated using Itpl. Thus the template can use
755 755 ${n} for the number of instances. Subclasses can add additional variables
756 756 to the template dict (a configuration sketch follows this class).
757 757 """
758 758
759 759 # Subclasses must fill these in. See PBSEngineSet
760 760 # The name of the command line program used to submit jobs.
761 761 submit_command = List([''], config=True)
762 762 # The name of the command line program used to delete jobs.
763 763 delete_command = List([''], config=True)
764 764 # A regular expression used to get the job id from the output of the
765 765 # submit_command.
766 766 job_id_regexp = CUnicode('', config=True)
767 767 # The string that is the batch script template itself.
768 768 batch_template = CUnicode('', config=True)
769 769 # The file that contains the batch template
770 770 batch_template_file = CUnicode(u'', config=True)
771 771 # The filename of the instantiated batch script.
772 772 batch_file_name = CUnicode(u'batch_script', config=True)
773 773 # The PBS Queue
774 774 queue = CUnicode(u'', config=True)
775 775
776 776 # not configurable, override in subclasses
777 777 # PBS Job Array regex
778 778 job_array_regexp = CUnicode('')
779 779 job_array_template = CUnicode('')
780 780 # PBS Queue regex
781 781 queue_regexp = CUnicode('')
782 782 queue_template = CUnicode('')
783 783 # The default batch template, override in subclasses
784 784 default_template = CUnicode('')
785 785 # The full path to the instantiated batch script.
786 786 batch_file = CUnicode(u'')
787 787 # the format dict used with batch_template:
788 788 context = Dict()
789 789
790 790
791 791 def find_args(self):
792 792 return self.submit_command + [self.batch_file]
793 793
794 794 def __init__(self, work_dir=u'.', config=None, **kwargs):
795 795 super(BatchSystemLauncher, self).__init__(
796 796 work_dir=work_dir, config=config, **kwargs
797 797 )
798 798 self.batch_file = os.path.join(self.work_dir, self.batch_file_name)
799 799
800 800 def parse_job_id(self, output):
801 801 """Take the output of the submit command and return the job id."""
802 802 m = re.search(self.job_id_regexp, output)
803 803 if m is not None:
804 804 job_id = m.group()
805 805 else:
806 806 raise LauncherError("Job id couldn't be determined: %s" % output)
807 807 self.job_id = job_id
808 808 self.log.info('Job submitted with job id: %r' % job_id)
809 809 return job_id
810 810
811 811 def write_batch_script(self, n):
812 812 """Instantiate and write the batch script to the work_dir."""
813 813 self.context['n'] = n
814 814 self.context['queue'] = self.queue
815 815 # print self.context
816 816 # first priority is batch_template if set
817 817 if self.batch_template_file and not self.batch_template:
818 818 # second priority is batch_template_file
819 819 with open(self.batch_template_file) as f:
820 820 self.batch_template = f.read()
821 821 if not self.batch_template:
822 822 # third (last) priority is default_template
823 823 self.batch_template = self.default_template
824 824
825 825 regex = re.compile(self.job_array_regexp)
826 826 # print regex.search(self.batch_template)
827 827 if not regex.search(self.batch_template):
828 828 self.log.info("adding job array settings to batch script")
829 829 firstline, rest = self.batch_template.split('\n',1)
830 830 self.batch_template = u'\n'.join([firstline, self.job_array_template, rest])
831 831
832 832 regex = re.compile(self.queue_regexp)
833 833 # print regex.search(self.batch_template)
834 834 if self.queue and not regex.search(self.batch_template):
835 835 self.log.info("adding PBS queue settings to batch script")
836 836 firstline, rest = self.batch_template.split('\n',1)
837 837 self.batch_template = u'\n'.join([firstline, self.queue_template, rest])
838 838
839 839 script_as_string = Itpl.itplns(self.batch_template, self.context)
840 840 self.log.info('Writing instantiated batch script: %s' % self.batch_file)
841 841
842 842 with open(self.batch_file, 'w') as f:
843 843 f.write(script_as_string)
844 844 os.chmod(self.batch_file, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
845 845
846 846 def start(self, n, cluster_dir):
847 847 """Start n copies of the process using a batch system."""
848 848 # Here we save cluster_dir in the context so it
849 849 # can be used in the batch script template as
850 850 # ${cluster_dir}
851 851 self.context['cluster_dir'] = cluster_dir
852 852 self.cluster_dir = unicode(cluster_dir)
853 853 self.write_batch_script(n)
854 854 output = check_output(self.args, env=os.environ)
855 855
856 856 job_id = self.parse_job_id(output)
857 857 self.notify_start(job_id)
858 858 return job_id
859 859
860 860 def stop(self):
861 861 output = check_output(self.delete_command+[self.job_id], env=os.environ)
862 862 self.notify_stop(dict(job_id=self.job_id, output=output)) # Pass the output of the kill cmd
863 863 return output
864 864
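# Hypothetical ipcluster_config.py fragment showing the template precedence
# sketched in the BatchSystemLauncher docstring: an inline batch_template
# wins, then batch_template_file, then the class default_template.
# ${n}, ${queue} and ${cluster_dir} are substituted by write_batch_script();
# invoking a bare 'ipengine' assumes it is on the PATH of the batch node.
c = get_config()
c.PBSEngineSetLauncher.queue = 'batch'
c.PBSEngineSetLauncher.batch_template = """#!/bin/sh
#PBS -V
#PBS -N ipengine
#PBS -q ${queue}
#PBS -t 1-${n}
ipengine --cluster-dir ${cluster_dir}
"""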
865 865
866 866 class PBSLauncher(BatchSystemLauncher):
867 867 """A BatchSystemLauncher subclass for PBS."""
868 868
869 869 submit_command = List(['qsub'], config=True)
870 870 delete_command = List(['qdel'], config=True)
871 871 job_id_regexp = CUnicode(r'\d+', config=True)
872 872
873 873 batch_file = CUnicode(u'')
874 874 job_array_regexp = CUnicode('#PBS\W+-t\W+[\w\d\-\$]+')
875 875 job_array_template = CUnicode('#PBS -t 1-$n')
876 876 queue_regexp = CUnicode('#PBS\W+-q\W+\$?\w+')
877 877 queue_template = CUnicode('#PBS -q $queue')
878 878
879 879
880 880 class PBSControllerLauncher(PBSLauncher):
881 881 """Launch a controller using PBS."""
882 882
883 883 batch_file_name = CUnicode(u'pbs_controller', config=True)
884 884 default_template= CUnicode("""#!/bin/sh
885 885 #PBS -V
886 886 #PBS -N ipcontroller
887 887 %s --log-to-file --cluster-dir $cluster_dir
888 888 """%(' '.join(ipcontroller_cmd_argv)))
889 889
890 890 def start(self, cluster_dir):
891 891 """Start the controller by profile or cluster_dir."""
892 892 self.log.info("Starting PBSControllerLauncher: %r" % self.args)
893 893 return super(PBSControllerLauncher, self).start(1, cluster_dir)
894 894
895 895
896 896 class PBSEngineSetLauncher(PBSLauncher):
897 897 """Launch Engines using PBS"""
898 898 batch_file_name = CUnicode(u'pbs_engines', config=True)
899 899 default_template= CUnicode(u"""#!/bin/sh
900 900 #PBS -V
901 901 #PBS -N ipengine
902 902 %s --cluster-dir $cluster_dir
903 903 """%(' '.join(ipengine_cmd_argv)))
904 904
905 905 def start(self, n, cluster_dir):
906 906 """Start n engines by profile or cluster_dir."""
907 907 self.log.info('Starting %i engines with PBSEngineSetLauncher: %r' % (n, self.args))
908 908 return super(PBSEngineSetLauncher, self).start(n, cluster_dir)
909 909
910 910 #SGE is very similar to PBS
911 911
912 912 class SGELauncher(PBSLauncher):
913 913 """Sun GridEngine is a PBS clone with slightly different syntax"""
914 914 job_array_regexp = CUnicode('#$$\W+-t\W+[\w\d\-\$]+')
915 915 job_array_template = CUnicode('#$$ -t 1-$n')
916 916 queue_regexp = CUnicode('#$$\W+-q\W+\$?\w+')
917 917 queue_template = CUnicode('#$$ -q $queue')
918 918
919 919 class SGEControllerLauncher(SGELauncher):
920 920 """Launch a controller using SGE."""
921 921
922 922 batch_file_name = CUnicode(u'sge_controller', config=True)
923 923 default_template= CUnicode(u"""#$$ -V
924 924 #$$ -S /bin/sh
925 925 #$$ -N ipcontroller
926 926 %s --log-to-file --cluster-dir $cluster_dir
927 927 """%(' '.join(ipcontroller_cmd_argv)))
928 928
929 929 def start(self, cluster_dir):
930 930 """Start the controller by profile or cluster_dir."""
931 931 self.log.info("Starting SGEControllerLauncher: %r" % self.args)
932 932 return super(SGEControllerLauncher, self).start(1, cluster_dir)
933 933
934 934 class SGEEngineSetLauncher(SGELauncher):
935 935 """Launch Engines with SGE"""
936 936 batch_file_name = CUnicode(u'sge_engines', config=True)
937 937 default_template = CUnicode("""#$$ -V
938 938 #$$ -S /bin/sh
939 939 #$$ -N ipengine
940 940 %s --cluster-dir $cluster_dir
941 941 """%(' '.join(ipengine_cmd_argv)))
942 942
943 943 def start(self, n, cluster_dir):
944 944 """Start n engines by profile or cluster_dir."""
945 945 self.log.info('Starting %i engines with SGEEngineSetLauncher: %r' % (n, self.args))
946 946 return super(SGEEngineSetLauncher, self).start(n, cluster_dir)
947 947
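# Hypothetical ipcluster_config.py fragment selecting the SGE launchers;
# 'all.q' is just a common default SGE queue name, not a requirement.
c = get_config()
c.Global.controller_launcher = 'IPython.parallel.apps.launcher.SGEControllerLauncher'
c.Global.engine_launcher = 'IPython.parallel.apps.launcher.SGEEngineSetLauncher'
c.SGEEngineSetLauncher.queue = 'all.q'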
948 948
949 949 #-----------------------------------------------------------------------------
950 950 # A launcher for ipcluster itself!
951 951 #-----------------------------------------------------------------------------
952 952
953 953
954 954 class IPClusterLauncher(LocalProcessLauncher):
955 955 """Launch the ipcluster program in an external process."""
956 956
957 957 ipcluster_cmd = List(ipcluster_cmd_argv, config=True)
958 958 # Command line arguments to pass to ipcluster.
959 959 ipcluster_args = List(
960 960 ['--clean-logs', '--log-to-file', '--log-level', str(logging.INFO)], config=True)
961 961 ipcluster_subcommand = Str('start')
962 962 ipcluster_n = Int(2)
963 963
964 964 def find_args(self):
965 965 return self.ipcluster_cmd + [self.ipcluster_subcommand] + \
966 966 ['-n', repr(self.ipcluster_n)] + self.ipcluster_args
967 967
968 968 def start(self):
969 969 self.log.info("Starting ipcluster: %r" % self.args)
970 970 return super(IPClusterLauncher, self).start()
971 971
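# Minimal sketch of how IPClusterLauncher composes its command line; the
# 'ipcluster' entry stands in for whatever ipcluster_cmd_argv resolves to.
launcher = IPClusterLauncher(work_dir=u'.')
launcher.ipcluster_n = 4
print launcher.find_args()
# -> ['ipcluster', 'start', '-n', '4', '--clean-logs', '--log-to-file', '--log-level', '20']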
@@ -1,98 +1,98 b''
1 1 #!/usr/bin/env python
2 2 """A simple logger object that consolidates messages incoming from ipcluster processes."""
3 3
4 4 #-----------------------------------------------------------------------------
5 5 # Copyright (C) 2011 The IPython Development Team
6 6 #
7 7 # Distributed under the terms of the BSD License. The full license is in
8 8 # the file COPYING, distributed as part of this software.
9 9 #-----------------------------------------------------------------------------
10 10
11 11 #-----------------------------------------------------------------------------
12 12 # Imports
13 13 #-----------------------------------------------------------------------------
14 14
15 15
16 16 import logging
17 17 import sys
18 18
19 19 import zmq
20 20 from zmq.eventloop import ioloop, zmqstream
21 21
22 22 from IPython.utils.traitlets import Int, Str, Instance, List
23 23
24 from .factory import LoggingFactory
24 from IPython.parallel.factory import LoggingFactory
25 25
26 26 #-----------------------------------------------------------------------------
27 27 # Classes
28 28 #-----------------------------------------------------------------------------
29 29
30 30
31 31 class LogWatcher(LoggingFactory):
32 32 """A simple class that receives messages on a SUB socket, as published
33 33 by subclasses of `zmq.log.handlers.PUBHandler`, and logs them itself.
34 34
35 35 This can subscribe to multiple topics, but defaults to all topics.
36 36 """
37 37 # configurables
38 38 topics = List([''], config=True)
39 39 url = Str('tcp://127.0.0.1:20202', config=True)
40 40
41 41 # internals
42 42 context = Instance(zmq.Context, (), {})
43 43 stream = Instance('zmq.eventloop.zmqstream.ZMQStream')
44 44 loop = Instance('zmq.eventloop.ioloop.IOLoop')
45 45 def _loop_default(self):
46 46 return ioloop.IOLoop.instance()
47 47
48 48 def __init__(self, **kwargs):
49 49 super(LogWatcher, self).__init__(**kwargs)
50 50 s = self.context.socket(zmq.SUB)
51 51 s.bind(self.url)
52 52 self.stream = zmqstream.ZMQStream(s, self.loop)
53 53 self.subscribe()
54 54 self.on_trait_change(self.subscribe, 'topics')
55 55
56 56 def start(self):
57 57 self.stream.on_recv(self.log_message)
58 58
59 59 def stop(self):
60 60 self.stream.stop_on_recv()
61 61
62 62 def subscribe(self):
63 63 """Update our SUB socket's subscriptions."""
64 64 self.stream.setsockopt(zmq.UNSUBSCRIBE, '')
65 65 for topic in self.topics:
66 66 self.log.debug("Subscribing to: %r"%topic)
67 67 self.stream.setsockopt(zmq.SUBSCRIBE, topic)
68 68
69 69 def _extract_level(self, topic_str):
70 70 """Turn 'engine.0.INFO.extra' into (logging.INFO, 'engine.0.extra')"""
71 71 topics = topic_str.split('.')
72 72 for idx,t in enumerate(topics):
73 73 level = getattr(logging, t, None)
74 74 if level is not None:
75 75 break
76 76
77 77 if level is None:
78 78 level = logging.INFO
79 79 else:
80 80 topics.pop(idx)
81 81
82 82 return level, '.'.join(topics)
83 83
84 84
85 85 def log_message(self, raw):
86 86 """receive and parse a message, then log it."""
87 87 if len(raw) != 2 or '.' not in raw[0]:
88 88 self.log.error("Invalid log message: %s"%raw)
89 89 return
90 90 else:
91 91 topic, msg = raw
92 92 # don't newline, since log messages always newline:
93 93 topic,level_name = topic.rsplit('.',1)
94 94 level,topic = self._extract_level(topic)
95 95 if msg[-1] == '\n':
96 96 msg = msg[:-1]
97 97 logging.log(level, "[%s] %s" % (topic, msg))
98 98
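# Minimal usage sketch under the defaults above: bind the SUB socket, hook
# log_message onto it, and run the ioloop until interrupted.
watcher = LogWatcher(url='tcp://127.0.0.1:20202', topics=['engine'])
watcher.start()
watcher.loop.start()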
1 NO CONTENT: file renamed from IPython/parallel/winhpcjob.py to IPython/parallel/apps/winhpcjob.py
@@ -1,340 +1,340 b''
1 1 """AsyncResult objects for the client"""
2 2 #-----------------------------------------------------------------------------
3 3 # Copyright (C) 2010-2011 The IPython Development Team
4 4 #
5 5 # Distributed under the terms of the BSD License. The full license is in
6 6 # the file COPYING, distributed as part of this software.
7 7 #-----------------------------------------------------------------------------
8 8
9 9 #-----------------------------------------------------------------------------
10 10 # Imports
11 11 #-----------------------------------------------------------------------------
12 12
13 13 import time
14 14
15 15 from zmq import MessageTracker
16 16
17 17 from IPython.external.decorator import decorator
18 from . import error
18 from IPython.parallel import error
19 19
20 20 #-----------------------------------------------------------------------------
21 21 # Classes
22 22 #-----------------------------------------------------------------------------
23 23
24 24 # global empty tracker that's always done:
25 25 finished_tracker = MessageTracker()
26 26
27 27 @decorator
28 28 def check_ready(f, self, *args, **kwargs):
29 29 """Call spin() to sync state prior to calling the method."""
30 30 self.wait(0)
31 31 if not self._ready:
32 32 raise error.TimeoutError("result not ready")
33 33 return f(self, *args, **kwargs)
34 34
35 35 class AsyncResult(object):
36 36 """Class for representing results of non-blocking calls.
37 37
38 38 Provides the same interface as :py:class:`multiprocessing.pool.AsyncResult`.
39 39 """
40 40
41 41 msg_ids = None
42 42 _targets = None
43 43 _tracker = None
44 44 _single_result = False
45 45
46 46 def __init__(self, client, msg_ids, fname='unknown', targets=None, tracker=None):
47 47 if isinstance(msg_ids, basestring):
48 48 # always a list
49 49 msg_ids = [msg_ids]
50 50 if tracker is None:
51 51 # default to always done
52 52 tracker = finished_tracker
53 53 self._client = client
54 54 self.msg_ids = msg_ids
55 55 self._fname=fname
56 56 self._targets = targets
57 57 self._tracker = tracker
58 58 self._ready = False
59 59 self._success = None
60 60 if len(msg_ids) == 1:
61 61 self._single_result = not isinstance(targets, (list, tuple))
62 62 else:
63 63 self._single_result = False
64 64
65 65 def __repr__(self):
66 66 if self._ready:
67 67 return "<%s: finished>"%(self.__class__.__name__)
68 68 else:
69 69 return "<%s: %s>"%(self.__class__.__name__,self._fname)
70 70
71 71
72 72 def _reconstruct_result(self, res):
73 73 """Reconstruct our result from actual result list (always a list)
74 74
75 75 Override me in subclasses for turning a list of results
76 76 into the expected form.
77 77 """
78 78 if self._single_result:
79 79 return res[0]
80 80 else:
81 81 return res
82 82
83 83 def get(self, timeout=-1):
84 84 """Return the result when it arrives.
85 85
86 86 If `timeout` is not ``None`` and the result does not arrive within
87 87 `timeout` seconds then ``TimeoutError`` is raised. If the
88 88 remote call raised an exception then that exception will be reraised
89 89 by get() inside a `RemoteError`.
90 90 """
91 91 if not self.ready():
92 92 self.wait(timeout)
93 93
94 94 if self._ready:
95 95 if self._success:
96 96 return self._result
97 97 else:
98 98 raise self._exception
99 99 else:
100 100 raise error.TimeoutError("Result not ready.")
101 101
102 102 def ready(self):
103 103 """Return whether the call has completed."""
104 104 if not self._ready:
105 105 self.wait(0)
106 106 return self._ready
107 107
108 108 def wait(self, timeout=-1):
109 109 """Wait until the result is available or until `timeout` seconds pass.
110 110
111 111 This method always returns None.
112 112 """
113 113 if self._ready:
114 114 return
115 115 self._ready = self._client.wait(self.msg_ids, timeout)
116 116 if self._ready:
117 117 try:
118 118 results = map(self._client.results.get, self.msg_ids)
119 119 self._result = results
120 120 if self._single_result:
121 121 r = results[0]
122 122 if isinstance(r, Exception):
123 123 raise r
124 124 else:
125 125 results = error.collect_exceptions(results, self._fname)
126 126 self._result = self._reconstruct_result(results)
127 127 except Exception, e:
128 128 self._exception = e
129 129 self._success = False
130 130 else:
131 131 self._success = True
132 132 finally:
133 133 self._metadata = map(self._client.metadata.get, self.msg_ids)
134 134
135 135
136 136 def successful(self):
137 137 """Return whether the call completed without raising an exception.
138 138
139 139 Will raise ``AssertionError`` if the result is not ready.
140 140 """
141 141 assert self.ready()
142 142 return self._success
143 143
144 144 #----------------------------------------------------------------
145 145 # Extra methods not in mp.pool.AsyncResult
146 146 #----------------------------------------------------------------
147 147
148 148 def get_dict(self, timeout=-1):
149 149 """Get the results as a dict, keyed by engine_id.
150 150
151 151 timeout behavior is described in `get()`.
152 152 """
153 153
154 154 results = self.get(timeout)
155 155 engine_ids = [ md['engine_id'] for md in self._metadata ]
156 156 bycount = sorted(engine_ids, key=lambda k: engine_ids.count(k))
157 157 maxcount = bycount.count(bycount[-1])
158 158 if maxcount > 1:
159 159 raise ValueError("Cannot build dict, %i jobs ran on engine #%i"%(
160 160 maxcount, bycount[-1]))
161 161
162 162 return dict(zip(engine_ids,results))
163 163
164 164 @property
165 165 def result(self):
166 166 """result property wrapper for `get()`."""
167 167 return self.get()
168 168
169 169 # abbreviated alias:
170 170 r = result
171 171
172 172 @property
173 173 @check_ready
174 174 def metadata(self):
175 175 """property for accessing execution metadata."""
176 176 if self._single_result:
177 177 return self._metadata[0]
178 178 else:
179 179 return self._metadata
180 180
181 181 @property
182 182 def result_dict(self):
183 183 """result property as a dict."""
184 184 return self.get_dict()
185 185
186 186 def __dict__(self):
187 187 return self.get_dict(0)
188 188
189 189 def abort(self):
190 190 """abort my tasks."""
191 191 assert not self.ready(), "Can't abort, I am already done!"
192 192 return self._client.abort(self.msg_ids, targets=self._targets, block=True)
193 193
194 194 @property
195 195 def sent(self):
196 196 """check whether my messages have been sent."""
197 197 return self._tracker.done
198 198
199 199 def wait_for_send(self, timeout=-1):
200 200 """wait for pyzmq send to complete.
201 201
202 202 This is necessary when sending arrays that you intend to edit in-place.
203 203 `timeout` is in seconds, and will raise TimeoutError if it is reached
204 204 before the send completes.
205 205 """
206 206 return self._tracker.wait(timeout)
207 207
208 208 #-------------------------------------
209 209 # dict-access
210 210 #-------------------------------------
211 211
212 212 @check_ready
213 213 def __getitem__(self, key):
214 214 """getitem returns result value(s) if keyed by int/slice, or metadata if key is str.
215 215 """
216 216 if isinstance(key, int):
217 217 return error.collect_exceptions([self._result[key]], self._fname)[0]
218 218 elif isinstance(key, slice):
219 219 return error.collect_exceptions(self._result[key], self._fname)
220 220 elif isinstance(key, basestring):
221 221 values = [ md[key] for md in self._metadata ]
222 222 if self._single_result:
223 223 return values[0]
224 224 else:
225 225 return values
226 226 else:
227 227 raise TypeError("Invalid key type %r, must be 'int','slice', or 'str'"%type(key))
228 228
229 229 @check_ready
230 230 def __getattr__(self, key):
231 231 """getattr maps to getitem for convenient attr access to metadata."""
232 232 if key not in self._metadata[0].keys():
233 233 raise AttributeError("%r object has no attribute %r"%(
234 234 self.__class__.__name__, key))
235 235 return self.__getitem__(key)
236 236
237 237 # asynchronous iterator:
238 238 def __iter__(self):
239 239 if self._single_result:
240 240 raise TypeError("AsyncResults with a single result are not iterable.")
241 241 try:
242 242 rlist = self.get(0)
243 243 except error.TimeoutError:
244 244 # wait for each result individually
245 245 for msg_id in self.msg_ids:
246 246 ar = AsyncResult(self._client, msg_id, self._fname)
247 247 yield ar.get()
248 248 else:
249 249 # already done
250 250 for r in rlist:
251 251 yield r
252 252
253 253
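# Hedged usage sketch (assumes a running cluster and a connected Client `rc`;
# a view's apply_async returning an AsyncResult is the intended usage pattern):
ar = rc[:].apply_async(pow, 2, 10)    # one task per engine
ar.wait(5)                            # block up to five seconds
if ar.ready() and ar.successful():
    print ar.get()                    # e.g. [1024, 1024] with two engines
    print ar['engine_id']             # metadata access via __getitem__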
254 254
255 255 class AsyncMapResult(AsyncResult):
256 256 """Class for representing results of non-blocking gathers.
257 257
258 258 This will properly reconstruct the gather.
259 259 """
260 260
261 261 def __init__(self, client, msg_ids, mapObject, fname=''):
262 262 AsyncResult.__init__(self, client, msg_ids, fname=fname)
263 263 self._mapObject = mapObject
264 264 self._single_result = False
265 265
266 266 def _reconstruct_result(self, res):
267 267 """Perform the gather on the actual results."""
268 268 return self._mapObject.joinPartitions(res)
269 269
270 270 # asynchronous iterator:
271 271 def __iter__(self):
272 272 try:
273 273 rlist = self.get(0)
274 274 except error.TimeoutError:
275 275 # wait for each result individually
276 276 for msg_id in self.msg_ids:
277 277 ar = AsyncResult(self._client, msg_id, self._fname)
278 278 rlist = ar.get()
279 279 try:
280 280 for r in rlist:
281 281 yield r
282 282 except TypeError:
283 283 # flattened, not a list
284 284 # this could get broken by flattened data that returns iterables
285 285 # but most calls to map do not expose the `flatten` argument
286 286 yield rlist
287 287 else:
288 288 # already done
289 289 for r in rlist:
290 290 yield r
291 291
292 292
293 293 class AsyncHubResult(AsyncResult):
294 294 """Class to wrap pending results that must be requested from the Hub.
295 295
296 296 Note that waiting/polling on these objects requires polling the Hub over the network,
297 297 so use `AsyncHubResult.wait()` sparingly.
298 298 """
299 299
300 300 def wait(self, timeout=-1):
301 301 """wait for result to complete."""
302 302 start = time.time()
303 303 if self._ready:
304 304 return
305 305 local_ids = filter(lambda msg_id: msg_id in self._client.outstanding, self.msg_ids)
306 306 local_ready = self._client.wait(local_ids, timeout)
307 307 if local_ready:
308 308 remote_ids = filter(lambda msg_id: msg_id not in self._client.results, self.msg_ids)
309 309 if not remote_ids:
310 310 self._ready = True
311 311 else:
312 312 rdict = self._client.result_status(remote_ids, status_only=False)
313 313 pending = rdict['pending']
314 314 while pending and (timeout < 0 or time.time() < start+timeout):
315 315 rdict = self._client.result_status(remote_ids, status_only=False)
316 316 pending = rdict['pending']
317 317 if pending:
318 318 time.sleep(0.1)
319 319 if not pending:
320 320 self._ready = True
321 321 if self._ready:
322 322 try:
323 323 results = map(self._client.results.get, self.msg_ids)
324 324 self._result = results
325 325 if self._single_result:
326 326 r = results[0]
327 327 if isinstance(r, Exception):
328 328 raise r
329 329 else:
330 330 results = error.collect_exceptions(results, self._fname)
331 331 self._result = self._reconstruct_result(results)
332 332 except Exception, e:
333 333 self._exception = e
334 334 self._success = False
335 335 else:
336 336 self._success = True
337 337 finally:
338 338 self._metadata = map(self._client.metadata.get, self.msg_ids)
339 339
340 340 __all__ = ['AsyncResult', 'AsyncMapResult', 'AsyncHubResult'] No newline at end of file
@@ -1,1278 +1,1279 b''
1 1 """A semi-synchronous Client for the ZMQ cluster"""
2 2 #-----------------------------------------------------------------------------
3 3 # Copyright (C) 2010 The IPython Development Team
4 4 #
5 5 # Distributed under the terms of the BSD License. The full license is in
6 6 # the file COPYING, distributed as part of this software.
7 7 #-----------------------------------------------------------------------------
8 8
9 9 #-----------------------------------------------------------------------------
10 10 # Imports
11 11 #-----------------------------------------------------------------------------
12 12
13 13 import os
14 14 import json
15 15 import time
16 16 import warnings
17 17 from datetime import datetime
18 18 from getpass import getpass
19 19 from pprint import pprint
20 20
21 21 pjoin = os.path.join
22 22
23 23 import zmq
24 24 # from zmq.eventloop import ioloop, zmqstream
25 25
26 26 from IPython.utils.path import get_ipython_dir
27 27 from IPython.utils.traitlets import (HasTraits, Int, Instance, CUnicode,
28 28 Dict, List, Bool, Str, Set)
29 29 from IPython.external.decorator import decorator
30 30 from IPython.external.ssh import tunnel
31 31
32 from . import error
33 from . import util
34 from . import streamsession as ss
32 from IPython.parallel import error
33 from IPython.parallel import streamsession as ss
34 from IPython.parallel import util
35
35 36 from .asyncresult import AsyncResult, AsyncHubResult
36 from .clusterdir import ClusterDir, ClusterDirError
37 from IPython.parallel.apps.clusterdir import ClusterDir, ClusterDirError
37 38 from .view import DirectView, LoadBalancedView
38 39
39 40 #--------------------------------------------------------------------------
40 41 # Decorators for Client methods
41 42 #--------------------------------------------------------------------------
42 43
43 44 @decorator
44 45 def spin_first(f, self, *args, **kwargs):
45 46 """Call spin() to sync state prior to calling the method."""
46 47 self.spin()
47 48 return f(self, *args, **kwargs)
48 49
49 50 @decorator
50 51 def default_block(f, self, *args, **kwargs):
51 52 """Default to self.block; preserve self.block."""
52 53 block = kwargs.get('block',None)
53 54 block = self.block if block is None else block
54 55 saveblock = self.block
55 56 self.block = block
56 57 try:
57 58 ret = f(self, *args, **kwargs)
58 59 finally:
59 60 self.block = saveblock
60 61 return ret
61 62
62 63
63 64 #--------------------------------------------------------------------------
64 65 # Classes
65 66 #--------------------------------------------------------------------------
66 67
67 68 class Metadata(dict):
68 69 """Subclass of dict for initializing metadata values.
69 70
70 71 Attribute access works on keys.
71 72
72 73 These objects have a strict set of keys - errors will raise if you try
73 74 to add new keys.
74 75 """
75 76 def __init__(self, *args, **kwargs):
76 77 dict.__init__(self)
77 78 md = {'msg_id' : None,
78 79 'submitted' : None,
79 80 'started' : None,
80 81 'completed' : None,
81 82 'received' : None,
82 83 'engine_uuid' : None,
83 84 'engine_id' : None,
84 85 'follow' : None,
85 86 'after' : None,
86 87 'status' : None,
87 88
88 89 'pyin' : None,
89 90 'pyout' : None,
90 91 'pyerr' : None,
91 92 'stdout' : '',
92 93 'stderr' : '',
93 94 }
94 95 self.update(md)
95 96 self.update(dict(*args, **kwargs))
96 97
97 98 def __getattr__(self, key):
98 99 """getattr aliased to getitem"""
99 100 if key in self.iterkeys():
100 101 return self[key]
101 102 else:
102 103 raise AttributeError(key)
103 104
104 105 def __setattr__(self, key, value):
105 106 """setattr aliased to setitem, with strict"""
106 107 if key in self.iterkeys():
107 108 self[key] = value
108 109 else:
109 110 raise AttributeError(key)
110 111
111 112 def __setitem__(self, key, value):
112 113 """strict static key enforcement"""
113 114 if key in self.iterkeys():
114 115 dict.__setitem__(self, key, value)
115 116 else:
116 117 raise KeyError(key)
117 118
118 119
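# Tiny sketch of the strict-key behaviour described above:
md = Metadata(engine_id=0)
print md.status           # attribute access on a known key -> None
try:
    md['not_a_key'] = 1
except KeyError:
    print "unknown keys are rejected"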
119 120 class Client(HasTraits):
120 121 """A semi-synchronous client to the IPython ZMQ cluster
121 122
122 123 Parameters
123 124 ----------
124 125
125 126 url_or_file : bytes; zmq url or path to ipcontroller-client.json
126 127 Connection information for the Hub's registration. If a json connector
127 128 file is given, then likely no further configuration is necessary.
128 129 [Default: use profile]
129 130 profile : bytes
130 131 The name of the Cluster profile to be used to find connector information.
131 132 [Default: 'default']
132 133 context : zmq.Context
133 134 Pass an existing zmq.Context instance, otherwise the client will create its own.
134 135 username : bytes
135 136 set username to be passed to the Session object
136 137 debug : bool
137 138 flag for lots of message printing for debug purposes
138 139
139 140 #-------------- ssh related args ----------------
140 141 # These are args for configuring the ssh tunnel to be used
141 142 # credentials are used to forward connections over ssh to the Controller
142 143 # Note that the ip given in `addr` needs to be relative to sshserver
143 144 # The most basic case is to leave addr as pointing to localhost (127.0.0.1),
144 145 # and set sshserver as the same machine the Controller is on. However,
145 146 # the only requirement is that sshserver is able to see the Controller
146 147 # (i.e. is within the same trusted network).
147 148
148 149 sshserver : str
149 150 A string of the form passed to ssh, i.e. 'server.tld' or 'user@server.tld:port'
150 151 If keyfile or password is specified, and this is not, it will default to
151 152 the ip given in addr.
152 153 sshkey : str; path to public ssh key file
153 154 This specifies a key to be used in ssh login, default None.
154 155 Regular default ssh keys will be used without specifying this argument.
155 156 password : str
156 157 Your ssh password to sshserver. Note that if this is left None,
157 158 you will be prompted for it if passwordless key based login is unavailable.
158 159 paramiko : bool
159 160 flag for whether to use paramiko instead of shell ssh for tunneling.
160 161 [default: True on win32, False else]
161 162
162 163 ------- exec authentication args -------
163 164 If even localhost is untrusted, you can have some protection against
164 165 unauthorized execution by using a key. Messages are still sent
165 166 as cleartext, so if someone can snoop your loopback traffic this will
166 167 not help against malicious attacks.
167 168
168 169 exec_key : str
169 170 an authentication key or file containing a key
170 171 default: None
171 172
172 173
173 174 Attributes
174 175 ----------
175 176
176 177 ids : list of int engine IDs
177 178 requesting the ids attribute always synchronizes
178 179 the registration state. To request ids without synchronization,
179 180 use semi-private _ids attributes.
180 181
181 182 history : list of msg_ids
182 183 a list of msg_ids, keeping track of all the execution
183 184 messages you have submitted in order.
184 185
185 186 outstanding : set of msg_ids
186 187 a set of msg_ids that have been submitted, but whose
187 188 results have not yet been received.
188 189
189 190 results : dict
190 191 a dict of all our results, keyed by msg_id
191 192
192 193 block : bool
193 194 determines default behavior when block not specified
194 195 in execution methods
195 196
196 197 Methods
197 198 -------
198 199
199 200 spin
200 201 flushes incoming results and registration state changes
201 202 control methods spin, and requesting `ids` also ensures up to date
202 203
203 204 wait
204 205 wait on one or more msg_ids
205 206
206 207 execution methods
207 208 apply
208 209 legacy: execute, run
209 210
210 211 data movement
211 212 push, pull, scatter, gather
212 213
213 214 query methods
214 215 queue_status, get_result, purge, result_status
215 216
216 217 control methods
217 218 abort, shutdown
218 219
219 220 """
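# Connection sketches for the parameters documented above (the json path
# and ssh server are placeholders, not defaults):
#
# >>> rc = Client(profile='default')
# >>> rc = Client('/path/to/ipcontroller-client.json',
# ...             sshserver='me@gateway.example.com')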
220 221
221 222
222 223 block = Bool(False)
223 224 outstanding = Set()
224 225 results = Instance('collections.defaultdict', (dict,))
225 226 metadata = Instance('collections.defaultdict', (Metadata,))
226 227 history = List()
227 228 debug = Bool(False)
228 229 profile=CUnicode('default')
229 230
230 231 _outstanding_dict = Instance('collections.defaultdict', (set,))
231 232 _ids = List()
232 233 _connected=Bool(False)
233 234 _ssh=Bool(False)
234 235 _context = Instance('zmq.Context')
235 236 _config = Dict()
236 237 _engines=Instance(util.ReverseDict, (), {})
237 238 # _hub_socket=Instance('zmq.Socket')
238 239 _query_socket=Instance('zmq.Socket')
239 240 _control_socket=Instance('zmq.Socket')
240 241 _iopub_socket=Instance('zmq.Socket')
241 242 _notification_socket=Instance('zmq.Socket')
242 243 _mux_socket=Instance('zmq.Socket')
243 244 _task_socket=Instance('zmq.Socket')
244 245 _task_scheme=Str()
245 246 _closed = False
246 247 _ignored_control_replies=Int(0)
247 248 _ignored_hub_replies=Int(0)
248 249
249 250 def __init__(self, url_or_file=None, profile='default', cluster_dir=None, ipython_dir=None,
250 251 context=None, username=None, debug=False, exec_key=None,
251 252 sshserver=None, sshkey=None, password=None, paramiko=None,
252 253 timeout=10
253 254 ):
254 255 super(Client, self).__init__(debug=debug, profile=profile)
255 256 if context is None:
256 257 context = zmq.Context.instance()
257 258 self._context = context
258 259
259 260
260 261 self._setup_cluster_dir(profile, cluster_dir, ipython_dir)
261 262 if self._cd is not None:
262 263 if url_or_file is None:
263 264 url_or_file = pjoin(self._cd.security_dir, 'ipcontroller-client.json')
264 265 assert url_or_file is not None, "I can't find enough information to connect to a hub!"\
265 266 " Please specify at least one of url_or_file or profile."
266 267
267 268 try:
268 269 util.validate_url(url_or_file)
269 270 except AssertionError:
270 271 if not os.path.exists(url_or_file):
271 272 if self._cd:
272 273 url_or_file = os.path.join(self._cd.security_dir, url_or_file)
273 274 assert os.path.exists(url_or_file), "Not a valid connection file or url: %r"%url_or_file
274 275 with open(url_or_file) as f:
275 276 cfg = json.loads(f.read())
276 277 else:
277 278 cfg = {'url':url_or_file}
278 279
279 280 # sync defaults from args, json:
280 281 if sshserver:
281 282 cfg['ssh'] = sshserver
282 283 if exec_key:
283 284 cfg['exec_key'] = exec_key
284 285 exec_key = cfg['exec_key']
285 286 sshserver=cfg['ssh']
286 287 url = cfg['url']
287 288 location = cfg.setdefault('location', None)
288 289 cfg['url'] = util.disambiguate_url(cfg['url'], location)
289 290 url = cfg['url']
290 291
291 292 self._config = cfg
292 293
293 294 self._ssh = bool(sshserver or sshkey or password)
294 295 if self._ssh and sshserver is None:
295 296 # default to ssh via localhost
296 297 sshserver = url.split('://')[1].split(':')[0]
297 298 if self._ssh and password is None:
298 299 if tunnel.try_passwordless_ssh(sshserver, sshkey, paramiko):
299 300 password=False
300 301 else:
301 302 password = getpass("SSH Password for %s: "%sshserver)
302 303 ssh_kwargs = dict(keyfile=sshkey, password=password, paramiko=paramiko)
303 304 if exec_key is not None and os.path.isfile(exec_key):
304 305 arg = 'keyfile'
305 306 else:
306 307 arg = 'key'
307 308 key_arg = {arg:exec_key}
308 309 if username is None:
309 310 self.session = ss.StreamSession(**key_arg)
310 311 else:
311 312 self.session = ss.StreamSession(username, **key_arg)
312 313 self._query_socket = self._context.socket(zmq.XREQ)
313 314 self._query_socket.setsockopt(zmq.IDENTITY, self.session.session)
314 315 if self._ssh:
315 316 tunnel.tunnel_connection(self._query_socket, url, sshserver, **ssh_kwargs)
316 317 else:
317 318 self._query_socket.connect(url)
318 319
319 320 self.session.debug = self.debug
320 321
321 322 self._notification_handlers = {'registration_notification' : self._register_engine,
322 323 'unregistration_notification' : self._unregister_engine,
323 324 'shutdown_notification' : lambda msg: self.close(),
324 325 }
325 326 self._queue_handlers = {'execute_reply' : self._handle_execute_reply,
326 327 'apply_reply' : self._handle_apply_reply}
327 328 self._connect(sshserver, ssh_kwargs, timeout)
328 329
329 330 def __del__(self):
330 331 """cleanup sockets, but _not_ context."""
331 332 self.close()
332 333
333 334 def _setup_cluster_dir(self, profile, cluster_dir, ipython_dir):
334 335 if ipython_dir is None:
335 336 ipython_dir = get_ipython_dir()
336 337 if cluster_dir is not None:
337 338 try:
338 339 self._cd = ClusterDir.find_cluster_dir(cluster_dir)
339 340 return
340 341 except ClusterDirError:
341 342 pass
342 343 elif profile is not None:
343 344 try:
344 345 self._cd = ClusterDir.find_cluster_dir_by_profile(
345 346 ipython_dir, profile)
346 347 return
347 348 except ClusterDirError:
348 349 pass
349 350 self._cd = None
350 351
351 352 def _update_engines(self, engines):
352 353 """Update our engines dict and _ids from a dict of the form: {id:uuid}."""
353 354 for k,v in engines.iteritems():
354 355 eid = int(k)
355 356 self._engines[eid] = bytes(v) # force not unicode
356 357 self._ids.append(eid)
357 358 self._ids = sorted(self._ids)
358 359 if sorted(self._engines.keys()) != range(len(self._engines)) and \
359 360 self._task_scheme == 'pure' and self._task_socket:
360 361 self._stop_scheduling_tasks()
361 362
362 363 def _stop_scheduling_tasks(self):
363 364 """Stop scheduling tasks because an engine has been unregistered
364 365 from a pure ZMQ scheduler.
365 366 """
366 367 self._task_socket.close()
367 368 self._task_socket = None
368 369 msg = "An engine has been unregistered, and we are using pure " +\
369 370 "ZMQ task scheduling. Task farming will be disabled."
370 371 if self.outstanding:
371 372 msg += " If you were running tasks when this happened, " +\
372 373 "some `outstanding` msg_ids may never resolve."
373 374 warnings.warn(msg, RuntimeWarning)
374 375
375 376 def _build_targets(self, targets):
376 377 """Turn valid target IDs or 'all' into two lists:
377 378 (int_ids, uuids).
378 379 """
379 380 if targets is None:
380 381 targets = self._ids
381 382 elif isinstance(targets, str):
382 383 if targets.lower() == 'all':
383 384 targets = self._ids
384 385 else:
385 386 raise TypeError("%r not valid str target, must be 'all'"%(targets))
386 387 elif isinstance(targets, int):
387 388 if targets < 0:
388 389 targets = self.ids[targets]
389 390 if targets not in self.ids:
390 391 raise IndexError("No such engine: %i"%targets)
391 392 targets = [targets]
392 393
393 394 if isinstance(targets, slice):
394 395 indices = range(len(self._ids))[targets]
395 396 ids = self.ids
396 397 targets = [ ids[i] for i in indices ]
397 398
398 399 if not isinstance(targets, (tuple, list, xrange)):
399 400 raise TypeError("targets by int/slice/collection of ints only, not %s"%(type(targets)))
400 401
401 402 return [self._engines[t] for t in targets], list(targets)
402 403
403 404 def _connect(self, sshserver, ssh_kwargs, timeout):
404 405 """setup all our socket connections to the cluster. This is called from
405 406 __init__."""
406 407
407 408 # Maybe allow reconnecting?
408 409 if self._connected:
409 410 return
410 411 self._connected=True
411 412
412 413 def connect_socket(s, url):
413 414 url = util.disambiguate_url(url, self._config['location'])
414 415 if self._ssh:
415 416 return tunnel.tunnel_connection(s, url, sshserver, **ssh_kwargs)
416 417 else:
417 418 return s.connect(url)
418 419
419 420 self.session.send(self._query_socket, 'connection_request')
420 421 r,w,x = zmq.select([self._query_socket],[],[], timeout)
421 422 if not r:
422 423 raise error.TimeoutError("Hub connection request timed out")
423 424 idents,msg = self.session.recv(self._query_socket,mode=0)
424 425 if self.debug:
425 426 pprint(msg)
426 427 msg = ss.Message(msg)
427 428 content = msg.content
428 429 self._config['registration'] = dict(content)
429 430 if content.status == 'ok':
430 431 if content.mux:
431 432 self._mux_socket = self._context.socket(zmq.XREQ)
432 433 self._mux_socket.setsockopt(zmq.IDENTITY, self.session.session)
433 434 connect_socket(self._mux_socket, content.mux)
434 435 if content.task:
435 436 self._task_scheme, task_addr = content.task
436 437 self._task_socket = self._context.socket(zmq.XREQ)
437 438 self._task_socket.setsockopt(zmq.IDENTITY, self.session.session)
438 439 connect_socket(self._task_socket, task_addr)
439 440 if content.notification:
440 441 self._notification_socket = self._context.socket(zmq.SUB)
441 442 connect_socket(self._notification_socket, content.notification)
442 443 self._notification_socket.setsockopt(zmq.SUBSCRIBE, b'')
443 444 # if content.query:
444 445 # self._query_socket = self._context.socket(zmq.XREQ)
445 446 # self._query_socket.setsockopt(zmq.IDENTITY, self.session.session)
446 447 # connect_socket(self._query_socket, content.query)
447 448 if content.control:
448 449 self._control_socket = self._context.socket(zmq.XREQ)
449 450 self._control_socket.setsockopt(zmq.IDENTITY, self.session.session)
450 451 connect_socket(self._control_socket, content.control)
451 452 if content.iopub:
452 453 self._iopub_socket = self._context.socket(zmq.SUB)
453 454 self._iopub_socket.setsockopt(zmq.SUBSCRIBE, b'')
454 455 self._iopub_socket.setsockopt(zmq.IDENTITY, self.session.session)
455 456 connect_socket(self._iopub_socket, content.iopub)
456 457 self._update_engines(dict(content.engines))
457 458 else:
458 459 self._connected = False
459 460 raise Exception("Failed to connect!")
460 461
461 462 #--------------------------------------------------------------------------
462 463 # handlers and callbacks for incoming messages
463 464 #--------------------------------------------------------------------------
464 465
465 466 def _unwrap_exception(self, content):
466 467 """unwrap exception, and remap engine_id to int."""
467 468 e = error.unwrap_exception(content)
468 469 # print e.traceback
469 470 if e.engine_info:
470 471 e_uuid = e.engine_info['engine_uuid']
471 472 eid = self._engines[e_uuid]
472 473 e.engine_info['engine_id'] = eid
473 474 return e
474 475
475 476 def _extract_metadata(self, header, parent, content):
476 477 md = {'msg_id' : parent['msg_id'],
477 478 'received' : datetime.now(),
478 479 'engine_uuid' : header.get('engine', None),
479 480 'follow' : parent.get('follow', []),
480 481 'after' : parent.get('after', []),
481 482 'status' : content['status'],
482 483 }
483 484
484 485 if md['engine_uuid'] is not None:
485 486 md['engine_id'] = self._engines.get(md['engine_uuid'], None)
486 487
487 488 if 'date' in parent:
488 489 md['submitted'] = datetime.strptime(parent['date'], util.ISO8601)
489 490 if 'started' in header:
490 491 md['started'] = datetime.strptime(header['started'], util.ISO8601)
491 492 if 'date' in header:
492 493 md['completed'] = datetime.strptime(header['date'], util.ISO8601)
493 494 return md
494 495
495 496 def _register_engine(self, msg):
496 497 """Register a new engine, and update our connection info."""
497 498 content = msg['content']
498 499 eid = content['id']
499 500 d = {eid : content['queue']}
500 501 self._update_engines(d)
501 502
502 503 def _unregister_engine(self, msg):
503 504 """Unregister an engine that has died."""
504 505 content = msg['content']
505 506 eid = int(content['id'])
506 507 if eid in self._ids:
507 508 self._ids.remove(eid)
508 509 uuid = self._engines.pop(eid)
509 510
510 511 self._handle_stranded_msgs(eid, uuid)
511 512
512 513 if self._task_socket and self._task_scheme == 'pure':
513 514 self._stop_scheduling_tasks()
514 515
515 516 def _handle_stranded_msgs(self, eid, uuid):
516 517 """Handle messages known to be on an engine when the engine unregisters.
517 518
518 519 It is possible that this will fire prematurely - that is, an engine will
519 520 go down after completing a result, and the client will be notified
520 521 of the unregistration and later receive the successful result.
521 522 """
522 523
523 524 outstanding = self._outstanding_dict[uuid]
524 525
525 526 for msg_id in list(outstanding):
526 527 if msg_id in self.results:
527 528 # we already
528 529 continue
529 530 try:
530 531 raise error.EngineError("Engine %r died while running task %r"%(eid, msg_id))
531 532 except:
532 533 content = error.wrap_exception()
533 534 # build a fake message:
534 535 parent = {}
535 536 header = {}
536 537 parent['msg_id'] = msg_id
537 538 header['engine'] = uuid
538 539 header['date'] = datetime.now().strftime(util.ISO8601)
539 540 msg = dict(parent_header=parent, header=header, content=content)
540 541 self._handle_apply_reply(msg)
541 542
542 543 def _handle_execute_reply(self, msg):
543 544 """Save the reply to an execute_request into our results.
544 545
545 546 execute messages are never actually used. apply is used instead.
546 547 """
547 548
548 549 parent = msg['parent_header']
549 550 msg_id = parent['msg_id']
550 551 if msg_id not in self.outstanding:
551 552 if msg_id in self.history:
552 553 print ("got stale result: %s"%msg_id)
553 554 else:
554 555 print ("got unknown result: %s"%msg_id)
555 556 else:
556 557 self.outstanding.remove(msg_id)
557 558 self.results[msg_id] = self._unwrap_exception(msg['content'])
558 559
559 560 def _handle_apply_reply(self, msg):
560 561 """Save the reply to an apply_request into our results."""
561 562 parent = msg['parent_header']
562 563 msg_id = parent['msg_id']
563 564 if msg_id not in self.outstanding:
564 565 if msg_id in self.history:
565 566 print ("got stale result: %s"%msg_id)
566 567 print self.results[msg_id]
567 568 print msg
568 569 else:
569 570 print ("got unknown result: %s"%msg_id)
570 571 else:
571 572 self.outstanding.remove(msg_id)
572 573 content = msg['content']
573 574 header = msg['header']
574 575
575 576 # construct metadata:
576 577 md = self.metadata[msg_id]
577 578 md.update(self._extract_metadata(header, parent, content))
578 579 # is this redundant?
579 580 self.metadata[msg_id] = md
580 581
581 582 e_outstanding = self._outstanding_dict[md['engine_uuid']]
582 583 if msg_id in e_outstanding:
583 584 e_outstanding.remove(msg_id)
584 585
585 586 # construct result:
586 587 if content['status'] == 'ok':
587 588 self.results[msg_id] = util.unserialize_object(msg['buffers'])[0]
588 589 elif content['status'] == 'aborted':
589 590 self.results[msg_id] = error.TaskAborted(msg_id)
590 591 elif content['status'] == 'resubmitted':
591 592 # TODO: handle resubmission
592 593 pass
593 594 else:
594 595 self.results[msg_id] = self._unwrap_exception(content)
595 596
596 597 def _flush_notifications(self):
597 598 """Flush notifications of engine registrations waiting
598 599 in ZMQ queue."""
599 600 msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)
600 601 while msg is not None:
601 602 if self.debug:
602 603 pprint(msg)
603 604 msg = msg[-1]
604 605 msg_type = msg['msg_type']
605 606 handler = self._notification_handlers.get(msg_type, None)
606 607 if handler is None:
607 608 raise Exception("Unhandled message type: %s"%msg_type)
608 609 else:
609 610 handler(msg)
610 611 msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)
611 612
612 613 def _flush_results(self, sock):
613 614 """Flush task or queue results waiting in ZMQ queue."""
614 615 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
615 616 while msg is not None:
616 617 if self.debug:
617 618 pprint(msg)
618 619 msg = msg[-1]
619 620 msg_type = msg['msg_type']
620 621 handler = self._queue_handlers.get(msg_type, None)
621 622 if handler is None:
622 623 raise Exception("Unhandled message type: %s"%msg_type)
623 624 else:
624 625 handler(msg)
625 626 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
626 627
627 628 def _flush_control(self, sock):
628 629 """Flush replies from the control channel waiting
629 630 in the ZMQ queue.
630 631
631 632 Currently: ignore them."""
632 633 if self._ignored_control_replies <= 0:
633 634 return
634 635 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
635 636 while msg is not None:
636 637 self._ignored_control_replies -= 1
637 638 if self.debug:
638 639 pprint(msg)
639 640 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
640 641
641 642 def _flush_ignored_control(self):
642 643 """flush ignored control replies"""
643 644 while self._ignored_control_replies > 0:
644 645 self.session.recv(self._control_socket)
645 646 self._ignored_control_replies -= 1
646 647
647 648 def _flush_ignored_hub_replies(self):
648 649 msg = self.session.recv(self._query_socket, mode=zmq.NOBLOCK)
649 650 while msg is not None:
650 651 msg = self.session.recv(self._query_socket, mode=zmq.NOBLOCK)
651 652
652 653 def _flush_iopub(self, sock):
653 654 """Flush replies from the iopub channel waiting
654 655 in the ZMQ queue.
655 656 """
656 657 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
657 658 while msg is not None:
658 659 if self.debug:
659 660 pprint(msg)
660 661 msg = msg[-1]
661 662 parent = msg['parent_header']
662 663 msg_id = parent['msg_id']
663 664 content = msg['content']
664 665 header = msg['header']
665 666 msg_type = msg['msg_type']
666 667
667 668 # init metadata:
668 669 md = self.metadata[msg_id]
669 670
670 671 if msg_type == 'stream':
671 672 name = content['name']
672 673 s = md[name] or ''
673 674 md[name] = s + content['data']
674 675 elif msg_type == 'pyerr':
675 676 md.update({'pyerr' : self._unwrap_exception(content)})
676 677 else:
677 678 md.update({msg_type : content['data']})
678 679
679 680 # redundant?
680 681 self.metadata[msg_id] = md
681 682
682 683 msg = self.session.recv(sock, mode=zmq.NOBLOCK)
683 684
684 685 #--------------------------------------------------------------------------
685 686 # len, getitem
686 687 #--------------------------------------------------------------------------
687 688
688 689 def __len__(self):
689 690 """len(client) returns # of engines."""
690 691 return len(self.ids)
691 692
692 693 def __getitem__(self, key):
693 694 """index access returns DirectView multiplexer objects
694 695
695 696 Must be int, slice, or list/tuple/xrange of ints"""
696 697 if not isinstance(key, (int, slice, tuple, list, xrange)):
697 698 raise TypeError("key by int/slice/iterable of ints only, not %s"%(type(key)))
698 699 else:
699 700 return self.direct_view(key)
700 701
701 702 #--------------------------------------------------------------------------
702 703 # Begin public methods
703 704 #--------------------------------------------------------------------------
704 705
705 706 @property
706 707 def ids(self):
707 708 """Always up-to-date ids property."""
708 709 self._flush_notifications()
709 710 # always copy:
710 711 return list(self._ids)
711 712
712 713 def close(self):
713 714 if self._closed:
714 715 return
715 716 snames = filter(lambda n: n.endswith('socket'), dir(self))
716 717 for socket in map(lambda name: getattr(self, name), snames):
717 718 if isinstance(socket, zmq.Socket) and not socket.closed:
718 719 socket.close()
719 720 self._closed = True
720 721
721 722 def spin(self):
722 723 """Flush any registration notifications and execution results
723 724 waiting in the ZMQ queue.
724 725 """
725 726 if self._notification_socket:
726 727 self._flush_notifications()
727 728 if self._mux_socket:
728 729 self._flush_results(self._mux_socket)
729 730 if self._task_socket:
730 731 self._flush_results(self._task_socket)
731 732 if self._control_socket:
732 733 self._flush_control(self._control_socket)
733 734 if self._iopub_socket:
734 735 self._flush_iopub(self._iopub_socket)
735 736 if self._query_socket:
736 737 self._flush_ignored_hub_replies()
737 738
738 739 def wait(self, jobs=None, timeout=-1):
739 740 """waits on one or more `jobs`, for up to `timeout` seconds.
740 741
741 742 Parameters
742 743 ----------
743 744
744 745 jobs : int, str, or list of ints and/or strs, or one or more AsyncResult objects
745 746 ints are indices to self.history
746 747 strs are msg_ids
747 748 default: wait on all outstanding messages
748 749 timeout : float
749 750 a time in seconds, after which to give up.
750 751 default is -1, which means no timeout
751 752
752 753 Returns
753 754 -------
754 755
755 756 True : when all msg_ids are done
756 757 False : timeout reached, some msg_ids still outstanding
757 758 """
758 759 tic = time.time()
759 760 if jobs is None:
760 761 theids = self.outstanding
761 762 else:
762 763 if isinstance(jobs, (int, str, AsyncResult)):
763 764 jobs = [jobs]
764 765 theids = set()
765 766 for job in jobs:
766 767 if isinstance(job, int):
767 768 # index access
768 769 job = self.history[job]
769 770 elif isinstance(job, AsyncResult):
770 771 map(theids.add, job.msg_ids)
771 772 continue
772 773 theids.add(job)
773 774 if not theids.intersection(self.outstanding):
774 775 return True
775 776 self.spin()
776 777 while theids.intersection(self.outstanding):
777 778 if timeout >= 0 and ( time.time()-tic ) > timeout:
778 779 break
779 780 time.sleep(1e-3)
780 781 self.spin()
781 782 return len(theids.intersection(self.outstanding)) == 0
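# [Illustrative usage sketch added in review; not part of this commit.]
# Assumes a running cluster, a connected Client, and a hypothetical function `slow`:
#
#     from IPython.parallel import Client
#     rc = Client()
#     ar = rc[:].apply_async(slow, 10)          # submit to all engines, non-blocking
#     done = rc.wait(ar.msg_ids, timeout=5.0)   # True when finished, False if the timeout hit
#     if done:
#         print ar.get()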
782 783
783 784 #--------------------------------------------------------------------------
784 785 # Control methods
785 786 #--------------------------------------------------------------------------
786 787
787 788 @spin_first
788 789 @default_block
789 790 def clear(self, targets=None, block=None):
790 791 """Clear the namespace in target(s)."""
791 792 targets = self._build_targets(targets)[0]
792 793 for t in targets:
793 794 self.session.send(self._control_socket, 'clear_request', content={}, ident=t)
794 795 error = False
795 796 if self.block:
796 797 self._flush_ignored_control()
797 798 for i in range(len(targets)):
798 799 idents,msg = self.session.recv(self._control_socket,0)
799 800 if self.debug:
800 801 pprint(msg)
801 802 if msg['content']['status'] != 'ok':
802 803 error = self._unwrap_exception(msg['content'])
803 804 else:
804 805 self._ignored_control_replies += len(targets)
805 806 if error:
806 807 raise error
807 808
808 809
809 810 @spin_first
810 811 @default_block
811 812 def abort(self, jobs=None, targets=None, block=None):
812 813 """Abort specific jobs from the execution queues of target(s).
813 814
814 815 This is a mechanism to prevent jobs that have already been submitted
815 816 from executing.
816 817
817 818 Parameters
818 819 ----------
819 820
820 821 jobs : msg_id, list of msg_ids, or AsyncResult
821 822 The jobs to be aborted
822 823
823 824
824 825 """
825 826 targets = self._build_targets(targets)[0]
826 827 msg_ids = []
827 828 if isinstance(jobs, (basestring,AsyncResult)):
828 829 jobs = [jobs]
829 830 bad_ids = filter(lambda obj: not isinstance(obj, (basestring, AsyncResult)), jobs)
830 831 if bad_ids:
831 832 raise TypeError("Invalid msg_id type %r, expected str or AsyncResult"%bad_ids[0])
832 833 for j in jobs:
833 834 if isinstance(j, AsyncResult):
834 835 msg_ids.extend(j.msg_ids)
835 836 else:
836 837 msg_ids.append(j)
837 838 content = dict(msg_ids=msg_ids)
838 839 for t in targets:
839 840 self.session.send(self._control_socket, 'abort_request',
840 841 content=content, ident=t)
841 842 error = False
842 843 if self.block:
843 844 self._flush_ignored_control()
844 845 for i in range(len(targets)):
845 846 idents,msg = self.session.recv(self._control_socket,0)
846 847 if self.debug:
847 848 pprint(msg)
848 849 if msg['content']['status'] != 'ok':
849 850 error = self._unwrap_exception(msg['content'])
850 851 else:
851 852 self._ignored_control_replies += len(targets)
852 853 if error:
853 854 raise error
854 855
855 856 @spin_first
856 857 @default_block
857 858 def shutdown(self, targets=None, restart=False, hub=False, block=None):
858 859 """Terminates one or more engine processes, optionally including the hub."""
859 860 if hub:
860 861 targets = 'all'
861 862 targets = self._build_targets(targets)[0]
862 863 for t in targets:
863 864 self.session.send(self._control_socket, 'shutdown_request',
864 865 content={'restart':restart},ident=t)
865 866 error = False
866 867 if block or hub:
867 868 self._flush_ignored_control()
868 869 for i in range(len(targets)):
869 870 idents,msg = self.session.recv(self._control_socket, 0)
870 871 if self.debug:
871 872 pprint(msg)
872 873 if msg['content']['status'] != 'ok':
873 874 error = self._unwrap_exception(msg['content'])
874 875 else:
875 876 self._ignored_control_replies += len(targets)
876 877
877 878 if hub:
878 879 time.sleep(0.25)
879 880 self.session.send(self._query_socket, 'shutdown_request')
880 881 idents,msg = self.session.recv(self._query_socket, 0)
881 882 if self.debug:
882 883 pprint(msg)
883 884 if msg['content']['status'] != 'ok':
884 885 error = self._unwrap_exception(msg['content'])
885 886
886 887 if error:
887 888 raise error
888 889
889 890 #--------------------------------------------------------------------------
890 891 # Execution methods
891 892 #--------------------------------------------------------------------------
892 893
893 894 @default_block
894 895 def _execute(self, code, targets='all', block=None):
895 896 """Executes `code` on `targets` in blocking or nonblocking manner.
896 897
897 898 ``execute`` is always `bound` (affects engine namespace)
898 899
899 900 Parameters
900 901 ----------
901 902
902 903 code : str
903 904 the code string to be executed
904 905 targets : int/str/list of ints/strs
905 906 the engines on which to execute
906 907 default : all
907 908 block : bool
908 909 whether or not to wait until done to return
909 910 default: self.block
910 911 """
911 912 return self[targets].execute(code, block=block)
912 913
913 914 def _maybe_raise(self, result):
914 915 """wrapper for maybe raising an exception if apply failed."""
915 916 if isinstance(result, error.RemoteError):
916 917 raise result
917 918
918 919 return result
919 920
920 921 def send_apply_message(self, socket, f, args=None, kwargs=None, subheader=None, track=False,
921 922 ident=None):
922 923 """construct and send an apply message via a socket.
923 924
924 925 This is the principal method with which all engine execution is performed by views.
925 926 """
926 927
927 928 assert not self._closed, "cannot use me anymore, I'm closed!"
928 929 # defaults:
929 930 args = args if args is not None else []
930 931 kwargs = kwargs if kwargs is not None else {}
931 932 subheader = subheader if subheader is not None else {}
932 933
933 934 # validate arguments
934 935 if not callable(f):
935 936 raise TypeError("f must be callable, not %s"%type(f))
936 937 if not isinstance(args, (tuple, list)):
937 938 raise TypeError("args must be tuple or list, not %s"%type(args))
938 939 if not isinstance(kwargs, dict):
939 940 raise TypeError("kwargs must be dict, not %s"%type(kwargs))
940 941 if not isinstance(subheader, dict):
941 942 raise TypeError("subheader must be dict, not %s"%type(subheader))
942 943
943 944 if not self._ids:
944 945 # flush notification socket if no engines yet
945 946 any_ids = self.ids
946 947 if not any_ids:
947 948 raise error.NoEnginesRegistered("Can't execute without any connected engines.")
948 949 # enforce types of f,args,kwargs
949 950
950 951 bufs = util.pack_apply_message(f,args,kwargs)
951 952
952 953 msg = self.session.send(socket, "apply_request", buffers=bufs, ident=ident,
953 954 subheader=subheader, track=track)
954 955
955 956 msg_id = msg['msg_id']
956 957 self.outstanding.add(msg_id)
957 958 if ident:
958 959 # possibly routed to a specific engine
959 960 if isinstance(ident, list):
960 961 ident = ident[-1]
961 962 if ident in self._engines.values():
962 963 # save for later, in case of engine death
963 964 self._outstanding_dict[ident].add(msg_id)
964 965 self.history.append(msg_id)
965 966 self.metadata[msg_id]['submitted'] = datetime.now()
966 967
967 968 return msg
968 969
969 970 #--------------------------------------------------------------------------
970 971 # construct a View object
971 972 #--------------------------------------------------------------------------
972 973
973 974 def load_balanced_view(self, targets=None):
974 975 """construct a DirectView object.
975 976
976 977 If no arguments are specified, create a LoadBalancedView
977 978 using all engines.
978 979
979 980 Parameters
980 981 ----------
981 982
982 983 targets: list,slice,int,etc. [default: use all engines]
983 984 The subset of engines across which to load-balance
984 985 """
985 986 if targets is not None:
986 987 targets = self._build_targets(targets)[1]
987 988 return LoadBalancedView(client=self, socket=self._task_socket, targets=targets)
988 989
989 990 def direct_view(self, targets='all'):
990 991 """construct a DirectView object.
991 992
992 993 If no targets are specified, create a DirectView
993 994 using all engines.
994 995
995 996 Parameters
996 997 ----------
997 998
998 999 targets: list,slice,int,etc. [default: use all engines]
999 1000 The engines to use for the View
1000 1001 """
1001 1002 single = isinstance(targets, int)
1002 1003 targets = self._build_targets(targets)[1]
1003 1004 if single:
1004 1005 targets = targets[0]
1005 1006 return DirectView(client=self, socket=self._mux_socket, targets=targets)
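# [Illustrative sketch added in review; not part of this commit.]
# The two ways to construct Views, assuming a connected Client `rc`:
#
#     dv  = rc.direct_view('all')             # explicit DirectView on every engine
#     dv0 = rc[0]                             # same thing via index access (single engine)
#     lbv = rc.load_balanced_view()           # task scheduler picks the engine per job
#     lbv13 = rc.load_balanced_view([1, 3])   # restrict load-balancing to engines 1 and 3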
1006 1007
1007 1008 #--------------------------------------------------------------------------
1008 1009 # Data movement (TO BE REMOVED)
1009 1010 #--------------------------------------------------------------------------
1010 1011
1011 1012 @default_block
1012 1013 def _push(self, ns, targets='all', block=None, track=False):
1013 1014 """Push the contents of `ns` into the namespace on `target`"""
1014 1015 if not isinstance(ns, dict):
1015 1016 raise TypeError("Must be a dict, not %s"%type(ns))
1016 1017 result = self.apply(util._push, kwargs=ns, targets=targets, block=block, bound=True, balanced=False, track=track)
1017 1018 if not block:
1018 1019 return result
1019 1020
1020 1021 @default_block
1021 1022 def _pull(self, keys, targets='all', block=None):
1022 1023 """Pull objects from `target`'s namespace by `keys`"""
1023 1024 if isinstance(keys, basestring):
1024 1025 pass
1025 1026 elif isinstance(keys, (list,tuple,set)):
1026 1027 for key in keys:
1027 1028 if not isinstance(key, basestring):
1028 1029 raise TypeError("keys must be str, not type %r"%type(key))
1029 1030 else:
1030 1031 raise TypeError("keys must be strs, not %r"%keys)
1031 1032 result = self.apply(util._pull, (keys,), targets=targets, block=block, bound=True, balanced=False)
1032 1033 return result
1033 1034
1034 1035 #--------------------------------------------------------------------------
1035 1036 # Query methods
1036 1037 #--------------------------------------------------------------------------
1037 1038
1038 1039 @spin_first
1039 1040 @default_block
1040 1041 def get_result(self, indices_or_msg_ids=None, block=None):
1041 1042 """Retrieve a result by msg_id or history index, wrapped in an AsyncResult object.
1042 1043
1043 1044 If the client already has the results, no request to the Hub will be made.
1044 1045
1045 1046 This is a convenient way to construct AsyncResult objects, which are wrappers
1046 1047 that include metadata about execution, and allow for awaiting results that
1047 1048 were not submitted by this Client.
1048 1049
1049 1050 It can also be a convenient way to retrieve the metadata associated with
1050 1051 blocking execution, since it always retrieves
1051 1052
1052 1053 Examples
1053 1054 --------
1054 1055 ::
1055 1056
1056 1057 In [10]: ar = client.get_result(-1)   # AsyncResult for the most recent submission
1057 1058
1058 1059 Parameters
1059 1060 ----------
1060 1061
1061 1062 indices_or_msg_ids : integer history index, str msg_id, or list of either
1062 1063 The indices or msg_ids whose results are to be retrieved
1063 1064
1064 1065 block : bool
1065 1066 Whether to wait for the result to be done
1066 1067
1067 1068 Returns
1068 1069 -------
1069 1070
1070 1071 AsyncResult
1071 1072 A single AsyncResult object will always be returned.
1072 1073
1073 1074 AsyncHubResult
1074 1075 A subclass of AsyncResult that retrieves results from the Hub
1075 1076
1076 1077 """
1077 1078 if indices_or_msg_ids is None:
1078 1079 indices_or_msg_ids = -1
1079 1080
1080 1081 if not isinstance(indices_or_msg_ids, (list,tuple)):
1081 1082 indices_or_msg_ids = [indices_or_msg_ids]
1082 1083
1083 1084 theids = []
1084 1085 for id in indices_or_msg_ids:
1085 1086 if isinstance(id, int):
1086 1087 id = self.history[id]
1087 1088 if not isinstance(id, str):
1088 1089 raise TypeError("indices must be str or int, not %r"%id)
1089 1090 theids.append(id)
1090 1091
1091 1092 local_ids = filter(lambda msg_id: msg_id in self.history or msg_id in self.results, theids)
1092 1093 remote_ids = filter(lambda msg_id: msg_id not in local_ids, theids)
1093 1094
1094 1095 if remote_ids:
1095 1096 ar = AsyncHubResult(self, msg_ids=theids)
1096 1097 else:
1097 1098 ar = AsyncResult(self, msg_ids=theids)
1098 1099
1099 1100 if block:
1100 1101 ar.wait()
1101 1102
1102 1103 return ar
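# [Illustrative sketch added in review; not part of this commit.]
# Retrieving a result (and its cached metadata) after the fact, assuming a connected Client `rc`
# that has already submitted work:
#
#     msg_id = rc.history[-1]
#     ar = rc.get_result(msg_id)     # AsyncResult wrapping that msg_id
#     ar.wait()                      # block until it is done
#     print rc.metadata[msg_id]      # execution metadata cached by the client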
1103 1104
1104 1105 @spin_first
1105 1106 def result_status(self, msg_ids, status_only=True):
1106 1107 """Check on the status of the result(s) of the apply request with `msg_ids`.
1107 1108
1108 1109 If status_only is False, then the actual results will be retrieved, else
1109 1110 only the status of the results will be checked.
1110 1111
1111 1112 Parameters
1112 1113 ----------
1113 1114
1114 1115 msg_ids : list of msg_ids
1115 1116 if int:
1116 1117 Passed as index to self.history for convenience.
1117 1118 status_only : bool (default: True)
1118 1119 if False:
1119 1120 Retrieve the actual results of completed tasks.
1120 1121
1121 1122 Returns
1122 1123 -------
1123 1124
1124 1125 results : dict
1125 1126 There will always be the keys 'pending' and 'completed', which will
1126 1127 be lists of msg_ids that are incomplete or complete. If `status_only`
1127 1128 is False, then completed results will be keyed by their `msg_id`.
1128 1129 """
1129 1130 if not isinstance(msg_ids, (list,tuple)):
1130 1131 msg_ids = [msg_ids]
1131 1132
1132 1133 theids = []
1133 1134 for msg_id in msg_ids:
1134 1135 if isinstance(msg_id, int):
1135 1136 msg_id = self.history[msg_id]
1136 1137 if not isinstance(msg_id, basestring):
1137 1138 raise TypeError("msg_ids must be str, not %r"%msg_id)
1138 1139 theids.append(msg_id)
1139 1140
1140 1141 completed = []
1141 1142 local_results = {}
1142 1143
1143 1144 # comment this block out to temporarily disable local shortcut:
1144 1145 for msg_id in theids:
1145 1146 if msg_id in self.results:
1146 1147 completed.append(msg_id)
1147 1148 local_results[msg_id] = self.results[msg_id]
1148 1149 theids.remove(msg_id)
1149 1150
1150 1151 if theids: # some not locally cached
1151 1152 content = dict(msg_ids=theids, status_only=status_only)
1152 1153 msg = self.session.send(self._query_socket, "result_request", content=content)
1153 1154 zmq.select([self._query_socket], [], [])
1154 1155 idents,msg = self.session.recv(self._query_socket, zmq.NOBLOCK)
1155 1156 if self.debug:
1156 1157 pprint(msg)
1157 1158 content = msg['content']
1158 1159 if content['status'] != 'ok':
1159 1160 raise self._unwrap_exception(content)
1160 1161 buffers = msg['buffers']
1161 1162 else:
1162 1163 content = dict(completed=[],pending=[])
1163 1164
1164 1165 content['completed'].extend(completed)
1165 1166
1166 1167 if status_only:
1167 1168 return content
1168 1169
1169 1170 failures = []
1170 1171 # load cached results into result:
1171 1172 content.update(local_results)
1172 1173 # update cache with results:
1173 1174 for msg_id in sorted(theids):
1174 1175 if msg_id in content['completed']:
1175 1176 rec = content[msg_id]
1176 1177 parent = rec['header']
1177 1178 header = rec['result_header']
1178 1179 rcontent = rec['result_content']
1179 1180 iodict = rec['io']
1180 1181 if isinstance(rcontent, str):
1181 1182 rcontent = self.session.unpack(rcontent)
1182 1183
1183 1184 md = self.metadata[msg_id]
1184 1185 md.update(self._extract_metadata(header, parent, rcontent))
1185 1186 md.update(iodict)
1186 1187
1187 1188 if rcontent['status'] == 'ok':
1188 1189 res,buffers = util.unserialize_object(buffers)
1189 1190 else:
1190 1191 print rcontent
1191 1192 res = self._unwrap_exception(rcontent)
1192 1193 failures.append(res)
1193 1194
1194 1195 self.results[msg_id] = res
1195 1196 content[msg_id] = res
1196 1197
1197 1198 if len(theids) == 1 and failures:
1198 1199 raise failures[0]
1199 1200
1200 1201 error.collect_exceptions(failures, "result_status")
1201 1202 return content
1202 1203
1203 1204 @spin_first
1204 1205 def queue_status(self, targets='all', verbose=False):
1205 1206 """Fetch the status of engine queues.
1206 1207
1207 1208 Parameters
1208 1209 ----------
1209 1210
1210 1211 targets : int/str/list of ints/strs
1211 1212 the engines whose states are to be queried.
1212 1213 default : all
1213 1214 verbose : bool
1214 1215 Whether to return lengths only, or lists of ids for each element
1215 1216 """
1216 1217 engine_ids = self._build_targets(targets)[1]
1217 1218 content = dict(targets=engine_ids, verbose=verbose)
1218 1219 self.session.send(self._query_socket, "queue_request", content=content)
1219 1220 idents,msg = self.session.recv(self._query_socket, 0)
1220 1221 if self.debug:
1221 1222 pprint(msg)
1222 1223 content = msg['content']
1223 1224 status = content.pop('status')
1224 1225 if status != 'ok':
1225 1226 raise self._unwrap_exception(content)
1226 1227 content = util.rekey(content)
1227 1228 if isinstance(targets, int):
1228 1229 return content[targets]
1229 1230 else:
1230 1231 return content
1231 1232
1232 1233 @spin_first
1233 1234 def purge_results(self, jobs=[], targets=[]):
1234 1235 """Tell the Hub to forget results.
1235 1236
1236 1237 Individual results can be purged by msg_id, or the entire
1237 1238 history of specific targets can be purged.
1238 1239
1239 1240 Parameters
1240 1241 ----------
1241 1242
1242 1243 jobs : str or list of str or AsyncResult objects
1243 1244 the msg_ids whose results should be forgotten.
1244 1245 targets : int/str/list of ints/strs
1245 1246 The targets, by uuid or int_id, whose entire history is to be purged.
1246 1247 Use `targets='all'` to scrub everything from the Hub's memory.
1247 1248
1248 1249 default : None
1249 1250 """
1250 1251 if not targets and not jobs:
1251 1252 raise ValueError("Must specify at least one of `targets` and `jobs`")
1252 1253 if targets:
1253 1254 targets = self._build_targets(targets)[1]
1254 1255
1255 1256 # construct msg_ids from jobs
1256 1257 msg_ids = []
1257 1258 if isinstance(jobs, (basestring,AsyncResult)):
1258 1259 jobs = [jobs]
1259 1260 bad_ids = filter(lambda obj: not isinstance(obj, (basestring, AsyncResult)), jobs)
1260 1261 if bad_ids:
1261 1262 raise TypeError("Invalid msg_id type %r, expected str or AsyncResult"%bad_ids[0])
1262 1263 for j in jobs:
1263 1264 if isinstance(j, AsyncResult):
1264 1265 msg_ids.extend(j.msg_ids)
1265 1266 else:
1266 1267 msg_ids.append(j)
1267 1268
1268 1269 content = dict(targets=targets, msg_ids=msg_ids)
1269 1270 self.session.send(self._query_socket, "purge_request", content=content)
1270 1271 idents, msg = self.session.recv(self._query_socket, 0)
1271 1272 if self.debug:
1272 1273 pprint(msg)
1273 1274 content = msg['content']
1274 1275 if content['status'] != 'ok':
1275 1276 raise self._unwrap_exception(content)
1276 1277
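# [Illustrative sketch added in review; not part of this commit.]
# Housekeeping via the Hub, assuming a connected Client `rc` with some completed work:
#
#     rc.queue_status()                     # per-engine queue lengths
#     rc.queue_status(0, verbose=True)      # full msg_id lists for engine 0
#     rc.purge_results(jobs=rc.history)     # forget everything this client submitted
#     rc.purge_results(targets='all')       # or scrub the Hub's entire record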
1277 1278
1278 1279 __all__ = [ 'Client' ]
1 NO CONTENT: file renamed from IPython/parallel/map.py to IPython/parallel/client/map.py
1 NO CONTENT: file renamed from IPython/parallel/remotefunction.py to IPython/parallel/client/remotefunction.py
@@ -1,1036 +1,1033 b''
1 1 """Views of remote engines."""
2 2 #-----------------------------------------------------------------------------
3 3 # Copyright (C) 2010 The IPython Development Team
4 4 #
5 5 # Distributed under the terms of the BSD License. The full license is in
6 6 # the file COPYING, distributed as part of this software.
7 7 #-----------------------------------------------------------------------------
8 8
9 9 #-----------------------------------------------------------------------------
10 10 # Imports
11 11 #-----------------------------------------------------------------------------
12 12
13 13 import imp
14 14 import sys
15 15 import warnings
16 16 from contextlib import contextmanager
17 17 from types import ModuleType
18 18
19 19 import zmq
20 20
21 21 from IPython.testing import decorators as testdec
22 22 from IPython.utils.traitlets import HasTraits, Any, Bool, List, Dict, Set, Int, Instance, CFloat
23 23
24 24 from IPython.external.decorator import decorator
25 25
26 from IPython.parallel import util
27 from IPython.parallel.controller.dependency import Dependency, dependent
28
26 29 from . import map as Map
27 from . import util
28 30 from .asyncresult import AsyncResult, AsyncMapResult
29 from .dependency import Dependency, dependent
30 31 from .remotefunction import ParallelFunction, parallel, remote
31 32
32 33 #-----------------------------------------------------------------------------
33 34 # Decorators
34 35 #-----------------------------------------------------------------------------
35 36
36 37 @decorator
37 38 def save_ids(f, self, *args, **kwargs):
38 39 """Keep our history and outstanding attributes up to date after a method call."""
39 40 n_previous = len(self.client.history)
40 41 try:
41 42 ret = f(self, *args, **kwargs)
42 43 finally:
43 44 nmsgs = len(self.client.history) - n_previous
44 45 msg_ids = self.client.history[-nmsgs:]
45 46 self.history.extend(msg_ids)
46 47 map(self.outstanding.add, msg_ids)
47 48 return ret
48 49
49 50 @decorator
50 51 def sync_results(f, self, *args, **kwargs):
51 52 """sync relevant results from self.client to our results attribute."""
52 53 ret = f(self, *args, **kwargs)
53 54 delta = self.outstanding.difference(self.client.outstanding)
54 55 completed = self.outstanding.intersection(delta)
55 56 self.outstanding = self.outstanding.difference(completed)
56 57 for msg_id in completed:
57 58 self.results[msg_id] = self.client.results[msg_id]
58 59 return ret
59 60
60 61 @decorator
61 62 def spin_after(f, self, *args, **kwargs):
62 63 """call spin after the method."""
63 64 ret = f(self, *args, **kwargs)
64 65 self.spin()
65 66 return ret
66 67
67 68 #-----------------------------------------------------------------------------
68 69 # Classes
69 70 #-----------------------------------------------------------------------------
70 71
72 @testdec.skip_doctest
71 73 class View(HasTraits):
72 74 """Base View class for more convenint apply(f,*args,**kwargs) syntax via attributes.
73 75
74 76 Don't use this class, use subclasses.
75 77
76 78 Methods
77 79 -------
78 80
79 81 spin
80 82 flushes incoming results and registration state changes
81 83 control methods spin, and requesting `ids` also ensures up to date
82 84
83 85 wait
84 86 wait on one or more msg_ids
85 87
86 88 execution methods
87 89 apply
88 90 legacy: execute, run
89 91
90 92 data movement
91 93 push, pull, scatter, gather
92 94
93 95 query methods
94 96 get_result, queue_status, purge_results, result_status
95 97
96 98 control methods
97 99 abort, shutdown
98 100
99 101 """
100 102 # flags
101 103 block=Bool(False)
102 104 track=Bool(True)
103 105 targets = Any()
104 106
105 107 history=List()
106 108 outstanding = Set()
107 109 results = Dict()
108 client = Instance('IPython.parallel.client.Client')
110 client = Instance('IPython.parallel.Client')
109 111
110 112 _socket = Instance('zmq.Socket')
111 113 _flag_names = List(['targets', 'block', 'track'])
112 114 _targets = Any()
113 115 _idents = Any()
114 116
115 117 def __init__(self, client=None, socket=None, **flags):
116 118 super(View, self).__init__(client=client, _socket=socket)
117 119 self.block = client.block
118 120
119 121 self.set_flags(**flags)
120 122
121 123 assert not self.__class__ is View, "Don't use base View objects, use subclasses"
122 124
123 125
124 126 def __repr__(self):
125 127 strtargets = str(self.targets)
126 128 if len(strtargets) > 16:
127 129 strtargets = strtargets[:12]+'...]'
128 130 return "<%s %s>"%(self.__class__.__name__, strtargets)
129 131
130 132 def set_flags(self, **kwargs):
131 133 """set my attribute flags by keyword.
132 134
133 135 Views determine behavior with a few attributes (`block`, `track`, etc.).
134 136 These attributes can be set all at once by name with this method.
135 137
136 138 Parameters
137 139 ----------
138 140
139 141 block : bool
140 142 whether to wait for results
141 143 track : bool
142 144 whether to create a MessageTracker to allow the user to
143 145 safely edit arrays and buffers after non-copying
144 146 sends.
145 147 """
146 148 for name, value in kwargs.iteritems():
147 149 if name not in self._flag_names:
148 150 raise KeyError("Invalid name: %r"%name)
149 151 else:
150 152 setattr(self, name, value)
151 153
152 154 @contextmanager
153 155 def temp_flags(self, **kwargs):
154 156 """temporarily set flags, for use in `with` statements.
155 157
156 158 See set_flags for permanent setting of flags
157 159
158 160 Examples
159 161 --------
160 162
161 163 >>> view.track=False
162 164 ...
163 165 >>> with view.temp_flags(track=True):
164 166 ... ar = view.apply(dostuff, my_big_array)
165 167 ... ar.tracker.wait() # wait for send to finish
166 168 >>> view.track
167 169 False
168 170
169 171 """
170 172 # preflight: save flags, and set temporaries
171 173 saved_flags = {}
172 174 for f in self._flag_names:
173 175 saved_flags[f] = getattr(self, f)
174 176 self.set_flags(**kwargs)
175 177 # yield to the with-statement block
176 178 try:
177 179 yield
178 180 finally:
179 181 # postflight: restore saved flags
180 182 self.set_flags(**saved_flags)
181 183
182 184
183 185 #----------------------------------------------------------------
184 186 # apply
185 187 #----------------------------------------------------------------
186 188
187 189 @sync_results
188 190 @save_ids
189 191 def _really_apply(self, f, args, kwargs, block=None, **options):
190 192 """wrapper for client.send_apply_message"""
191 193 raise NotImplementedError("Implement in subclasses")
192 194
193 195 def apply(self, f, *args, **kwargs):
194 196 """calls f(*args, **kwargs) on remote engines, returning the result.
195 197
196 198 This method sets all apply flags via this View's attributes.
197 199
198 200 if self.block is False:
199 201 returns AsyncResult
200 202 else:
201 203 returns actual result of f(*args, **kwargs)
202 204 """
203 205 return self._really_apply(f, args, kwargs)
204 206
205 207 def apply_async(self, f, *args, **kwargs):
206 208 """calls f(*args, **kwargs) on remote engines in a nonblocking manner.
207 209
208 210 returns AsyncResult
209 211 """
210 212 return self._really_apply(f, args, kwargs, block=False)
211 213
212 214 @spin_after
213 215 def apply_sync(self, f, *args, **kwargs):
214 216 """calls f(*args, **kwargs) on remote engines in a blocking manner,
215 217 returning the result.
216 218
217 219 returns: actual result of f(*args, **kwargs)
218 220 """
219 221 return self._really_apply(f, args, kwargs, block=True)
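# [Illustrative sketch added in review; not part of this commit.]
# The three apply flavours, assuming `view` is any View and `f` a serializable function:
#
#     r  = view.apply(f, 1, 2)          # honours view.block: result or AsyncResult
#     ar = view.apply_async(f, 1, 2)    # always returns an AsyncResult immediately
#     r  = view.apply_sync(f, 1, 2)     # always blocks and returns the value of f(1, 2)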
220 222
221 223 #----------------------------------------------------------------
222 224 # wrappers for client and control methods
223 225 #----------------------------------------------------------------
224 226 @sync_results
225 227 def spin(self):
226 228 """spin the client, and sync"""
227 229 self.client.spin()
228 230
229 231 @sync_results
230 232 def wait(self, jobs=None, timeout=-1):
231 233 """waits on one or more `jobs`, for up to `timeout` seconds.
232 234
233 235 Parameters
234 236 ----------
235 237
236 238 jobs : int, str, or list of ints and/or strs, or one or more AsyncResult objects
237 239 ints are indices to self.history
238 240 strs are msg_ids
239 241 default: wait on all outstanding messages
240 242 timeout : float
241 243 a time in seconds, after which to give up.
242 244 default is -1, which means no timeout
243 245
244 246 Returns
245 247 -------
246 248
247 249 True : when all msg_ids are done
248 250 False : timeout reached, some msg_ids still outstanding
249 251 """
250 252 if jobs is None:
251 253 jobs = self.history
252 254 return self.client.wait(jobs, timeout)
253 255
254 256 def abort(self, jobs=None, targets=None, block=None):
255 257 """Abort jobs on my engines.
256 258
257 259 Parameters
258 260 ----------
259 261
260 262 jobs : None, str, list of strs, optional
261 263 if None: abort all jobs.
262 264 else: abort specific msg_id(s).
263 265 """
264 266 block = block if block is not None else self.block
265 267 targets = targets if targets is not None else self.targets
266 268 return self.client.abort(jobs=jobs, targets=targets, block=block)
267 269
268 270 def queue_status(self, targets=None, verbose=False):
269 271 """Fetch the Queue status of my engines"""
270 272 targets = targets if targets is not None else self.targets
271 273 return self.client.queue_status(targets=targets, verbose=verbose)
272 274
273 275 def purge_results(self, jobs=[], targets=[]):
274 276 """Instruct the controller to forget specific results."""
275 277 if targets is None or targets == 'all':
276 278 targets = self.targets
277 279 return self.client.purge_results(jobs=jobs, targets=targets)
278 280
279 281 def shutdown(self, targets=None, restart=False, hub=False, block=None):
280 282 """Terminates one or more engine processes, optionally including the hub.
281 283 """
282 284 block = self.block if block is None else block
283 285 if targets is None or targets == 'all':
284 286 targets = self.targets
285 287 return self.client.shutdown(targets=targets, restart=restart, hub=hub, block=block)
286 288
287 289 @spin_after
288 290 def get_result(self, indices_or_msg_ids=None):
289 291 """return one or more results, specified by history index or msg_id.
290 292
291 293 See client.get_result for details.
292 294
293 295 """
294 296
295 297 if indices_or_msg_ids is None:
296 298 indices_or_msg_ids = -1
297 299 if isinstance(indices_or_msg_ids, int):
298 300 indices_or_msg_ids = self.history[indices_or_msg_ids]
299 301 elif isinstance(indices_or_msg_ids, (list,tuple,set)):
300 302 indices_or_msg_ids = list(indices_or_msg_ids)
301 303 for i,index in enumerate(indices_or_msg_ids):
302 304 if isinstance(index, int):
303 305 indices_or_msg_ids[i] = self.history[index]
304 306 return self.client.get_result(indices_or_msg_ids)
305 307
306 308 #-------------------------------------------------------------------
307 309 # Map
308 310 #-------------------------------------------------------------------
309 311
310 312 def map(self, f, *sequences, **kwargs):
311 313 """override in subclasses"""
312 314 raise NotImplementedError
313 315
314 316 def map_async(self, f, *sequences, **kwargs):
315 317 """Parallel version of builtin `map`, using this view's engines.
316 318
317 319 This is equivalent to map(...block=False)
318 320
319 321 See `self.map` for details.
320 322 """
321 323 if 'block' in kwargs:
322 324 raise TypeError("map_async doesn't take a `block` keyword argument.")
323 325 kwargs['block'] = False
324 326 return self.map(f,*sequences,**kwargs)
325 327
326 328 def map_sync(self, f, *sequences, **kwargs):
327 329 """Parallel version of builtin `map`, using this view's engines.
328 330
329 331 This is equivalent to map(...block=True)
330 332
331 333 See `self.map` for details.
332 334 """
333 335 if 'block' in kwargs:
334 336 raise TypeError("map_sync doesn't take a `block` keyword argument.")
335 337 kwargs['block'] = True
336 338 return self.map(f,*sequences,**kwargs)
337 339
338 340 def imap(self, f, *sequences, **kwargs):
339 341 """Parallel version of `itertools.imap`.
340 342
341 343 See `self.map` for details.
342 344
343 345 """
344 346
345 347 return iter(self.map_async(f,*sequences, **kwargs))
346 348
347 349 #-------------------------------------------------------------------
348 350 # Decorators
349 351 #-------------------------------------------------------------------
350 352
351 353 def remote(self, block=True, **flags):
352 354 """Decorator for making a RemoteFunction"""
353 355 block = self.block if block is None else block
354 356 return remote(self, block=block, **flags)
355 357
356 358 def parallel(self, dist='b', block=None, **flags):
357 359 """Decorator for making a ParallelFunction"""
358 360 block = self.block if block is None else block
359 361 return parallel(self, dist=dist, block=block, **flags)
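# [Illustrative sketch added in review; not part of this commit.]
# Decorator forms of apply/map, assuming an existing View instance `view`:
#
#     @view.remote(block=True)
#     def getpid():
#         import os
#         return os.getpid()
#
#     pids = getpid()                   # runs on the view's engines
#
#     @view.parallel(dist='b', block=True)
#     def double(x):
#         return 2 * x
#
#     doubled = double.map(range(100))  # element-wise, distributed across engines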
360 362
361 363 @testdec.skip_doctest
362 364 class DirectView(View):
363 365 """Direct Multiplexer View of one or more engines.
364 366
365 367 These are created via indexed access to a client:
366 368
367 369 >>> dv_1 = client[1]
368 370 >>> dv_all = client[:]
369 371 >>> dv_even = client[::2]
370 372 >>> dv_some = client[1:3]
371 373
372 374 This object provides dictionary access to engine namespaces:
373 375
374 376 # push a=5:
375 377 >>> dv['a'] = 5
376 378 # pull 'foo':
377 379 >>> dv['foo']
378 380
379 381 """
380 382
381 383 def __init__(self, client=None, socket=None, targets=None):
382 384 super(DirectView, self).__init__(client=client, socket=socket, targets=targets)
383 385
384 386 @property
385 387 def importer(self):
386 388 """sync_imports(local=True) as a property.
387 389
388 390 See sync_imports for details.
389 391
390 In [10]: with v.importer:
391 ....: import numpy
392 ....:
393 importing numpy on engine(s)
394
395 392 """
396 393 return self.sync_imports(True)
397 394
398 395 @contextmanager
399 396 def sync_imports(self, local=True):
400 397 """Context Manager for performing simultaneous local and remote imports.
401 398
402 399 'import x as y' will *not* work. The 'as y' part will simply be ignored.
403 400
404 401 >>> with view.sync_imports():
405 402 ... from numpy import recarray
406 403 importing recarray from numpy on engine(s)
407 404
408 405 """
409 406 import __builtin__
410 407 local_import = __builtin__.__import__
411 408 modules = set()
412 409 results = []
413 410 @util.interactive
414 411 def remote_import(name, fromlist, level):
415 412 """the function to be passed to apply, that actually performs the import
416 413 on the engine, and loads up the user namespace.
417 414 """
418 415 import sys
419 416 user_ns = globals()
420 417 mod = __import__(name, fromlist=fromlist, level=level)
421 418 if fromlist:
422 419 for key in fromlist:
423 420 user_ns[key] = getattr(mod, key)
424 421 else:
425 422 user_ns[name] = sys.modules[name]
426 423
427 424 def view_import(name, globals={}, locals={}, fromlist=[], level=-1):
428 425 """the drop-in replacement for __import__, that optionally imports
429 426 locally as well.
430 427 """
431 428 # don't override nested imports
432 429 save_import = __builtin__.__import__
433 430 __builtin__.__import__ = local_import
434 431
435 432 if imp.lock_held():
436 433 # this is a side-effect import, don't do it remotely, or even
437 434 # ignore the local effects
438 435 return local_import(name, globals, locals, fromlist, level)
439 436
440 437 imp.acquire_lock()
441 438 if local:
442 439 mod = local_import(name, globals, locals, fromlist, level)
443 440 else:
444 441 raise NotImplementedError("remote-only imports not yet implemented")
445 442 imp.release_lock()
446 443
447 444 key = name+':'+','.join(fromlist or [])
448 445 if level == -1 and key not in modules:
449 446 modules.add(key)
450 447 if fromlist:
451 448 print "importing %s from %s on engine(s)"%(','.join(fromlist), name)
452 449 else:
453 450 print "importing %s on engine(s)"%name
454 451 results.append(self.apply_async(remote_import, name, fromlist, level))
455 452 # restore override
456 453 __builtin__.__import__ = save_import
457 454
458 455 return mod
459 456
460 457 # override __import__
461 458 __builtin__.__import__ = view_import
462 459 try:
463 460 # enter the block
464 461 yield
465 462 except ImportError:
466 463 if not local:
467 464 # ignore import errors if not doing local imports
468 465 pass
469 466 finally:
470 467 # always restore __import__
471 468 __builtin__.__import__ = local_import
472 469
473 470 for r in results:
474 471 # raise possible remote ImportErrors here
475 472 r.get()
476 473
477 474
478 475 @sync_results
479 476 @save_ids
480 477 def _really_apply(self, f, args=None, kwargs=None, targets=None, block=None, track=None):
481 478 """calls f(*args, **kwargs) on remote engines, returning the result.
482 479
483 480 This method sets all of `apply`'s flags via this View's attributes.
484 481
485 482 Parameters
486 483 ----------
487 484
488 485 f : callable
489 486
490 487 args : list [default: empty]
491 488
492 489 kwargs : dict [default: empty]
493 490
494 491 targets : target list [default: self.targets]
495 492 where to run
496 493 block : bool [default: self.block]
497 494 whether to block
498 495 track : bool [default: self.track]
499 496 whether to ask zmq to track the message, for safe non-copying sends
500 497
501 498 Returns
502 499 -------
503 500
504 501 if self.block is False:
505 502 returns AsyncResult
506 503 else:
507 504 returns actual result of f(*args, **kwargs) on the engine(s)
508 505 This will be a list if self.targets is also a list (even length 1), or
509 506 the single result if self.targets is an integer engine id
510 507 """
511 508 args = [] if args is None else args
512 509 kwargs = {} if kwargs is None else kwargs
513 510 block = self.block if block is None else block
514 511 track = self.track if track is None else track
515 512 targets = self.targets if targets is None else targets
516 513
517 514 _idents = self.client._build_targets(targets)[0]
518 515 msg_ids = []
519 516 trackers = []
520 517 for ident in _idents:
521 518 msg = self.client.send_apply_message(self._socket, f, args, kwargs, track=track,
522 519 ident=ident)
523 520 if track:
524 521 trackers.append(msg['tracker'])
525 522 msg_ids.append(msg['msg_id'])
526 523 tracker = None if track is False else zmq.MessageTracker(*trackers)
527 524 ar = AsyncResult(self.client, msg_ids, fname=f.__name__, targets=targets, tracker=tracker)
528 525 if block:
529 526 try:
530 527 return ar.get()
531 528 except KeyboardInterrupt:
532 529 pass
533 530 return ar
534 531
535 532 @spin_after
536 533 def map(self, f, *sequences, **kwargs):
537 534 """view.map(f, *sequences, block=self.block) => list|AsyncMapResult
538 535
539 536 Parallel version of builtin `map`, using this View's `targets`.
540 537
541 538 There will be one task per target, so work will be chunked
542 539 if the sequences are longer than `targets`.
543 540
544 541 Results can be iterated as they are ready, but will become available in chunks.
545 542
546 543 Parameters
547 544 ----------
548 545
549 546 f : callable
550 547 function to be mapped
551 548 *sequences: one or more sequences of matching length
552 549 the sequences to be distributed and passed to `f`
553 550 block : bool
554 551 whether to wait for the result or not [default self.block]
555 552
556 553 Returns
557 554 -------
558 555
559 556 if block=False:
560 557 AsyncMapResult
561 558 An object like AsyncResult, but which reassembles the sequence of results
562 559 into a single list. AsyncMapResults can be iterated through before all
563 560 results are complete.
564 561 else:
565 562 list
566 563 the result of map(f,*sequences)
567 564 """
568 565
569 566 block = kwargs.pop('block', self.block)
570 567 for k in kwargs.keys():
571 568 if k not in ['block', 'track']:
572 569 raise TypeError("invalid keyword arg, %r"%k)
573 570
574 571 assert len(sequences) > 0, "must have some sequences to map onto!"
575 572 pf = ParallelFunction(self, f, block=block, **kwargs)
576 573 return pf.map(*sequences)
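# [Illustrative sketch added in review; not part of this commit.]
# Chunked map on a DirectView: work is split into one chunk per engine in `dv.targets`.
# Assumes a connected Client `rc`:
#
#     dv = rc[:]
#     squares = dv.map_sync(lambda x: x * x, range(32))
#     amr = dv.map(lambda x: x * x, range(32), block=False)
#     for item in amr:                  # chunks become available as each engine finishes
#         print item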
577 574
578 575 def execute(self, code, targets=None, block=None):
579 576 """Executes `code` on `targets` in blocking or nonblocking manner.
580 577
581 578 ``execute`` is always `bound` (affects engine namespace)
582 579
583 580 Parameters
584 581 ----------
585 582
586 583 code : str
587 584 the code string to be executed
588 585 block : bool
589 586 whether or not to wait until done to return
590 587 default: self.block
591 588 """
592 589 return self._really_apply(util._execute, args=(code,), block=block, targets=targets)
593 590
594 591 def run(self, filename, targets=None, block=None):
595 592 """Execute contents of `filename` on my engine(s).
596 593
597 594 This simply reads the contents of the file and calls `execute`.
598 595
599 596 Parameters
600 597 ----------
601 598
602 599 filename : str
603 600 The path to the file
604 601 targets : int/str/list of ints/strs
605 602 the engines on which to execute
606 603 default : all
607 604 block : bool
608 605 whether or not to wait until done
609 606 default: self.block
610 607
611 608 """
612 609 with open(filename, 'r') as f:
613 610 # add newline in case of trailing indented whitespace
614 611 # which will cause SyntaxError
615 612 code = f.read()+'\n'
616 613 return self.execute(code, block=block, targets=targets)
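# [Illustrative sketch added in review; not part of this commit.]
# Statement execution on engines, assuming a DirectView `dv`; 'myscript.py' is a hypothetical path:
#
#     dv.execute('import numpy; a = numpy.arange(4)')   # run code in each engine's namespace
#     print dv['a']                                      # pull 'a' back from every engine
#     dv.run('myscript.py')                              # execute a whole file remotely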
617 614
618 615 def update(self, ns):
619 616 """update remote namespace with dict `ns`
620 617
621 618 See `push` for details.
622 619 """
623 620 return self.push(ns, block=self.block, track=self.track)
624 621
625 622 def push(self, ns, targets=None, block=None, track=None):
626 623 """update remote namespace with dict `ns`
627 624
628 625 Parameters
629 626 ----------
630 627
631 628 ns : dict
632 629 dict of keys with which to update engine namespace(s)
633 630 block : bool [default : self.block]
634 631 whether to wait to be notified of engine receipt
635 632
636 633 """
637 634
638 635 block = block if block is not None else self.block
639 636 track = track if track is not None else self.track
640 637 targets = targets if targets is not None else self.targets
641 638 # applier = self.apply_sync if block else self.apply_async
642 639 if not isinstance(ns, dict):
643 640 raise TypeError("Must be a dict, not %s"%type(ns))
644 641 return self._really_apply(util._push, (ns,), block=block, track=track, targets=targets)
645 642
646 643 def get(self, key_s):
647 644 """get object(s) by `key_s` from remote namespace
648 645
649 646 see `pull` for details.
650 647 """
651 648 # block = block if block is not None else self.block
652 649 return self.pull(key_s, block=True)
653 650
654 651 def pull(self, names, targets=None, block=True):
655 652 """get object(s) by `name` from remote namespace
656 653
657 654 will return one object if it is a key.
658 655 can also take a list of keys, in which case it will return a list of objects.
659 656 """
660 657 block = block if block is not None else self.block
661 658 targets = targets if targets is not None else self.targets
662 659 applier = self.apply_sync if block else self.apply_async
663 660 if isinstance(names, basestring):
664 661 pass
665 662 elif isinstance(names, (list,tuple,set)):
666 663 for key in names:
667 664 if not isinstance(key, basestring):
668 665 raise TypeError("keys must be str, not type %r"%type(key))
669 666 else:
670 667 raise TypeError("names must be strs, not %r"%names)
671 668 return self._really_apply(util._pull, (names,), block=block, targets=targets)
672 669
673 670 def scatter(self, key, seq, dist='b', flatten=False, targets=None, block=None, track=None):
674 671 """
675 672 Partition a Python sequence and send the partitions to a set of engines.
676 673 """
677 674 block = block if block is not None else self.block
678 675 track = track if track is not None else self.track
679 676 targets = targets if targets is not None else self.targets
680 677
681 678 mapObject = Map.dists[dist]()
682 679 nparts = len(targets)
683 680 msg_ids = []
684 681 trackers = []
685 682 for index, engineid in enumerate(targets):
686 683 partition = mapObject.getPartition(seq, index, nparts)
687 684 if flatten and len(partition) == 1:
688 685 ns = {key: partition[0]}
689 686 else:
690 687 ns = {key: partition}
691 688 r = self.push(ns, block=False, track=track, targets=engineid)
692 689 msg_ids.extend(r.msg_ids)
693 690 if track:
694 691 trackers.append(r._tracker)
695 692
696 693 if track:
697 694 tracker = zmq.MessageTracker(*trackers)
698 695 else:
699 696 tracker = None
700 697
701 698 r = AsyncResult(self.client, msg_ids, fname='scatter', targets=targets, tracker=tracker)
702 699 if block:
703 700 r.wait()
704 701 else:
705 702 return r
706 703
707 704 @sync_results
708 705 @save_ids
709 706 def gather(self, key, dist='b', targets=None, block=None):
710 707 """
711 708 Gather a partitioned sequence on a set of engines as a single local seq.
712 709 """
713 710 block = block if block is not None else self.block
714 711 targets = targets if targets is not None else self.targets
715 712 mapObject = Map.dists[dist]()
716 713 msg_ids = []
717 714
718 715 for index, engineid in enumerate(targets):
719 716 msg_ids.extend(self.pull(key, block=False, targets=engineid).msg_ids)
720 717
721 718 r = AsyncMapResult(self.client, msg_ids, mapObject, fname='gather')
722 719
723 720 if block:
724 721 try:
725 722 return r.get()
726 723 except KeyboardInterrupt:
727 724 pass
728 725 return r
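# [Illustrative sketch added in review; not part of this commit.]
# Distributing and collecting a sequence, assuming a DirectView `dv` over several engines:
#
#     dv.scatter('chunk', range(16))                # each engine gets a slice, bound to `chunk`
#     dv.execute('squares = [x * x for x in chunk]')
#     print dv.gather('squares', block=True)        # partitions reassembled into one list of 16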
729 726
730 727 def __getitem__(self, key):
731 728 return self.get(key)
732 729
733 730 def __setitem__(self,key, value):
734 731 self.update({key:value})
735 732
736 733 def clear(self, targets=None, block=False):
737 734 """Clear the remote namespaces on my engines."""
738 735 block = block if block is not None else self.block
739 736 targets = targets if targets is not None else self.targets
740 737 return self.client.clear(targets=targets, block=block)
741 738
742 739 def kill(self, targets=None, block=True):
743 740 """Kill my engines."""
744 741 block = block if block is not None else self.block
745 742 targets = targets if targets is not None else self.targets
746 743 return self.client.kill(targets=targets, block=block)
747 744
748 745 #----------------------------------------
749 746 # activate for %px,%autopx magics
750 747 #----------------------------------------
751 748 def activate(self):
752 749 """Make this `View` active for parallel magic commands.
753 750
754 751 IPython has a magic command syntax to work with `MultiEngineClient` objects.
755 752 In a given IPython session there is a single active one. While
756 753 there can be many `Views` created and used by the user,
757 754 there is only one active one. The active `View` is used whenever
758 755 the magic commands %px and %autopx are used.
759 756
760 757 The activate() method is called on a given `View` to make it
761 758 active. Once this has been done, the magic commands can be used.
762 759 """
763 760
764 761 try:
765 762 # This is injected into __builtins__.
766 763 ip = get_ipython()
767 764 except NameError:
768 765 print "The IPython parallel magics (%result, %px, %autopx) only work within IPython."
769 766 else:
770 767 pmagic = ip.plugin_manager.get_plugin('parallelmagic')
771 768 if pmagic is not None:
772 769 pmagic.active_multiengine_client = self
773 770 else:
774 771 print "You must first load the parallelmagic extension " \
775 772 "by doing '%load_ext parallelmagic'"
776 773
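# [Illustrative sketch added in review; not part of this commit.]
# Using the parallel magics inside an IPython session, assuming a connected Client `rc`:
#
#     In [1]: %load_ext parallelmagic
#     In [2]: dv = rc[:]
#     In [3]: dv.activate()
#     In [4]: %px import os; pid = os.getpid()
#     In [5]: dv['pid']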
777 774
778 775 @testdec.skip_doctest
779 776 class LoadBalancedView(View):
780 777 """An load-balancing View that only executes via the Task scheduler.
781 778
782 779 Load-balanced views can be created with the client's `view` method:
783 780
784 781 >>> v = client.load_balanced_view()
785 782
786 783 or targets can be specified, to restrict the potential destinations:
787 784
788 785 >>> v = client.load_balanced_view([1,3])
789 786
790 787 which would restrict load-balancing to engines 1 and 3.
791 788
792 789 """
793 790
794 791 follow=Any()
795 792 after=Any()
796 793 timeout=CFloat()
797 794
798 795 _task_scheme = Any()
799 796 _flag_names = List(['targets', 'block', 'track', 'follow', 'after', 'timeout'])
800 797
801 798 def __init__(self, client=None, socket=None, **flags):
802 799 super(LoadBalancedView, self).__init__(client=client, socket=socket, **flags)
803 800 self._task_scheme=client._task_scheme
804 801
805 802 def _validate_dependency(self, dep):
806 803 """validate a dependency.
807 804
808 805 For use in `set_flags`.
809 806 """
810 807 if dep is None or isinstance(dep, (str, AsyncResult, Dependency)):
811 808 return True
812 809 elif isinstance(dep, (list,set, tuple)):
813 810 for d in dep:
814 811 if not isinstance(d, (str, AsyncResult)):
815 812 return False
816 813 elif isinstance(dep, dict):
817 814 if set(dep.keys()) != set(Dependency().as_dict().keys()):
818 815 return False
819 816 if not isinstance(dep['msg_ids'], list):
820 817 return False
821 818 for d in dep['msg_ids']:
822 819 if not isinstance(d, str):
823 820 return False
824 821 else:
825 822 return False
826 823
827 824 return True
828 825
829 826 def _render_dependency(self, dep):
830 827 """helper for building jsonable dependencies from various input forms."""
831 828 if isinstance(dep, Dependency):
832 829 return dep.as_dict()
833 830 elif isinstance(dep, AsyncResult):
834 831 return dep.msg_ids
835 832 elif dep is None:
836 833 return []
837 834 else:
838 835 # pass to Dependency constructor
839 836 return list(Dependency(dep))
840 837
841 838 def set_flags(self, **kwargs):
842 839 """set my attribute flags by keyword.
843 840
844 841 A View is a wrapper for the Client's apply method, but with attributes
845 842 that specify keyword arguments, those attributes can be set by keyword
846 843 argument with this method.
847 844
848 845 Parameters
849 846 ----------
850 847
851 848 block : bool
852 849 whether to wait for results
853 850 track : bool
854 851 whether to create a MessageTracker to allow the user to
855 852 safely edit arrays and buffers after non-copying
856 853 sends.
857 854 #
858 855 after : Dependency or collection of msg_ids
859 856 Only for load-balanced execution (targets=None)
860 857 Specify a list of msg_ids as a time-based dependency.
861 858 This job will only be run *after* the dependencies
862 859 have been met.
863 860
864 861 follow : Dependency or collection of msg_ids
865 862 Only for load-balanced execution (targets=None)
866 863 Specify a list of msg_ids as a location-based dependency.
867 864 This job will only be run on an engine where this dependency
868 865 is met.
869 866
870 867 timeout : float/int or None
871 868 Only for load-balanced execution (targets=None)
872 869 Specify an amount of time (in seconds) for the scheduler to
873 870 wait for dependencies to be met before failing with a
874 871 DependencyTimeout.
875 872 """
876 873
877 874 super(LoadBalancedView, self).set_flags(**kwargs)
878 875 for name in ('follow', 'after'):
879 876 if name in kwargs:
880 877 value = kwargs[name]
881 878 if self._validate_dependency(value):
882 879 setattr(self, name, value)
883 880 else:
884 881 raise ValueError("Invalid dependency: %r"%value)
885 882 if 'timeout' in kwargs:
886 883 t = kwargs['timeout']
887 884 if not isinstance(t, (int, long, float, type(None))):
888 885 raise TypeError("Invalid type for timeout: %r"%type(t))
889 886 if t is not None:
890 887 if t < 0:
891 888 raise ValueError("Invalid timeout: %s"%t)
892 889 self.timeout = t
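# [Illustrative sketch added in review; not part of this commit.]
# Scheduling with dependencies, assuming a LoadBalancedView `lbv` and hypothetical
# functions `setup` and `compute`:
#
#     ar_setup = lbv.apply_async(setup)
#     with lbv.temp_flags(after=ar_setup, timeout=60):
#         ar = lbv.apply_async(compute)     # runs only after `setup` has finished
#     lbv.set_flags(follow=ar)              # later jobs go to the engine that ran `compute`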
893 890
894 891 @sync_results
895 892 @save_ids
896 893 def _really_apply(self, f, args=None, kwargs=None, block=None, track=None,
897 894 after=None, follow=None, timeout=None,
898 895 targets=None):
899 896 """calls f(*args, **kwargs) on a remote engine, returning the result.
900 897
901 898 This method temporarily sets all of `apply`'s flags for a single call.
902 899
903 900 Parameters
904 901 ----------
905 902
906 903 f : callable
907 904
908 905 args : list [default: empty]
909 906
910 907 kwargs : dict [default: empty]
911 908
912 909 block : bool [default: self.block]
913 910 whether to block
914 911 track : bool [default: self.track]
915 912 whether to ask zmq to track the message, for safe non-copying sends
916 913
917 914 after, follow, timeout : as described in `set_flags` [defaults: self.after / self.follow / self.timeout]
918 915
919 916 Returns
920 917 -------
921 918
922 919 if self.block is False:
923 920 returns AsyncResult
924 921 else:
925 922 returns actual result of f(*args, **kwargs) on the engine(s)
926 923 This will be a list of self.targets is also a list (even length 1), or
927 924 the single result if self.targets is an integer engine id
928 925 """
929 926
930 927 # validate whether we can run
931 928 if self._socket.closed:
932 929 msg = "Task farming is disabled"
933 930 if self._task_scheme == 'pure':
934 931 msg += " because the pure ZMQ scheduler cannot handle"
935 932 msg += " disappearing engines."
936 933 raise RuntimeError(msg)
937 934
938 935 if self._task_scheme == 'pure':
939 936 # pure zmq scheme doesn't support dependencies
940 937 msg = "Pure ZMQ scheduler doesn't support dependencies"
941 938 if (follow or after):
942 939 # hard fail on DAG dependencies
943 940 raise RuntimeError(msg)
944 941 if isinstance(f, dependent):
945 942 # soft warn on functional dependencies
946 943 warnings.warn(msg, RuntimeWarning)
947 944
948 945 # build args
949 946 args = [] if args is None else args
950 947 kwargs = {} if kwargs is None else kwargs
951 948 block = self.block if block is None else block
952 949 track = self.track if track is None else track
953 950 after = self.after if after is None else after
954 951 follow = self.follow if follow is None else follow
955 952 timeout = self.timeout if timeout is None else timeout
956 953 targets = self.targets if targets is None else targets
957 954
958 955 if targets is None:
959 956 idents = []
960 957 else:
961 958 idents = self.client._build_targets(targets)[0]
962 959
963 960 after = self._render_dependency(after)
964 961 follow = self._render_dependency(follow)
965 962 subheader = dict(after=after, follow=follow, timeout=timeout, targets=idents)
966 963
967 964 msg = self.client.send_apply_message(self._socket, f, args, kwargs, track=track,
968 965 subheader=subheader)
969 966 tracker = None if track is False else msg['tracker']
970 967
971 968 ar = AsyncResult(self.client, msg['msg_id'], fname=f.__name__, targets=None, tracker=tracker)
972 969
973 970 if block:
974 971 try:
975 972 return ar.get()
976 973 except KeyboardInterrupt:
977 974 pass
978 975 return ar
979 976
980 977 @spin_after
981 978 @save_ids
982 979 def map(self, f, *sequences, **kwargs):
983 980 """view.map(f, *sequences, block=self.block, chunksize=1) => list|AsyncMapResult
984 981
985 982 Parallel version of builtin `map`, load-balanced by this View.
986 983
987 984 `block`, and `chunksize` can be specified by keyword only.
988 985
989 986 Each `chunksize` elements will be a separate task, and will be
990 987 load-balanced. This lets individual elements be available for iteration
991 988 as soon as they arrive.
992 989
993 990 Parameters
994 991 ----------
995 992
996 993 f : callable
997 994 function to be mapped
998 995 *sequences: one or more sequences of matching length
999 996 the sequences to be distributed and passed to `f`
1000 997 block : bool
1001 998 whether to wait for the result or not [default self.block]
1002 999 track : bool
1003 1000 whether to create a MessageTracker to allow the user to
1004 1001 safely edit arrays and buffers after non-copying
1005 1002 sends.
1006 1003 chunksize : int
1007 1004 how many elements should be in each task [default 1]
1008 1005
1009 1006 Returns
1010 1007 -------
1011 1008
1012 1009 if block=False:
1013 1010 AsyncMapResult
1014 1011 An object like AsyncResult, but which reassembles the sequence of results
1015 1012 into a single list. AsyncMapResults can be iterated through before all
1016 1013 results are complete.
1017 1014 else:
1018 1015 the result of map(f,*sequences)
1019 1016
1020 1017 """
1021 1018
1022 1019 # default
1023 1020 block = kwargs.get('block', self.block)
1024 1021 chunksize = kwargs.get('chunksize', 1)
1025 1022
1026 1023 keyset = set(kwargs.keys())
1027 1024 extra_keys = keyset.difference(set(['block', 'chunksize']))  # difference_update would return None
1028 1025 if extra_keys:
1029 1026 raise TypeError("Invalid kwargs: %s"%list(extra_keys))
1030 1027
1031 1028 assert len(sequences) > 0, "must have some sequences to map onto!"
1032 1029
1033 1030 pf = ParallelFunction(self, f, block=block, chunksize=chunksize)
1034 1031 return pf.map(*sequences)
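# [Illustrative sketch added in review; not part of this commit.]
# Load-balanced map with chunksize, assuming a LoadBalancedView `lbv`:
#
#     amr = lbv.map(lambda x: x ** 2, range(1000), block=False, chunksize=10)
#     for result in amr:        # 100 tasks of 10 elements each, iterated as they complete
#         pass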
1035 1032
1036 1033 __all__ = ['LoadBalancedView', 'DirectView'] No newline at end of file
@@ -1,118 +1,117 b''
1 1 #!/usr/bin/env python
2 2 """The IPython Controller with 0MQ
3 3 This is a collection of one Hub and several Schedulers.
4 4 """
5 5 #-----------------------------------------------------------------------------
6 6 # Copyright (C) 2010 The IPython Development Team
7 7 #
8 8 # Distributed under the terms of the BSD License. The full license is in
9 9 # the file COPYING, distributed as part of this software.
10 10 #-----------------------------------------------------------------------------
11 11
12 12 #-----------------------------------------------------------------------------
13 13 # Imports
14 14 #-----------------------------------------------------------------------------
15 15 from __future__ import print_function
16 16
17 import logging
18 17 from multiprocessing import Process
19 18
20 19 import zmq
21 20 from zmq.devices import ProcessMonitoredQueue
22 21 # internal:
23 22 from IPython.utils.importstring import import_item
24 23 from IPython.utils.traitlets import Int, CStr, Instance, List, Bool
25 24
26 from .entry_point import signal_children
25 from IPython.parallel.util import signal_children
27 26 from .hub import Hub, HubFactory
28 27 from .scheduler import launch_scheduler
29 28
30 29 #-----------------------------------------------------------------------------
31 30 # Configurable
32 31 #-----------------------------------------------------------------------------
33 32
34 33
35 34 class ControllerFactory(HubFactory):
36 35 """Configurable for setting up a Hub and Schedulers."""
37 36
38 37 usethreads = Bool(False, config=True)
39 38 # pure-zmq downstream HWM
40 39 hwm = Int(0, config=True)
41 40
42 41 # internal
43 42 children = List()
44 43 mq_class = CStr('zmq.devices.ProcessMonitoredQueue')
45 44
46 45 def _usethreads_changed(self, name, old, new):
47 46 self.mq_class = 'zmq.devices.%sMonitoredQueue'%('Thread' if new else 'Process')
48 47
49 48 def __init__(self, **kwargs):
50 49 super(ControllerFactory, self).__init__(**kwargs)
51 50 self.subconstructors.append(self.construct_schedulers)
52 51
53 52 def start(self):
54 53 super(ControllerFactory, self).start()
55 54 child_procs = []
56 55 for child in self.children:
57 56 child.start()
58 57 if isinstance(child, ProcessMonitoredQueue):
59 58 child_procs.append(child.launcher)
60 59 elif isinstance(child, Process):
61 60 child_procs.append(child)
62 61 if child_procs:
63 62 signal_children(child_procs)
64 63
65 64
66 65 def construct_schedulers(self):
67 66 children = self.children
68 67 mq = import_item(self.mq_class)
69 68
70 69 maybe_inproc = 'inproc://monitor' if self.usethreads else self.monitor_url
71 70 # IOPub relay (in a Process)
72 71 q = mq(zmq.PUB, zmq.SUB, zmq.PUB, 'N/A','iopub')
73 72 q.bind_in(self.client_info['iopub'])
74 73 q.bind_out(self.engine_info['iopub'])
75 74 q.setsockopt_out(zmq.SUBSCRIBE, '')
76 75 q.connect_mon(maybe_inproc)
77 76 q.daemon=True
78 77 children.append(q)
79 78
80 79 # Multiplexer Queue (in a Process)
81 80 q = mq(zmq.XREP, zmq.XREP, zmq.PUB, 'in', 'out')
82 81 q.bind_in(self.client_info['mux'])
83 82 q.setsockopt_in(zmq.IDENTITY, 'mux')
84 83 q.bind_out(self.engine_info['mux'])
85 84 q.connect_mon(maybe_inproc)
86 85 q.daemon=True
87 86 children.append(q)
88 87
89 88 # Control Queue (in a Process)
90 89 q = mq(zmq.XREP, zmq.XREP, zmq.PUB, 'incontrol', 'outcontrol')
91 90 q.bind_in(self.client_info['control'])
92 91 q.setsockopt_in(zmq.IDENTITY, 'control')
93 92 q.bind_out(self.engine_info['control'])
94 93 q.connect_mon(maybe_inproc)
95 94 q.daemon=True
96 95 children.append(q)
97 96 # Task Queue (in a Process)
98 97 if self.scheme == 'pure':
99 98 self.log.warn("task::using pure XREQ Task scheduler")
100 99 q = mq(zmq.XREP, zmq.XREQ, zmq.PUB, 'intask', 'outtask')
101 100 q.setsockopt_out(zmq.HWM, self.hwm)
102 101 q.bind_in(self.client_info['task'][1])
103 102 q.setsockopt_in(zmq.IDENTITY, 'task')
104 103 q.bind_out(self.engine_info['task'])
105 104 q.connect_mon(maybe_inproc)
106 105 q.daemon=True
107 106 children.append(q)
108 107 elif self.scheme == 'none':
109 108 self.log.warn("task::using no Task scheduler")
110 109
111 110 else:
112 111 self.log.info("task::using Python %s Task scheduler"%self.scheme)
113 112 sargs = (self.client_info['task'][1], self.engine_info['task'], self.monitor_url, self.client_info['notification'])
114 113 kwargs = dict(scheme=self.scheme,logname=self.log.name, loglevel=self.log.level, config=self.config)
115 114 q = Process(target=launch_scheduler, args=sargs, kwargs=kwargs)
116 115 q.daemon=True
117 116 children.append(q)
118 117
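
For reference, the MonitoredQueue pattern used repeatedly in construct_schedulers above, reduced to a standalone sketch. The addresses are made up for illustration; the point is the shape of the pyzmq device API the factory relies on:

    import zmq
    from zmq.devices import ProcessMonitoredQueue

    # XREP <-> XREP relay with every message copied to a PUB monitor socket,
    # mirroring the 'mux' queue wired up in construct_schedulers
    q = ProcessMonitoredQueue(zmq.XREP, zmq.XREP, zmq.PUB, 'in', 'out')
    q.bind_in('tcp://127.0.0.1:10101')       # client-facing side
    q.setsockopt_in(zmq.IDENTITY, 'mux')
    q.bind_out('tcp://127.0.0.1:10102')      # engine-facing side
    q.connect_mon('tcp://127.0.0.1:10201')   # the Hub's monitor SUB binds here
    q.daemon = True
    q.start()                                # relay runs in a child process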
@@ -1,196 +1,196 b''
1 1 """Dependency utilities"""
2 2 #-----------------------------------------------------------------------------
3 3 # Copyright (C) 2010-2011 The IPython Development Team
4 4 #
5 5 # Distributed under the terms of the BSD License. The full license is in
6 6 # the file COPYING, distributed as part of this software.
7 7 #-----------------------------------------------------------------------------
8 8
9 9 from types import ModuleType
10 10
11 from .asyncresult import AsyncResult
12 from .error import UnmetDependency
13 from .util import interactive
11 from IPython.parallel.client.asyncresult import AsyncResult
12 from IPython.parallel.error import UnmetDependency
13 from IPython.parallel.util import interactive
14 14
15 15 class depend(object):
16 16 """Dependency decorator, for use with tasks.
17 17
18 18 `@depend` lets you define a function for engine dependencies
19 19 just like you use `apply` for tasks.
20 20
21 21
22 22 Examples
23 23 --------
24 24 ::
25 25
26 26 @depend(df, a,b, c=5)
27 27 def f(m,n,p):
28 28
29 29 view.apply(f, 1,2,3)
30 30
31 31 will call df(a,b,c=5) on the engine, and if it returns False or
32 32 raises an UnmetDependency error, then the task will not be run
33 33 and another engine will be tried.
34 34 """
35 35 def __init__(self, f, *args, **kwargs):
36 36 self.f = f
37 37 self.args = args
38 38 self.kwargs = kwargs
39 39
40 40 def __call__(self, f):
41 41 return dependent(f, self.f, *self.args, **self.kwargs)
42 42
43 43 class dependent(object):
44 44 """A function that depends on another function.
45 45 This is an object, rather than the closure used
46 46 in traditional decorators, because closures are not picklable.
47 47 """
48 48
49 49 def __init__(self, f, df, *dargs, **dkwargs):
50 50 self.f = f
51 51 self.func_name = getattr(f, '__name__', 'f')
52 52 self.df = df
53 53 self.dargs = dargs
54 54 self.dkwargs = dkwargs
55 55
56 56 def __call__(self, *args, **kwargs):
57 57 # if hasattr(self.f, 'func_globals') and hasattr(self.df, 'func_globals'):
58 58 # self.df.func_globals = self.f.func_globals
59 59 if self.df(*self.dargs, **self.dkwargs) is False:
60 60 raise UnmetDependency()
61 61 return self.f(*args, **kwargs)
62 62
63 63 @property
64 64 def __name__(self):
65 65 return self.func_name
66 66
67 67 @interactive
68 68 def _require(*names):
69 69 """Helper for @require decorator."""
70 70 from IPython.parallel.error import UnmetDependency
71 71 user_ns = globals()
72 72 for name in names:
73 73 if name in user_ns:
74 74 continue
75 75 try:
76 76 exec 'import %s'%name in user_ns
77 77 except ImportError:
78 78 raise UnmetDependency(name)
79 79 return True
80 80
81 81 def require(*mods):
82 82 """Simple decorator for requiring names to be importable.
83 83
84 84 Examples
85 85 --------
86 86
87 87 In [1]: @require('numpy')
88 88 ...: def norm(a):
89 89 ...: import numpy
90 90 ...: return numpy.linalg.norm(a,2)
91 91 """
92 92 names = []
93 93 for mod in mods:
94 94 if isinstance(mod, ModuleType):
95 95 mod = mod.__name__
96 96
97 97 if isinstance(mod, basestring):
98 98 names.append(mod)
99 99 else:
100 100 raise TypeError("names must be modules or module names, not %s"%type(mod))
101 101
102 102 return depend(_require, *names)
103 103
104 104 class Dependency(set):
105 105 """An object for representing a set of msg_id dependencies.
106 106
107 107 Subclassed from set().
108 108
109 109 Parameters
110 110 ----------
111 111 dependencies: list/set of msg_ids or AsyncResult objects or output of Dependency.as_dict()
112 112 The msg_ids to depend on
113 113 all : bool [default True]
114 114 Whether the dependency should be considered met only when *all* the depended-upon tasks have completed,
115 115 or as soon as *any* of them has completed.
116 116 success : bool [default True]
117 117 Whether to consider successes as fulfilling dependencies.
118 118 failure : bool [default False]
119 119 Whether to consider failures as fulfilling dependencies.
120 120
121 121 If `all=success=True` and `failure=False`, then the task will fail with an ImpossibleDependency
122 122 as soon as the first depended-upon task fails.
123 123 """
124 124
125 125 all=True
126 126 success=True
127 127 failure=False
128 128
129 129 def __init__(self, dependencies=[], all=True, success=True, failure=False):
130 130 if isinstance(dependencies, dict):
131 131 # load from dict
132 132 all = dependencies.get('all', True)
133 133 success = dependencies.get('success', success)
134 134 failure = dependencies.get('failure', failure)
135 135 dependencies = dependencies.get('dependencies', [])
136 136 ids = []
137 137
138 138 # extract ids from various sources:
139 139 if isinstance(dependencies, (basestring, AsyncResult)):
140 140 dependencies = [dependencies]
141 141 for d in dependencies:
142 142 if isinstance(d, basestring):
143 143 ids.append(d)
144 144 elif isinstance(d, AsyncResult):
145 145 ids.extend(d.msg_ids)
146 146 else:
147 147 raise TypeError("invalid dependency type: %r"%type(d))
148 148
149 149 set.__init__(self, ids)
150 150 self.all = all
151 151 if not (success or failure):
152 152 raise ValueError("Must depend on at least one of successes or failures!")
153 153 self.success=success
154 154 self.failure = failure
155 155
156 156 def check(self, completed, failed=None):
157 157 """check whether our dependencies have been met."""
158 158 if len(self) == 0:
159 159 return True
160 160 against = set()
161 161 if self.success:
162 162 against = completed
163 163 if failed is not None and self.failure:
164 164 against = against.union(failed)
165 165 if self.all:
166 166 return self.issubset(against)
167 167 else:
168 168 return not self.isdisjoint(against)
169 169
170 170 def unreachable(self, completed, failed=None):
171 171 """return whether this dependency has become impossible."""
172 172 if len(self) == 0:
173 173 return False
174 174 against = set()
175 175 if not self.success:
176 176 against = completed
177 177 if failed is not None and not self.failure:
178 178 against = against.union(failed)
179 179 if self.all:
180 180 return not self.isdisjoint(against)
181 181 else:
182 182 return self.issubset(against)
183 183
184 184
185 185 def as_dict(self):
186 186 """Represent this dependency as a dict. For json compatibility."""
187 187 return dict(
188 188 dependencies=list(self),
189 189 all=self.all,
190 190 success=self.success,
191 191 failure=self.failure
192 192 )
193 193
194 194
195 195 __all__ = ['depend', 'require', 'dependent', 'Dependency']
196 196
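
A short sketch of how these decorators are used from the client side. It is illustrative only: `view` is assumed to be a LoadBalancedView as in the client code earlier in this changeset, the data path is hypothetical, and the import follows the new controller subpackage path for this module (the same names may also be re-exported at the package level):

    from IPython.parallel.controller.dependency import depend, require

    # functional dependency: evaluated on the engine before the task body;
    # False (or an UnmetDependency) makes the scheduler try another engine
    def has_scratch(min_bytes):
        import os, tempfile
        st = os.statvfs(tempfile.gettempdir())
        return st.f_bavail * st.f_frsize >= min_bytes

    @depend(has_scratch, 10 * 1024 ** 3)
    def crunch(path):
        with open(path) as f:
            return len(f.read())

    # import dependency: only run where numpy is importable
    @require('numpy')
    def norm2(a):
        import numpy
        return numpy.linalg.norm(a, 2)

    # both are submitted like any other task through a load-balanced view
    ar = view.apply(crunch, '/tmp/data.bin')   # hypothetical path
    br = view.apply(norm2, [3, 4])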
1 NO CONTENT: file renamed from IPython/parallel/dictdb.py to IPython/parallel/controller/dictdb.py
@@ -1,164 +1,163 b''
1 1 #!/usr/bin/env python
2 2 """
3 3 A multi-heart Heartbeat system using PUB and XREP sockets. Pings are sent out on the PUB socket,
4 4 and hearts are tracked based on their XREQ identities.
5 5 """
6 6 #-----------------------------------------------------------------------------
7 7 # Copyright (C) 2010-2011 The IPython Development Team
8 8 #
9 9 # Distributed under the terms of the BSD License. The full license is in
10 10 # the file COPYING, distributed as part of this software.
11 11 #-----------------------------------------------------------------------------
12 12
13 13 from __future__ import print_function
14 14 import time
15 import logging
16 15 import uuid
17 16
18 17 import zmq
19 18 from zmq.devices import ProcessDevice,ThreadDevice
20 19 from zmq.eventloop import ioloop, zmqstream
21 20
22 21 from IPython.utils.traitlets import Set, Instance, CFloat, Bool
23 from .factory import LoggingFactory
22 from IPython.parallel.factory import LoggingFactory
24 23
25 24 class Heart(object):
26 25 """A basic heart object for responding to a HeartMonitor.
27 26 This is a simple wrapper with defaults for the most common
28 27 Device model for responding to heartbeats.
29 28
30 29 It simply builds a threadsafe zmq.FORWARDER Device, defaulting to using
31 30 SUB/XREQ for in/out.
32 31
33 32 You can specify the XREQ's IDENTITY via the optional heart_id argument."""
34 33 device=None
35 34 id=None
36 35 def __init__(self, in_addr, out_addr, in_type=zmq.SUB, out_type=zmq.XREQ, heart_id=None):
37 36 self.device = ThreadDevice(zmq.FORWARDER, in_type, out_type)
38 37 self.device.daemon=True
39 38 self.device.connect_in(in_addr)
40 39 self.device.connect_out(out_addr)
41 40 if in_type == zmq.SUB:
42 41 self.device.setsockopt_in(zmq.SUBSCRIBE, "")
43 42 if heart_id is None:
44 43 heart_id = str(uuid.uuid4())
45 44 self.device.setsockopt_out(zmq.IDENTITY, heart_id)
46 45 self.id = heart_id
47 46
48 47 def start(self):
49 48 return self.device.start()
50 49
51 50 class HeartMonitor(LoggingFactory):
52 51 """A basic HeartMonitor class
53 52 pingstream: a PUB stream
54 53 pongstream: an XREP stream
55 54 period: the period of the heartbeat in milliseconds"""
56 55
57 56 period=CFloat(1000, config=True) # in milliseconds
58 57
59 58 pingstream=Instance('zmq.eventloop.zmqstream.ZMQStream')
60 59 pongstream=Instance('zmq.eventloop.zmqstream.ZMQStream')
61 60 loop = Instance('zmq.eventloop.ioloop.IOLoop')
62 61 def _loop_default(self):
63 62 return ioloop.IOLoop.instance()
64 63 debug=Bool(False)
65 64
66 65 # not settable:
67 66 hearts=Set()
68 67 responses=Set()
69 68 on_probation=Set()
70 69 last_ping=CFloat(0)
71 70 _new_handlers = Set()
72 71 _failure_handlers = Set()
73 72 lifetime = CFloat(0)
74 73 tic = CFloat(0)
75 74
76 75 def __init__(self, **kwargs):
77 76 super(HeartMonitor, self).__init__(**kwargs)
78 77
79 78 self.pongstream.on_recv(self.handle_pong)
80 79
81 80 def start(self):
82 81 self.caller = ioloop.PeriodicCallback(self.beat, self.period, self.loop)
83 82 self.caller.start()
84 83
85 84 def add_new_heart_handler(self, handler):
86 85 """add a new handler for new hearts"""
87 86 self.log.debug("heartbeat::new_heart_handler: %s"%handler)
88 87 self._new_handlers.add(handler)
89 88
90 89 def add_heart_failure_handler(self, handler):
91 90 """add a new handler for heart failure"""
92 91 self.log.debug("heartbeat::new heart failure handler: %s"%handler)
93 92 self._failure_handlers.add(handler)
94 93
95 94 def beat(self):
96 95 self.pongstream.flush()
97 96 self.last_ping = self.lifetime
98 97
99 98 toc = time.time()
100 99 self.lifetime += toc-self.tic
101 100 self.tic = toc
102 101 # self.log.debug("heartbeat::%s"%self.lifetime)
103 102 goodhearts = self.hearts.intersection(self.responses)
104 103 missed_beats = self.hearts.difference(goodhearts)
105 104 heartfailures = self.on_probation.intersection(missed_beats)
106 105 newhearts = self.responses.difference(goodhearts)
107 106 map(self.handle_new_heart, newhearts)
108 107 map(self.handle_heart_failure, heartfailures)
109 108 self.on_probation = missed_beats.intersection(self.hearts)
110 109 self.responses = set()
111 110 # print self.on_probation, self.hearts
112 111 # self.log.debug("heartbeat::beat %.3f, %i beating hearts"%(self.lifetime, len(self.hearts)))
113 112 self.pingstream.send(str(self.lifetime))
114 113
115 114 def handle_new_heart(self, heart):
116 115 if self._new_handlers:
117 116 for handler in self._new_handlers:
118 117 handler(heart)
119 118 else:
120 119 self.log.info("heartbeat::yay, got new heart %s!"%heart)
121 120 self.hearts.add(heart)
122 121
123 122 def handle_heart_failure(self, heart):
124 123 if self._failure_handlers:
125 124 for handler in self._failure_handlers:
126 125 try:
127 126 handler(heart)
128 127 except Exception as e:
129 128 self.log.error("heartbeat::Bad Handler! %s"%handler, exc_info=True)
130 129 pass
131 130 else:
132 131 self.log.info("heartbeat::Heart %s failed :("%heart)
133 132 self.hearts.remove(heart)
134 133
135 134
136 135 def handle_pong(self, msg):
137 136 "a heart just beat"
138 137 if msg[1] == str(self.lifetime):
139 138 delta = time.time()-self.tic
140 139 # self.log.debug("heartbeat::heart %r took %.2f ms to respond"%(msg[0], 1000*delta))
141 140 self.responses.add(msg[0])
142 141 elif msg[1] == str(self.last_ping):
143 142 delta = time.time()-self.tic + (self.lifetime-self.last_ping)
144 143 self.log.warn("heartbeat::heart %r missed a beat, and took %.2f ms to respond"%(msg[0], 1000*delta))
145 144 self.responses.add(msg[0])
146 145 else:
147 146 self.log.warn("heartbeat::got bad heartbeat (possibly old?): %s (current=%.3f)"%
148 147 (msg[1],self.lifetime))
149 148
150 149
151 150 if __name__ == '__main__':
152 151 loop = ioloop.IOLoop.instance()
153 152 context = zmq.Context()
154 153 pub = context.socket(zmq.PUB)
155 154 pub.bind('tcp://127.0.0.1:5555')
156 155 xrep = context.socket(zmq.XREP)
157 156 xrep.bind('tcp://127.0.0.1:5556')
158 157
159 158 outstream = zmqstream.ZMQStream(pub, loop)
160 159 instream = zmqstream.ZMQStream(xrep, loop)
161 160
162 161 hb = HeartMonitor(loop=loop, pingstream=outstream, pongstream=instream)
163 162
164 163 loop.start()
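
The monitor's hook API lets callers react to hearts appearing and failing; this is how the Hub (in the next file) plugs in its registration logic. A minimal standalone variant of the demo above that also registers handlers (the handler bodies and the 500 ms period are chosen only for illustration):

    def on_new_heart(heart_id):
        print("heart %r is beating" % heart_id)

    def on_heart_failure(heart_id):
        print("heart %r missed its probation beat" % heart_id)

    hb = HeartMonitor(loop=loop, pingstream=outstream, pongstream=instream,
                      period=500)                 # ping every 500 ms
    hb.add_new_heart_handler(on_new_heart)
    hb.add_heart_failure_handler(on_heart_failure)
    hb.start()
    loop.start()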
@@ -1,1090 +1,1089 b''
1 1 #!/usr/bin/env python
2 2 """The IPython Controller Hub with 0MQ
3 3 This is the master object that handles connections from engines and clients,
4 4 and monitors traffic through the various queues.
5 5 """
6 6 #-----------------------------------------------------------------------------
7 7 # Copyright (C) 2010 The IPython Development Team
8 8 #
9 9 # Distributed under the terms of the BSD License. The full license is in
10 10 # the file COPYING, distributed as part of this software.
11 11 #-----------------------------------------------------------------------------
12 12
13 13 #-----------------------------------------------------------------------------
14 14 # Imports
15 15 #-----------------------------------------------------------------------------
16 16 from __future__ import print_function
17 17
18 18 import sys
19 19 import time
20 20 from datetime import datetime
21 21
22 22 import zmq
23 23 from zmq.eventloop import ioloop
24 24 from zmq.eventloop.zmqstream import ZMQStream
25 25
26 26 # internal:
27 27 from IPython.utils.importstring import import_item
28 28 from IPython.utils.traitlets import HasTraits, Instance, Int, CStr, Str, Dict, Set, List, Bool
29 29
30 from .entry_point import select_random_ports
31 from .factory import RegistrationFactory, LoggingFactory
30 from IPython.parallel import error
31 from IPython.parallel.factory import RegistrationFactory, LoggingFactory
32 from IPython.parallel.util import select_random_ports, validate_url_container, ISO8601
32 33
33 from . import error
34 34 from .heartmonitor import HeartMonitor
35 from .util import validate_url_container, ISO8601
36 35
37 36 #-----------------------------------------------------------------------------
38 37 # Code
39 38 #-----------------------------------------------------------------------------
40 39
41 40 def _passer(*args, **kwargs):
42 41 return
43 42
44 43 def _printer(*args, **kwargs):
45 44 print (args)
46 45 print (kwargs)
47 46
48 47 def empty_record():
49 48 """Return an empty dict with all record keys."""
50 49 return {
51 50 'msg_id' : None,
52 51 'header' : None,
53 52 'content': None,
54 53 'buffers': None,
55 54 'submitted': None,
56 55 'client_uuid' : None,
57 56 'engine_uuid' : None,
58 57 'started': None,
59 58 'completed': None,
60 59 'resubmitted': None,
61 60 'result_header' : None,
62 61 'result_content' : None,
63 62 'result_buffers' : None,
64 63 'queue' : None,
65 64 'pyin' : None,
66 65 'pyout': None,
67 66 'pyerr': None,
68 67 'stdout': '',
69 68 'stderr': '',
70 69 }
71 70
72 71 def init_record(msg):
73 72 """Initialize a TaskRecord based on a request."""
74 73 header = msg['header']
75 74 return {
76 75 'msg_id' : header['msg_id'],
77 76 'header' : header,
78 77 'content': msg['content'],
79 78 'buffers': msg['buffers'],
80 79 'submitted': datetime.strptime(header['date'], ISO8601),
81 80 'client_uuid' : None,
82 81 'engine_uuid' : None,
83 82 'started': None,
84 83 'completed': None,
85 84 'resubmitted': None,
86 85 'result_header' : None,
87 86 'result_content' : None,
88 87 'result_buffers' : None,
89 88 'queue' : None,
90 89 'pyin' : None,
91 90 'pyout': None,
92 91 'pyerr': None,
93 92 'stdout': '',
94 93 'stderr': '',
95 94 }
96 95
97 96
98 97 class EngineConnector(HasTraits):
99 98 """A simple object for accessing the various zmq connections of an object.
100 99 Attributes are:
101 100 id (int): engine ID
102 101 uuid (str): uuid (unused?)
103 102 queue (str): identity of queue's XREQ socket
104 103 registration (str): identity of registration XREQ socket
105 104 heartbeat (str): identity of heartbeat XREQ socket
106 105 """
107 106 id=Int(0)
108 107 queue=Str()
109 108 control=Str()
110 109 registration=Str()
111 110 heartbeat=Str()
112 111 pending=Set()
113 112
114 113 class HubFactory(RegistrationFactory):
115 114 """The Configurable for setting up a Hub."""
116 115
117 116 # name of a scheduler scheme
118 117 scheme = Str('leastload', config=True)
119 118
120 119 # port-pairs for monitoredqueues:
121 120 hb = Instance(list, config=True)
122 121 def _hb_default(self):
123 122 return select_random_ports(2)
124 123
125 124 mux = Instance(list, config=True)
126 125 def _mux_default(self):
127 126 return select_random_ports(2)
128 127
129 128 task = Instance(list, config=True)
130 129 def _task_default(self):
131 130 return select_random_ports(2)
132 131
133 132 control = Instance(list, config=True)
134 133 def _control_default(self):
135 134 return select_random_ports(2)
136 135
137 136 iopub = Instance(list, config=True)
138 137 def _iopub_default(self):
139 138 return select_random_ports(2)
140 139
141 140 # single ports:
142 141 mon_port = Instance(int, config=True)
143 142 def _mon_port_default(self):
144 143 return select_random_ports(1)[0]
145 144
146 145 notifier_port = Instance(int, config=True)
147 146 def _notifier_port_default(self):
148 147 return select_random_ports(1)[0]
149 148
150 149 ping = Int(1000, config=True) # ping frequency
151 150
152 151 engine_ip = CStr('127.0.0.1', config=True)
153 152 engine_transport = CStr('tcp', config=True)
154 153
155 154 client_ip = CStr('127.0.0.1', config=True)
156 155 client_transport = CStr('tcp', config=True)
157 156
158 157 monitor_ip = CStr('127.0.0.1', config=True)
159 158 monitor_transport = CStr('tcp', config=True)
160 159
161 160 monitor_url = CStr('')
162 161
163 db_class = CStr('IPython.parallel.dictdb.DictDB', config=True)
162 db_class = CStr('IPython.parallel.controller.dictdb.DictDB', config=True)
164 163
165 164 # not configurable
166 db = Instance('IPython.parallel.dictdb.BaseDB')
167 heartmonitor = Instance('IPython.parallel.heartmonitor.HeartMonitor')
165 db = Instance('IPython.parallel.controller.dictdb.BaseDB')
166 heartmonitor = Instance('IPython.parallel.controller.heartmonitor.HeartMonitor')
168 167 subconstructors = List()
169 168 _constructed = Bool(False)
170 169
171 170 def _ip_changed(self, name, old, new):
172 171 self.engine_ip = new
173 172 self.client_ip = new
174 173 self.monitor_ip = new
175 174 self._update_monitor_url()
176 175
177 176 def _update_monitor_url(self):
178 177 self.monitor_url = "%s://%s:%i"%(self.monitor_transport, self.monitor_ip, self.mon_port)
179 178
180 179 def _transport_changed(self, name, old, new):
181 180 self.engine_transport = new
182 181 self.client_transport = new
183 182 self.monitor_transport = new
184 183 self._update_monitor_url()
185 184
186 185 def __init__(self, **kwargs):
187 186 super(HubFactory, self).__init__(**kwargs)
188 187 self._update_monitor_url()
189 188 # self.on_trait_change(self._sync_ips, 'ip')
190 189 # self.on_trait_change(self._sync_transports, 'transport')
191 190 self.subconstructors.append(self.construct_hub)
192 191
193 192
194 193 def construct(self):
195 194 assert not self._constructed, "already constructed!"
196 195
197 196 for subc in self.subconstructors:
198 197 subc()
199 198
200 199 self._constructed = True
201 200
202 201
203 202 def start(self):
204 203 assert self._constructed, "must be constructed by self.construct() first!"
205 204 self.heartmonitor.start()
206 205 self.log.info("Heartmonitor started")
207 206
208 207 def construct_hub(self):
209 208 """construct"""
210 209 client_iface = "%s://%s:"%(self.client_transport, self.client_ip) + "%i"
211 210 engine_iface = "%s://%s:"%(self.engine_transport, self.engine_ip) + "%i"
212 211
213 212 ctx = self.context
214 213 loop = self.loop
215 214
216 215 # Registrar socket
217 216 q = ZMQStream(ctx.socket(zmq.XREP), loop)
218 217 q.bind(client_iface % self.regport)
219 218 self.log.info("Hub listening on %s for registration."%(client_iface%self.regport))
220 219 if self.client_ip != self.engine_ip:
221 220 q.bind(engine_iface % self.regport)
222 221 self.log.info("Hub listening on %s for registration."%(engine_iface%self.regport))
223 222
224 223 ### Engine connections ###
225 224
226 225 # heartbeat
227 226 hpub = ctx.socket(zmq.PUB)
228 227 hpub.bind(engine_iface % self.hb[0])
229 228 hrep = ctx.socket(zmq.XREP)
230 229 hrep.bind(engine_iface % self.hb[1])
231 230 self.heartmonitor = HeartMonitor(loop=loop, pingstream=ZMQStream(hpub,loop), pongstream=ZMQStream(hrep,loop),
232 231 period=self.ping, logname=self.log.name)
233 232
234 233 ### Client connections ###
235 234 # Notifier socket
236 235 n = ZMQStream(ctx.socket(zmq.PUB), loop)
237 236 n.bind(client_iface%self.notifier_port)
238 237
239 238 ### build and launch the queues ###
240 239
241 240 # monitor socket
242 241 sub = ctx.socket(zmq.SUB)
243 242 sub.setsockopt(zmq.SUBSCRIBE, "")
244 243 sub.bind(self.monitor_url)
245 244 sub.bind('inproc://monitor')
246 245 sub = ZMQStream(sub, loop)
247 246
248 247 # connect the db
249 248 self.log.info('Hub using DB backend: %r'%(self.db_class.split()[-1]))
250 249 # cdir = self.config.Global.cluster_dir
251 250 self.db = import_item(self.db_class)(session=self.session.session, config=self.config)
252 251 time.sleep(.25)
253 252
254 253 # build connection dicts
255 254 self.engine_info = {
256 255 'control' : engine_iface%self.control[1],
257 256 'mux': engine_iface%self.mux[1],
258 257 'heartbeat': (engine_iface%self.hb[0], engine_iface%self.hb[1]),
259 258 'task' : engine_iface%self.task[1],
260 259 'iopub' : engine_iface%self.iopub[1],
261 260 # 'monitor' : engine_iface%self.mon_port,
262 261 }
263 262
264 263 self.client_info = {
265 264 'control' : client_iface%self.control[0],
266 265 'mux': client_iface%self.mux[0],
267 266 'task' : (self.scheme, client_iface%self.task[0]),
268 267 'iopub' : client_iface%self.iopub[0],
269 268 'notification': client_iface%self.notifier_port
270 269 }
271 270 self.log.debug("Hub engine addrs: %s"%self.engine_info)
272 271 self.log.debug("Hub client addrs: %s"%self.client_info)
273 272 self.hub = Hub(loop=loop, session=self.session, monitor=sub, heartmonitor=self.heartmonitor,
274 273 query=q, notifier=n, db=self.db,
275 274 engine_info=self.engine_info, client_info=self.client_info,
276 275 logname=self.log.name)
277 276
278 277
279 278 class Hub(LoggingFactory):
280 279 """The IPython Controller Hub with 0MQ connections
281 280
282 281 Parameters
283 282 ==========
284 283 loop: zmq IOLoop instance
285 284 session: StreamSession object
286 285 <removed> context: zmq context for creating new connections (?)
287 286 queue: ZMQStream for monitoring the command queue (SUB)
288 287 query: ZMQStream for engine registration and client query requests (XREP)
289 288 heartbeat: HeartMonitor object checking the pulse of the engines
290 289 notifier: ZMQStream for broadcasting engine registration changes (PUB)
291 290 db: connection to db for out of memory logging of commands
292 291 NotImplemented
293 292 engine_info: dict of zmq connection information for engines to connect
294 293 to the queues.
295 294 client_info: dict of zmq connection information for clients to connect
296 295 to the queues.
297 296 """
298 297 # internal data structures:
299 298 ids=Set() # engine IDs
300 299 keytable=Dict()
301 300 by_ident=Dict()
302 301 engines=Dict()
303 302 clients=Dict()
304 303 hearts=Dict()
305 304 pending=Set()
306 305 queues=Dict() # pending msg_ids keyed by engine_id
307 306 tasks=Dict() # pending msg_ids submitted as tasks, keyed by client_id
308 307 completed=Dict() # completed msg_ids keyed by engine_id
309 308 all_completed=Set() # set of all completed msg_ids
310 309 dead_engines=Set() # set of uuids of dead engines
311 310 # mia=None
312 311 incoming_registrations=Dict()
313 312 registration_timeout=Int()
314 313 _idcounter=Int(0)
315 314
316 315 # objects from constructor:
317 316 loop=Instance(ioloop.IOLoop)
318 317 query=Instance(ZMQStream)
319 318 monitor=Instance(ZMQStream)
320 319 heartmonitor=Instance(HeartMonitor)
321 320 notifier=Instance(ZMQStream)
322 321 db=Instance(object)
323 322 client_info=Dict()
324 323 engine_info=Dict()
325 324
326 325
327 326 def __init__(self, **kwargs):
328 327 """
329 328 # universal:
330 329 loop: IOLoop for creating future connections
331 330 session: streamsession for sending serialized data
332 331 # engine:
333 332 queue: ZMQStream for monitoring queue messages
334 333 query: ZMQStream for engine+client registration and client requests
335 334 heartbeat: HeartMonitor object for tracking engines
336 335 # extra:
337 336 db: ZMQStream for db connection (NotImplemented)
338 337 engine_info: zmq address/protocol dict for engine connections
339 338 client_info: zmq address/protocol dict for client connections
340 339 """
341 340
342 341 super(Hub, self).__init__(**kwargs)
343 342 self.registration_timeout = max(5000, 2*self.heartmonitor.period)
344 343
345 344 # validate connection dicts:
346 345 for k,v in self.client_info.iteritems():
347 346 if k == 'task':
348 347 validate_url_container(v[1])
349 348 else:
350 349 validate_url_container(v)
351 350 # validate_url_container(self.client_info)
352 351 validate_url_container(self.engine_info)
353 352
354 353 # register our callbacks
355 354 self.query.on_recv(self.dispatch_query)
356 355 self.monitor.on_recv(self.dispatch_monitor_traffic)
357 356
358 357 self.heartmonitor.add_heart_failure_handler(self.handle_heart_failure)
359 358 self.heartmonitor.add_new_heart_handler(self.handle_new_heart)
360 359
361 360 self.monitor_handlers = { 'in' : self.save_queue_request,
362 361 'out': self.save_queue_result,
363 362 'intask': self.save_task_request,
364 363 'outtask': self.save_task_result,
365 364 'tracktask': self.save_task_destination,
366 365 'incontrol': _passer,
367 366 'outcontrol': _passer,
368 367 'iopub': self.save_iopub_message,
369 368 }
370 369
371 370 self.query_handlers = {'queue_request': self.queue_status,
372 371 'result_request': self.get_results,
373 372 'purge_request': self.purge_results,
374 373 'load_request': self.check_load,
375 374 'resubmit_request': self.resubmit_task,
376 375 'shutdown_request': self.shutdown_request,
377 376 'registration_request' : self.register_engine,
378 377 'unregistration_request' : self.unregister_engine,
379 378 'connection_request': self.connection_request,
380 379 }
381 380
382 381 self.log.info("hub::created hub")
383 382
384 383 @property
385 384 def _next_id(self):
386 385 """gemerate a new ID.
387 386
388 387 No longer reuse old ids, just count from 0."""
389 388 newid = self._idcounter
390 389 self._idcounter += 1
391 390 return newid
392 391 # newid = 0
393 392 # incoming = [id[0] for id in self.incoming_registrations.itervalues()]
394 393 # # print newid, self.ids, self.incoming_registrations
395 394 # while newid in self.ids or newid in incoming:
396 395 # newid += 1
397 396 # return newid
398 397
399 398 #-----------------------------------------------------------------------------
400 399 # message validation
401 400 #-----------------------------------------------------------------------------
402 401
403 402 def _validate_targets(self, targets):
404 403 """turn any valid targets argument into a list of integer ids"""
405 404 if targets is None:
406 405 # default to all
407 406 targets = self.ids
408 407
409 408 if isinstance(targets, (int,str,unicode)):
410 409 # only one target specified
411 410 targets = [targets]
412 411 _targets = []
413 412 for t in targets:
414 413 # map raw identities to ids
415 414 if isinstance(t, (str,unicode)):
416 415 t = self.by_ident.get(t, t)
417 416 _targets.append(t)
418 417 targets = _targets
419 418 bad_targets = [ t for t in targets if t not in self.ids ]
420 419 if bad_targets:
421 420 raise IndexError("No Such Engine: %r"%bad_targets)
422 421 if not targets:
423 422 raise IndexError("No Engines Registered")
424 423 return targets
425 424
426 425 #-----------------------------------------------------------------------------
427 426 # dispatch methods (1 per stream)
428 427 #-----------------------------------------------------------------------------
429 428
430 429 # def dispatch_registration_request(self, msg):
431 430 # """"""
432 431 # self.log.debug("registration::dispatch_register_request(%s)"%msg)
433 432 # idents,msg = self.session.feed_identities(msg)
434 433 # if not idents:
435 434 # self.log.error("Bad Query Message: %s"%msg, exc_info=True)
436 435 # return
437 436 # try:
438 437 # msg = self.session.unpack_message(msg,content=True)
439 438 # except:
440 439 # self.log.error("registration::got bad registration message: %s"%msg, exc_info=True)
441 440 # return
442 441 #
443 442 # msg_type = msg['msg_type']
444 443 # content = msg['content']
445 444 #
446 445 # handler = self.query_handlers.get(msg_type, None)
447 446 # if handler is None:
448 447 # self.log.error("registration::got bad registration message: %s"%msg)
449 448 # else:
450 449 # handler(idents, msg)
451 450
452 451 def dispatch_monitor_traffic(self, msg):
453 452 """all ME and Task queue messages come through here, as well as
454 453 IOPub traffic."""
455 454 self.log.debug("monitor traffic: %s"%msg[:2])
456 455 switch = msg[0]
457 456 idents, msg = self.session.feed_identities(msg[1:])
458 457 if not idents:
459 458 self.log.error("Bad Monitor Message: %s"%msg)
460 459 return
461 460 handler = self.monitor_handlers.get(switch, None)
462 461 if handler is not None:
463 462 handler(idents, msg)
464 463 else:
465 464 self.log.error("Invalid monitor topic: %s"%switch)
466 465
467 466
468 467 def dispatch_query(self, msg):
469 468 """Route registration requests and queries from clients."""
470 469 idents, msg = self.session.feed_identities(msg)
471 470 if not idents:
472 471 self.log.error("Bad Query Message: %s"%msg)
473 472 return
474 473 client_id = idents[0]
475 474 try:
476 475 msg = self.session.unpack_message(msg, content=True)
477 476 except:
478 477 content = error.wrap_exception()
479 478 self.log.error("Bad Query Message: %s"%msg, exc_info=True)
480 479 self.session.send(self.query, "hub_error", ident=client_id,
481 480 content=content)
482 481 return
483 482
484 483 # print client_id, header, parent, content
485 484 #switch on message type:
486 485 msg_type = msg['msg_type']
487 486 self.log.info("client::client %s requested %s"%(client_id, msg_type))
488 487 handler = self.query_handlers.get(msg_type, None)
489 488 try:
490 489 assert handler is not None, "Bad Message Type: %s"%msg_type
491 490 except:
492 491 content = error.wrap_exception()
493 492 self.log.error("Bad Message Type: %s"%msg_type, exc_info=True)
494 493 self.session.send(self.query, "hub_error", ident=client_id,
495 494 content=content)
496 495 return
497 496 else:
498 497 handler(idents, msg)
499 498
500 499 def dispatch_db(self, msg):
501 500 """"""
502 501 raise NotImplementedError
503 502
504 503 #---------------------------------------------------------------------------
505 504 # handler methods (1 per event)
506 505 #---------------------------------------------------------------------------
507 506
508 507 #----------------------- Heartbeat --------------------------------------
509 508
510 509 def handle_new_heart(self, heart):
511 510 """handler to attach to heartbeater.
512 511 Called when a new heart starts to beat.
513 512 Triggers completion of registration."""
514 513 self.log.debug("heartbeat::handle_new_heart(%r)"%heart)
515 514 if heart not in self.incoming_registrations:
516 515 self.log.info("heartbeat::ignoring new heart: %r"%heart)
517 516 else:
518 517 self.finish_registration(heart)
519 518
520 519
521 520 def handle_heart_failure(self, heart):
522 521 """handler to attach to heartbeater.
523 522 called when a previously registered heart fails to respond to beat request.
524 523 triggers unregistration"""
525 524 self.log.debug("heartbeat::handle_heart_failure(%r)"%heart)
526 525 eid = self.hearts.get(heart, None)
527 526 if eid is None:
528 527 self.log.info("heartbeat::ignoring heart failure %r"%heart)
529 528 else:
530 529 queue = self.engines[eid].queue
531 530 self.unregister_engine(heart, dict(content=dict(id=eid, queue=queue)))
532 531
533 532 #----------------------- MUX Queue Traffic ------------------------------
534 533
535 534 def save_queue_request(self, idents, msg):
536 535 if len(idents) < 2:
537 536 self.log.error("invalid identity prefix: %s"%idents)
538 537 return
539 538 queue_id, client_id = idents[:2]
540 539 try:
541 540 msg = self.session.unpack_message(msg, content=False)
542 541 except:
543 542 self.log.error("queue::client %r sent invalid message to %r: %s"%(client_id, queue_id, msg), exc_info=True)
544 543 return
545 544
546 545 eid = self.by_ident.get(queue_id, None)
547 546 if eid is None:
548 547 self.log.error("queue::target %r not registered"%queue_id)
549 548 self.log.debug("queue:: valid are: %s"%(self.by_ident.keys()))
550 549 return
551 550
552 551 header = msg['header']
553 552 msg_id = header['msg_id']
554 553 record = init_record(msg)
555 554 record['engine_uuid'] = queue_id
556 555 record['client_uuid'] = client_id
557 556 record['queue'] = 'mux'
558 557
559 558 try:
560 559 # it's possible iopub arrived first:
561 560 existing = self.db.get_record(msg_id)
562 561 for key,evalue in existing.iteritems():
563 562 rvalue = record[key]
564 563 if evalue and rvalue and evalue != rvalue:
565 564 self.log.error("conflicting initial state for record: %s:%s <> %s"%(msg_id, rvalue, evalue))
566 565 elif evalue and not rvalue:
567 566 record[key] = evalue
568 567 self.db.update_record(msg_id, record)
569 568 except KeyError:
570 569 self.db.add_record(msg_id, record)
571 570
572 571 self.pending.add(msg_id)
573 572 self.queues[eid].append(msg_id)
574 573
575 574 def save_queue_result(self, idents, msg):
576 575 if len(idents) < 2:
577 576 self.log.error("invalid identity prefix: %s"%idents)
578 577 return
579 578
580 579 client_id, queue_id = idents[:2]
581 580 try:
582 581 msg = self.session.unpack_message(msg, content=False)
583 582 except:
584 583 self.log.error("queue::engine %r sent invalid message to %r: %s"%(
585 584 queue_id,client_id, msg), exc_info=True)
586 585 return
587 586
588 587 eid = self.by_ident.get(queue_id, None)
589 588 if eid is None:
590 589 self.log.error("queue::unknown engine %r is sending a reply: "%queue_id)
591 590 # self.log.debug("queue:: %s"%msg[2:])
592 591 return
593 592
594 593 parent = msg['parent_header']
595 594 if not parent:
596 595 return
597 596 msg_id = parent['msg_id']
598 597 if msg_id in self.pending:
599 598 self.pending.remove(msg_id)
600 599 self.all_completed.add(msg_id)
601 600 self.queues[eid].remove(msg_id)
602 601 self.completed[eid].append(msg_id)
603 602 elif msg_id not in self.all_completed:
604 603 # it could be a result from a dead engine that died before delivering the
605 604 # result
606 605 self.log.warn("queue:: unknown msg finished %s"%msg_id)
607 606 return
608 607 # update record anyway, because the unregistration could have been premature
609 608 rheader = msg['header']
610 609 completed = datetime.strptime(rheader['date'], ISO8601)
611 610 started = rheader.get('started', None)
612 611 if started is not None:
613 612 started = datetime.strptime(started, ISO8601)
614 613 result = {
615 614 'result_header' : rheader,
616 615 'result_content': msg['content'],
617 616 'started' : started,
618 617 'completed' : completed
619 618 }
620 619
621 620 result['result_buffers'] = msg['buffers']
622 621 self.db.update_record(msg_id, result)
623 622
624 623
625 624 #--------------------- Task Queue Traffic ------------------------------
626 625
627 626 def save_task_request(self, idents, msg):
628 627 """Save the submission of a task."""
629 628 client_id = idents[0]
630 629
631 630 try:
632 631 msg = self.session.unpack_message(msg, content=False)
633 632 except:
634 633 self.log.error("task::client %r sent invalid task message: %s"%(
635 634 client_id, msg), exc_info=True)
636 635 return
637 636 record = init_record(msg)
638 637
639 638 record['client_uuid'] = client_id
640 639 record['queue'] = 'task'
641 640 header = msg['header']
642 641 msg_id = header['msg_id']
643 642 self.pending.add(msg_id)
644 643 try:
645 644 # it's possible iopub arrived first:
646 645 existing = self.db.get_record(msg_id)
647 646 for key,evalue in existing.iteritems():
648 647 rvalue = record[key]
649 648 if evalue and rvalue and evalue != rvalue:
650 649 self.log.error("conflicting initial state for record: %s:%s <> %s"%(msg_id, rvalue, evalue))
651 650 elif evalue and not rvalue:
652 651 record[key] = evalue
653 652 self.db.update_record(msg_id, record)
654 653 except KeyError:
655 654 self.db.add_record(msg_id, record)
656 655
657 656 def save_task_result(self, idents, msg):
658 657 """save the result of a completed task."""
659 658 client_id = idents[0]
660 659 try:
661 660 msg = self.session.unpack_message(msg, content=False)
662 661 except:
663 662 self.log.error("task::invalid task result message sent to %r: %s"%(
664 663 client_id, msg), exc_info=True)
666 665 return
667 666
668 667 parent = msg['parent_header']
669 668 if not parent:
670 669 # print msg
671 670 self.log.warn("Task %r had no parent!"%msg)
672 671 return
673 672 msg_id = parent['msg_id']
674 673
675 674 header = msg['header']
676 675 engine_uuid = header.get('engine', None)
677 676 eid = self.by_ident.get(engine_uuid, None)
678 677
679 678 if msg_id in self.pending:
680 679 self.pending.remove(msg_id)
681 680 self.all_completed.add(msg_id)
682 681 if eid is not None:
683 682 self.completed[eid].append(msg_id)
684 683 if msg_id in self.tasks[eid]:
685 684 self.tasks[eid].remove(msg_id)
686 685 completed = datetime.strptime(header['date'], ISO8601)
687 686 started = header.get('started', None)
688 687 if started is not None:
689 688 started = datetime.strptime(started, ISO8601)
690 689 result = {
691 690 'result_header' : header,
692 691 'result_content': msg['content'],
693 692 'started' : started,
694 693 'completed' : completed,
695 694 'engine_uuid': engine_uuid
696 695 }
697 696
698 697 result['result_buffers'] = msg['buffers']
699 698 self.db.update_record(msg_id, result)
700 699
701 700 else:
702 701 self.log.debug("task::unknown task %s finished"%msg_id)
703 702
704 703 def save_task_destination(self, idents, msg):
705 704 try:
706 705 msg = self.session.unpack_message(msg, content=True)
707 706 except:
708 707 self.log.error("task::invalid task tracking message", exc_info=True)
709 708 return
710 709 content = msg['content']
711 710 # print (content)
712 711 msg_id = content['msg_id']
713 712 engine_uuid = content['engine_id']
714 713 eid = self.by_ident[engine_uuid]
715 714
716 715 self.log.info("task::task %s arrived on %s"%(msg_id, eid))
717 716 # if msg_id in self.mia:
718 717 # self.mia.remove(msg_id)
719 718 # else:
720 719 # self.log.debug("task::task %s not listed as MIA?!"%(msg_id))
721 720
722 721 self.tasks[eid].append(msg_id)
723 722 # self.pending[msg_id][1].update(received=datetime.now(),engine=(eid,engine_uuid))
724 723 self.db.update_record(msg_id, dict(engine_uuid=engine_uuid))
725 724
726 725 def mia_task_request(self, idents, msg):
727 726 raise NotImplementedError
728 727 client_id = idents[0]
729 728 # content = dict(mia=self.mia,status='ok')
730 729 # self.session.send('mia_reply', content=content, idents=client_id)
731 730
732 731
733 732 #--------------------- IOPub Traffic ------------------------------
734 733
735 734 def save_iopub_message(self, topics, msg):
736 735 """save an iopub message into the db"""
737 736 # print (topics)
738 737 try:
739 738 msg = self.session.unpack_message(msg, content=True)
740 739 except:
741 740 self.log.error("iopub::invalid IOPub message", exc_info=True)
742 741 return
743 742
744 743 parent = msg['parent_header']
745 744 if not parent:
746 745 self.log.error("iopub::invalid IOPub message: %s"%msg)
747 746 return
748 747 msg_id = parent['msg_id']
749 748 msg_type = msg['msg_type']
750 749 content = msg['content']
751 750
752 751 # ensure msg_id is in db
753 752 try:
754 753 rec = self.db.get_record(msg_id)
755 754 except KeyError:
756 755 rec = empty_record()
757 756 rec['msg_id'] = msg_id
758 757 self.db.add_record(msg_id, rec)
759 758 # stream
760 759 d = {}
761 760 if msg_type == 'stream':
762 761 name = content['name']
763 762 s = rec[name] or ''
764 763 d[name] = s + content['data']
765 764
766 765 elif msg_type == 'pyerr':
767 766 d['pyerr'] = content
768 767 else:
769 768 d[msg_type] = content['data']
770 769
771 770 self.db.update_record(msg_id, d)
772 771
773 772
774 773
775 774 #-------------------------------------------------------------------------
776 775 # Registration requests
777 776 #-------------------------------------------------------------------------
778 777
779 778 def connection_request(self, client_id, msg):
780 779 """Reply with connection addresses for clients."""
781 780 self.log.info("client::client %s connected"%client_id)
782 781 content = dict(status='ok')
783 782 content.update(self.client_info)
784 783 jsonable = {}
785 784 for k,v in self.keytable.iteritems():
786 785 if v not in self.dead_engines:
787 786 jsonable[str(k)] = v
788 787 content['engines'] = jsonable
789 788 self.session.send(self.query, 'connection_reply', content, parent=msg, ident=client_id)
790 789
791 790 def register_engine(self, reg, msg):
792 791 """Register a new engine."""
793 792 content = msg['content']
794 793 try:
795 794 queue = content['queue']
796 795 except KeyError:
797 796 self.log.error("registration::queue not specified", exc_info=True)
798 797 return
799 798 heart = content.get('heartbeat', None)
800 799 """register a new engine, and create the socket(s) necessary"""
801 800 eid = self._next_id
802 801 # print (eid, queue, reg, heart)
803 802
804 803 self.log.debug("registration::register_engine(%i, %r, %r, %r)"%(eid, queue, reg, heart))
805 804
806 805 content = dict(id=eid,status='ok')
807 806 content.update(self.engine_info)
808 807 # check for conflicts with existing registrations:
809 808 if queue in self.by_ident:
810 809 try:
811 810 raise KeyError("queue_id %r in use"%queue)
812 811 except:
813 812 content = error.wrap_exception()
814 813 self.log.error("queue_id %r in use"%queue, exc_info=True)
815 814 elif heart in self.hearts: # need to check unique hearts?
816 815 try:
817 816 raise KeyError("heart_id %r in use"%heart)
818 817 except:
819 818 self.log.error("heart_id %r in use"%heart, exc_info=True)
820 819 content = error.wrap_exception()
821 820 else:
822 821 for h, pack in self.incoming_registrations.iteritems():
823 822 if heart == h:
824 823 try:
825 824 raise KeyError("heart_id %r in use"%heart)
826 825 except:
827 826 self.log.error("heart_id %r in use"%heart, exc_info=True)
828 827 content = error.wrap_exception()
829 828 break
830 829 elif queue == pack[1]:
831 830 try:
832 831 raise KeyError("queue_id %r in use"%queue)
833 832 except:
834 833 self.log.error("queue_id %r in use"%queue, exc_info=True)
835 834 content = error.wrap_exception()
836 835 break
837 836
838 837 msg = self.session.send(self.query, "registration_reply",
839 838 content=content,
840 839 ident=reg)
841 840
842 841 if content['status'] == 'ok':
843 842 if heart in self.heartmonitor.hearts:
844 843 # already beating
845 844 self.incoming_registrations[heart] = (eid,queue,reg[0],None)
846 845 self.finish_registration(heart)
847 846 else:
848 847 purge = lambda : self._purge_stalled_registration(heart)
849 848 dc = ioloop.DelayedCallback(purge, self.registration_timeout, self.loop)
850 849 dc.start()
851 850 self.incoming_registrations[heart] = (eid,queue,reg[0],dc)
852 851 else:
853 852 self.log.error("registration::registration %i failed: %s"%(eid, content['evalue']))
854 853 return eid
855 854
856 855 def unregister_engine(self, ident, msg):
857 856 """Unregister an engine that explicitly requested to leave."""
858 857 try:
859 858 eid = msg['content']['id']
860 859 except:
861 860 self.log.error("registration::bad engine id for unregistration: %s"%ident, exc_info=True)
862 861 return
863 862 self.log.info("registration::unregister_engine(%s)"%eid)
864 863 # print (eid)
865 864 uuid = self.keytable[eid]
866 865 content=dict(id=eid, queue=uuid)
867 866 self.dead_engines.add(uuid)
868 867 # self.ids.remove(eid)
869 868 # uuid = self.keytable.pop(eid)
870 869 #
871 870 # ec = self.engines.pop(eid)
872 871 # self.hearts.pop(ec.heartbeat)
873 872 # self.by_ident.pop(ec.queue)
874 873 # self.completed.pop(eid)
875 874 handleit = lambda : self._handle_stranded_msgs(eid, uuid)
876 875 dc = ioloop.DelayedCallback(handleit, self.registration_timeout, self.loop)
877 876 dc.start()
878 877 ############## TODO: HANDLE IT ################
879 878
880 879 if self.notifier:
881 880 self.session.send(self.notifier, "unregistration_notification", content=content)
882 881
883 882 def _handle_stranded_msgs(self, eid, uuid):
884 883 """Handle messages known to be on an engine when the engine unregisters.
885 884
886 885 It is possible that this will fire prematurely - that is, an engine will
887 886 go down after completing a result, and the client will be notified
888 887 that the result failed and later receive the actual result.
889 888 """
890 889
891 890 outstanding = self.queues[eid]
892 891
893 892 for msg_id in outstanding:
894 893 self.pending.remove(msg_id)
895 894 self.all_completed.add(msg_id)
896 895 try:
897 896 raise error.EngineError("Engine %r died while running task %r"%(eid, msg_id))
898 897 except:
899 898 content = error.wrap_exception()
900 899 # build a fake header:
901 900 header = {}
902 901 header['engine'] = uuid
903 902 header['date'] = datetime.now().strftime(ISO8601)
904 903 rec = dict(result_content=content, result_header=header, result_buffers=[])
905 904 rec['completed'] = header['date']
906 905 rec['engine_uuid'] = uuid
907 906 self.db.update_record(msg_id, rec)
908 907
909 908 def finish_registration(self, heart):
910 909 """Second half of engine registration, called after our HeartMonitor
911 910 has received a beat from the Engine's Heart."""
912 911 try:
913 912 (eid,queue,reg,purge) = self.incoming_registrations.pop(heart)
914 913 except KeyError:
915 914 self.log.error("registration::tried to finish nonexistent registration", exc_info=True)
916 915 return
917 916 self.log.info("registration::finished registering engine %i:%r"%(eid,queue))
918 917 if purge is not None:
919 918 purge.stop()
920 919 control = queue
921 920 self.ids.add(eid)
922 921 self.keytable[eid] = queue
923 922 self.engines[eid] = EngineConnector(id=eid, queue=queue, registration=reg,
924 923 control=control, heartbeat=heart)
925 924 self.by_ident[queue] = eid
926 925 self.queues[eid] = list()
927 926 self.tasks[eid] = list()
928 927 self.completed[eid] = list()
929 928 self.hearts[heart] = eid
930 929 content = dict(id=eid, queue=self.engines[eid].queue)
931 930 if self.notifier:
932 931 self.session.send(self.notifier, "registration_notification", content=content)
933 932 self.log.info("engine::Engine Connected: %i"%eid)
934 933
935 934 def _purge_stalled_registration(self, heart):
936 935 if heart in self.incoming_registrations:
937 936 eid = self.incoming_registrations.pop(heart)[0]
938 937 self.log.info("registration::purging stalled registration: %i"%eid)
939 938 else:
940 939 pass
941 940
942 941 #-------------------------------------------------------------------------
943 942 # Client Requests
944 943 #-------------------------------------------------------------------------
945 944
946 945 def shutdown_request(self, client_id, msg):
947 946 """handle shutdown request."""
948 947 self.session.send(self.query, 'shutdown_reply', content={'status': 'ok'}, ident=client_id)
949 948 # also notify other clients of shutdown
950 949 self.session.send(self.notifier, 'shutdown_notice', content={'status': 'ok'})
951 950 dc = ioloop.DelayedCallback(lambda : self._shutdown(), 1000, self.loop)
952 951 dc.start()
953 952
954 953 def _shutdown(self):
955 954 self.log.info("hub::hub shutting down.")
956 955 time.sleep(0.1)
957 956 sys.exit(0)
958 957
959 958
960 959 def check_load(self, client_id, msg):
961 960 content = msg['content']
962 961 try:
963 962 targets = content['targets']
964 963 targets = self._validate_targets(targets)
965 964 except:
966 965 content = error.wrap_exception()
967 966 self.session.send(self.query, "hub_error",
968 967 content=content, ident=client_id)
969 968 return
970 969
971 970 content = dict(status='ok')
972 971 # loads = {}
973 972 for t in targets:
974 973 content[bytes(t)] = len(self.queues[t])+len(self.tasks[t])
975 974 self.session.send(self.query, "load_reply", content=content, ident=client_id)
976 975
977 976
978 977 def queue_status(self, client_id, msg):
979 978 """Return the Queue status of one or more targets.
980 979 if verbose: return the msg_ids
981 980 else: return len of each type.
982 981 keys: queue (pending MUX jobs)
983 982 tasks (pending Task jobs)
984 983 completed (finished jobs from both queues)"""
985 984 content = msg['content']
986 985 targets = content['targets']
987 986 try:
988 987 targets = self._validate_targets(targets)
989 988 except:
990 989 content = error.wrap_exception()
991 990 self.session.send(self.query, "hub_error",
992 991 content=content, ident=client_id)
993 992 return
994 993 verbose = content.get('verbose', False)
995 994 content = dict(status='ok')
996 995 for t in targets:
997 996 queue = self.queues[t]
998 997 completed = self.completed[t]
999 998 tasks = self.tasks[t]
1000 999 if not verbose:
1001 1000 queue = len(queue)
1002 1001 completed = len(completed)
1003 1002 tasks = len(tasks)
1004 1003 content[bytes(t)] = {'queue': queue, 'completed': completed , 'tasks': tasks}
1005 1004 # pending
1006 1005 self.session.send(self.query, "queue_reply", content=content, ident=client_id)
1007 1006
1008 1007 def purge_results(self, client_id, msg):
1009 1008 """Purge results from memory. This method is more valuable before we move
1010 1009 to a DB based message storage mechanism."""
1011 1010 content = msg['content']
1012 1011 msg_ids = content.get('msg_ids', [])
1013 1012 reply = dict(status='ok')
1014 1013 if msg_ids == 'all':
1015 1014 self.db.drop_matching_records(dict(completed={'$ne':None}))
1016 1015 else:
1017 1016 for msg_id in msg_ids:
1018 1017 if msg_id in self.all_completed:
1019 1018 self.db.drop_record(msg_id)
1020 1019 else:
1021 1020 if msg_id in self.pending:
1022 1021 try:
1023 1022 raise IndexError("msg pending: %r"%msg_id)
1024 1023 except:
1025 1024 reply = error.wrap_exception()
1026 1025 else:
1027 1026 try:
1028 1027 raise IndexError("No such msg: %r"%msg_id)
1029 1028 except:
1030 1029 reply = error.wrap_exception()
1031 1030 break
1032 1031 eids = content.get('engine_ids', [])
1033 1032 for eid in eids:
1034 1033 if eid not in self.engines:
1035 1034 try:
1036 1035 raise IndexError("No such engine: %i"%eid)
1037 1036 except:
1038 1037 reply = error.wrap_exception()
1039 1038 break
1040 1039 msg_ids = self.completed.pop(eid)
1041 1040 uid = self.engines[eid].queue
1042 1041 self.db.drop_matching_records(dict(engine_uuid=uid, completed={'$ne':None}))
1043 1042
1044 1043 self.session.send(self.query, 'purge_reply', content=reply, ident=client_id)
1045 1044
1046 1045 def resubmit_task(self, client_id, msg, buffers):
1047 1046 """Resubmit a task."""
1048 1047 raise NotImplementedError
1049 1048
1050 1049 def get_results(self, client_id, msg):
1051 1050 """Get the result of 1 or more messages."""
1052 1051 content = msg['content']
1053 1052 msg_ids = sorted(set(content['msg_ids']))
1054 1053 statusonly = content.get('status_only', False)
1055 1054 pending = []
1056 1055 completed = []
1057 1056 content = dict(status='ok')
1058 1057 content['pending'] = pending
1059 1058 content['completed'] = completed
1060 1059 buffers = []
1061 1060 if not statusonly:
1062 1061 content['results'] = {}
1063 1062 records = self.db.find_records(dict(msg_id={'$in':msg_ids}))
1064 1063 for msg_id in msg_ids:
1065 1064 if msg_id in self.pending:
1066 1065 pending.append(msg_id)
1067 1066 elif msg_id in self.all_completed:
1068 1067 completed.append(msg_id)
1069 1068 if not statusonly:
1070 1069 rec = records[msg_id]
1071 1070 io_dict = {}
1072 1071 for key in 'pyin pyout pyerr stdout stderr'.split():
1073 1072 io_dict[key] = rec[key]
1074 1073 content[msg_id] = { 'result_content': rec['result_content'],
1075 1074 'header': rec['header'],
1076 1075 'result_header' : rec['result_header'],
1077 1076 'io' : io_dict,
1078 1077 }
1079 1078 if rec['result_buffers']:
1080 1079 buffers.extend(map(str, rec['result_buffers']))
1081 1080 else:
1082 1081 try:
1083 1082 raise KeyError('No such message: '+msg_id)
1084 1083 except:
1085 1084 content = error.wrap_exception()
1086 1085 break
1087 1086 self.session.send(self.query, "result_reply", content=content,
1088 1087 parent=msg, ident=client_id,
1089 1088 buffers=buffers)
1090 1089
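As a quick orientation for the hub's queue_status handler above, here is a minimal sketch of the reply shape it builds. The helper name and sample data are hypothetical; only the field names ('queue', 'completed', 'tasks', 'status') come from the code.

def build_queue_reply(queues, completed, tasks, targets, verbose=False):
    # Mirrors Hub.queue_status above: verbose replies keep the msg_id lists,
    # non-verbose replies collapse each list to a count.
    content = dict(status='ok')
    for t in targets:
        q, c, k = queues[t], completed[t], tasks[t]
        if not verbose:
            q, c, k = len(q), len(c), len(k)
        content[bytes(t)] = {'queue': q, 'completed': c, 'tasks': k}
    return content

# e.g. build_queue_reply({0: ['m2']}, {0: ['m0', 'm1']}, {0: []}, targets=[0])
# -> {'status': 'ok', '0': {'queue': 1, 'completed': 2, 'tasks': 0}}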
1 NO CONTENT: file renamed from IPython/parallel/mongodb.py to IPython/parallel/controller/mongodb.py
@@ -1,592 +1,592 b''
1 1 """The Python scheduler for rich scheduling.
2 2
3 3 The Pure ZMQ scheduler does not allow routing schemes other than LRU,
4 4 nor does it check msg_id DAG dependencies. For those, a slightly slower
5 5 Python Scheduler exists.
6 6 """
7 7 #-----------------------------------------------------------------------------
8 8 # Copyright (C) 2010-2011 The IPython Development Team
9 9 #
10 10 # Distributed under the terms of the BSD License. The full license is in
11 11 # the file COPYING, distributed as part of this software.
12 12 #-----------------------------------------------------------------------------
13 13
14 14 #----------------------------------------------------------------------
15 15 # Imports
16 16 #----------------------------------------------------------------------
17 17
18 18 from __future__ import print_function
19 19
20 20 import logging
21 21 import sys
22 22
23 23 from datetime import datetime, timedelta
24 24 from random import randint, random
25 25 from types import FunctionType
26 26
27 27 try:
28 28 import numpy
29 29 except ImportError:
30 30 numpy = None
31 31
32 32 import zmq
33 33 from zmq.eventloop import ioloop, zmqstream
34 34
35 35 # local imports
36 36 from IPython.external.decorator import decorator
37 37 from IPython.utils.traitlets import Instance, Dict, List, Set
38 38
39 from . import error
40 from .dependency import Dependency
41 from .entry_point import connect_logger, local_logger
42 from .factory import SessionFactory
39 from IPython.parallel import error
40 from IPython.parallel.factory import SessionFactory
41 from IPython.parallel.util import connect_logger, local_logger
43 42
43 from .dependency import Dependency
44 44
45 45 @decorator
46 46 def logged(f,self,*args,**kwargs):
47 47 # print ("#--------------------")
48 48 self.log.debug("scheduler::%s(*%s,**%s)"%(f.func_name, args, kwargs))
49 49 # print ("#--")
50 50 return f(self,*args, **kwargs)
51 51
52 52 #----------------------------------------------------------------------
53 53 # Chooser functions
54 54 #----------------------------------------------------------------------
55 55
56 56 def plainrandom(loads):
57 57 """Plain random pick."""
58 58 n = len(loads)
59 59 return randint(0,n-1)
60 60
61 61 def lru(loads):
62 62 """Always pick the front of the line.
63 63
64 64 The content of `loads` is ignored.
65 65
66 66 Assumes LRU ordering of loads, with oldest first.
67 67 """
68 68 return 0
69 69
70 70 def twobin(loads):
71 71 """Pick two at random, use the LRU of the two.
72 72
73 73 The content of loads is ignored.
74 74
75 75 Assumes LRU ordering of loads, with oldest first.
76 76 """
77 77 n = len(loads)
78 78 a = randint(0,n-1)
79 79 b = randint(0,n-1)
80 80 return min(a,b)
81 81
82 82 def weighted(loads):
83 83 """Pick two at random using inverse load as weight.
84 84
85 85 Return the less loaded of the two.
86 86 """
87 87 # weight 0 a million times more than 1:
88 88 weights = 1./(1e-6+numpy.array(loads))
89 89 sums = weights.cumsum()
90 90 t = sums[-1]
91 91 x = random()*t
92 92 y = random()*t
93 93 idx = 0
94 94 idy = 0
95 95 while sums[idx] < x:
96 96 idx += 1
97 97 while sums[idy] < y:
98 98 idy += 1
99 99 if weights[idy] > weights[idx]:
100 100 return idy
101 101 else:
102 102 return idx
103 103
104 104 def leastload(loads):
105 105 """Always choose the lowest load.
106 106
107 107 If the lowest load occurs more than once, the first
108 108 occurrence will be used. If loads has LRU ordering, this means
109 109 the LRU of those with the lowest load is chosen.
110 110 """
111 111 return loads.index(min(loads))
112 112
113 113 #---------------------------------------------------------------------
114 114 # Classes
115 115 #---------------------------------------------------------------------
116 116 # store empty default dependency:
117 117 MET = Dependency([])
118 118
119 119 class TaskScheduler(SessionFactory):
120 120 """Python TaskScheduler object.
121 121
122 122 This is the simplest object that supports msg_id based
123 123 DAG dependencies. *Only* task msg_ids are checked, not
124 124 msg_ids of jobs submitted via the MUX queue.
125 125
126 126 """
127 127
128 128 # input arguments:
129 129 scheme = Instance(FunctionType, default=leastload) # function for determining the destination
130 130 client_stream = Instance(zmqstream.ZMQStream) # client-facing stream
131 131 engine_stream = Instance(zmqstream.ZMQStream) # engine-facing stream
132 132 notifier_stream = Instance(zmqstream.ZMQStream) # hub-facing sub stream
133 133 mon_stream = Instance(zmqstream.ZMQStream) # hub-facing pub stream
134 134
135 135 # internals:
136 136 graph = Dict() # dict by msg_id of [ msg_ids that depend on key ]
137 137 depending = Dict() # dict by msg_id of (msg_id, raw_msg, after, follow)
138 138 pending = Dict() # dict by engine_uuid of submitted tasks
139 139 completed = Dict() # dict by engine_uuid of completed tasks
140 140 failed = Dict() # dict by engine_uuid of failed tasks
141 141 destinations = Dict() # dict by msg_id of engine_uuids where jobs ran (reverse of completed+failed)
142 142 clients = Dict() # dict by msg_id for who submitted the task
143 143 targets = List() # list of target IDENTs
144 144 loads = List() # list of engine loads
145 145 all_completed = Set() # set of all completed tasks
146 146 all_failed = Set() # set of all failed tasks
147 147 all_done = Set() # set of all finished tasks=union(completed,failed)
148 148 all_ids = Set() # set of all submitted task IDs
149 149 blacklist = Dict() # dict by msg_id of locations where a job has encountered UnmetDependency
150 150 auditor = Instance('zmq.eventloop.ioloop.PeriodicCallback')
151 151
152 152
153 153 def start(self):
154 154 self.engine_stream.on_recv(self.dispatch_result, copy=False)
155 155 self._notification_handlers = dict(
156 156 registration_notification = self._register_engine,
157 157 unregistration_notification = self._unregister_engine
158 158 )
159 159 self.notifier_stream.on_recv(self.dispatch_notification)
160 160 self.auditor = ioloop.PeriodicCallback(self.audit_timeouts, 2e3, self.loop) # 1 Hz
161 161 self.auditor.start()
162 162 self.log.info("Scheduler started...%r"%self)
163 163
164 164 def resume_receiving(self):
165 165 """Resume accepting jobs."""
166 166 self.client_stream.on_recv(self.dispatch_submission, copy=False)
167 167
168 168 def stop_receiving(self):
169 169 """Stop accepting jobs while there are no engines.
170 170 Leave them in the ZMQ queue."""
171 171 self.client_stream.on_recv(None)
172 172
173 173 #-----------------------------------------------------------------------
174 174 # [Un]Registration Handling
175 175 #-----------------------------------------------------------------------
176 176
177 177 def dispatch_notification(self, msg):
178 178 """dispatch register/unregister events."""
179 179 idents,msg = self.session.feed_identities(msg)
180 180 msg = self.session.unpack_message(msg)
181 181 msg_type = msg['msg_type']
182 182 handler = self._notification_handlers.get(msg_type, None)
183 183 if handler is None:
184 184 raise Exception("Unhandled message type: %s"%msg_type)
185 185 else:
186 186 try:
187 187 handler(str(msg['content']['queue']))
188 188 except KeyError:
189 189 self.log.error("task::Invalid notification msg: %s"%msg)
190 190
191 191 @logged
192 192 def _register_engine(self, uid):
193 193 """New engine with ident `uid` became available."""
194 194 # head of the line:
195 195 self.targets.insert(0,uid)
196 196 self.loads.insert(0,0)
197 197 # initialize sets
198 198 self.completed[uid] = set()
199 199 self.failed[uid] = set()
200 200 self.pending[uid] = {}
201 201 if len(self.targets) == 1:
202 202 self.resume_receiving()
203 203
204 204 def _unregister_engine(self, uid):
205 205 """Existing engine with ident `uid` became unavailable."""
206 206 if len(self.targets) == 1:
207 207 # this was our only engine
208 208 self.stop_receiving()
209 209
210 210 # handle any potentially finished tasks:
211 211 self.engine_stream.flush()
212 212
213 213 self.completed.pop(uid)
214 214 self.failed.pop(uid)
215 215 # don't pop destinations, because it might be used later
216 216 # map(self.destinations.pop, self.completed.pop(uid))
217 217 # map(self.destinations.pop, self.failed.pop(uid))
218 218
219 219 idx = self.targets.index(uid)
220 220 self.targets.pop(idx)
221 221 self.loads.pop(idx)
222 222
223 223 # wait 5 seconds before cleaning up pending jobs, since the results might
224 224 # still be incoming
225 225 if self.pending[uid]:
226 226 dc = ioloop.DelayedCallback(lambda : self.handle_stranded_tasks(uid), 5000, self.loop)
227 227 dc.start()
228 228
229 229 @logged
230 230 def handle_stranded_tasks(self, engine):
231 231 """Deal with jobs resident in an engine that died."""
232 232 lost = self.pending.pop(engine)
233 233
234 234 for msg_id, (raw_msg, targets, MET, follow, timeout) in lost.iteritems():
235 235 self.all_failed.add(msg_id)
236 236 self.all_done.add(msg_id)
237 237 idents,msg = self.session.feed_identities(raw_msg, copy=False)
238 238 msg = self.session.unpack_message(msg, copy=False, content=False)
239 239 parent = msg['header']
240 240 idents = [idents[0],engine]+idents[1:]
241 241 # print (idents)
242 242 try:
243 243 raise error.EngineError("Engine %r died while running task %r"%(engine, msg_id))
244 244 except:
245 245 content = error.wrap_exception()
246 246 msg = self.session.send(self.client_stream, 'apply_reply', content,
247 247 parent=parent, ident=idents)
248 248 self.session.send(self.mon_stream, msg, ident=['outtask']+idents)
249 249 self.update_graph(msg_id)
250 250
251 251
252 252 #-----------------------------------------------------------------------
253 253 # Job Submission
254 254 #-----------------------------------------------------------------------
255 255 @logged
256 256 def dispatch_submission(self, raw_msg):
257 257 """Dispatch job submission to appropriate handlers."""
258 258 # ensure targets up to date:
259 259 self.notifier_stream.flush()
260 260 try:
261 261 idents, msg = self.session.feed_identities(raw_msg, copy=False)
262 262 msg = self.session.unpack_message(msg, content=False, copy=False)
263 263 except:
264 264 self.log.error("task::Invalid task: %s"%raw_msg, exc_info=True)
265 265 return
266 266
267 267 # send to monitor
268 268 self.mon_stream.send_multipart(['intask']+raw_msg, copy=False)
269 269
270 270 header = msg['header']
271 271 msg_id = header['msg_id']
272 272 self.all_ids.add(msg_id)
273 273
274 274 # targets
275 275 targets = set(header.get('targets', []))
276 276
277 277 # time dependencies
278 278 after = Dependency(header.get('after', []))
279 279 if after.all:
280 280 if after.success:
281 281 after.difference_update(self.all_completed)
282 282 if after.failure:
283 283 after.difference_update(self.all_failed)
284 284 if after.check(self.all_completed, self.all_failed):
285 285 # recast as empty set, if `after` already met,
286 286 # to prevent unnecessary set comparisons
287 287 after = MET
288 288
289 289 # location dependencies
290 290 follow = Dependency(header.get('follow', []))
291 291
292 292 # turn timeouts into datetime objects:
293 293 timeout = header.get('timeout', None)
294 294 if timeout:
295 295 timeout = datetime.now() + timedelta(0,timeout,0)
296 296
297 297 args = [raw_msg, targets, after, follow, timeout]
298 298
299 299 # validate and reduce dependencies:
300 300 for dep in after,follow:
301 301 # check valid:
302 302 if msg_id in dep or dep.difference(self.all_ids):
303 303 self.depending[msg_id] = args
304 304 return self.fail_unreachable(msg_id, error.InvalidDependency)
305 305 # check if unreachable:
306 306 if dep.unreachable(self.all_completed, self.all_failed):
307 307 self.depending[msg_id] = args
308 308 return self.fail_unreachable(msg_id)
309 309
310 310 if after.check(self.all_completed, self.all_failed):
311 311 # time deps already met, try to run
312 312 if not self.maybe_run(msg_id, *args):
313 313 # can't run yet
314 314 self.save_unmet(msg_id, *args)
315 315 else:
316 316 self.save_unmet(msg_id, *args)
317 317
318 318 # @logged
319 319 def audit_timeouts(self):
320 320 """Audit all waiting tasks for expired timeouts."""
321 321 now = datetime.now()
322 322 for msg_id in self.depending.keys():
323 323 # must recheck, in case one failure cascaded to another:
324 324 if msg_id in self.depending:
325 325 raw,after,targets,follow,timeout = self.depending[msg_id]
326 326 if timeout and timeout < now:
327 327 self.fail_unreachable(msg_id, timeout=True)
328 328
329 329 @logged
330 330 def fail_unreachable(self, msg_id, why=error.ImpossibleDependency):
331 331 """a task has become unreachable, send a reply with an ImpossibleDependency
332 332 error."""
333 333 if msg_id not in self.depending:
334 334 self.log.error("msg %r already failed!"%msg_id)
335 335 return
336 336 raw_msg,targets,after,follow,timeout = self.depending.pop(msg_id)
337 337 for mid in follow.union(after):
338 338 if mid in self.graph:
339 339 self.graph[mid].remove(msg_id)
340 340
341 341 # FIXME: unpacking a message I've already unpacked, but didn't save:
342 342 idents,msg = self.session.feed_identities(raw_msg, copy=False)
343 343 msg = self.session.unpack_message(msg, copy=False, content=False)
344 344 header = msg['header']
345 345
346 346 try:
347 347 raise why()
348 348 except:
349 349 content = error.wrap_exception()
350 350
351 351 self.all_done.add(msg_id)
352 352 self.all_failed.add(msg_id)
353 353
354 354 msg = self.session.send(self.client_stream, 'apply_reply', content,
355 355 parent=header, ident=idents)
356 356 self.session.send(self.mon_stream, msg, ident=['outtask']+idents)
357 357
358 358 self.update_graph(msg_id, success=False)
359 359
360 360 @logged
361 361 def maybe_run(self, msg_id, raw_msg, targets, after, follow, timeout):
362 362 """check location dependencies, and run if they are met."""
363 363 blacklist = self.blacklist.setdefault(msg_id, set())
364 364 if follow or targets or blacklist:
365 365 # we need a can_run filter
366 366 def can_run(idx):
367 367 target = self.targets[idx]
368 368 # check targets
369 369 if targets and target not in targets:
370 370 return False
371 371 # check blacklist
372 372 if target in blacklist:
373 373 return False
374 374 # check follow
375 375 return follow.check(self.completed[target], self.failed[target])
376 376
377 377 indices = filter(can_run, range(len(self.targets)))
378 378 if not indices:
379 379 # couldn't run
380 380 if follow.all:
381 381 # check follow for impossibility
382 382 dests = set()
383 383 relevant = set()
384 384 if follow.success:
385 385 relevant = self.all_completed
386 386 if follow.failure:
387 387 relevant = relevant.union(self.all_failed)
388 388 for m in follow.intersection(relevant):
389 389 dests.add(self.destinations[m])
390 390 if len(dests) > 1:
391 391 self.fail_unreachable(msg_id)
392 392 return False
393 393 if targets:
394 394 # check blacklist+targets for impossibility
395 395 targets.difference_update(blacklist)
396 396 if not targets or not targets.intersection(self.targets):
397 397 self.fail_unreachable(msg_id)
398 398 return False
399 399 return False
400 400 else:
401 401 indices = None
402 402
403 403 self.submit_task(msg_id, raw_msg, targets, follow, timeout, indices)
404 404 return True
405 405
406 406 @logged
407 407 def save_unmet(self, msg_id, raw_msg, targets, after, follow, timeout):
408 408 """Save a message for later submission when its dependencies are met."""
409 409 self.depending[msg_id] = [raw_msg,targets,after,follow,timeout]
410 410 # track the ids in follow or after, but not those already finished
411 411 for dep_id in after.union(follow).difference(self.all_done):
412 412 if dep_id not in self.graph:
413 413 self.graph[dep_id] = set()
414 414 self.graph[dep_id].add(msg_id)
415 415
416 416 @logged
417 417 def submit_task(self, msg_id, raw_msg, targets, follow, timeout, indices=None):
418 418 """Submit a task to any of a subset of our targets."""
419 419 if indices:
420 420 loads = [self.loads[i] for i in indices]
421 421 else:
422 422 loads = self.loads
423 423 idx = self.scheme(loads)
424 424 if indices:
425 425 idx = indices[idx]
426 426 target = self.targets[idx]
427 427 # print (target, map(str, msg[:3]))
428 428 self.engine_stream.send(target, flags=zmq.SNDMORE, copy=False)
429 429 self.engine_stream.send_multipart(raw_msg, copy=False)
430 430 self.add_job(idx)
431 431 self.pending[target][msg_id] = (raw_msg, targets, MET, follow, timeout)
432 432 content = dict(msg_id=msg_id, engine_id=target)
433 433 self.session.send(self.mon_stream, 'task_destination', content=content,
434 434 ident=['tracktask',self.session.session])
435 435
436 436 #-----------------------------------------------------------------------
437 437 # Result Handling
438 438 #-----------------------------------------------------------------------
439 439 @logged
440 440 def dispatch_result(self, raw_msg):
441 441 """dispatch method for result replies"""
442 442 try:
443 443 idents,msg = self.session.feed_identities(raw_msg, copy=False)
444 444 msg = self.session.unpack_message(msg, content=False, copy=False)
445 445 except:
446 446 self.log.error("task::Invalid result: %s"%raw_msg, exc_info=True)
447 447 return
448 448
449 449 header = msg['header']
450 450 if header.get('dependencies_met', True):
451 451 success = (header['status'] == 'ok')
452 452 self.handle_result(idents, msg['parent_header'], raw_msg, success)
453 453 # send to Hub monitor
454 454 self.mon_stream.send_multipart(['outtask']+raw_msg, copy=False)
455 455 else:
456 456 self.handle_unmet_dependency(idents, msg['parent_header'])
457 457
458 458 @logged
459 459 def handle_result(self, idents, parent, raw_msg, success=True):
460 460 """handle a real task result, either success or failure"""
461 461 # first, relay result to client
462 462 engine = idents[0]
463 463 client = idents[1]
464 464 # swap_ids for XREP-XREP mirror
465 465 raw_msg[:2] = [client,engine]
466 466 # print (map(str, raw_msg[:4]))
467 467 self.client_stream.send_multipart(raw_msg, copy=False)
468 468 # now, update our data structures
469 469 msg_id = parent['msg_id']
470 470 self.blacklist.pop(msg_id, None)
471 471 self.pending[engine].pop(msg_id)
472 472 if success:
473 473 self.completed[engine].add(msg_id)
474 474 self.all_completed.add(msg_id)
475 475 else:
476 476 self.failed[engine].add(msg_id)
477 477 self.all_failed.add(msg_id)
478 478 self.all_done.add(msg_id)
479 479 self.destinations[msg_id] = engine
480 480
481 481 self.update_graph(msg_id, success)
482 482
483 483 @logged
484 484 def handle_unmet_dependency(self, idents, parent):
485 485 """handle an unmet dependency"""
486 486 engine = idents[0]
487 487 msg_id = parent['msg_id']
488 488
489 489 if msg_id not in self.blacklist:
490 490 self.blacklist[msg_id] = set()
491 491 self.blacklist[msg_id].add(engine)
492 492
493 493 args = self.pending[engine].pop(msg_id)
494 494 raw,targets,after,follow,timeout = args
495 495
496 496 if self.blacklist[msg_id] == targets:
497 497 self.depending[msg_id] = args
498 498 return self.fail_unreachable(msg_id)
499 499
500 500 elif not self.maybe_run(msg_id, *args):
501 501 # resubmit failed, put it back in our dependency tree
502 502 self.save_unmet(msg_id, *args)
503 503
504 504
505 505 @logged
506 506 def update_graph(self, dep_id, success=True):
507 507 """dep_id just finished. Update our dependency
508 508 graph and submit any jobs that just became runnable."""
509 509 # print ("\n\n***********")
510 510 # pprint (dep_id)
511 511 # pprint (self.graph)
512 512 # pprint (self.depending)
513 513 # pprint (self.all_completed)
514 514 # pprint (self.all_failed)
515 515 # print ("\n\n***********\n\n")
516 516 if dep_id not in self.graph:
517 517 return
518 518 jobs = self.graph.pop(dep_id)
519 519
520 520 for msg_id in jobs:
521 521 raw_msg, targets, after, follow, timeout = self.depending[msg_id]
522 522
523 523 if after.unreachable(self.all_completed, self.all_failed) or follow.unreachable(self.all_completed, self.all_failed):
524 524 self.fail_unreachable(msg_id)
525 525
526 526 elif after.check(self.all_completed, self.all_failed): # time deps met, maybe run
527 527 if self.maybe_run(msg_id, raw_msg, targets, MET, follow, timeout):
528 528
529 529 self.depending.pop(msg_id)
530 530 for mid in follow.union(after):
531 531 if mid in self.graph:
532 532 self.graph[mid].remove(msg_id)
533 533
534 534 #----------------------------------------------------------------------
535 535 # methods to be overridden by subclasses
536 536 #----------------------------------------------------------------------
537 537
538 538 def add_job(self, idx):
539 539 """Called after self.targets[idx] just got the job with header.
540 540 Override in subclasses. The default ordering is simple LRU.
541 541 The default loads are the number of outstanding jobs."""
542 542 self.loads[idx] += 1
543 543 for lis in (self.targets, self.loads):
544 544 lis.append(lis.pop(idx))
545 545
546 546
547 547 def finish_job(self, idx):
548 548 """Called after self.targets[idx] just finished a job.
549 549 Override in subclasses."""
550 550 self.loads[idx] -= 1
551 551
552 552
553 553
554 554 def launch_scheduler(in_addr, out_addr, mon_addr, not_addr, config=None,logname='ZMQ',
555 555 log_addr=None, loglevel=logging.DEBUG, scheme='lru',
556 556 identity=b'task'):
557 557 from zmq.eventloop import ioloop
558 558 from zmq.eventloop.zmqstream import ZMQStream
559 559
560 560 ctx = zmq.Context()
561 561 loop = ioloop.IOLoop()
562 562 print (in_addr, out_addr, mon_addr, not_addr)
563 563 ins = ZMQStream(ctx.socket(zmq.XREP),loop)
564 564 ins.setsockopt(zmq.IDENTITY, identity)
565 565 ins.bind(in_addr)
566 566
567 567 outs = ZMQStream(ctx.socket(zmq.XREP),loop)
568 568 outs.setsockopt(zmq.IDENTITY, identity)
569 569 outs.bind(out_addr)
570 570 mons = ZMQStream(ctx.socket(zmq.PUB),loop)
571 571 mons.connect(mon_addr)
572 572 nots = ZMQStream(ctx.socket(zmq.SUB),loop)
573 573 nots.setsockopt(zmq.SUBSCRIBE, '')
574 574 nots.connect(not_addr)
575 575
576 576 scheme = globals().get(scheme, None)
577 577 # setup logging
578 578 if log_addr:
579 579 connect_logger(logname, ctx, log_addr, root="scheduler", loglevel=loglevel)
580 580 else:
581 581 local_logger(logname, loglevel)
582 582
583 583 scheduler = TaskScheduler(client_stream=ins, engine_stream=outs,
584 584 mon_stream=mons, notifier_stream=nots,
585 585 scheme=scheme, loop=loop, logname=logname,
586 586 config=config)
587 587 scheduler.start()
588 588 try:
589 589 loop.start()
590 590 except KeyboardInterrupt:
591 591 print ("interrupted, exiting...", file=sys.__stderr__)
592 592
@@ -1,284 +1,284 b''
1 1 """A TaskRecord backend using sqlite3"""
2 2 #-----------------------------------------------------------------------------
3 3 # Copyright (C) 2011 The IPython Development Team
4 4 #
5 5 # Distributed under the terms of the BSD License. The full license is in
6 6 # the file COPYING, distributed as part of this software.
7 7 #-----------------------------------------------------------------------------
8 8
9 9 import json
10 10 import os
11 11 import cPickle as pickle
12 12 from datetime import datetime
13 13
14 14 import sqlite3
15 15
16 16 from zmq.eventloop import ioloop
17 17
18 18 from IPython.utils.traitlets import CUnicode, CStr, Instance, List
19 19 from .dictdb import BaseDB
20 from .util import ISO8601
20 from IPython.parallel.util import ISO8601
21 21
22 22 #-----------------------------------------------------------------------------
23 23 # SQLite operators, adapters, and converters
24 24 #-----------------------------------------------------------------------------
25 25
26 26 operators = {
27 27 '$lt' : "<",
28 28 '$gt' : ">",
29 29 # null is handled weird with ==,!=
30 30 '$eq' : "IS",
31 31 '$ne' : "IS NOT",
32 32 '$lte': "<=",
33 33 '$gte': ">=",
34 34 '$in' : ('IS', ' OR '),
35 35 '$nin': ('IS NOT', ' AND '),
36 36 # '$all': None,
37 37 # '$mod': None,
38 38 # '$exists' : None
39 39 }
40 40
41 41 def _adapt_datetime(dt):
42 42 return dt.strftime(ISO8601)
43 43
44 44 def _convert_datetime(ds):
45 45 if ds is None:
46 46 return ds
47 47 else:
48 48 return datetime.strptime(ds, ISO8601)
49 49
50 50 def _adapt_dict(d):
51 51 return json.dumps(d)
52 52
53 53 def _convert_dict(ds):
54 54 if ds is None:
55 55 return ds
56 56 else:
57 57 return json.loads(ds)
58 58
59 59 def _adapt_bufs(bufs):
60 60 # this is *horrible*
61 61 # copy buffers into single list and pickle it:
62 62 if bufs and isinstance(bufs[0], (bytes, buffer)):
63 63 return sqlite3.Binary(pickle.dumps(map(bytes, bufs),-1))
64 64 elif bufs:
65 65 return bufs
66 66 else:
67 67 return None
68 68
69 69 def _convert_bufs(bs):
70 70 if bs is None:
71 71 return []
72 72 else:
73 73 return pickle.loads(bytes(bs))
74 74
75 75 #-----------------------------------------------------------------------------
76 76 # SQLiteDB class
77 77 #-----------------------------------------------------------------------------
78 78
79 79 class SQLiteDB(BaseDB):
80 80 """SQLite3 TaskRecord backend."""
81 81
82 82 filename = CUnicode('tasks.db', config=True)
83 83 location = CUnicode('', config=True)
84 84 table = CUnicode("", config=True)
85 85
86 86 _db = Instance('sqlite3.Connection')
87 87 _keys = List(['msg_id' ,
88 88 'header' ,
89 89 'content',
90 90 'buffers',
91 91 'submitted',
92 92 'client_uuid' ,
93 93 'engine_uuid' ,
94 94 'started',
95 95 'completed',
96 96 'resubmitted',
97 97 'result_header' ,
98 98 'result_content' ,
99 99 'result_buffers' ,
100 100 'queue' ,
101 101 'pyin' ,
102 102 'pyout',
103 103 'pyerr',
104 104 'stdout',
105 105 'stderr',
106 106 ])
107 107
108 108 def __init__(self, **kwargs):
109 109 super(SQLiteDB, self).__init__(**kwargs)
110 110 if not self.table:
111 111 # use session, and prefix _, since starting with # is illegal
112 112 self.table = '_'+self.session.replace('-','_')
113 113 if not self.location:
114 114 if hasattr(self.config.Global, 'cluster_dir'):
115 115 self.location = self.config.Global.cluster_dir
116 116 else:
117 117 self.location = '.'
118 118 self._init_db()
119 119
120 120 # register db commit as 2s periodic callback
121 121 # to prevent clogging pipes
122 122 # assumes we are being run in a zmq ioloop app
123 123 loop = ioloop.IOLoop.instance()
124 124 pc = ioloop.PeriodicCallback(self._db.commit, 2000, loop)
125 125 pc.start()
126 126
127 127 def _defaults(self):
128 128 """create an empty record"""
129 129 d = {}
130 130 for key in self._keys:
131 131 d[key] = None
132 132 return d
133 133
134 134 def _init_db(self):
135 135 """Connect to the database and get new session number."""
136 136 # register adapters
137 137 sqlite3.register_adapter(datetime, _adapt_datetime)
138 138 sqlite3.register_converter('datetime', _convert_datetime)
139 139 sqlite3.register_adapter(dict, _adapt_dict)
140 140 sqlite3.register_converter('dict', _convert_dict)
141 141 sqlite3.register_adapter(list, _adapt_bufs)
142 142 sqlite3.register_converter('bufs', _convert_bufs)
143 143 # connect to the db
144 144 dbfile = os.path.join(self.location, self.filename)
145 145 self._db = sqlite3.connect(dbfile, detect_types=sqlite3.PARSE_DECLTYPES,
146 146 # isolation_level = None)#,
147 147 cached_statements=64)
148 148 # print dir(self._db)
149 149
150 150 self._db.execute("""CREATE TABLE IF NOT EXISTS %s
151 151 (msg_id text PRIMARY KEY,
152 152 header dict text,
153 153 content dict text,
154 154 buffers bufs blob,
155 155 submitted datetime text,
156 156 client_uuid text,
157 157 engine_uuid text,
158 158 started datetime text,
159 159 completed datetime text,
160 160 resubmitted datetime text,
161 161 result_header dict text,
162 162 result_content dict text,
163 163 result_buffers bufs blob,
164 164 queue text,
165 165 pyin text,
166 166 pyout text,
167 167 pyerr text,
168 168 stdout text,
169 169 stderr text)
170 170 """%self.table)
171 171 # self._db.execute("""CREATE TABLE IF NOT EXISTS %s_buffers
172 172 # (msg_id text, result integer, buffer blob)
173 173 # """%self.table)
174 174 self._db.commit()
175 175
176 176 def _dict_to_list(self, d):
177 177 """turn a mongodb-style record dict into a list."""
178 178
179 179 return [ d[key] for key in self._keys ]
180 180
181 181 def _list_to_dict(self, line):
182 182 """Inverse of dict_to_list"""
183 183 d = self._defaults()
184 184 for key,value in zip(self._keys, line):
185 185 d[key] = value
186 186
187 187 return d
188 188
189 189 def _render_expression(self, check):
190 190 """Turn a mongodb-style search dict into an SQL query."""
191 191 expressions = []
192 192 args = []
193 193
194 194 skeys = set(check.keys())
195 195 skeys.difference_update(set(self._keys))
196 196 skeys.difference_update(set(['buffers', 'result_buffers']))
197 197 if skeys:
198 198 raise KeyError("Illegal testing key(s): %s"%skeys)
199 199
200 200 for name,sub_check in check.iteritems():
201 201 if isinstance(sub_check, dict):
202 202 for test,value in sub_check.iteritems():
203 203 try:
204 204 op = operators[test]
205 205 except KeyError:
206 206 raise KeyError("Unsupported operator: %r"%test)
207 207 if isinstance(op, tuple):
208 208 op, join = op
209 209 expr = "%s %s ?"%(name, op)
210 210 if isinstance(value, (tuple,list)):
211 211 expr = '( %s )'%( join.join([expr]*len(value)) )
212 212 args.extend(value)
213 213 else:
214 214 args.append(value)
215 215 expressions.append(expr)
216 216 else:
217 217 # it's an equality check
218 218 expressions.append("%s IS ?"%name)
219 219 args.append(sub_check)
220 220
221 221 expr = " AND ".join(expressions)
222 222 return expr, args
223 223
224 224 def add_record(self, msg_id, rec):
225 225 """Add a new Task Record, by msg_id."""
226 226 d = self._defaults()
227 227 d.update(rec)
228 228 d['msg_id'] = msg_id
229 229 line = self._dict_to_list(d)
230 230 tups = '(%s)'%(','.join(['?']*len(line)))
231 231 self._db.execute("INSERT INTO %s VALUES %s"%(self.table, tups), line)
232 232 # self._db.commit()
233 233
234 234 def get_record(self, msg_id):
235 235 """Get a specific Task Record, by msg_id."""
236 236 cursor = self._db.execute("""SELECT * FROM %s WHERE msg_id==?"""%self.table, (msg_id,))
237 237 line = cursor.fetchone()
238 238 if line is None:
239 239 raise KeyError("No such msg: %r"%msg_id)
240 240 return self._list_to_dict(line)
241 241
242 242 def update_record(self, msg_id, rec):
243 243 """Update the data in an existing record."""
244 244 query = "UPDATE %s SET "%self.table
245 245 sets = []
246 246 keys = sorted(rec.keys())
247 247 values = []
248 248 for key in keys:
249 249 sets.append('%s = ?'%key)
250 250 values.append(rec[key])
251 251 query += ', '.join(sets)
252 252 query += ' WHERE msg_id == %r'%msg_id
253 253 self._db.execute(query, values)
254 254 # self._db.commit()
255 255
256 256 def drop_record(self, msg_id):
257 257 """Remove a record from the DB."""
258 258 self._db.execute("""DELETE FROM %s WHERE msg_id==?"""%self.table, (msg_id,))
259 259 # self._db.commit()
260 260
261 261 def drop_matching_records(self, check):
262 262 """Remove a record from the DB."""
263 263 expr,args = self._render_expression(check)
264 264 query = "DELETE FROM %s WHERE %s"%(self.table, expr)
265 265 self._db.execute(query,args)
266 266 # self._db.commit()
267 267
268 268 def find_records(self, check, id_only=False):
269 269 """Find records matching a query dict."""
270 270 req = 'msg_id' if id_only else '*'
271 271 expr,args = self._render_expression(check)
272 272 query = """SELECT %s FROM %s WHERE %s"""%(req, self.table, expr)
273 273 cursor = self._db.execute(query, args)
274 274 matches = cursor.fetchall()
275 275 if id_only:
276 276 return [ m[0] for m in matches ]
277 277 else:
278 278 records = {}
279 279 for line in matches:
280 280 rec = self._list_to_dict(line)
281 281 records[rec['msg_id']] = rec
282 282 return records
283 283
284 284 __all__ = ['SQLiteDB'] No newline at end of file
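For orientation, a sketch of the SQL that _render_expression above would build for a MongoDB-style query. The sample dict and table name are hypothetical, and the AND order depends on dict iteration order, so it is illustrative only.

check = {'completed': {'$ne': None}, 'engine_uuid': 'abcd'}
# 'completed' uses the '$ne' operator      -> "completed IS NOT ?" with arg None
# 'engine_uuid' is a plain equality check  -> "engine_uuid IS ?"   with arg 'abcd'
expr = "completed IS NOT ? AND engine_uuid IS ?"
args = [None, 'abcd']
query = "DELETE FROM %s WHERE %s" % ('_some_session_table', expr)  # as in drop_matching_records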
@@ -1,155 +1,156 b''
1 1 #!/usr/bin/env python
2 2 """A simple engine that talks to a controller over 0MQ.
3 3 It handles registration, etc., and launches a kernel
4 4 connected to the Controller's Schedulers.
5 5 """
6 6 #-----------------------------------------------------------------------------
7 7 # Copyright (C) 2010-2011 The IPython Development Team
8 8 #
9 9 # Distributed under the terms of the BSD License. The full license is in
10 10 # the file COPYING, distributed as part of this software.
11 11 #-----------------------------------------------------------------------------
12 12
13 13 from __future__ import print_function
14 14
15 15 import sys
16 16 import time
17 17
18 18 import zmq
19 19 from zmq.eventloop import ioloop, zmqstream
20 20
21 21 # internal
22 22 from IPython.utils.traitlets import Instance, Str, Dict, Int, Type, CFloat
23 23 # from IPython.utils.localinterfaces import LOCALHOST
24 24
25 from . import heartmonitor
26 from .factory import RegistrationFactory
25 from IPython.parallel.controller.heartmonitor import Heart
26 from IPython.parallel.factory import RegistrationFactory
27 from IPython.parallel.streamsession import Message
28 from IPython.parallel.util import disambiguate_url
29
27 30 from .streamkernel import Kernel
28 from .streamsession import Message
29 from .util import disambiguate_url
30 31
31 32 class EngineFactory(RegistrationFactory):
32 33 """IPython engine"""
33 34
34 35 # configurables:
35 36 user_ns=Dict(config=True)
36 37 out_stream_factory=Type('IPython.zmq.iostream.OutStream', config=True)
37 38 display_hook_factory=Type('IPython.zmq.displayhook.DisplayHook', config=True)
38 39 location=Str(config=True)
39 40 timeout=CFloat(2,config=True)
40 41
41 42 # not configurable:
42 43 id=Int(allow_none=True)
43 44 registrar=Instance('zmq.eventloop.zmqstream.ZMQStream')
44 45 kernel=Instance(Kernel)
45 46
46 47
47 48 def __init__(self, **kwargs):
48 49 super(EngineFactory, self).__init__(**kwargs)
49 50 ctx = self.context
50 51
51 52 reg = ctx.socket(zmq.XREQ)
52 53 reg.setsockopt(zmq.IDENTITY, self.ident)
53 54 reg.connect(self.url)
54 55 self.registrar = zmqstream.ZMQStream(reg, self.loop)
55 56
56 57 def register(self):
57 58 """send the registration_request"""
58 59
59 60 self.log.info("registering")
60 61 content = dict(queue=self.ident, heartbeat=self.ident, control=self.ident)
61 62 self.registrar.on_recv(self.complete_registration)
62 63 # print (self.session.key)
63 64 self.session.send(self.registrar, "registration_request",content=content)
64 65
65 66 def complete_registration(self, msg):
66 67 # print msg
67 68 self._abort_dc.stop()
68 69 ctx = self.context
69 70 loop = self.loop
70 71 identity = self.ident
71 72
72 73 idents,msg = self.session.feed_identities(msg)
73 74 msg = Message(self.session.unpack_message(msg))
74 75
75 76 if msg.content.status == 'ok':
76 77 self.id = int(msg.content.id)
77 78
78 79 # create Shell Streams (MUX, Task, etc.):
79 80 queue_addr = msg.content.mux
80 81 shell_addrs = [ str(queue_addr) ]
81 82 task_addr = msg.content.task
82 83 if task_addr:
83 84 shell_addrs.append(str(task_addr))
84 85
85 86 # Uncomment this to go back to two-socket model
86 87 # shell_streams = []
87 88 # for addr in shell_addrs:
88 89 # stream = zmqstream.ZMQStream(ctx.socket(zmq.XREP), loop)
89 90 # stream.setsockopt(zmq.IDENTITY, identity)
90 91 # stream.connect(disambiguate_url(addr, self.location))
91 92 # shell_streams.append(stream)
92 93
93 94 # Now use only one shell stream for mux and tasks
94 95 stream = zmqstream.ZMQStream(ctx.socket(zmq.XREP), loop)
95 96 stream.setsockopt(zmq.IDENTITY, identity)
96 97 shell_streams = [stream]
97 98 for addr in shell_addrs:
98 99 stream.connect(disambiguate_url(addr, self.location))
99 100 # end single stream-socket
100 101
101 102 # control stream:
102 103 control_addr = str(msg.content.control)
103 104 control_stream = zmqstream.ZMQStream(ctx.socket(zmq.XREP), loop)
104 105 control_stream.setsockopt(zmq.IDENTITY, identity)
105 106 control_stream.connect(disambiguate_url(control_addr, self.location))
106 107
107 108 # create iopub stream:
108 109 iopub_addr = msg.content.iopub
109 110 iopub_stream = zmqstream.ZMQStream(ctx.socket(zmq.PUB), loop)
110 111 iopub_stream.setsockopt(zmq.IDENTITY, identity)
111 112 iopub_stream.connect(disambiguate_url(iopub_addr, self.location))
112 113
113 114 # launch heartbeat
114 115 hb_addrs = msg.content.heartbeat
115 116 # print (hb_addrs)
116 117
117 118 # # Redirect input streams and set a display hook.
118 119 if self.out_stream_factory:
119 120 sys.stdout = self.out_stream_factory(self.session, iopub_stream, u'stdout')
120 121 sys.stdout.topic = 'engine.%i.stdout'%self.id
121 122 sys.stderr = self.out_stream_factory(self.session, iopub_stream, u'stderr')
122 123 sys.stderr.topic = 'engine.%i.stderr'%self.id
123 124 if self.display_hook_factory:
124 125 sys.displayhook = self.display_hook_factory(self.session, iopub_stream)
125 126 sys.displayhook.topic = 'engine.%i.pyout'%self.id
126 127
127 128 self.kernel = Kernel(config=self.config, int_id=self.id, ident=self.ident, session=self.session,
128 129 control_stream=control_stream, shell_streams=shell_streams, iopub_stream=iopub_stream,
129 130 loop=loop, user_ns = self.user_ns, logname=self.log.name)
130 131 self.kernel.start()
131 132 hb_addrs = [ disambiguate_url(addr, self.location) for addr in hb_addrs ]
132 heart = heartmonitor.Heart(*map(str, hb_addrs), heart_id=identity)
133 heart = Heart(*map(str, hb_addrs), heart_id=identity)
133 134 # ioloop.DelayedCallback(heart.start, 1000, self.loop).start()
134 135 heart.start()
135 136
136 137
137 138 else:
138 139 self.log.fatal("Registration Failed: %s"%msg)
139 140 raise Exception("Registration Failed: %s"%msg)
140 141
141 142 self.log.info("Completed registration with id %i"%self.id)
142 143
143 144
144 145 def abort(self):
145 146 self.log.fatal("Registration timed out")
146 147 self.session.send(self.registrar, "unregistration_request", content=dict(id=self.id))
147 148 time.sleep(1)
148 149 sys.exit(255)
149 150
150 151 def start(self):
151 152 dc = ioloop.DelayedCallback(self.register, 0, self.loop)
152 153 dc.start()
153 154 self._abort_dc = ioloop.DelayedCallback(self.abort, self.timeout*1000, self.loop)
154 155 self._abort_dc.start()
155 156
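A rough sketch of the registration handshake implied by EngineFactory above. The field names are taken from the code; the id and addresses below are placeholders.

# What the engine sends in its registration_request:
registration_request = dict(queue='engine-ident', heartbeat='engine-ident', control='engine-ident')

# What a successful registration_reply is expected to carry (placeholder values):
registration_reply = dict(
    status='ok',
    id=0,                                  # integer engine id assigned by the hub
    mux='tcp://127.0.0.1:10101',           # MUX queue address
    task='tcp://127.0.0.1:10102',          # task scheduler address (may be empty)
    control='tcp://127.0.0.1:10103',
    iopub='tcp://127.0.0.1:10104',
    heartbeat=['tcp://127.0.0.1:10105', 'tcp://127.0.0.1:10106'],
)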
@@ -1,225 +1,225 b''
1 1 """KernelStarter class that intercepts Control Queue messages, and handles process management."""
2 2 #-----------------------------------------------------------------------------
3 3 # Copyright (C) 2010-2011 The IPython Development Team
4 4 #
5 5 # Distributed under the terms of the BSD License. The full license is in
6 6 # the file COPYING, distributed as part of this software.
7 7 #-----------------------------------------------------------------------------
8 8
9 9 from zmq.eventloop import ioloop
10 10
11 from .streamsession import StreamSession
11 from IPython.parallel.streamsession import StreamSession
12 12
13 13 class KernelStarter(object):
14 14 """Object for resetting/killing the Kernel."""
15 15
16 16
17 17 def __init__(self, session, upstream, downstream, *kernel_args, **kernel_kwargs):
18 18 self.session = session
19 19 self.upstream = upstream
20 20 self.downstream = downstream
21 21 self.kernel_args = kernel_args
22 22 self.kernel_kwargs = kernel_kwargs
23 23 self.handlers = {}
24 24 for method in 'shutdown_request shutdown_reply'.split():
25 25 self.handlers[method] = getattr(self, method)
26 26
27 27 def start(self):
28 28 self.upstream.on_recv(self.dispatch_request)
29 29 self.downstream.on_recv(self.dispatch_reply)
30 30
31 31 #--------------------------------------------------------------------------
32 32 # Dispatch methods
33 33 #--------------------------------------------------------------------------
34 34
35 35 def dispatch_request(self, raw_msg):
36 36 idents, msg = self.session.feed_identities(raw_msg)
37 37 try:
38 38 msg = self.session.unpack_message(msg, content=False)
39 39 except:
40 40 print ("bad msg: %s"%msg)
41 41
42 42 msgtype = msg['msg_type']
43 43 handler = self.handlers.get(msgtype, None)
44 44 if handler is None:
45 45 self.downstream.send_multipart(raw_msg, copy=False)
46 46 else:
47 47 handler(msg)
48 48
49 49 def dispatch_reply(self, raw_msg):
50 50 idents, msg = self.session.feed_identities(raw_msg)
51 51 try:
52 52 msg = self.session.unpack_message(msg, content=False)
53 53 except:
54 54 print ("bad msg: %s"%msg)
55 55
56 56 msgtype = msg['msg_type']
57 57 handler = self.handlers.get(msgtype, None)
58 58 if handler is None:
59 59 self.upstream.send_multipart(raw_msg, copy=False)
60 60 else:
61 61 handler(msg)
62 62
63 63 #--------------------------------------------------------------------------
64 64 # Handlers
65 65 #--------------------------------------------------------------------------
66 66
67 67 def shutdown_request(self, msg):
68 68 """"""
69 69 self.downstream.send_multipart(msg)
70 70
71 71 #--------------------------------------------------------------------------
72 72 # Kernel process management methods, from KernelManager:
73 73 #--------------------------------------------------------------------------
74 74
75 75 def _check_local(addr):
76 76 if isinstance(addr, tuple):
77 77 addr = addr[0]
78 78 return addr in LOCAL_IPS
79 79
80 80 def start_kernel(self, **kw):
81 81 """Starts a kernel process and configures the manager to use it.
82 82
83 83 If random ports (port=0) are being used, this method must be called
84 84 before the channels are created.
85 85
86 86 Parameters:
87 87 -----------
88 88 ipython : bool, optional (default True)
89 89 Whether to use an IPython kernel instead of a plain Python kernel.
90 90 """
91 91 self.kernel = Process(target=make_kernel, args=self.kernel_args,
92 92 kwargs=self.kernel_kwargs)
93 93
94 94 def shutdown_kernel(self, restart=False):
95 95 """ Attempts to the stop the kernel process cleanly. If the kernel
96 96 cannot be stopped, it is killed, if possible.
97 97 """
98 98 # FIXME: Shutdown does not work on Windows due to ZMQ errors!
99 99 if sys.platform == 'win32':
100 100 self.kill_kernel()
101 101 return
102 102
103 103 # Don't send any additional kernel kill messages immediately, to give
104 104 # the kernel a chance to properly execute shutdown actions. Wait for at
105 105 # most 1s, checking every 0.1s.
106 106 self.xreq_channel.shutdown(restart=restart)
107 107 for i in range(10):
108 108 if self.is_alive:
109 109 time.sleep(0.1)
110 110 else:
111 111 break
112 112 else:
113 113 # OK, we've waited long enough.
114 114 if self.has_kernel:
115 115 self.kill_kernel()
116 116
117 117 def restart_kernel(self, now=False):
118 118 """Restarts a kernel with the same arguments that were used to launch
119 119 it. If the old kernel was launched with random ports, the same ports
120 120 will be used for the new kernel.
121 121
122 122 Parameters
123 123 ----------
124 124 now : bool, optional
125 125 If True, the kernel is forcefully restarted *immediately*, without
126 126 having a chance to do any cleanup action. Otherwise the kernel is
127 127 given 1s to clean up before a forceful restart is issued.
128 128
129 129 In all cases the kernel is restarted, the only difference is whether
130 130 it is given a chance to perform a clean shutdown or not.
131 131 """
132 132 if self._launch_args is None:
133 133 raise RuntimeError("Cannot restart the kernel. "
134 134 "No previous call to 'start_kernel'.")
135 135 else:
136 136 if self.has_kernel:
137 137 if now:
138 138 self.kill_kernel()
139 139 else:
140 140 self.shutdown_kernel(restart=True)
141 141 self.start_kernel(**self._launch_args)
142 142
143 143 # FIXME: Messages get dropped in Windows due to probable ZMQ bug
144 144 # unless there is some delay here.
145 145 if sys.platform == 'win32':
146 146 time.sleep(0.2)
147 147
148 148 @property
149 149 def has_kernel(self):
150 150 """Returns whether a kernel process has been specified for the kernel
151 151 manager.
152 152 """
153 153 return self.kernel is not None
154 154
155 155 def kill_kernel(self):
156 156 """ Kill the running kernel. """
157 157 if self.has_kernel:
158 158 # Pause the heart beat channel if it exists.
159 159 if self._hb_channel is not None:
160 160 self._hb_channel.pause()
161 161
162 162 # Attempt to kill the kernel.
163 163 try:
164 164 self.kernel.kill()
165 165 except OSError, e:
166 166 # In Windows, we will get an Access Denied error if the process
167 167 # has already terminated. Ignore it.
168 168 if not (sys.platform == 'win32' and e.winerror == 5):
169 169 raise
170 170 self.kernel = None
171 171 else:
172 172 raise RuntimeError("Cannot kill kernel. No kernel is running!")
173 173
174 174 def interrupt_kernel(self):
175 175 """ Interrupts the kernel. Unlike ``signal_kernel``, this operation is
176 176 well supported on all platforms.
177 177 """
178 178 if self.has_kernel:
179 179 if sys.platform == 'win32':
180 180 from parentpoller import ParentPollerWindows as Poller
181 181 Poller.send_interrupt(self.kernel.win32_interrupt_event)
182 182 else:
183 183 self.kernel.send_signal(signal.SIGINT)
184 184 else:
185 185 raise RuntimeError("Cannot interrupt kernel. No kernel is running!")
186 186
187 187 def signal_kernel(self, signum):
188 188 """ Sends a signal to the kernel. Note that since only SIGTERM is
189 189 supported on Windows, this function is only useful on Unix systems.
190 190 """
191 191 if self.has_kernel:
192 192 self.kernel.send_signal(signum)
193 193 else:
194 194 raise RuntimeError("Cannot signal kernel. No kernel is running!")
195 195
196 196 @property
197 197 def is_alive(self):
198 198 """Is the kernel process still running?"""
199 199 # FIXME: not using a heartbeat means this method is broken for any
200 200 # remote kernel, it's only capable of handling local kernels.
201 201 if self.has_kernel:
202 202 if self.kernel.poll() is None:
203 203 return True
204 204 else:
205 205 return False
206 206 else:
207 207 # We didn't start the kernel with this KernelManager so we don't
208 208 # know if it is running. We should use a heartbeat for this case.
209 209 return True
210 210
211 211
212 212 def make_starter(up_addr, down_addr, *args, **kwargs):
213 213 """entry point function for launching a kernelstarter in a subprocess"""
214 214 loop = ioloop.IOLoop.instance()
215 215 ctx = zmq.Context()
216 216 session = StreamSession()
217 217 upstream = zmqstream.ZMQStream(ctx.socket(zmq.XREQ),loop)
218 218 upstream.connect(up_addr)
219 219 downstream = zmqstream.ZMQStream(ctx.socket(zmq.XREQ),loop)
220 220 downstream.connect(down_addr)
221 221
222 222 starter = KernelStarter(session, upstream, downstream, *args, **kwargs)
223 223 starter.start()
224 224 loop.start()
225 225 No newline at end of file
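A minimal, hypothetical invocation of make_starter above. The endpoints are placeholders, and since make_starter blocks in loop.start(), it would normally be run in a subprocess.

from multiprocessing import Process

up_addr = 'tcp://127.0.0.1:10201'      # placeholder: controller-facing relay endpoint
down_addr = 'tcp://127.0.0.1:10202'    # placeholder: kernel-facing relay endpoint

# p = Process(target=make_starter, args=(up_addr, down_addr))
# p.start()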
@@ -1,489 +1,423 b''
1 1 #!/usr/bin/env python
2 2 """
3 3 Kernel adapted from kernel.py to use ZMQ Streams
4 4 """
5 5 #-----------------------------------------------------------------------------
6 6 # Copyright (C) 2010-2011 The IPython Development Team
7 7 #
8 8 # Distributed under the terms of the BSD License. The full license is in
9 9 # the file COPYING, distributed as part of this software.
10 10 #-----------------------------------------------------------------------------
11 11
12 12 #-----------------------------------------------------------------------------
13 13 # Imports
14 14 #-----------------------------------------------------------------------------
15 15
16 16 # Standard library imports.
17 17 from __future__ import print_function
18 18
19 19 import sys
20 20 import time
21 21
22 22 from code import CommandCompiler
23 23 from datetime import datetime
24 24 from pprint import pprint
25 from signal import SIGTERM, SIGKILL
26 25
27 26 # System library imports.
28 27 import zmq
29 28 from zmq.eventloop import ioloop, zmqstream
30 29
31 30 # Local imports.
32 from IPython.core import ultratb
33 31 from IPython.utils.traitlets import Instance, List, Int, Dict, Set, Str
34 32 from IPython.zmq.completer import KernelCompleter
35 from IPython.zmq.iostream import OutStream
36 from IPython.zmq.displayhook import DisplayHook
37 33
38 from . import heartmonitor
39 from .client import Client
40 from .error import wrap_exception
41 from .factory import SessionFactory
42 from .streamsession import StreamSession
43 from .util import serialize_object, unpack_apply_message, ISO8601, Namespace
34 from IPython.parallel.error import wrap_exception
35 from IPython.parallel.factory import SessionFactory
36 from IPython.parallel.util import serialize_object, unpack_apply_message, ISO8601
44 37
45 38 def printer(*args):
46 39 pprint(args, stream=sys.__stdout__)
47 40
48 41
49 42 class _Passer:
50 43 """Empty class that implements `send()` that does nothing."""
51 44 def send(self, *args, **kwargs):
52 45 pass
53 46 send_multipart = send
54 47
55 48
56 49 #-----------------------------------------------------------------------------
57 50 # Main kernel class
58 51 #-----------------------------------------------------------------------------
59 52
60 53 class Kernel(SessionFactory):
61 54
62 55 #---------------------------------------------------------------------------
63 56 # Kernel interface
64 57 #---------------------------------------------------------------------------
65 58
66 59 # kwargs:
67 60 int_id = Int(-1, config=True)
68 61 user_ns = Dict(config=True)
69 62 exec_lines = List(config=True)
70 63
71 64 control_stream = Instance(zmqstream.ZMQStream)
72 65 task_stream = Instance(zmqstream.ZMQStream)
73 66 iopub_stream = Instance(zmqstream.ZMQStream)
74 client = Instance('IPython.parallel.client.Client')
67 client = Instance('IPython.parallel.Client')
75 68
76 69 # internals
77 70 shell_streams = List()
78 71 compiler = Instance(CommandCompiler, (), {})
79 72 completer = Instance(KernelCompleter)
80 73
81 74 aborted = Set()
82 75 shell_handlers = Dict()
83 76 control_handlers = Dict()
84 77
85 78 def _set_prefix(self):
86 79 self.prefix = "engine.%s"%self.int_id
87 80
88 81 def _connect_completer(self):
89 82 self.completer = KernelCompleter(self.user_ns)
90 83
91 84 def __init__(self, **kwargs):
92 85 super(Kernel, self).__init__(**kwargs)
93 86 self._set_prefix()
94 87 self._connect_completer()
95 88
96 89 self.on_trait_change(self._set_prefix, 'id')
97 90 self.on_trait_change(self._connect_completer, 'user_ns')
98 91
99 92 # Build dict of handlers for message types
100 93 for msg_type in ['execute_request', 'complete_request', 'apply_request',
101 94 'clear_request']:
102 95 self.shell_handlers[msg_type] = getattr(self, msg_type)
103 96
104 97 for msg_type in ['shutdown_request', 'abort_request']+self.shell_handlers.keys():
105 98 self.control_handlers[msg_type] = getattr(self, msg_type)
106 99
107 100 self._initial_exec_lines()
108 101
109 102 def _wrap_exception(self, method=None):
110 103 e_info = dict(engine_uuid=self.ident, engine_id=self.int_id, method=method)
111 104 content=wrap_exception(e_info)
112 105 return content
113 106
114 107 def _initial_exec_lines(self):
115 108 s = _Passer()
116 109 content = dict(silent=True, user_variable=[],user_expressions=[])
117 110 for line in self.exec_lines:
118 111 self.log.debug("executing initialization: %s"%line)
119 112 content.update({'code':line})
120 113 msg = self.session.msg('execute_request', content)
121 114 self.execute_request(s, [], msg)
122 115
123 116
124 117 #-------------------- control handlers -----------------------------
125 118 def abort_queues(self):
126 119 for stream in self.shell_streams:
127 120 if stream:
128 121 self.abort_queue(stream)
129 122
130 123 def abort_queue(self, stream):
131 124 while True:
132 125 try:
133 126 msg = self.session.recv(stream, zmq.NOBLOCK,content=True)
134 127 except zmq.ZMQError as e:
135 128 if e.errno == zmq.EAGAIN:
136 129 break
137 130 else:
138 131 return
139 132 else:
140 133 if msg is None:
141 134 return
142 135 else:
143 136 idents,msg = msg
144 137
145 138 # assert self.reply_socketly_socket.rcvmore(), "Unexpected missing message part."
146 139 # msg = self.reply_socket.recv_json()
147 140 self.log.info("Aborting:")
148 141 self.log.info(str(msg))
149 142 msg_type = msg['msg_type']
150 143 reply_type = msg_type.split('_')[0] + '_reply'
151 144 # reply_msg = self.session.msg(reply_type, {'status' : 'aborted'}, msg)
152 145 # self.reply_socket.send(ident,zmq.SNDMORE)
153 146 # self.reply_socket.send_json(reply_msg)
154 147 reply_msg = self.session.send(stream, reply_type,
155 148 content={'status' : 'aborted'}, parent=msg, ident=idents)[0]
156 149 self.log.debug(str(reply_msg))
157 150 # We need to wait a bit for requests to come in. This can probably
158 151 # be set shorter for true asynchronous clients.
159 152 time.sleep(0.05)
160 153
161 154 def abort_request(self, stream, ident, parent):
162 155 """abort a specifig msg by id"""
163 156 msg_ids = parent['content'].get('msg_ids', None)
164 157 if isinstance(msg_ids, basestring):
165 158 msg_ids = [msg_ids]
166 159 if not msg_ids:
167 160 self.abort_queues()
168 161 for mid in msg_ids:
169 162 self.aborted.add(str(mid))
170 163
171 164 content = dict(status='ok')
172 165 reply_msg = self.session.send(stream, 'abort_reply', content=content,
173 166 parent=parent, ident=ident)
174 167 self.log.debug(str(reply_msg))
175 168
176 169 def shutdown_request(self, stream, ident, parent):
177 170 """kill ourself. This should really be handled in an external process"""
178 171 try:
179 172 self.abort_queues()
180 173 except:
181 174 content = self._wrap_exception('shutdown')
182 175 else:
183 176 content = dict(parent['content'])
184 177 content['status'] = 'ok'
185 178 msg = self.session.send(stream, 'shutdown_reply',
186 179 content=content, parent=parent, ident=ident)
187 180 self.log.debug(str(msg))
188 181 dc = ioloop.DelayedCallback(lambda : sys.exit(0), 1000, self.loop)
189 182 dc.start()
190 183
191 184 def dispatch_control(self, msg):
192 185 idents,msg = self.session.feed_identities(msg, copy=False)
193 186 try:
194 187 msg = self.session.unpack_message(msg, content=True, copy=False)
195 188 except:
196 189 self.log.error("Invalid Message", exc_info=True)
197 190 return
198 191
199 192 header = msg['header']
200 193 msg_id = header['msg_id']
201 194
202 195 handler = self.control_handlers.get(msg['msg_type'], None)
203 196 if handler is None:
204 197 self.log.error("UNKNOWN CONTROL MESSAGE TYPE: %r"%msg['msg_type'])
205 198 else:
206 199 handler(self.control_stream, idents, msg)
207 200
208 201
209 202 #-------------------- queue helpers ------------------------------
210 203
211 204 def check_dependencies(self, dependencies):
212 205 if not dependencies:
213 206 return True
214 207 if len(dependencies) == 2 and dependencies[0] in 'any all'.split():
215 208 anyorall = dependencies[0]
216 209 dependencies = dependencies[1]
217 210 else:
218 211 anyorall = 'all'
219 212 results = self.client.get_results(dependencies,status_only=True)
220 213 if results['status'] != 'ok':
221 214 return False
222 215
223 216 if anyorall == 'any':
224 217 if not results['completed']:
225 218 return False
226 219 else:
227 220 if results['pending']:
228 221 return False
229 222
230 223 return True
231 224
232 225 def check_aborted(self, msg_id):
233 226 return msg_id in self.aborted
234 227
235 228 #-------------------- queue handlers -----------------------------
236 229
237 230 def clear_request(self, stream, idents, parent):
238 231 """Clear our namespace."""
239 232 self.user_ns = {}
240 233 msg = self.session.send(stream, 'clear_reply', ident=idents, parent=parent,
241 234 content = dict(status='ok'))
242 235 self._initial_exec_lines()
243 236
244 237 def execute_request(self, stream, ident, parent):
245 238 self.log.debug('execute request %s'%parent)
246 239 try:
247 240 code = parent[u'content'][u'code']
248 241 except:
249 242 self.log.error("Got bad msg: %s"%parent, exc_info=True)
250 243 return
251 244 self.session.send(self.iopub_stream, u'pyin', {u'code':code},parent=parent,
252 245 ident='%s.pyin'%self.prefix)
253 246 started = datetime.now().strftime(ISO8601)
254 247 try:
255 248 comp_code = self.compiler(code, '<zmq-kernel>')
256 249 # allow for not overriding displayhook
257 250 if hasattr(sys.displayhook, 'set_parent'):
258 251 sys.displayhook.set_parent(parent)
259 252 sys.stdout.set_parent(parent)
260 253 sys.stderr.set_parent(parent)
261 254 exec comp_code in self.user_ns, self.user_ns
262 255 except:
263 256 exc_content = self._wrap_exception('execute')
264 257 # exc_msg = self.session.msg(u'pyerr', exc_content, parent)
265 258 self.session.send(self.iopub_stream, u'pyerr', exc_content, parent=parent,
266 259 ident='%s.pyerr'%self.prefix)
267 260 reply_content = exc_content
268 261 else:
269 262 reply_content = {'status' : 'ok'}
270 263
271 264 reply_msg = self.session.send(stream, u'execute_reply', reply_content, parent=parent,
272 265 ident=ident, subheader = dict(started=started))
273 266 self.log.debug(str(reply_msg))
274 267 if reply_msg['content']['status'] == u'error':
275 268 self.abort_queues()
276 269
277 270 def complete_request(self, stream, ident, parent):
278 271 matches = {'matches' : self.complete(parent),
279 272 'status' : 'ok'}
280 273 completion_msg = self.session.send(stream, 'complete_reply',
281 274 matches, parent, ident)
282 275 # print >> sys.__stdout__, completion_msg
283 276
284 277 def complete(self, msg):
285 278 return self.completer.complete(msg.content.line, msg.content.text)
286 279
287 280 def apply_request(self, stream, ident, parent):
288 281 # flush previous reply, so this request won't block it
289 282 stream.flush(zmq.POLLOUT)
290 283
291 284 try:
292 285 content = parent[u'content']
293 286 bufs = parent[u'buffers']
294 287 msg_id = parent['header']['msg_id']
295 288 # bound = parent['header'].get('bound', False)
296 289 except:
297 290 self.log.error("Got bad msg: %s"%parent, exc_info=True)
298 291 return
299 292 # pyin_msg = self.session.msg(u'pyin',{u'code':code}, parent=parent)
300 293 # self.iopub_stream.send(pyin_msg)
301 294 # self.session.send(self.iopub_stream, u'pyin', {u'code':code},parent=parent)
302 295 sub = {'dependencies_met' : True, 'engine' : self.ident,
303 296 'started': datetime.now().strftime(ISO8601)}
304 297 try:
305 298 # allow for not overriding displayhook
306 299 if hasattr(sys.displayhook, 'set_parent'):
307 300 sys.displayhook.set_parent(parent)
308 301 sys.stdout.set_parent(parent)
309 302 sys.stderr.set_parent(parent)
310 303 # exec "f(*args,**kwargs)" in self.user_ns, self.user_ns
311 304 working = self.user_ns
312 305 # suffix =
313 306 prefix = "_"+str(msg_id).replace("-","")+"_"
314 307
315 308 f,args,kwargs = unpack_apply_message(bufs, working, copy=False)
316 309 # if bound:
317 310 # bound_ns = Namespace(working)
318 311 # args = [bound_ns]+list(args)
319 312
320 313 fname = getattr(f, '__name__', 'f')
321 314
322 315 fname = prefix+"f"
323 316 argname = prefix+"args"
324 317 kwargname = prefix+"kwargs"
325 318 resultname = prefix+"result"
326 319
327 320 ns = { fname : f, argname : args, kwargname : kwargs , resultname : None }
328 321 # print ns
329 322 working.update(ns)
330 323 code = "%s=%s(*%s,**%s)"%(resultname, fname, argname, kwargname)
331 324 try:
332 325 exec code in working,working
333 326 result = working.get(resultname)
334 327 finally:
335 328 for key in ns.iterkeys():
336 329 working.pop(key)
337 330 # if bound:
338 331 # working.update(bound_ns)
339 332
340 333 packed_result,buf = serialize_object(result)
341 334 result_buf = [packed_result]+buf
342 335 except:
343 336 exc_content = self._wrap_exception('apply')
344 337 # exc_msg = self.session.msg(u'pyerr', exc_content, parent)
345 338 self.session.send(self.iopub_stream, u'pyerr', exc_content, parent=parent,
346 339 ident='%s.pyerr'%self.prefix)
347 340 reply_content = exc_content
348 341 result_buf = []
349 342
350 343 if exc_content['ename'] == 'UnmetDependency':
351 344 sub['dependencies_met'] = False
352 345 else:
353 346 reply_content = {'status' : 'ok'}
354 347
355 348 # put 'ok'/'error' status in header, for scheduler introspection:
356 349 sub['status'] = reply_content['status']
357 350
358 351 reply_msg = self.session.send(stream, u'apply_reply', reply_content,
359 352 parent=parent, ident=ident,buffers=result_buf, subheader=sub)
360 353
361 354 # flush i/o
362 355 # should this be before reply_msg is sent, like in the single-kernel code,
363 356 # or should nothing get in the way of real results?
364 357 sys.stdout.flush()
365 358 sys.stderr.flush()
366 359
367 360 def dispatch_queue(self, stream, msg):
368 361 self.control_stream.flush()
369 362 idents,msg = self.session.feed_identities(msg, copy=False)
370 363 try:
371 364 msg = self.session.unpack_message(msg, content=True, copy=False)
372 365 except:
373 366 self.log.error("Invalid Message", exc_info=True)
374 367 return
375 368
376 369
377 370 header = msg['header']
378 371 msg_id = header['msg_id']
379 372 if self.check_aborted(msg_id):
380 373 self.aborted.remove(msg_id)
381 374 # is it safe to assume a msg_id will not be resubmitted?
382 375 reply_type = msg['msg_type'].split('_')[0] + '_reply'
383 376 reply_msg = self.session.send(stream, reply_type,
384 377 content={'status' : 'aborted'}, parent=msg, ident=idents)
385 378 return
386 379 handler = self.shell_handlers.get(msg['msg_type'], None)
387 380 if handler is None:
388 381 self.log.error("UNKNOWN MESSAGE TYPE: %r"%msg['msg_type'])
389 382 else:
390 383 handler(stream, idents, msg)
391 384
392 385 def start(self):
393 386 #### stream mode:
394 387 if self.control_stream:
395 388 self.control_stream.on_recv(self.dispatch_control, copy=False)
396 389 self.control_stream.on_err(printer)
397 390
398 391 def make_dispatcher(stream):
399 392 def dispatcher(msg):
400 393 return self.dispatch_queue(stream, msg)
401 394 return dispatcher
402 395
403 396 for s in self.shell_streams:
404 397 s.on_recv(make_dispatcher(s), copy=False)
405 398 s.on_err(printer)
406 399
407 400 if self.iopub_stream:
408 401 self.iopub_stream.on_err(printer)
409 402
410 403 #### while True mode:
411 404 # while True:
412 405 # idle = True
413 406 # try:
414 407 # msg = self.shell_stream.socket.recv_multipart(
415 408 # zmq.NOBLOCK, copy=False)
416 409 # except zmq.ZMQError, e:
417 410 # if e.errno != zmq.EAGAIN:
418 411 # raise e
419 412 # else:
420 413 # idle=False
421 414 # self.dispatch_queue(self.shell_stream, msg)
422 415 #
423 416 # if not self.task_stream.empty():
424 417 # idle=False
425 418 # msg = self.task_stream.recv_multipart()
426 419 # self.dispatch_queue(self.task_stream, msg)
427 420 # if idle:
428 421 # # don't busywait
429 422 # time.sleep(1e-3)
430 423
431 def make_kernel(int_id, identity, control_addr, shell_addrs, iopub_addr, hb_addrs,
432 client_addr=None, loop=None, context=None, key=None,
433 out_stream_factory=OutStream, display_hook_factory=DisplayHook):
434 """NO LONGER IN USE"""
435 # create loop, context, and session:
436 if loop is None:
437 loop = ioloop.IOLoop.instance()
438 if context is None:
439 context = zmq.Context()
440 c = context
441 session = StreamSession(key=key)
442 # print (session.key)
443 # print (control_addr, shell_addrs, iopub_addr, hb_addrs)
444
445 # create Control Stream
446 control_stream = zmqstream.ZMQStream(c.socket(zmq.PAIR), loop)
447 control_stream.setsockopt(zmq.IDENTITY, identity)
448 control_stream.connect(control_addr)
449
450 # create Shell Streams (MUX, Task, etc.):
451 shell_streams = []
452 for addr in shell_addrs:
453 stream = zmqstream.ZMQStream(c.socket(zmq.PAIR), loop)
454 stream.setsockopt(zmq.IDENTITY, identity)
455 stream.connect(addr)
456 shell_streams.append(stream)
457
458 # create iopub stream:
459 iopub_stream = zmqstream.ZMQStream(c.socket(zmq.PUB), loop)
460 iopub_stream.setsockopt(zmq.IDENTITY, identity)
461 iopub_stream.connect(iopub_addr)
462
463 # Redirect input streams and set a display hook.
464 if out_stream_factory:
465 sys.stdout = out_stream_factory(session, iopub_stream, u'stdout')
466 sys.stdout.topic = 'engine.%i.stdout'%int_id
467 sys.stderr = out_stream_factory(session, iopub_stream, u'stderr')
468 sys.stderr.topic = 'engine.%i.stderr'%int_id
469 if display_hook_factory:
470 sys.displayhook = display_hook_factory(session, iopub_stream)
471 sys.displayhook.topic = 'engine.%i.pyout'%int_id
472
473
474 # launch heartbeat
475 heart = heartmonitor.Heart(*map(str, hb_addrs), heart_id=identity)
476 heart.start()
477
478 # create (optional) Client
479 if client_addr:
480 client = Client(client_addr, username=identity)
481 else:
482 client = None
483
484 kernel = Kernel(id=int_id, session=session, control_stream=control_stream,
485 shell_streams=shell_streams, iopub_stream=iopub_stream,
486 client=client, loop=loop)
487 kernel.start()
488 return loop, c, kernel
489
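The `apply_request` handler above runs the unpacked function by injecting it into the user namespace under msg_id-derived names, exec-ing a one-line call, and then removing those names again. A minimal standalone sketch of that pattern follows; `run_in_namespace` is a hypothetical helper introduced only for illustration, and it skips the buffer unpacking, serialization, and reply plumbing the real handler performs:

```python
# Sketch (assumption: simplified) of the namespace-injection pattern used by apply_request.
def run_in_namespace(f, args, kwargs, working, msg_id="abc123"):
    prefix = "_" + str(msg_id).replace("-", "") + "_"
    names = {prefix + "f": f,
             prefix + "args": args,
             prefix + "kwargs": kwargs,
             prefix + "result": None}
    working.update(names)
    # build the one-line call, e.g. "_abc123_result = _abc123_f(*_abc123_args, **_abc123_kwargs)"
    code = "%sresult = %sf(*%sargs, **%skwargs)" % ((prefix,) * 4)
    try:
        exec(code, working, working)            # run the call inside the user namespace
        return working.get(prefix + "result")
    finally:
        for key in names:
            working.pop(key, None)              # clean the temporary names back out

user_ns = {}
print(run_in_namespace(lambda x, y=1: x + y, (2,), {"y": 3}, user_ns))  # -> 5
```

Running the call through an exec string, rather than calling `f` directly, is what lets the function resolve globals against the engine's user namespace instead of the kernel module's.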
@@ -1,152 +1,152 b''
1 1 """Base config factories."""
2 2
3 3 #-----------------------------------------------------------------------------
4 4 # Copyright (C) 2008-2009 The IPython Development Team
5 5 #
6 6 # Distributed under the terms of the BSD License. The full license is in
7 7 # the file COPYING, distributed as part of this software.
8 8 #-----------------------------------------------------------------------------
9 9
10 10 #-----------------------------------------------------------------------------
11 11 # Imports
12 12 #-----------------------------------------------------------------------------
13 13
14 14
15 15 import logging
16 16 import os
17 17 import uuid
18 18
19 19 from zmq.eventloop.ioloop import IOLoop
20 20
21 21 from IPython.config.configurable import Configurable
22 22 from IPython.utils.importstring import import_item
23 23 from IPython.utils.traitlets import Str,Int,Instance, CUnicode, CStr
24 24
25 25 import IPython.parallel.streamsession as ss
26 from IPython.parallel.entry_point import select_random_ports
26 from IPython.parallel.util import select_random_ports
27 27
28 28 #-----------------------------------------------------------------------------
29 29 # Classes
30 30 #-----------------------------------------------------------------------------
31 31 class LoggingFactory(Configurable):
32 32 """A most basic class, that has a `log` (type:`Logger`) attribute, set via a `logname` Trait."""
33 33 log = Instance('logging.Logger', ('ZMQ', logging.WARN))
34 34 logname = CUnicode('ZMQ')
35 35 def _logname_changed(self, name, old, new):
36 36 self.log = logging.getLogger(new)
37 37
38 38
39 39 class SessionFactory(LoggingFactory):
40 40 """The Base factory from which every factory in IPython.parallel inherits"""
41 41
42 42 packer = Str('',config=True)
43 43 unpacker = Str('',config=True)
44 44 ident = CStr('',config=True)
45 45 def _ident_default(self):
46 46 return str(uuid.uuid4())
47 47 username = CUnicode(os.environ.get('USER','username'),config=True)
48 48 exec_key = CUnicode('',config=True)
49 49 # not configurable:
50 50 context = Instance('zmq.Context', (), {})
51 51 session = Instance('IPython.parallel.streamsession.StreamSession')
52 52 loop = Instance('zmq.eventloop.ioloop.IOLoop', allow_none=False)
53 53 def _loop_default(self):
54 54 return IOLoop.instance()
55 55
56 56
57 57 def __init__(self, **kwargs):
58 58 super(SessionFactory, self).__init__(**kwargs)
59 59 exec_key = self.exec_key or None
60 60 # set the packers:
61 61 if not self.packer:
62 62 packer_f = unpacker_f = None
63 63 elif self.packer.lower() == 'json':
64 64 packer_f = ss.json_packer
65 65 unpacker_f = ss.json_unpacker
66 66 elif self.packer.lower() == 'pickle':
67 67 packer_f = ss.pickle_packer
68 68 unpacker_f = ss.pickle_unpacker
69 69 else:
70 70 packer_f = import_item(self.packer)
71 71 unpacker_f = import_item(self.unpacker)
72 72
73 73 # construct the session
74 74 self.session = ss.StreamSession(self.username, self.ident, packer=packer_f, unpacker=unpacker_f, key=exec_key)
75 75
76 76
77 77 class RegistrationFactory(SessionFactory):
78 78 """The Base Configurable for objects that involve registration."""
79 79
80 80 url = Str('', config=True) # url takes precedence over ip,regport,transport
81 81 transport = Str('tcp', config=True)
82 82 ip = Str('127.0.0.1', config=True)
83 83 regport = Instance(int, config=True)
84 84 def _regport_default(self):
85 85 # return 10101
86 86 return select_random_ports(1)[0]
87 87
88 88 def __init__(self, **kwargs):
89 89 super(RegistrationFactory, self).__init__(**kwargs)
90 90 self._propagate_url()
91 91 self._rebuild_url()
92 92 self.on_trait_change(self._propagate_url, 'url')
93 93 self.on_trait_change(self._rebuild_url, 'ip')
94 94 self.on_trait_change(self._rebuild_url, 'transport')
95 95 self.on_trait_change(self._rebuild_url, 'regport')
96 96
97 97 def _rebuild_url(self):
98 98 self.url = "%s://%s:%i"%(self.transport, self.ip, self.regport)
99 99
100 100 def _propagate_url(self):
101 101 """Ensure self.url contains full transport://interface:port"""
102 102 if self.url:
103 103 iface = self.url.split('://',1)
104 104 if len(iface) == 2:
105 105 self.transport,iface = iface
106 106 iface = iface.split(':')
107 107 self.ip = iface[0]
108 108 if iface[1]:
109 109 self.regport = int(iface[1])
110 110
111 111 #-----------------------------------------------------------------------------
112 112 # argparse argument extenders
113 113 #-----------------------------------------------------------------------------
114 114
115 115
116 116 def add_session_arguments(parser):
117 117 paa = parser.add_argument
118 118 paa('--ident',
119 119 type=str, dest='SessionFactory.ident',
120 120 help='set the ZMQ and session identity [default: random uuid]',
121 121 metavar='identity')
122 122 # paa('--execkey',
123 123 # type=str, dest='SessionFactory.exec_key',
124 124 # help='path to a file containing an execution key.',
125 125 # metavar='execkey')
126 126 paa('--packer',
127 127 type=str, dest='SessionFactory.packer',
128 128 help='method to serialize messages: {json,pickle} [default: json]',
129 129 metavar='packer')
130 130 paa('--unpacker',
131 131 type=str, dest='SessionFactory.unpacker',
132 132 help='inverse function of `packer`. Only necessary when using something other than json|pickle',
133 133 metavar='packer')
134 134
135 135 def add_registration_arguments(parser):
136 136 paa = parser.add_argument
137 137 paa('--ip',
138 138 type=str, dest='RegistrationFactory.ip',
139 139 help="The IP used for registration [default: localhost]",
140 140 metavar='ip')
141 141 paa('--transport',
142 142 type=str, dest='RegistrationFactory.transport',
143 143 help="The ZeroMQ transport used for registration [default: tcp]",
144 144 metavar='transport')
145 145 paa('--url',
146 146 type=str, dest='RegistrationFactory.url',
147 147 help='set transport,ip,regport in one go, e.g. tcp://127.0.0.1:10101',
148 148 metavar='url')
149 149 paa('--regport',
150 150 type=int, dest='RegistrationFactory.regport',
151 151 help="The port used for registration [default: 10101]",
152 152 metavar='port')
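For reference, the url handling in `RegistrationFactory` boils down to a round trip between `url` and its `(transport, ip, regport)` parts. A minimal sketch using plain functions (the real class keeps the traits in sync via the `on_trait_change` callbacks shown above):

```python
# Standalone sketch of the url <-> (transport, ip, regport) round trip
# implemented by _propagate_url / _rebuild_url (assumption: plain functions here).
def propagate_url(url):
    """Split 'tcp://127.0.0.1:10101' into transport, ip, and port."""
    transport, iface = url.split('://', 1)
    ip, port = iface.split(':')
    return transport, ip, int(port)

def rebuild_url(transport, ip, regport):
    """Recombine the pieces into the canonical registration url."""
    return "%s://%s:%i" % (transport, ip, regport)

assert propagate_url("tcp://127.0.0.1:10101") == ("tcp", "127.0.0.1", 10101)
assert rebuild_url("tcp", "127.0.0.1", 10101) == "tcp://127.0.0.1:10101"
```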
@@ -1,18 +1,18 b''
1 1 #!/usr/bin/env python
2 2 # encoding: utf-8
3 3
4 4 #-----------------------------------------------------------------------------
5 5 # Copyright (C) 2008-2009 The IPython Development Team
6 6 #
7 7 # Distributed under the terms of the BSD License. The full license is in
8 8 # the file COPYING, distributed as part of this software.
9 9 #-----------------------------------------------------------------------------
10 10
11 11 #-----------------------------------------------------------------------------
12 12 # Imports
13 13 #-----------------------------------------------------------------------------
14 14
15 15
16 from IPython.parallel.ipclusterapp import launch_new_instance
16 from IPython.parallel.apps.ipclusterapp import launch_new_instance
17 17
18 18 launch_new_instance()
@@ -1,18 +1,18 b''
1 1 #!/usr/bin/env python
2 2 # encoding: utf-8
3 3
4 4 #-----------------------------------------------------------------------------
5 5 # Copyright (C) 2008-2009 The IPython Development Team
6 6 #
7 7 # Distributed under the terms of the BSD License. The full license is in
8 8 # the file COPYING, distributed as part of this software.
9 9 #-----------------------------------------------------------------------------
10 10
11 11 #-----------------------------------------------------------------------------
12 12 # Imports
13 13 #-----------------------------------------------------------------------------
14 14
15 15
16 from IPython.parallel.ipcontrollerapp import launch_new_instance
16 from IPython.parallel.apps.ipcontrollerapp import launch_new_instance
17 17
18 18 launch_new_instance()
@@ -1,20 +1,20 b''
1 1 #!/usr/bin/env python
2 2 # encoding: utf-8
3 3
4 4 #-----------------------------------------------------------------------------
5 5 # Copyright (C) 2008-2009 The IPython Development Team
6 6 #
7 7 # Distributed under the terms of the BSD License. The full license is in
8 8 # the file COPYING, distributed as part of this software.
9 9 #-----------------------------------------------------------------------------
10 10
11 11 #-----------------------------------------------------------------------------
12 12 # Imports
13 13 #-----------------------------------------------------------------------------
14 14
15 15
16 from IPython.parallel.ipengineapp import launch_new_instance
16 from IPython.parallel.apps.ipengineapp import launch_new_instance
17 17
18 18 launch_new_instance()
19 19
20 20
@@ -1,20 +1,20 b''
1 1 #!/usr/bin/env python
2 2 # encoding: utf-8
3 3
4 4 #-----------------------------------------------------------------------------
5 5 # Copyright (C) 2008-2009 The IPython Development Team
6 6 #
7 7 # Distributed under the terms of the BSD License. The full license is in
8 8 # the file COPYING, distributed as part of this software.
9 9 #-----------------------------------------------------------------------------
10 10
11 11 #-----------------------------------------------------------------------------
12 12 # Imports
13 13 #-----------------------------------------------------------------------------
14 14
15 15
16 from IPython.parallel.iploggerapp import launch_new_instance
16 from IPython.parallel.apps.iploggerapp import launch_new_instance
17 17
18 18 launch_new_instance()
19 19
20 20
@@ -1,69 +1,69 b''
1 1 """toplevel setup/teardown for parallel tests."""
2 2
3 3 #-------------------------------------------------------------------------------
4 4 # Copyright (C) 2011 The IPython Development Team
5 5 #
6 6 # Distributed under the terms of the BSD License. The full license is in
7 7 # the file COPYING, distributed as part of this software.
8 8 #-------------------------------------------------------------------------------
9 9
10 10 #-------------------------------------------------------------------------------
11 11 # Imports
12 12 #-------------------------------------------------------------------------------
13 13
14 14 import tempfile
15 15 import time
16 16 from subprocess import Popen, PIPE, STDOUT
17 17
18 from IPython.parallel import client
18 from IPython.parallel import Client
19 19
20 20 processes = []
21 21 blackhole = tempfile.TemporaryFile()
22 22
23 23 # nose setup/teardown
24 24
25 25 def setup():
26 26 cp = Popen('ipcontroller --profile iptest -r --log-level 10 --log-to-file'.split(), stdout=blackhole, stderr=STDOUT)
27 27 processes.append(cp)
28 28 time.sleep(.5)
29 29 add_engines(1)
30 c = client.Client(profile='iptest')
30 c = Client(profile='iptest')
31 31 while not c.ids:
32 32 time.sleep(.1)
33 33 c.spin()
34 34 c.close()
35 35
36 36 def add_engines(n=1, profile='iptest'):
37 rc = client.Client(profile=profile)
37 rc = Client(profile=profile)
38 38 base = len(rc)
39 39 eps = []
40 40 for i in range(n):
41 41 ep = Popen(['ipengine']+ ['--profile', profile, '--log-level', '10', '--log-to-file'], stdout=blackhole, stderr=STDOUT)
42 42 # ep.start()
43 43 processes.append(ep)
44 44 eps.append(ep)
45 45 while len(rc) < base+n:
46 46 time.sleep(.1)
47 47 rc.spin()
48 48 rc.close()
49 49 return eps
50 50
51 51 def teardown():
52 52 time.sleep(1)
53 53 while processes:
54 54 p = processes.pop()
55 55 if p.poll() is None:
56 56 try:
57 57 p.terminate()
58 58 except Exception, e:
59 59 print e
60 60 pass
61 61 if p.poll() is None:
62 62 time.sleep(.25)
63 63 if p.poll() is None:
64 64 try:
65 65 print 'killing'
66 66 p.kill()
67 67 except:
68 68 print "couldn't shutdown process: ", p
69 69
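Test modules elsewhere in this changeset build on these helpers by calling `add_engines` from their own module-level `setup()` before any case runs. A minimal nose-style sketch, assuming the module paths introduced by this reorganization (`IPython.parallel.tests` and its `clienttest` module):

```python
# Hypothetical minimal test module using the setup helpers above.
from IPython.parallel.tests import add_engines
from IPython.parallel.tests.clienttest import ClusterTestCase

def setup():
    # ensure at least two engines are registered before the tests below run
    add_engines(2)

class TestSomething(ClusterTestCase):
    def test_ids_nonempty(self):
        self.assertTrue(len(self.client.ids) >= 2)
```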
@@ -1,119 +1,115 b''
1 1 """base class for parallel client tests"""
2 2
3 3 #-------------------------------------------------------------------------------
4 4 # Copyright (C) 2011 The IPython Development Team
5 5 #
6 6 # Distributed under the terms of the BSD License. The full license is in
7 7 # the file COPYING, distributed as part of this software.
8 8 #-------------------------------------------------------------------------------
9 9
10 10 import sys
11 11 import tempfile
12 12 import time
13 from signal import SIGINT
14 from multiprocessing import Process
15 13
16 14 from nose import SkipTest
17 15
18 16 import zmq
19 17 from zmq.tests import BaseZMQTestCase
20 18
21 19 from IPython.external.decorator import decorator
22 20
23 21 from IPython.parallel import error
24 from IPython.parallel.client import Client
25 from IPython.parallel.ipcluster import launch_process
26 from IPython.parallel.entry_point import select_random_ports
22 from IPython.parallel import Client
27 23 from IPython.parallel.tests import processes,add_engines
28 24
29 25 # simple tasks for use in apply tests
30 26
31 27 def segfault():
32 28 """this will segfault"""
33 29 import ctypes
34 30 ctypes.memset(-1,0,1)
35 31
36 32 def wait(n):
37 33 """sleep for a time"""
38 34 import time
39 35 time.sleep(n)
40 36 return n
41 37
42 38 def raiser(eclass):
43 39 """raise an exception"""
44 40 raise eclass()
45 41
46 42 # test decorator for skipping tests when libraries are unavailable
47 43 def skip_without(*names):
48 44 """skip a test if some names are not importable"""
49 45 @decorator
50 46 def skip_without_names(f, *args, **kwargs):
51 47 """decorator to skip tests in the absence of numpy."""
52 48 for name in names:
53 49 try:
54 50 __import__(name)
55 51 except ImportError:
56 52 raise SkipTest
57 53 return f(*args, **kwargs)
58 54 return skip_without_names
59 55
60 56 class ClusterTestCase(BaseZMQTestCase):
61 57
62 58 def add_engines(self, n=1, block=True):
63 59 """add multiple engines to our cluster"""
64 60 self.engines.extend(add_engines(n))
65 61 if block:
66 62 self.wait_on_engines()
67 63
68 64 def wait_on_engines(self, timeout=5):
69 65 """wait for our engines to connect."""
70 66 n = len(self.engines)+self.base_engine_count
71 67 tic = time.time()
72 68 while time.time()-tic < timeout and len(self.client.ids) < n:
73 69 time.sleep(0.1)
74 70
75 71 assert not len(self.client.ids) < n, "waiting for engines timed out"
76 72
77 73 def connect_client(self):
78 74 """connect a client with my Context, and track its sockets for cleanup"""
79 75 c = Client(profile='iptest', context=self.context)
80 76 for name in filter(lambda n:n.endswith('socket'), dir(c)):
81 77 s = getattr(c, name)
82 78 s.setsockopt(zmq.LINGER, 0)
83 79 self.sockets.append(s)
84 80 return c
85 81
86 82 def assertRaisesRemote(self, etype, f, *args, **kwargs):
87 83 try:
88 84 try:
89 85 f(*args, **kwargs)
90 86 except error.CompositeError as e:
91 87 e.raise_exception()
92 88 except error.RemoteError as e:
93 89 self.assertEquals(etype.__name__, e.ename, "Should have raised %r, but raised %r"%(e.ename, etype.__name__))
94 90 else:
95 91 self.fail("should have raised a RemoteError")
96 92
97 93 def setUp(self):
98 94 BaseZMQTestCase.setUp(self)
99 95 self.client = self.connect_client()
100 96 self.base_engine_count=len(self.client.ids)
101 97 self.engines=[]
102 98
103 99 def tearDown(self):
104 100 # self.client.clear(block=True)
105 101 # close fds:
106 102 for e in filter(lambda e: e.poll() is not None, processes):
107 103 processes.remove(e)
108 104
109 105 # allow flushing of incoming messages to prevent crash on socket close
110 106 self.client.wait(timeout=2)
111 107 # time.sleep(2)
112 108 self.client.spin()
113 109 self.client.close()
114 110 BaseZMQTestCase.tearDown(self)
115 111 # this will be redundant when pyzmq merges PR #88
116 112 # self.context.term()
117 113 # print tempfile.TemporaryFile().fileno(),
118 114 # sys.stdout.flush()
119 115 No newline at end of file
@@ -1,147 +1,147 b''
1 1 """Tests for parallel client.py"""
2 2
3 3 #-------------------------------------------------------------------------------
4 4 # Copyright (C) 2011 The IPython Development Team
5 5 #
6 6 # Distributed under the terms of the BSD License. The full license is in
7 7 # the file COPYING, distributed as part of this software.
8 8 #-------------------------------------------------------------------------------
9 9
10 10 #-------------------------------------------------------------------------------
11 11 # Imports
12 12 #-------------------------------------------------------------------------------
13 13
14 14 import time
15 15 from tempfile import mktemp
16 16
17 17 import zmq
18 18
19 from IPython.parallel import client as clientmod
19 from IPython.parallel.client import client as clientmod
20 20 from IPython.parallel import error
21 from IPython.parallel.asyncresult import AsyncResult, AsyncHubResult
22 from IPython.parallel.view import LoadBalancedView, DirectView
21 from IPython.parallel import AsyncResult, AsyncHubResult
22 from IPython.parallel import LoadBalancedView, DirectView
23 23
24 24 from clienttest import ClusterTestCase, segfault, wait, add_engines
25 25
26 26 def setup():
27 27 add_engines(4)
28 28
29 29 class TestClient(ClusterTestCase):
30 30
31 31 def test_ids(self):
32 32 n = len(self.client.ids)
33 33 self.add_engines(3)
34 34 self.assertEquals(len(self.client.ids), n+3)
35 35
36 36 def test_view_indexing(self):
37 37 """test index access for views"""
38 38 self.add_engines(2)
39 39 targets = self.client._build_targets('all')[-1]
40 40 v = self.client[:]
41 41 self.assertEquals(v.targets, targets)
42 42 t = self.client.ids[2]
43 43 v = self.client[t]
44 44 self.assert_(isinstance(v, DirectView))
45 45 self.assertEquals(v.targets, t)
46 46 t = self.client.ids[2:4]
47 47 v = self.client[t]
48 48 self.assert_(isinstance(v, DirectView))
49 49 self.assertEquals(v.targets, t)
50 50 v = self.client[::2]
51 51 self.assert_(isinstance(v, DirectView))
52 52 self.assertEquals(v.targets, targets[::2])
53 53 v = self.client[1::3]
54 54 self.assert_(isinstance(v, DirectView))
55 55 self.assertEquals(v.targets, targets[1::3])
56 56 v = self.client[:-3]
57 57 self.assert_(isinstance(v, DirectView))
58 58 self.assertEquals(v.targets, targets[:-3])
59 59 v = self.client[-1]
60 60 self.assert_(isinstance(v, DirectView))
61 61 self.assertEquals(v.targets, targets[-1])
62 62 self.assertRaises(TypeError, lambda : self.client[None])
63 63
64 64 def test_lbview_targets(self):
65 65 """test load_balanced_view targets"""
66 66 v = self.client.load_balanced_view()
67 67 self.assertEquals(v.targets, None)
68 68 v = self.client.load_balanced_view(-1)
69 69 self.assertEquals(v.targets, [self.client.ids[-1]])
70 70 v = self.client.load_balanced_view('all')
71 71 self.assertEquals(v.targets, self.client.ids)
72 72
73 73 def test_targets(self):
74 74 """test various valid targets arguments"""
75 75 build = self.client._build_targets
76 76 ids = self.client.ids
77 77 idents,targets = build(None)
78 78 self.assertEquals(ids, targets)
79 79
80 80 def test_clear(self):
81 81 """test clear behavior"""
82 82 # self.add_engines(2)
83 83 v = self.client[:]
84 84 v.block=True
85 85 v.push(dict(a=5))
86 86 v.pull('a')
87 87 id0 = self.client.ids[-1]
88 88 self.client.clear(targets=id0)
89 89 self.client[:-1].pull('a')
90 90 self.assertRaisesRemote(NameError, self.client[id0].get, 'a')
91 91 self.client.clear(block=True)
92 92 for i in self.client.ids:
93 93 # print i
94 94 self.assertRaisesRemote(NameError, self.client[i].get, 'a')
95 95
96 96 def test_get_result(self):
97 97 """test getting results from the Hub."""
98 98 c = clientmod.Client(profile='iptest')
99 99 # self.add_engines(1)
100 100 t = c.ids[-1]
101 101 ar = c[t].apply_async(wait, 1)
102 102 # give the monitor time to notice the message
103 103 time.sleep(.25)
104 104 ahr = self.client.get_result(ar.msg_ids)
105 105 self.assertTrue(isinstance(ahr, AsyncHubResult))
106 106 self.assertEquals(ahr.get(), ar.get())
107 107 ar2 = self.client.get_result(ar.msg_ids)
108 108 self.assertFalse(isinstance(ar2, AsyncHubResult))
109 109 c.close()
110 110
111 111 def test_ids_list(self):
112 112 """test client.ids"""
113 113 # self.add_engines(2)
114 114 ids = self.client.ids
115 115 self.assertEquals(ids, self.client._ids)
116 116 self.assertFalse(ids is self.client._ids)
117 117 ids.remove(ids[-1])
118 118 self.assertNotEquals(ids, self.client._ids)
119 119
120 120 def test_queue_status(self):
121 121 # self.addEngine(4)
122 122 ids = self.client.ids
123 123 id0 = ids[0]
124 124 qs = self.client.queue_status(targets=id0)
125 125 self.assertTrue(isinstance(qs, dict))
126 126 self.assertEquals(sorted(qs.keys()), ['completed', 'queue', 'tasks'])
127 127 allqs = self.client.queue_status()
128 128 self.assertTrue(isinstance(allqs, dict))
129 129 self.assertEquals(sorted(allqs.keys()), self.client.ids)
130 130 for eid,qs in allqs.items():
131 131 self.assertTrue(isinstance(qs, dict))
132 132 self.assertEquals(sorted(qs.keys()), ['completed', 'queue', 'tasks'])
133 133
134 134 def test_shutdown(self):
135 135 # self.addEngine(4)
136 136 ids = self.client.ids
137 137 id0 = ids[0]
138 138 self.client.shutdown(id0, block=True)
139 139 while id0 in self.client.ids:
140 140 time.sleep(0.1)
141 141 self.client.spin()
142 142
143 143 self.assertRaises(IndexError, lambda : self.client[id0])
144 144
145 145 def test_result_status(self):
146 146 pass
147 147 # to be written
@@ -1,101 +1,101 b''
1 1 """Tests for dependency.py"""
2 2
3 3 __docformat__ = "restructuredtext en"
4 4
5 5 #-------------------------------------------------------------------------------
6 6 # Copyright (C) 2011 The IPython Development Team
7 7 #
8 8 # Distributed under the terms of the BSD License. The full license is in
9 9 # the file COPYING, distributed as part of this software.
10 10 #-------------------------------------------------------------------------------
11 11
12 12 #-------------------------------------------------------------------------------
13 13 # Imports
14 14 #-------------------------------------------------------------------------------
15 15
16 16 # import
17 17 import os
18 18
19 19 from IPython.utils.pickleutil import can, uncan
20 20
21 from IPython.parallel import dependency as dmod
21 import IPython.parallel as pmod
22 22 from IPython.parallel.util import interactive
23 23
24 24 from IPython.parallel.tests import add_engines
25 25 from .clienttest import ClusterTestCase
26 26
27 27 def setup():
28 28 add_engines(1)
29 29
30 @dmod.require('time')
30 @pmod.require('time')
31 31 def wait(n):
32 32 time.sleep(n)
33 33 return n
34 34
35 35 mixed = map(str, range(10))
36 36 completed = map(str, range(0,10,2))
37 37 failed = map(str, range(1,10,2))
38 38
39 39 class DependencyTest(ClusterTestCase):
40 40
41 41 def setUp(self):
42 42 ClusterTestCase.setUp(self)
43 43 self.user_ns = {'__builtins__' : __builtins__}
44 44 self.view = self.client.load_balanced_view()
45 45 self.dview = self.client[-1]
46 46 self.succeeded = set(map(str, range(0,25,2)))
47 47 self.failed = set(map(str, range(1,25,2)))
48 48
49 49 def assertMet(self, dep):
50 50 self.assertTrue(dep.check(self.succeeded, self.failed), "Dependency should be met")
51 51
52 52 def assertUnmet(self, dep):
53 53 self.assertFalse(dep.check(self.succeeded, self.failed), "Dependency should not be met")
54 54
55 55 def assertUnreachable(self, dep):
56 56 self.assertTrue(dep.unreachable(self.succeeded, self.failed), "Dependency should be unreachable")
57 57
58 58 def assertReachable(self, dep):
59 59 self.assertFalse(dep.unreachable(self.succeeded, self.failed), "Dependency should be reachable")
60 60
61 61 def cancan(self, f):
62 62 """decorator to pass through canning into self.user_ns"""
63 63 return uncan(can(f), self.user_ns)
64 64
65 65 def test_require_imports(self):
66 66 """test that @require imports names"""
67 67 @self.cancan
68 @dmod.require('urllib')
68 @pmod.require('urllib')
69 69 @interactive
70 70 def encode(dikt):
71 71 return urllib.urlencode(dikt)
72 72 # must pass through canning to properly connect namespaces
73 73 self.assertEquals(encode(dict(a=5)), 'a=5')
74 74
75 75 def test_success_only(self):
76 dep = dmod.Dependency(mixed, success=True, failure=False)
76 dep = pmod.Dependency(mixed, success=True, failure=False)
77 77 self.assertUnmet(dep)
78 78 self.assertUnreachable(dep)
79 79 dep.all=False
80 80 self.assertMet(dep)
81 81 self.assertReachable(dep)
82 dep = dmod.Dependency(completed, success=True, failure=False)
82 dep = pmod.Dependency(completed, success=True, failure=False)
83 83 self.assertMet(dep)
84 84 self.assertReachable(dep)
85 85 dep.all=False
86 86 self.assertMet(dep)
87 87 self.assertReachable(dep)
88 88
89 89 def test_failure_only(self):
90 dep = dmod.Dependency(mixed, success=False, failure=True)
90 dep = pmod.Dependency(mixed, success=False, failure=True)
91 91 self.assertUnmet(dep)
92 92 self.assertUnreachable(dep)
93 93 dep.all=False
94 94 self.assertMet(dep)
95 95 self.assertReachable(dep)
96 dep = dmod.Dependency(completed, success=False, failure=True)
96 dep = pmod.Dependency(completed, success=False, failure=True)
97 97 self.assertUnmet(dep)
98 98 self.assertUnreachable(dep)
99 99 dep.all=False
100 100 self.assertUnmet(dep)
101 101 self.assertUnreachable(dep)
@@ -1,301 +1,301 b''
1 1 """test View objects"""
2 2 #-------------------------------------------------------------------------------
3 3 # Copyright (C) 2011 The IPython Development Team
4 4 #
5 5 # Distributed under the terms of the BSD License. The full license is in
6 6 # the file COPYING, distributed as part of this software.
7 7 #-------------------------------------------------------------------------------
8 8
9 9 #-------------------------------------------------------------------------------
10 10 # Imports
11 11 #-------------------------------------------------------------------------------
12 12
13 13 import time
14 14 from tempfile import mktemp
15 15
16 16 import zmq
17 17
18 18 from IPython import parallel as pmod
19 19 from IPython.parallel import error
20 from IPython.parallel.asyncresult import AsyncResult, AsyncHubResult, AsyncMapResult
21 from IPython.parallel.view import LoadBalancedView, DirectView
20 from IPython.parallel import AsyncResult, AsyncHubResult, AsyncMapResult
21 from IPython.parallel import LoadBalancedView, DirectView
22 22 from IPython.parallel.util import interactive
23 23
24 24 from IPython.parallel.tests import add_engines
25 25
26 26 from .clienttest import ClusterTestCase, segfault, wait, skip_without
27 27
28 28 def setup():
29 29 add_engines(3)
30 30
31 31 class TestView(ClusterTestCase):
32 32
33 33 def test_segfault_task(self):
34 34 """test graceful handling of engine death (balanced)"""
35 35 # self.add_engines(1)
36 36 ar = self.client[-1].apply_async(segfault)
37 37 self.assertRaisesRemote(error.EngineError, ar.get)
38 38 eid = ar.engine_id
39 39 while eid in self.client.ids:
40 40 time.sleep(.01)
41 41 self.client.spin()
42 42
43 43 def test_segfault_mux(self):
44 44 """test graceful handling of engine death (direct)"""
45 45 # self.add_engines(1)
46 46 eid = self.client.ids[-1]
47 47 ar = self.client[eid].apply_async(segfault)
48 48 self.assertRaisesRemote(error.EngineError, ar.get)
49 49 eid = ar.engine_id
50 50 while eid in self.client.ids:
51 51 time.sleep(.01)
52 52 self.client.spin()
53 53
54 54 def test_push_pull(self):
55 55 """test pushing and pulling"""
56 56 data = dict(a=10, b=1.05, c=range(10), d={'e':(1,2),'f':'hi'})
57 57 t = self.client.ids[-1]
58 58 v = self.client[t]
59 59 push = v.push
60 60 pull = v.pull
61 61 v.block=True
62 62 nengines = len(self.client)
63 63 push({'data':data})
64 64 d = pull('data')
65 65 self.assertEquals(d, data)
66 66 self.client[:].push({'data':data})
67 67 d = self.client[:].pull('data', block=True)
68 68 self.assertEquals(d, nengines*[data])
69 69 ar = push({'data':data}, block=False)
70 70 self.assertTrue(isinstance(ar, AsyncResult))
71 71 r = ar.get()
72 72 ar = self.client[:].pull('data', block=False)
73 73 self.assertTrue(isinstance(ar, AsyncResult))
74 74 r = ar.get()
75 75 self.assertEquals(r, nengines*[data])
76 76 self.client[:].push(dict(a=10,b=20))
77 77 r = self.client[:].pull(('a','b'))
78 78 self.assertEquals(r, nengines*[[10,20]])
79 79
80 80 def test_push_pull_function(self):
81 81 "test pushing and pulling functions"
82 82 def testf(x):
83 83 return 2.0*x
84 84
85 85 t = self.client.ids[-1]
86 86 self.client[t].block=True
87 87 push = self.client[t].push
88 88 pull = self.client[t].pull
89 89 execute = self.client[t].execute
90 90 push({'testf':testf})
91 91 r = pull('testf')
92 92 self.assertEqual(r(1.0), testf(1.0))
93 93 execute('r = testf(10)')
94 94 r = pull('r')
95 95 self.assertEquals(r, testf(10))
96 96 ar = self.client[:].push({'testf':testf}, block=False)
97 97 ar.get()
98 98 ar = self.client[:].pull('testf', block=False)
99 99 rlist = ar.get()
100 100 for r in rlist:
101 101 self.assertEqual(r(1.0), testf(1.0))
102 102 execute("def g(x): return x*x")
103 103 r = pull(('testf','g'))
104 104 self.assertEquals((r[0](10),r[1](10)), (testf(10), 100))
105 105
106 106 def test_push_function_globals(self):
107 107 """test that pushed functions have access to globals"""
108 108 @interactive
109 109 def geta():
110 110 return a
111 111 # self.add_engines(1)
112 112 v = self.client[-1]
113 113 v.block=True
114 114 v['f'] = geta
115 115 self.assertRaisesRemote(NameError, v.execute, 'b=f()')
116 116 v.execute('a=5')
117 117 v.execute('b=f()')
118 118 self.assertEquals(v['b'], 5)
119 119
120 120 def test_push_function_defaults(self):
121 121 """test that pushed functions preserve default args"""
122 122 def echo(a=10):
123 123 return a
124 124 v = self.client[-1]
125 125 v.block=True
126 126 v['f'] = echo
127 127 v.execute('b=f()')
128 128 self.assertEquals(v['b'], 10)
129 129
130 130 def test_get_result(self):
131 131 """test getting results from the Hub."""
132 132 c = pmod.Client(profile='iptest')
133 133 # self.add_engines(1)
134 134 t = c.ids[-1]
135 135 v = c[t]
136 136 v2 = self.client[t]
137 137 ar = v.apply_async(wait, 1)
138 138 # give the monitor time to notice the message
139 139 time.sleep(.25)
140 140 ahr = v2.get_result(ar.msg_ids)
141 141 self.assertTrue(isinstance(ahr, AsyncHubResult))
142 142 self.assertEquals(ahr.get(), ar.get())
143 143 ar2 = v2.get_result(ar.msg_ids)
144 144 self.assertFalse(isinstance(ar2, AsyncHubResult))
145 145 c.spin()
146 146 c.close()
147 147
148 148 def test_run_newline(self):
149 149 """test that run appends newline to files"""
150 150 tmpfile = mktemp()
151 151 with open(tmpfile, 'w') as f:
152 152 f.write("""def g():
153 153 return 5
154 154 """)
155 155 v = self.client[-1]
156 156 v.run(tmpfile, block=True)
157 157 self.assertEquals(v.apply_sync(lambda f: f(), pmod.Reference('g')), 5)
158 158
159 159 def test_apply_tracked(self):
160 160 """test tracking for apply"""
161 161 # self.add_engines(1)
162 162 t = self.client.ids[-1]
163 163 v = self.client[t]
164 164 v.block=False
165 165 def echo(n=1024*1024, **kwargs):
166 166 with v.temp_flags(**kwargs):
167 167 return v.apply(lambda x: x, 'x'*n)
168 168 ar = echo(1, track=False)
169 169 self.assertTrue(isinstance(ar._tracker, zmq.MessageTracker))
170 170 self.assertTrue(ar.sent)
171 171 ar = echo(track=True)
172 172 self.assertTrue(isinstance(ar._tracker, zmq.MessageTracker))
173 173 self.assertEquals(ar.sent, ar._tracker.done)
174 174 ar._tracker.wait()
175 175 self.assertTrue(ar.sent)
176 176
177 177 def test_push_tracked(self):
178 178 t = self.client.ids[-1]
179 179 ns = dict(x='x'*1024*1024)
180 180 v = self.client[t]
181 181 ar = v.push(ns, block=False, track=False)
182 182 self.assertTrue(isinstance(ar._tracker, zmq.MessageTracker))
183 183 self.assertTrue(ar.sent)
184 184
185 185 ar = v.push(ns, block=False, track=True)
186 186 self.assertTrue(isinstance(ar._tracker, zmq.MessageTracker))
187 187 self.assertEquals(ar.sent, ar._tracker.done)
188 188 ar._tracker.wait()
189 189 self.assertTrue(ar.sent)
190 190 ar.get()
191 191
192 192 def test_scatter_tracked(self):
193 193 t = self.client.ids
194 194 x='x'*1024*1024
195 195 ar = self.client[t].scatter('x', x, block=False, track=False)
196 196 self.assertTrue(isinstance(ar._tracker, zmq.MessageTracker))
197 197 self.assertTrue(ar.sent)
198 198
199 199 ar = self.client[t].scatter('x', x, block=False, track=True)
200 200 self.assertTrue(isinstance(ar._tracker, zmq.MessageTracker))
201 201 self.assertEquals(ar.sent, ar._tracker.done)
202 202 ar._tracker.wait()
203 203 self.assertTrue(ar.sent)
204 204 ar.get()
205 205
206 206 def test_remote_reference(self):
207 207 v = self.client[-1]
208 208 v['a'] = 123
209 209 ra = pmod.Reference('a')
210 210 b = v.apply_sync(lambda x: x, ra)
211 211 self.assertEquals(b, 123)
212 212
213 213
214 214 def test_scatter_gather(self):
215 215 view = self.client[:]
216 216 seq1 = range(16)
217 217 view.scatter('a', seq1)
218 218 seq2 = view.gather('a', block=True)
219 219 self.assertEquals(seq2, seq1)
220 220 self.assertRaisesRemote(NameError, view.gather, 'asdf', block=True)
221 221
222 222 @skip_without('numpy')
223 223 def test_scatter_gather_numpy(self):
224 224 import numpy
225 225 from numpy.testing.utils import assert_array_equal, assert_array_almost_equal
226 226 view = self.client[:]
227 227 a = numpy.arange(64)
228 228 view.scatter('a', a)
229 229 b = view.gather('a', block=True)
230 230 assert_array_equal(b, a)
231 231
232 232 def test_map(self):
233 233 view = self.client[:]
234 234 def f(x):
235 235 return x**2
236 236 data = range(16)
237 237 r = view.map_sync(f, data)
238 238 self.assertEquals(r, map(f, data))
239 239
240 240 def test_scatterGatherNonblocking(self):
241 241 data = range(16)
242 242 view = self.client[:]
243 243 view.scatter('a', data, block=False)
244 244 ar = view.gather('a', block=False)
245 245 self.assertEquals(ar.get(), data)
246 246
247 247 @skip_without('numpy')
248 248 def test_scatter_gather_numpy_nonblocking(self):
249 249 import numpy
250 250 from numpy.testing.utils import assert_array_equal, assert_array_almost_equal
251 251 a = numpy.arange(64)
252 252 view = self.client[:]
253 253 ar = view.scatter('a', a, block=False)
254 254 self.assertTrue(isinstance(ar, AsyncResult))
255 255 amr = view.gather('a', block=False)
256 256 self.assertTrue(isinstance(amr, AsyncMapResult))
257 257 assert_array_equal(amr.get(), a)
258 258
259 259 def test_execute(self):
260 260 view = self.client[:]
261 261 # self.client.debug=True
262 262 execute = view.execute
263 263 ar = execute('c=30', block=False)
264 264 self.assertTrue(isinstance(ar, AsyncResult))
265 265 ar = execute('d=[0,1,2]', block=False)
266 266 self.client.wait(ar, 1)
267 267 self.assertEquals(len(ar.get()), len(self.client))
268 268 for c in view['c']:
269 269 self.assertEquals(c, 30)
270 270
271 271 def test_abort(self):
272 272 view = self.client[-1]
273 273 ar = view.execute('import time; time.sleep(0.25)', block=False)
274 274 ar2 = view.apply_async(lambda : 2)
275 275 ar3 = view.apply_async(lambda : 3)
276 276 view.abort(ar2)
277 277 view.abort(ar3.msg_ids)
278 278 self.assertRaises(error.TaskAborted, ar2.get)
279 279 self.assertRaises(error.TaskAborted, ar3.get)
280 280
281 281 def test_temp_flags(self):
282 282 view = self.client[-1]
283 283 view.block=True
284 284 with view.temp_flags(block=False):
285 285 self.assertFalse(view.block)
286 286 self.assertTrue(view.block)
287 287
288 288 def test_importer(self):
289 289 view = self.client[-1]
290 290 view.clear(block=True)
291 291 with view.importer:
292 292 import re
293 293
294 294 @interactive
295 295 def findall(pat, s):
296 296 # this globals() step isn't necessary in real code
297 297 # only to prevent a closure in the test
298 298 return globals()['re'].findall(pat, s)
299 299
300 300 self.assertEquals(view.apply_sync(findall, '\w+', 'hello world'), 'hello world'.split())
301 301
@@ -1,354 +1,462 b''
1 1 """some generic utilities for dealing with classes, urls, and serialization"""
2 2 #-----------------------------------------------------------------------------
3 3 # Copyright (C) 2010-2011 The IPython Development Team
4 4 #
5 5 # Distributed under the terms of the BSD License. The full license is in
6 6 # the file COPYING, distributed as part of this software.
7 7 #-----------------------------------------------------------------------------
8 8
9 9 #-----------------------------------------------------------------------------
10 10 # Imports
11 11 #-----------------------------------------------------------------------------
12 12
13 # Standard library imports.
14 import logging
15 import os
13 16 import re
17 import stat
14 18 import socket
19 import sys
20 from signal import signal, SIGINT, SIGABRT, SIGTERM
21 try:
22 from signal import SIGKILL
23 except ImportError:
24 SIGKILL=None
15 25
16 26 try:
17 27 import cPickle
18 28 pickle = cPickle
19 29 except:
20 30 cPickle = None
21 31 import pickle
22 32
33 # System library imports
34 import zmq
35 from zmq.log import handlers
23 36
37 # IPython imports
24 38 from IPython.utils.pickleutil import can, uncan, canSequence, uncanSequence
25 39 from IPython.utils.newserialized import serialize, unserialize
40 from IPython.zmq.log import EnginePUBHandler
26 41
42 # globals
27 43 ISO8601="%Y-%m-%dT%H:%M:%S.%f"
28 44
29 45 #-----------------------------------------------------------------------------
30 46 # Classes
31 47 #-----------------------------------------------------------------------------
32 48
33 49 class Namespace(dict):
34 50 """Subclass of dict for attribute access to keys."""
35 51
36 52 def __getattr__(self, key):
37 53 """getattr aliased to getitem"""
38 54 if key in self.iterkeys():
39 55 return self[key]
40 56 else:
41 57 raise NameError(key)
42 58
43 59 def __setattr__(self, key, value):
44 60 """setattr aliased to setitem, with strict"""
45 61 if hasattr(dict, key):
46 62 raise KeyError("Cannot override dict keys %r"%key)
47 63 self[key] = value
48 64
49 65
50 66 class ReverseDict(dict):
51 67 """simple double-keyed subset of dict methods."""
52 68
53 69 def __init__(self, *args, **kwargs):
54 70 dict.__init__(self, *args, **kwargs)
55 71 self._reverse = dict()
56 72 for key, value in self.iteritems():
57 73 self._reverse[value] = key
58 74
59 75 def __getitem__(self, key):
60 76 try:
61 77 return dict.__getitem__(self, key)
62 78 except KeyError:
63 79 return self._reverse[key]
64 80
65 81 def __setitem__(self, key, value):
66 82 if key in self._reverse:
67 83 raise KeyError("Can't have key %r on both sides!"%key)
68 84 dict.__setitem__(self, key, value)
69 85 self._reverse[value] = key
70 86
71 87 def pop(self, key):
72 88 value = dict.pop(self, key)
73 89 self._reverse.pop(value)
74 90 return value
75 91
76 92 def get(self, key, default=None):
77 93 try:
78 94 return self[key]
79 95 except KeyError:
80 96 return default
81 97
82 98 #-----------------------------------------------------------------------------
83 99 # Functions
84 100 #-----------------------------------------------------------------------------
85 101
86 102 def validate_url(url):
87 103 """validate a url for zeromq"""
88 104 if not isinstance(url, basestring):
89 105 raise TypeError("url must be a string, not %r"%type(url))
90 106 url = url.lower()
91 107
92 108 proto_addr = url.split('://')
93 109 assert len(proto_addr) == 2, 'Invalid url: %r'%url
94 110 proto, addr = proto_addr
95 111 assert proto in ['tcp','pgm','epgm','ipc','inproc'], "Invalid protocol: %r"%proto
96 112
97 113 # domain pattern adapted from http://www.regexlib.com/REDetails.aspx?regexp_id=391
98 114 # author: Remi Sabourin
99 115 pat = re.compile(r'^([\w\d]([\w\d\-]{0,61}[\w\d])?\.)*[\w\d]([\w\d\-]{0,61}[\w\d])?$')
100 116
101 117 if proto == 'tcp':
102 118 lis = addr.split(':')
103 119 assert len(lis) == 2, 'Invalid url: %r'%url
104 120 addr,s_port = lis
105 121 try:
106 122 port = int(s_port)
107 123 except ValueError:
108 124 raise AssertionError("Invalid port %r in url: %r"%(s_port, url))
109 125
110 126 assert addr == '*' or pat.match(addr) is not None, 'Invalid url: %r'%url
111 127
112 128 else:
113 129 # only validate tcp urls currently
114 130 pass
115 131
116 132 return True
117 133
118 134
119 135 def validate_url_container(container):
120 136 """validate a potentially nested collection of urls."""
121 137 if isinstance(container, basestring):
122 138 url = container
123 139 return validate_url(url)
124 140 elif isinstance(container, dict):
125 141 container = container.itervalues()
126 142
127 143 for element in container:
128 144 validate_url_container(element)
129 145
130 146
131 147 def split_url(url):
132 148 """split a zmq url (tcp://ip:port) into ('tcp','ip','port')."""
133 149 proto_addr = url.split('://')
134 150 assert len(proto_addr) == 2, 'Invalid url: %r'%url
135 151 proto, addr = proto_addr
136 152 lis = addr.split(':')
137 153 assert len(lis) == 2, 'Invalid url: %r'%url
138 154 addr,s_port = lis
139 155 return proto,addr,s_port
140 156
141 157 def disambiguate_ip_address(ip, location=None):
142 158 """turn multi-ip interfaces '0.0.0.0' and '*' into connectable
143 159 ones, based on the location (default interpretation of location is localhost)."""
144 160 if ip in ('0.0.0.0', '*'):
145 161 external_ips = socket.gethostbyname_ex(socket.gethostname())[2]
146 162 if location is None or location in external_ips:
147 163 ip='127.0.0.1'
148 164 elif location:
149 165 return location
150 166 return ip
151 167
152 168 def disambiguate_url(url, location=None):
153 169 """turn multi-ip interfaces '0.0.0.0' and '*' into connectable
154 170 ones, based on the location (default interpretation is localhost).
155 171
156 172 This is for zeromq urls, such as tcp://*:10101."""
157 173 try:
158 174 proto,ip,port = split_url(url)
159 175 except AssertionError:
160 176 # probably not tcp url; could be ipc, etc.
161 177 return url
162 178
163 179 ip = disambiguate_ip_address(ip,location)
164 180
165 181 return "%s://%s:%s"%(proto,ip,port)
166 182
167 183
168 184 def rekey(dikt):
169 185 """Rekey a dict that has been forced to use str keys where there should be
170 186 ints by json. This belongs in the jsonutil added by fperez."""
171 187 for k in dikt.iterkeys():
172 188 if isinstance(k, str):
173 189 ik=fk=None
174 190 try:
175 191 ik = int(k)
176 192 except ValueError:
177 193 try:
178 194 fk = float(k)
179 195 except ValueError:
180 196 continue
181 197 if ik is not None:
182 198 nk = ik
183 199 else:
184 200 nk = fk
185 201 if nk in dikt:
186 202 raise KeyError("already have key %r"%nk)
187 203 dikt[nk] = dikt.pop(k)
188 204 return dikt
189 205
190 206 def serialize_object(obj, threshold=64e-6):
191 207 """Serialize an object into a list of sendable buffers.
192 208
193 209 Parameters
194 210 ----------
195 211
196 212 obj : object
197 213 The object to be serialized
198 214 threshold : float
199 215 The threshold for not double-pickling the content.
200 216
201 217
202 218 Returns
203 219 -------
204 220 ('pmd', [bufs]) :
205 221 where pmd is the pickled metadata wrapper,
206 222 bufs is a list of data buffers
207 223 """
208 224 databuffers = []
209 225 if isinstance(obj, (list, tuple)):
210 226 clist = canSequence(obj)
211 227 slist = map(serialize, clist)
212 228 for s in slist:
213 229 if s.typeDescriptor in ('buffer', 'ndarray') or s.getDataSize() > threshold:
214 230 databuffers.append(s.getData())
215 231 s.data = None
216 232 return pickle.dumps(slist,-1), databuffers
217 233 elif isinstance(obj, dict):
218 234 sobj = {}
219 235 for k in sorted(obj.iterkeys()):
220 236 s = serialize(can(obj[k]))
221 237 if s.typeDescriptor in ('buffer', 'ndarray') or s.getDataSize() > threshold:
222 238 databuffers.append(s.getData())
223 239 s.data = None
224 240 sobj[k] = s
225 241 return pickle.dumps(sobj,-1),databuffers
226 242 else:
227 243 s = serialize(can(obj))
228 244 if s.typeDescriptor in ('buffer', 'ndarray') or s.getDataSize() > threshold:
229 245 databuffers.append(s.getData())
230 246 s.data = None
231 247 return pickle.dumps(s,-1),databuffers
232 248
233 249
234 250 def unserialize_object(bufs):
235 251 """reconstruct an object serialized by serialize_object from data buffers."""
236 252 bufs = list(bufs)
237 253 sobj = pickle.loads(bufs.pop(0))
238 254 if isinstance(sobj, (list, tuple)):
239 255 for s in sobj:
240 256 if s.data is None:
241 257 s.data = bufs.pop(0)
242 258 return uncanSequence(map(unserialize, sobj)), bufs
243 259 elif isinstance(sobj, dict):
244 260 newobj = {}
245 261 for k in sorted(sobj.iterkeys()):
246 262 s = sobj[k]
247 263 if s.data is None:
248 264 s.data = bufs.pop(0)
249 265 newobj[k] = uncan(unserialize(s))
250 266 return newobj, bufs
251 267 else:
252 268 if sobj.data is None:
253 269 sobj.data = bufs.pop(0)
254 270 return uncan(unserialize(sobj)), bufs
255 271
256 272 def pack_apply_message(f, args, kwargs, threshold=64e-6):
257 273 """pack up a function, args, and kwargs to be sent over the wire
258 274 as a series of buffers. Any object whose data is larger than `threshold`
259 275 will not have its data copied (currently only numpy arrays support zero-copy)"""
260 276 msg = [pickle.dumps(can(f),-1)]
261 277 databuffers = [] # for large objects
262 278 sargs, bufs = serialize_object(args,threshold)
263 279 msg.append(sargs)
264 280 databuffers.extend(bufs)
265 281 skwargs, bufs = serialize_object(kwargs,threshold)
266 282 msg.append(skwargs)
267 283 databuffers.extend(bufs)
268 284 msg.extend(databuffers)
269 285 return msg
270 286
271 287 def unpack_apply_message(bufs, g=None, copy=True):
272 288 """unpack f,args,kwargs from buffers packed by pack_apply_message()
273 289 Returns: original f,args,kwargs"""
274 290 bufs = list(bufs) # allow us to pop
275 291 assert len(bufs) >= 3, "not enough buffers!"
276 292 if not copy:
277 293 for i in range(3):
278 294 bufs[i] = bufs[i].bytes
279 295 cf = pickle.loads(bufs.pop(0))
280 296 sargs = list(pickle.loads(bufs.pop(0)))
281 297 skwargs = dict(pickle.loads(bufs.pop(0)))
282 298 # print sargs, skwargs
283 299 f = uncan(cf, g)
284 300 for sa in sargs:
285 301 if sa.data is None:
286 302 m = bufs.pop(0)
287 303 if sa.getTypeDescriptor() in ('buffer', 'ndarray'):
288 304 if copy:
289 305 sa.data = buffer(m)
290 306 else:
291 307 sa.data = m.buffer
292 308 else:
293 309 if copy:
294 310 sa.data = m
295 311 else:
296 312 sa.data = m.bytes
297 313
298 314 args = uncanSequence(map(unserialize, sargs), g)
299 315 kwargs = {}
300 316 for k in sorted(skwargs.iterkeys()):
301 317 sa = skwargs[k]
302 318 if sa.data is None:
303 319 m = bufs.pop(0)
304 320 if sa.getTypeDescriptor() in ('buffer', 'ndarray'):
305 321 if copy:
306 322 sa.data = buffer(m)
307 323 else:
308 324 sa.data = m.buffer
309 325 else:
310 326 if copy:
311 327 sa.data = m
312 328 else:
313 329 sa.data = m.bytes
314 330
315 331 kwargs[k] = uncan(unserialize(sa), g)
316 332
317 333 return f,args,kwargs
318 334
319 335 #--------------------------------------------------------------------------
320 336 # helpers for implementing old MEC API via view.apply
321 337 #--------------------------------------------------------------------------
322 338
323 339 def interactive(f):
324 340 """decorator for making functions appear as interactively defined.
325 341 This results in the function being linked to the user_ns as globals()
326 342 instead of the module globals().
327 343 """
328 344 f.__module__ = '__main__'
329 345 return f
330 346
331 347 @interactive
332 348 def _push(ns):
333 349 """helper method for implementing `client.push` via `client.apply`"""
334 350 globals().update(ns)
335 351
336 352 @interactive
337 353 def _pull(keys):
338 354 """helper method for implementing `client.pull` via `client.apply`"""
339 355 user_ns = globals()
340 356 if isinstance(keys, (list,tuple, set)):
341 357 for key in keys:
342 358 if not user_ns.has_key(key):
343 359 raise NameError("name '%s' is not defined"%key)
344 360 return map(user_ns.get, keys)
345 361 else:
346 362 if not user_ns.has_key(keys):
347 363 raise NameError("name '%s' is not defined"%keys)
348 364 return user_ns.get(keys)
349 365
350 366 @interactive
351 367 def _execute(code):
352 368 """helper method for implementing `client.execute` via `client.apply`"""
353 369 exec code in globals()
354 370
371 #--------------------------------------------------------------------------
372 # extra process management utilities
373 #--------------------------------------------------------------------------
374
375 _random_ports = set()
376
377 def select_random_ports(n):
378 """Selects and return n random ports that are available."""
379 ports = []
380 for i in xrange(n):
381 sock = socket.socket()
382 sock.bind(('', 0))
383 while sock.getsockname()[1] in _random_ports:
384 sock.close()
385 sock = socket.socket()
386 sock.bind(('', 0))
387 ports.append(sock)
388 for i, sock in enumerate(ports):
389 port = sock.getsockname()[1]
390 sock.close()
391 ports[i] = port
392 _random_ports.add(port)
393 return ports
394
395 def signal_children(children):
396 """Relay interupt/term signals to children, for more solid process cleanup."""
397 def terminate_children(sig, frame):
398 logging.critical("Got signal %i, terminating children..."%sig)
399 for child in children:
400 child.terminate()
401
402 sys.exit(sig != SIGINT)
403 # sys.exit(sig)
404 for sig in (SIGINT, SIGABRT, SIGTERM):
405 signal(sig, terminate_children)
406
407 def generate_exec_key(keyfile):
408 import uuid
409 newkey = str(uuid.uuid4())
410 with open(keyfile, 'w') as f:
411 # f.write('ipython-key ')
412 f.write(newkey+'\n')
413 # set user-only RW permissions (0600)
414 # this will have no effect on Windows
415 os.chmod(keyfile, stat.S_IRUSR|stat.S_IWUSR)
416
417
418 def integer_loglevel(loglevel):
419 try:
420 loglevel = int(loglevel)
421 except ValueError:
422 if isinstance(loglevel, str):
423 loglevel = getattr(logging, loglevel)
424 return loglevel
425
426 def connect_logger(logname, context, iface, root="ip", loglevel=logging.DEBUG):
427 logger = logging.getLogger(logname)
428 if any([isinstance(h, handlers.PUBHandler) for h in logger.handlers]):
429 # don't add a second PUBHandler
430 return
431 loglevel = integer_loglevel(loglevel)
432 lsock = context.socket(zmq.PUB)
433 lsock.connect(iface)
434 handler = handlers.PUBHandler(lsock)
435 handler.setLevel(loglevel)
436 handler.root_topic = root
437 logger.addHandler(handler)
438 logger.setLevel(loglevel)
439
440 def connect_engine_logger(context, iface, engine, loglevel=logging.DEBUG):
441 logger = logging.getLogger()
442 if any([isinstance(h, handlers.PUBHandler) for h in logger.handlers]):
443 # don't add a second PUBHandler
444 return
445 loglevel = integer_loglevel(loglevel)
446 lsock = context.socket(zmq.PUB)
447 lsock.connect(iface)
448 handler = EnginePUBHandler(engine, lsock)
449 handler.setLevel(loglevel)
450 logger.addHandler(handler)
451 logger.setLevel(loglevel)
452
453 def local_logger(logname, loglevel=logging.DEBUG):
454 loglevel = integer_loglevel(loglevel)
455 logger = logging.getLogger(logname)
456 if any([isinstance(h, logging.StreamHandler) for h in logger.handlers]):
457 # don't add a second StreamHandler
458 return
459 handler = logging.StreamHandler()
460 handler.setLevel(loglevel)
461 logger.addHandler(handler)
462 logger.setLevel(loglevel)
@@ -1,420 +1,423 b''
1 1 # -*- coding: utf-8 -*-
2 2 """IPython Test Suite Runner.
3 3
4 4 This module provides a main entry point to a user script to test IPython
5 5 itself from the command line. There are two ways of running this script:
6 6
7 7 1. With the syntax `iptest all`. This runs our entire test suite by
8 8 calling this script (with different arguments) recursively. This
9 9 causes modules and packages to be tested in different processes, using nose
10 10 or trial where appropriate.
11 11 2. With the regular nose syntax, like `iptest -vvs IPython`. In this form
12 12 the script simply calls nose, but with special command line flags and
13 13 plugins loaded.
14 14
15 15 """
16 16
17 17 #-----------------------------------------------------------------------------
18 18 # Copyright (C) 2009 The IPython Development Team
19 19 #
20 20 # Distributed under the terms of the BSD License. The full license is in
21 21 # the file COPYING, distributed as part of this software.
22 22 #-----------------------------------------------------------------------------
23 23
24 24 #-----------------------------------------------------------------------------
25 25 # Imports
26 26 #-----------------------------------------------------------------------------
27 27
28 28 # Stdlib
29 29 import os
30 30 import os.path as path
31 31 import signal
32 32 import sys
33 33 import subprocess
34 34 import tempfile
35 35 import time
36 36 import warnings
37 37
38 38 # Note: monkeypatch!
39 39 # We need to monkeypatch a small problem in nose itself first, before importing
40 40 # it for actual use. This should get into nose upstream, but its release cycle
41 41 # is slow and we need it for our parametric tests to work correctly.
42 42 from IPython.testing import nosepatch
43 43 # Now, proceed to import nose itself
44 44 import nose.plugins.builtin
45 45 from nose.core import TestProgram
46 46
47 47 # Our own imports
48 48 from IPython.utils.path import get_ipython_module_path
49 49 from IPython.utils.process import find_cmd, pycmd2argv
50 50 from IPython.utils.sysinfo import sys_info
51 51
52 52 from IPython.testing import globalipapp
53 53 from IPython.testing.plugin.ipdoctest import IPythonDoctest
54 54 from IPython.external.decorators import KnownFailure
55 55
56 56 pjoin = path.join
57 57
58 58
59 59 #-----------------------------------------------------------------------------
60 60 # Globals
61 61 #-----------------------------------------------------------------------------
62 62
63 63
64 64 #-----------------------------------------------------------------------------
65 65 # Warnings control
66 66 #-----------------------------------------------------------------------------
67 67
68 68 # Twisted generates annoying warnings with Python 2.6, as will do other code
69 69 # that imports 'sets' as of today
70 70 warnings.filterwarnings('ignore', 'the sets module is deprecated',
71 71 DeprecationWarning )
72 72
73 73 # This one also comes from Twisted
74 74 warnings.filterwarnings('ignore', 'the sha module is deprecated',
75 75 DeprecationWarning)
76 76
77 77 # Wx on Fedora11 spits these out
78 78 warnings.filterwarnings('ignore', 'wxPython/wxWidgets release number mismatch',
79 79 UserWarning)
80 80
81 81 #-----------------------------------------------------------------------------
82 82 # Logic for skipping doctests
83 83 #-----------------------------------------------------------------------------
84 84
85 85 def test_for(mod, min_version=None):
86 86 """Test to see if mod is importable."""
87 87 try:
88 88 __import__(mod)
89 89 except (ImportError, RuntimeError):
90 90 # GTK reports Runtime error if it can't be initialized even if it's
91 91 # importable.
92 92 return False
93 93 else:
94 94 if min_version:
95 95 return sys.modules[mod].__version__ >= min_version
96 96 else:
97 97 return True
98 98
99 99 # Global dict where we can store information on what we have and what we don't
100 100 # have available at test run time
101 101 have = {}
102 102
103 103 have['curses'] = test_for('_curses')
104 104 have['wx'] = test_for('wx')
105 105 have['wx.aui'] = test_for('wx.aui')
106 106 have['pexpect'] = test_for('pexpect')
107 107 have['zmq'] = test_for('zmq', '2.0.10')
108 108
109 109 #-----------------------------------------------------------------------------
110 110 # Functions and classes
111 111 #-----------------------------------------------------------------------------
112 112
113 113 def report():
114 114 """Return a string with a summary report of test-related variables."""
115 115
116 116 out = [ sys_info(), '\n']
117 117
118 118 avail = []
119 119 not_avail = []
120 120
121 121 for k, is_avail in have.items():
122 122 if is_avail:
123 123 avail.append(k)
124 124 else:
125 125 not_avail.append(k)
126 126
127 127 if avail:
128 128 out.append('\nTools and libraries available at test time:\n')
129 129 avail.sort()
130 130 out.append(' ' + ' '.join(avail)+'\n')
131 131
132 132 if not_avail:
133 133 out.append('\nTools and libraries NOT available at test time:\n')
134 134 not_avail.sort()
135 135 out.append(' ' + ' '.join(not_avail)+'\n')
136 136
137 137 return ''.join(out)
138 138
139 139
140 140 def make_exclude():
141 141 """Make patterns of modules and packages to exclude from testing.
142 142
143 143 For the IPythonDoctest plugin, we need to exclude certain patterns that
144 144 cause testing problems. We should strive to minimize the number of
145 145 skipped modules, since this means untested code.
146 146
147 147 These modules and packages will NOT get scanned by nose at all for tests.
148 148 """
149 149 # Simple utility to make IPython paths more readable; we need a lot of
150 150 # these below
151 151 ipjoin = lambda *paths: pjoin('IPython', *paths)
152 152
153 153 exclusions = [ipjoin('external'),
154 154 pjoin('IPython_doctest_plugin'),
155 155 ipjoin('quarantine'),
156 156 ipjoin('deathrow'),
157 157 ipjoin('testing', 'attic'),
158 158 # This guy is probably attic material
159 159 ipjoin('testing', 'mkdoctests'),
160 160 # Testing inputhook will need a lot of thought, to figure out
161 161 # how to have tests that don't lock up with the gui event
162 162 # loops in the picture
163 163 ipjoin('lib', 'inputhook'),
164 164 # Config files aren't really importable stand-alone
165 165 ipjoin('config', 'default'),
166 166 ipjoin('config', 'profile'),
167 167 ]
168 168
169 169 if not have['wx']:
170 170 exclusions.append(ipjoin('lib', 'inputhookwx'))
171 171
172 172 # We do this unconditionally, so that the test suite doesn't import
173 173 # gtk, changing the default encoding and masking some unicode bugs.
174 174 exclusions.append(ipjoin('lib', 'inputhookgtk'))
175 175
176 176 # These have to be skipped on win32 because they use echo, rm, cd, etc.
177 177 # See ticket https://bugs.launchpad.net/bugs/366982
178 178 if sys.platform == 'win32':
179 179 exclusions.append(ipjoin('testing', 'plugin', 'test_exampleip'))
180 180 exclusions.append(ipjoin('testing', 'plugin', 'dtexample'))
181 181
182 182 if not have['pexpect']:
183 183 exclusions.extend([ipjoin('scripts', 'irunner'),
184 184 ipjoin('lib', 'irunner')])
185 185
186 186 if not have['zmq']:
187 187 exclusions.append(ipjoin('zmq'))
188 188 exclusions.append(ipjoin('parallel'))
189 189
190 190 # This is needed for the reg-exp to match on win32 in the ipdoctest plugin.
191 191 if sys.platform == 'win32':
192 192 exclusions = [s.replace('\\','\\\\') for s in exclusions]
193 193
194 194 return exclusions
195 195
196 196
197 197 class IPTester(object):
198 198 """Class that calls iptest or trial in a subprocess.
199 199 """
200 200 #: string, name of test runner that will be called
201 201 runner = None
202 202 #: list, parameters for test runner
203 203 params = None
204 204 #: list, arguments of system call to be made to call test runner
205 205 call_args = None
206 206 #: list, process ids of subprocesses we start (for cleanup)
207 207 pids = None
208 208
209 209 def __init__(self, runner='iptest', params=None):
210 210 """Create new test runner."""
211 211 p = os.path
212 212 if runner == 'iptest':
213 213 iptest_app = get_ipython_module_path('IPython.testing.iptest')
214 214 self.runner = pycmd2argv(iptest_app) + sys.argv[1:]
215 215 else:
216 216 raise Exception('Not a valid test runner: %s' % repr(runner))
217 217 if params is None:
218 218 params = []
219 219 if isinstance(params, str):
220 220 params = [params]
221 221 self.params = params
222 222
223 223 # Assemble call
224 224 self.call_args = self.runner+self.params
225 225
226 226 # Store pids of anything we start to clean up on deletion, if possible
227 227 # (on posix only, since win32 has no os.kill)
228 228 self.pids = []
229 229
230 230 if sys.platform == 'win32':
231 231 def _run_cmd(self):
232 232 # On Windows, use os.system instead of subprocess.call, because I
233 233 # was having problems with subprocess and I just don't know enough
234 234 # about win32 to debug this reliably. Os.system may be the 'old
235 235 # fashioned' way to do it, but it works just fine. If someone
236 236 # later can clean this up that's fine, as long as the tests run
237 237 # reliably in win32.
238 238 # What types of problems are you having? They may be related to
239 239 # running Python in unbuffered mode. BG.
240 240 return os.system(' '.join(self.call_args))
241 241 else:
242 242 def _run_cmd(self):
243 243 # print >> sys.stderr, '*** CMD:', ' '.join(self.call_args) # dbg
244 244 subp = subprocess.Popen(self.call_args)
245 245 self.pids.append(subp.pid)
246 246 # If this fails, the pid will be left in self.pids and cleaned up
247 247 # later, but if the wait call succeeds, then we can clear the
248 248 # stored pid.
249 249 retcode = subp.wait()
250 250 self.pids.pop()
251 251 return retcode
252 252
253 253 def run(self):
254 254 """Run the stored commands"""
255 255 try:
256 256 return self._run_cmd()
257 257 except:
258 258 import traceback
259 259 traceback.print_exc()
260 260 return 1 # signal failure
261 261
262 262 def __del__(self):
263 263 """Cleanup on exit by killing any leftover processes."""
264 264
265 265 if not hasattr(os, 'kill'):
266 266 return
267 267
268 268 for pid in self.pids:
269 269 try:
270 270 print 'Cleaning stale PID:', pid
271 271 os.kill(pid, signal.SIGKILL)
272 272 except OSError:
273 273 # This is just a best effort, if we fail or the process was
274 274 # really gone, ignore it.
275 275 pass
276 276
277 277
278 278 def make_runners():
279 279 """Define the top-level packages that need to be tested.
280 280 """
281 281
282 282 # Packages to be tested via nose, that only depend on the stdlib
283 283 nose_pkg_names = ['config', 'core', 'extensions', 'frontend', 'lib',
284 284 'scripts', 'testing', 'utils' ]
285 285
286 if have['zmq']:
287 nose_pkg_names.append('parallel')
288
286 289 # For debugging this code, only load quick stuff
287 290 #nose_pkg_names = ['core', 'extensions'] # dbg
288 291
289 292 # Make fully qualified package names prepending 'IPython.' to our name lists
290 293 nose_packages = ['IPython.%s' % m for m in nose_pkg_names ]
291 294
292 295 # Make runners
293 296 runners = [ (v, IPTester('iptest', params=v)) for v in nose_packages ]
294 297
295 298 return runners
296 299
297 300
298 301 def run_iptest():
299 302 """Run the IPython test suite using nose.
300 303
301 304 This function is called when this script is **not** called with the form
302 305 `iptest all`. It simply calls nose with appropriate command line flags
303 306 and accepts all of the standard nose arguments.
304 307 """
305 308
306 309 warnings.filterwarnings('ignore',
307 310 'This will be removed soon. Use IPython.testing.util instead')
308 311
309 312 argv = sys.argv + [ '--detailed-errors', # extra info in tracebacks
310 313
311 314 # Loading ipdoctest causes problems with Twisted, but
312 315 # our test suite runner now separates things and runs
313 316 # all Twisted tests with trial.
314 317 '--with-ipdoctest',
315 318 '--ipdoctest-tests','--ipdoctest-extension=txt',
316 319
317 320 # We add --exe because of setuptools' imbecility (it
318 321 # blindly does chmod +x on ALL files). Nose does the
319 322 # right thing and it tries to avoid executables,
320 323 # setuptools unfortunately forces our hand here. This
321 324 # has been discussed on the distutils list and the
322 325 # setuptools devs refuse to fix this problem!
323 326 '--exe',
324 327 ]
325 328
326 329 if nose.__version__ >= '0.11':
327 330 # I don't fully understand why we need this one, but depending on what
328 331 # directory the test suite is run from, if we don't give it, 0 tests
329 332 # get run. Specifically, if the test suite is run from the source dir
330 333 # with an argument (like 'iptest.py IPython.core'), 0 tests are run,
331 334 # even if the same call done in this directory works fine. It appears
332 335 # that if the requested package is in the current dir, nose bails early
333 336 # by default. Since it's otherwise harmless, leave it in by default
334 337 # for nose >= 0.11, though unfortunately nose 0.10 doesn't support it.
335 338 argv.append('--traverse-namespace')
336 339
337 340 # Construct list of plugins, omitting the existing doctest plugin, which
338 341 # ours replaces (and extends).
339 342 plugins = [IPythonDoctest(make_exclude()), KnownFailure()]
340 343 for p in nose.plugins.builtin.plugins:
341 344 plug = p()
342 345 if plug.name == 'doctest':
343 346 continue
344 347 plugins.append(plug)
345 348
346 349 # We need a global ipython running in this process
347 350 globalipapp.start_ipython()
348 351 # Now nose can run
349 352 TestProgram(argv=argv, plugins=plugins)
350 353
351 354
352 355 def run_iptestall():
353 356 """Run the entire IPython test suite by calling nose and trial.
354 357
355 358 This function constructs :class:`IPTester` instances for all IPython
356 359 modules and package and then runs each of them. This causes the modules
357 360 and packages of IPython to be tested each in their own subprocess using
358 361 nose or twisted.trial appropriately.
359 362 """
360 363
361 364 runners = make_runners()
362 365
363 366 # Run the test runners in a temporary dir so we can nuke it when finished
364 367 # to clean up any junk files left over by accident. This also makes it
365 368 # robust against being run in non-writeable directories by mistake, as the
366 369 # temp dir will always be user-writeable.
367 370 curdir = os.getcwd()
368 371 testdir = tempfile.gettempdir()
369 372 os.chdir(testdir)
370 373
371 374 # Run all test runners, tracking execution time
372 375 failed = []
373 376 t_start = time.time()
374 377 try:
375 378 for (name, runner) in runners:
376 379 print '*'*70
377 380 print 'IPython test group:',name
378 381 res = runner.run()
379 382 if res:
380 383 failed.append( (name, runner) )
381 384 finally:
382 385 os.chdir(curdir)
383 386 t_end = time.time()
384 387 t_tests = t_end - t_start
385 388 nrunners = len(runners)
386 389 nfail = len(failed)
387 390 # summarize results
388 391 print
389 392 print '*'*70
390 393 print 'Test suite completed for system with the following information:'
391 394 print report()
392 395 print 'Ran %s test groups in %.3fs' % (nrunners, t_tests)
393 396 print
394 397 print 'Status:'
395 398 if not failed:
396 399 print 'OK'
397 400 else:
398 401 # If anything went wrong, point out what command to rerun manually to
399 402 # see the actual errors and individual summary
400 403 print 'ERROR - %s out of %s test groups failed.' % (nfail, nrunners)
401 404 for name, failed_runner in failed:
402 405 print '-'*40
403 406 print 'Runner failed:',name
404 407 print 'You may wish to rerun this one individually, with:'
405 408 print ' '.join(failed_runner.call_args)
406 409 print
407 410
408 411
409 412 def main():
410 413 for arg in sys.argv[1:]:
411 414 if arg.startswith('IPython'):
412 415 # This is in-process
413 416 run_iptest()
414 417 else:
415 418 # This starts subprocesses
416 419 run_iptestall()
417 420
418 421
419 422 if __name__ == '__main__':
420 423 main()
@@ -1,153 +1,153 b''
1 1 # encoding: utf-8
2 2
3 3 """Pickle related utilities. Perhaps this should be called 'can'."""
4 4
5 5 __docformat__ = "restructuredtext en"
6 6
7 7 #-------------------------------------------------------------------------------
8 8 # Copyright (C) 2008 The IPython Development Team
9 9 #
10 10 # Distributed under the terms of the BSD License. The full license is in
11 11 # the file COPYING, distributed as part of this software.
12 12 #-------------------------------------------------------------------------------
13 13
14 14 #-------------------------------------------------------------------------------
15 15 # Imports
16 16 #-------------------------------------------------------------------------------
17 17
18 18 import copy
19 19 import sys
20 20 from types import FunctionType
21 21
22 22 import codeutil
23 23
24 24 #-------------------------------------------------------------------------------
25 25 # Classes
26 26 #-------------------------------------------------------------------------------
27 27
28 28
29 29 class CannedObject(object):
30 30 def __init__(self, obj, keys=[]):
31 31 self.keys = keys
32 32 self.obj = copy.copy(obj)
33 33 for key in keys:
34 34 setattr(self.obj, key, can(getattr(obj, key)))
35 35
36 36
37 37 def getObject(self, g=None):
38 38 if g is None:
39 39 g = globals()
40 40 for key in self.keys:
41 41 setattr(self.obj, key, uncan(getattr(self.obj, key), g))
42 42 return self.obj
43 43
44 44 class Reference(CannedObject):
45 45 """object for wrapping a remote reference by name."""
46 46 def __init__(self, name):
47 47 if not isinstance(name, basestring):
48 48 raise TypeError("illegal name: %r"%name)
49 49 self.name = name
50 50
51 51 def __repr__(self):
52 52 return "<Reference: %r>"%self.name
53 53
54 54 def getObject(self, g=None):
55 55 if g is None:
56 56 g = globals()
57 57 try:
58 58 return g[self.name]
59 59 except KeyError:
60 60 raise NameError("name %r is not defined"%self.name)
61 61
62 62
63 63 class CannedFunction(CannedObject):
64 64
65 65 def __init__(self, f):
66 66 self._checkType(f)
67 67 self.code = f.func_code
68 68 self.defaults = f.func_defaults
69 69 self.module = f.__module__ or '__main__'
70 70 self.__name__ = f.__name__
71 71
72 72 def _checkType(self, obj):
73 73 assert isinstance(obj, FunctionType), "Not a function type"
74 74
75 75 def getObject(self, g=None):
76 76 # try to load function back into its module:
77 77 if not self.module.startswith('__'):
78 78 try:
79 79 __import__(self.module)
80 80 except ImportError:
81 81 pass
82 82 else:
83 83 g = sys.modules[self.module].__dict__
84 84
85 85 if g is None:
86 86 g = globals()
87 87 newFunc = FunctionType(self.code, g, self.__name__, self.defaults)
88 88 return newFunc
89 89
90 90 #-------------------------------------------------------------------------------
91 91 # Functions
92 92 #-------------------------------------------------------------------------------
93 93
94 94 def can(obj):
95 95 # import here to prevent module-level circular imports
96 from IPython.parallel.dependency import dependent
96 from IPython.parallel import dependent
97 97 if isinstance(obj, dependent):
98 98 keys = ('f','df')
99 99 return CannedObject(obj, keys=keys)
100 100 elif isinstance(obj, FunctionType):
101 101 return CannedFunction(obj)
102 102 elif isinstance(obj,dict):
103 103 return canDict(obj)
104 104 elif isinstance(obj, (list,tuple)):
105 105 return canSequence(obj)
106 106 else:
107 107 return obj
108 108
109 109 def canDict(obj):
110 110 if isinstance(obj, dict):
111 111 newobj = {}
112 112 for k, v in obj.iteritems():
113 113 newobj[k] = can(v)
114 114 return newobj
115 115 else:
116 116 return obj
117 117
118 118 def canSequence(obj):
119 119 if isinstance(obj, (list, tuple)):
120 120 t = type(obj)
121 121 return t([can(i) for i in obj])
122 122 else:
123 123 return obj
124 124
125 125 def uncan(obj, g=None):
126 126 if isinstance(obj, CannedObject):
127 127 return obj.getObject(g)
128 128 elif isinstance(obj,dict):
129 129 return uncanDict(obj, g)
130 130 elif isinstance(obj, (list,tuple)):
131 131 return uncanSequence(obj, g)
132 132 else:
133 133 return obj
134 134
135 135 def uncanDict(obj, g=None):
136 136 if isinstance(obj, dict):
137 137 newobj = {}
138 138 for k, v in obj.iteritems():
139 139 newobj[k] = uncan(v,g)
140 140 return newobj
141 141 else:
142 142 return obj
143 143
144 144 def uncanSequence(obj, g=None):
145 145 if isinstance(obj, (list, tuple)):
146 146 t = type(obj)
147 147 return t([uncan(i,g) for i in obj])
148 148 else:
149 149 return obj
150 150
151 151
152 152 def rebindFunctionGlobals(f, glbls):
153 153 return FunctionType(f.func_code, glbls)
@@ -1,621 +1,621 b''
1 1 .. _parallel_details:
2 2
3 3 ==========================================
4 4 Details of Parallel Computing with IPython
5 5 ==========================================
6 6
7 7 .. note::
8 8
9 9 There are still many sections to fill out
10 10
11 11
12 12 Caveats
13 13 =======
14 14
15 15 First, some caveats about the detailed workings of parallel computing with 0MQ and IPython.
16 16
17 17 Non-copying sends and numpy arrays
18 18 ----------------------------------
19 19
20 20 When numpy arrays are passed as arguments to apply or via data-movement methods, they are not
21 21 copied. This means that you must be careful if you are sending an array that you intend to work
22 22 on. PyZMQ does allow you to track when a message has been sent so you can know when it is safe
23 23 to edit the buffer, but IPython only allows for this.
24 24
25 25 It is also important to note that the non-copying receive of a message is *read-only*. That
26 26 means that if you intend to work in-place on an array that you have sent or received, you must
27 27 copy it. This is true for both numpy arrays sent to engines and numpy arrays retrieved as
28 28 results.
29 29
30 30 The following will fail:
31 31
32 32 .. sourcecode:: ipython
33 33
34 34 In [3]: A = numpy.zeros(2)
35 35
36 36 In [4]: def setter(a):
37 37 ...: a[0]=1
38 38 ...: return a
39 39
40 40 In [5]: rc[0].apply_sync(setter, A)
41 41 ---------------------------------------------------------------------------
42 42 RemoteError Traceback (most recent call last)
43 43 ...
44 44 RemoteError: RuntimeError(array is not writeable)
45 45 Traceback (most recent call last):
46 46 File "/path/to/site-packages/IPython/parallel/streamkernel.py", line 329, in apply_request
47 47 exec code in working, working
48 48 File "<string>", line 1, in <module>
49 49 File "<ipython-input-14-736187483856>", line 2, in setter
50 50 RuntimeError: array is not writeable
51 51
52 52 If you do need to edit the array in-place, just remember to copy the array if it's read-only.
53 53 The :attr:`ndarray.flags.writeable` flag will tell you if you can write to an array.
54 54
55 55 .. sourcecode:: ipython
56 56
57 57 In [3]: A = numpy.zeros(2)
58 58
59 59 In [4]: def setter(a):
60 60 ...: """only copy read-only arrays"""
61 61 ...: if not a.flags.writeable:
62 62 ...: a=a.copy()
63 63 ...: a[0]=1
64 64 ...: return a
65 65
66 66 In [5]: rc[0].apply_sync(setter, A)
67 67 Out[5]: array([ 1., 0.])
68 68
69 69 # note that results will also be read-only:
70 70 In [6]: _.flags.writeable
71 71 Out[6]: False
72 72
73 73 If you want to safely edit an array in-place after *sending* it, you must use the `track=True` flag. IPython always performs non-copying sends of arrays, which return immediately. You
74 74 must instruct IPython to track those messages *at send time* in order to know for sure that the send has completed. AsyncResults have a :attr:`sent` property, and a :meth:`wait_on_send` method
75 75 for checking and waiting for 0MQ to finish with a buffer.
76 76
77 77 .. sourcecode:: ipython
78 78
79 79 In [5]: A = numpy.random.random((1024,1024))
80 80
81 81 In [6]: view.track=True
82 82
83 83 In [7]: ar = view.apply_async(lambda x: 2*x, A)
84 84
85 85 In [8]: ar.sent
86 86 Out[8]: False
87 87
88 88 In [9]: ar.wait_on_send() # blocks until sent is True
89 89
90 90
91 91 What is sendable?
92 92 -----------------
93 93
94 94 If IPython doesn't know what to do with an object, it will pickle it. There is a short list of
95 95 objects that are not pickled: ``buffers``, ``str/bytes`` objects, and ``numpy``
96 96 arrays. These are handled specially by IPython in order to prevent the copying of data. Sending
97 97 bytes or numpy arrays will result in exactly zero in-memory copies of your data (unless the data
98 98 is very small).
99 99
100 100 If you have an object that provides a Python buffer interface, then you can always send that
101 101 buffer without copying - and reconstruct the object on the other side in your own code. It is
102 102 possible that the object reconstruction will become extensible, so you can add your own
103 103 non-copying types, but this does not yet exist.
104 104
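For illustration only (this sketch is not from the original text, and the ``rebuild`` helper is hypothetical): you could ship an array's raw bytes and rebuild the array remotely with ``numpy.frombuffer``. Note that ``tostring()`` itself copies on the client; the point is only that sending a bytes object is zero-copy.

.. sourcecode:: ipython

    In [1]: import numpy

    In [2]: A = numpy.arange(8.0)

    In [3]: def rebuild(buf, dtype, shape):
       ...:     import numpy
       ...:     # reconstruct (a read-only view of) the array on the engine
       ...:     return numpy.frombuffer(buf, dtype=dtype).reshape(shape).sum()

    In [4]: rc[0].apply_sync(rebuild, A.tostring(), A.dtype.str, A.shape)
    Out[4]: 28.0
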
105 105 Closures
106 106 ********
107 107
108 108 Just about anything in Python is pickleable. The one notable exception is objects (generally
109 109 functions) with *closures*. Closures can be a complicated topic, but the basic principle is that
110 110 functions that refer to variables in their parent scope have closures.
111 111
112 112 An example of a function that uses a closure:
113 113
114 114 .. sourcecode:: python
115 115
116 116 def f(a):
117 117 def inner():
118 118 # inner will have a closure
119 119 return a
120 120 return inner
121 121
122 122 f1 = f(1)
123 123 f2 = f(2)
124 124 f1() # returns 1
125 125 f2() # returns 2
126 126
127 127 f1 and f2 will have closures referring to the scope in which `inner` was defined, because they
128 128 use the variable 'a'. As a result, you would not be able to send ``f1`` or ``f2`` with IPython.
129 129 Note that you *would* be able to send `f`. This is only true for interactively defined
130 130 functions (as are often used in decorators), and only when there are variables used inside the
131 131 inner function that are defined in the outer function. If the names are *not* in the outer
132 132 function, then there will not be a closure, and the generated function will look in
133 133 ``globals()`` for the name:
134 134
135 135 .. sourcecode:: python
136 136
137 137 def g(b):
138 138 # note that `b` is not referenced in inner's scope
139 139 def inner():
140 140 # this inner will *not* have a closure
141 141 return a
142 142 return inner
143 143 g1 = g(1)
144 144 g2 = g(2)
145 145 g1() # raises NameError on 'a'
146 146 a=5
147 147 g2() # returns 5
148 148
149 149 `g1` and `g2` *will* be sendable with IPython, and will treat the engine's namespace as
150 150 globals(). The :meth:`pull` method is implemented based on this principle. If we did not
151 151 provide pull, you could implement it yourself with `apply`, by simply returning objects out
152 152 of the global namespace:
153 153
154 154 .. sourcecode:: ipython
155 155
156 156 In [10]: view.apply(lambda : a)
157 157
158 158 # is equivalent to
159 159 In [11]: view.pull('a')
160 160
161 161 Running Code
162 162 ============
163 163
164 164 There are two principal units of execution in Python: strings of Python code (e.g. 'a=5'),
165 165 and Python functions. IPython is designed around the use of functions via the core
166 166 Client method, called `apply`.
167 167
168 168 Apply
169 169 -----
170 170
171 171 The principal method of remote execution is :meth:`apply`, of View objects. The Client provides
172 172 the full execution and communication API for engines via its low-level
173 173 :meth:`send_apply_message` method.
174 174
175 175 f : function
176 176 The function to be called remotely
177 177 args : tuple/list
178 178 The positional arguments passed to `f`
179 179 kwargs : dict
180 180 The keyword arguments passed to `f`
181 181
182 182 flags for all views:
183 183
184 184 block : bool (default: view.block)
185 185 Whether to wait for the result, or return immediately.
186 186 False:
187 187 returns AsyncResult
188 188 True:
189 189 returns actual result(s) of f(*args, **kwargs)
190 190 if multiple targets:
191 191 list of results, matching `targets`
192 192 track : bool [default view.track]
193 193 whether to track non-copying sends.
194 194
195 195 targets : int,list of ints, 'all', None [default view.targets]
196 196 Specify the destination of the job.
197 197 if 'all' or None:
198 198 Run on all active engines
199 199 if list:
200 200 Run on each specified engine
201 201 if int:
202 202 Run on single engine
203 203
204 204 Note that LoadBalancedView uses targets to restrict possible destinations. LoadBalanced calls
205 205 will always execute in just one location.
206 206
207 207 flags only in LoadBalancedViews:
208 208
209 209 after : Dependency or collection of msg_ids
210 210 Only for load-balanced execution (targets=None)
211 211 Specify a list of msg_ids as a time-based dependency.
212 212 This job will only be run *after* the dependencies
213 213 have been met.
214 214
215 215 follow : Dependency or collection of msg_ids
216 216 Only for load-balanced execution (targets=None)
217 217 Specify a list of msg_ids as a location-based dependency.
218 218 This job will only be run on an engine where this dependency
219 219 is met.
220 220
221 221 timeout : float/int or None
222 222 Only for load-balanced execution (targets=None)
223 223 Specify an amount of time (in seconds) for the scheduler to
224 224 wait for dependencies to be met before failing with a
225 225 DependencyTimeout.
226 226
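The flags above are easiest to see in a short sketch (not part of the original text; it assumes a running cluster and a :class:`Client` named ``rc``, and the outputs are illustrative). Flags are set as attributes on a View, and the ``apply_sync``/``apply_async`` variants are shorthands for ``block=True``/``block=False``:

.. sourcecode:: ipython

    In [1]: dv = rc[:]                 # DirectView: explicit targets

    In [2]: dv.block = True            # wait for and return the results

    In [3]: dv.apply(lambda a, b: a + b, 1, b=2)
    Out[3]: [3, 3, 3, 3]

    In [4]: lv = rc.load_balanced_view()             # targets=None: scheduler picks

    In [5]: ar = lv.apply_async(lambda x: x**2, 5)   # block=False: returns an AsyncResult

    In [6]: ar.get()
    Out[6]: 25
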
227 227 execute and run
228 228 ---------------
229 229
230 230 For executing strings of Python code, :class:`DirectView`s also provide an :meth:`execute` and a
231 231 :meth:`run` method, which rather than take functions and arguments, take simple strings.
232 232 `execute` simply takes a string of Python code to execute, and sends it to the Engine(s). `run`
233 233 is the same as `execute`, but for a *file*, rather than a string. It is simply a wrapper that
234 234 does something very similar to ``execute(open(f).read())``.
235 235
236 236 .. note::
237 237
238 238 TODO: Example
239 239
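As a stand-in for the missing example, a minimal sketch (``dview`` is a DirectView; ``myscript.py`` is a hypothetical file on the client):

.. sourcecode:: ipython

    In [1]: dview.execute('a = 5')       # run a string of code on the engine(s)

    In [2]: dview['a']
    Out[2]: [5, 5, 5, 5]

    In [3]: dview.run('myscript.py')     # run the contents of a file on the engine(s)
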
240 240 Views
241 241 =====
242 242
243 243 The principal extension of the :class:`~parallel.Client` is the
244 :class:`~parallel.view.View` class. The client
244 :class:`~parallel.View` class. The client
245 245
246 246
247 247 DirectView
248 248 ----------
249 249
250 250 The :class:`.DirectView` is the class for the IPython :ref:`Multiplexing Interface
251 251 <parallel_multiengine>`.
252 252
253 253 Creating a DirectView
254 254 *********************
255 255
256 256 DirectViews can be created in two ways, by index access to a client, or by a client's
257 257 :meth:`view` method. Index access to a Client works in a few ways. First, you can create
258 258 DirectViews to single engines simply by accessing the client by engine id:
259 259
260 260 .. sourcecode:: ipython
261 261
262 262 In [2]: rc[0]
263 263 Out[2]: <DirectView 0>
264 264
265 265 You can also create a DirectView with a list of engines:
266 266
267 267 .. sourcecode:: ipython
268 268
269 269 In [2]: rc[0,1,2]
270 270 Out[2]: <DirectView [0,1,2]>
271 271
272 272 Other methods for accessing elements, such as slicing and negative indexing, work by passing
273 273 the index directly to the client's :attr:`ids` list, so:
274 274
275 275 .. sourcecode:: ipython
276 276
277 277 # negative index
278 278 In [2]: rc[-1]
279 279 Out[2]: <DirectView 3>
280 280
281 281 # or slicing:
282 282 In [3]: rc[::2]
283 283 Out[3]: <DirectView [0,2]>
284 284
285 285 are always the same as:
286 286
287 287 .. sourcecode:: ipython
288 288
289 289 In [2]: rc[rc.ids[-1]]
290 290 Out[2]: <DirectView 3>
291 291
292 292 In [3]: rc[rc.ids[::2]]
293 293 Out[3]: <DirectView [0,2]>
294 294
295 295 Also note that the slice is evaluated at the time of construction of the DirectView, so the
296 296 targets will not change over time if engines are added/removed from the cluster.
297 297
298 298 Execution via DirectView
299 299 ************************
300 300
301 301 The DirectView is the simplest way to work with one or more engines directly (hence the name).
302 302
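A minimal sketch (not from the original text), assuming a Client ``rc`` connected to four engines:

.. sourcecode:: ipython

    In [1]: dview = rc[:]                         # all engines

    In [2]: dview.apply_sync(lambda x: 2*x, 21)
    Out[2]: [42, 42, 42, 42]

    In [3]: rc[0].apply_sync(lambda : 'just me')  # a single-engine DirectView
    Out[3]: 'just me'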
303 303
304 304 Data movement via DirectView
305 305 ****************************
306 306
307 307 Since a Python namespace is just a :class:`dict`, :class:`DirectView` objects provide
308 308 dictionary-style access by key and methods such as :meth:`get` and
309 309 :meth:`update` for convenience. This makes the remote namespaces of the engines
310 310 appear as a local dictionary. Underneath, these methods call :meth:`apply`:
311 311
312 312 .. sourcecode:: ipython
313 313
314 314 In [51]: dview['a']=['foo','bar']
315 315
316 316 In [52]: dview['a']
317 317 Out[52]: [ ['foo', 'bar'], ['foo', 'bar'], ['foo', 'bar'], ['foo', 'bar'] ]
318 318
319 319 Scatter and gather
320 320 ------------------
321 321
322 322 Sometimes it is useful to partition a sequence and push the partitions to
323 323 different engines. In MPI language, this is known as scatter/gather and we
324 324 follow that terminology. However, it is important to remember that in
325 325 IPython's :class:`Client` class, :meth:`scatter` is from the
326 326 interactive IPython session to the engines and :meth:`gather` is from the
327 327 engines back to the interactive IPython session. For scatter/gather operations
328 328 between engines, MPI should be used:
329 329
330 330 .. sourcecode:: ipython
331 331
332 332 In [58]: dview.scatter('a',range(16))
333 333 Out[58]: [None,None,None,None]
334 334
335 335 In [59]: dview['a']
336 336 Out[59]: [ [0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15] ]
337 337
338 338 In [60]: dview.gather('a')
339 339 Out[60]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
340 340
341 341 Push and pull
342 342 -------------
343 343
344 344 push
345 345
346 346 pull
347 347
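A minimal sketch of both (assuming a four-engine DirectView ``dview``); as described above, these are thin wrappers around :meth:`apply` that operate on the engines' namespaces:

.. sourcecode:: ipython

    In [1]: dview.push(dict(a=1, b='hi'))    # inject names into each engine's namespace

    In [2]: dview.pull('a')                  # fetch a name from each engine
    Out[2]: [1, 1, 1, 1]

    In [3]: dview.pull(('a', 'b'))           # multiple names: one list per engine
    Out[3]: [[1, 'hi'], [1, 'hi'], [1, 'hi'], [1, 'hi']]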
348 348
349 349
350 350
351 351
352 352 LoadBalancedView
353 353 ----------------
354 354
355 355 The :class:`.LoadBalancedView`
356 356
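A minimal sketch of creating and using one (assuming a Client ``rc``; load-balanced views are usually obtained from :meth:`Client.load_balanced_view`):

.. sourcecode:: ipython

    In [1]: lview = rc.load_balanced_view()

    In [2]: ar = lview.apply_async(lambda x: x**2, 7)   # the scheduler picks an engine

    In [3]: ar.get()
    Out[3]: 49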
357 357
358 358 Data Movement
359 359 =============
360 360
361 361 Reference
362 362
363 363 Results
364 364 =======
365 365
366 366 AsyncResults
367 367 ------------
368 368
369 369 Our primary representation is the AsyncResult object, based on the object of the same name in
370 370 the built-in :mod:`multiprocessing.pool` module. Our version provides a superset of that
371 371 interface.
372 372
373 373 The basic principle of the AsyncResult is the encapsulation of one or more results not yet completed. Execution methods (including data movement, such as push/pull) will all return
374 374 AsyncResults when `block=False`.
375 375
376 376 The mp.pool.AsyncResult interface
377 377 ---------------------------------
378 378
379 379 The basic interface of the AsyncResult is exactly that of the AsyncResult in :mod:`multiprocessing.pool`, and consists of four methods:
380 380
381 381 .. AsyncResult spec directly from docs.python.org
382 382
383 383 .. class:: AsyncResult
384 384
385 385 The stdlib AsyncResult spec
386 386
387 387 .. method:: wait([timeout])
388 388
389 389 Wait until the result is available or until *timeout* seconds pass. This
390 390 method always returns ``None``.
391 391
392 392 .. method:: ready()
393 393
394 394 Return whether the call has completed.
395 395
396 396 .. method:: successful()
397 397
398 398 Return whether the call completed without raising an exception. Will
399 399 raise :exc:`AssertionError` if the result is not ready.
400 400
401 401 .. method:: get([timeout])
402 402
403 403 Return the result when it arrives. If *timeout* is not ``None`` and the
404 404 result does not arrive within *timeout* seconds then
405 405 :exc:`TimeoutError` is raised. If the remote call raised
406 406 an exception then that exception will be reraised as a :exc:`RemoteError`
407 407 by :meth:`get`.
408 408
409 409
410 410 While an AsyncResult is not done, you can check on it with its :meth:`ready` method, which will
411 411 return whether the AR is done. You can also wait on an AsyncResult with its :meth:`wait` method.
412 412 This method blocks until the result arrives. If you don't want to wait forever, you can pass a
413 413 timeout (in seconds) as an argument to :meth:`wait`. :meth:`wait` will *always return None*, and
414 414 should never raise an error.
415 415
416 416 :meth:`ready` and :meth:`wait` are insensitive to the success or failure of the call. After a
417 417 result is done, :meth:`successful` will tell you whether the call completed without raising an
418 418 exception.
419 419
420 420 If you actually want the result of the call, you can use :meth:`get`. Initially, :meth:`get`
421 421 behaves just like :meth:`wait`, in that it will block until the result is ready, or until a
422 422 timeout is met. However, unlike :meth:`wait`, :meth:`get` will raise a :exc:`TimeoutError` if
423 423 the timeout is reached and the result is still not ready. If the result arrives before the
424 424 timeout is reached, then :meth:`get` will return the result itself if no exception was raised,
425 425 and will raise an exception if there was.
426 426
427 427 Here is where we start to expand on the multiprocessing interface. Rather than raising the
428 428 original exception, a RemoteError will be raised, encapsulating the remote exception with some
429 429 metadata. If the AsyncResult represents multiple calls (e.g. any time `targets` is plural), then
430 430 a CompositeError, a subclass of RemoteError, will be raised.
431 431
432 432 .. seealso::
433 433
434 434 For more information on remote exceptions, see :ref:`the section in the Direct Interface
435 435 <Parallel_exceptions>`.
436 436
437 437 Extended interface
438 438 ******************
439 439
440 440
441 441 Other extensions of the AsyncResult interface include convenience wrappers for :meth:`get`.
442 442 AsyncResults have a property, :attr:`result`, with the short alias :attr:`r`, which simply call
443 443 :meth:`get`. Since our object is designed for representing *parallel* results, it is expected
444 444 that many calls (any of those submitted via DirectView) will map results to engine IDs. We
445 445 provide a :meth:`get_dict`, which is also a wrapper on :meth:`get`, which returns a dictionary
446 446 of the individual results, keyed by engine ID.
447 447
448 448 You can also prevent a submitted job from actually executing, via the AsyncResult's :meth:`abort` method. This will instruct engines to not execute the job when it arrives.
449 449
450 450 The larger extension of the AsyncResult API is the :attr:`metadata` attribute. The metadata
451 451 is a dictionary (with attribute access) that contains, logically enough, metadata about the
452 452 execution.
453 453
454 454 Metadata keys:
455 455
456 456 timestamps
457 457
458 458 submitted
459 459 When the task left the Client
460 460 started
461 461 When the task started execution on the engine
462 462 completed
463 463 When execution finished on the engine
464 464 received
465 465 When the result arrived on the Client
466 466
467 467 note that it is not known when the result arrived in 0MQ on the client, only when it
468 468 arrived in Python via :meth:`Client.spin`, so in interactive use, this may not be
469 469 strictly informative.
470 470
471 471 Information about the engine
472 472
473 473 engine_id
474 474 The integer id
475 475 engine_uuid
476 476 The UUID of the engine
477 477
478 478 output of the call
479 479
480 480 pyerr
481 481 Python exception, if there was one
482 482 pyout
483 483 Python output
484 484 stderr
485 485 stderr stream
486 486 stdout
487 487 stdout (e.g. print) stream
488 488
489 489 And some extended information
490 490
491 491 status
492 492 either 'ok' or 'error'
493 493 msg_id
494 494 The UUID of the message
495 495 after
496 496 For tasks: the time-based msg_id dependencies
497 497 follow
498 498 For tasks: the location-based msg_id dependencies
499 499
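A minimal sketch of the extended interface (not from the original text; outputs and timings are illustrative only, and ``dview``/``rc`` are assumed as before):

.. sourcecode:: ipython

    In [1]: ar = dview.apply_async(lambda : 'hi')

    In [2]: ar.get_dict()              # results keyed by engine ID
    Out[2]: {0: 'hi', 1: 'hi', 2: 'hi', 3: 'hi'}

    In [3]: ar0 = rc[0].apply_async(lambda : 'hi')

    In [4]: ar0.r                      # shorthand for ar0.get()
    Out[4]: 'hi'

    In [5]: ar0.metadata['completed'] - ar0.metadata['started']   # time spent executing
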
500 500 While in most cases, the Clients that submitted a request will be the ones using the results,
501 501 other Clients can also request results directly from the Hub. This is done via the Client's
502 502 :meth:`get_result` method. This method will *always* return an AsyncResult object. If the call
503 503 was not submitted by the client, then it will be a subclass, called :class:`AsyncHubResult`.
504 504 These behave in the same way as an AsyncResult, but if the result is not ready, waiting on an
505 505 AsyncHubResult polls the Hub, which is much more expensive than the passive polling used
506 506 in regular AsyncResults.
507 507
508 508
509 509 The Client keeps track of all results
510 510 history, results, metadata
511 511
512 512 Querying the Hub
513 513 ================
514 514
515 515 The Hub sees all traffic that may pass through the schedulers between engines and clients.
516 516 It does this so that it can track state, allowing multiple clients to retrieve results of
517 517 computations submitted by their peers, as well as persisting the state to a database.
518 518
519 519 queue_status
520 520
521 521 You can check the status of the queues of the engines with this command.
522 522
523 523 result_status
524 524
525 525 check on results
526 526
527 527 purge_results
528 528
529 529 forget results (conserve resources)
530 530
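A minimal sketch of these queries from a Client ``rc`` (``msg_ids`` is a hypothetical list of message IDs; the exact return shapes are not shown here):

.. sourcecode:: ipython

    In [1]: rc.queue_status()            # per-engine queue/completed counts

    In [2]: rc.result_status(msg_ids)    # check on specific results by msg_id

    In [3]: rc.purge_results('all')      # ask the Hub to forget stored results
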
531 531 Controlling the Engines
532 532 =======================
533 533
534 534 There are a few actions you can do with Engines that do not involve execution. These
535 535 messages are sent via the Control socket, and bypass any long queues of waiting execution
536 536 jobs
537 537
538 538 abort
539 539
540 540 Sometimes you may want to prevent a job you have submitted from actually running. The method
541 541 for this is :meth:`abort`. It takes a container of msg_ids, and instructs the Engines to not
542 542 run the jobs if they arrive. The jobs will then fail with an AbortedTask error.
543 543
544 544 clear
545 545
546 546 You may want to purge the Engine(s) namespace of any data you have left in it. After
547 547 running `clear`, there will be no names in the Engine's namespace.
548 548
549 549 shutdown
550 550
551 551 You can also instruct engines (and the Controller) to terminate from a Client. This
552 552 can be useful when a job is finished, since you can shutdown all the processes with a
553 553 single command.
554 554
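A minimal sketch of the three control actions (``rc`` is a Client, ``dview`` a DirectView, and ``ar`` a previously submitted AsyncResult):

.. sourcecode:: ipython

    In [1]: ar.abort()             # don't run this job if it hasn't started yet

    In [2]: dview.clear()          # wipe the user namespace on the engines

    In [3]: rc.shutdown(hub=True)  # terminate the engines and the controller
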
555 555 Synchronization
556 556 ===============
557 557
558 558 Since the Client is a synchronous object, events do not automatically trigger in your
559 559 interactive session - you must poll the 0MQ sockets for incoming messages. Note that
560 560 this polling *does not* actually make any network requests. It simply performs a `select`
561 561 operation, to check if messages are already in local memory, waiting to be handled.
562 562
563 563 The method that handles incoming messages is :meth:`spin`. This method flushes any waiting
564 564 messages on the various incoming sockets, and updates the state of the Client.
565 565
566 566 If you need to wait for particular results to finish, you can use the :meth:`wait` method,
567 567 which will call :meth:`spin` until the messages are no longer outstanding. Anything that
568 568 represents a collection of messages, such as a list of msg_ids or one or more AsyncResult
569 569 objects, can be passed as argument to wait. A timeout can be specified, which will prevent
570 570 the call from blocking for more than a specified time, but the default behavior is to wait
571 571 forever.
572 572
573 573
574 574
575 575 The client also has an `outstanding` attribute - a ``set`` of msg_ids that are awaiting replies.
576 576 This is the default if wait is called with no arguments - i.e. wait on *all* outstanding
577 577 messages.
578 578
579 579
580 580 .. note::
581 581
582 582 TODO wait example
583 583
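As a stand-in for the missing example, a minimal sketch (assuming a Client ``rc`` and a DirectView ``dview``):

.. sourcecode:: ipython

    In [1]: import time

    In [2]: ar = dview.apply_async(time.sleep, 5)

    In [3]: rc.spin()                 # flush incoming messages; never blocks

    In [4]: rc.wait(ar, timeout=10)   # block (up to 10s) on this result
    Out[4]: True

    In [5]: rc.wait()                 # wait on *all* outstanding messages
    Out[5]: True
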
584 584 Map
585 585 ===
586 586
587 587 Many parallel computing problems can be expressed as a `map`, or running a single program with a
588 588 variety of different inputs. Python has a built-in :py:func:`map`, which does exactly this, and
589 589 many parallel execution tools in Python, such as the built-in :py:class:`multiprocessing.Pool`
590 590 object, provide implementations of `map`. All View objects provide a :meth:`map` method as well,
591 591 but the load-balanced and direct implementations differ.
592 592
593 593 Views' map methods can be called on any number of sequences, but they can also take the `block`
594 594 and `bound` keyword arguments, just like :meth:`~client.apply`, but *only as keywords*.
595 595
596 596 .. sourcecode:: python
597 597
598 598 dview.map(*sequences, block=None)
599 599
600 600
601 601 * iter, map_async, reduce
602 602
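For instance (a minimal sketch, with ``dview`` and ``lview`` as above):

.. sourcecode:: ipython

    In [1]: dview.map_sync(lambda x: x**2, range(8))      # partitioned across engines
    Out[1]: [0, 1, 4, 9, 16, 25, 36, 49]

    In [2]: ar = lview.map_async(lambda x, y: x + y, range(4), range(4))

    In [3]: ar.get()
    Out[3]: [0, 2, 4, 6]
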
603 603 Decorators and RemoteFunctions
604 604 ==============================
605 605
606 606 @parallel
607 607
608 608 @remote
609 609
610 610 RemoteFunction
611 611
612 612 ParallelFunction
613 613
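A minimal sketch of both decorators (assuming a four-engine DirectView ``dview``; outputs are illustrative). Each wraps a function in a RemoteFunction or ParallelFunction bound to the view, so that calling it runs it remotely:

.. sourcecode:: ipython

    In [1]: @dview.remote(block=True)
       ...: def getpid():
       ...:     import os
       ...:     return os.getpid()

    In [2]: getpid()               # runs on every engine in the view
    Out[2]: [12345, 12346, 12347, 12348]

    In [3]: @dview.parallel(block=True)
       ...: def psum(chunk):
       ...:     # each engine receives one partition of the scattered sequence
       ...:     return sum(chunk)

    In [4]: psum(range(8))         # one partial sum per engine (illustrative)
    Out[4]: [1, 5, 9, 13]
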
614 614 Dependencies
615 615 ============
616 616
617 617 @depend
618 618
619 619 @require
620 620
621 621 Dependency
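A minimal sketch of a functional dependency (assuming a LoadBalancedView ``lview``); ``@require`` marks a task to run only on engines where the named modules can be imported:

.. sourcecode:: ipython

    In [1]: from IPython.parallel import require

    In [2]: @require('numpy')
       ...: def norm2(a):
       ...:     import numpy
       ...:     return numpy.linalg.norm(a, 2)

    In [3]: lview.apply_sync(norm2, [3, 4])
    Out[3]: 5.0
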
@@ -1,253 +1,253 b''
1 1 .. _ip1par:
2 2
3 3 ============================
4 4 Overview and getting started
5 5 ============================
6 6
7 7 Introduction
8 8 ============
9 9
10 10 This section gives an overview of IPython's sophisticated and powerful
11 11 architecture for parallel and distributed computing. This architecture
12 12 abstracts out parallelism in a very general way, which enables IPython to
13 13 support many different styles of parallelism including:
14 14
15 15 * Single program, multiple data (SPMD) parallelism.
16 16 * Multiple program, multiple data (MPMD) parallelism.
17 17 * Message passing using MPI.
18 18 * Task farming.
19 19 * Data parallel.
20 20 * Combinations of these approaches.
21 21 * Custom user defined approaches.
22 22
23 23 Most importantly, IPython enables all types of parallel applications to
24 24 be developed, executed, debugged and monitored *interactively*. Hence,
25 25 the ``I`` in IPython. The following are some example usage cases for IPython:
26 26
27 27 * Quickly parallelize algorithms that are embarrassingly parallel
28 28 using a number of simple approaches. Many simple things can be
29 29 parallelized interactively in one or two lines of code.
30 30
31 31 * Steer traditional MPI applications on a supercomputer from an
32 32 IPython session on your laptop.
33 33
34 34 * Analyze and visualize large datasets (that could be remote and/or
35 35 distributed) interactively using IPython and tools like
36 36 matplotlib/TVTK.
37 37
38 38 * Develop, test and debug new parallel algorithms
39 39 (that may use MPI) interactively.
40 40
41 41 * Tie together multiple MPI jobs running on different systems into
42 42 one giant distributed and parallel system.
43 43
44 44 * Start a parallel job on your cluster and then have a remote
45 45 collaborator connect to it and pull back data into their
46 46 local IPython session for plotting and analysis.
47 47
48 48 * Run a set of tasks on a set of CPUs using dynamic load balancing.
49 49
50 50 Architecture overview
51 51 =====================
52 52
53 53 The IPython architecture consists of four components:
54 54
55 55 * The IPython engine.
56 56 * The IPython hub.
57 57 * The IPython schedulers.
58 58 * The controller client.
59 59
60 60 These components live in the :mod:`IPython.parallel` package and are
61 61 installed with IPython. They do, however, have additional dependencies
62 62 that must be installed. For more information, see our
63 63 :ref:`installation documentation <install_index>`.
64 64
65 65 .. TODO: include zmq in install_index
66 66
67 67 IPython engine
68 68 ---------------
69 69
70 70 The IPython engine is a Python instance that takes Python commands over a
71 71 network connection. Eventually, the IPython engine will be a full IPython
72 72 interpreter, but for now, it is a regular Python interpreter. The engine
73 73 can also handle incoming and outgoing Python objects sent over a network
74 74 connection. When multiple engines are started, parallel and distributed
75 75 computing becomes possible. An important feature of an IPython engine is
76 76 that it blocks while user code is being executed. Read on for how the
77 77 IPython controller solves this problem to expose a clean asynchronous API
78 78 to the user.
79 79
80 80 IPython controller
81 81 ------------------
82 82
83 83 The IPython controller processes provide an interface for working with a set of engines.
84 84 At a general level, the controller is a collection of processes to which IPython engines
85 85 and clients can connect. The controller is composed of a :class:`Hub` and a collection of
86 86 :class:`Schedulers`. These Schedulers are typically run in separate processes on the
87 87 same machine as the Hub, but they can be run anywhere, from local threads to remote machines.
88 88
89 89 The controller also provides a single point of contact for users who wish to
90 90 utilize the engines connected to the controller. There are different ways of
91 91 working with a controller. In IPython, all of these models are implemented via
92 92 the client's :meth:`.View.apply` method, with various arguments, or
93 93 constructing :class:`.View` objects to represent subsets of engines. The two
94 94 primary models for interacting with engines are:
95 95
96 96 * A **Direct** interface, where engines are addressed explicitly.
97 97 * A **LoadBalanced** interface, where the Scheduler is trusted with assigning work to
98 98 appropriate engines.
99 99
100 100 Advanced users can readily extend the View models to enable other
101 101 styles of parallelism.
102 102
103 103 .. note::
104 104
105 105 A single controller and set of engines can be used with multiple models
106 106 simultaneously. This opens the door for lots of interesting things.
107 107
108 108
109 109 The Hub
110 110 *******
111 111
112 112 The center of an IPython cluster is the Hub. This is the process that keeps
113 113 track of engine connections, schedulers, clients, as well as all task requests and
114 114 results. The primary role of the Hub is to facilitate queries of the cluster state, and
115 115 minimize the necessary information required to establish the many connections involved in
116 116 connecting new clients and engines.
117 117
118 118
119 119 Schedulers
120 120 **********
121 121
122 122 All actions that can be performed on the engine go through a Scheduler. While the engines
123 123 themselves block when user code is run, the schedulers hide that from the user to provide
124 124 a fully asynchronous interface to a set of engines.
125 125
126 126
127 127 IPython client and views
128 128 ------------------------
129 129
130 130 There is one primary object, the :class:`~.parallel.Client`, for connecting to a cluster.
131 For each execution model, there is a corresponding :class:`~.parallel.view.View`. These views
131 For each execution model, there is a corresponding :class:`~.parallel.View`. These views
132 132 allow users to interact with a set of engines through the interface. Here are the two default
133 133 views:
134 134
135 135 * The :class:`DirectView` class for explicit addressing.
136 136 * The :class:`LoadBalancedView` class for destination-agnostic scheduling.
137 137
138 138 Security
139 139 --------
140 140
141 141 IPython uses ZeroMQ for networking, which has provided many advantages, but
142 142 one of the setbacks is its utter lack of security [ZeroMQ]_. By default, no IPython
143 143 connections are encrypted, but open ports only listen on localhost. The only
144 144 source of security for IPython is via ssh-tunnel. IPython supports both shell
145 145 (`openssh`) and `paramiko` based tunnels for connections. There is a key necessary
146 146 to submit requests, but due to the lack of encryption, it does not provide
147 147 significant security if loopback traffic is compromised.
148 148
149 149 In our architecture, the controller is the only process that listens on
150 150 network ports, and is thus the main point of vulnerability. The standard model
151 151 for secure connections is to designate that the controller listen on
152 152 localhost, and use ssh-tunnels to connect clients and/or
153 153 engines.
154 154
155 155 To connect and authenticate to the controller an engine or client needs
156 156 some information that the controller has stored in a JSON file.
157 157 Thus, the JSON files need to be copied to a location where
158 158 the clients and engines can find them. Typically, this is the
159 159 :file:`~/.ipython/cluster_default/security` directory on the host where the
160 160 client/engine is running (which could be a different host than the controller).
161 161 Once the JSON files are copied over, everything should work fine.
162 162
163 163 Currently, there are two JSON files that the controller creates:
164 164
165 165 ipcontroller-engine.json
166 166 This JSON file has the information necessary for an engine to connect
167 167 to a controller.
168 168
169 169 ipcontroller-client.json
170 170 The client's connection information. This may not differ from the engine's,
171 171 but since the controller may listen on different ports for clients and
172 172 engines, it is stored separately.
173 173
174 174 More details of how these JSON files are used are given below.
175 175
176 176 A detailed description of the security model and its implementation in IPython
177 177 can be found :ref:`here <parallelsecurity>`.
178 178
179 179 .. warning::
180 180
181 181 Even at its most secure, the Controller listens on ports on localhost, and
182 182 every time you make a tunnel, you open a localhost port on the connecting
183 183 machine that points to the Controller. If localhost on the Controller's
184 184 machine, or the machine of any client or engine, is untrusted, then your
185 185 Controller is insecure. There is no way around this with ZeroMQ.
186 186
187 187
188 188
189 189 Getting Started
190 190 ===============
191 191
192 192 To use IPython for parallel computing, you need to start one instance of the
193 193 controller and one or more instances of the engine. Initially, it is best to
194 194 simply start a controller and engines on a single host using the
195 195 :command:`ipcluster` command. To start a controller and 4 engines on your
196 196 localhost, just do::
197 197
198 198 $ ipcluster start -n 4
199 199
200 200 More details about starting the IPython controller and engines can be found
201 201 :ref:`here <parallel_process>`.
202 202
203 203 Once you have started the IPython controller and one or more engines, you
204 204 are ready to use the engines to do something useful. To make sure
205 205 everything is working correctly, try the following commands:
206 206
207 207 .. sourcecode:: ipython
208 208
209 209 In [1]: from IPython.parallel import Client
210 210
211 211 In [2]: c = Client()
212 212
213 213 In [4]: c.ids
214 214 Out[4]: set([0, 1, 2, 3])
215 215
216 216 In [5]: c[:].apply_sync(lambda : "Hello, World")
217 217 Out[5]: [ 'Hello, World', 'Hello, World', 'Hello, World', 'Hello, World' ]
218 218
219 219
220 220 When a client is created with no arguments, the client tries to find the corresponding JSON file
221 221 in the local `~/.ipython/cluster_default/security` directory. Or if you specified a profile,
222 222 you can use that with the Client. This should cover most cases:
223 223
224 224 .. sourcecode:: ipython
225 225
226 226 In [2]: c = Client(profile='myprofile')
227 227
228 228 If you have put the JSON file in a different location or it has a different name, create the
229 229 client like this:
230 230
231 231 .. sourcecode:: ipython
232 232
233 233 In [2]: c = Client('/path/to/my/ipcontroller-client.json')
234 234
235 235 Remember, a client needs to be able to see the Hub's ports to connect. So if they are on a
236 236 different machine, you may need to use an ssh server to tunnel access to that machine,
237 237 then you would connect to it with:
238 238
239 239 .. sourcecode:: ipython
240 240
241 241 In [2]: c = Client(sshserver='myhub.example.com')
242 242
243 243 Where 'myhub.example.com' is the url or IP address of the machine on
244 244 which the Hub process is running (or another machine that has direct access to the Hub's ports).
245 245
246 246 The SSH server may already be specified in ipcontroller-client.json, if the controller was
247 247 instructed to do so at launch time.
248 248
249 249 You are now ready to learn more about the :ref:`Direct
250 250 <parallel_multiengine>` and :ref:`LoadBalanced <parallel_task>` interfaces to the
251 251 controller.
252 252
253 253 .. [ZeroMQ] ZeroMQ. http://www.zeromq.org