ipcontrollerapp.py
452 lines
| 16.5 KiB
| text/x-python
|
PythonLexer
MinRK
|
r3604 | #!/usr/bin/env python | ||
# encoding: utf-8 | ||||
""" | ||||
The IPython controller application. | ||||
MinRK
|
r4018 | |||
Authors: | ||||
* Brian Granger | ||||
* MinRK | ||||
MinRK
|
r3604 | """ | ||
#----------------------------------------------------------------------------- | ||||
MinRK
|
r4018 | # Copyright (C) 2008-2011 The IPython Development Team | ||
MinRK
|
r3604 | # | ||
# Distributed under the terms of the BSD License. The full license is in | ||||
# the file COPYING, distributed as part of this software. | ||||
#----------------------------------------------------------------------------- | ||||
#----------------------------------------------------------------------------- | ||||
# Imports | ||||
#----------------------------------------------------------------------------- | ||||
from __future__ import with_statement | ||||
MinRK
|
r5429 | import json | ||
MinRK
|
r3604 | import os | ||
MinRK
|
r3614 | import socket | ||
MinRK
|
r3631 | import stat | ||
import sys | ||||
MinRK
|
r3604 | |||
MinRK
|
r3985 | from multiprocessing import Process | ||
MinRK
|
r3604 | import zmq | ||
MinRK
|
r3985 | from zmq.devices import ProcessMonitoredQueue | ||
MinRK
|
r3604 | from zmq.log.handlers import PUBHandler | ||
Fernando Perez
|
r5417 | |||
MinRK
|
r4024 | from IPython.core.profiledir import ProfileDir | ||
MinRK
|
r3688 | |||
MinRK
|
r3993 | from IPython.parallel.apps.baseapp import ( | ||
MinRK
|
r3992 | BaseParallelApplication, | ||
MinRK
|
r4115 | base_aliases, | ||
base_flags, | ||||
MinRK
|
r5214 | catch_config_error, | ||
MinRK
|
r3604 | ) | ||
MinRK
|
r3985 | from IPython.utils.importstring import import_item | ||
MinRK
|
r5172 | from IPython.utils.traitlets import Instance, Unicode, Bool, List, Dict, TraitError | ||
MinRK
|
r3985 | |||
MinRK
|
r4962 | from IPython.zmq.session import ( | ||
Session, session_aliases, session_flags, default_secure | ||||
) | ||||
MinRK
|
r3985 | from IPython.parallel.controller.heartmonitor import HeartMonitor | ||
MinRK
|
r3992 | from IPython.parallel.controller.hub import HubFactory | ||
MinRK
|
r3985 | from IPython.parallel.controller.scheduler import TaskScheduler,launch_scheduler | ||
from IPython.parallel.controller.sqlitedb import SQLiteDB | ||||
MinRK
|
r5626 | from IPython.parallel.util import signal_children, split_url, disambiguate_url | ||
MinRK
|
r3604 | |||
MinRK
|
# The MongoDB backend is optional: expose its class in a list only when the
# module can actually be imported, so the list can be appended to other
# class lists unconditionally.
try:
    from IPython.parallel.controller.mongodb import MongoDB
    maybe_mongo = [MongoDB]
except ImportError:
    maybe_mongo = []
MinRK
|
r3604 | |||
#----------------------------------------------------------------------------- | ||||
# Module level variables | ||||
#----------------------------------------------------------------------------- | ||||
#: The default config file name for this application
default_config_file_name = u'ipcontroller_config.py'

# Long description of the application (assigned to
# IPControllerApp.description).
_description = (
    "Start the IPython controller for parallel computing.\n"
    "The IPython controller provides a gateway between the IPython engines and\n"
    "clients. The controller needs to be started before the engines and can be\n"
    "configured using command line options or using a cluster directory. Cluster\n"
    "directories contain config, log and security files and are usually located in\n"
    "your ipython directory and named as \"profile_name\". See the `profile`\n"
    "and `profile-dir` options for details.\n"
)

# Command line usage examples (assigned to IPControllerApp.examples).
_examples = (
    "\n"
    "ipcontroller --ip=192.168.0.1 --port=1000 # listen on ip, port for engines\n"
    "ipcontroller --scheme=pure # use the pure zeromq scheduler\n"
)
MinRK
|
r3604 | |||
#----------------------------------------------------------------------------- | ||||
# The main application | ||||
#----------------------------------------------------------------------------- | ||||
MinRK
|
# Command line flags: each entry maps a flag name to a
# (config-dict, help-text) pair.  Start from the shared parallel-app flags,
# add the controller-specific ones, then layer the Session flags on top
# (later updates win on name clashes).
flags = dict(base_flags)
flags['usethreads'] = (
    {'IPControllerApp': {'use_threads': True}},
    'Use threads instead of processes for the schedulers',
)
flags['sqlitedb'] = (
    {'HubFactory': {'db_class': 'IPython.parallel.controller.sqlitedb.SQLiteDB'}},
    'use the SQLiteDB backend',
)
flags['mongodb'] = (
    {'HubFactory': {'db_class': 'IPython.parallel.controller.mongodb.MongoDB'}},
    'use the MongoDB backend',
)
flags['dictdb'] = (
    {'HubFactory': {'db_class': 'IPython.parallel.controller.dictdb.DictDB'}},
    'use the in-memory DictDB backend',
)
flags['reuse'] = (
    {'IPControllerApp': {'reuse_files': True}},
    'reuse existing json connection files',
)
flags.update(session_flags)

# Command line aliases: short option name -> 'Class.trait' it configures.
# Controller-specific aliases come first so that the shared base and Session
# aliases override them on name clashes, exactly as in the update order.
aliases = {
    'ssh': 'IPControllerApp.ssh_server',
    'enginessh': 'IPControllerApp.engine_ssh_server',
    'location': 'IPControllerApp.location',
    'url': 'HubFactory.url',
    'ip': 'HubFactory.ip',
    'transport': 'HubFactory.transport',
    'port': 'HubFactory.regport',
    'ping': 'HeartMonitor.period',
    'scheme': 'TaskScheduler.scheme_name',
    'hwm': 'TaskScheduler.hwm',
}
aliases.update(base_aliases)
aliases.update(session_aliases)
MinRK
|
r3985 | |||
Brian Granger
|
r4216 | |||
MinRK
|
class IPControllerApp(BaseParallelApplication):
    """The IPython controller application.

    Builds a Hub through ``HubFactory``, sets up the message-queue devices
    and task scheduler that relay traffic between clients and engines,
    writes the JSON connection files into the profile's security directory,
    and finally runs the Hub's event loop.
    """

    name = u'ipcontroller'
    description = _description
    examples = _examples
    config_file_name = Unicode(default_config_file_name)
    classes = [ProfileDir, Session, HubFactory, TaskScheduler, HeartMonitor, SQLiteDB] + maybe_mongo

    # change default to True
    auto_create = Bool(True, config=True,
        help="""Whether to create profile dir if it doesn't exist.""")

    reuse_files = Bool(False, config=True,
        help='Whether to reuse existing json connection files.'
    )
    ssh_server = Unicode(u'', config=True,
        help="""ssh url for clients to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller's listening addresses must be accessible from the ssh server""",
    )
    engine_ssh_server = Unicode(u'', config=True,
        help="""ssh url for engines to use when connecting to the Controller
        processes. It should be of the form: [user@]server[:port]. The
        Controller's listening addresses must be accessible from the ssh server""",
    )
    location = Unicode(u'', config=True,
        help="""The external IP or domain name of the Controller, used for disambiguating
        engine and client connections.""",
    )
    import_statements = List([], config=True,
        help="import statements to be run at startup. Necessary in some environments"
    )

    use_threads = Bool(False, config=True,
        help='Use threads instead of processes for the schedulers',
    )

    engine_json_file = Unicode('ipcontroller-engine.json', config=True,
        help="JSON filename where engine connection info will be stored.")
    client_json_file = Unicode('ipcontroller-client.json', config=True,
        help="JSON filename where client connection info will be stored.")

    def _cluster_id_changed(self, name, old, new):
        """Rename the JSON connection files after ``self.name``.

        Delegates to the parent handler first, then derives both file names
        from ``self.name``.  (Presumably called by traitlets when
        ``cluster_id`` changes and the parent handler updates ``self.name``;
        neither is visible in this file -- confirm in the base class.)
        """
        super(IPControllerApp, self)._cluster_id_changed(name, old, new)
        self.engine_json_file = "%s-engine.json" % self.name
        self.client_json_file = "%s-client.json" % self.name

    # internal
    # devices / processes created by init_schedulers and started by start()
    children = List()
    # import path of the monitored-queue class used for the relay devices
    mq_class = Unicode('zmq.devices.ProcessMonitoredQueue')

    def _use_threads_changed(self, name, old, new):
        """Select the Thread- or Process-based MonitoredQueue class."""
        self.mq_class = 'zmq.devices.%sMonitoredQueue' % ('Thread' if new else 'Process')

    aliases = Dict(aliases)
    flags = Dict(flags)

    def save_connection_dict(self, fname, cdict):
        """save a connection dict to json file.

        Parameters
        ----------
        fname : str
            File name, relative to the profile's security directory.
        cdict : dict
            Connection info; must contain 'url' and 'location'.  It is
            updated in place: an empty 'location' is replaced by an address
            looked up for this host (or '127.0.0.1' if the lookup fails).
        """
        url = cdict['url']
        location = cdict['location']
        if not location:
            try:
                # Only the AssertionError that split_url may raise matters
                # here; the parsed pieces themselves are not used.
                proto, ip, port = split_url(url)
            except AssertionError:
                pass
            else:
                try:
                    # last entry of the address list reported for our hostname
                    location = socket.gethostbyname_ex(socket.gethostname())[2][-1]
                except (socket.gaierror, IndexError):
                    self.log.warn("Could not identify this machine's IP, assuming 127.0.0.1."
                    " You may need to specify '--location=<external_ip_address>' to help"
                    " IPython decide when to connect via loopback.")
                    location = '127.0.0.1'
            cdict['location'] = location
        fname = os.path.join(self.profile_dir.security_dir, fname)
        self.log.info("writing connection info to %s", fname)
        with open(fname, 'w') as f:
            f.write(json.dumps(cdict, indent=2))
        # The file holds the exec_key: restrict it to owner read/write.
        # NOTE(review): the file exists with default permissions until this
        # chmod runs -- consider creating it with a restrictive mode.
        os.chmod(fname, stat.S_IRUSR | stat.S_IWUSR)

    def load_config_from_json(self):
        """load config from existing json connector files.

        Reads the engine file first, then the client file, and copies key,
        transports, IPs, registration port, location and (if not already
        set) the ssh servers into the config / this application.

        Raises
        ------
        IOError
            If a connection file cannot be opened.
        AssertionError
            If the two files disagree on exec_key or registration port.
            Raised explicitly (not via ``assert``) so the check also runs
            under ``python -O``.
        """
        c = self.config
        self.log.debug("loading config from JSON")
        security_dir = self.profile_dir.security_dir

        # load from engine config
        fname = os.path.join(security_dir, self.engine_json_file)
        self.log.info("loading connection info from %s", fname)
        with open(fname) as f:
            cfg = json.loads(f.read())
        key = cfg['exec_key']
        # json gives unicode, Session.key wants bytes
        c.Session.key = key.encode('ascii')
        xport, addr = cfg['url'].split('://')
        c.HubFactory.engine_transport = xport
        ip, ports = addr.split(':')
        c.HubFactory.engine_ip = ip
        c.HubFactory.regport = int(ports)
        self.location = cfg['location']
        if not self.engine_ssh_server:
            self.engine_ssh_server = cfg['ssh']

        # load client config
        fname = os.path.join(security_dir, self.client_json_file)
        self.log.info("loading connection info from %s", fname)
        with open(fname) as f:
            cfg = json.loads(f.read())
        if key != cfg['exec_key']:
            raise AssertionError("exec_key mismatch between engine and client keys")
        xport, addr = cfg['url'].split('://')
        c.HubFactory.client_transport = xport
        ip, ports = addr.split(':')
        c.HubFactory.client_ip = ip
        if not self.ssh_server:
            self.ssh_server = cfg['ssh']
        if int(ports) != c.HubFactory.regport:
            raise AssertionError("regport mismatch")

    def load_secondary_config(self):
        """secondary config, loading from JSON and setting defaults

        When ``reuse_files`` is set, try to load the existing connection
        files; on failure log the error and fall back to writing new ones.
        """
        if self.reuse_files:
            try:
                self.load_config_from_json()
            except (AssertionError, IOError) as e:
                self.log.error("Could not load config from JSON: %s" % e)
                self.reuse_files = False
        # switch Session.key default to secure
        default_secure(self.config)
        self.log.debug("Config changed")
        self.log.debug(repr(self.config))

    def init_hub(self):
        """Construct the Hub and, unless reusing files, write connection files.

        Trait errors propagate to the caller; any other construction error
        is logged with its traceback and the application exits with status 1.
        """
        c = self.config

        self.do_import_statements()

        try:
            self.factory = HubFactory(config=c, log=self.log)
            self.factory.init_hub()
        except TraitError:
            raise
        except Exception:
            self.log.error("Couldn't construct the Controller", exc_info=True)
            self.exit(1)

        if not self.reuse_files:
            # save to new json config files
            f = self.factory
            cdict = {
                'exec_key': f.session.key.decode('ascii'),
                'ssh': self.ssh_server,
                'url': "%s://%s:%s" % (f.client_transport, f.client_ip, f.regport),
                'location': self.location,
            }
            self.save_connection_dict(self.client_json_file, cdict)

            # The engine file starts from a copy of the client info (with
            # 'location' already resolved by the save above) but must
            # advertise the engine-side transport/ip, which is what
            # load_config_from_json reads back from it.
            edict = dict(cdict)
            edict['url'] = "%s://%s:%s" % (f.engine_transport, f.engine_ip, f.regport)
            edict['ssh'] = self.engine_ssh_server
            self.save_connection_dict(self.engine_json_file, edict)

    def init_schedulers(self):
        """Create the relay queues and the task scheduler.

        Every created device/process is appended to ``self.children``; they
        are started later by ``start()``.  The Python task scheduler is
        launched immediately (in a thread) when not using processes.
        """
        children = self.children
        mq = import_item(str(self.mq_class))

        hub = self.factory
        # disambiguate url, in case of *
        monitor_url = disambiguate_url(hub.monitor_url)

        # IOPub relay (in a Process)
        q = mq(zmq.PUB, zmq.SUB, zmq.PUB, b'N/A', b'iopub')
        q.bind_in(hub.client_info['iopub'])
        q.bind_out(hub.engine_info['iopub'])
        q.setsockopt_out(zmq.SUBSCRIBE, b'')
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)

        # Multiplexer Queue (in a Process)
        q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'in', b'out')
        q.bind_in(hub.client_info['mux'])
        q.setsockopt_in(zmq.IDENTITY, b'mux')
        q.bind_out(hub.engine_info['mux'])
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)

        # Control Queue (in a Process)
        q = mq(zmq.ROUTER, zmq.ROUTER, zmq.PUB, b'incontrol', b'outcontrol')
        q.bind_in(hub.client_info['control'])
        q.setsockopt_in(zmq.IDENTITY, b'control')
        q.bind_out(hub.engine_info['control'])
        q.connect_mon(monitor_url)
        q.daemon = True
        children.append(q)

        # Use the configured scheme name, or the trait default if unset.
        try:
            scheme = self.config.TaskScheduler.scheme_name
        except AttributeError:
            scheme = TaskScheduler.scheme_name.get_default_value()

        # Task Queue (in a Process)
        if scheme == 'pure':
            self.log.warn("task::using pure XREQ Task scheduler")
            q = mq(zmq.ROUTER, zmq.DEALER, zmq.PUB, b'intask', b'outtask')
            q.bind_in(hub.client_info['task'][1])
            q.setsockopt_in(zmq.IDENTITY, b'task')
            q.bind_out(hub.engine_info['task'])
            q.connect_mon(monitor_url)
            q.daemon = True
            children.append(q)
        elif scheme == 'none':
            self.log.warn("task::using no Task scheduler")
        else:
            self.log.info("task::using Python %s Task scheduler" % scheme)
            sargs = (hub.client_info['task'][1], hub.engine_info['task'],
                     monitor_url, disambiguate_url(hub.client_info['notification']))
            kwargs = dict(logname='scheduler', loglevel=self.log_level,
                          log_url=self.log_url, config=dict(self.config))
            if 'Process' in self.mq_class:
                # run the Python scheduler in a Process
                q = Process(target=launch_scheduler, args=sargs, kwargs=kwargs)
                q.daemon = True
                children.append(q)
            else:
                # single-threaded Controller
                kwargs['in_thread'] = True
                launch_scheduler(*sargs, **kwargs)

    def save_urls(self):
        """save the registration urls to files."""
        sec_dir = self.profile_dir.security_dir
        cf = self.factory

        with open(os.path.join(sec_dir, 'ipcontroller-engine.url'), 'w') as f:
            f.write("%s://%s:%s" % (cf.engine_transport, cf.engine_ip, cf.regport))

        with open(os.path.join(sec_dir, 'ipcontroller-client.url'), 'w') as f:
            f.write("%s://%s:%s" % (cf.client_transport, cf.client_ip, cf.regport))

    def do_import_statements(self):
        """Execute every statement in ``import_statements``.

        Each statement is logged before it runs.  A failing statement is
        logged with its traceback and does not stop the remaining ones.
        """
        for statement in self.import_statements:
            self.log.info("Executing statement: '%s'", statement)
            try:
                # call form of exec: accepted by both Python 2 and Python 3
                exec(statement, globals(), locals())
            except Exception:
                self.log.error("Error running statement: %s", statement,
                               exc_info=True)

    def forward_logging(self):
        """Replace the current log handler with a zmq PUB handler.

        Does nothing unless ``log_url`` is set.
        """
        if self.log_url:
            self.log.info("Forwarding logging to %s" % self.log_url)
            context = zmq.Context.instance()
            lsock = context.socket(zmq.PUB)
            lsock.connect(self.log_url)
            handler = PUBHandler(lsock)
            self.log.removeHandler(self._log_handler)
            handler.root_topic = 'controller'
            handler.setLevel(self.log_level)
            self.log.addHandler(handler)
            self._log_handler = handler

    @catch_config_error
    def initialize(self, argv=None):
        """Run base initialization, then set up logging, config, hub, schedulers."""
        super(IPControllerApp, self).initialize(argv)
        self.forward_logging()
        self.load_secondary_config()
        self.init_hub()
        self.init_schedulers()

    def start(self):
        """Start the hub and all children, then run the event loop.

        Blocks in the Hub's loop until it stops or a KeyboardInterrupt
        arrives.
        """
        # Start the subprocesses:
        self.factory.start()
        child_procs = []
        for child in self.children:
            child.start()
            # collect the underlying process objects for signal_children
            if isinstance(child, ProcessMonitoredQueue):
                child_procs.append(child.launcher)
            elif isinstance(child, Process):
                child_procs.append(child)
        if child_procs:
            signal_children(child_procs)

        self.write_pid_file(overwrite=True)

        try:
            self.factory.loop.start()
        except KeyboardInterrupt:
            self.log.critical("Interrupted, Exiting...\n")
MinRK
|
r3986 | |||
MinRK
|
r3604 | |||
def launch_new_instance():
    """Create and run the IPython controller"""
    running_on_windows = sys.platform == 'win32'
    if running_on_windows:
        # Windows has no proper fork, so multiprocessing may re-run this
        # entry point inside a child process, which could spawn Controllers
        # without end.  This only comes up when IPython has been installed
        # using vanilla setuptools, and *not* distribute.
        import multiprocessing
        # The main process is named 'MainProcess'; children are named
        # like 'Process-1'.
        if multiprocessing.current_process().name != 'MainProcess':
            # we are a subprocess, don't start another Controller!
            return

    controller = IPControllerApp.instance()
    controller.initialize()
    controller.start()


if __name__ == '__main__':
    launch_new_instance()