##// END OF EJS Templates
allow cython cache dir to be deleted...
allow cython cache dir to be deleted workaround by clearing the distutils mkpath cache, which prevents it from ever creating a given path more than once. closes #3010

File last commit:

r11064:3bd12729
r11168:10d191bd
Show More
engine.py
305 lines | 12.7 KiB | text/x-python | PythonLexer
"""A simple engine that talks to a controller over 0MQ.
it handles registration, etc. and launches a kernel
connected to the Controller's Schedulers.
Authors:
* Min RK
"""
#-----------------------------------------------------------------------------
# Copyright (C) 2010-2011 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
#-----------------------------------------------------------------------------
from __future__ import print_function
import sys
import time
from getpass import getpass
import zmq
from zmq.eventloop import ioloop, zmqstream
from IPython.external.ssh import tunnel
# internal
from IPython.utils.localinterfaces import LOCALHOST
from IPython.utils.traitlets import (
Instance, Dict, Integer, Type, Float, Integer, Unicode, CBytes, Bool
)
from IPython.utils.py3compat import cast_bytes
from IPython.parallel.controller.heartmonitor import Heart
from IPython.parallel.factory import RegistrationFactory
from IPython.parallel.util import disambiguate_url
from IPython.kernel.zmq.session import Message
from IPython.kernel.zmq.ipkernel import Kernel
from IPython.kernel.zmq.kernelapp import IPKernelApp
class EngineFactory(RegistrationFactory):
"""IPython engine"""
# configurables:
out_stream_factory=Type('IPython.kernel.zmq.iostream.OutStream', config=True,
help="""The OutStream for handling stdout/err.
Typically 'IPython.kernel.zmq.iostream.OutStream'""")
display_hook_factory=Type('IPython.kernel.zmq.displayhook.ZMQDisplayHook', config=True,
help="""The class for handling displayhook.
Typically 'IPython.kernel.zmq.displayhook.ZMQDisplayHook'""")
location=Unicode(config=True,
help="""The location (an IP address) of the controller. This is
used for disambiguating URLs, to determine whether
loopback should be used to connect or the public address.""")
timeout=Float(5.0, config=True,
help="""The time (in seconds) to wait for the Controller to respond
to registration requests before giving up.""")
max_heartbeat_misses=Integer(50, config=True,
help="""The maximum number of times a check for the heartbeat ping of a
controller can be missed before shutting down the engine.
If set to 0, the check is disabled.""")
sshserver=Unicode(config=True,
help="""The SSH server to use for tunneling connections to the Controller.""")
sshkey=Unicode(config=True,
help="""The SSH private key file to use when tunneling connections to the Controller.""")
paramiko=Bool(sys.platform == 'win32', config=True,
help="""Whether to use paramiko instead of openssh for tunnels.""")
# not configurable:
connection_info = Dict()
user_ns = Dict()
id = Integer(allow_none=True)
registrar = Instance('zmq.eventloop.zmqstream.ZMQStream')
kernel = Instance(Kernel)
hb_check_period=Integer()
# States for the heartbeat monitoring
# Initial values for monitored and pinged must satisfy "monitored > pinged == False" so that
# during the first check no "missed" ping is reported. Must be floats for Python 3 compatibility.
_hb_last_pinged = 0.0
_hb_last_monitored = 0.0
_hb_missed_beats = 0
# The zmq Stream which receives the pings from the Heart
_hb_listener = None
bident = CBytes()
ident = Unicode()
def _ident_changed(self, name, old, new):
self.bident = cast_bytes(new)
using_ssh=Bool(False)
def __init__(self, **kwargs):
super(EngineFactory, self).__init__(**kwargs)
self.ident = self.session.session
def init_connector(self):
"""construct connection function, which handles tunnels."""
self.using_ssh = bool(self.sshkey or self.sshserver)
if self.sshkey and not self.sshserver:
# We are using ssh directly to the controller, tunneling localhost to localhost
self.sshserver = self.url.split('://')[1].split(':')[0]
if self.using_ssh:
if tunnel.try_passwordless_ssh(self.sshserver, self.sshkey, self.paramiko):
password=False
else:
password = getpass("SSH Password for %s: "%self.sshserver)
else:
password = False
def connect(s, url):
url = disambiguate_url(url, self.location)
if self.using_ssh:
self.log.debug("Tunneling connection to %s via %s", url, self.sshserver)
return tunnel.tunnel_connection(s, url, self.sshserver,
keyfile=self.sshkey, paramiko=self.paramiko,
password=password,
)
else:
return s.connect(url)
def maybe_tunnel(url):
"""like connect, but don't complete the connection (for use by heartbeat)"""
url = disambiguate_url(url, self.location)
if self.using_ssh:
self.log.debug("Tunneling connection to %s via %s", url, self.sshserver)
url,tunnelobj = tunnel.open_tunnel(url, self.sshserver,
keyfile=self.sshkey, paramiko=self.paramiko,
password=password,
)
return str(url)
return connect, maybe_tunnel
def register(self):
"""send the registration_request"""
self.log.info("Registering with controller at %s"%self.url)
ctx = self.context
connect,maybe_tunnel = self.init_connector()
reg = ctx.socket(zmq.DEALER)
reg.setsockopt(zmq.IDENTITY, self.bident)
connect(reg, self.url)
self.registrar = zmqstream.ZMQStream(reg, self.loop)
content = dict(uuid=self.ident)
self.registrar.on_recv(lambda msg: self.complete_registration(msg, connect, maybe_tunnel))
# print (self.session.key)
self.session.send(self.registrar, "registration_request", content=content)
def _report_ping(self, msg):
"""Callback for when the heartmonitor.Heart receives a ping"""
#self.log.debug("Received a ping: %s", msg)
self._hb_last_pinged = time.time()
def complete_registration(self, msg, connect, maybe_tunnel):
# print msg
self._abort_dc.stop()
ctx = self.context
loop = self.loop
identity = self.bident
idents,msg = self.session.feed_identities(msg)
msg = self.session.unserialize(msg)
content = msg['content']
info = self.connection_info
def url(key):
"""get zmq url for given channel"""
return str(info["interface"] + ":%i" % info[key])
if content['status'] == 'ok':
self.id = int(content['id'])
# launch heartbeat
# possibly forward hb ports with tunnels
hb_ping = maybe_tunnel(url('hb_ping'))
hb_pong = maybe_tunnel(url('hb_pong'))
hb_monitor = None
if self.max_heartbeat_misses > 0:
# Add a monitor socket which will record the last time a ping was seen
mon = self.context.socket(zmq.SUB)
mport = mon.bind_to_random_port('tcp://%s' % LOCALHOST)
mon.setsockopt(zmq.SUBSCRIBE, b"")
self._hb_listener = zmqstream.ZMQStream(mon, self.loop)
self._hb_listener.on_recv(self._report_ping)
hb_monitor = "tcp://%s:%i" % (LOCALHOST, mport)
heart = Heart(hb_ping, hb_pong, hb_monitor , heart_id=identity)
heart.start()
# create Shell Connections (MUX, Task, etc.):
shell_addrs = url('mux'), url('task')
# Use only one shell stream for mux and tasks
stream = zmqstream.ZMQStream(ctx.socket(zmq.ROUTER), loop)
stream.setsockopt(zmq.IDENTITY, identity)
shell_streams = [stream]
for addr in shell_addrs:
connect(stream, addr)
# control stream:
control_addr = url('control')
control_stream = zmqstream.ZMQStream(ctx.socket(zmq.ROUTER), loop)
control_stream.setsockopt(zmq.IDENTITY, identity)
connect(control_stream, control_addr)
# create iopub stream:
iopub_addr = url('iopub')
iopub_socket = ctx.socket(zmq.PUB)
iopub_socket.setsockopt(zmq.IDENTITY, identity)
connect(iopub_socket, iopub_addr)
# disable history:
self.config.HistoryManager.hist_file = ':memory:'
# Redirect input streams and set a display hook.
if self.out_stream_factory:
sys.stdout = self.out_stream_factory(self.session, iopub_socket, u'stdout')
sys.stdout.topic = cast_bytes('engine.%i.stdout' % self.id)
sys.stderr = self.out_stream_factory(self.session, iopub_socket, u'stderr')
sys.stderr.topic = cast_bytes('engine.%i.stderr' % self.id)
if self.display_hook_factory:
sys.displayhook = self.display_hook_factory(self.session, iopub_socket)
sys.displayhook.topic = cast_bytes('engine.%i.pyout' % self.id)
self.kernel = Kernel(parent=self, int_id=self.id, ident=self.ident, session=self.session,
control_stream=control_stream, shell_streams=shell_streams, iopub_socket=iopub_socket,
loop=loop, user_ns=self.user_ns, log=self.log)
self.kernel.shell.display_pub.topic = cast_bytes('engine.%i.displaypub' % self.id)
# periodically check the heartbeat pings of the controller
# Should be started here and not in "start()" so that the right period can be taken
# from the hubs HeartBeatMonitor.period
if self.max_heartbeat_misses > 0:
# Use a slightly bigger check period than the hub signal period to not warn unnecessary
self.hb_check_period = int(content['hb_period'])+10
self.log.info("Starting to monitor the heartbeat signal from the hub every %i ms." , self.hb_check_period)
self._hb_reporter = ioloop.PeriodicCallback(self._hb_monitor, self.hb_check_period, self.loop)
self._hb_reporter.start()
else:
self.log.info("Monitoring of the heartbeat signal from the hub is not enabled.")
# FIXME: This is a hack until IPKernelApp and IPEngineApp can be fully merged
app = IPKernelApp(parent=self, shell=self.kernel.shell, kernel=self.kernel, log=self.log)
app.init_profile_dir()
app.init_code()
self.kernel.start()
else:
self.log.fatal("Registration Failed: %s"%msg)
raise Exception("Registration Failed: %s"%msg)
self.log.info("Completed registration with id %i"%self.id)
def abort(self):
self.log.fatal("Registration timed out after %.1f seconds"%self.timeout)
if self.url.startswith('127.'):
self.log.fatal("""
If the controller and engines are not on the same machine,
you will have to instruct the controller to listen on an external IP (in ipcontroller_config.py):
c.HubFactory.ip='*' # for all interfaces, internal and external
c.HubFactory.ip='192.168.1.101' # or any interface that the engines can see
or tunnel connections via ssh.
""")
self.session.send(self.registrar, "unregistration_request", content=dict(id=self.id))
time.sleep(1)
sys.exit(255)
def _hb_monitor(self):
"""Callback to monitor the heartbeat from the controller"""
self._hb_listener.flush()
if self._hb_last_monitored > self._hb_last_pinged:
self._hb_missed_beats += 1
self.log.warn("No heartbeat in the last %s ms (%s time(s) in a row).", self.hb_check_period, self._hb_missed_beats)
else:
#self.log.debug("Heartbeat received (after missing %s beats).", self._hb_missed_beats)
self._hb_missed_beats = 0
if self._hb_missed_beats >= self.max_heartbeat_misses:
self.log.fatal("Maximum number of heartbeats misses reached (%s times %s ms), shutting down.",
self.max_heartbeat_misses, self.hb_check_period)
self.session.send(self.registrar, "unregistration_request", content=dict(id=self.id))
self.loop.stop()
self._hb_last_monitored = time.time()
def start(self):
dc = ioloop.DelayedCallback(self.register, 0, self.loop)
dc.start()
self._abort_dc = ioloop.DelayedCallback(self.abort, self.timeout*1000, self.loop)
self._abort_dc.start()