##// END OF EJS Templates
Post discussion with @ellisonbg
Post discussion with @ellisonbg

File last commit:

r16569:eb3c0ac4
r16950:48e8c003
Show More
hub.py
1440 lines | 53.3 KiB | text/x-python | PythonLexer
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599 """The IPython Controller Hub with 0MQ
MinRK
pyin -> execute_input
r16567
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599 This is the master object that handles connections from engines and clients,
and monitors traffic through the various queues.
"""
MinRK
pyin -> execute_input
r16567 # Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599 from __future__ import print_function
MinRK
enables resume of ipcontroller...
r7891 import json
import os
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599 import sys
import time
MinRK
resort imports in a cleaner order
r3631 from datetime import datetime
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599
import zmq
MinRK
Refactor newparallel to use Config system...
r3604 from zmq.eventloop import ioloop
from zmq.eventloop.zmqstream import ZMQStream
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599
# internal:
MinRK
Refactor newparallel to use Config system...
r3604 from IPython.utils.importstring import import_item
MinRK
apply extract_dates at a higher level in IPython.parallel...
r13512 from IPython.utils.jsonutil import extract_dates
MinRK
avoid executing code in utils.localinterfaces at import time...
r12591 from IPython.utils.localinterfaces import localhost
Thomas Kluyver
Fix references to dict.iteritems and dict.itervalues
r13361 from IPython.utils.py3compat import cast_bytes, unicode_type, iteritems
MinRK
all ipcluster scripts in some degree of working order with new config
r3985 from IPython.utils.traitlets import (
MinRK
add Integer traitlet...
r5344 HasTraits, Instance, Integer, Unicode, Dict, Set, Tuple, CBytes, DottedObjectName
MinRK
all ipcluster scripts in some degree of working order with new config
r3985 )
MinRK
Refactor newparallel to use Config system...
r3604
MinRK
General improvements to database backend...
r3780 from IPython.parallel import error, util
MinRK
reorganize Factory classes to follow relocation of Session object
r4007 from IPython.parallel.factory import RegistrationFactory
MinRK
mv IPython.zmq to IPython.kernel.zmq
r9372 from IPython.kernel.zmq.session import SessionFactory
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599
MinRK
eliminate relative imports
r3642 from .heartmonitor import HeartMonitor
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599
#-----------------------------------------------------------------------------
# Code
#-----------------------------------------------------------------------------
def _passer(*args, **kwargs):
    """Accept any arguments and do nothing.

    Used as a no-op handler where a callable is required but the
    message should simply be ignored.
    """
    return
MinRK
propagate iopub to clients
def _printer(*args, **kwargs):
    """Debugging handler: print the positional and keyword arguments.

    The positional arguments are printed as a tuple on one line, the
    keyword arguments as a dict on the next.
    """
    print(args)
    print(kwargs)
MinRK
SGE test related fixes...
def empty_record():
    """Return an empty dict with all record keys.

    Every key maps to None, except 'stdout' and 'stderr', which start
    as empty strings. A new dict is built on every call.
    """
    record = dict.fromkeys((
        'msg_id',
        'header',
        'metadata',
        'content',
        'buffers',
        'submitted',
        'client_uuid',
        'engine_uuid',
        'started',
        'completed',
        'resubmitted',
        'received',
        'result_header',
        'result_metadata',
        'result_content',
        'result_buffers',
        'queue',
        'execute_input',
        'execute_result',
        'error',
    ))
    # the output streams start as empty strings rather than None
    record['stdout'] = ''
    record['stderr'] = ''
    return record
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
def init_record(msg):
    """Initialize a TaskRecord based on a request.

    Parameters
    ----------
    msg : dict
        Request message; must provide 'header' (with 'msg_id' and
        'date'), 'content', 'metadata' and 'buffers'.

    Returns
    -------
    dict
        A record whose request fields are copied from `msg`, whose
        result/bookkeeping fields are None, and whose 'stdout' and
        'stderr' are empty strings.

    Raises
    ------
    KeyError
        If `msg` or its header lacks one of the required keys.
    """
    header = msg['header']
    return {
        'msg_id': header['msg_id'],
        'header': header,
        'content': msg['content'],
        'metadata': msg['metadata'],
        'buffers': msg['buffers'],
        # submission time is taken from the request header's date
        'submitted': header['date'],
        'client_uuid': None,
        'engine_uuid': None,
        'started': None,
        'completed': None,
        'resubmitted': None,
        'received': None,
        'result_header': None,
        'result_metadata': None,
        'result_content': None,
        'result_buffers': None,
        'queue': None,
        'execute_input': None,
        'execute_result': None,
        'error': None,
        'stdout': '',
        'stderr': '',
    }
MinRK
improved logging + Hub,Engine,Scheduler are Configurable
class EngineConnector(HasTraits):
    """A simple object for accessing the various zmq connections of an object.

    Attributes are:
    id (int): engine ID
    uuid (unicode): engine UUID
    pending: set of msg_ids
    stallback: DelayedCallback for stalled registration
    """

    id = Integer(0)
    uuid = Unicode()
    pending = Set()
    stallback = Instance(ioloop.DelayedCallback)
MinRK
Refactor newparallel to use Config system...
r3604
MinRK
Use NoDB by default...
# Lower-case short names accepted for HubFactory.db_class, mapped to the
# fully qualified dotted name of the backend class to import.
_db_shortcuts = {
    'sqlitedb' : 'IPython.parallel.controller.sqlitedb.SQLiteDB',
    'mongodb'  : 'IPython.parallel.controller.mongodb.MongoDB',
    'dictdb'   : 'IPython.parallel.controller.dictdb.DictDB',
    'nodb'     : 'IPython.parallel.controller.dictdb.NoDB',
}
MinRK
Refactor newparallel to use Config system...
class HubFactory(RegistrationFactory):
    """The Configurable for setting up a Hub.

    Holds the configurable ports, IPs and transports, and in `init_hub`
    creates the zmq sockets, the HeartMonitor, the DB backend and finally
    the Hub itself (stored as `self.hub`).

    For the `mux`, `task`, `control` and `iopub` port pairs, element 0 is
    the client-facing port and element 1 the engine-facing port; for `hb`
    element 0 is the ping port and element 1 the pong port (see the
    connection dicts built in `init_hub`).
    """

    # port-pairs for monitoredqueues:
    hb = Tuple(Integer, Integer, config=True,
        help="""PUB/ROUTER Port pair for Engine heartbeats""")

    def _hb_default(self):
        return tuple(util.select_random_ports(2))

    mux = Tuple(Integer, Integer, config=True,
        help="""Client/Engine Port pair for MUX queue""")

    def _mux_default(self):
        return tuple(util.select_random_ports(2))

    task = Tuple(Integer, Integer, config=True,
        help="""Client/Engine Port pair for Task queue""")

    def _task_default(self):
        return tuple(util.select_random_ports(2))

    control = Tuple(Integer, Integer, config=True,
        help="""Client/Engine Port pair for Control queue""")

    def _control_default(self):
        return tuple(util.select_random_ports(2))

    iopub = Tuple(Integer, Integer, config=True,
        help="""Client/Engine Port pair for IOPub relay""")

    def _iopub_default(self):
        return tuple(util.select_random_ports(2))

    # single ports:
    mon_port = Integer(config=True,
        help="""Monitor (SUB) port for queue traffic""")

    def _mon_port_default(self):
        return util.select_random_ports(1)[0]

    notifier_port = Integer(config=True,
        help="""PUB port for sending engine status notifications""")

    def _notifier_port_default(self):
        return util.select_random_ports(1)[0]

    engine_ip = Unicode(config=True,
        help="IP on which to listen for engine connections. [default: loopback]")

    def _engine_ip_default(self):
        return localhost()

    engine_transport = Unicode('tcp', config=True,
        help="0MQ transport for engine connections. [default: tcp]")

    client_ip = Unicode(config=True,
        help="IP on which to listen for client connections. [default: loopback]")
    client_transport = Unicode('tcp', config=True,
        help="0MQ transport for client connections. [default : tcp]")

    monitor_ip = Unicode(config=True,
        help="IP on which to listen for monitor messages. [default: loopback]")
    monitor_transport = Unicode('tcp', config=True,
        help="0MQ transport for monitor messages. [default : tcp]")

    # client and monitor IPs share the engine default (loopback)
    _client_ip_default = _monitor_ip_default = _engine_ip_default

    # derived from monitor_transport/monitor_ip/mon_port by _update_monitor_url
    monitor_url = Unicode('')

    db_class = DottedObjectName('NoDB',
        config=True, help="""The class to use for the DB backend

        Options include:

        SQLiteDB: SQLite
        MongoDB : use MongoDB
        DictDB  : in-memory storage (fastest, but be mindful of memory growth of the Hub)
        NoDB    : disable database altogether (default)

        """)

    registration_timeout = Integer(0, config=True,
        help="Engine registration timeout in seconds [default: max(30,"
             "10*heartmonitor.period)]" )

    def _registration_timeout_default(self):
        if self.heartmonitor is None:
            # early initialization, this value will be ignored
            return 0
        # heartmonitor period is in milliseconds, so 10x in seconds is .01
        return max(30, int(.01 * self.heartmonitor.period))

    # not configurable
    db = Instance('IPython.parallel.controller.dictdb.BaseDB')
    heartmonitor = Instance('IPython.parallel.controller.heartmonitor.HeartMonitor')

    def _ip_changed(self, name, old, new):
        """Apply a new IP to the engine, client and monitor interfaces.

        NOTE(review): presumably called by traitlets when an inherited
        `ip` trait changes -- that trait is not defined in this class.
        """
        self.engine_ip = new
        self.client_ip = new
        self.monitor_ip = new
        self._update_monitor_url()

    def _update_monitor_url(self):
        """Rebuild `monitor_url` from the monitor transport, IP and port."""
        self.monitor_url = "%s://%s:%i" % (self.monitor_transport, self.monitor_ip, self.mon_port)

    def _transport_changed(self, name, old, new):
        """Apply a new transport to the engine, client and monitor interfaces.

        NOTE(review): presumably called by traitlets when an inherited
        `transport` trait changes -- that trait is not defined in this class.
        """
        self.engine_transport = new
        self.client_transport = new
        self.monitor_transport = new
        self._update_monitor_url()

    def __init__(self, **kwargs):
        super(HubFactory, self).__init__(**kwargs)
        # monitor_url is derived, so compute it once the traits are set
        self._update_monitor_url()

    def construct(self):
        """Build the Hub; delegates to `init_hub`."""
        self.init_hub()

    def start(self):
        """Start the heart monitor created by `init_hub`."""
        self.heartmonitor.start()
        self.log.info("Heartmonitor started")

    def client_url(self, channel):
        """return full zmq url for a named client channel"""
        return "%s://%s:%i" % (self.client_transport, self.client_ip, self.client_info[channel])

    def engine_url(self, channel):
        """return full zmq url for a named engine channel"""
        return "%s://%s:%i" % (self.engine_transport, self.engine_ip, self.engine_info[channel])

    def init_hub(self):
        """construct Hub object

        Builds the engine/client connection dicts, binds the registration,
        heartbeat, notifier and monitor sockets, instantiates the DB
        backend, connects the resubmit stream, and stores the resulting
        Hub in `self.hub`.
        """

        ctx = self.context
        loop = self.loop
        if 'TaskScheduler.scheme_name' in self.config:
            scheme = self.config.TaskScheduler.scheme_name
        else:
            from .scheduler import TaskScheduler
            scheme = TaskScheduler.scheme_name.get_default_value()

        # build connection dicts
        self.engine_info = {
            'interface'     : "%s://%s" % (self.engine_transport, self.engine_ip),
            'registration'  : self.regport,
            'control'       : self.control[1],
            'mux'           : self.mux[1],
            'hb_ping'       : self.hb[0],
            'hb_pong'       : self.hb[1],
            'task'          : self.task[1],
            'iopub'         : self.iopub[1],
        }

        self.client_info = {
            'interface'     : "%s://%s" % (self.client_transport, self.client_ip),
            'registration'  : self.regport,
            'control'       : self.control[0],
            'mux'           : self.mux[0],
            'task'          : self.task[0],
            'task_scheme'   : scheme,
            'iopub'         : self.iopub[0],
            'notification'  : self.notifier_port,
        }

        self.log.debug("Hub engine addrs: %s", self.engine_info)
        self.log.debug("Hub client addrs: %s", self.client_info)

        # Registrar socket
        q = ZMQStream(ctx.socket(zmq.ROUTER), loop)
        util.set_hwm(q, 0)
        q.bind(self.client_url('registration'))
        self.log.info("Hub listening on %s for registration.", self.client_url('registration'))
        # engines register on their own interface only when it differs
        # from the client interface already bound above
        if self.client_ip != self.engine_ip:
            q.bind(self.engine_url('registration'))
            self.log.info("Hub listening on %s for registration.", self.engine_url('registration'))

        ### Engine connections ###

        # heartbeat
        hpub = ctx.socket(zmq.PUB)
        hpub.bind(self.engine_url('hb_ping'))
        hrep = ctx.socket(zmq.ROUTER)
        util.set_hwm(hrep, 0)
        hrep.bind(self.engine_url('hb_pong'))
        self.heartmonitor = HeartMonitor(loop=loop, parent=self, log=self.log,
                                pingstream=ZMQStream(hpub,loop),
                                pongstream=ZMQStream(hrep,loop)
                            )

        ### Client connections ###

        # Notifier socket
        n = ZMQStream(ctx.socket(zmq.PUB), loop)
        n.bind(self.client_url('notification'))

        ### build and launch the queues ###

        # monitor socket
        sub = ctx.socket(zmq.SUB)
        sub.setsockopt(zmq.SUBSCRIBE, b"")
        sub.bind(self.monitor_url)
        sub.bind('inproc://monitor')
        sub = ZMQStream(sub, loop)

        # connect the db
        db_class = _db_shortcuts.get(self.db_class.lower(), self.db_class)
        self.log.info('Hub using DB backend: %r', (db_class.split('.')[-1]))
        self.db = import_item(str(db_class))(session=self.session.session,
                                            parent=self, log=self.log)
        # NOTE(review): fixed pause after creating the DB backend; the
        # reason is not visible in this file -- confirm before removing.
        time.sleep(.25)

        # resubmit stream
        r = ZMQStream(ctx.socket(zmq.DEALER), loop)
        url = util.disambiguate_url(self.client_url('task'))
        r.connect(url)

        # convert seconds to msec
        registration_timeout = 1000*self.registration_timeout

        self.hub = Hub(loop=loop, session=self.session, monitor=sub, heartmonitor=self.heartmonitor,
                query=q, notifier=n, resubmit=r, db=self.db,
                engine_info=self.engine_info, client_info=self.client_info,
                log=self.log, registration_timeout=registration_timeout)
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Refactor newparallel to use Config system...
r3604
MinRK
reorganize Factory classes to follow relocation of Session object
r4007 class Hub(SessionFactory):
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599 """The IPython Controller Hub with 0MQ connections
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599 Parameters
==========
loop: zmq IOLoop instance
MinRK
merge IPython.parallel.streamsession into IPython.zmq.session...
r4006 session: Session object
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599 <removed> context: zmq context for creating new connections (?)
queue: ZMQStream for monitoring the command queue (SUB)
MinRK
remove remaining references to deprecated XREP/XREQ names...
r7538 query: ZMQStream for engine registration and client queries requests (ROUTER)
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599 heartbeat: HeartMonitor object checking the pulse of the engines
notifier: ZMQStream for broadcasting engine registration changes (PUB)
db: connection to db for out of memory logging of commands
NotImplemented
MinRK
newparallel tweaks, fixes...
r3622 engine_info: dict of zmq connection information for engines to connect
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599 to the queues.
MinRK
newparallel tweaks, fixes...
r3622 client_info: dict of zmq connection information for clients to connect
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599 to the queues.
"""
MinRK
enables resume of ipcontroller...
r7891
engine_state_file = Unicode()
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599 # internal data structures:
MinRK
Refactor newparallel to use Config system...
r3604 ids=Set() # engine IDs
keytable=Dict()
by_ident=Dict()
engines=Dict()
clients=Dict()
hearts=Dict()
pending=Set()
queues=Dict() # pending msg_ids keyed by engine_id
tasks=Dict() # pending msg_ids submitted as tasks, keyed by client_id
completed=Dict() # completed msg_ids keyed by engine_id
all_completed=Set() # set of all completed msg_ids
MinRK
SGE test related fixes...
r3668 dead_engines=Set() # engines that have died -- TODO confirm what identifies each entry
MinRK
better handle aborted/unschedulers tasks
r3687 unassigned=Set() # set of task msg_ds not yet assigned a destination
MinRK
Refactor newparallel to use Config system...
r3604 incoming_registrations=Dict()
MinRK
add Integer traitlet...
r5344 registration_timeout=Integer()
_idcounter=Integer(0)
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
improved logging + Hub,Engine,Scheduler are Configurable
r3603 # objects from constructor:
MinRK
remove all PAIR sockets, Merge registration+query
r3657 query=Instance(ZMQStream)
MinRK
Refactor newparallel to use Config system...
r3604 monitor=Instance(ZMQStream)
notifier=Instance(ZMQStream)
MinRK
add Client.resubmit for re-running tasks...
r3874 resubmit=Instance(ZMQStream)
heartmonitor=Instance(HeartMonitor)
MinRK
improved logging + Hub,Engine,Scheduler are Configurable
r3603 db=Instance(object)
MinRK
newparallel tweaks, fixes...
r3622 client_info=Dict()
engine_info=Dict()
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
improved logging + Hub,Engine,Scheduler are Configurable
def __init__(self, **kwargs):
    """Create the Hub and register its stream callbacks.

    All arguments are passed as keywords to the parent constructor.
    The attributes used here are:

    # streams:
    query: ZMQStream for engine+client registration and client requests
    monitor: ZMQStream for monitoring queue messages
    resubmit: ZMQStream for resubmitted tasks (replies are ignored)
    # engine tracking:
    heartmonitor: HeartMonitor object for tracking engines

    Other keywords supplied by HubFactory (loop, session, notifier, db,
    engine_info, client_info, registration_timeout, log) are stored as
    traits by the parent constructor.
    """

    super(Hub, self).__init__(**kwargs)

    # register our callbacks
    self.query.on_recv(self.dispatch_query)
    self.monitor.on_recv(self.dispatch_monitor_traffic)

    self.heartmonitor.add_heart_failure_handler(self.handle_heart_failure)
    self.heartmonitor.add_new_heart_handler(self.handle_new_heart)

    # monitor topic (bytes) -> handler(idents, msg);
    # control traffic is deliberately ignored via _passer
    self.monitor_handlers = {b'in' : self.save_queue_request,
                            b'out': self.save_queue_result,
                            b'intask': self.save_task_request,
                            b'outtask': self.save_task_result,
                            b'tracktask': self.save_task_destination,
                            b'incontrol': _passer,
                            b'outcontrol': _passer,
                            b'iopub': self.save_iopub_message,
    }

    # query msg_type -> handler method
    self.query_handlers = {'queue_request': self.queue_status,
                            'result_request': self.get_results,
                            'history_request': self.get_history,
                            'db_request': self.db_query,
                            'purge_request': self.purge_results,
                            'load_request': self.check_load,
                            'resubmit_request': self.resubmit_task,
                            'shutdown_request': self.shutdown_request,
                            'registration_request' : self.register_engine,
                            'unregistration_request' : self.unregister_engine,
                            'connection_request': self.connection_request,
    }

    # ignore resubmit replies
    self.resubmit.on_recv(lambda msg: None, copy=False)

    self.log.info("hub::created hub")
MinRK
enables resume of ipcontroller...
r7891
MinRK
improved logging + Hub,Engine,Scheduler are Configurable
@property
def _next_id(self):
    """generate a new ID.

    No longer reuse old ids, just count from 0.

    Note that reading this property has a side effect: every access
    returns the current counter value and then increments it.
    """
    newid = self._idcounter
    self._idcounter += 1
    return newid
MinRK
enables resume of ipcontroller...
r7891
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599 #-----------------------------------------------------------------------------
# message validation
#-----------------------------------------------------------------------------
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
def _validate_targets(self, targets):
    """turn any valid targets argument into a collection of engine ids

    Parameters
    ----------
    targets : None, int, str, or iterable of int/str
        None selects every registered engine. A single int or string is
        treated as a one-element list. Strings are looked up (as bytes)
        in `self.by_ident` and replaced by the mapped value when found.

    Returns
    -------
    `self.ids` itself when `targets` is None (this may be empty),
    otherwise a new list of the resolved targets.

    Raises
    ------
    IndexError
        If any resolved target is not in `self.ids`, or if the resolved
        list is empty.
    """
    if targets is None:
        # default to all
        return self.ids

    if isinstance(targets, (int,str,unicode_type)):
        # only one target specified
        targets = [targets]
    _targets = []
    for t in targets:
        # map raw identities to ids
        if isinstance(t, (str,unicode_type)):
            t = self.by_ident.get(cast_bytes(t), t)
        _targets.append(t)
    targets = _targets
    bad_targets = [ t for t in targets if t not in self.ids ]
    if bad_targets:
        raise IndexError("No Such Engine: %r" % bad_targets)
    if not targets:
        raise IndexError("No Engines Registered")
    return targets
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599 #-----------------------------------------------------------------------------
# dispatch methods (1 per stream)
#-----------------------------------------------------------------------------
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
add log_errors decorator for on_recv callbacks...
@util.log_errors
def dispatch_monitor_traffic(self, msg):
    """all ME and Task queue messages come through here, as well as
    IOPub traffic.

    `msg` is a list of frames: the first frame is the topic used to
    pick a handler from `self.monitor_handlers`; the remaining frames
    are split into identities and message by the session. Messages
    with no identities or with an unknown topic are logged as errors
    and dropped.
    """
    self.log.debug("monitor traffic: %r", msg[0])
    switch = msg[0]
    try:
        idents, msg = self.session.feed_identities(msg[1:])
    except ValueError:
        # could not split identities; handled as "no idents" below
        idents=[]
    if not idents:
        self.log.error("Monitor message without topic: %r", msg)
        return
    handler = self.monitor_handlers.get(switch, None)
    if handler is not None:
        handler(idents, msg)
    else:
        self.log.error("Unrecognized monitor topic: %r", switch)
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
add log_errors decorator for on_recv callbacks...
r6324 @util.log_errors
MinRK
remove all PAIR sockets, Merge registration+query
def dispatch_query(self, msg):
    """Route registration requests and queries from clients.

    ``msg`` is the raw frame list.  It is split into identities and message
    frames, unserialized (with content), and passed to the handler found in
    ``self.query_handlers`` under the message's ``msg_type``.

    A message without identities is logged and dropped.  A message that
    cannot be unserialized, or whose type has no handler, is logged and
    answered with a ``hub_error`` reply carrying the wrapped exception.
    """
    try:
        idents, msg = self.session.feed_identities(msg)
    except ValueError:
        idents = []
    if not idents:
        self.log.error("Bad Query Message: %r", msg)
        return
    client_id = idents[0]
    try:
        msg = self.session.unserialize(msg, content=True)
    except Exception:
        content = error.wrap_exception()
        self.log.error("Bad Query Message: %r", msg, exc_info=True)
        self.session.send(self.query, "hub_error", ident=client_id,
                content=content)
        return

    # switch on message type:
    msg_type = msg['header']['msg_type']
    self.log.info("client::client %r requested %r", client_id, msg_type)
    handler = self.query_handlers.get(msg_type, None)
    if handler is None:
        # Raise explicitly instead of using an `assert` statement: asserts
        # are stripped under `python -O`, which would let None be called.
        # AssertionError is kept so the wrapped error sent to the client
        # has the same exception name as before.
        try:
            raise AssertionError("Bad Message Type: %r" % msg_type)
        except AssertionError:
            content = error.wrap_exception()
            self.log.error("Bad Message Type: %r", msg_type, exc_info=True)
            self.session.send(self.query, "hub_error", ident=client_id,
                    content=content)
        return

    handler(idents, msg)
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
def dispatch_db(self, msg):
    """Placeholder for a db dispatch method; not implemented.

    Always raises NotImplementedError.
    """
    raise NotImplementedError
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599 #---------------------------------------------------------------------------
# handler methods (1 per event)
#---------------------------------------------------------------------------
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599 #----------------------- Heartbeat --------------------------------------
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
def handle_new_heart(self, heart):
    """Handler to attach to heartbeater.

    Called when a new heart starts to beat.  If ``heart`` has a pending
    entry in ``self.incoming_registrations`` the registration is completed;
    otherwise the heart is ignored.
    """
    self.log.debug("heartbeat::handle_new_heart(%r)", heart)
    if heart in self.incoming_registrations:
        self.finish_registration(heart)
        return
    self.log.info("heartbeat::ignoring new heart: %r", heart)
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
def handle_heart_failure(self, heart):
    """Handler to attach to heartbeater.

    Called when a previously registered heart fails to respond to a beat
    request.  Triggers unregistration of the engine that owns ``heart``,
    unless the heart is unknown or the engine is already marked dead.
    """
    self.log.debug("heartbeat::handle_heart_failure(%r)", heart)
    eid = self.hearts.get(heart, None)
    if eid is None or self.keytable[eid] in self.dead_engines:
        self.log.info("heartbeat::ignoring heart failure %r (not an engine or already dead)", heart)
        return
    # Look the engine up only after the guard above: for an unknown heart
    # eid is None and self.engines[None] would raise KeyError.
    uuid = self.engines[eid].uuid
    self.unregister_engine(heart, dict(content=dict(id=eid, queue=uuid)))
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599 #----------------------- MUX Queue Traffic ------------------------------
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
def save_queue_request(self, idents, msg):
    """Record a request a client submitted to an engine's MUX queue.

    ``idents`` must start with ``(queue_id, client_id)``.  The request is
    stored in the db (merged with an existing record if there is one),
    added to ``self.pending`` and appended to the target engine's queue.
    Invalid identities, messages that fail to unserialize and unregistered
    targets are logged and dropped.
    """
    if len(idents) < 2:
        self.log.error("invalid identity prefix: %r", idents)
        return
    queue_id, client_id = idents[:2]
    try:
        msg = self.session.unserialize(msg)
    except Exception:
        self.log.error("queue::client %r sent invalid message to %r: %r", client_id, queue_id, msg, exc_info=True)
        return

    eid = self.by_ident.get(queue_id)
    if eid is None:
        self.log.error("queue::target %r not registered", queue_id)
        self.log.debug("queue:: valid are: %r", self.by_ident.keys())
        return

    record = init_record(msg)
    msg_id = record['msg_id']
    self.log.info("queue::client %r submitted request %r to %s", client_id, msg_id, eid)
    # Unicode in records
    record['engine_uuid'] = queue_id.decode('ascii')
    record['client_uuid'] = msg['header']['session']
    record['queue'] = 'mux'

    try:
        # it's possible iopub arrived first:
        stored = self.db.get_record(msg_id)
        for key, old_value in iteritems(stored):
            if not old_value:
                continue
            new_value = record.get(key, None)
            if not new_value:
                # keep what is already stored when the request has nothing
                record[key] = old_value
            elif old_value != new_value:
                self.log.warn("conflicting initial state for record: %r:%r <%r> %r", msg_id, new_value, key, old_value)
        try:
            self.db.update_record(msg_id, record)
        except Exception:
            self.log.error("DB Error updating record %r", msg_id, exc_info=True)
    except KeyError:
        # no record yet for this msg_id
        try:
            self.db.add_record(msg_id, record)
        except Exception:
            self.log.error("DB Error adding record %r", msg_id, exc_info=True)

    self.pending.add(msg_id)
    self.queues[eid].append(msg_id)
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
def save_queue_result(self, idents, msg):
    """Record the reply an engine sent for a MUX queue request.

    ``idents`` must start with ``(client_id, queue_id)``.  A pending
    request is moved to the completed bookkeeping; the reply (headers,
    metadata, content, buffers and timestamps) is then written to the db
    record of the parent ``msg_id``.  Replies for a msg_id that is neither
    pending nor completed are logged with a warning and dropped.
    """
    if len(idents) < 2:
        self.log.error("invalid identity prefix: %r", idents)
        return

    client_id, queue_id = idents[:2]
    try:
        msg = self.session.unserialize(msg)
    except Exception:
        self.log.error("queue::engine %r sent invalid message to %r: %r",
                queue_id, client_id, msg, exc_info=True)
        return

    eid = self.by_ident.get(queue_id)
    if eid is None:
        self.log.error("queue::unknown engine %r is sending a reply: ", queue_id)
        return

    parent = msg['parent_header']
    if not parent:
        return
    msg_id = parent['msg_id']

    if msg_id in self.pending:
        self.pending.remove(msg_id)
        self.all_completed.add(msg_id)
        self.queues[eid].remove(msg_id)
        self.completed[eid].append(msg_id)
        self.log.info("queue::request %r completed on %s", msg_id, eid)
    elif msg_id not in self.all_completed:
        # it could be a result from a dead engine that died before
        # delivering the result
        self.log.warn("queue:: unknown msg finished %r", msg_id)
        return

    # update record anyway, because the unregistration could have been premature
    reply_header = msg['header']
    metadata = msg['metadata']
    finished = reply_header['date']
    began = extract_dates(metadata.get('started', None))
    update = {
        'result_header': reply_header,
        'result_metadata': metadata,
        'result_content': msg['content'],
        'received': datetime.now(),
        'started': began,
        'completed': finished,
        'result_buffers': msg['buffers'],
    }
    try:
        self.db.update_record(msg_id, update)
    except Exception:
        self.log.error("DB Error updating record %r", msg_id, exc_info=True)
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599 #--------------------- Task Queue Traffic ------------------------------
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
def save_task_request(self, idents, msg):
    """Save the submission of a task.

    The task's msg_id is added to ``self.pending`` and ``self.unassigned``
    and its record is written to the db, merged with an already stored
    record when one exists.
    """
    client_id = idents[0]

    try:
        msg = self.session.unserialize(msg)
    except Exception:
        self.log.error("task::client %r sent invalid task message: %r",
                client_id, msg, exc_info=True)
        return

    record = init_record(msg)
    record['client_uuid'] = msg['header']['session']
    record['queue'] = 'task'
    msg_id = msg['header']['msg_id']
    self.pending.add(msg_id)
    self.unassigned.add(msg_id)

    try:
        # it's possible iopub arrived first:
        stored = self.db.get_record(msg_id)
        if stored['resubmitted']:
            # don't clobber these keys on resubmit:
            # submitted and client_uuid should be different,
            # and buffers might be big, and shouldn't have changed
            for key in ('submitted', 'client_uuid', 'buffers'):
                record.pop(key)
            # content and header are still checked below: they should not
            # change, and are not as expensive to compare as buffers

        for key, old_value in iteritems(stored):
            if key.endswith('buffers'):
                # don't compare buffers
                continue
            if not old_value:
                continue
            new_value = record.get(key, None)
            if not new_value:
                record[key] = old_value
            elif old_value != new_value:
                self.log.warn("conflicting initial state for record: %r:%r <%r> %r", msg_id, new_value, key, old_value)
        try:
            self.db.update_record(msg_id, record)
        except Exception:
            self.log.error("DB Error updating record %r", msg_id, exc_info=True)
    except KeyError:
        try:
            self.db.add_record(msg_id, record)
        except Exception:
            self.log.error("DB Error adding record %r", msg_id, exc_info=True)
    except Exception:
        self.log.error("DB Error saving task request %r", msg_id, exc_info=True)
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
def save_task_result(self, idents, msg):
    """Save the result of a completed task.

    The parent msg_id is removed from ``self.unassigned``.  If the task is
    pending it is moved to the completed bookkeeping and the reply
    (headers, metadata, content, buffers, timestamps and engine uuid) is
    written to its db record.  Results for unknown tasks are only logged
    at debug level.
    """
    client_id = idents[0]
    try:
        msg = self.session.unserialize(msg)
    except Exception:
        self.log.error("task::invalid task result message send to %r: %r",
                client_id, msg, exc_info=True)
        return

    parent = msg['parent_header']
    if not parent:
        self.log.warn("Task %r had no parent!", msg)
        return
    msg_id = parent['msg_id']
    self.unassigned.discard(msg_id)

    header = msg['header']
    md = msg['metadata']
    engine_uuid = md.get('engine', u'')
    eid = self.by_ident.get(cast_bytes(engine_uuid), None)
    status = md.get('status', None)

    if msg_id not in self.pending:
        self.log.debug("task::unknown task %r finished", msg_id)
        return

    self.log.info("task::task %r finished on %s", msg_id, eid)
    self.pending.remove(msg_id)
    self.all_completed.add(msg_id)
    if eid is not None:
        # aborted tasks are not counted as completed on the engine
        if status != 'aborted':
            self.completed[eid].append(msg_id)
        if msg_id in self.tasks[eid]:
            self.tasks[eid].remove(msg_id)

    completed = header['date']
    started = extract_dates(md.get('started', None))
    result = {
        'result_header': header,
        'result_metadata': md,
        'result_content': msg['content'],
        'started': started,
        'completed': completed,
        'received': datetime.now(),
        'engine_uuid': engine_uuid,
        'result_buffers': msg['buffers'],
    }
    try:
        self.db.update_record(msg_id, result)
    except Exception:
        # this is the result path; the message used to say "task request"
        self.log.error("DB Error saving task result %r", msg_id, exc_info=True)
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
def save_task_destination(self, idents, msg):
    """Record which engine a task was assigned to.

    Reads ``msg_id`` and ``engine_id`` from the message content, removes
    the task from ``self.unassigned``, appends it to the engine's task
    list and stores the engine uuid in the task's db record.
    """
    try:
        msg = self.session.unserialize(msg, content=True)
    except Exception:
        self.log.error("task::invalid task tracking message", exc_info=True)
        return

    tracking = msg['content']
    msg_id = tracking['msg_id']
    engine_uuid = tracking['engine_id']
    eid = self.by_ident[cast_bytes(engine_uuid)]

    self.log.info("task::task %r arrived on %r", msg_id, eid)
    if msg_id in self.unassigned:
        self.unassigned.remove(msg_id)
    self.tasks[eid].append(msg_id)

    try:
        self.db.update_record(msg_id, {'engine_uuid': engine_uuid})
    except Exception:
        self.log.error("DB Error saving task destination %r", msg_id, exc_info=True)
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
def mia_task_request(self, idents, msg):
    """Placeholder for a 'mia' task query; not implemented.

    Always raises NotImplementedError.
    """
    # The statement that used to follow the raise was unreachable and has
    # been dropped.
    raise NotImplementedError
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
propagate iopub to clients
r3602 #--------------------- IOPub Traffic ------------------------------
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
propagate iopub to clients
def save_iopub_message(self, topics, msg):
    """Save an iopub message into the db.

    The message is attached to the record of its parent ``msg_id``; a new
    empty record is created if none exists yet.  ``stream`` data is
    appended to what is already stored under the stream's name; ``error``,
    ``execute_input``, ``display_data`` and ``execute_result`` are stored
    under their own key.  ``status`` and ``data_pub`` messages are not
    stored.
    """
    try:
        msg = self.session.unserialize(msg, content=True)
    except Exception:
        self.log.error("iopub::invalid IOPub message", exc_info=True)
        return

    parent = msg['parent_header']
    if not parent:
        self.log.warn("iopub::IOPub message lacks parent: %r", msg)
        return
    msg_id = parent['msg_id']
    msg_type = msg['header']['msg_type']
    content = msg['content']

    # ensure msg_id is in db
    try:
        rec = self.db.get_record(msg_id)
    except KeyError:
        rec = empty_record()
        rec['msg_id'] = msg_id
        # guard the write like the other db writes in this class, so a
        # backend failure is logged instead of propagating
        try:
            self.db.add_record(msg_id, rec)
        except Exception:
            self.log.error("DB Error adding record %r", msg_id, exc_info=True)
            return

    d = {}
    if msg_type == 'stream':
        name = content['name']
        # .get: a stream name the record has no key for starts out empty
        d[name] = (rec.get(name) or '') + content['data']
    elif msg_type == 'error':
        d['error'] = content
    elif msg_type == 'execute_input':
        d['execute_input'] = content['code']
    elif msg_type in ('display_data', 'execute_result'):
        d[msg_type] = content
    elif msg_type == 'status':
        pass
    elif msg_type == 'data_pub':
        self.log.info("ignored data_pub message for %s", msg_id)
    else:
        self.log.warn("unhandled iopub msg_type: %r", msg_type)

    if not d:
        return

    try:
        self.db.update_record(msg_id, d)
    except Exception:
        self.log.error("DB Error saving iopub message %r", msg_id, exc_info=True)
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599 #-------------------------------------------------------------------------
# Registration requests
#-------------------------------------------------------------------------
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
def connection_request(self, client_id, msg):
    """Reply with connection addresses for clients.

    Sends a ``connection_reply`` whose ``engines`` entry maps the
    stringified key of every ``self.keytable`` item to its value, leaving
    out values found in ``self.dead_engines``.
    """
    self.log.info("client::client %r connected", client_id)
    live_engines = dict(
        (str(eid), uuid)
        for eid, uuid in iteritems(self.keytable)
        if uuid not in self.dead_engines
    )
    content = dict(status='ok', engines=live_engines)
    self.session.send(self.query, 'connection_reply', content, parent=msg, ident=client_id)
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
def register_engine(self, reg, msg):
    """Register a new engine.

    Takes the engine's ``uuid`` from the message content, checks it
    against registered and pending engines, and sends a
    ``registration_reply`` to ``reg``.  On success the engine is stored in
    ``self.incoming_registrations``: registration finishes immediately if
    its heart is already beating, otherwise a purge is scheduled after
    ``self.registration_timeout``.

    Returns the engine id read from ``self._next_id``, or None when the
    content has no ``uuid``.
    """
    request = msg['content']
    try:
        uuid = request['uuid']
    except KeyError:
        self.log.error("registration::queue not specified", exc_info=True)
        return

    eid = self._next_id
    self.log.debug("registration::register_engine(%i, %r)", eid, uuid)

    content = dict(id=eid, status='ok', hb_period=self.heartmonitor.period)

    # check if requesting available IDs:
    conflict = None
    if cast_bytes(uuid) in self.by_ident:
        conflict = "uuid %r in use"
    else:
        for h, ec in iteritems(self.incoming_registrations):
            if uuid == h:
                conflict = "heart_id %r in use"
                break
            elif uuid == ec.uuid:
                conflict = "uuid %r in use"
                break

    if conflict is not None:
        # raise and catch so error.wrap_exception() has an active
        # exception to wrap for the reply
        try:
            raise KeyError(conflict % uuid)
        except Exception:
            self.log.error(conflict, uuid, exc_info=True)
            content = error.wrap_exception()

    self.session.send(self.query, "registration_reply",
            content=content,
            ident=reg)

    heart = cast_bytes(uuid)

    if content['status'] != 'ok':
        self.log.error("registration::registration %i failed: %r", eid, content['evalue'])
        return eid

    if heart in self.heartmonitor.hearts:
        # already beating
        self.incoming_registrations[heart] = EngineConnector(id=eid, uuid=uuid)
        self.finish_registration(heart)
    else:
        def purge():
            self._purge_stalled_registration(heart)

        dc = ioloop.DelayedCallback(purge, self.registration_timeout, self.loop)
        dc.start()
        self.incoming_registrations[heart] = EngineConnector(id=eid, uuid=uuid, stallback=dc)

    return eid
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
def unregister_engine(self, ident, msg):
    """Unregister an engine.

    Called for engines that explicitly requested to leave, and by
    ``handle_heart_failure``.  ``msg['content']['id']`` is the engine id.
    The engine's uuid is added to ``self.dead_engines``, handling of its
    outstanding messages is scheduled after ``self.registration_timeout``,
    the engine state is saved, and an ``unregistration_notification`` is
    sent if a notifier is set.

    A malformed message or an unknown engine id is logged and ignored.
    """
    try:
        eid = msg['content']['id']
    except Exception:
        self.log.error("registration::bad engine id for unregistration: %r", ident, exc_info=True)
        return
    self.log.info("registration::unregister_engine(%r)", eid)

    try:
        uuid = self.keytable[eid]
    except (KeyError, TypeError):
        # unknown (or unhashable) id: nothing to unregister
        self.log.error("registration::unknown engine id for unregistration: %r", eid, exc_info=True)
        return

    content = dict(id=eid, uuid=uuid)
    self.dead_engines.add(uuid)
    # NOTE: the engine's entries in ids/keytable/engines/hearts/by_ident/
    # completed are not removed here; it is only marked dead.

    def handleit():
        self._handle_stranded_msgs(eid, uuid)

    dc = ioloop.DelayedCallback(handleit, self.registration_timeout, self.loop)
    dc.start()
    ############## TODO: HANDLE IT ################

    self._save_engine_state()

    if self.notifier:
        self.session.send(self.notifier, "unregistration_notification", content=content)
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
more graceful handling of dying engines
def _handle_stranded_msgs(self, eid, uuid):
    """Handle messages known to be on an engine when the engine unregisters.

    Every msg_id still in ``self.queues[eid]`` is moved from pending to
    completed and its db record gets an ``EngineError`` as result.

    It is possible that this will fire prematurely - that is, an engine will
    go down after completing a result, and the client will be notified
    that the result failed and later receive the actual result.
    """
    for msg_id in self.queues[eid]:
        self.pending.remove(msg_id)
        self.all_completed.add(msg_id)
        # raise and catch so error.wrap_exception() has something to wrap
        try:
            raise error.EngineError("Engine %r died while running task %r" % (eid, msg_id))
        except Exception:
            content = error.wrap_exception()

        # build a fake header:
        now = datetime.now()
        header = {'engine': uuid, 'date': now}
        rec = dict(
            result_content=content,
            result_header=header,
            result_buffers=[],
            completed=now,
            engine_uuid=uuid,
        )
        try:
            self.db.update_record(msg_id, rec)
        except Exception:
            self.log.error("DB Error handling stranded msg %r", msg_id, exc_info=True)
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
def finish_registration(self, heart):
    """Second half of engine registration, called after our HeartMonitor
    has received a beat from the Engine's Heart.

    Moves the pending connector for ``heart`` into the hub's engine tables,
    stops its stall callback if it has one, sends a
    ``registration_notification`` when a notifier is set, and saves the
    engine state.
    """
    try:
        connector = self.incoming_registrations.pop(heart)
    except KeyError:
        self.log.error("registration::tried to finish nonexistant registration", exc_info=True)
        return

    eid = connector.id
    uuid = connector.uuid
    self.log.info("registration::finished registering engine %i:%s", eid, uuid)
    if connector.stallback is not None:
        connector.stallback.stop()

    self.ids.add(eid)
    self.keytable[eid] = uuid
    self.engines[eid] = connector
    self.by_ident[cast_bytes(uuid)] = eid
    self.queues[eid] = []
    self.tasks[eid] = []
    self.completed[eid] = []
    self.hearts[heart] = eid

    if self.notifier:
        self.session.send(self.notifier, "registration_notification",
                content=dict(id=eid, uuid=uuid))
    self.log.info("engine::Engine Connected: %i", eid)

    self._save_engine_state()
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
def _purge_stalled_registration(self, heart):
    """Remove ``heart`` from the pending registrations, if it is still there."""
    if heart not in self.incoming_registrations:
        return
    stalled = self.incoming_registrations.pop(heart)
    self.log.info("registration::purging stalled registration: %i", stalled.id)
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599 #-------------------------------------------------------------------------
MinRK
enables resume of ipcontroller...
r7891 # Engine State
#-------------------------------------------------------------------------
def _cleanup_engine_state_file(self):
"""cleanup engine state mapping"""
if os.path.exists(self.engine_state_file):
self.log.debug("cleaning up engine state: %s", self.engine_state_file)
try:
os.remove(self.engine_state_file)
except IOError:
self.log.error("Couldn't cleanup file: %s", self.engine_state_file, exc_info=True)
def _save_engine_state(self):
    """Save the engine id -> uuid mapping to the JSON state file.

    The file holds ``{'engines': {id: uuid, ...}, 'next_id': <id counter>}``.
    Engines whose uuid is listed in `dead_engines` are left out.
    Does nothing when `engine_state_file` is not set.
    """
    if not self.engine_state_file:
        return
    # pass the argument to the logger instead of pre-formatting the message
    self.log.debug("save engine state to %s", self.engine_state_file)
    engines = dict(
        (eid, ec.uuid)
        for eid, ec in iteritems(self.engines)
        if ec.uuid not in self.dead_engines
    )
    state = {'engines': engines, 'next_id': self._idcounter}
    with open(self.engine_state_file, 'w') as f:
        json.dump(state, f)
def _load_engine_state(self):
    """Load the engine mapping from the JSON state file and re-register it.

    Each saved engine is inserted as an incoming registration whose heart is
    already marked as beating, then passed to `finish_registration`.
    Does nothing when the state file does not exist.
    """
    if not os.path.exists(self.engine_state_file):
        return

    self.log.info("loading engine state from %s", self.engine_state_file)

    with open(self.engine_state_file) as f:
        state = json.load(f)

    # finish_registration only notifies when self.notifier is set, so
    # clearing it here suppresses notifications for the restored engines.
    save_notifier = self.notifier
    self.notifier = None
    try:
        for eid, uuid in iteritems(state['engines']):
            heart = uuid.encode('ascii')
            # start with this heart as current and beating:
            self.heartmonitor.responses.add(heart)
            self.heartmonitor.hearts.add(heart)

            # JSON object keys are strings, so the id is converted back to int
            self.incoming_registrations[heart] = EngineConnector(id=int(eid), uuid=uuid)
            self.finish_registration(heart)
    finally:
        # restore the notifier even if a registration fails part-way
        self.notifier = save_notifier

    self._idcounter = state['next_id']
#-------------------------------------------------------------------------
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599 # Client Requests
#-------------------------------------------------------------------------
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
def shutdown_request(self, client_id, msg):
    """Handle a shutdown request.

    Sends a 'shutdown_reply' to the requesting client, a 'shutdown_notice'
    on the notifier stream, and schedules `_shutdown` through a
    DelayedCallback with a delay value of 1000.
    """
    self.session.send(self.query, 'shutdown_reply', content={'status': 'ok'}, ident=client_id)
    # also notify other clients of shutdown
    # (guarded like the registration notification: the notifier may be unset)
    if self.notifier:
        self.session.send(self.notifier, 'shutdown_notice', content={'status': 'ok'})
    dc = ioloop.DelayedCallback(self._shutdown, 1000, self.loop)
    dc.start()
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599 def _shutdown(self):
MinRK
rework logging connections
r3610 self.log.info("hub::hub shutting down.")
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
r3599 time.sleep(0.1)
sys.exit(0)
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
def check_load(self, client_id, msg):
    """Reply with the number of pending jobs on each requested target.

    The 'load_reply' content maps ``str(target)`` to the combined length of
    that target's queue and task lists.  If the targets are missing or fail
    validation, a 'hub_error' reply carrying the wrapped exception is sent
    instead.
    """
    content = msg['content']
    try:
        targets = content['targets']
        targets = self._validate_targets(targets)
    except Exception:
        content = error.wrap_exception()
        self.session.send(self.query, "hub_error",
                content=content, ident=client_id)
        return

    content = dict(status='ok')
    for t in targets:
        # str(t), as in queue_status: on Python 3 bytes(t) of an int is a
        # zero-filled bytes object, not the textual id.
        content[str(t)] = len(self.queues[t]) + len(self.tasks[t])
    self.session.send(self.query, "load_reply", content=content, ident=client_id)
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
def queue_status(self, client_id, msg):
    """Return the Queue status of one or more targets.

    If verbose, return the msg_ids, else return len of each type.

    Keys:

    * queue (pending MUX jobs)
    * tasks (pending Task jobs)
    * completed (finished jobs from both queues)

    The reply also carries an 'unassigned' entry (list if verbose, else
    count).  If the targets are missing or fail validation, a 'hub_error'
    reply carrying the wrapped exception is sent instead.
    """
    content = msg['content']
    try:
        # looked up inside the try, as in check_load, so a request without
        # 'targets' produces an error reply instead of an unhandled KeyError
        targets = content['targets']
        targets = self._validate_targets(targets)
    except Exception:
        content = error.wrap_exception()
        self.session.send(self.query, "hub_error",
                content=content, ident=client_id)
        return
    verbose = content.get('verbose', False)
    content = dict(status='ok')
    for t in targets:
        queue = self.queues[t]
        completed = self.completed[t]
        tasks = self.tasks[t]
        if not verbose:
            queue = len(queue)
            completed = len(completed)
            tasks = len(tasks)
        content[str(t)] = {'queue': queue, 'completed': completed, 'tasks': tasks}
    content['unassigned'] = list(self.unassigned) if verbose else len(self.unassigned)
    self.session.send(self.query, "queue_reply", content=content, ident=client_id)
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
def purge_results(self, client_id, msg):
    """Purge results from memory. This method is more valuable before we move
    to a DB based message storage mechanism.

    Request content keys read here:

    * msg_ids: 'all' to drop every completed record, or a list of msg_ids
      to drop.  A listed msg_id that is still pending is an error.
    * engine_ids: engines whose completed records should also be dropped
      (only handled together with an explicit msg_ids list).

    A 'purge_reply' is always sent; on failure its content is the wrapped
    exception.
    """
    content = msg['content']
    self.log.info("Dropping records with %s", content)
    msg_ids = content.get('msg_ids', [])
    reply = dict(status='ok')
    if msg_ids == 'all':
        try:
            self.db.drop_matching_records(dict(completed={'$ne':None}))
        except Exception:
            reply = error.wrap_exception()
            self.log.exception("Error dropping records")
    else:
        pending = [m for m in msg_ids if (m in self.pending)]
        if pending:
            # raised and caught right away so that wrap_exception and
            # log.exception are called while the exception is being handled
            try:
                raise IndexError("msg pending: %r" % pending[0])
            except Exception:
                reply = error.wrap_exception()
                self.log.exception("Error dropping records")
        else:
            try:
                self.db.drop_matching_records(dict(msg_id={'$in':msg_ids}))
            except Exception:
                reply = error.wrap_exception()
                self.log.exception("Error dropping records")

        if reply['status'] == 'ok':
            eids = content.get('engine_ids', [])
            for eid in eids:
                if eid not in self.engines:
                    try:
                        raise IndexError("No such engine: %i" % eid)
                    except Exception:
                        reply = error.wrap_exception()
                        self.log.exception("Error dropping records")
                    break
                uid = self.engines[eid].uuid
                try:
                    self.db.drop_matching_records(dict(engine_uuid=uid, completed={'$ne':None}))
                except Exception:
                    reply = error.wrap_exception()
                    self.log.exception("Error dropping records")
                    break

    self.session.send(self.query, 'purge_reply', content=reply, ident=client_id)
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
add Client.resubmit for re-running tasks...
def resubmit_task(self, client_id, msg):
    """Resubmit one or more tasks.

    Looks up the records for ``msg['content']['msg_ids']``, sends each one
    again on the resubmit stream under a freshly generated msg_id, stores a
    new record for it, and replies with a 'resubmit_reply' whose
    'resubmitted' entry maps each original msg_id to its new one.  The
    original records are then updated with their 'resubmitted' id.

    Any failure (DB error, more records than requested, unknown msg_id) is
    reported as a 'resubmit_reply' carrying the wrapped exception.
    """
    def finish(reply):
        self.session.send(self.query, 'resubmit_reply', content=reply, ident=client_id)

    content = msg['content']
    msg_ids = content['msg_ids']
    try:
        records = self.db.find_records({'msg_id' : {'$in' : msg_ids}}, keys=[
            'header', 'content', 'buffers'])
    except Exception:
        self.log.error('db::db error finding tasks to resubmit', exc_info=True)
        return finish(error.wrap_exception())

    # validate msg_ids
    found_ids = set(rec['msg_id'] for rec in records)
    if len(records) > len(msg_ids):
        try:
            raise RuntimeError("DB appears to be in an inconsistent state. "
                "More matching records were found than should exist")
        except Exception:
            self.log.exception("Failed to resubmit task")
            return finish(error.wrap_exception())
    elif len(records) < len(msg_ids):
        missing = [ m for m in msg_ids if m not in found_ids ]
        try:
            raise KeyError("No such msg(s): %r" % missing)
        except KeyError:
            self.log.exception("Failed to resubmit task")
            return finish(error.wrap_exception())
    # Tasks that are still pending are not rejected: every resubmission
    # gets a new msg_id, so it cannot collide with the one in flight.

    # mapping of original IDs to resubmitted IDs
    resubmitted = {}

    # send the messages
    for rec in records:
        header = rec['header']
        msg = self.session.msg(header['msg_type'], parent=header)
        msg_id = msg['msg_id']
        msg['content'] = rec['content']

        # use the old header, but update msg_id and timestamp
        fresh = msg['header']
        header['msg_id'] = fresh['msg_id']
        header['date'] = fresh['date']
        msg['header'] = header

        self.session.send(self.resubmit, msg, buffers=rec['buffers'])

        resubmitted[rec['msg_id']] = msg_id
        self.pending.add(msg_id)
        # attached only after sending; the buffers went out via buffers= above
        msg['buffers'] = rec['buffers']
        try:
            self.db.add_record(msg_id, init_record(msg))
        except Exception:
            self.log.error("db::DB Error updating record: %s", msg_id, exc_info=True)
            return finish(error.wrap_exception())

    finish(dict(status='ok', resubmitted=resubmitted))

    # store the new IDs in the Task DB
    for msg_id, resubmit_id in iteritems(resubmitted):
        try:
            self.db.update_record(msg_id, {'resubmitted' : resubmit_id})
        except Exception:
            self.log.error("db::DB Error updating record: %s", msg_id, exc_info=True)
MinRK
add Client.resubmit for re-running tasks...
r3874
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
General improvements to database backend...
r3780 def _extract_record(self, rec):
"""decompose a TaskRecord dict into subsection of reply for get_result"""
io_dict = {}
MinRK
pyerr -> error
r16569 for key in ('execute_input', 'execute_result', 'error', 'stdout', 'stderr'):
MinRK
General improvements to database backend...
r3780 io_dict[key] = rec[key]
MinRK
migrate subheader usage to new metadata
r7957 content = {
'header': rec['header'],
'metadata': rec['metadata'],
'result_metadata': rec['result_metadata'],
'result_header' : rec['result_header'],
'result_content': rec['result_content'],
'received' : rec['received'],
'io' : io_dict,
}
MinRK
General improvements to database backend...
r3780 if rec['result_buffers']:
Thomas Kluyver
Fix parallel test suite
r13383 buffers = list(map(bytes, rec['result_buffers']))
MinRK
General improvements to database backend...
r3780 else:
buffers = []
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
General improvements to database backend...
r3780 return content, buffers
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
Controller renamed to Hub (keeping ipcontrollerz)
def get_results(self, client_id, msg):
    """Get the result of 1 or more messages.

    Replies with a 'result_reply' whose content lists the requested msg_ids
    as 'pending' or 'completed'.  Unless the request sets 'status_only',
    the extracted record of every completed message is included under its
    msg_id and its result buffers are attached to the reply.  An unknown
    msg_id, or a failing DB lookup, yields the wrapped exception as content.
    """
    content = msg['content']
    msg_ids = sorted(set(content['msg_ids']))
    statusonly = content.get('status_only', False)
    pending = []
    completed = []
    content = dict(status='ok')
    content['pending'] = pending
    content['completed'] = completed
    buffers = []
    if not statusonly:
        try:
            matches = self.db.find_records(dict(msg_id={'$in':msg_ids}))
            # turn match list into dict, for faster lookup
            records = dict((match['msg_id'], match) for match in matches)
        except Exception:
            content = error.wrap_exception()
            self.log.exception("Failed to get results")
            self.session.send(self.query, "result_reply", content=content,
                                                parent=msg, ident=client_id)
            return
    else:
        records = {}
    for msg_id in msg_ids:
        if msg_id in self.pending:
            pending.append(msg_id)
        elif msg_id in self.all_completed:
            completed.append(msg_id)
            if not statusonly:
                c, bufs = self._extract_record(records[msg_id])
                content[msg_id] = c
                buffers.extend(bufs)
        elif msg_id in records:
            # inspect this message's own record, not whichever record the
            # lookup-building loop happened to visit last
            rec = records[msg_id]
            if rec['completed']:
                completed.append(msg_id)
                c, bufs = self._extract_record(rec)
                content[msg_id] = c
                buffers.extend(bufs)
            else:
                pending.append(msg_id)
        else:
            # raised and caught right away so wrap_exception is called
            # while the exception is being handled
            try:
                raise KeyError('No such message: '+msg_id)
            except Exception:
                content = error.wrap_exception()
            break
    self.session.send(self.query, "result_reply", content=content,
                                        parent=msg, ident=client_id,
                                        buffers=buffers)
MinRK
General improvements to database backend...
def get_history(self, client_id, msg):
    """Reply with the list of all msg_ids held in the DB records.

    Sends a 'history_reply'; if the DB call fails, the failure is logged and
    the reply content is the wrapped exception instead.
    """
    try:
        history = self.db.get_history()
    except Exception:
        reply = error.wrap_exception()
        self.log.exception("Failed to get history")
    else:
        reply = {'status': 'ok', 'history': history}

    self.session.send(
        self.query, "history_reply",
        content=reply, parent=msg, ident=client_id,
    )
def db_query(self, client_id, msg):
    """Run a raw query against the task record database.

    Reads 'query' (passed through extract_dates) and 'keys' from the request
    content and replies with a 'db_reply'.  'buffers' and 'result_buffers'
    are always removed from the returned records; they are attached to the
    reply, with their per-record counts in 'buffer_lens' and
    'result_buffer_lens', only when 'keys' names them explicitly.
    On a DB failure the reply content is the wrapped exception.
    """
    request = msg['content']
    query = extract_dates(request.get('query', {}))
    keys = request.get('keys', None)
    buffers = []
    nothing = []
    try:
        records = self.db.find_records(query, keys)
    except Exception:
        content = error.wrap_exception()
        self.log.exception("DB query failed")
    else:
        # extract buffers from reply content:
        buffer_lens = None
        result_buffer_lens = None
        if keys is not None:
            if 'buffers' in keys:
                buffer_lens = []
            if 'result_buffers' in keys:
                result_buffer_lens = []

        for record in records:
            # buffers may be None, so double check
            plain = record.pop('buffers', nothing) or nothing
            if buffer_lens is not None:
                buffer_lens.append(len(plain))
                buffers.extend(plain)
            results = record.pop('result_buffers', nothing) or nothing
            if result_buffer_lens is not None:
                result_buffer_lens.append(len(results))
                buffers.extend(results)

        content = dict(
            status='ok',
            records=records,
            buffer_lens=buffer_lens,
            result_buffer_lens=result_buffer_lens,
        )
    self.session.send(
        self.query, "db_reply",
        content=content, parent=msg, ident=client_id, buffers=buffers,
    )