various db backend fixes...
MinRK
@@ -0,0 +1,37 b''
1 """Tests for mongodb backend"""
2
3 #-------------------------------------------------------------------------------
4 # Copyright (C) 2011 The IPython Development Team
5 #
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING, distributed as part of this software.
8 #-------------------------------------------------------------------------------
9
10 #-------------------------------------------------------------------------------
11 # Imports
12 #-------------------------------------------------------------------------------
13
14 from nose import SkipTest
15
16 from pymongo import Connection
17 from IPython.parallel.controller.mongodb import MongoDB
18
19 from . import test_db
20
21 try:
22 c = Connection()
23 except Exception:
24 c=None
25
26 class TestMongoBackend(test_db.TestDictBackend):
27 """MongoDB backend tests"""
28
29 def create_db(self):
30 try:
31 return MongoDB(database='iptestdb', _connection=c)
32 except Exception:
33 raise SkipTest("Couldn't connect to mongodb")
34
35 def teardown(self):
36 if c is not None:
37 c.drop_database('iptestdb')
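
The create_db/teardown overrides above sit on top of test_db.TestDictBackend, so the same subclass-and-override pattern can host any backend. A minimal sketch for a SQLite variant (the SQLiteDB 'location' trait name is an assumption for illustration, not taken from this diff):

    import tempfile

    from IPython.parallel.controller.sqlitedb import SQLiteDB

    from . import test_db

    class TestSQLiteBackend(test_db.TestDictBackend):
        """SQLite backend tests (illustrative sketch)."""

        def create_db(self):
            # write the db file somewhere disposable
            return SQLiteDB(location=tempfile.gettempdir())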
@@ -1,180 +1,180 b''
1 1 """A Task logger that presents our DB interface,
2 2 but exists entirely in memory and is implemented with dicts.
3 3
4 4 TaskRecords are dicts of the form:
5 5 {
6 6 'msg_id' : str(uuid),
7 7 'client_uuid' : str(uuid),
8 8 'engine_uuid' : str(uuid) or None,
9 9 'header' : dict(header),
10 10 'content': dict(content),
11 11 'buffers': list(buffers),
12 12 'submitted': datetime,
13 13 'started': datetime or None,
14 14 'completed': datetime or None,
15 15 'resubmitted': datetime or None,
16 16 'result_header' : dict(header) or None,
17 17 'result_content' : dict(content) or None,
18 18 'result_buffers' : list(buffers) or None,
19 19 }
20 20 With this info, many of the special categories of tasks can be defined by query:
21 21
22 22 pending: completed is None
23 23 client's outstanding: client_uuid = uuid && completed is None
24 24 MIA: engine_uuid is None (and completed is None)
25 25 etc.
26 26
27 27 EngineRecords are dicts of the form:
28 28 {
29 29 'eid' : int(id),
30 30 'uuid': str(uuid)
31 31 }
32 32 This may be extended, but is currently minimal.
33 33
34 34 We support a subset of mongodb operators:
35 35 $lt,$gt,$lte,$gte,$ne,$in,$nin,$all,$mod,$exists
36 36 """
37 37 #-----------------------------------------------------------------------------
38 38 # Copyright (C) 2010 The IPython Development Team
39 39 #
40 40 # Distributed under the terms of the BSD License. The full license is in
41 41 # the file COPYING, distributed as part of this software.
42 42 #-----------------------------------------------------------------------------
43 43
44 44
45 45 from datetime import datetime
46 46
47 47 from IPython.config.configurable import Configurable
48 48
49 49 from IPython.utils.traitlets import Dict, CUnicode
50 50
51 51 filters = {
52 52 '$lt' : lambda a,b: a < b,
53 53 '$gt' : lambda a,b: a > b,
54 54 '$eq' : lambda a,b: a == b,
55 55 '$ne' : lambda a,b: a != b,
56 56 '$lte': lambda a,b: a <= b,
57 57 '$gte': lambda a,b: a >= b,
58 58 '$in' : lambda a,b: a in b,
59 59 '$nin': lambda a,b: a not in b,
60 60 '$all': lambda a,b: all([ bb in a for bb in b ]), # every query element must be in the record value
61 61 '$mod': lambda a,b: a%b[0] == b[1],
62 62 '$exists' : lambda a,b: (b and a is not None) or (a is None and not b)
63 63 }
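# A quick illustration of the operator lambdas above (values made up):
#
#     filters['$lt'](3, 5)              -> True   # 3 < 5
#     filters['$in']('a', ['a', 'b'])   -> True
#     filters['$mod'](10, [3, 1])       -> True   # 10 % 3 == 1
#     filters['$exists']('x', True)     -> True   # value is present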
64 64
65 65
66 66 class CompositeFilter(object):
67 67 """Composite filter for matching multiple properties."""
68 68
69 69 def __init__(self, dikt):
70 70 self.tests = []
71 71 self.values = []
72 72 for key, value in dikt.iteritems():
73 73 self.tests.append(filters[key])
74 74 self.values.append(value)
75 75
76 76 def __call__(self, value):
77 77 for test,check in zip(self.tests, self.values):
78 78 if not test(value, check):
79 79 return False
80 80 return True
81 81
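# CompositeFilter turns a multi-operator query fragment into one callable,
# e.g. (values made up):
#
#     f = CompositeFilter({'$gte': 2, '$lt': 10})
#     f(5)   -> True
#     f(12)  -> False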
82 82 class BaseDB(Configurable):
83 83 """Empty Parent class so traitlets work on DB."""
84 84 # base configurable traits:
85 85 session = CUnicode("")
86 86
87 87 class DictDB(BaseDB):
88 88 """Basic in-memory dict-based object for saving Task Records.
89 89
90 90 This is the first object to present the DB interface
91 91 for logging tasks out of memory.
92 92
93 93 The interface is based on MongoDB, so adding a MongoDB
94 94 backend should be straightforward.
95 95 """
96 96
97 97 _records = Dict()
98 98
99 99 def _match_one(self, rec, tests):
100 100 """Check if a specific record matches tests."""
101 101 for key,test in tests.iteritems():
102 102 if not test(rec.get(key, None)):
103 103 return False
104 104 return True
105 105
106 106 def _match(self, check):
107 107 """Find all the matches for a check dict."""
108 108 matches = []
109 109 tests = {}
110 110 for k,v in check.iteritems():
111 111 if isinstance(v, dict):
112 112 tests[k] = CompositeFilter(v)
113 113 else:
114 114 tests[k] = lambda o, v=v: o==v # bind v now; a plain closure would capture the loop variable
115 115
116 116 for rec in self._records.itervalues():
117 117 if self._match_one(rec, tests):
118 118 matches.append(rec)
119 119 return matches
120 120
121 121 def _extract_subdict(self, rec, keys):
122 122 """extract subdict of keys"""
123 123 d = {}
124 124 d['msg_id'] = rec['msg_id']
125 125 for key in keys:
126 126 d[key] = rec[key]
127 127 return d
128 128
129 129 def add_record(self, msg_id, rec):
130 130 """Add a new Task Record, by msg_id."""
131 131 if self._records.has_key(msg_id):
132 132 raise KeyError("Already have msg_id %r"%(msg_id))
133 133 self._records[msg_id] = rec
134 134
135 135 def get_record(self, msg_id):
136 136 """Get a specific Task Record, by msg_id."""
137 137 if not self._records.has_key(msg_id):
138 138 raise KeyError("No such msg_id %r"%(msg_id))
139 139 return self._records[msg_id]
140 140
141 141 def update_record(self, msg_id, rec):
142 142 """Update the data in an existing record."""
143 143 self._records[msg_id].update(rec)
144 144
145 145 def drop_matching_records(self, check):
146 146 """Remove a record from the DB."""
147 147 matches = self._match(check)
148 148 for m in matches:
149 del self._records[m]
149 del self._records[m['msg_id']]
150 150
151 151 def drop_record(self, msg_id):
152 152 """Remove a record from the DB."""
153 153 del self._records[msg_id]
154 154
155 155
156 156 def find_records(self, check, keys=None):
157 157 """Find records matching a query dict, optionally extracting subset of keys.
158 158
159 159 Returns list of matching records.
160 160
161 161 Parameters
162 162 ----------
163 163
164 164 check: dict
165 165 mongodb-style query argument
166 166 keys: list of strs [optional]
167 167 if specified, the subset of keys to extract. msg_id will *always* be
168 168 included.
169 169 """
170 170 matches = self._match(check)
171 171 if keys:
172 172 return [ self._extract_subdict(rec, keys) for rec in matches ]
173 173 else:
174 174 return matches
175 175
176 176
177 177 def get_history(self):
178 178 """get all msg_ids, ordered by time submitted."""
179 179 msg_ids = self._records.keys()
180 180 return sorted(msg_ids, key=lambda m: self._records[m]['submitted'])
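
To make the interface concrete, a short usage sketch of DictDB with a mongodb-style query (record values invented for illustration; real records carry the full key set from the module docstring):

    from datetime import datetime
    from IPython.parallel.controller.dictdb import DictDB

    db = DictDB()
    db.add_record('msg-1', {'msg_id': 'msg-1',
                            'submitted': datetime(2011, 1, 1), 'completed': None})
    db.add_record('msg-2', {'msg_id': 'msg-2',
                            'submitted': datetime(2011, 1, 2),
                            'completed': datetime(2011, 1, 3)})

    pending = db.find_records({'completed': None})        # [msg-1 record]
    done = db.find_records({'completed': {'$ne': None}})  # [msg-2 record]
    print(db.get_history())  # ['msg-1', 'msg-2'], ordered by 'submitted'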
@@ -1,1284 +1,1282 b''
1 1 #!/usr/bin/env python
2 2 """The IPython Controller Hub with 0MQ
3 3 This is the master object that handles connections from engines and clients,
4 4 and monitors traffic through the various queues.
5 5 """
6 6 #-----------------------------------------------------------------------------
7 7 # Copyright (C) 2010 The IPython Development Team
8 8 #
9 9 # Distributed under the terms of the BSD License. The full license is in
10 10 # the file COPYING, distributed as part of this software.
11 11 #-----------------------------------------------------------------------------
12 12
13 13 #-----------------------------------------------------------------------------
14 14 # Imports
15 15 #-----------------------------------------------------------------------------
16 16 from __future__ import print_function
17 17
18 18 import sys
19 19 import time
20 20 from datetime import datetime
21 21
22 22 import zmq
23 23 from zmq.eventloop import ioloop
24 24 from zmq.eventloop.zmqstream import ZMQStream
25 25
26 26 # internal:
27 27 from IPython.utils.importstring import import_item
28 28 from IPython.utils.traitlets import HasTraits, Instance, Int, CStr, Str, Dict, Set, List, Bool
29 29
30 30 from IPython.parallel import error, util
31 31 from IPython.parallel.factory import RegistrationFactory, LoggingFactory
32 32
33 33 from .heartmonitor import HeartMonitor
34 34
35 35 #-----------------------------------------------------------------------------
36 36 # Code
37 37 #-----------------------------------------------------------------------------
38 38
39 39 def _passer(*args, **kwargs):
40 40 return
41 41
42 42 def _printer(*args, **kwargs):
43 43 print (args)
44 44 print (kwargs)
45 45
46 46 def empty_record():
47 47 """Return an empty dict with all record keys."""
48 48 return {
49 49 'msg_id' : None,
50 50 'header' : None,
51 51 'content': None,
52 52 'buffers': None,
53 53 'submitted': None,
54 54 'client_uuid' : None,
55 55 'engine_uuid' : None,
56 56 'started': None,
57 57 'completed': None,
58 58 'resubmitted': None,
59 59 'result_header' : None,
60 60 'result_content' : None,
61 61 'result_buffers' : None,
62 62 'queue' : None,
63 63 'pyin' : None,
64 64 'pyout': None,
65 65 'pyerr': None,
66 66 'stdout': '',
67 67 'stderr': '',
68 68 }
69 69
70 70 def init_record(msg):
71 71 """Initialize a TaskRecord based on a request."""
72 72 header = msg['header']
73 73 return {
74 74 'msg_id' : header['msg_id'],
75 75 'header' : header,
76 76 'content': msg['content'],
77 77 'buffers': msg['buffers'],
78 78 'submitted': datetime.strptime(header['date'], util.ISO8601),
79 79 'client_uuid' : None,
80 80 'engine_uuid' : None,
81 81 'started': None,
82 82 'completed': None,
83 83 'resubmitted': None,
84 84 'result_header' : None,
85 85 'result_content' : None,
86 86 'result_buffers' : None,
87 87 'queue' : None,
88 88 'pyin' : None,
89 89 'pyout': None,
90 90 'pyerr': None,
91 91 'stdout': '',
92 92 'stderr': '',
93 93 }
94 94
95 95
96 96 class EngineConnector(HasTraits):
97 97 """A simple object for accessing the various zmq connections of an object.
98 98 Attributes are:
99 99 id (int): engine ID
100 100 uuid (str): uuid (unused?)
101 101 queue (str): identity of queue's XREQ socket
102 102 registration (str): identity of registration XREQ socket
103 103 heartbeat (str): identity of heartbeat XREQ socket
104 104 """
105 105 id=Int(0)
106 106 queue=Str()
107 107 control=Str()
108 108 registration=Str()
109 109 heartbeat=Str()
110 110 pending=Set()
111 111
112 112 class HubFactory(RegistrationFactory):
113 113 """The Configurable for setting up a Hub."""
114 114
115 115 # name of a scheduler scheme
116 116 scheme = Str('leastload', config=True)
117 117
118 118 # port-pairs for monitoredqueues:
119 119 hb = Instance(list, config=True)
120 120 def _hb_default(self):
121 121 return util.select_random_ports(2)
122 122
123 123 mux = Instance(list, config=True)
124 124 def _mux_default(self):
125 125 return util.select_random_ports(2)
126 126
127 127 task = Instance(list, config=True)
128 128 def _task_default(self):
129 129 return util.select_random_ports(2)
130 130
131 131 control = Instance(list, config=True)
132 132 def _control_default(self):
133 133 return util.select_random_ports(2)
134 134
135 135 iopub = Instance(list, config=True)
136 136 def _iopub_default(self):
137 137 return util.select_random_ports(2)
138 138
139 139 # single ports:
140 140 mon_port = Instance(int, config=True)
141 141 def _mon_port_default(self):
142 142 return util.select_random_ports(1)[0]
143 143
144 144 notifier_port = Instance(int, config=True)
145 145 def _notifier_port_default(self):
146 146 return util.select_random_ports(1)[0]
147 147
148 148 ping = Int(1000, config=True) # ping frequency
149 149
150 150 engine_ip = CStr('127.0.0.1', config=True)
151 151 engine_transport = CStr('tcp', config=True)
152 152
153 153 client_ip = CStr('127.0.0.1', config=True)
154 154 client_transport = CStr('tcp', config=True)
155 155
156 156 monitor_ip = CStr('127.0.0.1', config=True)
157 157 monitor_transport = CStr('tcp', config=True)
158 158
159 159 monitor_url = CStr('')
160 160
161 161 db_class = CStr('IPython.parallel.controller.dictdb.DictDB', config=True)
162 162
163 163 # not configurable
164 164 db = Instance('IPython.parallel.controller.dictdb.BaseDB')
165 165 heartmonitor = Instance('IPython.parallel.controller.heartmonitor.HeartMonitor')
166 166 subconstructors = List()
167 167 _constructed = Bool(False)
168 168
169 169 def _ip_changed(self, name, old, new):
170 170 self.engine_ip = new
171 171 self.client_ip = new
172 172 self.monitor_ip = new
173 173 self._update_monitor_url()
174 174
175 175 def _update_monitor_url(self):
176 176 self.monitor_url = "%s://%s:%i"%(self.monitor_transport, self.monitor_ip, self.mon_port)
177 177
178 178 def _transport_changed(self, name, old, new):
179 179 self.engine_transport = new
180 180 self.client_transport = new
181 181 self.monitor_transport = new
182 182 self._update_monitor_url()
183 183
184 184 def __init__(self, **kwargs):
185 185 super(HubFactory, self).__init__(**kwargs)
186 186 self._update_monitor_url()
187 187 # self.on_trait_change(self._sync_ips, 'ip')
188 188 # self.on_trait_change(self._sync_transports, 'transport')
189 189 self.subconstructors.append(self.construct_hub)
190 190
191 191
192 192 def construct(self):
193 193 assert not self._constructed, "already constructed!"
194 194
195 195 for subc in self.subconstructors:
196 196 subc()
197 197
198 198 self._constructed = True
199 199
200 200
201 201 def start(self):
202 202 assert self._constructed, "must be constructed by self.construct() first!"
203 203 self.heartmonitor.start()
204 204 self.log.info("Heartmonitor started")
205 205
206 206 def construct_hub(self):
207 207 """construct"""
208 208 client_iface = "%s://%s:"%(self.client_transport, self.client_ip) + "%i"
209 209 engine_iface = "%s://%s:"%(self.engine_transport, self.engine_ip) + "%i"
210 210
211 211 ctx = self.context
212 212 loop = self.loop
213 213
214 214 # Registrar socket
215 215 q = ZMQStream(ctx.socket(zmq.XREP), loop)
216 216 q.bind(client_iface % self.regport)
217 217 self.log.info("Hub listening on %s for registration."%(client_iface%self.regport))
218 218 if self.client_ip != self.engine_ip:
219 219 q.bind(engine_iface % self.regport)
220 220 self.log.info("Hub listening on %s for registration."%(engine_iface%self.regport))
221 221
222 222 ### Engine connections ###
223 223
224 224 # heartbeat
225 225 hpub = ctx.socket(zmq.PUB)
226 226 hpub.bind(engine_iface % self.hb[0])
227 227 hrep = ctx.socket(zmq.XREP)
228 228 hrep.bind(engine_iface % self.hb[1])
229 229 self.heartmonitor = HeartMonitor(loop=loop, pingstream=ZMQStream(hpub,loop), pongstream=ZMQStream(hrep,loop),
230 230 period=self.ping, logname=self.log.name)
231 231
232 232 ### Client connections ###
233 233 # Notifier socket
234 234 n = ZMQStream(ctx.socket(zmq.PUB), loop)
235 235 n.bind(client_iface%self.notifier_port)
236 236
237 237 ### build and launch the queues ###
238 238
239 239 # monitor socket
240 240 sub = ctx.socket(zmq.SUB)
241 241 sub.setsockopt(zmq.SUBSCRIBE, "")
242 242 sub.bind(self.monitor_url)
243 243 sub.bind('inproc://monitor')
244 244 sub = ZMQStream(sub, loop)
245 245
246 246 # connect the db
247 247 self.log.info('Hub using DB backend: %r'%(self.db_class.split('.')[-1]))
248 248 # cdir = self.config.Global.cluster_dir
249 249 self.db = import_item(self.db_class)(session=self.session.session, config=self.config)
250 250 time.sleep(.25)
251 251
252 252 # build connection dicts
253 253 self.engine_info = {
254 254 'control' : engine_iface%self.control[1],
255 255 'mux': engine_iface%self.mux[1],
256 256 'heartbeat': (engine_iface%self.hb[0], engine_iface%self.hb[1]),
257 257 'task' : engine_iface%self.task[1],
258 258 'iopub' : engine_iface%self.iopub[1],
259 259 # 'monitor' : engine_iface%self.mon_port,
260 260 }
261 261
262 262 self.client_info = {
263 263 'control' : client_iface%self.control[0],
264 264 'mux': client_iface%self.mux[0],
265 265 'task' : (self.scheme, client_iface%self.task[0]),
266 266 'iopub' : client_iface%self.iopub[0],
267 267 'notification': client_iface%self.notifier_port
268 268 }
269 269 self.log.debug("Hub engine addrs: %s"%self.engine_info)
270 270 self.log.debug("Hub client addrs: %s"%self.client_info)
271 271
272 272 # resubmit stream
273 273 r = ZMQStream(ctx.socket(zmq.XREQ), loop)
274 274 url = util.disambiguate_url(self.client_info['task'][-1])
275 275 r.setsockopt(zmq.IDENTITY, self.session.session)
276 276 r.connect(url)
277 277
278 278 self.hub = Hub(loop=loop, session=self.session, monitor=sub, heartmonitor=self.heartmonitor,
279 279 query=q, notifier=n, resubmit=r, db=self.db,
280 280 engine_info=self.engine_info, client_info=self.client_info,
281 281 logname=self.log.name)
282 282
283 283
284 284 class Hub(LoggingFactory):
285 285 """The IPython Controller Hub with 0MQ connections
286 286
287 287 Parameters
288 288 ==========
289 289 loop: zmq IOLoop instance
290 290 session: StreamSession object
291 291 <removed> context: zmq context for creating new connections (?)
292 292 queue: ZMQStream for monitoring the command queue (SUB)
293 293 query: ZMQStream for engine registration and client queries requests (XREP)
294 294 heartbeat: HeartMonitor object checking the pulse of the engines
295 295 notifier: ZMQStream for broadcasting engine registration changes (PUB)
296 296 db: connection to db for out of memory logging of commands
297 297 NotImplemented
298 298 engine_info: dict of zmq connection information for engines to connect
299 299 to the queues.
300 300 client_info: dict of zmq connection information for engines to connect
301 301 to the queues.
302 302 """
303 303 # internal data structures:
304 304 ids=Set() # engine IDs
305 305 keytable=Dict()
306 306 by_ident=Dict()
307 307 engines=Dict()
308 308 clients=Dict()
309 309 hearts=Dict()
310 310 pending=Set()
311 311 queues=Dict() # pending msg_ids keyed by engine_id
312 312 tasks=Dict() # pending msg_ids submitted as tasks, keyed by client_id
313 313 completed=Dict() # completed msg_ids keyed by engine_id
314 314 all_completed=Set() # completed msg_ids (across all engines)
315 315 dead_engines=Set() # uuids of engines that have unregistered
316 316 unassigned=Set() # set of task msg_ids not yet assigned a destination
317 317 incoming_registrations=Dict()
318 318 registration_timeout=Int()
319 319 _idcounter=Int(0)
320 320
321 321 # objects from constructor:
322 322 loop=Instance(ioloop.IOLoop)
323 323 query=Instance(ZMQStream)
324 324 monitor=Instance(ZMQStream)
325 325 notifier=Instance(ZMQStream)
326 326 resubmit=Instance(ZMQStream)
327 327 heartmonitor=Instance(HeartMonitor)
328 328 db=Instance(object)
329 329 client_info=Dict()
330 330 engine_info=Dict()
331 331
332 332
333 333 def __init__(self, **kwargs):
334 334 """
335 335 # universal:
336 336 loop: IOLoop for creating future connections
337 337 session: streamsession for sending serialized data
338 338 # engine:
339 339 queue: ZMQStream for monitoring queue messages
340 340 query: ZMQStream for engine+client registration and client requests
341 341 heartbeat: HeartMonitor object for tracking engines
342 342 # extra:
343 343 db: ZMQStream for db connection (NotImplemented)
344 344 engine_info: zmq address/protocol dict for engine connections
345 345 client_info: zmq address/protocol dict for client connections
346 346 """
347 347
348 348 super(Hub, self).__init__(**kwargs)
349 349 self.registration_timeout = max(5000, 2*self.heartmonitor.period)
350 350
351 351 # validate connection dicts:
352 352 for k,v in self.client_info.iteritems():
353 353 if k == 'task':
354 354 util.validate_url_container(v[1])
355 355 else:
356 356 util.validate_url_container(v)
357 357 # util.validate_url_container(self.client_info)
358 358 util.validate_url_container(self.engine_info)
359 359
360 360 # register our callbacks
361 361 self.query.on_recv(self.dispatch_query)
362 362 self.monitor.on_recv(self.dispatch_monitor_traffic)
363 363
364 364 self.heartmonitor.add_heart_failure_handler(self.handle_heart_failure)
365 365 self.heartmonitor.add_new_heart_handler(self.handle_new_heart)
366 366
367 367 self.monitor_handlers = { 'in' : self.save_queue_request,
368 368 'out': self.save_queue_result,
369 369 'intask': self.save_task_request,
370 370 'outtask': self.save_task_result,
371 371 'tracktask': self.save_task_destination,
372 372 'incontrol': _passer,
373 373 'outcontrol': _passer,
374 374 'iopub': self.save_iopub_message,
375 375 }
376 376
377 377 self.query_handlers = {'queue_request': self.queue_status,
378 378 'result_request': self.get_results,
379 379 'history_request': self.get_history,
380 380 'db_request': self.db_query,
381 381 'purge_request': self.purge_results,
382 382 'load_request': self.check_load,
383 383 'resubmit_request': self.resubmit_task,
384 384 'shutdown_request': self.shutdown_request,
385 385 'registration_request' : self.register_engine,
386 386 'unregistration_request' : self.unregister_engine,
387 387 'connection_request': self.connection_request,
388 388 }
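# a client query is a session message whose msg_type selects one of the
# handlers above, e.g. msg_type 'queue_request' with content
# {'targets': [0, 1], 'verbose': False} routes to self.queue_status
# (ids and flags here are illustrative).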
389 389
390 390 # ignore resubmit replies
391 391 self.resubmit.on_recv(lambda msg: None, copy=False)
392 392
393 393 self.log.info("hub::created hub")
394 394
395 395 @property
396 396 def _next_id(self):
397 397 """gemerate a new ID.
398 398
399 399 No longer reuse old ids, just count from 0."""
400 400 newid = self._idcounter
401 401 self._idcounter += 1
402 402 return newid
403 403 # newid = 0
404 404 # incoming = [id[0] for id in self.incoming_registrations.itervalues()]
405 405 # # print newid, self.ids, self.incoming_registrations
406 406 # while newid in self.ids or newid in incoming:
407 407 # newid += 1
408 408 # return newid
409 409
410 410 #-----------------------------------------------------------------------------
411 411 # message validation
412 412 #-----------------------------------------------------------------------------
413 413
414 414 def _validate_targets(self, targets):
415 415 """turn any valid targets argument into a list of integer ids"""
416 416 if targets is None:
417 417 # default to all
418 418 targets = self.ids
419 419
420 420 if isinstance(targets, (int,str,unicode)):
421 421 # only one target specified
422 422 targets = [targets]
423 423 _targets = []
424 424 for t in targets:
425 425 # map raw identities to ids
426 426 if isinstance(t, (str,unicode)):
427 427 t = self.by_ident.get(t, t)
428 428 _targets.append(t)
429 429 targets = _targets
430 430 bad_targets = [ t for t in targets if t not in self.ids ]
431 431 if bad_targets:
432 432 raise IndexError("No Such Engine: %r"%bad_targets)
433 433 if not targets:
434 434 raise IndexError("No Engines Registered")
435 435 return targets
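# e.g. (made-up ids): _validate_targets(None) returns all registered ids;
# _validate_targets('<queue-identity>') maps the raw identity to its eid via
# self.by_ident; _validate_targets(0) returns [0], or raises IndexError if
# engine 0 is not registered.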
436 436
437 437 #-----------------------------------------------------------------------------
438 438 # dispatch methods (1 per stream)
439 439 #-----------------------------------------------------------------------------
440 440
441 441 # def dispatch_registration_request(self, msg):
442 442 # """"""
443 443 # self.log.debug("registration::dispatch_register_request(%s)"%msg)
444 444 # idents,msg = self.session.feed_identities(msg)
445 445 # if not idents:
446 446 # self.log.error("Bad Query Message: %s"%msg, exc_info=True)
447 447 # return
448 448 # try:
449 449 # msg = self.session.unpack_message(msg,content=True)
450 450 # except:
451 451 # self.log.error("registration::got bad registration message: %s"%msg, exc_info=True)
452 452 # return
453 453 #
454 454 # msg_type = msg['msg_type']
455 455 # content = msg['content']
456 456 #
457 457 # handler = self.query_handlers.get(msg_type, None)
458 458 # if handler is None:
459 459 # self.log.error("registration::got bad registration message: %s"%msg)
460 460 # else:
461 461 # handler(idents, msg)
462 462
463 463 def dispatch_monitor_traffic(self, msg):
464 464 """all ME and Task queue messages come through here, as well as
465 465 IOPub traffic."""
466 466 self.log.debug("monitor traffic: %r"%msg[:2])
467 467 switch = msg[0]
468 468 idents, msg = self.session.feed_identities(msg[1:])
469 469 if not idents:
470 470 self.log.error("Bad Monitor Message: %r"%msg)
471 471 return
472 472 handler = self.monitor_handlers.get(switch, None)
473 473 if handler is not None:
474 474 handler(idents, msg)
475 475 else:
476 476 self.log.error("Invalid monitor topic: %r"%switch)
477 477
478 478
479 479 def dispatch_query(self, msg):
480 480 """Route registration requests and queries from clients."""
481 481 idents, msg = self.session.feed_identities(msg)
482 482 if not idents:
483 483 self.log.error("Bad Query Message: %r"%msg)
484 484 return
485 485 client_id = idents[0]
486 486 try:
487 487 msg = self.session.unpack_message(msg, content=True)
488 488 except:
489 489 content = error.wrap_exception()
490 490 self.log.error("Bad Query Message: %r"%msg, exc_info=True)
491 491 self.session.send(self.query, "hub_error", ident=client_id,
492 492 content=content)
493 493 return
494 494
495 495 # print client_id, header, parent, content
496 496 #switch on message type:
497 497 msg_type = msg['msg_type']
498 498 self.log.info("client::client %r requested %r"%(client_id, msg_type))
499 499 handler = self.query_handlers.get(msg_type, None)
500 500 try:
501 501 assert handler is not None, "Bad Message Type: %r"%msg_type
502 502 except:
503 503 content = error.wrap_exception()
504 504 self.log.error("Bad Message Type: %r"%msg_type, exc_info=True)
505 505 self.session.send(self.query, "hub_error", ident=client_id,
506 506 content=content)
507 507 return
508 508
509 509 else:
510 510 handler(idents, msg)
511 511
512 512 def dispatch_db(self, msg):
513 513 """"""
514 514 raise NotImplementedError
515 515
516 516 #---------------------------------------------------------------------------
517 517 # handler methods (1 per event)
518 518 #---------------------------------------------------------------------------
519 519
520 520 #----------------------- Heartbeat --------------------------------------
521 521
522 522 def handle_new_heart(self, heart):
523 523 """handler to attach to heartbeater.
524 524 Called when a new heart starts to beat.
525 525 Triggers completion of registration."""
526 526 self.log.debug("heartbeat::handle_new_heart(%r)"%heart)
527 527 if heart not in self.incoming_registrations:
528 528 self.log.info("heartbeat::ignoring new heart: %r"%heart)
529 529 else:
530 530 self.finish_registration(heart)
531 531
532 532
533 533 def handle_heart_failure(self, heart):
534 534 """handler to attach to heartbeater.
535 535 called when a previously registered heart fails to respond to beat request.
536 536 triggers unregistration"""
537 537 self.log.debug("heartbeat::handle_heart_failure(%r)"%heart)
538 538 eid = self.hearts.get(heart, None)
539 539 queue = self.engines[eid].queue if eid is not None else None # engines[eid] would KeyError for unknown hearts
540 540 if eid is None:
541 541 self.log.info("heartbeat::ignoring heart failure %r"%heart)
542 542 else:
543 543 self.unregister_engine(heart, dict(content=dict(id=eid, queue=queue)))
544 544
545 545 #----------------------- MUX Queue Traffic ------------------------------
546 546
547 547 def save_queue_request(self, idents, msg):
548 548 if len(idents) < 2:
549 549 self.log.error("invalid identity prefix: %s"%idents)
550 550 return
551 551 queue_id, client_id = idents[:2]
552 552 try:
553 553 msg = self.session.unpack_message(msg, content=False)
554 554 except:
555 555 self.log.error("queue::client %r sent invalid message to %r: %s"%(client_id, queue_id, msg), exc_info=True)
556 556 return
557 557
558 558 eid = self.by_ident.get(queue_id, None)
559 559 if eid is None:
560 560 self.log.error("queue::target %r not registered"%queue_id)
561 561 self.log.debug("queue:: valid are: %s"%(self.by_ident.keys()))
562 562 return
563 563
564 564 header = msg['header']
565 565 msg_id = header['msg_id']
566 566 record = init_record(msg)
567 567 record['engine_uuid'] = queue_id
568 568 record['client_uuid'] = client_id
569 569 record['queue'] = 'mux'
570 570
571 571 try:
572 572 # it's possible iopub arrived first:
573 573 existing = self.db.get_record(msg_id)
574 574 for key,evalue in existing.iteritems():
575 575 rvalue = record.get(key, None)
576 576 if evalue and rvalue and evalue != rvalue:
577 577 self.log.warn("conflicting initial state for record: %r:%r <%r> %r"%(msg_id, rvalue, key, evalue))
578 578 elif evalue and not rvalue:
579 579 record[key] = evalue
580 580 self.db.update_record(msg_id, record)
581 581 except KeyError:
582 582 self.db.add_record(msg_id, record)
583 583
584 584 self.pending.add(msg_id)
585 585 self.queues[eid].append(msg_id)
586 586
587 587 def save_queue_result(self, idents, msg):
588 588 if len(idents) < 2:
589 589 self.log.error("invalid identity prefix: %s"%idents)
590 590 return
591 591
592 592 client_id, queue_id = idents[:2]
593 593 try:
594 594 msg = self.session.unpack_message(msg, content=False)
595 595 except:
596 596 self.log.error("queue::engine %r sent invalid message to %r: %s"%(
597 597 queue_id,client_id, msg), exc_info=True)
598 598 return
599 599
600 600 eid = self.by_ident.get(queue_id, None)
601 601 if eid is None:
602 602 self.log.error("queue::unknown engine %r is sending a reply: "%queue_id)
603 603 # self.log.debug("queue:: %s"%msg[2:])
604 604 return
605 605
606 606 parent = msg['parent_header']
607 607 if not parent:
608 608 return
609 609 msg_id = parent['msg_id']
610 610 if msg_id in self.pending:
611 611 self.pending.remove(msg_id)
612 612 self.all_completed.add(msg_id)
613 613 self.queues[eid].remove(msg_id)
614 614 self.completed[eid].append(msg_id)
615 615 elif msg_id not in self.all_completed:
616 616 # it could be a result from a dead engine that died before delivering the
617 617 # result
618 618 self.log.warn("queue:: unknown msg finished %s"%msg_id)
619 619 return
620 620 # update record anyway, because the unregistration could have been premature
621 621 rheader = msg['header']
622 622 completed = datetime.strptime(rheader['date'], util.ISO8601)
623 623 started = rheader.get('started', None)
624 624 if started is not None:
625 625 started = datetime.strptime(started, util.ISO8601)
626 626 result = {
627 627 'result_header' : rheader,
628 628 'result_content': msg['content'],
629 629 'started' : started,
630 630 'completed' : completed
631 631 }
632 632
633 633 result['result_buffers'] = msg['buffers']
634 634 try:
635 635 self.db.update_record(msg_id, result)
636 636 except Exception:
637 637 self.log.error("DB Error updating record %r"%msg_id, exc_info=True)
638 638
639 639
640 640 #--------------------- Task Queue Traffic ------------------------------
641 641
642 642 def save_task_request(self, idents, msg):
643 643 """Save the submission of a task."""
644 644 client_id = idents[0]
645 645
646 646 try:
647 647 msg = self.session.unpack_message(msg, content=False)
648 648 except:
649 649 self.log.error("task::client %r sent invalid task message: %s"%(
650 650 client_id, msg), exc_info=True)
651 651 return
652 652 record = init_record(msg)
653 653
654 654 record['client_uuid'] = client_id
655 655 record['queue'] = 'task'
656 656 header = msg['header']
657 657 msg_id = header['msg_id']
658 658 self.pending.add(msg_id)
659 659 self.unassigned.add(msg_id)
660 660 try:
661 661 # it's possible iopub arrived first:
662 662 existing = self.db.get_record(msg_id)
663 663 if existing['resubmitted']:
664 664 for key in ('submitted', 'client_uuid', 'buffers'):
665 665 # don't clobber these keys on resubmit
666 666 # submitted and client_uuid should be different
667 667 # and buffers might be big, and shouldn't have changed
668 668 record.pop(key)
669 669 # still check content,header which should not change
670 670 # but are not as expensive to compare as buffers
671 671
672 672 for key,evalue in existing.iteritems():
673 673 if key.endswith('buffers'):
674 674 # don't compare buffers
675 675 continue
676 676 rvalue = record.get(key, None)
677 677 if evalue and rvalue and evalue != rvalue:
678 678 self.log.warn("conflicting initial state for record: %r:%r <%r> %r"%(msg_id, rvalue, key, evalue))
679 679 elif evalue and not rvalue:
680 680 record[key] = evalue
681 681 self.db.update_record(msg_id, record)
682 682 except KeyError:
683 683 self.db.add_record(msg_id, record)
684 684 except Exception:
685 685 self.log.error("DB Error saving task request %r"%msg_id, exc_info=True)
686 686
687 687 def save_task_result(self, idents, msg):
688 688 """save the result of a completed task."""
689 689 client_id = idents[0]
690 690 try:
691 691 msg = self.session.unpack_message(msg, content=False)
692 692 except:
693 693 self.log.error("task::invalid task result message send to %r: %s"%(
694 694 client_id, msg), exc_info=True)
696 696 return
697 697
698 698 parent = msg['parent_header']
699 699 if not parent:
700 700 # print msg
701 701 self.log.warn("Task %r had no parent!"%msg)
702 702 return
703 703 msg_id = parent['msg_id']
704 704 if msg_id in self.unassigned:
705 705 self.unassigned.remove(msg_id)
706 706
707 707 header = msg['header']
708 708 engine_uuid = header.get('engine', None)
709 709 eid = self.by_ident.get(engine_uuid, None)
710 710
711 711 if msg_id in self.pending:
712 712 self.pending.remove(msg_id)
713 713 self.all_completed.add(msg_id)
714 714 if eid is not None:
715 715 self.completed[eid].append(msg_id)
716 716 if msg_id in self.tasks[eid]:
717 717 self.tasks[eid].remove(msg_id)
718 718 completed = datetime.strptime(header['date'], util.ISO8601)
719 719 started = header.get('started', None)
720 720 if started is not None:
721 721 started = datetime.strptime(started, util.ISO8601)
722 722 result = {
723 723 'result_header' : header,
724 724 'result_content': msg['content'],
725 725 'started' : started,
726 726 'completed' : completed,
727 727 'engine_uuid': engine_uuid
728 728 }
729 729
730 730 result['result_buffers'] = msg['buffers']
731 731 try:
732 732 self.db.update_record(msg_id, result)
733 733 except Exception:
734 734 self.log.error("DB Error saving task request %r"%msg_id, exc_info=True)
735 735
736 736 else:
737 737 self.log.debug("task::unknown task %s finished"%msg_id)
738 738
739 739 def save_task_destination(self, idents, msg):
740 740 try:
741 741 msg = self.session.unpack_message(msg, content=True)
742 742 except:
743 743 self.log.error("task::invalid task tracking message", exc_info=True)
744 744 return
745 745 content = msg['content']
746 746 # print (content)
747 747 msg_id = content['msg_id']
748 748 engine_uuid = content['engine_id']
749 749 eid = self.by_ident[engine_uuid]
750 750
751 751 self.log.info("task::task %s arrived on %s"%(msg_id, eid))
752 752 if msg_id in self.unassigned:
753 753 self.unassigned.remove(msg_id)
754 754 # else:
755 755 # self.log.debug("task::task %s not listed as MIA?!"%(msg_id))
756 756
757 757 self.tasks[eid].append(msg_id)
758 758 # self.pending[msg_id][1].update(received=datetime.now(),engine=(eid,engine_uuid))
759 759 try:
760 760 self.db.update_record(msg_id, dict(engine_uuid=engine_uuid))
761 761 except Exception:
762 762 self.log.error("DB Error saving task destination %r"%msg_id, exc_info=True)
763 763
764 764
765 765 def mia_task_request(self, idents, msg):
766 766 raise NotImplementedError
767 767 client_id = idents[0]
768 768 # content = dict(mia=self.mia,status='ok')
769 769 # self.session.send('mia_reply', content=content, idents=client_id)
770 770
771 771
772 772 #--------------------- IOPub Traffic ------------------------------
773 773
774 774 def save_iopub_message(self, topics, msg):
775 775 """save an iopub message into the db"""
776 776 # print (topics)
777 777 try:
778 778 msg = self.session.unpack_message(msg, content=True)
779 779 except:
780 780 self.log.error("iopub::invalid IOPub message", exc_info=True)
781 781 return
782 782
783 783 parent = msg['parent_header']
784 784 if not parent:
785 785 self.log.error("iopub::invalid IOPub message: %s"%msg)
786 786 return
787 787 msg_id = parent['msg_id']
788 788 msg_type = msg['msg_type']
789 789 content = msg['content']
790 790
791 791 # ensure msg_id is in db
792 792 try:
793 793 rec = self.db.get_record(msg_id)
794 794 except KeyError:
795 795 rec = empty_record()
796 796 rec['msg_id'] = msg_id
797 797 self.db.add_record(msg_id, rec)
798 798 # stream
799 799 d = {}
800 800 if msg_type == 'stream':
801 801 name = content['name']
802 802 s = rec[name] or ''
803 803 d[name] = s + content['data']
804 804
805 805 elif msg_type == 'pyerr':
806 806 d['pyerr'] = content
807 807 elif msg_type == 'pyin':
808 808 d['pyin'] = content['code']
809 809 else:
810 810 d[msg_type] = content.get('data', '')
811 811
812 812 try:
813 813 self.db.update_record(msg_id, d)
814 814 except Exception:
815 815 self.log.error("DB Error saving iopub message %r"%msg_id, exc_info=True)
816 816
817 817
818 818
819 819 #-------------------------------------------------------------------------
820 820 # Registration requests
821 821 #-------------------------------------------------------------------------
822 822
823 823 def connection_request(self, client_id, msg):
824 824 """Reply with connection addresses for clients."""
825 825 self.log.info("client::client %s connected"%client_id)
826 826 content = dict(status='ok')
827 827 content.update(self.client_info)
828 828 jsonable = {}
829 829 for k,v in self.keytable.iteritems():
830 830 if v not in self.dead_engines:
831 831 jsonable[str(k)] = v
832 832 content['engines'] = jsonable
833 833 self.session.send(self.query, 'connection_reply', content, parent=msg, ident=client_id)
834 834
835 835 def register_engine(self, reg, msg):
836 836 """Register a new engine."""
837 837 content = msg['content']
838 838 try:
839 839 queue = content['queue']
840 840 except KeyError:
841 841 self.log.error("registration::queue not specified", exc_info=True)
842 842 return
843 843 heart = content.get('heartbeat', None)
844 844 """register a new engine, and create the socket(s) necessary"""
845 845 eid = self._next_id
846 846 # print (eid, queue, reg, heart)
847 847
848 848 self.log.debug("registration::register_engine(%i, %r, %r, %r)"%(eid, queue, reg, heart))
849 849
850 850 content = dict(id=eid,status='ok')
851 851 content.update(self.engine_info)
852 852 # check if requesting available IDs:
853 853 if queue in self.by_ident:
854 854 try:
855 855 raise KeyError("queue_id %r in use"%queue)
856 856 except:
857 857 content = error.wrap_exception()
858 858 self.log.error("queue_id %r in use"%queue, exc_info=True)
859 859 elif heart in self.hearts: # need to check unique hearts?
860 860 try:
861 861 raise KeyError("heart_id %r in use"%heart)
862 862 except:
863 863 self.log.error("heart_id %r in use"%heart, exc_info=True)
864 864 content = error.wrap_exception()
865 865 else:
866 866 for h, pack in self.incoming_registrations.iteritems():
867 867 if heart == h:
868 868 try:
869 869 raise KeyError("heart_id %r in use"%heart)
870 870 except:
871 871 self.log.error("heart_id %r in use"%heart, exc_info=True)
872 872 content = error.wrap_exception()
873 873 break
874 874 elif queue == pack[1]:
875 875 try:
876 876 raise KeyError("queue_id %r in use"%queue)
877 877 except:
878 878 self.log.error("queue_id %r in use"%queue, exc_info=True)
879 879 content = error.wrap_exception()
880 880 break
881 881
882 882 msg = self.session.send(self.query, "registration_reply",
883 883 content=content,
884 884 ident=reg)
885 885
886 886 if content['status'] == 'ok':
887 887 if heart in self.heartmonitor.hearts:
888 888 # already beating
889 889 self.incoming_registrations[heart] = (eid,queue,reg[0],None)
890 890 self.finish_registration(heart)
891 891 else:
892 892 purge = lambda : self._purge_stalled_registration(heart)
893 893 dc = ioloop.DelayedCallback(purge, self.registration_timeout, self.loop)
894 894 dc.start()
895 895 self.incoming_registrations[heart] = (eid,queue,reg[0],dc)
896 896 else:
897 897 self.log.error("registration::registration %i failed: %s"%(eid, content['evalue']))
898 898 return eid
899 899
900 900 def unregister_engine(self, ident, msg):
901 901 """Unregister an engine that explicitly requested to leave."""
902 902 try:
903 903 eid = msg['content']['id']
904 904 except:
905 905 self.log.error("registration::bad engine id for unregistration: %s"%ident, exc_info=True)
906 906 return
907 907 self.log.info("registration::unregister_engine(%s)"%eid)
908 908 # print (eid)
909 909 uuid = self.keytable[eid]
910 910 content=dict(id=eid, queue=uuid)
911 911 self.dead_engines.add(uuid)
912 912 # self.ids.remove(eid)
913 913 # uuid = self.keytable.pop(eid)
914 914 #
915 915 # ec = self.engines.pop(eid)
916 916 # self.hearts.pop(ec.heartbeat)
917 917 # self.by_ident.pop(ec.queue)
918 918 # self.completed.pop(eid)
919 919 handleit = lambda : self._handle_stranded_msgs(eid, uuid)
920 920 dc = ioloop.DelayedCallback(handleit, self.registration_timeout, self.loop)
921 921 dc.start()
922 922 ############## TODO: HANDLE IT ################
923 923
924 924 if self.notifier:
925 925 self.session.send(self.notifier, "unregistration_notification", content=content)
926 926
927 927 def _handle_stranded_msgs(self, eid, uuid):
928 928 """Handle messages known to be on an engine when the engine unregisters.
929 929
930 930 It is possible that this will fire prematurely - that is, an engine will
931 931 go down after completing a result, and the client will be notified
932 932 that the result failed and later receive the actual result.
933 933 """
934 934
935 935 outstanding = self.queues[eid]
936 936
937 937 for msg_id in outstanding:
938 938 self.pending.remove(msg_id)
939 939 self.all_completed.add(msg_id)
940 940 try:
941 941 raise error.EngineError("Engine %r died while running task %r"%(eid, msg_id))
942 942 except:
943 943 content = error.wrap_exception()
944 944 # build a fake header:
945 945 header = {}
946 946 header['engine'] = uuid
947 947 header['date'] = datetime.now()
948 948 rec = dict(result_content=content, result_header=header, result_buffers=[])
949 949 rec['completed'] = header['date']
950 950 rec['engine_uuid'] = uuid
951 951 try:
952 952 self.db.update_record(msg_id, rec)
953 953 except Exception:
954 954 self.log.error("DB Error handling stranded msg %r"%msg_id, exc_info=True)
955 955
956 956
957 957 def finish_registration(self, heart):
958 958 """Second half of engine registration, called after our HeartMonitor
959 959 has received a beat from the Engine's Heart."""
960 960 try:
961 961 (eid,queue,reg,purge) = self.incoming_registrations.pop(heart)
962 962 except KeyError:
963 963 self.log.error("registration::tried to finish nonexistant registration", exc_info=True)
964 964 return
965 965 self.log.info("registration::finished registering engine %i:%r"%(eid,queue))
966 966 if purge is not None:
967 967 purge.stop()
968 968 control = queue
969 969 self.ids.add(eid)
970 970 self.keytable[eid] = queue
971 971 self.engines[eid] = EngineConnector(id=eid, queue=queue, registration=reg,
972 972 control=control, heartbeat=heart)
973 973 self.by_ident[queue] = eid
974 974 self.queues[eid] = list()
975 975 self.tasks[eid] = list()
976 976 self.completed[eid] = list()
977 977 self.hearts[heart] = eid
978 978 content = dict(id=eid, queue=self.engines[eid].queue)
979 979 if self.notifier:
980 980 self.session.send(self.notifier, "registration_notification", content=content)
981 981 self.log.info("engine::Engine Connected: %i"%eid)
982 982
983 983 def _purge_stalled_registration(self, heart):
984 984 if heart in self.incoming_registrations:
985 985 eid = self.incoming_registrations.pop(heart)[0]
986 986 self.log.info("registration::purging stalled registration: %i"%eid)
987 987 else:
988 988 pass
989 989
990 990 #-------------------------------------------------------------------------
991 991 # Client Requests
992 992 #-------------------------------------------------------------------------
993 993
994 994 def shutdown_request(self, client_id, msg):
995 995 """handle shutdown request."""
996 996 self.session.send(self.query, 'shutdown_reply', content={'status': 'ok'}, ident=client_id)
997 997 # also notify other clients of shutdown
998 998 self.session.send(self.notifier, 'shutdown_notice', content={'status': 'ok'})
999 999 dc = ioloop.DelayedCallback(lambda : self._shutdown(), 1000, self.loop)
1000 1000 dc.start()
1001 1001
1002 1002 def _shutdown(self):
1003 1003 self.log.info("hub::hub shutting down.")
1004 1004 time.sleep(0.1)
1005 1005 sys.exit(0)
1006 1006
1007 1007
1008 1008 def check_load(self, client_id, msg):
1009 1009 content = msg['content']
1010 1010 try:
1011 1011 targets = content['targets']
1012 1012 targets = self._validate_targets(targets)
1013 1013 except:
1014 1014 content = error.wrap_exception()
1015 1015 self.session.send(self.query, "hub_error",
1016 1016 content=content, ident=client_id)
1017 1017 return
1018 1018
1019 1019 content = dict(status='ok')
1020 1020 # loads = {}
1021 1021 for t in targets:
1022 1022 content[bytes(t)] = len(self.queues[t])+len(self.tasks[t])
1023 1023 self.session.send(self.query, "load_reply", content=content, ident=client_id)
1024 1024
1025 1025
1026 1026 def queue_status(self, client_id, msg):
1027 1027 """Return the Queue status of one or more targets.
1028 1028 if verbose: return the msg_ids
1029 1029 else: return len of each type.
1030 1030 keys: queue (pending MUX jobs)
1031 1031 tasks (pending Task jobs)
1032 1032 completed (finished jobs from both queues)"""
1033 1033 content = msg['content']
1034 1034 targets = content['targets']
1035 1035 try:
1036 1036 targets = self._validate_targets(targets)
1037 1037 except:
1038 1038 content = error.wrap_exception()
1039 1039 self.session.send(self.query, "hub_error",
1040 1040 content=content, ident=client_id)
1041 1041 return
1042 1042 verbose = content.get('verbose', False)
1043 1043 content = dict(status='ok')
1044 1044 for t in targets:
1045 1045 queue = self.queues[t]
1046 1046 completed = self.completed[t]
1047 1047 tasks = self.tasks[t]
1048 1048 if not verbose:
1049 1049 queue = len(queue)
1050 1050 completed = len(completed)
1051 1051 tasks = len(tasks)
1052 1052 content[bytes(t)] = {'queue': queue, 'completed': completed , 'tasks': tasks}
1053 1053 content['unassigned'] = list(self.unassigned) if verbose else len(self.unassigned)
1054 1054
1055 1055 self.session.send(self.query, "queue_reply", content=content, ident=client_id)
1056 1056
1057 1057 def purge_results(self, client_id, msg):
1058 1058 """Purge results from memory. This method is more valuable before we move
1059 1059 to a DB based message storage mechanism."""
1060 1060 content = msg['content']
1061 1061 msg_ids = content.get('msg_ids', [])
1062 1062 reply = dict(status='ok')
1063 1063 if msg_ids == 'all':
1064 1064 try:
1065 1065 self.db.drop_matching_records(dict(completed={'$ne':None}))
1066 1066 except Exception:
1067 1067 reply = error.wrap_exception()
1068 1068 else:
1069 for msg_id in msg_ids:
1070 if msg_id in self.all_completed:
1071 self.db.drop_record(msg_id)
1072 else:
1073 if msg_id in self.pending:
1074 try:
1075 raise IndexError("msg pending: %r"%msg_id)
1076 except:
1077 reply = error.wrap_exception()
1078 else:
1069 pending = filter(lambda m: m in self.pending, msg_ids)
1070 if pending:
1071 try:
1072 raise IndexError("msg pending: %r"%pending[0])
1073 except:
1074 reply = error.wrap_exception()
1075 else:
1076 try:
1077 self.db.drop_matching_records(dict(msg_id={'$in':msg_ids}))
1078 except Exception:
1079 reply = error.wrap_exception()
1080
1081 if reply['status'] == 'ok':
1082 eids = content.get('engine_ids', [])
1083 for eid in eids:
1084 if eid not in self.engines:
1079 1085 try:
1080 raise IndexError("No such msg: %r"%msg_id)
1086 raise IndexError("No such engine: %i"%eid)
1081 1087 except:
1082 1088 reply = error.wrap_exception()
1083 break
1084 eids = content.get('engine_ids', [])
1085 for eid in eids:
1086 if eid not in self.engines:
1089 break
1090 msg_ids = self.completed.pop(eid)
1091 uid = self.engines[eid].queue
1087 1092 try:
1088 raise IndexError("No such engine: %i"%eid)
1089 except:
1093 self.db.drop_matching_records(dict(engine_uuid=uid, completed={'$ne':None}))
1094 except Exception:
1090 1095 reply = error.wrap_exception()
1091 break
1092 msg_ids = self.completed.pop(eid)
1093 uid = self.engines[eid].queue
1094 try:
1095 self.db.drop_matching_records(dict(engine_uuid=uid, completed={'$ne':None}))
1096 except Exception:
1097 reply = error.wrap_exception()
1098 break
1096 break
1099 1097
1100 1098 self.session.send(self.query, 'purge_reply', content=reply, ident=client_id)
1101 1099
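# The purge request content this handler expects looks like (illustrative):
#     {'msg_ids': 'all'}                     # drop every completed record
#     {'msg_ids': ['msg-1', 'msg-2']}        # drop specific completed msgs
#     {'msg_ids': [], 'engine_ids': [0, 1]}  # drop completed records per engine
# pending msg_ids and unknown engine ids produce an error reply instead.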
1102 1100 def resubmit_task(self, client_id, msg):
1103 1101 """Resubmit one or more tasks."""
1104 1102 def finish(reply):
1105 1103 self.session.send(self.query, 'resubmit_reply', content=reply, ident=client_id)
1106 1104
1107 1105 content = msg['content']
1108 1106 msg_ids = content['msg_ids']
1109 1107 reply = dict(status='ok')
1110 1108 try:
1111 1109 records = self.db.find_records({'msg_id' : {'$in' : msg_ids}}, keys=[
1112 1110 'header', 'content', 'buffers'])
1113 1111 except Exception:
1114 1112 self.log.error('db::db error finding tasks to resubmit', exc_info=True)
1115 1113 return finish(error.wrap_exception())
1116 1114
1117 1115 # validate msg_ids
1118 1116 found_ids = [ rec['msg_id'] for rec in records ]
1119 1117 invalid_ids = filter(lambda m: m in self.pending, found_ids)
1120 1118 if len(records) > len(msg_ids):
1121 1119 try:
1122 1120 raise RuntimeError("DB appears to be in an inconsistent state. "
1123 1121 "More matching records were found than should exist")
1124 1122 except Exception:
1125 1123 return finish(error.wrap_exception())
1126 1124 elif len(records) < len(msg_ids):
1127 1125 missing = [ m for m in msg_ids if m not in found_ids ]
1128 1126 try:
1129 1127 raise KeyError("No such msg(s): %s"%missing)
1130 1128 except KeyError:
1131 1129 return finish(error.wrap_exception())
1132 1130 elif invalid_ids:
1133 1131 msg_id = invalid_ids[0]
1134 1132 try:
1135 1133 raise ValueError("Task %r appears to be inflight"%(msg_id))
1136 1134 except Exception:
1137 1135 return finish(error.wrap_exception())
1138 1136
1139 1137 # clear the existing records
1140 1138 rec = empty_record()
1141 1139 map(rec.pop, ['msg_id', 'header', 'content', 'buffers', 'submitted'])
1142 1140 rec['resubmitted'] = datetime.now()
1143 1141 rec['queue'] = 'task'
1144 1142 rec['client_uuid'] = client_id[0]
1145 1143 try:
1146 1144 for msg_id in msg_ids:
1147 1145 self.all_completed.discard(msg_id)
1148 1146 self.db.update_record(msg_id, rec)
1149 1147 except Exception:
1150 1148 self.log.error('db::db error updating record', exc_info=True)
1151 1149 reply = error.wrap_exception()
1152 1150 else:
1153 1151 # send the messages
1154 1152 for rec in records:
1155 1153 header = rec['header']
1156 1154 msg = self.session.msg(header['msg_type'])
1157 1155 msg['content'] = rec['content']
1158 1156 msg['header'] = header
1159 1157 msg['msg_id'] = rec['msg_id']
1160 1158 self.session.send(self.resubmit, msg, buffers=rec['buffers'])
1161 1159
1162 1160 finish(dict(status='ok'))
1163 1161
1164 1162
1165 1163 def _extract_record(self, rec):
1166 1164 """decompose a TaskRecord dict into subsection of reply for get_result"""
1167 1165 io_dict = {}
1168 1166 for key in 'pyin pyout pyerr stdout stderr'.split():
1169 1167 io_dict[key] = rec[key]
1170 1168 content = { 'result_content': rec['result_content'],
1171 1169 'header': rec['header'],
1172 1170 'result_header' : rec['result_header'],
1173 1171 'io' : io_dict,
1174 1172 }
1175 1173 if rec['result_buffers']:
1176 1174 buffers = map(str, rec['result_buffers'])
1177 1175 else:
1178 1176 buffers = []
1179 1177
1180 1178 return content, buffers
1181 1179
1182 1180 def get_results(self, client_id, msg):
1183 1181 """Get the result of 1 or more messages."""
1184 1182 content = msg['content']
1185 1183 msg_ids = sorted(set(content['msg_ids']))
1186 1184 statusonly = content.get('status_only', False)
1187 1185 pending = []
1188 1186 completed = []
1189 1187 content = dict(status='ok')
1190 1188 content['pending'] = pending
1191 1189 content['completed'] = completed
1192 1190 buffers = []
1193 1191 if not statusonly:
1194 1192 try:
1195 1193 matches = self.db.find_records(dict(msg_id={'$in':msg_ids}))
1196 1194 # turn match list into dict, for faster lookup
1197 1195 records = {}
1198 1196 for rec in matches:
1199 1197 records[rec['msg_id']] = rec
1200 1198 except Exception:
1201 1199 content = error.wrap_exception()
1202 1200 self.session.send(self.query, "result_reply", content=content,
1203 1201 parent=msg, ident=client_id)
1204 1202 return
1205 1203 else:
1206 1204 records = {}
1207 1205 for msg_id in msg_ids:
1208 1206 if msg_id in self.pending:
1209 1207 pending.append(msg_id)
1210 1208 elif msg_id in self.all_completed:
1211 1209 completed.append(msg_id)
1212 1210 if not statusonly:
1213 1211 c,bufs = self._extract_record(records[msg_id])
1214 1212 content[msg_id] = c
1215 1213 buffers.extend(bufs)
1216 1214 elif msg_id in records:
1217 1215 if records[msg_id]['completed']:
1218 1216 completed.append(msg_id)
1219 1217 c,bufs = self._extract_record(records[msg_id])
1220 1218 content[msg_id] = c
1221 1219 buffers.extend(bufs)
1222 1220 else:
1223 1221 pending.append(msg_id)
1224 1222 else:
1225 1223 try:
1226 1224 raise KeyError('No such message: '+msg_id)
1227 1225 except:
1228 1226 content = error.wrap_exception()
1229 1227 break
1230 1228 self.session.send(self.query, "result_reply", content=content,
1231 1229 parent=msg, ident=client_id,
1232 1230 buffers=buffers)
1233 1231
1234 1232 def get_history(self, client_id, msg):
1235 1233 """Get a list of all msg_ids in our DB records"""
1236 1234 try:
1237 1235 msg_ids = self.db.get_history()
1238 1236 except Exception as e:
1239 1237 content = error.wrap_exception()
1240 1238 else:
1241 1239 content = dict(status='ok', history=msg_ids)
1242 1240
1243 1241 self.session.send(self.query, "history_reply", content=content,
1244 1242 parent=msg, ident=client_id)
1245 1243
1246 1244 def db_query(self, client_id, msg):
1247 1245 """Perform a raw query on the task record database."""
1248 1246 content = msg['content']
1249 1247 query = content.get('query', {})
1250 1248 keys = content.get('keys', None)
1251 1249 query = util.extract_dates(query)
1252 1250 buffers = []
1253 1251 empty = list()
1254 1252
1255 1253 try:
1256 1254 records = self.db.find_records(query, keys)
1257 1255 except Exception as e:
1258 1256 content = error.wrap_exception()
1259 1257 else:
1260 1258 # extract buffers from reply content:
1261 1259 if keys is not None:
1262 1260 buffer_lens = [] if 'buffers' in keys else None
1263 1261 result_buffer_lens = [] if 'result_buffers' in keys else None
1264 1262 else:
1265 1263 buffer_lens = []
1266 1264 result_buffer_lens = []
1267 1265
1268 1266 for rec in records:
1269 1267 # buffers may be None, so double check
1270 1268 if buffer_lens is not None:
1271 1269 b = rec.pop('buffers', empty) or empty
1272 1270 buffer_lens.append(len(b))
1273 1271 buffers.extend(b)
1274 1272 if result_buffer_lens is not None:
1275 1273 rb = rec.pop('result_buffers', empty) or empty
1276 1274 result_buffer_lens.append(len(rb))
1277 1275 buffers.extend(rb)
1278 1276 content = dict(status='ok', records=records, buffer_lens=buffer_lens,
1279 1277 result_buffer_lens=result_buffer_lens)
1280 1278
1281 1279 self.session.send(self.query, "db_reply", content=content,
1282 1280 parent=msg, ident=client_id,
1283 1281 buffers=buffers)
1284 1282
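
The db_reply sent above flattens buffers out of the records; buffer_lens and result_buffer_lens record how many buffers belong to each record, in order. A minimal client-side sketch of reassembly (the helper name is invented):

    def attach_buffers(records, buffer_lens, result_buffer_lens, buffers):
        """Re-attach flattened buffers to their records, in order."""
        buffers = list(buffers)
        for i, rec in enumerate(records):
            if buffer_lens is not None:
                n = buffer_lens[i]
                rec['buffers'], buffers = buffers[:n], buffers[n:]
            if result_buffer_lens is not None:
                n = result_buffer_lens[i]
                rec['result_buffers'], buffers = buffers[:n], buffers[n:]
        return records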
@@ -1,96 +1,101 b''
1 1 """A TaskRecord backend using mongodb"""
2 2 #-----------------------------------------------------------------------------
3 3 # Copyright (C) 2010 The IPython Development Team
4 4 #
5 5 # Distributed under the terms of the BSD License. The full license is in
6 6 # the file COPYING, distributed as part of this software.
7 7 #-----------------------------------------------------------------------------
8 8
9 from datetime import datetime
10
11 9 from pymongo import Connection
12 10 from pymongo.binary import Binary
13 11
14 from IPython.utils.traitlets import Dict, List, CUnicode
12 from IPython.utils.traitlets import Dict, List, CUnicode, CStr, Instance
15 13
16 14 from .dictdb import BaseDB
17 15
18 16 #-----------------------------------------------------------------------------
19 17 # MongoDB class
20 18 #-----------------------------------------------------------------------------
21 19
22 20 class MongoDB(BaseDB):
23 21 """MongoDB TaskRecord backend."""
24 22
25 23 connection_args = List(config=True) # args passed to pymongo.Connection
26 24 connection_kwargs = Dict(config=True) # kwargs passed to pymongo.Connection
27 25 database = CUnicode(config=True) # name of the mongodb database
28 _table = Dict()
26
27 _connection = Instance(Connection) # pymongo connection
29 28
30 29 def __init__(self, **kwargs):
31 30 super(MongoDB, self).__init__(**kwargs)
32 self._connection = Connection(*self.connection_args, **self.connection_kwargs)
31 if self._connection is None:
32 self._connection = Connection(*self.connection_args, **self.connection_kwargs)
33 33 if not self.database:
34 34 self.database = self.session
35 35 self._db = self._connection[self.database]
36 36 self._records = self._db['task_records']
37 self._records.ensure_index('msg_id', unique=True)
38 self._records.ensure_index('submitted') # for sorting history
37 40
38 41 def _binary_buffers(self, rec):
39 42 for key in ('buffers', 'result_buffers'):
40 43 if rec.get(key, None):
41 44 rec[key] = map(Binary, rec[key])
42 45 return rec
43 46
44 47 def add_record(self, msg_id, rec):
45 48 """Add a new Task Record, by msg_id."""
46 49 # print rec
47 50 rec = self._binary_buffers(rec)
48 obj_id = self._records.insert(rec)
49 self._table[msg_id] = obj_id
51 self._records.insert(rec)
50 52
51 53 def get_record(self, msg_id):
52 54 """Get a specific Task Record, by msg_id."""
53 return self._records.find_one(self._table[msg_id])
55 r = self._records.find_one({'msg_id': msg_id})
56 if not r:
57 # r will be None if nothing is found
58 raise KeyError(msg_id)
59 return r
54 60
55 61 def update_record(self, msg_id, rec):
56 62 """Update the data in an existing record."""
57 63 rec = self._binary_buffers(rec)
58 obj_id = self._table[msg_id]
59 self._records.update({'_id':obj_id}, {'$set': rec})
64
65 self._records.update({'msg_id':msg_id}, {'$set': rec})
60 66
61 67 def drop_matching_records(self, check):
62 68 """Remove a record from the DB."""
63 69 self._records.remove(check)
64 70
65 71 def drop_record(self, msg_id):
66 72 """Remove a record from the DB."""
67 obj_id = self._table.pop(msg_id)
68 self._records.remove(obj_id)
73 self._records.remove({'msg_id':msg_id})
69 74
70 75 def find_records(self, check, keys=None):
71 76 """Find records matching a query dict, optionally extracting subset of keys.
72 77
73 78 Returns list of matching records.
74 79
75 80 Parameters
76 81 ----------
77 82
78 83 check: dict
79 84 mongodb-style query argument
80 85 keys: list of strs [optional]
81 86 if specified, the subset of keys to extract. msg_id will *always* be
82 87 included.
83 88 """
84 89 if keys and 'msg_id' not in keys:
85 90 keys.append('msg_id')
86 91 matches = list(self._records.find(check,keys))
87 92 for rec in matches:
88 93 rec.pop('_id')
89 94 return matches
90 95
91 96 def get_history(self):
92 97 """get all msg_ids, ordered by time submitted."""
93 98 cursor = self._records.find({},{'msg_id':1}).sort('submitted')
94 99 return [ rec['msg_id'] for rec in cursor ]
95 100
96 101
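With records keyed by msg_id (enforced by the unique index above) instead of an in-memory map of mongo ObjectIds, every operation is a plain pymongo query, so records survive controller restarts. A rough sketch of the resulting interface, assuming a local mongod is reachable; the database name and msg_id are made up:

    from datetime import datetime
    from IPython.parallel.controller.mongodb import MongoDB

    db = MongoDB(database='taskdb')
    db.add_record('my-msg-id', {'msg_id': 'my-msg-id', 'completed': None})
    db.update_record('my-msg-id', {'completed': datetime.now()})
    rec = db.get_record('my-msg-id')    # raises KeyError if no such record
    done = db.find_records({'completed': {'$ne': None}}, keys=['msg_id'])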
@@ -1,312 +1,326 b''
1 1 """A TaskRecord backend using sqlite3"""
2 2 #-----------------------------------------------------------------------------
3 3 # Copyright (C) 2011 The IPython Development Team
4 4 #
5 5 # Distributed under the terms of the BSD License. The full license is in
6 6 # the file COPYING, distributed as part of this software.
7 7 #-----------------------------------------------------------------------------
8 8
9 9 import json
10 10 import os
11 11 import cPickle as pickle
12 12 from datetime import datetime
13 13
14 14 import sqlite3
15 15
16 16 from zmq.eventloop import ioloop
17 17
18 18 from IPython.utils.traitlets import CUnicode, CStr, Instance, List
19 19 from .dictdb import BaseDB
20 20 from IPython.parallel.util import ISO8601
21 21
22 22 #-----------------------------------------------------------------------------
23 23 # SQLite operators, adapters, and converters
24 24 #-----------------------------------------------------------------------------
25 25
26 26 operators = {
27 27 '$lt' : "<",
28 28 '$gt' : ">",
29 29 # null is handled weird with ==,!=
30 '$eq' : "IS",
31 '$ne' : "IS NOT",
30 '$eq' : "=",
31 '$ne' : "!=",
32 32 '$lte': "<=",
33 33 '$gte': ">=",
34 '$in' : ('IS', ' OR '),
35 '$nin': ('IS NOT', ' AND '),
34 '$in' : ('=', ' OR '),
35 '$nin': ('!=', ' AND '),
36 36 # '$all': None,
37 37 # '$mod': None,
38 38 # '$exists' : None
39 39 }
40 null_operators = {
41 '=' : "IS NULL",
42 '!=' : "IS NOT NULL",
43 }
40 44
41 45 def _adapt_datetime(dt):
42 46 return dt.strftime(ISO8601)
43 47
44 48 def _convert_datetime(ds):
45 49 if ds is None:
46 50 return ds
47 51 else:
48 52 return datetime.strptime(ds, ISO8601)
49 53
50 54 def _adapt_dict(d):
51 55 return json.dumps(d)
52 56
53 57 def _convert_dict(ds):
54 58 if ds is None:
55 59 return ds
56 60 else:
57 61 return json.loads(ds)
58 62
59 63 def _adapt_bufs(bufs):
60 64 # this is *horrible*
61 65 # copy buffers into single list and pickle it:
62 66 if bufs and isinstance(bufs[0], (bytes, buffer)):
63 67 return sqlite3.Binary(pickle.dumps(map(bytes, bufs),-1))
64 68 elif bufs:
65 69 return bufs
66 70 else:
67 71 return None
68 72
69 73 def _convert_bufs(bs):
70 74 if bs is None:
71 75 return []
72 76 else:
73 77 return pickle.loads(bytes(bs))
74 78
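sqlite3 applies these adapter/converter pairs automatically once the connection is opened with detect_types=sqlite3.PARSE_DECLTYPES: the adapter runs on the way in, and the converter named by the first word of the declared column type runs on the way out. A self-contained illustration of the mechanism (the table and column are made up):

    import json
    import sqlite3

    sqlite3.register_adapter(dict, json.dumps)
    sqlite3.register_converter('dict', json.loads)
    db = sqlite3.connect(':memory:', detect_types=sqlite3.PARSE_DECLTYPES)
    db.execute("CREATE TABLE t (header dict text)")
    db.execute("INSERT INTO t VALUES (?)", ({'a': 1},))
    print db.execute("SELECT header FROM t").fetchone()[0]  # -> {u'a': 1}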
75 79 #-----------------------------------------------------------------------------
76 80 # SQLiteDB class
77 81 #-----------------------------------------------------------------------------
78 82
79 83 class SQLiteDB(BaseDB):
80 84 """SQLite3 TaskRecord backend."""
81 85
82 86 filename = CUnicode('tasks.db', config=True)
83 87 location = CUnicode('', config=True)
84 88 table = CUnicode("", config=True)
85 89
86 90 _db = Instance('sqlite3.Connection')
87 91 _keys = List(['msg_id' ,
88 92 'header' ,
89 93 'content',
90 94 'buffers',
91 95 'submitted',
92 96 'client_uuid' ,
93 97 'engine_uuid' ,
94 98 'started',
95 99 'completed',
96 100 'resubmitted',
97 101 'result_header' ,
98 102 'result_content' ,
99 103 'result_buffers' ,
100 104 'queue' ,
101 105 'pyin' ,
102 106 'pyout',
103 107 'pyerr',
104 108 'stdout',
105 109 'stderr',
106 110 ])
107 111
108 112 def __init__(self, **kwargs):
109 113 super(SQLiteDB, self).__init__(**kwargs)
110 114 if not self.table:
111 115 # use session, and prefix _, since table names can't start with a digit
112 116 self.table = '_'+self.session.replace('-','_')
113 117 if not self.location:
114 118 if hasattr(self.config.Global, 'cluster_dir'):
115 119 self.location = self.config.Global.cluster_dir
116 120 else:
117 121 self.location = '.'
118 122 self._init_db()
119 123
120 124 # register db commit as 2s periodic callback
121 125 # to prevent clogging pipes
122 126 # assumes we are being run in a zmq ioloop app
123 127 loop = ioloop.IOLoop.instance()
124 128 pc = ioloop.PeriodicCallback(self._db.commit, 2000, loop)
125 129 pc.start()
126 130
127 131 def _defaults(self, keys=None):
128 132 """create an empty record"""
129 133 d = {}
130 134 keys = self._keys if keys is None else keys
131 135 for key in keys:
132 136 d[key] = None
133 137 return d
134 138
135 139 def _init_db(self):
136 140 """Connect to the database and get new session number."""
137 141 # register adapters
138 142 sqlite3.register_adapter(datetime, _adapt_datetime)
139 143 sqlite3.register_converter('datetime', _convert_datetime)
140 144 sqlite3.register_adapter(dict, _adapt_dict)
141 145 sqlite3.register_converter('dict', _convert_dict)
142 146 sqlite3.register_adapter(list, _adapt_bufs)
143 147 sqlite3.register_converter('bufs', _convert_bufs)
144 148 # connect to the db
145 149 dbfile = os.path.join(self.location, self.filename)
146 150 self._db = sqlite3.connect(dbfile, detect_types=sqlite3.PARSE_DECLTYPES,
147 151 # isolation_level = None)#,
148 152 cached_statements=64)
149 153 # print dir(self._db)
150 154
151 155 self._db.execute("""CREATE TABLE IF NOT EXISTS %s
152 156 (msg_id text PRIMARY KEY,
153 157 header dict text,
154 158 content dict text,
155 159 buffers bufs blob,
156 160 submitted datetime text,
157 161 client_uuid text,
158 162 engine_uuid text,
159 163 started datetime text,
160 164 completed datetime text,
161 165 resubmitted datetime text,
162 166 result_header dict text,
163 167 result_content dict text,
164 168 result_buffers bufs blob,
165 169 queue text,
166 170 pyin text,
167 171 pyout text,
168 172 pyerr text,
169 173 stdout text,
170 174 stderr text)
171 175 """%self.table)
172 176 self._db.commit()
173 177
174 178 def _dict_to_list(self, d):
175 179 """turn a mongodb-style record dict into a list."""
176 180
177 181 return [ d[key] for key in self._keys ]
178 182
179 183 def _list_to_dict(self, line, keys=None):
180 184 """Inverse of dict_to_list"""
181 185 keys = self._keys if keys is None else keys
182 186 d = self._defaults(keys)
183 187 for key,value in zip(keys, line):
184 188 d[key] = value
185 189
186 190 return d
187 191
188 192 def _render_expression(self, check):
189 193 """Turn a mongodb-style search dict into an SQL query."""
190 194 expressions = []
191 195 args = []
192 196
193 197 skeys = set(check.keys())
194 198 skeys.difference_update(set(self._keys))
195 199 skeys.difference_update(set(['buffers', 'result_buffers']))
196 200 if skeys:
197 201 raise KeyError("Illegal testing key(s): %s"%skeys)
198 202
199 203 for name,sub_check in check.iteritems():
200 204 if isinstance(sub_check, dict):
201 205 for test,value in sub_check.iteritems():
202 206 try:
203 207 op = operators[test]
204 208 except KeyError:
205 209 raise KeyError("Unsupported operator: %r"%test)
206 210 if isinstance(op, tuple):
207 211 op, join = op
208 expr = "%s %s ?"%(name, op)
209 if isinstance(value, (tuple,list)):
210 expr = '( %s )'%( join.join([expr]*len(value)) )
211 args.extend(value)
212
213 if value is None and op in null_operators:
214 expr = "%s %s"%(name, null_operators[op])
212 215 else:
213 args.append(value)
216 expr = "%s %s ?"%(name, op)
217 if isinstance(value, (tuple,list)):
218 if op in null_operators and any([v is None for v in value]):
219 # equality tests don't work with NULL
220 raise ValueError("Cannot use %r test with NULL values on SQLite backend"%test)
221 expr = '( %s )'%( join.join([expr]*len(value)) )
222 args.extend(value)
223 else:
224 args.append(value)
214 225 expressions.append(expr)
215 226 else:
216 227 # it's an equality check
217 expressions.append("%s IS ?"%name)
218 args.append(sub_check)
228 if sub_check is None:
229 expressions.append("%s IS NULL"%name)
230 else:
231 expressions.append("%s = ?"%name)
232 args.append(sub_check)
219 233
220 234 expr = " AND ".join(expressions)
221 235 return expr, args
222 236
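The net effect of the NULL handling on rendered queries, sketched informally (tic stands for any datetime value):

    # mongodb-style check             ->  SQL fragment, args
    # {'completed': None}             ->  "completed IS NULL", []
    # {'completed': {'$ne': None}}    ->  "completed IS NOT NULL", []
    # {'completed': {'$lt': tic}}     ->  "completed < ?", [tic]
    # {'msg_id': {'$in': ['a','b']}}  ->  "( msg_id = ? OR msg_id = ? )", ['a', 'b']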
223 237 def add_record(self, msg_id, rec):
224 238 """Add a new Task Record, by msg_id."""
225 239 d = self._defaults()
226 240 d.update(rec)
227 241 d['msg_id'] = msg_id
228 242 line = self._dict_to_list(d)
229 243 tups = '(%s)'%(','.join(['?']*len(line)))
230 244 self._db.execute("INSERT INTO %s VALUES %s"%(self.table, tups), line)
231 245 # self._db.commit()
232 246
233 247 def get_record(self, msg_id):
234 248 """Get a specific Task Record, by msg_id."""
235 249 cursor = self._db.execute("""SELECT * FROM %s WHERE msg_id==?"""%self.table, (msg_id,))
236 250 line = cursor.fetchone()
237 251 if line is None:
238 252 raise KeyError("No such msg: %r"%msg_id)
239 253 return self._list_to_dict(line)
240 254
241 255 def update_record(self, msg_id, rec):
242 256 """Update the data in an existing record."""
243 257 query = "UPDATE %s SET "%self.table
244 258 sets = []
245 259 keys = sorted(rec.keys())
246 260 values = []
247 261 for key in keys:
248 262 sets.append('%s = ?'%key)
249 263 values.append(rec[key])
250 264 query += ', '.join(sets)
251 265 query += ' WHERE msg_id == ?'
252 266 values.append(msg_id)
253 267 self._db.execute(query, values)
254 268 # self._db.commit()
255 269
256 270 def drop_record(self, msg_id):
257 271 """Remove a record from the DB."""
258 272 self._db.execute("""DELETE FROM %s WHERE msg_id==?"""%self.table, (msg_id,))
259 273 # self._db.commit()
260 274
261 275 def drop_matching_records(self, check):
262 276 """Remove a record from the DB."""
263 277 expr,args = self._render_expression(check)
264 278 query = "DELETE FROM %s WHERE %s"%(self.table, expr)
265 279 self._db.execute(query,args)
266 280 # self._db.commit()
267 281
268 282 def find_records(self, check, keys=None):
269 283 """Find records matching a query dict, optionally extracting subset of keys.
270 284
271 285 Returns list of matching records.
272 286
273 287 Parameters
274 288 ----------
275 289
276 290 check: dict
277 291 mongodb-style query argument
278 292 keys: list of strs [optional]
279 293 if specified, the subset of keys to extract. msg_id will *always* be
280 294 included.
281 295 """
282 296 if keys:
283 297 bad_keys = [ key for key in keys if key not in self._keys ]
284 298 if bad_keys:
285 299 raise KeyError("Bad record key(s): %s"%bad_keys)
286 300
287 301 if keys:
288 302 # ensure msg_id is present and first:
289 303 if 'msg_id' in keys:
290 304 keys.remove('msg_id')
291 305 keys.insert(0, 'msg_id')
292 306 req = ', '.join(keys)
293 307 else:
294 308 req = '*'
295 309 expr,args = self._render_expression(check)
296 310 query = """SELECT %s FROM %s WHERE %s"""%(req, self.table, expr)
297 311 cursor = self._db.execute(query, args)
298 312 matches = cursor.fetchall()
299 313 records = []
300 314 for line in matches:
301 315 rec = self._list_to_dict(line, keys)
302 316 records.append(rec)
303 317 return records
304 318
305 319 def get_history(self):
306 320 """get all msg_ids, ordered by time submitted."""
307 321 query = """SELECT msg_id FROM %s ORDER by submitted ASC"""%self.table
308 322 cursor = self._db.execute(query)
309 323 # will be a list of length 1 tuples
310 324 return [ tup[0] for tup in cursor.fetchall()]
311 325
312 326 __all__ = ['SQLiteDB'] No newline at end of file
@@ -1,237 +1,244 b''
1 1 """Tests for parallel client.py"""
2 2
3 3 #-------------------------------------------------------------------------------
4 4 # Copyright (C) 2011 The IPython Development Team
5 5 #
6 6 # Distributed under the terms of the BSD License. The full license is in
7 7 # the file COPYING, distributed as part of this software.
8 8 #-------------------------------------------------------------------------------
9 9
10 10 #-------------------------------------------------------------------------------
11 11 # Imports
12 12 #-------------------------------------------------------------------------------
13 13
14 14 import time
15 15 from datetime import datetime
16 16 from tempfile import mktemp
17 17
18 18 import zmq
19 19
20 20 from IPython.parallel.client import client as clientmod
21 21 from IPython.parallel import error
22 22 from IPython.parallel import AsyncResult, AsyncHubResult
23 23 from IPython.parallel import LoadBalancedView, DirectView
24 24
25 25 from clienttest import ClusterTestCase, segfault, wait, add_engines
26 26
27 27 def setup():
28 28 add_engines(4)
29 29
30 30 class TestClient(ClusterTestCase):
31 31
32 32 def test_ids(self):
33 33 n = len(self.client.ids)
34 34 self.add_engines(3)
35 35 self.assertEquals(len(self.client.ids), n+3)
36 36
37 37 def test_view_indexing(self):
38 38 """test index access for views"""
39 39 self.add_engines(2)
40 40 targets = self.client._build_targets('all')[-1]
41 41 v = self.client[:]
42 42 self.assertEquals(v.targets, targets)
43 43 t = self.client.ids[2]
44 44 v = self.client[t]
45 45 self.assert_(isinstance(v, DirectView))
46 46 self.assertEquals(v.targets, t)
47 47 t = self.client.ids[2:4]
48 48 v = self.client[t]
49 49 self.assert_(isinstance(v, DirectView))
50 50 self.assertEquals(v.targets, t)
51 51 v = self.client[::2]
52 52 self.assert_(isinstance(v, DirectView))
53 53 self.assertEquals(v.targets, targets[::2])
54 54 v = self.client[1::3]
55 55 self.assert_(isinstance(v, DirectView))
56 56 self.assertEquals(v.targets, targets[1::3])
57 57 v = self.client[:-3]
58 58 self.assert_(isinstance(v, DirectView))
59 59 self.assertEquals(v.targets, targets[:-3])
60 60 v = self.client[-1]
61 61 self.assert_(isinstance(v, DirectView))
62 62 self.assertEquals(v.targets, targets[-1])
63 63 self.assertRaises(TypeError, lambda : self.client[None])
64 64
65 65 def test_lbview_targets(self):
66 66 """test load_balanced_view targets"""
67 67 v = self.client.load_balanced_view()
68 68 self.assertEquals(v.targets, None)
69 69 v = self.client.load_balanced_view(-1)
70 70 self.assertEquals(v.targets, [self.client.ids[-1]])
71 71 v = self.client.load_balanced_view('all')
72 72 self.assertEquals(v.targets, self.client.ids)
73 73
74 74 def test_targets(self):
75 75 """test various valid targets arguments"""
76 76 build = self.client._build_targets
77 77 ids = self.client.ids
78 78 idents,targets = build(None)
79 79 self.assertEquals(ids, targets)
80 80
81 81 def test_clear(self):
82 82 """test clear behavior"""
83 83 # self.add_engines(2)
84 84 v = self.client[:]
85 85 v.block=True
86 86 v.push(dict(a=5))
87 87 v.pull('a')
88 88 id0 = self.client.ids[-1]
89 89 self.client.clear(targets=id0, block=True)
90 90 a = self.client[:-1].get('a')
91 91 self.assertRaisesRemote(NameError, self.client[id0].get, 'a')
92 92 self.client.clear(block=True)
93 93 for i in self.client.ids:
94 94 # print i
95 95 self.assertRaisesRemote(NameError, self.client[i].get, 'a')
96 96
97 97 def test_get_result(self):
98 98 """test getting results from the Hub."""
99 99 c = clientmod.Client(profile='iptest')
100 100 # self.add_engines(1)
101 101 t = c.ids[-1]
102 102 ar = c[t].apply_async(wait, 1)
103 103 # give the monitor time to notice the message
104 104 time.sleep(.25)
105 105 ahr = self.client.get_result(ar.msg_ids)
106 106 self.assertTrue(isinstance(ahr, AsyncHubResult))
107 107 self.assertEquals(ahr.get(), ar.get())
108 108 ar2 = self.client.get_result(ar.msg_ids)
109 109 self.assertFalse(isinstance(ar2, AsyncHubResult))
110 110 c.close()
111 111
112 112 def test_ids_list(self):
113 113 """test client.ids"""
114 114 # self.add_engines(2)
115 115 ids = self.client.ids
116 116 self.assertEquals(ids, self.client._ids)
117 117 self.assertFalse(ids is self.client._ids)
118 118 ids.remove(ids[-1])
119 119 self.assertNotEquals(ids, self.client._ids)
120 120
121 121 def test_queue_status(self):
122 122 # self.addEngine(4)
123 123 ids = self.client.ids
124 124 id0 = ids[0]
125 125 qs = self.client.queue_status(targets=id0)
126 126 self.assertTrue(isinstance(qs, dict))
127 127 self.assertEquals(sorted(qs.keys()), ['completed', 'queue', 'tasks'])
128 128 allqs = self.client.queue_status()
129 129 self.assertTrue(isinstance(allqs, dict))
130 130 self.assertEquals(sorted(allqs.keys()), sorted(self.client.ids + ['unassigned']))
131 131 unassigned = allqs.pop('unassigned')
132 132 for eid,qs in allqs.items():
133 133 self.assertTrue(isinstance(qs, dict))
134 134 self.assertEquals(sorted(qs.keys()), ['completed', 'queue', 'tasks'])
135 135
136 136 def test_shutdown(self):
137 137 # self.addEngine(4)
138 138 ids = self.client.ids
139 139 id0 = ids[0]
140 140 self.client.shutdown(id0, block=True)
141 141 while id0 in self.client.ids:
142 142 time.sleep(0.1)
143 143 self.client.spin()
144 144
145 145 self.assertRaises(IndexError, lambda : self.client[id0])
146 146
147 147 def test_result_status(self):
148 148 pass
149 149 # to be written
150 150
151 151 def test_db_query_dt(self):
152 152 """test db query by date"""
153 153 hist = self.client.hub_history()
154 154 middle = self.client.db_query({'msg_id' : hist[len(hist)/2]})[0]
155 155 tic = middle['submitted']
156 156 before = self.client.db_query({'submitted' : {'$lt' : tic}})
157 157 after = self.client.db_query({'submitted' : {'$gte' : tic}})
158 158 self.assertEquals(len(before)+len(after),len(hist))
159 159 for b in before:
160 160 self.assertTrue(b['submitted'] < tic)
161 161 for a in after:
162 162 self.assertTrue(a['submitted'] >= tic)
163 163 same = self.client.db_query({'submitted' : tic})
164 164 for s in same:
165 165 self.assertTrue(s['submitted'] == tic)
166 166
167 167 def test_db_query_keys(self):
168 168 """test extracting subset of record keys"""
169 169 found = self.client.db_query({'msg_id': {'$ne' : ''}},keys=['submitted', 'completed'])
170 170 for rec in found:
171 171 self.assertEquals(set(rec.keys()), set(['msg_id', 'submitted', 'completed']))
172 172
173 173 def test_db_query_msg_id(self):
174 174 """ensure msg_id is always in db queries"""
175 175 found = self.client.db_query({'msg_id': {'$ne' : ''}},keys=['submitted', 'completed'])
176 176 for rec in found:
177 177 self.assertTrue('msg_id' in rec.keys())
178 178 found = self.client.db_query({'msg_id': {'$ne' : ''}},keys=['submitted'])
179 179 for rec in found:
180 180 self.assertTrue('msg_id' in rec.keys())
181 181 found = self.client.db_query({'msg_id': {'$ne' : ''}},keys=['msg_id'])
182 182 for rec in found:
183 183 self.assertTrue('msg_id' in rec.keys())
184 184
185 185 def test_db_query_in(self):
186 186 """test db query with '$in','$nin' operators"""
187 187 hist = self.client.hub_history()
188 188 even = hist[::2]
189 189 odd = hist[1::2]
190 190 recs = self.client.db_query({ 'msg_id' : {'$in' : even}})
191 191 found = [ r['msg_id'] for r in recs ]
192 192 self.assertEquals(set(even), set(found))
193 193 recs = self.client.db_query({ 'msg_id' : {'$nin' : even}})
194 194 found = [ r['msg_id'] for r in recs ]
195 195 self.assertEquals(set(odd), set(found))
196 196
197 197 def test_hub_history(self):
198 198 hist = self.client.hub_history()
199 199 recs = self.client.db_query({ 'msg_id' : {"$ne":''}})
200 200 recdict = {}
201 201 for rec in recs:
202 202 recdict[rec['msg_id']] = rec
203 203
204 204 latest = datetime(1984,1,1)
205 205 for msg_id in hist:
206 206 rec = recdict[msg_id]
207 207 newt = rec['submitted']
208 208 self.assertTrue(newt >= latest)
209 209 latest = newt
210 210 ar = self.client[-1].apply_async(lambda : 1)
211 211 ar.get()
212 212 time.sleep(0.25)
213 213 self.assertEquals(self.client.hub_history()[-1:],ar.msg_ids)
214 214
215 215 def test_resubmit(self):
216 216 def f():
217 217 import random
218 218 return random.random()
219 219 v = self.client.load_balanced_view()
220 220 ar = v.apply_async(f)
221 221 r1 = ar.get(1)
222 222 ahr = self.client.resubmit(ar.msg_ids)
223 223 r2 = ahr.get(1)
224 224 self.assertFalse(r1 == r2)
225 225
226 226 def test_resubmit_inflight(self):
227 227 """ensure ValueError on resubmit of inflight task"""
228 228 v = self.client.load_balanced_view()
229 229 ar = v.apply_async(time.sleep,1)
230 230 # give the message a chance to arrive
231 231 time.sleep(0.2)
232 232 self.assertRaisesRemote(ValueError, self.client.resubmit, ar.msg_ids)
233 233 ar.get(2)
234 234
235 235 def test_resubmit_badkey(self):
236 236 """ensure KeyError on resubmit of nonexistant task"""
237 237 self.assertRaisesRemote(KeyError, self.client.resubmit, ['invalid'])
238
239 def test_purge_results(self):
240 hist = self.client.hub_history()
241 self.client.purge_results(hist)
242 newhist = self.client.hub_history()
243 self.assertTrue(len(newhist) == 0)
244
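The Hub-database calls exercised above (hub_history, db_query, resubmit, purge_results) make up the client-side API for the task database. A condensed sketch of typical use, assuming a running cluster under the iptest profile:

    from IPython.parallel import Client

    rc = Client(profile='iptest')
    hist = rc.hub_history()                 # msg_ids, oldest first
    recs = rc.db_query({'msg_id': {'$in': hist[:5]}},
                       keys=['msg_id', 'completed'])
    rc.resubmit(hist[-1:])                  # rerun a completed task
    rc.purge_results(hist)                  # drop the Hub's records of these tasks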
@@ -1,182 +1,170 b''
1 1 """Tests for db backends"""
2 2
3 3 #-------------------------------------------------------------------------------
4 4 # Copyright (C) 2011 The IPython Development Team
5 5 #
6 6 # Distributed under the terms of the BSD License. The full license is in
7 7 # the file COPYING, distributed as part of this software.
8 8 #-------------------------------------------------------------------------------
9 9
10 10 #-------------------------------------------------------------------------------
11 11 # Imports
12 12 #-------------------------------------------------------------------------------
13 13
14 14
15 15 import tempfile
16 16 import time
17 17
18 import uuid
19
20 18 from datetime import datetime, timedelta
21 from random import choice, randint
22 19 from unittest import TestCase
23 20
24 21 from nose import SkipTest
25 22
26 23 from IPython.parallel import error, streamsession as ss
27 24 from IPython.parallel.controller.dictdb import DictDB
28 25 from IPython.parallel.controller.sqlitedb import SQLiteDB
29 26 from IPython.parallel.controller.hub import init_record, empty_record
30 27
31 28 #-------------------------------------------------------------------------------
32 29 # TestCases
33 30 #-------------------------------------------------------------------------------
34 31
35 32 class TestDictBackend(TestCase):
36 33 def setUp(self):
37 34 self.session = ss.StreamSession()
38 35 self.db = self.create_db()
39 36 self.load_records(16)
40 37
41 38 def create_db(self):
42 39 return DictDB()
43 40
44 41 def load_records(self, n=1):
45 42 """load n records for testing"""
46 43 #sleep 1/10 s, to ensure the timestamp differs from previous calls
47 44 time.sleep(0.1)
48 45 msg_ids = []
49 46 for i in range(n):
50 47 msg = self.session.msg('apply_request', content=dict(a=5))
51 48 msg['buffers'] = []
52 49 rec = init_record(msg)
53 50 msg_ids.append(msg['msg_id'])
54 51 self.db.add_record(msg['msg_id'], rec)
55 52 return msg_ids
56 53
57 54 def test_add_record(self):
58 55 before = self.db.get_history()
59 56 self.load_records(5)
60 57 after = self.db.get_history()
61 58 self.assertEquals(len(after), len(before)+5)
62 59 self.assertEquals(after[:-5],before)
63 60
64 61 def test_drop_record(self):
65 62 msg_id = self.load_records()[-1]
66 63 rec = self.db.get_record(msg_id)
67 64 self.db.drop_record(msg_id)
68 65 self.assertRaises(KeyError,self.db.get_record, msg_id)
69 66
70 67 def _round_to_millisecond(self, dt):
71 68 """necessary because mongodb rounds microseconds"""
72 69 micro = dt.microsecond
73 70 extra = int(str(micro)[-3:])
74 71 return dt - timedelta(microseconds=extra)
75 72
76 73 def test_update_record(self):
77 74 now = self._round_to_millisecond(datetime.now())
78 75 #
79 76 msg_id = self.db.get_history()[-1]
80 77 rec1 = self.db.get_record(msg_id)
81 78 data = {'stdout': 'hello there', 'completed' : now}
82 79 self.db.update_record(msg_id, data)
83 80 rec2 = self.db.get_record(msg_id)
84 81 self.assertEquals(rec2['stdout'], 'hello there')
85 82 self.assertEquals(rec2['completed'], now)
86 83 rec1.update(data)
87 84 self.assertEquals(rec1, rec2)
88 85
89 86 # def test_update_record_bad(self):
90 87 # """test updating nonexistant records"""
91 88 # msg_id = str(uuid.uuid4())
92 89 # data = {'stdout': 'hello there'}
93 90 # self.assertRaises(KeyError, self.db.update_record, msg_id, data)
94 91
95 92 def test_find_records_dt(self):
96 93 """test finding records by date"""
97 94 hist = self.db.get_history()
98 95 middle = self.db.get_record(hist[len(hist)/2])
99 96 tic = middle['submitted']
100 97 before = self.db.find_records({'submitted' : {'$lt' : tic}})
101 98 after = self.db.find_records({'submitted' : {'$gte' : tic}})
102 99 self.assertEquals(len(before)+len(after),len(hist))
103 100 for b in before:
104 101 self.assertTrue(b['submitted'] < tic)
105 102 for a in after:
106 103 self.assertTrue(a['submitted'] >= tic)
107 104 same = self.db.find_records({'submitted' : tic})
108 105 for s in same:
109 106 self.assertTrue(s['submitted'] == tic)
110 107
111 108 def test_find_records_keys(self):
112 109 """test extracting subset of record keys"""
113 110 found = self.db.find_records({'msg_id': {'$ne' : ''}},keys=['submitted', 'completed'])
114 111 for rec in found:
115 112 self.assertEquals(set(rec.keys()), set(['msg_id', 'submitted', 'completed']))
116 113
117 114 def test_find_records_msg_id(self):
118 115 """ensure msg_id is always in found records"""
119 116 found = self.db.find_records({'msg_id': {'$ne' : ''}},keys=['submitted', 'completed'])
120 117 for rec in found:
121 118 self.assertTrue('msg_id' in rec.keys())
122 119 found = self.db.find_records({'msg_id': {'$ne' : ''}},keys=['submitted'])
123 120 for rec in found:
124 121 self.assertTrue('msg_id' in rec.keys())
125 122 found = self.db.find_records({'msg_id': {'$ne' : ''}},keys=['msg_id'])
126 123 for rec in found:
127 124 self.assertTrue('msg_id' in rec.keys())
128 125
129 126 def test_find_records_in(self):
130 127 """test finding records with '$in','$nin' operators"""
131 128 hist = self.db.get_history()
132 129 even = hist[::2]
133 130 odd = hist[1::2]
134 131 recs = self.db.find_records({ 'msg_id' : {'$in' : even}})
135 132 found = [ r['msg_id'] for r in recs ]
136 133 self.assertEquals(set(even), set(found))
137 134 recs = self.db.find_records({ 'msg_id' : {'$nin' : even}})
138 135 found = [ r['msg_id'] for r in recs ]
139 136 self.assertEquals(set(odd), set(found))
140 137
141 138 def test_get_history(self):
142 139 msg_ids = self.db.get_history()
143 140 latest = datetime(1984,1,1)
144 141 for msg_id in msg_ids:
145 142 rec = self.db.get_record(msg_id)
146 143 newt = rec['submitted']
147 144 self.assertTrue(newt >= latest)
148 145 latest = newt
149 146 msg_id = self.load_records(1)[-1]
150 147 self.assertEquals(self.db.get_history()[-1],msg_id)
151 148
152 149 def test_datetime(self):
153 150 """get/set timestamps with datetime objects"""
154 151 msg_id = self.db.get_history()[-1]
155 152 rec = self.db.get_record(msg_id)
156 153 self.assertTrue(isinstance(rec['submitted'], datetime))
157 154 self.db.update_record(msg_id, dict(completed=datetime.now()))
158 155 rec = self.db.get_record(msg_id)
159 156 self.assertTrue(isinstance(rec['completed'], datetime))
157
158 def test_drop_matching(self):
159 msg_ids = self.load_records(10)
160 query = {'msg_id' : {'$in':msg_ids}}
161 self.db.drop_matching_records(query)
162 recs = self.db.find_records(query)
163 self.assertTrue(len(recs)==0)
160 164
161 165 class TestSQLiteBackend(TestDictBackend):
162 166 def create_db(self):
163 167 return SQLiteDB(location=tempfile.gettempdir())
164 168
165 169 def tearDown(self):
166 170 self.db._db.close()
167
168 # optional MongoDB test
169 try:
170 from IPython.parallel.controller.mongodb import MongoDB
171 except ImportError:
172 pass
173 else:
174 class TestMongoBackend(TestDictBackend):
175 def create_db(self):
176 try:
177 return MongoDB(database='iptestdb')
178 except Exception:
179 raise SkipTest("Couldn't connect to mongodb instance")
180
181 def tearDown(self):
182 self.db._connection.drop_database('iptestdb')
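All of the backends answer the same interface these tests run against, so a new backend only needs to override create_db. A minimal standalone sketch of that shared interface against the in-memory DictDB (record contents abbreviated):

    from datetime import datetime
    from IPython.parallel.controller.dictdb import DictDB

    db = DictDB()
    db.add_record('m1', {'msg_id': 'm1', 'submitted': datetime.now(),
                         'completed': None})
    pending = db.find_records({'completed': None})   # plain equality check
    db.drop_matching_records({'msg_id': {'$in': ['m1']}})
    assert db.get_history() == []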
@@ -1,440 +1,441 b''
1 1 # -*- coding: utf-8 -*-
2 2 """IPython Test Suite Runner.
3 3
4 4 This module provides a main entry point to a user script to test IPython
5 5 itself from the command line. There are two ways of running this script:
6 6
7 7 1. With the syntax `iptest all`. This runs our entire test suite by
8 8 calling this script (with different arguments) recursively. This
9 9 causes modules and packages to be tested in different processes, using nose
10 10 or trial where appropriate.
11 11 2. With the regular nose syntax, like `iptest -vvs IPython`. In this form
12 12 the script simply calls nose, but with special command line flags and
13 13 plugins loaded.
14 14
15 15 """
16 16
17 17 #-----------------------------------------------------------------------------
18 18 # Copyright (C) 2009 The IPython Development Team
19 19 #
20 20 # Distributed under the terms of the BSD License. The full license is in
21 21 # the file COPYING, distributed as part of this software.
22 22 #-----------------------------------------------------------------------------
23 23
24 24 #-----------------------------------------------------------------------------
25 25 # Imports
26 26 #-----------------------------------------------------------------------------
27 27
28 28 # Stdlib
29 29 import os
30 30 import os.path as path
31 31 import signal
32 32 import sys
33 33 import subprocess
34 34 import tempfile
35 35 import time
36 36 import warnings
37 37
38 38 # Note: monkeypatch!
39 39 # We need to monkeypatch a small problem in nose itself first, before importing
40 40 # it for actual use. This should get into nose upstream, but its release cycle
41 41 # is slow and we need it for our parametric tests to work correctly.
42 42 from IPython.testing import nosepatch
43 43 # Now, proceed to import nose itself
44 44 import nose.plugins.builtin
45 45 from nose.core import TestProgram
46 46
47 47 # Our own imports
48 48 from IPython.utils.path import get_ipython_module_path
49 49 from IPython.utils.process import find_cmd, pycmd2argv
50 50 from IPython.utils.sysinfo import sys_info
51 51
52 52 from IPython.testing import globalipapp
53 53 from IPython.testing.plugin.ipdoctest import IPythonDoctest
54 54 from IPython.external.decorators import KnownFailure
55 55
56 56 pjoin = path.join
57 57
58 58
59 59 #-----------------------------------------------------------------------------
60 60 # Globals
61 61 #-----------------------------------------------------------------------------
62 62
63 63
64 64 #-----------------------------------------------------------------------------
65 65 # Warnings control
66 66 #-----------------------------------------------------------------------------
67 67
68 68 # Twisted generates annoying warnings with Python 2.6, as does other code
69 69 # that imports 'sets' as of today
70 70 warnings.filterwarnings('ignore', 'the sets module is deprecated',
71 71 DeprecationWarning )
72 72
73 73 # This one also comes from Twisted
74 74 warnings.filterwarnings('ignore', 'the sha module is deprecated',
75 75 DeprecationWarning)
76 76
77 77 # Wx on Fedora11 spits these out
78 78 warnings.filterwarnings('ignore', 'wxPython/wxWidgets release number mismatch',
79 79 UserWarning)
80 80
81 81 #-----------------------------------------------------------------------------
82 82 # Logic for skipping doctests
83 83 #-----------------------------------------------------------------------------
84 84
85 85 def test_for(mod, min_version=None):
86 86 """Test to see if mod is importable."""
87 87 try:
88 88 __import__(mod)
89 89 except (ImportError, RuntimeError):
90 90 # GTK reports Runtime error if it can't be initialized even if it's
91 91 # importable.
92 92 return False
93 93 else:
94 94 if min_version:
95 95 return sys.modules[mod].__version__ >= min_version
96 96 else:
97 97 return True
98 98
99 99 # Global dict where we can store information on what we have and what we don't
100 100 # have available at test run time
101 101 have = {}
102 102
103 103 have['curses'] = test_for('_curses')
104 104 have['matplotlib'] = test_for('matplotlib')
105 105 have['pexpect'] = test_for('pexpect')
106 106 have['pymongo'] = test_for('pymongo')
107 107 have['wx'] = test_for('wx')
108 108 have['wx.aui'] = test_for('wx.aui')
109 109 if os.name == 'nt':
110 110 have['zmq'] = test_for('zmq', '2.1.7')
111 111 else:
112 112 have['zmq'] = test_for('zmq', '2.1.4')
113 113 have['qt'] = test_for('IPython.external.qt')
114 114
115 115 #-----------------------------------------------------------------------------
116 116 # Functions and classes
117 117 #-----------------------------------------------------------------------------
118 118
119 119 def report():
120 120 """Return a string with a summary report of test-related variables."""
121 121
122 122 out = [ sys_info(), '\n']
123 123
124 124 avail = []
125 125 not_avail = []
126 126
127 127 for k, is_avail in have.items():
128 128 if is_avail:
129 129 avail.append(k)
130 130 else:
131 131 not_avail.append(k)
132 132
133 133 if avail:
134 134 out.append('\nTools and libraries available at test time:\n')
135 135 avail.sort()
136 136 out.append(' ' + ' '.join(avail)+'\n')
137 137
138 138 if not_avail:
139 139 out.append('\nTools and libraries NOT available at test time:\n')
140 140 not_avail.sort()
141 141 out.append(' ' + ' '.join(not_avail)+'\n')
142 142
143 143 return ''.join(out)
144 144
145 145
146 146 def make_exclude():
147 147 """Make patterns of modules and packages to exclude from testing.
148 148
149 149 For the IPythonDoctest plugin, we need to exclude certain patterns that
150 150 cause testing problems. We should strive to minimize the number of
151 151 skipped modules, since this means untested code.
152 152
153 153 These modules and packages will NOT get scanned by nose at all for tests.
154 154 """
155 155 # Simple utility to make IPython paths more readable; we need a lot of
156 156 # these below
157 157 ipjoin = lambda *paths: pjoin('IPython', *paths)
158 158
159 159 exclusions = [ipjoin('external'),
160 160 pjoin('IPython_doctest_plugin'),
161 161 ipjoin('quarantine'),
162 162 ipjoin('deathrow'),
163 163 ipjoin('testing', 'attic'),
164 164 # This guy is probably attic material
165 165 ipjoin('testing', 'mkdoctests'),
166 166 # Testing inputhook will need a lot of thought, to figure out
167 167 # how to have tests that don't lock up with the gui event
168 168 # loops in the picture
169 169 ipjoin('lib', 'inputhook'),
170 170 # Config files aren't really importable stand-alone
171 171 ipjoin('config', 'default'),
172 172 ipjoin('config', 'profile'),
173 173 ]
174 174
175 175 if not have['wx']:
176 176 exclusions.append(ipjoin('lib', 'inputhookwx'))
177 177
178 178 # We do this unconditionally, so that the test suite doesn't import
179 179 # gtk, changing the default encoding and masking some unicode bugs.
180 180 exclusions.append(ipjoin('lib', 'inputhookgtk'))
181 181
182 182 # These have to be skipped on win32 because they use echo, rm, cd, etc.
183 183 # See ticket https://bugs.launchpad.net/bugs/366982
184 184 if sys.platform == 'win32':
185 185 exclusions.append(ipjoin('testing', 'plugin', 'test_exampleip'))
186 186 exclusions.append(ipjoin('testing', 'plugin', 'dtexample'))
187 187
188 188 if not have['pexpect']:
189 189 exclusions.extend([ipjoin('scripts', 'irunner'),
190 190 ipjoin('lib', 'irunner'),
191 191 ipjoin('lib', 'tests', 'test_irunner')])
192 192
193 193 if not have['zmq']:
194 194 exclusions.append(ipjoin('zmq'))
195 195 exclusions.append(ipjoin('frontend', 'qt'))
196 196 exclusions.append(ipjoin('parallel'))
197 197 elif not have['qt']:
198 198 exclusions.append(ipjoin('frontend', 'qt'))
199 199
200 200 if not have['pymongo']:
201 201 exclusions.append(ipjoin('parallel', 'controller', 'mongodb'))
202 exclusions.append(ipjoin('parallel', 'tests', 'test_mongodb'))
202 203
203 204 if not have['matplotlib']:
204 205 exclusions.extend([ipjoin('lib', 'pylabtools'),
205 206 ipjoin('lib', 'tests', 'test_pylabtools')])
206 207
207 208 # This is needed for the reg-exp to match on win32 in the ipdoctest plugin.
208 209 if sys.platform == 'win32':
209 210 exclusions = [s.replace('\\','\\\\') for s in exclusions]
210 211
211 212 return exclusions
212 213
213 214
214 215 class IPTester(object):
215 216 """Call that calls iptest or trial in a subprocess.
216 217 """
217 218 #: string, name of test runner that will be called
218 219 runner = None
219 220 #: list, parameters for test runner
220 221 params = None
221 222 #: list, arguments of system call to be made to call test runner
222 223 call_args = None
223 224 #: list, process ids of subprocesses we start (for cleanup)
224 225 pids = None
225 226
226 227 def __init__(self, runner='iptest', params=None):
227 228 """Create new test runner."""
228 229 p = os.path
229 230 if runner == 'iptest':
230 231 iptest_app = get_ipython_module_path('IPython.testing.iptest')
231 232 self.runner = pycmd2argv(iptest_app) + sys.argv[1:]
232 233 else:
233 234 raise Exception('Not a valid test runner: %s' % repr(runner))
234 235 if params is None:
235 236 params = []
236 237 if isinstance(params, str):
237 238 params = [params]
238 239 self.params = params
239 240
240 241 # Assemble call
241 242 self.call_args = self.runner+self.params
242 243
243 244 # Store pids of anything we start to clean up on deletion, if possible
244 245 # (on posix only, since win32 has no os.kill)
245 246 self.pids = []
246 247
247 248 if sys.platform == 'win32':
248 249 def _run_cmd(self):
249 250 # On Windows, use os.system instead of subprocess.call, because I
250 251 # was having problems with subprocess and I just don't know enough
251 252 # about win32 to debug this reliably. Os.system may be the 'old
252 253 # fashioned' way to do it, but it works just fine. If someone
253 254 # later can clean this up that's fine, as long as the tests run
254 255 # reliably in win32.
255 256 # What types of problems are you having? They may be related to
256 257 # running Python in unbuffered mode. BG.
257 258 return os.system(' '.join(self.call_args))
258 259 else:
259 260 def _run_cmd(self):
260 261 # print >> sys.stderr, '*** CMD:', ' '.join(self.call_args) # dbg
261 262 subp = subprocess.Popen(self.call_args)
262 263 self.pids.append(subp.pid)
263 264 # If this fails, the pid will be left in self.pids and cleaned up
264 265 # later, but if the wait call succeeds, then we can clear the
265 266 # stored pid.
266 267 retcode = subp.wait()
267 268 self.pids.pop()
268 269 return retcode
269 270
270 271 def run(self):
271 272 """Run the stored commands"""
272 273 try:
273 274 return self._run_cmd()
274 275 except:
275 276 import traceback
276 277 traceback.print_exc()
277 278 return 1 # signal failure
278 279
279 280 def __del__(self):
280 281 """Cleanup on exit by killing any leftover processes."""
281 282
282 283 if not hasattr(os, 'kill'):
283 284 return
284 285
285 286 for pid in self.pids:
286 287 try:
287 288 print 'Cleaning stale PID:', pid
288 289 os.kill(pid, signal.SIGKILL)
289 290 except OSError:
290 291 # This is just a best effort, if we fail or the process was
291 292 # really gone, ignore it.
292 293 pass
293 294
294 295
295 296 def make_runners():
296 297 """Define the top-level packages that need to be tested.
297 298 """
298 299
299 300 # Packages to be tested via nose, that only depend on the stdlib
300 301 nose_pkg_names = ['config', 'core', 'extensions', 'frontend', 'lib',
301 302 'scripts', 'testing', 'utils' ]
302 303
303 304 if have['zmq']:
304 305 nose_pkg_names.append('parallel')
305 306
306 307 # For debugging this code, only load quick stuff
307 308 #nose_pkg_names = ['core', 'extensions'] # dbg
308 309
309 310 # Make fully qualified package names prepending 'IPython.' to our name lists
310 311 nose_packages = ['IPython.%s' % m for m in nose_pkg_names ]
311 312
312 313 # Make runners
313 314 runners = [ (v, IPTester('iptest', params=v)) for v in nose_packages ]
314 315
315 316 return runners
316 317
317 318
318 319 def run_iptest():
319 320 """Run the IPython test suite using nose.
320 321
321 322 This function is called when this script is **not** called with the form
322 323 `iptest all`. It simply calls nose with appropriate command line flags
323 324 and accepts all of the standard nose arguments.
324 325 """
325 326
326 327 warnings.filterwarnings('ignore',
327 328 'This will be removed soon. Use IPython.testing.util instead')
328 329
329 330 argv = sys.argv + [ '--detailed-errors', # extra info in tracebacks
330 331
331 332 # Loading ipdoctest causes problems with Twisted, but
332 333 # our test suite runner now separates things and runs
333 334 # all Twisted tests with trial.
334 335 '--with-ipdoctest',
335 336 '--ipdoctest-tests','--ipdoctest-extension=txt',
336 337
337 338 # We add --exe because of setuptools' imbecility (it
338 339 # blindly does chmod +x on ALL files). Nose does the
339 340 # right thing and it tries to avoid executables,
340 341 # setuptools unfortunately forces our hand here. This
341 342 # has been discussed on the distutils list and the
342 343 # setuptools devs refuse to fix this problem!
343 344 '--exe',
344 345 ]
345 346
346 347 if nose.__version__ >= '0.11':
347 348 # I don't fully understand why we need this one, but depending on what
348 349 # directory the test suite is run from, if we don't give it, 0 tests
349 350 # get run. Specifically, if the test suite is run from the source dir
351 352 with an argument (like 'iptest.py IPython.core'), 0 tests are run,
352 353 even if the same call done in this directory works fine. It appears
352 353 # that if the requested package is in the current dir, nose bails early
353 354 # by default. Since it's otherwise harmless, leave it in by default
354 355 # for nose >= 0.11, though unfortunately nose 0.10 doesn't support it.
355 356 argv.append('--traverse-namespace')
356 357
357 358 # Construct list of plugins, omitting the existing doctest plugin, which
358 359 # ours replaces (and extends).
359 360 plugins = [IPythonDoctest(make_exclude()), KnownFailure()]
360 361 for p in nose.plugins.builtin.plugins:
361 362 plug = p()
362 363 if plug.name == 'doctest':
363 364 continue
364 365 plugins.append(plug)
365 366
366 367 # We need a global ipython running in this process
367 368 globalipapp.start_ipython()
368 369 # Now nose can run
369 370 TestProgram(argv=argv, plugins=plugins)
370 371
371 372
372 373 def run_iptestall():
373 374 """Run the entire IPython test suite by calling nose and trial.
374 375
375 376 This function constructs :class:`IPTester` instances for all IPython
376 377 modules and packages and then runs each of them. This causes the modules
377 378 and packages of IPython to be tested each in their own subprocess using
378 379 nose or twisted.trial appropriately.
379 380 """
380 381
381 382 runners = make_runners()
382 383
383 384 # Run the test runners in a temporary dir so we can nuke it when finished
384 385 # to clean up any junk files left over by accident. This also makes it
385 386 # robust against being run in non-writeable directories by mistake, as the
386 387 # temp dir will always be user-writeable.
387 388 curdir = os.getcwd()
388 389 testdir = tempfile.gettempdir()
389 390 os.chdir(testdir)
390 391
391 392 # Run all test runners, tracking execution time
392 393 failed = []
393 394 t_start = time.time()
394 395 try:
395 396 for (name, runner) in runners:
396 397 print '*'*70
397 398 print 'IPython test group:',name
398 399 res = runner.run()
399 400 if res:
400 401 failed.append( (name, runner) )
401 402 finally:
402 403 os.chdir(curdir)
403 404 t_end = time.time()
404 405 t_tests = t_end - t_start
405 406 nrunners = len(runners)
406 407 nfail = len(failed)
407 408 # summarize results
408 409 print
409 410 print '*'*70
410 411 print 'Test suite completed for system with the following information:'
411 412 print report()
412 413 print 'Ran %s test groups in %.3fs' % (nrunners, t_tests)
413 414 print
414 415 print 'Status:'
415 416 if not failed:
416 417 print 'OK'
417 418 else:
418 419 # If anything went wrong, point out what command to rerun manually to
419 420 # see the actual errors and individual summary
420 421 print 'ERROR - %s out of %s test groups failed.' % (nfail, nrunners)
421 422 for name, failed_runner in failed:
422 423 print '-'*40
423 424 print 'Runner failed:',name
424 425 print 'You may wish to rerun this one individually, with:'
425 426 print ' '.join(failed_runner.call_args)
426 427 print
427 428
428 429
429 430 def main():
430 431 for arg in sys.argv[1:]:
431 432 if arg.startswith('IPython'):
432 433 # This is in-process
433 434 run_iptest()
434 435 else:
435 436 # This starts subprocesses
436 437 run_iptestall()
437 438
438 439
439 440 if __name__ == '__main__':
440 441 main()